From: Will Deacon Date: Mon, 14 Mar 2022 19:02:52 +0000 (+0000) Subject: Merge branch 'for-next/strings' into for-next/core X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=515e5da7b6b52c5d6c7a54fe34165b86361041b5;p=linux.git Merge branch 'for-next/strings' into for-next/core * for-next/strings: Revert "arm64: Mitigate MTE issues with str{n}cmp()" arm64: lib: Import latest version of Arm Optimized Routines' strncmp arm64: lib: Import latest version of Arm Optimized Routines' strcmp --- 515e5da7b6b52c5d6c7a54fe34165b86361041b5 diff --cc arch/arm64/lib/strcmp.S index cda7de747efcf,e6815a3dd2656..9b89b45336074 --- a/arch/arm64/lib/strcmp.S +++ b/arch/arm64/lib/strcmp.S @@@ -32,87 -30,79 +30,79 @@@ #define data2w w3 #define has_nul x4 #define diff x5 + #define off1 x5 #define syndrome x6 - #define tmp1 x7 - #define tmp2 x8 - #define tmp3 x9 - #define zeroones x10 - #define pos x11 - - /* Start of performance-critical section -- one 64B cache line. */ - .align 6 + #define tmp x6 + #define data3 x7 + #define zeroones x8 + #define shift x9 + #define off2 x10 + + /* On big-endian early bytes are at MSB and on little-endian LSB. + LS_FW means shifting towards early bytes. */ + #ifdef __AARCH64EB__ + # define LS_FW lsl + #else + # define LS_FW lsr + #endif + + /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 + (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and + can be done in parallel across the entire word. + Since carry propagation makes 0x1 bytes before a NUL byte appear + NUL too in big-endian, byte-reverse the data before the NUL check. */ + + -SYM_FUNC_START_WEAK_PI(strcmp) +SYM_FUNC_START(__pi_strcmp) - eor tmp1, src1, src2 - mov zeroones, #REP8_01 - tst tmp1, #7 + sub off2, src2, src1 + mov zeroones, REP8_01 + and tmp, src1, 7 + tst off2, 7 b.ne L(misaligned8) - ands tmp1, src1, #7 - b.ne L(mutual_align) - /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 - (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and - can be done in parallel across the entire word. */ + cbnz tmp, L(mutual_align) + + .p2align 4 + L(loop_aligned): - ldr data1, [src1], #8 - ldr data2, [src2], #8 + ldr data2, [src1, off2] + ldr data1, [src1], 8 L(start_realigned): - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - eor diff, data1, data2 /* Non-zero if differences found. */ - bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ + #ifdef __AARCH64EB__ + rev tmp, data1 + sub has_nul, tmp, zeroones + orr tmp, tmp, REP8_7f + #else + sub has_nul, data1, zeroones + orr tmp, data1, REP8_7f + #endif + bics has_nul, has_nul, tmp /* Non-zero if NUL terminator. */ + ccmp data1, data2, 0, eq + b.eq L(loop_aligned) + #ifdef __AARCH64EB__ + rev has_nul, has_nul + #endif + eor diff, data1, data2 orr syndrome, diff, has_nul - cbz syndrome, L(loop_aligned) - /* End of performance-critical section -- one 64B cache line. */ - L(end): - #ifndef __AARCH64EB__ + #ifndef __AARCH64EB__ rev syndrome, syndrome rev data1, data1 - /* The MS-non-zero bit of the syndrome marks either the first bit - that is different, or the top bit of the first zero byte. - Shifting left now will bring the critical information into the - top bits. */ - clz pos, syndrome rev data2, data2 - lsl data1, data1, pos - lsl data2, data2, pos - /* But we need to zero-extend (char is unsigned) the value and then - perform a signed 32-bit subtraction. */ - lsr data1, data1, #56 - sub result, data1, data2, lsr #56 - ret - #else - /* For big-endian we cannot use the trick with the syndrome value - as carry-propagation can corrupt the upper bits if the trailing - bytes in the string contain 0x01. */ - /* However, if there is no NUL byte in the dword, we can generate - the result directly. We can't just subtract the bytes as the - MSB might be significant. */ - cbnz has_nul, 1f - cmp data1, data2 - cset result, ne - cneg result, result, lo - ret - 1: - /* Re-compute the NUL-byte detection, using a byte-reversed value. */ - rev tmp3, data1 - sub tmp1, tmp3, zeroones - orr tmp2, tmp3, #REP8_7f - bic has_nul, tmp1, tmp2 - rev has_nul, has_nul - orr syndrome, diff, has_nul - clz pos, syndrome - /* The MS-non-zero bit of the syndrome marks either the first bit - that is different, or the top bit of the first zero byte. + #endif + clz shift, syndrome + /* The most-significant-non-zero bit of the syndrome marks either the + first bit that is different, or the top bit of the first zero byte. Shifting left now will bring the critical information into the top bits. */ - lsl data1, data1, pos - lsl data2, data2, pos + lsl data1, data1, shift + lsl data2, data2, shift /* But we need to zero-extend (char is unsigned) the value and then perform a signed 32-bit subtraction. */ - lsr data1, data1, #56 - sub result, data1, data2, lsr #56 + lsr data1, data1, 56 + sub result, data1, data2, lsr 56 ret - #endif + + .p2align 4 L(mutual_align): /* Sources are mutually aligned, but are not currently at an @@@ -171,6 -185,6 +185,6 @@@ L(tail) L(done): sub result, data1, data2 ret - -SYM_FUNC_END_PI(strcmp) +SYM_FUNC_END(__pi_strcmp) +SYM_FUNC_ALIAS_WEAK(strcmp, __pi_strcmp) - EXPORT_SYMBOL_NOHWKASAN(strcmp) + EXPORT_SYMBOL_NOKASAN(strcmp) diff --cc arch/arm64/lib/strncmp.S index a848abcec975e,bc195cb866932..fe7bbc0b42a78 --- a/arch/arm64/lib/strncmp.S +++ b/arch/arm64/lib/strncmp.S @@@ -39,12 -39,26 +39,26 @@@ #define tmp3 x10 #define zeroones x11 #define pos x12 - #define limit_wd x13 - #define mask x14 - #define endloop x15 + #define mask x13 + #define endloop x14 #define count mask + #define offset pos + #define neg_offset x15 + + /* Define endian dependent shift operations. + On big-endian early bytes are at MSB and on little-endian LSB. + LS_FW means shifting towards early bytes. + LS_BK means shifting towards later bytes. + */ + #ifdef __AARCH64EB__ + #define LS_FW lsl + #define LS_BK lsr + #else + #define LS_FW lsr + #define LS_BK lsl + #endif -SYM_FUNC_START_WEAK_PI(strncmp) +SYM_FUNC_START(__pi_strncmp) cbz limit, L(ret0) eor tmp1, src1, src2 mov zeroones, #REP8_01 @@@ -256,6 -305,5 +305,6 @@@ L(syndrome_check) L(ret0): mov result, #0 ret -SYM_FUNC_END_PI(strncmp) +SYM_FUNC_END(__pi_strncmp) +SYM_FUNC_ALIAS_WEAK(strncmp, __pi_strncmp) - EXPORT_SYMBOL_NOHWKASAN(strncmp) + EXPORT_SYMBOL_NOKASAN(strncmp)