;
;  linux/arch/c6x/lib/csum_64plus.s
;
;  Port on Texas Instruments TMS320C6x architecture
;
;  Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated
;  Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
;
;  This program is free software; you can redistribute it and/or modify
;  it under the terms of the GNU General Public License version 2 as
;  published by the Free Software Foundation.
;
#include <linux/linkage.h>

;
;unsigned int csum_partial_copy(const char *src, char * dst,
;				int len, int sum)
;
; Copy len bytes from src to dst and accumulate a checksum of the
; copied data.  Whole 32-bit words are handled first (one loop for
; the case where both pointers are word aligned, one for the
; non-aligned case), then an optional half-word, then an optional
; trailing byte.  The accumulator is folded until it fits in 16 bits
; and the given sum is added to the folded value; no further carry
; handling is applied to that last addition.
;
; ILC is saved in B30 on entry and restored before returning.
;
; A4:	src
; B4:	dst
; A6:	len
; B6:	sum
; return csum in A4
;

	.text
ENTRY(csum_partial_copy)
	MVC	.S2	ILC,B30

	MV	.D1X	B6,A31		; given csum
	ZERO	.D1	A9		; csum (a side)
||	ZERO	.D2	B9		; csum (b side)
||	SHRU	.S2X	A6,2,B5		; len / 4

	;; Check alignment and size
	AND	.S1	3,A4,A1
||	AND	.S2	3,B4,B0
	OR	.L2X	B0,A1,B0	; non aligned condition
||	MVC	.S2	B5,ILC		; loop count = number of words
||	MVK	.D2	1,B2		; multiplier used by MPYU below
||	MV	.D1X	B5,A1		; words condition
  [!A1]	B	.S1	L8		; no whole word to copy
   [B0]	BNOP	.S1	L6,5		; src or dst not word aligned

	SPLOOP	1

	;; Main loop for aligned words
	;; NOTE(review): EXTU x,0,16 presumably yields the upper half-word
	;; and MPYU by 1 the lower half-word - confirm against the ISA guide
	LDW	.D1T1	*A4++,A7
	NOP	4
	MV	.S2X	A7,B7
||	EXTU	.S1	A7,0,16,A16
	STW	.D2T2	B7,*B4++
||	MPYU	.M2	B7,B2,B8
||	ADD	.L1	A16,A9,A9
	NOP
	SPKERNEL 8,0
||	ADD	.L2	B8,B9,B9

	;; A1 is cleared so that the test at L6 skips the non-aligned loop
	ZERO	.D1	A1
||	ADD	.L1X	A9,B9,A9	; add csum from a and b sides

L6:
  [!A1]	BNOP	.S1	L8,5

	;; Main loop for non-aligned words
	SPLOOP	2
||	MVK	.L1	1,A2		; multiplier used by MPYU below

	LDNW	.D1T1	*A4++,A7
	NOP	3

	NOP
	MV	.S2X	A7,B7
||	EXTU	.S1	A7,0,16,A16
||	MPYU	.M1	A7,A2,A8

	ADD	.L1	A16,A9,A9
	SPKERNEL 6,0
||	STNW	.D2T2	B7,*B4++
||	ADD	.L1	A8,A9,A9

L8:	AND	.S2X	2,A6,B5		; len & 2: a half-word remains
	CMPGT	.L2	B5,0,B0
  [!B0]	BNOP	.S1	L82,4

	;; Handle the remaining half-word, copied as two single bytes
	ZERO	.L1	A7
||	ZERO	.D1	A8

#ifdef CONFIG_CPU_BIG_ENDIAN

	;; big endian: the first byte is the high byte of the half-word
	LDBU	.D1T1	*A4++,A7
	LDBU	.D1T1	*A4++,A8
	NOP	3
	SHL	.S1	A7,8,A0
	ADD	.S1	A8,A9,A9
	STB	.D2T1	A7,*B4++
||	ADD	.S1	A0,A9,A9
	STB	.D2T1	A8,*B4++

#else

	;; little endian: the second byte is the high byte of the half-word
	LDBU	.D1T1	*A4++,A7
	LDBU	.D1T1	*A4++,A8
	NOP	3
	ADD	.S1	A7,A9,A9
	SHL	.S1	A8,8,A0

	STB	.D2T1	A7,*B4++
||	ADD	.S1	A0,A9,A9
	STB	.D2T1	A8,*B4++

#endif

	;; Handle the trailing byte, if any (len & 1)
L82:	AND	.S2X	1,A6,B0
  [!B0]	BNOP	.S1	L9,5
||	ZERO	.L1	A7

L83:	LDBU	.D1T1	*A4++,A7
	NOP	4

	MV	.L2X	A7,B7

#ifdef CONFIG_CPU_BIG_ENDIAN

	;; big endian: the byte is added shifted left by 8
	STB	.D2T2	B7,*B4++
||	SHL	.S1	A7,8,A7
	ADD	.S1	A7,A9,A9

#else

	;; little endian: the byte is added as is
	STB	.D2T2	B7,*B4++
||	ADD	.S1	A7,A9,A9

#endif

	;; Fold the csum: while (csum >> 16) csum = low16(csum) + (csum >> 16)
L9:	SHRU	.S2X	A9,16,B0
  [!B0]	BNOP	.S1	L10,5

L91:	SHRU	.S2X	A9,16,B4
||	EXTU	.S1	A9,16,16,A3
	ADD	.D1X	A3,B4,A9

	SHRU	.S1	A9,16,A0
   [A0]	BNOP	.S1	L91,5

L10:	ADD	.D1	A31,A9,A9	; add the given csum
	MV	.D1	A9,A4		; return value

	BNOP	.S2	B3,4
	MVC	.S2	B30,ILC		; restore ILC
ENDPROC(csum_partial_copy)

;
;unsigned short
;ip_fast_csum(unsigned char *iph, unsigned int ihl)
;{
;	unsigned int checksum = 0;
;	unsigned short *tosum = (unsigned short *) iph;
;	int len;
;
;	len = ihl*4;
;
;	if (len <= 0)
;		return 0;
;
;	while(len) {
;		len -= 2;
;		checksum += *tosum++;
;	}
;	if (len & 1)
;		checksum += *(unsigned char*) tosum;
;
;	while(checksum >> 16)
;		checksum = (checksum & 0xffff) + (checksum >> 16);
;
;	return ~checksum;
;}
;
; Sum ihl * 4 bytes starting at iph as 16-bit half-words, fold the
; sum to 16 bits and return its complement masked to 16 bits.
; Returns 0 when ihl * 4 is not greater than zero.
;
; ILC is saved in B30 on entry and restored before returning.
;
; A4:	iph
; B4:	ihl
; return checksum in A4
;

	.text

ENTRY(ip_fast_csum)
	ZERO	.D1	A5		; checksum accumulator
||	MVC	.S2	ILC,B30
	SHL	.S2	B4,2,B0		; len = ihl * 4
	CMPGT	.L2	B0,0,B1
  [!B1]	BNOP	.S1	L15,4		; len <= 0: return 0
  [!B1]	ZERO	.D1	A3

  [!B0]	B	.S1	L12
	SHRU	.S2	B0,1,B0		; number of half-words
	MVC	.S2	B0,ILC		; loop count
	NOP	3

	SPLOOP	1
	LDHU	.D1T1	*A4++,A3
	NOP	3
	NOP
	SPKERNEL 5,0
||	ADD	.L1	A3,A5,A5

	;; Fold the checksum until nothing is left above bit 15
L12:	SHRU	.S1	A5,16,A0
  [!A0]	BNOP	.S1	L14,5

L13:	SHRU	.S2X	A5,16,B4
	EXTU	.S1	A5,16,16,A3
	ADD	.D1X	A3,B4,A5

	SHRU	.S1	A5,16,A0
   [A0]	BNOP	.S1	L13,5

L14:	NOT	.D1	A5,A3		; complement ...
	EXTU	.S1	A3,16,16,A3	; ... kept on 16 bits

L15:	BNOP	.S2	B3,3
	MVC	.S2	B30,ILC		; restore ILC
	MV	.D1	A3,A4		; return value
ENDPROC(ip_fast_csum)

;
;unsigned short
;do_csum(unsigned char *buff, unsigned int len)
;{
;	int odd, count;
;	unsigned int result = 0;
;
;	if (len <= 0)
;		goto out;
;	odd = 1 & (unsigned long) buff;
;	if (odd) {
;#ifdef __LITTLE_ENDIAN
;		result += (*buff << 8);
;#else
;		result = *buff;
;#endif
;		len--;
;		buff++;
;	}
;	count = len >> 1;		/* nr of 16-bit words.. */
;	if (count) {
;		if (2 & (unsigned long) buff) {
;			result += *(unsigned short *) buff;
;			count--;
;			len -= 2;
;			buff += 2;
;		}
;		count >>= 1;		/* nr of 32-bit words.. */
;		if (count) {
;			unsigned int carry = 0;
;			do {
;				unsigned int w = *(unsigned int *) buff;
;				count--;
;				buff += 4;
;				result += carry;
;				result += w;
;				carry = (w > result);
;			} while (count);
;			result += carry;
;			result = (result & 0xffff) + (result >> 16);
;		}
;		if (len & 2) {
;			result += *(unsigned short *) buff;
;			buff += 2;
;		}
;	}
;	if (len & 1)
;#ifdef __LITTLE_ENDIAN
;		result += *buff;
;#else
;		result += (*buff << 8);
;#endif
;	result = (result & 0xffff) + (result >> 16);
;	/* add up carry.. */
;	result = (result & 0xffff) + (result >> 16);
;	if (odd)
;		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
;out:
;	return result;
;}
;
; Register usage inside the routine:
;   A1: result		A3: odd flag (bit 0 of buff)
;   A5: return address (copied from B3)
;   B3: remaining len (copied from B4)
;
; NOTE(review): ILC is overwritten before the word loop and, unlike
; csum_partial_copy and ip_fast_csum, is not saved or restored here -
; confirm that callers do not rely on ILC being preserved.
;
; A4:	buff
; B4:	len
; return checksum in A4
;

ENTRY(do_csum)
	CMPGT	.L2	B4,0,B0
  [!B0]	BNOP	.S1	L26,3		; len <= 0: goto out
	EXTU	.S1	A4,31,31,A0	; odd = buff & 1

	MV	.L1	A0,A3		; keep odd for the final byte swap
||	MV	.S1X	B3,A5		; keep the return address
||	MV	.L2	B4,B3		; working copy of len
||	ZERO	.D1	A1		; result = 0

#ifdef CONFIG_CPU_BIG_ENDIAN
   [A0]	SUB	.L2	B3,1,B3
|| [A0]	LDBU	.D1T1	*A4++,A1
#else
  [!A0]	BNOP	.S1	L21,5
|| [A0]	LDBU	.D1T1	*A4++,A0
	SUB	.L2	B3,1,B3
||	SHL	.S1	A0,8,A1
L21:
#endif
	SHR	.S2	B3,1,B0		; count = len >> 1
  [!B0]	BNOP	.S1	L24,3
	MVK	.L1	2,A0
	AND	.L1	A4,A0,A0	; buff & 2

	;; Consume one half-word when buff is not 32-bit aligned
  [!A0]	BNOP	.S1	L22,5
|| [A0]	LDHU	.D1T1	*A4++,A0
	SUB	.L2	B0,1,B0
||	SUB	.S2	B3,2,B3
||	ADD	.L1	A0,A1,A1
L22:
	SHR	.S2	B0,1,B0		; count >>= 1: nr of 32-bit words
||	ZERO	.L1	A0		; carry = 0

  [!B0]	BNOP	.S1	L23,5
|| [B0]	MVC	.S2	B0,ILC		; loop count

	;; 32-bit word loop with carry, see the C reference above
	;; (A2: result, A1: loaded word, A0: carry)
	SPLOOP	3
	SPMASK	L1
||	MV	.L1	A1,A2
||	LDW	.D1T1	*A4++,A1

	NOP	4
	ADD	.L1	A0,A1,A0
	ADD	.L1	A2,A0,A2

	SPKERNEL 1,2
||	CMPGTU	.L1	A1,A2,A0

	ADD	.L1	A0,A2,A6	; result += carry
	EXTU	.S1	A6,16,16,A7	; result & 0xffff
	SHRU	.S2X	A6,16,B0	; result >> 16
	NOP	1
	ADD	.L1X	A7,B0,A1
L23:
	MVK	.L2	2,B0
	AND	.L2	B3,B0,B0	; len & 2
   [B0]	LDHU	.D1T1	*A4++,A0
	NOP	4
   [B0]	ADD	.L1	A0,A1,A1
L24:
	EXTU	.S2	B3,31,31,B0	; len & 1
#ifdef CONFIG_CPU_BIG_ENDIAN
  [!B0]	BNOP	.S1	L25,4
|| [B0]	LDBU	.D1T1	*A4,A0
	SHL	.S1	A0,8,A0
	ADD	.L1	A0,A1,A1
L25:
#else
   [B0]	LDBU	.D1T1	*A4,A0
	NOP	4
   [B0]	ADD	.L1	A0,A1,A1
#endif
	;; Fold twice, then swap the two result bytes when buff was odd
	EXTU	.S1	A1,16,16,A0
	SHRU	.S2X	A1,16,B0
	NOP	1
	ADD	.L1X	A0,B0,A0
	SHRU	.S1	A0,16,A1
	ADD	.L1	A0,A1,A0
	EXTU	.S1	A0,16,16,A1
	EXTU	.S1	A1,16,24,A2

	EXTU	.S1	A1,24,16,A0
||	MV	.L2X	A3,B0		; odd flag

   [B0]	OR	.L1	A0,A2,A1
L26:
	NOP	1
	BNOP	.S2X	A5,4		; return through the saved address
	MV	.L1	A1,A4		; return value
ENDPROC(do_csum)

;__wsum csum_partial(const void *buff, int len, __wsum wsum)
;{
;	unsigned int sum = (__force unsigned int)wsum;
;	unsigned int result = do_csum(buff, len);
;
;	/* add in old sum, and carry.. */
;	result += sum;
;	if (sum > result)
;		result += 1;
;	return (__force __wsum)result;
;}
;
; buff (A4) and len (B4) are passed through to do_csum untouched.
; The return address is kept in A9 and wsum in A8 across the call.
;
; A4:	buff
; B4:	len
; A6:	wsum
; return checksum in A4
;
ENTRY(csum_partial)
	MV	.L1X	B3,A9		; keep the return address
||	CALLP	.S2	do_csum,B3
||	MV	.S1	A6,A8		; keep wsum
	BNOP	.S2X	A9,2
	ADD	.L1	A8,A4,A1	; result += sum
	CMPGTU	.L1	A8,A1,A0	; carry = (sum > result)
	ADD	.L1	A1,A0,A4	; result += carry
ENDPROC(csum_partial)

;unsigned short
;ip_compute_csum(unsigned char *buff, unsigned int len)
;
; Calls do_csum on (buff, len), complements the value it returns and
; clears bits 16 to 31 of the result.
;
; A4:	buff
; B4:	len
; return checksum in A4

ENTRY(ip_compute_csum)
	MV	.L1X	B3,A9		; keep the return address
||	CALLP	.S2	do_csum,B3
	BNOP	.S2X	A9,3
	NOT	.S1	A4,A4
	CLR	.S1	A4,16,31,A4
ENDPROC(ip_compute_csum)