svn commit: r368158 - head/sys/arm/arm
Michal Meloun
mmel at FreeBSD.org
Sun Nov 29 16:44:23 UTC 2020
Author: mmel
Date: Sun Nov 29 16:44:22 2020
New Revision: 368158
URL: https://svnweb.freebsd.org/changeset/base/368158
Log:
_ARM_ARCH_5E is always defined; we no longer support older CPUs.
Modified:
head/sys/arm/arm/bcopy_page.S
head/sys/arm/arm/bcopyinout.S
head/sys/arm/arm/in_cksum_arm.S
head/sys/arm/arm/machdep.c
head/sys/arm/arm/support.S
Modified: head/sys/arm/arm/bcopy_page.S
==============================================================================
--- head/sys/arm/arm/bcopy_page.S Sun Nov 29 16:29:40 2020 (r368157)
+++ head/sys/arm/arm/bcopy_page.S Sun Nov 29 16:44:22 2020 (r368158)
@@ -44,147 +44,8 @@ __FBSDID("$FreeBSD$");
#include "assym.inc"
-#ifndef _ARM_ARCH_5E
-/* #define BIG_LOOPS */
-
/*
- * bcopy_page(src, dest)
- *
- * Optimised copy page routine.
- *
- * On entry:
- * r0 - src address
- * r1 - dest address
- *
- * Requires:
- * number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS), 128
- * otherwise.
- */
-
-#define CHUNK_SIZE 32
-
-#define PREFETCH_FIRST_CHUNK /* nothing */
-#define PREFETCH_NEXT_CHUNK /* nothing */
-
-#ifndef COPY_CHUNK
-#define COPY_CHUNK \
- PREFETCH_NEXT_CHUNK ; \
- ldmia r0!, {r3-r8,ip,lr} ; \
- stmia r1!, {r3-r8,ip,lr}
-#endif /* ! COPY_CHUNK */
-
-#ifndef SAVE_REGS
-#define SAVE_REGS stmfd sp!, {r4-r8, lr}; _SAVE({r4-r8, lr})
-#define RESTORE_REGS ldmfd sp!, {r4-r8, pc}
-#endif
-
-ENTRY(bcopy_page)
- PREFETCH_FIRST_CHUNK
- SAVE_REGS
-#ifdef BIG_LOOPS
- mov r2, #(PAGE_SIZE >> 9)
-#else
- mov r2, #(PAGE_SIZE >> 7)
-#endif
-
-1:
- COPY_CHUNK
- COPY_CHUNK
- COPY_CHUNK
- COPY_CHUNK
-
-#ifdef BIG_LOOPS
- /* There is little point making the loop any larger; unless we are
- running with the cache off, the load/store overheads will
- completely dominate this loop. */
- COPY_CHUNK
- COPY_CHUNK
- COPY_CHUNK
- COPY_CHUNK
-
- COPY_CHUNK
- COPY_CHUNK
- COPY_CHUNK
- COPY_CHUNK
-
- COPY_CHUNK
- COPY_CHUNK
- COPY_CHUNK
- COPY_CHUNK
-#endif
- subs r2, r2, #1
- bne 1b
-
- RESTORE_REGS /* ...and return. */
-END(bcopy_page)
-
-/*
- * bzero_page(dest)
- *
- * Optimised zero page routine.
- *
- * On entry:
- * r0 - dest address
- *
- * Requires:
- * number of bytes per page (PAGE_SIZE) is a multiple of 512 (BIG_LOOPS), 128
- * otherwise
- */
-
-ENTRY(bzero_page)
- stmfd sp!, {r4-r8, lr}
- _SAVE({r4-r8, lr})
-#ifdef BIG_LOOPS
- mov r2, #(PAGE_SIZE >> 9)
-#else
- mov r2, #(PAGE_SIZE >> 7)
-#endif
- mov r3, #0
- mov r4, #0
- mov r5, #0
- mov r6, #0
- mov r7, #0
- mov r8, #0
- mov ip, #0
- mov lr, #0
-
-1:
- stmia r0!, {r3-r8,ip,lr}
- stmia r0!, {r3-r8,ip,lr}
- stmia r0!, {r3-r8,ip,lr}
- stmia r0!, {r3-r8,ip,lr}
-
-#ifdef BIG_LOOPS
- /* There is little point making the loop any larger; unless we are
- running with the cache off, the load/store overheads will
- completely dominate this loop. */
- stmia r0!, {r3-r8,ip,lr}
- stmia r0!, {r3-r8,ip,lr}
- stmia r0!, {r3-r8,ip,lr}
- stmia r0!, {r3-r8,ip,lr}
-
- stmia r0!, {r3-r8,ip,lr}
- stmia r0!, {r3-r8,ip,lr}
- stmia r0!, {r3-r8,ip,lr}
- stmia r0!, {r3-r8,ip,lr}
-
- stmia r0!, {r3-r8,ip,lr}
- stmia r0!, {r3-r8,ip,lr}
- stmia r0!, {r3-r8,ip,lr}
- stmia r0!, {r3-r8,ip,lr}
-
-#endif
-
- subs r2, r2, #1
- bne 1b
-
- ldmfd sp!, {r4-r8, pc}
-END(bzero_page)
-
-#else /* _ARM_ARCH_5E */
-
-/*
* armv5e version of bcopy_page
*/
ENTRY(bcopy_page)
@@ -279,4 +140,3 @@ ENTRY(bzero_page)
bne 1b
RET
END(bzero_page)
-#endif /* _ARM_ARCH_5E */
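For context on what is being consolidated here: both the removed generic loop and the retained armv5e routines copy a page in 32-byte chunks; the generic path used plain ldmia/stmia over eight registers, while the armv5e path adds pld prefetching and ldrd/strd. A rough C equivalent of the chunked page copy, assuming 4 KiB pages (illustrative only, not the kernel's implementation):

	#include <stddef.h>
	#include <stdint.h>

	#define PAGE_SIZE  4096	/* assumption: 4 KiB pages */
	#define CHUNK_SIZE 32	/* eight 32-bit registers per ldmia/stmia */

	/*
	 * Rough C equivalent of the removed bcopy_page loop: copy a
	 * page in 32-byte chunks, four chunks (128 bytes) per outer
	 * iteration, mirroring the PAGE_SIZE >> 7 loop count in the
	 * assembly.
	 */
	static void
	bcopy_page_sketch(const uint32_t *src, uint32_t *dst)
	{
		for (size_t i = 0; i < PAGE_SIZE / (4 * CHUNK_SIZE); i++)
			for (int chunk = 0; chunk < 4; chunk++)
				for (int w = 0; w < CHUNK_SIZE / 4; w++)
					*dst++ = *src++;	/* one COPY_CHUNK word */
	}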
Modified: head/sys/arm/arm/bcopyinout.S
==============================================================================
--- head/sys/arm/arm/bcopyinout.S Sun Nov 29 16:29:40 2020 (r368157)
+++ head/sys/arm/arm/bcopyinout.S Sun Nov 29 16:44:22 2020 (r368158)
@@ -47,510 +47,7 @@
.word _C_LABEL(_min_memcpy_size)
__FBSDID("$FreeBSD$");
-#ifdef _ARM_ARCH_5E
#include <arm/arm/bcopyinout_xscale.S>
-#else
-
- .text
- .align 2
-
-#define GET_PCB(tmp) \
- mrc p15, 0, tmp, c13, c0, 4; \
- add tmp, tmp, #(TD_PCB)
-
-#define SAVE_REGS stmfd sp!, {r4-r11}; _SAVE({r4-r11})
-#define RESTORE_REGS ldmfd sp!, {r4-r11}
-
-#if defined(_ARM_ARCH_5E)
-#define HELLOCPP #
-#define PREFETCH(rx,o) pld [ rx , HELLOCPP (o) ]
-#else
-#define PREFETCH(rx,o)
-#endif
-
-/*
- * r0 = user space address
- * r1 = kernel space address
- * r2 = length
- *
- * Copies bytes from user space to kernel space
- *
- * We save/restore r4-r11:
- * r4-r11 are scratch
- */
-ENTRY(copyin)
- /* Quick exit if length is zero */
- teq r2, #0
- moveq r0, #0
- RETeq
-
- adds r3, r0, r2
- movcs r0, #EFAULT
- RETc(cs)
-
- ldr r12, =(VM_MAXUSER_ADDRESS + 1)
- cmp r3, r12
- movcs r0, #EFAULT
- RETc(cs)
-
- ldr r3, .L_arm_memcpy
- ldr r3, [r3]
- cmp r3, #0
- beq .Lnormal
- ldr r3, .L_min_memcpy_size
- ldr r3, [r3]
- cmp r2, r3
- blt .Lnormal
- stmfd sp!, {r0-r2, r4, lr}
- mov r3, r0
- mov r0, r1
- mov r1, r3
- mov r3, #2 /* SRC_IS_USER */
- ldr r4, .L_arm_memcpy
- mov lr, pc
- ldr pc, [r4]
- cmp r0, #0
- ldmfd sp!, {r0-r2, r4, lr}
- moveq r0, #0
- RETeq
-
-.Lnormal:
- SAVE_REGS
- GET_PCB(r4)
- ldr r4, [r4]
-
-
- ldr r5, [r4, #PCB_ONFAULT]
- adr r3, .Lcopyfault
- str r3, [r4, #PCB_ONFAULT]
-
- PREFETCH(r0, 0)
- PREFETCH(r1, 0)
-
- /*
- * If not too many bytes, take the slow path.
- */
- cmp r2, #0x08
- blt .Licleanup
-
- /*
- * Align destination to word boundary.
- */
- and r6, r1, #0x3
- ldr pc, [pc, r6, lsl #2]
- b .Lialend
- .word .Lialend
- .word .Lial3
- .word .Lial2
- .word .Lial1
-.Lial3: ldrbt r6, [r0], #1
- sub r2, r2, #1
- strb r6, [r1], #1
-.Lial2: ldrbt r7, [r0], #1
- sub r2, r2, #1
- strb r7, [r1], #1
-.Lial1: ldrbt r6, [r0], #1
- sub r2, r2, #1
- strb r6, [r1], #1
-.Lialend:
-
- /*
- * If few bytes left, finish slow.
- */
- cmp r2, #0x08
- blt .Licleanup
-
- /*
- * If source is not aligned, finish slow.
- */
- ands r3, r0, #0x03
- bne .Licleanup
-
- cmp r2, #0x60 /* Must be > 0x5f for unrolled cacheline */
- blt .Licleanup8
-
- /*
- * Align destination to cacheline boundary.
- * If source and destination are nicely aligned, this can be a big
- * win. If not, it's still cheaper to copy in groups of 32 even if
- * we don't get the nice cacheline alignment.
- */
- and r6, r1, #0x1f
- ldr pc, [pc, r6]
- b .Licaligned
- .word .Licaligned
- .word .Lical28
- .word .Lical24
- .word .Lical20
- .word .Lical16
- .word .Lical12
- .word .Lical8
- .word .Lical4
-.Lical28:ldrt r6, [r0], #4
- sub r2, r2, #4
- str r6, [r1], #4
-.Lical24:ldrt r7, [r0], #4
- sub r2, r2, #4
- str r7, [r1], #4
-.Lical20:ldrt r6, [r0], #4
- sub r2, r2, #4
- str r6, [r1], #4
-.Lical16:ldrt r7, [r0], #4
- sub r2, r2, #4
- str r7, [r1], #4
-.Lical12:ldrt r6, [r0], #4
- sub r2, r2, #4
- str r6, [r1], #4
-.Lical8:ldrt r7, [r0], #4
- sub r2, r2, #4
- str r7, [r1], #4
-.Lical4:ldrt r6, [r0], #4
- sub r2, r2, #4
- str r6, [r1], #4
-
- /*
- * We start with > 0x40 bytes to copy (>= 0x60 got us into this
- * part of the code, and we may have knocked that down by as much
- * as 0x1c getting aligned).
- *
- * This loop basically works out to:
- * do {
- * prefetch-next-cacheline(s)
- * bytes -= 0x20;
- * copy cacheline
- * } while (bytes >= 0x40);
- * bytes -= 0x20;
- * copy cacheline
- */
-.Licaligned:
- PREFETCH(r0, 32)
- PREFETCH(r1, 32)
-
- sub r2, r2, #0x20
-
- /* Copy a cacheline */
- ldrt r10, [r0], #4
- ldrt r11, [r0], #4
- ldrt r6, [r0], #4
- ldrt r7, [r0], #4
- ldrt r8, [r0], #4
- ldrt r9, [r0], #4
- stmia r1!, {r10-r11}
- ldrt r10, [r0], #4
- ldrt r11, [r0], #4
- stmia r1!, {r6-r11}
-
- cmp r2, #0x40
- bge .Licaligned
-
- sub r2, r2, #0x20
-
- /* Copy a cacheline */
- ldrt r10, [r0], #4
- ldrt r11, [r0], #4
- ldrt r6, [r0], #4
- ldrt r7, [r0], #4
- ldrt r8, [r0], #4
- ldrt r9, [r0], #4
- stmia r1!, {r10-r11}
- ldrt r10, [r0], #4
- ldrt r11, [r0], #4
- stmia r1!, {r6-r11}
-
- cmp r2, #0x08
- blt .Liprecleanup
-
-.Licleanup8:
- ldrt r8, [r0], #4
- ldrt r9, [r0], #4
- sub r2, r2, #8
- stmia r1!, {r8, r9}
- cmp r2, #8
- bge .Licleanup8
-
-.Liprecleanup:
- /*
- * If we're done, bail.
- */
- cmp r2, #0
- beq .Lout
-
-.Licleanup:
- and r6, r2, #0x3
- ldr pc, [pc, r6, lsl #2]
- b .Licend
- .word .Lic4
- .word .Lic1
- .word .Lic2
- .word .Lic3
-.Lic4: ldrbt r6, [r0], #1
- sub r2, r2, #1
- strb r6, [r1], #1
-.Lic3: ldrbt r7, [r0], #1
- sub r2, r2, #1
- strb r7, [r1], #1
-.Lic2: ldrbt r6, [r0], #1
- sub r2, r2, #1
- strb r6, [r1], #1
-.Lic1: ldrbt r7, [r0], #1
- subs r2, r2, #1
- strb r7, [r1], #1
-.Licend:
- bne .Licleanup
-
-.Liout:
- mov r0, #0
-
- str r5, [r4, #PCB_ONFAULT]
- RESTORE_REGS
-
- RET
-
-.Lcopyfault:
- ldr r0, =EFAULT
- str r5, [r4, #PCB_ONFAULT]
- RESTORE_REGS
-
- RET
-END(copyin)
-
-/*
- * r0 = kernel space address
- * r1 = user space address
- * r2 = length
- *
- * Copies bytes from kernel space to user space
- *
- * We save/restore r4-r11:
- * r4-r11 are scratch
- */
-
-ENTRY(copyout)
- /* Quick exit if length is zero */
- teq r2, #0
- moveq r0, #0
- RETeq
-
- adds r3, r1, r2
- movcs r0, #EFAULT
- RETc(cs)
-
- ldr r12, =(VM_MAXUSER_ADDRESS + 1)
- cmp r3, r12
- movcs r0, #EFAULT
- RETc(cs)
-
- ldr r3, .L_arm_memcpy
- ldr r3, [r3]
- cmp r3, #0
- beq .Lnormale
- ldr r3, .L_min_memcpy_size
- ldr r3, [r3]
- cmp r2, r3
- blt .Lnormale
- stmfd sp!, {r0-r2, r4, lr}
- _SAVE({r0-r2, r4, lr})
- mov r3, r0
- mov r0, r1
- mov r1, r3
- mov r3, #1 /* DST_IS_USER */
- ldr r4, .L_arm_memcpy
- mov lr, pc
- ldr pc, [r4]
- cmp r0, #0
- ldmfd sp!, {r0-r2, r4, lr}
- moveq r0, #0
- RETeq
-
-.Lnormale:
- SAVE_REGS
- GET_PCB(r4)
- ldr r4, [r4]
-
- ldr r5, [r4, #PCB_ONFAULT]
- adr r3, .Lcopyfault
- str r3, [r4, #PCB_ONFAULT]
-
- PREFETCH(r0, 0)
- PREFETCH(r1, 0)
-
- /*
- * If not too many bytes, take the slow path.
- */
- cmp r2, #0x08
- blt .Lcleanup
-
- /*
- * Align destination to word boundary.
- */
- and r6, r1, #0x3
- ldr pc, [pc, r6, lsl #2]
- b .Lalend
- .word .Lalend
- .word .Lal3
- .word .Lal2
- .word .Lal1
-.Lal3: ldrb r6, [r0], #1
- sub r2, r2, #1
- strbt r6, [r1], #1
-.Lal2: ldrb r7, [r0], #1
- sub r2, r2, #1
- strbt r7, [r1], #1
-.Lal1: ldrb r6, [r0], #1
- sub r2, r2, #1
- strbt r6, [r1], #1
-.Lalend:
-
- /*
- * If few bytes left, finish slow.
- */
- cmp r2, #0x08
- blt .Lcleanup
-
- /*
- * If source is not aligned, finish slow.
- */
- ands r3, r0, #0x03
- bne .Lcleanup
-
- cmp r2, #0x60 /* Must be > 0x5f for unrolled cacheline */
- blt .Lcleanup8
-
- /*
- * Align source & destination to cacheline boundary.
- */
- and r6, r1, #0x1f
- ldr pc, [pc, r6]
- b .Lcaligned
- .word .Lcaligned
- .word .Lcal28
- .word .Lcal24
- .word .Lcal20
- .word .Lcal16
- .word .Lcal12
- .word .Lcal8
- .word .Lcal4
-.Lcal28:ldr r6, [r0], #4
- sub r2, r2, #4
- strt r6, [r1], #4
-.Lcal24:ldr r7, [r0], #4
- sub r2, r2, #4
- strt r7, [r1], #4
-.Lcal20:ldr r6, [r0], #4
- sub r2, r2, #4
- strt r6, [r1], #4
-.Lcal16:ldr r7, [r0], #4
- sub r2, r2, #4
- strt r7, [r1], #4
-.Lcal12:ldr r6, [r0], #4
- sub r2, r2, #4
- strt r6, [r1], #4
-.Lcal8: ldr r7, [r0], #4
- sub r2, r2, #4
- strt r7, [r1], #4
-.Lcal4: ldr r6, [r0], #4
- sub r2, r2, #4
- strt r6, [r1], #4
-
- /*
- * We start with > 0x40 bytes to copy (>= 0x60 got us into this
- * part of the code, and we may have knocked that down by as much
- * as 0x1c getting aligned).
- *
- * This loop basically works out to:
- * do {
- * prefetch-next-cacheline(s)
- * bytes -= 0x20;
- * copy cacheline
- * } while (bytes >= 0x40);
- * bytes -= 0x20;
- * copy cacheline
- */
-.Lcaligned:
- PREFETCH(r0, 32)
- PREFETCH(r1, 32)
-
- sub r2, r2, #0x20
-
- /* Copy a cacheline */
- ldmia r0!, {r6-r11}
- strt r6, [r1], #4
- strt r7, [r1], #4
- ldmia r0!, {r6-r7}
- strt r8, [r1], #4
- strt r9, [r1], #4
- strt r10, [r1], #4
- strt r11, [r1], #4
- strt r6, [r1], #4
- strt r7, [r1], #4
-
- cmp r2, #0x40
- bge .Lcaligned
-
- sub r2, r2, #0x20
-
- /* Copy a cacheline */
- ldmia r0!, {r6-r11}
- strt r6, [r1], #4
- strt r7, [r1], #4
- ldmia r0!, {r6-r7}
- strt r8, [r1], #4
- strt r9, [r1], #4
- strt r10, [r1], #4
- strt r11, [r1], #4
- strt r6, [r1], #4
- strt r7, [r1], #4
-
- cmp r2, #0x08
- blt .Lprecleanup
-
-.Lcleanup8:
- ldmia r0!, {r8-r9}
- sub r2, r2, #8
- strt r8, [r1], #4
- strt r9, [r1], #4
- cmp r2, #8
- bge .Lcleanup8
-
-.Lprecleanup:
- /*
- * If we're done, bail.
- */
- cmp r2, #0
- beq .Lout
-
-.Lcleanup:
- and r6, r2, #0x3
- ldr pc, [pc, r6, lsl #2]
- b .Lcend
- .word .Lc4
- .word .Lc1
- .word .Lc2
- .word .Lc3
-.Lc4: ldrb r6, [r0], #1
- sub r2, r2, #1
- strbt r6, [r1], #1
-.Lc3: ldrb r7, [r0], #1
- sub r2, r2, #1
- strbt r7, [r1], #1
-.Lc2: ldrb r6, [r0], #1
- sub r2, r2, #1
- strbt r6, [r1], #1
-.Lc1: ldrb r7, [r0], #1
- subs r2, r2, #1
- strbt r7, [r1], #1
-.Lcend:
- bne .Lcleanup
-
-.Lout:
- mov r0, #0
-
- str r5, [r4, #PCB_ONFAULT]
- RESTORE_REGS
-
- RET
-END(copyout)
-#endif
/*
* int badaddr_read_1(const uint8_t *src, uint8_t *dest)
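The removed copyin/copyout bodies share one shape: arm a fault handler through pcb_onfault (the .Lcopyfault label above), byte-copy until the destination is word aligned, move 32-byte cachelines with the ldrt/strt user-space accessors, then finish with 8-byte groups and trailing bytes. A plain C stand-in for that staging, with the fault handling and user-mode load/store semantics elided (illustrative only):

	#include <stddef.h>
	#include <stdint.h>
	#include <string.h>

	/*
	 * Shape of the removed copyin inner copy: align, then copy in
	 * progressively smaller blocks. The real routine uses ldrt so
	 * that user-space faults unwind via pcb_onfault; plain loads
	 * are used here, so this is only a sketch of the control flow.
	 */
	static void
	copy_shape(const uint8_t *src, uint8_t *dst, size_t len)
	{
		while (len > 0 && ((uintptr_t)dst & 3) != 0) {	/* .Lial* */
			*dst++ = *src++;
			len--;
		}
		while (len >= 32) {	/* .Licaligned cacheline loop */
			memcpy(dst, src, 32);
			src += 32; dst += 32; len -= 32;
		}
		while (len >= 8) {	/* .Licleanup8 */
			memcpy(dst, src, 8);
			src += 8; dst += 8; len -= 8;
		}
		while (len-- > 0)	/* .Licleanup byte tail */
			*dst++ = *src++;
	}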
Modified: head/sys/arm/arm/in_cksum_arm.S
==============================================================================
--- head/sys/arm/arm/in_cksum_arm.S Sun Nov 29 16:29:40 2020 (r368157)
+++ head/sys/arm/arm/in_cksum_arm.S Sun Nov 29 16:44:22 2020 (r368158)
@@ -116,9 +116,7 @@ END(do_cksum)
*/
/* LINTSTUB: Ignore */
ASENTRY_NP(L_cksumdata)
-#ifdef _ARM_ARCH_5E
pld [r0] /* Pre-fetch the start of the buffer */
-#endif
mov r2, #0
/* We first have to word-align the buffer. */
@@ -144,7 +142,6 @@ ASENTRY_NP(L_cksumdata)
/* Buffer is now word aligned */
.Lcksumdata_wordaligned:
-#ifdef _ARM_ARCH_5E
cmp r1, #0x04 /* Less than 4 bytes left? */
blt .Lcksumdata_endgame /* Yup */
@@ -199,43 +196,10 @@ ASENTRY_NP(L_cksumdata)
adcs r2, r2, r7
adc r2, r2, #0x00
-#else /* !_ARM_ARCH_5E */
-
- subs r1, r1, #0x40
- blt .Lcksumdata_bigloop_end
-
-.Lcksumdata_bigloop:
- ldmia r0!, {r3, r4, r5, r6}
- adds r2, r2, r3
- adcs r2, r2, r4
- adcs r2, r2, r5
- ldmia r0!, {r3, r4, r5, r7}
- adcs r2, r2, r6
- adcs r2, r2, r3
- adcs r2, r2, r4
- adcs r2, r2, r5
- ldmia r0!, {r3, r4, r5, r6}
- adcs r2, r2, r7
- adcs r2, r2, r3
- adcs r2, r2, r4
- adcs r2, r2, r5
- ldmia r0!, {r3, r4, r5, r7}
- adcs r2, r2, r6
- adcs r2, r2, r3
- adcs r2, r2, r4
- adcs r2, r2, r5
- adcs r2, r2, r7
- adc r2, r2, #0x00
- subs r1, r1, #0x40
- bge .Lcksumdata_bigloop
-.Lcksumdata_bigloop_end:
-#endif
-
adds r1, r1, #0x40
RETeq
cmp r1, #0x20
-#ifdef _ARM_ARCH_5E
ldrdge r4, [r0], #0x08 /* Avoid stalling pld and result */
blt .Lcksumdata_less_than_32
pld [r0, #0x18]
@@ -250,19 +214,6 @@ ASENTRY_NP(L_cksumdata)
adcs r2, r2, r5
adcs r2, r2, r6 /* XXX: Unavoidable result stall */
adcs r2, r2, r7
-#else
- blt .Lcksumdata_less_than_32
- ldmia r0!, {r3, r4, r5, r6}
- adds r2, r2, r3
- adcs r2, r2, r4
- adcs r2, r2, r5
- ldmia r0!, {r3, r4, r5, r7}
- adcs r2, r2, r6
- adcs r2, r2, r3
- adcs r2, r2, r4
- adcs r2, r2, r5
- adcs r2, r2, r7
-#endif
adc r2, r2, #0x00
subs r1, r1, #0x20
RETeq
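The adds/adcs chains in this file implement a carry-folding word sum for the Internet checksum: every carry out of the 32-bit accumulator is folded back in by the trailing adc r2, r2, #0. A minimal C sketch of that computation, assuming a word-aligned buffer (the real routine also handles misalignment and odd lengths):

	#include <stddef.h>
	#include <stdint.h>

	/*
	 * Carry-folding sum over 32-bit words, as computed by the
	 * adds/adcs sequences above. A 64-bit accumulator stands in
	 * for the hardware carry flag; packet-sized buffers cannot
	 * overflow it.
	 */
	static uint32_t
	cksum_words_sketch(const uint32_t *p, size_t nwords)
	{
		uint64_t sum = 0;

		while (nwords-- > 0)
			sum += *p++;
		while (sum >> 32)	/* fold carries back in */
			sum = (sum & 0xffffffffu) + (sum >> 32);
		return ((uint32_t)sum);
	}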
Modified: head/sys/arm/arm/machdep.c
==============================================================================
--- head/sys/arm/arm/machdep.c Sun Nov 29 16:29:40 2020 (r368157)
+++ head/sys/arm/arm/machdep.c Sun Nov 29 16:44:22 2020 (r368158)
@@ -107,8 +107,8 @@ __FBSDID("$FreeBSD$");
#endif
-#ifndef _ARM_ARCH_5E
-#error FreeBSD requires ARMv5 or later
+#ifndef _ARM_ARCH_6
+#error FreeBSD requires ARMv6 or later
#endif
struct pcpu __pcpu[MAXCPU];
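This hunk raises the compile-time floor from ARMv5 to ARMv6. A sketch of how such a guard typically works, assuming the _ARM_ARCH_* macros are derived from the compiler's architecture predefines (the defining header is not part of this diff, so treat the #if line below as an assumption):

	/*
	 * Illustrative derivation of _ARM_ARCH_6 from GCC/Clang
	 * architecture predefines; the actual FreeBSD header may
	 * differ.
	 */
	#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
	    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
	    defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_7__) || \
	    defined(__ARM_ARCH_7A__)
	#define _ARM_ARCH_6
	#endif

	#ifndef _ARM_ARCH_6
	#error FreeBSD requires ARMv6 or later
	#endif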
Modified: head/sys/arm/arm/support.S
==============================================================================
--- head/sys/arm/arm/support.S Sun Nov 29 16:29:40 2020 (r368157)
+++ head/sys/arm/arm/support.S Sun Nov 29 16:44:22 2020 (r368158)
@@ -149,17 +149,11 @@ do_memset:
/* We are now word aligned */
.Lmemset_wordaligned:
orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */
-#ifdef _ARM_ARCH_5E
tst ip, #0x04 /* Quad-align for armv5e */
-#else
- cmp r1, #0x10
-#endif
orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */
-#ifdef _ARM_ARCH_5E
subne r1, r1, #0x04 /* Quad-align if necessary */
strne r3, [ip], #0x04
cmp r1, #0x10
-#endif
blt .Lmemset_loop4 /* If less than 16 then use words */
mov r2, r3 /* Duplicate data */
cmp r1, #0x80 /* If < 128 then skip the big loop */
@@ -168,7 +162,6 @@ do_memset:
/* Do 128 bytes at a time */
.Lmemset_loop128:
subs r1, r1, #0x80
-#ifdef _ARM_ARCH_5E
strdge r2, [ip], #0x08
strdge r2, [ip], #0x08
strdge r2, [ip], #0x08
@@ -185,24 +178,6 @@ do_memset:
strdge r2, [ip], #0x08
strdge r2, [ip], #0x08
strdge r2, [ip], #0x08
-#else
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
-#endif
bgt .Lmemset_loop128
RETeq /* Zero length so just exit */
@@ -211,30 +186,18 @@ do_memset:
/* Do 32 bytes at a time */
.Lmemset_loop32:
subs r1, r1, #0x20
-#ifdef _ARM_ARCH_5E
strdge r2, [ip], #0x08
strdge r2, [ip], #0x08
strdge r2, [ip], #0x08
strdge r2, [ip], #0x08
-#else
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
-#endif
bgt .Lmemset_loop32
RETeq /* Zero length so just exit */
adds r1, r1, #0x10 /* Partially adjust for extra sub */
/* Deal with 16 bytes or more */
-#ifdef _ARM_ARCH_5E
strdge r2, [ip], #0x08
strdge r2, [ip], #0x08
-#else
- stmiage ip!, {r2-r3}
- stmiage ip!, {r2-r3}
-#endif
RETeq /* Zero length so just exit */
addlt r1, r1, #0x10 /* Possibly adjust for extra sub */
@@ -246,14 +209,10 @@ do_memset:
bgt .Lmemset_loop4
RETeq /* Zero length so just exit */
-#ifdef _ARM_ARCH_5E
/* Compensate for 64-bit alignment check */
adds r1, r1, #0x04
RETeq
cmp r1, #2
-#else
- cmp r1, #-2
-#endif
strb r3, [ip], #0x01 /* Set 1 byte */
strbge r3, [ip], #0x01 /* Set another byte */
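The memset hunks keep only the strd path: the routine widens the fill byte to a 32-bit pattern (the two orr ... lsl instructions), quad-aligns the pointer, then stores in 128-, 32-, and 16-byte tiers before the word and byte tails. A C rendering of that staging, illustrative only:

	#include <stddef.h>
	#include <stdint.h>

	/*
	 * Staging used by do_memset above: widen the byte, align,
	 * then fill in progressively smaller blocks. The word and
	 * byte tails are condensed relative to the assembly.
	 */
	static void
	memset_sketch(uint8_t *p, uint8_t c, size_t len)
	{
		uint32_t pat = c;

		pat |= pat << 8;	/* extend value to 16 bits */
		pat |= pat << 16;	/* extend value to 32 bits */

		while (len > 0 && ((uintptr_t)p & 7) != 0) {	/* quad-align */
			*p++ = c;
			len--;
		}
		while (len >= 128) {	/* .Lmemset_loop128: 16 x strd */
			for (int i = 0; i < 32; i++)
				((uint32_t *)(void *)p)[i] = pat;
			p += 128; len -= 128;
		}
		while (len >= 32) {	/* .Lmemset_loop32: 4 x strd */
			for (int i = 0; i < 8; i++)
				((uint32_t *)(void *)p)[i] = pat;
			p += 32; len -= 32;
		}
		while (len-- > 0)	/* word/byte tail, condensed */
			*p++ = c;
	}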
@@ -804,243 +763,6 @@ EENTRY(memmove)
EEND(memmove)
END(bcopy)
-#if !defined(_ARM_ARCH_5E)
-ENTRY(memcpy)
- /* save leaf functions having to store this away */
- /* Do not check arm_memcpy if we're running from flash */
-#if defined(FLASHADDR) && defined(PHYSADDR)
-#if FLASHADDR > PHYSADDR
- ldr r3, =FLASHADDR
- cmp r3, pc
- bls .Lnormal
-#else
- ldr r3, =FLASHADDR
- cmp r3, pc
- bhi .Lnormal
-#endif
-#endif
- ldr r3, .L_arm_memcpy
- ldr r3, [r3]
- cmp r3, #0
- beq .Lnormal
- ldr r3, .L_min_memcpy_size
- ldr r3, [r3]
- cmp r2, r3
- blt .Lnormal
- stmfd sp!, {r0-r2, r4, lr}
- mov r3, #0
- ldr r4, .L_arm_memcpy
- mov lr, pc
- ldr pc, [r4]
- cmp r0, #0
- ldmfd sp!, {r0-r2, r4, lr}
- RETeq
-
-.Lnormal:
- stmdb sp!, {r0, lr} /* memcpy() returns dest addr */
-
- subs r2, r2, #4
- blt .Lmemcpy_l4 /* less than 4 bytes */
- ands r12, r0, #3
- bne .Lmemcpy_destul /* oh unaligned destination addr */
- ands r12, r1, #3
- bne .Lmemcpy_srcul /* oh unaligned source addr */
-
-.Lmemcpy_t8:
- /* We have aligned source and destination */
- subs r2, r2, #8
- blt .Lmemcpy_l12 /* less than 12 bytes (4 from above) */
- subs r2, r2, #0x14
- blt .Lmemcpy_l32 /* less than 32 bytes (12 from above) */
- stmdb sp!, {r4} /* borrow r4 */
-
- /* blat 32 bytes at a time */
- /* XXX for really big copies perhaps we should use more registers */
-.Lmemcpy_loop32:
- ldmia r1!, {r3, r4, r12, lr}
- stmia r0!, {r3, r4, r12, lr}
- ldmia r1!, {r3, r4, r12, lr}
- stmia r0!, {r3, r4, r12, lr}
- subs r2, r2, #0x20
- bge .Lmemcpy_loop32
-
- cmn r2, #0x10
- ldmiage r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
- stmiage r0!, {r3, r4, r12, lr}
- subge r2, r2, #0x10
- ldmia sp!, {r4} /* return r4 */
-
-.Lmemcpy_l32:
- adds r2, r2, #0x14
-
- /* blat 12 bytes at a time */
-.Lmemcpy_loop12:
- ldmiage r1!, {r3, r12, lr}
- stmiage r0!, {r3, r12, lr}
- subsge r2, r2, #0x0c
- bge .Lmemcpy_loop12
-
-.Lmemcpy_l12:
- adds r2, r2, #8
- blt .Lmemcpy_l4
-
- subs r2, r2, #4
- ldrlt r3, [r1], #4
- strlt r3, [r0], #4
- ldmiage r1!, {r3, r12}
- stmiage r0!, {r3, r12}
- subge r2, r2, #4
-
-.Lmemcpy_l4:
- /* less than 4 bytes to go */
- adds r2, r2, #4
-#ifdef __APCS_26_
- ldmiaeq sp!, {r0, pc}^ /* done */
-#else
- ldmiaeq sp!, {r0, pc} /* done */
-#endif
- /* copy the crud byte at a time */
- cmp r2, #2
- ldrb r3, [r1], #1
- strb r3, [r0], #1
- ldrbge r3, [r1], #1
- strbge r3, [r0], #1
- ldrbgt r3, [r1], #1
- strbgt r3, [r0], #1
- ldmia sp!, {r0, pc}
-
- /* erg - unaligned destination */
-.Lmemcpy_destul:
- rsb r12, r12, #4
- cmp r12, #2
-
- /* align destination with byte copies */
- ldrb r3, [r1], #1
- strb r3, [r0], #1
- ldrbge r3, [r1], #1
- strbge r3, [r0], #1
- ldrbgt r3, [r1], #1
- strbgt r3, [r0], #1
- subs r2, r2, r12
- blt .Lmemcpy_l4 /* less the 4 bytes */
-
- ands r12, r1, #3
- beq .Lmemcpy_t8 /* we have an aligned source */
-
- /* erg - unaligned source */
- /* This is where it gets nasty ... */
-.Lmemcpy_srcul:
- bic r1, r1, #3
- ldr lr, [r1], #4
- cmp r12, #2
- bgt .Lmemcpy_srcul3
- beq .Lmemcpy_srcul2
- cmp r2, #0x0c
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
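The truncated hunk removes the pre-ARMv5E memcpy. Its visible structure: short or misaligned buffers fall through to byte copies, aligned buffers are blatted 32 bytes at a time with ldmia/stmia, and a source-only misalignment is handled by the shift-and-merge .Lmemcpy_srcul* paths cut off above. A compact C analogue of the aligned fast path, assuming word-aligned inputs (illustrative only):

	#include <stddef.h>
	#include <stdint.h>

	/*
	 * C analogue of the removed .Lmemcpy_t8 fast path. Assumes
	 * src and dst are already word aligned; the removed assembly
	 * realigns them first when they are not.
	 */
	static void *
	memcpy_sketch(void *dst, const void *src, size_t len)
	{
		uint32_t *d = dst;
		const uint32_t *s = src;
		uint8_t *db;
		const uint8_t *sb;

		while (len >= 32) {	/* .Lmemcpy_loop32 */
			for (int i = 0; i < 8; i++)
				*d++ = *s++;
			len -= 32;
		}
		while (len >= 4) {	/* word tail */
			*d++ = *s++;
			len -= 4;
		}
		db = (uint8_t *)d;
		sb = (const uint8_t *)s;
		while (len-- > 0)	/* "copy the crud byte at a time" */
			*db++ = *sb++;
		return (dst);	/* memcpy() returns dest addr */
	}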