aboutsummaryrefslogtreecommitdiffstats
path: root/meta/recipes-core/glibc/glibc-2.10.1/arm-memcpy.patch
diff options
context:
space:
mode:
authorRichard Purdie <rpurdie@linux.intel.com>2010-08-27 15:14:24 +0100
committerRichard Purdie <rpurdie@linux.intel.com>2010-08-27 15:29:45 +0100
commit29d6678fd546377459ef75cf54abeef5b969b5cf (patch)
tree8edd65790e37a00d01c3f203f773fe4b5012db18 /meta/recipes-core/glibc/glibc-2.10.1/arm-memcpy.patch
parentda49de6885ee1bc424e70bc02f21f6ab920efb55 (diff)
downloadopenembedded-core-contrib-29d6678fd546377459ef75cf54abeef5b969b5cf.tar.gz
Major layout change to the packages directory
Having one monolithic packages directory makes it hard to find things and is generally overwhelming. This commit splits it into several logical sections roughly based on function, recipes.txt gives more information about the classifications used. The opportunity is also used to switch from "packages" to "recipes" as used in OpenEmbedded as the term "packages" can be confusing to people and has many different meanings. Not all recipes have been classified yet, this is just a first pass at separating things out. Some packages are moved to meta-extras as they're no longer actively used or maintained. Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
Diffstat (limited to 'meta/recipes-core/glibc/glibc-2.10.1/arm-memcpy.patch')
-rw-r--r--meta/recipes-core/glibc/glibc-2.10.1/arm-memcpy.patch758
1 files changed, 758 insertions, 0 deletions
diff --git a/meta/recipes-core/glibc/glibc-2.10.1/arm-memcpy.patch b/meta/recipes-core/glibc/glibc-2.10.1/arm-memcpy.patch
new file mode 100644
index 0000000000..bc2b3dab84
--- /dev/null
+++ b/meta/recipes-core/glibc/glibc-2.10.1/arm-memcpy.patch
@@ -0,0 +1,758 @@
+--- /dev/null 2004-02-02 20:32:13.000000000 +0000
++++ sysdeps/arm/memmove.S 2004-03-20 18:37:23.000000000 +0000
+@@ -0,0 +1,251 @@
++/*
++ * Optimized memmove implementation for ARM processors
++ *
++ * Author: Nicolas Pitre
++ * Created: Dec 23, 2003
++ * Copyright: (C) MontaVista Software, Inc.
++ *
++ * This file is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * This file is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ */
++
++#include <sysdep.h>
++
++
++/*
++ * Endian independent macros for shifting bytes within registers.
++ */
++#ifndef __ARMEB__
++#define pull lsr
++#define push lsl
++#else
++#define pull lsl
++#define push lsr
++#endif
++
++/*
++ * Enable data preload for architectures that support it (ARMv5 and above)
++ */
++#if defined(__ARM_ARCH_5__) || \
++ defined(__ARM_ARCH_5T__) || \
++ defined(__ARM_ARCH_5TE__)
++#define PLD(code...) code
++#else
++#define PLD(code...)
++#endif
++
++
++/* char * memmove (char *dst, const char *src) */
++ENTRY(memmove)
++ subs ip, r0, r1
++ cmphi r2, ip
++ bls memcpy(PLT)
++
++ stmfd sp!, {r0, r4, lr}
++ add r1, r1, r2
++ add r0, r0, r2
++ subs r2, r2, #4
++ blt 25f
++ ands ip, r0, #3
++ PLD( pld [r1, #-4] )
++ bne 26f
++ ands ip, r1, #3
++ bne 27f
++
++19: subs r2, r2, #4
++ blt 24f
++ subs r2, r2, #8
++ blt 23f
++ subs r2, r2, #16
++ blt 22f
++
++ PLD( pld [r1, #-32] )
++ PLD( subs r2, r2, #96 )
++ stmfd sp!, {r5 - r8}
++ PLD( blt 21f )
++
++ PLD( @ cache alignment )
++ PLD( ands ip, r1, #31 )
++ PLD( pld [r1, #-64] )
++ PLD( beq 20f )
++ PLD( cmp r2, ip )
++ PLD( pld [r1, #-96] )
++ PLD( blt 20f )
++ PLD( cmp ip, #16 )
++ PLD( sub r2, r2, ip )
++ PLD( ldmgedb r1!, {r3 - r6} )
++ PLD( stmgedb r0!, {r3 - r6} )
++ PLD( beq 20f )
++ PLD( and ip, ip, #15 )
++ PLD( cmp ip, #8 )
++ PLD( ldr r3, [r1, #-4]! )
++ PLD( ldrge r4, [r1, #-4]! )
++ PLD( ldrgt r5, [r1, #-4]! )
++ PLD( str r3, [r0, #-4]! )
++ PLD( strge r4, [r0, #-4]! )
++ PLD( strgt r5, [r0, #-4]! )
++
++20: PLD( pld [r1, #-96] )
++ PLD( pld [r1, #-128] )
++21: ldmdb r1!, {r3, r4, ip, lr}
++ subs r2, r2, #32
++ stmdb r0!, {r3, r4, ip, lr}
++ ldmdb r1!, {r3, r4, ip, lr}
++ stmgedb r0!, {r3, r4, ip, lr}
++ ldmgedb r1!, {r3, r4, ip, lr}
++ stmgedb r0!, {r3, r4, ip, lr}
++ ldmgedb r1!, {r3, r4, ip, lr}
++ subges r2, r2, #32
++ stmdb r0!, {r3, r4, ip, lr}
++ bge 20b
++ PLD( cmn r2, #96 )
++ PLD( bge 21b )
++ PLD( add r2, r2, #96 )
++ tst r2, #31
++ ldmfd sp!, {r5 - r8}
++ ldmeqfd sp!, {r0, r4, pc}
++
++ tst r2, #16
++22: ldmnedb r1!, {r3, r4, ip, lr}
++ stmnedb r0!, {r3, r4, ip, lr}
++
++ tst r2, #8
++23: ldmnedb r1!, {r3, r4}
++ stmnedb r0!, {r3, r4}
++
++ tst r2, #4
++24: ldrne r3, [r1, #-4]!
++ strne r3, [r0, #-4]!
++
++25: ands r2, r2, #3
++ ldmeqfd sp!, {r0, r4, pc}
++
++ cmp r2, #2
++ ldrb r3, [r1, #-1]
++ ldrgeb r4, [r1, #-2]
++ ldrgtb ip, [r1, #-3]
++ strb r3, [r0, #-1]
++ strgeb r4, [r0, #-2]
++ strgtb ip, [r0, #-3]
++ ldmfd sp!, {r0, r4, pc}
++
++26: cmp ip, #2
++ ldrb r3, [r1, #-1]!
++ ldrgeb r4, [r1, #-1]!
++ ldrgtb lr, [r1, #-1]!
++ strb r3, [r0, #-1]!
++ strgeb r4, [r0, #-1]!
++ strgtb lr, [r0, #-1]!
++ subs r2, r2, ip
++ blt 25b
++ ands ip, r1, #3
++ beq 19b
++
++27: bic r1, r1, #3
++ cmp ip, #2
++ ldr r3, [r1]
++ beq 35f
++ blt 36f
++
++
++ .macro backward_copy_shift push pull
++
++ cmp r2, #12
++ PLD( pld [r1, #-4] )
++ blt 33f
++ subs r2, r2, #28
++ stmfd sp!, {r5 - r9}
++ blt 31f
++
++ PLD( subs r2, r2, #96 )
++ PLD( pld [r1, #-32] )
++ PLD( blt 30f )
++ PLD( pld [r1, #-64] )
++
++ PLD( @ cache alignment )
++ PLD( ands ip, r1, #31 )
++ PLD( pld [r1, #-96] )
++ PLD( beq 29f )
++ PLD( cmp r2, ip )
++ PLD( pld [r1, #-128] )
++ PLD( blt 29f )
++ PLD( sub r2, r2, ip )
++28: PLD( mov r4, r3, push #\push )
++ PLD( ldr r3, [r1, #-4]! )
++ PLD( subs ip, ip, #4 )
++ PLD( orr r4, r4, r3, pull #\pull )
++ PLD( str r4, [r0, #-4]! )
++ PLD( bgt 28b )
++
++29: PLD( pld [r1, #-128] )
++30: mov lr, r3, push #\push
++ ldmdb r1!, {r3 - r9, ip}
++ subs r2, r2, #32
++ orr lr, lr, ip, pull #\pull
++ mov ip, ip, push #\push
++ orr ip, ip, r9, pull #\pull
++ mov r9, r9, push #\push
++ orr r9, r9, r8, pull #\pull
++ mov r8, r8, push #\push
++ orr r8, r8, r7, pull #\pull
++ mov r7, r7, push #\push
++ orr r7, r7, r6, pull #\pull
++ mov r6, r6, push #\push
++ orr r6, r6, r5, pull #\pull
++ mov r5, r5, push #\push
++ orr r5, r5, r4, pull #\pull
++ mov r4, r4, push #\push
++ orr r4, r4, r3, pull #\pull
++ stmdb r0!, {r4 - r9, ip, lr}
++ bge 29b
++ PLD( cmn r2, #96 )
++ PLD( bge 30b )
++ PLD( add r2, r2, #96 )
++ cmn r2, #16
++ blt 32f
++31: mov r7, r3, push #\push
++ ldmdb r1!, {r3 - r6}
++ sub r2, r2, #16
++ orr r7, r7, r6, pull #\pull
++ mov r6, r6, push #\push
++ orr r6, r6, r5, pull #\pull
++ mov r5, r5, push #\push
++ orr r5, r5, r4, pull #\pull
++ mov r4, r4, push #\push
++ orr r4, r4, r3, pull #\pull
++ stmdb r0!, {r4 - r7}
++32: adds r2, r2, #28
++ ldmfd sp!, {r5 - r9}
++ blt 34f
++33: mov r4, r3, push #\push
++ ldr r3, [r1, #-4]!
++ subs r2, r2, #4
++ orr r4, r4, r3, pull #\pull
++ str r4, [r0, #-4]!
++ bge 33b
++34:
++ .endm
++
++
++ backward_copy_shift push=8 pull=24
++ add r1, r1, #3
++ b 25b
++
++35: backward_copy_shift push=16 pull=16
++ add r1, r1, #2
++ b 25b
++
++36: backward_copy_shift push=24 pull=8
++ add r1, r1, #1
++ b 25b
++
++ .size memmove, . - memmove
++END(memmove)
++libc_hidden_builtin_def (memmove)
+--- /dev/null 2004-02-02 20:32:13.000000000 +0000
++++ sysdeps/arm/bcopy.S 2004-03-20 18:37:48.000000000 +0000
+@@ -0,0 +1,255 @@
++/*
++ * Optimized memmove implementation for ARM processors
++ *
++ * Author: Nicolas Pitre
++ * Created: Dec 23, 2003
++ * Copyright: (C) MontaVista Software, Inc.
++ *
++ * This file is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * This file is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ */
++
++#include <sysdep.h>
++
++
++/*
++ * Endian independent macros for shifting bytes within registers.
++ */
++#ifndef __ARMEB__
++#define pull lsr
++#define push lsl
++#else
++#define pull lsl
++#define push lsr
++#endif
++
++/*
++ * Enable data preload for architectures that support it (ARMv5 and above)
++ */
++#if defined(__ARM_ARCH_5__) || \
++ defined(__ARM_ARCH_5T__) || \
++ defined(__ARM_ARCH_5TE__)
++#define PLD(code...) code
++#else
++#define PLD(code...)
++#endif
++
++dst .req r1
++src .req r0
++
++/* void *bcopy (const char *src, char *dst, size_t size) */
++ENTRY(bcopy)
++ subs ip, dst, src
++ cmphi r2, ip
++ movls r3, r0
++ movls r0, r1
++ movls r1, r3
++ bls memcpy(PLT)
++
++ stmfd sp!, {r4, lr}
++ add src, src, r2
++ add dst, dst, r2
++ subs r2, r2, #4
++ blt 25f
++ ands ip, dst, #3
++ PLD( pld [src, #-4] )
++ bne 26f
++ ands ip, src, #3
++ bne 27f
++
++19: subs r2, r2, #4
++ blt 24f
++ subs r2, r2, #8
++ blt 23f
++ subs r2, r2, #16
++ blt 22f
++
++ PLD( pld [src, #-32] )
++ PLD( subs r2, r2, #96 )
++ stmfd sp!, {r5 - r8}
++ PLD( blt 21f )
++
++ PLD( @ cache alignment )
++ PLD( ands ip, src, #31 )
++ PLD( pld [src, #-64] )
++ PLD( beq 20f )
++ PLD( cmp r2, ip )
++ PLD( pld [src, #-96] )
++ PLD( blt 20f )
++ PLD( cmp ip, #16 )
++ PLD( sub r2, r2, ip )
++ PLD( ldmgedb src!, {r3 - r6} )
++ PLD( stmgedb dst!, {r3 - r6} )
++ PLD( beq 20f )
++ PLD( and ip, ip, #15 )
++ PLD( cmp ip, #8 )
++ PLD( ldr r3, [src, #-4]! )
++ PLD( ldrge r4, [src, #-4]! )
++ PLD( ldrgt r5, [src, #-4]! )
++ PLD( str r3, [dst, #-4]! )
++ PLD( strge r4, [dst, #-4]! )
++ PLD( strgt r5, [dst, #-4]! )
++
++20: PLD( pld [src, #-96] )
++ PLD( pld [src, #-128] )
++21: ldmdb src!, {r3, r4, ip, lr}
++ subs r2, r2, #32
++ stmdb dst!, {r3, r4, ip, lr}
++ ldmdb src!, {r3, r4, ip, lr}
++ stmgedb dst!, {r3, r4, ip, lr}
++ ldmgedb src!, {r3, r4, ip, lr}
++ stmgedb dst!, {r3, r4, ip, lr}
++ ldmgedb src!, {r3, r4, ip, lr}
++ subges r2, r2, #32
++ stmdb dst!, {r3, r4, ip, lr}
++ bge 20b
++ PLD( cmn r2, #96 )
++ PLD( bge 21b )
++ PLD( add r2, r2, #96 )
++ tst r2, #31
++ ldmfd sp!, {r5 - r8}
++ ldmeqfd sp!, {r4, pc}
++
++ tst r2, #16
++22: ldmnedb src!, {r3, r4, ip, lr}
++ stmnedb dst!, {r3, r4, ip, lr}
++
++ tst r2, #8
++23: ldmnedb src!, {r3, r4}
++ stmnedb dst!, {r3, r4}
++
++ tst r2, #4
++24: ldrne r3, [src, #-4]!
++ strne r3, [dst, #-4]!
++
++25: ands r2, r2, #3
++ ldmeqfd sp!, {dst, r4, pc}
++
++ cmp r2, #2
++ ldrb r3, [src, #-1]
++ ldrgeb r4, [src, #-2]
++ ldrgtb ip, [src, #-3]
++ strb r3, [dst, #-1]
++ strgeb r4, [dst, #-2]
++ strgtb ip, [dst, #-3]
++ ldmfd sp!, {dst, r4, pc}
++
++26: cmp ip, #2
++ ldrb r3, [src, #-1]!
++ ldrgeb r4, [src, #-1]!
++ ldrgtb lr, [src, #-1]!
++ strb r3, [dst, #-1]!
++ strgeb r4, [dst, #-1]!
++ strgtb lr, [dst, #-1]!
++ subs r2, r2, ip
++ blt 25b
++ ands ip, src, #3
++ beq 19b
++
++27: bic src, src, #3
++ cmp ip, #2
++ ldr r3, [src]
++ beq 35f
++ blt 36f
++
++
++ .macro backward_copy_shift push pull
++
++ cmp r2, #12
++ PLD( pld [src, #-4] )
++ blt 33f
++ subs r2, r2, #28
++ stmfd sp!, {r5 - r9}
++ blt 31f
++
++ PLD( subs r2, r2, #96 )
++ PLD( pld [src, #-32] )
++ PLD( blt 30f )
++ PLD( pld [src, #-64] )
++
++ PLD( @ cache alignment )
++ PLD( ands ip, src, #31 )
++ PLD( pld [src, #-96] )
++ PLD( beq 29f )
++ PLD( cmp r2, ip )
++ PLD( pld [src, #-128] )
++ PLD( blt 29f )
++ PLD( sub r2, r2, ip )
++28: PLD( mov r4, r3, push #\push )
++ PLD( ldr r3, [src, #-4]! )
++ PLD( subs ip, ip, #4 )
++ PLD( orr r4, r4, r3, pull #\pull )
++ PLD( str r4, [dst, #-4]! )
++ PLD( bgt 28b )
++
++29: PLD( pld [src, #-128] )
++30: mov lr, r3, push #\push
++ ldmdb src!, {r3 - r9, ip}
++ subs r2, r2, #32
++ orr lr, lr, ip, pull #\pull
++ mov ip, ip, push #\push
++ orr ip, ip, r9, pull #\pull
++ mov r9, r9, push #\push
++ orr r9, r9, r8, pull #\pull
++ mov r8, r8, push #\push
++ orr r8, r8, r7, pull #\pull
++ mov r7, r7, push #\push
++ orr r7, r7, r6, pull #\pull
++ mov r6, r6, push #\push
++ orr r6, r6, r5, pull #\pull
++ mov r5, r5, push #\push
++ orr r5, r5, r4, pull #\pull
++ mov r4, r4, push #\push
++ orr r4, r4, r3, pull #\pull
++ stmdb dst!, {r4 - r9, ip, lr}
++ bge 29b
++ PLD( cmn r2, #96 )
++ PLD( bge 30b )
++ PLD( add r2, r2, #96 )
++ cmn r2, #16
++ blt 32f
++31: mov r7, r3, push #\push
++ ldmdb src!, {r3 - r6}
++ sub r2, r2, #16
++ orr r7, r7, r6, pull #\pull
++ mov r6, r6, push #\push
++ orr r6, r6, r5, pull #\pull
++ mov r5, r5, push #\push
++ orr r5, r5, r4, pull #\pull
++ mov r4, r4, push #\push
++ orr r4, r4, r3, pull #\pull
++ stmdb dst!, {r4 - r7}
++32: adds r2, r2, #28
++ ldmfd sp!, {r5 - r9}
++ blt 34f
++33: mov r4, r3, push #\push
++ ldr r3, [src, #-4]!
++ subs r2, r2, #4
++ orr r4, r4, r3, pull #\pull
++ str r4, [dst, #-4]!
++ bge 33b
++34:
++ .endm
++
++
++ backward_copy_shift push=8 pull=24
++ add src, src, #3
++ b 25b
++
++35: backward_copy_shift push=16 pull=16
++ add src, src, #2
++ b 25b
++
++36: backward_copy_shift push=24 pull=8
++ add src, src, #1
++ b 25b
++
++ .size bcopy, . - bcopy
++END(bcopy)
+
+--- /dev/null 2004-02-02 20:32:13.000000000 +0000
++++ sysdeps/arm/memcpy.S 2004-05-02 14:33:22.000000000 +0100
+@@ -0,0 +1,242 @@
++/*
++ * Optimized memcpy implementation for ARM processors
++ *
++ * Author: Nicolas Pitre
++ * Created: Dec 23, 2003
++ * Copyright: (C) MontaVista Software, Inc.
++ *
++ * This file is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * This file is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Lesser General Public License for more details.
++ */
++
++#include <sysdep.h>
++
++
++/*
++ * Endian independent macros for shifting bytes within registers.
++ */
++#ifndef __ARMEB__
++#define pull lsr
++#define push lsl
++#else
++#define pull lsl
++#define push lsr
++#endif
++
++/*
++ * Enable data preload for architectures that support it (ARMv5 and above)
++ */
++#if defined(__ARM_ARCH_5__) || \
++ defined(__ARM_ARCH_5T__) || \
++ defined(__ARM_ARCH_5TE__)
++#define PLD(code...) code
++#else
++#define PLD(code...)
++#endif
++
++
++/* char * memcpy (char *dst, const char *src) */
++
++ENTRY(memcpy)
++ subs r2, r2, #4
++ stmfd sp!, {r0, r4, lr}
++ blt 7f
++ ands ip, r0, #3
++ PLD( pld [r1, #0] )
++ bne 8f
++ ands ip, r1, #3
++ bne 9f
++
++1: subs r2, r2, #4
++ blt 6f
++ subs r2, r2, #8
++ blt 5f
++ subs r2, r2, #16
++ blt 4f
++
++ PLD( subs r2, r2, #65 )
++ stmfd sp!, {r5 - r8}
++ PLD( blt 3f )
++ PLD( pld [r1, #32] )
++
++ PLD( @ cache alignment )
++ PLD( ands ip, r1, #31 )
++ PLD( pld [r1, #64] )
++ PLD( beq 2f )
++ PLD( rsb ip, ip, #32 )
++ PLD( cmp r2, ip )
++ PLD( pld [r1, #96] )
++ PLD( blt 2f )
++ PLD( cmp ip, #16 )
++ PLD( sub r2, r2, ip )
++ PLD( ldmgeia r1!, {r3 - r6} )
++ PLD( stmgeia r0!, {r3 - r6} )
++ PLD( beq 2f )
++ PLD( and ip, ip, #15 )
++ PLD( cmp ip, #8 )
++ PLD( ldr r3, [r1], #4 )
++ PLD( ldrge r4, [r1], #4 )
++ PLD( ldrgt r5, [r1], #4 )
++ PLD( str r3, [r0], #4 )
++ PLD( strge r4, [r0], #4 )
++ PLD( strgt r5, [r0], #4 )
++
++2: PLD( pld [r1, #96] )
++3: ldmia r1!, {r3 - r8, ip, lr}
++ subs r2, r2, #32
++ stmia r0!, {r3 - r8, ip, lr}
++ bge 2b
++ PLD( cmn r2, #65 )
++ PLD( bge 3b )
++ PLD( add r2, r2, #65 )
++ tst r2, #31
++ ldmfd sp!, {r5 - r8}
++ ldmeqfd sp!, {r0, r4, pc}
++
++ tst r2, #16
++4: ldmneia r1!, {r3, r4, ip, lr}
++ stmneia r0!, {r3, r4, ip, lr}
++
++ tst r2, #8
++5: ldmneia r1!, {r3, r4}
++ stmneia r0!, {r3, r4}
++
++ tst r2, #4
++6: ldrne r3, [r1], #4
++ strne r3, [r0], #4
++
++7: ands r2, r2, #3
++ ldmeqfd sp!, {r0, r4, pc}
++
++ cmp r2, #2
++ ldrb r3, [r1], #1
++ ldrgeb r4, [r1], #1
++ ldrgtb ip, [r1]
++ strb r3, [r0], #1
++ strgeb r4, [r0], #1
++ strgtb ip, [r0]
++ ldmfd sp!, {r0, r4, pc}
++
++8: rsb ip, ip, #4
++ cmp ip, #2
++ ldrb r3, [r1], #1
++ ldrgeb r4, [r1], #1
++ ldrgtb lr, [r1], #1
++ strb r3, [r0], #1
++ strgeb r4, [r0], #1
++ strgtb lr, [r0], #1
++ subs r2, r2, ip
++ blt 7b
++ ands ip, r1, #3
++ beq 1b
++
++9: bic r1, r1, #3
++ cmp ip, #2
++ ldr lr, [r1], #4
++ beq 17f
++ bgt 18f
++
++
++ .macro forward_copy_shift pull push
++
++ cmp r2, #12
++ PLD( pld [r1, #0] )
++ blt 15f
++ subs r2, r2, #28
++ stmfd sp!, {r5 - r9}
++ blt 13f
++
++ PLD( subs r2, r2, #97 )
++ PLD( blt 12f )
++ PLD( pld [r1, #32] )
++
++ PLD( @ cache alignment )
++ PLD( rsb ip, r1, #36 )
++ PLD( pld [r1, #64] )
++ PLD( ands ip, ip, #31 )
++ PLD( pld [r1, #96] )
++ PLD( beq 11f )
++ PLD( cmp r2, ip )
++ PLD( pld [r1, #128] )
++ PLD( blt 11f )
++ PLD( sub r2, r2, ip )
++10: PLD( mov r3, lr, pull #\pull )
++ PLD( ldr lr, [r1], #4 )
++ PLD( subs ip, ip, #4 )
++ PLD( orr r3, r3, lr, push #\push )
++ PLD( str r3, [r0], #4 )
++ PLD( bgt 10b )
++
++11: PLD( pld [r1, #128] )
++12: mov r3, lr, pull #\pull
++ ldmia r1!, {r4 - r9, ip, lr}
++ subs r2, r2, #32
++ orr r3, r3, r4, push #\push
++ mov r4, r4, pull #\pull
++ orr r4, r4, r5, push #\push
++ mov r5, r5, pull #\pull
++ orr r5, r5, r6, push #\push
++ mov r6, r6, pull #\pull
++ orr r6, r6, r7, push #\push
++ mov r7, r7, pull #\pull
++ orr r7, r7, r8, push #\push
++ mov r8, r8, pull #\pull
++ orr r8, r8, r9, push #\push
++ mov r9, r9, pull #\pull
++ orr r9, r9, ip, push #\push
++ mov ip, ip, pull #\pull
++ orr ip, ip, lr, push #\push
++ stmia r0!, {r3 - r9, ip}
++ bge 11b
++ PLD( cmn r2, #97 )
++ PLD( bge 12b )
++ PLD( add r2, r2, #97 )
++ cmn r2, #16
++ blt 14f
++13: mov r3, lr, pull #\pull
++ ldmia r1!, {r4 - r6, lr}
++ sub r2, r2, #16
++ orr r3, r3, r4, push #\push
++ mov r4, r4, pull #\pull
++ orr r4, r4, r5, push #\push
++ mov r5, r5, pull #\pull
++ orr r5, r5, r6, push #\push
++ mov r6, r6, pull #\pull
++ orr r6, r6, lr, push #\push
++ stmia r0!, {r3 - r6}
++14: adds r2, r2, #28
++ ldmfd sp!, {r5 - r9}
++ blt 16f
++15: mov r3, lr, pull #\pull
++ ldr lr, [r1], #4
++ subs r2, r2, #4
++ orr r3, r3, lr, push #\push
++ str r3, [r0], #4
++ bge 15b
++16:
++ .endm
++
++
++ forward_copy_shift pull=8 push=24
++ sub r1, r1, #3
++ b 7b
++
++17: forward_copy_shift pull=16 push=16
++ sub r1, r1, #2
++ b 7b
++
++18: forward_copy_shift pull=24 push=8
++ sub r1, r1, #1
++ b 7b
++
++ .size memcpy, . - memcpy
++END(memcpy)
++libc_hidden_builtin_def (memcpy)
++