Skip to content

Commit dec5ddf

Browse files
committed
Select implementation at runtime for memcpy, memmove and mempcpy
This relies upon the insertion of a trampoline function that loads the address of the appropriate implementation from a static variable. The use of a static initialiser that points at the function that patches up the function pointer itself ensures that this is thread-safe. The method used to determine which is the best implementation takes advantage of the change in behaviour of interworking branches between ARMv6 and ARMv7. This makes the test OS-independent, and in any case it's questionable how safe it is to be making system calls when a function as fundamental as memcpy isn't functional yet, so this is probably a safer approach than asking the kernel. The main downside here is that anyone trying the library on an ARMv7 platform which lacks NEON (the usual example is nVidia Tegra 2) will find it doesn't work. But then we're not pretending to offer an optimised implementation for that anyway, and they might as well continue to use glibc.
1 parent 4418bb4 commit dec5ddf

File tree

5 files changed

+122
-20
lines changed

5 files changed

+122
-20
lines changed

Makefile

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
1-
OBJS = memcmp.o memcpymove.o memset.o
2-
OBJS-A7 = memcpymove-a7.o
3-
CFLAGS += -std=c99 -O2
1+
OBJS = architecture.o memcmp.o memcpymove.o memcpymove-a7.o memset.o trampoline.o
2+
CFLAGS += -std=gnu99 -O2
43

5-
all: libarmmem.so libarmmem.a libarmmem-a7.so libarmmem-a7.a test
4+
all: libarmmem.so libarmmem.a test
65

76
%.o: %.c
87
$(CROSS_COMPILE)gcc $(CFLAGS) -c -o $@ $^
@@ -16,12 +15,6 @@ libarmmem.so: $(OBJS)
1615
libarmmem.a: $(OBJS)
1716
$(CROSS_COMPILE)ar rcs $@ $^
1817

19-
libarmmem-a7.so: $(OBJS-A7)
20-
$(CROSS_COMPILE)gcc -shared -o $@ $^
21-
22-
libarmmem-a7.a: $(OBJS-A7)
23-
$(CROSS_COMPILE)ar rcs $@ $^
24-
2518
test: test.o
2619
$(CROSS_COMPILE)gcc -o $@ $^
2720

architecture.S

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/*
2+
Copyright (c) 2015, RISC OS Open Ltd
3+
All rights reserved.
4+
5+
Redistribution and use in source and binary forms, with or without
6+
modification, are permitted provided that the following conditions are met:
7+
* Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
* Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in the
11+
documentation and/or other materials provided with the distribution.
12+
* Neither the name of the copyright holder nor the
13+
names of its contributors may be used to endorse or promote products
14+
derived from this software without specific prior written permission.
15+
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
20+
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23+
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*/
27+
28+
/* Grubby, but completely OS-independent and USR-mode safe way to
29+
* distinguish between ARMv6 and ARMv7 platforms at runtime
30+
*/
31+
32+
/* int architecture(void)
 *
 * Returns 6 or 7 in a1. The routine starts in ARM state and writes an
 * odd address to pc with a data-processing instruction. Per the inline
 * notes below, that write only acts as an interworking branch (switching
 * to Thumb state) on ARMv7:
 *  - without interworking, execution continues in ARM state through the
 *    'and' (whose result is overwritten) and returns 6;
 *  - with interworking, the upper halfword of the 'and' encoding is
 *    decoded as a Thumb branch that skips forward to the .thumb code,
 *    which returns 7.
 * The exact instructions and their order are load-bearing: the trick
 * depends on the encoding of the second word and on the byte offsets
 * between the instructions, so do not reorder or pad this sequence.
 * The symbol is marked .hidden, so it is not exported from the library.
 */
.global architecture
33+
.hidden architecture
34+
.func architecture
35+
.arm
36+
architecture:
37+
sub pc, pc, #1 @ is an interworking branch on ARMv7, not ARMv6
38+
and a1, a4, a1 @ second word interpreted as 'B .+0xA' if Thumb
39+
mov a1, #6
40+
bx lr
41+
.thumb
42+
mov a1, #7
43+
bx lr
44+
.endfunc

memcpymove-a7.S

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -612,7 +612,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
612612

613613
.set prefetch_distance, 2
614614

615-
myfunc memcpy
615+
myfunc memcpy_cortexa7
616+
.hidden memcpy_cortexa7
616617
1000: memcpy 0
617618
.endfunc
618619

@@ -628,7 +629,8 @@ myfunc memcpy
628629

629630
.set prefetch_distance, 2
630631

631-
myfunc memmove
632+
myfunc memmove_cortexa7
633+
.hidden memmove_cortexa7
632634
cmp a2, a1
633635
bpl 1000b /* pl works even over -1 - 0 and 0x7fffffff - 0x80000000 boundaries */
634636
memcpy 1
@@ -644,9 +646,11 @@ myfunc memmove
644646
* a1 = pointer to immediately after destination block
645647
*/
646648

647-
myfunc mempcpy
648-
.global __mempcpy
649-
__mempcpy:
649+
myfunc mempcpy_cortexa7
650+
.hidden mempcpy_cortexa7
651+
.hidden __mempcpy_cortexa7
652+
.global __mempcpy_cortexa7
653+
__mempcpy_cortexa7:
650654
push {v1, lr}
651655
mov v1, a3
652656
bl 1000b

memcpymove.S

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -531,7 +531,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
531531

532532
.set prefetch_distance, 3
533533

534-
myfunc memcpy
534+
myfunc memcpy_arm1176jzfs
535+
.hidden memcpy_arm1176jzfs
535536
1000: memcpy 0
536537
.endfunc
537538

@@ -547,7 +548,8 @@ myfunc memcpy
547548

548549
.set prefetch_distance, 3
549550

550-
myfunc memmove
551+
myfunc memmove_arm1176jzfs
552+
.hidden memmove_arm1176jzfs
551553
cmp a2, a1
552554
bpl 1000b /* pl works even over -1 - 0 and 0x7fffffff - 0x80000000 boundaries */
553555
memcpy 1
@@ -563,9 +565,11 @@ myfunc memmove
563565
* a1 = pointer to immediately after destination block
564566
*/
565567

566-
myfunc mempcpy
567-
.global __mempcpy
568-
__mempcpy:
568+
myfunc mempcpy_arm1176jzfs
569+
.hidden mempcpy_arm1176jzfs
570+
.hidden __mempcpy_arm1176jzfs
571+
.global __mempcpy_arm1176jzfs
572+
__mempcpy_arm1176jzfs:
569573
push {v1, lr}
570574
mov v1, a3
571575
bl 1000b

trampoline.c

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/*
2+
Copyright (c) 2015, RISC OS Open Ltd
3+
All rights reserved.
4+
5+
Redistribution and use in source and binary forms, with or without
6+
modification, are permitted provided that the following conditions are met:
7+
* Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
* Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in the
11+
documentation and/or other materials provided with the distribution.
12+
* Neither the name of the copyright holder nor the
13+
names of its contributors may be used to endorse or promote products
14+
derived from this software without specific prior written permission.
15+
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
20+
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23+
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*/
27+
28+
#define _GNU_SOURCE /* enable mempcpy */
29+
#include <string.h>
30+
31+
extern int architecture(void);
32+
33+
/* DISPATCH(ret_type, func, args_full, args)
 *
 * Generates a runtime-selected entry point for 'func':
 *   ret_type  - return type of the generated functions
 *   func      - name of the public function to define (e.g. memcpy)
 *   args_full - parenthesised parameter list, with types
 *   args      - parenthesised argument list used to forward the call
 *
 * The expansion declares the two candidate implementations
 * func_arm1176jzfs and func_cortexa7 (defined elsewhere), a static
 * function func_initial, and a static function pointer p<func> that is
 * statically initialised to func_initial.
 *
 * The public 'func' simply calls through p<func>. While the pointer
 * still refers to func_initial, that call runs architecture(): a result
 * of 6 selects the _arm1176jzfs variant, any other result selects the
 * _cortexa7 variant. The chosen address is stored in p<func> and the
 * call is forwarded, so later calls go straight to the selected
 * implementation.
 */
#define DISPATCH(ret_type, func, args_full, args) \
34+
\
35+
typeof(func) func##_arm1176jzfs; \
36+
typeof(func) func##_cortexa7; \
37+
static typeof(func) func##_initial; \
38+
static typeof(func) *p##func = func##_initial; \
39+
\
40+
ret_type func args_full \
41+
{ \
42+
return p##func args; \
43+
} \
44+
\
45+
static ret_type func##_initial args_full \
46+
{ \
47+
if (architecture() == 6) \
48+
p##func = func##_arm1176jzfs; \
49+
else \
50+
p##func = func##_cortexa7; \
51+
return p##func args; \
52+
}
53+
54+
/* Instantiate one dispatcher per public entry point: memcpy, memmove,
 * __mempcpy and mempcpy. Each expansion gets its own function pointer
 * and its own first-call selection. */
DISPATCH(void *, memcpy, (void *__restrict __dest, __const void *__restrict __src, size_t __n), (__dest, __src, __n))
55+
DISPATCH(void *, memmove, (void * __dest, __const void * __src, size_t __n), (__dest, __src, __n))
56+
DISPATCH(void *,__mempcpy, (void *__restrict __dest, __const void *__restrict __src, size_t __n), (__dest, __src, __n))
57+
DISPATCH(void *, mempcpy, (void *__restrict __dest, __const void *__restrict __src, size_t __n), (__dest, __src, __n))

0 commit comments

Comments
 (0)