1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
|
/*
* BK Id: SCCS/s.string.S 1.9 10/25/01 10:08:51 trini
*/
/*
* String handling functions for PowerPC.
*
* Copyright (C) 1996 Paul Mackerras.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
//#warning Be forewarned - using PowerPC assembly
#include "ppc_asm.tmpl"
#define N_FUN 36
#define N_SO 100
#define COPY_16_BYTES \
lwz r7,4(r4); \
lwz r8,8(r4); \
lwz r9,12(r4); \
lwzu r10,16(r4); \
stw r7,4(r6); \
stw r8,8(r6); \
stw r9,12(r6); \
stwu r10,16(r6)
#define __stringify_1(x) #x
#define __stringify(x) __stringify_1(x)
#define _GLOBFN(n)\
.stabs __stringify(n:F-1),N_FUN,0,0,n;\
.type n,@function; \
.globl n;\
n:
#define _SIZE(n) \
.size n, .-n
.text
.stabs "src/xine-utils",N_SO,0,0,.
.stabs "ppcasm_string.S",N_SO,0,0,.
#warning FIXME: Get cache line sizes from /proc
#define L1_CACHE_LINE_SIZE 32
CACHELINE_BYTES = 32
LG_CACHELINE_BYTES = 5
CACHELINE_MASK = (32 -1)
/*
* This version uses dcbz on the complete cache lines in the
* destination area to reduce memory traffic. This requires that
* the destination area is cacheable.
* We only use this version if the source and dest don't overlap.
* -- paulus.
*/
_GLOBFN(ppcasm_cacheable_memcpy)
add r7,r3,r5 /* test if the src & dst overlap */
add r8,r4,r5
cmplw 0,r4,r7
cmplw 1,r3,r8
crand 0,0,4 /* cr0.lt &= cr1.lt */
blt 66f //ppcasm_memcpy /* if regions overlap */
addi r4,r4,-4
addi r6,r3,-4
neg r0,r3
andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */
beq 58f
cmplw 0,r5,r0 /* is this more than total to do? */
blt 63f /* if not much to do */
andi. r8,r0,3 /* get it word-aligned first */
subf r5,r0,r5
mtctr r8
beq+ 61f
70: lbz r9,4(r4) /* do some bytes */
stb r9,4(r6)
addi r4,r4,1
addi r6,r6,1
bdnz 70b
61: srwi. r0,r0,2
mtctr r0
beq 58f
72: lwzu r9,4(r4) /* do some words */
stwu r9,4(r6)
bdnz 72b
58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
clrlwi r5,r5,32-LG_CACHELINE_BYTES
li r11,4
mtctr r0
beq 63f
53:
#if !defined(CONFIG_8xx)
dcbz r11,r6
#endif
COPY_16_BYTES
#if L1_CACHE_LINE_SIZE >= 32
COPY_16_BYTES
#if L1_CACHE_LINE_SIZE >= 64
COPY_16_BYTES
COPY_16_BYTES
#if L1_CACHE_LINE_SIZE >= 128
COPY_16_BYTES
COPY_16_BYTES
COPY_16_BYTES
COPY_16_BYTES
#endif
#endif
#endif
bdnz 53b
63: srwi. r0,r5,2
mtctr r0
beq 64f
30: lwzu r0,4(r4)
stwu r0,4(r6)
bdnz 30b
64: andi. r0,r5,3
mtctr r0
beq+ 65f
40: lbz r0,4(r4)
stb r0,4(r6)
addi r4,r4,1
addi r6,r6,1
bdnz 40b
65: blr
_SIZE(ppcasm_cacheable_memcpy)
_GLOBFN(ppcasm_memcpy)
66: srwi. r7,r5,3
addi r6,r3,-4
addi r4,r4,-4
beq 2f /* if less than 8 bytes to do */
andi. r0,r6,3 /* get dest word aligned */
mtctr r7
bne 5f
1: lwz r7,4(r4)
lwzu r8,8(r4)
stw r7,4(r6)
stwu r8,8(r6)
bdnz 1b
andi. r5,r5,7
2: cmplwi 0,r5,4
blt 3f
lwzu r0,4(r4)
addi r5,r5,-4
stwu r0,4(r6)
3: cmpwi 0,r5,0
beqlr
mtctr r5
addi r4,r4,3
addi r6,r6,3
4: lbzu r0,1(r4)
stbu r0,1(r6)
bdnz 4b
blr
5: subfic r0,r0,4
mtctr r0
6: lbz r7,4(r4)
addi r4,r4,1
stb r7,4(r6)
addi r6,r6,1
bdnz 6b
subf r5,r0,r5
rlwinm. r7,r5,32-3,3,31
beq 2b
mtctr r7
b 1b
_SIZE(ppcasm_memcpy)
|