1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
|
diff -ruN liboil-0.2.0.orig/liboil/simdpack/abs_u16_s16.c liboil-0.2.0/liboil/simdpack/abs_u16_s16.c
--- liboil-0.2.0.orig/liboil/simdpack/abs_u16_s16.c 2004-09-08 06:18:51.000000000 +0200
+++ liboil-0.2.0/liboil/simdpack/abs_u16_s16.c 2004-11-17 01:09:54.395284136 +0100
@@ -349,7 +349,7 @@
#ifdef HAVE_CPU_POWERPC
static void
-abs_u16_s16_a16_altivec (uint16_t * dest, int dstr, int16_t * src, int sstr,
+abs_u16_s16_altivec (uint16_t * dest, int dstr, int16_t * src, int sstr,
int n)
{
int i;
diff -ruN liboil-0.2.0.orig/liboil/simdpack/clip_ref.c liboil-0.2.0/liboil/simdpack/clip_ref.c
--- liboil-0.2.0.orig/liboil/simdpack/clip_ref.c 2004-09-08 06:18:51.000000000 +0200
+++ liboil-0.2.0/liboil/simdpack/clip_ref.c 2004-11-17 11:19:29.352045552 +0100
@@ -22,6 +22,9 @@
#include <liboil/liboilfunction.h>
+typedef float f32;
+typedef double f64;
+
#define CLIP_DEFINE_REF(type) \
static void clip_ ## type ## _ref ( \
type_ ## type *dest, \
diff -ruN liboil-0.2.0.orig/liboil/simdpack/sad8x8.c liboil-0.2.0/liboil/simdpack/sad8x8.c
--- liboil-0.2.0.orig/liboil/simdpack/sad8x8.c 2004-09-08 06:18:51.000000000 +0200
+++ liboil-0.2.0/liboil/simdpack/sad8x8.c 2004-11-17 11:12:43.265780048 +0100
@@ -70,158 +70,6 @@
OIL_DEFINE_IMPL_REF(sad8x8_s16_ref, sad8x8_s16);
-#ifdef HAVE_CPU_POWERPC
-static void
-sad8x8_s16_a16_altivec (uint32_t *dest, int16_t *src1, int16_t *src2, int s1str, int s2str)
-{
- static uint32_t x[4] __attribute__ ((__aligned__ (16)));
-
- sl_altivec_load8_0(src1, s1str);
- sl_altivec_load8_8(src2, s2str);
-
- __asm__ __volatile__(
- "\n"
- "\tvspltisw 19, 0\n"
- "\tvspltisw 22, 0\n"
-
- "\tvminsh 16, 0, 8\n"
- "\tvmaxsh 17, 0, 8\n"
- "\tvsubuhm 18, 17, 16\n"
- "\tvmrghh 20, 19, 18\n"
- "\tvmrglh 21, 19, 18\n"
- "\tvsumsws 22, 20, 22\n"
- "\tvsumsws 22, 21, 22\n"
-
- "\tvminsh 16, 1, 9\n"
- "\tvmaxsh 17, 1, 9\n"
- "\tvsubuhm 18, 17, 16\n"
- "\tvmrghh 20, 19, 18\n"
- "\tvmrglh 21, 19, 18\n"
- "\tvsumsws 22, 20, 22\n"
- "\tvsumsws 22, 21, 22\n"
-
- "\tvminsh 16, 2, 10\n"
- "\tvmaxsh 17, 2, 10\n"
- "\tvsubuhm 18, 17, 16\n"
- "\tvmrghh 20, 19, 18\n"
- "\tvmrglh 21, 19, 18\n"
- "\tvsumsws 22, 20, 22\n"
- "\tvsumsws 22, 21, 22\n"
-
- "\tvminsh 16, 3, 11\n"
- "\tvmaxsh 17, 3, 11\n"
- "\tvsubuhm 18, 17, 16\n"
- "\tvmrghh 20, 19, 18\n"
- "\tvmrglh 21, 19, 18\n"
- "\tvsumsws 22, 20, 22\n"
- "\tvsumsws 22, 21, 22\n"
-
- "\tvminsh 16, 4, 12\n"
- "\tvmaxsh 17, 4, 12\n"
- "\tvsubuhm 18, 17, 16\n"
- "\tvmrghh 20, 19, 18\n"
- "\tvmrglh 21, 19, 18\n"
- "\tvsumsws 22, 20, 22\n"
- "\tvsumsws 22, 21, 22\n"
-
- "\tvminsh 16, 5, 13\n"
- "\tvmaxsh 17, 5, 13\n"
- "\tvsubuhm 18, 17, 16\n"
- "\tvmrghh 20, 19, 18\n"
- "\tvmrglh 21, 19, 18\n"
- "\tvsumsws 22, 20, 22\n"
- "\tvsumsws 22, 21, 22\n"
-
- "\tvminsh 16, 6, 14\n"
- "\tvmaxsh 17, 6, 14\n"
- "\tvsubuhm 18, 17, 16\n"
- "\tvmrghh 20, 19, 18\n"
- "\tvmrglh 21, 19, 18\n"
- "\tvsumsws 22, 20, 22\n"
- "\tvsumsws 22, 21, 22\n"
-
- "\tvminsh 16, 7, 15\n"
- "\tvmaxsh 17, 7, 15\n"
- "\tvsubuhm 18, 17, 16\n"
- "\tvmrghh 20, 19, 18\n"
- "\tvmrglh 21, 19, 18\n"
- "\tvsumsws 22, 20, 22\n"
- "\tvsumsws 22, 21, 22\n"
-
- "\tli 0, 0\n"
- "\tstvx 22, %0, 0\n"
- :
- : "r" (x)
- );
- *dest = x[3];
-}
-
-/* IMPL sad8x8_s16_l15_a16_altivec defined(SIMDPACK_USE_ALTIVEC) */
-SL_sad8x8_s16_storage
-void sad8x8_s16_l15_a16_altivec(uint32_t *dest, int16_t *src1, int16_t *src2, int s1str, int s2str)
-{
- static uint32_t x[4] __attribute__ ((__aligned__ (16)));
-
- sl_altivec_load8_0(src1, s1str);
- sl_altivec_load8_8(src2, s2str);
-
- __asm__ __volatile__(
- "\n"
- "\tvspltisw 19, 0\n"
- "\tvspltisw 22, 0\n"
-
- "\tvminsh 16, 0, 8\n"
- "\tvmaxsh 17, 0, 8\n"
- "\tvsubuhm 18, 17, 16\n"
- "\tvsum4shs 22, 18, 22\n"
-
- "\tvminsh 16, 1, 9\n"
- "\tvmaxsh 17, 1, 9\n"
- "\tvsubuhm 18, 17, 16\n"
- "\tvsum4shs 22, 18, 22\n"
-
- "\tvminsh 16, 2, 10\n"
- "\tvmaxsh 17, 2, 10\n"
- "\tvsubuhm 18, 17, 16\n"
- "\tvsum4shs 22, 18, 22\n"
-
- "\tvminsh 16, 3, 11\n"
- "\tvmaxsh 17, 3, 11\n"
- "\tvsubuhm 18, 17, 16\n"
- "\tvsum4shs 22, 18, 22\n"
-
- "\tvminsh 16, 4, 12\n"
- "\tvmaxsh 17, 4, 12\n"
- "\tvsubuhm 18, 17, 16\n"
- "\tvsum4shs 22, 18, 22\n"
-
- "\tvminsh 16, 5, 13\n"
- "\tvmaxsh 17, 5, 13\n"
- "\tvsubuhm 18, 17, 16\n"
- "\tvsum4shs 22, 18, 22\n"
-
- "\tvminsh 16, 6, 14\n"
- "\tvmaxsh 17, 6, 14\n"
- "\tvsubuhm 18, 17, 16\n"
- "\tvsum4shs 22, 18, 22\n"
-
- "\tvminsh 16, 7, 15\n"
- "\tvmaxsh 17, 7, 15\n"
- "\tvsubuhm 18, 17, 16\n"
- "\tvsum4shs 22, 18, 22\n"
-
- "\tvsumsws 22, 22, 19\n"
-
- "\tli 0, 0\n"
- "\tstvx 22, %0, 0\n"
- :
- : "r" (x)
- );
- *dest = x[3];
-}
-#endif
-
-
#ifdef TEST_sad8x8_s16
int TEST_sad8x8_s16(void)
{
|