FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
hpeldsp_alpha.c
Go to the documentation of this file.
/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21 
22 #include "libavutil/attributes.h"
23 #include "libavcodec/hpeldsp.h"
24 #include "hpeldsp_alpha.h"
25 #include "asm.h"
26 
/*
 * Truncating average of two packed words.
 *
 * Computes (a & b) + (((a ^ b) & mask) >> 1): the bits both inputs share,
 * plus half of the bits in which they differ. Assuming BYTE_VEC(0xfe)
 * replicates 0xfe into every byte (it is defined outside this file), the
 * mask drops bit 0 of each byte before the shift so nothing leaks into the
 * neighbouring byte, giving (a + b) >> 1 per byte.
 */
static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
{
    const uint64_t shared    = a & b;
    const uint64_t differing = (a ^ b) & BYTE_VEC(0xfe);

    return shared + (differing >> 1);
}
31 
/*
 * Rounding-up average of two packed words.
 *
 * Computes (a | b) - (((a ^ b) & mask) >> 1): every bit set in either
 * input, minus half of the bits in which they differ. Assuming
 * BYTE_VEC(0xfe) replicates 0xfe into every byte (it is defined outside this
 * file), this is (a + b + 1) >> 1 per byte.
 */
static inline uint64_t avg2(uint64_t a, uint64_t b)
{
    const uint64_t either    = a | b;
    const uint64_t differing = (a ^ b) & BYTE_VEC(0xfe);

    return either - (differing >> 1);
}
36 
#if 0
/*
 * Reference only (compiled out). The XY2 routines basically utilize this
 * scheme, but reuse parts in each iteration.
 *
 * Each input is split into its bits above the low two (pre-shifted right by
 * two) and its low two bits; the low parts are summed together with a
 * rounding constant, shifted, masked and added back onto the high sum.
 */
static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
{
    const uint64_t low_mask = BYTE_VEC(0x03);

    uint64_t high_sum = ((l1 & ~low_mask) >> 2)
                      + ((l2 & ~low_mask) >> 2)
                      + ((l3 & ~low_mask) >> 2)
                      + ((l4 & ~low_mask) >> 2);
    uint64_t low_sum  = (l1 & low_mask)
                      + (l2 & low_mask)
                      + (l3 & low_mask)
                      + (l4 & low_mask)
                      + BYTE_VEC(0x02);

    return high_sum + ((low_sum >> 2) & low_mask);
}
#endif
54 
/*
 * Straight copy kernel.
 *
 * For each row: read one uint64_t from pixels with LOAD, pass it to STORE
 * together with block, then advance both pointers by line_size.
 *
 * Uses pixels, block, line_size and h from the enclosing scope and modifies
 * pixels, block and h. The row count is tested after the first row, so h
 * must be at least 1 on entry.
 */
#define OP(LOAD, STORE)                                                     \
    do {                                                                    \
        uint64_t op_row = LOAD(pixels);                                     \
                                                                            \
        STORE(op_row, block);                                               \
        block  += line_size;                                                \
        pixels += line_size;                                                \
    } while (--h)
61 
/*
 * Horizontal half-pel kernel.
 *
 * For each row: LOAD one uint64_t from pixels, build a second word from it
 * by shifting right eight bits and placing pixels[8] in the top byte, then
 * STORE the AVG2 of the two words at block. Nine source bytes are therefore
 * read per row.
 *
 * Uses pixels, block, line_size and h from the enclosing scope and modifies
 * pixels, block and h; h must be at least 1 on entry.
 */
#define OP_X2(LOAD, STORE)                                                  \
    do {                                                                    \
        uint64_t cur     = LOAD(pixels);                                    \
        uint64_t shifted = (cur >> 8) | ((uint64_t) pixels[8] << 56);       \
                                                                            \
        STORE(AVG2(cur, shifted), block);                                   \
        block  += line_size;                                                \
        pixels += line_size;                                                \
    } while (--h)
72 
/*
 * Vertical half-pel kernel.
 *
 * Keeps the previously loaded row in a local and, for each output row,
 * LOADs the row one line_size further on, STOREs the AVG2 of the two at
 * block, and carries the new row over to the next iteration. h output rows
 * therefore read h + 1 source rows.
 *
 * Uses pixels, block, line_size and h from the enclosing scope and modifies
 * pixels, block and h; h must be at least 1 on entry.
 */
#define OP_Y2(LOAD, STORE)                                                  \
    do {                                                                    \
        uint64_t above = LOAD(pixels);                                      \
                                                                            \
        do {                                                                \
            pixels += line_size;                                            \
                                                                            \
            uint64_t below = LOAD(pixels);                                  \
                                                                            \
            STORE(AVG2(above, below), block);                               \
            above  = below;                                                 \
            block += line_size;                                             \
        } while (--h);                                                      \
    } while (0)
86 
/*
 * Diagonal (horizontal + vertical) half-pel kernel.
 *
 * Each source row is reduced to two partial sums over the loaded word and
 * its one-byte-shifted companion (built like in OP_X2, using pixels[8] for
 * the top byte):
 *   *_lo: sum of the low two bits of each byte (masked with 0x03 per byte),
 *   *_hi: sum of the remaining bits, each pre-shifted right by two.
 * An output row combines the partial sums of two consecutive source rows:
 *   ((prev_lo + next_lo + AVG4_ROUNDER) >> 2, masked) + prev_hi + next_hi.
 * The sums of the newer row are carried over so each source row is loaded
 * and split only once; h output rows read h + 1 source rows.
 *
 * Uses pixels, block, line_size and h from the enclosing scope and modifies
 * pixels, block and h; h must be at least 1 on entry. AVG4_ROUNDER must be
 * defined at the point of expansion.
 */
#define OP_XY2(LOAD, STORE)                                                 \
    do {                                                                    \
        uint64_t row     = LOAD(pixels);                                    \
        uint64_t row_sh  = (row >> 8) | ((uint64_t) pixels[8] << 56);       \
        uint64_t prev_lo = (row    & BYTE_VEC(0x03))                        \
                         + (row_sh & BYTE_VEC(0x03));                       \
        uint64_t prev_hi = ((row    & ~BYTE_VEC(0x03)) >> 2)                \
                         + ((row_sh & ~BYTE_VEC(0x03)) >> 2);               \
                                                                            \
        do {                                                                \
            pixels += line_size;                                            \
            row     = LOAD(pixels);                                         \
            row_sh  = (row >> 8) | ((uint64_t) pixels[8] << 56);            \
                                                                            \
            uint64_t next_lo = (row    & BYTE_VEC(0x03))                    \
                             + (row_sh & BYTE_VEC(0x03));                   \
            uint64_t next_hi = ((row    & ~BYTE_VEC(0x03)) >> 2)            \
                             + ((row_sh & ~BYTE_VEC(0x03)) >> 2);           \
            uint64_t result  = (((prev_lo + next_lo + AVG4_ROUNDER) >> 2)   \
                                & BYTE_VEC(0x03))                           \
                             + prev_hi + next_hi;                           \
                                                                            \
            STORE(result, block);                                           \
                                                                            \
            block  += line_size;                                            \
            prev_lo = next_lo;                                              \
            prev_hi = next_hi;                                              \
        } while (--h);                                                      \
    } while (0)
117 
/*
 * Define a pair of static functions for one kernel:
 *
 *   OPNAME_pixels<SUFF>_axp    runs OPKIND once, choosing the loader from
 *                              the low three bits of the pixels address:
 *                              ldq when they are all zero, uldq otherwise.
 *   OPNAME_pixels16<SUFF>_axp  calls the function above twice, the second
 *                              time with block and pixels advanced by 8.
 *
 * STORE is forwarded unchanged to OPKIND.
 */
#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE)                                \
static void OPNAME ## _pixels ## SUFF ## _axp                               \
        (uint8_t *restrict block, const uint8_t *restrict pixels,           \
         ptrdiff_t line_size, int h)                                        \
{                                                                           \
    if (((size_t) pixels & 0x7) == 0) {                                     \
        OPKIND(ldq, STORE);                                                 \
    } else {                                                                \
        OPKIND(uldq, STORE);                                                \
    }                                                                       \
}                                                                           \
                                                                            \
static void OPNAME ## _pixels16 ## SUFF ## _axp                             \
        (uint8_t *restrict block, const uint8_t *restrict pixels,           \
         ptrdiff_t line_size, int h)                                        \
{                                                                           \
    OPNAME ## _pixels ## SUFF ## _axp(block,     pixels,     line_size, h); \
    OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \
}
137 
/*
 * Instantiate MAKE_OP for all four kernels under one name prefix: the plain
 * copy (empty suffix) and the _x2, _y2 and _xy2 variants. STORE is passed
 * through to every instantiation.
 */
#define PIXOP(OPNAME, STORE)                                                \
    MAKE_OP(OPNAME,     , OP,     STORE)                                    \
    MAKE_OP(OPNAME, _x2 , OP_X2,  STORE)                                    \
    MAKE_OP(OPNAME, _y2 , OP_Y2,  STORE)                                    \
    MAKE_OP(OPNAME, _xy2, OP_XY2, STORE)
143 
144 /* Rounding primitives. */
145 #define AVG2 avg2
146 #define AVG4 avg4
147 #define AVG4_ROUNDER BYTE_VEC(0x02)
148 #define STORE(l, b) stq(l, b)
149 PIXOP(put, STORE);
150 
151 #undef STORE
152 #define STORE(l, b) stq(AVG2(l, ldq(b)), b);
153 PIXOP(avg, STORE);
154 
155 /* Not rounding primitives. */
156 #undef AVG2
157 #undef AVG4
158 #undef AVG4_ROUNDER
159 #undef STORE
160 #define AVG2 avg2_no_rnd
161 #define AVG4 avg4_no_rnd
162 #define AVG4_ROUNDER BYTE_VEC(0x01)
163 #define STORE(l, b) stq(l, b)
164 PIXOP(put_no_rnd, STORE);
165 
166 #undef STORE
167 #define STORE(l, b) stq(AVG2(l, ldq(b)), b);
168 PIXOP(avg_no_rnd, STORE);
169 
/*
 * 16-byte-wide put built from put_pixels_axp_asm: the helper is invoked once
 * for the first 8 bytes of every row and once more with block and pixels
 * advanced by 8, using the same line_size and h both times.
 */
static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
                                 ptrdiff_t line_size, int h)
{
    for (int offset = 0; offset < 16; offset += 8) {
        put_pixels_axp_asm(block + offset, pixels + offset, line_size, h);
    }
}
176 
/*
 * NOTE(review): the line that opens this definition (return type, name and
 * parameter list) is not present in this copy of the file; the embedded
 * listing numbers jump from 176 to 178. Listing numbers 179, 184, 199 and
 * 204 are absent as well, and no [0][0] or [1][0] entry of put_pixels_tab or
 * put_no_rnd_pixels_tab is assigned below -- presumably those assignments
 * are the dropped lines. Restore them from the upstream file, not by guess.
 *
 * The visible body fills the function-pointer tables reached through c.
 * First index [0] receives the *_pixels16_* functions and [1] the *_pixels_*
 * functions; the last index is 0 for the plain copy and 1, 2, 3 for the
 * _x2, _y2 and _xy2 variants. avg_no_rnd_pixels_tab has a single index and
 * only receives the *_pixels16_* functions.
 */
178 {
180  c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
181  c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
182  c->put_pixels_tab[0][3] = put_pixels16_xy2_axp;
183 
185  c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp;
186  c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp;
187  c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp;
188 
189  c->avg_pixels_tab[0][0] = avg_pixels16_axp;
190  c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp;
191  c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp;
192  c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp;
193 
194  c->avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels16_axp;
195  c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x2_axp;
196  c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y2_axp;
197  c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_axp;
198 
200  c->put_pixels_tab[1][1] = put_pixels_x2_axp;
201  c->put_pixels_tab[1][2] = put_pixels_y2_axp;
202  c->put_pixels_tab[1][3] = put_pixels_xy2_axp;
203 
205  c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp;
206  c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp;
207  c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp;
208 
209  c->avg_pixels_tab[1][0] = avg_pixels_axp;
210  c->avg_pixels_tab[1][1] = avg_pixels_x2_axp;
211  c->avg_pixels_tab[1][2] = avg_pixels_y2_axp;
212  c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp;
213 }