29 return (a & b) + (((a ^
b) &
BYTE_VEC(0xfe)) >> 1);
/*
 * avg2: combine two 64-bit words a and b as
 *     (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1)
 *
 * For a single value, (a | b) - ((a ^ b) >> 1) is (a + b + 1) / 2, i.e. the
 * mean rounded up, computed without needing an extra carry bit.
 *
 * NOTE(review): BYTE_VEC is not defined in this listing.  Presumably it
 * replicates its argument into every byte of the word, so the 0xfe mask
 * clears bit 0 of each byte before the shift and keeps bits from leaking
 * into the neighbouring byte -- confirm against its definition.
 */
32 static inline uint64_t
avg2(uint64_t
a, uint64_t
b)
34 return (a | b) - (((a ^
b) &
BYTE_VEC(0xfe)) >> 1);
/*
 * avg4: takes four 64-bit words l1..l4 and returns a 64-bit word.
 *
 * Only the first term of each accumulator is visible in this listing:
 *   r1 starts from (l1 & ~BYTE_VEC(0x03)) >> 2  (the part of l1 outside the
 *      0x03 mask, shifted down by two bits);
 *   r2 starts from (l1 & BYTE_VEC(0x03))        (the part of l1 inside the
 *      0x03 mask).
 * The remaining terms, how l2..l4 are used, and the return expression are
 * not shown here.
 *
 * NOTE(review): presumably a per-byte four-way average built from separate
 * high-bit and low-bit sums -- confirm against the complete source.
 */
40 static inline uint64_t
avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
42 uint64_t r1 = ((l1 & ~
BYTE_VEC(0x03)) >> 2)
46 uint64_t r2 = (( (l1 &
BYTE_VEC(0x03))
/*
 * OP(LOAD, STORE): plain copy step.
 *
 * Visible steps: hand LOAD(pixels) to STORE together with block, then move
 * pixels forward by line_size.  LOAD and STORE are macro/function names
 * supplied by the caller of this macro.  The surrounding loop and any
 * update of block are not visible in this listing.
 */
55 #define OP(LOAD, STORE) \
57 STORE(LOAD(pixels), block); \
58 pixels += line_size; \
/*
 * OP_X2(LOAD, STORE): average a word with its one-byte-shifted neighbour.
 *
 * Visible steps:
 *   pix1 = LOAD(pixels);
 *   pix2 = pix1 shifted right by 8 bits, with pixels[8] inserted into
 *          bits 56..63, so pix2 holds bytes 1..8 where pix1 holds 0..7
 *          (assumes LOAD puts pixels[0] in the low byte -- TODO confirm);
 *   STORE(AVG2(pix1, pix2), block);
 *   pixels advances by line_size.
 * Reads pixels[8], i.e. one byte past the 8 covered by LOAD.
 * The surrounding loop and any update of block are not visible here.
 */
62 #define OP_X2(LOAD, STORE) \
64 uint64_t pix1, pix2; \
66 pix1 = LOAD(pixels); \
67 pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
68 STORE(AVG2(pix1, pix2), block); \
69 pixels += line_size; \
/*
 * OP_Y2(LOAD, STORE): average a word with the word one line below it.
 *
 * Visible steps:
 *   pix      = LOAD(pixels)            (current line);
 *   pixels  += line_size;
 *   next_pix = LOAD(pixels)            (following line);
 *   STORE(AVG2(pix, next_pix), block).
 * The declaration of next_pix, the surrounding loop and any reuse of
 * next_pix as the next iteration's pix are not visible in this listing.
 */
73 #define OP_Y2(LOAD, STORE) \
75 uint64_t pix = LOAD(pixels); \
79 pixels += line_size; \
80 next_pix = LOAD(pixels); \
81 STORE(AVG2(pix, next_pix), block); \
/*
 * OP_XY2(LOAD, STORE): combine a word, its one-byte-shifted neighbour and
 * the same pair from the following line.
 *
 * Visible steps for the current line:
 *   pix1  = LOAD(pixels);
 *   pix2  = pix1 >> 8 with pixels[8] inserted into bits 56..63;
 *   pix_l = (pix1 & BYTE_VEC(0x03)) + (pix2 & BYTE_VEC(0x03));
 *   pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2) + ((pix2 & ~BYTE_VEC(0x03)) >> 2).
 * After pixels += line_size the same four values are computed for the next
 * line as npix1, npix2, npix_l and npix_h.
 *
 * avg starts as ((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03);
 * the rest of that expression, the store, and the loop are not visible in
 * this listing.  block advances by line_size.
 *
 * NOTE(review): the split into a 0x03-masked part and a pre-shifted
 * remainder presumably keeps each per-byte partial sum inside its own byte
 * -- confirm against BYTE_VEC's definition and the full expression.
 */
87 #define OP_XY2(LOAD, STORE) \
89 uint64_t pix1 = LOAD(pixels); \
90 uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
91 uint64_t pix_l = (pix1 & BYTE_VEC(0x03)) \
92 + (pix2 & BYTE_VEC(0x03)); \
93 uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2) \
94 + ((pix2 & ~BYTE_VEC(0x03)) >> 2); \
97 uint64_t npix1, npix2; \
98 uint64_t npix_l, npix_h; \
101 pixels += line_size; \
102 npix1 = LOAD(pixels); \
103 npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56); \
104 npix_l = (npix1 & BYTE_VEC(0x03)) \
105 + (npix2 & BYTE_VEC(0x03)); \
106 npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2) \
107 + ((npix2 & ~BYTE_VEC(0x03)) >> 2); \
108 avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \
112 block += line_size; \
/*
 * MAKE_OP(OPNAME, SUFF, OPKIND, STORE): generate two static functions by
 * token pasting.
 *
 *   OPNAME_pixels<SUFF>_axp(block, pixels, line_size, h)
 *       Tests the low three bits of the pixels address.  When any is set
 *       (pixels is not 8-byte aligned) it expands OPKIND(uldq, STORE);
 *       an OPKIND(ldq, STORE) expansion follows, presumably as the aligned
 *       branch -- the line separating the two is not visible here.
 *
 *   OPNAME_pixels16<SUFF>_axp(block, pixels, line_size, h)
 *       Calls the function above twice: once at (block, pixels) and once
 *       at (block + 8, pixels + 8), with the same line_size and h.
 *
 * Both take restrict-qualified block (written) and const pixels (read).
 * NOTE(review): uldq/ldq are not defined in this listing; presumably the
 * unaligned and aligned 64-bit loads -- confirm.
 */
118 #define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \
119 static void OPNAME ## _pixels ## SUFF ## _axp \
120 (uint8_t *restrict block, const uint8_t *restrict pixels, \
121 ptrdiff_t line_size, int h) \
123 if ((size_t) pixels & 0x7) { \
124 OPKIND(uldq, STORE); \
126 OPKIND(ldq, STORE); \
130 static void OPNAME ## _pixels16 ## SUFF ## _axp \
131 (uint8_t *restrict block, const uint8_t *restrict pixels, \
132 ptrdiff_t line_size, int h) \
134 OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \
135 OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \
/*
 * PIXOP(OPNAME, STORE): instantiate MAKE_OP four times for one OPNAME/STORE
 * pair -- with an empty suffix and OP, then _x2 with OP_X2, _y2 with OP_Y2,
 * and _xy2 with OP_XY2.
 */
138 #define PIXOP(OPNAME, STORE) \
139 MAKE_OP(OPNAME, , OP, STORE) \
140 MAKE_OP(OPNAME, _x2, OP_X2, STORE) \
141 MAKE_OP(OPNAME, _y2, OP_Y2, STORE) \
142 MAKE_OP(OPNAME, _xy2, OP_XY2, STORE)
/*
 * Configuration macros consumed by the OP_* macros.  The same names are
 * defined more than once below; the lines between the definitions
 * (presumably #undef and the PIXOP instantiations) are not visible in this
 * listing.
 */
/* Constant added to the low-bit sum in OP_XY2 before the >> 2. */
147 #define AVG4_ROUNDER BYTE_VEC(0x02)
/* STORE variant that writes l to b with stq. */
148 #define STORE(l, b) stq(l, b)
/*
 * STORE variant that first combines l with the word read from b via
 * AVG2(l, ldq(b)) and then writes the result with stq.
 * NOTE(review): the macro body ends in ';', so STORE(...); at a use site
 * expands to two semicolons -- harmless as a statement, but confirm no use
 * site relies on STORE being an expression.
 */
152 #define STORE(l, b) stq(AVG2(l, ldq(b)), b);
/* Second configuration: AVG2/AVG4 resolve to the *_no_rnd helpers. */
160 #define AVG2 avg2_no_rnd
161 #define AVG4 avg4_no_rnd
/* Constant added to the low-bit sum in OP_XY2 for this configuration. */
162 #define AVG4_ROUNDER BYTE_VEC(0x01)
/* STORE variant that writes l to b with stq. */
163 #define STORE(l, b) stq(l, b)
/*
 * STORE variant that combines l with ldq(b) via AVG2 before stq.
 * NOTE(review): same trailing ';' in the macro body as the variant above.
 */
167 #define STORE(l, b) stq(AVG2(l, ldq(b)), b);
171 ptrdiff_t line_size,
int h)