FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
pixblockdsp.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2002 Brian Foley
3  * Copyright (c) 2002 Dieter Shirley
4  * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include "config.h"
24 #if HAVE_ALTIVEC_H
25 #include <altivec.h>
26 #endif
27 
28 #include "libavutil/attributes.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/ppc/cpu.h"
33 #include "libavcodec/avcodec.h"
34 #include "libavcodec/pixblockdsp.h"
35 
36 #if HAVE_ALTIVEC
37 
/**
 * Copy an 8x8 block of 8-bit pixels into a block of 16-bit values,
 * zero-extending every pixel.
 *
 * @param block     destination for 64 int16_t values, written one row of 8
 *                  at a time; assumed to be 16-byte aligned
 * @param pixels    first pixel of the top source row; may be unaligned
 * @param line_size distance in bytes between the starts of two consecutive
 *                  source rows
 */
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
                               ptrdiff_t line_size)
{
    int i;
    const vector unsigned char zero =
        (const vector unsigned char) vec_splat_u8(0);

    for (i = 0; i < 8; i++) {
        /* The permute vector encodes the offset of this row inside its
         * 16-byte block. Derive it for every row: consecutive rows only
         * share the same offset when line_size is a multiple of 16. */
        const vector unsigned char perm = vec_lvsl(0, pixels);

        /* Read potentially unaligned pixels.
         * Each load fetches 16 bytes although only 8 are wanted; the second
         * load is addressed through the last wanted byte (offset 7), and
         * the extras are simply ignored. */
        vector unsigned char pixl  = vec_ld(0, pixels);
        vector unsigned char pixr  = vec_ld(7, pixels);
        vector unsigned char bytes = vec_perm(pixl, pixr, perm);

        // Convert the first 8 bytes into shorts by interleaving with zero.
        vector signed short shorts =
            (vector signed short) vec_mergeh(zero, bytes);

        // Save the row to the block, we assume the block is 16-byte aligned.
        vec_st(shorts, i * 16, (vector signed short *) block);

        pixels += line_size;
    }
}
64 
/* Load 8 possibly unaligned bytes starting at src and zero-extend them
 * into a vector of 8 shorts. */
static inline vector signed short load_8_pixels_as_shorts(const uint8_t *src)
{
    const vector unsigned char zero =
        (const vector unsigned char) vec_splat_u8(0);
    /* The permute vector encodes the offset of src inside its 16-byte
     * block, so it has to be derived from the address actually loaded. */
    const vector unsigned char perm = vec_lvsl(0, src);

    /* Each load fetches 16 bytes although only 8 are wanted. The second
     * load is addressed through the last wanted byte (offset 7) so that it
     * never touches a 16-byte block that holds none of the wanted bytes. */
    vector unsigned char pixl  = vec_ld(0, src);
    vector unsigned char pixr  = vec_ld(7, src);
    vector unsigned char bytes = vec_perm(pixl, pixr, perm);

    // Convert the first 8 bytes into shorts by interleaving with zero.
    return (vector signed short) vec_mergeh(zero, bytes);
}

/**
 * Compute the element-wise difference s1 - s2 of two 8x8 blocks of 8-bit
 * pixels and store it as 16-bit values.
 *
 * @param block  destination for 64 int16_t values, written one row of 8 at
 *               a time; assumed to be 16-byte aligned
 * @param s1     first pixel of the top row of the minuend; may be unaligned
 * @param s2     first pixel of the top row of the subtrahend; may be
 *               unaligned
 * @param stride distance in bytes between the starts of two consecutive
 *               rows, applied to both s1 and s2
 */
static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
                                const uint8_t *s2, int stride)
{
    int i;
    vector signed short shorts1, shorts2;

    for (i = 0; i < 8; i++) {
        // Widen one row from each source.
        shorts1 = load_8_pixels_as_shorts(s1);
        shorts2 = load_8_pixels_as_shorts(s2);

        // Do the subtraction.
        shorts1 = vec_sub(shorts1, shorts2);

        // Save the row to the block, we assume the block is 16-byte aligned.
        vec_st(shorts1, 0, (vector signed short *) block);

        s1    += stride;
        s2    += stride;
        block += 8;
    }
}
136 
137 #endif /* HAVE_ALTIVEC */
138 
140  AVCodecContext *avctx,
141  unsigned high_bit_depth)
142 {
143 #if HAVE_ALTIVEC
145  return;
146 
147  c->diff_pixels = diff_pixels_altivec;
148 
149  if (!high_bit_depth) {
150  c->get_pixels = get_pixels_altivec;
151  }
152 #endif /* HAVE_ALTIVEC */
153 }