FFmpeg
cabac.h
Go to the documentation of this file.
/*
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20 
21 #ifndef AVCODEC_X86_CABAC_H
22 #define AVCODEC_X86_CABAC_H
23 
24 #include "libavcodec/cabac.h"
25 #include "libavutil/attributes.h"
26 #include "libavutil/macros.h"
27 #include "libavutil/x86/asm.h"
28 #include "config.h"
29 
/*
 * BROKEN_COMPILER is always defined: 1 for toolchains on which the inline-asm
 * CABAC readers in this header are compiled out (they are guarded by
 * !BROKEN_COMPILER), 0 otherwise.
 *
 * The excluded configurations are:
 *   - clang older than 2.10 when __i386 is defined,
 *   - a non-clang compiler that defines __llvm__ and reports GCC 4.2.0 or
 *     4.2.1 (presumably llvm-gcc),
 *   - the Intel compiler when it also defines _MSC_VER.
 */
#if (defined(__i386) && defined(__clang__) && (__clang_major__<2 || (__clang_major__==2 && __clang_minor__<10)))\
 || ( !defined(__clang__) && defined(__llvm__) && __GNUC__==4 && __GNUC_MINOR__==2 && __GNUC_PATCHLEVEL__<=1)\
 || (defined(__INTEL_COMPILER) && defined(_MSC_VER))
#   define BROKEN_COMPILER 1
#else
#   define BROKEN_COMPILER 0
#endif
37 
38 #if HAVE_INLINE_ASM
39 
/*
 * Unless the including file has already defined UNCHECKED_BITSTREAM_READER,
 * it defaults to the negation of CONFIG_SAFE_BITSTREAM_READER.
 */
#ifndef UNCHECKED_BITSTREAM_READER
#define UNCHECKED_BITSTREAM_READER !CONFIG_SAFE_BITSTREAM_READER
#endif

/*
 * END_CHECK(end): asm fragment (string literal) guarding a bytestream
 * pointer advance.
 *
 *   end  operand string naming the end-of-buffer pointer.
 *
 * Unchecked reader: expands to the empty string, i.e. no check is emitted.
 * Checked reader:   compares the register named by FF_REG_c with `end` and
 *                   jumps forward to local label `1` when that register is
 *                   greater than or equal to `end` (jge, a signed compare).
 *                   The user of the fragment must place label `1` after the
 *                   instruction that should be skipped.
 */
#if UNCHECKED_BITSTREAM_READER
#define END_CHECK(end) ""
#else
#define END_CHECK(end) \
        "cmp "end" , %%"FF_REG_c" \n\t"\
        "jge 1f \n\t"
#endif
51 
52 #ifdef BROKEN_RELOCATIONS
/*
 * Appends the local variable `tables` to an asm operand list as one more
 * register input. The leading comma continues the preceding operand, so the
 * macro must follow an existing input operand.
 */
#define TABLES_ARG , "r"(tables)

/*
 * BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp)
 *
 * Asm fragment (string literal) used by BRANCHLESS_GET_CABAC in the
 * BROKEN_RELOCATIONS build. All arguments are operand strings:
 *   ret / retq  32-bit and 64-bit spellings of the state/result register
 *   low         register holding the decoder's low value
 *   range       register holding RangeLPS on entry, the new range on exit
 *   tmp         on entry (range - RangeLPS) << 17; BRANCHLESS_GET_CABAC leaves
 *               the unshifted value in %ecx immediately before this fragment
 *
 * Both variants select between the two outcomes with a mask instead of a
 * branch and leave the same values in ret/low/range; %ecx/%rcx is scratch.
 *
 * HAVE_FAST_CMOV variant:
 *   cmp    flags from tmp - low
 *   cmova  range = %ecx when tmp is (unsigned) above low
 *   sbb    %rcx = all ones when tmp is below low, else 0
 *   and    tmp &= mask
 *   xor    retq ^= mask
 *   sub    low -= tmp (already masked)
 *
 * Fallback variant (no cmov, sbb or 64-bit shift):
 *   sub/sar         tmp = sign mask of (tmp - low)
 *   sub/and/add     range = %ecx + ((range - %ecx) & mask)
 *   shl/and/sub     low  -= (%ecx << 17) & mask
 *   xor             ret  ^= mask
 *   movslq          retq = sign-extended ret
 */
#if HAVE_FAST_CMOV
#define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
        "cmp "low" , "tmp" \n\t"\
        "cmova %%ecx , "range" \n\t"\
        "sbb %%rcx , %%rcx \n\t"\
        "and %%ecx , "tmp" \n\t"\
        "xor %%rcx , "retq" \n\t"\
        "sub "tmp" , "low" \n\t"
#else /* HAVE_FAST_CMOV */
#define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
/* P4 Prescott has crappy cmov,sbb,64-bit shift so avoid them */ \
        "sub "low" , "tmp" \n\t"\
        "sar $31 , "tmp" \n\t"\
        "sub %%ecx , "range" \n\t"\
        "and "tmp" , "range" \n\t"\
        "add %%ecx , "range" \n\t"\
        "shl $17 , %%ecx \n\t"\
        "and "tmp" , %%ecx \n\t"\
        "sub %%ecx , "low" \n\t"\
        "xor "tmp" , "ret" \n\t"\
        "movslq "ret" , "retq" \n\t"
#endif /* HAVE_FAST_CMOV */
77 
/*
 * BRANCHLESS_GET_CABAC(...) -- BROKEN_RELOCATIONS build: every table access
 * goes through the base register `tables` instead of a direct symbol.
 *
 * Expands to one asm template string. All arguments are operand strings:
 *   ret, retq        32/64-bit spellings of the state/result register
 *   statep           memory operand of the context-state byte
 *   low, lowword     32/16-bit spellings of the register holding low
 *   range, rangeq    32/64-bit spellings of the register holding range
 *   tmp, tmpbyte     32/8-bit spellings of a scratch register
 *   byte, end        memory operands holding the current and end bytestream
 *                    pointers
 *   norm_off,
 *   lps_off,
 *   mlps_off         textual byte offsets of the sub-tables inside `tables`
 *   tables           register holding the table base address
 *
 * Sequence:
 *   1. ret = *statep; tmp = range; range &= 0xC0;
 *      range = tables[lps_off + ret + 2*range]   (RangeLPS); tmp -= range.
 *   2. %ecx = tmp; tmp <<= 17; BRANCHLESS_GET_CABAC_UPDATE applies the masked
 *      selection to ret, low and range.
 *   3. %ecx = tables[norm_off + range]; range <<= %cl; low <<= %cl;
 *      *statep = low byte of tables[mlps_off + 128 + ret].
 *   4. If the low 16 bits of low are non-zero, jump to label 2 (done).
 *   5. Refill: load the bytestream pointer into FF_REG_c, run END_CHECK(end)
 *      (may jump to label 1 and thereby skip the advance), advance the stored
 *      pointer by 2, read 16 bits at the old pointer, turn them into
 *      (big-endian value << 1) via bswap and shr 15, subtract 0xFFFF, shift
 *      left by 7 - tables[norm_off + (((low - 1) ^ low) >> 15)] and add the
 *      result to low.
 *
 * Uses local labels 1 and 2 and overwrites %ecx/%rcx; the enclosing asm
 * statement has to list that register as clobbered.
 */
#define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \
        "movzbl "statep" , "ret" \n\t"\
        "mov "range" , "tmp" \n\t"\
        "and $0xC0 , "range" \n\t"\
        "lea ("ret", "range", 2), %%ecx \n\t"\
        "movzbl "lps_off"("tables", %%rcx), "range" \n\t"\
        "sub "range" , "tmp" \n\t"\
        "mov "tmp" , %%ecx \n\t"\
        "shl $17 , "tmp" \n\t"\
        BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
        "movzbl "norm_off"("tables", "rangeq"), %%ecx \n\t"\
        "shl %%cl , "range" \n\t"\
        "movzbl "mlps_off"+128("tables", "retq"), "tmp" \n\t"\
        "shl %%cl , "low" \n\t"\
        "mov "tmpbyte" , "statep" \n\t"\
        "test "lowword" , "lowword" \n\t"\
        "jnz 2f \n\t"\
        "mov "byte" , %%"FF_REG_c" \n\t"\
        END_CHECK(end)\
        "add"FF_OPSIZE" $2 , "byte" \n\t"\
        "1: \n\t"\
        "movzwl (%%"FF_REG_c") , "tmp" \n\t"\
        "lea -1("low") , %%ecx \n\t"\
        "xor "low" , %%ecx \n\t"\
        "shr $15 , %%ecx \n\t"\
        "bswap "tmp" \n\t"\
        "shr $15 , "tmp" \n\t"\
        "movzbl "norm_off"("tables", %%rcx), %%ecx \n\t"\
        "sub $0xFFFF , "tmp" \n\t"\
        "neg %%ecx \n\t"\
        "add $7 , %%ecx \n\t"\
        "shl %%cl , "tmp" \n\t"\
        "add "tmp" , "low" \n\t"\
        "2: \n\t"
112 
113 #else /* BROKEN_RELOCATIONS */
/*
 * Direct-symbol build: the tables are named in the asm template through
 * MANGLE(ff_h264_cabac_tables), so TABLES_ARG only adds whatever operand
 * NAMED_CONSTRAINTS_ARRAY_ADD produces for that symbol. RIP_ARG is empty.
 */
#define TABLES_ARG NAMED_CONSTRAINTS_ARRAY_ADD(ff_h264_cabac_tables)
#define RIP_ARG

/*
 * BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)
 *
 * Asm fragment (string literal) used by BRANCHLESS_GET_CABAC. All arguments
 * are operand strings:
 *   ret    register holding the state/result value
 *   low    register holding the decoder's low value
 *   range  register holding RangeLPS on entry, the new range on exit
 *   tmp    register holding (range - RangeLPS) on entry
 *
 * Both variants first copy tmp to %ecx and shift tmp left by 17, then pick
 * between the two outcomes with a mask instead of a branch; %ecx is scratch.
 *
 * HAVE_FAST_CMOV variant:
 *   cmp    flags from tmp - low
 *   cmova  range = %ecx when tmp is (unsigned) above low
 *   sbb    %ecx = all ones when tmp is below low, else 0
 *   and    tmp &= mask
 *   xor    ret ^= mask
 *   sub    low -= tmp (already masked)
 *
 * Fallback variant: the same result from sub/sar/and arithmetic; see the
 * per-line comments.
 */
#if HAVE_FAST_CMOV
#define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
        "mov "tmp" , %%ecx \n\t"\
        "shl $17 , "tmp" \n\t"\
        "cmp "low" , "tmp" \n\t"\
        "cmova %%ecx , "range" \n\t"\
        "sbb %%ecx , %%ecx \n\t"\
        "and %%ecx , "tmp" \n\t"\
        "xor %%ecx , "ret" \n\t"\
        "sub "tmp" , "low" \n\t"
#else /* HAVE_FAST_CMOV */
#define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
        "mov "tmp" , %%ecx \n\t"\
        "shl $17 , "tmp" \n\t"\
        "sub "low" , "tmp" \n\t"\
        "sar $31 , "tmp" \n\t" /*lps_mask*/\
        "sub %%ecx , "range" \n\t" /*RangeLPS - range*/\
        "and "tmp" , "range" \n\t" /*(RangeLPS - range)&lps_mask*/\
        "add %%ecx , "range" \n\t" /*new range*/\
        "shl $17 , %%ecx \n\t"\
        "and "tmp" , %%ecx \n\t"\
        "sub %%ecx , "low" \n\t"\
        "xor "tmp" , "ret" \n\t"
#endif /* HAVE_FAST_CMOV */
141 
/*
 * BRANCHLESS_GET_CABAC(...) -- direct-symbol build: every table access names
 * MANGLE(ff_h264_cabac_tables) plus a textual offset in the template.
 *
 * Expands to one asm template string. All arguments are operand strings:
 *   ret              register holding the state/result value
 *   statep           memory operand of the context-state byte
 *   low, lowword     32/16-bit spellings of the register holding low
 *   range            register holding range
 *   tmp, tmpbyte     32/8-bit spellings of a scratch register
 *   byte, end        memory operands holding the current and end bytestream
 *                    pointers
 *   norm_off,
 *   lps_off,
 *   mlps_off         textual byte offsets of the sub-tables
 *   retq, rangeq,
 *   tables           accepted so the signature matches the
 *                    BROKEN_RELOCATIONS variant; not used in this expansion
 *
 * Sequence:
 *   1. ret = *statep; tmp = range; range &= 0xC0;
 *      range = table[lps_off + ret + 2*range]; tmp -= range.
 *   2. BRANCHLESS_GET_CABAC_UPDATE applies the masked selection to ret, low
 *      and range.
 *   3. %ecx = table[norm_off + range]; range <<= %cl; low <<= %cl;
 *      *statep = low byte of table[mlps_off + 128 + ret].
 *   4. If the low 16 bits of low are non-zero, jump to label 2 (done).
 *   5. Refill: load the bytestream pointer into FF_REG_c, run END_CHECK(end)
 *      (may jump to label 1 and thereby skip the advance), advance the stored
 *      pointer by 2, read 16 bits at the old pointer, turn them into
 *      (big-endian value << 1) via bswap and shr 15, subtract 0xFFFF, shift
 *      left by 7 - table[norm_off + (((low - 1) ^ low) >> 15)] and add the
 *      result to low.
 *
 * Uses local labels 1 and 2 and overwrites %ecx (FF_REG_c); the enclosing asm
 * statement has to list that register as clobbered.
 */
#define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \
        "movzbl "statep" , "ret" \n\t"\
        "mov "range" , "tmp" \n\t"\
        "and $0xC0 , "range" \n\t"\
        "movzbl "MANGLE(ff_h264_cabac_tables)"+"lps_off"("ret", "range", 2), "range" \n\t"\
        "sub "range" , "tmp" \n\t"\
        BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp) \
        "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"("range"), %%ecx \n\t"\
        "shl %%cl , "range" \n\t"\
        "movzbl "MANGLE(ff_h264_cabac_tables)"+"mlps_off"+128("ret"), "tmp" \n\t"\
        "shl %%cl , "low" \n\t"\
        "mov "tmpbyte" , "statep" \n\t"\
        "test "lowword" , "lowword" \n\t"\
        " jnz 2f \n\t"\
        "mov "byte" , %%"FF_REG_c" \n\t"\
        END_CHECK(end)\
        "add"FF_OPSIZE" $2 , "byte" \n\t"\
        "1: \n\t"\
        "movzwl (%%"FF_REG_c") , "tmp" \n\t"\
        "lea -1("low") , %%ecx \n\t"\
        "xor "low" , %%ecx \n\t"\
        "shr $15 , %%ecx \n\t"\
        "bswap "tmp" \n\t"\
        "shr $15 , "tmp" \n\t"\
        "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"(%%ecx), %%ecx \n\t"\
        "sub $0xFFFF , "tmp" \n\t"\
        "neg %%ecx \n\t"\
        "add $7 , %%ecx \n\t"\
        "shl %%cl , "tmp" \n\t"\
        "add "tmp" , "low" \n\t"\
        "2: \n\t"
173 
174 #endif /* BROKEN_RELOCATIONS */
175 
#if HAVE_7REGS && !BROKEN_COMPILER
#define get_cabac_inline get_cabac_inline_x86
/**
 * x86 inline-asm implementation installed as get_cabac_inline.
 *
 * Decodes one bin with BRANCHLESS_GET_CABAC, updating c->low, c->range, the
 * bytestream pointer stored in *c and the context byte *state in place.
 *
 * @param c      decoder context; low and range are both inputs and outputs
 *               of the asm, bytestream/bytestream_end are addressed through
 *               their offsetof() displacements from c
 * @param state  context-state byte, read and rewritten by the asm
 * @return       bit 0 of the value the asm leaves in its result register
 */
static av_always_inline int get_cabac_inline_x86(CABACContext *c,
                                                 uint8_t *const state)
{
    int bit, tmp;
#ifdef BROKEN_RELOCATIONS
    void *tables;

    /* Put the table address in a register once; the main asm then indexes
     * through it (operand %8, supplied by TABLES_ARG). The input operand is
     * what MANGLE() refers to when named constraints are in use. */
    __asm__ volatile(
        "lea "MANGLE(ff_h264_cabac_tables)", %0 \n\t"
        : "=&r"(tables)
        : NAMED_CONSTRAINTS_ARRAY(ff_h264_cabac_tables)
    );
#endif

    /*
     * Operand map: %0 bit, %1 c->low, %2 c->range, %3 tmp ("q" because its
     * byte sub-register %b3 is stored to *state), %4 state, %5 c,
     * %6/%7 offsets of bytestream / bytestream_end, %8 tables (only
     * meaningful with BROKEN_RELOCATIONS).
     * The three table offsets are passed as strings so they can be pasted
     * into the addressing expressions of the template.
     */
    __asm__ volatile(
        BRANCHLESS_GET_CABAC("%0", "%q0", "(%4)", "%1", "%w1",
                             "%2", "%q2", "%3", "%b3",
                             "%c6(%5)", "%c7(%5)",
                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
                             "%8")
        : "=&r"(bit), "=&r"(c->low), "=&r"(c->range), "=&q"(tmp)
        : "r"(state), "r"(c),
          "i"(offsetof(CABACContext, bytestream)),
          "i"(offsetof(CABACContext, bytestream_end))
          TABLES_ARG
          ,"1"(c->low), "2"(c->range)
        : "%"FF_REG_c, "memory"
    );
    return bit & 1;
}
#endif /* HAVE_7REGS && !BROKEN_COMPILER */
211 
212 #if !BROKEN_COMPILER
213 #define get_cabac_bypass_sign get_cabac_bypass_sign_x86
214 static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
215 {
216  x86_reg tmp;
217  __asm__ volatile(
218  "movl %c6(%2), %k1 \n\t"
219  "movl %c3(%2), %%eax \n\t"
220  "shl $17, %k1 \n\t"
221  "add %%eax, %%eax \n\t"
222  "sub %k1, %%eax \n\t"
223  "cdq \n\t"
224  "and %%edx, %k1 \n\t"
225  "add %k1, %%eax \n\t"
226  "xor %%edx, %%ecx \n\t"
227  "sub %%edx, %%ecx \n\t"
228  "test %%ax, %%ax \n\t"
229  "jnz 1f \n\t"
230  "mov %c4(%2), %1 \n\t"
231  "subl $0xFFFF, %%eax \n\t"
232  "movzwl (%1), %%edx \n\t"
233  "bswap %%edx \n\t"
234  "shrl $15, %%edx \n\t"
235 #if UNCHECKED_BITSTREAM_READER
236  "add $2, %1 \n\t"
237  "addl %%edx, %%eax \n\t"
238  "mov %1, %c4(%2) \n\t"
239 #else
240  "addl %%edx, %%eax \n\t"
241  "cmp %c5(%2), %1 \n\t"
242  "jge 1f \n\t"
243  "add"FF_OPSIZE" $2, %c4(%2) \n\t"
244 #endif
245  "1: \n\t"
246  "movl %%eax, %c3(%2) \n\t"
247 
248  : "+c"(val), "=&r"(tmp)
249  : "r"(c),
250  "i"(offsetof(CABACContext, low)),
251  "i"(offsetof(CABACContext, bytestream)),
252  "i"(offsetof(CABACContext, bytestream_end)),
253  "i"(offsetof(CABACContext, range))
254  : "%eax", "%edx", "memory"
255  );
256  return val;
257 }
258 
259 #define get_cabac_bypass get_cabac_bypass_x86
260 static av_always_inline int get_cabac_bypass_x86(CABACContext *c)
261 {
262  x86_reg tmp;
263  int res;
264  __asm__ volatile(
265  "movl %c6(%2), %k1 \n\t"
266  "movl %c3(%2), %%eax \n\t"
267  "shl $17, %k1 \n\t"
268  "add %%eax, %%eax \n\t"
269  "sub %k1, %%eax \n\t"
270  "cdq \n\t"
271  "and %%edx, %k1 \n\t"
272  "add %k1, %%eax \n\t"
273  "inc %%edx \n\t"
274  "test %%ax, %%ax \n\t"
275  "jnz 1f \n\t"
276  "mov %c4(%2), %1 \n\t"
277  "subl $0xFFFF, %%eax \n\t"
278  "movzwl (%1), %%ecx \n\t"
279  "bswap %%ecx \n\t"
280  "shrl $15, %%ecx \n\t"
281  "addl %%ecx, %%eax \n\t"
282  "cmp %c5(%2), %1 \n\t"
283  "jge 1f \n\t"
284  "add"FF_OPSIZE" $2, %c4(%2) \n\t"
285  "1: \n\t"
286  "movl %%eax, %c3(%2) \n\t"
287 
288  : "=&d"(res), "=&r"(tmp)
289  : "r"(c),
290  "i"(offsetof(CABACContext, low)),
291  "i"(offsetof(CABACContext, bytestream)),
292  "i"(offsetof(CABACContext, bytestream_end)),
293  "i"(offsetof(CABACContext, range))
294  : "%eax", "%ecx", "memory"
295  );
296  return res;
297 }
298 #endif /* !BROKEN_COMPILER */
299 
300 #endif /* HAVE_INLINE_ASM */
301 #endif /* AVCODEC_X86_CABAC_H */
cabac.h
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:26
bit
#define bit(string, value)
Definition: cbs_mpeg2.c:58
macros.h
NAMED_CONSTRAINTS_ARRAY
#define NAMED_CONSTRAINTS_ARRAY(...)
Definition: asm.h:151
tables
Writing a table generator This documentation is preliminary Parts of the API are not good and should be changed Basic concepts A table generator consists of two *_tablegen c and *_tablegen h The h file will provide the variable declarations and initialization code for the tables
Definition: tablegen.txt:10
ff_h264_cabac_tables
const uint8_t ff_h264_cabac_tables[512+4 *2 *64+4 *64+63]
state
static struct @313 state
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
asm.h
H264_LPS_RANGE_OFFSET
#define H264_LPS_RANGE_OFFSET
Definition: cabac.h:36
val
const char const char void * val
Definition: avisynth_c.h:863
attributes.h
av_always_inline
#define av_always_inline
Definition: attributes.h:43
AV_STRINGIFY
#define AV_STRINGIFY(s)
Definition: macros.h:36
uint8_t
uint8_t
Definition: audio_convert.c:194
H264_NORM_SHIFT_OFFSET
#define H264_NORM_SHIFT_OFFSET
Definition: cabac.h:35
config.h
MANGLE
#define MANGLE(a)
Definition: asm.h:127
x86_reg
int x86_reg
Definition: asm.h:72
H264_MLPS_STATE_OFFSET
#define H264_MLPS_STATE_OFFSET
Definition: cabac.h:37
CABACContext
Definition: cabac.h:43