FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
hpeldsp_rnd_template.c
Go to the documentation of this file.
1 /*
2  * SIMD-optimized halfpel functions are compiled twice for rnd/no_rnd
3  * Copyright (c) 2000, 2001 Fabrice Bellard
4  * Copyright (c) 2003-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
7  * mostly rewritten by Michael Niedermayer <michaelni@gmx.at>
8  * and improved by Zdenek Kabelac <kabi@users.sf.net>
9  *
10  * This file is part of FFmpeg.
11  *
12  * FFmpeg is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public
14  * License as published by the Free Software Foundation; either
15  * version 2.1 of the License, or (at your option) any later version.
16  *
17  * FFmpeg is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with FFmpeg; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25  */
26 
27 #include <stddef.h>
28 #include <stdint.h>
29 
30 // put_pixels
/*
 * pixels8_x2, "put" variant. The real symbol name is produced by the DEF
 * macro, which is defined outside this file.
 *
 * For every row, the 8 source bytes at pixels[0..7] and the 8 bytes at
 * pixels[1..8] are fed to PAVGBP and the 8-byte result is stored to block,
 * overwriting what was there. PAVGBP is defined elsewhere; presumably it is
 * a per-byte average of two register pairs whose rounding depends on which
 * of the two builds (rnd/no_rnd) is being compiled -- TODO confirm against
 * the including file.
 *
 * block:     destination; 8 bytes are written per row
 * pixels:    source; bytes 0..8 of each row are read
 * line_size: byte distance between consecutive rows, applied to both pointers
 * h:         row count. Four rows are handled per loop iteration and the loop
 *            only ends when h reaches exactly 0, so h must be a positive
 *            multiple of 4.
 */
31 static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
32 {
33  MOVQ_BFE(mm6); /* macro defined elsewhere; presumably prepares mm6 for PAVGBP, which is not handed mm6 explicitly -- TODO confirm */
34  __asm__ volatile(
35  "lea (%3, %3), %%"REG_a" \n\t" /* REG_a = 2 * line_size (stride of two rows) */
36  ".p2align 3 \n\t" /* align the loop head to 8 bytes */
37  "1: \n\t" /* loop head: rows 0 and 1 of this iteration */
38  "movq (%1), %%mm0 \n\t" /* row 0, bytes 0..7 */
39  "movq 1(%1), %%mm1 \n\t" /* row 0, bytes 1..8 */
40  "movq (%1, %3), %%mm2 \n\t" /* row 1, bytes 0..7 */
41  "movq 1(%1, %3), %%mm3 \n\t" /* row 1, bytes 1..8 */
42  PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) /* presumably mm4 = avg(mm0, mm1), mm5 = avg(mm2, mm3) */
43  "movq %%mm4, (%2) \n\t" /* store destination row 0 */
44  "movq %%mm5, (%2, %3) \n\t" /* store destination row 1 */
45  "add %%"REG_a", %1 \n\t" /* pixels += 2 * line_size */
46  "add %%"REG_a", %2 \n\t" /* block  += 2 * line_size */
47  "movq (%1), %%mm0 \n\t" /* rows 2 and 3: same sequence as above */
48  "movq 1(%1), %%mm1 \n\t"
49  "movq (%1, %3), %%mm2 \n\t"
50  "movq 1(%1, %3), %%mm3 \n\t"
51  PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
52  "movq %%mm4, (%2) \n\t"
53  "movq %%mm5, (%2, %3) \n\t"
54  "add %%"REG_a", %1 \n\t"
55  "add %%"REG_a", %2 \n\t"
56  "subl $4, %0 \n\t" /* four rows have been written */
57  "jnz 1b \n\t" /* repeat until h is exactly 0 */
58  :"+g"(h), "+S"(pixels), "+D"(block) /* %0 = h (register or memory), %1 = pixels in the si register, %2 = block in the di register; all read and written */
59  :"r"((x86_reg)line_size) /* %3 = line_size in any general register */
60  :REG_a, "memory"); /* REG_a is scratch; "memory" because stores go through %2. The mm registers used are not listed as clobbers. */
61 }
62 
/*
 * pixels16_x2, "put" variant. The real symbol name is produced by the DEF
 * macro, which is defined outside this file.
 *
 * 16-byte-wide version of the horizontal operation: each row is processed as
 * two 8-byte halves. For each half, the source bytes at offset k and at
 * offset k + 1 are fed to PAVGBP and the result is stored to block at offset
 * k (k = 0 and k = 8). PAVGBP is defined elsewhere; presumably a per-byte
 * average whose rounding depends on the rnd/no_rnd build -- TODO confirm.
 *
 * block:     destination; 16 bytes are written per row
 * pixels:    source; bytes 0..16 of each row are read
 * line_size: byte distance between consecutive rows, applied to both pointers
 * h:         row count. Four rows are handled per loop iteration and the loop
 *            only ends when h reaches exactly 0, so h must be a positive
 *            multiple of 4.
 */
63 static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
64 {
65  MOVQ_BFE(mm6); /* macro defined elsewhere; presumably prepares mm6 for PAVGBP -- TODO confirm */
66  __asm__ volatile(
67  "lea (%3, %3), %%"REG_a" \n\t" /* REG_a = 2 * line_size */
68  ".p2align 3 \n\t" /* align the loop head to 8 bytes */
69  "1: \n\t" /* loop head: rows 0 and 1, left half */
70  "movq (%1), %%mm0 \n\t" /* row 0, bytes 0..7 */
71  "movq 1(%1), %%mm1 \n\t" /* row 0, bytes 1..8 */
72  "movq (%1, %3), %%mm2 \n\t" /* row 1, bytes 0..7 */
73  "movq 1(%1, %3), %%mm3 \n\t" /* row 1, bytes 1..8 */
74  PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) /* presumably mm4 = avg(mm0, mm1), mm5 = avg(mm2, mm3) */
75  "movq %%mm4, (%2) \n\t" /* destination row 0, bytes 0..7 */
76  "movq %%mm5, (%2, %3) \n\t" /* destination row 1, bytes 0..7 */
77  "movq 8(%1), %%mm0 \n\t" /* rows 0 and 1, right half: same pattern at byte offset 8 */
78  "movq 9(%1), %%mm1 \n\t"
79  "movq 8(%1, %3), %%mm2 \n\t"
80  "movq 9(%1, %3), %%mm3 \n\t"
81  PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
82  "movq %%mm4, 8(%2) \n\t" /* destination row 0, bytes 8..15 */
83  "movq %%mm5, 8(%2, %3) \n\t" /* destination row 1, bytes 8..15 */
84  "add %%"REG_a", %1 \n\t" /* pixels += 2 * line_size */
85  "add %%"REG_a", %2 \n\t" /* block  += 2 * line_size */
86  "movq (%1), %%mm0 \n\t" /* rows 2 and 3, left half */
87  "movq 1(%1), %%mm1 \n\t"
88  "movq (%1, %3), %%mm2 \n\t"
89  "movq 1(%1, %3), %%mm3 \n\t"
90  PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
91  "movq %%mm4, (%2) \n\t"
92  "movq %%mm5, (%2, %3) \n\t"
93  "movq 8(%1), %%mm0 \n\t" /* rows 2 and 3, right half */
94  "movq 9(%1), %%mm1 \n\t"
95  "movq 8(%1, %3), %%mm2 \n\t"
96  "movq 9(%1, %3), %%mm3 \n\t"
97  PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
98  "movq %%mm4, 8(%2) \n\t"
99  "movq %%mm5, 8(%2, %3) \n\t"
100  "add %%"REG_a", %1 \n\t"
101  "add %%"REG_a", %2 \n\t"
102  "subl $4, %0 \n\t" /* four rows have been written */
103  "jnz 1b \n\t" /* repeat until h is exactly 0 */
104  :"+g"(h), "+S"(pixels), "+D"(block) /* %0 = h, %1 = pixels in the si register, %2 = block in the di register; all read and written */
105  :"r"((x86_reg)line_size) /* %3 = line_size in any general register */
106  :REG_a, "memory"); /* REG_a is scratch; "memory" because stores go through %2. The mm registers used are not listed as clobbers. */
107 }
108 
/*
 * pixels8_y2, "put" variant. The real symbol name is produced by the DEF
 * macro, which is defined outside this file.
 *
 * Vertical counterpart of the x2 functions: destination row r is produced
 * from source rows r and r + 1 (8 bytes each) through PAVGBP. PAVGBP is
 * defined elsewhere; presumably a per-byte average whose rounding depends on
 * the rnd/no_rnd build -- TODO confirm.
 *
 * Each source row is loaded only once: the last row loaded in one half of
 * the loop body stays in a register (mm2 after the first half, mm0 after the
 * second) and serves as the upper row of the next half.
 *
 * block:     destination; 8 bytes are written per row
 * pixels:    source; h + 1 rows of 8 bytes are read
 * line_size: byte distance between consecutive rows, applied to both pointers
 * h:         row count. Four rows are handled per loop iteration and the loop
 *            only ends when h reaches exactly 0, so h must be a positive
 *            multiple of 4.
 */
109 static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
110 {
111  MOVQ_BFE(mm6); /* macro defined elsewhere; presumably prepares mm6 for PAVGBP -- TODO confirm */
112  __asm__ volatile(
113  "lea (%3, %3), %%"REG_a" \n\t" /* REG_a = 2 * line_size */
114  "movq (%1), %%mm0 \n\t" /* preload source row 0 before entering the loop */
115  ".p2align 3 \n\t" /* align the loop head to 8 bytes */
116  "1: \n\t" /* loop head: on entry mm0 holds the current top row */
117  "movq (%1, %3), %%mm1 \n\t" /* source row 1 */
118  "movq (%1, %%"REG_a"),%%mm2 \n\t" /* source row 2 */
119  PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) /* presumably mm4 = avg(row 1, row 0), mm5 = avg(row 2, row 1) */
120  "movq %%mm4, (%2) \n\t" /* destination row 0 */
121  "movq %%mm5, (%2, %3) \n\t" /* destination row 1 */
122  "add %%"REG_a", %1 \n\t" /* pixels += 2 * line_size */
123  "add %%"REG_a", %2 \n\t" /* block  += 2 * line_size */
124  "movq (%1, %3), %%mm1 \n\t" /* source row 3 */
125  "movq (%1, %%"REG_a"),%%mm0 \n\t" /* source row 4; stays in mm0 as the top row of the next iteration */
126  PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) /* presumably mm4 = avg(row 3, row 2), mm5 = avg(row 4, row 3) */
127  "movq %%mm4, (%2) \n\t" /* destination row 2 */
128  "movq %%mm5, (%2, %3) \n\t" /* destination row 3 */
129  "add %%"REG_a", %1 \n\t"
130  "add %%"REG_a", %2 \n\t"
131  "subl $4, %0 \n\t" /* four rows have been written */
132  "jnz 1b \n\t" /* repeat until h is exactly 0 */
133  :"+g"(h), "+S"(pixels), "+D"(block) /* %0 = h, %1 = pixels in the si register, %2 = block in the di register; all read and written */
134  :"r"((x86_reg)line_size) /* %3 = line_size in any general register */
135  :REG_a, "memory"); /* REG_a is scratch; "memory" because stores go through %2. The mm registers used are not listed as clobbers. */
136 }
137 
/*
 * pixels16_x2, "avg" variant. The real symbol name is produced by the DEF
 * macro, which is defined outside this file.
 *
 * Unlike the "put" functions, the existing destination bytes are read and
 * combined with the newly computed value before being written back. Per row
 * and per 8-byte half (byte offsets 0 and 8):
 *   1. source bytes at offset k and k + 1 go through PAVGB into mm2;
 *   2. mm2 and the current destination bytes go through PAVGB_MMX into mm0;
 *   3. mm0 is stored back to the destination.
 * PAVGB and PAVGB_MMX are defined elsewhere; presumably both are per-byte
 * averages (a, b -> result, with mm6 as a helper constant) -- TODO confirm.
 *
 * The row loop is written in C, one row per asm statement.
 *
 * block:     destination; 16 bytes per row are read and rewritten
 * pixels:    source; bytes 0..16 of each row are read
 * line_size: byte distance between consecutive rows, applied to both pointers
 * h:         row count; the do/while body runs before the test, so h must be
 *            at least 1.
 */
138 static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
139 {
140  MOVQ_BFE(mm6); /* macro defined elsewhere; presumably loads the constant that PAVGB/PAVGB_MMX take as their last argument -- TODO confirm */
141  JUMPALIGN(); /* macro defined elsewhere; presumably emits alignment ahead of the loop -- TODO confirm */
142  do {
143  __asm__ volatile(
144  "movq %1, %%mm0 \n\t" /* source bytes 0..7 */
145  "movq 1%1, %%mm1 \n\t" /* source bytes 1..8; the "1" is pasted textually in front of the memory operand. NOTE(review): relies on the operand being printed without a displacement of its own */
146  "movq %0, %%mm3 \n\t" /* current destination bytes 0..7 */
147  PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) /* presumably mm2 = avg(mm0, mm1) */
148  PAVGB_MMX(%%mm3, %%mm2, %%mm0, %%mm6) /* presumably mm0 = avg(destination, mm2) */
149  "movq %%mm0, %0 \n\t" /* write back destination bytes 0..7 */
150  "movq 8%1, %%mm0 \n\t" /* same sequence for the right half: source bytes 8..15 */
151  "movq 9%1, %%mm1 \n\t" /* source bytes 9..16 */
152  "movq 8%0, %%mm3 \n\t" /* current destination bytes 8..15 */
153  PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
154  PAVGB_MMX(%%mm3, %%mm2, %%mm0, %%mm6)
155  "movq %%mm0, 8%0 \n\t" /* write back destination bytes 8..15 */
156  :"+m"(*block) /* %0 = destination as a read/write memory operand */
157  :"m"(*pixels) /* %1 = source as a memory operand */
158  :"memory"); /* the operands name a single byte each; "memory" covers the remaining bytes accessed. The mm registers used are not listed as clobbers. */
159  pixels += line_size; /* advance both pointers to the next row */
160  block += line_size;
161  } while (--h);
162 }
163 
/*
 * pixels8_y2, "avg" variant. The real symbol name is produced by the DEF
 * macro, which is defined outside this file.
 *
 * Source rows r and r + 1 (8 bytes each) go through PAVGBP; the result is
 * then combined with the existing destination row r through PAVGB_MMX and
 * written back. PAVGBP and PAVGB_MMX are defined elsewhere; presumably both
 * are per-byte averages -- TODO confirm.
 *
 * Register reuse across the two unrolled halves matters here: the PAVGB_MMX
 * results are written into registers whose source row is no longer needed
 * (mm0/mm1 in the first half, mm2/mm1 in the second), while the row that the
 * next half still needs (mm2 after the first half, mm0 after the second) is
 * left untouched.
 *
 * block:     destination; 8 bytes per row are read and rewritten
 * pixels:    source; h + 1 rows of 8 bytes are read
 * line_size: byte distance between consecutive rows, applied to both pointers
 * h:         row count. Four rows are handled per loop iteration and the loop
 *            only ends when h reaches exactly 0, so h must be a positive
 *            multiple of 4.
 */
164 static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
165 {
166  MOVQ_BFE(mm6); /* macro defined elsewhere; presumably loads the constant that PAVGB_MMX takes as its last argument -- TODO confirm */
167  __asm__ volatile(
168  "lea (%3, %3), %%"REG_a" \n\t" /* REG_a = 2 * line_size */
169  "movq (%1), %%mm0 \n\t" /* preload source row 0 before entering the loop */
170  ".p2align 3 \n\t" /* align the loop head to 8 bytes */
171  "1: \n\t" /* loop head: on entry mm0 holds the current top row */
172  "movq (%1, %3), %%mm1 \n\t" /* source row 1 */
173  "movq (%1, %%"REG_a"), %%mm2 \n\t" /* source row 2; kept in mm2 for the second half */
174  PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) /* presumably mm4 = avg(row 1, row 0), mm5 = avg(row 2, row 1) */
175  "movq (%2), %%mm3 \n\t" /* current destination row 0 */
176  PAVGB_MMX(%%mm3, %%mm4, %%mm0, %%mm6) /* presumably mm0 = avg(destination row 0, mm4) */
177  "movq (%2, %3), %%mm3 \n\t" /* current destination row 1 */
178  PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6) /* presumably mm1 = avg(destination row 1, mm5) */
179  "movq %%mm0, (%2) \n\t" /* write back destination row 0 */
180  "movq %%mm1, (%2, %3) \n\t" /* write back destination row 1 */
181  "add %%"REG_a", %1 \n\t" /* pixels += 2 * line_size */
182  "add %%"REG_a", %2 \n\t" /* block  += 2 * line_size */
183 
184  "movq (%1, %3), %%mm1 \n\t" /* source row 3 */
185  "movq (%1, %%"REG_a"), %%mm0 \n\t" /* source row 4; stays in mm0 as the top row of the next iteration */
186  PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) /* presumably mm4 = avg(row 3, row 2), mm5 = avg(row 4, row 3) */
187  "movq (%2), %%mm3 \n\t" /* current destination row 2 */
188  PAVGB_MMX(%%mm3, %%mm4, %%mm2, %%mm6) /* result goes to mm2 (row 2 is no longer needed), leaving mm0 intact */
189  "movq (%2, %3), %%mm3 \n\t" /* current destination row 3 */
190  PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6)
191  "movq %%mm2, (%2) \n\t" /* write back destination row 2 */
192  "movq %%mm1, (%2, %3) \n\t" /* write back destination row 3 */
193  "add %%"REG_a", %1 \n\t"
194  "add %%"REG_a", %2 \n\t"
195 
196  "subl $4, %0 \n\t" /* four rows have been written */
197  "jnz 1b \n\t" /* repeat until h is exactly 0 */
198  :"+g"(h), "+S"(pixels), "+D"(block) /* %0 = h, %1 = pixels in the si register, %2 = block in the di register; all read and written */
199  :"r"((x86_reg)line_size) /* %3 = line_size in any general register */
200  :REG_a, "memory"); /* REG_a is scratch; "memory" because loads and stores go through %1/%2. The mm registers used are not listed as clobbers. */
201 }