#include "libavutil/x86_cpu.h"
#include "libavcodec/dsputil.h"
#include "dsputil_mmx.h"
Go to the source code of this file.
Defines | |
#define | OP_PUT(S, D) |
#define | OP_AVG(S, D) "pavgb " #S ", " #D " \n\t" |
#define | NORMALIZE_MMX(SHIFT) |
Add rounder from mm7 to mm3 and pack result at destination. | |
#define | TRANSFER_DO_PACK(OP) |
#define | TRANSFER_DONT_PACK(OP) |
#define | DO_UNPACK(reg) "punpcklbw %%mm0, " reg "\n\t" |
#define | DONT_UNPACK(reg) |
#define | LOAD_ROUNDER_MMX(ROUND) |
Computes the rounder 32-r or 8-r and unpacks it to mm7. | |
#define | SHIFT2_LINE(OFF, R0, R1, R2, R3) |
#define | VC1_HOR_16b_SHIFT2(OP, OPNAME) |
Data is already unpacked, so some operations can be performed directly from memory. | |
#define | VC1_SHIFT2(OP, OPNAME) |
#define | MSPEL_FILTER13_CORE(UNPACK, MOVQ, A1, A2, A3, A4) |
#define | MSPEL_FILTER13_VER_16B(NAME, A1, A2, A3, A4) |
#define | MSPEL_FILTER13_HOR_16B(NAME, A1, A2, A3, A4, OP, OPNAME) |
#define | MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4, OP, OPNAME) |
#define | VC1_MSPEL_MC(OP) |
#define | DECLARE_FUNCTION(a, b) |
Functions | |
static void | vc1_put_ver_16b_shift2_mmx (int16_t *dst, const uint8_t *src, x86_reg stride, int rnd, int64_t shift) |
Sacrificing mm6 allows pipelining of loads from src. | |
VC1_HOR_16b_SHIFT2 (#define MSPEL_FILTER13_CORE(OP_PUT, put_) | |
static void | vc1_inv_trans_4x8_dc_mmx2 (uint8_t *dest, int linesize, DCTELEM *block) |
static void | vc1_inv_trans_8x4_dc_mmx2 (uint8_t *dest, int linesize, DCTELEM *block) |
static void | vc1_inv_trans_8x8_dc_mmx2 (uint8_t *dest, int linesize, DCTELEM *block) |
void | ff_vc1dsp_init_mmx (DSPContext *dsp, AVCodecContext *avctx) |
Variables | |
const uint64_t | ff_pw_9 = 0x0009000900090009ULL |
#define DECLARE_FUNCTION | ( | a, | |||
b | ) |
Value:
static void put_vc1_mspel_mc ## a ## b ## _mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \ put_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ }\ static void avg_vc1_mspel_mc ## a ## b ## _mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \ avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ }
Referenced by VC1_HOR_16b_SHIFT2().
#define DONT_UNPACK | ( | reg | ) |
Definition at line 54 of file vc1dsp_mmx.c.
#define LOAD_ROUNDER_MMX | ( | ROUND | ) |
Value:
"movd "ROUND", %%mm7 \n\t" \ "punpcklwd %%mm7, %%mm7 \n\t" \ "punpckldq %%mm7, %%mm7 \n\t"
Definition at line 57 of file vc1dsp_mmx.c.
Referenced by vc1_put_ver_16b_shift2_mmx().
#define MSPEL_FILTER13_8B | ( | NAME, | |||
A1, | |||||
A2, | |||||
A3, | |||||
A4, | |||||
OP, | |||||
OPNAME | ) |
Value:
static void \ OPNAME ## vc1_## NAME ## _mmx(uint8_t *dst, const uint8_t *src, \ x86_reg stride, int rnd, x86_reg offset) \ { \ int h = 8; \ src -= offset; \ rnd = 32-rnd; \ __asm__ volatile ( \ LOAD_ROUNDER_MMX("%6") \ "movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \ "movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \ ASMALIGN(3) \ "1: \n\t" \ MSPEL_FILTER13_CORE(DO_UNPACK, "movd 1", A1, A2, A3, A4) \ NORMALIZE_MMX("$6") \ TRANSFER_DO_PACK(OP) \ "add %5, %1 \n\t" \ "add %5, %2 \n\t" \ "decl %0 \n\t" \ "jnz 1b \n\t" \ : "+r"(h), "+r" (src), "+r" (dst) \ : "r"(offset), "r"(3*offset), "g"(stride), "m"(rnd) \ : "memory" \ ); \ }
Referenced by VC1_HOR_16b_SHIFT2().
#define MSPEL_FILTER13_CORE | ( | UNPACK, | |||
MOVQ, | |||||
A1, | |||||
A2, | |||||
A3, | |||||
A4 | ) |
Value:
MOVQ "*0+"A1", %%mm1 \n\t" \ MOVQ "*4+"A1", %%mm2 \n\t" \ UNPACK("%%mm1") \ UNPACK("%%mm2") \ "pmullw "MANGLE(ff_pw_3)", %%mm1\n\t" \ "pmullw "MANGLE(ff_pw_3)", %%mm2\n\t" \ MOVQ "*0+"A2", %%mm3 \n\t" \ MOVQ "*4+"A2", %%mm4 \n\t" \ UNPACK("%%mm3") \ UNPACK("%%mm4") \ "pmullw %%mm6, %%mm3 \n\t" /* *18 */ \ "pmullw %%mm6, %%mm4 \n\t" /* *18 */ \ "psubw %%mm1, %%mm3 \n\t" /* 18,-3 */ \ "psubw %%mm2, %%mm4 \n\t" /* 18,-3 */ \ MOVQ "*0+"A4", %%mm1 \n\t" \ MOVQ "*4+"A4", %%mm2 \n\t" \ UNPACK("%%mm1") \ UNPACK("%%mm2") \ "psllw $2, %%mm1 \n\t" /* 4* */ \ "psllw $2, %%mm2 \n\t" /* 4* */ \ "psubw %%mm1, %%mm3 \n\t" /* -4,18,-3 */ \ "psubw %%mm2, %%mm4 \n\t" /* -4,18,-3 */ \ MOVQ "*0+"A3", %%mm1 \n\t" \ MOVQ "*4+"A3", %%mm2 \n\t" \ UNPACK("%%mm1") \ UNPACK("%%mm2") \ "pmullw %%mm5, %%mm1 \n\t" /* *53 */ \ "pmullw %%mm5, %%mm2 \n\t" /* *53 */ \ "paddw %%mm1, %%mm3 \n\t" /* 4,53,18,-3 */ \ "paddw %%mm2, %%mm4 \n\t"
#define MSPEL_FILTER13_HOR_16B | ( | NAME, | |||
A1, | |||||
A2, | |||||
A3, | |||||
A4, | |||||
OP, | |||||
OPNAME | ) |
Value:
static void \ OPNAME ## vc1_hor_16b_ ## NAME ## _mmx(uint8_t *dst, x86_reg stride, \ const int16_t *src, int rnd) \ { \ int h = 8; \ src -= 1; \ rnd -= (-4+58+13-3)*256; /* Add -256 bias */ \ __asm__ volatile( \ LOAD_ROUNDER_MMX("%4") \ "movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \ "movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \ ASMALIGN(3) \ "1: \n\t" \ MSPEL_FILTER13_CORE(DONT_UNPACK, "movq 2", A1, A2, A3, A4) \ NORMALIZE_MMX("$7") \ /* Remove bias */ \ "paddw "MANGLE(ff_pw_128)", %%mm3 \n\t" \ "paddw "MANGLE(ff_pw_128)", %%mm4 \n\t" \ TRANSFER_DO_PACK(OP) \ "add $24, %1 \n\t" \ "add %3, %2 \n\t" \ "decl %0 \n\t" \ "jnz 1b \n\t" \ : "+r"(h), "+r" (src), "+r" (dst) \ : "r"(stride), "m"(rnd) \ : "memory" \ ); \ }
#define MSPEL_FILTER13_VER_16B | ( | NAME, | |||
A1, | |||||
A2, | |||||
A3, | |||||
A4 | ) |
#define NORMALIZE_MMX | ( | SHIFT | ) |
Value:
"paddw %%mm7, %%mm3 \n\t" /* +bias-r */ \ "paddw %%mm7, %%mm4 \n\t" /* +bias-r */ \ "psraw "SHIFT", %%mm3 \n\t" \ "psraw "SHIFT", %%mm4 \n\t"
Definition at line 35 of file vc1dsp_mmx.c.
Definition at line 32 of file vc1dsp_mmx.c.
#define OP_PUT | ( | S, | |||
D | ) |
Definition at line 31 of file vc1dsp_mmx.c.
#define SHIFT2_LINE | ( | OFF, | |||
R0, | |||||
R1, | |||||
R2, | |||||
R3 | ) |
Value:
"paddw %%mm"#R2", %%mm"#R1" \n\t" \ "movd (%0,%3), %%mm"#R0" \n\t" \ "pmullw %%mm6, %%mm"#R1" \n\t" \ "punpcklbw %%mm0, %%mm"#R0" \n\t" \ "movd (%0,%2), %%mm"#R3" \n\t" \ "psubw %%mm"#R0", %%mm"#R1" \n\t" \ "punpcklbw %%mm0, %%mm"#R3" \n\t" \ "paddw %%mm7, %%mm"#R1" \n\t" \ "psubw %%mm"#R3", %%mm"#R1" \n\t" \ "psraw %4, %%mm"#R1" \n\t" \ "movq %%mm"#R1", "#OFF"(%1) \n\t" \ "add %2, %0 \n\t"
Definition at line 62 of file vc1dsp_mmx.c.
Referenced by vc1_put_ver_16b_shift2_mmx().
#define TRANSFER_DO_PACK | ( | OP | ) |
Value:
Definition at line 41 of file vc1dsp_mmx.c.
#define TRANSFER_DONT_PACK | ( | OP | ) |
Value:
Definition at line 46 of file vc1dsp_mmx.c.
#define VC1_HOR_16b_SHIFT2 | ( | OP, | |||
OPNAME | ) |
Data is already unpacked, so some operations can be performed directly from memory.
Definition at line 116 of file vc1dsp_mmx.c.
#define VC1_MSPEL_MC | ( | OP | ) |
#define VC1_SHIFT2 | ( | OP, | |||
OPNAME | ) |
Referenced by VC1_HOR_16b_SHIFT2().
void ff_vc1dsp_init_mmx | ( | DSPContext * | dsp, | |
AVCodecContext * | avctx | |||
) |
VC1_HOR_16b_SHIFT2 | ( | #define MSPEL_FILTER13_CORE( | OP_PUT, | |
put_ | ||||
) |
Definition at line 156 of file vc1dsp_mmx.c.
static void vc1_inv_trans_4x8_dc_mmx2 | ( | uint8_t * | dest, | |
int | linesize, | |||
DCTELEM * | block | |||
) | [static] |
static void vc1_inv_trans_8x4_dc_mmx2 | ( | uint8_t * | dest, | |
int | linesize, | |||
DCTELEM * | block | |||
) | [static] |
static void vc1_inv_trans_8x8_dc_mmx2 | ( | uint8_t * | dest, | |
int | linesize, | |||
DCTELEM * | block | |||
) | [static] |
static void vc1_put_ver_16b_shift2_mmx | ( | int16_t * | dst, | |
const uint8_t * | src, | |||
x86_reg | stride, | |||
int | rnd, | |||
int64_t | shift | |||
) | [static] |
const uint64_t ff_pw_9 = 0x0009000900090009ULL |