00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <string.h>
00024
00025 #include "libavutil/cpu.h"
00026 #include "libavutil/mem.h"
00027 #include "libavcodec/dsputil.h"
00028 #include "dsputil_altivec.h"
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050 static void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
00051 {
00052 register int misal = ((unsigned long)blocks & 0x00000010);
00053 register int i = 0;
00054 if (misal) {
00055 ((unsigned long*)blocks)[0] = 0L;
00056 ((unsigned long*)blocks)[1] = 0L;
00057 ((unsigned long*)blocks)[2] = 0L;
00058 ((unsigned long*)blocks)[3] = 0L;
00059 i += 16;
00060 }
00061 for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) {
00062 __asm__ volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
00063 }
00064 if (misal) {
00065 ((unsigned long*)blocks)[188] = 0L;
00066 ((unsigned long*)blocks)[189] = 0L;
00067 ((unsigned long*)blocks)[190] = 0L;
00068 ((unsigned long*)blocks)[191] = 0L;
00069 i += 16;
00070 }
00071 }
00072
00073
00074
00075 #if HAVE_DCBZL
00076 static void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
00077 {
00078 register int misal = ((unsigned long)blocks & 0x0000007f);
00079 register int i = 0;
00080 if (misal) {
00081
00082
00083
00084 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00085 }
00086 else
00087 for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
00088 __asm__ volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
00089 }
00090 }
00091 #else
00092 static void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
00093 {
00094 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00095 }
00096 #endif
00097
#if HAVE_DCBZL
/**
 * Probe how many bytes a single dcbzl instruction clears.
 *
 * A 1024-byte scratch buffer is filled with 0xFF, one dcbzl is issued on the
 * address in the middle of it, and the bytes that then read back as zero are
 * counted.
 *
 * @return the number of zero bytes found in the scratch buffer, or 0 when
 *         the scratch buffer could not be allocated
 */
static long check_dcbzl_effect(void)
{
    enum { PROBE_SIZE = 1024 };
    register char *fakedata = av_malloc(PROBE_SIZE);
    register char *fakedata_middle;
    register long zero = 0;
    register long i = 0;
    long count = 0;

    if (!fakedata) {
        return 0L;
    }

    fakedata_middle = fakedata + PROBE_SIZE / 2;

    memset(fakedata, 0xFF, PROBE_SIZE);

    /*
     * The "memory" clobber tells the compiler that this instruction changes
     * memory it cannot see through the operands. Without it the compiler is
     * allowed to assume the buffer still holds the 0xFF pattern written just
     * above and to evaluate the counting loop below at compile time.
     */
    __asm__ volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero) : "memory");

    for (i = 0; i < PROBE_SIZE; i++) {
        if (fakedata[i] == (char)0)
            count++;
    }

    av_free(fakedata);

    return count;
}
#else
/**
 * Variant used when the build has no dcbzl support.
 *
 * @return always 0
 */
static long check_dcbzl_effect(void)
{
    return 0;
}
#endif
00139
/**
 * Issue one dcbt instruction per row of a strided memory region.
 *
 * The first address is always touched, even when h is not positive; the
 * loop ends once the decremented row counter reaches zero.
 *
 * @param mem    address of the first row
 * @param stride byte distance from one row to the next
 * @param h      number of rows
 */
static void prefetch_ppc(void *mem, int stride, int h)
{
    register const uint8_t *row = mem;

    for (;;) {
        __asm__ volatile ("dcbt 0,%0" : : "r" (row));
        row += stride;
        if (--h == 0)
            break;
    }
}
00148
00149 void ff_dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
00150 {
00151 const int high_bit_depth = avctx->bits_per_raw_sample > 8;
00152 int mm_flags = av_get_cpu_flags();
00153
00154 if (avctx->dsp_mask) {
00155 if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
00156 mm_flags |= (avctx->dsp_mask & 0xffff);
00157 else
00158 mm_flags &= ~(avctx->dsp_mask & 0xffff);
00159 }
00160
00161
00162 c->prefetch = prefetch_ppc;
00163 if (!high_bit_depth) {
00164 switch (check_dcbzl_effect()) {
00165 case 32:
00166 c->clear_blocks = clear_blocks_dcbz32_ppc;
00167 break;
00168 case 128:
00169 c->clear_blocks = clear_blocks_dcbz128_ppc;
00170 break;
00171 default:
00172 break;
00173 }
00174 }
00175
00176 #if HAVE_ALTIVEC
00177 if(CONFIG_H264_DECODER) ff_dsputil_h264_init_ppc(c, avctx);
00178
00179 if (mm_flags & AV_CPU_FLAG_ALTIVEC) {
00180 ff_dsputil_init_altivec(c, avctx);
00181 ff_float_init_altivec(c, avctx);
00182 ff_int_init_altivec(c, avctx);
00183 c->gmc1 = ff_gmc1_altivec;
00184
00185 #if CONFIG_ENCODERS
00186 if (avctx->bits_per_raw_sample <= 8 &&
00187 (avctx->dct_algo == FF_DCT_AUTO ||
00188 avctx->dct_algo == FF_DCT_ALTIVEC)) {
00189 c->fdct = ff_fdct_altivec;
00190 }
00191 #endif //CONFIG_ENCODERS
00192
00193 if (avctx->lowres == 0 && avctx->bits_per_raw_sample <= 8) {
00194 if ((avctx->idct_algo == FF_IDCT_AUTO) ||
00195 (avctx->idct_algo == FF_IDCT_ALTIVEC)) {
00196 c->idct_put = ff_idct_put_altivec;
00197 c->idct_add = ff_idct_add_altivec;
00198 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
00199 }
00200 }
00201
00202 }
00203 #endif
00204 }