00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include "libavutil/cpu.h"
00024 #include "libavcodec/dsputil.h"
00025 #include "dsputil_altivec.h"
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047 static void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
00048 {
00049 register int misal = ((unsigned long)blocks & 0x00000010);
00050 register int i = 0;
00051 #if 1
00052 if (misal) {
00053 ((unsigned long*)blocks)[0] = 0L;
00054 ((unsigned long*)blocks)[1] = 0L;
00055 ((unsigned long*)blocks)[2] = 0L;
00056 ((unsigned long*)blocks)[3] = 0L;
00057 i += 16;
00058 }
00059 for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) {
00060 __asm__ volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
00061 }
00062 if (misal) {
00063 ((unsigned long*)blocks)[188] = 0L;
00064 ((unsigned long*)blocks)[189] = 0L;
00065 ((unsigned long*)blocks)[190] = 0L;
00066 ((unsigned long*)blocks)[191] = 0L;
00067 i += 16;
00068 }
00069 #else
00070 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00071 #endif
00072 }
00073
00074
00075
00076 #if HAVE_DCBZL
00077 static void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
00078 {
00079 register int misal = ((unsigned long)blocks & 0x0000007f);
00080 register int i = 0;
00081 #if 1
00082 if (misal) {
00083
00084
00085
00086 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00087 }
00088 else
00089 for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
00090 __asm__ volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
00091 }
00092 #else
00093 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00094 #endif
00095 }
00096 #else
00097 static void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
00098 {
00099 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00100 }
00101 #endif
00102
00103 #if HAVE_DCBZL
00104
00105
00106
00107
00108
/**
 * Probe how many bytes a single dcbzl instruction zeroes.
 *
 * A 1024-byte scratch buffer is filled with 0xFF, dcbzl is issued on the
 * address in its middle, and the bytes that read back as zero are counted.
 *
 * @return number of zero bytes found in the scratch buffer after dcbzl,
 *         or 0 if the scratch buffer could not be allocated
 */
static long check_dcbzl_effect(void)
{
    char *fakedata = av_malloc(1024);
    char *fakedata_middle;
    long zero  = 0;
    long count = 0;
    long i;

    if (!fakedata) {
        return 0L;
    }

    /* Target the middle so the zeroed range stays inside the buffer on
     * either side of the address. */
    fakedata_middle = fakedata + 512;

    memset(fakedata, 0xFF, 1024);

    /* The "memory" clobber tells the compiler that the asm modifies the
     * buffer; without it the 0xFF contents written by memset() may be
     * assumed unchanged when the loop below reads them back. */
    __asm__ volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero) : "memory");

    for (i = 0; i < 1024; i++) {
        if (fakedata[i] == (char)0)
            count++;
    }

    av_free(fakedata);

    return count;
}
00138 #else
/**
 * Build-time fallback when dcbzl is not available: nothing is probed.
 *
 * @return always 0
 */
static long check_dcbzl_effect(void)
{
    return 0L;
}
00143 #endif
00144
/**
 * Issue a dcbt cache-touch hint for the first byte of each of h rows.
 *
 * @param mem    address of the first row
 * @param stride distance in bytes from one row to the next
 * @param h      number of rows; nothing is done when h <= 0
 */
static void prefetch_ppc(void *mem, int stride, int h)
{
    const uint8_t *p = mem;

    /* Counting down with an explicit h > 0 test keeps a zero or negative
     * row count from decrementing h until it wraps around. */
    for (; h > 0; h--) {
        __asm__ volatile ("dcbt 0,%0" : : "r" (p));
        p += stride;
    }
}
00153
00154 void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
00155 {
00156 const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
00157
00158
00159 c->prefetch = prefetch_ppc;
00160 if (!high_bit_depth) {
00161 switch (check_dcbzl_effect()) {
00162 case 32:
00163 c->clear_blocks = clear_blocks_dcbz32_ppc;
00164 break;
00165 case 128:
00166 c->clear_blocks = clear_blocks_dcbz128_ppc;
00167 break;
00168 default:
00169 break;
00170 }
00171 }
00172
00173 #if HAVE_ALTIVEC
00174 if(CONFIG_H264_DECODER) dsputil_h264_init_ppc(c, avctx);
00175
00176 if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
00177 dsputil_init_altivec(c, avctx);
00178 float_init_altivec(c, avctx);
00179 int_init_altivec(c, avctx);
00180 c->gmc1 = gmc1_altivec;
00181
00182 #if CONFIG_ENCODERS
00183 if (avctx->dct_algo == FF_DCT_AUTO ||
00184 avctx->dct_algo == FF_DCT_ALTIVEC) {
00185 c->fdct = fdct_altivec;
00186 }
00187 #endif //CONFIG_ENCODERS
00188
00189 if (avctx->lowres==0) {
00190 if ((avctx->idct_algo == FF_IDCT_AUTO) ||
00191 (avctx->idct_algo == FF_IDCT_ALTIVEC)) {
00192 c->idct_put = idct_put_altivec;
00193 c->idct_add = idct_add_altivec;
00194 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
00195 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER) &&
00196 avctx->idct_algo==FF_IDCT_VP3){
00197 c->idct_put = ff_vp3_idct_put_altivec;
00198 c->idct_add = ff_vp3_idct_add_altivec;
00199 c->idct = ff_vp3_idct_altivec;
00200 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
00201 }
00202 }
00203
00204 }
00205 #endif
00206 }