[FFmpeg-devel] [PATCH] CrystalHD decoder support v3

Tue Feb 1 15:17:37 CET 2011

Hi,

mostly a naive pass on the code.

On Sun, 30 Jan 2011 18:53:08 -0800 Philip Langdale (by way of Philip Langdale ) wrote:
> diff --git a/libavcodec/crystalhd.c b/libavcodec/crystalhd.c
> new file mode 100644
> index 0000000..65bc285
> --- /dev/null
> +++ b/libavcodec/crystalhd.c
> @@ -0,0 +1,903 @@

> +#define OUTPUT_PROC_TIMEOUT 50
> +#define TIMESTAMP_UNIT 100000

I think a comment would be welcome (at least with the units) even
though the explanation is in the code.

> +static inline int extract_sps_pps_from_avcc(CHDContext *priv,
> +                                            uint8_t *data,
> +                                            uint32_t data_size)
> +{
> [...]
> +}

Do you think some code could be shared between this and and the mp4 to annex B bitstream filter?

> +static inline void print_frame_info(CHDContext *priv, BC_DTS_PROC_OUT *output)
> +{
> +    av_log(priv->avctx, AV_LOG_VERBOSE, "\tYBuffSz: %u\n", output->YbuffSz);
> +    av_log(priv->avctx, AV_LOG_VERBOSE, "\tYBuffDoneSz: %u\n",
> +           output->YBuffDoneSz);
> +    av_log(priv->avctx, AV_LOG_VERBOSE, "\tUVBuffDoneSz: %u\n",
> +           output->UVBuffDoneSz);
> +    av_log(priv->avctx, AV_LOG_VERBOSE, "\tTimestamp: %lu\n",
> +           output->PicInfo.timeStamp);

timestamp is an uint64_t

> +static inline void *memcpy_pic(void *dst, const void *src,
> +                               int bytesPerLine, int height,
> +                               int dstStride, int srcStride)
> +{
> +    int i;
> +    void *retval = dst;
> +
> +    for(i = 0; i < height; i++) {
> +        memcpy(dst, src, bytesPerLine);
> +        src = (const uint8_t*)src + srcStride;
> +        dst = (uint8_t*)dst + dstStride;
> +    }
> +    return retval;

you're not using the return value, why keeping it?

> +}
> +
> +
> +/*****************************************************************************
> + * OpaqueList functions
> + ****************************************************************************/
> +
> +static uint64_t opaque_list_push(CHDContext *priv, uint64_t reordered_opaque)
> +{
> +    OpaqueList *newNode = av_mallocz(sizeof (OpaqueList));

You should check newNode is not NULL.

> +    if (priv->head == NULL) {
> +        newNode->fake_timestamp = TIMESTAMP_UNIT;
> +        priv->head = newNode;
> +    } else {
> +        newNode->fake_timestamp = priv->tail->fake_timestamp + TIMESTAMP_UNIT;
> +        priv->tail->next = newNode;
> +    }
> +    priv->tail = newNode;
> +    newNode->reordered_opaque = reordered_opaque;
> +
> +    return newNode->fake_timestamp;
> +}
> +
> +static uint64_t opaque_list_pop(CHDContext *priv, uint64_t fake_timestamp)
> +{

Can you please document the behaviour of this function?

> +    OpaqueList *node = priv->head;
> +
> +    if (priv->head == NULL) {
> +        av_log(priv->avctx, AV_LOG_ERROR,
> +               "CrystalHD: Attempted to query non-existent timestamps.\n");
> +        return AV_NOPTS_VALUE;
> +    }
> +
> +    if (priv->head->fake_timestamp == fake_timestamp) {
> +        uint64_t reordered_opaque = node->reordered_opaque;
> +        priv->head = node->next;
> +        av_free(node);
> +
> +        if (priv->head->next == NULL)
> +            priv->tail = priv->head;
> +
> +        return reordered_opaque;
> +    }
> +
> +    while (node->next) {
> +        OpaqueList *next = node->next;
> +        if (next->fake_timestamp == fake_timestamp) {
> +            uint64_t reordered_opaque = next->reordered_opaque;
> +            node->next = next->next;
> +            av_free(next);
> +
> +            if (node->next == NULL)
> +               priv->tail = node;
> +
> +            return reordered_opaque;
> +        } else {
> +            node = next;
> +        }
> +    }
> +

Why do you need the special case for the head element?
It seems this should work fine with a 'while (node)' loop, no?

> +    av_log(priv->avctx, AV_LOG_VERBOSE,
> +           "CrystalHD: Couldn't match fake_timestamp.\n");
> +    return AV_NOPTS_VALUE;
> +}
> +
> +
> +/*****************************************************************************
> + * Video decoder API function definitions
> + ****************************************************************************/
> +
> +static void flush(AVCodecContext *avctx)
> +{
> +    CHDContext *priv = avctx->priv_data;
> +
> +    avctx->has_b_frames = 0;
> +    priv->last_picture = 2;

Please comment why it has to be 2, or make this magic value a self
explaining constant.

> +    priv->skip_next_output = 0;
> +    DtsFlushInput(priv->dev, 4);

A comment here that it flushes everything on the decoder and on the
driver side could be good too.

> +}
> +
> +
> +static av_cold int uninit(AVCodecContext *avctx)
> +{
> +    CHDContext *priv = avctx->priv_data;
> +    HANDLE device;
> +
> +    if (!priv)
> +        return 0;
> +

can this happen?

> +    device = priv->dev;
> +    DtsStopDecoder(device);
> +    DtsCloseDecoder(device);
> +    DtsDeviceClose(device);
> +
> +    av_free(priv->sps_pps_buf);
> +
> +    if (priv->pic.data[0])
> +        avctx->release_buffer(avctx, &priv->pic);
> +
> +    if (priv->head) {
> +       OpaqueList *node = priv->head;
> +       while (node) {
> +          OpaqueList *next = node->next;
> +          av_free(node);
> +          node = next;
> +       }
> +    }
> +
> +    return 0;
> +}
> +
> +
> +static av_cold int init(AVCodecContext *avctx)
> +{
> +    CHDContext* priv;
> +    BC_INPUT_FORMAT format;

(as said in the other reply, you could initialize directly here, saving
the explicit memset if it's not needed).

> +static inline CopyRet copy_frame(AVCodecContext *avctx,
> +                                 BC_DTS_PROC_OUT *output,
> +                                 void *data, int *data_size,
> +                                 uint8_t second_field)
> +{
> +    BC_STATUS ret;
> +    BC_DTS_STATUS decoder_status;
> +    uint8_t next_frame_same;
> +    uint8_t interlaced;
> +    uint8_t need_second_field;
> +
> +    CHDContext *priv = avctx->priv_data;
> +
> +    uint8_t bottom_field = (output->PicInfo.flags & VDEC_FLAG_BOTTOMFIELD) ==
> +                           VDEC_FLAG_BOTTOMFIELD;
> +    uint8_t bottom_first = output->PicInfo.flags & VDEC_FLAG_BOTTOM_FIRST;
> +

The flags are 16 bits in the lib (even though those ones are 8 bits),
why do you force those to be 8 bits?

> +    int width = output->PicInfo.width * 2; // 16bits per pixel

Even though it's hardcoded for now, this could maybe depend on the
pixel format?

> +    int height = output->PicInfo.height;
> +    uint8_t *src = output->Ybuff;
> +    uint8_t *dst;
> +    int dStride;
> +
> +    ret = DtsGetDriverStatus(priv->dev, &decoder_status);
> +    if (ret != BC_STS_SUCCESS) {
> +        av_log(avctx, AV_LOG_ERROR,
> +               "CrystalHD: GetDriverStatus failed: %u\n", ret);
> +       return RET_ERROR;
> +    }
> +
> +    next_frame_same = output->PicInfo.picture_number ==
> +                       (decoder_status.picNumFlags & ~0x40000000);
> +    interlaced = ((output->PicInfo.flags & VDEC_FLAG_INTERLACED_SRC) &&
> +                  !(output->PicInfo.flags & VDEC_FLAG_UNKNOWN_SRC)) ||
> +                 next_frame_same || bottom_field || second_field;
> +    need_second_field = interlaced &&
> +                        ((!bottom_field && !bottom_first) ||
> +                         (bottom_field && bottom_first));

These two ones could be merged (though I'm not sure it's worth it):
!bottom_field == !bottom_first

> +static inline CopyRet receive_frame(AVCodecContext *avctx,
> +                                    void *data, int *data_size,
> +                                    uint8_t second_field)
> +{
> +    BC_STATUS ret;
> +    BC_DTS_PROC_OUT output;

could be initialized here and save the memset.

> +    CHDContext *priv = avctx->priv_data;
> +    HANDLE dev = priv->dev;
> +
> +    *data_size = 0;
> +
> +    memset(&output, 0, sizeof(BC_DTS_PROC_OUT));
> +    output.PicInfo.width = avctx->width;
> +    output.PicInfo.height = avctx->height;
> +
> +    // Request decoded data from the driver
> +    ret = DtsProcOutputNoCopy(dev, OUTPUT_PROC_TIMEOUT, &output);
> +    if (ret == BC_STS_FMT_CHANGE) {
> +        av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: Initial format change\n");
> +        avctx->width = output.PicInfo.width;
> +        avctx->height = output.PicInfo.height;
> +        if (output.PicInfo.height == 1088)
> +            avctx->height = 1080;

why this special case? please add a comment.

> +        return RET_COPY_AGAIN;
> +    } else if (ret == BC_STS_SUCCESS) {
> +        int copy_ret = -1;
> +        if (output.PoutFlags & BC_POUT_FLAGS_PIB_VALID) {
> +            if (output.PicInfo.timeStamp == 0) {
> +                av_log(avctx, AV_LOG_VERBOSE,
> +                       "CrystalHD: Not returning packed frame twice.\n");
> +                priv->last_picture++;
> +                DtsReleaseOutputBuffs(dev, NULL, FALSE);
> +                return RET_COPY_AGAIN;
> +            }
> +
> +            print_frame_info(priv, &output);
> +
> +            if (priv->last_picture + 1 < output.PicInfo.picture_number) {
> +                av_log(avctx, AV_LOG_WARNING,
> +                       "CrystalHD: Picture Number discontinuity\n");
> +                /*
> +                 * Have we lost frames? If so, we need to shrink the
> +                 * pipeline length appropriately.
> +                 */

and nothing is done to do that?

> +            }
> +
> +            copy_ret = copy_frame(avctx, &output, data, data_size, second_field);
> +            if (*data_size > 0) {
> +                avctx->has_b_frames--;
> +                priv->last_picture++;
> +                av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: Pipeline length: %u\n",
> +                       avctx->has_b_frames);
> +            }
> +        } else {
> +            /*
> +             * An invalid frame has been consumed.
> +             */
> +            av_log(avctx, AV_LOG_ERROR, "CrystalHD: ProcOutput succeeded with "
> +                                        "invalid PIB\n");
> +            avctx->has_b_frames--;
> +            copy_ret = RET_OK;
> +        }
> +        DtsReleaseOutputBuffs(dev, NULL, FALSE);
> +
> +        return copy_ret;
> +    } else if (ret == BC_STS_BUSY) {
> +        return RET_COPY_AGAIN;
> +    } else {
> +        av_log(avctx, AV_LOG_ERROR, "CrystalHD: ProcOutput failed %d\n", ret);
> +        return RET_ERROR;
> +    }
> +}
> +
> +
> +static int decode(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt)
> +{
> +    BC_STATUS ret;
> +    BC_DTS_STATUS decoder_status;
> +    CHDContext *priv = avctx->priv_data;
> +    HANDLE dev = priv->dev;
> +    uint8_t input_full = 0;
> +    int len = avpkt->size;
> +    CopyRet rec_ret;
> +
> +    av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: decode_frame\n");
> +
> +    do {
> +        if (len) {
> +            int32_t tx_free = (int32_t)DtsTxFreeSize(dev);
> +            if (len < tx_free - 1024) {
> +                /*
> +                 * Despite being notionally opaque, either libcrystalhd or
> +                 * the hardware itself will mangle pts values that are too
> +                 * small or too large. The docs claim it should be in uints

s/uints/units/

> +                 * of 100ns. Given that we're nominally dealing with a black
> +                 * box on both sides, any transform we do has no guarantee of
> +                 * avoiding mangling but, empirically, scalling as if the

s/scalling/scaling/

> +                 * reorded_opaque value is in ms seems to work.
> +                 */
> +                uint64_t pts = opaque_list_push(priv, avctx->reordered_opaque);
> +                av_log(priv->avctx, AV_LOG_VERBOSE, "input \"pts\": %lu\n",
> +                       pts);
> +                ret = DtsProcInput(dev, avpkt->data, len, pts, 0);
> +                if (ret == BC_STS_BUSY) {
> +                    av_log(avctx, AV_LOG_WARNING,
> +                           "CrystalHD: ProcInput returned busy\n");
> +                    usleep(10000);

Why are you sleeping here? Is there going to be a retry afterwards that
you're trying to prevent?

> +                    return -1;

return AVERROR(EBUSY)?

> +                } else if (ret != BC_STS_SUCCESS) {
> +                    av_log(avctx, AV_LOG_ERROR,
> +                           "CrystalHD: ProcInput failed: %u\n", ret);
> +                    return -1;
> +                }
> +                len = 0; // We don't want to try and resubmit the input...
> +                input_full = 0;
> +                avctx->has_b_frames++;
> +            } else {
> +                av_log(avctx, AV_LOG_WARNING, "CrystalHD: Input buffer full\n");
> +                input_full = 1;
> +            }
> +        } else {
> +            av_log(avctx, AV_LOG_INFO,
> +                   "CrystalHD: No more input data\n");
> +        }
> +
> +        ret = DtsGetDriverStatus(dev, &decoder_status);
> +        if (ret != BC_STS_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR,
> +                   "CrystalHD: GetDriverStatus failed\n");
> +            return -1;
> +        }
> +        /*
> +         * No frames ready. Don't try to extract.
> +         */
> +        if (decoder_status.ReadyListCount == 0) {
> +            usleep(50000);
> +        } else {
> +            break;
> +        }
> +    } while (input_full == 1);

Isn't there another way to know when a frame is ready than trial and
error?

> +    if (decoder_status.ReadyListCount == 0) {
> +        av_log(avctx, AV_LOG_INFO,
> +               "CrystalHD: No frames ready. Returning\n");
> +        return avpkt->size;
> +    }
> +
> +    if (priv->skip_next_output) {
> +        av_log(avctx, AV_LOG_VERBOSE,
> +               "CrystalHD: Skipping next output.\n");
> +        priv->skip_next_output = 0;
> +        avctx->has_b_frames--;
> +        return avpkt->size;
> +    }
> +
> +    rec_ret = receive_frame(avctx, data, data_size, 0);
> +    if (rec_ret == 0 && *data_size == 0) {
> +        /*
> +         * XXX: There's more than just mpeg2 vs h.264 interlaced content, and
> +         * I don't know which category each of those will fall in to.
> +         */
> +        if (avctx->codec->id == CODEC_ID_H264) {
> +            /*
> +             * This case is for when the encoded fields are stored separately
> +             * and we get a separate avpkt for each one. To keep the pipeline
> +             * stable, we should return nothing and wait for the next time
> +             * round to grab the second field.
> +             * H.264 PAFF is an example of this.
> +             */
> +            av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: Returning after first field.\n");
> +            avctx->has_b_frames--;
> +        } else {
> +            /*
> +             * This case is for when the encoded fields are stored in a single
> +             * avpkt but the hardware returns then separately. Unless we grab
> +             * the second field before returning, we'll slip another frame in
> +             * the pipeline and if that happens a lot, we're sunk. So we have
> +             * to get that second field now.
> +             * Interlaced mpeg2 is an example of this.
> +             */
> +            av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: Trying to get second field.\n");
> +            while (1) {
> +                ret = DtsGetDriverStatus(dev, &decoder_status);
> +                if (ret == BC_STS_SUCCESS && decoder_status.ReadyListCount > 0) {
> +                    rec_ret = receive_frame(avctx, data, data_size, 1);
> +                    if ((rec_ret == 0 && *data_size > 0) || rec_ret == BC_STS_BUSY)
> +                        break;
> +                }
> +                usleep(10000);
> +            }
> +            av_log(avctx, AV_LOG_VERBOSE, "CrystalHD: Got second field.\n");
> +        }
> +    } else if (rec_ret == RET_SKIP_NEXT_COPY) {
> +        /*
> +         * Two input packets got turned into a field pair. Gawd.
> +         */
> +        av_log(avctx, AV_LOG_VERBOSE, "Don't output on next decode call.\n");
> +        priv->skip_next_output = 1;
> +    } else if (rec_ret == RET_COPY_AGAIN) {
> +        /*
> +         * This means we got a FMT_CHANGE event and no frame, so go around
> +         * again to get the frame.
> +         */
> +        receive_frame(avctx, data, data_size, 0);

No check on the error code this time? what if that fails too?

> +    }
> +    usleep(10000);

This sleep, particularly, looks weird. We're returning to the caller,
but wait before doing it. Why?

> +    return avpkt->size;
> +}
> +
> +

For the following AVCodec definitions, shouldn't they depend on config.h defines at some point?
e.g.:

#if CONFIG_H264_CRYSTALHD_DECODER
> +AVCodec ff_h264_crystalhd_decoder = {
> +    "h264_crystalhd",
> +    AVMEDIA_TYPE_VIDEO,
> +    CODEC_ID_H264,
> +    sizeof(CHDContext),
> +    init,
> +    NULL,
> +    uninit,
> +    decode,
> +    CODEC_CAP_DR1 | CODEC_CAP_DELAY,
> +    .flush = flush,
> +    .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (CrystalHD acceleration)"),
> +    .pix_fmts = (const enum PixelFormat[]){PIX_FMT_YUYV422, PIX_FMT_NONE},
> +};
> +

#endif
and so on

-- 
Ben