[Libav-user] Multiple queries on filter_audio.c

amey jain amey.jain7 at gmail.com
Fri Jun 23 23:12:33 EEST 2017


Hi,
I have modified filter_audio.c a little and use it together with
avcodec_decode_audio4 to get decoded PCM audio as float samples, mono,
resampled to 5512 Hz. The filter graph is abuffer-->aformat-->abuffersink.
I am now facing several problems with this file:
1. When I seek, I don't get frames with the required PTS for some
MPEG-TS videos, although it works for other *.ts files (see the
function process_frame_by_pts).
2. When I collect frames from the buffersink, frame->sample_rate is
still 48000 Hz, although it should be 5512 Hz. Is that expected, or am
I doing the conversion wrong somewhere?
Any hint about the problem will help. Last time I was asked for the
code, which is why I am including the complete file below.
Thanks

/* Initial file to extract audio from video   */
#include <stdio.h>
#include <libavfilter/avcodec.h>
#include <libavformat/avformat.h>
#include <math.h>
#include <unistd.h>
#include <libavutil/channel_layout.h>
#include <libavutil/opt.h>
#include <libavutil/samplefmt.h>
#include <libavfilter/avfilter.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavutil/frame.h>
#include <libavformat/avformat.h>
#include <libavcodec/avcodec.h>
#include "av_decoder.h"
#include "config.h"

/* Demuxer / decoder state shared by the functions in this file. */
static AVFormatContext *fmt_ctx = NULL;
static AVCodecContext *dec_ctx = NULL;
int audio_stream_index = -1;

/* Externally visible filter handles.
 * NOTE(review): none of the functions in this file reference these three;
 * presumably another translation unit does - confirm before removing. */
AVFilterContext *buffersrc_ctx;
AVFilterContext *buffersink_ctx;
AVFilterGraph *filter_graph;

/* Filter graph and its two end points, filled in by init_filter_graph(). */
static AVFilterGraph *graph = NULL;
static AVFilterContext *src = NULL;
static AVFilterContext *sink = NULL;

/* Parameters of the decoded input audio, set by init_input_parameters().
 * -1 / 0 mean "not set yet". */
enum AVSampleFormat INPUT_SAMPLE_FMT = -1;
uint64_t INPUT_CHANNEL_LAYOUT = 0;
uint64_t INPUT_SAMPLERATE = 0;

/* File names handed to init_decoder(); the strings are not copied. */
char *orig_video_name = NULL;
char *edited_video_name = NULL;

//taken from ffmpeg-filter_audio.c
https://www.ffmpeg.org/doxygen/2.2/filter_audio_8c-example.html
int init_filter_graph(AVFilterGraph **graph, AVFilterContext **src,
AVFilterContext **sink)
{
  AVFilterGraph *filter_graph;
  AVFilterContext *abuffer_ctx;
  AVFilter        *abuffer=NULL;
  AVFilterContext *resample_ctx;
  AVFilter        *resample;
  AVFilterContext *aformat_ctx;
  AVFilter        *aformat;
  AVFilterContext *abuffersink_ctx;
  AVFilter        *abuffersink;
  AVDictionary *options_dict = NULL;
  uint8_t options_str[1024];
  uint8_t ch_layout[64];
  int err;
  char errstr[256];
  /* Create a new filtergraph, which will contain all the filters. */
  filter_graph = avfilter_graph_alloc();
  if (!filter_graph) {
    fprintf(stderr, "Unable to create filter graph.\n");
    return AVERROR(ENOMEM);
  }

  /* Create the abuffer filter;
   * it will be used for feeding the data into the graph. */
  abuffer = avfilter_get_by_name("abuffer");
  if (abuffer == NULL) {
    fprintf(stderr, "Could not find the abuffer filter.\n");
    return AVERROR_FILTER_NOT_FOUND;
  }
  abuffer_ctx = avfilter_graph_alloc_filter(filter_graph, abuffer, "src");
  if (abuffer_ctx == NULL) {
    fprintf(stderr, "Could not allocate the abuffer instance.\n");
    return AVERROR(ENOMEM);
  }

  /* Set the filter options through the AVOptions API. */
  av_get_channel_layout_string(ch_layout, sizeof(ch_layout), 0,
INPUT_CHANNEL_LAYOUT);
  err = av_opt_set(abuffer_ctx, "channel_layout", ch_layout,
AV_OPT_SEARCH_CHILDREN);
  fprintf(stderr,"DEBUG: av_opt_set for channel_layout returned %d\n",err);
  err = av_opt_set(abuffer_ctx, "sample_fmt",
av_get_sample_fmt_name(INPUT_SAMPLE_FMT), AV_OPT_SEARCH_CHILDREN);
  fprintf(stderr,"DEBUG: av_opt_set for sample_fmt returned %d\n",err);
  err = av_opt_set_q(abuffer_ctx, "time_base", (AVRational){ 1,
INPUT_SAMPLERATE }, AV_OPT_SEARCH_CHILDREN);
  fprintf(stderr,"DEBUG: av_opt_set for time_base returned %d\n",err);
  err = av_opt_set_int(abuffer_ctx, "sample_rate", INPUT_SAMPLERATE,
AV_OPT_SEARCH_CHILDREN);
  fprintf(stderr,"DEBUG: av_opt_set for channel_layout returned %d\n",err);
  /* Now initialize the filter; we pass NULL options, since we have already
   * set all the options above. */
  err = avfilter_init_str(abuffer_ctx, NULL);
  if (err < 0) {
    fprintf(stderr, "Could not initialize the abuffer filter.\n");
    return err;
  }

  // Create resampling filter.
  resample = avfilter_get_by_name("aformat");
  if (!resample) {
    fprintf(stderr, "Could not find the aformat filter.\n");
    return AVERROR_FILTER_NOT_FOUND;
  }
  resample_ctx = avfilter_graph_alloc_filter(filter_graph, resample, "aformat");
  if (!resample_ctx) {
    fprintf(stderr, "Could not allocate the resample instance.\n");
    return AVERROR(ENOMEM);
  }

  // Set the filter options through the AVOptions API.
  av_get_channel_layout_string(ch_layout, sizeof(ch_layout), 0,
AV_CH_LAYOUT_MONO);
  if(! av_opt_set(resample_ctx, "channel_layout", ch_layout,
AV_OPT_SEARCH_CHILDREN)){
    fprintf(stderr,"channel layout for resample_ctx not set\n");
    return;
  }
  if(! av_opt_set(resample_ctx, "sample_fmt",
av_get_sample_fmt_name(AV_SAMPLE_FMT_FLT), AV_OPT_SEARCH_CHILDREN)){
    fprintf(stderr,"channel layout for resample_ctx not set\n");
    return;
  }
  av_opt_set_q(resample_ctx, "time_base", (AVRational){ 1, 5512 },
AV_OPT_SEARCH_CHILDREN);
  if(! av_opt_set_int(resample_ctx, "sample_rate", 5512,
AV_OPT_SEARCH_CHILDREN)){
    fprintf(stderr,"channel layout for resample_ctx not set\n");
    return;
  }
  err = avfilter_init_str(resample_ctx, NULL);
  if (err < 0) {
    fprintf(stderr, "Could not initialize the resampling filter.\n");
    return err;
  }

  /* Finally create the abuffersink filter;
   * it will be used to get the filtered data out of the graph. */
  abuffersink = avfilter_get_by_name("abuffersink");
  if (!abuffersink) {
    fprintf(stderr, "Could not find the abuffersink filter.\n");
    return AVERROR_FILTER_NOT_FOUND;
  }
  abuffersink_ctx = avfilter_graph_alloc_filter(filter_graph,
abuffersink, "sink");
  if (!abuffersink_ctx) {
    fprintf(stderr, "Could not allocate the abuffersink instance.\n");
    return AVERROR(ENOMEM);
  }
  /* This filter takes no options. */
  err = avfilter_init_str(abuffersink_ctx, NULL);
  if (err < 0) {
    fprintf(stderr, "Could not initialize the abuffersink instance.\n");
    return err;
  }
  /* Connect the filters;
   * in this simple case the filters just form a linear chain. */
   err = avfilter_link(abuffer_ctx, 0, resample_ctx, 0);
   if (err >= 0){
     err = avfilter_link(resample_ctx, 0, abuffersink_ctx, 0);
     if (err < 0) {
       fprintf(stderr, "Error connecting filters resample and
buffersink %d error\n",err);
       return err;
     }
   }
   else{
    fprintf(stderr, "Error connecting filters buffer src and resample
%d error\n",err);
    av_strerror(err, errstr, sizeof(errstr));
    fprintf(stderr, "%s\n", errstr);
    return err;
    }
  /* Configure the graph. */
  err = avfilter_graph_config(filter_graph, NULL);
  if (err < 0) {
    fprintf(stderr, "Error configuring the filter graph\n");
    return err;
  }
  *graph = filter_graph;
  *src   = abuffer_ctx;
  *sink  = abuffersink_ctx;
  return 0;
}

/*
 * Set up decoding: record the file name(s) and build the filter graph.
 *
 * filename1   - original video; stored in orig_video_name
 * filename2   - edited video; stored in edited_video_name (two-file mode)
 * file_select - non-zero selects single-file mode: only filename1 is used
 *               and it is opened immediately via open_input_file(0)
 *
 * In two-file mode no input file is opened here; only the names are stored.
 *
 * Returns 1 on success, -1 when a required file name is NULL, otherwise the
 * non-zero code reported by open_input_file() or init_filter_graph().
 */
int init_decoder(char *filename1,char *filename2,uint8_t file_select)
{
  const int single_file_mode = file_select && filename1 != NULL;
  int status;

  if (single_file_mode) {
    orig_video_name = filename1;
    status = open_input_file(0);
    if (status != 0) {
      fprintf(stderr,"Not able to open input file\n");
      return status;
    }
  } else {
    if (filename1 == NULL || (filename2 == NULL && !file_select)) {
      fprintf(stderr,"ERROR: Unable to initialise decoder. Filename null\n");
      return -1;
    }
    orig_video_name = filename1;
    edited_video_name = filename2;
  }

  // add any other settings from outer file in this function
  status = init_filter_graph(&graph, &src, &sink);
  if (status == 0) {
    fprintf(stderr,"DEBUG: initialisation completed\n");
    return 1;
  }

  fprintf(stderr,"ERROR: Not able to initialize input parameters %d\n",status);
  return status;
}

/*
 * Copy the audio parameters of a decoded frame into the INPUT_* globals.
 *
 * frame   - decoded audio frame; supplies channel layout and sample rate
 * dec_ctx - decoder context; supplies the sample format
 *
 * Nothing is returned: a NULL argument or an unset resulting parameter is
 * only reported on stderr.
 */
void init_input_parameters(AVFrame *frame,AVCodecContext *dec_ctx)
{
  int all_set;

  if (!frame) {
    fprintf(stderr,"ERROR:Frame is NULL\n");
    return;
  }
  if (!dec_ctx) {
    fprintf(stderr,"ERROR:AVCodec context NULL\n");
    return;
  }

  INPUT_SAMPLE_FMT = dec_ctx->sample_fmt;
  INPUT_SAMPLERATE = frame->sample_rate;
  INPUT_CHANNEL_LAYOUT = frame->channel_layout;

  /* A zero layout or rate, or a sample format of -1, counts as "not set". */
  all_set = INPUT_CHANNEL_LAYOUT != 0
         && INPUT_SAMPLERATE != 0
         && INPUT_SAMPLE_FMT != -1;
  if (!all_set)
    fprintf(stderr,"ERROR:input parameters not set\n");
}


/*
Opens input file and sets,initialises important context and parameters
args: file_select Selects name of the file to be opened from
orig_video_name(0) and edited_video_name(1)

 */
int open_input_file(uint8_t file_select)
{
  int ret;
  AVCodec *dec = NULL;
  AVDictionaryEntry *tag = NULL;
  // this parameter needs to be set taking parameters from CLI, taking
language number 1/2.
  uint8_t language = 0,i,j,got_frame;
  char *filename;
  AVFrame *frame = av_frame_alloc();
  int err,len;
  char errstr[128];
  AVPacket pkt;
  av_register_all();
  avfilter_register_all();
  //open input video file
  if(file_select == 0)
    filename = orig_video_name;
  else if(file_select == 1)
    filename = edited_video_name;
  else{
    fprintf(stderr,"ERROR: Invalid value for file_select flag\n");
    return -1;
  }
  if((ret = avformat_open_input(&fmt_ctx,filename,NULL,NULL)) < 0){
    printf("Unable to open %s\n",filename);
    return ret;
  }

  //print useful format information
  printf("Opening format:%s\nFile:%sTotal %d streams in
video\n",fmt_ctx->iformat->name,filename,fmt_ctx->nb_streams);
  if((ret == avformat_find_stream_info(fmt_ctx,NULL)) < 0){
    printf("Unable to find stream info\n");
    return ret;
  }

  //support for multiple audio streams(for multiple languages
possibly) if any in video;
  //if language parameter is not set by default eng is choosen else
first stream is selected;
  for(i = 0; i < fmt_ctx->nb_streams;i++){
    tag = NULL;
    if(fmt_ctx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO){
      tag = av_dict_get(fmt_ctx->streams[i]->metadata, "language",
tag, AV_DICT_IGNORE_SUFFIX);
      if(tag != NULL && strcmp(tag->value,"eng") == 0){ //language unset
    if(language == 0){
      audio_stream_index = i;
      break;
    }
      }
      else{
    audio_stream_index = i;
    break;
      }
    }
  }

  /* select audio stream and initialise decoder*/
  ret = av_find_best_stream(fmt_ctx,AVMEDIA_TYPE_AUDIO, -1, -1, &dec, 0);
  if(ret < 0){
    printf("Cannot find audio stream in input\n");
    return ret;
  }
  else if(dec == NULL)
    printf("Audio decoder not found\n");
  audio_stream_index = ret;
  dec_ctx = fmt_ctx->streams[audio_stream_index]->codec;
  printf("Selected audio stream:%d",audio_stream_index);
  printf("Time base unit:AVStream->time_base:
%lu/%lu\n",fmt_ctx->streams[audio_stream_index]->time_base.num,fmt_ctx->streams[audio_stream_index]->time_base.den);

  /* init audio decoder */
  if((ret = avcodec_open2(dec_ctx, dec, NULL)) < 0) {
    printf("Cannot open audio decoder\n");
    return ret;
  }
  while(1){ //read an audio frame for format specifications
    ret = av_read_frame(fmt_ctx, &pkt);
    if(ret < 0){
      printf("Unable to read frame:%d \n",ret);
      break;
    }
    if(pkt.stream_index == audio_stream_index){
      got_frame = 0;
      len = avcodec_decode_audio4(dec_ctx, frame, &got_frame, &pkt);
      if(len < 0){
    av_strerror(len,errstr,128);
    fprintf(stderr,"ERROR %s %d %s\n",__FUNCTION__,__LINE__,errstr);
      }
      else if(got_frame)
    break;
    }
  }

 /* Initialise filters */
 init_input_parameters(frame, dec_ctx);
 av_frame_free(&frame);
 return 0;
}

/*
Read frame sequence covering 1.5 seconds from time given and buffers
those frame.
Works like loop traversing frames to frames for a duration of 1.5
seconds. Resampling
function is called on frame itself then.

arg: time_to_seek_ms Time in miliseconds to get frame from
arg: index           Index of subtitle block
*/
void process_frame_by_pts(uint16_t index,int64_t time_to_seek_ms)
{
  int64_t num = fmt_ctx->streams[audio_stream_index]->time_base.num;
  int64_t den = fmt_ctx->streams[audio_stream_index]->time_base.den;
  int64_t duration;
  int64_t start_pts = ((float)time_to_seek_ms/1000) * (float)den/num;
// converted to timebase of audio stream
  int64_t end_pts = start_pts + ((float)(GRANUALITY/1000) * den/num);
  int i = 0, count = 0,temp = 0,out_count = 0,in_count = 0;
  uint8_t *OUTPUT_SAMPLES = NULL;
  AVPacket pkt;
  uint8_t *in;
  int got_frame,ret;
  AVFrame *frame = av_frame_alloc();
  int size,len,buf_size;
  uint64_t output_ch_layout = av_get_channel_layout("mono");
  enum AVSampleFormat src_sample_fmt;
  uint8_t *output_buffer = NULL;
  int err;
  char errstr[128];
  if(end_pts > fmt_ctx->streams[audio_stream_index]->duration){
    printf("Error: End PTS greater then duration of stream\n");
    return;
  }

 ret = av_seek_frame(fmt_ctx,audio_stream_index,start_pts,AVSEEK_FLAG_BACKWARD);
//get one frame before timing to cover all
 if( ret < 0 ){
   fprintf(stderr,"av_seek_frame failed with error code %d\n",ret);
   return;
 }
 fprintf(stdout,"Start PTS: %lu End PTS: %lu\n",start_pts,end_pts);

 // Problem is that output gets stored in uint8_t type whereas I want
output in float type
 do{ //outer do-while to read packets
    ret = av_read_frame(fmt_ctx, &pkt);
    if(ret < 0){
      printf("Unable to read frame:%d \n",ret);
      break;
    }
    if(pkt.stream_index == audio_stream_index){ // processing audio packets
      size = pkt.size;
      while(size > 0){ // inner while to decode frames, if more than
one are present in a single packet
    got_frame = 0;
    len = avcodec_decode_audio4(dec_ctx, frame, &got_frame, &pkt);
    if(len < 0){
      printf("Error while decoding\n");
    }
    if(got_frame){
      err = av_buffersrc_add_frame(src, frame);
      if(err < 0) {
        av_frame_unref(frame);
        fprintf(stderr,"Error adding frame to source buffer\n");
        return;
      }
      size = size - len;
    }
      }
    }
 }while(frame->pts < end_pts);

 while((err = av_buffersink_get_frame(sink,frame)) >= 0){
   if(err < 0){
     av_strerror(err,errstr,128);
     fprintf(stderr,"av_buffer_get_frame returns %d %s\n",err,errstr);
     break;
   }
   fprintf(stdout,"frame_pts %lu frame->duration(PTS) %d
frame->duration: %f with sample rate as:%d number of samples
%d\n",frame->pts,frame->pkt_duration,((double)frame->pkt_duration/frame->sample_rate)
* 1000,frame->sample_rate,frame->nb_samples);// here is problem of
sample rate
 }

 av_frame_free(&frame);
 return;
}

void close_filter()
{
  avfilter_graph_free(&graph);
}


More information about the Libav-user mailing list