[Libav-user] Real time desktop capture and encoding

Tom Hoxey tom at flaneer.com
Thu Sep 1 17:22:40 EEST 2022


 As part of a larger project I want to capture and encode the desktop
frame by frame in real time. The test code below reproduces the issue
shown in the screenshot attached to my Stack Overflow question, which can
be viewed here:
https://stackoverflow.com/questions/73570932/capture-and-encode-desktop-with-libav-in-real-time-not-giving-corect-images:


#include <stdlib.h>#include <stdio.h>#include <iostream>#include
<fstream>#include <string>#include <string.h>#include <math.h>
extern "C"
{#include "libavdevice/avdevice.h"#include
"libavutil/channel_layout.h"#include "libavutil/mathematics.h"#include
"libavutil/opt.h"#include "libavformat/avformat.h"#include
"libswscale/swscale.h"
}

/* 5 seconds stream duration */#define STREAM_DURATION   5.0#define
STREAM_FRAME_RATE 25 /* 25 images/s */#define STREAM_NB_FRAMES
((int)(STREAM_DURATION * STREAM_FRAME_RATE))#define STREAM_PIX_FMT
AV_PIX_FMT_YUV420P /* default pix_fmt */
int videoStreamIndx;int framerate = 30;
int width = 1920;int height = 1080;
int encPacketCounter;

AVFormatContext* ifmtCtx;
AVCodecContext* avcodecContx;
AVFormatContext* ofmtCtx;
AVStream* videoStream;
AVCodecContext* avCntxOut;
AVPacket* avPkt;
AVFrame* avFrame;
AVFrame* outFrame;
SwsContext* swsCtx;

std::ofstream fs;

// Build the AVDictionary of gdigrab capture options: real-time buffer
// size, capture frame rate (global `framerate`) and capture area
// (globals `width` x `height`). The returned dictionary is consumed by
// avformat_open_input().
AVDictionary* ConfigureScreenCapture(){

    AVDictionary* options = NULL;
    //Try adding "-rtbufsize 100M" as in
    //https://stackoverflow.com/questions/6766333/capture-windows-screen-with-ffmpeg
    av_dict_set(&options, "rtbufsize", "100M", 0);
    av_dict_set(&options, "framerate", std::to_string(framerate).c_str(), 0);
    // snprintf instead of sprintf: "%dx%d" for large int values could
    // overflow the old fixed 16-byte buffer; bound the write explicitly.
    char buffer[32];
    snprintf(buffer, sizeof(buffer), "%dx%d", width, height);
    av_dict_set(&options, "video_size", buffer, 0);
    return options;
}
// Allocate and fill the codec parameters describing the desired H.264
// output (dimensions from the `width`/`height` globals). Caller owns the
// returned AVCodecParameters and must free it with avcodec_parameters_free().
AVCodecParameters* ConfigureAvCodec(){
    AVCodecParameters* av_codec_par_out = avcodec_parameters_alloc();
    av_codec_par_out->width = width;
    av_codec_par_out->height = height;
    // NOTE(review): 40000 bit/s is extremely low for 1080p H.264 and will
    // produce very poor quality — presumably several Mbit/s was intended;
    // confirm before raising it.
    av_codec_par_out->bit_rate = 40000;
    av_codec_par_out->codec_id = AV_CODEC_ID_H264;
    av_codec_par_out->codec_type = AVMEDIA_TYPE_VIDEO;
    // Was the bare magic constant 0; AV_PIX_FMT_YUV420P has the value 0,
    // and STREAM_PIX_FMT documents that yuv420p is the intended format.
    av_codec_par_out->format = STREAM_PIX_FMT;
    return av_codec_par_out;
}
int GetVideoStreamIndex(){
    int VideoStreamIndx = -1;
    avformat_find_stream_info(ifmtCtx, NULL);
    /* find the first video stream index . Also there is an API
available to do the below operations */
    for (int i = 0; i < (int)ifmtCtx->nb_streams; i++) // find video
stream position/index.
    {
        if (ifmtCtx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
        {
            VideoStreamIndx = i;
            break;
        }
    }

    if (VideoStreamIndx == -1)
    {
    }

    return VideoStreamIndx;
}
void CreateFrames(AVCodecParameters* av_codec_par_in,
AVCodecParameters* av_codec_par_out){

    avFrame = av_frame_alloc();
    avFrame->width = avcodecContx->width;
    avFrame->height = avcodecContx->height;
    avFrame->format = av_codec_par_in->format;
    av_frame_get_buffer(avFrame, 0);

    outFrame = av_frame_alloc();
    outFrame->width = avCntxOut->width;
    outFrame->height = avCntxOut->height;
    outFrame->format = av_codec_par_out->format;
    av_frame_get_buffer(outFrame, 0);
}
bool Init(){
    AVCodecParameters* avCodecParOut = ConfigureAvCodec();

    AVDictionary* options = ConfigureScreenCapture();

    AVInputFormat* ifmt = av_find_input_format("gdigrab");
    auto ifmtCtxLocal = avformat_alloc_context();
    if (avformat_open_input(&ifmtCtxLocal, "desktop", ifmt, &options) < 0)
    {
        return false;
    }
    ifmtCtx = ifmtCtxLocal;

    videoStreamIndx = GetVideoStreamIndex();

    AVCodecParameters* avCodecParIn = avcodec_parameters_alloc();
    avCodecParIn = ifmtCtx->streams[videoStreamIndx]->codecpar;

    AVCodec* avCodec = avcodec_find_decoder(avCodecParIn->codec_id);
    if (avCodec == NULL)
    {
        return false;
    }

    avcodecContx = avcodec_alloc_context3(avCodec);
    if (avcodec_parameters_to_context(avcodecContx, avCodecParIn) < 0)
    {
        return false;
    }

    //av_dict_set
    int value = avcodec_open2(avcodecContx, avCodec, NULL);
//Initialize the AVCodecContext to use the given AVCodec.
    if (value < 0)
    {
        return false;
    }

    AVOutputFormat* ofmt = av_guess_format("h264", NULL, NULL);

    if (ofmt == NULL)
    {
        return false;
    }

    auto ofmtCtxLocal = avformat_alloc_context();
    avformat_alloc_output_context2(&ofmtCtxLocal, ofmt, NULL, NULL);
    if (ofmtCtxLocal == NULL)
    {
        return false;
    }
    ofmtCtx = ofmtCtxLocal;

    AVCodec* avCodecOut = avcodec_find_encoder(avCodecParOut->codec_id);
    if (avCodecOut == NULL)
    {
        return false;
    }

    videoStream = avformat_new_stream(ofmtCtx, avCodecOut);
    if (videoStream == NULL)
    {
        return false;
    }

    avCntxOut = avcodec_alloc_context3(avCodecOut);
    if (avCntxOut == NULL)
    {
        return false;
    }

    if (avcodec_parameters_copy(videoStream->codecpar, avCodecParOut) < 0)
    {
        return false;
    }

    if (avcodec_parameters_to_context(avCntxOut, avCodecParOut) < 0)
    {
        return false;
    }

    avCntxOut->gop_size = 30; //3; //Use I-Frame frame every 30 frames.
    avCntxOut->max_b_frames = 0;
    avCntxOut->time_base.num = 1;
    avCntxOut->time_base.den = framerate;

    //avio_open(&ofmtCtx->pb, "", AVIO_FLAG_READ_WRITE);

    if (avformat_write_header(ofmtCtx, NULL) < 0)
    {
        return false;
    }

    value = avcodec_open2(avCntxOut, avCodecOut, NULL); //Initialize
the AVCodecContext to use the given AVCodec.
    if (value < 0)
    {
        return false;
    }

    if (avcodecContx->codec_id == AV_CODEC_ID_H264)
    {
        av_opt_set(avCntxOut->priv_data, "preset", "ultrafast", 0);
        av_opt_set(avCntxOut->priv_data, "zerolatency", "1", 0);
        av_opt_set(avCntxOut->priv_data, "tune", "ull", 0);
    }

    if ((ofmtCtx->oformat->flags & AVFMT_GLOBALHEADER) != 0)
    {
        avCntxOut->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    }

    CreateFrames(avCodecParIn, avCodecParOut);

    swsCtx = sws_alloc_context();
    if (sws_init_context(swsCtx, NULL, NULL) < 0)
    {
        return false;
    }

    swsCtx = sws_getContext(avcodecContx->width, avcodecContx->height,
avcodecContx->pix_fmt,
        avCntxOut->width, avCntxOut->height, avCntxOut->pix_fmt,
SWS_FAST_BILINEAR,
        NULL, NULL, NULL);
    if (swsCtx == NULL)
    {
        return false;
    }

    return true;
}
// Feed one frame to the encoder and append every packet it can currently
// emit to the output file stream `fs`.
void Encode(AVCodecContext* enc_ctx, AVFrame* frame, AVPacket* pkt){
    /* send the frame to the encoder */
    if (avcodec_send_frame(enc_ctx, frame) < 0)
    {
        return;
    }

    // Drain all packets the encoder has ready.
    for (;;)
    {
        const int err = avcodec_receive_packet(enc_ctx, pkt);
        if (err < 0)
        {
            // AVERROR(EAGAIN)/AVERROR_EOF mean "no more output for now";
            // any other negative value is a real error. Stop either way.
            return;
        }

        fs.write((char*)pkt->data, pkt->size);
        av_packet_unref(pkt);
    }
}
void EncodeFrames(int noFrames){
    int frameCount = 0;
    avPkt = av_packet_alloc();
    AVPacket* outPacket = av_packet_alloc();
    encPacketCounter = 0;

    while (av_read_frame(ifmtCtx, avPkt) >= 0)
    {
        if (frameCount++ == noFrames)
            break;
        if (avPkt->stream_index != videoStreamIndx) continue;

        avcodec_send_packet(avcodecContx, avPkt);

        if (avcodec_receive_frame(avcodecContx, avFrame) >= 0) //
Frame successfully decoded :)
        {
            outPacket->data = NULL; // packet data will be allocated
by the encoder
            outPacket->size = 0;

            outPacket->pts = av_rescale_q(encPacketCounter,
avCntxOut->time_base, videoStream->time_base);
            if (outPacket->dts != AV_NOPTS_VALUE)
                outPacket->dts = av_rescale_q(encPacketCounter,
avCntxOut->time_base, videoStream->time_base);

            outPacket->dts = av_rescale_q(encPacketCounter,
avCntxOut->time_base, videoStream->time_base);
            outPacket->duration = av_rescale_q(1,
avCntxOut->time_base, videoStream->time_base);

            outFrame->pts = av_rescale_q(encPacketCounter,
avCntxOut->time_base, videoStream->time_base);
            outFrame->pkt_duration = av_rescale_q(encPacketCounter,
avCntxOut->time_base, videoStream->time_base);
            encPacketCounter++;

            int sts = sws_scale(swsCtx,
                avFrame->data, avFrame->linesize, 0, avFrame->height,
                outFrame->data, outFrame->linesize);

            /* make sure the frame data is writable */
            auto ret = av_frame_make_writable(outFrame);
            if (ret < 0)
                break;
            Encode(avCntxOut, outFrame, outPacket);
        }
        av_frame_unref(avFrame);
        av_packet_unref(avPkt);
    }
}
void Dispose(){
    fs.close();

    auto ifmtCtxLocal = ifmtCtx;
    avformat_close_input(&ifmtCtx);
    avformat_free_context(ifmtCtx);
    avcodec_free_context(&avcodecContx);

}
// Entry point: register capture devices, open the raw H.264 output file,
// then initialize, capture/encode 300 frames, and clean up.
int main(int argc, char** argv){
    avdevice_register_all();

    // The file receives raw H.264 bitstream bytes: open it in binary
    // mode, otherwise Windows text-mode newline translation corrupts
    // the stream.
    fs.open("out.h264", std::ios::out | std::ios::binary);
    if (!fs.is_open())
    {
        std::cout << "Failed to open out.h264 \n";
        return 1;
    }

    if (Init())
    {
        EncodeFrames(300);
    }
    else
    {
        std::cout << "Failed to Init \n";
    }

    Dispose();

    return 0;
}

Thanks for any help in advance,
Tom
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://ffmpeg.org/pipermail/libav-user/attachments/20220901/2cbfb94f/attachment.htm>


More information about the Libav-user mailing list