[FFmpeg-devel] [PATCH v2 6/6] ffv1enc: add a Vulkan encoder
Lynne
dev at lynne.ee
Mon Nov 11 05:40:18 EET 2024
This commit implements a standard, compliant, version 3 and version 4
FFv1 encoder, entirely in Vulkan. The encoder is written in standard
GLSL and requires a Vulkan 1.3 supporting GPU with the BDA extension.
The encoder can use any number of slices, but should nominally use a
32x32 grid of slices (1024 in total) to maximize parallelism.
All features are supported, as well as all pixel formats.
This includes:
- Rice
- Range coding with a custom quantization table
- PCM encoding
CRC calculation is also massively parallelized on the GPU.
Encoding of unaligned dimensions on subsampled data requires
version 4, or requires oversizing the image to 64-pixel alignment
and cropping out the padding via container flags.
---
configure | 1 +
libavcodec/Makefile | 1 +
libavcodec/allcodecs.c | 1 +
libavcodec/ffv1enc.c | 2 +-
libavcodec/ffv1enc_vulkan.c | 1378 ++++++++++++++++++++++++
libavcodec/vulkan/Makefile | 7 +
libavcodec/vulkan/common.comp | 173 +++
libavcodec/vulkan/ffv1_common.comp | 77 ++
libavcodec/vulkan/ffv1_enc.comp | 57 +
libavcodec/vulkan/ffv1_enc_ac.comp | 83 ++
libavcodec/vulkan/ffv1_enc_common.comp | 101 ++
libavcodec/vulkan/ffv1_enc_rct.comp | 53 +
libavcodec/vulkan/ffv1_enc_rgb.comp | 70 ++
libavcodec/vulkan/ffv1_enc_setup.comp | 182 ++++
libavcodec/vulkan/ffv1_enc_vlc.comp | 113 ++
libavcodec/vulkan/ffv1_vlc.comp | 122 +++
libavcodec/vulkan/rangecoder.comp | 189 ++++
17 files changed, 2609 insertions(+), 1 deletion(-)
create mode 100644 libavcodec/ffv1enc_vulkan.c
create mode 100644 libavcodec/vulkan/common.comp
create mode 100644 libavcodec/vulkan/ffv1_common.comp
create mode 100644 libavcodec/vulkan/ffv1_enc.comp
create mode 100644 libavcodec/vulkan/ffv1_enc_ac.comp
create mode 100644 libavcodec/vulkan/ffv1_enc_common.comp
create mode 100644 libavcodec/vulkan/ffv1_enc_rct.comp
create mode 100644 libavcodec/vulkan/ffv1_enc_rgb.comp
create mode 100644 libavcodec/vulkan/ffv1_enc_setup.comp
create mode 100644 libavcodec/vulkan/ffv1_enc_vlc.comp
create mode 100644 libavcodec/vulkan/ffv1_vlc.comp
create mode 100644 libavcodec/vulkan/rangecoder.comp
diff --git a/configure b/configure
index 0e9ed6dc3c..d0954fc7d9 100755
--- a/configure
+++ b/configure
@@ -2951,6 +2951,7 @@ exr_decoder_deps="zlib"
exr_encoder_deps="zlib"
ffv1_decoder_select="rangecoder"
ffv1_encoder_select="rangecoder"
+ffv1_vulkan_encoder_select="vulkan spirv_compiler"
ffvhuff_decoder_select="huffyuv_decoder"
ffvhuff_encoder_select="huffyuv_encoder"
fic_decoder_select="golomb"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 676ff542af..a6e0e0b55e 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -370,6 +370,7 @@ OBJS-$(CONFIG_EXR_ENCODER) += exrenc.o float2half.o
OBJS-$(CONFIG_FASTAUDIO_DECODER) += fastaudio.o
OBJS-$(CONFIG_FFV1_DECODER) += ffv1dec.o ffv1.o
OBJS-$(CONFIG_FFV1_ENCODER) += ffv1enc.o ffv1.o
+OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += ffv1enc.o ffv1.o ffv1enc_vulkan.o
OBJS-$(CONFIG_FFWAVESYNTH_DECODER) += ffwavesynth.o
OBJS-$(CONFIG_FIC_DECODER) += fic.o
OBJS-$(CONFIG_FITS_DECODER) += fitsdec.o fits.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index d8a5866435..0b559dfc58 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -116,6 +116,7 @@ extern const FFCodec ff_escape130_decoder;
extern const FFCodec ff_exr_encoder;
extern const FFCodec ff_exr_decoder;
extern const FFCodec ff_ffv1_encoder;
+extern const FFCodec ff_ffv1_vulkan_encoder;
extern const FFCodec ff_ffv1_decoder;
extern const FFCodec ff_ffvhuff_encoder;
extern const FFCodec ff_ffvhuff_decoder;
diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index 7572594f3e..8c0d1dcdc5 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -853,7 +853,7 @@ av_cold int ff_ffv1_encode_setup_plane_info(AVCodecContext *avctx,
}
av_assert0(s->bits_per_raw_sample >= 8);
- return av_pix_fmt_get_chroma_sub_sample (avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
+ return av_pix_fmt_get_chroma_sub_sample (pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
}
static int encode_init_internal(AVCodecContext *avctx)
diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c
new file mode 100644
index 0000000000..b73161f9ff
--- /dev/null
+++ b/libavcodec/ffv1enc_vulkan.c
@@ -0,0 +1,1378 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/crc.h"
+#include "libavutil/vulkan.h"
+#include "libavutil/vulkan_spirv.h"
+
+#include "avcodec.h"
+#include "hwconfig.h"
+#include "encode.h"
+#include "libavutil/opt.h"
+#include "codec_internal.h"
+
+#include "ffv1.h"
+#include "ffv1enc.h"
+
+/* Parallel Golomb alignment */
+#define LG_ALIGN_W 32
+#define LG_ALIGN_H 32
+
+/*
+ * Private context of the Vulkan FFv1 encoder (used as avctx->priv_data).
+ * Bundles the generic FFv1 state, the Vulkan context/execution pool, the
+ * compute shaders and the buffer pools used while encoding a frame.
+ */
+typedef struct VulkanEncodeFFv1Context {
+ /* Generic FFv1 encoder state (slice layout, version, key frame flag, ...) */
+ FFV1Context ctx;
+
+ FFVulkanContext s;
+ FFVkQueueFamilyCtx qf;
+ FFVkExecPool exec_pool;
+
+ /* Compute shaders, dispatched per frame in this order:
+ * setup, rct (only when is_rgb is set), enc */
+ FFVulkanShader setup;
+ FFVulkanShader rct;
+ FFVulkanShader enc;
+
+ /* Constant read-only buffers */
+ FFVkBuffer quant_buf;
+ FFVkBuffer rangecoder_static_buf;
+ FFVkBuffer crc_tab_buf;
+
+ /* Slice data buffer pool */
+ AVBufferPool *slice_data_pool;
+ /* Slice data kept between frames when gop_size > 1; dropped and
+ * reallocated at every key frame */
+ AVBufferRef *keyframe_slice_data_ref;
+
+ /* Output data buffer */
+ AVBufferPool *out_data_pool;
+
+ /* Temporary data buffer */
+ AVBufferPool *tmp_data_pool;
+
+ /* Slice results buffer */
+ AVBufferPool *results_data_pool;
+
+ /* Intermediate frame pool (frames context created by init_indirect(),
+ * holds the RCT shader output) */
+ AVBufferRef *intermediate_frames_ref;
+
+ /* Representation mode */
+ enum FFVkShaderRepFormat rep_fmt;
+
+ /* NOTE(review): the code visible here dispatches with
+ * ctx.num_h_slices/ctx.num_v_slices, not these fields - confirm usage */
+ int num_h_slices;
+ int num_v_slices;
+
+ /* Non-zero when the RCT pass has to run before encoding */
+ int is_rgb;
+ /* Forwarded unchanged to the shaders through the push constants */
+ int ppi;
+ int chunks;
+} VulkanEncodeFFv1Context;
+
+extern const char *ff_source_common_comp;
+extern const char *ff_source_rangecoder_comp;
+extern const char *ff_source_ffv1_vlc_comp;
+extern const char *ff_source_ffv1_common_comp;
+extern const char *ff_source_ffv1_enc_common_comp;
+extern const char *ff_source_ffv1_enc_rct_comp;
+extern const char *ff_source_ffv1_enc_vlc_comp;
+extern const char *ff_source_ffv1_enc_ac_comp;
+extern const char *ff_source_ffv1_enc_setup_comp;
+extern const char *ff_source_ffv1_enc_comp;
+extern const char *ff_source_ffv1_enc_rgb_comp;
+
+/*
+ * Host-side copy of the push-constant block declared for the RCT shader
+ * (slice_ctx, bits, padding[3]). sizeof() of this struct is registered as
+ * the push-constant range, so the fields must stay in the same order as the
+ * GLSL declaration.
+ */
+typedef struct FFv1VkRCTParameters {
+ /* Device address of the slice data buffer */
+ VkDeviceAddress slice_data;
+ /* Filled from FFV1Context.bits_per_raw_sample */
+ uint8_t bits;
+ uint8_t padding[3];
+} FFv1VkRCTParameters;
+
+/*
+ * Host-side copy of the push-constant block emitted by add_push_data() and
+ * shared by the setup and encode shaders. The field order mirrors the GLSL
+ * declaration one-to-one; keep both in sync when changing either.
+ */
+typedef struct FFv1VkParameters {
+ /* Device addresses of the slice data, temporary and output buffers */
+ VkDeviceAddress slice_data;
+ VkDeviceAddress scratch_data;
+ VkDeviceAddress out_data;
+
+ /* Sample aspect ratio (num, den) and chroma subsampling shifts (h, v) */
+ int32_t sar[2];
+ uint32_t chroma_shift[2];
+
+ /* slice_size_max is the output buffer size divided by the slice count */
+ uint32_t slice_size_max;
+ uint32_t slice_count;
+ uint32_t context_count;
+ uint32_t crcref;
+
+ uint8_t bits_per_raw_sample;
+ uint8_t context_model;
+ uint8_t version;
+ uint8_t micro_version;
+ uint8_t force_pcm;
+ uint8_t key_frame;
+ /* planes: plane count of the software pixel format,
+ * codec_planes: FFV1Context.plane_count */
+ uint8_t planes;
+ uint8_t codec_planes;
+ uint8_t transparency;
+ uint8_t colorspace;
+ /* 3 = not interlaced, 2 = interlaced without top-field-first,
+ * 1 = interlaced with top-field-first */
+ uint8_t pic_mode;
+ uint8_t ec;
+ uint8_t ppi;
+ uint8_t chunks;
+ uint8_t padding[2];
+} FFv1VkParameters;
+
+/*
+ * Declare the shared push-constant block in the shader's GLSL source and
+ * register a matching push-constant range of sizeof(FFv1VkParameters) bytes
+ * for the compute stage.
+ *
+ * The GLSL members below must stay in the same order as the fields of
+ * FFv1VkParameters, which is what the host uploads.
+ */
+static void add_push_data(FFVulkanShader *shd)
+{
+ GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
+ GLSLC(1, SliceContext slice_ctx; );
+ GLSLC(1, u8buf scratch_data; );
+ GLSLC(1, u8buf out_data; );
+ GLSLC(0, );
+ GLSLC(1, ivec2 sar; );
+ GLSLC(1, uvec2 chroma_shift; );
+ GLSLC(0, );
+ GLSLC(1, uint slice_size_max; );
+ GLSLC(1, uint slice_count; );
+ GLSLC(1, uint context_count; );
+ GLSLC(1, uint32_t crcref; );
+ GLSLC(0, );
+ GLSLC(1, uint8_t bits_per_raw_sample; );
+ GLSLC(1, uint8_t context_model; );
+ GLSLC(1, uint8_t version; );
+ GLSLC(1, uint8_t micro_version; );
+ GLSLC(1, uint8_t force_pcm; );
+ GLSLC(1, uint8_t key_frame; );
+ GLSLC(1, uint8_t planes; );
+ GLSLC(1, uint8_t codec_planes; );
+ GLSLC(1, uint8_t transparency; );
+ GLSLC(1, uint8_t colorspace; );
+ GLSLC(1, uint8_t pic_mode; );
+ GLSLC(1, uint8_t ec; );
+ GLSLC(1, uint8_t ppi; );
+ GLSLC(1, uint8_t chunks; );
+ GLSLC(1, uint8_t padding[2]; );
+ GLSLC(0, }; );
+ /* Range starts at offset 0 and covers the whole host-side struct */
+ ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkParameters),
+ VK_SHADER_STAGE_COMPUTE_BIT);
+}
+
+/*
+ * Record the RCT pass: allocate an intermediate frame, bind the input and
+ * intermediate images to the RCT shader, flush the barriers queued so far,
+ * dispatch the shader and queue a barrier on the intermediate frame for the
+ * following encode pass.
+ *
+ * @param avctx              codec context
+ * @param exec               execution context being recorded
+ * @param enc_in             input frame and ...
+ * @param enc_in_views       ... its image views
+ * @param intermediate_frame receives the newly allocated frame; it is set
+ *                           even when a later step fails, so the caller has
+ *                           to free it in all cases
+ * @param intermediate_views receives the image views of that frame
+ * @param img_bar/nb_img_bar pending image barriers; submitted here, then
+ *                           refilled with the post-dispatch barrier, which
+ *                           is left for the caller to submit
+ * @param buf_bar/nb_buf_bar pending buffer barriers; submitted and cleared
+ * @param slice_data_buf     slice data buffer; its address is pushed to the
+ *                           shader and its stage/access tracking is updated
+ * @return negative error code on failure; otherwise the (non-negative)
+ *         result of the last checked call
+ */
+static int run_rct(AVCodecContext *avctx, FFVkExecContext *exec,
+ AVFrame *enc_in, VkImageView *enc_in_views,
+ AVFrame **intermediate_frame, VkImageView *intermediate_views,
+ VkImageMemoryBarrier2 *img_bar, int *nb_img_bar,
+ VkBufferMemoryBarrier2 *buf_bar, int *nb_buf_bar,
+ FFVkBuffer *slice_data_buf)
+{
+ int err;
+ VulkanEncodeFFv1Context *fv = avctx->priv_data;
+ FFV1Context *f = &fv->ctx;
+ FFVulkanFunctions *vk = &fv->s.vkfn;
+
+ FFv1VkRCTParameters pd;
+
+ /* Create a temporary frame */
+ *intermediate_frame = av_frame_alloc();
+ if (!(*intermediate_frame))
+ return AVERROR(ENOMEM);
+
+ RET(av_hwframe_get_buffer(fv->intermediate_frames_ref,
+ *intermediate_frame, 0));
+
+ RET(ff_vk_exec_add_dep_frame(&fv->s, exec, *intermediate_frame,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+ RET(ff_vk_create_imageviews(&fv->s, exec, intermediate_views,
+ *intermediate_frame,
+ fv->rep_fmt));
+
+ /* Update descriptors */
+ /* Set 0, binding 0: source image */
+ ff_vk_shader_update_img_array(&fv->s, exec, &fv->rct,
+ enc_in, enc_in_views,
+ 0, 0,
+ VK_IMAGE_LAYOUT_GENERAL,
+ VK_NULL_HANDLE);
+ /* Set 0, binding 1: destination (intermediate) image */
+ ff_vk_shader_update_img_array(&fv->s, exec, &fv->rct,
+ *intermediate_frame, intermediate_views,
+ 0, 1,
+ VK_IMAGE_LAYOUT_GENERAL,
+ VK_NULL_HANDLE);
+
+ ff_vk_frame_barrier(&fv->s, exec, *intermediate_frame, img_bar, nb_img_bar,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+ VK_ACCESS_SHADER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_GENERAL,
+ VK_QUEUE_FAMILY_IGNORED);
+
+ /* Prep the input/output images */
+ /* This also flushes whatever barriers the caller queued before the call */
+ vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ .pImageMemoryBarriers = img_bar,
+ .imageMemoryBarrierCount = *nb_img_bar,
+ .pBufferMemoryBarriers = buf_bar,
+ .bufferMemoryBarrierCount = *nb_buf_bar,
+ });
+ *nb_img_bar = 0;
+ if (*nb_buf_bar) {
+ /* Remember the state the slice data buffer was transitioned to */
+ slice_data_buf->stage = buf_bar[0].dstStageMask;
+ slice_data_buf->access = buf_bar[0].dstAccessMask;
+ *nb_buf_bar = 0;
+ }
+
+ /* Run the shader */
+ ff_vk_exec_bind_shader(&fv->s, exec, &fv->rct);
+ pd = (FFv1VkRCTParameters) {
+ .slice_data = slice_data_buf->address,
+ .bits = f->bits_per_raw_sample,
+ };
+ ff_vk_shader_update_push_const(&fv->s, exec, &fv->rct,
+ VK_SHADER_STAGE_COMPUTE_BIT,
+ 0, sizeof(pd), &pd);
+
+ /* One workgroup per slice */
+ vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1);
+
+ /* Add a post-dispatch barrier before encoding */
+ /* Only queued here; the caller submits it with its next pipeline barrier.
+ * NOTE(review): the encode pass reads this image, yet the requested access
+ * is SHADER_WRITE - confirm whether SHADER_READ is intended. */
+ ff_vk_frame_barrier(&fv->s, exec, *intermediate_frame, img_bar, nb_img_bar,
+ VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+ VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+ VK_ACCESS_SHADER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_GENERAL,
+ VK_QUEUE_FAMILY_IGNORED);
+
+fail:
+ return err;
+}
+
+/**
+ * Encode one frame on the GPU and return it as a single packet.
+ *
+ * Records the setup shader, the RCT pass (only when is_rgb is set) and the
+ * main encode shader into one execution context, submits it and waits for
+ * completion. The per-slice bitstreams written by the GPU are then compacted
+ * in place into one contiguous payload inside the packet buffer.
+ *
+ * @param avctx      codec context; priv_data is a VulkanEncodeFFv1Context
+ * @param pkt        output packet; on success it references the pooled
+ *                   output buffer and its size is the compacted payload size
+ * @param pict       input frame, or NULL, in which case nothing is produced
+ * @param got_packet set to 1 when pkt holds an encoded frame
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
+                                    const AVFrame *pict, int *got_packet)
+{
+    int err;
+    VulkanEncodeFFv1Context *fv = avctx->priv_data;
+    FFV1Context *f = &fv->ctx;
+    FFVulkanFunctions *vk = &fv->s.vkfn;
+    FFVkExecContext *exec;
+
+    FFv1VkParameters pd;
+
+    AVFrame *intermediate_frame = NULL;
+
+    /* Temporary data */
+    size_t tmp_data_size;
+    AVBufferRef *tmp_data_ref = NULL;
+    FFVkBuffer *tmp_data_buf;
+
+    /* Slice data */
+    AVBufferRef *slice_data_ref = NULL;
+    FFVkBuffer *slice_data_buf;
+    /* Set while this function holds the only reference to the slice data */
+    int own_slice_data = 0;
+
+    /* Output data */
+    size_t maxsize;
+    AVBufferRef *out_data_ref = NULL;
+    FFVkBuffer *out_data_buf;
+    uint8_t *buf_p;
+
+    /* Results data */
+    AVBufferRef *results_data_ref = NULL;
+    FFVkBuffer *results_data_buf;
+    uint32_t *sc;
+
+    int has_inter = avctx->gop_size > 1;
+    uint32_t context_count = f->context_count[f->context_model];
+
+    VkImageView in_views[AV_NUM_DATA_POINTERS];
+    VkImageView intermediate_views[AV_NUM_DATA_POINTERS];
+
+    AVFrame *enc_in = (AVFrame *)pict;
+    VkImageView *enc_in_views = in_views;
+
+    VkMappedMemoryRange invalidate_data[2];
+    int nb_invalidate_data = 0;
+
+    VkImageMemoryBarrier2 img_bar[37];
+    int nb_img_bar = 0;
+    VkBufferMemoryBarrier2 buf_bar[8];
+    int nb_buf_bar = 0;
+
+    if (!pict)
+        return 0;
+
+    exec = ff_vk_exec_get(&fv->s, &fv->exec_pool);
+    RET(ff_vk_exec_start(&fv->s, exec));
+
+    /* Frame state */
+    f->cur_enc_frame = pict;
+    if (avctx->gop_size == 0 || f->picture_number % avctx->gop_size == 0) {
+        av_buffer_unref(&fv->keyframe_slice_data_ref);
+        f->key_frame = 1;
+        f->gob_count++;
+    } else {
+        f->key_frame = 0;
+    }
+
+    f->max_slice_count = f->num_h_slices * f->num_v_slices;
+    f->slice_count = f->max_slice_count;
+
+    /* Allocate temporary data buffer */
+    tmp_data_size = f->slice_count*CONTEXT_SIZE;
+    err = ff_vk_get_pooled_buffer(&fv->s, &fv->tmp_data_pool,
+                                  &tmp_data_ref,
+                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                  NULL, tmp_data_size,
+                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+    if (err < 0) {
+        /* A failed request hands nothing over; never unref its output */
+        tmp_data_ref = NULL;
+        goto fail;
+    }
+    tmp_data_buf = (FFVkBuffer *)tmp_data_ref->data;
+
+    /* Allocate slice buffer data */
+    slice_data_ref = fv->keyframe_slice_data_ref;
+    if (!slice_data_ref) {
+        /* Slice data buffer size */
+        uint64_t slice_state_size;
+        if (f->ac == AC_GOLOMB_RICE)
+            slice_state_size = 8;
+        else
+            slice_state_size = CONTEXT_SIZE;
+
+        slice_state_size *= context_count;
+        slice_state_size *= f->plane_count; /* Codec planes */
+        slice_state_size += 256; /* Overestimation for the SliceContext struct */
+
+        /* All is per-slice */
+        slice_state_size *= f->slice_count;
+
+        /* Allocate slice data buffer */
+        err = ff_vk_get_pooled_buffer(&fv->s, &fv->slice_data_pool,
+                                      &slice_data_ref,
+                                      VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                      VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                      NULL, slice_state_size,
+                                      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+        if (err < 0) {
+            slice_data_ref = NULL;
+            goto fail;
+        }
+
+        /* Only save it if we're going to use it again; otherwise the
+         * reference stays ours until it is handed to the exec context */
+        if (has_inter)
+            fv->keyframe_slice_data_ref = slice_data_ref;
+        else
+            own_slice_data = 1;
+    }
+    slice_data_buf = (FFVkBuffer *)slice_data_ref->data;
+
+    /* Allocate results buffer */
+    err = ff_vk_get_pooled_buffer(&fv->s, &fv->results_data_pool,
+                                  &results_data_ref,
+                                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                  NULL, 2048*4,
+                                  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+    if (err < 0) {
+        results_data_ref = NULL;
+        goto fail;
+    }
+    results_data_buf = (FFVkBuffer *)results_data_ref->data;
+
+    /* Output buffer size; computed in size_t so that large frame dimensions
+     * do not overflow int before the value is widened */
+    maxsize = (size_t)avctx->width*avctx->height*(1 + f->transparency);
+    if (f->chroma_planes)
+        maxsize += (size_t)AV_CEIL_RSHIFT(avctx->width, f->chroma_h_shift) *
+                   AV_CEIL_RSHIFT(f->height, f->chroma_v_shift)*2;
+    maxsize += f->slice_count * 800;
+    if (f->version > 3) {
+        maxsize *= f->bits_per_raw_sample + 1;
+    } else {
+        maxsize += f->slice_count * 2 * (avctx->width + avctx->height);
+        maxsize *= 8*(2*f->bits_per_raw_sample + 5);
+    }
+    maxsize >>= 3;
+    maxsize += FF_INPUT_BUFFER_MIN_SIZE;
+
+    if (maxsize > INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE - 32) {
+        av_log(avctx, AV_LOG_WARNING, "Cannot allocate worst case packet size, "
+                                      "the encoding could fail\n");
+        maxsize = INT_MAX - AV_INPUT_BUFFER_PADDING_SIZE - 32;
+    }
+
+    /* Allocate output buffer */
+    RET(ff_vk_get_pooled_buffer(&fv->s, &fv->out_data_pool,
+                                &out_data_ref,
+                                VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+                                NULL, maxsize,
+                                VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                VK_MEMORY_PROPERTY_HOST_CACHED_BIT));
+
+    /* From here on the packet owns the output buffer reference */
+    out_data_buf = (FFVkBuffer *)out_data_ref->data;
+    pkt->data = out_data_buf->mapped_mem;
+    pkt->size = out_data_buf->size;
+    pkt->buf  = out_data_ref;
+
+    /* Add dependencies. A buffer added without taking a new reference
+     * (last argument 0) belongs to the exec context once the call succeeded,
+     * so our pointer is dropped right away to prevent a second unref.
+     * NOTE(review): on failure the buffer is assumed to remain ours, as in
+     * the libavutil/vulkan implementation - confirm. */
+    RET(ff_vk_exec_add_dep_buf(&fv->s, exec, &tmp_data_ref, 1, 0));
+    tmp_data_ref = NULL;
+    RET(ff_vk_exec_add_dep_buf(&fv->s, exec, &results_data_ref, 1, 0));
+    results_data_ref = NULL;
+    RET(ff_vk_exec_add_dep_buf(&fv->s, exec, &slice_data_ref, 1, has_inter));
+    own_slice_data = 0;
+    RET(ff_vk_exec_add_dep_buf(&fv->s, exec, &out_data_ref, 1, 1));
+    RET(ff_vk_exec_add_dep_frame(&fv->s, exec, enc_in,
+                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
+
+    RET(ff_vk_create_imageviews(&fv->s, exec, enc_in_views, enc_in,
+                                fv->rep_fmt));
+    ff_vk_frame_barrier(&fv->s, exec, enc_in, img_bar, &nb_img_bar,
+                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+                        VK_ACCESS_SHADER_READ_BIT,
+                        VK_IMAGE_LAYOUT_GENERAL,
+                        VK_QUEUE_FAMILY_IGNORED);
+
+    /* Setup shader needs the original input */
+    ff_vk_shader_update_img_array(&fv->s, exec, &fv->setup,
+                                  enc_in, enc_in_views,
+                                  0, 0,
+                                  VK_IMAGE_LAYOUT_GENERAL,
+                                  VK_NULL_HANDLE);
+
+    /* Add a buffer barrier between previous and current frame */
+    if (!f->key_frame) {
+        buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+            .srcStageMask = slice_data_buf->stage,
+            .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+            .srcAccessMask = slice_data_buf->access,
+            .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
+                             VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .buffer = slice_data_buf->buf,
+            .size = VK_WHOLE_SIZE,
+            .offset = 0,
+        };
+    }
+
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pImageMemoryBarriers = img_bar,
+        .imageMemoryBarrierCount = nb_img_bar,
+        .pBufferMemoryBarriers = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+    });
+    nb_img_bar = 0;
+    if (nb_buf_bar) {
+        slice_data_buf->stage = buf_bar[0].dstStageMask;
+        slice_data_buf->access = buf_bar[0].dstAccessMask;
+        nb_buf_bar = 0;
+    }
+
+    /* Run setup shader */
+    ff_vk_exec_bind_shader(&fv->s, exec, &fv->setup);
+    pd = (FFv1VkParameters) {
+        .slice_data = slice_data_buf->address,
+        .scratch_data = tmp_data_buf->address,
+        .out_data = out_data_buf->address,
+        .slice_size_max = out_data_buf->size / f->slice_count,
+        .slice_count = f->slice_count,
+        .bits_per_raw_sample = f->bits_per_raw_sample,
+        .sar[0] = pict->sample_aspect_ratio.num,
+        .sar[1] = pict->sample_aspect_ratio.den,
+        .chroma_shift[0] = f->chroma_h_shift,
+        .chroma_shift[1] = f->chroma_v_shift,
+        .context_count = context_count,
+        .crcref = f->crcref,
+        .context_model = fv->ctx.context_model,
+        .version = f->version,
+        .micro_version = f->micro_version,
+        .force_pcm = 0,
+        .key_frame = f->key_frame,
+        .planes = av_pix_fmt_count_planes(avctx->sw_pix_fmt),
+        .codec_planes = f->plane_count,
+        .transparency = f->transparency,
+        .colorspace = f->colorspace,
+        .pic_mode = !(pict->flags & AV_FRAME_FLAG_INTERLACED) ? 3 :
+                    !(pict->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) ? 2 : 1,
+        .ec = f->ec,
+        .ppi = fv->ppi,
+        .chunks = fv->chunks,
+    };
+    ff_vk_shader_update_push_const(&fv->s, exec, &fv->setup,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd), &pd);
+    vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1);
+
+    /* Setup shader modified the slice data buffer */
+    buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+        .srcStageMask = slice_data_buf->stage,
+        .dstStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
+        .srcAccessMask = slice_data_buf->access,
+        .dstAccessMask = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
+                         VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .buffer = slice_data_buf->buf,
+        .size = VK_WHOLE_SIZE,
+        .offset = 0,
+    };
+
+    /* Run RCT shader */
+    if (fv->is_rgb) {
+        RET(run_rct(avctx, exec,
+                    enc_in, enc_in_views,
+                    &intermediate_frame, intermediate_views,
+                    img_bar, &nb_img_bar, buf_bar, &nb_buf_bar,
+                    slice_data_buf));
+
+        /* Use the new frame */
+        enc_in = intermediate_frame;
+        enc_in_views = intermediate_views;
+    }
+
+    /* Final barrier before encoding */
+    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .pImageMemoryBarriers = img_bar,
+        .imageMemoryBarrierCount = nb_img_bar,
+        .pBufferMemoryBarriers = buf_bar,
+        .bufferMemoryBarrierCount = nb_buf_bar,
+    });
+    nb_img_bar = 0;
+    if (nb_buf_bar) {
+        slice_data_buf->stage = buf_bar[0].dstStageMask;
+        slice_data_buf->access = buf_bar[0].dstAccessMask;
+        nb_buf_bar = 0;
+    }
+
+    /* Main encode shader */
+    ff_vk_shader_update_desc_buffer(&fv->s, exec,
+                                    &fv->enc, 0, 1, 0,
+                                    results_data_buf,
+                                    0, results_data_buf->size,
+                                    VK_FORMAT_UNDEFINED);
+    ff_vk_shader_update_img_array(&fv->s, exec, &fv->enc,
+                                  enc_in, enc_in_views,
+                                  0, 0,
+                                  VK_IMAGE_LAYOUT_GENERAL,
+                                  VK_NULL_HANDLE);
+
+    ff_vk_exec_bind_shader(&fv->s, exec, &fv->enc);
+    ff_vk_shader_update_push_const(&fv->s, exec, &fv->enc,
+                                   VK_SHADER_STAGE_COMPUTE_BIT,
+                                   0, sizeof(pd), &pd);
+    vk->CmdDispatch(exec->buf, fv->ctx.num_h_slices, fv->ctx.num_v_slices, 1);
+
+    /* Submit; a failure must still release the intermediate frame below */
+    RET(ff_vk_exec_submit(&fv->s, exec));
+
+    /* We need the encoded data immediately */
+    ff_vk_exec_wait(&fv->s, exec);
+
+    /* Invalidate slice/output data if needed */
+    if (!(results_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
+        invalidate_data[nb_invalidate_data++] = (VkMappedMemoryRange) {
+            .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+            .memory = results_data_buf->mem,
+            .offset = 0,
+            .size = VK_WHOLE_SIZE,
+        };
+    if (!(out_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
+        invalidate_data[nb_invalidate_data++] = (VkMappedMemoryRange) {
+            .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+            .memory = out_data_buf->mem,
+            .offset = 0,
+            .size = VK_WHOLE_SIZE,
+        };
+    if (nb_invalidate_data)
+        vk->InvalidateMappedMemoryRanges(fv->s.hwctx->act_dev,
+                                         nb_invalidate_data, invalidate_data);
+
+    /* Each slice reports two words: [0] its size in bytes and [1] the offset
+     * of its data inside the output buffer. */
+
+    /* First slice is in-place */
+    buf_p = pkt->data;
+    sc = &((uint32_t *)results_data_buf->mapped_mem)[0];
+    av_log(avctx, AV_LOG_VERBOSE, "Slice size = %u (max %i), src offset = %u\n",
+           sc[0], pkt->size / f->slice_count, sc[1]);
+    av_assert0(sc[0] < pkt->size / f->slice_count);
+    av_assert0(sc[0] < (1 << 24));
+    buf_p += sc[0];
+
+    /* We have to copy the rest */
+    for (int i = 1; i < f->slice_count; i++) {
+        uint32_t bytes;
+        uint8_t *bs_start;
+
+        sc = &((uint32_t *)results_data_buf->mapped_mem)[i*2];
+        bytes = sc[0];
+        bs_start = pkt->data + sc[1];
+
+        av_log(avctx, AV_LOG_VERBOSE, "Slice size = %u (max %i), src offset = %u\n",
+               bytes, pkt->size / f->slice_count, sc[1]);
+        av_assert0(bytes < pkt->size / f->slice_count);
+        av_assert0(bytes < (1 << 24));
+
+        /* Source and destination live in the same buffer and may overlap */
+        memmove(buf_p, bs_start, bytes);
+
+        buf_p += bytes;
+    }
+
+    f->picture_number++;
+    pkt->size = buf_p - pkt->data;
+    pkt->flags |= AV_PKT_FLAG_KEY * f->key_frame;
+    *got_packet = 1;
+    err = 0;
+
+fail:
+    if (err < 0) {
+        /* Release everything already attached to the exec context, then the
+         * buffers that never made it there. The output buffer is not touched:
+         * it is either unset or already referenced by the packet. */
+        ff_vk_exec_discard_deps(&fv->s, exec);
+        av_buffer_unref(&tmp_data_ref);
+        av_buffer_unref(&results_data_ref);
+        if (own_slice_data)
+            av_buffer_unref(&slice_data_ref);
+    }
+
+    /* Frames added as a dep are always referenced, so we only need to
+     * clean this up. */
+    av_frame_free(&intermediate_frame);
+
+    return err;
+}
+
+/*
+ * Create and initialize the Vulkan frames context that backs the
+ * intermediate images, using sw_format as their software pixel format.
+ * Dimensions are those of the input frames context rounded up to a
+ * multiple of 32.
+ *
+ * Returns 0 on success or a negative error code; on failure
+ * intermediate_frames_ref is left unset.
+ */
+static int init_indirect(AVCodecContext *avctx, enum AVPixelFormat sw_format)
+{
+    VulkanEncodeFFv1Context *fv = avctx->priv_data;
+    AVHWFramesContext *hwfc;
+    AVVulkanFramesContext *vkfc;
+    int ret;
+
+    fv->intermediate_frames_ref = av_hwframe_ctx_alloc(fv->s.device_ref);
+    if (fv->intermediate_frames_ref == NULL)
+        return AVERROR(ENOMEM);
+
+    hwfc = (AVHWFramesContext *)fv->intermediate_frames_ref->data;
+    vkfc = hwfc->hwctx;
+
+    /* Generic frame pool parameters */
+    hwfc->width     = FFALIGN(fv->s.frames->width, 32);
+    hwfc->height    = FFALIGN(fv->s.frames->height, 32);
+    hwfc->sw_format = sw_format;
+    hwfc->format    = AV_PIX_FMT_VULKAN;
+
+    /* Vulkan-specific image parameters */
+    vkfc->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
+    vkfc->usage     = VK_IMAGE_USAGE_STORAGE_BIT;
+    vkfc->tiling    = VK_IMAGE_TILING_OPTIMAL;
+
+    ret = av_hwframe_ctx_init(fv->intermediate_frames_ref);
+    if (ret >= 0)
+        return 0;
+
+    av_log(avctx, AV_LOG_ERROR, "Unable to initialize frame pool with format %s: %s\n",
+           av_get_pix_fmt_name(sw_format), av_err2str(ret));
+    av_buffer_unref(&fv->intermediate_frames_ref);
+    return ret;
+}
+
+/**
+ * Check whether a software pixel format is listed in the frame constraints.
+ *
+ * @param constraints constraints to search; may be NULL
+ * @param fmt         pixel format to look for
+ * @return 1 if fmt is listed, 0 otherwise (including when no list exists)
+ */
+static int check_support(AVHWFramesConstraints *constraints,
+                         enum AVPixelFormat fmt)
+{
+    /* The query may have failed, or the list may be unknown */
+    if (!constraints || !constraints->valid_sw_formats)
+        return 0;
+
+    /* The list ends with AV_PIX_FMT_NONE. Testing the entry for non-zero
+     * would stop at pixel format 0, which is a valid format, and would run
+     * past the real terminator. */
+    for (int i = 0; constraints->valid_sw_formats[i] != AV_PIX_FMT_NONE; i++) {
+        if (constraints->valid_sw_formats[i] == fmt)
+            return 1;
+    }
+    return 0;
+}
+
+/**
+ * Pick the pixel format of the intermediate RGB buffer.
+ *
+ * The preferred format depends on use32bit and transparency; when the device
+ * does not list it, progressively larger formats are tried.
+ *
+ * @param avctx codec context; priv_data is a VulkanEncodeFFv1Context
+ * @return a format listed in the device's frame constraints, or
+ *         AV_PIX_FMT_NONE when none is listed or the constraints could not
+ *         be queried
+ */
+static enum AVPixelFormat get_supported_rgb_buffer_fmt(AVCodecContext *avctx)
+{
+    VulkanEncodeFFv1Context *fv = avctx->priv_data;
+
+    enum AVPixelFormat fmt;
+    AVHWFramesConstraints *constraints;
+    constraints = av_hwdevice_get_hwframe_constraints(fv->s.device_ref,
+                                                      NULL);
+    /* The query can fail; without a format list nothing can be selected */
+    if (!constraints || !constraints->valid_sw_formats) {
+        fmt = AV_PIX_FMT_NONE;
+        goto end;
+    }
+
+    /* What we'd like to optimally have */
+    fmt = fv->ctx.use32bit ?
+          (fv->ctx.transparency ? AV_PIX_FMT_RGBA128 : AV_PIX_FMT_RGB96) :
+          (fv->ctx.transparency ? AV_PIX_FMT_RGBA64 : AV_PIX_FMT_RGB48);
+    if (check_support(constraints, fmt))
+        goto end;
+
+    /* Fallbacks, smallest first */
+    if (fv->ctx.use32bit) {
+        if (check_support(constraints, (fmt = AV_PIX_FMT_RGBA128)))
+            goto end;
+    } else {
+        if (check_support(constraints, (fmt = AV_PIX_FMT_RGBA64)))
+            goto end;
+
+        if (!fv->ctx.transparency &&
+            check_support(constraints, (fmt = AV_PIX_FMT_RGB96)))
+            goto end;
+
+        if (check_support(constraints, (fmt = AV_PIX_FMT_RGBA128)))
+            goto end;
+    }
+
+    fmt = AV_PIX_FMT_NONE;
+
+end:
+    av_hwframe_constraints_free(&constraints);
+    return fmt;
+}
+
+/*
+ * Append the preprocessor definitions and the GLSL sources common to every
+ * shader of this encoder to the shader's source buffer.
+ *
+ * The sample type is 16-bit for 8-bit content and 32-bit otherwise; the
+ * Golomb-Rice define and VLC source are only added for that coder type.
+ */
+static void define_shared_code(AVCodecContext *avctx, FFVulkanShader *shd)
+{
+    VulkanEncodeFFv1Context *fv = avctx->priv_data;
+    const int golomb = fv->ctx.ac == AC_GOLOMB_RICE;
+    int sample_bits;
+
+    if (fv->ctx.bits_per_raw_sample == 8)
+        sample_bits = 16;
+    else
+        sample_bits = 32;
+
+    /* Constants shared with the C side */
+    av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n", CONTEXT_SIZE);
+    av_bprintf(&shd->src, "#define MAX_PLANES %i\n", MAX_PLANES);
+    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n", MAX_QUANT_TABLE_MASK);
+
+    if (golomb)
+        av_bprintf(&shd->src, "#define GOLOMB\n");
+
+    /* Scalar and vector sample types */
+    GLSLF(0, #define TYPE int%i_t ,sample_bits);
+    GLSLF(0, #define VTYPE2 i%ivec2 ,sample_bits);
+    GLSLF(0, #define VTYPE3 i%ivec3 ,sample_bits);
+
+    /* Common sources, in dependency order */
+    GLSLD(ff_source_common_comp);
+    GLSLD(ff_source_rangecoder_comp);
+    if (golomb)
+        GLSLD(ff_source_ffv1_vlc_comp);
+    GLSLD(ff_source_ffv1_common_comp);
+}
+
+/*
+ * Build the per-frame setup shader: declare its descriptors and push
+ * constants, assemble the GLSL source, compile it to SPIR-V, link it and
+ * register it with the execution pool.
+ *
+ * @param avctx codec context
+ * @param spv   SPIR-V compiler used to compile the assembled source
+ * @return negative error code on failure; otherwise the (non-negative)
+ *         result of the last checked call
+ */
+static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
+{
+ int err;
+ VulkanEncodeFFv1Context *fv = avctx->priv_data;
+ FFVulkanShader *shd = &fv->setup;
+ FFVulkanDescriptorSetBinding *desc_set;
+
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL;
+
+ /* Local workgroup size of 1x1x1 */
+ RET(ff_vk_shader_init(&fv->s, shd, "ffv1_setup",
+ VK_SHADER_STAGE_COMPUTE_BIT,
+ (const char *[]) { "GL_EXT_buffer_reference",
+ "GL_EXT_buffer_reference2" }, 2,
+ 1, 1, 1,
+ 0));
+
+ /* Array dimensions used by the quant_buf declaration below */
+ av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
+ av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+ av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+ /* First descriptor set: the input image planes, read-only */
+ desc_set = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "src",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .dimensions = 2,
+ .mem_layout = ff_vk_shader_rep_fmt(fv->s.frames->sw_format,
+ fv->rep_fmt),
+ .elems = av_pix_fmt_count_planes(fv->s.frames->sw_format),
+ .mem_quali = "readonly",
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
+ };
+ RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 1, 0, 0));
+
+ /* Second descriptor set: constant tables */
+ desc_set = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "rangecoder_static_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "uint8_t zero_one_state[512];",
+ },
+ { /* This descriptor is never used */
+ .name = "quant_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+ "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+ },
+ };
+ RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 1, 0));
+
+ define_shared_code(avctx, shd);
+
+ /* Same push-constant block as the encode shader */
+ add_push_data(shd);
+
+ GLSLD(ff_source_ffv1_enc_setup_comp );
+
+ RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_link(&fv->s, shd, spv_data, spv_len, "main"));
+
+ RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd));
+
+fail:
+ /* Reached on success as well: the compiler's per-shader state is
+ * released once it has been set */
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+
+ return err;
+}
+
+/*
+ * Build the RCT shader: select a supported intermediate pixel format,
+ * create the intermediate frame pool, declare descriptors and push
+ * constants, then compile, link and register the shader.
+ *
+ * @param avctx codec context
+ * @param spv   SPIR-V compiler used to compile the assembled source
+ * @return AVERROR(ENOTSUP) when no intermediate format is supported, another
+ *         negative error code on failure; otherwise the (non-negative)
+ *         result of the last checked call
+ */
+static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
+{
+ int err;
+ VulkanEncodeFFv1Context *fv = avctx->priv_data;
+ FFVulkanShader *shd = &fv->rct;
+ FFVulkanDescriptorSetBinding *desc_set;
+
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL;
+ /* Square workgroup whose total size stays within the invocation limit.
+ * NOTE(review): the per-dimension workgroup size limits are not checked
+ * here - confirm they cannot be smaller than this value. */
+ int wg_count = sqrt(fv->s.props.properties.limits.maxComputeWorkGroupInvocations);
+
+ enum AVPixelFormat intermediate_fmt = get_supported_rgb_buffer_fmt(avctx);
+ if (intermediate_fmt == AV_PIX_FMT_NONE) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to find a supported compatible "
+ "pixel format for RCT buffer!\n");
+ return AVERROR(ENOTSUP);
+ }
+
+ /* Frame pool for the shader output */
+ RET(init_indirect(avctx, intermediate_fmt));
+
+ RET(ff_vk_shader_init(&fv->s, shd, "ffv1_rct",
+ VK_SHADER_STAGE_COMPUTE_BIT,
+ (const char *[]) { "GL_EXT_buffer_reference",
+ "GL_EXT_buffer_reference2" }, 2,
+ wg_count, wg_count, 1,
+ 0));
+
+ /* Array dimensions used by the quant_buf declaration below */
+ av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
+ av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+ av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+ /* First descriptor set: input planes (read) and intermediate planes
+ * (write) */
+ desc_set = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "src",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .dimensions = 2,
+ .mem_layout = ff_vk_shader_rep_fmt(fv->s.frames->sw_format,
+ fv->rep_fmt),
+ .elems = av_pix_fmt_count_planes(fv->s.frames->sw_format),
+ .mem_quali = "readonly",
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
+ {
+ .name = "dst",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .dimensions = 2,
+ .mem_layout = ff_vk_shader_rep_fmt(intermediate_fmt,
+ fv->rep_fmt),
+ .elems = av_pix_fmt_count_planes(intermediate_fmt),
+ .mem_quali = "writeonly",
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
+ };
+ RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 0, 0));
+
+ /* Second descriptor set: constant tables. The last argument differs from
+ * the setup shader (1 instead of 0).
+ * NOTE(review): presumably this only declares the set in the shader
+ * source - confirm against ff_vk_shader_add_descriptor_set(). */
+ desc_set = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "rangecoder_static_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "uint8_t zero_one_state[512];",
+ },
+ {
+ .name = "quant_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+ "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+ },
+ };
+ RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 1, 1));
+
+ define_shared_code(avctx, shd);
+
+ /* Push constants; must match the layout of FFv1VkRCTParameters */
+ GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
+ GLSLC(1, SliceContext slice_ctx; );
+ GLSLC(1, uint8_t bits; );
+ GLSLC(1, uint8_t padding[3]; );
+ GLSLC(0, }; );
+ ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTParameters),
+ VK_SHADER_STAGE_COMPUTE_BIT);
+
+ GLSLD(ff_source_ffv1_enc_rct_comp );
+
+ RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_link(&fv->s, shd, spv_data, spv_len, "main"));
+
+ RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd));
+
+fail:
+ /* Reached on success as well: the compiler's per-shader state is
+ * released once it has been set */
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+
+ return err;
+}
+
+/**
+ * Build, compile and register the main "ffv1_enc" compute shader.
+ *
+ * Descriptor set 0 holds the source image planes ("src") and the
+ * "results_data_buf" storage buffer; descriptor set 1 holds the range coder
+ * state table, the quantization tables and the CRC table as uniform buffers.
+ * The GLSL body is assembled from the shared code, the push-constant block
+ * and the coder/colourspace specific sources before being handed to the
+ * SPIR-V compiler.
+ *
+ * @param avctx codec context, priv_data is a VulkanEncodeFFv1Context
+ * @param spv   SPIR-V compiler used to compile the assembled source
+ * @return the value left in err by the last RET() call
+ *         (NOTE(review): RET() is defined outside this view; it presumably
+ *         jumps to fail with a negative error code on failure - confirm)
+ */
+static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
+{
+ int err;
+ VulkanEncodeFFv1Context *fv = avctx->priv_data;
+ FFV1Context *f = &fv->ctx;
+ FFVulkanShader *shd = &fv->enc;
+ FFVulkanDescriptorSetBinding *desc_set;
+
+ /* Read from the intermediate frames when such a pool exists, otherwise
+ * directly from the input frames context */
+ AVHWFramesContext *frames_ctx = fv->intermediate_frames_ref ?
+ (AVHWFramesContext *)fv->intermediate_frames_ref->data :
+ fv->s.frames;
+
+ uint8_t *spv_data;
+ size_t spv_len;
+ void *spv_opaque = NULL;
+
+ RET(ff_vk_shader_init(&fv->s, shd, "ffv1_enc",
+ VK_SHADER_STAGE_COMPUTE_BIT,
+ (const char *[]) { "GL_EXT_buffer_reference",
+ "GL_EXT_buffer_reference2" }, 2,
+ 1, 1, 1,
+ 0));
+
+ /* Set 0: per-frame resources */
+ desc_set = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "src",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .dimensions = 2,
+ .mem_layout = ff_vk_shader_rep_fmt(frames_ctx->sw_format,
+ fv->rep_fmt),
+ .elems = av_pix_fmt_count_planes(frames_ctx->sw_format),
+ .mem_quali = "readonly",
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ },
+ {
+ .name = "results_data_buf",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_quali = "writeonly",
+ .buf_content = "uint32_t slice_results[2048];",
+ },
+ };
+ RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 0, 0));
+
+ /* These macros are used by the quant_table declaration in set 1 below,
+ * so they are emitted into the shader source first */
+ av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
+ av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
+ av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
+
+ /* Set 1: constant tables (state transitions, quantization, CRC) */
+ desc_set = (FFVulkanDescriptorSetBinding []) {
+ {
+ .name = "rangecoder_static_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "uint8_t zero_one_state[512];",
+ },
+ {
+ .name = "quant_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
+ "[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
+ },
+ {
+ .name = "crc_ieee_buf",
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .mem_layout = "scalar",
+ .buf_content = "uint32_t crc_ieee[256];",
+ },
+ };
+
+ RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 1, 0));
+
+ define_shared_code(avctx, shd);
+
+ add_push_data(shd);
+
+ /* Assemble the shader body */
+ GLSLD(ff_source_ffv1_enc_common_comp);
+
+ /* Entropy coder: Golomb-Rice (VLC) or range coder */
+ if (f->ac == AC_GOLOMB_RICE)
+ GLSLD(ff_source_ffv1_enc_vlc_comp);
+ else
+ GLSLD(ff_source_ffv1_enc_ac_comp);
+
+ /* Entry point: RGB variant or planar variant */
+ if (fv->is_rgb)
+ GLSLD(ff_source_ffv1_enc_rgb_comp);
+ else
+ GLSLD(ff_source_ffv1_enc_comp);
+
+ RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main",
+ &spv_opaque));
+ RET(ff_vk_shader_link(&fv->s, shd, spv_data, spv_len, "main"));
+
+ RET(ff_vk_shader_register_exec(&fv->s, &fv->exec_pool, shd));
+
+fail:
+ /* Reached on success as well: release the compiler's per-shader data */
+ if (spv_opaque)
+ spv->free_shader(spv, &spv_opaque);
+
+ return err;
+}
+
+/**
+ * Create the 512-byte range coder state table buffer, fill it from
+ * fv->ctx.state_transition and bind it to binding 0 of descriptor set 1 of
+ * both the setup and the encode shaders.
+ *
+ * Entries [256 + i] hold state_transition[i]; entries [256 - i] hold
+ * 256 - state_transition[i], for i in 1..255.
+ *
+ * @param avctx codec context, priv_data is a VulkanEncodeFFv1Context
+ * @return the value left in err by the last RET() call
+ */
+static int init_state_transition_data(AVCodecContext *avctx)
+{
+    int err;
+    VulkanEncodeFFv1Context *fv = avctx->priv_data;
+
+    uint8_t *buf_mapped;
+    size_t buf_len = 512*sizeof(uint8_t);
+
+    RET(ff_vk_create_buf(&fv->s, &fv->rangecoder_static_buf,
+                         buf_len,
+                         NULL, NULL,
+                         VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
+                         VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+                         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+    RET(ff_vk_map_buffer(&fv->s, &fv->rangecoder_static_buf,
+                         &buf_mapped, 0));
+
+    /* The loop below only touches indices 1..255 and 257..511. Give the two
+     * remaining slots a defined value instead of exposing whatever the
+     * freshly allocated memory happened to contain. */
+    buf_mapped[0]   = 0;
+    buf_mapped[256] = 0;
+
+    for (int i = 1; i < 256; i++) {
+        buf_mapped[256 + i] = fv->ctx.state_transition[i];
+        buf_mapped[256 - i] = 256 - (int)fv->ctx.state_transition[i];
+    }
+
+    RET(ff_vk_unmap_buffer(&fv->s, &fv->rangecoder_static_buf, 1));
+
+    /* Update descriptors */
+    RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
+                                        &fv->setup, 1, 0, 0,
+                                        &fv->rangecoder_static_buf,
+                                        0, fv->rangecoder_static_buf.size,
+                                        VK_FORMAT_UNDEFINED));
+    RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
+                                        &fv->enc, 1, 0, 0,
+                                        &fv->rangecoder_static_buf,
+                                        0, fv->rangecoder_static_buf.size,
+                                        VK_FORMAT_UNDEFINED));
+
+fail:
+    return err;
+}
+
+/**
+ * Create the quantization table uniform buffer, copy fv->ctx.quant_tables
+ * into it and bind it to binding 1 of descriptor set 1 of the encode shader.
+ *
+ * @param avctx codec context, priv_data is a VulkanEncodeFFv1Context
+ * @return the value left in err by the last RET() call
+ */
+static int init_quant_table_data(AVCodecContext *avctx)
+{
+    int err;
+    VulkanEncodeFFv1Context *fv = avctx->priv_data;
+    int16_t *tables_dst;
+
+    /* Sized to match the quant_table[][][] declaration given to the shader */
+    const size_t tables_size = sizeof(int16_t)*MAX_QUANT_TABLES*
+                               MAX_CONTEXT_INPUTS*
+                               MAX_QUANT_TABLE_SIZE;
+
+    RET(ff_vk_create_buf(&fv->s, &fv->quant_buf, tables_size, NULL, NULL,
+                         VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
+                         VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+                         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+
+    /* Map, upload, unmap */
+    RET(ff_vk_map_buffer(&fv->s, &fv->quant_buf, (void *)&tables_dst, 0));
+    memcpy(tables_dst, fv->ctx.quant_tables, sizeof(fv->ctx.quant_tables));
+    RET(ff_vk_unmap_buffer(&fv->s, &fv->quant_buf, 1));
+
+    RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
+                                        &fv->enc, 1, 1, 0,
+                                        &fv->quant_buf,
+                                        0, fv->quant_buf.size,
+                                        VK_FORMAT_UNDEFINED));
+
+fail:
+    return err;
+}
+
+/**
+ * Create the CRC lookup table uniform buffer, fill it with the first 256
+ * entries of the AV_CRC_32_IEEE table and bind it to binding 2 of
+ * descriptor set 1 of the encode shader.
+ *
+ * @param avctx codec context, priv_data is a VulkanEncodeFFv1Context
+ * @return the value left in err by the last RET() call
+ */
+static int init_crc_table_data(AVCodecContext *avctx)
+{
+    int err;
+    VulkanEncodeFFv1Context *fv = avctx->priv_data;
+    uint32_t *crc_dst;
+
+    /* Matches the "uint32_t crc_ieee[256]" declaration in the shader */
+    const size_t crc_size = sizeof(int32_t)*256;
+
+    RET(ff_vk_create_buf(&fv->s, &fv->crc_tab_buf, crc_size, NULL, NULL,
+                         VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
+                         VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+                         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+
+    /* Map, upload, unmap */
+    RET(ff_vk_map_buffer(&fv->s, &fv->crc_tab_buf, (void *)&crc_dst, 0));
+    memcpy(crc_dst, av_crc_get_table(AV_CRC_32_IEEE), crc_size);
+    RET(ff_vk_unmap_buffer(&fv->s, &fv->crc_tab_buf, 1));
+
+    RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
+                                        &fv->enc, 1, 2, 0,
+                                        &fv->crc_tab_buf,
+                                        0, fv->crc_tab_buf.size,
+                                        VK_FORMAT_UNDEFINED));
+
+fail:
+    return err;
+}
+
+/**
+ * Encoder init callback.
+ *
+ * Runs the common FFv1 setup, chooses the slice layout and bitstream
+ * version, validates the frame dimensions, initializes the Vulkan context,
+ * compute queue and execution pool, builds the setup / RCT / encode shaders
+ * and uploads the constant tables (state transitions, quantization tables
+ * and, when slice CRCs are enabled, the CRC table).
+ *
+ * Nothing is released here on failure; the codec sets
+ * FF_CODEC_CAP_INIT_CLEANUP, so teardown is presumably left to
+ * vulkan_encode_ffv1_close().
+ *
+ * @param avctx codec context, priv_data is a VulkanEncodeFFv1Context
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
+{
+    int err;
+    VulkanEncodeFFv1Context *fv = avctx->priv_data;
+    FFV1Context *f = &fv->ctx;
+    FFVkSPIRVCompiler *spv;
+
+    if ((err = ff_ffv1_common_init(avctx)) < 0)
+        return err;
+
+    if (f->ac == 1)
+        f->ac = AC_RANGE_CUSTOM_TAB;
+
+    /* Disabled: automatic slice sizing, originally guarded by
+     * fv->ctx.ac == AC_GOLOMB_RICE */
+    if (0) {
+        int w_a = FFALIGN(avctx->width, LG_ALIGN_W);
+        int h_a = FFALIGN(avctx->height, LG_ALIGN_H);
+        int w_sl, h_sl;
+
+        /* Pixels per line an invocation handles */
+        int ppi = 0;
+        /* Chunk size */
+        int chunks = 0;
+
+        do {
+            if (ppi < 2)
+                ppi++;
+            chunks++;
+            w_sl = w_a / (LG_ALIGN_W*ppi);
+            h_sl = h_a / (LG_ALIGN_H*chunks);
+        } while (w_sl > MAX_SLICES / h_sl);
+
+        av_log(avctx, AV_LOG_VERBOSE, "Slice config: %ix%i, %i total\n",
+               LG_ALIGN_W*ppi, LG_ALIGN_H*chunks, w_sl*h_sl);
+        av_log(avctx, AV_LOG_VERBOSE, "Horizontal slices: %i (%i pixels per invoc)\n",
+               w_sl, ppi);
+        av_log(avctx, AV_LOG_VERBOSE, "Vertical slices: %i (%i chunks)\n",
+               h_sl, chunks);
+
+        f->num_h_slices = w_sl;
+        f->num_v_slices = h_sl;
+
+        fv->ppi = ppi;
+        fv->chunks = chunks;
+    } else {
+        f->num_h_slices = fv->num_h_slices;
+        f->num_v_slices = fv->num_v_slices;
+
+        /* Unset options fall back to a 32x32 slice grid */
+        if (f->num_h_slices <= 0)
+            f->num_h_slices = 32;
+        if (f->num_v_slices <= 0)
+            f->num_v_slices = 32;
+
+        /* Never use more slices than there are pixels in a direction */
+        f->num_h_slices = FFMIN(f->num_h_slices, avctx->width);
+        f->num_v_slices = FFMIN(f->num_v_slices, avctx->height);
+    }
+
+    err = ff_ffv1_encode_setup_plane_info(avctx, avctx->sw_pix_fmt);
+    if (err < 0)
+        return err;
+
+    /* Rice coding does not support high bit depths yet */
+    if (f->bits_per_raw_sample > 8) {
+        if (f->ac == AC_GOLOMB_RICE) {
+            av_log(avctx, AV_LOG_WARNING, "bits_per_raw_sample > 8, "
+                                          "forcing range coder\n");
+            f->ac = AC_RANGE_CUSTOM_TAB;
+        }
+    }
+
+    /* Target version 3 by default */
+    f->version = 3;
+    if (avctx->level == 4) {
+        if (avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
+            av_log(avctx, AV_LOG_ERROR, "Version 4 is experimental and requires -strict -2\n");
+            return AVERROR_INVALIDDATA;
+        }
+        f->version = avctx->level;
+    }
+
+    err = ff_ffv1_encode_init(avctx);
+    if (err < 0)
+        return err;
+
+    if (f->version < 4) {
+        /* Horizontal subsampling constrains the width, vertical subsampling
+         * constrains the height */
+        if (((f->chroma_h_shift > 0) && (avctx->width  % (64 << f->chroma_h_shift))) ||
+            ((f->chroma_v_shift > 0) && (avctx->height % (64 << f->chroma_v_shift)))) {
+            av_log(avctx, AV_LOG_ERROR, "Encoding frames with subsampling and unaligned "
+                                        "dimensions is only supported in version 4 (-level 4)\n");
+            return AVERROR_PATCHWELCOME;
+        }
+    }
+
+    /* Init Vulkan */
+    err = ff_vk_init(&fv->s, avctx, NULL, avctx->hw_frames_ctx);
+    if (err < 0)
+        return err;
+
+    err = ff_vk_qf_init(&fv->s, &fv->qf, VK_QUEUE_COMPUTE_BIT);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Device has no compute queues!\n");
+        return err;
+    }
+
+    err = ff_vk_exec_pool_init(&fv->s, &fv->qf, &fv->exec_pool,
+                               fv->qf.nb_queues*4,
+                               0, 0, 0, NULL);
+    if (err < 0)
+        return err;
+
+    spv = ff_vk_spirv_init();
+    if (!spv) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    /* Detect the special RGB coding mode */
+    fv->is_rgb = !(f->colorspace == 0 && avctx->sw_pix_fmt != AV_PIX_FMT_YA8) &&
+                 !(avctx->sw_pix_fmt == AV_PIX_FMT_YA8);
+
+    /* bits_per_raw_sample use regular unsigned representation,
+     * but in higher bit depths, the data is cast to int16_t */
+    fv->rep_fmt = FF_VK_REP_UINT;
+    if (!fv->is_rgb && f->bits_per_raw_sample > 8)
+        fv->rep_fmt = FF_VK_REP_INT;
+
+    /* Init setup shader */
+    err = init_setup_shader(avctx, spv);
+    if (err < 0) {
+        spv->uninit(&spv);
+        return err;
+    }
+
+    /* Init RCT shader */
+    if (fv->is_rgb) {
+        err = init_rct_shader(avctx, spv);
+        if (err < 0) {
+            spv->uninit(&spv);
+            return err;
+        }
+    }
+
+    /* Encode shader */
+    err = init_encode_shader(avctx, spv);
+    if (err < 0) {
+        spv->uninit(&spv);
+        return err;
+    }
+
+    /* The compiler is only needed while building shaders */
+    spv->uninit(&spv);
+
+    /* Range coder data */
+    err = init_state_transition_data(avctx);
+    if (err < 0)
+        return err;
+
+    /* Quantization table data */
+    err = init_quant_table_data(avctx);
+    if (err < 0)
+        return err;
+
+    /* CRC table buffer */
+    if (f->ec) {
+        err = init_crc_table_data(avctx);
+        if (err < 0)
+            return err;
+    }
+
+    return 0;
+}
+
+/**
+ * Encoder close callback: releases the execution pool, the three shaders,
+ * the frame and buffer pools, the constant table buffers and finally the
+ * Vulkan context itself.
+ *
+ * @param avctx codec context, priv_data is a VulkanEncodeFFv1Context
+ * @return always 0
+ */
+static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx)
+{
+ VulkanEncodeFFv1Context *fv = avctx->priv_data;
+
+ ff_vk_exec_pool_free(&fv->s, &fv->exec_pool);
+
+ ff_vk_shader_free(&fv->s, &fv->enc);
+ ff_vk_shader_free(&fv->s, &fv->setup);
+ ff_vk_shader_free(&fv->s, &fv->rct);
+
+ av_buffer_unref(&fv->intermediate_frames_ref);
+
+ av_buffer_pool_uninit(&fv->results_data_pool);
+
+ av_buffer_pool_uninit(&fv->out_data_pool);
+ av_buffer_pool_uninit(&fv->tmp_data_pool);
+
+ av_buffer_unref(&fv->keyframe_slice_data_ref);
+ av_buffer_pool_uninit(&fv->slice_data_pool);
+
+ /* Constant tables created by the init_*_data() helpers */
+ ff_vk_free_buf(&fv->s, &fv->quant_buf);
+ ff_vk_free_buf(&fv->s, &fv->rangecoder_static_buf);
+ ff_vk_free_buf(&fv->s, &fv->crc_tab_buf);
+
+ /* Last: everything above is freed through fv->s */
+ ff_vk_uninit(&fv->s);
+
+ return 0;
+}
+
+/* OFFSET(): byte offset of an option's field inside the private context.
+ * VE: flags shared by every option below (video + encoding parameter). */
+#define OFFSET(x) offsetof(VulkanEncodeFFv1Context, x)
+#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+/* Private options of the encoder. Most entries write straight into the
+ * embedded FFV1Context (ctx.*); slices_h/slices_v are stored in the Vulkan
+ * context and default to -1, which the init callback replaces with 32. */
+static const AVOption vulkan_encode_ffv1_options[] = {
+ { "slicecrc", "Protect slices with CRCs", OFFSET(ctx.ec), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE },
+ { "context", "Context model", OFFSET(ctx.context_model), AV_OPT_TYPE_INT,
+ { .i64 = 0 }, 0, 1, VE },
+ { "coder", "Coder type", OFFSET(ctx.ac), AV_OPT_TYPE_INT,
+ { .i64 = 0 }, -2, 2, VE, .unit = "coder" },
+ /* Named constants accepted by "coder" */
+ { "rice", "Golomb rice", 0, AV_OPT_TYPE_CONST,
+ { .i64 = AC_GOLOMB_RICE }, INT_MIN, INT_MAX, VE, .unit = "coder" },
+ { "range_tab", "Range with custom table", 0, AV_OPT_TYPE_CONST,
+ { .i64 = AC_RANGE_CUSTOM_TAB }, INT_MIN, INT_MAX, VE, .unit = "coder" },
+ { "qtable", "Quantization table", OFFSET(ctx.qtable), AV_OPT_TYPE_INT,
+ { .i64 = -1 }, -1, 2, VE },
+
+ { "slices_h", "Number of horizontal slices", OFFSET(num_h_slices), AV_OPT_TYPE_INT,
+ { .i64 = -1 }, -1, 32, VE },
+ { "slices_v", "Number of vertical slices", OFFSET(num_v_slices), AV_OPT_TYPE_INT,
+ { .i64 = -1 }, -1, 32, VE },
+
+ { NULL }
+};
+
+/* AVClass exposing vulkan_encode_ffv1_options under the "ffv1_vulkan" name */
+static const AVClass vulkan_encode_ffv1_class = {
+ .class_name = "ffv1_vulkan",
+ .item_name = av_default_item_name,
+ .option = vulkan_encode_ffv1_options,
+ .version = LIBAVUTIL_VERSION_INT,
+};
+
+/* Hardware configurations accepted by the encoder: Vulkan frames only.
+ * Only referenced by the codec definition in this file, so it is kept
+ * file-local instead of exporting an unprefixed global symbol. */
+static const AVCodecHWConfigInternal *const vulkan_encode_ffv1_hw_configs[] = {
+    HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN),
+    NULL,
+};
+
+/* Codec registration entry for the "ffv1_vulkan" encoder. It produces
+ * AV_CODEC_ID_FFV1 and accepts AV_PIX_FMT_VULKAN frames only. */
+const FFCodec ff_ffv1_vulkan_encoder = {
+ .p.name = "ffv1_vulkan",
+ CODEC_LONG_NAME("FFmpeg video codec #1 (Vulkan)"),
+ .p.type = AVMEDIA_TYPE_VIDEO,
+ .p.id = AV_CODEC_ID_FFV1,
+ .priv_data_size = sizeof(VulkanEncodeFFv1Context),
+ .init = &vulkan_encode_ffv1_init,
+ FF_CODEC_ENCODE_CB(vulkan_encode_ffv1_frame),
+ .close = &vulkan_encode_ffv1_close,
+ .p.priv_class = &vulkan_encode_ffv1_class,
+ .p.capabilities = AV_CODEC_CAP_DELAY |
+ AV_CODEC_CAP_HARDWARE |
+ AV_CODEC_CAP_DR1 |
+ AV_CODEC_CAP_ENCODER_FLUSH |
+ AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
+ /* INIT_CLEANUP: presumably makes a failed init still run .close, which
+ * the early returns in the init callback rely on - confirm */
+ .caps_internal = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_EOF_FLUSH,
+ .p.pix_fmts = (const enum AVPixelFormat[]) {
+ AV_PIX_FMT_VULKAN,
+ AV_PIX_FMT_NONE,
+ },
+ .hw_configs = vulkan_encode_ffv1_hw_configs,
+ .p.wrapper_name = "vulkan",
+};
diff --git a/libavcodec/vulkan/Makefile b/libavcodec/vulkan/Makefile
index 96b4de0092..d446886aff 100644
--- a/libavcodec/vulkan/Makefile
+++ b/libavcodec/vulkan/Makefile
@@ -3,6 +3,13 @@ GEN_CLEANSUFFIXES = *.o *.c *.d
clean::
$(RM) $(GEN_CLEANSUFFIXES:%=libavcodec/vulkan/%)
+# Objects added to the build when the FFv1 Vulkan encoder is enabled
+# (presumably generated from the matching vulkan/*.comp shader sources)
+OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += vulkan/common.o \
+ vulkan/rangecoder.o vulkan/ffv1_vlc.o \
+ vulkan/ffv1_common.o vulkan/ffv1_enc_common.o \
+ vulkan/ffv1_enc_rct.o vulkan/ffv1_enc_setup.o \
+ vulkan/ffv1_enc_vlc.o vulkan/ffv1_enc_ac.o \
+ vulkan/ffv1_enc.o vulkan/ffv1_enc_rgb.o
+
VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp))
.SECONDARY: $(VULKAN:.comp=.c)
libavcodec/vulkan/%.c: TAG = VULKAN
diff --git a/libavcodec/vulkan/common.comp b/libavcodec/vulkan/common.comp
new file mode 100644
index 0000000000..597c029699
--- /dev/null
+++ b/libavcodec/vulkan/common.comp
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2024 Lynne <dev at lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Buffer reference types giving typed access (8, 16, 32 and 64 bits) to a
+ * device address. Each declares the alignment expected of the address it
+ * is constructed from via buffer_reference_align. */
+layout(buffer_reference, buffer_reference_align = 1) buffer u8buf {
+ uint8_t v;
+};
+
+layout(buffer_reference, buffer_reference_align = 2) buffer u16buf {
+ uint16_t v;
+};
+
+layout(buffer_reference, buffer_reference_align = 4) buffer u32buf {
+ uint32_t v;
+};
+
+layout(buffer_reference, buffer_reference_align = 8) buffer u64buf {
+ uint64_t v;
+};
+
+/* Build a buffer reference of the given type at address b + l */
+#define OFFBUF(type, b, l) \
+ type(uint64_t(b) + uint64_t(l))
+
+/* Keep only the p least significant bits of a */
+#define zero_extend(a, p) \
+ ((a) & ((1 << (p)) - 1))
+
+/* Extract the low `bits` bits of val. bitfieldExtract() sign-extends the
+ * result for signed operands and zero-extends it for unsigned ones. */
+#define sign_extend(val, bits) \
+ bitfieldExtract(val, 0, bits)
+
+/* Wrap a prediction residual into `bits` bits */
+#define fold(diff, bits) \
+ sign_extend(diff, bits)
+
+/* Median of three values. Arguments are evaluated more than once. */
+#define mid_pred(a, b, c) \
+ max(min((a), (b)), min(max((a), (b)), (c)))
+
+/* Round src up to the next multiple of a. Values that are already a
+ * multiple are returned unchanged.
+ * TODO: optimize */
+uint align(uint src, uint a)
+{
+    const uint rem = src % a;
+    return (rem == 0) ? src : (src + a - rem);
+}
+
+/* Reverse the byte order of a 32-bit value. unpack8() yields the bytes
+ * least significant first in .xyzw, so the reversed order is .wzyx. */
+#define reverse4(src) \
+    (pack32(unpack8(uint32_t(src)).wzyx))
+
+/* 64-bit counterpart of reverse4(): applies reverse4() to each 32-bit half
+ * of src and swaps the two halves. */
+uint64_t reverse8(uint64_t src)
+{
+    const u32vec2 halves = unpack32(src);
+    return pack64(u32vec2(reverse4(halves.y), reverse4(halves.x)));
+}
+
+/* Bit writer accumulator configuration: 64 bits wide by default, 32 bits
+ * wide when PB32 is defined.
+ *   BIT_BUF_TYPE  accumulator type
+ *   BUF_TYPE      buffer reference type used for whole-word stores
+ *   BUF_REVERSE   byte swap applied before a whole-word store
+ *   BUF_BITS      accumulator width in bits
+ *   BUF_BYTES     accumulator width in bytes
+ *   BYTE_EXTRACT  byte number byte_off of src, 0 being the least significant
+ */
+#ifndef PB32
+#define BIT_BUF_TYPE uint64_t
+#define BUF_TYPE u64buf
+#define BUF_REVERSE(src) reverse8(src)
+#define BUF_BITS uint8_t(64)
+#define BUF_BYTES uint8_t(8)
+#define BYTE_EXTRACT(src, byte_off) \
+ (uint8_t(((src) >> ((byte_off) << 3)) & 0xFF))
+#else
+#define BIT_BUF_TYPE uint32_t
+#define BUF_TYPE u32buf
+#define BUF_REVERSE(src) reverse4(src)
+#define BUF_BITS uint8_t(32)
+#define BUF_BYTES uint8_t(4)
+#define BYTE_EXTRACT(src, byte_off) \
+ (uint8_t(bitfieldExtract((src), ((byte_off) << 3), 8)))
+#endif
+
+/* State of the MSB-first bit writer */
+struct PutBitContext {
+ /* Address of the first output byte, as set by init_put_bits() */
+ uint64_t buf_start;
+ /* Address the next flushed byte/word is written to */
+ uint64_t buf;
+
+ /* Bits accumulated but not yet written out */
+ BIT_BUF_TYPE bit_buf;
+ /* Number of bits still free in bit_buf */
+ uint8_t bit_left;
+};
+
+/* Append the n low bits of value to the bitstream, most significant bit
+ * first. Once the accumulator is full it is written out one byte at a
+ * time, so pb.buf needs no particular alignment.
+ * NOTE(review): value is OR-ed in unmasked, so callers presumably must not
+ * set bits above bit n-1 - confirm. */
+void put_bits(inout PutBitContext pb, const uint32_t n, uint32_t value)
+{
+ if (n < pb.bit_left) {
+ /* Fits: shift the new bits into the accumulator */
+ pb.bit_buf = (pb.bit_buf << n) | value;
+ pb.bit_left -= uint8_t(n);
+ } else {
+ /* Fill the accumulator with the top bits of value... */
+ pb.bit_buf <<= pb.bit_left;
+ pb.bit_buf |= (value >> (n - pb.bit_left));
+
+ /* ...and store it, most significant byte first */
+ u8buf bs = u8buf(pb.buf);
+ [[unroll]]
+ for (uint8_t i = uint8_t(0); i < BUF_BYTES; i++)
+ bs[i].v = BYTE_EXTRACT(pb.bit_buf, BUF_BYTES - uint8_t(1) - i);
+ pb.buf = uint64_t(bs) + BUF_BYTES;
+
+ /* The remaining low bits of value start the next accumulator; the
+ * stale upper bits are shifted out before the next store */
+ pb.bit_left += BUF_BITS - uint8_t(n);
+ pb.bit_buf = value;
+ }
+}
+
+/* Same as put_bits(), but a full accumulator is written with one
+ * BUF_TYPE-sized store of the byte-swapped value. pb.buf must therefore be
+ * a multiple of BUF_BYTES; DEBUG builds print a message when it is not. */
+void put_bits_aligned(inout PutBitContext pb, const uint32_t n, uint32_t value)
+{
+ if (n < pb.bit_left) {
+ pb.bit_buf = (pb.bit_buf << n) | value;
+ pb.bit_left -= uint8_t(n);
+ } else {
+ pb.bit_buf <<= pb.bit_left;
+ pb.bit_buf |= (value >> (n - pb.bit_left));
+
+#ifdef DEBUG
+ if ((pb.buf % BUF_BYTES) != 0)
+ debugPrintfEXT("put_bits buffer is not aligned!");
+#endif
+
+ /* Whole-word store of the byte-swapped accumulator */
+ BUF_TYPE(pb.buf)[0].v = BUF_REVERSE(pb.bit_buf);
+ pb.buf += BUF_BYTES;
+
+ pb.bit_left += BUF_BITS - uint8_t(n);
+ pb.bit_buf = value;
+ }
+}
+
+/* Write out the bits still held in the accumulator, padding the last byte
+ * with zero bits, and reset the accumulator.
+ * Returns the number of bytes written since init_put_bits(). */
+uint32_t flush_put_bits(inout PutBitContext pb)
+{
+    if (pb.bit_left < BUF_BITS) {
+        /* Align bits to MSBs */
+        pb.bit_buf <<= pb.bit_left;
+
+        /* Bytes holding at least one pending bit: ceil(pending_bits / 8) */
+        uint to_write = (uint(BUF_BITS - pb.bit_left) + 7) >> 3;
+
+        u8buf bs = u8buf(pb.buf);
+        for (int i = 0; i < to_write; i++)
+            bs[i].v = BYTE_EXTRACT(pb.bit_buf, BUF_BYTES - uint8_t(1) - i);
+
+        /* Advance by what was actually written, not by a whole word, so
+         * that the returned length does not include unused bytes */
+        pb.buf = uint64_t(bs) + uint64_t(to_write);
+    }
+
+    pb.bit_left = BUF_BITS;
+    pb.bit_buf = 0x0;
+
+    return uint32_t(pb.buf - pb.buf_start);
+}
+
+/* Start a new bitstream at the address of data with an empty accumulator.
+ * len is accepted for API symmetry but is not used. */
+void init_put_bits(inout PutBitContext pb, u8buf data, uint64_t len)
+{
+    const uint64_t base = uint64_t(data);
+
+    pb.bit_left = BUF_BITS;
+    pb.bit_buf = 0;
+
+    pb.buf = base;
+    pb.buf_start = base;
+}
+
+/* Total number of bits written so far: the bytes already stored plus the
+ * bits pending in the accumulator. */
+uint64_t put_bits_count(const PutBitContext pb)
+{
+    const uint64_t stored_bytes = pb.buf - pb.buf_start;
+    return stored_bytes*8 + BUF_BITS - pb.bit_left;
+}
diff --git a/libavcodec/vulkan/ffv1_common.comp b/libavcodec/vulkan/ffv1_common.comp
new file mode 100644
index 0000000000..51abfce487
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_common.comp
@@ -0,0 +1,77 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev at lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Per-slice encoder state, accessed through a buffer reference */
+layout(buffer_reference, buffer_reference_align = 32) buffer SliceContext {
+ /* Range coder state of the slice */
+ RangeCoder c;
+
+#ifdef GOLOMB
+ PutBitContext pb; /* 8*8 bytes */
+#endif
+
+ /* Address of the slice's context state storage */
+ uint64_t plane_state;
+
+ /* Slice size and position in luma samples; the shaders shift both by
+ * chroma_shift for the chroma planes */
+ ivec2 slice_dim;
+ ivec2 slice_pos;
+
+ uint hdr_len; // only used for golomb
+ /* 1 selects raw (PCM) coding of the slice */
+ int slice_coding_mode;
+ /* Coefficients used by the RCT shader for this slice */
+ int slice_rct_by_coef;
+ int slice_rct_ry_coef;
+};
+
+/* -1, { -1, 0 } */
+/* Median predictor: median of the left sample L, the top sample top[1]
+ * and the gradient L + top[1] - top[0] (top[0] being the top-left sample) */
+int predict(TYPE L, VTYPE2 top)
+{
+ return mid_pred(L, int(L) + int(top[1] - top[0]), top[1]);
+}
+
+/* { -2, -1 }, { -1, 0, 1 }, 0 */
+/* Compute the context number of a sample from the quantized differences
+ * between its neighbours.
+ *   cur_l  the two samples to the left, cur_l[1] being the nearest
+ *   top_l  top-left, top and top-right samples of the line above
+ *   top2   the sample two lines above
+ * The result may be negative; get_diff() negates context and residual
+ * together in that case. */
+int get_context(VTYPE2 cur_l, VTYPE3 top_l, TYPE top2, uint8_t quant_table_idx)
+{
+ const int LT = top_l[0]; /* -1 */
+ const int T = top_l[1]; /* 0 */
+ const int RT = top_l[2]; /* 1 */
+ const int L = cur_l[1]; /* -1 */
+
+ int base = quant_table[quant_table_idx][0][(L - LT) & MAX_QUANT_TABLE_MASK] +
+ quant_table[quant_table_idx][1][(LT - T) & MAX_QUANT_TABLE_MASK] +
+ quant_table[quant_table_idx][2][(T - RT) & MAX_QUANT_TABLE_MASK];
+
+ /* Tables 3 and 4 are treated as unused when their entry 127 is zero;
+ * the two extra neighbours are then ignored */
+ if ((quant_table[quant_table_idx][3][127] == 0) &&
+ (quant_table[quant_table_idx][4][127] == 0))
+ return base;
+
+ const int TT = top2; /* -2 */
+ const int LL = cur_l[0]; /* -2 */
+ return base +
+ quant_table[quant_table_idx][3][(LL - L) & MAX_QUANT_TABLE_MASK] +
+ quant_table[quant_table_idx][4][(TT - T) & MAX_QUANT_TABLE_MASK];
+}
+
+/* 41-entry lookup table; its name suggests it maps a run index to the log2
+ * of the run length for the Golomb-Rice run mode
+ * (NOTE(review): no user of this table is visible here - confirm) */
+const uint32_t log2_run[41] = {
+ 0, 0, 0, 0, 1, 1, 1, 1,
+ 2, 2, 2, 2, 3, 3, 3, 3,
+ 4, 4, 5, 5, 6, 6, 7, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24,
+};
diff --git a/libavcodec/vulkan/ffv1_enc.comp b/libavcodec/vulkan/ffv1_enc.comp
new file mode 100644
index 0000000000..ef3eccdb7c
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_enc.comp
@@ -0,0 +1,57 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev at lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Planar entry point: one workgroup encodes one slice. Every plane of the
+ * slice is coded line by line, then the slice is finalized. */
+void main(void)
+{
+ const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
+ SliceContext sc = slice_ctx[slice_idx];
+ int bits = bits_per_raw_sample;
+
+#ifndef GOLOMB
+ /* Raw (PCM) slices are only handled by the range coder build */
+ if (sc.slice_coding_mode == 1) {
+ for (int p = 0; p < planes; p++) {
+
+ /* Planes 1 and 2 are vertically subsampled by chroma_shift.y */
+ int h = sc.slice_dim.y;
+ if (p > 0 && p < 3)
+ h >>= chroma_shift.y;
+
+ for (int y = 0; y < h; y++)
+ encode_line_pcm(sc, y, p, 0, bits);
+ }
+ } else
+#endif
+ {
+ for (int p = 0; p < planes; p++) {
+ /* Index of the context state set used by this plane, capped at
+ * the last available one */
+ int cp = min(p, codec_planes - 1);
+ int run_index = 0;
+
+ int h = sc.slice_dim.y;
+ if (p > 0 && p < 3)
+ h >>= chroma_shift.y;
+
+ for (int y = 0; y < h; y++)
+ encode_line(sc, y, p, cp, 0, bits, run_index);
+ }
+ }
+
+ finalize_slice(sc, slice_idx);
+}
diff --git a/libavcodec/vulkan/ffv1_enc_ac.comp b/libavcodec/vulkan/ffv1_enc_ac.comp
new file mode 100644
index 0000000000..63ce3ba810
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_enc_ac.comp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2024 Lynne <dev at lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Encode one binary decision with the adaptive state at address `state`,
+ * renormalizing the coder once its range has dropped below 0x100. */
+void put_rac(inout RangeCoder c, uint64_t state, bool bit)
+{
+    put_rac_norenorm(c, state, bit);
+
+    if (c.range >= 0x100)
+        return;
+
+    renorm_encoder(c);
+}
+
+/* Note - only handles signed values */
+/* Encode the signed integer v with the range coder. `state` is the address
+ * of the context's state bytes, used at these offsets:
+ *   0       zero flag
+ *   1..10   unary-coded exponent (position of the highest set bit of |v|)
+ *   22..31  mantissa bits below the highest set bit
+ *   11..21  sign */
+void put_symbol(inout RangeCoder c, uint64_t state, int v)
+{
+ bool is_nil = (v == 0);
+ put_rac(c, state, is_nil);
+ if (is_nil)
+ return;
+
+ const int a = abs(v);
+ const int e = findMSB(a);
+
+ /* Exponent: e ones followed by a zero */
+ state += 1;
+ for (int i = 0; i < e; i++)
+ put_rac(c, state + min(i, 9), true);
+ put_rac(c, state + min(e, 9), false);
+
+ /* Mantissa, most significant bit first */
+ state += 21;
+ for (int i = e - 1; i >= 0; i--)
+ put_rac(c, state + min(i, 9), bool(bitfieldExtract(a, i, 1)));
+ put_rac(c, state - 11 + min(e, 10), v < 0); // 11..21
+}
+
+/* Write line y of plane p of the slice as raw samples: the low `bits` bits
+ * of component `comp` of every sample, most significant bit first, each as
+ * an equiprobable range coder decision. */
+void encode_line_pcm(SliceContext sc, int y, int p, int comp,
+ int bits)
+{
+ ivec2 sp = sc.slice_pos;
+ int w = sc.slice_dim.x;
+ /* Planes 1 and 2 use the subsampled width and position */
+ if (p > 0 && p < 3) {
+ w >>= chroma_shift.x;
+ sp >>= chroma_shift;
+ }
+
+ for (int x = 0; x < w; x++) {
+ uint v = imageLoad(src[p], (sp + ivec2(x, y)))[comp];
+ for (int i = (bits - 1); i >= 0; i--)
+ put_rac_equi(sc.c, bool(bitfieldExtract(v, i, 1)));
+ }
+}
+
+/* Range-code line y of plane p of the slice. For every sample the context
+ * and prediction residual come from get_diff(); the residual is coded with
+ * the state set of context d[0] inside the state block of plane index cp.
+ * run_index is unused by this coder. */
+void encode_line(SliceContext sc, int y, int p, int cp, int comp,
+ int bits, const int run_index)
+{
+ ivec2 sp = sc.slice_pos;
+ /* Start of the states belonging to plane index cp */
+ uint64_t state = sc.plane_state + cp*context_count*CONTEXT_SIZE;
+
+ int w = sc.slice_dim.x;
+ /* Planes 1 and 2 use the subsampled width and position */
+ if (p > 0 && p < 3) {
+ w >>= chroma_shift.x;
+ sp >>= chroma_shift;
+ }
+
+ for (int x = 0; x < w; x++) {
+ const ivec2 d = get_diff(sp + ivec2(x, y), ivec2(x, y), p, comp, w, bits);
+ put_symbol(sc.c, state + CONTEXT_SIZE*d[0], d[1]);
+ }
+}
diff --git a/libavcodec/vulkan/ffv1_enc_common.comp b/libavcodec/vulkan/ffv1_enc_common.comp
new file mode 100644
index 0000000000..f3c1ac90d1
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_enc_common.comp
@@ -0,0 +1,101 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev at lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Compute context and prediction residual for one sample.
+ *   pos   position of the sample in the image
+ *   off   position of the sample relative to its slice
+ *   p     image plane, comp component inside that plane
+ *   sw    slice width, used to clamp the top-right neighbour
+ *   bits  width the residual is folded to
+ * Neighbours outside the slice are replaced: missing lines above read as
+ * zero, and in the first columns the left-hand neighbours are taken from
+ * the line above instead.
+ * Returns ivec2(context, residual) with context >= 0. */
+ivec2 get_diff(ivec2 pos, ivec2 off, int p, int comp, int sw, int bits)
+{
+ /* Added to a left-hand offset to redirect it one sample right and one
+ * line up when the sample sits in column 0 (border1) or 1 (border2) */
+ const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0);
+ const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0);
+
+ TYPE top2 = TYPE(0);
+ if (off.y > 1)
+ top2 = TYPE(imageLoad(src[p], pos + ivec2(0, -2))[comp]);
+
+ VTYPE3 top = VTYPE3(TYPE(0),
+ TYPE(0),
+ TYPE(0));
+ if (off.y > 0 && off != ivec2(0, 1))
+ top[0] = TYPE(imageLoad(src[p], pos + ivec2(-1, -1) + yoff_border1)[comp]);
+ if (off.y > 0) {
+ top[1] = TYPE(imageLoad(src[p], pos + ivec2(0, -1))[comp]);
+ /* In the last column the top-right neighbour falls back to the top */
+ top[2] = TYPE(imageLoad(src[p], pos + ivec2(min(1, sw - off.x - 1), -1))[comp]);
+ }
+
+ /* cur[0], cur[1]: samples two and one to the left; cur[2]: the sample */
+ VTYPE3 cur = VTYPE3(TYPE(0),
+ TYPE(0),
+ imageLoad(src[p], pos)[comp]);
+ if (off.x > 0 && off != ivec2(1, 0))
+ cur[0] = TYPE(imageLoad(src[p], pos + ivec2(-2, 0) + yoff_border2)[comp]);
+ if (off != ivec2(0, 0))
+ cur[1] = TYPE(imageLoad(src[p], pos + ivec2(-1, 0) + yoff_border1)[comp]);
+
+ /* context, diff */
+ ivec2 d = ivec2(get_context(VTYPE2(cur), top, top2, context_model),
+ cur[2] - predict(cur[1], VTYPE2(top)));
+
+ /* A negative context is mirrored together with its residual */
+ if (d[0] < 0)
+ d = -d;
+
+ d[1] = fold(d[1], bits);
+
+ return d;
+}
+
+/* Terminate the slice bitstream, append the slice footer and publish the
+ * result. The footer is the coded length as a 24-bit big-endian value and,
+ * when ec is enabled, a zero byte followed by a 32-bit little-endian CRC of
+ * everything before it. slice_results receives the total slice size and
+ * the offset of the slice inside out_data. */
+void finalize_slice(SliceContext sc, const uint slice_idx)
+{
+#ifdef GOLOMB
+ uint32_t enc_len = sc.hdr_len + flush_put_bits(sc.pb);
+#else
+ uint32_t enc_len = rac_terminate(sc.c);
+#endif
+
+ u8buf bs = u8buf(sc.c.bytestream_start);
+
+ /* Append slice length */
+ u8vec4 enc_len_p = unpack8(enc_len);
+ bs[enc_len + 0].v = enc_len_p.z;
+ bs[enc_len + 1].v = enc_len_p.y;
+ bs[enc_len + 2].v = enc_len_p.x;
+ enc_len += 3;
+
+ /* Calculate and write CRC */
+ if (ec > 0) {
+ /* This zero byte is covered by the CRC */
+ bs[enc_len].v = uint8_t(0);
+ enc_len++;
+
+ uint32_t crc = crcref;
+ for (int i = 0; i < enc_len; i++)
+ crc = crc_ieee[(crc & 0xFF) ^ uint32_t(bs[i].v)] ^ (crc >> 8);
+
+ if (crcref != 0x00000000)
+ crc ^= 0x8CD88196;
+
+ u8vec4 crc_p = unpack8(crc);
+ bs[enc_len + 0].v = crc_p.x;
+ bs[enc_len + 1].v = crc_p.y;
+ bs[enc_len + 2].v = crc_p.z;
+ bs[enc_len + 3].v = crc_p.w;
+ enc_len += 4;
+ }
+
+ slice_results[slice_idx*2 + 0] = enc_len;
+ slice_results[slice_idx*2 + 1] = uint32_t(uint64_t(bs) - uint64_t(out_data));
+}
diff --git a/libavcodec/vulkan/ffv1_enc_rct.comp b/libavcodec/vulkan/ffv1_enc_rct.comp
new file mode 100644
index 0000000000..2303d4065d
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_enc_rct.comp
@@ -0,0 +1,53 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev at lynne.ee>
+ * Copyright (c) 2003-2013 Michael Niedermayer <michaelni at gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Copy one sample from src[0] to dst[0], applying the reversible colour
+ * transform on the way unless the slice is coded as PCM.
+ * off is the sample position relative to the slice origin. */
+void transform_sample(in SliceContext sc, ivec2 off)
+{
+    const ivec2 pos = sc.slice_pos + off;
+    ivec4 pix = ivec4(imageLoad(src[0], pos));
+
+    /* PCM slices keep the untouched samples */
+    if (sc.slice_coding_mode != 1) {
+        const int offset = 1 << bits;
+
+        /* Blue and red become differences against green... */
+        pix.b -= pix.g;
+        pix.r -= pix.g;
+        /* ...green absorbs a weighted share of both differences... */
+        pix.g += (pix.b * sc.slice_rct_by_coef +
+                  pix.r * sc.slice_rct_ry_coef) >> 2;
+        /* ...and the differences are biased by 1 << bits */
+        pix.b += offset;
+        pix.r += offset;
+    }
+
+    imageStore(dst[0], pos, pix);
+}
+
+/* RCT entry point: one workgroup handles one slice. Each invocation starts
+ * at its local ID and steps through the slice by the workgroup size in
+ * both directions. */
+void main()
+{
+ const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
+ SliceContext sc = slice_ctx[slice_idx];
+
+ for (uint y = gl_LocalInvocationID.y; y < sc.slice_dim.y; y += gl_WorkGroupSize.y)
+ for (uint x = gl_LocalInvocationID.x; x < sc.slice_dim.x; x += gl_WorkGroupSize.x)
+ transform_sample(sc, ivec2(x, y));
+}
diff --git a/libavcodec/vulkan/ffv1_enc_rgb.comp b/libavcodec/vulkan/ffv1_enc_rgb.comp
new file mode 100644
index 0000000000..b3b7dc1fff
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_enc_rgb.comp
@@ -0,0 +1,70 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev at lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* RGB entry point: one workgroup encodes one slice. All components live in
+ * image 0 and are coded per line in the order 1, 2, 0, followed by
+ * component 3 when transparency is enabled. */
+void main(void)
+{
+    const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
+    SliceContext sc = slice_ctx[slice_idx];
+
+    /* One extra bit on top of the sample depth, except for PCM slices */
+    int bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1);
+    const bool has_alpha = (transparency == 1);
+
+    int run_index = 0;
+
+#ifndef GOLOMB
+    if (sc.slice_coding_mode == 1) {
+        for (int y = 0; y < sc.slice_dim.y; y++) {
+            encode_line_pcm(sc, y, 0, 1, bits);
+            encode_line_pcm(sc, y, 0, 2, bits);
+            encode_line_pcm(sc, y, 0, 0, bits);
+            if (has_alpha)
+                encode_line_pcm(sc, y, 0, 3, bits);
+        }
+    } else
+#endif
+    {
+        for (int y = 0; y < sc.slice_dim.y; y++) {
+            /* Components 2 and 0 share state set 1 */
+            encode_line(sc, y, 0, 0, 1, bits, run_index);
+            encode_line(sc, y, 0, 1, 2, bits, run_index);
+            encode_line(sc, y, 0, 1, 0, bits, run_index);
+            if (has_alpha)
+                encode_line(sc, y, 0, 2, 3, bits, run_index);
+        }
+    }
+
+    finalize_slice(sc, slice_idx);
+}
diff --git a/libavcodec/vulkan/ffv1_enc_setup.comp b/libavcodec/vulkan/ffv1_enc_setup.comp
new file mode 100644
index 0000000000..338034cdee
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_enc_setup.comp
@@ -0,0 +1,182 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev at lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Code one adaptive bit with the context byte at address `state`, then
+ * renormalise with renorm_encoder_full() if the range dropped below 0x100.
+ */
+void put_rac_full(inout RangeCoder c, uint64_t state, bool bit)
+{
+    put_rac_norenorm(c, state, bit);
+
+    const bool needs_renorm = c.range < 0x100;
+    if (needs_renorm)
+        renorm_encoder_full(c);
+}
+
+/*
+ * Range-code the unsigned value v using the context bytes starting at
+ * address `state`:
+ *   - context 0 codes the "v is zero" flag;
+ *   - contexts 1..10 code the position e of the highest set bit in unary
+ *     (e ones followed by a zero), with the context index clamped at 9;
+ *   - contexts 22..31 code the bits below the highest one, from bit e-1
+ *     down to bit 0, again with the context index clamped at 9.
+ */
+void put_symbol_unsigned(inout RangeCoder c, uint64_t state, uint v)
+{
+    if (v == 0) {
+        put_rac_full(c, state, true);
+        return;
+    }
+    put_rac_full(c, state, false);
+
+    const int msb = findMSB(v);
+    const uint64_t exp_state  = state + 1;
+    const uint64_t mant_state = state + 22;
+
+    int i;
+    for (i = 0; i < msb; i++)
+        put_rac_full(c, exp_state + min(i, 9), true);
+    put_rac_full(c, exp_state + min(msb, 9), false);
+
+    for (i = msb - 1; i >= 0; i--)
+        put_rac_full(c, mant_state + min(i, 9), bool(bitfieldExtract(v, i, 1)));
+}
+
+/*
+ * Write the header of the current slice with the range coder in sc.c.
+ * The CONTEXT_SIZE scratch context bytes at address `state` are reset to
+ * 128 first. The fields written are, in order: the workgroup x and y
+ * IDs, two zero symbols, context_model once per coded plane, pic_mode,
+ * sar.x and sar.y. For version > 3 a "coding mode is 1" bit, the coding
+ * mode and (unless the mode is 1 or colorspace != 1) the two RCT
+ * coefficients follow.
+ */
+void write_slice_header(inout SliceContext sc, uint64_t state)
+{
+    u8buf ctx = u8buf(state);
+
+    [[unroll]]
+    for (int n = 0; n < CONTEXT_SIZE; n++)
+        ctx[n].v = uint8_t(128);
+
+    /* Slice position, in slice units. */
+    put_symbol_unsigned(sc.c, state, gl_WorkGroupID.x);
+    put_symbol_unsigned(sc.c, state, gl_WorkGroupID.y);
+
+    /* NOTE(review): presumably slice width/height minus one, in slice units - confirm. */
+    put_symbol_unsigned(sc.c, state, 0);
+    put_symbol_unsigned(sc.c, state, 0);
+
+    for (int plane = 0; plane < codec_planes; plane++)
+        put_symbol_unsigned(sc.c, state, context_model);
+
+    put_symbol_unsigned(sc.c, state, pic_mode);
+    put_symbol_unsigned(sc.c, state, sar.x);
+    put_symbol_unsigned(sc.c, state, sar.y);
+
+    if (version <= 3)
+        return;
+
+    const bool mode_is_one = (sc.slice_coding_mode == 1);
+
+    put_rac_full(sc.c, state, mode_is_one);
+    put_symbol_unsigned(sc.c, state, sc.slice_coding_mode);
+    if (!mode_is_one && colorspace == 1) {
+        put_symbol_unsigned(sc.c, state, sc.slice_rct_by_coef);
+        put_symbol_unsigned(sc.c, state, sc.slice_rct_ry_coef);
+    }
+}
+
+/*
+ * Write the frame header: a single adaptive bit carrying key_frame,
+ * coded with the context byte at address `state` after resetting that
+ * byte to 128.
+ */
+void write_frame_header(SliceContext sc, uint64_t state)
+{
+    u8buf key_ctx = u8buf(state);
+    key_ctx.v = uint8_t(128);
+
+    const bool is_key = bool(key_frame);
+    put_rac_full(sc.c, state, is_key);
+}
+
+/*
+ * Return the pixel coordinate at which slice boundary number `sx` (out of
+ * `num_h_slices`) falls along a dimension of `width` pixels.
+ *
+ * For bitstreams older than version 4, micro version 3, the boundary is
+ * the plain proportional split. Newer bitstreams round the boundary to a
+ * multiple of (1 << chroma_shift); a boundary that lands on the aligned
+ * width is pulled back to the real width.
+ */
+uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift)
+{
+    const bool legacy_split = (version < 4) || ((version == 4) && (micro_version < 3));
+    if (legacy_split)
+        return width * sx / num_h_slices;
+
+    const uint step   = 1 << chroma_shift;
+    const uint padded = align(width, step);
+
+    /* Rounded division, then snap back onto the step grid. */
+    const uint numer = 2 * padded * sx + num_h_slices * step;
+    const uint denom = 2 * num_h_slices * step;
+    const uint pos   = numer / denom * step;
+
+    return (pos == padded) ? width : pos;
+}
+
+/*
+ * Fill in the per-slice context `sc` for slice number `slice_idx` and
+ * emit its headers:
+ *   - derive the slice position and size from the workgroup grid;
+ *   - pick the coding mode (1 only when version > 3 and force_pcm == 1);
+ *   - point the range coder at this slice's slice_size_max bytes of
+ *     out_data;
+ *   - point plane_state at this slice's share of the state storage;
+ *   - write the frame header (slice 0 only) and the slice header.
+ * With GOLOMB defined, the range coder is then terminated and the bit
+ * writer is started right after the header bytes.
+ */
+void setup_slice_data(SliceContext sc, const uint slice_idx)
+{
+    uvec2 img_size = imageSize(src[0]);
+
+    /* Start/end boundaries of this slice along x and y. */
+    uint sxs = slice_coord(img_size.x, gl_WorkGroupID.x + 0,
+                           gl_NumWorkGroups.x, chroma_shift.x);
+    uint sxe = slice_coord(img_size.x, gl_WorkGroupID.x + 1,
+                           gl_NumWorkGroups.x, chroma_shift.x);
+    uint sys = slice_coord(img_size.y, gl_WorkGroupID.y + 0,
+                           gl_NumWorkGroups.y, chroma_shift.y);
+    uint sye = slice_coord(img_size.y, gl_WorkGroupID.y + 1,
+                           gl_NumWorkGroups.y, chroma_shift.y);
+
+    sc.slice_pos = ivec2(sxs, sys);
+    sc.slice_dim = ivec2(sxe - sxs, sye - sys);
+    sc.slice_coding_mode = version > 3 ? (force_pcm == 1 ? 1 : 0) : 0;
+    sc.slice_rct_by_coef = 1;
+    sc.slice_rct_ry_coef = 1;
+
+    /* Init range coder, write the frame header and slice headers */
+    rac_init(sc.c,
+             OFFBUF(u8buf, out_data, slice_idx * slice_size_max),
+             slice_size_max);
+
+    /* Coder state pointer setting */
+    uint64_t state_size = codec_planes*context_count;
+#ifdef GOLOMB
+    state_size *= VLC_STATE_SIZE;
+#else
+    state_size *= CONTEXT_SIZE;
+#endif
+    /* The state storage begins at the entry following the last slice context. */
+    sc.plane_state = uint64_t(slice_ctx[gl_NumWorkGroups.x*gl_NumWorkGroups.y]) +
+                     slice_idx*state_size;
+
+    /* Write slice data; each slice owns CONTEXT_SIZE bytes of scratch contexts. */
+    uint64_t scratch_state = uint64_t(scratch_data) + slice_idx*CONTEXT_SIZE;
+
+    if (slice_idx == 0)
+        write_frame_header(sc, scratch_state);
+
+    write_slice_header(sc, scratch_state);
+
+#ifdef GOLOMB
+    sc.hdr_len = rac_terminate(sc.c);
+    init_put_bits(sc.pb,
+                  OFFBUF(u8buf, sc.c.bytestream_start, sc.hdr_len),
+                  slice_size_max - sc.hdr_len);
+#endif
+}
+
+/*
+ * Reset every adaptive state of the slice to its initial value.
+ * With GOLOMB defined each VlcState becomes {drift 0, error_sum 4,
+ * bias 0, count 1}; otherwise every range coder context byte is set
+ * to 128.
+ */
+void init_slice_state(SliceContext sc)
+{
+    const uint plane_contexts = codec_planes*context_count;
+
+#ifdef GOLOMB
+    for (int ctx = 0; ctx < plane_contexts; ctx++) {
+        VlcState vs = VlcState(sc.plane_state + ctx*VLC_STATE_SIZE);
+        vs.count     = uint8_t(1);
+        vs.bias      = int8_t(0);
+        vs.error_sum = uint16_t(4);
+        vs.drift     = int16_t(0);
+    }
+#else
+    const uint nb_bytes = plane_contexts*CONTEXT_SIZE;
+    u8buf bytes = u8buf(sc.plane_state);
+    for (int b = 0; b < nb_bytes; b++)
+        bytes[b].v = uint8_t(128);
+#endif
+}
+
+/*
+ * Entry point of the setup pass: one workgroup per slice. Prepares the
+ * slice context and headers, then resets the adaptive coder state when
+ * the frame is a keyframe or the slice uses coding mode 1.
+ */
+void main(void)
+{
+    const uint slice_idx = gl_WorkGroupID.x + gl_NumWorkGroups.x*gl_WorkGroupID.y;
+    SliceContext sc = slice_ctx[slice_idx];
+
+    setup_slice_data(sc, slice_idx);
+
+    const bool reset_state = (key_frame == 1) || (sc.slice_coding_mode == 1);
+    if (reset_state)
+        init_slice_state(sc);
+}
diff --git a/libavcodec/vulkan/ffv1_enc_vlc.comp b/libavcodec/vulkan/ffv1_enc_vlc.comp
new file mode 100644
index 0000000000..6e29a3e972
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_enc_vlc.comp
@@ -0,0 +1,113 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev at lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Run-length state tracked by calc_new_state(). */
+struct RLEState {
+ int count; /* length of the current run; reduced by 1 << log2_run[index] while it is flushed */
+ uint8_t index; /* current index into log2_run[] */
+ bool mode; /* true while a run is in progress */
+ TYPE diff; /* residual of the current sample; decremented when a positive value ends a run */
+};
+
+/*
+ * Advance the run-length state for one sample without emitting any bits.
+ *
+ * This mirrors the run handling of encode_line() in this file:
+ *   - a zero context starts a run;
+ *   - inside a run, a zero residual extends it;
+ *   - any non-zero residual ends the run: the run length is folded into
+ *     the log2_run index, the index steps back by one (not below zero),
+ *     and a positive residual is decremented.
+ *
+ * The earlier form cleared `mode` on a zero context and only ended a run
+ * on a positive residual, which disagreed with encode_line() on both
+ * points and left the inner "diff > 0" test redundant.
+ * NOTE(review): no caller is visible in this file - confirm the intended
+ * semantics before relying on this helper.
+ */
+void calc_new_state(inout RLEState state, int context)
+{
+    if (context == 0)
+        state.mode = true;
+
+    if (!state.mode)
+        return;
+
+    /* Zero residual: the run simply grows. */
+    if (state.diff == 0) {
+        state.count++;
+        return;
+    }
+
+    /* Non-zero residual: close the run. */
+    while (state.count >= (1 << log2_run[state.index])) {
+        state.count -= 1 << log2_run[state.index];
+        state.index++;
+    }
+    if (state.index > 0)
+        state.index--;
+    state.count = 0;
+    state.mode = false;
+
+    /* Zero cannot occur here, so positive residuals are shifted down by one. */
+    if (state.diff > 0)
+        state.diff--;
+}
+
+/*
+ * Golomb-Rice encode one line of a slice into sc.pb.
+ *
+ *   sc        slice context (position, size, state base, bit writer)
+ *   y         row inside the slice
+ *   p         plane index; planes 1 and 2 are treated as subsampled
+ *   cp        context plane: selects which block of context_count
+ *             VlcState records inside sc.plane_state is used
+ *   comp      component index, forwarded to get_diff()
+ *   bits      sample bit depth, forwarded to get_diff()/get_vlc_symbol()
+ *   run_index index into log2_run[], carried from one line to the next
+ *
+ * get_diff() yields a pair: [0] is the context number, [1] the residual.
+ * A zero context enters run mode, where consecutive zero residuals are
+ * counted instead of coded; everything else is coded with the adaptive
+ * VLC state selected by the context.
+ */
+void encode_line(SliceContext sc, int y, int p, int cp, int comp,
+                 int bits, inout int run_index)
+{
+    ivec2 sp = sc.slice_pos;
+    uint64_t state = sc.plane_state + cp*context_count*VLC_STATE_SIZE;
+
+    int w = sc.slice_dim.x;
+    if (p > 0 && p < 3) {
+        /*
+         * Subsampled planes cover ceil(w / 2^shift) samples. A plain
+         * right shift rounds down and drops the last chroma column when
+         * the slice width is not a multiple of the subsampling factor.
+         */
+        w = (w + (1 << chroma_shift.x) - 1) >> chroma_shift.x;
+        sp >>= chroma_shift;
+    }
+
+    int run_count = 0;
+    bool run_mode = false;
+
+    for (int x = 0; x < w; x++) {
+        ivec2 d = get_diff(sp + ivec2(x, y), ivec2(x, y), p, comp, w, bits);
+
+        if (d[0] == 0)
+            run_mode = true;
+
+        if (run_mode) {
+            if (d[1] != 0) {
+                /* A very unlikely loop */
+                while (run_count >= 1 << log2_run[run_index]) {
+                    run_count -= 1 << log2_run[run_index];
+                    run_index++;
+                    put_bits(sc.pb, 1, 1);
+                }
+
+                /* Terminator: a zero bit followed by the remaining run length. */
+                put_bits(sc.pb, 1 + log2_run[run_index], run_count);
+                if (run_index != 0)
+                    run_index--;
+                run_count = 0;
+                run_mode = false;
+                /* Zero cannot end a run, so positive residuals shift down by one. */
+                if (d[1] > 0)
+                    d[1]--;
+            } else {
+                run_count++;
+            }
+        }
+
+        if (!run_mode) {
+            VlcState sb = VlcState(state + VLC_STATE_SIZE*d[0]);
+            Symbol sym = get_vlc_symbol(sb, d[1], bits);
+            put_bits(sc.pb, sym.bits, sym.val);
+        }
+    }
+
+    /* Flush a run that is still open at the end of the line. */
+    if (run_mode) {
+        while (run_count >= (1 << log2_run[run_index])) {
+            run_count -= 1 << log2_run[run_index];
+            run_index++;
+            put_bits(sc.pb, 1, 1);
+        }
+
+        if (run_count > 0)
+            put_bits(sc.pb, 1, 1);
+    }
+}
diff --git a/libavcodec/vulkan/ffv1_vlc.comp b/libavcodec/vulkan/ffv1_vlc.comp
new file mode 100644
index 0000000000..1f07a19dba
--- /dev/null
+++ b/libavcodec/vulkan/ffv1_vlc.comp
@@ -0,0 +1,122 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev at lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Stride in bytes between consecutive VlcState records; also their buffer reference alignment. */
+#define VLC_STATE_SIZE 8
+/* Adaptive Golomb-Rice state of one context, maintained by update_vlc_state(). */
+layout(buffer_reference, buffer_reference_align = VLC_STATE_SIZE) buffer VlcState {
+ int16_t drift; /* running sum of coded values, pulled back towards zero as bias adapts */
+ uint16_t error_sum; /* running sum of absolute coded values */
+ int8_t bias; /* prediction correction, kept within [-128, 127] */
+ uint8_t count; /* number of updates; halved together with drift and error_sum at 128 */
+};
+
+/*
+ * Fold the coded value v into the adaptive state: accumulate it into
+ * drift and its magnitude into error_sum, halve the statistics once
+ * count has reached 128, then nudge bias by one step whenever drift has
+ * left the interval (-count, 0].
+ */
+void update_vlc_state(inout VlcState state, const int v)
+{
+    int cnt = state.count;
+    int acc = state.drift;
+    int adj = state.bias;
+
+    acc += v;
+    state.error_sum += uint16_t(abs(v));
+
+    if (cnt == 128) { // FIXME: variable
+        state.error_sum >>= 1;
+        acc >>= 1;
+        cnt >>= 1;
+    }
+    cnt += 1;
+
+    /* cnt is at least 1 here, so the two cases below cannot overlap. */
+    if (acc > 0) {
+        adj = min(adj + 1, 127);
+        acc = min(acc - cnt, 0);
+    } else if (acc <= -cnt) {
+        adj = max(adj - 1, -128);
+        acc = max(acc + cnt, 1 - cnt);
+    }
+
+    state.count = uint8_t(cnt);
+    state.drift = int16_t(acc);
+    state.bias  = int8_t(adj);
+}
+
+/* A variable-length code ready to be handed to put_bits(). */
+struct Symbol {
+ uint32_t bits; /* number of bits in the code */
+ uint32_t val; /* value of the code */
+};
+
+/*
+ * Build the unsigned Golomb-Rice code of i with Rice parameter k.
+ *
+ * When the quotient i >> k is below `limit`, the code is e + k + 1 bits
+ * long and holds a one bit followed by the low k bits of i. Otherwise an
+ * escape of limit + esc_len bits carrying i - limit + 1 is produced.
+ * i must not be negative.
+ */
+Symbol set_ur_golomb(int i, int k, int limit, int esc_len)
+{
+    int e;
+    Symbol sym;
+
+#ifdef DEBUG
+    /* The message used to claim "i is zero" although the check is for negative input. */
+    if (i < 0)
+        debugPrintfEXT("Error: i is negative!");
+#endif
+
+    e = i >> k;
+    if (e < limit) {
+        sym.bits = e + k + 1;
+        sym.val = (1 << k) + zero_extend(i, k);
+    } else {
+        sym.bits = limit + esc_len;
+        sym.val = i - limit + 1;
+    }
+
+    return sym;
+}
+
+/**
+ * Build the signed Golomb-Rice code (ffv1) of i.
+ *
+ * The signed value is first mapped onto the non-negative integers
+ * (i >= 0 becomes 2*i, i < 0 becomes -2*i - 1) and then coded with
+ * set_ur_golomb().
+ */
+Symbol set_sr_golomb(int i, int k, int limit, int esc_len)
+{
+    const int twice  = -2 * i - 1;
+    const int mapped = twice ^ (twice >> 31);
+
+    return set_ur_golomb(mapped, k, limit, esc_len);
+}
+
+/*
+ * Produce the Golomb-Rice code for residual v under the adaptive state
+ * `state`, and update that state.
+ *
+ * The residual is bias-corrected and folded to `bits` bits, the Rice
+ * parameter k is the smallest value with (count << k) >= error_sum, and
+ * the sign of the value is flipped when 2*drift + count is negative.
+ *
+ * The state fields are narrower than int; they are widened explicitly so
+ * that the ">> 31" below is guaranteed to be an arithmetic shift on a
+ * signed 32-bit value (giving 0 or -1) regardless of how the mixed-width
+ * expression would otherwise be promoted. An unused local Symbol has
+ * also been removed.
+ */
+Symbol get_vlc_symbol(inout VlcState state, int v, int bits)
+{
+    int i, k, code;
+    v = fold(v - int(state.bias), bits);
+
+    i = int(state.count);
+    k = 0;
+    while (i < int(state.error_sum)) { // FIXME: optimize
+        k++;
+        i += i;
+    }
+
+#ifdef DEBUG
+    if (k > 13)
+        debugPrintfEXT("Error: k > 13!");
+#endif
+
+    code = v ^ ((2 * int(state.drift) + int(state.count)) >> 31);
+
+    /* The state must be read for k and the sign mask before it is updated. */
+    update_vlc_state(state, v);
+
+    return set_sr_golomb(code, k, 12, bits);
+}
diff --git a/libavcodec/vulkan/rangecoder.comp b/libavcodec/vulkan/rangecoder.comp
new file mode 100644
index 0000000000..9f6def75e4
--- /dev/null
+++ b/libavcodec/vulkan/rangecoder.comp
@@ -0,0 +1,189 @@
+/*
+ * FFv1 codec
+ *
+ * Copyright (c) 2024 Lynne <dev at lynne.ee>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Range encoder state. */
+struct RangeCoder {
+ u8buf bytestream_start; /* first byte of the output buffer */
+ u8buf bytestream; /* next byte to be written */
+
+ uint low; /* low end of the current interval */
+ uint16_t range; /* width of the current interval */
+ uint8_t outstanding_count; /* bytes held back until a possible carry is resolved */
+ uint8_t outstanding_byte; /* last byte not yet written; 0xFF after rac_init() means "none yet" */
+};
+
+/*
+ * Full renorm version that can handle outstanding_byte == 0xFF
+ *
+ * Performs one renormalisation step: shifts one byte out of `low`,
+ * resolving a pending carry if possible, and scales `range` by 256.
+ * rac_init() seeds outstanding_byte with 0xFF, which is treated here as
+ * "no byte pending yet".
+ *
+ * NOTE(review): a genuine pending byte of 0xFF (stored by the second
+ * branch when low == 0xFF00) looks identical to that sentinel and would
+ * presumably be skipped on the next call - confirm.
+ * NOTE(review): outstanding_count is 8 bits wide; more than 255 held
+ * back bytes would wrap - confirm this cannot happen.
+ */
+void renorm_encoder_full(inout RangeCoder c)
+{
+ /* Number of bytes written by this call. */
+ int bs_cnt = 0;
+
+ if (c.outstanding_byte == 0xFF) {
+ /* Nothing pending: just remember the first byte. */
+ c.outstanding_byte = uint8_t(c.low >> 8);
+ } else if (c.low <= 0xFF00) {
+ /* No carry possible: flush the pending byte, held back bytes become 0xFF. */
+ c.bytestream[bs_cnt++].v = c.outstanding_byte;
+ uint8_t cnt = c.outstanding_count;
+ for (; cnt > 0; cnt--)
+ c.bytestream[bs_cnt++].v = uint8_t(0xFF);
+ c.outstanding_count = uint8_t(0);
+ c.outstanding_byte = uint8_t(c.low >> 8);
+ } else if (c.low >= 0x10000) {
+ /* Carry: pending byte is incremented, held back bytes become 0x00. */
+ c.bytestream[bs_cnt++].v = c.outstanding_byte + uint8_t(1);
+ uint8_t cnt = c.outstanding_count;
+ for (; cnt > 0; cnt--)
+ c.bytestream[bs_cnt++].v = uint8_t(0x00);
+ c.outstanding_count = uint8_t(0);
+ c.outstanding_byte = uint8_t(bitfieldExtract(c.low, 8, 8));
+ } else {
+ /* Carry still undecided: hold one more byte back. */
+ c.outstanding_count++;
+ }
+
+ c.bytestream = OFFBUF(u8buf, c.bytestream, bs_cnt);
+ c.range <<= 8;
+ /* Keep only the low 8 bits of low, moved up by one byte. */
+ c.low = bitfieldInsert(0, c.low, 8, 8);
+}
+
+/*
+ * Cannot deal with outstanding_byte == -1 in the name of speed
+ *
+ * Fast renormalisation step: same interval update as
+ * renorm_encoder_full(), but the pending byte is always assumed to be
+ * valid, i.e. the 0xFF "none yet" value stored by rac_init() is not
+ * special-cased here.
+ *
+ * NOTE(review): oc is 8 bits wide, so an outstanding_count of 255 would
+ * wrap it to 0 - confirm this cannot happen.
+ */
+void renorm_encoder(inout RangeCoder c)
+{
+ /* Pending byte plus all held back bytes. */
+ uint8_t oc = c.outstanding_count + uint8_t(1);
+ uint low = c.low;
+
+ c.range <<= 8;
+ /* Keep only the low 8 bits of low, moved up by one byte. */
+ c.low = bitfieldInsert(0, low, 8, 8);
+
+ /* Carry still undecided: hold one more byte back and write nothing. */
+ if (low > 0xFF00 && low < 0x10000) {
+ c.outstanding_count = oc;
+ return;
+ }
+
+ u8buf bs = c.bytestream;
+ uint8_t outstanding_byte = c.outstanding_byte;
+
+ c.bytestream = OFFBUF(u8buf, bs, oc);
+ c.outstanding_count = uint8_t(0);
+ c.outstanding_byte = uint8_t(low >> 8);
+
+ /* Past the check above, low > 0xFF00 implies low >= 0x10000, i.e. a carry. */
+ uint8_t obs = uint8_t(low > 0xFF00);
+ /* 0x00 after a carry, 0xFF otherwise. */
+ uint8_t fill = obs - uint8_t(1); /* unsigned underflow */
+
+ bs[0].v = outstanding_byte + obs;
+ for (int i = 1; i < oc; i++)
+ bs[i].v = fill;
+}
+
+/*
+ * Code one adaptive bit without renormalising.
+ *
+ * The byte at address `state` is the probability state: the interval is
+ * split at range * state / 256, the half selected by `bit` becomes the
+ * new interval, and the state byte is replaced by its successor from
+ * zero_one_state[] (entries 0..255 for a zero bit, 256..511 for a one
+ * bit). The caller must renormalise once range drops below 0x100.
+ */
+void put_rac_norenorm(inout RangeCoder c, uint64_t state, bool bit)
+{
+    u8buf sb = u8buf(state);
+    uint val = uint(sb.v);
+    uint16_t range1 = uint16_t((uint(c.range) * val) >> 8);
+
+#ifdef DEBUG
+    if (val == 0)
+        debugPrintfEXT("Error: state is zero (addr: 0x%lx)", uint64_t(sb));
+    if (range1 >= c.range)
+        debugPrintfEXT("Error: range1 >= c.range");
+    if (range1 <= 0)
+        debugPrintfEXT("Error: range1 <= 0");
+#endif
+
+    if (bit) {
+        c.low += c.range - range1;
+        c.range = range1;
+    } else {
+        c.range -= range1;
+    }
+
+    sb.v = zero_one_state[uint(bit)*256 + val];
+
+#ifdef DEBUG
+    /* bit is converted explicitly so that %i receives a 32-bit integer, not a bool. */
+    if (sb.v == 0)
+        debugPrintfEXT("Error: inserted zero state from tab %i idx %i", uint(bit), val);
+#endif
+}
+
+/*
+ * Equiprobable bit: the interval is split in half instead of using an
+ * adaptive state, then renormalised with the fast renorm_encoder().
+ */
+void put_rac_equi(inout RangeCoder c, bool bit)
+{
+    const uint16_t half_range = c.range >> 1;
+
+#ifdef DEBUG
+    if (half_range >= c.range)
+        debugPrintfEXT("Error: range1 >= c.range");
+    if (half_range <= 0)
+        debugPrintfEXT("Error: range1 <= 0");
+#endif
+
+    if (!bit) {
+        c.range -= half_range;
+    } else {
+        c.low += c.range - half_range;
+        c.range = half_range;
+    }
+
+    const bool needs_renorm = c.range < 0x100;
+    if (needs_renorm)
+        renorm_encoder(c);
+}
+
+/*
+ * Code a zero bit with a fixed probability state of 129 (no state byte
+ * is read or updated), then renormalise with the fast renorm_encoder().
+ */
+void put_rac_terminate(inout RangeCoder c)
+{
+    const uint scaled = (uint(c.range) * 129) >> 8;
+    const uint16_t range1 = uint16_t(scaled);
+
+#ifdef DEBUG
+    if (range1 >= c.range)
+        debugPrintfEXT("Error: range1 >= c.range");
+    if (range1 <= 0)
+        debugPrintfEXT("Error: range1 <= 0");
+#endif
+
+    c.range -= range1;
+
+    const bool needs_renorm = c.range < 0x100;
+    if (needs_renorm)
+        renorm_encoder(c);
+}
+
+/*
+ * Terminate the range coder: code the terminating bit, then force two
+ * renormalisation steps so that the remaining bytes of `low` are pushed
+ * towards the output.
+ * Return the number of bytes written.
+ */
+uint32_t rac_terminate(inout RangeCoder c)
+{
+    put_rac_terminate(c);
+
+    /* First flush step; low is rounded up before its byte is shifted out. */
+    c.low += 0xFF;
+    c.range = uint16_t(0xFF);
+    renorm_encoder(c);
+
+    /* Second flush step. */
+    c.range = uint16_t(0xFF);
+    renorm_encoder(c);
+
+#ifdef DEBUG
+    if (c.low != 0)
+        debugPrintfEXT("Error: c.low != 0");
+    if (c.range < 0x100)
+        debugPrintfEXT("Error: range < 0x100");
+#endif
+
+    const uint64_t end_addr   = uint64_t(c.bytestream);
+    const uint64_t start_addr = uint64_t(c.bytestream_start);
+
+    return uint32_t(end_addr - start_addr);
+}
+
+/*
+ * Initialise range coder `r` to write into the buffer at `data`.
+ * The interval starts as [0, 0xFF00), nothing is held back, and
+ * outstanding_byte is set to 0xFF to mark that no byte is pending yet.
+ * buf_size is accepted but not used.
+ */
+void rac_init(out RangeCoder r, u8buf data, uint64_t buf_size)
+{
+    /* Interval */
+    r.range = uint16_t(0xFF00);
+    r.low = 0;
+
+    /* Carry bookkeeping */
+    r.outstanding_byte = uint8_t(0xFF);
+    r.outstanding_count = uint8_t(0);
+
+    /* Output cursor */
+    r.bytestream = data;
+    r.bytestream_start = data;
+}
--
2.45.2.753.g447d99e1c3b
More information about the ffmpeg-devel
mailing list