/* Generic macro for creating contexts which need to keep their addresses
 * if another context is created. */
#define FN_CREATING(ctx, type, shortname, array, num)                          \
static av_always_inline type *create_ ##shortname(ctx *dctx)                   \
{                                                                              \
    type **array, *sctx = av_mallocz(sizeof(*sctx));                           \
    if (!sctx)                                                                 \
        return NULL;                                                           \
                                                                               \
    array = av_realloc_array(dctx->array, sizeof(*dctx->array), dctx->num + 1);\
    if (!array) {                                                              \
        av_free(sctx);                                                         \
        return NULL;                                                           \
    }                                                                          \
                                                                               \
    dctx->array = array;                                                       \
    dctx->array[dctx->num++] = sctx;                                           \
                                                                               \
    return sctx;                                                               \
}
const VkComponentMapping ff_comp_identity_map = {
    .r = VK_COMPONENT_SWIZZLE_IDENTITY,
    .g = VK_COMPONENT_SWIZZLE_IDENTITY,
    .b = VK_COMPONENT_SWIZZLE_IDENTITY,
    .a = VK_COMPONENT_SWIZZLE_IDENTITY,
};
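/* Converts return values to strings */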
const char *ff_vk_ret2str(VkResult res)
{
#define CASE(VAL) case VAL: return #VAL
    switch (res) {
    CASE(VK_SUCCESS);
    CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
    CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
    CASE(VK_ERROR_INITIALIZATION_FAILED);
    CASE(VK_ERROR_DEVICE_LOST);
    CASE(VK_ERROR_MEMORY_MAP_FAILED);
    CASE(VK_ERROR_LAYER_NOT_PRESENT);
    CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
    CASE(VK_ERROR_FEATURE_NOT_PRESENT);
    CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
    CASE(VK_ERROR_TOO_MANY_OBJECTS);
    CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
    CASE(VK_ERROR_FRAGMENTED_POOL);
    CASE(VK_ERROR_SURFACE_LOST_KHR);
    CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
    CASE(VK_SUBOPTIMAL_KHR);
    CASE(VK_ERROR_OUT_OF_DATE_KHR);
    CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
    CASE(VK_ERROR_VALIDATION_FAILED_EXT);
    CASE(VK_ERROR_INVALID_SHADER_NV);
    CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
    CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    CASE(VK_ERROR_NOT_PERMITTED_EXT);
    default: return "Unknown error";
    }
#undef CASE
}
int ff_vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req,
                    VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
                    VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
    VkResult ret;
    int index = -1;
    VulkanFilterContext *s = avctx->priv;
    VkPhysicalDeviceProperties props;
    VkPhysicalDeviceMemoryProperties mprops;

    VkMemoryAllocateInfo alloc_info = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = alloc_extension,
    };
    vkGetPhysicalDeviceProperties(s->hwctx->phys_dev, &props);
    vkGetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &mprops);
    /* Align if we need to */
    if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
        req->size = FFALIGN(req->size, props.limits.minMemoryMapAlignment);

    alloc_info.allocationSize = req->size;
    /* The vulkan spec requires memory types to be sorted in the "optimal"
     * order, so the first matching type we find will be the best/fastest one */
    for (int i = 0; i < mprops.memoryTypeCount; i++) {
        /* The memory type must be supported by the requirements (bitfield) */
        if (!(req->memoryTypeBits & (1 << i)))
            continue;

        /* The memory type flags must include our properties */
        if ((mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
            continue;

        /* Found a suitable memory type */
        index = i;
        break;
    }
    alloc_info.memoryTypeIndex = index;
    ret = vkAllocateMemory(s->hwctx->act_dev, &alloc_info,
                           s->hwctx->alloc, mem);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR(ENOMEM);
    }
    *mem_flags |= mprops.memoryTypes[index].propertyFlags;

    return 0;
}
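/* Creates a VkBuffer, allocates memory for it (dedicated, if the driver
 * prefers that) and binds the two together. */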
int ff_vk_create_buf(AVFilterContext *avctx, FFVkBuffer *buf, size_t size,
                     VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
{
    int err;
    VkResult ret;
    int use_ded_mem;
    VulkanFilterContext *s = avctx->priv;

    VkBufferCreateInfo buf_spawn = {
        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext       = NULL,
        .usage       = usage,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .size        = size,
    };
    VkBufferMemoryRequirementsInfo2 req_desc = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
    };
    VkMemoryDedicatedAllocateInfo ded_alloc = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
        .pNext = NULL,
    };
    VkMemoryDedicatedRequirements ded_req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
    };
    VkMemoryRequirements2 req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
        .pNext = &ded_req,
    };
    ret = vkCreateBuffer(s->hwctx->act_dev, &buf_spawn, NULL, &buf->buf);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }
    req_desc.buffer = buf->buf;

    vkGetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req);
    /* In case the implementation prefers/requires dedicated allocation */
    use_ded_mem = ded_req.prefersDedicatedAllocation |
                  ded_req.requiresDedicatedAllocation;
    if (use_ded_mem)
        ded_alloc.buffer = buf->buf;

    err = ff_vk_alloc_mem(avctx, &req.memoryRequirements, flags,
                          use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
                          &buf->flags, &buf->mem);
    if (err)
        return err;
    ret = vkBindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}
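/* Maps each buffer's memory into host address space; non-coherent mappings
 * are invalidated first so host reads see the device's writes. */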
int ff_vk_map_buffers(AVFilterContext *avctx, FFVkBuffer *buf, uint8_t *mem[],
                      int nb_buffers, int invalidate)
{
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;
    int inval_count = 0;
    VkMappedMemoryRange *inval_list = NULL;
    for (int i = 0; i < nb_buffers; i++) {
        ret = vkMapMemory(s->hwctx->act_dev, buf[i].mem, 0,
                          VK_WHOLE_SIZE, 0, (void **)&mem[i]);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    if (!invalidate)
        return 0;
    for (int i = 0; i < nb_buffers; i++) {
        const VkMappedMemoryRange ival_buf = {
            .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
            .memory = buf[i].mem,
            .size   = VK_WHOLE_SIZE,
        };
        /* Coherent memory needs no explicit invalidation */
        if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
            continue;
        inval_list = av_fast_realloc(s->scratch, &s->scratch_size,
                                     (++inval_count)*sizeof(*inval_list));
        if (!inval_list)
            return AVERROR(ENOMEM);
        inval_list[inval_count - 1] = ival_buf;
    }
    if (inval_count) {
        ret = vkInvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
                                             inval_list);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    return 0;
}
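/* Optionally flushes non-coherent mappings, then unmaps every buffer. */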
int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers,
                        int flush)
{
    int err = 0;
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;
    int flush_count = 0;
    VkMappedMemoryRange *flush_list = NULL;
    if (flush) {
        for (int i = 0; i < nb_buffers; i++) {
            const VkMappedMemoryRange flush_buf = {
                .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
                .memory = buf[i].mem,
                .size   = VK_WHOLE_SIZE,
            };
            /* Coherent memory needs no explicit flush */
            if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
                continue;
            flush_list = av_fast_realloc(s->scratch, &s->scratch_size,
                                         (++flush_count)*sizeof(*flush_list));
            if (!flush_list)
                return AVERROR(ENOMEM);
            flush_list[flush_count - 1] = flush_buf;
        }
    }
    if (flush_count) {
        ret = vkFlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
                                        flush_list);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL; /* Still unmap the buffers below */
        }
    }
    for (int i = 0; i < nb_buffers; i++)
        vkUnmapMemory(s->hwctx->act_dev, buf[i].mem);

    return err;
}
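/* Destroys a buffer and frees its backing memory, if any. */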
void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf)
{
    VulkanFilterContext *s = avctx->priv;

    if (!buf)
        return;

    if (buf->buf != VK_NULL_HANDLE)
        vkDestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
    if (buf->mem != VK_NULL_HANDLE)
        vkFreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
}
int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl,
                            int offset, int size, VkShaderStageFlagBits stage)
{
    VkPushConstantRange *pc;

    pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts),
                                       pl->push_consts_num + 1);
    if (!pl->push_consts)
        return AVERROR(ENOMEM);

    pc = &pl->push_consts[pl->push_consts_num++];
    memset(pc, 0, sizeof(*pc));

    pc->stageFlags = stage;
    pc->offset     = offset;
    pc->size       = size;

    return 0;
}
int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx)
{
    VkResult ret;
    FFVkExecContext *e;
    VulkanFilterContext *s = avctx->priv;

    int queue_family = s->queue_family_idx;
    int nb_queues    = s->queue_count;

    VkCommandPoolCreateInfo cqueue_create = {
        .sType            = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags            = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex = queue_family,
    };
    VkCommandBufferAllocateInfo cbuf_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = nb_queues,
    };
373 s->hwctx->alloc, &e->
pool);
374 if (
ret != VK_SUCCESS) {
380 cbuf_create.commandPool = e->
pool;
383 ret = vkAllocateCommandBuffers(
s->hwctx->act_dev, &cbuf_create, e->
bufs);
384 if (
ret != VK_SUCCESS) {
    /* Query the device queues we will be using */
    for (int i = 0; i < nb_queues; i++) {
        FFVkQueueCtx *q = &e->queues[i];
        vkGetDeviceQueue(s->hwctx->act_dev, queue_family, i, &q->queue);
    }

    *ctx = e;

    return 0;
}
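/* Waits on (and resets) the current queue's fence, creating it on first
 * use, then begins command buffer recording. */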
int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e)
{
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;
    FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];

    VkCommandBufferBeginInfo cmd_start = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };
    /* Create the fence the first time round, don't wait on it */
    if (!q->fence) {
        VkFenceCreateInfo fence_spawn = {
            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
        };
        ret = vkCreateFence(s->hwctx->act_dev, &fence_spawn, s->hwctx->alloc,
                            &q->fence);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Failed to create queue fence: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    } else {
        vkWaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
        vkResetFences(s->hwctx->act_dev, 1, &q->fence);
    }
    ret = vkBeginCommandBuffer(e->bufs[s->cur_queue_idx], &cmd_start);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Failed to start command recording: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}
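/* Returns the command buffer currently being recorded. */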
461 return e->
bufs[
s->cur_queue_idx];
int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
                       AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag)
{
    VulkanFilterContext *s = avctx->priv;
    FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];

    /* Grow the frame dependency array and keep a reference to the frame
     * alive until this queue's fence signals */
    AVFrame **dst = av_fast_realloc(q->frame_deps, &q->frame_deps_alloc_size,
                                    (q->nb_frame_deps + 1) * sizeof(*dst));
    if (!dst)
        return AVERROR(ENOMEM);
    q->frame_deps = dst;

    q->frame_deps[q->nb_frame_deps] = av_frame_clone(frame);
    if (!q->frame_deps[q->nb_frame_deps]) {
        return AVERROR(ENOMEM);
    }
    q->nb_frame_deps++;
int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e)
{
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;
    FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];

    VkSubmitInfo s_info = {
        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .commandBufferCount   = 1,
        .pCommandBuffers      = &e->bufs[s->cur_queue_idx],

        .pWaitSemaphores      = e->sem_wait,
        .pWaitDstStageMask    = e->sem_wait_dst,
        .waitSemaphoreCount   = e->sem_wait_cnt,

        .pSignalSemaphores    = e->sem_sig,
        .signalSemaphoreCount = e->sem_sig_cnt,
    };
    ret = vkEndCommandBuffer(e->bufs[s->cur_queue_idx]);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    ret = vkQueueSubmit(q->queue, 1, &s_info, q->fence);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }
556 s->cur_queue_idx = (
s->cur_queue_idx + 1) %
s->queue_count;
int ff_vk_add_dep_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e,
                           AVBufferRef **deps, int nb_deps)
{
    VulkanFilterContext *s = avctx->priv;
    FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];

    if (!deps || !nb_deps)
        return 0;

    for (int i = 0; i < nb_deps; i++) {
        q->buf_deps[q->nb_buf_deps++] = deps[i];
        deps[i] = NULL;
    }

    return 0;
}
616 s->hwctx =
s->device->hwctx;
    if (!inlink->hw_frames_ctx) {
        av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
               "hardware frames context on the input.\n");
        return AVERROR(EINVAL);
    }
    /* Default the output size to the input size if unset */
    if (!s->output_width)
        s->output_width  = inlink->w;
    if (!s->output_height)
        s->output_height = inlink->h;
    if (!s->device_ref)
        return AVERROR(ENOMEM);
699 outlink->
w =
s->output_width;
700 outlink->
h =
s->output_height;
    if (!s->device_ref)
        return AVERROR(ENOMEM);

    output_frames_ref = av_hwframe_ctx_alloc(s->device_ref);
    if (!output_frames_ref) {
        err = AVERROR(ENOMEM);
        goto fail;
    }
736 output_frames->
width =
s->output_width;
737 output_frames->
height =
s->output_height;
742 "frames: %d.\n", err);
747 outlink->
w =
s->output_width;
748 outlink->
h =
s->output_height;
VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords,
                              VkFilter filt)
{
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;

    VkSamplerCreateInfo sampler_info = {
        .sType        = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
        .magFilter    = filt,
        .minFilter    = sampler_info.magFilter,
        .mipmapMode   = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST :
                                        VK_SAMPLER_MIPMAP_MODE_LINEAR,
        .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
        .addressModeV = sampler_info.addressModeU,
        .addressModeW = sampler_info.addressModeU,
        .anisotropyEnable = VK_FALSE,
        .compareOp    = VK_COMPARE_OP_NEVER,
        .borderColor  = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
        .unnormalizedCoordinates = unnorm_coords,
    };
    VkSampler *sampler = create_sampler(s);
    if (!sampler)
        return NULL;
    ret = vkCreateSampler(s->hwctx->act_dev, &sampler_info,
                          s->hwctx->alloc, sampler);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Unable to init sampler: %s\n",
               ff_vk_ret2str(ret));
        return NULL;
    }

    return sampler;
}
const char *ff_vk_shader_rep_fmts(enum AVPixelFormat pixfmt)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pixfmt);
    const int high = desc->comp[0].depth > 8;
    return high ? "rgba16f" : "rgba8";
}
static void destroy_imageview(void *opaque, uint8_t *data)
{
    VulkanFilterContext *s = opaque;
    ImageViewCtx *iv = (ImageViewCtx *)data;
    vkDestroyImageView(s->hwctx->act_dev, iv->view, s->hwctx->alloc);
    av_free(iv);
}
int ff_vk_create_imageview(AVFilterContext *avctx, VkImageView *v, VkImage img,
                           VkFormat fmt, const VkComponentMapping map)
{
    VulkanFilterContext *s = avctx->priv;

    VkImageViewCreateInfo imgview_spawn = {
        .sType      = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
        .image      = img,
        .viewType   = VK_IMAGE_VIEW_TYPE_2D,
        .format     = fmt,
        .components = map,
        .subresourceRange = {
            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
            .levelCount = 1,
            .layerCount = 1,
        },
    };
    VkResult ret = vkCreateImageView(s->hwctx->act_dev, &imgview_spawn,
                                     s->hwctx->alloc, &iv->view);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Failed to create imageview: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }
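/* Shaders accumulate their GLSL source in an AVBPrint buffer until they
 * are compiled to SPIR-V. */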
SPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, VulkanPipeline *pl,
                               const char *name, VkShaderStageFlags stage)
{
    SPIRVShader *shd = create_shader(pl);
    if (!shd)
        return NULL;

    av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED);

    shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
    shd->shader.stage = stage;
    shd->name = name;

    GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) );

    return shd;
}
917 "local_size_y = %i, local_size_z = %i) in;\n\n",
static void print_shader(AVFilterContext *avctx, SPIRVShader *shd, int prio)
{
    int line = 0;
    const char *p = shd->src.str;
    const char *start = p;

    AVBPrint buf;
    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);

    /* Prefix every source line with its number for readable error output */
    for (int i = 0; i < strlen(p); i++) {
        if (p[i] == '\n') {
            av_bprintf(&buf, "%i\t", ++line);
            av_bprint_append_data(&buf, start, &p[i] - start + 1);
            start = &p[i + 1];
        }
    }
    av_log(avctx, prio, "Shader %s: \n%s", shd->name, buf.str);
    av_bprint_finalize(&buf, NULL);
}
int ff_vk_compile_shader(AVFilterContext *avctx, SPIRVShader *shd,
                         const char *entrypoint)
{
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;
    VkShaderModuleCreateInfo shader_create;

    shd->shader.pName = entrypoint;

    /* res holds the compiled SPIR-V blob (data pointer plus size in bytes)
     * produced by the GLSL-to-SPIR-V compilation step */
    shader_create.sType    = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
    shader_create.pNext    = NULL;
    shader_create.codeSize = res->size;
    shader_create.flags    = 0;
    shader_create.pCode    = res->data;
    ret = vkCreateShaderModule(s->hwctx->act_dev, &shader_create, NULL,
                               &shd->shader.module);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Unable to create shader module: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    av_log(avctx, AV_LOG_VERBOSE, "Shader %s compiled! Size: %zu bytes\n",
           shd->name, shader_create.codeSize);

    return 0;
}
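/* Per-descriptor-type properties: update-struct size, GLSL keyword, and
 * which qualifiers the declaration needs. */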
static const struct descriptor_props {
    size_t struct_size; /* Size of the opaque which updates the descriptor */
    const char *type;
    int is_uniform;
    int mem_quali;      /* Can use a memory qualifier */
    int dim_needed;     /* Must indicate dimension */
    int buf_content;    /* Must indicate buffer contents */
} descriptor_props[] = {
    [VK_DESCRIPTOR_TYPE_SAMPLER]                = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "texture",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "image",         1, 1, 1, 0, },
    [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT]       = { sizeof(VkDescriptorImageInfo),  "subpassInput",  1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER]         = { sizeof(VkDescriptorBufferInfo), NULL,            1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER]         = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), NULL,            1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "samplerBuffer", 1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "imageBuffer",   1, 0, 0, 0, },
};
int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
                             SPIRVShader *shd, VulkanDescriptorSetBinding *desc,
                             int num, int only_print_to_shader)
{
    VkResult ret;
    VkDescriptorSetLayout *layout;
    VulkanFilterContext *s = avctx->priv;

    if (only_print_to_shader)
        goto print;
    pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout),
                                       pl->descriptor_sets_num + 1);
    if (!pl->desc_layout)
        return AVERROR(ENOMEM);
    layout = &pl->desc_layout[pl->descriptor_sets_num];

    VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 };
    VkDescriptorSetLayoutBinding *desc_binding;

    desc_binding = av_mallocz(sizeof(*desc_binding)*num);
    if (!desc_binding)
        return AVERROR(ENOMEM);
    for (int i = 0; i < num; i++) {
        desc_binding[i].binding            = i;
        desc_binding[i].descriptorType     = desc[i].type;
        desc_binding[i].descriptorCount    = FFMAX(desc[i].elems, 1);
        desc_binding[i].stageFlags         = desc[i].stages;
        desc_binding[i].pImmutableSamplers = desc[i].samplers;
    }
    desc_create_layout.sType        = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
    desc_create_layout.pBindings    = desc_binding;
    desc_create_layout.bindingCount = num;
    ret = vkCreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
                                      s->hwctx->alloc, layout);
    av_free(desc_binding);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set layout: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }
    /* Pool each descriptor by type and update pool counts */
    for (int i = 0; i < num; i++) {
        int j;
        for (j = 0; j < pl->pool_size_desc_num; j++)
            if (pl->pool_size_desc[j].type == desc[i].type)
                break;
        if (j >= pl->pool_size_desc_num) {
            pl->pool_size_desc = av_realloc_array(pl->pool_size_desc,
                                                  sizeof(*pl->pool_size_desc),
                                                  ++pl->pool_size_desc_num);
            if (!pl->pool_size_desc)
                return AVERROR(ENOMEM);
            memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize));
            pl->pool_size_desc[j].type = desc[i].type;
        }
        pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1);
    }
    VkDescriptorUpdateTemplateCreateInfo *dt;
    VkDescriptorUpdateTemplateEntry *des_entries;

    /* Create update templates for the descriptor writes */
    des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry));
    if (!des_entries)
        return AVERROR(ENOMEM);
    for (int i = 0; i < num; i++) {
        des_entries[i].dstBinding      = i;
        des_entries[i].descriptorType  = desc[i].type;
        des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1);
        des_entries[i].dstArrayElement = 0;
    }
    dt = &pl->desc_template_info[pl->descriptor_sets_num];
    memset(dt, 0, sizeof(*dt));

    dt->sType                      = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
    dt->templateType               = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
    dt->descriptorSetLayout        = *layout;
    dt->pDescriptorUpdateEntries   = des_entries;
    dt->descriptorUpdateEntryCount = num;

    pl->descriptor_sets_num++;
print:
    /* Write the shader-side GLSL declarations */
    for (int i = 0; i < num; i++) {
        GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i);
        if (desc[i].mem_layout)
            GLSLA(", %s", desc[i].mem_layout);
        GLSLA(")");
        GLSLA(" %s", descriptor_props[desc[i].type].type);
        GLSLA(" %s", desc[i].name);
        if (desc[i].buf_content)
            GLSLA(" {\n    %s\n}", desc[i].buf_content);
        else if (desc[i].elems > 0)
            GLSLA("[%i]", desc[i].elems);
        GLSLA(";\n");
    }

    return 0;
}
void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
                                 int set_id)
{
    VulkanFilterContext *s = avctx->priv;

    vkUpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
                                      pl->desc_set[set_id],
                                      pl->desc_template[set_id],
                                      s);
}
void ff_vk_update_push_exec(AVFilterContext *avctx, FFVkExecContext *e,
                            VkShaderStageFlagBits stage, int offset,
                            size_t size, void *src)
{
    VulkanFilterContext *s = avctx->priv;

    vkCmdPushConstants(e->bufs[s->cur_queue_idx], e->bound_pl->pipeline_layout,
                       stage, offset, size, src);
}
int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
{
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;

    VkDescriptorPoolCreateInfo pool_create_info = {
        .sType         = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
        .poolSizeCount = pl->pool_size_desc_num,
        .pPoolSizes    = pl->pool_size_desc,
        .maxSets       = pl->descriptor_sets_num,
    };

    ret = vkCreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
                                 s->hwctx->alloc, &pl->desc_pool);
    av_freep(&pl->pool_size_desc);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set pool: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }
    VkDescriptorSetAllocateInfo alloc_info = {
        .sType              = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
        .descriptorPool     = pl->desc_pool,
        .descriptorSetCount = pl->descriptor_sets_num,
        .pSetLayouts        = pl->desc_layout,
    };

    ret = vkAllocateDescriptorSets(s->hwctx->act_dev, &alloc_info,
                                   pl->desc_set);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }
    VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
        .sType                  = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        .setLayoutCount         = pl->descriptor_sets_num,
        .pSetLayouts            = pl->desc_layout,
        .pushConstantRangeCount = pl->push_consts_num,
        .pPushConstantRanges    = pl->push_consts,
    };

    ret = vkCreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout,
                                 s->hwctx->alloc, &pl->pipeline_layout);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }
    /* Finally, create the descriptor update templates for each set */
    for (int i = 0; i < pl->descriptor_sets_num; i++) {
        VkDescriptorUpdateTemplateCreateInfo *desc_template_info =
            &pl->desc_template_info[i];

        desc_template_info->pipelineLayout = pl->pipeline_layout;
        ret = vkCreateDescriptorUpdateTemplate(s->hwctx->act_dev,
                                               desc_template_info,
                                               s->hwctx->alloc,
                                               &pl->desc_template[i]);
        av_free((void *)desc_template_info->pDescriptorUpdateEntries);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor "
                   "template: %s\n", ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    return 0;
}
VulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx)
{
    return create_pipeline(avctx->priv);
}
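/* Initializes a compute pipeline from the first compute-stage shader
 * attached to the pipeline object. */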
int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl)
{
    int i;
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;

    VkComputePipelineCreateInfo pipe = {
        .sType  = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
        .layout = pl->pipeline_layout,
    };

    for (i = 0; i < pl->shaders_num; i++) {
        if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) {
            pipe.stage = pl->shaders[i]->shader;
            break;
        }
    }
    if (i == pl->shaders_num) {
        av_log(avctx, AV_LOG_ERROR, "Can't init compute pipeline, no compute shader\n");
        return AVERROR(EINVAL);
    }
    ret = vkCreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe,
                                   s->hwctx->alloc, &pl->pipeline);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }
1311 pl->
bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;
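/* Cleanup helpers: all queues are waited upon before any state they may
 * still be using is destroyed. */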
    for (int i = 0; i < s->queue_count; i++) {
        FFVkQueueCtx *q = &e->queues[i];

        /* Make sure the queue has finished executing */
        if (q->fence) {
            vkWaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
            vkResetFences(s->hwctx->act_dev, 1, &q->fence);
            vkDestroyFence(s->hwctx->act_dev, q->fence, s->hwctx->alloc);
        }
    }
    if (e->bufs)
        vkFreeCommandBuffers(s->hwctx->act_dev, e->pool,
                             s->queue_count, e->bufs);
    if (e->pool)
        vkDestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);
    for (int i = 0; i < pl->shaders_num; i++) {
        SPIRVShader *shd = pl->shaders[i];
        av_bprint_finalize(&shd->src, NULL);
        vkDestroyShaderModule(s->hwctx->act_dev, shd->shader.module,
                              s->hwctx->alloc);
    }

    vkDestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc);
    for (int i = 0; i < pl->descriptor_sets_num; i++) {
        if (pl->desc_template && pl->desc_template[i])
            vkDestroyDescriptorUpdateTemplate(s->hwctx->act_dev, pl->desc_template[i],
                                              s->hwctx->alloc);
        if (pl->desc_layout && pl->desc_layout[i])
            vkDestroyDescriptorSetLayout(s->hwctx->act_dev, pl->desc_layout[i],
                                         s->hwctx->alloc);
    }
    if (pl->desc_pool)
        vkDestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool,
                                s->hwctx->alloc);
    for (int i = 0; i < s->exec_ctx_num; i++)
        free_exec_ctx(s, s->exec_ctx[i]);
    av_freep(&s->exec_ctx);

    for (int i = 0; i < s->samplers_num; i++) {
        vkDestroySampler(s->hwctx->act_dev, *s->samplers[i], s->hwctx->alloc);
        av_free(s->samplers[i]);
    }
    av_freep(&s->samplers);
    for (int i = 0; i < s->pipelines_num; i++)
        free_pipeline(s, s->pipelines[i]);
    av_freep(&s->pipelines);

    av_freep(&s->scratch);
    s->scratch_size = 0;