19 #define VK_NO_PROTOTYPES
20 #define VK_ENABLE_BETA_EXTENSIONS
24 #include <versionhelpers.h>
46 #include <drm_fourcc.h>
49 #include <va/va_drmcommon.h>
/* Wrapper for CUDA driver-API calls in the Vulkan<->CUDA interop paths.
 * Expands to FF_CUDA_CHECK_DL(cuda_cu, cu, x), which presumably checks the
 * CUresult and maps it to an AVERROR (defined in cuda_check.h — confirm).
 * NOTE(review): relies on locals named `cuda_cu` and `cu` being in scope at
 * every expansion site. */
57 #define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
85 VkPhysicalDeviceProperties2
props;
86 VkPhysicalDeviceMemoryProperties
mprops;
87 VkPhysicalDeviceExternalMemoryHostPropertiesEXT
hprops;
144 #define ADD_VAL_TO_LIST(list, count, val) \
146 list = av_realloc_array(list, sizeof(*list), ++count); \
148 err = AVERROR(ENOMEM); \
151 list[count - 1] = av_strdup(val); \
152 if (!list[count - 1]) { \
153 err = AVERROR(ENOMEM); \
158 #define RELEASE_PROPS(props, count) \
160 for (int i = 0; i < count; i++) \
161 av_free((void *)((props)[i])); \
162 av_free((void *)props); \
165 static const struct {
184 {
AV_PIX_FMT_YUV420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
189 {
AV_PIX_FMT_YUV422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
194 {
AV_PIX_FMT_YUV444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
199 {
AV_PIX_FMT_YUVA420P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
200 {
AV_PIX_FMT_YUVA420P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
202 {
AV_PIX_FMT_YUVA420P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
204 {
AV_PIX_FMT_YUVA422P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
205 {
AV_PIX_FMT_YUVA422P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
206 {
AV_PIX_FMT_YUVA422P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
207 {
AV_PIX_FMT_YUVA422P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
209 {
AV_PIX_FMT_YUVA444P, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
210 {
AV_PIX_FMT_YUVA444P10, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
211 {
AV_PIX_FMT_YUVA444P12, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
212 {
AV_PIX_FMT_YUVA444P16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
236 {
AV_PIX_FMT_GBRAP, { VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM, VK_FORMAT_R8_UNORM } },
237 {
AV_PIX_FMT_GBRAP16, { VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM, VK_FORMAT_R16_UNORM } },
238 {
AV_PIX_FMT_GBRPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
239 {
AV_PIX_FMT_GBRAPF32, { VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_SFLOAT } },
252 const VkBaseInStructure *in = chain;
254 if (in->sType == stype)
265 VkBaseOutStructure *
out = chain;
288 VkFormatFeatureFlags
flags;
289 VkFormatProperties2 prop = {
290 .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
292 vk->GetPhysicalDeviceFormatProperties2(hwctx->
phys_dev, fmt[
i], &prop);
293 flags =
linear ? prop.formatProperties.linearTilingFeatures :
294 prop.formatProperties.optimalTilingFeatures;
307 static const char *lib_names[] = {
310 #elif defined(__APPLE__)
321 p->
libvulkan = dlopen(lib_names[
i], RTLD_NOW | RTLD_LOCAL);
358 { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_MEMORY },
359 { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, FF_VK_EXT_EXTERNAL_WIN32_SEM },
/* Stringification helper for the VkResult-to-string switch that follows:
 * CASE(X) expands to `case X: return "X";`, so each enumerator is returned
 * as its own literal name. NOTE(review): whether it is #undef'd after the
 * switch is not visible in this fragment — confirm to avoid leaking the
 * macro into the rest of the file. */
366 #define CASE(VAL) case VAL: return #VAL
372 CASE(VK_EVENT_RESET);
374 CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
375 CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
376 CASE(VK_ERROR_INITIALIZATION_FAILED);
377 CASE(VK_ERROR_DEVICE_LOST);
378 CASE(VK_ERROR_MEMORY_MAP_FAILED);
379 CASE(VK_ERROR_LAYER_NOT_PRESENT);
380 CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
381 CASE(VK_ERROR_FEATURE_NOT_PRESENT);
382 CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
383 CASE(VK_ERROR_TOO_MANY_OBJECTS);
384 CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
385 CASE(VK_ERROR_FRAGMENTED_POOL);
386 CASE(VK_ERROR_SURFACE_LOST_KHR);
387 CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
388 CASE(VK_SUBOPTIMAL_KHR);
389 CASE(VK_ERROR_OUT_OF_DATE_KHR);
390 CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
391 CASE(VK_ERROR_VALIDATION_FAILED_EXT);
392 CASE(VK_ERROR_INVALID_SHADER_NV);
393 CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
394 CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
395 CASE(VK_ERROR_NOT_PERMITTED_EXT);
396 CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT);
397 CASE(VK_ERROR_INVALID_DEVICE_ADDRESS_EXT);
398 CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT);
399 default:
return "Unknown error";
405 VkDebugUtilsMessageTypeFlagsEXT messageType,
406 const VkDebugUtilsMessengerCallbackDataEXT *
data,
413 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l =
AV_LOG_VERBOSE;
break;
414 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: l =
AV_LOG_INFO;
break;
415 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l =
AV_LOG_WARNING;
break;
416 case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT: l =
AV_LOG_ERROR;
break;
421 for (
int i = 0;
i <
data->cmdBufLabelCount;
i++)
428 const char *
const **dst, uint32_t *num,
int debug)
431 const char **extension_names =
NULL;
435 int err = 0, found, extensions_found = 0;
438 int optional_exts_num;
439 uint32_t sup_ext_count;
440 char *user_exts_str =
NULL;
442 VkExtensionProperties *sup_ext;
452 if (!user_exts_str) {
457 vk->EnumerateInstanceExtensionProperties(
NULL, &sup_ext_count,
NULL);
458 sup_ext =
av_malloc_array(sup_ext_count,
sizeof(VkExtensionProperties));
461 vk->EnumerateInstanceExtensionProperties(
NULL, &sup_ext_count, sup_ext);
469 if (!user_exts_str) {
474 vk->EnumerateDeviceExtensionProperties(hwctx->
phys_dev,
NULL,
475 &sup_ext_count,
NULL);
476 sup_ext =
av_malloc_array(sup_ext_count,
sizeof(VkExtensionProperties));
479 vk->EnumerateDeviceExtensionProperties(hwctx->
phys_dev,
NULL,
480 &sup_ext_count, sup_ext);
483 for (
int i = 0;
i < optional_exts_num;
i++) {
484 tstr = optional_exts[
i].
name;
486 for (
int j = 0; j < sup_ext_count; j++) {
487 if (!strcmp(tstr, sup_ext[j].extensionName)) {
501 tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
503 for (
int j = 0; j < sup_ext_count; j++) {
504 if (!strcmp(tstr, sup_ext[j].extensionName)) {
522 char *save, *token =
av_strtok(user_exts_str,
"+", &save);
525 for (
int j = 0; j < sup_ext_count; j++) {
526 if (!strcmp(token, sup_ext[j].extensionName)) {
542 *dst = extension_names;
543 *num = extensions_found;
557 const char *
const **dst, uint32_t *num,
560 static const char default_layer[] = {
"VK_LAYER_KHRONOS_validation" };
562 int found = 0, err = 0;
566 uint32_t sup_layer_count;
567 VkLayerProperties *sup_layers;
570 char *user_layers_str =
NULL;
573 const char **enabled_layers =
NULL;
574 uint32_t enabled_layers_count = 0;
577 int debug = debug_opt && strtol(debug_opt->
value,
NULL, 10);
580 if (debug_opt && !debug)
583 vk->EnumerateInstanceLayerProperties(&sup_layer_count,
NULL);
584 sup_layers =
av_malloc_array(sup_layer_count,
sizeof(VkLayerProperties));
587 vk->EnumerateInstanceLayerProperties(&sup_layer_count, sup_layers);
590 for (
int i = 0;
i < sup_layer_count;
i++)
596 for (
int i = 0;
i < sup_layer_count;
i++) {
597 if (!strcmp(default_layer, sup_layers[
i].layerName)) {
612 if (!user_layers_str) {
617 token =
av_strtok(user_layers_str,
"+", &save);
620 if (!strcmp(default_layer, token)) {
630 for (
int j = 0; j < sup_layer_count; j++) {
631 if (!strcmp(token, sup_layers[j].layerName)) {
641 "Validation Layer \"%s\" not support.\n", token);
653 *dst = enabled_layers;
654 *num = enabled_layers_count;
668 int err = 0, debug_mode = 0;
673 VkApplicationInfo application_info = {
674 .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
675 .pEngineName =
"libavutil",
676 .apiVersion = VK_API_VERSION_1_2,
681 VkInstanceCreateInfo inst_props = {
682 .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
683 .pApplicationInfo = &application_info,
699 &inst_props.enabledLayerCount, &debug_mode);
705 &inst_props.enabledExtensionCount, debug_mode);
712 ret = vk->CreateInstance(&inst_props, hwctx->
alloc, &hwctx->
inst);
715 if (
ret != VK_SUCCESS) {
729 VkDebugUtilsMessengerCreateInfoEXT dbg = {
730 .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
731 .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
732 VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
733 VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
734 VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
735 .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
736 VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
737 VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
742 vk->CreateDebugUtilsMessengerEXT(hwctx->
inst, &dbg,
749 RELEASE_PROPS(inst_props.ppEnabledLayerNames, inst_props.enabledLayerCount);
765 case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU:
return "integrated";
766 case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU:
return "discrete";
767 case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU:
return "virtual";
768 case VK_PHYSICAL_DEVICE_TYPE_CPU:
return "software";
769 default:
return "unknown";
776 int err = 0, choice = -1;
781 VkPhysicalDevice *devices =
NULL;
782 VkPhysicalDeviceIDProperties *idp =
NULL;
783 VkPhysicalDeviceProperties2 *prop =
NULL;
786 ret = vk->EnumeratePhysicalDevices(hwctx->
inst, &num,
NULL);
787 if (
ret != VK_SUCCESS || !num) {
796 ret = vk->EnumeratePhysicalDevices(hwctx->
inst, &num, devices);
797 if (
ret != VK_SUCCESS) {
817 for (
int i = 0;
i < num;
i++) {
818 idp[
i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
819 prop[
i].sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
820 prop[
i].pNext = &idp[
i];
822 vk->GetPhysicalDeviceProperties2(devices[
i], &prop[
i]);
824 prop[
i].properties.deviceName,
826 prop[
i].properties.deviceID);
830 for (
int i = 0;
i < num;
i++) {
831 if (!strncmp(idp[
i].deviceUUID, select->
uuid, VK_UUID_SIZE)) {
839 }
else if (select->
name) {
841 for (
int i = 0;
i < num;
i++) {
842 if (strstr(prop[
i].properties.deviceName, select->
name)) {
853 for (
int i = 0;
i < num;
i++) {
854 if (select->
pci_device == prop[
i].properties.deviceID) {
865 for (
int i = 0;
i < num;
i++) {
866 if (select->
vendor_id == prop[
i].properties.vendorID) {
876 if (select->
index < num) {
877 choice = select->
index;
889 choice, prop[choice].properties.deviceName,
891 prop[choice].properties.deviceID);
904 VkQueueFlagBits
flags)
907 uint32_t min_score = UINT32_MAX;
909 for (
int i = 0;
i < num_qf;
i++) {
910 const VkQueueFlagBits qflags = qf[
i].queueFlags;
911 if (qflags &
flags) {
912 uint32_t score =
av_popcount(qflags) + qf[
i].timestampValidBits;
913 if (score < min_score) {
921 qf[
index].timestampValidBits++;
930 VkQueueFamilyProperties *qf =
NULL;
934 int graph_index, comp_index, tx_index, enc_index, dec_index;
937 vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->
phys_dev, &num,
NULL);
949 vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->
phys_dev, &num, qf);
952 for (
int i = 0;
i < num;
i++) {
954 ((qf[
i].queueFlags) & VK_QUEUE_GRAPHICS_BIT) ?
" graphics" :
"",
955 ((qf[
i].queueFlags) & VK_QUEUE_COMPUTE_BIT) ?
" compute" :
"",
956 ((qf[
i].queueFlags) & VK_QUEUE_TRANSFER_BIT) ?
" transfer" :
"",
957 ((qf[
i].queueFlags) & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) ?
" encode" :
"",
958 ((qf[
i].queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) ?
" decode" :
"",
959 ((qf[
i].queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ?
" sparse" :
"",
960 ((qf[
i].queueFlags) & VK_QUEUE_PROTECTED_BIT) ?
" protected" :
"",
965 qf[
i].timestampValidBits = 0;
988 #define SETUP_QUEUE(qf_idx) \
991 int qc = qf[fidx].queueCount; \
992 VkDeviceQueueCreateInfo *pc; \
994 if (fidx == graph_index) { \
995 hwctx->queue_family_index = fidx; \
996 hwctx->nb_graphics_queues = qc; \
999 if (fidx == comp_index) { \
1000 hwctx->queue_family_comp_index = fidx; \
1001 hwctx->nb_comp_queues = qc; \
1004 if (fidx == tx_index) { \
1005 hwctx->queue_family_tx_index = fidx; \
1006 hwctx->nb_tx_queues = qc; \
1009 if (fidx == enc_index) { \
1010 hwctx->queue_family_encode_index = fidx; \
1011 hwctx->nb_encode_queues = qc; \
1014 if (fidx == dec_index) { \
1015 hwctx->queue_family_decode_index = fidx; \
1016 hwctx->nb_decode_queues = qc; \
1020 pc = av_realloc((void *)cd->pQueueCreateInfos, \
1021 sizeof(*pc) * (cd->queueCreateInfoCount + 1)); \
1024 return AVERROR(ENOMEM); \
1026 cd->pQueueCreateInfos = pc; \
1027 pc = &pc[cd->queueCreateInfoCount]; \
1029 weights = av_malloc(qc * sizeof(float)); \
1032 return AVERROR(ENOMEM); \
1035 memset(pc, 0, sizeof(*pc)); \
1036 pc->sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; \
1037 pc->queueFamilyIndex = fidx; \
1038 pc->queueCount = qc; \
1039 pc->pQueuePriorities = weights; \
1041 for (int i = 0; i < qc; i++) \
1042 weights[i] = 1.0f / qc; \
1044 cd->queueCreateInfoCount++; \
1061 int queue_family_index,
int num_queues)
1069 .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
1070 .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
1071 .queueFamilyIndex = queue_family_index,
1073 VkCommandBufferAllocateInfo cbuf_create = {
1074 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
1075 .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
1076 .commandBufferCount = num_queues,
1084 if (
ret != VK_SUCCESS) {
1094 cbuf_create.commandPool = cmd->
pool;
1097 ret = vk->AllocateCommandBuffers(hwctx->
act_dev, &cbuf_create, cmd->
bufs);
1098 if (
ret != VK_SUCCESS) {
1109 for (
int i = 0;
i < num_queues;
i++) {
1111 vk->GetDeviceQueue(hwctx->
act_dev, queue_family_index,
i, &q->
queue);
1130 vk->WaitForFences(hwctx->
act_dev, 1, &q->
fence, VK_TRUE, UINT64_MAX);
1177 VkCommandBufferBeginInfo cmd_start = {
1178 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
1179 .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
1184 VkFenceCreateInfo fence_spawn = {
1185 .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
1189 if (
ret != VK_SUCCESS) {
1195 vk->WaitForFences(hwctx->
act_dev, 1, &q->
fence, VK_TRUE, UINT64_MAX);
1203 if (
ret != VK_SUCCESS) {
1218 if (!deps || !nb_deps)
1228 for (
int i = 0;
i < nb_deps;
i++) {
1243 VkSubmitInfo *s_info,
AVVkFrame *
f,
int synchronous)
1251 if (
ret != VK_SUCCESS) {
1259 s_info->commandBufferCount = 1;
1262 if (
ret != VK_SUCCESS) {
1270 for (
int i = 0;
i < s_info->signalSemaphoreCount;
i++)
1277 vk->WaitForFences(hwctx->
act_dev, 1, &q->
fence, VK_TRUE, UINT64_MAX);
1297 vk->DestroyDebugUtilsMessengerEXT(hwctx->
inst, p->
debug_ctx,
1301 vk->DestroyInstance(hwctx->
inst, hwctx->
alloc);
1326 VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features = {
1327 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
1329 VkPhysicalDeviceVulkan12Features dev_features_1_2 = {
1330 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
1331 .pNext = &timeline_features,
1333 VkPhysicalDeviceVulkan11Features dev_features_1_1 = {
1334 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
1335 .pNext = &dev_features_1_2,
1337 VkPhysicalDeviceFeatures2 dev_features = {
1338 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
1339 .pNext = &dev_features_1_1,
1342 VkDeviceCreateInfo dev_info = {
1343 .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
1347 hwctx->
device_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
1362 vk->GetPhysicalDeviceFeatures2(hwctx->
phys_dev, &dev_features);
/* Copy one VkPhysicalDeviceFeatures member, NAME, from the locally queried
 * `dev_features` (filled by vkGetPhysicalDeviceFeatures2 above) into the
 * destination VkPhysicalDeviceFeatures2 struct DST. Used to enable only the
 * specific device features the code cares about rather than the full set.
 * NOTE(review): expansion requires `dev_features` in scope at the call site. */
1365 #define COPY_FEATURE(DST, NAME) (DST).features.NAME = dev_features.features.NAME;
1375 if (!timeline_features.timelineSemaphore) {
1387 &dev_info.enabledExtensionCount, 0))) {
1388 for (
int i = 0;
i < dev_info.queueCreateInfoCount;
i++)
1389 av_free((
void *)dev_info.pQueueCreateInfos[
i].pQueuePriorities);
1390 av_free((
void *)dev_info.pQueueCreateInfos);
1397 for (
int i = 0;
i < dev_info.queueCreateInfoCount;
i++)
1398 av_free((
void *)dev_info.pQueueCreateInfos[
i].pQueuePriorities);
1399 av_free((
void *)dev_info.pQueueCreateInfos);
1401 if (
ret != VK_SUCCESS) {
1404 for (
int i = 0;
i < dev_info.enabledExtensionCount;
i++)
1405 av_free((
void *)dev_info.ppEnabledExtensionNames[
i]);
1406 av_free((
void *)dev_info.ppEnabledExtensionNames);
1436 int graph_index, comp_index, tx_index, enc_index, dec_index;
1455 p->
props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1457 p->
hprops.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT;
1459 vk->GetPhysicalDeviceProperties2(hwctx->
phys_dev, &p->
props);
1461 p->
props.properties.deviceName);
1464 p->
props.properties.limits.optimalBufferCopyRowPitchAlignment);
1466 p->
props.properties.limits.minMemoryMapAlignment);
1469 p->
hprops.minImportedHostPointerAlignment);
1474 vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->
phys_dev, &queue_num,
NULL);
1486 #define CHECK_QUEUE(type, required, fidx, ctx_qf, qc) \
1488 if (ctx_qf < 0 && required) { \
1489 av_log(ctx, AV_LOG_ERROR, "%s queue family is required, but marked as missing" \
1490 " in the context!\n", type); \
1491 return AVERROR(EINVAL); \
1492 } else if (fidx < 0 || ctx_qf < 0) { \
1494 } else if (ctx_qf >= queue_num) { \
1495 av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
1496 type, ctx_qf, queue_num); \
1497 return AVERROR(EINVAL); \
1500 av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (queues: %i)" \
1501 " for%s%s%s%s%s\n", \
1503 ctx_qf == graph_index ? " graphics" : "", \
1504 ctx_qf == comp_index ? " compute" : "", \
1505 ctx_qf == tx_index ? " transfers" : "", \
1506 ctx_qf == enc_index ? " encode" : "", \
1507 ctx_qf == dec_index ? " decode" : ""); \
1508 graph_index = (ctx_qf == graph_index) ? -1 : graph_index; \
1509 comp_index = (ctx_qf == comp_index) ? -1 : comp_index; \
1510 tx_index = (ctx_qf == tx_index) ? -1 : tx_index; \
1511 enc_index = (ctx_qf == enc_index) ? -1 : enc_index; \
1512 dec_index = (ctx_qf == dec_index) ? -1 : dec_index; \
1513 p->qfs[p->num_qfs++] = ctx_qf; \
1525 vk->GetPhysicalDeviceMemoryProperties(hwctx->
phys_dev, &p->
mprops);
1534 if (device && device[0]) {
1536 dev_select.
index = strtol(device, &end, 10);
1537 if (end == device) {
1538 dev_select.
index = 0;
1539 dev_select.
name = device;
1555 switch(src_ctx->
type) {
1561 const char *vendor = vaQueryVendorString(src_hwctx->
display);
1567 if (strstr(vendor,
"Intel"))
1568 dev_select.vendor_id = 0x8086;
1569 if (strstr(vendor,
"AMD"))
1570 dev_select.vendor_id = 0x1002;
1578 drmDevice *drm_dev_info;
1579 int err = drmGetDevice(src_hwctx->
fd, &drm_dev_info);
1585 if (drm_dev_info->bustype == DRM_BUS_PCI)
1586 dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;
1588 drmFreeDevice(&drm_dev_info);
1598 CudaFunctions *cu = cu_internal->
cuda_dl;
1600 int ret =
CHECK_CU(cu->cuDeviceGetUuid((CUuuid *)&dev_select.uuid,
1607 dev_select.has_uuid = 1;
1618 const void *hwconfig,
1650 constraints->
max_width = p->
props.properties.limits.maxImageDimension2D;
1651 constraints->
max_height = p->
props.properties.limits.maxImageDimension2D;
1664 VkMemoryPropertyFlagBits req_flags,
const void *alloc_extension,
1665 VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
1672 VkMemoryAllocateInfo alloc_info = {
1673 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
1674 .pNext = alloc_extension,
1675 .allocationSize = req->size,
1680 for (
int i = 0;
i < p->
mprops.memoryTypeCount;
i++) {
1681 const VkMemoryType *
type = &p->
mprops.memoryTypes[
i];
1684 if (!(req->memoryTypeBits & (1 <<
i)))
1688 if ((
type->propertyFlags & req_flags) != req_flags)
1692 if (req->size > p->
mprops.memoryHeaps[
type->heapIndex].size)
1706 alloc_info.memoryTypeIndex =
index;
1708 ret = vk->AllocateMemory(dev_hwctx->
act_dev, &alloc_info,
1709 dev_hwctx->
alloc, mem);
1710 if (
ret != VK_SUCCESS) {
1716 *mem_flags |= p->
mprops.memoryTypes[
index].propertyFlags;
1729 if (internal->cuda_fc_ref) {
1735 CudaFunctions *cu = cu_internal->
cuda_dl;
1738 if (internal->cu_sem[
i])
1739 CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[
i]));
1740 if (internal->cu_mma[
i])
1741 CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[
i]));
1742 if (internal->ext_mem[
i])
1743 CHECK_CU(cu->cuDestroyExternalMemory(internal->ext_mem[
i]));
1745 if (internal->ext_sem_handle[
i])
1746 CloseHandle(internal->ext_sem_handle[
i]);
1747 if (internal->ext_mem_handle[
i])
1748 CloseHandle(internal->ext_mem_handle[
i]);
1770 vk->DeviceWaitIdle(hwctx->
act_dev);
1784 void *alloc_pnext,
size_t alloc_pnext_stride)
1795 VkMemoryRequirements cont_memory_requirements = { 0 };
1797 int cont_mem_size = 0;
1803 VkImageMemoryRequirementsInfo2 req_desc = {
1804 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
1807 VkMemoryDedicatedAllocateInfo ded_alloc = {
1808 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
1809 .pNext = (
void *)(((uint8_t *)alloc_pnext) +
i*alloc_pnext_stride),
1811 VkMemoryDedicatedRequirements ded_req = {
1812 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
1814 VkMemoryRequirements2 req = {
1815 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
1819 vk->GetImageMemoryRequirements2(hwctx->
act_dev, &req_desc, &req);
1821 if (
f->tiling == VK_IMAGE_TILING_LINEAR)
1822 req.memoryRequirements.size =
FFALIGN(req.memoryRequirements.size,
1823 p->
props.properties.limits.minMemoryMapAlignment);
1826 if (ded_req.requiresDedicatedAllocation) {
1828 "device requires dedicated image allocation!\n");
1831 cont_memory_requirements = req.memoryRequirements;
1832 }
else if (cont_memory_requirements.memoryTypeBits !=
1833 req.memoryRequirements.memoryTypeBits) {
1835 "and %i, cannot allocate in a single region!\n",
1840 cont_mem_size_list[
i] =
FFALIGN(req.memoryRequirements.size,
1841 req.memoryRequirements.alignment);
1842 cont_mem_size += cont_mem_size_list[
i];
1847 use_ded_mem = ded_req.prefersDedicatedAllocation |
1848 ded_req.requiresDedicatedAllocation;
1850 ded_alloc.image =
f->img[
i];
1854 f->tiling == VK_IMAGE_TILING_LINEAR ?
1855 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
1856 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1857 use_ded_mem ? &ded_alloc : (
void *)ded_alloc.pNext,
1858 &
f->flags, &
f->mem[
i])))
1861 f->size[
i] = req.memoryRequirements.size;
1862 bind_info[
i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
1863 bind_info[
i].image =
f->img[
i];
1864 bind_info[
i].memory =
f->mem[
i];
1868 cont_memory_requirements.size = cont_mem_size;
1872 f->tiling == VK_IMAGE_TILING_LINEAR ?
1873 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
1874 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1875 (
void *)(((uint8_t *)alloc_pnext)),
1876 &
f->flags, &
f->mem[0])))
1879 f->size[0] = cont_memory_requirements.size;
1882 bind_info[
i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
1883 bind_info[
i].image =
f->img[
i];
1884 bind_info[
i].memory =
f->mem[0];
1885 bind_info[
i].memoryOffset =
offset;
1887 f->offset[
i] = bind_info[
i].memoryOffset;
1888 offset += cont_mem_size_list[
i];
1894 if (
ret != VK_SUCCESS) {
1913 uint32_t src_qf, dst_qf;
1914 VkImageLayout new_layout;
1915 VkAccessFlags new_access;
1923 VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
1924 .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
1925 .pSignalSemaphoreValues = sem_sig_val,
1926 .signalSemaphoreValueCount =
planes,
1929 VkSubmitInfo s_info = {
1930 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
1931 .pNext = &s_timeline_sem_info,
1932 .pSignalSemaphores =
frame->sem,
1933 .signalSemaphoreCount =
planes,
1938 wait_st[
i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1939 sem_sig_val[
i] =
frame->sem_value[
i] + 1;
1944 new_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
1945 new_access = VK_ACCESS_TRANSFER_WRITE_BIT;
1946 src_qf = VK_QUEUE_FAMILY_IGNORED;
1947 dst_qf = VK_QUEUE_FAMILY_IGNORED;
1950 new_layout = VK_IMAGE_LAYOUT_GENERAL;
1951 new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
1952 src_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR;
1953 dst_qf = VK_QUEUE_FAMILY_IGNORED;
1954 s_timeline_sem_info.pWaitSemaphoreValues =
frame->sem_value;
1955 s_timeline_sem_info.waitSemaphoreValueCount =
planes;
1956 s_info.pWaitSemaphores =
frame->sem;
1957 s_info.pWaitDstStageMask = wait_st;
1958 s_info.waitSemaphoreCount =
planes;
1961 new_layout = VK_IMAGE_LAYOUT_GENERAL;
1962 new_access = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
1963 src_qf = VK_QUEUE_FAMILY_IGNORED;
1964 dst_qf = VK_QUEUE_FAMILY_EXTERNAL_KHR;
1965 s_timeline_sem_info.pWaitSemaphoreValues =
frame->sem_value;
1966 s_timeline_sem_info.waitSemaphoreValueCount =
planes;
1967 s_info.pWaitSemaphores =
frame->sem;
1968 s_info.pWaitDstStageMask = wait_st;
1969 s_info.waitSemaphoreCount =
planes;
1980 img_bar[
i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
1981 img_bar[
i].srcAccessMask = 0x0;
1982 img_bar[
i].dstAccessMask = new_access;
1983 img_bar[
i].oldLayout =
frame->layout[
i];
1984 img_bar[
i].newLayout = new_layout;
1985 img_bar[
i].srcQueueFamilyIndex = src_qf;
1986 img_bar[
i].dstQueueFamilyIndex = dst_qf;
1987 img_bar[
i].image =
frame->img[
i];
1988 img_bar[
i].subresourceRange.levelCount = 1;
1989 img_bar[
i].subresourceRange.layerCount = 1;
1990 img_bar[
i].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
1992 frame->layout[
i] = img_bar[
i].newLayout;
1993 frame->access[
i] = img_bar[
i].dstAccessMask;
1997 VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
1998 VK_PIPELINE_STAGE_TRANSFER_BIT,
2005 int frame_w,
int frame_h,
int plane)
2022 VkImageTiling tiling, VkImageUsageFlagBits
usage,
2035 VkExportSemaphoreCreateInfo ext_sem_info = {
2036 .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO,
2038 .handleTypes = IsWindows8OrGreater()
2039 ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
2040 : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
2042 .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
2046 VkSemaphoreTypeCreateInfo sem_type_info = {
2047 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
2049 .pNext = p->
extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info :
NULL,
2053 .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
2057 VkSemaphoreCreateInfo sem_spawn = {
2058 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
2059 .pNext = &sem_type_info,
2070 VkImageCreateInfo create_info = {
2071 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2072 .pNext = create_pnext,
2073 .imageType = VK_IMAGE_TYPE_2D,
2074 .format = img_fmts[
i],
2078 .flags = VK_IMAGE_CREATE_ALIAS_BIT,
2080 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
2082 .samples = VK_SAMPLE_COUNT_1_BIT,
2083 .pQueueFamilyIndices = p->
qfs,
2084 .queueFamilyIndexCount = p->
num_qfs,
2085 .sharingMode = p->
num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
2086 VK_SHARING_MODE_EXCLUSIVE,
2089 get_plane_wh(&create_info.extent.width, &create_info.extent.height,
2092 ret = vk->CreateImage(hwctx->
act_dev, &create_info,
2094 if (
ret != VK_SUCCESS) {
2102 ret = vk->CreateSemaphore(hwctx->
act_dev, &sem_spawn,
2104 if (
ret != VK_SUCCESS) {
2110 f->layout[
i] = create_info.initialLayout;
2112 f->sem_value[
i] = 0;
2128 VkExternalMemoryHandleTypeFlags *comp_handle_types,
2129 VkExternalMemoryHandleTypeFlagBits *iexp,
2130 VkExternalMemoryHandleTypeFlagBits
exp)
2138 const VkImageDrmFormatModifierListCreateInfoEXT *drm_mod_info =
2140 VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
2141 int has_mods = hwctx->
tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && drm_mod_info;
2144 VkExternalImageFormatProperties eprops = {
2145 .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
2147 VkImageFormatProperties2 props = {
2148 .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
2151 VkPhysicalDeviceImageDrmFormatModifierInfoEXT phy_dev_mod_info = {
2152 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
2154 .pQueueFamilyIndices = p->
qfs,
2155 .queueFamilyIndexCount = p->
num_qfs,
2156 .sharingMode = p->
num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
2157 VK_SHARING_MODE_EXCLUSIVE,
2159 VkPhysicalDeviceExternalImageFormatInfo enext = {
2160 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
2162 .pNext = has_mods ? &phy_dev_mod_info :
NULL,
2164 VkPhysicalDeviceImageFormatInfo2 pinfo = {
2165 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
2166 .pNext = !
exp ?
NULL : &enext,
2168 .type = VK_IMAGE_TYPE_2D,
2170 .usage = hwctx->
usage,
2171 .flags = VK_IMAGE_CREATE_ALIAS_BIT,
2174 nb_mods = has_mods ? drm_mod_info->drmFormatModifierCount : 1;
2175 for (
int i = 0;
i < nb_mods;
i++) {
2177 phy_dev_mod_info.drmFormatModifier = drm_mod_info->pDrmFormatModifiers[
i];
2179 ret = vk->GetPhysicalDeviceImageFormatProperties2(dev_hwctx->
phys_dev,
2182 if (
ret == VK_SUCCESS) {
2184 *comp_handle_types |= eprops.externalMemoryProperties.compatibleHandleTypes;
2199 VkExternalMemoryHandleTypeFlags e = 0x0;
2201 VkExternalMemoryImageCreateInfo eiinfo = {
2202 .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
2207 if (p->
extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY)
2209 ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
2210 : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT);
2214 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT);
2218 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2222 eminfo[
i].sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
2224 eminfo[
i].handleTypes = e;
2228 eiinfo.handleTypes ? &eiinfo :
NULL);
2256 if (
fp->modifier_info) {
2257 if (
fp->modifier_info->pDrmFormatModifiers)
2258 av_freep(&
fp->modifier_info->pDrmFormatModifiers);
2275 const VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
2280 has_modifiers ? VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT :
2282 VK_IMAGE_TILING_OPTIMAL;
2294 VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
2297 if (has_modifiers && !modifier_info) {
2299 VkImageDrmFormatModifierListCreateInfoEXT *modifier_info;
2301 VkDrmFormatModifierPropertiesEXT *mod_props;
2302 uint64_t *modifiers;
2303 int modifier_count = 0;
2305 VkDrmFormatModifierPropertiesListEXT mod_props_list = {
2306 .sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT,
2308 .drmFormatModifierCount = 0,
2309 .pDrmFormatModifierProperties =
NULL,
2311 VkFormatProperties2 prop = {
2312 .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
2313 .pNext = &mod_props_list,
2317 vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->
phys_dev, fmt[0], &prop);
2319 if (!mod_props_list.drmFormatModifierCount) {
2320 av_log(hwfc,
AV_LOG_ERROR,
"There are no supported modifiers for the given sw_format\n");
2325 modifier_info =
av_mallocz(
sizeof(*modifier_info));
2329 modifier_info->pNext =
NULL;
2330 modifier_info->sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT;
2339 fp->modifier_info = modifier_info;
2342 modifiers =
av_mallocz(mod_props_list.drmFormatModifierCount *
2343 sizeof(*modifiers));
2347 modifier_info->pDrmFormatModifiers = modifiers;
2350 mod_props =
av_mallocz(mod_props_list.drmFormatModifierCount *
2351 sizeof(*mod_props));
2355 mod_props_list.pDrmFormatModifierProperties = mod_props;
2358 vk->GetPhysicalDeviceFormatProperties2(dev_hwctx->
phys_dev, fmt[0], &prop);
2361 for (
int i = 0;
i < mod_props_list.drmFormatModifierCount;
i++) {
2362 if (!(mod_props[
i].drmFormatModifierTilingFeatures & hwctx->
usage))
2365 modifiers[modifier_count++] = mod_props[
i].drmFormatModifier;
2368 if (!modifier_count) {
2370 " the usage flags!\n");
2375 modifier_info->drmFormatModifierCount = modifier_count;
2461 !(
map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
2466 flush_ranges[
i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
2467 flush_ranges[
i].memory =
map->frame->mem[
i];
2468 flush_ranges[
i].size = VK_WHOLE_SIZE;
2473 if (
ret != VK_SUCCESS) {
2480 vk->UnmapMemory(hwctx->
act_dev,
map->frame->mem[
i]);
2489 int err, mapped_mem_count = 0, mem_planes = 0;
2508 if (!(
f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
2509 !(
f->tiling == VK_IMAGE_TILING_LINEAR)) {
2520 for (
int i = 0;
i < mem_planes;
i++) {
2521 ret = vk->MapMemory(hwctx->act_dev,
f->mem[
i], 0,
2522 VK_WHOLE_SIZE, 0, (
void **)&dst->
data[
i]);
2523 if (
ret != VK_SUCCESS) {
2539 !(
f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
2542 map_mem_ranges[
i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
2543 map_mem_ranges[
i].size = VK_WHOLE_SIZE;
2544 map_mem_ranges[
i].memory =
f->mem[
i];
2547 ret = vk->InvalidateMappedMemoryRanges(hwctx->act_dev,
planes,
2549 if (
ret != VK_SUCCESS) {
2558 VkImageSubresource
sub = {
2559 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
2561 VkSubresourceLayout
layout;
2562 vk->GetImageSubresourceLayout(hwctx->act_dev,
f->img[
i], &
sub, &
layout);
2577 for (
int i = 0;
i < mapped_mem_count;
i++)
2578 vk->UnmapMemory(hwctx->act_dev,
f->mem[
i]);
2593 VkSemaphoreWaitInfo wait_info = {
2594 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
2596 .pSemaphores =
f->sem,
2597 .pValues =
f->sem_value,
2598 .semaphoreCount =
planes,
2601 vk->WaitSemaphores(hwctx->
act_dev, &wait_info, UINT64_MAX);
2614 static const struct {
2615 uint32_t drm_fourcc;
2617 } vulkan_drm_format_map[] = {
2618 { DRM_FORMAT_R8, VK_FORMAT_R8_UNORM },
2619 { DRM_FORMAT_R16, VK_FORMAT_R16_UNORM },
2620 { DRM_FORMAT_GR88, VK_FORMAT_R8G8_UNORM },
2621 { DRM_FORMAT_RG88, VK_FORMAT_R8G8_UNORM },
2622 { DRM_FORMAT_GR1616, VK_FORMAT_R16G16_UNORM },
2623 { DRM_FORMAT_RG1616, VK_FORMAT_R16G16_UNORM },
2624 { DRM_FORMAT_ARGB8888, VK_FORMAT_B8G8R8A8_UNORM },
2625 { DRM_FORMAT_XRGB8888, VK_FORMAT_B8G8R8A8_UNORM },
2626 { DRM_FORMAT_ABGR8888, VK_FORMAT_R8G8B8A8_UNORM },
2627 { DRM_FORMAT_XBGR8888, VK_FORMAT_R8G8B8A8_UNORM },
2630 #ifdef DRM_FORMAT_XYUV8888
2631 { DRM_FORMAT_XYUV8888, VK_FORMAT_R8G8B8A8_UNORM },
2632 { DRM_FORMAT_XVYU12_16161616, VK_FORMAT_R16G16B16A16_UNORM} ,
2635 { DRM_FORMAT_Y416, VK_FORMAT_R16G16B16A16_UNORM },
2639 static inline VkFormat drm_to_vulkan_fmt(uint32_t drm_fourcc)
2642 if (vulkan_drm_format_map[
i].drm_fourcc == drm_fourcc)
2643 return vulkan_drm_format_map[
i].vk_format;
2644 return VK_FORMAT_UNDEFINED;
2653 int bind_counts = 0;
2664 if (drm_to_vulkan_fmt(
desc->layers[
i].format) == VK_FORMAT_UNDEFINED) {
2666 desc->layers[
i].format);
2677 f->tiling = VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;
2679 for (
int i = 0;
i <
desc->nb_layers;
i++) {
2683 VkSemaphoreTypeCreateInfo sem_type_info = {
2684 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
2685 .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
2688 VkSemaphoreCreateInfo sem_spawn = {
2689 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
2690 .pNext = &sem_type_info,
2695 VkImageDrmFormatModifierExplicitCreateInfoEXT ext_img_mod_spec = {
2696 .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT,
2697 .drmFormatModifier =
desc->objects[0].format_modifier,
2698 .drmFormatModifierPlaneCount =
planes,
2699 .pPlaneLayouts = (
const VkSubresourceLayout *)&ext_img_layouts,
2701 VkExternalMemoryImageCreateInfo ext_img_spec = {
2702 .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
2703 .pNext = &ext_img_mod_spec,
2704 .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
2706 VkImageCreateInfo create_info = {
2707 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2708 .pNext = &ext_img_spec,
2709 .imageType = VK_IMAGE_TYPE_2D,
2710 .format = drm_to_vulkan_fmt(
desc->layers[
i].format),
2715 .tiling =
f->tiling,
2716 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
2717 .usage = VK_IMAGE_USAGE_SAMPLED_BIT |
2718 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
2719 .samples = VK_SAMPLE_COUNT_1_BIT,
2720 .pQueueFamilyIndices = p->
qfs,
2721 .queueFamilyIndexCount = p->
num_qfs,
2722 .sharingMode = p->
num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
2723 VK_SHARING_MODE_EXCLUSIVE,
2727 VkExternalImageFormatProperties ext_props = {
2728 .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHR,
2730 VkImageFormatProperties2 props_ret = {
2731 .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
2732 .pNext = &ext_props,
2734 VkPhysicalDeviceImageDrmFormatModifierInfoEXT props_drm_mod = {
2735 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
2736 .drmFormatModifier = ext_img_mod_spec.drmFormatModifier,
2737 .pQueueFamilyIndices = create_info.pQueueFamilyIndices,
2738 .queueFamilyIndexCount = create_info.queueFamilyIndexCount,
2739 .sharingMode = create_info.sharingMode,
2741 VkPhysicalDeviceExternalImageFormatInfo props_ext = {
2742 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
2743 .pNext = &props_drm_mod,
2744 .handleType = ext_img_spec.handleTypes,
2746 VkPhysicalDeviceImageFormatInfo2 fmt_props = {
2747 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
2748 .pNext = &props_ext,
2749 .format = create_info.format,
2750 .type = create_info.imageType,
2751 .tiling = create_info.tiling,
2752 .usage = create_info.usage,
2753 .flags = create_info.flags,
2757 ret = vk->GetPhysicalDeviceImageFormatProperties2(hwctx->
phys_dev,
2758 &fmt_props, &props_ret);
2759 if (
ret != VK_SUCCESS) {
2767 get_plane_wh(&create_info.extent.width, &create_info.extent.height,
2771 for (
int j = 0; j <
planes; j++) {
2772 ext_img_layouts[j].offset =
desc->layers[
i].planes[j].offset;
2773 ext_img_layouts[j].rowPitch =
desc->layers[
i].planes[j].pitch;
2774 ext_img_layouts[j].size = 0;
2775 ext_img_layouts[j].arrayPitch = 0;
2776 ext_img_layouts[j].depthPitch = 0;
2780 ret = vk->CreateImage(hwctx->
act_dev, &create_info,
2782 if (
ret != VK_SUCCESS) {
2789 ret = vk->CreateSemaphore(hwctx->
act_dev, &sem_spawn,
2791 if (
ret != VK_SUCCESS) {
2802 f->layout[
i] = create_info.initialLayout;
2804 f->sem_value[
i] = 0;
2807 for (
int i = 0;
i <
desc->nb_objects;
i++) {
2809 VkImageMemoryRequirementsInfo2 req_desc = {
2810 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
2813 VkMemoryDedicatedRequirements ded_req = {
2814 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
2816 VkMemoryRequirements2 req2 = {
2817 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
2822 VkMemoryFdPropertiesKHR fdmp = {
2823 .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
2825 VkImportMemoryFdInfoKHR idesc = {
2826 .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
2827 .fd = dup(
desc->objects[
i].fd),
2828 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
2830 VkMemoryDedicatedAllocateInfo ded_alloc = {
2831 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
2833 .image = req_desc.image,
2837 ret = vk->GetMemoryFdPropertiesKHR(hwctx->
act_dev,
2838 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
2840 if (
ret != VK_SUCCESS) {
2848 vk->GetImageMemoryRequirements2(hwctx->
act_dev, &req_desc, &req2);
2851 req2.memoryRequirements.memoryTypeBits = fdmp.memoryTypeBits;
2854 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
2855 (ded_req.prefersDedicatedAllocation ||
2856 ded_req.requiresDedicatedAllocation) ?
2857 &ded_alloc : ded_alloc.pNext,
2858 &
f->flags, &
f->mem[
i]);
2864 f->size[
i] = req2.memoryRequirements.size;
2867 for (
int i = 0;
i <
desc->nb_layers;
i++) {
2869 for (
int j = 0; j <
planes; j++) {
2870 VkImageAspectFlagBits aspect = j == 0 ? VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT :
2871 j == 1 ? VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT :
2872 VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT;
2874 plane_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
2876 plane_info[bind_counts].planeAspect = aspect;
2878 bind_info[bind_counts].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
2880 bind_info[bind_counts].image =
f->img[
i];
2881 bind_info[bind_counts].memory =
f->mem[
desc->layers[
i].planes[j].object_index];
2884 bind_info[bind_counts].memoryOffset = 0;
2891 ret = vk->BindImageMemory2(hwctx->
act_dev, bind_counts, bind_info);
2892 if (
ret != VK_SUCCESS) {
2908 for (
int i = 0;
i <
desc->nb_layers;
i++) {
2912 for (
int i = 0;
i <
desc->nb_objects;
i++)
2926 if ((err = vulkan_map_from_drm_frame_desc(hwfc, &
f,
src)))
2930 dst->
data[0] = (uint8_t *)
f;
2935 &vulkan_unmap_from_drm,
f);
2958 VASurfaceID surface_id = (VASurfaceID)(uintptr_t)
src->data[3];
2964 vaSyncSurface(vaapi_ctx->display, surface_id);
2972 err = vulkan_map_from_drm(dst_fc, dst,
tmp,
flags);
3005 CudaFunctions *cu = cu_internal->
cuda_dl;
3006 CUarray_format cufmt =
desc->comp[0].depth > 8 ? CU_AD_FORMAT_UNSIGNED_INT16 :
3007 CU_AD_FORMAT_UNSIGNED_INT8;
3012 if (!dst_int || !dst_int->cuda_fc_ref) {
3020 if (!dst_int->cuda_fc_ref) {
3026 CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
3031 .NumChannels = 1 + ((
planes == 2) &&
i),
3039 CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
3040 .type = IsWindows8OrGreater()
3041 ? CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32
3042 : CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT,
3043 .size = dst_f->
size[
i],
3045 VkMemoryGetWin32HandleInfoKHR export_info = {
3046 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
3047 .memory = dst_f->
mem[
i],
3048 .handleType = IsWindows8OrGreater()
3049 ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT
3050 : VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
3052 VkSemaphoreGetWin32HandleInfoKHR sem_export = {
3053 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR,
3054 .semaphore = dst_f->
sem[
i],
3055 .handleType = IsWindows8OrGreater()
3056 ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT
3057 : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT,
3059 CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
3063 ret = vk->GetMemoryWin32HandleKHR(hwctx->
act_dev, &export_info,
3064 &ext_desc.handle.win32.handle);
3065 if (
ret != VK_SUCCESS) {
3071 dst_int->ext_mem_handle[
i] = ext_desc.handle.win32.handle;
3073 CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
3074 .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
3075 .size = dst_f->
size[
i],
3077 VkMemoryGetFdInfoKHR export_info = {
3078 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
3079 .memory = dst_f->
mem[
i],
3080 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
3082 VkSemaphoreGetFdInfoKHR sem_export = {
3083 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
3084 .semaphore = dst_f->
sem[
i],
3085 .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
3087 CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
3091 ret = vk->GetMemoryFdKHR(hwctx->
act_dev, &export_info,
3092 &ext_desc.handle.fd);
3093 if (
ret != VK_SUCCESS) {
3101 ret =
CHECK_CU(cu->cuImportExternalMemory(&dst_int->ext_mem[
i], &ext_desc));
3104 close(ext_desc.handle.fd);
3111 tex_desc.arrayDesc.Width = p_w;
3112 tex_desc.arrayDesc.Height = p_h;
3114 ret =
CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&dst_int->cu_mma[
i],
3115 dst_int->ext_mem[
i],
3122 ret =
CHECK_CU(cu->cuMipmappedArrayGetLevel(&dst_int->cu_array[
i],
3123 dst_int->cu_mma[
i], 0));
3130 ret = vk->GetSemaphoreWin32HandleKHR(hwctx->
act_dev, &sem_export,
3131 &ext_sem_desc.handle.win32.handle);
3133 ret = vk->GetSemaphoreFdKHR(hwctx->
act_dev, &sem_export,
3134 &ext_sem_desc.handle.fd);
3136 if (
ret != VK_SUCCESS) {
3143 dst_int->ext_sem_handle[
i] = ext_sem_desc.handle.win32.handle;
3146 ret =
CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[
i],
3150 close(ext_sem_desc.handle.fd);
3180 CudaFunctions *cu = cu_internal->
cuda_dl;
3190 err =
CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
3194 err = vulkan_export_to_cuda(hwfc,
src->hw_frames_ctx, dst);
3203 s_w_par[
i].params.fence.value = dst_f->
sem_value[
i] + 0;
3204 s_s_par[
i].params.fence.value = dst_f->
sem_value[
i] + 1;
3207 err =
CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
3208 planes, cuda_dev->stream));
3213 CUDA_MEMCPY2D cpy = {
3214 .srcMemoryType = CU_MEMORYTYPE_DEVICE,
3215 .srcDevice = (CUdeviceptr)
src->data[
i],
3216 .srcPitch =
src->linesize[
i],
3219 .dstMemoryType = CU_MEMORYTYPE_ARRAY,
3220 .dstArray = dst_int->cu_array[
i],
3226 cpy.WidthInBytes = p_w *
desc->comp[
i].step;
3229 err =
CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
3234 err =
CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
3235 planes, cuda_dev->stream));
3262 switch (
src->format) {
3267 return vulkan_map_from_vaapi(hwfc, dst,
src,
flags);
3273 return vulkan_map_from_drm(hwfc, dst,
src,
flags);
3283 typedef struct VulkanDRMMapping {
3298 static inline uint32_t vulkan_fmt_to_drm(
VkFormat vkfmt)
3301 if (vulkan_drm_format_map[
i].vk_format == vkfmt)
3302 return vulkan_drm_format_map[
i].drm_fourcc;
3303 return DRM_FORMAT_INVALID;
3318 VkImageDrmFormatModifierPropertiesEXT drm_mod = {
3319 .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
3321 VkSemaphoreWaitInfo wait_info = {
3322 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
3324 .semaphoreCount =
planes,
3336 wait_info.pSemaphores =
f->sem;
3337 wait_info.pValues =
f->sem_value;
3339 vk->WaitSemaphores(hwctx->
act_dev, &wait_info, UINT64_MAX);
3345 ret = vk->GetImageDrmFormatModifierPropertiesEXT(hwctx->
act_dev,
f->img[0],
3347 if (
ret != VK_SUCCESS) {
3353 for (
int i = 0; (
i <
planes) && (
f->mem[
i]);
i++) {
3354 VkMemoryGetFdInfoKHR export_info = {
3355 .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
3356 .memory =
f->mem[
i],
3357 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
3360 ret = vk->GetMemoryFdKHR(hwctx->
act_dev, &export_info,
3362 if (
ret != VK_SUCCESS) {
3375 VkSubresourceLayout
layout;
3376 VkImageSubresource
sub = {
3377 .aspectMask = VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT,
3381 drm_desc->
layers[
i].
format = vulkan_fmt_to_drm(plane_vkfmt);
3392 if (
f->tiling == VK_IMAGE_TILING_OPTIMAL)
3405 dst->
data[0] = (uint8_t *)drm_desc;
3453 return vulkan_map_to_drm(hwfc, dst,
src,
flags);
3459 return vulkan_map_to_vaapi(hwfc, dst,
src,
flags);
3502 VkBufferUsageFlags
usage, VkMemoryPropertyFlagBits
flags,
3503 size_t size, uint32_t req_memory_bits,
int host_mapped,
3504 void *create_pnext,
void *alloc_pnext)
3513 VkBufferCreateInfo buf_spawn = {
3514 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
3515 .pNext = create_pnext,
3518 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
3521 VkBufferMemoryRequirementsInfo2 req_desc = {
3522 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
3524 VkMemoryDedicatedAllocateInfo ded_alloc = {
3525 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
3526 .pNext = alloc_pnext,
3528 VkMemoryDedicatedRequirements ded_req = {
3529 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
3531 VkMemoryRequirements2 req = {
3532 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
3543 if (
ret != VK_SUCCESS) {
3550 req_desc.buffer = vkbuf->
buf;
3552 vk->GetBufferMemoryRequirements2(hwctx->
act_dev, &req_desc, &req);
3555 use_ded_mem = ded_req.prefersDedicatedAllocation |
3556 ded_req.requiresDedicatedAllocation;
3558 ded_alloc.buffer = vkbuf->
buf;
3561 if (req_memory_bits)
3562 req.memoryRequirements.memoryTypeBits &= req_memory_bits;
3565 use_ded_mem ? &ded_alloc : (
void *)ded_alloc.pNext,
3571 if (
ret != VK_SUCCESS) {
3593 int nb_buffers,
int invalidate)
3600 int invalidate_count = 0;
3602 for (
int i = 0;
i < nb_buffers;
i++) {
3608 VK_WHOLE_SIZE, 0, (
void **)&mem[
i]);
3609 if (
ret != VK_SUCCESS) {
3619 for (
int i = 0;
i < nb_buffers;
i++) {
3621 const VkMappedMemoryRange ival_buf = {
3622 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
3623 .memory = vkbuf->
mem,
3624 .size = VK_WHOLE_SIZE,
3633 if (vkbuf->
flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
3636 invalidate_ctx[invalidate_count++] = ival_buf;
3639 if (invalidate_count) {
3640 ret = vk->InvalidateMappedMemoryRanges(hwctx->
act_dev, invalidate_count,
3642 if (
ret != VK_SUCCESS)
3651 int nb_buffers,
int flush)
3659 int flush_count = 0;
3662 for (
int i = 0;
i < nb_buffers;
i++) {
3664 const VkMappedMemoryRange flush_buf = {
3665 .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
3666 .memory = vkbuf->
mem,
3667 .size = VK_WHOLE_SIZE,
3670 if (vkbuf->
flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
3673 flush_ctx[flush_count++] = flush_buf;
3678 ret = vk->FlushMappedMemoryRanges(hwctx->
act_dev, flush_count, flush_ctx);
3679 if (
ret != VK_SUCCESS) {
3686 for (
int i = 0;
i < nb_buffers;
i++) {
3699 const int *buf_stride,
int w,
3720 VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
3721 .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
3722 .pWaitSemaphoreValues =
frame->sem_value,
3723 .pSignalSemaphoreValues = sem_signal_values,
3724 .waitSemaphoreValueCount =
planes,
3725 .signalSemaphoreValueCount =
planes,
3728 VkSubmitInfo s_info = {
3729 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
3730 .pNext = &s_timeline_sem_info,
3731 .pSignalSemaphores =
frame->sem,
3732 .pWaitSemaphores =
frame->sem,
3733 .pWaitDstStageMask = sem_wait_dst,
3734 .signalSemaphoreCount =
planes,
3735 .waitSemaphoreCount =
planes,
3739 sem_signal_values[
i] =
frame->sem_value[
i] + 1;
3746 VkImageLayout new_layout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
3747 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
3748 VkAccessFlags new_access = to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
3749 VK_ACCESS_TRANSFER_WRITE_BIT;
3751 sem_wait_dst[
i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
3754 if ((
frame->layout[
i] == new_layout) && (
frame->access[
i] & new_access))
3757 img_bar[bar_num].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
3758 img_bar[bar_num].srcAccessMask = 0x0;
3759 img_bar[bar_num].dstAccessMask = new_access;
3760 img_bar[bar_num].oldLayout =
frame->layout[
i];
3761 img_bar[bar_num].newLayout = new_layout;
3762 img_bar[bar_num].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
3763 img_bar[bar_num].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
3764 img_bar[bar_num].image =
frame->img[
i];
3765 img_bar[bar_num].subresourceRange.levelCount = 1;
3766 img_bar[bar_num].subresourceRange.layerCount = 1;
3767 img_bar[bar_num].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
3769 frame->layout[
i] = img_bar[bar_num].newLayout;
3770 frame->access[
i] = img_bar[bar_num].dstAccessMask;
3776 vk->CmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
3777 VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
3778 0,
NULL, 0,
NULL, bar_num, img_bar);
3783 VkBufferImageCopy buf_reg = {
3784 .bufferOffset = buf_offsets[
i],
3785 .bufferRowLength = buf_stride[
i] /
desc->comp[
i].step,
3786 .imageSubresource.layerCount = 1,
3787 .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
3788 .imageOffset = { 0, 0, 0, },
3794 buf_reg.bufferImageHeight = p_h;
3795 buf_reg.imageExtent = (VkExtent3D){ p_w, p_h, 1, };
3798 vk->CmdCopyImageToBuffer(cmd_buf,
frame->img[
i],
frame->layout[
i],
3799 vkbuf->
buf, 1, &buf_reg);
3801 vk->CmdCopyBufferToImage(cmd_buf, vkbuf->
buf,
frame->img[
i],
3802 frame->layout[
i], 1, &buf_reg);
3853 if (swf->width > hwfc->
width || swf->height > hwfc->
height)
3857 if (
f->tiling == VK_IMAGE_TILING_LINEAR &&
3858 f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
3862 map->format = swf->format;
3877 VkExternalMemoryBufferCreateInfo create_desc = {
3878 .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO,
3879 .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
3882 VkImportMemoryHostPointerInfoEXT import_desc = {
3883 .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
3884 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
3887 VkMemoryHostPointerPropertiesEXT p_props = {
3888 .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
3891 get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height,
i);
3896 if (map_host && swf->linesize[
i] > 0) {
3898 offs = (uintptr_t)swf->data[
i] % p->
hprops.minImportedHostPointerAlignment;
3899 import_desc.pHostPointer = swf->data[
i] - offs;
3904 p->
hprops.minImportedHostPointerAlignment);
3906 ret = vk->GetMemoryHostPointerPropertiesEXT(hwctx->
act_dev,
3907 import_desc.handleType,
3908 import_desc.pHostPointer,
3911 if (
ret == VK_SUCCESS) {
3913 buf_offsets[
i] = offs;
3917 if (!host_mapped[
i])
3921 from ? VK_BUFFER_USAGE_TRANSFER_DST_BIT :
3922 VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
3923 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
3924 req_size, p_props.memoryTypeBits, host_mapped[
i],
3925 host_mapped[
i] ? &create_desc :
NULL,
3926 host_mapped[
i] ? &import_desc :
NULL);
3940 get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height,
i);
3943 (
const uint8_t *)swf->data[
i], swf->linesize[
i],
3954 swf->width, swf->height, swf->format,
from);
3965 get_plane_wh(&p_w, &p_h, swf->format, swf->width, swf->height,
i);
3968 (
const uint8_t *)
tmp.data[
i],
tmp.linesize[
i],
3989 switch (
src->format) {
3993 if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
3994 (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM))
3999 return vulkan_transfer_data_from_cuda(hwfc, dst,
src);
4002 if (
src->hw_frames_ctx)
4025 CudaFunctions *cu = cu_internal->
cuda_dl;
4035 err =
CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
4048 s_w_par[
i].params.fence.value = dst_f->
sem_value[
i] + 0;
4049 s_s_par[
i].params.fence.value = dst_f->
sem_value[
i] + 1;
4052 err =
CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
4053 planes, cuda_dev->stream));
4058 CUDA_MEMCPY2D cpy = {
4059 .dstMemoryType = CU_MEMORYTYPE_DEVICE,
4060 .dstDevice = (CUdeviceptr)dst->
data[
i],
4064 .srcMemoryType = CU_MEMORYTYPE_ARRAY,
4065 .srcArray = dst_int->cu_array[
i],
4071 cpy.WidthInBytes =
w *
desc->comp[
i].step;
4074 err =
CHECK_CU(cu->cuMemcpy2DAsync(&cpy, cuda_dev->stream));
4079 err =
CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
4080 planes, cuda_dev->stream));
4111 if ((p->extensions & FF_VK_EXT_EXTERNAL_WIN32_MEMORY) &&
4112 (p->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM))
4117 return vulkan_transfer_data_to_cuda(hwfc, dst,
src);