Discussion:
[FFmpeg-devel] [PATCH v3 0/8] Vulkan hwcontext and filtering
Rostislav Pehlivanov
2018-05-22 02:46:08 UTC
Permalink
This is pretty much finished.

Rostislav Pehlivanov (8):
hwcontext_internal: add ff_hwframe_map_replace
hwcontext_opencl: use ff_hwframe_map_replace()
lavu: add a Vulkan hwcontext
lavfi: add common Vulkan filtering code
lavfi: add a Vulkan avgblur filter
lavfi: add a Vulkan chromatic aberration filter
lavfi: add a Vulkan scale filter
lavfi: add a Vulkan overlay filter

configure | 24 +-
doc/APIchanges | 3 +
libavfilter/Makefile | 4 +
libavfilter/allfilters.c | 4 +
libavfilter/vf_avgblur_vulkan.c | 343 ++++
libavfilter/vf_chromaticaberration_vulkan.c | 342 ++++
libavfilter/vf_overlay_vulkan.c | 461 +++++
libavfilter/vf_scale_vulkan.c | 395 ++++
libavfilter/vulkan.c | 1186 +++++++++++
libavfilter/vulkan.h | 223 ++
libavutil/Makefile | 3 +
libavutil/hwcontext.c | 11 +
libavutil/hwcontext.h | 1 +
libavutil/hwcontext_internal.h | 6 +
libavutil/hwcontext_opencl.c | 5 +-
libavutil/hwcontext_vulkan.c | 2013 +++++++++++++++++++
libavutil/hwcontext_vulkan.h | 133 ++
libavutil/pixdesc.c | 4 +
libavutil/pixfmt.h | 4 +
libavutil/version.h | 4 +-
20 files changed, 5162 insertions(+), 7 deletions(-)
create mode 100644 libavfilter/vf_avgblur_vulkan.c
create mode 100644 libavfilter/vf_chromaticaberration_vulkan.c
create mode 100644 libavfilter/vf_overlay_vulkan.c
create mode 100644 libavfilter/vf_scale_vulkan.c
create mode 100644 libavfilter/vulkan.c
create mode 100644 libavfilter/vulkan.h
create mode 100644 libavutil/hwcontext_vulkan.c
create mode 100644 libavutil/hwcontext_vulkan.h
--
2.17.0
Rostislav Pehlivanov
2018-05-22 02:46:11 UTC
Permalink
This commit adds a Vulkan hwcontext, currently capable of mapping DRM and
VAAPI frames but additional functionality can be added later to support
importing of D3D11 surfaces as well as exporting to various other APIs.

This context requires the newest stable version of the Vulkan API. Once the
new extension for DRM surfaces is released, the context will require that
extension as well (in order to properly and fully import such surfaces).

It makes use of every part of the Vulkan spec in order to ensure the fastest
possible uploading, downloading and mapping of frames. On AMD, it will
also map host memory frames directly in order to upload
very efficiently and with minimal CPU overhead.

To be useful for non-RGB images an implementation with the YUV images
extension is needed. All current implementations support that with the
exception of AMD, though support is coming soon for Mesa.

Signed-off-by: Rostislav Pehlivanov <***@gmail.com>
---
configure | 10 +
doc/APIchanges | 3 +
libavutil/Makefile | 3 +
libavutil/hwcontext.c | 4 +
libavutil/hwcontext.h | 1 +
libavutil/hwcontext_internal.h | 1 +
libavutil/hwcontext_vulkan.c | 2013 ++++++++++++++++++++++++++++++++
libavutil/hwcontext_vulkan.h | 133 +++
libavutil/pixdesc.c | 4 +
libavutil/pixfmt.h | 4 +
libavutil/version.h | 4 +-
11 files changed, 2178 insertions(+), 2 deletions(-)
create mode 100644 libavutil/hwcontext_vulkan.c
create mode 100644 libavutil/hwcontext_vulkan.h

diff --git a/configure b/configure
index 09ff0c55e2..5f4407b753 100755
--- a/configure
+++ b/configure
@@ -300,6 +300,7 @@ External library support:
--enable-opengl enable OpenGL rendering [no]
--enable-openssl enable openssl, needed for https support
if gnutls, libtls or mbedtls is not used [no]
+ --enable-vulkan enable Vulkan code [no]
--disable-sndio disable sndio support [autodetect]
--disable-schannel disable SChannel SSP, needed for TLS support on
Windows if openssl and gnutls are not used [autodetect]
@@ -1767,6 +1768,7 @@ HWACCEL_LIBRARY_LIST="
mmal
omx
opencl
+ vulkan
"

DOCUMENT_LIST="
@@ -2223,6 +2225,7 @@ HAVE_LIST="
opencl_dxva2
opencl_vaapi_beignet
opencl_vaapi_intel_media
+ vulkan_drm_mod
perl
pod2man
texi2html
@@ -6349,6 +6352,13 @@ enabled vdpau &&

enabled crystalhd && check_lib crystalhd "stdint.h libcrystalhd/libcrystalhd_if.h" DtsCrystalHDVersion -lcrystalhd

+enabled vulkan &&
+ require_pkg_config vulkan "vulkan >= 1.1.73" "vulkan/vulkan.h" vkCreateInstance
+
+if enabled_all vulkan libdrm ; then
+ check_cpp_condition vulkan_drm_mod vulkan/vulkan.h "defined VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME"
+fi
+
if enabled x86; then
case $target_os in
mingw32*|mingw64*|win32|win64|linux|cygwin*)
diff --git a/doc/APIchanges b/doc/APIchanges
index efe15ba4e0..1b37f58ca7 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -15,6 +15,9 @@ libavutil: 2017-10-21

API changes, most recent first:

+2018-04-xx - xxxxxxxxxx - lavu 56.19.100 - hwcontext.h
+ Add AV_HWDEVICE_TYPE_VULKAN and implementation.
+
2018-05-xx - xxxxxxxxxx - lavf 58.15.100 - avformat.h
Add pmt_version field to AVProgram

diff --git a/libavutil/Makefile b/libavutil/Makefile
index d0632f16a6..9fb32bc5e2 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -42,6 +42,7 @@ HEADERS = adler32.h \
hwcontext_vaapi.h \
hwcontext_videotoolbox.h \
hwcontext_vdpau.h \
+ hwcontext_vulkan.h \
imgutils.h \
intfloat.h \
intreadwrite.h \
@@ -168,6 +169,7 @@ OBJS-$(CONFIG_QSV) += hwcontext_qsv.o
OBJS-$(CONFIG_VAAPI) += hwcontext_vaapi.o
OBJS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.o
OBJS-$(CONFIG_VDPAU) += hwcontext_vdpau.o
+OBJS-$(CONFIG_VULKAN) += hwcontext_vulkan.o

OBJS += $(COMPAT_OBJS:%=../compat/%)

@@ -183,6 +185,7 @@ SKIPHEADERS-$(CONFIG_OPENCL) += hwcontext_opencl.h
SKIPHEADERS-$(CONFIG_VAAPI) += hwcontext_vaapi.h
SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.h
SKIPHEADERS-$(CONFIG_VDPAU) += hwcontext_vdpau.h
+SKIPHEADERS-$(CONFIG_VULKAN) += hwcontext_vulkan.h

TESTPROGS = adler32 \
aes \
diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c
index f1e404ab20..ee2216c7c1 100644
--- a/libavutil/hwcontext.c
+++ b/libavutil/hwcontext.c
@@ -58,6 +58,9 @@ static const HWContextType * const hw_table[] = {
#endif
#if CONFIG_MEDIACODEC
&ff_hwcontext_type_mediacodec,
+#endif
+#if CONFIG_VULKAN
+ &ff_hwcontext_type_vulkan,
#endif
NULL,
};
@@ -73,6 +76,7 @@ static const char *const hw_type_names[] = {
[AV_HWDEVICE_TYPE_VDPAU] = "vdpau",
[AV_HWDEVICE_TYPE_VIDEOTOOLBOX] = "videotoolbox",
[AV_HWDEVICE_TYPE_MEDIACODEC] = "mediacodec",
+ [AV_HWDEVICE_TYPE_VULKAN] = "vulkan",
};

enum AVHWDeviceType av_hwdevice_find_type_by_name(const char *name)
diff --git a/libavutil/hwcontext.h b/libavutil/hwcontext.h
index f5a4b62387..f874af9f8f 100644
--- a/libavutil/hwcontext.h
+++ b/libavutil/hwcontext.h
@@ -36,6 +36,7 @@ enum AVHWDeviceType {
AV_HWDEVICE_TYPE_DRM,
AV_HWDEVICE_TYPE_OPENCL,
AV_HWDEVICE_TYPE_MEDIACODEC,
+ AV_HWDEVICE_TYPE_VULKAN,
};

typedef struct AVHWDeviceInternal AVHWDeviceInternal;
diff --git a/libavutil/hwcontext_internal.h b/libavutil/hwcontext_internal.h
index 77dc47ddd6..dba0f39944 100644
--- a/libavutil/hwcontext_internal.h
+++ b/libavutil/hwcontext_internal.h
@@ -172,5 +172,6 @@ extern const HWContextType ff_hwcontext_type_vaapi;
extern const HWContextType ff_hwcontext_type_vdpau;
extern const HWContextType ff_hwcontext_type_videotoolbox;
extern const HWContextType ff_hwcontext_type_mediacodec;
+extern const HWContextType ff_hwcontext_type_vulkan;

#endif /* AVUTIL_HWCONTEXT_INTERNAL_H */
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
new file mode 100644
index 0000000000..db0a5b7e61
--- /dev/null
+++ b/libavutil/hwcontext_vulkan.c
@@ -0,0 +1,2013 @@
+/*
+ * Vulkan hwcontext
+ * Copyright (c) 2018 Rostislav Pehlivanov <***@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "pixdesc.h"
+#include "avstring.h"
+#include "hwcontext.h"
+#include "hwcontext_internal.h"
+#include "hwcontext_vulkan.h"
+
+#if CONFIG_LIBDRM
+#include <unistd.h> /* lseek */
+#include <xf86drm.h>
+#include <drm_fourcc.h>
+#include "hwcontext_drm.h"
+#if CONFIG_VAAPI
+#include <va/va_drmcommon.h>
+#include "hwcontext_vaapi.h"
+#endif
+#endif
+
+/* Private per-device state, stored in ctx->internal->priv */
+typedef struct VulkanDevicePriv {
+ /* Properties */
+ /* Filled by vkGetPhysicalDeviceProperties() when the device is created */
+ VkPhysicalDeviceProperties props;
+ /* Filled by vkGetPhysicalDeviceMemoryProperties() in vulkan_device_init() */
+ VkPhysicalDeviceMemoryProperties mprops;
+
+ /* Debug callback */
+ /* Only created by create_instance() when the "debug" option is non-zero */
+ VkDebugUtilsMessengerEXT debug_ctx;
+
+ /* Image uploading */
+ /* All four are set up by create_exec_ctx() on the transfer queue family */
+ VkCommandPool cmd_pool;
+ VkCommandBuffer cmd_buf;
+ VkQueue cmd_queue;
+ VkFence cmd_fence;
+
+ /* Extensions */
+ /* Bitmask of enum VulkanExtensions flags, ORed in by check_extensions() */
+ uint64_t extensions;
+
+ /* Settings */
+ /* Parsed from the "linear_images" device option, 0 when not given */
+ int use_linear_images;
+ /* Parsed from the "disjoint_images" device option, 0 when not given */
+ int use_disjoint_images;
+} VulkanDevicePriv;
+
+/*
+ * Declares a local function pointer called pfn_<name> and initializes it with
+ * the address vkGetInstanceProcAddr() returns for <name> on instance inst.
+ * The macro expands to a declaration, so it must be used where one is allowed.
+ */
+#define VK_LOAD_PFN(inst, name) PFN_##name pfn_##name = (PFN_##name) \
+ vkGetInstanceProcAddr(inst, #name)
+
+/* Image usage bits combined into one default mask: sampled, storage and
+ * transfer source/destination. */
+#define DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT | \
+ VK_IMAGE_USAGE_STORAGE_BIT | \
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT | \
+ VK_IMAGE_USAGE_TRANSFER_DST_BIT)
+
+/*
+ * Appends val to the heap array list, growing it by one element, and
+ * increments count.
+ *
+ * Requires an "int err" variable and an "end" label in the calling function:
+ * on allocation failure err is set to AVERROR(ENOMEM) and control jumps to
+ * end. In that case list and count are left untouched, so the array is still
+ * valid and remains the caller's to free.
+ */
+#define ADD_VAL_TO_LIST(list, count, val)                                      \
+    do {                                                                       \
+        /* Grow through a temporary so the old array is not lost on failure */ \
+        void *new_list = av_realloc_array(list, (count) + 1, sizeof(*(list))); \
+        if (!new_list) {                                                       \
+            err = AVERROR(ENOMEM);                                             \
+            goto end;                                                          \
+        }                                                                      \
+        list = new_list;                                                       \
+        list[(count)++] = val;                                                 \
+    } while(0)
+
+/*
+ * Maps each AVPixelFormat to the VkFormat used for its images. Entries that
+ * are not listed stay zero, which av_vkfmt_from_pixfmt() reports as
+ * VK_FORMAT_UNDEFINED.
+ */
+static const VkFormat vk_format_map[AV_PIX_FMT_NB] = {
+    /* Gray */
+    [AV_PIX_FMT_GRAY8]     = VK_FORMAT_R8_UNORM,
+    [AV_PIX_FMT_GRAY10]    = VK_FORMAT_R10X6_UNORM_PACK16,
+    [AV_PIX_FMT_GRAY12]    = VK_FORMAT_R12X4_UNORM_PACK16,
+    [AV_PIX_FMT_GRAY16]    = VK_FORMAT_R16_UNORM,
+
+    /* Interleaved */
+    [AV_PIX_FMT_NV12]      = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM,
+    [AV_PIX_FMT_P010]      = VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,
+    [AV_PIX_FMT_P016]      = VK_FORMAT_G16_B16R16_2PLANE_420_UNORM,
+    /* NV16 and the packed 4:2:2 formats carry 8 bits per component, so they
+     * need the 8-bit Vulkan formats, not the 16-bit ones */
+    [AV_PIX_FMT_NV16]      = VK_FORMAT_G8_B8R8_2PLANE_422_UNORM,
+    [AV_PIX_FMT_UYVY422]   = VK_FORMAT_B8G8R8G8_422_UNORM,
+    /* NOTE(review): Vulkan's G8B8G8R8 layout is Y-Cb-Y-Cr while YVYU422 stores
+     * Y-Cr-Y-Cb, so the chroma channels may come out swapped - confirm, or
+     * key this entry on AV_PIX_FMT_YUYV422 instead */
+    [AV_PIX_FMT_YVYU422]   = VK_FORMAT_G8B8G8R8_422_UNORM,
+
+    /* 420 */
+    [AV_PIX_FMT_YUV420P]   = VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM,
+    [AV_PIX_FMT_YUV420P16] = VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM,
+
+    /* 422 */
+    [AV_PIX_FMT_YUV422P]   = VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM,
+    [AV_PIX_FMT_YUV422P16] = VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM,
+
+    /* 444 */
+    [AV_PIX_FMT_YUV444P]   = VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM,
+    [AV_PIX_FMT_YUV444P16] = VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM,
+
+    /* RGB */
+    [AV_PIX_FMT_ABGR]      = VK_FORMAT_A8B8G8R8_UNORM_PACK32,
+    [AV_PIX_FMT_BGRA]      = VK_FORMAT_B8G8R8A8_UNORM,
+    [AV_PIX_FMT_RGBA]      = VK_FORMAT_R8G8B8A8_UNORM,
+    [AV_PIX_FMT_RGB24]     = VK_FORMAT_R8G8B8_UNORM,
+    [AV_PIX_FMT_BGR24]     = VK_FORMAT_B8G8R8_UNORM,
+    [AV_PIX_FMT_RGB48]     = VK_FORMAT_R16G16B16_UNORM,
+    [AV_PIX_FMT_RGBA64]    = VK_FORMAT_R16G16B16A16_UNORM,
+    [AV_PIX_FMT_RGB565]    = VK_FORMAT_R5G6B5_UNORM_PACK16,
+    [AV_PIX_FMT_BGR565]    = VK_FORMAT_B5G6R5_UNORM_PACK16,
+    [AV_PIX_FMT_BGR0]      = VK_FORMAT_B8G8R8A8_UNORM,
+    [AV_PIX_FMT_0BGR]      = VK_FORMAT_A8B8G8R8_UNORM_PACK32,
+    [AV_PIX_FMT_RGB0]      = VK_FORMAT_R8G8B8A8_UNORM,
+};
+
+/*
+ * Bit flags describing the extensions this file knows about. The low bits
+ * identify individual extensions and are collected in
+ * VulkanDevicePriv.extensions; EXT_OPTIONAL and EXT_REQUIRED are markers used
+ * in the VulkanOptExtension tables.
+ *
+ * The constants are built from unsigned values: shifting a signed 1 into
+ * bit 63 is undefined behaviour. Values this wide need a 64-bit value to hold
+ * them (the flag fields in this file are uint64_t), never an int.
+ */
+enum VulkanExtensions {
+    EXT_DEDICATED_ALLOC        = 1ULL <<  0, /* VK_KHR_dedicated_allocation */
+    EXT_IMAGE_FORMAT_LIST      = 1ULL <<  1, /* VK_KHR_image_format_list */
+    EXT_EXTERNAL_MEMORY        = 1ULL <<  2, /* VK_KHR_external_memory */
+    EXT_EXTERNAL_HOST_MEMORY   = 1ULL <<  3, /* VK_EXT_external_memory_host */
+    EXT_EXTERNAL_FD_MEMORY     = 1ULL <<  4, /* VK_KHR_external_memory_fd */
+    EXT_EXTERNAL_DMABUF_MEMORY = 1ULL <<  5, /* VK_EXT_external_memory_dma_buf */
+    EXT_DRM_MODIFIER_FLAGS     = 1ULL <<  6, /* VK_EXT_image_drm_format_modifier */
+    EXT_YUV_IMAGES             = 1ULL <<  7, /* VK_KHR_sampler_ycbcr_conversion */
+
+    EXT_OPTIONAL               = 1ULL << 62,
+    EXT_REQUIRED               = 1ULL << 63,
+};
+
+/* One entry of the extension tables that check_extensions() walks */
+typedef struct VulkanOptExtension {
+ /* Extension name, compared against the names the implementation reports */
+ const char *name;
+ /* An enum VulkanExtensions value: EXT_REQUIRED marks a mandatory extension,
+ * any other value is ORed into VulkanDevicePriv.extensions when found */
+ uint64_t flag;
+} VulkanOptExtension;
+
+/*
+ * Instance-level extensions requested by check_extensions() when it is called
+ * with dev == 0. File-local: the table has no external users and must not
+ * leak into the library's global symbol namespace.
+ */
+static VulkanOptExtension optional_instance_exts[] = {
+    { VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME,     EXT_EXTERNAL_MEMORY, },
+    { VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME, EXT_REQUIRED },
+};
+
+/*
+ * Device-level extensions requested by check_extensions() when it is called
+ * with dev != 0. File-local: the table has no external users and must not
+ * leak into the library's global symbol namespace.
+ */
+static VulkanOptExtension optional_device_exts[] = {
+    { VK_KHR_MAINTENANCE1_EXTENSION_NAME,               EXT_REQUIRED },
+    { VK_KHR_MAINTENANCE2_EXTENSION_NAME,               EXT_REQUIRED },
+    { VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME,  EXT_REQUIRED },
+    { VK_KHR_BIND_MEMORY_2_EXTENSION_NAME,              EXT_REQUIRED },
+    { VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, EXT_REQUIRED },
+
+    { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,         EXT_OPTIONAL, },
+
+    { VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME,       EXT_DEDICATED_ALLOC, },
+    { VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME,          EXT_IMAGE_FORMAT_LIST, },
+    { VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME,            EXT_EXTERNAL_MEMORY, },
+    { VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME,       EXT_EXTERNAL_HOST_MEMORY, },
+    { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME,    EXT_EXTERNAL_DMABUF_MEMORY, },
+    { VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME,   EXT_YUV_IMAGES },
+#if HAVE_VULKAN_DRM_MOD
+    { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME,  EXT_DRM_MODIFIER_FLAGS, },
+#else
+    /* NOTE(review): without the DRM modifier extension in the headers the
+     * dma_buf extension is listed a second time to set EXT_DRM_MODIFIER_FLAGS,
+     * so check_extensions() will pass the same name twice - confirm that this
+     * is intended */
+    { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME,    EXT_DRM_MODIFIER_FLAGS, },
+#endif
+};
+
+/*
+ * Looks up the Vulkan format for a pixel format in vk_format_map.
+ * Returns VK_FORMAT_UNDEFINED when p is outside the table or has no entry.
+ */
+VkFormat av_vkfmt_from_pixfmt(enum AVPixelFormat p)
+{
+    /* Reject values that would index outside the table */
+    if (p < 0 || p >= AV_PIX_FMT_NB)
+        return VK_FORMAT_UNDEFINED;
+
+    /* A zero entry means the format was never mapped */
+    if (!vk_format_map[p])
+        return VK_FORMAT_UNDEFINED;
+
+    return vk_format_map[p];
+}
+
+/*
+ * Checks whether the physical device can use the Vulkan format mapped to
+ * pixel format p with the requested tiling.
+ *
+ * @param hwctx  device context supplying the physical device to query
+ * @param p      pixel format to check
+ * @param linear non-zero to test linear tiling, zero for optimal tiling
+ * @return 1 if the format is mapped and supports sampling, storage and
+ *         transfers in both directions, 0 otherwise
+ */
+static int vkfmt_is_supported(AVVulkanDeviceContext *hwctx, enum AVPixelFormat p,
+                              int linear)
+{
+    /* Every one of these features is needed, so all bits must be present;
+     * a format offering only some of them is not usable */
+    const VkFormatFeatureFlags required = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
+                                          VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT |
+                                          VK_FORMAT_FEATURE_TRANSFER_SRC_BIT  |
+                                          VK_FORMAT_FEATURE_TRANSFER_DST_BIT;
+    VkFormatFeatureFlags flags;
+    VkFormatProperties2 prop = {
+        .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
+    };
+    VkFormat fmt = av_vkfmt_from_pixfmt(p);
+
+    if (fmt == VK_FORMAT_UNDEFINED)
+        return 0;
+
+    vkGetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt, &prop);
+    flags = linear ? prop.formatProperties.linearTilingFeatures :
+                     prop.formatProperties.optimalTilingFeatures;
+
+    return (flags & required) == required;
+}
+
+/*
+ * Returns the name of a VkResult code as a string literal, or
+ * "Unknown error" for codes that are not listed below.
+ */
+static const char *vk_ret2str(VkResult res)
+{
+/* Expands to a case label that returns the spelled-out name of the code */
+#define RESULT_NAME(code) case code: return #code;
+    switch (res) {
+    RESULT_NAME(VK_SUCCESS)
+    RESULT_NAME(VK_NOT_READY)
+    RESULT_NAME(VK_TIMEOUT)
+    RESULT_NAME(VK_EVENT_SET)
+    RESULT_NAME(VK_EVENT_RESET)
+    RESULT_NAME(VK_INCOMPLETE)
+    RESULT_NAME(VK_ERROR_OUT_OF_HOST_MEMORY)
+    RESULT_NAME(VK_ERROR_OUT_OF_DEVICE_MEMORY)
+    RESULT_NAME(VK_ERROR_INITIALIZATION_FAILED)
+    RESULT_NAME(VK_ERROR_DEVICE_LOST)
+    RESULT_NAME(VK_ERROR_MEMORY_MAP_FAILED)
+    RESULT_NAME(VK_ERROR_LAYER_NOT_PRESENT)
+    RESULT_NAME(VK_ERROR_EXTENSION_NOT_PRESENT)
+    RESULT_NAME(VK_ERROR_FEATURE_NOT_PRESENT)
+    RESULT_NAME(VK_ERROR_INCOMPATIBLE_DRIVER)
+    RESULT_NAME(VK_ERROR_TOO_MANY_OBJECTS)
+    RESULT_NAME(VK_ERROR_FORMAT_NOT_SUPPORTED)
+    RESULT_NAME(VK_ERROR_FRAGMENTED_POOL)
+    RESULT_NAME(VK_ERROR_SURFACE_LOST_KHR)
+    RESULT_NAME(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR)
+    RESULT_NAME(VK_SUBOPTIMAL_KHR)
+    RESULT_NAME(VK_ERROR_OUT_OF_DATE_KHR)
+    RESULT_NAME(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR)
+    RESULT_NAME(VK_ERROR_VALIDATION_FAILED_EXT)
+    RESULT_NAME(VK_ERROR_INVALID_SHADER_NV)
+    RESULT_NAME(VK_ERROR_OUT_OF_POOL_MEMORY)
+    RESULT_NAME(VK_ERROR_INVALID_EXTERNAL_HANDLE)
+    RESULT_NAME(VK_ERROR_NOT_PERMITTED_EXT)
+    default:
+        break;
+    }
+#undef RESULT_NAME
+
+    return "Unknown error";
+}
+
+/*
+ * Debug messenger callback: forwards a Vulkan debug message, followed by the
+ * names of its command buffer labels, to av_log().
+ *
+ * @param severity    message severity, translated to an av_log level
+ * @param messageType message category (not used)
+ * @param data        message text and command buffer labels
+ * @param priv        the AVHWDeviceContext registered as user data
+ * @return always 0
+ */
+static VkBool32 vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
+                                VkDebugUtilsMessageTypeFlagsEXT messageType,
+                                const VkDebugUtilsMessengerCallbackDataEXT *data,
+                                void *priv)
+{
+    AVHWDeviceContext *log_ctx = priv;
+    int level = AV_LOG_DEBUG; /* used for any severity not matched below */
+
+    if (severity == VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT)
+        level = AV_LOG_VERBOSE;
+    else if (severity == VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT)
+        level = AV_LOG_INFO;
+    else if (severity == VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT)
+        level = AV_LOG_WARNING;
+    else if (severity == VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT)
+        level = AV_LOG_ERROR;
+
+    av_log(log_ctx, level, "%s\n", data->pMessage);
+
+    /* List the command buffer labels attached to the message, one per line */
+    for (int n = 0; n < data->cmdBufLabelCount; n++) {
+        const char *label = data->pCmdBufLabels[n].pLabelName;
+        av_log(log_ctx, level, "\t%i: %s\n", n, label);
+    }
+
+    return 0;
+}
+
+/* Returns 1 if an extension called name is present in list, 0 otherwise */
+static int ext_in_list(const VkExtensionProperties *list, uint32_t count,
+                       const char *name)
+{
+    for (uint32_t i = 0; i < count; i++)
+        if (!strcmp(name, list[i].extensionName))
+            return 1;
+    return 0;
+}
+
+/*
+ * Builds the list of extension names to enable for the instance or device.
+ *
+ * Walks the matching table (optional_instance_exts or optional_device_exts),
+ * keeps every entry the implementation reports as supported and ORs the flags
+ * of the non-required ones into VulkanDevicePriv.extensions. A missing
+ * extension marked EXT_REQUIRED is an error; other missing ones are skipped.
+ *
+ * @param ctx   device context, used for logging and for its private data
+ * @param dev   0 to check instance extensions, non-zero for device extensions
+ * @param dst   receives a newly allocated array of name pointers which the
+ *              caller frees with av_free(); the strings themselves are not
+ *              owned by the array
+ * @param num   receives the number of entries stored in *dst
+ * @param debug if non-zero (and dev is 0) the debug utils extension is also
+ *              required
+ * @return 0 on success, a negative AVERROR code on failure; *dst and *num are
+ *         not written on failure
+ */
+static int check_extensions(AVHWDeviceContext *ctx, int dev,
+                            const char * const **dst, uint32_t *num, int debug)
+{
+    const char *tstr;
+    const char **extension_names = NULL;
+    VulkanDevicePriv *p = ctx->internal->priv;
+    AVVulkanDeviceContext *hwctx = ctx->hwctx;
+    int err = 0, extensions_found = 0;
+
+    const char *mod;
+    int optional_exts_num;
+    uint32_t sup_ext_count = 0;
+    VkExtensionProperties *sup_ext;
+    VulkanOptExtension *optional_exts;
+
+    if (!dev) {
+        mod = "instance";
+        optional_exts = optional_instance_exts;
+        optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts);
+        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
+        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
+        if (!sup_ext)
+            return AVERROR(ENOMEM);
+        vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
+    } else {
+        mod = "device";
+        optional_exts = optional_device_exts;
+        optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts);
+        vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
+                                             &sup_ext_count, NULL);
+        sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
+        if (!sup_ext)
+            return AVERROR(ENOMEM);
+        vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
+                                             &sup_ext_count, sup_ext);
+    }
+
+    for (int i = 0; i < optional_exts_num; i++) {
+        /* Reduce to 0/1 before storing: EXT_REQUIRED is bit 63 and would be
+         * lost if the masked 64-bit value were narrowed to an int directly */
+        const int req = !!(optional_exts[i].flag & EXT_REQUIRED);
+        tstr = optional_exts[i].name;
+
+        if (!ext_in_list(sup_ext, sup_ext_count, tstr)) {
+            int lvl = req ? AV_LOG_ERROR : AV_LOG_VERBOSE;
+            av_log(ctx, lvl, "Extension \"%s\" not found!\n", tstr);
+            if (req) {
+                err = AVERROR(EINVAL);
+                goto end;
+            }
+            continue;
+        }
+        if (!req)
+            p->extensions |= optional_exts[i].flag;
+
+        av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);
+
+        ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
+    }
+
+    if (debug && !dev) {
+        tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
+        if (!ext_in_list(sup_ext, sup_ext_count, tstr)) {
+            av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n",
+                   tstr);
+            err = AVERROR(EINVAL);
+            goto end;
+        }
+        ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
+    }
+
+    *dst = extension_names;
+    *num = extensions_found;
+    extension_names = NULL; /* ownership has moved to the caller */
+
+end:
+    /* Still non-NULL only on an error path: drop the partially built list */
+    av_free(extension_names);
+    av_free(sup_ext);
+    return err;
+}
+
+/*
+ * Creates a VkInstance and stores it in hwctx->inst.
+ *
+ * When the "debug" entry of opts parses to a non-zero number, the validation
+ * layer is enabled and a debug messenger routing messages to
+ * vk_dbg_callback() is installed. Failing to install the messenger is only
+ * logged: the instance itself stays usable.
+ *
+ * @param ctx  device context to create the instance for
+ * @param opts device creation options, may be NULL
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
+{
+    int err = 0;
+    VkResult ret;
+    VulkanDevicePriv *p = ctx->internal->priv;
+    AVVulkanDeviceContext *hwctx = ctx->hwctx;
+    AVDictionaryEntry *debug_opt = av_dict_get(opts, "debug", NULL, 0);
+    const int debug_mode = debug_opt && strtol(debug_opt->value, NULL, 10);
+    VkApplicationInfo application_info = {
+        .sType         = VK_STRUCTURE_TYPE_APPLICATION_INFO,
+        .pEngineName   = "libavutil",
+        .apiVersion    = VK_API_VERSION_1_1,
+        .engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
+                                         LIBAVUTIL_VERSION_MINOR,
+                                         LIBAVUTIL_VERSION_MICRO),
+    };
+    VkInstanceCreateInfo inst_props = {
+        .sType            = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+        .pApplicationInfo = &application_info,
+    };
+
+    /* Check for present/missing extensions */
+    err = check_extensions(ctx, 0, &inst_props.ppEnabledExtensionNames,
+                           &inst_props.enabledExtensionCount, debug_mode);
+    if (err < 0)
+        return err;
+
+    if (debug_mode) {
+        static const char *layers[] = { "VK_LAYER_LUNARG_standard_validation" };
+        inst_props.ppEnabledLayerNames = layers;
+        inst_props.enabledLayerCount   = FF_ARRAY_ELEMS(layers);
+    }
+
+    /* Try to create the instance */
+    ret = vkCreateInstance(&inst_props, hwctx->alloc, &hwctx->inst);
+
+    /* The name list is only needed for the call above */
+    av_free((void *)inst_props.ppEnabledExtensionNames);
+
+    /* Check for errors */
+    if (ret != VK_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n",
+               vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    if (debug_mode) {
+        VkDebugUtilsMessengerCreateInfoEXT dbg = {
+            .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
+            .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
+                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT    |
+                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
+                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
+            .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT    |
+                           VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
+                           VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
+            .pfnUserCallback = vk_dbg_callback,
+            .pUserData = ctx,
+        };
+        VK_LOAD_PFN(hwctx->inst, vkCreateDebugUtilsMessengerEXT);
+
+        /* The loader returns NULL for entry points it cannot resolve; never
+         * call through a NULL pointer */
+        if (!pfn_vkCreateDebugUtilsMessengerEXT) {
+            av_log(ctx, AV_LOG_WARNING,
+                   "Unable to load vkCreateDebugUtilsMessengerEXT, "
+                   "debug messages are disabled\n");
+        } else {
+            ret = pfn_vkCreateDebugUtilsMessengerEXT(hwctx->inst, &dbg,
+                                                     hwctx->alloc,
+                                                     &p->debug_ctx);
+            if (ret != VK_SUCCESS)
+                av_log(ctx, AV_LOG_WARNING,
+                       "Debug messenger creation failure: %s\n",
+                       vk_ret2str(ret));
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Criteria find_device() uses to pick a physical device. The fields are
+ * tried in the order they are declared and only the first one that is set is
+ * used; if that one matches nothing, find_device() fails rather than trying
+ * the next field.
+ */
+typedef struct VulkanDeviceSelection {
+ const char *name; /* Will use this first unless NULL */
+ uint32_t pci_device; /* Will use this second unless 0x0 */
+ uint32_t vendor_id; /* Last resort to find something deterministic */
+ int index; /* Finally fall back to index */
+} VulkanDeviceSelection;
+
+/*
+ * Enumerates the physical devices of hwctx->inst, logs them and picks one
+ * according to select: by exact name, else by PCI device ID, else by vendor
+ * ID, else by enumeration index.
+ *
+ * Once the devices were enumerated, hwctx->phys_dev is written on every
+ * path: the chosen device on success, VK_NULL_HANDLE on failure.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
+{
+    static const char *dev_types[] = {
+        [VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU] = "integrated",
+        [VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU]   = "discrete",
+        [VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU]    = "virtual",
+        [VK_PHYSICAL_DEVICE_TYPE_CPU]            = "software",
+        [VK_PHYSICAL_DEVICE_TYPE_OTHER]          = "unknown",
+    };
+    AVVulkanDeviceContext *hwctx = ctx->hwctx;
+    VkPhysicalDeviceProperties *props = NULL;
+    VkPhysicalDevice *devs = NULL;
+    VkPhysicalDevice picked = VK_NULL_HANDLE;
+    uint32_t nb_devs;
+    VkResult res;
+    int status = 0, found = 0;
+
+    /* First call only reports how many devices there are */
+    res = vkEnumeratePhysicalDevices(hwctx->inst, &nb_devs, NULL);
+    if (res != VK_SUCCESS || !nb_devs) {
+        av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", vk_ret2str(res));
+        return AVERROR_EXTERNAL;
+    }
+
+    devs = av_malloc_array(nb_devs, sizeof(VkPhysicalDevice));
+    if (!devs)
+        return AVERROR(ENOMEM);
+
+    res = vkEnumeratePhysicalDevices(hwctx->inst, &nb_devs, devs);
+    if (res != VK_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n",
+               vk_ret2str(res));
+        status = AVERROR_EXTERNAL;
+        goto done;
+    }
+
+    props = av_malloc_array(nb_devs, sizeof(VkPhysicalDeviceProperties));
+    if (!props) {
+        status = AVERROR(ENOMEM);
+        goto done;
+    }
+
+    av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n");
+    for (int i = 0; i < nb_devs; i++) {
+        vkGetPhysicalDeviceProperties(devs[i], &props[i]);
+        av_log(ctx, AV_LOG_VERBOSE, "    %d: %s (%s) (0x%x)\n", i, props[i].deviceName,
+               dev_types[props[i].deviceType], props[i].deviceID);
+    }
+
+    if (select->name) {
+        /* Exact match on the device name */
+        av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name);
+        for (int i = 0; i < nb_devs && !found; i++) {
+            if (strcmp(select->name, props[i].deviceName) == 0) {
+                picked = devs[i];
+                found  = 1;
+            }
+        }
+        if (!found) {
+            av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n",
+                   select->name);
+            status = AVERROR_UNKNOWN;
+        }
+    } else if (select->pci_device) {
+        /* Match on the PCI device ID */
+        av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device);
+        for (int i = 0; i < nb_devs && !found; i++) {
+            if (select->pci_device == props[i].deviceID) {
+                picked = devs[i];
+                found  = 1;
+            }
+        }
+        if (!found) {
+            av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n",
+                   select->pci_device);
+            status = AVERROR(EINVAL);
+        }
+    } else if (select->vendor_id) {
+        /* First device of the requested vendor */
+        av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id);
+        for (int i = 0; i < nb_devs && !found; i++) {
+            if (select->vendor_id == props[i].vendorID) {
+                picked = devs[i];
+                found  = 1;
+            }
+        }
+        if (!found) {
+            av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n",
+                   select->vendor_id);
+            status = AVERROR_UNKNOWN;
+        }
+    } else if (select->index < nb_devs) {
+        /* Plain position in the enumeration order */
+        picked = devs[select->index];
+    } else {
+        av_log(ctx, AV_LOG_ERROR, "Unable to find device with index %i!\n",
+               select->index);
+        status = AVERROR_UNKNOWN;
+    }
+
+done:
+    av_free(devs);
+    av_free(props);
+    hwctx->phys_dev = picked;
+
+    return status;
+}
+
+/*
+ * Picks the queue families used for graphics, compute and transfers and
+ * appends one queue create entry per picked family to cd.
+ *
+ * A family is searched for only when the matching hwctx index is still 0.
+ * The compute family must differ from the graphics one, and the transfer
+ * family from both; when no such family exists the index falls back to the
+ * compute or graphics family.
+ *
+ * @param ctx device context; hwctx->phys_dev must already be set
+ * @param cd  device create info whose pQueueCreateInfos array has room for
+ *            three entries; queueCreateInfoCount is advanced per entry added
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int search_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
+{
+    uint32_t num;
+    VkQueueFamilyProperties *qs = NULL;
+    AVVulkanDeviceContext *hwctx = ctx->hwctx;
+    int graph_index = -1, comp_index = -1, tx_index = -1;
+    VkDeviceQueueCreateInfo *pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos;
+
+    /* First get the number of queue families */
+    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL);
+    if (!num) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    /* Then allocate memory */
+    qs = av_malloc_array(num, sizeof(VkQueueFamilyProperties));
+    if (!qs)
+        return AVERROR(ENOMEM);
+
+    /* Finally retrieve the queue families */
+    vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qs);
+
+/* Stores in out the index of the first family for which expr holds */
+#define SEARCH_FLAGS(expr, out)                                               \
+    for (int i = 0; i < num; i++) {                                           \
+        const VkQueueFlagBits flags = qs[i].queueFlags;                       \
+        if (expr) {                                                           \
+            out = i;                                                          \
+            break;                                                            \
+        }                                                                     \
+    }
+
+    if (!hwctx->queue_family_index) {
+        SEARCH_FLAGS(flags & VK_QUEUE_GRAPHICS_BIT, graph_index)
+    }
+
+    if (!hwctx->queue_family_comp_index) {
+        SEARCH_FLAGS((flags & VK_QUEUE_COMPUTE_BIT) && (i != graph_index),
+                     comp_index)
+    }
+
+    if (!hwctx->queue_family_tx_index) {
+        SEARCH_FLAGS((flags & VK_QUEUE_TRANSFER_BIT) && (i != graph_index) &&
+                     (i != comp_index), tx_index)
+    }
+
+#undef SEARCH_FLAGS
+
+    /* Everything below indexes qs[] with graph_index and hands it to Vulkan,
+     * so a missing graphics family must stop here instead of reading qs[-1] */
+    if (graph_index < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to select a graphics queue family!\n");
+        av_free(qs);
+        return AVERROR_EXTERNAL;
+    }
+
+/* Expands to the four strings consumed by the "%s%s%s%s" below */
+#define QF_FLAGS(flags)                                                       \
+    ((flags) & VK_QUEUE_GRAPHICS_BIT      ) ? "(graphics) " : "",             \
+    ((flags) & VK_QUEUE_COMPUTE_BIT       ) ? "(compute) "  : "",             \
+    ((flags) & VK_QUEUE_TRANSFER_BIT      ) ? "(transfer) " : "",             \
+    ((flags) & VK_QUEUE_SPARSE_BINDING_BIT) ? "(sparse) "   : ""
+
+    av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for graphics, "
+           "flags: %s%s%s%s\n", graph_index, QF_FLAGS(qs[graph_index].queueFlags));
+
+    /* Start with the graphics family for everything, then specialize */
+    hwctx->queue_family_index      = graph_index;
+    hwctx->queue_family_tx_index   = graph_index;
+    hwctx->queue_family_comp_index = graph_index;
+
+    pc[cd->queueCreateInfoCount++].queueFamilyIndex = graph_index;
+
+    if (comp_index != -1) {
+        av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for compute, "
+               "flags: %s%s%s%s\n", comp_index, QF_FLAGS(qs[comp_index].queueFlags));
+        hwctx->queue_family_tx_index   = comp_index;
+        hwctx->queue_family_comp_index = comp_index;
+        pc[cd->queueCreateInfoCount++].queueFamilyIndex = comp_index;
+    }
+
+    if (tx_index != -1) {
+        av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for transfers, "
+               "flags: %s%s%s%s\n", tx_index, QF_FLAGS(qs[tx_index].queueFlags));
+        hwctx->queue_family_tx_index = tx_index;
+        pc[cd->queueCreateInfoCount++].queueFamilyIndex = tx_index;
+    }
+
+#undef QF_FLAGS
+
+    av_free(qs);
+
+    return 0;
+}
+
+/*
+ * Creates the objects used to submit commands on the transfer queue family:
+ * a command pool, one primary command buffer allocated from it, a fence, and
+ * the handle of queue 0 of that family. All of them are stored in the
+ * device's private data.
+ *
+ * Objects created before a failure are not destroyed here; free_exec_ctx()
+ * releases whatever was created.
+ *
+ * @return 0 on success, AVERROR_EXTERNAL if a Vulkan call fails
+ */
+static int create_exec_ctx(AVHWDeviceContext *ctx)
+{
+    VkResult ret;
+    AVVulkanDeviceContext *hwctx = ctx->hwctx;
+    VulkanDevicePriv *p = ctx->internal->priv;
+
+    VkCommandPoolCreateInfo cqueue_create = {
+        .sType            = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+        .flags            = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+        .queueFamilyIndex = hwctx->queue_family_tx_index,
+    };
+    VkCommandBufferAllocateInfo cbuf_create = {
+        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+        .commandBufferCount = 1,
+    };
+    VkFenceCreateInfo fence_spawn = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
+
+    /* Failures must be reported as negative AVERROR codes: a positive value
+     * is not recognized as an error by callers testing for "< 0" */
+    ret = vkCreateCommandPool(hwctx->act_dev, &cqueue_create,
+                              hwctx->alloc, &p->cmd_pool);
+    if (ret != VK_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Command pool creation failure: %s\n",
+               vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    cbuf_create.commandPool = p->cmd_pool;
+
+    ret = vkAllocateCommandBuffers(hwctx->act_dev, &cbuf_create, &p->cmd_buf);
+    if (ret != VK_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
+               vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    ret = vkCreateFence(hwctx->act_dev, &fence_spawn,
+                        hwctx->alloc, &p->cmd_fence);
+    if (ret != VK_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
+               vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    vkGetDeviceQueue(hwctx->act_dev, hwctx->queue_family_tx_index, 0,
+                     &p->cmd_queue);
+
+    return 0;
+}
+
+/*
+ * Destroys the fence, command buffer and command pool kept in the device's
+ * private data, skipping any handle that is still VK_NULL_HANDLE.
+ * Does nothing when the private data is missing.
+ */
+static void free_exec_ctx(AVHWDeviceContext *ctx)
+{
+    AVVulkanDeviceContext *vk = ctx->hwctx;
+    VulkanDevicePriv *priv = ctx->internal->priv;
+
+    if (priv) {
+        if (priv->cmd_fence != VK_NULL_HANDLE) {
+            vkDestroyFence(vk->act_dev, priv->cmd_fence, vk->alloc);
+        }
+        /* The command buffer is returned before its pool is destroyed */
+        if (priv->cmd_buf != VK_NULL_HANDLE) {
+            vkFreeCommandBuffers(vk->act_dev, priv->cmd_pool, 1, &priv->cmd_buf);
+        }
+        if (priv->cmd_pool != VK_NULL_HANDLE) {
+            vkDestroyCommandPool(vk->act_dev, priv->cmd_pool, vk->alloc);
+        }
+    }
+}
+
+/*
+ * Device context free callback. Tears things down in order: the command
+ * submission objects, the logical device, the debug messenger (if one was
+ * created) and finally the instance.
+ */
+static void vulkan_device_free(AVHWDeviceContext *ctx)
+{
+    AVVulkanDeviceContext *vk = ctx->hwctx;
+    VulkanDevicePriv *priv = ctx->internal->priv;
+
+    free_exec_ctx(ctx);
+
+    vkDestroyDevice(vk->act_dev, vk->alloc);
+
+    if (priv && priv->debug_ctx != VK_NULL_HANDLE) {
+        /* The destroy entry point has to be looked up through the instance */
+        PFN_vkDestroyDebugUtilsMessengerEXT destroy_messenger =
+            (PFN_vkDestroyDebugUtilsMessengerEXT)
+            vkGetInstanceProcAddr(vk->inst, "vkDestroyDebugUtilsMessengerEXT");
+
+        destroy_messenger(vk->inst, priv->debug_ctx, vk->alloc);
+    }
+
+    vkDestroyInstance(vk->inst, vk->alloc);
+}
+
+/*
+ * Shared implementation of device creation and derivation.
+ *
+ * Allocates the private device data, then creates whichever of the instance,
+ * physical device choice and logical device the caller has not already put
+ * into the hwctx, and finally reads the "linear_images" and
+ * "disjoint_images" options.
+ *
+ * @param ctx        device context being created
+ * @param dev_select criteria used to pick a physical device when none is set
+ * @param opts       creation options, may be NULL
+ * @param flags      not used
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
+                                         VulkanDeviceSelection *dev_select,
+                                         AVDictionary *opts, int flags)
+{
+    int err = 0;
+    VkResult ret;
+    AVDictionaryEntry *opt_d;
+    AVVulkanDeviceContext *hwctx = ctx->hwctx;
+    /* One slot per queue family search_queue_families() may pick */
+    VkDeviceQueueCreateInfo queue_create_info[3] = {
+        {   .sType            = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+            .pQueuePriorities = (float []){ 1.0f },
+            .queueCount       = 1, },
+        {   .sType            = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+            .pQueuePriorities = (float []){ 1.0f },
+            .queueCount       = 1, },
+        {   .sType            = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+            .pQueuePriorities = (float []){ 1.0f },
+            .queueCount       = 1, },
+    };
+
+    VkDeviceCreateInfo dev_info = {
+        .sType                = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+        .pQueueCreateInfos    = queue_create_info,
+        .queueCreateInfoCount = 0,
+    };
+
+    VulkanDevicePriv *p = av_mallocz(sizeof(*p));
+    if (!p) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    ctx->internal->priv = p;
+    ctx->free           = vulkan_device_free;
+
+    /* Create an instance if not given one */
+    if (!hwctx->inst && (err = create_instance(ctx, opts)))
+        goto fail;
+
+    /* Find a device (if not given one) */
+    if (!hwctx->phys_dev && (err = find_device(ctx, dev_select)))
+        goto fail;
+
+    vkGetPhysicalDeviceProperties(hwctx->phys_dev, &p->props);
+    av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n", p->props.deviceName);
+    av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n");
+    /* The limits are 64-bit/size_t values; cast them to the exact type the
+     * conversion specifier expects so this is also correct where long is
+     * 32 bits wide */
+    av_log(ctx, AV_LOG_VERBOSE, "    optimalBufferCopyOffsetAlignment:   %llu\n",
+           (unsigned long long)p->props.limits.optimalBufferCopyOffsetAlignment);
+    av_log(ctx, AV_LOG_VERBOSE, "    optimalBufferCopyRowPitchAlignment: %llu\n",
+           (unsigned long long)p->props.limits.optimalBufferCopyRowPitchAlignment);
+    av_log(ctx, AV_LOG_VERBOSE, "    minMemoryMapAlignment:              %llu\n",
+           (unsigned long long)p->props.limits.minMemoryMapAlignment);
+
+    /* Search queue family */
+    if ((err = search_queue_families(ctx, &dev_info)))
+        goto fail;
+
+    if (!hwctx->act_dev) {
+        err = check_extensions(ctx, 1, &dev_info.ppEnabledExtensionNames,
+                               &dev_info.enabledExtensionCount, 0);
+        if (err)
+            goto fail;
+
+        ret = vkCreateDevice(hwctx->phys_dev, &dev_info,
+                             hwctx->alloc, &hwctx->act_dev);
+
+        /* The name list is only needed for the call above; release it before
+         * looking at the result so the failure path does not leak it */
+        av_free((void *)dev_info.ppEnabledExtensionNames);
+
+        if (ret != VK_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n",
+                   vk_ret2str(ret));
+            err = AVERROR_EXTERNAL;
+            goto fail;
+        }
+    }
+
+    /* Tiled images setting, use them by default */
+    opt_d = av_dict_get(opts, "linear_images", NULL, 0);
+    if (opt_d)
+        p->use_linear_images = strtol(opt_d->value, NULL, 10);
+
+    /* Disjoint images setting, don't use them by default */
+    opt_d = av_dict_get(opts, "disjoint_images", NULL, 0);
+    if (opt_d)
+        p->use_disjoint_images = strtol(opt_d->value, NULL, 10);
+
+    return 0;
+
+fail:
+    av_freep(&ctx->internal->priv);
+    return err;
+}
+
+/*
+ * Device init callback: sets up the command submission objects and, when
+ * that succeeds, caches the memory properties of the physical device.
+ *
+ * @return 0 on success, otherwise the value returned by create_exec_ctx()
+ */
+static int vulkan_device_init(AVHWDeviceContext *ctx)
+{
+    AVVulkanDeviceContext *vk = ctx->hwctx;
+    VulkanDevicePriv *priv = ctx->internal->priv;
+
+    /* Create exec context - if there's something invalid this will error out */
+    const int status = create_exec_ctx(ctx);
+
+    /* Get device capabilities */
+    if (!status)
+        vkGetPhysicalDeviceMemoryProperties(vk->phys_dev, &priv->mprops);
+
+    return status;
+}
+
+/**
+ * Create a Vulkan device from a user supplied device string.
+ *
+ * A string starting with a digit is parsed as a device index, any other
+ * non-empty string is used as a device name. A NULL or empty string leaves
+ * the selection zeroed.
+ */
+static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,
+                                AVDictionary *opts, int flags)
+{
+    VulkanDeviceSelection sel = { 0 };
+    const int have_device = device && device[0];
+
+    if (have_device && av_isdigit(device[0]))
+        sel.index = strtol(device, NULL, 10);
+    else if (have_device)
+        sel.name = device;
+
+    return vulkan_device_create_internal(ctx, &sel, opts, flags);
+}
+
+/**
+ * Create a Vulkan device matching an existing device of another API.
+ *
+ * Only VAAPI and DRM sources are handled (and only when built with libdrm);
+ * any other source type yields AVERROR(ENOSYS).
+ *
+ * @param ctx     Vulkan device context being created
+ * @param src_ctx device context to derive from
+ * @param flags   passed through to vulkan_device_create_internal()
+ * @return the result of vulkan_device_create_internal(), or a negative
+ *         AVERROR code if the source device could not be queried
+ */
+static int vulkan_device_derive(AVHWDeviceContext *ctx,
+                                AVHWDeviceContext *src_ctx, int flags)
+{
+    VulkanDeviceSelection dev_select = { 0 };
+
+    switch(src_ctx->type) {
+#if CONFIG_LIBDRM
+#if CONFIG_VAAPI
+    case AV_HWDEVICE_TYPE_VAAPI: {
+        AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;
+        const char *vendor = vaQueryVendorString(src_hwctx->display);
+        if (!vendor) {
+            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from vaapi!\n");
+            return AVERROR_EXTERNAL;
+        }
+
+        /* Select by PCI vendor ID derived from the driver's vendor string */
+        if (strstr(vendor, "Intel"))
+            dev_select.vendor_id = 0x8086;
+        if (strstr(vendor, "AMD"))
+            dev_select.vendor_id = 0x1002;
+
+        return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
+    }
+#endif
+    case AV_HWDEVICE_TYPE_DRM: {
+        AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;
+
+        drmDevice *drm_dev_info;
+        int err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
+        if (err) {
+            av_log(ctx, AV_LOG_ERROR, "Unable to get device info from drm fd!\n");
+            return AVERROR_EXTERNAL;
+        }
+
+        /* deviceinfo is a union, the pci member is only valid on PCI buses */
+        if (drm_dev_info->bustype == DRM_BUS_PCI)
+            dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;
+
+        /* The device description is allocated by libdrm and owned by us */
+        drmFreeDevice(&drm_dev_info);
+
+        return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
+    }
+#endif
+    default:
+        return AVERROR(ENOSYS);
+    }
+}
+
+/**
+ * Fill in the frame constraints of a Vulkan device: the list of software
+ * formats accepted by vkfmt_is_supported() for the configured tiling, the
+ * image size limits and the single hardware format AV_PIX_FMT_VULKAN.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if a format list can't be allocated
+ */
+static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
+                                         const void *hwconfig,
+                                         AVHWFramesConstraints *constraints)
+{
+    AVVulkanDeviceContext *hwctx = ctx->hwctx;
+    VulkanDevicePriv *p = ctx->internal->priv;
+    const int linear = p->use_linear_images;
+    enum AVPixelFormat fmt;
+    int nb_sw = 0, pos = 0;
+
+    /* First pass: find out how many entries the list needs */
+    for (fmt = 0; fmt < AV_PIX_FMT_NB; fmt++)
+        nb_sw += vkfmt_is_supported(hwctx, fmt, linear);
+
+    constraints->valid_sw_formats = av_malloc_array(nb_sw + 1,
+                                                    sizeof(enum AVPixelFormat));
+    if (!constraints->valid_sw_formats)
+        return AVERROR(ENOMEM);
+
+    /* Second pass: store the supported formats, then terminate the list */
+    for (fmt = 0; fmt < AV_PIX_FMT_NB; fmt++) {
+        if (!vkfmt_is_supported(hwctx, fmt, linear))
+            continue;
+        constraints->valid_sw_formats[pos++] = fmt;
+    }
+    constraints->valid_sw_formats[pos] = AV_PIX_FMT_NONE;
+
+    constraints->min_width  = 0;
+    constraints->min_height = 0;
+    constraints->max_width  = p->props.limits.maxImageDimension2D;
+    constraints->max_height = p->props.limits.maxImageDimension2D;
+
+    constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat));
+    if (!constraints->valid_hw_formats)
+        return AVERROR(ENOMEM);
+
+    constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN;
+    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
+
+    return 0;
+}
+
+/**
+ * Allocate device memory satisfying the given requirements.
+ *
+ * @param ctx             device context
+ * @param req             memory requirements; when host visible memory is
+ *                        requested the size is rounded up in place to
+ *                        minMemoryMapAlignment
+ * @param req_flags       property flags the memory type must provide
+ * @param alloc_extension chained into VkMemoryAllocateInfo.pNext
+ * @param mem_flags       ORed with the property flags of the chosen type
+ * @param mem             receives the allocated memory
+ * @return 0 on success, AVERROR(EINVAL) if no memory type matches,
+ *         AVERROR(ENOMEM) if the allocation itself fails
+ */
+static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
+                     VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+                     VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
+{
+    VkResult ret;
+    int index = -1;
+    VulkanDevicePriv *p = ctx->internal->priv;
+    AVVulkanDeviceContext *dev_hwctx = ctx->hwctx;
+    VkMemoryAllocateInfo alloc_info = {
+        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+        .pNext = alloc_extension,
+    };
+
+    /* Align if we need to */
+    if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+        req->size = FFALIGN(req->size, p->props.limits.minMemoryMapAlignment);
+
+    alloc_info.allocationSize = req->size;
+
+    /* The vulkan spec requires memory types to be sorted in the "optimal"
+     * order, so the first matching type we find will be the best/fastest one */
+    for (uint32_t i = 0; i < p->mprops.memoryTypeCount; i++) {
+        /* The memory type must be supported by the requirements (bitfield).
+         * The shift is done unsigned so that bit 31 is well defined. */
+        if (!(req->memoryTypeBits & (1U << i)))
+            continue;
+
+        /* The memory type flags must include our properties */
+        if ((p->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
+            continue;
+
+        /* Found a suitable memory type */
+        index = i;
+        break;
+    }
+
+    if (index < 0) {
+        av_log(ctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
+               req_flags);
+        return AVERROR(EINVAL);
+    }
+
+    alloc_info.memoryTypeIndex = index;
+
+    ret = vkAllocateMemory(dev_hwctx->act_dev, &alloc_info,
+                           dev_hwctx->alloc, mem);
+    if (ret != VK_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
+               vk_ret2str(ret));
+        return AVERROR(ENOMEM);
+    }
+
+    /* Report what the chosen type actually provides (e.g. coherency) */
+    *mem_flags |= p->mprops.memoryTypes[index].propertyFlags;
+
+    return 0;
+}
+
+/**
+ * Buffer free callback for an AVVkFrame: destroys the image, frees every
+ * memory object attached to it and releases the frame structure itself.
+ *
+ * @param opaque the AVVulkanDeviceContext the frame was created on
+ * @param data   the AVVkFrame to free, may be NULL
+ */
+static void vulkan_frame_free(void *opaque, uint8_t *data)
+{
+    AVVulkanDeviceContext *dev = opaque;
+    AVVkFrame *frame = (AVVkFrame *)data;
+    int idx = 0;
+
+    if (frame == NULL)
+        return;
+
+    vkDestroyImage(dev->act_dev, frame->img, dev->alloc);
+    while (idx < frame->mem_count) {
+        vkFreeMemory(dev->act_dev, frame->mem[idx], dev->alloc);
+        idx++;
+    }
+
+    av_free(frame);
+}
+
+/**
+ * Allocate memory for every memory object of a frame and bind it to the
+ * frame's image.
+ *
+ * @param ctx                device context
+ * @param f                  frame with a created image and mem_count set
+ * @param alloc_pnext        array of structures, one per memory object, to be
+ *                           chained into the allocation info; may be NULL
+ * @param alloc_pnext_stride size in bytes of one element of alloc_pnext
+ * @return 0 on success, a negative AVERROR code on failure. Memory that was
+ *         already allocated stays referenced by f->mem[] for the caller to
+ *         release.
+ */
+static int alloc_bind_mem(AVHWDeviceContext *ctx, AVVkFrame *f,
+                          void *alloc_pnext, size_t alloc_pnext_stride)
+{
+    int err;
+    VkResult ret;
+    VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };
+    VkBindImagePlaneMemoryInfo bind_p_info[AV_NUM_DATA_POINTERS] = { { 0 } };
+
+    AVVulkanDeviceContext *hwctx = ctx->hwctx;
+    VulkanDevicePriv *p = ctx->internal->priv;
+
+    VK_LOAD_PFN(hwctx->inst, vkBindImageMemory2KHR);
+    VK_LOAD_PFN(hwctx->inst, vkGetImageMemoryRequirements2KHR);
+
+    for (int i = 0; i < f->mem_count; i++) {
+        int use_ded_mem;
+        /* Without a caller supplied chain there is nothing to offset into;
+         * doing the arithmetic on NULL would yield a bogus non-NULL pNext */
+        void *plane_pnext = alloc_pnext ?
+                            (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride) :
+                            NULL;
+        VkImagePlaneMemoryRequirementsInfo plane_req = {
+            .sType       = VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO,
+            .planeAspect = i == 0 ? VK_IMAGE_ASPECT_PLANE_0_BIT :
+                           i == 1 ? VK_IMAGE_ASPECT_PLANE_1_BIT :
+                                    VK_IMAGE_ASPECT_PLANE_2_BIT,
+        };
+        VkImageMemoryRequirementsInfo2 req_desc = {
+            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
+            .pNext = f->mem_count > 1 ? &plane_req : NULL,
+            .image = f->img,
+        };
+        VkMemoryDedicatedAllocateInfo ded_alloc = {
+            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
+            .pNext = plane_pnext,
+        };
+        VkMemoryDedicatedRequirements ded_req = {
+            .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
+        };
+        VkMemoryRequirements2 req = {
+            .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
+            .pNext = (p->extensions & EXT_DEDICATED_ALLOC) ? &ded_req : NULL,
+        };
+
+        pfn_vkGetImageMemoryRequirements2KHR(hwctx->act_dev, &req_desc, &req);
+
+        /* In case the implementation prefers/requires dedicated allocation */
+        use_ded_mem = ded_req.prefersDedicatedAllocation |
+                      ded_req.requiresDedicatedAllocation;
+        if (use_ded_mem)
+            ded_alloc.image = f->img;
+
+        /* Allocate memory, chaining the dedicated allocation info in front
+         * of the caller's structure only when it is going to be used */
+        if ((err = alloc_mem(ctx, &req.memoryRequirements,
+                             f->tiling == VK_IMAGE_TILING_LINEAR ?
+                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
+                             VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+                             use_ded_mem ? (void *)&ded_alloc : plane_pnext,
+                             &f->flags, &f->mem[i])))
+            return err;
+
+        if (f->mem_count > 1) {
+            bind_p_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
+            bind_p_info[i].planeAspect = plane_req.planeAspect;
+            bind_info[i].pNext = &bind_p_info[i];
+        }
+
+        bind_info[i].sType  = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
+        bind_info[i].image  = f->img;
+        bind_info[i].memory = f->mem[i];
+    }
+
+    /* Bind the allocated memory to the image */
+    ret = pfn_vkBindImageMemory2KHR(hwctx->act_dev, f->mem_count, bind_info);
+    if (ret != VK_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
+               vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+/**
+ * Create a Vulkan image for the frames context's software format, allocate
+ * memory for it and bind the two together.
+ *
+ * @param hwfc               frames context (provides format and dimensions)
+ * @param frame              receives the new frame, only written on success
+ * @param tiling             image tiling to use
+ * @param usage              image usage flags
+ * @param disjoint           non-zero to use one memory object per plane
+ * @param create_pnext       chained into the image creation info
+ * @param alloc_pnext        array of structures chained into each allocation
+ * @param alloc_pnext_stride size in bytes of one element of alloc_pnext
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
+                        VkImageTiling tiling, VkImageUsageFlagBits usage,
+                        int disjoint, void *create_pnext, void *alloc_pnext,
+                        size_t alloc_pnext_stride)
+{
+    int err;
+    VkResult ret;
+    AVHWDeviceContext *ctx = hwfc->device_ctx;
+    enum AVPixelFormat format = hwfc->sw_format;
+    VkFormat img_fmt = av_vkfmt_from_pixfmt(format);
+    const int planes = av_pix_fmt_count_planes(format);
+
+    /* Allocated */
+    AVVkFrame *f = NULL;
+
+    /* Contexts */
+    AVVulkanDeviceContext *hwctx = ctx->hwctx;
+    VulkanDevicePriv *p = ctx->internal->priv;
+
+    /* Image properties */
+    VkFormat possible_fmts[2];
+    VkImageFormatListCreateInfoKHR img_fmt_list = {
+        .sType           = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR,
+        .pNext           = create_pnext,
+        .pViewFormats    = possible_fmts,
+        .viewFormatCount = 1,
+    };
+    VkImageCreateInfo image_create_info = {
+        .sType         = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+        .pNext         = create_pnext,
+        .imageType     = VK_IMAGE_TYPE_2D,
+        .format        = img_fmt,
+        .extent.width  = hwfc->width,
+        .extent.height = hwfc->height,
+        .extent.depth  = 1,
+        .mipLevels     = 1,
+        .arrayLayers   = 1,
+        .flags         = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT |
+                         VK_IMAGE_CREATE_EXTENDED_USAGE_BIT |
+                         (disjoint ? VK_IMAGE_CREATE_DISJOINT_BIT : 0),
+        .tiling        = tiling,
+        .initialLayout = tiling == VK_IMAGE_TILING_LINEAR ?
+                         VK_IMAGE_LAYOUT_PREINITIALIZED :
+                         VK_IMAGE_LAYOUT_UNDEFINED,
+        .usage         = usage,
+        .sharingMode   = VK_SHARING_MODE_EXCLUSIVE,
+        .samples       = VK_SAMPLE_COUNT_1_BIT,
+    };
+
+    if (img_fmt == VK_FORMAT_UNDEFINED) {
+        av_log(ctx, AV_LOG_ERROR, "Unsupported image format!\n");
+        return AVERROR(EINVAL);
+    }
+
+    f = av_mallocz(sizeof(*f));
+    if (!f) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    /* Needed */
+    f->flags     = 0;
+    f->mem_count = disjoint ? planes : 1;
+    f->tiling    = image_create_info.tiling;
+    f->layout    = image_create_info.initialLayout;
+    f->access    = 0;
+
+    possible_fmts[0] = image_create_info.format;
+    /* Mark the formats that a VkImageView can be made of if supported */
+    if ((planes > 1) && (p->extensions & EXT_IMAGE_FORMAT_LIST)) {
+        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
+        int have_plane_fmt = 1;
+        switch (desc->comp[0].depth) {
+        case  8: possible_fmts[1] = VK_FORMAT_R8_UNORM;           break;
+        case 10: possible_fmts[1] = VK_FORMAT_R10X6_UNORM_PACK16; break;
+        case 12: possible_fmts[1] = VK_FORMAT_R12X4_UNORM_PACK16; break;
+        case 16: possible_fmts[1] = VK_FORMAT_R16_UNORM;          break;
+        /* No per-plane format known for this depth: don't advertise an
+         * uninitialized entry, create the image without the format list */
+        default: have_plane_fmt = 0;                              break;
+        }
+        if (have_plane_fmt) {
+            img_fmt_list.viewFormatCount++;
+            image_create_info.pNext = &img_fmt_list;
+        }
+    }
+
+    /* Create the image */
+    ret = vkCreateImage(hwctx->act_dev, &image_create_info,
+                        hwctx->alloc, &f->img);
+    if (ret != VK_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
+               vk_ret2str(ret));
+        err = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    if ((err = alloc_bind_mem(ctx, f, alloc_pnext, alloc_pnext_stride)))
+        goto fail;
+
+    *frame = f;
+    return 0;
+
+fail:
+    /* Handles a NULL frame as well as a partially set up one */
+    vulkan_frame_free(hwctx, (uint8_t *)f);
+    return err;
+}
+
+/* Queries whether images with the frames context's format, tiling, usage and
+ * creation flags can use the external memory handle type exp. If the query
+ * succeeds, exp is ORed into *iexp, otherwise *iexp is left untouched. */
+static void try_export_flags(AVHWFramesContext *hwfc,
+                             VkExternalMemoryHandleTypeFlagBits *iexp,
+                             VkExternalMemoryHandleTypeFlagBits exp)
+{
+    AVVulkanFramesContext *hwctx = hwfc->hwctx;
+    AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
+    VkPhysicalDeviceExternalImageFormatInfo ext_query = {
+        .sType      = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
+        .handleType = exp,
+    };
+    VkPhysicalDeviceImageFormatInfo2 query = {
+        .sType  = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
+        .pNext  = &ext_query,
+        .format = av_vkfmt_from_pixfmt(hwfc->sw_format),
+        .type   = VK_IMAGE_TYPE_2D,
+        .tiling = hwctx->tiling,
+        .usage  = hwctx->usage,
+        .flags  = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT |
+                  VK_IMAGE_CREATE_EXTENDED_USAGE_BIT |
+                  (hwctx->disjoint ? VK_IMAGE_CREATE_DISJOINT_BIT : 0),
+    };
+    VkImageFormatProperties2 result = {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
+    };
+    VK_LOAD_PFN(dev_hwctx->inst, vkGetPhysicalDeviceImageFormatProperties2);
+
+    if (pfn_vkGetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev,
+                                                      &query, &result) != VK_SUCCESS)
+        return;
+
+    *iexp |= exp;
+}
+
+/**
+ * Buffer pool allocator: creates a new AVVkFrame using the frames context's
+ * settings and wraps it in an AVBufferRef that frees it with
+ * vulkan_frame_free().
+ *
+ * @param opaque the AVHWFramesContext the pool belongs to
+ * @param size   unused
+ * @return the new buffer, or NULL on failure
+ */
+static AVBufferRef *vulkan_pool_alloc(void *opaque, int size)
+{
+    AVHWFramesContext *hwfc = opaque;
+    AVVulkanFramesContext *hwctx = hwfc->hwctx;
+    void *dev_hwctx = hwfc->device_ctx->hwctx;
+    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
+    VkExportMemoryAllocateInfo einfo[AV_NUM_DATA_POINTERS];
+    VkExternalMemoryHandleTypeFlags e = 0x0;
+    AVBufferRef *ref;
+    AVVkFrame *f;
+    int plane;
+
+    /* Request DMA-BUF exportable memory whenever the device allows it */
+    try_export_flags(hwfc, &e, VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
+
+    /* One export descriptor per plane, each followed by the user's chain */
+    for (plane = 0; plane < planes; plane++) {
+        einfo[plane].sType       = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
+        einfo[plane].pNext       = hwctx->alloc_pnext[plane];
+        einfo[plane].handleTypes = e;
+    }
+
+    if (create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
+                     hwctx->disjoint, hwctx->create_pnext,
+                     einfo, sizeof(*einfo)))
+        return NULL;
+
+    ref = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame),
+                           vulkan_frame_free, dev_hwctx, 0);
+    if (!ref)
+        vulkan_frame_free(dev_hwctx, (uint8_t *)f);
+
+    return ref;
+}
+
+/**
+ * Initialise a frames context. When the user supplied their own pool nothing
+ * is done; otherwise unset tiling/disjoint settings are filled in from the
+ * device options, the default usage flags are added and an internal pool
+ * backed by vulkan_pool_alloc() is created.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if the pool can't be created
+ */
+static int vulkan_frames_init(AVHWFramesContext *hwfc)
+{
+    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
+    AVVulkanFramesContext *hwctx = hwfc->hwctx;
+    AVBufferPool *pool;
+
+    if (hwfc->pool)
+        return 0;
+
+    /* Default pool flags */
+    if (!hwctx->tiling)
+        hwctx->tiling = p->use_linear_images ? VK_IMAGE_TILING_LINEAR :
+                                               VK_IMAGE_TILING_OPTIMAL;
+
+    hwctx->usage |= DEFAULT_USAGE_FLAGS;
+
+    if (!hwctx->disjoint)
+        hwctx->disjoint = p->use_disjoint_images;
+
+    pool = av_buffer_pool_init2(sizeof(AVVkFrame), hwfc,
+                                vulkan_pool_alloc, NULL);
+    hwfc->internal->pool_internal = pool;
+
+    return pool ? 0 : AVERROR(ENOMEM);
+}
+
+/**
+ * Get a frame from the pool and describe it in an AVFrame: data[0] points
+ * at the buffer's data and format/dimensions come from the frames context.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if the pool has no buffer to give
+ */
+static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
+{
+    AVBufferRef *ref = av_buffer_pool_get(hwfc->pool);
+
+    frame->buf[0] = ref;
+    if (ref == NULL)
+        return AVERROR(ENOMEM);
+
+    frame->data[0] = ref->data;
+    frame->format  = AV_PIX_FMT_VULKAN;
+    frame->width   = hwfc->width;
+    frame->height  = hwfc->height;
+
+    return 0;
+}
+
+/**
+ * Build the AV_PIX_FMT_NONE terminated list of formats a frame can be
+ * transferred to/from: the software format itself and, for non-RGB formats
+ * of depth 8, 10, 12 or 16, the gray format of the same depth.
+ *
+ * @param formats receives the newly allocated list
+ * @return 0 on success, AVERROR(ENOMEM) on allocation failure
+ */
+static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc,
+                                       enum AVHWFrameTransferDirection dir,
+                                       enum AVPixelFormat **formats)
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
+    /* All formats with a luma can have only that channel transferred */
+    const int has_luma = !(desc->flags & AV_PIX_FMT_FLAG_RGB);
+    enum AVPixelFormat *list;
+    int n = 0;
+
+    /* The format itself, the optional luma-only format and the terminator */
+    list = av_malloc((1 + has_luma + 1) * sizeof(*list));
+    if (!list)
+        return AVERROR(ENOMEM);
+
+    /* All formats can be transferred to themselves */
+    list[n++] = hwfc->sw_format;
+
+    if (has_luma) {
+        const int depth = desc->comp[0].depth;
+        if (depth == 8)
+            list[n++] = AV_PIX_FMT_GRAY8;
+        else if (depth == 10)
+            list[n++] = AV_PIX_FMT_GRAY10;
+        else if (depth == 12)
+            list[n++] = AV_PIX_FMT_GRAY12;
+        else if (depth == 16)
+            list[n++] = AV_PIX_FMT_GRAY16;
+    }
+    list[n] = AV_PIX_FMT_NONE;
+
+    *formats = list;
+
+    return 0;
+}
+
+/* Private state of a mapping, stored as the HWMapDescriptor's priv pointer
+ * and read back by the unmap callbacks */
+typedef struct VulkanMapping {
+ /* The Vulkan frame the mapping refers to */
+ AVVkFrame *frame;
+ /* AV_HWFRAME_MAP_* flags the mapping was requested with */
+ int flags;
+} VulkanMapping;
+
+/**
+ * Unmap callback for frames mapped with vulkan_map_frame(). Flushes the
+ * memory first if the mapping was writable and the memory isn't host
+ * coherent, then unmaps every memory object and frees the mapping state.
+ */
+static void vulkan_unmap_frame(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
+{
+    AVVulkanDeviceContext *dev = hwfc->device_ctx->hwctx;
+    VulkanMapping *mapping = hwmap->priv;
+    AVVkFrame *frame = mapping->frame;
+    const int written  = mapping->flags & AV_HWFRAME_MAP_WRITE;
+    const int coherent = frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+    int n;
+
+    /* Writes to non-coherent memory must be flushed explicitly */
+    if (written && !coherent) {
+        VkMappedMemoryRange ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
+        VkResult res;
+
+        for (n = 0; n < frame->mem_count; n++) {
+            ranges[n].sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
+            ranges[n].memory = frame->mem[n];
+            ranges[n].size   = VK_WHOLE_SIZE;
+        }
+
+        res = vkFlushMappedMemoryRanges(dev->act_dev, frame->mem_count, ranges);
+        if (res != VK_SUCCESS)
+            av_log(hwfc, AV_LOG_ERROR, "Failed to flush memory: %s\n",
+                   vk_ret2str(res));
+    }
+
+    for (n = 0; n < frame->mem_count; n++)
+        vkUnmapMemory(dev->act_dev, frame->mem[n]);
+
+    av_free(mapping);
+}
+
+/**
+ * Map a host visible, linearly tiled Vulkan frame into CPU address space.
+ *
+ * On success dst->data[] and dst->linesize[] describe the planes and dst
+ * carries a mapping that is undone by vulkan_unmap_frame().
+ *
+ * @param hwfc  frames context the source frame belongs to
+ * @param dst   frame to fill in
+ * @param src   AV_PIX_FMT_VULKAN frame to map
+ * @param flags AV_HWFRAME_MAP_* flags
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vulkan_map_frame(AVHWFramesContext *hwfc, AVFrame *dst,
+                            const AVFrame *src, int flags)
+{
+    int err;
+    int nb_mapped = 0;
+    VkResult ret;
+    AVVkFrame *f;
+    VulkanMapping *map;
+    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
+    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
+
+    /* Validate before touching src->data[0]: it is only an AVVkFrame when
+     * the source really is a Vulkan frame */
+    if (src->format != AV_PIX_FMT_VULKAN) {
+        av_log(hwfc, AV_LOG_ERROR, "Cannot map from pixel format %s!\n",
+               av_get_pix_fmt_name(src->format));
+        return AVERROR(EINVAL);
+    }
+
+    f = (AVVkFrame *)src->data[0];
+
+    if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
+        !(f->tiling == VK_IMAGE_TILING_LINEAR)) {
+        av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, not host visible "
+               "and linear!\n");
+        return AVERROR(EINVAL);
+    }
+
+    map = av_mallocz(sizeof(VulkanMapping));
+    if (!map)
+        return AVERROR(ENOMEM);
+
+    dst->width  = src->width;
+    dst->height = src->height;
+
+    for (int i = 0; i < f->mem_count; i++) {
+        ret = vkMapMemory(hwctx->act_dev, f->mem[i], 0,
+                          VK_WHOLE_SIZE, 0, (void **)&dst->data[i]);
+        if (ret != VK_SUCCESS) {
+            av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n",
+                   vk_ret2str(ret));
+            err = AVERROR_EXTERNAL;
+            goto fail;
+        }
+        /* Only memory that was actually mapped may be unmapped on failure */
+        nb_mapped++;
+    }
+
+    /* For non disjoint memory duplicate them */
+    if (f->mem_count == 1)
+        for (int i = 1; i < planes; i++)
+            dst->data[i] = dst->data[0];
+
+    /* Check if the memory contents matter */
+    if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) &&
+        !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
+        VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
+        for (int i = 0; i < f->mem_count; i++) {
+            map_mem_ranges[i].sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
+            map_mem_ranges[i].size   = VK_WHOLE_SIZE;
+            map_mem_ranges[i].memory = f->mem[i];
+        }
+
+        ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, f->mem_count,
+                                             map_mem_ranges);
+        if (ret != VK_SUCCESS) {
+            av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
+                   vk_ret2str(ret));
+            err = AVERROR_EXTERNAL;
+            goto fail;
+        }
+    }
+
+    /* Turn the base pointers into per-plane pointers and strides */
+    for (int i = 0; i < planes; i++) {
+        VkImageSubresource sub = {
+            .aspectMask = planes < 2 ? VK_IMAGE_ASPECT_COLOR_BIT :
+                              i == 0 ? VK_IMAGE_ASPECT_PLANE_0_BIT :
+                              i == 1 ? VK_IMAGE_ASPECT_PLANE_1_BIT :
+                                       VK_IMAGE_ASPECT_PLANE_2_BIT,
+        };
+        VkSubresourceLayout layout;
+        vkGetImageSubresourceLayout(hwctx->act_dev, f->img, &sub, &layout);
+        dst->data[i]    += layout.offset;
+        dst->linesize[i] = layout.rowPitch;
+    }
+
+    map->frame = f;
+    map->flags = flags;
+
+    err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
+                                &vulkan_unmap_frame, map);
+    if (err < 0)
+        goto fail;
+
+    return 0;
+
+fail:
+    for (int i = 0; i < nb_mapped; i++)
+        vkUnmapMemory(hwctx->act_dev, f->mem[i]);
+
+    av_free(map);
+    return err;
+}
+
+#if CONFIG_LIBDRM
+/**
+ * Unmap callback for frames imported from another API: destroys the image,
+ * frees the imported memory objects, the AVVkFrame and the mapping state.
+ */
+static void vulkan_unmap_from(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
+{
+    VulkanMapping *map = hwmap->priv;
+    AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
+
+    vkDestroyImage(hwctx->act_dev, map->frame->img, hwctx->alloc);
+    for (int i = 0; i < map->frame->mem_count; i++)
+        vkFreeMemory(hwctx->act_dev, map->frame->mem[i], hwctx->alloc);
+
+    av_freep(&map->frame);
+    /* The mapping state was allocated by the mapping function and is owned
+     * by this callback, same as in vulkan_unmap_frame() */
+    av_free(map);
+}
+
+/**
+ * Create an AVVkFrame whose memory is imported from the DMA-BUF objects of
+ * a DRM frame descriptor.
+ *
+ * @param hwfc destination Vulkan frames context
+ * @param f    receives the new frame, only written on success
+ * @param desc DRM frame descriptor to import
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f,
+                                          AVDRMFrameDescriptor *desc)
+{
+    int err = 0;
+
+    /* Destination frame */
+#if HAVE_VULKAN_DRM_MOD
+    uint64_t modifier_buf[AV_NUM_DATA_POINTERS];
+    VkImageDrmFormatModifierListCreateInfoEXT drm_mod = {
+        /* sType must identify the structure it is stored in */
+        .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT,
+    };
+#endif
+    VkExternalMemoryImageCreateInfo ext_info = {
+        .sType       = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
+#if HAVE_VULKAN_DRM_MOD
+        .pNext       = &drm_mod,
+#endif
+        .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
+    };
+    VkImportMemoryFdInfoKHR import_desc[AV_NUM_DATA_POINTERS];
+
+    /* At least one object is needed, and the local arrays bound the count */
+    if (desc->nb_objects < 1 || desc->nb_objects > AV_NUM_DATA_POINTERS) {
+        av_log(hwfc, AV_LOG_ERROR, "Invalid number of DRM objects!\n");
+        return AVERROR(EINVAL);
+    }
+
+    /* The planes are those of the software format; hwfc->format is the
+     * hardware pixel format and has no planes of its own */
+    if ((desc->nb_objects > 1) &&
+        (desc->nb_objects != av_pix_fmt_count_planes(hwfc->sw_format))) {
+        av_log(hwfc, AV_LOG_ERROR, "Number of DRM objects doesn't match "
+               "plane count!\n");
+        return AVERROR(EINVAL);
+    }
+
+    for (int i = 0; i < desc->nb_objects; i++) {
+        import_desc[i].sType      = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR;
+        import_desc[i].pNext      = NULL;
+        import_desc[i].handleType = ext_info.handleTypes;
+        import_desc[i].fd         = desc->objects[i].fd;
+#if HAVE_VULKAN_DRM_MOD
+        modifier_buf[i]           = desc->objects[i].format_modifier;
+#endif
+    }
+#if HAVE_VULKAN_DRM_MOD
+    drm_mod.pDrmFormatModifiers    = modifier_buf;
+    drm_mod.drmFormatModifierCount = desc->nb_objects;
+#endif
+
+    /* More than one object means one memory object per plane (disjoint) */
+    err = create_frame(hwfc, f,
+#if HAVE_VULKAN_DRM_MOD
+                       VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT,
+#else
+                       desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ?
+                       VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL,
+#endif
+                       DEFAULT_USAGE_FLAGS, desc->nb_objects > 1, &ext_info,
+                       import_desc, sizeof(*import_desc));
+    if (err < 0)
+        return err;
+
+    return 0;
+}
+
+/**
+ * Map a DRM PRIME frame to a Vulkan frame by importing its objects.
+ *
+ * On success dst->data[0] points at the new AVVkFrame, which is released by
+ * vulkan_unmap_from() when the mapping goes away.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst,
+                               const AVFrame *src, int flags)
+{
+    int err = 0;
+    AVVkFrame *f = NULL;
+    VulkanMapping *map = NULL;
+
+    /* Nothing has been created yet if this fails, so just return */
+    err = vulkan_map_from_drm_frame_desc(hwfc, &f,
+                                         (AVDRMFrameDescriptor *)src->data[0]);
+    if (err)
+        return err;
+
+    /* The unmapping function will free this */
+    dst->data[0] = (uint8_t *)f;
+    dst->width   = src->width;
+    dst->height  = src->height;
+
+    map = av_mallocz(sizeof(VulkanMapping));
+    if (!map) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    map->frame = f;
+    map->flags = flags;
+
+    err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
+                                &vulkan_unmap_from, map);
+    if (err < 0)
+        goto fail;
+
+    av_log(hwfc, AV_LOG_DEBUG, "Mapped DRM object to Vulkan!\n");
+
+    return 0;
+
+fail:
+    vulkan_frame_free(hwfc->device_ctx->hwctx, (uint8_t *)f);
+    /* Don't leave the caller with a pointer to the frame just freed */
+    dst->data[0] = NULL;
+    av_free(map);
+    return err;
+}
+
+#if CONFIG_VAAPI
+/**
+ * Map a VAAPI frame to Vulkan by going through DRM PRIME: the source is
+ * first mapped to a temporary DRM frame, that frame is imported into
+ * Vulkan, and finally the mapping's source is replaced with the original
+ * VAAPI frame.
+ *
+ * @return 0 or a positive value on success, a negative AVERROR on failure
+ */
+static int vulkan_map_from_vaapi(AVHWFramesContext *dst_fc,
+                                 AVFrame *dst, const AVFrame *src,
+                                 int flags)
+{
+    AVFrame *drm_frame = av_frame_alloc();
+    int ret;
+
+    if (!drm_frame)
+        return AVERROR(ENOMEM);
+
+    drm_frame->format = AV_PIX_FMT_DRM_PRIME;
+
+    /* Each step only runs if the previous one succeeded */
+    ret = av_hwframe_map(drm_frame, src, flags);
+    if (ret >= 0)
+        ret = vulkan_map_from_drm(dst_fc, dst, drm_frame, flags);
+    if (ret >= 0)
+        ret = ff_hwframe_map_replace(dst, src);
+
+    av_frame_free(&drm_frame);
+    return ret;
+}
+#endif
+#endif
+
+/**
+ * Map a frame of another API to a Vulkan frame. Requires external memory
+ * support; VAAPI and DRM PRIME sources additionally require the DRM
+ * modifier extension flag. Everything else yields AVERROR(ENOSYS).
+ */
+static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
+                         const AVFrame *src, int flags)
+{
+    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
+
+    if (!(p->extensions & EXT_EXTERNAL_MEMORY)) {
+        av_log(hwfc, AV_LOG_ERROR, "Cannot import any external memory, "
+               "VK_KHR_external_memory is unsupported!\n");
+        return AVERROR(ENOSYS);
+    }
+
+    switch (src->format) {
+#if CONFIG_LIBDRM
+#if CONFIG_VAAPI
+    case AV_PIX_FMT_VAAPI:
+        if (p->extensions & EXT_DRM_MODIFIER_FLAGS)
+            return vulkan_map_from_vaapi(hwfc, dst, src, flags);
+        break;
+#endif
+    case AV_PIX_FMT_DRM_PRIME:
+        if (p->extensions & EXT_DRM_MODIFIER_FLAGS)
+            return vulkan_map_from_drm(hwfc, dst, src, flags);
+        break;
+#endif
+    default:
+        break;
+    }
+
+    /* Unknown source format, or the needed extension is missing */
+    return AVERROR(ENOSYS);
+}
+
+/* A Vulkan buffer together with the device memory bound to it, used as the
+ * staging area for transfers between host memory and images */
+typedef struct ImageBuffer {
+ VkBuffer buf;
+ VkDeviceMemory mem;
+ /* Property flags of the memory type mem was allocated from */
+ VkMemoryPropertyFlagBits flags;
+} ImageBuffer;
+
+/**
+ * Create a buffer, allocate memory for it and bind the two together.
+ *
+ * On failure the parts created so far stay in buf; the caller releases
+ * them with free_buf().
+ *
+ * @param ctx          device context
+ * @param buf          buffer to fill in, expected to be zero initialised
+ * @param size         requested buffer size in bytes
+ * @param usage        buffer usage flags
+ * @param flags        memory property flags the memory must provide
+ * @param create_pnext chained into the buffer creation info
+ * @param alloc_pnext  chained into the memory allocation info
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int create_buf(AVHWDeviceContext *ctx, ImageBuffer *buf, size_t size,
+                      VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags,
+                      void *create_pnext, void *alloc_pnext)
+{
+    int err;
+    VkResult ret;
+    VkMemoryRequirements req;
+    AVVulkanDeviceContext *hwctx = ctx->hwctx;
+
+    VkBufferCreateInfo buf_spawn = {
+        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext       = create_pnext,
+        .usage       = usage,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .size        = size, /* Gets FFALIGNED during alloc if host visible
+                                but should be ok */
+    };
+
+    /* free_buf() destroys the buffer with hwctx->alloc, so it has to be
+     * created with the same allocator */
+    ret = vkCreateBuffer(hwctx->act_dev, &buf_spawn, hwctx->alloc, &buf->buf);
+    if (ret != VK_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
+               vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    vkGetBufferMemoryRequirements(hwctx->act_dev, buf->buf, &req);
+
+    err = alloc_mem(ctx, &req, flags, alloc_pnext, &buf->flags, &buf->mem);
+    if (err)
+        return err;
+
+    ret = vkBindBufferMemory(hwctx->act_dev, buf->buf, buf->mem, 0);
+    if (ret != VK_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
+               vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+/* Destroys a buffer and frees the memory bound to it. A NULL buf is ignored. */
+static void free_buf(AVHWDeviceContext *ctx, ImageBuffer *buf)
+{
+    AVVulkanDeviceContext *dev = ctx->hwctx;
+
+    if (buf != NULL) {
+        vkDestroyBuffer(dev->act_dev, buf->buf, dev->alloc);
+        vkFreeMemory(dev->act_dev, buf->mem, dev->alloc);
+    }
+}
+
+/**
+ * Map the memory of a set of buffers into host address space.
+ *
+ * @param ctx        device context
+ * @param buf        array of nb_buffers buffers
+ * @param mem        receives the mapped address of each buffer
+ * @param nb_buffers number of buffers
+ * @param invalidate non-zero to invalidate the buffers whose memory isn't
+ *                   host coherent after mapping
+ * @return 0 on success, AVERROR_EXTERNAL if mapping or invalidating fails
+ */
+static int map_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf, uint8_t *mem[],
+                       int nb_buffers, int invalidate)
+{
+    AVVulkanDeviceContext *dev = ctx->hwctx;
+    VkMappedMemoryRange ranges[AV_NUM_DATA_POINTERS];
+    int nb_ranges = 0;
+    VkResult res;
+    int idx;
+
+    for (idx = 0; idx < nb_buffers; idx++) {
+        res = vkMapMemory(dev->act_dev, buf[idx].mem, 0,
+                          VK_WHOLE_SIZE, 0, (void **)&mem[idx]);
+        if (res != VK_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
+                   vk_ret2str(res));
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    if (invalidate) {
+        /* Coherent memory never needs invalidating, collect the rest */
+        for (idx = 0; idx < nb_buffers; idx++) {
+            if (buf[idx].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+                continue;
+            ranges[nb_ranges++] = (VkMappedMemoryRange) {
+                .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+                .memory = buf[idx].mem,
+                .size   = VK_WHOLE_SIZE,
+            };
+        }
+
+        if (nb_ranges) {
+            res = vkInvalidateMappedMemoryRanges(dev->act_dev, nb_ranges,
+                                                 ranges);
+            if (res != VK_SUCCESS) {
+                av_log(ctx, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
+                       vk_ret2str(res));
+                return AVERROR_EXTERNAL;
+            }
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Unmap the memory of a set of buffers.
+ *
+ * @param ctx        device context
+ * @param buf        array of nb_buffers mapped buffers
+ * @param nb_buffers number of buffers
+ * @param flush      non-zero to flush the buffers whose memory isn't host
+ *                   coherent before unmapping
+ * @return 0 on success, AVERROR_EXTERNAL if flushing failed; the buffers
+ *         are unmapped in either case
+ */
+static int unmap_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf,
+                         int nb_buffers, int flush)
+{
+    AVVulkanDeviceContext *dev = ctx->hwctx;
+    VkMappedMemoryRange ranges[AV_NUM_DATA_POINTERS];
+    int nb_ranges = 0;
+    int status = 0;
+    int idx;
+
+    /* Coherent memory never needs flushing, collect the rest */
+    for (idx = 0; flush && idx < nb_buffers; idx++) {
+        if (buf[idx].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+            continue;
+        ranges[nb_ranges++] = (VkMappedMemoryRange) {
+            .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+            .memory = buf[idx].mem,
+            .size   = VK_WHOLE_SIZE,
+        };
+    }
+
+    if (nb_ranges) {
+        VkResult res = vkFlushMappedMemoryRanges(dev->act_dev, nb_ranges,
+                                                 ranges);
+        if (res != VK_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
+                   vk_ret2str(res));
+            status = AVERROR_EXTERNAL; /* We still want to try to unmap them */
+        }
+    }
+
+    for (idx = 0; idx < nb_buffers; idx++)
+        vkUnmapMemory(dev->act_dev, buf[idx].mem);
+
+    return status;
+}
+
+/**
+ * Copy between an image and one buffer per plane using the device's command
+ * buffer, waiting for the copy to finish before returning.
+ *
+ * @param ctx     device context
+ * @param frame   image to copy from/to; its tracked layout and access mask
+ *                are updated to the transfer layout
+ * @param buffer  one buffer per plane
+ * @param stride  line size in bytes of each plane in the buffers
+ * @param w       width of the area to copy, in luma samples
+ * @param h       height of the area to copy, in luma samples
+ * @param pix_fmt pixel format describing the planes
+ * @param to_buf  non-zero to copy image to buffers, zero for the reverse
+ * @return 0 on success, AVERROR_EXTERNAL on failure
+ */
+static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
+                              ImageBuffer *buffer, const int *stride, int w,
+                              int h, enum AVPixelFormat pix_fmt, int to_buf)
+{
+    VkResult ret;
+    AVVulkanDeviceContext *hwctx = ctx->hwctx;
+    VulkanDevicePriv *s = ctx->internal->priv;
+
+    const int planes = av_pix_fmt_count_planes(pix_fmt);
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
+
+    /* Remembered so the tracked state can be rolled back if the barrier
+     * never gets submitted */
+    const VkImageLayout old_layout = frame->layout;
+    const VkAccessFlags old_access = frame->access;
+
+    VkCommandBufferBeginInfo cmd_start = {
+        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+    };
+
+    VkSubmitInfo s_info = {
+        .sType              = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+        .commandBufferCount = 1,
+        .pCommandBuffers    = &s->cmd_buf,
+    };
+
+    ret = vkBeginCommandBuffer(s->cmd_buf, &cmd_start);
+    if (ret != VK_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to begin command buffer: %s\n",
+               vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    { /* Change the image layout to something more optimal for transfers */
+        VkImageMemoryBarrier bar = {
+            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+            .srcAccessMask = 0,
+            .dstAccessMask = to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
+                                      VK_ACCESS_TRANSFER_WRITE_BIT,
+            .oldLayout = frame->layout,
+            .newLayout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
+                                  VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .image = frame->img,
+            .subresourceRange.levelCount = 1,
+            .subresourceRange.layerCount = 1,
+        };
+
+        if (planes == 1) {
+            bar.subresourceRange.aspectMask  = VK_IMAGE_ASPECT_COLOR_BIT;
+        } else {
+            bar.subresourceRange.aspectMask  = VK_IMAGE_ASPECT_PLANE_0_BIT;
+            bar.subresourceRange.aspectMask |= VK_IMAGE_ASPECT_PLANE_1_BIT;
+            if (planes > 2)
+                bar.subresourceRange.aspectMask |= VK_IMAGE_ASPECT_PLANE_2_BIT;
+        }
+
+        vkCmdPipelineBarrier(s->cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                             VK_PIPELINE_STAGE_TRANSFER_BIT,
+                             0, 0, NULL, 0, NULL, 1, &bar);
+
+        /* Update to the new layout */
+        frame->layout = bar.newLayout;
+        frame->access = bar.dstAccessMask;
+    }
+
+    /* Schedule a copy for each plane */
+    for (int i = 0; i < planes; i++) {
+        VkImageSubresourceLayers sub = {
+            .aspectMask = planes < 2 ? VK_IMAGE_ASPECT_COLOR_BIT :
+                              i == 0 ? VK_IMAGE_ASPECT_PLANE_0_BIT :
+                              i == 1 ? VK_IMAGE_ASPECT_PLANE_1_BIT :
+                                       VK_IMAGE_ASPECT_PLANE_2_BIT,
+            .layerCount = 1,
+        };
+        const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w;
+        const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
+        VkBufferImageCopy buf_reg = {
+            .bufferOffset = 0,
+            /* Buffer stride isn't in bytes, it's in samples, the implementation
+             * uses the image's VkFormat to know how many bytes per sample
+             * the buffer has. So we have to convert by dividing. Stupid. */
+            .bufferRowLength = stride[i] / desc->comp[i].step,
+            .bufferImageHeight = p_h,
+            .imageSubresource = sub,
+            .imageOffset = { 0 },
+            .imageExtent = { p_w, p_h, 1, },
+        };
+        if (to_buf)
+            vkCmdCopyImageToBuffer(s->cmd_buf, frame->img, frame->layout,
+                                   buffer[i].buf, 1, &buf_reg);
+        else
+            vkCmdCopyBufferToImage(s->cmd_buf, buffer[i].buf, frame->img,
+                                   frame->layout, 1, &buf_reg);
+    }
+
+    ret = vkEndCommandBuffer(s->cmd_buf);
+    if (ret != VK_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to end command buffer: %s\n",
+               vk_ret2str(ret));
+        goto fail;
+    }
+
+    ret = vkQueueSubmit(s->cmd_queue, 1, &s_info, s->cmd_fence);
+    if (ret != VK_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
+               vk_ret2str(ret));
+        goto fail;
+    }
+
+    ret = vkWaitForFences(hwctx->act_dev, 1, &s->cmd_fence, VK_TRUE, UINT64_MAX);
+    if (ret != VK_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to wait for the transfer: %s\n",
+               vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+    vkResetFences(hwctx->act_dev, 1, &s->cmd_fence);
+
+    return 0;
+
+fail:
+    /* Nothing was executed, so the image is still in its previous layout */
+    frame->layout = old_layout;
+    frame->access = old_access;
+    return AVERROR_EXTERNAL;
+}
+
+/**
+ * Upload a software frame into a Vulkan frame.
+ *
+ * Three strategies are used: a direct copy when the image is linear and
+ * host visible, a buffer-to-image copy from buffers importing the source's
+ * host memory when that extension is available, and otherwise a copy
+ * through temporary staging buffers.
+ *
+ * @param hwfc frames context of the destination
+ * @param dst  AV_PIX_FMT_VULKAN frame to write to
+ * @param src  software frame to read from
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
+                                   const AVFrame *src)
+{
+    int err = 0;
+    AVFrame *map = NULL;
+    ImageBuffer buf[3] = { { 0 } };
+    AVVkFrame *f = (AVVkFrame *)dst->data[0];
+    AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
+    VulkanDevicePriv *p = dev_ctx->internal->priv;
+    const int planes = av_pix_fmt_count_planes(src->format);
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(src->format);
+    int map_host = p->extensions & EXT_EXTERNAL_HOST_MEMORY;
+
+    /* The descriptor is dereferenced and buf[] is indexed per plane below,
+     * so unknown formats and formats with too many planes are rejected */
+    if (!desc || planes < 1 || planes > (int)(sizeof(buf) / sizeof(buf[0])) ||
+        !av_vkfmt_from_pixfmt(src->format)) {
+        av_log(hwfc, AV_LOG_ERROR, "Unsupported source pixel format!\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (src->width > hwfc->width || src->height > hwfc->height)
+        return AVERROR(EINVAL);
+
+    /* Path one - image is host visible and linear */
+    if (f->tiling == VK_IMAGE_TILING_LINEAR &&
+        f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
+        map = av_frame_alloc();
+        if (!map)
+            return AVERROR(ENOMEM);
+        map->format = src->format;
+
+        err = vulkan_map_frame(hwfc, map, dst, AV_HWFRAME_MAP_WRITE);
+        if (err)
+            goto end;
+
+        err = av_frame_copy(map, src);
+        goto end;
+    }
+
+    /* Create one source buffer per plane. With host memory import (path
+     * two) the buffers are backed by the source frame's own memory */
+    for (int i = 0; i < planes; i++) {
+        int h = src->height;
+        int p_height = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
+        size_t size = p_height*src->linesize[i];
+        VkImportMemoryHostPointerInfoEXT import_desc = {
+            .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
+            .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
+            .pHostPointer = src->data[i],
+        };
+        err = create_buf(dev_ctx, &buf[i], size,
+                         VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL,
+                         map_host ? &import_desc : NULL);
+        if (err)
+            goto end;
+    }
+
+    /* Path three - we can't import host memory so we have to do 2 copies */
+    if (!map_host) {
+        uint8_t *mem[3];
+        if ((err = map_buffers(dev_ctx, buf, mem, planes, 0)))
+            goto end;
+
+        for (int i = 0; i < planes; i++) {
+            int h = src->height;
+            int p_height = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
+            memcpy(mem[i], src->data[i], p_height*src->linesize[i]);
+        }
+
+        if ((err = unmap_buffers(dev_ctx, buf, planes, 1)))
+            goto end;
+    }
+
+    /* Copy buffer to image, reporting a failed copy to the caller */
+    err = transfer_image_buf(dev_ctx, f, buf, src->linesize,
+                             src->width, src->height, src->format, 0);
+
+end:
+    av_frame_free(&map);
+    for (int i = 0; i < planes; i++)
+        free_buf(dev_ctx, &buf[i]);
+
+    return err;
+}
+
+/**
+ * Download a Vulkan frame into a software frame.
+ *
+ * A direct copy is used when the image is linear and host visible.
+ * Otherwise the image is copied into per-plane buffers, which either import
+ * the destination's host memory or are copied out afterwards.
+ *
+ * @param hwfc frames context of the source
+ * @param dst  software frame to write to
+ * @param src  AV_PIX_FMT_VULKAN frame to read from
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
+                                     const AVFrame *src)
+{
+    int err = 0;
+    AVFrame *map = NULL;
+    ImageBuffer buf[3] = { { 0 } };
+    AVVkFrame *f = (AVVkFrame *)src->data[0];
+    AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
+    VulkanDevicePriv *p = dev_ctx->internal->priv;
+    const int planes = av_pix_fmt_count_planes(dst->format);
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dst->format);
+    const int map_host = p->extensions & EXT_EXTERNAL_HOST_MEMORY;
+
+    /* The descriptor is dereferenced and buf[] is indexed per plane below */
+    if (!desc || planes < 1 || planes > (int)(sizeof(buf) / sizeof(buf[0])))
+        return AVERROR(EINVAL);
+
+    if (dst->width > hwfc->width || dst->height > hwfc->height)
+        return AVERROR(EINVAL);
+
+    /* Path one - image is host visible and linear */
+    if (f->tiling == VK_IMAGE_TILING_LINEAR &&
+        f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
+        map = av_frame_alloc();
+        if (!map)
+            return AVERROR(ENOMEM);
+        map->format = dst->format;
+
+        err = vulkan_map_frame(hwfc, map, src, AV_HWFRAME_MAP_READ);
+        if (err)
+            goto end;
+
+        err = av_frame_copy(dst, map);
+        goto end;
+    }
+
+    /* Path two - one destination buffer per plane, backed by the
+     * destination frame's own memory if host memory can be imported */
+    for (int i = 0; i < planes; i++) {
+        VkImportMemoryHostPointerInfoEXT import_desc = {
+            .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
+            .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
+            .pHostPointer = dst->data[i],
+        };
+        int h = dst->height;
+        int p_height = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
+        err = create_buf(dev_ctx, &buf[i], p_height * dst->linesize[i],
+                         VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL,
+                         map_host ? &import_desc : NULL);
+        /* Don't go on with a buffer that doesn't exist */
+        if (err)
+            goto end;
+    }
+
+    /* Copy image to buffer */
+    err = transfer_image_buf(dev_ctx, f, buf, dst->linesize,
+                             dst->width, dst->height, dst->format, 1);
+    if (err)
+        goto end;
+
+    /* Without host memory import the data still sits in the buffers */
+    if (!map_host) {
+        uint8_t *mem[3];
+        if ((err = map_buffers(dev_ctx, buf, mem, planes, 1)))
+            goto end;
+
+        for (int i = 0; i < planes; i++) {
+            int h = dst->height;
+            int p_height = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
+            memcpy(dst->data[i], mem[i], p_height * dst->linesize[i]);
+        }
+
+        err = unmap_buffers(dev_ctx, buf, planes, 0);
+    }
+
+end:
+    av_frame_free(&map);
+    for (int i = 0; i < planes; i++)
+        free_buf(dev_ctx, &buf[i]);
+
+    return err;
+}
+
+/* Callback table through which the generic hwcontext code drives the
+ * Vulkan implementation in this file */
+const HWContextType ff_hwcontext_type_vulkan = {
+    .type                   = AV_HWDEVICE_TYPE_VULKAN,
+    .name                   = "Vulkan",
+
+    /* Sizes of the public and private context structures */
+    .device_hwctx_size      = sizeof(AVVulkanDeviceContext),
+    .device_priv_size       = sizeof(VulkanDevicePriv),
+    .frames_hwctx_size      = sizeof(AVVulkanFramesContext),
+
+    /* Device handling */
+    .device_init            = vulkan_device_init,
+    .device_create          = vulkan_device_create,
+    .device_derive          = vulkan_device_derive,
+
+    /* Frame pools */
+    .frames_get_constraints = vulkan_frames_get_constraints,
+    .frames_init            = vulkan_frames_init,
+    .frames_get_buffer      = vulkan_get_buffer,
+
+    /* Uploads and downloads */
+    .transfer_get_formats   = vulkan_transfer_get_formats,
+    .transfer_data_to       = vulkan_transfer_data_to,
+    .transfer_data_from     = vulkan_transfer_data_from,
+
+    /* Importing frames of other APIs */
+    .map_to                 = vulkan_map_to,
+
+    .pix_fmts = (const enum AVPixelFormat[]) {
+        AV_PIX_FMT_VULKAN,
+        AV_PIX_FMT_NONE
+    },
+};
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
new file mode 100644
index 0000000000..342c833a23
--- /dev/null
+++ b/libavutil/hwcontext_vulkan.h
@@ -0,0 +1,133 @@
+/*
+ * Vulkan hwcontext
+ * Copyright (c) 2018 Rostislav Pehlivanov <***@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_HWCONTEXT_VULKAN_H
+#define AVUTIL_HWCONTEXT_VULKAN_H
+
+#include <vulkan/vulkan.h>
+
+/**
+ * @file
+ * API-specific header for AV_HWDEVICE_TYPE_VULKAN.
+ *
+ * For user-allocated pools, AVHWFramesContext.pool must return AVBufferRefs
+ * with the data pointer set to an AVVkFrame.
+ */
+
+/**
+ * Main Vulkan context, allocated as AVHWDeviceContext.hwctx.
+ * All of these can be set before init to change what the context uses.
+ */
+typedef struct AVVulkanDeviceContext {
+ /**
+ * Custom memory allocator, else NULL
+ */
+ const VkAllocationCallbacks *alloc;
+ /**
+ * Instance
+ */
+ VkInstance inst;
+ /**
+ * Physical device
+ */
+ VkPhysicalDevice phys_dev;
+ /**
+ * Active logical device (created from phys_dev unless supplied by the user)
+ */
+ VkDevice act_dev;
+ /**
+ * Queue family index for graphics
+ */
+ int queue_family_index;
+ /**
+ * Queue family index for transfer ops only. By default, the priority order
+ * is dedicated transfer > dedicated compute > graphics.
+ */
+ int queue_family_tx_index;
+ /**
+ * Queue family index for compute ops. Will be equal to the graphics
+ * one unless a separate compute-capable queue family is found.
+ */
+ int queue_family_comp_index;
+} AVVulkanDeviceContext;
+
+/**
+ * Allocated as AVHWFramesContext.hwctx, used to set pool-specific options
+ */
+typedef struct AVVulkanFramesContext {
+ /**
+ * Controls the tiling of output frames.
+ */
+ VkImageTiling tiling;
+ /**
+ * Defines extra usage of output frames. This is bitwise OR'd with the
+ * standard usage flags (SAMPLED, STORAGE, TRANSFER_SRC and TRANSFER_DST).
+ */
+ VkImageUsageFlagBits usage;
+ /**
+ * Set to 1 to allocate all planes separately (disjoint images)
+ */
+ int disjoint;
+ /**
+ * Extension data for image creation. By default, if the extension is
+ * available, this will be chained to a VkImageFormatListCreateInfoKHR.
+ */
+ void *create_pnext;
+ /**
+ * Extension data for memory allocation. If the image is disjoint, this
+ * must be one per plane, otherwise just the first entry is used.
+ * This will be chained to VkExportMemoryAllocateInfo, which is used
+ * to make all pool images exportable to other APIs.
+ */
+ void *alloc_pnext[AV_NUM_DATA_POINTERS];
+} AVVulkanFramesContext;
+
+/**
+ * Frame structure, the VkFormat of the image will always match
+ * the pool's sw_format.
+ */
+typedef struct AVVkFrame {
+ VkImage img;
+ VkImageTiling tiling;
+ /**
+ * Always 1 for non-disjoint images, #planes for disjoint
+ */
+ int mem_count;
+ VkDeviceMemory mem[AV_NUM_DATA_POINTERS];
+ /**
+ * OR'd flags for all memory allocated
+ */
+ VkMemoryPropertyFlagBits flags;
+
+ /**
+ * Updated after every barrier
+ */
+ VkAccessFlagBits access;
+ VkImageLayout layout;
+} AVVkFrame;
+
+/**
+ * Converts AVPixelFormat to VkFormat, returns VK_FORMAT_UNDEFINED if unsupported
+ * by the hwcontext
+ */
+VkFormat av_vkfmt_from_pixfmt(enum AVPixelFormat p);
+
+#endif /* AVUTIL_HWCONTEXT_VULKAN_H */
diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
index ff5c20d50e..c3b3aaee65 100644
--- a/libavutil/pixdesc.c
+++ b/libavutil/pixdesc.c
@@ -1673,6 +1673,10 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
.name = "videotoolbox_vld",
.flags = AV_PIX_FMT_FLAG_HWACCEL,
},
+ [AV_PIX_FMT_VULKAN] = {
+ .name = "vulkan",
+ .flags = AV_PIX_FMT_FLAG_HWACCEL,
+ },
[AV_PIX_FMT_GBRP] = {
.name = "gbrp",
.nb_components = 3,
diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
index aea008bbdc..e6991f3630 100644
--- a/libavutil/pixfmt.h
+++ b/libavutil/pixfmt.h
@@ -333,6 +333,10 @@ enum AVPixelFormat {
AV_PIX_FMT_GRAY14BE, ///< Y , 14bpp, big-endian
AV_PIX_FMT_GRAY14LE, ///< Y , 14bpp, little-endian

+ /* Vulkan hardware images,
+ * data[0] points to an AVVkFrame */
+ AV_PIX_FMT_VULKAN,
+
AV_PIX_FMT_NB ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
};

diff --git a/libavutil/version.h b/libavutil/version.h
index 44bdebdc93..84409b1d69 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -79,8 +79,8 @@
*/

#define LIBAVUTIL_VERSION_MAJOR 56
-#define LIBAVUTIL_VERSION_MINOR 18
-#define LIBAVUTIL_VERSION_MICRO 102
+#define LIBAVUTIL_VERSION_MINOR 19
+#define LIBAVUTIL_VERSION_MICRO 100

#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
LIBAVUTIL_VERSION_MINOR, \
--
2.17.0
Mark Thompson
2018-05-27 17:15:17 UTC
Permalink
Post by Rostislav Pehlivanov
This commit adds a Vulkan hwcontext, currently capable of mapping DRM and
VAAPI frames but additional functionality can be added later to support
importing of D3D11 surfaces as well as exporting to various other APIs.
Have you investigated the D3D11 interop at all? Seeing that working (even if it isn't included here) would be nice to make sure there aren't any gotchas later.
Post by Rostislav Pehlivanov
This context requires the newest stable version of the Vulkan API,
and once the new extension for DRM surfaces makes it in will also require
it (in order to properly and fully import them).
It makes use of every part of the Vulkan spec in order to ensure fastest
possible uploading, downloading and mapping of frames. On AMD, it will
also make use of mapping host memory frames in order to upload
very efficiently and with minimal CPU to hardware.
To be useful for non-RGB images an implementation with the YUV images
extension is needed. All current implementations support that with the
exception of AMD, though support is coming soon for Mesa.
Neither AMD nor Intel on Windows seem to support it (vulkaninfo with both: <https://0x0.st/s212.txt> (I realise that won't show the relevant formats, but it also doesn't work)).
Post by Rostislav Pehlivanov
---
configure | 10 +
doc/APIchanges | 3 +
libavutil/Makefile | 3 +
libavutil/hwcontext.c | 4 +
libavutil/hwcontext.h | 1 +
libavutil/hwcontext_internal.h | 1 +
libavutil/hwcontext_vulkan.c | 2013 ++++++++++++++++++++++++++++++++
libavutil/hwcontext_vulkan.h | 133 +++
libavutil/pixdesc.c | 4 +
libavutil/pixfmt.h | 4 +
libavutil/version.h | 4 +-
11 files changed, 2178 insertions(+), 2 deletions(-)
create mode 100644 libavutil/hwcontext_vulkan.c
create mode 100644 libavutil/hwcontext_vulkan.h
diff --git a/configure b/configure
index 09ff0c55e2..5f4407b753 100755
--- a/configure
+++ b/configure
--enable-opengl enable OpenGL rendering [no]
--enable-openssl enable openssl, needed for https support
if gnutls, libtls or mbedtls is not used [no]
+ --enable-vulkan enable Vulkan code [no]
Ordering (and in list below).
Post by Rostislav Pehlivanov
--disable-sndio disable sndio support [autodetect]
--disable-schannel disable SChannel SSP, needed for TLS support on
Windows if openssl and gnutls are not used [autodetect]
@@ -1767,6 +1768,7 @@ HWACCEL_LIBRARY_LIST="
mmal
omx
opencl
+ vulkan
"
DOCUMENT_LIST="
@@ -2223,6 +2225,7 @@ HAVE_LIST="
opencl_dxva2
opencl_vaapi_beignet
opencl_vaapi_intel_media
+ vulkan_drm_mod
perl
pod2man
texi2html
@@ -6349,6 +6352,13 @@ enabled vdpau &&
enabled crystalhd && check_lib crystalhd "stdint.h libcrystalhd/libcrystalhd_if.h" DtsCrystalHDVersion -lcrystalhd
+enabled vulkan &&
+ require_pkg_config vulkan "vulkan >= 1.1.73" "vulkan/vulkan.h" vkCreateInstance
+
+if enabled_all vulkan libdrm ; then
+ check_cpp_condition vulkan_drm_mod vulkan/vulkan.h "defined VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME"
+fi
+
if enabled x86; then
case $target_os in
mingw32*|mingw64*|win32|win64|linux|cygwin*)
diff --git a/doc/APIchanges b/doc/APIchanges
index efe15ba4e0..1b37f58ca7 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -15,6 +15,9 @@ libavutil: 2017-10-21
+2018-04-xx - xxxxxxxxxx - lavu 56.19.100 - hwcontext.h
+ Add AV_HWDEVICE_TYPE_VULKAN and implementation.
This should mention AV_PIX_FMT_VULKAN as well.
Post by Rostislav Pehlivanov
+
2018-05-xx - xxxxxxxxxx - lavf 58.15.100 - avformat.h
Add pmt_version field to AVProgram
...
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
new file mode 100644
index 0000000000..db0a5b7e61
--- /dev/null
+++ b/libavutil/hwcontext_vulkan.c
@@ -0,0 +1,2013 @@
+/*
+ * Vulkan hwcontext
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "pixdesc.h"
+#include "avstring.h"
+#include "hwcontext.h"
+#include "hwcontext_internal.h"
+#include "hwcontext_vulkan.h"
+
+#if CONFIG_LIBDRM
+#include <unistd.h> /* lseek */
+#include <xf86drm.h>
+#include <drm_fourcc.h>
+#include "hwcontext_drm.h"
+#if CONFIG_VAAPI
+#include <va/va_drmcommon.h>
+#include "hwcontext_vaapi.h"
+#endif
+#endif
+
+typedef struct VulkanDevicePriv {
+ /* Properties */
+ VkPhysicalDeviceProperties props;
+ VkPhysicalDeviceMemoryProperties mprops;
+
+ /* Debug callback */
+ VkDebugUtilsMessengerEXT debug_ctx;
+
+ /* Image uploading */
+ VkCommandPool cmd_pool;
+ VkCommandBuffer cmd_buf;
+ VkQueue cmd_queue;
+ VkFence cmd_fence;
+
+ /* Extensions */
+ uint64_t extensions;
+
+ /* Settings */
+ int use_linear_images;
+ int use_disjoint_images;
+} VulkanDevicePriv;
+
+#define VK_LOAD_PFN(inst, name) PFN_##name pfn_##name = (PFN_##name) \
+ vkGetInstanceProcAddr(inst, #name)
+
+#define DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT | \
+ VK_IMAGE_USAGE_STORAGE_BIT | \
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT | \
+ VK_IMAGE_USAGE_TRANSFER_DST_BIT)
+
+#define ADD_VAL_TO_LIST(list, count, val) \
+ do { \
+ list = av_realloc_array(list, sizeof(*list), ++count); \
+ if (!list) { \
+ err = AVERROR(ENOMEM); \
+ goto end; \
+ } \
+ list[count - 1] = val; \
+ } while(0)
+
+static const VkFormat vk_format_map[AV_PIX_FMT_NB] = {
+ /* Gray */
+ [AV_PIX_FMT_GRAY8] = VK_FORMAT_R8_UNORM,
+ [AV_PIX_FMT_GRAY10] = VK_FORMAT_R10X6_UNORM_PACK16,
+ [AV_PIX_FMT_GRAY12] = VK_FORMAT_R12X4_UNORM_PACK16,
Aren't GRAY10 and GRAY12 packed in the low bits rather than the high bits?
Post by Rostislav Pehlivanov
+ [AV_PIX_FMT_GRAY16] = VK_FORMAT_R16_UNORM,
+
+ /* Interleaved */
+ [AV_PIX_FMT_NV12] = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM,
+ [AV_PIX_FMT_P010] = VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,
+ [AV_PIX_FMT_P016] = VK_FORMAT_G16_B16R16_2PLANE_420_UNORM,
+ [AV_PIX_FMT_NV16] = VK_FORMAT_G16_B16R16_2PLANE_422_UNORM,
+ [AV_PIX_FMT_UYVY422] = VK_FORMAT_B16G16R16G16_422_UNORM,
+ [AV_PIX_FMT_YVYU422] = VK_FORMAT_G16B16G16R16_422_UNORM,
This should be AV_PIX_FMT_YUYV422?

Changing that makes it accept a DRM object for this format (DRM_FORMAT_YUYV), though with anv we then get an assertion in the driver:

$ gdb --args ./ffmpeg_g -v 55 -y -hwaccel vaapi -hwaccel_output_format vaapi -hwaccel_device /dev/dri/renderD128 -i in.mp4 -an -vf 'scale_vaapi=format=yuyv422,hwmap=derive_device=vulkan,scale_vulkan=1280:720,hwmap=derive_device=vaapi:reverse=1' -c:v h264_vaapi out.mp4
...
[Parsed_scale_vaapi_0 @ 0x555558dd4d40] Filter output: vaapi_vld, 1920x1080 (2000).
[hwmap @ 0x555558dd9380] Filter input: vaapi_vld, 1920x1080 (2000).
ffmpeg_g: ../../../src/intel/vulkan/anv_image.c:599: anv_image_create: Assertion `format != NULL' failed.

Thread 1 "ffmpeg_g" received signal SIGABRT, Aborted.
__GI_raise (sig=***@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
51 ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt
#0 __GI_raise (sig=***@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
#1 0x00007ffff3127231 in __GI_abort () at abort.c:79
#2 0x00007ffff311e9da in __assert_fail_base (fmt=0x7ffff3271d48 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=***@entry=0x7fffafd06a51 "format != NULL", file=***@entry=0x7fffafd05d38 "../../../src/intel/vulkan/anv_image.c", line=***@entry=599,
function=***@entry=0x7fffafd07000 <__PRETTY_FUNCTION__.66498> "anv_image_create") at assert.c:92
#3 0x00007ffff311ea52 in __GI___assert_fail (assertion=***@entry=0x7fffafd06a51 "format != NULL", file=***@entry=0x7fffafd05d38 "../../../src/intel/vulkan/anv_image.c", line=***@entry=599, function=***@entry=0x7fffafd07000 <__PRETTY_FUNCTION__.66498> "anv_image_create") at assert.c:101
#4 0x00007fffaf928190 in anv_image_create (_device=<optimized out>, create_info=***@entry=0x7fffffffced0, alloc=<optimized out>, pImage=<optimized out>) at ../../../src/intel/vulkan/anv_image.c:599
#5 0x00007fffaf9282b2 in anv_CreateImage (device=<optimized out>, pCreateInfo=<optimized out>, pAllocator=<optimized out>, pImage=<optimized out>) at ../../../src/intel/vulkan/anv_image.c:641
#6 0x00007ffff66776c4 in vkCreateImage (device=0x5555592a64e0, pCreateInfo=0x7fffffffcf70, pAllocator=0x0, pImage=0x5555592b9f40) at /home/mrt/video/vulkan/loader/loader/trampoline.c:1328
#7 0x0000555556a009c7 in create_frame (hwfc=0x5555591e7380, frame=0x7fffffffd1a8, tiling=VK_IMAGE_TILING_OPTIMAL, usage=15, disjoint=0, create_pnext=0x7fffffffd140, alloc_pnext=0x7fffffffd080, alloc_pnext_stride=24) at src/libavutil/hwcontext_vulkan.c:1130
#8 0x0000555556a0199b in vulkan_map_from_drm_frame_desc (hwfc=0x5555591e7380, f=0x7fffffffd1a8, desc=0x5555592b97c0) at src/libavutil/hwcontext_vulkan.c:1480
#9 0x0000555556a019f8 in vulkan_map_from_drm (hwfc=0x5555591e7380, dst=0x5555592ab440, src=0x5555592b9580, flags=3) at src/libavutil/hwcontext_vulkan.c:1502
#10 0x0000555556a01b50 in vulkan_map_from_vaapi (dst_fc=0x5555591e7380, dst=0x5555592ab440, src=0x5555592ab700, flags=3) at src/libavutil/hwcontext_vulkan.c:1550
#11 0x0000555556a01c27 in vulkan_map_to (hwfc=0x5555591e7380, dst=0x5555592ab440, src=0x5555592ab700, flags=3) at src/libavutil/hwcontext_vulkan.c:1579
#12 0x00005555569f32c7 in av_hwframe_map (dst=0x5555592ab440, src=0x5555592ab700, flags=3) at src/libavutil/hwcontext.c:792
#13 0x0000555555798c2c in hwmap_filter_frame (link=0x555558dd9a00, input=0x5555592ab700) at src/libavfilter/vf_hwmap.c:339
#14 0x00005555556af6c9 in ff_filter_frame_framed (link=0x555558dd9a00, frame=0x5555592ab700) at src/libavfilter/avfilter.c:1071
#15 0x00005555556aff52 in ff_filter_frame_to_filter (link=0x555558dd9a00) at src/libavfilter/avfilter.c:1219
#16 0x00005555556b014e in ff_filter_activate_default (filter=0x555558dd9580) at src/libavfilter/avfilter.c:1268
#17 0x00005555556b0372 in ff_filter_activate (filter=0x555558dd9580) at src/libavfilter/avfilter.c:1429
#18 0x00005555556b5036 in ff_filter_graph_run_once (graph=0x555558dd9440) at src/libavfilter/avfiltergraph.c:1454
#19 0x00005555556b6466 in push_frame (graph=0x555558dd9440) at src/libavfilter/buffersrc.c:181
#20 0x00005555556b6778 in av_buffersrc_add_frame_internal (ctx=0x555558ddc3c0, frame=0x55555842b080, flags=4) at src/libavfilter/buffersrc.c:255
#21 0x00005555556b63ed in av_buffersrc_add_frame_flags (ctx=0x555558ddc3c0, frame=0x55555842b080, flags=4) at src/libavfilter/buffersrc.c:164
#22 0x0000555555679212 in ifilter_send_frame (ifilter=0x5555581f2580, frame=0x55555842b080) at src/fftools/ffmpeg.c:2190
#23 0x00005555556794f2 in send_frame_to_filters (ist=0x5555581f4340, decoded_frame=0x55555842b080) at src/fftools/ffmpeg.c:2264
#24 0x000055555567a2ac in decode_video (ist=0x5555581f4340, pkt=0x7fffffffd800, got_output=0x7fffffffd7f4, duration_pts=0x7fffffffd7f8, eof=0, decode_failed=0x7fffffffd7f0) at src/fftools/ffmpeg.c:2465
#25 0x000055555567ac47 in process_input_packet (ist=0x5555581f4340, pkt=0x7fffffffd9c0, no_eof=0) at src/fftools/ffmpeg.c:2619
#26 0x0000555555681bb5 in process_input (file_index=0) at src/fftools/ffmpeg.c:4457
#27 0x00005555556820c4 in transcode_step () at src/fftools/ffmpeg.c:4577
#28 0x00005555556821f1 in transcode () at src/fftools/ffmpeg.c:4631
#29 0x0000555555682a81 in main (argc=18, argv=0x7fffffffe3c8) at src/fftools/ffmpeg.c:4838


(For the above case you also need this VAAPI patch to give you composed layers:

diff --git a/libavutil/hwcontext_vaapi.c b/libavutil/hwcontext_vaapi.c
index a2387d4fc4..8e6abdc6ca 100644
--- a/libavutil/hwcontext_vaapi.c
+++ b/libavutil/hwcontext_vaapi.c
@@ -1104,7 +1104,7 @@ static int vaapi_map_to_drm_esh(AVHWFramesContext *hwfc, AVFrame *dst,

surface_id = (VASurfaceID)(uintptr_t)src->data[3];

- export_flags = VA_EXPORT_SURFACE_SEPARATE_LAYERS;
+ export_flags = VA_EXPORT_SURFACE_COMPOSED_LAYERS;
if (flags & AV_HWFRAME_MAP_READ)
export_flags |= VA_EXPORT_SURFACE_READ_ONLY;
if (flags & AV_HWFRAME_MAP_WRITE)

)
Post by Rostislav Pehlivanov
+
+ /* 420 */
+ [AV_PIX_FMT_YUV420P] = VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM,
+ [AV_PIX_FMT_YUV420P16] = VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM,
+
+ /* 422 */
+ [AV_PIX_FMT_YUV422P] = VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM,
+ [AV_PIX_FMT_YUV422P16] = VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM,
+
+ /* 444 */
+ [AV_PIX_FMT_YUV444P] = VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM,
+ [AV_PIX_FMT_YUV444P16] = VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM,
+
+ /* RGB */
+ [AV_PIX_FMT_ABGR] = VK_FORMAT_A8B8G8R8_UNORM_PACK32,
+ [AV_PIX_FMT_BGRA] = VK_FORMAT_B8G8R8A8_UNORM,
+ [AV_PIX_FMT_RGBA] = VK_FORMAT_R8G8B8A8_UNORM,
+ [AV_PIX_FMT_RGB24] = VK_FORMAT_R8G8B8_UNORM,
+ [AV_PIX_FMT_BGR24] = VK_FORMAT_B8G8R8_UNORM,
+ [AV_PIX_FMT_RGB48] = VK_FORMAT_R16G16B16_UNORM,
+ [AV_PIX_FMT_RGBA64] = VK_FORMAT_R16G16B16A16_UNORM,
+ [AV_PIX_FMT_RGB565] = VK_FORMAT_R5G6B5_UNORM_PACK16,
+ [AV_PIX_FMT_BGR565] = VK_FORMAT_B5G6R5_UNORM_PACK16,
+ [AV_PIX_FMT_BGR0] = VK_FORMAT_B8G8R8A8_UNORM,
+ [AV_PIX_FMT_0BGR] = VK_FORMAT_A8B8G8R8_UNORM_PACK32,
+ [AV_PIX_FMT_RGB0] = VK_FORMAT_R8G8B8A8_UNORM,
+};
+
+enum VulkanExtensions {
+ EXT_DEDICATED_ALLOC = 1LL << 0, /* VK_KHR_dedicated_allocation */
+ EXT_IMAGE_FORMAT_LIST = 1LL << 1, /* VK_KHR_image_format_list */
+ EXT_EXTERNAL_MEMORY = 1LL << 2, /* VK_KHR_external_memory */
+ EXT_EXTERNAL_HOST_MEMORY = 1LL << 3, /* VK_EXT_external_memory_host */
+ EXT_EXTERNAL_FD_MEMORY = 1LL << 4, /* VK_KHR_external_memory_fd */
+ EXT_EXTERNAL_DMABUF_MEMORY = 1LL << 5, /* VK_EXT_external_memory_dma_buf */
+ EXT_DRM_MODIFIER_FLAGS = 1LL << 6, /* VK_EXT_image_drm_format_modifier */
+ EXT_YUV_IMAGES = 1LL << 7, /* VK_KHR_sampler_ycbcr_conversion */
+
+ EXT_OPTIONAL = 1LL << 62,
+ EXT_REQUIRED = 1LL << 63,
That's signed overflow -> undefined behaviour. Since you want a uint64_t, use UINT64_C().
Post by Rostislav Pehlivanov
+};
+
...
+
+static VkBool32 vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
+ VkDebugUtilsMessageTypeFlagsEXT messageType,
+ const VkDebugUtilsMessengerCallbackDataEXT *data,
+ void *priv)
+{
+ int l;
+ AVHWDeviceContext *ctx = priv;
+
+ switch (severity) {
+ case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break;
+ case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: l = AV_LOG_INFO; break;
+ case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l = AV_LOG_WARNING; break;
+ case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT: l = AV_LOG_ERROR; break;
+ default: l = AV_LOG_DEBUG; break;
+ };
Stray semicolon.
Post by Rostislav Pehlivanov
+
+ av_log(ctx, l, "%s\n", data->pMessage);
+ for (int i = 0; i < data->cmdBufLabelCount; i++)
+ av_log(ctx, l, "\t%i: %s\n", i, data->pCmdBufLabels[i].pLabelName);
+
+ return 0;
+}
+
...
+
+typedef struct VulkanDeviceSelection {
+ const char *name; /* Will use this first unless NULL */
+ uint32_t pci_device; /* Will use this second unless 0x0 */
+ uint32_t vendor_id; /* Last resort to find something deterministic */
+ int index; /* Finally fall back to index */
+} VulkanDeviceSelection;
+
+/* Finds a device */
+static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
+{
+ int err = 0;
+ uint32_t num;
+ VkResult ret;
+ VkPhysicalDevice *devices = NULL;
+ VkPhysicalDeviceProperties *prop = NULL;
+ VkPhysicalDevice choice = VK_NULL_HANDLE;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ static const char *dev_types[] = {
+ [VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU] = "integrated",
+ [VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU] = "discrete",
+ [VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU] = "virtual",
+ [VK_PHYSICAL_DEVICE_TYPE_CPU] = "software",
+ [VK_PHYSICAL_DEVICE_TYPE_OTHER] = "unknown",
+ };
+
+ ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, NULL);
+ if (ret != VK_SUCCESS || !num) {
+ av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
AVERROR(ENODEV) might be clearer, and in similar "no device" cases below too.
Post by Rostislav Pehlivanov
+ }
+
+ devices = av_malloc_array(num, sizeof(VkPhysicalDevice));
+ if (!devices)
+ return AVERROR(ENOMEM);
+
+ ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, devices);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n",
+ vk_ret2str(ret));
+ err = AVERROR_EXTERNAL;
+ goto end;
+ }
+
+ prop = av_malloc_array(num, sizeof(VkPhysicalDeviceProperties));
+ if (!prop) {
+ err = AVERROR(ENOMEM);
+ goto end;
+ }
+
+ av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n");
+ for (int i = 0; i < num; i++) {
+ vkGetPhysicalDeviceProperties(devices[i], &prop[i]);
+ av_log(ctx, AV_LOG_VERBOSE, " %d: %s (%s) (0x%x)\n", i, prop[i].deviceName,
^ "%#x" (and below)
Post by Rostislav Pehlivanov
+ dev_types[prop[i].deviceType], prop[i].deviceID);
dev_types would feel safer as a function, I think? (If a later Vulkan version adds a new device type then you can crash if you see it.)
Post by Rostislav Pehlivanov
+ }
+
+ if (select->name) {
+ av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name);
+ for (int i = 0; i < num; i++) {
+ if (strcmp(select->name, prop[i].deviceName) == 0) {
Might it be nicer to use strstr() rather than strcmp() here?

The requirement to put e.g. "AMD RADV POLARIS11 (LLVM 6.0.0)" is pretty annoying, especially when that string might change between versions. If you know you have an AMD card and an Intel card, then matching "AMD" seems pretty safe.
Post by Rostislav Pehlivanov
+ choice = devices[i];
+ goto end;
+ }
+ }
+ av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n",
+ select->name);
+ err = AVERROR_UNKNOWN;
+ goto end;
+ } else if (select->pci_device) {
+ av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device);
+ for (int i = 0; i < num; i++) {
+ if (select->pci_device == prop[i].deviceID) {
+ choice = devices[i];
+ goto end;
+ }
+ }
+ av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n",
+ select->pci_device);
I wonder whether this should have some magic if you have multiple of the same graphics card (something with opts, maybe?). That will be a common case in compute, though I don't know if it matters here.
Post by Rostislav Pehlivanov
+ err = AVERROR(EINVAL);
+ goto end;
+ } else if (select->vendor_id) {
+ av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id);
+ for (int i = 0; i < num; i++) {
+ if (select->vendor_id == prop[i].vendorID) {
+ choice = devices[i];
+ goto end;
+ }
+ }
+ av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n",
+ select->vendor_id);
+ err = AVERROR_UNKNOWN;
+ goto end;
+ } else {
+ if (select->index < num) {
+ choice = devices[select->index];
+ goto end;
+ }
+ av_log(ctx, AV_LOG_ERROR, "Unable to find device with index %i!\n",
+ select->index);
+ err = AVERROR_UNKNOWN;
+ goto end;
+ }
+
+end:
+ av_free(devices);
+ av_free(prop);
+ hwctx->phys_dev = choice;
+
+ return err;
+}
+
+static int search_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
+{
+ uint32_t num;
+ VkQueueFamilyProperties *qs = NULL;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ int graph_index = -1, comp_index = -1, tx_index = -1;
+ VkDeviceQueueCreateInfo *pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos;
+
+ /* First get the number of queue families */
+ vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL);
+ if (!num) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ /* Then allocate memory */
+ qs = av_malloc_array(num, sizeof(VkQueueFamilyProperties));
+ if (!qs)
+ return AVERROR(ENOMEM);
+
+ /* Finally retrieve the queue families */
+ vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qs);
+
+#define SEARCH_FLAGS(expr, out) \
+ for (int i = 0; i < num; i++) { \
Adding the "int " pushed the "\" out of alignment :P
Post by Rostislav Pehlivanov
+ const VkQueueFlagBits flags = qs[i].queueFlags; \
+ if (expr) { \
+ out = i; \
+ break; \
+ } \
+ }
+
+ if (!hwctx->queue_family_index)
I don't quite understand what this test is doing. You search for the queues to use on device create (not external init), so it should always be unset when you get here?
Post by Rostislav Pehlivanov
+ SEARCH_FLAGS(flags & VK_QUEUE_GRAPHICS_BIT, graph_index)
+
+ if (!hwctx->queue_family_comp_index)
+ SEARCH_FLAGS((flags & VK_QUEUE_COMPUTE_BIT) && (i != graph_index),
+ comp_index)
+
+ if (!hwctx->queue_family_tx_index)
+ SEARCH_FLAGS((flags & VK_QUEUE_TRANSFER_BIT) && (i != graph_index) &&
+ (i != comp_index), tx_index)
+
+#undef SEARCH_FLAGS
+#define QF_FLAGS(flags) \
+ ((flags) & VK_QUEUE_GRAPHICS_BIT ) ? "(graphics) " : "", \
+ ((flags) & VK_QUEUE_COMPUTE_BIT ) ? "(compute) " : "", \
+ ((flags) & VK_QUEUE_TRANSFER_BIT ) ? "(transfer) " : "", \
+ ((flags) & VK_QUEUE_SPARSE_BINDING_BIT) ? "(sparse) " : ""
+
+ av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for graphics, "
+ "flags: %s%s%s%s\n", graph_index, QF_FLAGS(qs[graph_index].queueFlags));
+
+ hwctx->queue_family_index = graph_index;
+ hwctx->queue_family_tx_index = graph_index;
+ hwctx->queue_family_comp_index = graph_index;
+
+ pc[cd->queueCreateInfoCount++].queueFamilyIndex = graph_index;
+
+ if (comp_index != -1) {
+ av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for compute, "
+ "flags: %s%s%s%s\n", comp_index, QF_FLAGS(qs[comp_index].queueFlags));
+ hwctx->queue_family_tx_index = comp_index;
+ hwctx->queue_family_comp_index = comp_index;
+ pc[cd->queueCreateInfoCount++].queueFamilyIndex = comp_index;
+ }
+
+ if (tx_index != -1) {
+ av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for transfers, "
+ "flags: %s%s%s%s\n", tx_index, QF_FLAGS(qs[tx_index].queueFlags));
+ hwctx->queue_family_tx_index = tx_index;
+ pc[cd->queueCreateInfoCount++].queueFamilyIndex = tx_index;
+ }
+
+#undef PRINT_QF_FLAGS
"QF_FLAGS".
Post by Rostislav Pehlivanov
+
+ av_free(qs);
+
+ return 0;
+}
+
+static int create_exec_ctx(AVHWDeviceContext *ctx)
+{
+ VkResult ret;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VulkanDevicePriv *p = ctx->internal->priv;
+
+ VkCommandPoolCreateInfo cqueue_create = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+ .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+ .queueFamilyIndex = hwctx->queue_family_tx_index,
+ };
+ VkCommandBufferAllocateInfo cbuf_create = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+ .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+ .commandBufferCount = 1,
+ };
+ VkFenceCreateInfo fence_spawn = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
+
+ ret = vkCreateCommandPool(hwctx->act_dev, &cqueue_create,
+ hwctx->alloc, &p->cmd_pool);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Command pool creation failure: %s\n",
+ vk_ret2str(ret));
+ return 1;
These failures are going to return a nonnegative number from device_init(); I don't think that's wanted.
Post by Rostislav Pehlivanov
+ }
+
+ cbuf_create.commandPool = p->cmd_pool;
+
+ ret = vkAllocateCommandBuffers(hwctx->act_dev, &cbuf_create, &p->cmd_buf);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
+ vk_ret2str(ret));
+ return 1;
+ }
+
+ ret = vkCreateFence(hwctx->act_dev, &fence_spawn,
+ hwctx->alloc, &p->cmd_fence);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
+ vk_ret2str(ret));
+ return 1;
+ }
+
+ vkGetDeviceQueue(hwctx->act_dev, hwctx->queue_family_tx_index, 0,
+ &p->cmd_queue);
+
+ return 0;
+}
+
+static void free_exec_ctx(AVHWDeviceContext *ctx)
+{
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VulkanDevicePriv *p = ctx->internal->priv;
+
+ if (!p)
This can never be true - priv is set before the free function.
Post by Rostislav Pehlivanov
+ return;
+
+ if (p->cmd_fence != VK_NULL_HANDLE)
Since we're depending on VK_NULL_HANDLE being zero for correct initialisation, maybe just treat these as pointers and write "if (p->cmd_fence)", etc.
Post by Rostislav Pehlivanov
+ vkDestroyFence(hwctx->act_dev, p->cmd_fence, hwctx->alloc);
+ if (p->cmd_buf != VK_NULL_HANDLE)
+ vkFreeCommandBuffers(hwctx->act_dev, p->cmd_pool, 1, &p->cmd_buf);
+ if (p->cmd_pool != VK_NULL_HANDLE)
+ vkDestroyCommandPool(hwctx->act_dev, p->cmd_pool, hwctx->alloc);
+}
+
+static void vulkan_device_free(AVHWDeviceContext *ctx)
+{
+ VulkanDevicePriv *p = ctx->internal->priv;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+
+ free_exec_ctx(ctx);
+
+ vkDestroyDevice(hwctx->act_dev, hwctx->alloc);
+
+ if (p && p->debug_ctx != VK_NULL_HANDLE) {
+ VK_LOAD_PFN(hwctx->inst, vkDestroyDebugUtilsMessengerEXT);
+ pfn_vkDestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx,
+ hwctx->alloc);
+ }
+
+ vkDestroyInstance(hwctx->inst, hwctx->alloc);
+}
+
+static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
+ VulkanDeviceSelection *dev_select,
+ AVDictionary *opts, int flags)
+{
+ int err = 0;
+ VkResult ret;
+ AVDictionaryEntry *opt_d;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VkDeviceQueueCreateInfo queue_create_info[3] = {
+ { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+ .pQueuePriorities = (float []){ 1.0f },
+ .queueCount = 1, },
+ { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+ .pQueuePriorities = (float []){ 1.0f },
+ .queueCount = 1, },
+ { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+ .pQueuePriorities = (float []){ 1.0f },
+ .queueCount = 1, },
+ };
+
+ VkDeviceCreateInfo dev_info = {
+ .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+ .pQueueCreateInfos = queue_create_info,
+ .queueCreateInfoCount = 0,
+ };
+
+ VulkanDevicePriv *p = av_mallocz(sizeof(*p));
+ if (!p) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
This is already allocated inside av_hwdevice_ctx_alloc(); you're overwriting it here and leaking the original.
Post by Rostislav Pehlivanov
+
+ ctx->internal->priv = p;
+ ctx->free = vulkan_device_free;
+
+ /* Create an instance if not given one */
+ if (!hwctx->inst && (err = create_instance(ctx, opts)))
+ goto fail;
+
+ /* Find a device (if not given one) */
+ if (!hwctx->phys_dev && (err = find_device(ctx, dev_select)))
+ goto fail;
+
+ vkGetPhysicalDeviceProperties(hwctx->phys_dev, &p->props);
+ av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n", p->props.deviceName);
+ av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n");
+ av_log(ctx, AV_LOG_VERBOSE, " optimalBufferCopyOffsetAlignment: %li\n",
+ p->props.limits.optimalBufferCopyOffsetAlignment);
+ av_log(ctx, AV_LOG_VERBOSE, " optimalBufferCopyRowPitchAlignment: %li\n",
+ p->props.limits.optimalBufferCopyRowPitchAlignment);
+ av_log(ctx, AV_LOG_VERBOSE, " minMemoryMapAlignment: %li\n",
+ p->props.limits.minMemoryMapAlignment);
+
+ /* Search queue family */
+ if ((err = search_queue_families(ctx, &dev_info)))
+ goto fail;
+
+ if (!hwctx->act_dev) {
+ err = check_extensions(ctx, 1, &dev_info.ppEnabledExtensionNames,
+ &dev_info.enabledExtensionCount, 0);
+ if (err)
+ goto fail;
+
+ ret = vkCreateDevice(hwctx->phys_dev, &dev_info,
+ hwctx->alloc, &hwctx->act_dev);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n",
+ vk_ret2str(ret));
+ err = AVERROR_EXTERNAL;
+ goto fail;
+ }
+
+ av_free((void *)dev_info.ppEnabledExtensionNames);
+ }
+
+ /* Tiled images setting, use them by default */
+ opt_d = av_dict_get(opts, "linear_images", NULL, 0);
+ if (opt_d)
+ p->use_linear_images = strtol(opt_d->value, NULL, 10);
+
+ /* Disjoint images setting, don't use them by default */
+ opt_d = av_dict_get(opts, "disjoint_images", NULL, 0);
+ if (opt_d)
+ p->use_disjoint_images = strtol(opt_d->value, NULL, 10);
+
+ return 0;
+
+ av_freep(&ctx->internal->priv);
I don't think you want to free this here, it's managed by the hwcontext layer.
Post by Rostislav Pehlivanov
+ return err;
+}
+
+static int vulkan_device_init(AVHWDeviceContext *ctx)
+{
+ int err;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VulkanDevicePriv *p = ctx->internal->priv;
+
+ /* Create exec context - if there's something invalid this will error out */
+ err = create_exec_ctx(ctx);
+ if (err)
+ return err;
+
+ /* Get device capabilities */
+ vkGetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
+
+ return 0;
+}
+
+static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,
+ AVDictionary *opts, int flags)
+{
+ VulkanDeviceSelection dev_select = { 0 };
+ if (device && device[0]) {
+ if (av_isdigit(device[0]))
3dfx probably wouldn't appreciate this test, though I admit they are unlikely to add Vulkan support to their cards.
Post by Rostislav Pehlivanov
+ dev_select.index = strtol(device, NULL, 10);
Might be nicer to always call strtol and then check whether *end is zero, just in case of a future problem like that.
Post by Rostislav Pehlivanov
+ else
+ dev_select.name = device;
+ }
+
+ return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
+}
+
+static int vulkan_device_derive(AVHWDeviceContext *ctx,
+ AVHWDeviceContext *src_ctx, int flags)
+{
+ VulkanDeviceSelection dev_select = { 0 };
+
+ switch(src_ctx->type) {
+#if CONFIG_LIBDRM
+#if CONFIG_VAAPI
+ case AV_HWDEVICE_TYPE_VAAPI: {
+ AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;
+ const char *vendor = vaQueryVendorString(src_hwctx->display);
+ if (!vendor) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to get device info from vaapi!\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ if (strstr(vendor, "Intel"))
+ dev_select.vendor_id = 0x8086;
+ if (strstr(vendor, "AMD"))
+ dev_select.vendor_id = 0x1002;
+
+ return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
Did you think about making an addition to VAAPI which could do this in a more sensible way?
Post by Rostislav Pehlivanov
+ }
+#endif
+ case AV_HWDEVICE_TYPE_DRM: {
+ AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;
+
+ drmDevice *drm_dev_info;
+ int err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
+ if (err) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to get device info from drm fd!\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;
Not all devices are PCI, check bustype before using this. I don't know what information you can use in other cases, though (GPUs on mobile will just be opaque platform devices).

The drmDevice structure needs to be freed, too (drmFreeDevice).
Post by Rostislav Pehlivanov
+
+ return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
+ }
+#endif
+ return AVERROR(ENOSYS);
+ }
+}
+
+static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
+ const void *hwconfig,
+ AVHWFramesConstraints *constraints)
+{
+ int count = 0;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VulkanDevicePriv *p = ctx->internal->priv;
+
+ for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
This iteration feels dubious, maybe it would be better to use av_pix_fmt_desc_next()?
Post by Rostislav Pehlivanov
+ count += vkfmt_is_supported(hwctx, i, p->use_linear_images);
+
+ constraints->valid_sw_formats = av_malloc_array(count + 1,
+ sizeof(enum AVPixelFormat));
+ if (!constraints->valid_sw_formats)
+ return AVERROR(ENOMEM);
+
+ count = 0;
+ for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
+ if (vkfmt_is_supported(hwctx, i, p->use_linear_images))
+ constraints->valid_sw_formats[count++] = i;
+ constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;
+
+ constraints->min_width = 0;
+ constraints->min_height = 0;
Not directly related to this, but I was trying because of it: I note that ANV returns:

[AVHWDeviceContext @ 0x555558792a40] Image creation failure: VK_ERROR_OUT_OF_DEVICE_MEMORY

with a 1x1 YUV420P image, which is a pretty opaque failure (works for 2x2 or 3x3). What are the requirements there? Who should be checking it?
Post by Rostislav Pehlivanov
+ constraints->max_width = p->props.limits.maxImageDimension2D;
+ constraints->max_height = p->props.limits.maxImageDimension2D;
+
+ constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat));
+ if (!constraints->valid_hw_formats)
+ return AVERROR(ENOMEM);
+
+ constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN;
+ constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
+
+ return 0;
+}
+
...
+
+static int alloc_bind_mem(AVHWDeviceContext *ctx, AVVkFrame *f,
+ void *alloc_pnext, size_t alloc_pnext_stride)
+{
+ int err;
+ VkResult ret;
+ VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };
+ VkBindImagePlaneMemoryInfo bind_p_info[AV_NUM_DATA_POINTERS] = { { 0 } };
+
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VulkanDevicePriv *p = ctx->internal->priv;
+
+ VK_LOAD_PFN(hwctx->inst, vkBindImageMemory2KHR);
+ VK_LOAD_PFN(hwctx->inst, vkGetImageMemoryRequirements2KHR);
The presence of the relevant extension presumably means that these necessarily succeed?
Post by Rostislav Pehlivanov
+
+ for (int i = 0; i < f->mem_count; i++) {
+ int use_ded_mem;
+ VkImagePlaneMemoryRequirementsInfo plane_req = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO,
+ VK_IMAGE_ASPECT_PLANE_2_BIT,
+ };
+ VkImageMemoryRequirementsInfo2 req_desc = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
+ .pNext = f->mem_count > 1 ? &plane_req : NULL,
+ .image = f->img,
+ };
+ VkMemoryDedicatedAllocateInfo ded_alloc = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
+ .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride),
+ };
+ VkMemoryDedicatedRequirements ded_req = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
+ };
+ VkMemoryRequirements2 req = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
+ .pNext = (p->extensions & EXT_DEDICATED_ALLOC) ? &ded_req : NULL,
+ };
+
+ pfn_vkGetImageMemoryRequirements2KHR(hwctx->act_dev, &req_desc, &req);
+
+ /* In case the implementation prefers/requires dedicated allocation */
+ use_ded_mem = ded_req.prefersDedicatedAllocation |
+ ded_req.requiresDedicatedAllocation;
+ if (use_ded_mem)
+ ded_alloc.image = f->img;
+
+ /* Allocate memory */
+ if ((err = alloc_mem(ctx, &req.memoryRequirements,
+ f->tiling == VK_IMAGE_TILING_LINEAR ?
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+ use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
+ &f->flags, &f->mem[i])))
+ return err;
+
+ if (f->mem_count > 1) {
+ bind_p_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
+ bind_p_info[i].planeAspect = plane_req.planeAspect;
+ bind_info[i].pNext = &bind_p_info[i];
+ }
+
+ bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
+ bind_info[i].image = f->img;
+ bind_info[i].memory = f->mem[i];
+ }
+
+ /* Bind the allocated memory to the image */
+ ret = pfn_vkBindImageMemory2KHR(hwctx->act_dev, f->mem_count, bind_info);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
+ vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
This looks like it can leak some allocated memory if something goes wrong during bind or when allocating a plane after the first. Or is there some magic which means it doesn't?
Post by Rostislav Pehlivanov
+
+ return 0;
+}
+
...
+
+static AVBufferRef *vulkan_pool_alloc(void *opaque, int size)
+{
+ int err;
+ AVVkFrame *f;
+ AVBufferRef *avbuf = NULL;
+ AVHWFramesContext *hwfc = opaque;
+ AVVulkanFramesContext *hwctx = hwfc->hwctx;
+ VkExportMemoryAllocateInfo einfo[AV_NUM_DATA_POINTERS];
+ VkExternalMemoryHandleTypeFlags e = 0x0;
+
+ try_export_flags(hwfc, &e, VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
The intent of this is to allocate memory which is dma_buf inside the kernel and can therefore be exported as DRM objects?

Have you tried making a map_from which uses that? (That would allow hwmap Vulkan->VAAPI without reverse mapping, I guess.)
Post by Rostislav Pehlivanov
+
+ for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) {
+ einfo[i].sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
+ einfo[i].pNext = hwctx->alloc_pnext[i];
+ einfo[i].handleTypes = e;
+ }
+
+ err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
+ hwctx->disjoint, hwctx->create_pnext,
+ einfo, sizeof(*einfo));
+ if (err)
+ return NULL;
+
+ avbuf = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame),
+ vulkan_frame_free, hwfc->device_ctx->hwctx, 0);
+ if (!avbuf) {
+ vulkan_frame_free(hwfc->device_ctx->hwctx, (uint8_t *)f);
+ return NULL;
+ }
+
+ return avbuf;
+}
+
+static int vulkan_frames_init(AVHWFramesContext *hwfc)
+{
+ AVVulkanFramesContext *hwctx = hwfc->hwctx;
+ VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
+
+ if (hwfc->pool)
+ return 0;
+
+ /* Default pool flags */
+ hwctx->tiling = hwctx->tiling ? hwctx->tiling : p->use_linear_images ?
+ VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
+
+ hwctx->usage |= DEFAULT_USAGE_FLAGS;
+
+ hwctx->disjoint = hwctx->disjoint ? hwctx->disjoint : p->use_disjoint_images;
+
+ hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame),
+ hwfc, vulkan_pool_alloc,
+ NULL);
+ if (!hwfc->internal->pool_internal)
+ return AVERROR(ENOMEM);
This doesn't actually check anything about the parameters - e.g. I can make a frames context with a crazy unsupported sw_format and it will return success.

Is it sensible to, say, test-allocate a single frame to make sure it actually works?
Post by Rostislav Pehlivanov
+
+ return 0;
+}
+
+static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
+{
+ frame->buf[0] = av_buffer_pool_get(hwfc->pool);
+ if (!frame->buf[0])
+ return AVERROR(ENOMEM);
+
+ frame->data[0] = frame->buf[0]->data;
+ frame->format = AV_PIX_FMT_VULKAN;
+ frame->width = hwfc->width;
+ frame->height = hwfc->height;
+
+ return 0;
+}
+
+static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc,
+ enum AVHWFrameTransferDirection dir,
+ enum AVPixelFormat **formats)
+{
+ int count = 0;
+ enum AVPixelFormat *pix_fmts = NULL;
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
+
+ /* All formats can be transferred to themselves */
+ count++;
+
+ /* All formats with a luma can have only that channel transferred */
+ count += !(desc->flags & AV_PIX_FMT_FLAG_RGB);
In what cases is this actually expected to work?

$ gdb --args ./ffmpeg_g -v 55 -y -i in.mp4 -an -init_hw_device vulkan=amd:0 -init_hw_device vulkan=intel:1 -filter_hw_device intel -vf 'format=yuv420p,hwupload,hwdownload,format=gray8' -c:v libx264 -frames:v 1 out.mp4
...
[hwupload @ 0x5555597bd1c0] Surface format is yuv420p.
[swscaler @ 0x5555597c2380] deprecated pixel format used, make sure you did set range correctly
[auto_scaler_0 @ 0x5555597c1480] w:1920 h:1080 fmt:gray sar:1/1 -> w:1920 h:1080 fmt:yuvj444p sar:1/1 flags:0x4
ffmpeg_g: ../../../src/intel/vulkan/anv_image.c:806: anv_layout_to_aux_usage: Assertion `_mesa_bitcount(aspect) == 1 && (aspect & image->aspects)' failed.

Thread 1 "ffmpeg_g" received signal SIGABRT, Aborted.
__GI_raise (sig=***@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
51 ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt
#0 __GI_raise (sig=***@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
#1 0x00007ffff3127231 in __GI_abort () at abort.c:79
#2 0x00007ffff311e9da in __assert_fail_base (fmt=0x7ffff3271d48 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n", assertion=***@entry=0x7ffff21c72f0 "_mesa_bitcount(aspect) == 1 && (aspect & image->aspects)", file=***@entry=0x7ffff21c6d38 "../../../src/intel/vulkan/anv_image.c", line=***@entry=806,
function=***@entry=0x7ffff21c7e60 <__PRETTY_FUNCTION__.66594> "anv_layout_to_aux_usage") at assert.c:92
#3 0x00007ffff311ea52 in __GI___assert_fail (assertion=***@entry=0x7ffff21c72f0 "_mesa_bitcount(aspect) == 1 && (aspect & image->aspects)", file=***@entry=0x7ffff21c6d38 "../../../src/intel/vulkan/anv_image.c", line=***@entry=806,
function=***@entry=0x7ffff21c7e60 <__PRETTY_FUNCTION__.66594> "anv_layout_to_aux_usage") at assert.c:101
#4 0x00007ffff1de9ba8 in anv_layout_to_aux_usage (devinfo=***@entry=0x555558b6a8f8, image=***@entry=0x5555597db110, aspect=***@entry=VK_IMAGE_ASPECT_COLOR_BIT, layout=***@entry=VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) at ../../../src/intel/vulkan/anv_image.c:806
#5 0x00007ffff1dd9ed7 in get_blorp_surf_for_anv_image (device=0x555558b6a8b0, image=***@entry=0x5555597db110, aspect=***@entry=1, layout=***@entry=VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, aux_usage=***@entry=ISL_AUX_USAGE_NONE, blorp_surf=***@entry=0x7fffffffce60)
at ../../../src/intel/vulkan/anv_blorp.c:204
#6 0x00007ffff1dda209 in copy_buffer_to_image (cmd_buffer=0x555558aaba70, anv_buffer=0x555558670dd0, anv_image=0x5555597db110, image_layout=VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, regionCount=<optimized out>, pRegions=<optimized out>, buffer_to_image=false) at ../../../src/intel/vulkan/anv_blorp.c:382
#7 0x00007ffff1ddab56 in anv_CmdCopyImageToBuffer (commandBuffer=<optimized out>, srcImage=<optimized out>, srcImageLayout=<optimized out>, dstBuffer=<optimized out>, regionCount=<optimized out>, pRegions=<optimized out>) at ../../../src/intel/vulkan/anv_blorp.c:475
#8 0x00007ffff667887d in vkCmdCopyImageToBuffer (commandBuffer=0x555558aaba70, srcImage=0x5555597db110, srcImageLayout=VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dstBuffer=0x555558670dd0, regionCount=1, pRegions=0x7fffffffd060) at /home/mrt/video/vulkan/loader/loader/trampoline.c:1923
#9 0x0000555556a0281d in transfer_image_buf (ctx=0x555558792a40, frame=0x5555597c0a00, buffer=0x7fffffffd1f0, stride=0x5555597e4780, w=1920, h=1080, pix_fmt=AV_PIX_FMT_GRAY8, to_buf=1) at src/libavutil/hwcontext_vulkan.c:1810
#10 0x0000555556a03158 in vulkan_transfer_data_from (hwfc=0x5555597c0640, dst=0x5555597e4740, src=0x5555597bf8c0) at src/libavutil/hwcontext_vulkan.c:1963
#11 0x00005555569f28d5 in av_hwframe_transfer_data (dst=0x5555597e4740, src=0x5555597bf8c0, flags=0) at src/libavutil/hwcontext.c:454
#12 0x000055555579819c in hwdownload_filter_frame (link=0x5555597bdd40, input=0x5555597bf8c0) at src/libavfilter/vf_hwdownload.c:153
#13 0x00005555556af6c9 in ff_filter_frame_framed (link=0x5555597bdd40, frame=0x5555597bf8c0) at src/libavfilter/avfilter.c:1071
#14 0x00005555556aff52 in ff_filter_frame_to_filter (link=0x5555597bdd40) at src/libavfilter/avfilter.c:1219
#15 0x00005555556b014e in ff_filter_activate_default (filter=0x5555597bbd00) at src/libavfilter/avfilter.c:1268
#16 0x00005555556b0372 in ff_filter_activate (filter=0x5555597bbd00) at src/libavfilter/avfilter.c:1429
#17 0x00005555556b5036 in ff_filter_graph_run_once (graph=0x5555597bd900) at src/libavfilter/avfiltergraph.c:1454
#18 0x00005555556b6466 in push_frame (graph=0x5555597bd900) at src/libavfilter/buffersrc.c:181
#19 0x00005555556b6778 in av_buffersrc_add_frame_internal (ctx=0x5555597be100, frame=0x555558e10300, flags=4) at src/libavfilter/buffersrc.c:255
#20 0x00005555556b63ed in av_buffersrc_add_frame_flags (ctx=0x5555597be100, frame=0x555558e10300, flags=4) at src/libavfilter/buffersrc.c:164
#21 0x0000555555679212 in ifilter_send_frame (ifilter=0x555558b9da00, frame=0x555558e10300) at src/fftools/ffmpeg.c:2190
#22 0x00005555556794f2 in send_frame_to_filters (ist=0x555558d944c0, decoded_frame=0x555558e10300) at src/fftools/ffmpeg.c:2264
#23 0x000055555567a2ac in decode_video (ist=0x555558d944c0, pkt=0x7fffffffd820, got_output=0x7fffffffd814, duration_pts=0x7fffffffd818, eof=0, decode_failed=0x7fffffffd810) at src/fftools/ffmpeg.c:2465
#24 0x000055555567ac47 in process_input_packet (ist=0x555558d944c0, pkt=0x7fffffffd9e0, no_eof=0) at src/fftools/ffmpeg.c:2619
#25 0x0000555555681bb5 in process_input (file_index=0) at src/fftools/ffmpeg.c:4457
#26 0x00005555556820c4 in transcode_step () at src/fftools/ffmpeg.c:4577
#27 0x00005555556821f1 in transcode () at src/fftools/ffmpeg.c:4631
#28 0x0000555555682a81 in main (argc=20, argv=0x7fffffffe3e8) at src/fftools/ffmpeg.c:4838
Post by Rostislav Pehlivanov
+
+ pix_fmts = av_malloc((count + 1) * sizeof(*pix_fmts));
+ if (!pix_fmts)
+ return AVERROR(ENOMEM);
+
+ count = 0;
+ pix_fmts[count++] = hwfc->sw_format;
+ if (!(desc->flags & AV_PIX_FMT_FLAG_RGB)) {
+ switch (desc->comp[0].depth) {
+ case 8: pix_fmts[count++] = AV_PIX_FMT_GRAY8; break;
+ case 10: pix_fmts[count++] = AV_PIX_FMT_GRAY10; break;
+ case 12: pix_fmts[count++] = AV_PIX_FMT_GRAY12; break;
+ case 16: pix_fmts[count++] = AV_PIX_FMT_GRAY16; break;
+ }
Tbh I'm not convinced that offering the luma-only option as well is going to cause anything other than confusion. Do you have any use-cases in mind for it?
Post by Rostislav Pehlivanov
+ }
+ pix_fmts[count++] = AV_PIX_FMT_NONE;
+
+ *formats = pix_fmts;
+
+ return 0;
+}
+
+typedef struct VulkanMapping {
+ AVVkFrame *frame;
+ int flags;
+} VulkanMapping;
+
+static void vulkan_unmap_frame(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
+{
+ VulkanMapping *map = hwmap->priv;
+ AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
+
+ /* Check if buffer needs flushing */
+ if ((map->flags & AV_HWFRAME_MAP_WRITE) &&
+ !(map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
+ VkResult ret;
+ VkMappedMemoryRange flush_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
+
+ for (int i = 0; i < map->frame->mem_count; i++) {
+ flush_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
+ flush_ranges[i].memory = map->frame->mem[i];
+ flush_ranges[i].size = VK_WHOLE_SIZE;
+ }
+
+ ret = vkFlushMappedMemoryRanges(hwctx->act_dev, map->frame->mem_count,
+ flush_ranges);
+ if (ret != VK_SUCCESS) {
+ av_log(hwfc, AV_LOG_ERROR, "Failed to flush memory: %s\n",
+ vk_ret2str(ret));
+ }
+ }
+
+ for (int i = 0; i < map->frame->mem_count; i++)
+ vkUnmapMemory(hwctx->act_dev, map->frame->mem[i]);
+
+ av_free(map);
+}
+
+static int vulkan_map_frame(AVHWFramesContext *hwfc, AVFrame *dst,
+ const AVFrame *src, int flags)
+{
+ int err;
+ VkResult ret;
+ AVVkFrame *f = (AVVkFrame *)src->data[0];
+ AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
+ const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
+
+ VulkanMapping *map = av_mallocz(sizeof(VulkanMapping));
+ if (!map)
+ return AVERROR(EINVAL);
+
+ if (src->format != AV_PIX_FMT_VULKAN) {
+ av_log(hwfc, AV_LOG_ERROR, "Cannot map from pixel format %s!\n",
+ av_get_pix_fmt_name(src->format));
+ err = AVERROR(EINVAL);
+ goto fail;
+ }
+
+ if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
+ !(f->tiling == VK_IMAGE_TILING_LINEAR)) {
+ av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, not host visible "
+ "and linear!\n");
Is this a requirement? Some devices have magic MMU hardware which can linear-map tiled memory.
Post by Rostislav Pehlivanov
+ err = AVERROR(EINVAL);
+ goto fail;
+ }
+
+ dst->width = src->width;
+ dst->height = src->height;
+
+ for (int i = 0; i < f->mem_count; i++) {
+ ret = vkMapMemory(hwctx->act_dev, f->mem[i], 0,
+ VK_WHOLE_SIZE, 0, (void **)&dst->data[i]);
+ if (ret != VK_SUCCESS) {
+ av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n",
+ vk_ret2str(ret));
+ err = AVERROR_EXTERNAL;
+ goto fail;
+ }
+ }
+
+ /* For non disjoint memory duplicate them */
+ if (f->mem_count == 1)
+ for (int i = 1; i < planes; i++)
+ dst->data[i] = dst->data[0];
+
+ /* Check if the memory contents matter */
+ if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) &&
+ !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
+ VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
+ for (int i = 0; i < f->mem_count; i++) {
+ map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
+ map_mem_ranges[i].size = VK_WHOLE_SIZE;
+ map_mem_ranges[i].memory = f->mem[i];
+ }
+
+ ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, f->mem_count,
+ map_mem_ranges);
+ if (ret != VK_SUCCESS) {
+ av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
+ vk_ret2str(ret));
+ err = AVERROR_EXTERNAL;
+ goto fail;
+ }
+ }
+
+ for (int i = 0; i < planes; i++) {
+ VkImageSubresource sub = {
+ VK_IMAGE_ASPECT_PLANE_2_BIT,
+ };
+ VkSubresourceLayout layout;
+ vkGetImageSubresourceLayout(hwctx->act_dev, f->img, &sub, &layout);
+ dst->data[i] += layout.offset;
+ dst->linesize[i] = layout.rowPitch;
+ }
+
+ map->frame = f;
+ map->flags = flags;
+
+ err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
+ &vulkan_unmap_frame, map);
+ if (err < 0)
+ goto fail;
+
+ return 0;
+
+ for (int i = 0; i < f->mem_count; i++)
+ vkUnmapMemory(hwctx->act_dev, f->mem[i]);
Unmap isn't valid on memory which isn't currently mapped; this needs to track how many have actually been mapped.
Post by Rostislav Pehlivanov
+
+ av_free(map);
+ return err;
+}
+
+#if CONFIG_LIBDRM
+static void vulkan_unmap_from(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
+{
+ VulkanMapping *map = hwmap->priv;
+ AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
+
+ vkDestroyImage(hwctx->act_dev, map->frame->img, hwctx->alloc);
+ for (int i = 0; i < map->frame->mem_count; i++)
+ vkFreeMemory(hwctx->act_dev, map->frame->mem[i], hwctx->alloc);
+
+ av_freep(&map->frame);
+}
+
+static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f,
+ AVDRMFrameDescriptor *desc)
+{
+ int err = 0;
+
+ /* Destination frame */
+#if HAVE_VULKAN_DRM_MOD
+ uint64_t modifier_buf[AV_NUM_DATA_POINTERS];
+ VkImageDrmFormatModifierListCreateInfoEXT drm_mod = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
+ };
+#endif
+ VkExternalMemoryImageCreateInfo ext_info = {
+ .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
+#if HAVE_VULKAN_DRM_MOD
+ .pNext = &drm_mod,
+#endif
+ .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
+ };
+ VkImportMemoryFdInfoKHR import_desc[AV_NUM_DATA_POINTERS];
+
+ if ((desc->nb_objects > 1) &&
+ (desc->nb_objects != av_pix_fmt_count_planes(hwfc->format))) {
"hwfc->sw_format"
Post by Rostislav Pehlivanov
+ av_log(hwfc, AV_LOG_ERROR, "Number of DRM objects doesn't match "
+ "plane count!\n");
+ return AVERROR(EINVAL);
+ }
+
+ for (int i = 0; i < desc->nb_objects; i++) {
+ import_desc[i].sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR;
+ import_desc[i].pNext = NULL;
+ import_desc[i].handleType = ext_info.handleTypes;
+ import_desc[i].fd = desc->objects[i].fd;
+#if HAVE_VULKAN_DRM_MOD
+ modifier_buf[i] = desc->objects[i].format_modifier;
I think you want to give it the modifier structure only if the modifier isn't DRM_FORMAT_MOD_INVALID. Not passing a modifier asks the driver to use internal magic if it can (e.g. dri_bo_get_tiling()); passing a modifier must always use what you give it.
Post by Rostislav Pehlivanov
+#endif
+ }
+#if HAVE_VULKAN_DRM_MOD
+ drm_mod.pDrmFormatModifiers = modifier_buf;
+ drm_mod.drmFormatModifierCount = desc->nb_objects;
+#endif
+
+ err = create_frame(hwfc, f,
+#if HAVE_VULKAN_DRM_MOD
+ VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT,
+#else
+ desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ?
+ VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL,
+#endif
+ DEFAULT_USAGE_FLAGS, desc->nb_objects > 1, &ext_info,
+ import_desc, sizeof(*import_desc));
+ if (err < 0)
+ return err;
You do need to look at the layer information in the DRM descriptor here.

E.g. mapping from VAAPI/NV12 works on Intel, but on AMD I get (with the above plane-count bug fixed):

$ gdb --args ./ffmpeg_g -y -hwaccel vaapi -hwaccel_output_format vaapi -hwaccel_device /dev/dri/renderD129 -i in.mp4 -an -vf 'hwmap=derive_device=vulkan,scale_vulkan=1280:720,hwmap=derive_device=vaapi:reverse=1' -c:v h264_vaapi -frames:v 1 out.mp4
...
[Parsed_hwmap_2 @ 0x555558f3ff40] Configure hwmap vulkan -> vaapi_vld.
[AVHWFramesContext @ 0x55555952be00] Created surface 0x24.
[AVHWFramesContext @ 0x55555952be00] Direct mapping disabled: deriving image does not work: 6 (invalid VASurfaceID).
[hwmap @ 0x5555584578c0] Filter input: vaapi_vld, 1920x1080 (2000).

Thread 1 "ffmpeg_g" received signal SIGSEGV, Segmentation fault.
vk_format_get_nr_components (format=<optimized out>) at ../../../../src/amd/vulkan/vk_format.h:535
535 return desc->nr_channels;
(gdb) bt
#0 vk_format_get_nr_components (format=<optimized out>) at ../../../../src/amd/vulkan/vk_format.h:535
#1 radv_image_create (_device=0x5555593e26a0, create_info=***@entry=0x7fffffffcf00, alloc=<optimized out>, pImage=<optimized out>) at ../../../../src/amd/vulkan/radv_image.c:943
#2 0x00007fffac3ed92e in radv_CreateImage (device=<optimized out>, pCreateInfo=<optimized out>, pAllocator=<optimized out>, pImage=<optimized out>) at ../../../../src/amd/vulkan/radv_image.c:1261
#3 0x00007ffff66776c4 in vkCreateImage (device=0x5555593e26a0, pCreateInfo=0x7fffffffcf90, pAllocator=0x0, pImage=0x55555946f300) at /home/mrt/video/vulkan/loader/loader/trampoline.c:1328
#4 0x0000555556a00a07 in create_frame (hwfc=0x555559568c80, frame=0x7fffffffd1c8, tiling=VK_IMAGE_TILING_LINEAR, usage=15, disjoint=1, create_pnext=0x7fffffffd160, alloc_pnext=0x7fffffffd0a0, alloc_pnext_stride=24) at src/libavutil/hwcontext_vulkan.c:1130
#5 0x0000555556a019db in vulkan_map_from_drm_frame_desc (hwfc=0x555559568c80, f=0x7fffffffd1c8, desc=0x5555595abd80) at src/libavutil/hwcontext_vulkan.c:1480
#6 0x0000555556a01a38 in vulkan_map_from_drm (hwfc=0x555559568c80, dst=0x555559322000, src=0x555559322300, flags=3) at src/libavutil/hwcontext_vulkan.c:1502
#7 0x0000555556a01b90 in vulkan_map_from_vaapi (dst_fc=0x555559568c80, dst=0x555559322000, src=0x555559321d40, flags=3) at src/libavutil/hwcontext_vulkan.c:1550
#8 0x0000555556a01c67 in vulkan_map_to (hwfc=0x555559568c80, dst=0x555559322000, src=0x555559321d40, flags=3) at src/libavutil/hwcontext_vulkan.c:1579
#9 0x00005555569f3307 in av_hwframe_map (dst=0x555559322000, src=0x555559321d40, flags=3) at src/libavutil/hwcontext.c:792
#10 0x0000555555798c69 in hwmap_filter_frame (link=0x555558f41180, input=0x555559321d40) at src/libavfilter/vf_hwmap.c:339
#11 0x00005555556af6c9 in ff_filter_frame_framed (link=0x555558f41180, frame=0x555559321d40) at src/libavfilter/avfilter.c:1071
#12 0x00005555556aff52 in ff_filter_frame_to_filter (link=0x555558f41180) at src/libavfilter/avfilter.c:1219
#13 0x00005555556b014e in ff_filter_activate_default (filter=0x5555582649c0) at src/libavfilter/avfilter.c:1268
#14 0x00005555556b0372 in ff_filter_activate (filter=0x5555582649c0) at src/libavfilter/avfilter.c:1429
#15 0x00005555556b5036 in ff_filter_graph_run_once (graph=0x555558f3e500) at src/libavfilter/avfiltergraph.c:1454
#16 0x00005555556b6466 in push_frame (graph=0x555558f3e500) at src/libavfilter/buffersrc.c:181
#17 0x00005555556b6778 in av_buffersrc_add_frame_internal (ctx=0x555558f3e600, frame=0x555558592c40, flags=4) at src/libavfilter/buffersrc.c:255
#18 0x00005555556b63ed in av_buffersrc_add_frame_flags (ctx=0x555558f3e600, frame=0x555558592c40, flags=4) at src/libavfilter/buffersrc.c:164
#19 0x0000555555679212 in ifilter_send_frame (ifilter=0x5555581f1b40, frame=0x555558592c40) at src/fftools/ffmpeg.c:2190
#20 0x00005555556794f2 in send_frame_to_filters (ist=0x555558352240, decoded_frame=0x555558592c40) at src/fftools/ffmpeg.c:2264
#21 0x000055555567a2ac in decode_video (ist=0x555558352240, pkt=0x7fffffffd820, got_output=0x7fffffffd814, duration_pts=0x7fffffffd818, eof=0, decode_failed=0x7fffffffd810) at src/fftools/ffmpeg.c:2465
#22 0x000055555567ac47 in process_input_packet (ist=0x555558352240, pkt=0x7fffffffd9e0, no_eof=0) at src/fftools/ffmpeg.c:2619
#23 0x0000555555681bb5 in process_input (file_index=0) at src/fftools/ffmpeg.c:4457
#24 0x00005555556820c4 in transcode_step () at src/fftools/ffmpeg.c:4577
#25 0x00005555556821f1 in transcode () at src/fftools/ffmpeg.c:4631
#26 0x0000555555682a81 in main (argc=20, argv=0x7fffffffe3e8) at src/fftools/ffmpeg.c:4838
Post by Rostislav Pehlivanov
+
+ return 0;
+}
+
...
+
+typedef struct ImageBuffer {
+ VkBuffer buf;
+ VkDeviceMemory mem;
+ VkMemoryPropertyFlagBits flags;
+} ImageBuffer;
+
+static int create_buf(AVHWDeviceContext *ctx, ImageBuffer *buf, size_t size,
+ VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags,
+ void *create_pnext, void *alloc_pnext)
+{
+ int err;
+ VkResult ret;
+ VkMemoryRequirements req;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+
+ VkBufferCreateInfo buf_spawn = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = create_pnext,
+ .usage = usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .size = size, /* Gets FFALIGNED during alloc if host visible
+ but should be ok */
+ };
+
+ ret = vkCreateBuffer(hwctx->act_dev, &buf_spawn, NULL, &buf->buf);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
+ vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ vkGetBufferMemoryRequirements(hwctx->act_dev, buf->buf, &req);
+
+ err = alloc_mem(ctx, &req, flags, alloc_pnext, &buf->flags, &buf->mem);
+ if (err)
+ return err;
+
+ ret = vkBindBufferMemory(hwctx->act_dev, buf->buf, buf->mem, 0);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
+ vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
This function looks like it is missing the free cases on failure.
Post by Rostislav Pehlivanov
+
+ return 0;
+}
+
+static void free_buf(AVHWDeviceContext *ctx, ImageBuffer *buf)
+{
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ if (!buf)
+ return;
+
+ vkDestroyBuffer(hwctx->act_dev, buf->buf, hwctx->alloc);
+ vkFreeMemory(hwctx->act_dev, buf->mem, hwctx->alloc);
+}
+
+static int map_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf, uint8_t *mem[],
+ int nb_buffers, int invalidate)
+{
+ VkResult ret;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VkMappedMemoryRange invalidate_ctx[AV_NUM_DATA_POINTERS];
+ int invalidate_count = 0;
+
+ for (int i = 0; i < nb_buffers; i++) {
+ ret = vkMapMemory(hwctx->act_dev, buf[i].mem, 0,
+ VK_WHOLE_SIZE, 0, (void **)&mem[i]);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
+ vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ }
+
+ if (!invalidate)
+ return 0;
+
+ for (int i = 0; i < nb_buffers; i++) {
+ const VkMappedMemoryRange ival_buf = {
+ .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+ .memory = buf[i].mem,
+ .size = VK_WHOLE_SIZE,
+ };
+ if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+ continue;
+ invalidate_ctx[invalidate_count++] = ival_buf;
+ }
+
+ if (invalidate_count) {
+ ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, invalidate_count,
+ invalidate_ctx);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
+ vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ }
Missing unmap cases?
Post by Rostislav Pehlivanov
+
+ return 0;
+}
+
...
+
+static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
+ ImageBuffer *buffer, const int *stride, int w,
+ int h, enum AVPixelFormat pix_fmt, int to_buf)
+{
+ VkResult ret;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VulkanDevicePriv *s = ctx->internal->priv;
+
+ const int planes = av_pix_fmt_count_planes(pix_fmt);
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
+
+ VkCommandBufferBeginInfo cmd_start = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ };
+
+ VkSubmitInfo s_info = {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .commandBufferCount = 1,
+ .pCommandBuffers = &s->cmd_buf,
+ };
+
+ vkBeginCommandBuffer(s->cmd_buf, &cmd_start);
Return value needs to be checked.
Post by Rostislav Pehlivanov
+
+ { /* Change the image layout to something more optimal for transfers */
+ VkImageMemoryBarrier bar = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .srcAccessMask = 0,
+ VK_ACCESS_TRANSFER_WRITE_BIT,
+ .oldLayout = frame->layout,
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = frame->img,
+ .subresourceRange.levelCount = 1,
+ .subresourceRange.layerCount = 1,
+ };
+
+ if (planes == 1) {
+ bar.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+ } else {
+ bar.subresourceRange.aspectMask = VK_IMAGE_ASPECT_PLANE_0_BIT;
+ bar.subresourceRange.aspectMask |= VK_IMAGE_ASPECT_PLANE_1_BIT;
+ if (planes > 2)
+ bar.subresourceRange.aspectMask |= VK_IMAGE_ASPECT_PLANE_2_BIT;
+ }
+
+ vkCmdPipelineBarrier(s->cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ VK_PIPELINE_STAGE_TRANSFER_BIT,
+ 0, 0, NULL, 0, NULL, 1, &bar);
+
+ /* Update to the new layout */
+ frame->layout = bar.newLayout;
+ frame->access = bar.dstAccessMask;
+ }
+
+ /* Schedule a copy for each plane */
+ for (int i = 0; i < planes; i++) {
+ VkImageSubresourceLayers sub = {
+ VK_IMAGE_ASPECT_PLANE_2_BIT,
+ .layerCount = 1,
+ };
+ const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w;
+ const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
+ VkBufferImageCopy buf_reg = {
+ .bufferOffset = 0,
+ /* Buffer stride isn't in bytes, it's in samples, the implementation
+ * uses the image's VkFormat to know how many bytes per sample
+ * the buffer has. So we have to convert by dividing. Stupid. */
+ .bufferRowLength = stride[i] / desc->comp[i].step,
comp[i] isn't necessarily plane[i], but I think it happens to work anyway for all of the supported formats.

More generally, this tricky code using some properties like the log2_chroma values will fail for formats like YUVA420P, so if you are intending to add them later (for overlay) it might be sensible to ensure that this doesn't make too many assumptions now.
Post by Rostislav Pehlivanov
+ .bufferImageHeight = p_h,
+ .imageSubresource = sub,
+ .imageOffset = { 0 },
+ .imageExtent = { p_w, p_h, 1, },
+ };
+ if (to_buf)
+ vkCmdCopyImageToBuffer(s->cmd_buf, frame->img, frame->layout,
+ buffer[i].buf, 1, &buf_reg);
+ else
+ vkCmdCopyBufferToImage(s->cmd_buf, buffer[i].buf, frame->img,
+ frame->layout, 1, &buf_reg);
+ }
+
+ vkEndCommandBuffer(s->cmd_buf);
Can also fail.
Post by Rostislav Pehlivanov
+
+ ret = vkQueueSubmit(s->cmd_queue, 1, &s_info, s->cmd_fence);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
+ vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ } else {
+ vkWaitForFences(hwctx->act_dev, 1, &s->cmd_fence, VK_TRUE, UINT64_MAX);
+ vkResetFences(hwctx->act_dev, 1, &s->cmd_fence);
+ }
+
+ return 0;
+}
+
...
+
+const HWContextType ff_hwcontext_type_vulkan = {
+ .type = AV_HWDEVICE_TYPE_VULKAN,
+ .name = "Vulkan",
+
+ .device_hwctx_size = sizeof(AVVulkanDeviceContext),
+ .device_priv_size = sizeof(VulkanDevicePriv),
+ .frames_hwctx_size = sizeof(AVVulkanFramesContext),
+
+ .device_init = &vulkan_device_init,
+ .device_create = &vulkan_device_create,
+ .device_derive = &vulkan_device_derive,
+
+ .frames_get_constraints = &vulkan_frames_get_constraints,
+ .frames_init = vulkan_frames_init,
+ .frames_get_buffer = vulkan_get_buffer,
+
+ .transfer_get_formats = vulkan_transfer_get_formats,
+ .transfer_data_to = vulkan_transfer_data_to,
+ .transfer_data_from = vulkan_transfer_data_from,
+
+ .map_to = vulkan_map_to,
+
+ .pix_fmts = (const enum AVPixelFormat[]) {
+ AV_PIX_FMT_VULKAN,
+ AV_PIX_FMT_NONE
+ },
+};
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
new file mode 100644
index 0000000000..342c833a23
--- /dev/null
+++ b/libavutil/hwcontext_vulkan.h
@@ -0,0 +1,133 @@
+/*
+ * Vulkan hwcontext
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_HWCONTEXT_VULKAN_H
+#define AVUTIL_HWCONTEXT_VULKAN_H
+
+#include <vulkan/vulkan.h>
+
+/**
+ * API-specific header for AV_HWDEVICE_TYPE_VULKAN.
+ *
+ * For user-allocated pools, AVHWFramesContext.pool must return AVBufferRefs
+ * with the data pointer set to an AVVkFrame.
+ */
+
+/**
+ * Main Vulkan context, allocated as AVHWDeviceContext.hwctx.
+ * All of these can be set before init to change what the context uses
+ */
+typedef struct AVVulkanDeviceContext {
+ /**
+ * Custom memory allocator, else NULL
+ */
+ const VkAllocationCallbacks *alloc;
+ /**
+ * Instance
+ */
+ VkInstance inst;
+ /**
+ * Physical device
+ */
+ VkPhysicalDevice phys_dev;
+ /**
+ * Activated physical device
+ */
+ VkDevice act_dev;
+ /**
+ * Queue family index for graphics
+ */
+ int queue_family_index;
+ /**
+ * Queue family index for transfer ops only. By default, the priority order
+ * is dedicated transfer > dedicated compute > graphics.
+ */
+ int queue_family_tx_index;
+ /**
+ * Queue family index for compute ops. Will be equal to the graphics
+ * one unless a dedicated transfer queue is found.
+ */
+ int queue_family_comp_index;
+} AVVulkanDeviceContext;
+
+/**
+ * Allocated as AVHWFramesContext.hwctx, used to set pool-specific options
+ */
+typedef struct AVVulkanFramesContext {
+ /**
+ * Controls the tiling of output frames.
+ */
+ VkImageTiling tiling;
+ /**
+ * Defines extra usage of output frames. This is bitwise OR'd with the
+ * standard usage flags (SAMPLED, STORAGE, TRANSFER_SRC and TRANSFER_DST).
+ */
+ VkImageUsageFlagBits usage;
+ /**
+ * Set to 1 to allocate all planes separately (disjoint images)
+ */
+ int disjoint;
+ /**
+ * Extension data for image creation. By default, if the extension is
+ * available, this will be chained to a VkImageFormatListCreateInfoKHR.
+ */
+ void *create_pnext;
+ /**
+ * Extension data for memory allocation. If the image is disjoint, this
+ * must be one per plane, otherwise just the first entry is used.
+ * This will be chained to VkExportMemoryAllocateInfo, which is used
+ * to make all pool images exportable to other APIs.
+ */
+ void *alloc_pnext[AV_NUM_DATA_POINTERS];
+} AVVulkanFramesContext;
+
+/*
+ * Frame structure, the VkFormat of the image will always match
+ * the pool's sw_format.
+ */
+typedef struct AVVkFrame {
+ VkImage img;
+ VkImageTiling tiling;
+ /**
+ * Always 1 for non-disjoint images, #planes for disjoint
+ */
+ int mem_count;
+ VkDeviceMemory mem[AV_NUM_DATA_POINTERS];
+ /**
+ * OR'd flags for all memory allocated
+ */
+ VkMemoryPropertyFlagBits flags;
+
+ /**
+ * Updated after every barrier
+ */
+ VkAccessFlagBits access;
+ VkImageLayout layout;
+} AVVkFrame;
This all looks much cleaner than the previous version.
Post by Rostislav Pehlivanov
+/**
+ * Converts AVPixelFormat to VkFormat, returns VK_FORMAT_UNDEFINED if unsupported
+ * by the hwcontext
+ */
+VkFormat av_vkfmt_from_pixfmt(enum AVPixelFormat p);
+
+#endif /* AVUTIL_HWCONTEXT_VULKAN_H */
diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
index ff5c20d50e..c3b3aaee65 100644
--- a/libavutil/pixdesc.c
+++ b/libavutil/pixdesc.c
@@ -1673,6 +1673,10 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
.name = "videotoolbox_vld",
.flags = AV_PIX_FMT_FLAG_HWACCEL,
},
+ [AV_PIX_FMT_VULKAN] = {
+ .name = "vulkan",
+ .flags = AV_PIX_FMT_FLAG_HWACCEL,
+ },
You've put this in a funny place in the middle?
Post by Rostislav Pehlivanov
[AV_PIX_FMT_GBRP] = {
.name = "gbrp",
.nb_components = 3,
diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
index aea008bbdc..e6991f3630 100644
--- a/libavutil/pixfmt.h
+++ b/libavutil/pixfmt.h
@@ -333,6 +333,10 @@ enum AVPixelFormat {
AV_PIX_FMT_GRAY14BE, ///< Y , 14bpp, big-endian
AV_PIX_FMT_GRAY14LE, ///< Y , 14bpp, little-endian
+ /* Vulkan hardware images,
+ * data[0] contain an AVVkFrame */
+ AV_PIX_FMT_VULKAN,
+
AV_PIX_FMT_NB ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
};
diff --git a/libavutil/version.h b/libavutil/version.h
index 44bdebdc93..84409b1d69 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -79,8 +79,8 @@
*/
#define LIBAVUTIL_VERSION_MAJOR 56
-#define LIBAVUTIL_VERSION_MINOR 18
-#define LIBAVUTIL_VERSION_MICRO 102
+#define LIBAVUTIL_VERSION_MINOR 19
+#define LIBAVUTIL_VERSION_MICRO 100
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
LIBAVUTIL_VERSION_MINOR, \
I think I would make the pixfmt addition a patch on its own just to keep it separate, but that probably doesn't matter very much.

- Mark
Rostislav Pehlivanov
2018-05-31 10:48:36 UTC
Permalink
Post by Mark Thompson
Post by Rostislav Pehlivanov
This commit adds a Vulkan hwcontext, currently capable of mapping DRM and
VAAPI frames but additional functionality can be added later to support
importing of D3D11 surfaces as well as exporting to various other APIs.
Have you investigated the D3D11 interop at all? Seeing that working (even
if it isn't included here) would be nice to make sure there aren't any
gotchas later.
Post by Rostislav Pehlivanov
This context requires the newest stable version of the Vulkan API,
and once the new extension for DRM surfaces makes it in will also require
it (in order to properly and fully import them).
It makes use of every part of the Vulkan spec in order to ensure fastest
possible uploading, downloading and mapping of frames. On AMD, it will
also make use of mapping host memory frames in order to upload
very efficiently and with minimal CPU to hardware.
To be useful for non-RGB images an implementation with the YUV images
extension is needed. All current implementations support that with the
exception of AMD, though support is coming soon for Mesa.
<https://0x0.st/s212.txt> (I realise that won't show the relevant
formats, but it also doesn't work)).
Yes, AMD doesn't support it on Windows, Intel does, however, on a 620 and
most others. Maybe you have old drivers?

https://vulkan.gpuinfo.org/displayreport.php?id=3318#extensions
Post by Mark Thompson
Post by Rostislav Pehlivanov
---
configure | 10 +
doc/APIchanges | 3 +
libavutil/Makefile | 3 +
libavutil/hwcontext.c | 4 +
libavutil/hwcontext.h | 1 +
libavutil/hwcontext_internal.h | 1 +
libavutil/hwcontext_vulkan.c | 2013 ++++++++++++++++++++++++++++++++
libavutil/hwcontext_vulkan.h | 133 +++
libavutil/pixdesc.c | 4 +
libavutil/pixfmt.h | 4 +
libavutil/version.h | 4 +-
11 files changed, 2178 insertions(+), 2 deletions(-)
create mode 100644 libavutil/hwcontext_vulkan.c
create mode 100644 libavutil/hwcontext_vulkan.h
diff --git a/configure b/configure
index 09ff0c55e2..5f4407b753 100755
--- a/configure
+++ b/configure
--enable-opengl enable OpenGL rendering [no]
--enable-openssl enable openssl, needed for https support
if gnutls, libtls or mbedtls is not used [no]
+ --enable-vulkan enable Vulkan code [no]
Ordering (and in list below).
What do you mean? It's in alphabetical order.
Post by Mark Thompson
Post by Rostislav Pehlivanov
--disable-sndio disable sndio support [autodetect]
--disable-schannel disable SChannel SSP, needed for TLS support on
Windows if openssl and gnutls are not used [autodetect]
Post by Rostislav Pehlivanov
@@ -1767,6 +1768,7 @@ HWACCEL_LIBRARY_LIST="
mmal
omx
opencl
+ vulkan
"
DOCUMENT_LIST="
@@ -2223,6 +2225,7 @@ HAVE_LIST="
opencl_dxva2
opencl_vaapi_beignet
opencl_vaapi_intel_media
+ vulkan_drm_mod
perl
pod2man
texi2html
@@ -6349,6 +6352,13 @@ enabled vdpau &&
enabled crystalhd && check_lib crystalhd "stdint.h libcrystalhd/libcrystalhd_if.h" DtsCrystalHDVersion -lcrystalhd
+enabled vulkan &&
+ require_pkg_config vulkan "vulkan >= 1.1.73" "vulkan/vulkan.h" vkCreateInstance
+
+if enabled_all vulkan libdrm ; then
+ check_cpp_condition vulkan_drm_mod vulkan/vulkan.h "defined VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME"
+fi
+
if enabled x86; then
case $target_os in
mingw32*|mingw64*|win32|win64|linux|cygwin*)
diff --git a/doc/APIchanges b/doc/APIchanges
index efe15ba4e0..1b37f58ca7 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -15,6 +15,9 @@ libavutil: 2017-10-21
+2018-04-xx - xxxxxxxxxx - lavu 56.19.100 - hwcontext.h
+ Add AV_HWDEVICE_TYPE_VULKAN and implementation.
This should mention AV_PIX_FMT_VULKAN as well.
Fixed.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+
2018-05-xx - xxxxxxxxxx - lavf 58.15.100 - avformat.h
Add pmt_version field to AVProgram
...
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
new file mode 100644
index 0000000000..db0a5b7e61
--- /dev/null
+++ b/libavutil/hwcontext_vulkan.c
@@ -0,0 +1,2013 @@
+/*
+ * Vulkan hwcontext
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Post by Rostislav Pehlivanov
+ */
+
+#include "config.h"
+#include "pixdesc.h"
+#include "avstring.h"
+#include "hwcontext.h"
+#include "hwcontext_internal.h"
+#include "hwcontext_vulkan.h"
+
+#if CONFIG_LIBDRM
+#include <unistd.h> /* lseek */
+#include <xf86drm.h>
+#include <drm_fourcc.h>
+#include "hwcontext_drm.h"
+#if CONFIG_VAAPI
+#include <va/va_drmcommon.h>
+#include "hwcontext_vaapi.h"
+#endif
+#endif
+
+typedef struct VulkanDevicePriv {
+ /* Properties */
+ VkPhysicalDeviceProperties props;
+ VkPhysicalDeviceMemoryProperties mprops;
+
+ /* Debug callback */
+ VkDebugUtilsMessengerEXT debug_ctx;
+
+ /* Image uploading */
+ VkCommandPool cmd_pool;
+ VkCommandBuffer cmd_buf;
+ VkQueue cmd_queue;
+ VkFence cmd_fence;
+
+ /* Extensions */
+ uint64_t extensions;
+
+ /* Settings */
+ int use_linear_images;
+ int use_disjoint_images;
+} VulkanDevicePriv;
+
+#define VK_LOAD_PFN(inst, name) PFN_##name pfn_##name = (PFN_##name) \
+ vkGetInstanceProcAddr(inst, #name)
+
+#define DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT | \
+ VK_IMAGE_USAGE_STORAGE_BIT | \
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT | \
+ VK_IMAGE_USAGE_TRANSFER_DST_BIT)
+
+#define ADD_VAL_TO_LIST(list, count, val) \
+ do { \
+ list = av_realloc_array(list, sizeof(*list), ++count); \
+ if (!list) { \
+ err = AVERROR(ENOMEM); \
+ goto end; \
+ } \
+ list[count - 1] = val; \
+ } while(0)
+
+static const VkFormat vk_format_map[AV_PIX_FMT_NB] = {
+ /* Gray */
+ [AV_PIX_FMT_GRAY8] = VK_FORMAT_R8_UNORM,
+ [AV_PIX_FMT_GRAY10] = VK_FORMAT_R10X6_UNORM_PACK16,
+ [AV_PIX_FMT_GRAY12] = VK_FORMAT_R12X4_UNORM_PACK16,
Aren't GRAY10 and GRAY12 packed in the low bits rather than the high bits?
Nope:
"VK_FORMAT_R10X6_UNORM_PACK16 specifies a one-component, 16-bit unsigned
normalized format that has a single 10-bit R component in the top 10 bits
of a 16-bit word, with the bottom 6 bits set to 0."
"VK_FORMAT_R12X4_UNORM_PACK16 specifies a one-component, 16-bit unsigned
normalized format that has a single 12-bit R component in the top 12 bits
of a 16-bit word, with the bottom 4 bits set to 0."

We put padding bits in the top so I've removed these 2.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+ [AV_PIX_FMT_GRAY16] = VK_FORMAT_R16_UNORM,
+
+ /* Interleaved */
+ [AV_PIX_FMT_NV12] = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM,
+ [AV_PIX_FMT_P010] = VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,
Post by Rostislav Pehlivanov
+ [AV_PIX_FMT_P016] = VK_FORMAT_G16_B16R16_2PLANE_420_UNORM,
+ [AV_PIX_FMT_NV16] = VK_FORMAT_G16_B16R16_2PLANE_422_UNORM,
+ [AV_PIX_FMT_UYVY422] = VK_FORMAT_B16G16R16G16_422_UNORM,
+ [AV_PIX_FMT_YVYU422] = VK_FORMAT_G16B16G16R16_422_UNORM,
This should be AV_PIX_FMT_YUYV422?
Seems so, I don't know, the spec is extremely vague:
"VK_FORMAT_G16B16G16R16_422_UNORM specifies a four-component, 64-bit format
containing a pair of G components, an R component, and a B component,
collectively encoding a 2×1 rectangle of unsigned normalized RGB texel
data. One G value is present at each *i* coordinate, with the B and R
values shared across both G values and thus recorded at half the horizontal
resolution of the image. This format has a 16-bit G component for the even
*i* coordinate in the word in bytes 0..1, a 16-bit B component in the word
in bytes 2..3, a 16-bit G component for the odd *i* coordinate in the word
in bytes 4..5, and a 16-bit R component in the word in bytes 6..7. Images
in this format *must* be defined with a width that is a multiple of two.
For the purposes of the constraints on copy extents, this format is treated
as a compressed format with a 2×1 compressed texel block."
Post by Mark Thompson
Changing that makes it accept a DRM object for this format
$ gdb --args ./ffmpeg_g -v 55 -y -hwaccel vaapi -hwaccel_output_format vaapi -hwaccel_device /dev/dri/renderD128 -i in.mp4 -an -vf 'scale_vaapi=format=yuyv422,hwmap=derive_device=vulkan,scale_vulkan=1280:720,hwmap=derive_device=vaapi:reverse=1' -c:v h264_vaapi out.mp4
...
Assertion `format != NULL' failed.
Thread 1 "ffmpeg_g" received signal SIGABRT, Aborted.
51 ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt
#1 0x00007ffff3127231 in __GI_abort () at abort.c:79
#2 0x00007ffff311e9da in __assert_fail_base (fmt=0x7ffff3271d48
"anv_image_create") at assert.c:92
"anv_image_create") at assert.c:101
#4 0x00007fffaf928190 in anv_image_create (_device=<optimized out>,
pImage=<optimized out>) at ../../../src/intel/vulkan/anv_image.c:599
#5 0x00007fffaf9282b2 in anv_CreateImage (device=<optimized out>,
pCreateInfo=<optimized out>, pAllocator=<optimized out>, pImage=<optimized
out>) at ../../../src/intel/vulkan/anv_image.c:641
#6 0x00007ffff66776c4 in vkCreateImage (device=0x5555592a64e0,
pCreateInfo=0x7fffffffcf70, pAllocator=0x0, pImage=0x5555592b9f40) at
/home/mrt/video/vulkan/loader/loader/trampoline.c:1328
#7 0x0000555556a009c7 in create_frame (hwfc=0x5555591e7380,
frame=0x7fffffffd1a8, tiling=VK_IMAGE_TILING_OPTIMAL, usage=15,
disjoint=0, create_pnext=0x7fffffffd140, alloc_pnext=0x7fffffffd080,
alloc_pnext_stride=24) at src/libavutil/hwcontext_vulkan.c:1130
#8 0x0000555556a0199b in vulkan_map_from_drm_frame_desc
(hwfc=0x5555591e7380, f=0x7fffffffd1a8, desc=0x5555592b97c0) at
src/libavutil/hwcontext_vulkan.c:1480
#9 0x0000555556a019f8 in vulkan_map_from_drm (hwfc=0x5555591e7380,
dst=0x5555592ab440, src=0x5555592b9580, flags=3) at
src/libavutil/hwcontext_vulkan.c:1502
#10 0x0000555556a01b50 in vulkan_map_from_vaapi (dst_fc=0x5555591e7380,
dst=0x5555592ab440, src=0x5555592ab700, flags=3) at
src/libavutil/hwcontext_vulkan.c:1550
#11 0x0000555556a01c27 in vulkan_map_to (hwfc=0x5555591e7380,
dst=0x5555592ab440, src=0x5555592ab700, flags=3) at
src/libavutil/hwcontext_vulkan.c:1579
#12 0x00005555569f32c7 in av_hwframe_map (dst=0x5555592ab440,
src=0x5555592ab700, flags=3) at src/libavutil/hwcontext.c:792
#13 0x0000555555798c2c in hwmap_filter_frame (link=0x555558dd9a00,
input=0x5555592ab700) at src/libavfilter/vf_hwmap.c:339
#14 0x00005555556af6c9 in ff_filter_frame_framed (link=0x555558dd9a00,
frame=0x5555592ab700) at src/libavfilter/avfilter.c:1071
#15 0x00005555556aff52 in ff_filter_frame_to_filter (link=0x555558dd9a00)
at src/libavfilter/avfilter.c:1219
#16 0x00005555556b014e in ff_filter_activate_default
(filter=0x555558dd9580) at src/libavfilter/avfilter.c:1268
#17 0x00005555556b0372 in ff_filter_activate (filter=0x555558dd9580) at
src/libavfilter/avfilter.c:1429
#18 0x00005555556b5036 in ff_filter_graph_run_once (graph=0x555558dd9440)
at src/libavfilter/avfiltergraph.c:1454
#19 0x00005555556b6466 in push_frame (graph=0x555558dd9440) at
src/libavfilter/buffersrc.c:181
#20 0x00005555556b6778 in av_buffersrc_add_frame_internal
(ctx=0x555558ddc3c0, frame=0x55555842b080, flags=4) at
src/libavfilter/buffersrc.c:255
#21 0x00005555556b63ed in av_buffersrc_add_frame_flags
(ctx=0x555558ddc3c0, frame=0x55555842b080, flags=4) at
src/libavfilter/buffersrc.c:164
#22 0x0000555555679212 in ifilter_send_frame (ifilter=0x5555581f2580,
frame=0x55555842b080) at src/fftools/ffmpeg.c:2190
#23 0x00005555556794f2 in send_frame_to_filters (ist=0x5555581f4340,
decoded_frame=0x55555842b080) at src/fftools/ffmpeg.c:2264
#24 0x000055555567a2ac in decode_video (ist=0x5555581f4340,
pkt=0x7fffffffd800, got_output=0x7fffffffd7f4, duration_pts=0x7fffffffd7f8,
eof=0, decode_failed=0x7fffffffd7f0) at src/fftools/ffmpeg.c:2465
#25 0x000055555567ac47 in process_input_packet (ist=0x5555581f4340,
pkt=0x7fffffffd9c0, no_eof=0) at src/fftools/ffmpeg.c:2619
#26 0x0000555555681bb5 in process_input (file_index=0) at
src/fftools/ffmpeg.c:4457
#27 0x00005555556820c4 in transcode_step () at src/fftools/ffmpeg.c:4577
#28 0x00005555556821f1 in transcode () at src/fftools/ffmpeg.c:4631
#29 0x0000555555682a81 in main (argc=18, argv=0x7fffffffe3c8) at src/fftools/ffmpeg.c:4838
diff --git a/libavutil/hwcontext_vaapi.c b/libavutil/hwcontext_vaapi.c
index a2387d4fc4..8e6abdc6ca 100644
--- a/libavutil/hwcontext_vaapi.c
+++ b/libavutil/hwcontext_vaapi.c
@@ -1104,7 +1104,7 @@ static int vaapi_map_to_drm_esh(AVHWFramesContext
*hwfc, AVFrame *dst,
surface_id = (VASurfaceID)(uintptr_t)src->data[3];
- export_flags = VA_EXPORT_SURFACE_SEPARATE_LAYERS;
+ export_flags = VA_EXPORT_SURFACE_COMPOSED_LAYERS;
if (flags & AV_HWFRAME_MAP_READ)
export_flags |= VA_EXPORT_SURFACE_READ_ONLY;
if (flags & AV_HWFRAME_MAP_WRITE)
)
Maybe hwcontext_vaapi should be changed to give you composed layers if the
pixfmt demands it?
Post by Mark Thompson
+
Post by Rostislav Pehlivanov
+ /* 420 */
+ [AV_PIX_FMT_YUV420P] = VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM,
+ [AV_PIX_FMT_YUV420P16] = VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM,
+
+ /* 422 */
+ [AV_PIX_FMT_YUV422P] = VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM,
+ [AV_PIX_FMT_YUV422P16] = VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM,
+
+ /* 444 */
+ [AV_PIX_FMT_YUV444P] = VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM,
+ [AV_PIX_FMT_YUV444P16] = VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM,
+
+ /* RGB */
+ [AV_PIX_FMT_ABGR] = VK_FORMAT_A8B8G8R8_UNORM_PACK32,
+ [AV_PIX_FMT_BGRA] = VK_FORMAT_B8G8R8A8_UNORM,
+ [AV_PIX_FMT_RGBA] = VK_FORMAT_R8G8B8A8_UNORM,
+ [AV_PIX_FMT_RGB24] = VK_FORMAT_R8G8B8_UNORM,
+ [AV_PIX_FMT_BGR24] = VK_FORMAT_B8G8R8_UNORM,
+ [AV_PIX_FMT_RGB48] = VK_FORMAT_R16G16B16_UNORM,
+ [AV_PIX_FMT_RGBA64] = VK_FORMAT_R16G16B16A16_UNORM,
+ [AV_PIX_FMT_RGB565] = VK_FORMAT_R5G6B5_UNORM_PACK16,
+ [AV_PIX_FMT_BGR565] = VK_FORMAT_B5G6R5_UNORM_PACK16,
+ [AV_PIX_FMT_BGR0] = VK_FORMAT_B8G8R8A8_UNORM,
+ [AV_PIX_FMT_0BGR] = VK_FORMAT_A8B8G8R8_UNORM_PACK32,
+ [AV_PIX_FMT_RGB0] = VK_FORMAT_R8G8B8A8_UNORM,
+};
+
+enum VulkanExtensions {
+ EXT_DEDICATED_ALLOC = 1LL << 0, /*
VK_KHR_dedicated_allocation */
Post by Rostislav Pehlivanov
+ EXT_IMAGE_FORMAT_LIST = 1LL << 1, /* VK_KHR_image_format_list
*/
Post by Rostislav Pehlivanov
+ EXT_EXTERNAL_MEMORY = 1LL << 2, /* VK_KHR_external_memory */
+ EXT_EXTERNAL_HOST_MEMORY = 1LL << 3, /*
VK_EXT_external_memory_host */
Post by Rostislav Pehlivanov
+ EXT_EXTERNAL_FD_MEMORY = 1LL << 4, /*
VK_KHR_external_memory_fd */
Post by Rostislav Pehlivanov
+ EXT_EXTERNAL_DMABUF_MEMORY = 1LL << 5, /*
VK_EXT_external_memory_dma_buf */
Post by Rostislav Pehlivanov
+ EXT_DRM_MODIFIER_FLAGS = 1LL << 6, /*
VK_EXT_image_drm_format_modifier */
Post by Rostislav Pehlivanov
+ EXT_YUV_IMAGES = 1LL << 7, /*
VK_KHR_sampler_ycbcr_conversion */
Post by Rostislav Pehlivanov
+
+ EXT_OPTIONAL = 1LL << 62,
+ EXT_REQUIRED = 1LL << 63,
That's signed overflow -> undefined behaviour. Since you want a uint64_t, use UINT64_C().
Changed to 1ULL. We discussed it last time, UINT64_C does exactly that.
Post by Mark Thompson
+};
Post by Rostislav Pehlivanov
+
...
+
+static VkBool32 vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT
severity,
Post by Rostislav Pehlivanov
+ VkDebugUtilsMessageTypeFlagsEXT
messageType,
Post by Rostislav Pehlivanov
+ const VkDebugUtilsMessengerCallbackDataEXT
*data,
Post by Rostislav Pehlivanov
+ void *priv)
+{
+ int l;
+ AVHWDeviceContext *ctx = priv;
+
+ switch (severity) {
+ case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l =
AV_LOG_VERBOSE; break;
Post by Rostislav Pehlivanov
+ case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: l =
AV_LOG_INFO; break;
Post by Rostislav Pehlivanov
+ case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l =
AV_LOG_WARNING; break;
Post by Rostislav Pehlivanov
+ case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT: l =
AV_LOG_ERROR; break;
Post by Rostislav Pehlivanov
+ default: l =
AV_LOG_DEBUG; break;
Post by Rostislav Pehlivanov
+ };
Stray semicolon.
Fixed.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+
+ av_log(ctx, l, "%s\n", data->pMessage);
+ for (int i = 0; i < data->cmdBufLabelCount; i++)
+ av_log(ctx, l, "\t%i: %s\n", i, data->pCmdBufLabels[i].pLabelN
ame);
Post by Rostislav Pehlivanov
+
+ return 0;
+}
+
...
+
+typedef struct VulkanDeviceSelection {
+ const char *name; /* Will use this first unless NULL */
+ uint32_t pci_device; /* Will use this second unless 0x0 */
+ uint32_t vendor_id; /* Last resort to find something deterministic
*/
Post by Rostislav Pehlivanov
+ int index; /* Finally fall back to index */
+} VulkanDeviceSelection;
+
+/* Finds a device */
+static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection
*select)
Post by Rostislav Pehlivanov
+{
+ int err = 0;
+ uint32_t num;
+ VkResult ret;
+ VkPhysicalDevice *devices = NULL;
+ VkPhysicalDeviceProperties *prop = NULL;
+ VkPhysicalDevice choice = VK_NULL_HANDLE;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ static const char *dev_types[] = {
+ [VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU] = "integrated",
+ [VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU] = "discrete",
+ [VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU] = "virtual",
+ [VK_PHYSICAL_DEVICE_TYPE_CPU] = "software",
+ [VK_PHYSICAL_DEVICE_TYPE_OTHER] = "unknown",
+ };
+
+ ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, NULL);
+ if (ret != VK_SUCCESS || !num) {
+ av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n",
vk_ret2str(ret));
Post by Rostislav Pehlivanov
+ return AVERROR_EXTERNAL;
AVERROR(ENODEV) might be clearer, and in similar "no device" cases below too.
Fixed, also in other places in that function.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+ }
+
+ devices = av_malloc_array(num, sizeof(VkPhysicalDevice));
+ if (!devices)
+ return AVERROR(ENOMEM);
+
+ ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, devices);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n",
+ vk_ret2str(ret));
+ err = AVERROR_EXTERNAL;
+ goto end;
+ }
+
+ prop = av_malloc_array(num, sizeof(VkPhysicalDeviceProperties));
+ if (!prop) {
+ err = AVERROR(ENOMEM);
+ goto end;
+ }
+
+ av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n");
+ for (int i = 0; i < num; i++) {
+ vkGetPhysicalDeviceProperties(devices[i], &prop[i]);
+ av_log(ctx, AV_LOG_VERBOSE, " %d: %s (%s) (0x%x)\n", i,
prop[i].deviceName,
^ "%#x" (and below)
Post by Rostislav Pehlivanov
+ dev_types[prop[i].deviceType], prop[i].deviceID);
dev_types would feel safer as a function, I think? (If a later Vulkan
version adds a new device type then you can crash if you see it.)
Done.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+ }
+
+ if (select->name) {
+ av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n",
select->name);
Post by Rostislav Pehlivanov
+ for (int i = 0; i < num; i++) {
+ if (strcmp(select->name, prop[i].deviceName) == 0) {
Might it be nicer to use strstr() rather than strcmp() here?
The requirement to put e.g. "AMD RADV POLARIS11 (LLVM 6.0.0)" is pretty
annoying, especially when that string might change between versions. If
you know you have an AMD card and an Intel card, then matching "AMD" seems
pretty safe.
Done. Sadly it's not case-insensitive, but oh well.
Post by Mark Thompson
+ choice = devices[i];
Post by Rostislav Pehlivanov
+ goto end;
+ }
+ }
+ av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n",
+ select->name);
+ err = AVERROR_UNKNOWN;
+ goto end;
+ } else if (select->pci_device) {
+ av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n",
select->pci_device);
Post by Rostislav Pehlivanov
+ for (int i = 0; i < num; i++) {
+ if (select->pci_device == prop[i].deviceID) {
+ choice = devices[i];
+ goto end;
+ }
+ }
+ av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID
0x%x!\n",
Post by Rostislav Pehlivanov
+ select->pci_device);
I wonder whether this should have some magic if you have multiple of the
same graphics card (something with opts, maybe?). That will be a common
case in compute, though I don't know if it matters here.
We could fix it later if the API introduces something better or at least a
way to translate UUIDs.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+ err = AVERROR(EINVAL);
+ goto end;
+ } else if (select->vendor_id) {
+ av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n",
select->vendor_id);
Post by Rostislav Pehlivanov
+ for (int i = 0; i < num; i++) {
+ if (select->vendor_id == prop[i].vendorID) {
+ choice = devices[i];
+ goto end;
+ }
+ }
+ av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID
0x%x!\n",
Post by Rostislav Pehlivanov
+ select->vendor_id);
+ err = AVERROR_UNKNOWN;
+ goto end;
+ } else {
+ if (select->index < num) {
+ choice = devices[select->index];
+ goto end;
+ }
+ av_log(ctx, AV_LOG_ERROR, "Unable to find device with index
%i!\n",
Post by Rostislav Pehlivanov
+ select->index);
+ err = AVERROR_UNKNOWN;
+ goto end;
+ }
+
+ av_free(devices);
+ av_free(prop);
+ hwctx->phys_dev = choice;
+
+ return err;
+}
+
+static int search_queue_families(AVHWDeviceContext *ctx,
VkDeviceCreateInfo *cd)
Post by Rostislav Pehlivanov
+{
+ uint32_t num;
+ VkQueueFamilyProperties *qs = NULL;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ int graph_index = -1, comp_index = -1, tx_index = -1;
+ VkDeviceQueueCreateInfo *pc = (VkDeviceQueueCreateInfo
*)cd->pQueueCreateInfos;
Post by Rostislav Pehlivanov
+
+ /* First get the number of queue families */
+ vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num,
NULL);
Post by Rostislav Pehlivanov
+ if (!num) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ /* Then allocate memory */
+ qs = av_malloc_array(num, sizeof(VkQueueFamilyProperties));
+ if (!qs)
+ return AVERROR(ENOMEM);
+
+ /* Finally retrieve the queue families */
+ vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num,
qs);
Post by Rostislav Pehlivanov
+
+#define SEARCH_FLAGS(expr, out)
\
Post by Rostislav Pehlivanov
+ for (int i = 0; i < num; i++) {
\
Adding the "int " pushed the "\" out of alignment :P
Fixed.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+ const VkQueueFlagBits flags = qs[i].queueFlags;
\
Post by Rostislav Pehlivanov
+ if (expr) {
\
Post by Rostislav Pehlivanov
+ out = i;
\
Post by Rostislav Pehlivanov
+ break;
\
Post by Rostislav Pehlivanov
+ }
\
Post by Rostislav Pehlivanov
+ }
+
+ if (!hwctx->queue_family_index)
I don't quite understand what this test is doing. You search for the
queues to use on device create (not external init), so it should always be
unset when you get here?
Right, I didn't know about this until I used the API a week ago to write a
demo client for a Wayland surface capture protocol (the client imported
DMABUFs, mapped them to whatever and encoded them).
Fixed, as well as in other places. Also I verify the queue index in the
init function now.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+ SEARCH_FLAGS(flags & VK_QUEUE_GRAPHICS_BIT, graph_index)
+
+ if (!hwctx->queue_family_comp_index)
+ SEARCH_FLAGS((flags & VK_QUEUE_COMPUTE_BIT) && (i !=
graph_index),
Post by Rostislav Pehlivanov
+ comp_index)
+
+ if (!hwctx->queue_family_tx_index)
+ SEARCH_FLAGS((flags & VK_QUEUE_TRANSFER_BIT) && (i !=
graph_index) &&
Post by Rostislav Pehlivanov
+ (i != comp_index), tx_index)
+
+#undef SEARCH_FLAGS
+#define QF_FLAGS(flags)
\
Post by Rostislav Pehlivanov
+ ((flags) & VK_QUEUE_GRAPHICS_BIT ) ? "(graphics) " : "",
\
Post by Rostislav Pehlivanov
+ ((flags) & VK_QUEUE_COMPUTE_BIT ) ? "(compute) " : "",
\
Post by Rostislav Pehlivanov
+ ((flags) & VK_QUEUE_TRANSFER_BIT ) ? "(transfer) " : "",
\
Post by Rostislav Pehlivanov
+ ((flags) & VK_QUEUE_SPARSE_BINDING_BIT) ? "(sparse) " : ""
+
+ av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for graphics, "
+ "flags: %s%s%s%s\n", graph_index,
QF_FLAGS(qs[graph_index].queueFlags));
Post by Rostislav Pehlivanov
+
+ hwctx->queue_family_index = graph_index;
+ hwctx->queue_family_tx_index = graph_index;
+ hwctx->queue_family_comp_index = graph_index;
+
+ pc[cd->queueCreateInfoCount++].queueFamilyIndex = graph_index;
+
+ if (comp_index != -1) {
+ av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for compute,
"
Post by Rostislav Pehlivanov
+ "flags: %s%s%s%s\n", comp_index,
QF_FLAGS(qs[comp_index].queueFlags));
Post by Rostislav Pehlivanov
+ hwctx->queue_family_tx_index = comp_index;
+ hwctx->queue_family_comp_index = comp_index;
+ pc[cd->queueCreateInfoCount++].queueFamilyIndex = comp_index;
+ }
+
+ if (tx_index != -1) {
+ av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for
transfers, "
Post by Rostislav Pehlivanov
+ "flags: %s%s%s%s\n", tx_index,
QF_FLAGS(qs[tx_index].queueFlags));
Post by Rostislav Pehlivanov
+ hwctx->queue_family_tx_index = tx_index;
+ pc[cd->queueCreateInfoCount++].queueFamilyIndex = tx_index;
+ }
+
+#undef PRINT_QF_FLAGS
"QF_FLAGS".
Fixed.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+
+ av_free(qs);
+
+ return 0;
+}
+
+static int create_exec_ctx(AVHWDeviceContext *ctx)
+{
+ VkResult ret;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VulkanDevicePriv *p = ctx->internal->priv;
+
+ VkCommandPoolCreateInfo cqueue_create = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_POOL
_CREATE_INFO,
Post by Rostislav Pehlivanov
+ .flags = VK_COMMAND_POOL_CREATE_RESET_C
OMMAND_BUFFER_BIT,
Post by Rostislav Pehlivanov
+ .queueFamilyIndex = hwctx->queue_family_tx_index,
+ };
+ VkCommandBufferAllocateInfo cbuf_create = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFF
ER_ALLOCATE_INFO,
Post by Rostislav Pehlivanov
+ .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+ .commandBufferCount = 1,
+ };
+ VkFenceCreateInfo fence_spawn = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO
};
Post by Rostislav Pehlivanov
+
+ ret = vkCreateCommandPool(hwctx->act_dev, &cqueue_create,
+ hwctx->alloc, &p->cmd_pool);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Command pool creation failure: %s\n",
+ vk_ret2str(ret));
+ return 1;
These failures are going to return a nonnegative number from
device_init(); I don't think that's wanted.
Fixed, replaced with AVERROR_EXTERNAL.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+ }
+
+ cbuf_create.commandPool = p->cmd_pool;
+
+ ret = vkAllocateCommandBuffers(hwctx->act_dev, &cbuf_create,
&p->cmd_buf);
Post by Rostislav Pehlivanov
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
+ vk_ret2str(ret));
+ return 1;
+ }
+
+ ret = vkCreateFence(hwctx->act_dev, &fence_spawn,
+ hwctx->alloc, &p->cmd_fence);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
+ vk_ret2str(ret));
+ return 1;
+ }
+
+ vkGetDeviceQueue(hwctx->act_dev, hwctx->queue_family_tx_index, 0,
+ &p->cmd_queue);
+
+ return 0;
+}
+
+static void free_exec_ctx(AVHWDeviceContext *ctx)
+{
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VulkanDevicePriv *p = ctx->internal->priv;
+
+ if (!p)
This can never be true - priv is set before the free function.
Fixed.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+ return;
+
+ if (p->cmd_fence != VK_NULL_HANDLE)
Since we're depending on VK_NULL_HANDLE being zero for correct
initialisation, maybe just treat these as pointers and write "if
(p->cmd_fence)", etc.
Done (and in other places in this file).
Post by Mark Thompson
Post by Rostislav Pehlivanov
+ vkDestroyFence(hwctx->act_dev, p->cmd_fence, hwctx->alloc);
+ if (p->cmd_buf != VK_NULL_HANDLE)
+ vkFreeCommandBuffers(hwctx->act_dev, p->cmd_pool, 1,
&p->cmd_buf);
Post by Rostislav Pehlivanov
+ if (p->cmd_pool != VK_NULL_HANDLE)
+ vkDestroyCommandPool(hwctx->act_dev, p->cmd_pool,
hwctx->alloc);
Post by Rostislav Pehlivanov
+}
+
+static void vulkan_device_free(AVHWDeviceContext *ctx)
+{
+ VulkanDevicePriv *p = ctx->internal->priv;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+
+ free_exec_ctx(ctx);
+
+ vkDestroyDevice(hwctx->act_dev, hwctx->alloc);
+
+ if (p && p->debug_ctx != VK_NULL_HANDLE) {
+ VK_LOAD_PFN(hwctx->inst, vkDestroyDebugUtilsMessengerEXT);
+ pfn_vkDestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx,
+ hwctx->alloc);
+ }
+
+ vkDestroyInstance(hwctx->inst, hwctx->alloc);
+}
+
+static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
+ VulkanDeviceSelection
*dev_select,
Post by Rostislav Pehlivanov
+ AVDictionary *opts, int flags)
+{
+ int err = 0;
+ VkResult ret;
+ AVDictionaryEntry *opt_d;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VkDeviceQueueCreateInfo queue_create_info[3] = {
+ { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE
_CREATE_INFO,
Post by Rostislav Pehlivanov
+ .pQueuePriorities = (float []){ 1.0f },
+ .queueCount = 1, },
+ { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE
_CREATE_INFO,
Post by Rostislav Pehlivanov
+ .pQueuePriorities = (float []){ 1.0f },
+ .queueCount = 1, },
+ { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE
_CREATE_INFO,
Post by Rostislav Pehlivanov
+ .pQueuePriorities = (float []){ 1.0f },
+ .queueCount = 1, },
+ };
+
+ VkDeviceCreateInfo dev_info = {
+ .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+ .pQueueCreateInfos = queue_create_info,
+ .queueCreateInfoCount = 0,
+ };
+
+ VulkanDevicePriv *p = av_mallocz(sizeof(*p));
+ if (!p) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
This is already allocated inside av_hwdevice_ctx_alloc(); you're
overwriting it here and leaking the original.
Fixed.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+
+ ctx->internal->priv = p;
+ ctx->free = vulkan_device_free;
+
+ /* Create an instance if not given one */
+ if (!hwctx->inst && (err = create_instance(ctx, opts)))
+ goto fail;
+
+ /* Find a device (if not given one) */
+ if (!hwctx->phys_dev && (err = find_device(ctx, dev_select)))
+ goto fail;
+
+ vkGetPhysicalDeviceProperties(hwctx->phys_dev, &p->props);
+ av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n",
p->props.deviceName);
Post by Rostislav Pehlivanov
+ av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n");
%li\n",
Post by Rostislav Pehlivanov
+ p->props.limits.optimalBufferCopyOffsetAlignment);
%li\n",
Post by Rostislav Pehlivanov
+ p->props.limits.optimalBufferCopyRowPitchAlignment);
%li\n",
Post by Rostislav Pehlivanov
+ p->props.limits.minMemoryMapAlignment);
+
+ /* Search queue family */
+ if ((err = search_queue_families(ctx, &dev_info)))
+ goto fail;
+
+ if (!hwctx->act_dev) {
+ err = check_extensions(ctx, 1, &dev_info.ppEnabledExtensionNa
mes,
Post by Rostislav Pehlivanov
+ &dev_info.enabledExtensionCount, 0);
+ if (err)
+ goto fail;
+
+ ret = vkCreateDevice(hwctx->phys_dev, &dev_info,
+ hwctx->alloc, &hwctx->act_dev);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n",
+ vk_ret2str(ret));
+ err = AVERROR_EXTERNAL;
+ goto fail;
+ }
+
+ av_free((void *)dev_info.ppEnabledExtensionNames);
+ }
+
+ /* Tiled images setting, use them by default */
+ opt_d = av_dict_get(opts, "linear_images", NULL, 0);
+ if (opt_d)
+ p->use_linear_images = strtol(opt_d->value, NULL, 10);
+
+ /* Disjoint images setting, don't use them by default */
+ opt_d = av_dict_get(opts, "disjoint_images", NULL, 0);
+ if (opt_d)
+ p->use_disjoint_images = strtol(opt_d->value, NULL, 10);
+
+ return 0;
+
+ av_freep(&ctx->internal->priv);
I don't think you want to free this here, it's managed by the hwcontext layer.
Fixed.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+ return err;
+}
+
+static int vulkan_device_init(AVHWDeviceContext *ctx)
+{
+ int err;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VulkanDevicePriv *p = ctx->internal->priv;
+
+ /* Create exec context - if there's something invalid this will
error out */
Post by Rostislav Pehlivanov
+ err = create_exec_ctx(ctx);
+ if (err)
+ return err;
+
+ /* Get device capabilities */
+ vkGetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
+
+ return 0;
+}
+
+static int vulkan_device_create(AVHWDeviceContext *ctx, const char
*device,
Post by Rostislav Pehlivanov
+ AVDictionary *opts, int flags)
+{
+ VulkanDeviceSelection dev_select = { 0 };
+ if (device && device[0]) {
+ if (av_isdigit(device[0]))
3dfx probably wouldn't appreciate this test, though I admit they are
unlikely to add Vulkan support to their cards.
Post by Rostislav Pehlivanov
+ dev_select.index = strtol(device, NULL, 10);
Might be nicer to always call strtol and then check whether *end is zero,
just in case of a future problem like that.
Done, replaced with:
char *end = NULL;
dev_select.index = strtol(device, &end, 10);
if (end == device) {
dev_select.index = 0;
dev_select.name = device;
}
Post by Mark Thompson
Post by Rostislav Pehlivanov
+ else
+ dev_select.name = device;
+ }
+
+ return vulkan_device_create_internal(ctx, &dev_select, opts,
flags);
Post by Rostislav Pehlivanov
+}
+
+static int vulkan_device_derive(AVHWDeviceContext *ctx,
+ AVHWDeviceContext *src_ctx, int flags)
+{
+ VulkanDeviceSelection dev_select = { 0 };
+
+ switch(src_ctx->type) {
+#if CONFIG_LIBDRM
+#if CONFIG_VAAPI
+ case AV_HWDEVICE_TYPE_VAAPI: {
+ AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;
+ const char *vendor = vaQueryVendorString(src_hwctx->display);
+ if (!vendor) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to get device info from
vaapi!\n");
Post by Rostislav Pehlivanov
+ return AVERROR_EXTERNAL;
+ }
+
+ if (strstr(vendor, "Intel"))
+ dev_select.vendor_id = 0x8086;
+ if (strstr(vendor, "AMD"))
+ dev_select.vendor_id = 0x1002;
+
+ return vulkan_device_create_internal(ctx, &dev_select, NULL,
flags);
Did you think about making an addition to VAAPI which could do this in a more sensible way?
No, not yet. If anything better comes up we can replace it.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+ }
+#endif
+ case AV_HWDEVICE_TYPE_DRM: {
+ AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;
+
+ drmDevice *drm_dev_info;
+ int err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
+ if (err) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to get device info from
drm fd!\n");
Post by Rostislav Pehlivanov
+ return AVERROR_EXTERNAL;
+ }
+
+ dev_select.pci_device = drm_dev_info->deviceinfo.pci->
device_id;
Not all devices are PCI, check bustype before using this. I don't know
what information you can use in other cases, though (GPUs on mobile will
just be opaque platform devices).
Done.
Post by Mark Thompson
The drmDevice structure needs to be freed, too (drmFreeDevice).
Fixed.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+
+ return vulkan_device_create_internal(ctx, &dev_select, NULL,
flags);
Post by Rostislav Pehlivanov
+ }
+#endif
+ return AVERROR(ENOSYS);
+ }
+}
+
+static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
+ const void *hwconfig,
+ AVHWFramesConstraints
*constraints)
Post by Rostislav Pehlivanov
+{
+ int count = 0;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VulkanDevicePriv *p = ctx->internal->priv;
+
+ for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
This iteration feels dubious, maybe it would be better to use
av_pix_fmt_desc_next()?
It's fine; this is lavu, so there's no way for AV_PIX_FMT_NB to be different.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+ count += vkfmt_is_supported(hwctx, i, p->use_linear_images);
+
+ constraints->valid_sw_formats = av_malloc_array(count + 1,
+ sizeof(enum
AVPixelFormat));
Post by Rostislav Pehlivanov
+ if (!constraints->valid_sw_formats)
+ return AVERROR(ENOMEM);
+
+ count = 0;
+ for (enum AVPixelFormat i = 0; i < AV_PIX_FMT_NB; i++)
+ if (vkfmt_is_supported(hwctx, i, p->use_linear_images))
+ constraints->valid_sw_formats[count++] = i;
+ constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;
+
+ constraints->min_width = 0;
+ constraints->min_height = 0;
VK_ERROR_OUT_OF_DEVICE_MEMORY
with a 1x1 YUV420P image, which is a pretty opaque failure (works for 2x2
or 3x3). What are the requirements there? Who should be checking it?
There are no minimum dimension requirements exposed by the API. My guess is
the implementation is rejecting it for some reason.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+ constraints->max_width = p->props.limits.maxImageDimension2D;
+ constraints->max_height = p->props.limits.maxImageDimension2D;
+
+ constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum
AVPixelFormat));
Post by Rostislav Pehlivanov
+ if (!constraints->valid_hw_formats)
+ return AVERROR(ENOMEM);
+
+ constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN;
+ constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
+
+ return 0;
+}
+
...
+
+static int alloc_bind_mem(AVHWDeviceContext *ctx, AVVkFrame *f,
+ void *alloc_pnext, size_t alloc_pnext_stride)
+{
+ int err;
+ VkResult ret;
+ VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };
+ VkBindImagePlaneMemoryInfo bind_p_info[AV_NUM_DATA_POINTERS] = { {
0 } };
Post by Rostislav Pehlivanov
+
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VulkanDevicePriv *p = ctx->internal->priv;
+
+ VK_LOAD_PFN(hwctx->inst, vkBindImageMemory2KHR);
+ VK_LOAD_PFN(hwctx->inst, vkGetImageMemoryRequirements2KHR);
The presence of the relevant extension presumably means that these necessarily succeed?
Yep.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+
+ for (int i = 0; i < f->mem_count; i++) {
+ int use_ded_mem;
+ VkImagePlaneMemoryRequirementsInfo plane_req = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_PLANE_
MEMORY_REQUIREMENTS_INFO,
Post by Rostislav Pehlivanov
+ VK_IMAGE_ASPECT_PLANE_2_BIT,
+ };
+ VkImageMemoryRequirementsInfo2 req_desc = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY
_REQUIREMENTS_INFO_2,
Post by Rostislav Pehlivanov
+ .pNext = f->mem_count > 1 ? &plane_req : NULL,
+ .image = f->img,
+ };
+ VkMemoryDedicatedAllocateInfo ded_alloc = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
+ .pNext = (void *)(((uint8_t *)alloc_pnext) +
i*alloc_pnext_stride),
Post by Rostislav Pehlivanov
+ };
+ VkMemoryDedicatedRequirements ded_req = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
+ };
+ VkMemoryRequirements2 req = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
NULL,
Post by Rostislav Pehlivanov
+ };
+
+ pfn_vkGetImageMemoryRequirements2KHR(hwctx->act_dev,
&req_desc, &req);
Post by Rostislav Pehlivanov
+
+ /* In case the implementation prefers/requires dedicated
allocation */
Post by Rostislav Pehlivanov
+ use_ded_mem = ded_req.prefersDedicatedAllocation |
+ ded_req.requiresDedicatedAllocation;
+ if (use_ded_mem)
+ ded_alloc.image = f->img;
+
+ /* Allocate memory */
+ if ((err = alloc_mem(ctx, &req.memoryRequirements,
+ f->tiling == VK_IMAGE_TILING_LINEAR ?
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+ use_ded_mem ? &ded_alloc : (void
*)ded_alloc.pNext,
Post by Rostislav Pehlivanov
+ &f->flags, &f->mem[i])))
+ return err;
+
+ if (f->mem_count > 1) {
+ bind_p_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_P
LANE_MEMORY_INFO;
Post by Rostislav Pehlivanov
+ bind_p_info[i].planeAspect = plane_req.planeAspect;
+ bind_info[i].pNext = &bind_p_info[i];
+ }
+
+ bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
+ bind_info[i].image = f->img;
+ bind_info[i].memory = f->mem[i];
+ }
+
+ /* Bind the allocated memory to the image */
+ ret = pfn_vkBindImageMemory2KHR(hwctx->act_dev, f->mem_count,
bind_info);
Post by Rostislav Pehlivanov
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
+ vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
This looks like it can leak some allocated memory if something goes wrong
during bind or when allocating a plane after the first. Or is there some
magic which means it doesn't?
There's some magic which means it doesn't:

av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
vk_ret2str(ret));
return AVERROR_EXTERNAL;

which goes to

if ((err = alloc_bind_mem(ctx, f, alloc_pnext, alloc_pnext_stride)))
goto fail;

which goes to
fail:
vulkan_frame_free(hwctx, (uint8_t *)f);
return err;
Post by Mark Thompson
Post by Rostislav Pehlivanov
+
+ return 0;
+}
+
...
+
+static AVBufferRef *vulkan_pool_alloc(void *opaque, int size)
+{
+ int err;
+ AVVkFrame *f;
+ AVBufferRef *avbuf = NULL;
+ AVHWFramesContext *hwfc = opaque;
+ AVVulkanFramesContext *hwctx = hwfc->hwctx;
+ VkExportMemoryAllocateInfo einfo[AV_NUM_DATA_POINTERS];
+ VkExternalMemoryHandleTypeFlags e = 0x0;
+
+ try_export_flags(hwfc, &e, VK_EXTERNAL_MEMORY_HANDLE_TYPE
_DMA_BUF_BIT_EXT);
The intent of this is to allocate memory which is dma_buf inside the
kernel and can therefore be exported as DRM objects?
Have you tried making a map_from which uses that? (That would allow hwmap
Vulkan->VAAPI without reverse mapping, I guess.)
Yes, it works. I'm not sure why though. The spec says ownership is
transferred and that destroying the image and memory would do nothing,
you'd need to close the exported FDs. And I think that means closing the
FDs without destroying the image and memory would also make the memory and
image invalid, but that's not the case. It works fine, I'm not leaking any
file descriptors. I think the spec just omits to mention that if you close the
FD while the image is still alive it won't destroy the image.
Anyway, it's in the new patch, along with VAAPI support.
Tested with:

./ffmpeg_g -init_hw_device "vaapi=vp:/dev/dri/renderD129" -i ~/dumper.mkv
-loglevel verbose -filter_hw_device vp -vf
hwupload,hwmap=derive_device=vulkan,chromaticaberration_vulkan,hwmap=reverse=1:derive_device=vaapi,format=vaapi
-c:v hevc_vaapi -f null -2
Post by Mark Thompson
Post by Rostislav Pehlivanov
+
+ for (int i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++)
{
Post by Rostislav Pehlivanov
+ einfo[i].sType = VK_STRUCTURE_TYPE_EXPORT_MEMOR
Y_ALLOCATE_INFO;
Post by Rostislav Pehlivanov
+ einfo[i].pNext = hwctx->alloc_pnext[i];
+ einfo[i].handleTypes = e;
+ }
+
+ err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
+ hwctx->disjoint, hwctx->create_pnext,
+ einfo, sizeof(*einfo));
+ if (err)
+ return NULL;
+
+ avbuf = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame),
+ vulkan_frame_free,
hwfc->device_ctx->hwctx, 0);
Post by Rostislav Pehlivanov
+ if (!avbuf) {
+ vulkan_frame_free(hwfc->device_ctx->hwctx, (uint8_t *)f);
+ return NULL;
+ }
+
+ return avbuf;
+}
+
+static int vulkan_frames_init(AVHWFramesContext *hwfc)
+{
+ AVVulkanFramesContext *hwctx = hwfc->hwctx;
+ VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
+
+ if (hwfc->pool)
+ return 0;
+
+ /* Default pool flags */
p->use_linear_images ?
Post by Rostislav Pehlivanov
+ VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
+
+ hwctx->usage |= DEFAULT_USAGE_FLAGS;
+
p->use_disjoint_images;
Post by Rostislav Pehlivanov
+
+ hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AV
VkFrame),
Post by Rostislav Pehlivanov
+ hwfc,
vulkan_pool_alloc,
Post by Rostislav Pehlivanov
+ NULL);
+ if (!hwfc->internal->pool_internal)
+ return AVERROR(ENOMEM);
This doesn't actually check anything about the parameters - e.g. I can
make frames context with a crazy unsupported sw_format and it will return
success.
Is it sensible to, say, test-allocate a single frame to make sure it actually works?
Done.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+
+ return 0;
+}
+
+static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
+{
+ frame->buf[0] = av_buffer_pool_get(hwfc->pool);
+ if (!frame->buf[0])
+ return AVERROR(ENOMEM);
+
+ frame->data[0] = frame->buf[0]->data;
+ frame->format = AV_PIX_FMT_VULKAN;
+ frame->width = hwfc->width;
+ frame->height = hwfc->height;
+
+ return 0;
+}
+
+static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc,
+ enum AVHWFrameTransferDirection
dir,
Post by Rostislav Pehlivanov
+ enum AVPixelFormat **formats)
+{
+ int count = 0;
+ enum AVPixelFormat *pix_fmts = NULL;
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_f
ormat);
Post by Rostislav Pehlivanov
+
+ /* All formats can be transferred to themselves */
+ count++;
+
+ /* All formats with a luma can have only that channel transferred */
+ count += !(desc->flags & AV_PIX_FMT_FLAG_RGB);
In what cases is this actually expected to work?
Works here.

./ffmpeg_g -init_hw_device "vulkan=vk:Intel" -i ~/dumper.mkv -loglevel
verbose -filter_hw_device vk -vf
format=yuv420p,hwupload,hwdownload,format=gray8
-f null -

Latest mesa git master.
Post by Mark Thompson
$ gdb --args ./ffmpeg_g -v 55 -y -i in.mp4 -an -init_hw_device
vulkan=amd:0 -init_hw_device vulkan=intel:1 -filter_hw_device intel -vf
'format=yuv420p,hwupload,hwdownload,format=gray8' -c:v libx264 -frames:v
1 out.mp4
...
did set range correctly
h:1080 fmt:yuvj444p sar:1/1 flags:0x4
anv_layout_to_aux_usage: Assertion `_mesa_bitcount(aspect) == 1 && (aspect
& image->aspects)' failed.
Thread 1 "ffmpeg_g" received signal SIGABRT, Aborted.
51 ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt
#1 0x00007ffff3127231 in __GI_abort () at abort.c:79
#2 0x00007ffff311e9da in __assert_fail_base (fmt=0x7ffff3271d48
=0x7ffff21c72f0 "_mesa_bitcount(aspect) == 1 && (aspect &
"anv_layout_to_aux_usage") at assert.c:92
=0x7ffff21c72f0 "_mesa_bitcount(aspect) == 1 && (aspect &
"anv_layout_to_aux_usage") at assert.c:101
at ../../../src/intel/vulkan/anv_image.c:806
#5 0x00007ffff1dd9ed7 in get_blorp_surf_for_anv_image
=0x7fffffffce60)
at ../../../src/intel/vulkan/anv_blorp.c:204
#6 0x00007ffff1dda209 in copy_buffer_to_image (cmd_buffer=0x555558aaba70,
anv_buffer=0x555558670dd0, anv_image=0x5555597db110,
image_layout=VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, regionCount=<optimized
out>, pRegions=<optimized out>, buffer_to_image=false) at
../../../src/intel/vulkan/anv_blorp.c:382
#7 0x00007ffff1ddab56 in anv_CmdCopyImageToBuffer
(commandBuffer=<optimized out>, srcImage=<optimized out>,
srcImageLayout=<optimized out>, dstBuffer=<optimized out>,
regionCount=<optimized out>, pRegions=<optimized out>) at
../../../src/intel/vulkan/anv_blorp.c:475
#8 0x00007ffff667887d in vkCmdCopyImageToBuffer
(commandBuffer=0x555558aaba70, srcImage=0x5555597db110,
srcImageLayout=VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
dstBuffer=0x555558670dd0, regionCount=1, pRegions=0x7fffffffd060) at
/home/mrt/video/vulkan/loader/loader/trampoline.c:1923
#9 0x0000555556a0281d in transfer_image_buf (ctx=0x555558792a40,
frame=0x5555597c0a00, buffer=0x7fffffffd1f0, stride=0x5555597e4780, w=1920,
h=1080, pix_fmt=AV_PIX_FMT_GRAY8, to_buf=1) at
src/libavutil/hwcontext_vulkan.c:1810
#10 0x0000555556a03158 in vulkan_transfer_data_from (hwfc=0x5555597c0640,
dst=0x5555597e4740, src=0x5555597bf8c0) at src/libavutil/hwcontext_vulkan
.c:1963
#11 0x00005555569f28d5 in av_hwframe_transfer_data (dst=0x5555597e4740,
src=0x5555597bf8c0, flags=0) at src/libavutil/hwcontext.c:454
#12 0x000055555579819c in hwdownload_filter_frame (link=0x5555597bdd40,
input=0x5555597bf8c0) at src/libavfilter/vf_hwdownload.c:153
#13 0x00005555556af6c9 in ff_filter_frame_framed (link=0x5555597bdd40,
frame=0x5555597bf8c0) at src/libavfilter/avfilter.c:1071
#14 0x00005555556aff52 in ff_filter_frame_to_filter (link=0x5555597bdd40)
at src/libavfilter/avfilter.c:1219
#15 0x00005555556b014e in ff_filter_activate_default
(filter=0x5555597bbd00) at src/libavfilter/avfilter.c:1268
#16 0x00005555556b0372 in ff_filter_activate (filter=0x5555597bbd00) at
src/libavfilter/avfilter.c:1429
#17 0x00005555556b5036 in ff_filter_graph_run_once (graph=0x5555597bd900)
at src/libavfilter/avfiltergraph.c:1454
#18 0x00005555556b6466 in push_frame (graph=0x5555597bd900) at
src/libavfilter/buffersrc.c:181
#19 0x00005555556b6778 in av_buffersrc_add_frame_internal
(ctx=0x5555597be100, frame=0x555558e10300, flags=4) at
src/libavfilter/buffersrc.c:255
#20 0x00005555556b63ed in av_buffersrc_add_frame_flags
(ctx=0x5555597be100, frame=0x555558e10300, flags=4) at
src/libavfilter/buffersrc.c:164
#21 0x0000555555679212 in ifilter_send_frame (ifilter=0x555558b9da00,
frame=0x555558e10300) at src/fftools/ffmpeg.c:2190
#22 0x00005555556794f2 in send_frame_to_filters (ist=0x555558d944c0,
decoded_frame=0x555558e10300) at src/fftools/ffmpeg.c:2264
#23 0x000055555567a2ac in decode_video (ist=0x555558d944c0,
pkt=0x7fffffffd820, got_output=0x7fffffffd814, duration_pts=0x7fffffffd818,
eof=0, decode_failed=0x7fffffffd810) at src/fftools/ffmpeg.c:2465
#24 0x000055555567ac47 in process_input_packet (ist=0x555558d944c0,
pkt=0x7fffffffd9e0, no_eof=0) at src/fftools/ffmpeg.c:2619
#25 0x0000555555681bb5 in process_input (file_index=0) at
src/fftools/ffmpeg.c:4457
#26 0x00005555556820c4 in transcode_step () at src/fftools/ffmpeg.c:4577
#27 0x00005555556821f1 in transcode () at src/fftools/ffmpeg.c:4631
#28 0x0000555555682a81 in main (argc=20, argv=0x7fffffffe3e8) at src/fftools/ffmpeg.c:4838
Post by Rostislav Pehlivanov
+
+ pix_fmts = av_malloc((count + 1) * sizeof(*pix_fmts));
+ if (!pix_fmts)
+ return AVERROR(ENOMEM);
+
+ count = 0;
+ pix_fmts[count++] = hwfc->sw_format;
+ if (!(desc->flags & AV_PIX_FMT_FLAG_RGB)) {
+ switch (desc->comp[0].depth) {
+ case 8: pix_fmts[count++] = AV_PIX_FMT_GRAY8; break;
+ case 10: pix_fmts[count++] = AV_PIX_FMT_GRAY10; break;
+ case 12: pix_fmts[count++] = AV_PIX_FMT_GRAY12; break;
+ case 16: pix_fmts[count++] = AV_PIX_FMT_GRAY16; break;
+ }
Tbh I'm not convinced that offering the luma-only option as well is going
to cause anything other than confusion. Do you have any use-cases in mind
for it?
Not that I can think of, but it may be useful to someone. I'll leave it in -
it's a feature after all and we can expose it.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+ }
+ pix_fmts[count++] = AV_PIX_FMT_NONE;
+
+ *formats = pix_fmts;
+
+ return 0;
+}
+
+typedef struct VulkanMapping {
+ AVVkFrame *frame;
+ int flags;
+} VulkanMapping;
+
+static void vulkan_unmap_frame(AVHWFramesContext *hwfc,
HWMapDescriptor *hwmap)
Post by Rostislav Pehlivanov
+{
+ VulkanMapping *map = hwmap->priv;
+ AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
+
+ /* Check if buffer needs flushing */
+ if ((map->flags & AV_HWFRAME_MAP_WRITE) &&
+ !(map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
+ VkResult ret;
+ VkMappedMemoryRange flush_ranges[AV_NUM_DATA_POINTERS] = { { 0
} };
Post by Rostislav Pehlivanov
+
+ for (int i = 0; i < map->frame->mem_count; i++) {
+ flush_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMOR
Y_RANGE;
Post by Rostislav Pehlivanov
+ flush_ranges[i].memory = map->frame->mem[i];
+ flush_ranges[i].size = VK_WHOLE_SIZE;
+ }
+
+ ret = vkFlushMappedMemoryRanges(hwctx->act_dev,
map->frame->mem_count,
Post by Rostislav Pehlivanov
+ flush_ranges);
+ if (ret != VK_SUCCESS) {
+ av_log(hwfc, AV_LOG_ERROR, "Failed to flush memory: %s\n",
+ vk_ret2str(ret));
+ }
+ }
+
+ for (int i = 0; i < map->frame->mem_count; i++)
+ vkUnmapMemory(hwctx->act_dev, map->frame->mem[i]);
+
+ av_free(map);
+}
+
+static int vulkan_map_frame(AVHWFramesContext *hwfc, AVFrame *dst,
+ const AVFrame *src, int flags)
+{
+ int err;
+ VkResult ret;
+ AVVkFrame *f = (AVVkFrame *)src->data[0];
+ AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
+ const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
+
+ VulkanMapping *map = av_mallocz(sizeof(VulkanMapping));
+ if (!map)
+ return AVERROR(EINVAL);
+
+ if (src->format != AV_PIX_FMT_VULKAN) {
+ av_log(hwfc, AV_LOG_ERROR, "Cannot map from pixel format %s!\n",
+ av_get_pix_fmt_name(src->format));
+ err = AVERROR(EINVAL);
+ goto fail;
+ }
+
+ if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ||
+ !(f->tiling == VK_IMAGE_TILING_LINEAR)) {
+ av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, not host
visible "
Post by Rostislav Pehlivanov
+ "and linear!\n");
Is this a requirement? Some devices have magic MMU hardware which can
linear-map tiled memory.
The specifications mention nothing of those, so I guess not. If it's
modified to specify what happens we can change it.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+ err = AVERROR(EINVAL);
+ goto fail;
+ }
+
+ dst->width = src->width;
+ dst->height = src->height;
+
+ for (int i = 0; i < f->mem_count; i++) {
+ ret = vkMapMemory(hwctx->act_dev, f->mem[i], 0,
+ VK_WHOLE_SIZE, 0, (void **)&dst->data[i]);
+ if (ret != VK_SUCCESS) {
%s\n",
Post by Rostislav Pehlivanov
+ vk_ret2str(ret));
+ err = AVERROR_EXTERNAL;
+ goto fail;
+ }
+ }
+
+ /* For non disjoint memory duplicate them */
+ if (f->mem_count == 1)
+ for (int i = 1; i < planes; i++)
+ dst->data[i] = dst->data[0];
+
+ /* Check if the memory contents matter */
+ if (((flags & AV_HWFRAME_MAP_READ) || !(flags &
AV_HWFRAME_MAP_OVERWRITE)) &&
Post by Rostislav Pehlivanov
+ !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
+ VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { {
0 } };
Post by Rostislav Pehlivanov
+ for (int i = 0; i < f->mem_count; i++) {
+ map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMOR
Y_RANGE;
Post by Rostislav Pehlivanov
+ map_mem_ranges[i].size = VK_WHOLE_SIZE;
+ map_mem_ranges[i].memory = f->mem[i];
+ }
+
+ ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev,
f->mem_count,
Post by Rostislav Pehlivanov
+ map_mem_ranges);
+ if (ret != VK_SUCCESS) {
%s\n",
Post by Rostislav Pehlivanov
+ vk_ret2str(ret));
+ err = AVERROR_EXTERNAL;
+ goto fail;
+ }
+ }
+
+ for (int i = 0; i < planes; i++) {
+ VkImageSubresource sub = {
+ VK_IMAGE_ASPECT_PLANE_2_BIT,
+ };
+ VkSubresourceLayout layout;
+ vkGetImageSubresourceLayout(hwctx->act_dev, f->img, &sub,
&layout);
Post by Rostislav Pehlivanov
+ dst->data[i] += layout.offset;
+ dst->linesize[i] = layout.rowPitch;
+ }
+
+ map->frame = f;
+ map->flags = flags;
+
+ err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
+ &vulkan_unmap_frame, map);
+ if (err < 0)
+ goto fail;
+
+ return 0;
+
+ for (int i = 0; i < f->mem_count; i++)
+ vkUnmapMemory(hwctx->act_dev, f->mem[i]);
Unmap isn't valid on memory which isn't currently mapped; this needs to
track how many have actually been mapped.
Done.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+
+ av_free(map);
+ return err;
+}
+
+#if CONFIG_LIBDRM
+static void vulkan_unmap_from(AVHWFramesContext *hwfc, HWMapDescriptor
*hwmap)
Post by Rostislav Pehlivanov
+{
+ VulkanMapping *map = hwmap->priv;
+ AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
+
+ vkDestroyImage(hwctx->act_dev, map->frame->img, hwctx->alloc);
+ for (int i = 0; i < map->frame->mem_count; i++)
+ vkFreeMemory(hwctx->act_dev, map->frame->mem[i], hwctx->alloc);
+
+ av_freep(&map->frame);
+}
+
+static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc,
AVVkFrame **f,
Post by Rostislav Pehlivanov
+ AVDRMFrameDescriptor *desc)
+{
+ int err = 0;
+
+ /* Destination frame */
+#if HAVE_VULKAN_DRM_MOD
+ uint64_t modifier_buf[AV_NUM_DATA_POINTERS];
+ VkImageDrmFormatModifierListCreateInfoEXT drm_mod = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEV
ICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
Post by Rostislav Pehlivanov
+ };
+#endif
+ VkExternalMemoryImageCreateInfo ext_info = {
+ .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEM
ORY_IMAGE_CREATE_INFO,
Post by Rostislav Pehlivanov
+#if HAVE_VULKAN_DRM_MOD
+ .pNext = &drm_mod,
+#endif
+ .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
+ };
+ VkImportMemoryFdInfoKHR import_desc[AV_NUM_DATA_POINTERS];
+
+ if ((desc->nb_objects > 1) &&
+ (desc->nb_objects != av_pix_fmt_count_planes(hwfc->format))) {
"hwfc->sw_format"
Fixed.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+ av_log(hwfc, AV_LOG_ERROR, "Number of DRM objects doesn't match
"
Post by Rostislav Pehlivanov
+ "plane count!\n");
+ return AVERROR(EINVAL);
+ }
+
+ for (int i = 0; i < desc->nb_objects; i++) {
+ import_desc[i].sType = VK_STRUCTURE_TYPE_IMPORT_MEMOR
Y_FD_INFO_KHR;
Post by Rostislav Pehlivanov
+ import_desc[i].pNext = NULL;
+ import_desc[i].handleType = ext_info.handleTypes;
+ import_desc[i].fd = desc->objects[i].fd;
+#if HAVE_VULKAN_DRM_MOD
+ modifier_buf[i] = desc->objects[i].format_modifier;
I think you want to give it the modifier structure only if the modifier
isn't DRM_FORMAT_MOD_INVALID. Not passing a modifier asks the driver to
use internal magic if it can (e.g. dri_bo_get_tiling()); passing a modifier
must always use what you give it.
Done (I also check if DRM_FORMAT_MOD_INVALID exists and if not define it,
like in hwcontext_vaapi.c).
Post by Mark Thompson
Post by Rostislav Pehlivanov
+#endif
+ }
+#if HAVE_VULKAN_DRM_MOD
+ drm_mod.pDrmFormatModifiers = modifier_buf;
+ drm_mod.drmFormatModifierCount = desc->nb_objects;
+#endif
+
+ err = create_frame(hwfc, f,
+#if HAVE_VULKAN_DRM_MOD
+ VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT,
+#else
+ desc->objects[0].format_modifier ==
DRM_FORMAT_MOD_LINEAR ?
Post by Rostislav Pehlivanov
+ VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL,
+#endif
+ DEFAULT_USAGE_FLAGS, desc->nb_objects > 1,
&ext_info,
Post by Rostislav Pehlivanov
+ import_desc, sizeof(*import_desc));
+ if (err < 0)
+ return err;
You do need to look at the layer information in the DRM descriptor here.
E.g. mapping from VAAPI/NV12 works on Intel, but on AMD I get (with the
It should error out that the mapping failed. It's a driver bug.
I'd also really rather not check the layers struct; there are just too many
ways you can represent pixfmts.
Post by Mark Thompson
$ gdb --args ./ffmpeg_g -y -hwaccel vaapi -hwaccel_output_format vaapi
-hwaccel_device /dev/dri/renderD129 -i in.mp4 -an -vf
'hwmap=derive_device=vulkan,scale_vulkan=1280:720,hwmap=derive_device=vaapi:reverse=1'
-c:v h264_vaapi -frames:v 1 out.mp4
...
image does not work: 6 (invalid VASurfaceID).
Thread 1 "ffmpeg_g" received signal SIGSEGV, Segmentation fault.
vk_format_get_nr_components (format=<optimized out>) at
../../../../src/amd/vulkan/vk_format.h:535
535 return desc->nr_channels;
(gdb) bt
#0 vk_format_get_nr_components (format=<optimized out>) at
../../../../src/amd/vulkan/vk_format.h:535
#1 radv_image_create (_device=0x5555593e26a0,
pImage=<optimized out>) at ../../../../src/amd/vulkan/radv_image.c:943
#2 0x00007fffac3ed92e in radv_CreateImage (device=<optimized out>,
pCreateInfo=<optimized out>, pAllocator=<optimized out>, pImage=<optimized
out>) at ../../../../src/amd/vulkan/radv_image.c:1261
#3 0x00007ffff66776c4 in vkCreateImage (device=0x5555593e26a0,
pCreateInfo=0x7fffffffcf90, pAllocator=0x0, pImage=0x55555946f300) at
/home/mrt/video/vulkan/loader/loader/trampoline.c:1328
#4 0x0000555556a00a07 in create_frame (hwfc=0x555559568c80,
frame=0x7fffffffd1c8, tiling=VK_IMAGE_TILING_LINEAR, usage=15, disjoint=1,
create_pnext=0x7fffffffd160, alloc_pnext=0x7fffffffd0a0,
alloc_pnext_stride=24) at src/libavutil/hwcontext_vulkan.c:1130
#5 0x0000555556a019db in vulkan_map_from_drm_frame_desc
(hwfc=0x555559568c80, f=0x7fffffffd1c8, desc=0x5555595abd80) at
src/libavutil/hwcontext_vulkan.c:1480
#6 0x0000555556a01a38 in vulkan_map_from_drm (hwfc=0x555559568c80,
dst=0x555559322000, src=0x555559322300, flags=3) at
src/libavutil/hwcontext_vulkan.c:1502
#7 0x0000555556a01b90 in vulkan_map_from_vaapi (dst_fc=0x555559568c80,
dst=0x555559322000, src=0x555559321d40, flags=3) at
src/libavutil/hwcontext_vulkan.c:1550
#8 0x0000555556a01c67 in vulkan_map_to (hwfc=0x555559568c80,
dst=0x555559322000, src=0x555559321d40, flags=3) at
src/libavutil/hwcontext_vulkan.c:1579
#9 0x00005555569f3307 in av_hwframe_map (dst=0x555559322000,
src=0x555559321d40, flags=3) at src/libavutil/hwcontext.c:792
#10 0x0000555555798c69 in hwmap_filter_frame (link=0x555558f41180,
input=0x555559321d40) at src/libavfilter/vf_hwmap.c:339
#11 0x00005555556af6c9 in ff_filter_frame_framed (link=0x555558f41180,
frame=0x555559321d40) at src/libavfilter/avfilter.c:1071
#12 0x00005555556aff52 in ff_filter_frame_to_filter (link=0x555558f41180)
at src/libavfilter/avfilter.c:1219
#13 0x00005555556b014e in ff_filter_activate_default
(filter=0x5555582649c0) at src/libavfilter/avfilter.c:1268
#14 0x00005555556b0372 in ff_filter_activate (filter=0x5555582649c0) at
src/libavfilter/avfilter.c:1429
#15 0x00005555556b5036 in ff_filter_graph_run_once (graph=0x555558f3e500)
at src/libavfilter/avfiltergraph.c:1454
#16 0x00005555556b6466 in push_frame (graph=0x555558f3e500) at
src/libavfilter/buffersrc.c:181
#17 0x00005555556b6778 in av_buffersrc_add_frame_internal
(ctx=0x555558f3e600, frame=0x555558592c40, flags=4) at
src/libavfilter/buffersrc.c:255
#18 0x00005555556b63ed in av_buffersrc_add_frame_flags
(ctx=0x555558f3e600, frame=0x555558592c40, flags=4) at
src/libavfilter/buffersrc.c:164
#19 0x0000555555679212 in ifilter_send_frame (ifilter=0x5555581f1b40,
frame=0x555558592c40) at src/fftools/ffmpeg.c:2190
#20 0x00005555556794f2 in send_frame_to_filters (ist=0x555558352240,
decoded_frame=0x555558592c40) at src/fftools/ffmpeg.c:2264
#21 0x000055555567a2ac in decode_video (ist=0x555558352240,
pkt=0x7fffffffd820, got_output=0x7fffffffd814, duration_pts=0x7fffffffd818,
eof=0, decode_failed=0x7fffffffd810) at src/fftools/ffmpeg.c:2465
#22 0x000055555567ac47 in process_input_packet (ist=0x555558352240,
pkt=0x7fffffffd9e0, no_eof=0) at src/fftools/ffmpeg.c:2619
#23 0x0000555555681bb5 in process_input (file_index=0) at
src/fftools/ffmpeg.c:4457
#24 0x00005555556820c4 in transcode_step () at src/fftools/ffmpeg.c:4577
#25 0x00005555556821f1 in transcode () at src/fftools/ffmpeg.c:4631
#26 0x0000555555682a81 in main (argc=20, argv=0x7fffffffe3e8) at src/fftools/ffmpeg.c:4838
Post by Rostislav Pehlivanov
+
+ return 0;
+}
+
...
+
+typedef struct ImageBuffer {
+ VkBuffer buf;
+ VkDeviceMemory mem;
+ VkMemoryPropertyFlagBits flags;
+} ImageBuffer;
+
+static int create_buf(AVHWDeviceContext *ctx, ImageBuffer *buf, size_t
size,
Post by Rostislav Pehlivanov
+ VkBufferUsageFlags usage,
VkMemoryPropertyFlagBits flags,
Post by Rostislav Pehlivanov
+ void *create_pnext, void *alloc_pnext)
+{
+ int err;
+ VkResult ret;
+ VkMemoryRequirements req;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+
+ VkBufferCreateInfo buf_spawn = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = create_pnext,
+ .usage = usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .size = size, /* Gets FFALIGNED during alloc if host
visible
Post by Rostislav Pehlivanov
+ but should be ok */
+ };
+
+ ret = vkCreateBuffer(hwctx->act_dev, &buf_spawn, NULL, &buf->buf);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
+ vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ vkGetBufferMemoryRequirements(hwctx->act_dev, buf->buf, &req);
+
+ err = alloc_mem(ctx, &req, flags, alloc_pnext, &buf->flags,
&buf->mem);
Post by Rostislav Pehlivanov
+ if (err)
+ return err;
+
+ ret = vkBindBufferMemory(hwctx->act_dev, buf->buf, buf->mem, 0);
+ if (ret != VK_SUCCESS) {
%s\n",
Post by Rostislav Pehlivanov
+ vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
This function looks like it is missing the free cases on failure.
Fixed.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+
+ return 0;
+}
+
+static void free_buf(AVHWDeviceContext *ctx, ImageBuffer *buf)
+{
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ if (!buf)
+ return;
+
+ vkDestroyBuffer(hwctx->act_dev, buf->buf, hwctx->alloc);
+ vkFreeMemory(hwctx->act_dev, buf->mem, hwctx->alloc);
+}
+
+static int map_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf,
uint8_t *mem[],
Post by Rostislav Pehlivanov
+ int nb_buffers, int invalidate)
+{
+ VkResult ret;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VkMappedMemoryRange invalidate_ctx[AV_NUM_DATA_POINTERS];
+ int invalidate_count = 0;
+
+ for (int i = 0; i < nb_buffers; i++) {
+ ret = vkMapMemory(hwctx->act_dev, buf[i].mem, 0,
+ VK_WHOLE_SIZE, 0, (void **)&mem[i]);
+ if (ret != VK_SUCCESS) {
%s\n",
Post by Rostislav Pehlivanov
+ vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ }
+
+ if (!invalidate)
+ return 0;
+
+ for (int i = 0; i < nb_buffers; i++) {
+ const VkMappedMemoryRange ival_buf = {
+ .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+ .memory = buf[i].mem,
+ .size = VK_WHOLE_SIZE,
+ };
+ if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+ continue;
+ invalidate_ctx[invalidate_count++] = ival_buf;
+ }
+
+ if (invalidate_count) {
+ ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev,
invalidate_count,
Post by Rostislav Pehlivanov
+ invalidate_ctx);
+ if (ret != VK_SUCCESS) {
%s\n",
Post by Rostislav Pehlivanov
+ vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ }
Missing unmap cases?
I'd rather not error out here, I've changed this to a warning and made the
function return 0.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+
+ return 0;
+}
+
...
+
+static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
+ ImageBuffer *buffer, const int *stride,
int w,
Post by Rostislav Pehlivanov
+ int h, enum AVPixelFormat pix_fmt, int
to_buf)
Post by Rostislav Pehlivanov
+{
+ VkResult ret;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VulkanDevicePriv *s = ctx->internal->priv;
+
+ const int planes = av_pix_fmt_count_planes(pix_fmt);
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
+
+ VkCommandBufferBeginInfo cmd_start = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ };
+
+ VkSubmitInfo s_info = {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .commandBufferCount = 1,
+ .pCommandBuffers = &s->cmd_buf,
+ };
+
+ vkBeginCommandBuffer(s->cmd_buf, &cmd_start);
Return value needs to be checked.
Done.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+
+ { /* Change the image layout to something more optimal for
transfers */
Post by Rostislav Pehlivanov
+ VkImageMemoryBarrier bar = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .srcAccessMask = 0,
+ VK_ACCESS_TRANSFER_WRITE_BIT,
+ .oldLayout = frame->layout,
+ .newLayout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = frame->img,
+ .subresourceRange.levelCount = 1,
+ .subresourceRange.layerCount = 1,
+ };
+
+ if (planes == 1) {
+ bar.subresourceRange.aspectMask =
VK_IMAGE_ASPECT_COLOR_BIT;
Post by Rostislav Pehlivanov
+ } else {
+ bar.subresourceRange.aspectMask =
VK_IMAGE_ASPECT_PLANE_0_BIT;
Post by Rostislav Pehlivanov
+ bar.subresourceRange.aspectMask |=
VK_IMAGE_ASPECT_PLANE_1_BIT;
Post by Rostislav Pehlivanov
+ if (planes > 2)
+ bar.subresourceRange.aspectMask |=
VK_IMAGE_ASPECT_PLANE_2_BIT;
Post by Rostislav Pehlivanov
+ }
+
+ vkCmdPipelineBarrier(s->cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_
BIT,
Post by Rostislav Pehlivanov
+ VK_PIPELINE_STAGE_TRANSFER_BIT,
+ 0, 0, NULL, 0, NULL, 1, &bar);
+
+ /* Update to the new layout */
+ frame->layout = bar.newLayout;
+ frame->access = bar.dstAccessMask;
+ }
+
+ /* Schedule a copy for each plane */
+ for (int i = 0; i < planes; i++) {
+ VkImageSubresourceLayers sub = {
+ VK_IMAGE_ASPECT_PLANE_2_BIT,
+ .layerCount = 1,
+ };
+ const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w)
: w;
Post by Rostislav Pehlivanov
+ const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h)
: h;
Post by Rostislav Pehlivanov
+ VkBufferImageCopy buf_reg = {
+ .bufferOffset = 0,
+ /* Buffer stride isn't in bytes, it's in samples, the
implementation
Post by Rostislav Pehlivanov
+ * uses the image's VkFormat to know how many bytes per
sample
Post by Rostislav Pehlivanov
+ * the buffer has. So we have to convert by dividing.
Stupid. */
Post by Rostislav Pehlivanov
+ .bufferRowLength = stride[i] / desc->comp[i].step,
comp[i] isn't necessarily plane[i], but I think it happens to work anyway
for all of the supported formats.
More generally, this tricky code using some properties like the
log2_chroma values will fail for formats like YUVA420P, so if you are
intending to add them later (for overlay) it might be sensible to ensure
that this doesn't make too many assumptions now.
I'd rather change this later. I don't think alpha planes will be supported;
for overlay with transparency it's better to use RGBA.
I've added a note to say it won't work with planar alpha formats.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+ .bufferImageHeight = p_h,
+ .imageSubresource = sub,
+ .imageOffset = { 0 },
+ .imageExtent = { p_w, p_h, 1, },
+ };
+ if (to_buf)
+ vkCmdCopyImageToBuffer(s->cmd_buf, frame->img,
frame->layout,
Post by Rostislav Pehlivanov
+ buffer[i].buf, 1, &buf_reg);
+ else
+ vkCmdCopyBufferToImage(s->cmd_buf, buffer[i].buf,
frame->img,
Post by Rostislav Pehlivanov
+ frame->layout, 1, &buf_reg);
+ }
+
+ vkEndCommandBuffer(s->cmd_buf);
Can also fail.
Fixed.
Post by Mark Thompson
Post by Rostislav Pehlivanov
+
+ ret = vkQueueSubmit(s->cmd_queue, 1, &s_info, s->cmd_fence);
+ if (ret != VK_SUCCESS) {
%s\n",
Post by Rostislav Pehlivanov
+ vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ } else {
+ vkWaitForFences(hwctx->act_dev, 1, &s->cmd_fence, VK_TRUE,
UINT64_MAX);
Post by Rostislav Pehlivanov
+ vkResetFences(hwctx->act_dev, 1, &s->cmd_fence);
+ }
+
+ return 0;
+}
+
...
+
+const HWContextType ff_hwcontext_type_vulkan = {
+ .type = AV_HWDEVICE_TYPE_VULKAN,
+ .name = "Vulkan",
+
+ .device_hwctx_size = sizeof(AVVulkanDeviceContext),
+ .device_priv_size = sizeof(VulkanDevicePriv),
+ .frames_hwctx_size = sizeof(AVVulkanFramesContext),
+
+ .device_init = &vulkan_device_init,
+ .device_create = &vulkan_device_create,
+ .device_derive = &vulkan_device_derive,
+
+ .frames_get_constraints = &vulkan_frames_get_constraints,
+ .frames_init = vulkan_frames_init,
+ .frames_get_buffer = vulkan_get_buffer,
+
+ .transfer_get_formats = vulkan_transfer_get_formats,
+ .transfer_data_to = vulkan_transfer_data_to,
+ .transfer_data_from = vulkan_transfer_data_from,
+
+ .map_to = vulkan_map_to,
+
+ .pix_fmts = (const enum AVPixelFormat[]) {
+ AV_PIX_FMT_VULKAN,
+ AV_PIX_FMT_NONE
+ },
+};
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
new file mode 100644
index 0000000000..342c833a23
--- /dev/null
+++ b/libavutil/hwcontext_vulkan.h
@@ -0,0 +1,133 @@
+/*
+ * Vulkan hwcontext
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
Post by Rostislav Pehlivanov
+ */
+
+#ifndef AVUTIL_HWCONTEXT_VULKAN_H
+#define AVUTIL_HWCONTEXT_VULKAN_H
+
+#include <vulkan/vulkan.h>
+
+/**
+ * API-specific header for AV_HWDEVICE_TYPE_VULKAN.
+ *
+ * For user-allocated pools, AVHWFramesContext.pool must return
AVBufferRefs
Post by Rostislav Pehlivanov
+ * with the data pointer set to an AVVkFrame.
+ */
+
+/**
+ * Main Vulkan context, allocated as AVHWDeviceContext.hwctx.
+ * All of these can be set before init to change what the context uses
+ */
+typedef struct AVVulkanDeviceContext {
+ /**
+ * Custom memory allocator, else NULL
+ */
+ const VkAllocationCallbacks *alloc;
+ /**
+ * Instance
+ */
+ VkInstance inst;
+ /**
+ * Physical device
+ */
+ VkPhysicalDevice phys_dev;
+ /**
+ * Activated physical device
+ */
+ VkDevice act_dev;
+ /**
+ * Queue family index for graphics
+ */
+ int queue_family_index;
+ /**
+ * Queue family index for transfer ops only. By default, the
priority order
Post by Rostislav Pehlivanov
+ * is dedicated transfer > dedicated compute > graphics.
+ */
+ int queue_family_tx_index;
+ /**
+ * Queue family index for compute ops. Will be equal to the graphics
+ * one unless a dedicated transfer queue is found.
+ */
+ int queue_family_comp_index;
+} AVVulkanDeviceContext;
+
+/**
+ * Allocated as AVHWFramesContext.hwctx, used to set pool-specific
options
Post by Rostislav Pehlivanov
+ */
+typedef struct AVVulkanFramesContext {
+ /**
+ * Controls the tiling of output frames.
+ */
+ VkImageTiling tiling;
+ /**
+ * Defines extra usage of output frames. This is bitwise OR'd with
the
Post by Rostislav Pehlivanov
+ * standard usage flags (SAMPLED, STORAGE, TRANSFER_SRC and
TRANSFER_DST).
Post by Rostislav Pehlivanov
+ */
+ VkImageUsageFlagBits usage;
+ /**
+ * Set to 1 to allocate all planes separately (disjoint images)
+ */
+ int disjoint;
+ /**
+ * Extension data for image creation. By default, if the extension
is
Post by Rostislav Pehlivanov
+ * available, this will be chained to a
VkImageFormatListCreateInfoKHR.
Post by Rostislav Pehlivanov
+ */
+ void *create_pnext;
+ /**
+ * Extension data for memory allocation. If the image is disjoint,
this
Post by Rostislav Pehlivanov
+ * must be one per plane, otherwise just the first entry is used.
+ * This will be chained to VkExportMemoryAllocateInfo, which is used
+ * to make all pool images exportable to other APIs.
+ */
+ void *alloc_pnext[AV_NUM_DATA_POINTERS];
+} AVVulkanFramesContext;
+
+/*
+ * Frame structure, the VkFormat of the image will always match
+ * the pool's sw_format.
+ */
+typedef struct AVVkFrame {
+ VkImage img;
+ VkImageTiling tiling;
+ /**
+ * Always 1 for non-disjoint images, #planes for disjoint
+ */
+ int mem_count;
+ VkDeviceMemory mem[AV_NUM_DATA_POINTERS];
+ /**
+ * OR'd flags for all memory allocated
+ */
+ VkMemoryPropertyFlagBits flags;
+
+ /**
+ * Updated after every barrier
+ */
+ VkAccessFlagBits access;
+ VkImageLayout layout;
+} AVVkFrame;
This all looks much cleaner than the previous version.
Post by Rostislav Pehlivanov
+/**
+ * Converts AVPixelFormat to VkFormat, returns VK_FORMAT_UNDEFINED if
unsupported
Post by Rostislav Pehlivanov
+ * by the hwcontext
+ */
+VkFormat av_vkfmt_from_pixfmt(enum AVPixelFormat p);
+
+#endif /* AVUTIL_HWCONTEXT_VULKAN_H */
diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
index ff5c20d50e..c3b3aaee65 100644
--- a/libavutil/pixdesc.c
+++ b/libavutil/pixdesc.c
@@ -1673,6 +1673,10 @@ static const AVPixFmtDescriptor
av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
Post by Rostislav Pehlivanov
.name = "videotoolbox_vld",
.flags = AV_PIX_FMT_FLAG_HWACCEL,
},
+ [AV_PIX_FMT_VULKAN] = {
+ .name = "vulkan",
+ .flags = AV_PIX_FMT_FLAG_HWACCEL,
+ },
You've put this in a funny place in the middle?
I thought it was in alphabetical order; fixed, it's now just after OPENCL.
Post by Mark Thompson
Post by Rostislav Pehlivanov
[AV_PIX_FMT_GBRP] = {
.name = "gbrp",
.nb_components = 3,
diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
index aea008bbdc..e6991f3630 100644
--- a/libavutil/pixfmt.h
+++ b/libavutil/pixfmt.h
@@ -333,6 +333,10 @@ enum AVPixelFormat {
AV_PIX_FMT_GRAY14BE, ///< Y , 14bpp, big-endian
AV_PIX_FMT_GRAY14LE, ///< Y , 14bpp, little-endian
+ /* Vulkan hardware images,
+ * data[0] contain an AVVkFrame */
+ AV_PIX_FMT_VULKAN,
+
AV_PIX_FMT_NB ///< number of pixel formats, DO NOT USE THIS
if you want to link with shared libav* because the number of formats might
differ between versions
Post by Rostislav Pehlivanov
};
diff --git a/libavutil/version.h b/libavutil/version.h
index 44bdebdc93..84409b1d69 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -79,8 +79,8 @@
*/
#define LIBAVUTIL_VERSION_MAJOR 56
-#define LIBAVUTIL_VERSION_MINOR 18
-#define LIBAVUTIL_VERSION_MICRO 102
+#define LIBAVUTIL_VERSION_MINOR 19
+#define LIBAVUTIL_VERSION_MICRO 100
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR,
\
Post by Rostislav Pehlivanov
LIBAVUTIL_VERSION_MINOR,
\
I think I would make the pixfmt addition a patch on its own just to keep
it separate, but that probably doesn't matter very much.
I'll keep it then.
Post by Mark Thompson
- Mark
_______________________________________________
ffmpeg-devel mailing list
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
I've attached a new patch, thanks for the review.

Rostislav Pehlivanov
2018-05-22 02:46:10 UTC
Permalink
Signed-off-by: Rostislav Pehlivanov <***@gmail.com>
---
libavutil/hwcontext_opencl.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/libavutil/hwcontext_opencl.c b/libavutil/hwcontext_opencl.c
index 43b5c5ae0c..1d18da37bf 100644
--- a/libavutil/hwcontext_opencl.c
+++ b/libavutil/hwcontext_opencl.c
@@ -2171,10 +2171,7 @@ static int opencl_map_from_vaapi(AVHWFramesContext *dst_fc,
if (err < 0)
goto fail;

- // Adjust the map descriptor so that unmap works correctly.
- hwmap = (HWMapDescriptor*)dst->buf[0]->data;
- av_frame_unref(hwmap->source);
- err = av_frame_ref(hwmap->source, src);
+ err = ff_hwframe_map_replace(dst, src);

fail:
av_frame_free(&tmp);
--
2.17.0
Rostislav Pehlivanov
2018-05-22 02:46:09 UTC
Permalink
Used to fix unmapping when no direct interop exists between APIs.

Signed-off-by: Rostislav Pehlivanov <***@gmail.com>
---
libavutil/hwcontext.c | 7 +++++++
libavutil/hwcontext_internal.h | 5 +++++
2 files changed, 12 insertions(+)

diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c
index 745016ed7e..f1e404ab20 100644
--- a/libavutil/hwcontext.c
+++ b/libavutil/hwcontext.c
@@ -870,3 +870,10 @@ fail:
av_buffer_unref(&dst_ref);
return ret;
}
+/* Make the map descriptor stored in dst->buf[0] reference src instead of its current source frame. */
+int ff_hwframe_map_replace(AVFrame *dst, const AVFrame *src)
+{
+ HWMapDescriptor *hwmap = (HWMapDescriptor*)dst->buf[0]->data;
+ av_frame_unref(hwmap->source); /* drop the reference currently held */
+ return av_frame_ref(hwmap->source, src); /* av_frame_ref()'s result is returned as-is */
+}
diff --git a/libavutil/hwcontext_internal.h b/libavutil/hwcontext_internal.h
index 332062ddaa..77dc47ddd6 100644
--- a/libavutil/hwcontext_internal.h
+++ b/libavutil/hwcontext_internal.h
@@ -156,6 +156,11 @@ int ff_hwframe_map_create(AVBufferRef *hwframe_ref,
HWMapDescriptor *hwmap),
void *priv);

+/**
+ * Replace the source frame held by the hwmap of dst with a reference to src, used
+ * for indirect mappings like VAAPI->(DRM)->OpenCL/Vulkan that go through an intermediate frame
+ */
+int ff_hwframe_map_replace(AVFrame *dst, const AVFrame *src);

extern const HWContextType ff_hwcontext_type_cuda;
extern const HWContextType ff_hwcontext_type_d3d11va;
--
2.17.0
Mark Thompson
2018-05-27 14:04:53 UTC
Permalink
Post by Rostislav Pehlivanov
Used to fix unmapping when no direct interop exists between APIs.
---
libavutil/hwcontext.c | 7 +++++++
libavutil/hwcontext_internal.h | 5 +++++
2 files changed, 12 insertions(+)
diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c
index 745016ed7e..f1e404ab20 100644
--- a/libavutil/hwcontext.c
+++ b/libavutil/hwcontext.c
av_buffer_unref(&dst_ref);
return ret;
}
+
+int ff_hwframe_map_replace(AVFrame *dst, const AVFrame *src)
+{
+ HWMapDescriptor *hwmap = (HWMapDescriptor*)dst->buf[0]->data;
+ av_frame_unref(hwmap->source);
+ return av_frame_ref(hwmap->source, src);
+}
diff --git a/libavutil/hwcontext_internal.h b/libavutil/hwcontext_internal.h
index 332062ddaa..77dc47ddd6 100644
--- a/libavutil/hwcontext_internal.h
+++ b/libavutil/hwcontext_internal.h
@@ -156,6 +156,11 @@ int ff_hwframe_map_create(AVBufferRef *hwframe_ref,
HWMapDescriptor *hwmap),
void *priv);
+/**
+ * Replace the current hwmap of dst with the one from src, used for indirect
+ * mappings like VAAPI->(DRM)->OpenCL/Vulkan where a direct interop is missing
"missing" makes it sound like you /want/ to have direct interop, and the consequent mess of one-to-one cases. You shouldn't! :P
Post by Rostislav Pehlivanov
+ */
+int ff_hwframe_map_replace(AVFrame *dst, const AVFrame *src);
extern const HWContextType ff_hwcontext_type_cuda;
extern const HWContextType ff_hwcontext_type_d3d11va;
---
libavutil/hwcontext_opencl.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/libavutil/hwcontext_opencl.c b/libavutil/hwcontext_opencl.c
index 43b5c5ae0c..1d18da37bf 100644
--- a/libavutil/hwcontext_opencl.c
+++ b/libavutil/hwcontext_opencl.c
@@ -2171,10 +2171,7 @@ static int opencl_map_from_vaapi(AVHWFramesContext *dst_fc,
if (err < 0)
goto fail;
- // Adjust the map descriptor so that unmap works correctly.
- hwmap = (HWMapDescriptor*)dst->buf[0]->data;
- av_frame_unref(hwmap->source);
- err = av_frame_ref(hwmap->source, src);
+ err = ff_hwframe_map_replace(dst, src);
av_frame_free(&tmp);
These two patches LGTM.

Thanks,

- Mark
Rostislav Pehlivanov
2018-05-22 02:46:13 UTC
Permalink
Signed-off-by: Rostislav Pehlivanov <***@gmail.com>
---
configure | 1 +
libavfilter/Makefile | 1 +
libavfilter/allfilters.c | 1 +
libavfilter/vf_avgblur_vulkan.c | 343 ++++++++++++++++++++++++++++++++
4 files changed, 346 insertions(+)
create mode 100644 libavfilter/vf_avgblur_vulkan.c

diff --git a/configure b/configure
index 52c1e7a6e8..c34a48b2bc 100755
--- a/configure
+++ b/configure
@@ -3308,6 +3308,7 @@ ass_filter_deps="libass"
atempo_filter_deps="avcodec"
atempo_filter_select="rdft"
avgblur_opencl_filter_deps="opencl"
+avgblur_vulkan_filter_deps="vulkan libshaderc"
azmq_filter_deps="libzmq"
blackframe_filter_deps="gpl"
boxblur_filter_deps="gpl"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index c68ef05fdc..fd8cf8c13c 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -146,6 +146,7 @@ OBJS-$(CONFIG_ATADENOISE_FILTER) += vf_atadenoise.o
OBJS-$(CONFIG_AVGBLUR_FILTER) += vf_avgblur.o
OBJS-$(CONFIG_AVGBLUR_OPENCL_FILTER) += vf_avgblur_opencl.o opencl.o \
opencl/avgblur.o
+OBJS-$(CONFIG_AVGBLUR_VULKAN_FILTER) += vf_avgblur_vulkan.o vulkan.o
OBJS-$(CONFIG_BBOX_FILTER) += bbox.o vf_bbox.o
OBJS-$(CONFIG_BENCH_FILTER) += f_bench.o
OBJS-$(CONFIG_BITPLANENOISE_FILTER) += vf_bitplanenoise.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index b44093d21b..c53cb2154e 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -139,6 +139,7 @@ extern AVFilter ff_vf_ass;
extern AVFilter ff_vf_atadenoise;
extern AVFilter ff_vf_avgblur;
extern AVFilter ff_vf_avgblur_opencl;
+extern AVFilter ff_vf_avgblur_vulkan;
extern AVFilter ff_vf_bbox;
extern AVFilter ff_vf_bench;
extern AVFilter ff_vf_bitplanenoise;
diff --git a/libavfilter/vf_avgblur_vulkan.c b/libavfilter/vf_avgblur_vulkan.c
new file mode 100644
index 0000000000..5b89ae0718
--- /dev/null
+++ b/libavfilter/vf_avgblur_vulkan.c
@@ -0,0 +1,343 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "vulkan.h"
+#include "internal.h"
+/* Private context of the avgblur_vulkan filter. */
+typedef struct AvgBlurVulkanContext {
+ VulkanFilterContext vkctx;
+
+ int initialized; /* set to 1 at the end of init_filter() */
+ FFVkExecContext exec; /* its buf, queue and fence are used to submit each frame */
+
+ /* Shader updaters, must be in the main filter struct */
+ VkDescriptorImageInfo input_images[3]; /* indexed by plane */
+ VkDescriptorImageInfo output_images[3]; /* indexed by plane */
+
+ int size_x; /* "sizeX" option */
+ int size_y; /* "sizeY" option */
+ int planes; /* "planes" option, tested as a per-plane bitmask in the shader */
+} AvgBlurVulkanContext;
+/* GLSL for blur_kernel(): fills a shared cache (work group plus a FILTER_RADIUS border) and averages over it. */
+static const char blur_kernel[] = {
+ C(0, #define CACHE_SIZE (ivec2(gl_WorkGroupSize) + FILTER_RADIUS*2) )
+ C(0, shared vec4 cache[AREA(CACHE_SIZE)]; )
+ C(0, )
+ C(0, void blur_kernel(int idx, ivec2 pos) )
+ C(0, { )
+ C(1, ivec2 d; )
+ C(1, const ivec2 s = CACHE_SIZE; )
+ C(1, const ivec2 w = ivec2(gl_WorkGroupSize); )
+ C(1, const ivec2 l = ivec2(gl_LocalInvocationID.xy); )
+ C(1, )
+ C(1, for (d.y = l.y; d.y < s.y; d.y += w.y) { ) /* each invocation strides by the work group size to fill the cache */
+ C(2, for (d.x = l.x; d.x < s.x; d.x += w.x) { )
+ C(3, const ivec2 np = pos + d - l - FILTER_RADIUS; )
+ C(3, cache[d.y*s.x + d.x] = imageLoad(input_img[idx], np); )
+ C(2, } )
+ C(1, } )
+ C(0, )
+ C(1, barrier(); ) /* the cache is read by other invocations below */
+ C(0, )
+ C(1, vec4 avg = vec4(0.0f); )
+ C(1, ivec2 start = ivec2(0); )
+ C(1, ivec2 end = FILTER_RADIUS*2 + 1; )
+ C(1, for (d.y = start.y; d.y < end.y; d.y++) ) /* sum the (2*radius + 1) window starting at this invocation's cache slot */
+ C(2, for (d.x = start.x; d.x < end.x; d.x++) )
+ C(3, avg += cache[(l.y + d.y)*s.x + l.x + d.x]; )
+ C(0, )
+ C(1, avg /= AREA(end - start); )
+ C(1, imageStore(output_img[idx], pos, avg); )
+ C(0, } )
+};
+/* Builds the compute shader, pipeline layout, execution context and pipeline; run from filter_frame while !initialized. */
+static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
+{
+ int err;
+ AvgBlurVulkanContext *s = ctx->priv;
+
+ { /* Create the shader */
+ const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+
+ SPIRVShader *shd = ff_vk_init_shader(ctx, "avgblur_compute",
+ VK_SHADER_STAGE_COMPUTE_BIT);
+ ff_vk_set_compute_shader_sizes(ctx, shd, (int [3]){ 16, 16, 1 });
+
+ VulkanDescriptorSetBinding desc_i[2] = {
+ {
+ .name = "input_img",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.input_format),
+ .mem_quali = "readonly",
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .updater = s->input_images, /* filled in per frame by process_frames() */
+ },
+ {
+ .name = "output_img",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+ .mem_quali = "writeonly",
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .updater = s->output_images, /* filled in per frame by process_frames() */
+ },
+ };
+
+ RET(ff_vk_add_descriptor_set(ctx, shd, desc_i, 2, 0)); /* set 0 */
+
+ GLSLF(0, #define FILTER_RADIUS ivec2(%i, %i), s->size_x, s->size_y); /* radius comes from the sizeX/sizeY options */
+ GLSLD( blur_kernel );
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ GLSLF(1, for (int i = 0; i < %i; i++) { ,planes);
+ GLSLC(2, if (!IS_WITHIN(pos, imageSize(input_img[i]))) { );
+ GLSLC(3, barrier(); );
+ GLSLC(3, continue; );
+ GLSLC(2, } );
+ GLSLC(2, else barrier(); ); /* Workaround */
+ GLSLF(2, if ((0x%x & (1 << i)) != 0) { ,s->planes); /* bit i of the planes option selects plane i for blurring */
+ GLSLC(3, blur_kernel(i, pos); );
+ GLSLC(2, } else { ); /* unselected planes are copied unchanged */
+ GLSLC(3, const vec4 val = imageLoad(input_img[i], pos); );
+ GLSLC(3, imageStore(output_img[i], pos, val); );
+ GLSLC(2, } );
+ GLSLC(1, } );
+ GLSLC(0, } );
+
+ RET(ff_vk_compile_shader(ctx, shd, "main"));
+ }
+
+ RET(ff_vk_init_pipeline_layout(ctx));
+
+ /* Execution context */
+ RET(ff_vk_create_exec_ctx(ctx, &s->exec,
+ s->vkctx.hwctx->queue_family_comp_index));
+
+ /* The pipeline */
+ RET(ff_vk_init_compute_pipeline(ctx));
+
+ s->initialized = 1;
+
+ return 0;
+
+fail:
+ return err;
+}
+/* Records and submits one compute dispatch reading 'in' and writing 'out', waits for it, then destroys the per-frame image views. */
+static int process_frames(AVFilterContext *avctx, AVVkFrame *out, AVVkFrame *in)
+{
+ int err; /* assigned by RET(); NOTE(review): RET is defined outside this file, confirm it stores the result in err */
+ AvgBlurVulkanContext *s = avctx->priv;
+ int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+
+ VkCommandBufferBeginInfo cmd_start = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ };
+
+ VkComponentMapping null_map = {
+ .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+ };
+
+ for (int i = 0; i < planes; i++) { /* one input and one output view per plane */
+ RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in,
+ ff_vk_plane_rep_fmt(s->vkctx.input_format, i),
+ ff_vk_aspect_flags(s->vkctx.input_format, i),
+ null_map, NULL));
+
+ RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out,
+ ff_vk_plane_rep_fmt(s->vkctx.output_format, i),
+ ff_vk_aspect_flags(s->vkctx.output_format, i),
+ null_map, NULL));
+
+ s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+ s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
+ }
+
+ ff_vk_update_descriptor_set(avctx, 0); /* set 0 is the one added in init_filter() */
+
+ vkBeginCommandBuffer(s->exec.buf, &cmd_start);
+
+ { /* Transition both images to the layouts declared for the descriptors above */
+ VkImageMemoryBarrier bar[2] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .srcAccessMask = 0,
+ .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
+ .oldLayout = in->layout,
+ .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = in->img,
+ .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.input_format, -1),
+ .subresourceRange.levelCount = 1,
+ .subresourceRange.layerCount = 1,
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .srcAccessMask = 0,
+ .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+ .oldLayout = out->layout,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = out->img,
+ .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.output_format, -1),
+ .subresourceRange.levelCount = 1,
+ .subresourceRange.layerCount = 1,
+ },
+ };
+
+ vkCmdPipelineBarrier(s->exec.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
+ 0, NULL, 0, NULL, 2, bar);
+
+ in->layout = bar[0].newLayout; /* record the new layout/access in the frames themselves */
+ in->access = bar[0].dstAccessMask;
+
+ out->layout = bar[1].newLayout;
+ out->access = bar[1].dstAccessMask;
+ }
+
+ vkCmdBindPipeline(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline);
+ vkCmdBindDescriptorSets(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline_layout, 0, s->vkctx.descriptor_sets_num, s->vkctx.desc_set, 0, 0);
+ vkCmdDispatch(s->exec.buf, /* group counts: output size divided by local_size, rounded up */
+ FFALIGN(s->vkctx.output_width, s->vkctx.shaders[0].local_size[0])/s->vkctx.shaders[0].local_size[0],
+ FFALIGN(s->vkctx.output_height, s->vkctx.shaders[0].local_size[1])/s->vkctx.shaders[0].local_size[1], 1);
+
+ vkEndCommandBuffer(s->exec.buf);
+
+ VkSubmitInfo s_info = {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .commandBufferCount = 1,
+ .pCommandBuffers = &s->exec.buf,
+ };
+
+ VkResult ret = vkQueueSubmit(s->exec.queue, 1, &s_info, s->exec.fence);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
+ ff_vk_ret2str(ret));
+ err = AVERROR_EXTERNAL; /* fall through to fail: so the image views created above are destroyed, not leaked */
+ } else {
+ vkWaitForFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence, VK_TRUE, UINT64_MAX); /* block until the submission has completed */
+ vkResetFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence);
+ }
+
+fail:
+ /* Also reached on success. NOTE(review): destroys the views of every plane, even ones not created when the loop above failed early. */
+ for (int i = 0; i < planes; i++) {
+ ff_vk_destroy_imageview(avctx, s->input_images[i].imageView);
+ ff_vk_destroy_imageview(avctx, s->output_images[i].imageView);
+ }
+
+ return err;
+}
+/* Input pad callback: allocates the output frame, initializes the filter if needed, processes the frame and sends it on. */
+static int avgblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
+{
+ int err;
+ AVFilterContext *ctx = link->dst;
+ AvgBlurVulkanContext *s = ctx->priv;
+ AVFilterLink *outlink = ctx->outputs[0];
+
+ AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+ if (!out) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ if (!s->initialized) /* shader and pipeline are built lazily */
+ RET(init_filter(ctx, in));
+
+ RET(process_frames(ctx, (AVVkFrame *)out->data[0], /* data[0] of each frame is used as its AVVkFrame */
+ (AVVkFrame *) in->data[0]));
+
+ err = av_frame_copy_props(out, in);
+ if (err < 0)
+ goto fail;
+
+ av_frame_free(&in); /* the input frame is always freed here, on success and on failure */
+
+ return ff_filter_frame(outlink, out);
+
+fail:
+ av_frame_free(&in);
+ av_frame_free(&out);
+ return err;
+}
+/* Filter uninit callback: frees the execution context, calls ff_vk_filter_uninit() and clears the initialized flag. */
+static void avgblur_vulkan_uninit(AVFilterContext *avctx)
+{
+ AvgBlurVulkanContext *s = avctx->priv;
+
+ ff_vk_free_exec_ctx(avctx, &s->exec);
+ ff_vk_filter_uninit(avctx);
+
+ s->initialized = 0;
+}
+/* User options; OFFSET() gives the position of the backing field inside AvgBlurVulkanContext. */
+#define OFFSET(x) offsetof(AvgBlurVulkanContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+static const AVOption avgblur_vulkan_options[] = {
+ { "sizeX", "Set horizontal radius", OFFSET(size_x), AV_OPT_TYPE_INT, {.i64 = 2}, 0, 32, .flags = FLAGS },
+ { "planes", "Set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT, {.i64 = 0xF}, 0, 0xF, .flags = FLAGS }, /* bitmask, one bit per plane */
+ { "sizeY", "Set vertical radius", OFFSET(size_y), AV_OPT_TYPE_INT, {.i64 = 2}, 0, 32, .flags = FLAGS },
+ { NULL },
+};
+/* NOTE(review): presumably defines avgblur_vulkan_class, which the filter definition references as .priv_class. */
+AVFILTER_DEFINE_CLASS(avgblur_vulkan);
+/* Single video input pad; frames are handled by avgblur_vulkan_filter_frame(). */
+static const AVFilterPad avgblur_vulkan_inputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .filter_frame = &avgblur_vulkan_filter_frame,
+ .config_props = &ff_vk_filter_config_input,
+ },
+ { NULL }
+};
+/* Single video output pad, configured through ff_vk_filter_config_output(). */
+static const AVFilterPad avgblur_vulkan_outputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = &ff_vk_filter_config_output,
+ },
+ { NULL }
+};
+/* Filter definition: common Vulkan init/query_formats callbacks plus this file's uninit, pads and option class. */
+AVFilter ff_vf_avgblur_vulkan = {
+ .name = "avgblur_vulkan",
+ .description = NULL_IF_CONFIG_SMALL("Apply avgblur mask to input video"),
+ .priv_size = sizeof(AvgBlurVulkanContext),
+ .init = &ff_vk_filter_init,
+ .uninit = &avgblur_vulkan_uninit,
+ .query_formats = &ff_vk_filter_query_formats,
+ .inputs = avgblur_vulkan_inputs,
+ .outputs = avgblur_vulkan_outputs,
+ .priv_class = &avgblur_vulkan_class,
+ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
--
2.17.0
Rostislav Pehlivanov
2018-05-22 02:46:14 UTC
Permalink
For YUV images the filter only approximates chromatic aberration by distorting
the chroma planes, but RGB images are done properly, per color channel.

Signed-off-by: Rostislav Pehlivanov <***@gmail.com>
---
configure | 1 +
libavfilter/Makefile | 1 +
libavfilter/allfilters.c | 1 +
libavfilter/vf_chromaticaberration_vulkan.c | 342 ++++++++++++++++++++
4 files changed, 345 insertions(+)
create mode 100644 libavfilter/vf_chromaticaberration_vulkan.c

diff --git a/configure b/configure
index c34a48b2bc..eb81cc1ed5 100755
--- a/configure
+++ b/configure
@@ -3313,6 +3313,7 @@ azmq_filter_deps="libzmq"
blackframe_filter_deps="gpl"
boxblur_filter_deps="gpl"
bs2b_filter_deps="libbs2b"
+chromaticaberration_vulkan_filter_deps="vulkan libshaderc"
colormatrix_filter_deps="gpl"
convolution_opencl_filter_deps="opencl"
convolve_filter_deps="avcodec"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index fd8cf8c13c..976955959c 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -156,6 +156,7 @@ OBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o framesync.o
OBJS-$(CONFIG_BOXBLUR_FILTER) += vf_boxblur.o
OBJS-$(CONFIG_BWDIF_FILTER) += vf_bwdif.o
OBJS-$(CONFIG_CHROMAKEY_FILTER) += vf_chromakey.o
+OBJS-$(CONFIG_CHROMATICABERRATION_VULKAN_FILTER) += vf_chromaticaberration_vulkan.o vulkan.o
OBJS-$(CONFIG_CIESCOPE_FILTER) += vf_ciescope.o
OBJS-$(CONFIG_CODECVIEW_FILTER) += vf_codecview.o
OBJS-$(CONFIG_COLORBALANCE_FILTER) += vf_colorbalance.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index c53cb2154e..7be81e4706 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -149,6 +149,7 @@ extern AVFilter ff_vf_blend;
extern AVFilter ff_vf_boxblur;
extern AVFilter ff_vf_bwdif;
extern AVFilter ff_vf_chromakey;
+extern AVFilter ff_vf_chromaticaberration_vulkan;
extern AVFilter ff_vf_ciescope;
extern AVFilter ff_vf_codecview;
extern AVFilter ff_vf_colorbalance;
diff --git a/libavfilter/vf_chromaticaberration_vulkan.c b/libavfilter/vf_chromaticaberration_vulkan.c
new file mode 100644
index 0000000000..1d1aeb95a0
--- /dev/null
+++ b/libavfilter/vf_chromaticaberration_vulkan.c
@@ -0,0 +1,342 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "vulkan.h"
+#include "internal.h"
+/* Private context of the chromaticaberration_vulkan filter. */
+typedef struct ChromaticAberrationVulkanContext {
+ VulkanFilterContext vkctx;
+
+ int initialized; /* set to 1 at the end of init_filter() */
+ FFVkExecContext exec; /* its buf, queue and fence are used to submit each frame */
+
+ /* Shader updaters, must be in the main filter struct */
+ VkDescriptorImageInfo input_images[3]; /* indexed by plane */
+ VkDescriptorImageInfo output_images[3]; /* indexed by plane */
+
+ float dist_x; /* "dist_x" option */
+ float dist_y; /* "dist_y" option */
+} ChromaticAberrationVulkanContext;
+/* GLSL helpers: distort_rgb() samples R and B at opposite offsets from G/A; distort_chroma() rescales one plane's sampling position. */
+static const char distort_chroma_kernel[] = {
+ C(0, void distort_rgb(ivec2 size, ivec2 pos) )
+ C(0, { )
+ C(1, const vec2 p = ((vec2(pos)/vec2(size)) - 0.5f)*2.0f; ) /* position mapped so that the plane center is (0, 0) */
+ C(1, const vec2 o = p * (FILTER_DIST - 1.0f); )
+ C(0, )
+ C(1, vec4 res; )
+ C(1, res.r = texture(input_img[0], ((p - o)/2.0f) + 0.5f).r; )
+ C(1, res.g = texture(input_img[0], (( p)/2.0f) + 0.5f).g; )
+ C(1, res.b = texture(input_img[0], ((p + o)/2.0f) + 0.5f).b; )
+ C(1, res.a = texture(input_img[0], (( p)/2.0f) + 0.5f).a; )
+ C(1, imageStore(output_img[0], pos, res); )
+ C(0, } )
+ C(0, )
+ C(0, void distort_chroma(int idx, ivec2 size, ivec2 pos) )
+ C(0, { )
+ C(1, vec2 p = ((vec2(pos)/vec2(size)) - 0.5f)*2.0f; )
+ C(1, float d = sqrt(p.x*p.x + p.y*p.y); )
+ C(1, p *= d / (d*FILTER_DIST); ) /* NOTE(review): d cancels out algebraically, and this is 0/0 when p is exactly (0, 0) */
+ C(1, vec4 res = texture(input_img[idx], (p/2.0f) + 0.5f); )
+ C(1, imageStore(output_img[idx], pos, res); )
+ C(0, } )
+};
+/* Creates the sampler, compute shader, pipeline layout, execution context and pipeline; run from filter_frame while !initialized. */
+static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
+{
+ int err;
+ ChromaticAberrationVulkanContext *s = ctx->priv;
+
+ /* Create a sampler */
+ const VulkanSampler *sampler = ff_vk_init_sampler(ctx, NULL, 0,
+ VK_FILTER_LINEAR);
+ if (!sampler)
+ return AVERROR_EXTERNAL;
+
+ { /* Create the shader */
+ const float dist_x = (s->dist_x / 100.0f) + 1.0f; /* option range 0..10 becomes a factor of 1.0..1.1 */
+ const float dist_y = (s->dist_y / 100.0f) + 1.0f;
+ const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+
+ SPIRVShader *shd = ff_vk_init_shader(ctx, "chromaticaberration_compute",
+ VK_SHADER_STAGE_COMPUTE_BIT);
+ ff_vk_set_compute_shader_sizes(ctx, shd, (int [3]){ 16, 16, 1 });
+
+ VulkanDescriptorSetBinding desc_i[2] = {
+ {
+ .name = "input_img",
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .updater = s->input_images, /* filled in per frame by process_frames() */
+ .samplers = DUP_SAMPLER_ARRAY4(sampler->sampler),
+ },
+ {
+ .name = "output_img",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+ .mem_quali = "writeonly",
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .updater = s->output_images, /* filled in per frame by process_frames() */
+ },
+ };
+
+ RET(ff_vk_add_descriptor_set(ctx, shd, desc_i, 2, 0)); /* set 0 */
+
+ GLSLF(0, #define FILTER_DIST vec2(%f, %f) ,dist_x, dist_y);
+ GLSLD( distort_chroma_kernel );
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ GLSLF(1, int planes = %i; ,planes);
+ GLSLC(1, for (int i = 0; i < planes; i++) { );
+ GLSLC(2, ivec2 size = imageSize(output_img[i]); );
+ GLSLC(2, if (!IS_WITHIN(pos, size)) );
+ GLSLC(3, continue; );
+ GLSLC(2, if (planes == 1) { ); /* single-plane formats use distort_rgb() */
+ GLSLC(3, distort_rgb(size, pos); );
+ GLSLC(2, } else if (i > 0) { ); /* planes after the first are distorted */
+ GLSLC(3, distort_chroma(i, size, pos); );
+ GLSLC(2, } else { ); /* first plane of a multi-plane format is copied via the sampler */
+ GLSLC(3, vec2 npos = vec2(pos)/vec2(size); );
+ GLSLC(3, vec4 res = texture(input_img[i], npos); );
+ GLSLC(3, imageStore(output_img[i], pos, res); );
+ GLSLC(2, } );
+ GLSLC(1, } );
+ GLSLC(0, } );
+
+ RET(ff_vk_compile_shader(ctx, shd, "main"));
+ }
+
+ RET(ff_vk_init_pipeline_layout(ctx));
+
+ /* Execution context */
+ RET(ff_vk_create_exec_ctx(ctx, &s->exec,
+ s->vkctx.hwctx->queue_family_comp_index));
+
+ /* The pipeline */
+ RET(ff_vk_init_compute_pipeline(ctx));
+
+ s->initialized = 1;
+
+ return 0;
+
+fail:
+ return err;
+}
+/* Records and submits one compute dispatch reading 'in' and writing 'out', waits for it, then destroys the per-frame image views. */
+static int process_frames(AVFilterContext *avctx, AVVkFrame *out, AVVkFrame *in)
+{
+ int err; /* assigned by RET(); NOTE(review): RET is defined outside this file, confirm it stores the result in err */
+ ChromaticAberrationVulkanContext *s = avctx->priv;
+ int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+
+ VkCommandBufferBeginInfo cmd_start = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ };
+
+ VkComponentMapping null_map = {
+ .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+ };
+
+ for (int i = 0; i < planes; i++) { /* one input and one output view per plane */
+ RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in,
+ ff_vk_plane_rep_fmt(s->vkctx.input_format, i),
+ ff_vk_aspect_flags(s->vkctx.input_format, i),
+ null_map, NULL));
+
+ RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out,
+ ff_vk_plane_rep_fmt(s->vkctx.output_format, i),
+ ff_vk_aspect_flags(s->vkctx.output_format, i),
+ null_map, NULL));
+
+ s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+ s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
+ }
+
+ ff_vk_update_descriptor_set(avctx, 0); /* set 0 is the one added in init_filter() */
+
+ vkBeginCommandBuffer(s->exec.buf, &cmd_start);
+
+ { /* Transition both images to the layouts declared for the descriptors above */
+ VkImageMemoryBarrier bar[2] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .srcAccessMask = 0,
+ .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
+ .oldLayout = in->layout,
+ .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = in->img,
+ .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.input_format, -1),
+ .subresourceRange.levelCount = 1,
+ .subresourceRange.layerCount = 1,
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .srcAccessMask = 0,
+ .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+ .oldLayout = out->layout,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = out->img,
+ .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.output_format, -1),
+ .subresourceRange.levelCount = 1,
+ .subresourceRange.layerCount = 1,
+ },
+ };
+
+ vkCmdPipelineBarrier(s->exec.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
+ 0, NULL, 0, NULL, 2, bar);
+
+ in->layout = bar[0].newLayout; /* record the new layout/access in the frames themselves */
+ in->access = bar[0].dstAccessMask;
+
+ out->layout = bar[1].newLayout;
+ out->access = bar[1].dstAccessMask;
+ }
+
+ vkCmdBindPipeline(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline);
+ vkCmdBindDescriptorSets(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline_layout, 0, s->vkctx.descriptor_sets_num, s->vkctx.desc_set, 0, 0);
+ vkCmdDispatch(s->exec.buf, /* group counts: output size divided by local_size, rounded up */
+ FFALIGN(s->vkctx.output_width, s->vkctx.shaders[0].local_size[0])/s->vkctx.shaders[0].local_size[0],
+ FFALIGN(s->vkctx.output_height, s->vkctx.shaders[0].local_size[1])/s->vkctx.shaders[0].local_size[1], 1);
+
+ vkEndCommandBuffer(s->exec.buf);
+
+ VkSubmitInfo s_info = {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .commandBufferCount = 1,
+ .pCommandBuffers = &s->exec.buf,
+ };
+
+ VkResult ret = vkQueueSubmit(s->exec.queue, 1, &s_info, s->exec.fence);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
+ ff_vk_ret2str(ret));
+ err = AVERROR_EXTERNAL; /* fall through to fail: so the image views created above are destroyed, not leaked */
+ } else {
+ vkWaitForFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence, VK_TRUE, UINT64_MAX); /* block until the submission has completed */
+ vkResetFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence);
+ }
+
+fail:
+ /* Also reached on success. NOTE(review): destroys the views of every plane, even ones not created when the loop above failed early. */
+ for (int i = 0; i < planes; i++) {
+ ff_vk_destroy_imageview(avctx, s->input_images[i].imageView);
+ ff_vk_destroy_imageview(avctx, s->output_images[i].imageView);
+ }
+
+ return err;
+}
+/* Input pad callback: allocates the output frame, initializes the filter if needed, processes the frame and sends it on. */
+static int chromaticaberration_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
+{
+ int err;
+ AVFilterContext *ctx = link->dst;
+ ChromaticAberrationVulkanContext *s = ctx->priv;
+ AVFilterLink *outlink = ctx->outputs[0];
+
+ AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+ if (!out) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ if (!s->initialized) /* sampler, shader and pipeline are built lazily */
+ RET(init_filter(ctx, in));
+
+ RET(process_frames(ctx, (AVVkFrame *)out->data[0], /* data[0] of each frame is used as its AVVkFrame */
+ (AVVkFrame *) in->data[0]));
+
+ err = av_frame_copy_props(out, in);
+ if (err < 0)
+ goto fail;
+
+ av_frame_free(&in); /* the input frame is always freed here, on success and on failure */
+
+ return ff_filter_frame(outlink, out);
+
+fail:
+ av_frame_free(&in);
+ av_frame_free(&out);
+ return err;
+}
+/* Filter uninit callback: frees the execution context, calls ff_vk_filter_uninit() and clears the initialized flag. */
+static void chromaticaberration_vulkan_uninit(AVFilterContext *avctx)
+{
+ ChromaticAberrationVulkanContext *s = avctx->priv;
+
+ ff_vk_free_exec_ctx(avctx, &s->exec);
+ ff_vk_filter_uninit(avctx);
+
+ s->initialized = 0;
+}
+/* User options; OFFSET() gives the position of the backing field inside ChromaticAberrationVulkanContext. */
+#define OFFSET(x) offsetof(ChromaticAberrationVulkanContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+static const AVOption chromaticaberration_vulkan_options[] = {
+ { "dist_x", "Set horizontal distortion amount", OFFSET(dist_x), AV_OPT_TYPE_FLOAT, {.dbl = 0.0f}, 0.0f, 10.0f, .flags = FLAGS },
+ { "dist_y", "Set vertical distortion amount", OFFSET(dist_y), AV_OPT_TYPE_FLOAT, {.dbl = 0.0f}, 0.0f, 10.0f, .flags = FLAGS },
+ { NULL },
+};
+/* NOTE(review): presumably defines chromaticaberration_vulkan_class, which the filter definition references as .priv_class. */
+AVFILTER_DEFINE_CLASS(chromaticaberration_vulkan);
+/* Single video input pad; frames are handled by chromaticaberration_vulkan_filter_frame(). */
+static const AVFilterPad chromaticaberration_vulkan_inputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .filter_frame = &chromaticaberration_vulkan_filter_frame,
+ .config_props = &ff_vk_filter_config_input,
+ },
+ { NULL }
+};
+/* Single video output pad, configured through ff_vk_filter_config_output(). */
+static const AVFilterPad chromaticaberration_vulkan_outputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = &ff_vk_filter_config_output,
+ },
+ { NULL }
+};
+/* Filter definition: common Vulkan init/query_formats callbacks plus this file's uninit, pads and option class. */
+AVFilter ff_vf_chromaticaberration_vulkan = {
+ .name = "chromaticaberration_vulkan",
+ .description = NULL_IF_CONFIG_SMALL("Offset chroma of input video"),
+ .priv_size = sizeof(ChromaticAberrationVulkanContext),
+ .init = &ff_vk_filter_init,
+ .uninit = &chromaticaberration_vulkan_uninit,
+ .query_formats = &ff_vk_filter_query_formats,
+ .inputs = chromaticaberration_vulkan_inputs,
+ .outputs = chromaticaberration_vulkan_outputs,
+ .priv_class = &chromaticaberration_vulkan_class,
+ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
--
2.17.0
Rostislav Pehlivanov
2018-05-22 02:46:12 UTC
Permalink
This commit adds common code for use in Vulkan filters. It attempts
to reduce the burden of writing Vulkan image filters to a minimum,
which is pretty much a requirement considering how verbose the API is.

It supports both compute and graphics pipelines and manages to abstract
the API to such a level there's no need to call any Vulkan functions
inside the init path of the code. Handling shader descriptors is probably
the bulk of the code, and despite the abstraction, it loses none of the
features for describing shader IO.

In order to produce linkable shaders, it depends on the libshaderc
library (and depends on the latest stable version of it). This allows
for greater performance and flexibility than static built-in shaders
and also eliminates the cumbersome process of interfacing with glslang
to compile GLSL to SPIR-V.

It's based on the common OpenCL filter code and provides similar interfaces for
filter pad init and config, with the addition that it also supports
in-place filtering.

Signed-off-by: Rostislav Pehlivanov <***@gmail.com>
---
configure | 10 +-
libavfilter/vulkan.c | 1186 ++++++++++++++++++++++++++++++++++++++++++
libavfilter/vulkan.h | 223 ++++++++
3 files changed, 1418 insertions(+), 1 deletion(-)
create mode 100644 libavfilter/vulkan.c
create mode 100644 libavfilter/vulkan.h

diff --git a/configure b/configure
index 5f4407b753..52c1e7a6e8 100755
--- a/configure
+++ b/configure
@@ -252,6 +252,7 @@ External library support:
--enable-librsvg enable SVG rasterization via librsvg [no]
--enable-librubberband enable rubberband needed for rubberband filter [no]
--enable-librtmp enable RTMP[E] support via librtmp [no]
+ --enable-libshaderc enable GLSL->SPIRV compilation via libshaderc [no]
--enable-libshine enable fixed-point MP3 encoding via libshine [no]
--enable-libsmbclient enable Samba protocol via libsmbclient [no]
--enable-libsnappy enable Snappy compression, needed for hap encoding [no]
@@ -1707,6 +1708,7 @@ EXTERNAL_LIBRARY_LIST="
libpulse
librsvg
librtmp
+ libshaderc
libshine
libsmbclient
libsnappy
@@ -2225,6 +2227,7 @@ HAVE_LIST="
opencl_dxva2
opencl_vaapi_beignet
opencl_vaapi_intel_media
+ shaderc_opt_perf
vulkan_drm_mod
perl
pod2man
@@ -3461,7 +3464,7 @@ avformat_deps="avcodec avutil"
avformat_suggest="libm network zlib"
avresample_deps="avutil"
avresample_suggest="libm"
-avutil_suggest="clock_gettime ffnvcodec libm libdrm libmfx opencl user32 vaapi videotoolbox corefoundation corevideo coremedia bcrypt"
+avutil_suggest="clock_gettime ffnvcodec libm libdrm libmfx opencl vulkan libshaderc user32 vaapi videotoolbox corefoundation corevideo coremedia bcrypt"
postproc_deps="avutil gpl"
postproc_suggest="libm"
swresample_deps="avutil"
@@ -6050,6 +6053,7 @@ enabled libpulse && require_pkg_config libpulse libpulse pulse/pulseaud
enabled librsvg && require_pkg_config librsvg librsvg-2.0 librsvg-2.0/librsvg/rsvg.h rsvg_handle_render_cairo
enabled librtmp && require_pkg_config librtmp librtmp librtmp/rtmp.h RTMP_Socket
enabled librubberband && require_pkg_config librubberband "rubberband >= 1.8.1" rubberband/rubberband-c.h rubberband_new -lstdc++ && append librubberband_extralibs "-lstdc++"
+enabled libshaderc && require libshaderc shaderc/shaderc.h shaderc_compiler_initialize -lshaderc_shared
enabled libshine && require_pkg_config libshine shine shine/layer3.h shine_encode_buffer
enabled libsmbclient && { check_pkg_config libsmbclient smbclient libsmbclient.h smbc_init ||
require libsmbclient libsmbclient.h smbc_init -lsmbclient; }
@@ -6355,6 +6359,10 @@ enabled crystalhd && check_lib crystalhd "stdint.h libcrystalhd/libcrystalhd_if.
enabled vulkan &&
require_pkg_config vulkan "vulkan >= 1.1.73" "vulkan/vulkan.h" vkCreateInstance

+if enabled_all vulkan libshaderc ; then
+ check_cc shaderc_opt_perf shaderc/shaderc.h "int t = shaderc_optimization_level_performance"
+fi
+
if enabled_all vulkan libdrm ; then
check_cpp_condition vulkan_drm_mod vulkan/vulkan.h "defined VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME"
fi
diff --git a/libavfilter/vulkan.c b/libavfilter/vulkan.c
new file mode 100644
index 0000000000..1df3b7a1f2
--- /dev/null
+++ b/libavfilter/vulkan.c
@@ -0,0 +1,1186 @@
+/*
+ * Vulkan utilities
+ * Copyright (c) 2018 Rostislav Pehlivanov <***@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "formats.h"
+#include "vulkan.h"
+
+/* Declares a local function pointer named pfn_<name> and initialises it
+ * with the result of vkGetInstanceProcAddr(inst, "<name>"). */
+#define VK_LOAD_PFN(inst, name)                                               \
+    PFN_##name pfn_##name = (PFN_##name)vkGetInstanceProcAddr(inst, #name)
+
+/* Converts return values to strings */
+const char *ff_vk_ret2str(VkResult res)
+{
+#define CASE(VAL) case VAL: return #VAL
+ switch (res) {
+ CASE(VK_SUCCESS);
+ CASE(VK_NOT_READY);
+ CASE(VK_TIMEOUT);
+ CASE(VK_EVENT_SET);
+ CASE(VK_EVENT_RESET);
+ CASE(VK_INCOMPLETE);
+ CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
+ CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ CASE(VK_ERROR_INITIALIZATION_FAILED);
+ CASE(VK_ERROR_DEVICE_LOST);
+ CASE(VK_ERROR_MEMORY_MAP_FAILED);
+ CASE(VK_ERROR_LAYER_NOT_PRESENT);
+ CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
+ CASE(VK_ERROR_FEATURE_NOT_PRESENT);
+ CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
+ CASE(VK_ERROR_TOO_MANY_OBJECTS);
+ CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
+ CASE(VK_ERROR_FRAGMENTED_POOL);
+ CASE(VK_ERROR_SURFACE_LOST_KHR);
+ CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
+ CASE(VK_SUBOPTIMAL_KHR);
+ CASE(VK_ERROR_OUT_OF_DATE_KHR);
+ CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
+ CASE(VK_ERROR_VALIDATION_FAILED_EXT);
+ CASE(VK_ERROR_INVALID_SHADER_NV);
+ CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
+ CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ CASE(VK_ERROR_NOT_PERMITTED_EXT);
+ default: return "Unknown error";
+ }
+#undef CASE
+}
+
+/**
+ * Allocates device memory that satisfies both the resource requirements in
+ * req and the property flags in req_flags.
+ *
+ * @param avctx           filter context; its priv data is a VulkanFilterContext
+ * @param req             memory requirements; req->size is rounded up to
+ *                        minMemoryMapAlignment when host-visible memory is
+ *                        requested
+ * @param req_flags       property flags the chosen memory type must include
+ * @param alloc_extension chained into VkMemoryAllocateInfo.pNext
+ * @param mem_flags       the property flags of the chosen memory type are
+ *                        OR-ed into this value
+ * @param mem             receives the allocated memory handle
+ * @return 0 on success, AVERROR(EINVAL) if no memory type matches,
+ *         AVERROR(ENOMEM) if vkAllocateMemory() fails
+ */
+int ff_vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req,
+                    VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+                    VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
+{
+    VkResult ret;
+    int index = -1;
+    VkPhysicalDeviceProperties props;
+    VkPhysicalDeviceMemoryProperties mprops;
+    VulkanFilterContext *s = avctx->priv;
+
+    VkMemoryAllocateInfo alloc_info = {
+        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+        .pNext = alloc_extension,
+    };
+
+    vkGetPhysicalDeviceProperties(s->hwctx->phys_dev, &props);
+    vkGetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &mprops);
+
+    /* Align if we need to */
+    if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+        req->size = FFALIGN(req->size, props.limits.minMemoryMapAlignment);
+
+    alloc_info.allocationSize = req->size;
+
+    /* The vulkan spec requires memory types to be sorted in the "optimal"
+     * order, so the first matching type we find will be the best/fastest one */
+    for (uint32_t i = 0; i < mprops.memoryTypeCount; i++) {
+        /* The memory type must be supported by the requirements (bitfield).
+         * The mask is built unsigned: memoryTypeBits is a 32-bit field and a
+         * signed 1 << 31 would be undefined. */
+        if (!(req->memoryTypeBits & (1U << i)))
+            continue;
+
+        /* The memory type flags must include our properties */
+        if ((mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
+            continue;
+
+        /* Found a suitable memory type */
+        index = i;
+        break;
+    }
+
+    if (index < 0) {
+        av_log(avctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
+               req_flags);
+        return AVERROR(EINVAL);
+    }
+
+    alloc_info.memoryTypeIndex = index;
+
+    ret = vkAllocateMemory(s->hwctx->act_dev, &alloc_info,
+                           s->hwctx->alloc, mem);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR(ENOMEM);
+    }
+
+    *mem_flags |= mprops.memoryTypes[index].propertyFlags;
+
+    return 0;
+}
+
+/**
+ * Creates a buffer object, allocates memory for it through ff_vk_alloc_mem()
+ * and binds the two together at offset 0.
+ *
+ * @param avctx filter context; its priv data is a VulkanFilterContext
+ * @param buf   receives the buffer handle, the memory handle and the memory
+ *              property flags
+ * @param size  requested buffer size in bytes
+ * @param usage buffer usage flags
+ * @param flags memory property flags the backing memory must have
+ * @return 0 on success, a negative AVERROR code otherwise. Nothing is
+ *         released here on failure; handles created so far stay in buf.
+ */
+int ff_vk_create_buf(AVFilterContext *avctx, FFVkBuffer *buf, size_t size,
+                     VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
+{
+    int err;
+    VkResult ret;
+    VkMemoryRequirements req;
+    VulkanFilterContext *s = avctx->priv;
+
+    VkBufferCreateInfo buf_spawn = {
+        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext       = NULL,
+        .usage       = usage,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .size        = size, /* Gets FFALIGNED during alloc if host visible
+                                but should be ok */
+    };
+
+    /* Created with the same allocator ff_vk_free_buf() hands to
+     * vkDestroyBuffer(), so creation and destruction callbacks match. */
+    ret = vkCreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc,
+                         &buf->buf);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    vkGetBufferMemoryRequirements(s->hwctx->act_dev, buf->buf, &req);
+
+    err = ff_vk_alloc_mem(avctx, &req, flags, NULL, &buf->flags, &buf->mem);
+    if (err)
+        return err;
+
+    ret = vkBindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+/**
+ * Maps the memory of nb_buffers buffers and optionally invalidates the
+ * mappings of those whose memory is not host-coherent.
+ *
+ * @param avctx      filter context; its priv data is a VulkanFilterContext
+ * @param buf        array of nb_buffers buffers to map
+ * @param mem        receives one mapped pointer per buffer
+ * @param nb_buffers number of entries in buf and mem
+ * @param invalidate if non-zero, invalidate non-coherent mappings
+ * @return 0 on success, a negative AVERROR code otherwise. Buffers mapped
+ *         before a failure are left mapped.
+ */
+int ff_vk_map_buffers(AVFilterContext *avctx, FFVkBuffer *buf, uint8_t *mem[],
+                      int nb_buffers, int invalidate)
+{
+    VkResult ret;
+    VulkanFilterContext *s = avctx->priv;
+    VkMappedMemoryRange *inval_list = NULL;
+    int inval_count = 0;
+
+    for (int i = 0; i < nb_buffers; i++) {
+        ret = vkMapMemory(s->hwctx->act_dev, buf[i].mem, 0,
+                          VK_WHOLE_SIZE, 0, (void **)&mem[i]);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    if (!invalidate)
+        return 0;
+
+    for (int i = 0; i < nb_buffers; i++) {
+        void *grown;
+        const VkMappedMemoryRange ival_buf = {
+            .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+            .memory = buf[i].mem,
+            .size   = VK_WHOLE_SIZE,
+        };
+        if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+            continue;
+
+        grown = av_fast_realloc(s->scratch, &s->scratch_size,
+                                (inval_count + 1)*sizeof(*inval_list));
+        if (!grown)
+            return AVERROR(ENOMEM);
+
+        /* Store the (possibly moved) buffer back in the context. Otherwise
+         * scratch_size grows while s->scratch stays stale, and a later call
+         * that fits in the recorded size gets the stale pointer back. */
+        s->scratch = grown;
+        inval_list = grown;
+        inval_list[inval_count++] = ival_buf;
+    }
+
+    if (inval_count) {
+        ret = vkInvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
+                                             inval_list);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Optionally flushes the mappings of non-host-coherent buffers, then unmaps
+ * all nb_buffers buffers.
+ *
+ * @param avctx      filter context; its priv data is a VulkanFilterContext
+ * @param buf        array of nb_buffers mapped buffers
+ * @param nb_buffers number of entries in buf
+ * @param flush      if non-zero, flush non-coherent mappings before unmapping
+ * @return 0 on success, a negative AVERROR code if building the flush list
+ *         or the flush itself failed. The buffers are unmapped in every case.
+ */
+int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers,
+                        int flush)
+{
+    int err = 0;
+    VkResult ret;
+    VulkanFilterContext *s = avctx->priv;
+    VkMappedMemoryRange *flush_list = NULL;
+    int flush_count = 0;
+
+    if (flush) {
+        for (int i = 0; i < nb_buffers; i++) {
+            void *grown;
+            const VkMappedMemoryRange flush_buf = {
+                .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+                .memory = buf[i].mem,
+                .size   = VK_WHOLE_SIZE,
+            };
+            if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+                continue;
+
+            grown = av_fast_realloc(s->scratch, &s->scratch_size,
+                                    (flush_count + 1)*sizeof(*flush_list));
+            if (!grown) {
+                /* Skip the flush but still unmap below */
+                err = AVERROR(ENOMEM);
+                flush_count = 0;
+                break;
+            }
+
+            /* Keep the context pointer in sync with scratch_size */
+            s->scratch = grown;
+            flush_list = grown;
+            flush_list[flush_count++] = flush_buf;
+        }
+    }
+
+    if (flush_count) {
+        ret = vkFlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
+                                        flush_list);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
+                   ff_vk_ret2str(ret));
+            err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
+        }
+    }
+
+    for (int i = 0; i < nb_buffers; i++)
+        vkUnmapMemory(s->hwctx->act_dev, buf[i].mem);
+
+    return err;
+}
+
+/* Destroys the buffer object and frees its memory; handles that are
+ * VK_NULL_HANDLE are skipped and a NULL buf is ignored. */
+void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf)
+{
+    VulkanFilterContext *ctx = avctx->priv;
+
+    if (buf) {
+        if (buf->buf != VK_NULL_HANDLE)
+            vkDestroyBuffer(ctx->hwctx->act_dev, buf->buf, ctx->hwctx->alloc);
+
+        if (buf->mem != VK_NULL_HANDLE)
+            vkFreeMemory(ctx->hwctx->act_dev, buf->mem, ctx->hwctx->alloc);
+    }
+}
+
+/**
+ * Creates a command pool, one primary command buffer and a fence, and
+ * fetches queue 0 of the given queue family.
+ *
+ * @param avctx filter context; its priv data is a VulkanFilterContext
+ * @param e     execution context to fill in
+ * @param queue queue family index
+ * @return 0 on success, AVERROR_EXTERNAL if any Vulkan call fails. Objects
+ *         created before the failure stay in e.
+ */
+int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e, int queue)
+{
+    VkResult ret;
+    VulkanFilterContext *s = avctx->priv;
+
+    VkCommandPoolCreateInfo cqueue_create = {
+        .sType            = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+        .flags            = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+        .queueFamilyIndex = queue,
+    };
+    VkCommandBufferAllocateInfo cbuf_create = {
+        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+        .commandBufferCount = 1,
+    };
+    VkFenceCreateInfo fence_spawn = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
+
+    /* Failures return a negative AVERROR code like the rest of this file,
+     * so callers that test "err < 0" notice them. */
+    ret = vkCreateCommandPool(s->hwctx->act_dev, &cqueue_create,
+                              s->hwctx->alloc, &e->pool);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Command pool creation failure: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    cbuf_create.commandPool = e->pool;
+
+    ret = vkAllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, &e->buf);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    ret = vkCreateFence(s->hwctx->act_dev, &fence_spawn,
+                        s->hwctx->alloc, &e->fence);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    vkGetDeviceQueue(s->hwctx->act_dev, queue, 0, &e->queue);
+
+    return 0;
+}
+
+/* Destroys the fence, the command buffer and the command pool of an
+ * execution context, in that order; VK_NULL_HANDLE members are skipped and
+ * a NULL e is ignored. */
+void ff_vk_free_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e)
+{
+    VulkanFilterContext *ctx = avctx->priv;
+
+    if (e) {
+        if (e->fence != VK_NULL_HANDLE)
+            vkDestroyFence(ctx->hwctx->act_dev, e->fence, ctx->hwctx->alloc);
+
+        if (e->buf != VK_NULL_HANDLE)
+            vkFreeCommandBuffers(ctx->hwctx->act_dev, e->pool, 1, &e->buf);
+
+        if (e->pool != VK_NULL_HANDLE)
+            vkDestroyCommandPool(ctx->hwctx->act_dev, e->pool,
+                                 ctx->hwctx->alloc);
+    }
+}
+
+/* Restricts the filter's common formats to AV_PIX_FMT_VULKAN. */
+int ff_vk_filter_query_formats(AVFilterContext *avctx)
+{
+    static const enum AVPixelFormat hw_only[] = {
+        AV_PIX_FMT_VULKAN,
+        AV_PIX_FMT_NONE,
+    };
+    AVFilterFormats *list = ff_make_format_list(hw_only);
+
+    return list ? ff_set_common_formats(avctx, list) : AVERROR(ENOMEM);
+}
+
+/* Replaces the context's device reference with a new reference to device
+ * and caches the device context and its hwctx pointer.
+ * Returns 0, or AVERROR(ENOMEM) if the reference could not be created. */
+static int vulkan_filter_set_device(AVFilterContext *avctx,
+                                    AVBufferRef *device)
+{
+    VulkanFilterContext *ctx = avctx->priv;
+
+    /* Drop the previous reference before taking the new one */
+    av_buffer_unref(&ctx->device_ref);
+    ctx->device_ref = av_buffer_ref(device);
+
+    if (ctx->device_ref) {
+        ctx->device = (AVHWDeviceContext*)ctx->device_ref->data;
+        ctx->hwctx  = ctx->device->hwctx;
+        return 0;
+    }
+
+    return AVERROR(ENOMEM);
+}
+
+/* Replaces the context's frames reference with a new reference to frames.
+ * Returns 0, or AVERROR(ENOMEM) if the reference could not be created. */
+static int vulkan_filter_set_frames(AVFilterContext *avctx,
+                                    AVBufferRef *frames)
+{
+    VulkanFilterContext *ctx = avctx->priv;
+
+    /* Drop the previous reference before taking the new one */
+    av_buffer_unref(&ctx->frames_ref);
+    ctx->frames_ref = av_buffer_ref(frames);
+
+    return ctx->frames_ref ? 0 : AVERROR(ENOMEM);
+}
+
+/* Input link configuration shared by the Vulkan filters.
+ * Every input must carry a hardware frames context. Only the first input
+ * supplies the device and frames references and the default output
+ * format/size; other inputs return 0 right after the presence check. */
+int ff_vk_filter_config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *avctx = inlink->dst;
+    VulkanFilterContext *ctx = avctx->priv;
+    AVHWFramesContext *in_frames;
+    int ret;
+
+    if (!inlink->hw_frames_ctx) {
+        av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
+               "hardware frames context on the input.\n");
+        return AVERROR(EINVAL);
+    }
+
+    /* Extract the device and default output format from the first input. */
+    if (inlink != avctx->inputs[0])
+        return 0;
+
+    in_frames = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
+    if (in_frames->format != AV_PIX_FMT_VULKAN)
+        return AVERROR(EINVAL);
+
+    ret = vulkan_filter_set_device(avctx, in_frames->device_ref);
+    if (ret < 0)
+        return ret;
+
+    ret = vulkan_filter_set_frames(avctx, inlink->hw_frames_ctx);
+    if (ret < 0)
+        return ret;
+
+    /* Default output parameters match input parameters. */
+    ctx->input_format = in_frames->sw_format;
+
+    if (ctx->output_format == AV_PIX_FMT_NONE)
+        ctx->output_format = in_frames->sw_format;
+
+    if (!ctx->output_width)
+        ctx->output_width = inlink->w;
+
+    if (!ctx->output_height)
+        ctx->output_height = inlink->h;
+
+    return 0;
+}
+
+/**
+ * Output link configuration for filters that reuse the input frames
+ * context: the output link gets a new reference to the frames context
+ * stored in the filter context, plus the configured output size.
+ *
+ * @return 0 on success; AVERROR(EINVAL) if there is no Vulkan device or no
+ *         stored frames context; AVERROR(ENOMEM) if the reference cannot be
+ *         created
+ */
+int ff_vk_filter_config_output_inplace(AVFilterLink *outlink)
+{
+    int err;
+    AVFilterContext *avctx = outlink->src;
+    VulkanFilterContext *s = avctx->priv;
+
+    av_buffer_unref(&outlink->hw_frames_ctx);
+
+    if (!s->device_ref) {
+        if (!avctx->hw_device_ctx) {
+            av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
+                   "Vulkan device.\n");
+            return AVERROR(EINVAL);
+        }
+
+        err = vulkan_filter_set_device(avctx, avctx->hw_device_ctx);
+        if (err < 0)
+            return err;
+    }
+
+    /* The device may have come from avctx->hw_device_ctx just above, in
+     * which case no frames reference has been stored yet. */
+    if (!s->frames_ref) {
+        av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires an input "
+               "frames context to run in place.\n");
+        return AVERROR(EINVAL);
+    }
+
+    outlink->hw_frames_ctx = av_buffer_ref(s->frames_ref);
+    if (!outlink->hw_frames_ctx)
+        return AVERROR(ENOMEM);
+
+    outlink->w = s->output_width;
+    outlink->h = s->output_height;
+
+    return 0;
+}
+
+/* Output link configuration for filters that need their own frames:
+ * allocates and initialises a new Vulkan frames context on the filter's
+ * device using the configured output format and size, and attaches it to
+ * the output link. */
+int ff_vk_filter_config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *avctx = outlink->src;
+    VulkanFilterContext *ctx = avctx->priv;
+    AVBufferRef *frames_ref;
+    AVHWFramesContext *frames;
+    int ret;
+
+    av_buffer_unref(&outlink->hw_frames_ctx);
+
+    if (!ctx->device_ref) {
+        if (!avctx->hw_device_ctx) {
+            av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
+                   "Vulkan device.\n");
+            return AVERROR(EINVAL);
+        }
+
+        ret = vulkan_filter_set_device(avctx, avctx->hw_device_ctx);
+        if (ret < 0)
+            return ret;
+    }
+
+    frames_ref = av_hwframe_ctx_alloc(ctx->device_ref);
+    if (!frames_ref)
+        return AVERROR(ENOMEM);
+
+    frames            = (AVHWFramesContext*)frames_ref->data;
+    frames->format    = AV_PIX_FMT_VULKAN;
+    frames->sw_format = ctx->output_format;
+    frames->width     = ctx->output_width;
+    frames->height    = ctx->output_height;
+
+    ret = av_hwframe_ctx_init(frames_ref);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to initialise output "
+               "frames: %d.\n", ret);
+        av_buffer_unref(&frames_ref);
+        return ret;
+    }
+
+    /* Ownership of the reference moves to the link */
+    outlink->hw_frames_ctx = frames_ref;
+    outlink->w             = ctx->output_width;
+    outlink->h             = ctx->output_height;
+
+    return 0;
+}
+
+/**
+ * Common filter init: resets the output format to AV_PIX_FMT_NONE and sets
+ * up the shaderc compiler and its options (Vulkan 1.1 target environment,
+ * performance optimisation level).
+ *
+ * NOTE(review): configure probes "shaderc_opt_perf", but the performance
+ * level is used unconditionally here; confirm whether a fallback was meant.
+ *
+ * @return 0 on success, AVERROR_EXTERNAL if either shaderc object could not
+ *         be created
+ */
+int ff_vk_filter_init(AVFilterContext *avctx)
+{
+    VulkanFilterContext *s = avctx->priv;
+    const shaderc_env_version opt_ver = shaderc_env_version_vulkan_1_1;
+    const shaderc_optimization_level opt_lvl = shaderc_optimization_level_performance;
+
+    s->output_format = AV_PIX_FMT_NONE;
+
+    s->sc_compiler = shaderc_compiler_initialize();
+    if (!s->sc_compiler)
+        return AVERROR_EXTERNAL;
+
+    s->sc_opts = shaderc_compile_options_initialize();
+    if (!s->sc_opts) /* check the options object, not the compiler again */
+        return AVERROR_EXTERNAL;
+
+    shaderc_compile_options_set_target_env(s->sc_opts,
+                                           shaderc_target_env_vulkan,
+                                           opt_ver);
+    shaderc_compile_options_set_optimization_level(s->sc_opts, opt_lvl);
+
+    return 0;
+}
+
+/**
+ * Common filter teardown: releases the shaderc objects, destroys the Vulkan
+ * objects recorded in the filter context, frees the bookkeeping arrays and
+ * drops the device and frames references.
+ */
+void ff_vk_filter_uninit(AVFilterContext *avctx)
+{
+    VulkanFilterContext *s = avctx->priv;
+
+    shaderc_compile_options_release(s->sc_opts);
+    shaderc_compiler_release(s->sc_compiler);
+
+    for (int i = 0; i < s->shaders_num; i++) {
+        SPIRVShader *shd = &s->shaders[i];
+        vkDestroyShaderModule(s->hwctx->act_dev, shd->shader.module,
+                              s->hwctx->alloc);
+    }
+
+    if (s->pipeline != VK_NULL_HANDLE)
+        vkDestroyPipeline(s->hwctx->act_dev, s->pipeline, s->hwctx->alloc);
+    if (s->pipeline_layout != VK_NULL_HANDLE)
+        vkDestroyPipelineLayout(s->hwctx->act_dev, s->pipeline_layout,
+                                s->hwctx->alloc);
+
+    if (s->samplers_num) {
+        /* Resolved once for all samplers; skipped if the lookup fails */
+        VK_LOAD_PFN(s->hwctx->inst, vkDestroySamplerYcbcrConversionKHR);
+
+        for (int i = 0; i < s->samplers_num; i++) {
+            VulkanSampler *sampler = &s->samplers[i];
+            vkDestroySampler(s->hwctx->act_dev, sampler->sampler,
+                             s->hwctx->alloc);
+            if (pfn_vkDestroySamplerYcbcrConversionKHR)
+                pfn_vkDestroySamplerYcbcrConversionKHR(s->hwctx->act_dev,
+                                                       sampler->yuv_conv.conversion,
+                                                       s->hwctx->alloc);
+        }
+    }
+
+    ff_vk_free_buf(avctx, &s->vbuffer);
+
+    if (s->descriptor_sets_num) {
+        /* Resolved once for all sets; skipped if the lookup fails */
+        VK_LOAD_PFN(s->hwctx->inst, vkDestroyDescriptorUpdateTemplateKHR);
+
+        for (int i = 0; i < s->descriptor_sets_num; i++) {
+            if (pfn_vkDestroyDescriptorUpdateTemplateKHR)
+                pfn_vkDestroyDescriptorUpdateTemplateKHR(s->hwctx->act_dev,
+                                                         s->desc_template[i],
+                                                         s->hwctx->alloc);
+            vkDestroyDescriptorSetLayout(s->hwctx->act_dev, s->desc_layout[i],
+                                         s->hwctx->alloc);
+        }
+    }
+
+    if (s->desc_pool != VK_NULL_HANDLE)
+        vkDestroyDescriptorPool(s->hwctx->act_dev, s->desc_pool,
+                                s->hwctx->alloc);
+
+    /* Only freed in case of failure; must run while descriptor_sets_num
+     * still describes the array */
+    if (s->desc_template_info) {
+        for (int i = 0; i < s->descriptor_sets_num; i++)
+            av_free((void *)s->desc_template_info[i].pDescriptorUpdateEntries);
+        av_freep(&s->desc_template_info);
+    }
+    av_freep(&s->push_consts);
+    av_freep(&s->pool_size_desc);
+
+    av_freep(&s->desc_layout);
+    av_freep(&s->desc_set); /* handle array from ff_vk_init_pipeline_layout() */
+    av_freep(&s->scratch);  /* range list used by the map/unmap helpers */
+    av_freep(&s->shaders);
+    av_freep(&s->samplers);
+    av_buffer_unref(&s->device_ref);
+    av_buffer_unref(&s->frames_ref);
+}
+
+/**
+ * Appends a new shader to the filter context and starts its GLSL source
+ * with the version line and the AREA()/IS_WITHIN() helper macros.
+ *
+ * @param avctx filter context; its priv data is a VulkanFilterContext
+ * @param name  shader name; the pointer is stored, not copied
+ * @param stage pipeline stage the shader is for
+ * @return the new shader, or NULL if the shader array could not be grown.
+ *         The pointer refers into a reallocated array, so a later call may
+ *         move it.
+ */
+SPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, const char *name,
+                               VkShaderStageFlags stage)
+{
+    SPIRVShader *shd;
+    void *grown;
+    VulkanFilterContext *s = avctx->priv;
+
+    /* Grow through a temporary so a failed realloc neither leaks the old
+     * array nor leaves shaders_num describing a NULL pointer. */
+    grown = av_realloc_array(s->shaders, s->shaders_num + 1,
+                             sizeof(*s->shaders));
+    if (!grown)
+        return NULL;
+    s->shaders = grown;
+
+    shd = &s->shaders[s->shaders_num++];
+    memset(shd, 0, sizeof(*shd));
+    av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED);
+
+    shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+    shd->shader.stage = stage;
+
+    shd->name = name;
+
+    GLSLF(0, #version %i ,460);
+    GLSLC(0, #define AREA(v) ((v).x*(v).y) );
+    GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) );
+    GLSLC(0, );
+
+    return shd;
+}
+
+/* Records the compute work-group size in the shader and appends the
+ * matching "layout (local_size_*) in;" declaration to its source. */
+void ff_vk_set_compute_shader_sizes(AVFilterContext *avctx, SPIRVShader *shd,
+                                    int local_size[3])
+{
+    for (int axis = 0; axis < 3; axis++)
+        shd->local_size[axis] = local_size[axis];
+
+    av_bprintf(&shd->src, "layout (local_size_x = %i, "
+               "local_size_y = %i, local_size_z = %i) in;\n",
+               shd->local_size[0], shd->local_size[1], shd->local_size[2]);
+}
+
+/* Logs the shader source at verbose level with a line number and a tab in
+ * front of every newline-terminated line. Text after the last newline is
+ * not printed. */
+static void print_shader(AVFilterContext *avctx, SPIRVShader *shd)
+{
+    int line = 0;
+    const char *p = shd->src.str;
+    const char *start = p;
+    const size_t len = strlen(p); /* measured once, not once per character */
+
+    AVBPrint buf;
+    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
+
+    for (size_t i = 0; i < len; i++) {
+        if (p[i] == '\n') {
+            av_bprintf(&buf, "%i\t", ++line);
+            av_bprint_append_data(&buf, start, &p[i] - start + 1);
+            start = &p[i + 1];
+        }
+    }
+
+    av_log(avctx, AV_LOG_VERBOSE, "Compiling shader %s: \n%s\n",
+           shd->name, buf.str);
+    av_bprint_finalize(&buf, NULL);
+}
+
+/* Translates a single Vulkan shader stage bit into the shaderc shader kind.
+ * Returns 0 and fills *kind, or AVERROR(EINVAL) for any other value. */
+static int shader_kind_from_stage(VkShaderStageFlagBits stage,
+                                  shaderc_shader_kind *kind)
+{
+    switch (stage) {
+    case VK_SHADER_STAGE_VERTEX_BIT:
+        *kind = shaderc_vertex_shader;
+        return 0;
+    case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
+        *kind = shaderc_tess_control_shader;
+        return 0;
+    case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
+        *kind = shaderc_tess_evaluation_shader;
+        return 0;
+    case VK_SHADER_STAGE_GEOMETRY_BIT:
+        *kind = shaderc_geometry_shader;
+        return 0;
+    case VK_SHADER_STAGE_FRAGMENT_BIT:
+        *kind = shaderc_fragment_shader;
+        return 0;
+    case VK_SHADER_STAGE_COMPUTE_BIT:
+        *kind = shaderc_compute_shader;
+        return 0;
+    default:
+        return AVERROR(EINVAL);
+    }
+}
+
+/**
+ * Compiles the GLSL accumulated in shd->src to SPIR-V with shaderc and
+ * creates the Vulkan shader module from it. The source buffer is released
+ * whether or not compilation succeeds.
+ *
+ * @param avctx filter context; its priv data is a VulkanFilterContext
+ * @param shd   shader to compile
+ * @param entry entry point name; the pointer is stored in shd->shader.pName
+ * @return 0 on success, AVERROR(EINVAL) for a stage with no shaderc kind,
+ *         AVERROR_EXTERNAL if compilation or module creation fails
+ */
+int ff_vk_compile_shader(AVFilterContext *avctx, SPIRVShader *shd,
+                         const char *entry)
+{
+    int err;
+    VkResult ret;
+    VulkanFilterContext *s = avctx->priv;
+    VkShaderModuleCreateInfo shader_create;
+    shaderc_shader_kind kind;
+    shaderc_compilation_result_t res;
+
+    shd->shader.pName = entry;
+
+    /* A switch instead of a table indexed by the raw stage bit: other
+     * values can no longer read outside the table or silently map to
+     * the vertex kind. */
+    err = shader_kind_from_stage(shd->shader.stage, &kind);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Unsupported shader stage 0x%x\n",
+               (unsigned)shd->shader.stage);
+        av_bprint_finalize(&shd->src, NULL);
+        return err;
+    }
+
+    print_shader(avctx, shd);
+
+    res = shaderc_compile_into_spv(s->sc_compiler, shd->src.str, shd->src.len,
+                                   kind, shd->name, entry, s->sc_opts);
+    av_bprint_finalize(&shd->src, NULL);
+
+    if (!res) {
+        av_log(avctx, AV_LOG_ERROR, "Shader compiler returned no result\n");
+        return AVERROR_EXTERNAL;
+    }
+
+    if (shaderc_result_get_compilation_status(res) !=
+        shaderc_compilation_status_success) {
+        av_log(avctx, AV_LOG_ERROR, "%s", shaderc_result_get_error_message(res));
+        shaderc_result_release(res); /* was leaked on this path */
+        return AVERROR_EXTERNAL;
+    }
+
+    shader_create.sType    = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
+    shader_create.pNext    = NULL;
+    shader_create.codeSize = shaderc_result_get_length(res);
+    shader_create.flags    = 0;
+    shader_create.pCode    = (const uint32_t *)shaderc_result_get_bytes(res);
+
+    /* Same allocator as the vkDestroyShaderModule() call in uninit */
+    ret = vkCreateShaderModule(s->hwctx->act_dev, &shader_create,
+                               s->hwctx->alloc, &shd->shader.module);
+    shaderc_result_release(res);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to create shader module: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    av_log(avctx, AV_LOG_VERBOSE, "Shader linked! Size: %zu bytes\n",
+           shader_create.codeSize);
+
+    return 0;
+}
+
+/* Picks the Vulkan YCbCr model conversion for the given colour primaries;
+ * anything other than BT.470BG and BT.2020 is treated as BT.709. */
+static VkSamplerYcbcrModelConversion conv_primaries(enum AVColorPrimaries color_primaries)
+{
+    if (color_primaries == AVCOL_PRI_BT470BG)
+        return VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601;
+
+    if (color_primaries == AVCOL_PRI_BT2020)
+        return VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020;
+
+    /* AVCOL_PRI_BT709, and just assume 709 for the rest */
+    return VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709;
+}
+
+/**
+ * Creates a sampler and appends it to the filter context. When input is
+ * non-NULL the sampler also gets a YCbCr conversion built from the frame's
+ * colour properties and the context's input format.
+ *
+ * @param avctx         filter context; its priv data is a VulkanFilterContext
+ * @param input         frame describing the colour properties, or NULL for
+ *                      a plain sampler
+ * @param unnorm_coords non-zero to use unnormalized coordinates; rejected
+ *                      together with a converting sampler
+ * @param filt          filter used for both magnification and minification
+ * @return the new sampler, or NULL on failure. The pointer refers into a
+ *         reallocated array, so a later call may move it.
+ */
+const VulkanSampler *ff_vk_init_sampler(AVFilterContext *avctx, AVFrame *input,
+                                        int unnorm_coords, VkFilter filt)
+{
+    VkResult ret;
+    VulkanFilterContext *s = avctx->priv;
+    VulkanSampler *sampler;
+    void *grown;
+
+    /* Every field is given explicitly: an initializer that reads other
+     * members of the object being initialised depends on an unspecified
+     * evaluation order. */
+    VkSamplerCreateInfo sampler_info = {
+        .sType                   = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+        .magFilter               = filt,
+        .minFilter               = filt,
+        .mipmapMode              = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST :
+                                                   VK_SAMPLER_MIPMAP_MODE_LINEAR,
+        .addressModeU            = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+        .addressModeV            = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+        .addressModeW            = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+        .anisotropyEnable        = VK_FALSE,
+        .compareOp               = VK_COMPARE_OP_NEVER,
+        .borderColor             = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
+        .unnormalizedCoordinates = unnorm_coords,
+    };
+
+    /* Reject the invalid combination before a sampler slot is consumed */
+    if (input && unnorm_coords) {
+        av_log(avctx, AV_LOG_ERROR, "Cannot create a converting sampler "
+               "with unnormalized addressing, forbidden by spec!\n");
+        return NULL;
+    }
+
+    /* Grow through a temporary so a failed realloc keeps the old array */
+    grown = av_realloc_array(s->samplers, s->samplers_num + 1,
+                             sizeof(*s->samplers));
+    if (!grown)
+        return NULL;
+    s->samplers = grown;
+
+    sampler = &s->samplers[s->samplers_num++];
+    memset(sampler, 0, sizeof(*sampler));
+
+    sampler->converting = !!input;
+    sampler->yuv_conv.sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO;
+
+    if (input) {
+        VkSamplerYcbcrConversion *conv = &sampler->yuv_conv.conversion;
+        VkComponentMapping comp_map = {
+            .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+        };
+
+        VkSamplerYcbcrConversionCreateInfo c_info = {
+            .sType         = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO,
+            .format        = av_vkfmt_from_pixfmt(s->input_format),
+            .chromaFilter  = VK_FILTER_LINEAR,
+            .ycbcrModel    = conv_primaries(input->color_primaries),
+            .ycbcrRange    = input->color_range == AVCOL_RANGE_JPEG ?
+                             VK_SAMPLER_YCBCR_RANGE_ITU_FULL :
+                             VK_SAMPLER_YCBCR_RANGE_ITU_NARROW,
+            .xChromaOffset = input->chroma_location == AVCHROMA_LOC_CENTER ?
+                             VK_CHROMA_LOCATION_MIDPOINT :
+                             VK_CHROMA_LOCATION_COSITED_EVEN,
+            .components    = comp_map,
+        };
+
+        VK_LOAD_PFN(s->hwctx->inst, vkCreateSamplerYcbcrConversionKHR);
+
+        /* Chain the conversion info into the sampler creation */
+        sampler_info.pNext = &sampler->yuv_conv;
+
+        ret = pfn_vkCreateSamplerYcbcrConversionKHR(s->hwctx->act_dev, &c_info,
+                                                    s->hwctx->alloc, conv);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to init conversion: %s\n",
+                   ff_vk_ret2str(ret));
+            return NULL;
+        }
+    }
+
+    ret = vkCreateSampler(s->hwctx->act_dev, &sampler_info,
+                          s->hwctx->alloc, &sampler->sampler);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to init sampler: %s\n",
+               ff_vk_ret2str(ret));
+        return NULL;
+    }
+
+    return sampler;
+}
+
+/**
+ * Appends a push constant range to the filter context.
+ *
+ * @param avctx  filter context; its priv data is a VulkanFilterContext
+ * @param offset start of the range
+ * @param size   size of the range
+ * @param stage  shader stage(s) that access the range
+ * @return the index of the new range, or AVERROR(ENOMEM) if the array
+ *         could not be grown (existing ranges are kept in that case)
+ */
+int ff_vk_add_push_constant(AVFilterContext *avctx, int offset, int size,
+                            VkShaderStageFlagBits stage)
+{
+    VkPushConstantRange *pc;
+    void *grown;
+    VulkanFilterContext *s = avctx->priv;
+
+    /* Grow through a temporary so a failed realloc neither leaks the old
+     * array nor leaves push_consts_num describing a NULL pointer. */
+    grown = av_realloc_array(s->push_consts, s->push_consts_num + 1,
+                             sizeof(*s->push_consts));
+    if (!grown)
+        return AVERROR(ENOMEM);
+    s->push_consts = grown;
+
+    pc = &s->push_consts[s->push_consts_num++];
+    memset(pc, 0, sizeof(*pc));
+
+    pc->stageFlags = stage;
+    pc->offset     = offset;
+    pc->size       = size;
+
+    return s->push_consts_num - 1;
+}
+
+/* Per-descriptor-type properties, indexed by VkDescriptorType. Used to size
+ * descriptor update template entries and to emit the GLSL declaration of
+ * each binding. Fields not listed in an entry are zero/NULL. */
+static const struct descriptor_props {
+    size_t struct_size; /* Size of the opaque which updates the descriptor */
+    const char *type;   /* GLSL type keyword, NULL if none is printed */
+    int is_uniform;     /* Declaration carries the "uniform" keyword */
+    int mem_quali;      /* Can use a memory qualifier */
+    int dim_needed;     /* Must indicate dimension */
+    int buf_content;    /* Must indicate buffer contents */
+} descriptor_props[] = {
+    [VK_DESCRIPTOR_TYPE_SAMPLER] = {
+        .struct_size = sizeof(VkDescriptorImageInfo), .type = "sampler",
+        .is_uniform = 1,
+    },
+    [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE] = {
+        .struct_size = sizeof(VkDescriptorImageInfo), .type = "texture",
+        .is_uniform = 1, .dim_needed = 1,
+    },
+    [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE] = {
+        .struct_size = sizeof(VkDescriptorImageInfo), .type = "image",
+        .is_uniform = 1, .mem_quali = 1, .dim_needed = 1,
+    },
+    [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT] = {
+        .struct_size = sizeof(VkDescriptorImageInfo), .type = "subpassInput",
+        .is_uniform = 1,
+    },
+    [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = {
+        .struct_size = sizeof(VkDescriptorImageInfo), .type = "sampler",
+        .is_uniform = 1, .dim_needed = 1,
+    },
+    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER] = {
+        .struct_size = sizeof(VkDescriptorBufferInfo), .type = NULL,
+        .is_uniform = 1, .buf_content = 1,
+    },
+    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER] = {
+        .struct_size = sizeof(VkDescriptorBufferInfo), .type = "buffer",
+        .mem_quali = 1, .buf_content = 1,
+    },
+    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = {
+        .struct_size = sizeof(VkDescriptorBufferInfo), .type = NULL,
+        .is_uniform = 1, .buf_content = 1,
+    },
+    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = {
+        .struct_size = sizeof(VkDescriptorBufferInfo), .type = "buffer",
+        .mem_quali = 1, .buf_content = 1,
+    },
+    [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER] = {
+        .struct_size = sizeof(VkBufferView), .type = "samplerBuffer",
+        .is_uniform = 1,
+    },
+    [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = {
+        .struct_size = sizeof(VkBufferView), .type = "imageBuffer",
+        .is_uniform = 1,
+    },
+};
+
+/**
+ * Registers a descriptor set made of num bindings and writes the matching
+ * GLSL layout declarations into the shader source.
+ *
+ * Unless only_print_to_shader is set, this creates the descriptor set
+ * layout, adds the bindings to the per-type pool size counts and prepares
+ * the update template creation info for the set.
+ *
+ * @param avctx                filter context; priv is a VulkanFilterContext
+ * @param shd                  shader that receives the GLSL declarations
+ * @param desc                 array of num binding descriptions
+ * @param num                  number of bindings
+ * @param only_print_to_shader if non-zero, only emit the GLSL for the most
+ *                             recently added set
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+int ff_vk_add_descriptor_set(AVFilterContext *avctx, SPIRVShader *shd,
+                             VulkanDescriptorSetBinding *desc, int num,
+                             int only_print_to_shader)
+{
+    VkResult ret;
+    VkDescriptorSetLayout *layout;
+    VulkanFilterContext *s = avctx->priv;
+
+    if (only_print_to_shader)
+        goto print;
+
+    { /* Make room for one more layout; the old array survives a failure */
+        void *grown = av_realloc_array(s->desc_layout,
+                                       s->descriptor_sets_num + 1,
+                                       sizeof(*s->desc_layout));
+        if (!grown)
+            return AVERROR(ENOMEM);
+        s->desc_layout = grown;
+    }
+
+    layout = &s->desc_layout[s->descriptor_sets_num];
+    memset(layout, 0, sizeof(*layout));
+
+    { /* Create descriptor set layout descriptions */
+        VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 };
+        VkDescriptorSetLayoutBinding *desc_binding;
+
+        desc_binding = av_mallocz(sizeof(*desc_binding)*num);
+        if (!desc_binding)
+            return AVERROR(ENOMEM);
+
+        for (int i = 0; i < num; i++) {
+            desc_binding[i].binding            = i;
+            desc_binding[i].descriptorType     = desc[i].type;
+            desc_binding[i].descriptorCount    = FFMAX(desc[i].elems, 1);
+            desc_binding[i].stageFlags         = desc[i].stages;
+            desc_binding[i].pImmutableSamplers = desc[i].samplers;
+        }
+
+        desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+        desc_create_layout.pBindings = desc_binding;
+        desc_create_layout.bindingCount = num;
+
+        ret = vkCreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
+                                          s->hwctx->alloc, layout);
+        av_free(desc_binding);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set "
+                   "layout: %s\n", ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    { /* Pool each descriptor by type and update pool counts */
+        for (int i = 0; i < num; i++) {
+            int j;
+            for (j = 0; j < s->pool_size_desc_num; j++)
+                if (s->pool_size_desc[j].type == desc[i].type)
+                    break;
+            if (j >= s->pool_size_desc_num) {
+                /* The count is bumped only once the array really grew */
+                void *grown = av_realloc_array(s->pool_size_desc,
+                                               s->pool_size_desc_num + 1,
+                                               sizeof(*s->pool_size_desc));
+                if (!grown)
+                    return AVERROR(ENOMEM);
+                s->pool_size_desc = grown;
+                s->pool_size_desc_num++;
+                memset(&s->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize));
+            }
+            s->pool_size_desc[j].type             = desc[i].type;
+            s->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1);
+        }
+    }
+
+    { /* Create template creation struct */
+        void *grown;
+        VkDescriptorUpdateTemplateCreateInfo *dt;
+        VkDescriptorUpdateTemplateEntry *des_entries;
+
+        /* Freed after descriptor set initialization */
+        des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry));
+        if (!des_entries)
+            return AVERROR(ENOMEM);
+
+        for (int i = 0; i < num; i++) {
+            des_entries[i].dstBinding      = i;
+            des_entries[i].descriptorType  = desc[i].type;
+            des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1);
+            des_entries[i].dstArrayElement = 0;
+            /* Offsets are relative to the filter context, which is the
+             * data pointer ff_vk_update_descriptor_set() passes along */
+            des_entries[i].offset          = ((uint8_t *)desc[i].updater) - (uint8_t *)s;
+            des_entries[i].stride          = descriptor_props[desc[i].type].struct_size;
+        }
+
+        /* Check the array that was actually grown. The entries are not yet
+         * reachable from the context, so release them on failure. */
+        grown = av_realloc_array(s->desc_template_info,
+                                 s->descriptor_sets_num + 1,
+                                 sizeof(*s->desc_template_info));
+        if (!grown) {
+            av_free(des_entries);
+            return AVERROR(ENOMEM);
+        }
+        s->desc_template_info = grown;
+
+        dt = &s->desc_template_info[s->descriptor_sets_num];
+        memset(dt, 0, sizeof(*dt));
+
+        dt->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
+        dt->templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
+        dt->descriptorSetLayout = *layout;
+        dt->pDescriptorUpdateEntries = des_entries;
+        dt->descriptorUpdateEntryCount = num;
+    }
+
+    s->descriptor_sets_num++;
+
+print:
+    /* Write shader info */
+    for (int i = 0; i < num; i++) {
+        const struct descriptor_props *prop = &descriptor_props[desc[i].type];
+        GLSLA("layout (set = %i, binding = %i", s->descriptor_sets_num - 1, i);
+
+        if (desc[i].mem_layout)
+            GLSLA(", %s", desc[i].mem_layout);
+        GLSLA(")");
+
+        if (prop->is_uniform)
+            GLSLA(" uniform");
+
+        if (prop->mem_quali && desc[i].mem_quali)
+            GLSLA(" %s", desc[i].mem_quali);
+
+        if (prop->type)
+            GLSLA(" %s", prop->type);
+
+        if (prop->dim_needed)
+            GLSLA("%iD", desc[i].dimensions);
+
+        GLSLA(" %s", desc[i].name);
+
+        if (prop->buf_content)
+            GLSLA(" {\n %s\n}", desc[i].buf_content);
+        else if (desc[i].elems > 0)
+            GLSLA("[%i]", desc[i].elems);
+
+        GLSLA(";\n");
+    }
+
+    return 0;
+}
+
+/* Updates descriptor set set_id through its update template, passing the
+ * filter context itself as the data pointer. */
+void ff_vk_update_descriptor_set(AVFilterContext *avctx, int set_id)
+{
+    VulkanFilterContext *ctx = avctx->priv;
+    VK_LOAD_PFN(ctx->hwctx->inst, vkUpdateDescriptorSetWithTemplateKHR);
+
+    pfn_vkUpdateDescriptorSetWithTemplateKHR(ctx->hwctx->act_dev,
+                                             ctx->desc_set[set_id],
+                                             ctx->desc_template[set_id],
+                                             ctx);
+}
+
+/**
+ * Returns the image aspect flag(s) for a plane of the given pixel format.
+ *
+ * @param pixfmt pixel format
+ * @param plane  plane index, or a negative value to request all planes
+ * @return VK_IMAGE_ASPECT_COLOR_BIT for single-plane formats, the PLANE_n
+ *         bit(s) for multi-plane formats, or 0 if the format has no planes
+ *         or the plane index is out of range
+ */
+const enum VkImageAspectFlagBits ff_vk_aspect_flags(enum AVPixelFormat pixfmt,
+                                                    int plane)
+{
+    const int tot_planes = av_pix_fmt_count_planes(pixfmt);
+    static const enum VkImageAspectFlagBits m[] = { VK_IMAGE_ASPECT_PLANE_0_BIT,
+                                                    VK_IMAGE_ASPECT_PLANE_1_BIT,
+                                                    VK_IMAGE_ASPECT_PLANE_2_BIT, };
+    const int known_planes = (int)(sizeof(m) / sizeof(m[0]));
+
+    /* plane == tot_planes is already one past the last valid index; a
+     * non-positive plane count is rejected as well */
+    if (tot_planes <= 0 || plane >= tot_planes)
+        return 0;
+    if (tot_planes == 1)
+        return VK_IMAGE_ASPECT_COLOR_BIT;
+    if (plane < 0)
+        return m[0] | m[1] | (tot_planes > 2 ? m[2] : 0);
+    /* Formats with more planes than there are PLANE_n bits in the table */
+    if (plane >= known_planes)
+        return 0;
+    return m[plane];
+}
+
+/**
+ * Returns the Vulkan format used to represent one plane of a pixel format.
+ *
+ * @param pixfmt pixel format
+ * @param plane  plane index
+ * @return the per-plane format, or VK_FORMAT_UNDEFINED if the pixel format
+ *         has no descriptor
+ */
+const VkFormat ff_vk_plane_rep_fmt(enum AVPixelFormat pixfmt, int plane)
+{
+    const int tot_planes = av_pix_fmt_count_planes(pixfmt);
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pixfmt);
+    int comp, high;
+
+    if (!desc)
+        return VK_FORMAT_UNDEFINED;
+
+    /* Only index comp[] with a valid component; otherwise use component 0 */
+    comp = (plane >= 0 && plane < desc->nb_components) ? plane : 0;
+    high = desc->comp[comp].depth > 8;
+
+    if (tot_planes == 1) { /* RGB, etc.'s singleplane rep is itself */
+        return av_vkfmt_from_pixfmt(pixfmt);
+    } else if (tot_planes == 2) { /* Must be NV12 or P010 */
+        if (!high)
+            return !plane ? VK_FORMAT_R8_UNORM : VK_FORMAT_R8G8_UNORM;
+        else
+            return !plane ? VK_FORMAT_R16_UNORM : VK_FORMAT_R16G16_UNORM;
+    } else { /* Regular planar YUV */
+        return !high ? VK_FORMAT_R8_UNORM : VK_FORMAT_R16_UNORM;
+    }
+}
+
+/* Returns the GLSL image format qualifier for a pixel format: "rgba16f"
+ * when the first component is deeper than 8 bits, "rgba8" otherwise. */
+const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)
+{
+    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(pixfmt);
+
+    if (pix_desc->comp[0].depth > 8)
+        return "rgba16f";
+
+    return "rgba8";
+}
+
+/**
+ * Creates a 2D image view (one mip level, one array layer) of a frame's
+ * image.
+ *
+ * @param avctx  filter context; its priv data is a VulkanFilterContext
+ * @param v      receives the image view
+ * @param f      frame whose image is viewed
+ * @param fmt    format of the view
+ * @param aspect aspect mask of the view
+ * @param map    component mapping
+ * @param pnext  chained into VkImageViewCreateInfo.pNext
+ * @return 0 on success, AVERROR_EXTERNAL if vkCreateImageView() fails
+ */
+int ff_vk_create_imageview(AVFilterContext *avctx, VkImageView *v, AVVkFrame *f,
+                           VkFormat fmt, enum VkImageAspectFlagBits aspect,
+                           VkComponentMapping map, const void *pnext)
+{
+    VulkanFilterContext *s = avctx->priv;
+    VkImageViewCreateInfo imgview_spawn = {
+        .sType      = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+        .pNext      = pnext,
+        .image      = f->img,
+        .viewType   = VK_IMAGE_VIEW_TYPE_2D,
+        .format     = fmt,
+        .components = map,
+        .subresourceRange = {
+            .aspectMask     = aspect,
+            .baseMipLevel   = 0,
+            .levelCount     = 1,
+            .baseArrayLayer = 0,
+            .layerCount     = 1,
+        },
+    };
+
+    VkResult ret = vkCreateImageView(s->hwctx->act_dev, &imgview_spawn,
+                                     s->hwctx->alloc, v);
+    if (ret != VK_SUCCESS) {
+        /* Log against the filter context, as everywhere else in this file */
+        av_log(avctx, AV_LOG_ERROR, "Failed to create imageview: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+/* Destroys an image view on the filter's active device, using the device
+ * context's allocator */
+void ff_vk_destroy_imageview(AVFilterContext *avctx, VkImageView v)
+{
+    VulkanFilterContext *vkctx = avctx->priv;
+    AVVulkanDeviceContext *dev = vkctx->hwctx;
+
+    vkDestroyImageView(dev->act_dev, v, dev->alloc);
+}
+
+/**
+ * Creates the descriptor pool, allocates the descriptor sets, creates the
+ * pipeline layout and creates one descriptor update template per set.
+ *
+ * The temporary arrays gathered during setup (pool_size_desc, push_consts,
+ * desc_template_info and each template's update entries) are freed here once
+ * they have been consumed.
+ *
+ * @param avctx filter context, its priv data must be a VulkanFilterContext
+ * @return 0 on success, AVERROR(ENOMEM) on allocation failure,
+ *         AVERROR_EXTERNAL if a Vulkan call fails or the template creation
+ *         entry point cannot be loaded
+ */
+int ff_vk_init_pipeline_layout(AVFilterContext *avctx)
+{
+    VkResult ret;
+    VulkanFilterContext *s = avctx->priv;
+
+    { /* Init descriptor set pool */
+        VkDescriptorPoolCreateInfo pool_create_info = {
+            .sType         = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+            .poolSizeCount = s->pool_size_desc_num,
+            .pPoolSizes    = s->pool_size_desc,
+            .maxSets       = s->descriptor_sets_num,
+        };
+
+        ret = vkCreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
+                                     s->hwctx->alloc, &s->desc_pool);
+        av_freep(&s->pool_size_desc);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set "
+                   "pool: %s\n", ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    { /* Allocate descriptor sets */
+        VkDescriptorSetAllocateInfo alloc_info = {
+            .sType              = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+            .descriptorPool     = s->desc_pool,
+            .descriptorSetCount = s->descriptor_sets_num,
+            .pSetLayouts        = s->desc_layout,
+        };
+
+        s->desc_set = av_malloc(s->descriptor_sets_num*sizeof(*s->desc_set));
+        if (!s->desc_set)
+            return AVERROR(ENOMEM);
+
+        ret = vkAllocateDescriptorSets(s->hwctx->act_dev, &alloc_info,
+                                       s->desc_set);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    { /* Finally create the pipeline layout */
+        VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
+            .sType                  = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+            .setLayoutCount         = s->descriptor_sets_num,
+            .pSetLayouts            = s->desc_layout,
+            .pushConstantRangeCount = s->push_consts_num,
+            .pPushConstantRanges    = s->push_consts,
+        };
+
+        ret = vkCreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout,
+                                     s->hwctx->alloc, &s->pipeline_layout);
+        av_freep(&s->push_consts);
+        s->push_consts_num = 0;
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    { /* Descriptor template (for tightly packed descriptors) */
+        VK_LOAD_PFN(s->hwctx->inst, vkCreateDescriptorUpdateTemplateKHR);
+        VkDescriptorUpdateTemplateCreateInfo *desc_template_info;
+
+        /* The entry point is looked up at runtime, so it may be missing */
+        if (!pfn_vkCreateDescriptorUpdateTemplateKHR) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to load "
+                   "vkCreateDescriptorUpdateTemplateKHR\n");
+            return AVERROR_EXTERNAL;
+        }
+
+        /* Zeroed so that a partial failure leaves null handles behind */
+        s->desc_template = av_calloc(s->descriptor_sets_num,
+                                     sizeof(*s->desc_template));
+        if (!s->desc_template)
+            return AVERROR(ENOMEM);
+
+        /* Create update templates for the descriptor sets */
+        for (int i = 0; i < s->descriptor_sets_num; i++) {
+            desc_template_info = &s->desc_template_info[i];
+            desc_template_info->pipelineLayout = s->pipeline_layout;
+            ret = pfn_vkCreateDescriptorUpdateTemplateKHR(s->hwctx->act_dev,
+                                                          desc_template_info,
+                                                          s->hwctx->alloc,
+                                                          &s->desc_template[i]);
+            av_free((void *)desc_template_info->pDescriptorUpdateEntries);
+            /* Don't leave a dangling pointer in the still-live info array */
+            desc_template_info->pDescriptorUpdateEntries = NULL;
+            if (ret != VK_SUCCESS) {
+                av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor "
+                       "template: %s\n", ff_vk_ret2str(ret));
+                return AVERROR_EXTERNAL;
+            }
+        }
+
+        av_freep(&s->desc_template_info);
+    }
+
+    return 0;
+}
+
+/**
+ * Creates the compute pipeline from the first registered shader whose stage
+ * includes the compute bit, using the previously created pipeline layout.
+ *
+ * @return 0 on success, AVERROR(EINVAL) if no compute shader is registered,
+ *         AVERROR_EXTERNAL if vkCreateComputePipelines() fails
+ */
+int ff_vk_init_compute_pipeline(AVFilterContext *avctx)
+{
+    VulkanFilterContext *s = avctx->priv;
+    const SPIRVShader *compute = NULL;
+    VkResult ret;
+
+    VkComputePipelineCreateInfo pipe = {
+        .sType  = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+        .layout = s->pipeline_layout,
+    };
+
+    /* Stop at the first shader usable as a compute stage */
+    for (int idx = 0; idx < s->shaders_num && !compute; idx++)
+        if (s->shaders[idx].shader.stage & VK_SHADER_STAGE_COMPUTE_BIT)
+            compute = &s->shaders[idx];
+
+    if (!compute) {
+        av_log(avctx, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n");
+        return AVERROR(EINVAL);
+    }
+
+    pipe.stage = compute->shader;
+
+    ret = vkCreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe,
+                                   s->hwctx->alloc, &s->pipeline);
+    if (ret == VK_SUCCESS)
+        return 0;
+
+    av_log(avctx, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n",
+           ff_vk_ret2str(ret));
+    return AVERROR_EXTERNAL;
+}
diff --git a/libavfilter/vulkan.h b/libavfilter/vulkan.h
new file mode 100644
index 0000000000..cac06f6920
--- /dev/null
+++ b/libavfilter/vulkan.h
@@ -0,0 +1,223 @@
+/*
+ * Vulkan utilities
+ * Copyright (c) 2018 Rostislav Pehlivanov <***@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_VULKAN_COMMON_H
+#define AVFILTER_VULKAN_COMMON_H
+
+#include "avfilter.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/bprint.h"
+#include "libavutil/hwcontext.h"
+#include "libavutil/hwcontext_vulkan.h"
+
+#include <shaderc/shaderc.h>
+
+/* GLSL management macros.
+ * All GLSL* macros append to shd->src, so a variable named shd pointing to
+ * the shader being built must be in scope where they are used.
+ * INDENT(N) expands to N copies of the indentation string, C(N, S) turns the
+ * tokens S into an indented, newline-terminated string literal. */
+#define INDENT(N) INDENT_##N
+#define INDENT_0
+#define INDENT_1 INDENT_0 " "
+#define INDENT_2 INDENT_1 INDENT_1
+#define INDENT_3 INDENT_2 INDENT_1
+#define INDENT_4 INDENT_3 INDENT_1
+#define INDENT_5 INDENT_4 INDENT_1
+#define INDENT_6 INDENT_5 INDENT_1
+#define C(N, S) INDENT(N) #S "\n"
+/* Appends one stringified line; the line is used as the format string */
+#define GLSLC(N, S) av_bprintf(&shd->src, C(N, S))
+/* Appends printf-style formatted text as-is */
+#define GLSLA(...) av_bprintf(&shd->src, __VA_ARGS__)
+/* Appends one stringified line, formatted with the given arguments */
+#define GLSLF(N, S, ...) av_bprintf(&shd->src, C(N, S), __VA_ARGS__)
+/* Appends the string D verbatim, surrounded by empty lines.
+ * Wrapped in do { } while (0) so that it behaves as a single statement, e.g.
+ * under an unbraced if/else. D is evaluated twice. */
+#define GLSLD(D)                                                     \
+    do {                                                             \
+        GLSLC(0, );                                                  \
+        av_bprint_append_data(&shd->src, (D), strlen(D));            \
+        GLSLC(0, );                                                  \
+    } while (0)
+
+/* Helper, pretty much every Vulkan return value needs to be checked.
+ * Requires an int named err and a label named fail in the calling function;
+ * only negative values are treated as errors. */
+#define RET(x)                                                       \
+    do {                                                             \
+        if ((err = (x)) < 0)                                         \
+            goto fail;                                               \
+    } while (0)
+
+/* Useful for attaching immutable samplers to arrays */
+#define DUP_SAMPLER_ARRAY4(x) (const VkSampler []){ x, x, x, x, }
+
+/* A shader: its source text buffer plus the stage description that is handed
+ * to pipeline creation */
+typedef struct SPIRVShader {
+ const char *name; /* Name for id/debugging purposes */
+ /* Source buffer, the GLSL* macros append to it through shd->src */
+ AVBPrint src;
+ int local_size[3]; /* Compute shader workgroup sizes */
+ /* Stage info; copied into the compute pipeline create info */
+ VkPipelineShaderStageCreateInfo shader;
+} SPIRVShader;
+
+/* Description of one binding of a descriptor set, passed as an array to
+ * ff_vk_add_descriptor_set() */
+typedef struct VulkanDescriptorSetBinding {
+ const char *name;
+ VkDescriptorType type;
+ const char *mem_layout; /* Storage images (rgba8, etc.) and buffers (std430, etc.) */
+ const char *mem_quali; /* readonly, writeonly, etc. */
+ const char *buf_content; /* For buffers */
+ uint32_t dimensions; /* Needed for e.g. sampler%iD */
+ uint32_t elems; /* 0 - scalar, 1 or more - vector */
+ VkShaderStageFlags stages;
+ const VkSampler *samplers; /* Immutable samplers, length - #elems */
+ /* NOTE(review): presumably points at the data used when the descriptor set
+ * is updated - confirm against ff_vk_update_descriptor_set() */
+ void *updater;
+} VulkanDescriptorSetBinding;
+
+/* A sampler as returned by ff_vk_init_sampler(), together with the YCbCr
+ * conversion info that goes with it */
+typedef struct VulkanSampler {
+ VkSampler sampler;
+ VkSamplerYcbcrConversionInfo yuv_conv; /* For imageview creation */
+ int converting; /* Indicates whether sampler is a converting one */
+} VulkanSampler;
+
+/* Command submission state, filled in by ff_vk_create_exec_ctx() and released
+ * by ff_vk_free_exec_ctx() */
+typedef struct FFVkExecContext {
+ VkCommandPool pool; /* Pool created for the requested queue family */
+ VkCommandBuffer buf; /* Single primary command buffer allocated from pool */
+ VkQueue queue; /* Queue 0 of the requested queue family */
+ VkFence fence;
+} FFVkExecContext;
+
+/* A buffer and the device memory bound to it, see ff_vk_create_buf() */
+typedef struct FFVkBuffer {
+ VkBuffer buf;
+ VkDeviceMemory mem;
+ /* Property flags of the memory type backing mem; ff_vk_alloc_mem() ORs them
+ * in, and map/unmap use them to skip host-coherent memory */
+ VkMemoryPropertyFlagBits flags;
+} FFVkBuffer;
+
+/* Common private context of the Vulkan filters; the helpers in this file
+ * obtain it from avctx->priv */
+typedef struct VulkanFilterContext {
+ const AVClass *class;
+
+ /* References and shortcuts set up while configuring the first input */
+ AVBufferRef *device_ref;
+ AVBufferRef *frames_ref; /* For in-place filtering */
+ AVHWDeviceContext *device;
+ AVVulkanDeviceContext *hwctx;
+
+ /* Properties */
+ /* Output values left unset default to those of the first input */
+ int output_width;
+ int output_height;
+ enum AVPixelFormat output_format;
+ enum AVPixelFormat input_format;
+
+ /* Samplers */
+ VulkanSampler *samplers;
+ int samplers_num;
+
+ /* Shaders */
+ SPIRVShader *shaders;
+ int shaders_num;
+ shaderc_compiler_t sc_compiler;
+ shaderc_compile_options_t sc_opts;
+
+ /* Contexts */
+ VkRenderPass renderpass;
+ VkPipelineLayout pipeline_layout;
+ VkPipeline pipeline;
+
+ /* Descriptors */
+ /* desc_layout, desc_set and desc_template are arrays of
+ * descriptor_sets_num entries */
+ VkDescriptorSetLayout *desc_layout;
+ VkDescriptorPool desc_pool;
+ VkDescriptorSet *desc_set;
+ VkDescriptorUpdateTemplate *desc_template;
+ int push_consts_num;
+ int descriptor_sets_num;
+ int pool_size_desc_num;
+
+ /* Vertex buffer */
+ FFVkBuffer vbuffer;
+ int num_verts;
+
+ /* Temporary, used to store data in between initialization stages */
+ /* All three are freed by ff_vk_init_pipeline_layout() once consumed */
+ VkDescriptorUpdateTemplateCreateInfo *desc_template_info;
+ VkDescriptorPoolSize *pool_size_desc;
+ VkPushConstantRange *push_consts;
+ void *scratch; /* Scratch memory used only in functions */
+ unsigned int scratch_size;
+} VulkanFilterContext;
+
+/* Generic memory allocation.
+ * Will align size to the minimum map alignment requirement in case req_flags
+ * has VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT set */
+int ff_vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req,
+ VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+ VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem);
+
+/* Buffer I/O */
+int ff_vk_create_buf(AVFilterContext *avctx, FFVkBuffer *buf, size_t size,
+ VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
+int ff_vk_map_buffers(AVFilterContext *avctx, FFVkBuffer *buf, uint8_t *mem[],
+ int nb_buffers, int invalidate);
+int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers,
+ int flush);
+void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf);
+
+/* Command context init/uninit */
+int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e, int queue);
+void ff_vk_free_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e);
+
+/* Converts Vulkan return values to strings */
+const char *ff_vk_ret2str(VkResult res);
+
+/* Create a Vulkan sampler, if input isn't NULL the sampler will convert to RGB */
+const VulkanSampler *ff_vk_init_sampler(AVFilterContext *avctx, AVFrame *input,
+ int unnorm_coords, VkFilter filt);
+
+/* Gets the single-plane representation format */
+const VkFormat ff_vk_plane_rep_fmt(enum AVPixelFormat pixfmt, int plane);
+/* Gets the glsl format for an image */
+const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
+/* Gets the image aspect flags of a plane */
+const enum VkImageAspectFlagBits ff_vk_aspect_flags(enum AVPixelFormat pixfmt,
+ int plane);
+/* Creates an imageview */
+int ff_vk_create_imageview(AVFilterContext *avctx, VkImageView *v, AVVkFrame *f,
+ VkFormat fmt, enum VkImageAspectFlagBits aspect,
+ VkComponentMapping map, const void *pnext);
+/* Destroys an imageview */
+void ff_vk_destroy_imageview(AVFilterContext *avctx, VkImageView v);
+/* Creates a shader */
+SPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, const char *name,
+ VkShaderStageFlags stage);
+/* For compute shaders, defines the workgroup size */
+void ff_vk_set_compute_shader_sizes(AVFilterContext *avctx, SPIRVShader *shd,
+ int local_size[3]);
+/* Compiles a completed shader into a module */
+int ff_vk_compile_shader(AVFilterContext *avctx, SPIRVShader *shd,
+ const char *entry);
+
+/* Needs to be abstracted so it adds them to a certain pipeline layout */
+int ff_vk_add_descriptor_set(AVFilterContext *avctx, SPIRVShader *shd,
+ VulkanDescriptorSetBinding *desc, int num,
+ int only_print_to_shader);
+int ff_vk_add_push_constant(AVFilterContext *avctx, int offset, int size,
+ VkShaderStageFlagBits stage);
+
+/* Creates a Vulkan pipeline layout */
+int ff_vk_init_pipeline_layout(AVFilterContext *avctx);
+
+/* Creates a compute pipeline */
+int ff_vk_init_compute_pipeline(AVFilterContext *avctx);
+
+/* Updates a given descriptor set after pipeline initialization */
+void ff_vk_update_descriptor_set(AVFilterContext *avctx, int set_id);
+
+/* General lavfi IO functions */
+int ff_vk_filter_query_formats (AVFilterContext *avctx);
+int ff_vk_filter_init (AVFilterContext *avctx);
+int ff_vk_filter_config_input (AVFilterLink *inlink);
+int ff_vk_filter_config_output (AVFilterLink *outlink);
+int ff_vk_filter_config_output_inplace(AVFilterLink *outlink);
+void ff_vk_filter_uninit (AVFilterContext *avctx);
+
+#endif /* AVFILTER_VULKAN_COMMON_H */
--
2.17.0
James Almer
2018-05-22 03:49:56 UTC
Permalink
Post by Rostislav Pehlivanov
This commit adds a common code for use in Vulkan filters. It attempts
to ease the burden of writing Vulkan image filtering to a minimum,
which is pretty much a requirement considering how verbose the API is.
It supports both compute and graphic pipelines and manages to abstract
the API to such a level there's no need to call any Vulkan functions
inside the init path of the code. Handling shader descriptors is probably
the bulk of the code, and despite the abstraction, it loses none of the
features for describing shader IO.
In order to produce linkable shaders, it depends on the libshaderc
library (and depends on the latest stable version of it). This allows
for greater performance and flexibility than static built-in shaders
and also eliminates the cumbersome process of interfacing with glslang
to compile GLSL to SPIR-V.
It's based off of the common opencl and provides similar interfaces for
filter pad init and config, with the addition that it also supports
in-place filtering.
---
configure | 10 +-
libavfilter/vulkan.c | 1186 ++++++++++++++++++++++++++++++++++++++++++
libavfilter/vulkan.h | 223 ++++++++
3 files changed, 1418 insertions(+), 1 deletion(-)
create mode 100644 libavfilter/vulkan.c
create mode 100644 libavfilter/vulkan.h
diff --git a/configure b/configure
index 5f4407b753..52c1e7a6e8 100755
--- a/configure
+++ b/configure
--enable-librsvg enable SVG rasterization via librsvg [no]
--enable-librubberband enable rubberband needed for rubberband filter [no]
--enable-librtmp enable RTMP[E] support via librtmp [no]
+ --enable-libshaderc enable GLSL->SPIRV compilation via libshaderc [no]
--enable-libshine enable fixed-point MP3 encoding via libshine [no]
--enable-libsmbclient enable Samba protocol via libsmbclient [no]
--enable-libsnappy enable Snappy compression, needed for hap encoding [no]
@@ -1707,6 +1708,7 @@ EXTERNAL_LIBRARY_LIST="
libpulse
librsvg
librtmp
+ libshaderc
libshine
libsmbclient
libsnappy
@@ -2225,6 +2227,7 @@ HAVE_LIST="
opencl_dxva2
opencl_vaapi_beignet
opencl_vaapi_intel_media
+ shaderc_opt_perf
vulkan_drm_mod
perl
pod2man
@@ -3461,7 +3464,7 @@ avformat_deps="avcodec avutil"
avformat_suggest="libm network zlib"
avresample_deps="avutil"
avresample_suggest="libm"
-avutil_suggest="clock_gettime ffnvcodec libm libdrm libmfx opencl user32 vaapi videotoolbox corefoundation corevideo coremedia bcrypt"
+avutil_suggest="clock_gettime ffnvcodec libm libdrm libmfx opencl vulkan libshaderc user32 vaapi videotoolbox corefoundation corevideo coremedia bcrypt"
libshaderc is not being used by avutil. You should add it to avfilter
instead.
Post by Rostislav Pehlivanov
postproc_deps="avutil gpl"
postproc_suggest="libm"
swresample_deps="avutil"
@@ -6050,6 +6053,7 @@ enabled libpulse && require_pkg_config libpulse libpulse pulse/pulseaud
enabled librsvg && require_pkg_config librsvg librsvg-2.0 librsvg-2.0/librsvg/rsvg.h rsvg_handle_render_cairo
enabled librtmp && require_pkg_config librtmp librtmp librtmp/rtmp.h RTMP_Socket
enabled librubberband && require_pkg_config librubberband "rubberband >= 1.8.1" rubberband/rubberband-c.h rubberband_new -lstdc++ && append librubberband_extralibs "-lstdc++"
+enabled libshaderc && require libshaderc shaderc/shaderc.h shaderc_compiler_initialize -lshaderc_shared
enabled libshine && require_pkg_config libshine shine shine/layer3.h shine_encode_buffer
enabled libsmbclient && { check_pkg_config libsmbclient smbclient libsmbclient.h smbc_init ||
require libsmbclient libsmbclient.h smbc_init -lsmbclient; }
@@ -6355,6 +6359,10 @@ enabled crystalhd && check_lib crystalhd "stdint.h libcrystalhd/libcrystalhd_if.
enabled vulkan &&
require_pkg_config vulkan "vulkan >= 1.1.73" "vulkan/vulkan.h" vkCreateInstance
+if enabled_all vulkan libshaderc ; then
+ check_cc shaderc_opt_perf shaderc/shaderc.h "int t = shaderc_optimization_level_performance"
You don't seem to be using shaderc_opt_perf anywhere, be it this patch
or the following ones.
Post by Rostislav Pehlivanov
+fi
+
if enabled_all vulkan libdrm ; then
check_cpp_condition vulkan_drm_mod vulkan/vulkan.h "defined VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME"
fi
Rostislav Pehlivanov
2018-05-22 04:28:33 UTC
Permalink
This commit adds common code for use in Vulkan filters. It attempts
to reduce the burden of writing Vulkan image filters to a minimum,
which is pretty much a requirement considering how verbose the API is.

It supports both compute and graphic pipelines and manages to abstract
the API to such a level there's no need to call any Vulkan functions
inside the init path of the code. Handling shader descriptors is probably
the bulk of the code, and despite the abstraction, it loses none of the
features for describing shader IO.

In order to produce linkable shaders, it depends on the libshaderc
library (and depends on the latest stable version of it). This allows
for greater performance and flexibility than static built-in shaders
and also eliminates the cumbersome process of interfacing with glslang
to compile GLSL to SPIR-V.

It's based on the common OpenCL filter code and provides similar interfaces
for filter pad init and config, with the addition that it also supports
in-place filtering.

Signed-off-by: Rostislav Pehlivanov <***@gmail.com>
---
configure | 12 +-
libavfilter/vulkan.c | 1190 ++++++++++++++++++++++++++++++++++++++++++
libavfilter/vulkan.h | 223 ++++++++
3 files changed, 1423 insertions(+), 2 deletions(-)
create mode 100644 libavfilter/vulkan.c
create mode 100644 libavfilter/vulkan.h

diff --git a/configure b/configure
index 5f4407b753..abcfe32625 100755
--- a/configure
+++ b/configure
@@ -252,6 +252,7 @@ External library support:
--enable-librsvg enable SVG rasterization via librsvg [no]
--enable-librubberband enable rubberband needed for rubberband filter [no]
--enable-librtmp enable RTMP[E] support via librtmp [no]
+ --enable-libshaderc enable GLSL->SPIRV compilation via libshaderc [no]
--enable-libshine enable fixed-point MP3 encoding via libshine [no]
--enable-libsmbclient enable Samba protocol via libsmbclient [no]
--enable-libsnappy enable Snappy compression, needed for hap encoding [no]
@@ -1707,6 +1708,7 @@ EXTERNAL_LIBRARY_LIST="
libpulse
librsvg
librtmp
+ libshaderc
libshine
libsmbclient
libsnappy
@@ -2225,6 +2227,7 @@ HAVE_LIST="
opencl_dxva2
opencl_vaapi_beignet
opencl_vaapi_intel_media
+ shaderc_opt_perf
vulkan_drm_mod
perl
pod2man
@@ -3456,12 +3459,12 @@ avcodec_select="null_bsf"
avdevice_deps="avformat avcodec avutil"
avdevice_suggest="libm"
avfilter_deps="avutil"
-avfilter_suggest="libm"
+avfilter_suggest="libm libshaderc"
avformat_deps="avcodec avutil"
avformat_suggest="libm network zlib"
avresample_deps="avutil"
avresample_suggest="libm"
-avutil_suggest="clock_gettime ffnvcodec libm libdrm libmfx opencl user32 vaapi videotoolbox corefoundation corevideo coremedia bcrypt"
+avutil_suggest="clock_gettime ffnvcodec libm libdrm libmfx opencl vulkan user32 vaapi videotoolbox corefoundation corevideo coremedia bcrypt"
postproc_deps="avutil gpl"
postproc_suggest="libm"
swresample_deps="avutil"
@@ -6050,6 +6053,7 @@ enabled libpulse && require_pkg_config libpulse libpulse pulse/pulseaud
enabled librsvg && require_pkg_config librsvg librsvg-2.0 librsvg-2.0/librsvg/rsvg.h rsvg_handle_render_cairo
enabled librtmp && require_pkg_config librtmp librtmp librtmp/rtmp.h RTMP_Socket
enabled librubberband && require_pkg_config librubberband "rubberband >= 1.8.1" rubberband/rubberband-c.h rubberband_new -lstdc++ && append librubberband_extralibs "-lstdc++"
+enabled libshaderc && require libshaderc shaderc/shaderc.h shaderc_compiler_initialize -lshaderc_shared
enabled libshine && require_pkg_config libshine shine shine/layer3.h shine_encode_buffer
enabled libsmbclient && { check_pkg_config libsmbclient smbclient libsmbclient.h smbc_init ||
require libsmbclient libsmbclient.h smbc_init -lsmbclient; }
@@ -6355,6 +6359,10 @@ enabled crystalhd && check_lib crystalhd "stdint.h libcrystalhd/libcrystalhd_if.
enabled vulkan &&
require_pkg_config vulkan "vulkan >= 1.1.73" "vulkan/vulkan.h" vkCreateInstance

+if enabled_all vulkan libshaderc ; then
+ check_cc shaderc_opt_perf shaderc/shaderc.h "int t = shaderc_optimization_level_performance"
+fi
+
if enabled_all vulkan libdrm ; then
check_cpp_condition vulkan_drm_mod vulkan/vulkan.h "defined VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME"
fi
diff --git a/libavfilter/vulkan.c b/libavfilter/vulkan.c
new file mode 100644
index 0000000000..7954c6f665
--- /dev/null
+++ b/libavfilter/vulkan.c
@@ -0,0 +1,1190 @@
+/*
+ * Vulkan utilities
+ * Copyright (c) 2018 Rostislav Pehlivanov <***@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "formats.h"
+#include "vulkan.h"
+
+/* Declares a local function pointer named pfn_<name> of type PFN_<name> and
+ * initializes it with vkGetInstanceProcAddr(inst, "<name>").
+ * NOTE(review): the result is not checked here, callers should verify that
+ * the pointer is non-NULL before calling through it. */
+#define VK_LOAD_PFN(inst, name) PFN_##name pfn_##name = (PFN_##name) \
+ vkGetInstanceProcAddr(inst, #name)
+
+/* Converts return values to strings */
+const char *ff_vk_ret2str(VkResult res)
+{
+#define CASE(VAL) case VAL: return #VAL
+ switch (res) {
+ CASE(VK_SUCCESS);
+ CASE(VK_NOT_READY);
+ CASE(VK_TIMEOUT);
+ CASE(VK_EVENT_SET);
+ CASE(VK_EVENT_RESET);
+ CASE(VK_INCOMPLETE);
+ CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
+ CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ CASE(VK_ERROR_INITIALIZATION_FAILED);
+ CASE(VK_ERROR_DEVICE_LOST);
+ CASE(VK_ERROR_MEMORY_MAP_FAILED);
+ CASE(VK_ERROR_LAYER_NOT_PRESENT);
+ CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
+ CASE(VK_ERROR_FEATURE_NOT_PRESENT);
+ CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
+ CASE(VK_ERROR_TOO_MANY_OBJECTS);
+ CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
+ CASE(VK_ERROR_FRAGMENTED_POOL);
+ CASE(VK_ERROR_SURFACE_LOST_KHR);
+ CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
+ CASE(VK_SUBOPTIMAL_KHR);
+ CASE(VK_ERROR_OUT_OF_DATE_KHR);
+ CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
+ CASE(VK_ERROR_VALIDATION_FAILED_EXT);
+ CASE(VK_ERROR_INVALID_SHADER_NV);
+ CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
+ CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ CASE(VK_ERROR_NOT_PERMITTED_EXT);
+ default: return "Unknown error";
+ }
+#undef CASE
+}
+
+/**
+ * Allocates device memory from the first memory type that satisfies both the
+ * given requirements and the requested property flags.
+ *
+ * @param avctx           filter context, its priv data must be a
+ *                        VulkanFilterContext
+ * @param req             memory requirements; req->size is rounded up to the
+ *                        device's minMemoryMapAlignment when req_flags
+ *                        contains VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
+ * @param req_flags       properties the memory type must have
+ * @param alloc_extension extension chain put into the allocate info's pNext
+ * @param mem_flags       the property flags of the chosen memory type are
+ *                        ORed into this, so it must be initialized by the
+ *                        caller
+ * @param mem             receives the allocated memory
+ * @return 0 on success, AVERROR(EINVAL) if no memory type matches,
+ *         AVERROR(ENOMEM) if vkAllocateMemory() fails
+ */
+int ff_vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req,
+                    VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+                    VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
+{
+    VkResult ret;
+    int index = -1;
+    VkPhysicalDeviceProperties props;
+    VkPhysicalDeviceMemoryProperties mprops;
+    VulkanFilterContext *s = avctx->priv;
+
+    VkMemoryAllocateInfo alloc_info = {
+        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+        .pNext = alloc_extension,
+    };
+
+    vkGetPhysicalDeviceProperties(s->hwctx->phys_dev, &props);
+    vkGetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &mprops);
+
+    /* Align if we need to */
+    if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+        req->size = FFALIGN(req->size, props.limits.minMemoryMapAlignment);
+
+    alloc_info.allocationSize = req->size;
+
+    /* The vulkan spec requires memory types to be sorted in the "optimal"
+     * order, so the first matching type we find will be the best/fastest one */
+    for (uint32_t i = 0; i < mprops.memoryTypeCount; i++) {
+        /* The memory type must be supported by the requirements (bitfield).
+         * Unsigned shift: memoryTypeBits is 32 bits wide and a signed
+         * 1 << 31 would be undefined. */
+        if (!(req->memoryTypeBits & (1U << i)))
+            continue;
+
+        /* The memory type flags must include our properties */
+        if ((mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
+            continue;
+
+        /* Found a suitable memory type */
+        index = i;
+        break;
+    }
+
+    if (index < 0) {
+        av_log(avctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
+               req_flags);
+        return AVERROR(EINVAL);
+    }
+
+    alloc_info.memoryTypeIndex = index;
+
+    ret = vkAllocateMemory(s->hwctx->act_dev, &alloc_info,
+                           s->hwctx->alloc, mem);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR(ENOMEM);
+    }
+
+    *mem_flags |= mprops.memoryTypes[index].propertyFlags;
+
+    return 0;
+}
+
+/**
+ * Creates an exclusive-mode buffer, allocates memory for it and binds the
+ * memory at offset 0.
+ *
+ * On failure nothing is released here; whatever was created so far stays in
+ * buf.
+ *
+ * @param avctx filter context, its priv data must be a VulkanFilterContext
+ * @param buf   buffer to fill in; buf->flags gets the memory property flags
+ *              ORed in, so it must be initialized by the caller
+ * @param size  requested buffer size in bytes
+ * @param usage buffer usage flags
+ * @param flags required memory property flags
+ * @return 0 on success, AVERROR_EXTERNAL if buffer creation or binding
+ *         fails, or the error returned by ff_vk_alloc_mem()
+ */
+int ff_vk_create_buf(AVFilterContext *avctx, FFVkBuffer *buf, size_t size,
+                     VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
+{
+    int err;
+    VkResult ret;
+    VkMemoryRequirements req;
+    VulkanFilterContext *s = avctx->priv;
+
+    VkBufferCreateInfo buf_spawn = {
+        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext       = NULL,
+        .usage       = usage,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .size        = size, /* Gets FFALIGNED during alloc if host visible
+                                but should be ok */
+    };
+
+    /* Create with the same allocator ff_vk_free_buf() destroys with; Vulkan
+     * requires the two to be compatible */
+    ret = vkCreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc,
+                         &buf->buf);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    vkGetBufferMemoryRequirements(s->hwctx->act_dev, buf->buf, &req);
+
+    err = ff_vk_alloc_mem(avctx, &req, flags, NULL, &buf->flags, &buf->mem);
+    if (err)
+        return err;
+
+    ret = vkBindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+/**
+ * Maps the whole memory of each buffer and optionally invalidates the
+ * mappings of buffers whose memory is not host-coherent.
+ *
+ * Buffers mapped before a failure are left mapped.
+ *
+ * @param avctx      filter context, its priv data must be a
+ *                   VulkanFilterContext
+ * @param buf        array of nb_buffers buffers
+ * @param mem        array of nb_buffers pointers receiving the mappings
+ * @param nb_buffers number of buffers
+ * @param invalidate if nonzero, invalidate the non-coherent mappings
+ * @return 0 on success, AVERROR_EXTERNAL if a Vulkan call fails,
+ *         AVERROR(ENOMEM) if the range list cannot be allocated
+ */
+int ff_vk_map_buffers(AVFilterContext *avctx, FFVkBuffer *buf, uint8_t *mem[],
+                      int nb_buffers, int invalidate)
+{
+    VkResult ret;
+    VulkanFilterContext *s = avctx->priv;
+    VkMappedMemoryRange *inval_list = NULL;
+    int inval_count = 0;
+
+    for (int i = 0; i < nb_buffers; i++) {
+        ret = vkMapMemory(s->hwctx->act_dev, buf[i].mem, 0,
+                          VK_WHOLE_SIZE, 0, (void **)&mem[i]);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    if (!invalidate)
+        return 0;
+
+    for (int i = 0; i < nb_buffers; i++) {
+        const VkMappedMemoryRange ival_buf = {
+            .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+            .memory = buf[i].mem,
+            .size   = VK_WHOLE_SIZE,
+        };
+        /* Coherent memory needs no explicit invalidation */
+        if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+            continue;
+        inval_list = av_fast_realloc(s->scratch, &s->scratch_size,
+                                     (++inval_count)*sizeof(*inval_list));
+        if (!inval_list)
+            return AVERROR(ENOMEM);
+        /* The block may have moved, keep the context pointer in sync with
+         * scratch_size or the next user would touch freed memory */
+        s->scratch = inval_list;
+        inval_list[inval_count - 1] = ival_buf;
+    }
+
+    if (inval_count) {
+        ret = vkInvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
+                                             inval_list);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * Optionally flushes the mappings of buffers whose memory is not
+ * host-coherent, then unmaps every buffer.
+ *
+ * The buffers are unmapped even if flushing fails.
+ *
+ * @param avctx      filter context, its priv data must be a
+ *                   VulkanFilterContext
+ * @param buf        array of nb_buffers mapped buffers
+ * @param nb_buffers number of buffers
+ * @param flush      if nonzero, flush the non-coherent mappings first
+ * @return 0 on success, AVERROR(ENOMEM) if the range list cannot be
+ *         allocated, AVERROR_EXTERNAL if the flush fails
+ */
+int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers,
+                        int flush)
+{
+    int err = 0;
+    VkResult ret;
+    VulkanFilterContext *s = avctx->priv;
+    VkMappedMemoryRange *flush_list = NULL;
+    int flush_count = 0;
+
+    if (flush) {
+        for (int i = 0; i < nb_buffers; i++) {
+            const VkMappedMemoryRange flush_buf = {
+                .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+                .memory = buf[i].mem,
+                .size   = VK_WHOLE_SIZE,
+            };
+            /* Coherent memory needs no explicit flush */
+            if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+                continue;
+            flush_list = av_fast_realloc(s->scratch, &s->scratch_size,
+                                         (++flush_count)*sizeof(*flush_list));
+            if (!flush_list) {
+                /* Skip the flush, but still unmap the buffers below */
+                err = AVERROR(ENOMEM);
+                flush_count = 0;
+                break;
+            }
+            /* The block may have moved, keep the context pointer in sync
+             * with scratch_size */
+            s->scratch = flush_list;
+            flush_list[flush_count - 1] = flush_buf;
+        }
+    }
+
+    if (flush_count) {
+        ret = vkFlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
+                                        flush_list);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
+                   ff_vk_ret2str(ret));
+            err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
+        }
+    }
+
+    for (int i = 0; i < nb_buffers; i++)
+        vkUnmapMemory(s->hwctx->act_dev, buf[i].mem);
+
+    return err;
+}
+
+/* Destroys the buffer and frees its memory, skipping null handles.
+ * Does nothing if buf is NULL. The handles in buf are not reset. */
+void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf)
+{
+    VulkanFilterContext *vkctx = avctx->priv;
+
+    if (buf) {
+        if (buf->buf != VK_NULL_HANDLE)
+            vkDestroyBuffer(vkctx->hwctx->act_dev, buf->buf,
+                            vkctx->hwctx->alloc);
+
+        if (buf->mem != VK_NULL_HANDLE)
+            vkFreeMemory(vkctx->hwctx->act_dev, buf->mem,
+                         vkctx->hwctx->alloc);
+    }
+}
+
+/**
+ * Creates a command pool for the given queue family, allocates one primary
+ * command buffer from it, creates a fence and fetches queue 0 of the family.
+ *
+ * On failure nothing is released here; whatever was created so far stays in
+ * e and can be released with ff_vk_free_exec_ctx().
+ *
+ * @param avctx filter context, its priv data must be a VulkanFilterContext
+ * @param e     execution context to fill in
+ * @param queue queue family index
+ * @return 0 on success, AVERROR_EXTERNAL if a Vulkan call fails
+ */
+int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e, int queue)
+{
+    VkResult ret;
+    VulkanFilterContext *s = avctx->priv;
+
+    VkCommandPoolCreateInfo cqueue_create = {
+        .sType            = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+        .flags            = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+        .queueFamilyIndex = queue,
+    };
+    VkCommandBufferAllocateInfo cbuf_create = {
+        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+        .commandBufferCount = 1,
+    };
+    VkFenceCreateInfo fence_spawn = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
+
+    /* Errors must be negative: callers using RET() only treat values < 0 as
+     * failures, a positive return would be taken for success */
+    ret = vkCreateCommandPool(s->hwctx->act_dev, &cqueue_create,
+                              s->hwctx->alloc, &e->pool);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Command pool creation failure: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    cbuf_create.commandPool = e->pool;
+
+    ret = vkAllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, &e->buf);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    ret = vkCreateFence(s->hwctx->act_dev, &fence_spawn,
+                        s->hwctx->alloc, &e->fence);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    vkGetDeviceQueue(s->hwctx->act_dev, queue, 0, &e->queue);
+
+    return 0;
+}
+
+/* Releases the fence, the command buffer and the command pool of an
+ * execution context, in that order, skipping null handles.
+ * Does nothing if e is NULL. */
+void ff_vk_free_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e)
+{
+    VulkanFilterContext *vkctx = avctx->priv;
+
+    if (e) {
+        if (e->fence != VK_NULL_HANDLE)
+            vkDestroyFence(vkctx->hwctx->act_dev, e->fence,
+                           vkctx->hwctx->alloc);
+
+        if (e->buf != VK_NULL_HANDLE)
+            vkFreeCommandBuffers(vkctx->hwctx->act_dev, e->pool, 1, &e->buf);
+
+        if (e->pool != VK_NULL_HANDLE)
+            vkDestroyCommandPool(vkctx->hwctx->act_dev, e->pool,
+                                 vkctx->hwctx->alloc);
+    }
+}
+
+/* Restricts the filter's common formats to AV_PIX_FMT_VULKAN.
+ * Returns AVERROR(ENOMEM) if the format list cannot be created, otherwise the
+ * result of ff_set_common_formats(). */
+int ff_vk_filter_query_formats(AVFilterContext *avctx)
+{
+    static const enum AVPixelFormat supported[] = {
+        AV_PIX_FMT_VULKAN,
+        AV_PIX_FMT_NONE,
+    };
+    AVFilterFormats *fmts = ff_make_format_list(supported);
+
+    return fmts ? ff_set_common_formats(avctx, fmts) : AVERROR(ENOMEM);
+}
+
+/**
+ * Makes the filter use the given device: takes a new reference to it,
+ * drops the previously held one and refreshes the device/hwctx shortcuts.
+ *
+ * On failure the previously set device, if any, is left untouched.
+ *
+ * @param avctx  filter context, its priv data must be a VulkanFilterContext
+ * @param device reference to the hardware device context
+ * @return 0 on success, AVERROR(ENOMEM) if the reference cannot be created
+ */
+static int vulkan_filter_set_device(AVFilterContext *avctx,
+                                    AVBufferRef *device)
+{
+    VulkanFilterContext *s = avctx->priv;
+    AVBufferRef *new_ref;
+
+    /* Reference first, release second: a failed ref then can't leave
+     * s->device/s->hwctx pointing into an already released context, and
+     * passing the currently held reference stays safe */
+    new_ref = av_buffer_ref(device);
+    if (!new_ref)
+        return AVERROR(ENOMEM);
+
+    av_buffer_unref(&s->device_ref);
+    s->device_ref = new_ref;
+
+    s->device = (AVHWDeviceContext*)s->device_ref->data;
+    s->hwctx  = s->device->hwctx;
+
+    return 0;
+}
+
+/* Drops the currently held frames context reference and replaces it with a
+ * new reference to frames. Returns AVERROR(ENOMEM) if referencing fails. */
+static int vulkan_filter_set_frames(AVFilterContext *avctx,
+                                    AVBufferRef *frames)
+{
+    VulkanFilterContext *vkctx = avctx->priv;
+    AVBufferRef **slot = &vkctx->frames_ref;
+
+    av_buffer_unref(slot);
+    *slot = av_buffer_ref(frames);
+
+    return *slot ? 0 : AVERROR(ENOMEM);
+}
+
+/**
+ * Input link configuration shared by the Vulkan filters.
+ *
+ * Every input must carry a hardware frames context. Only the filter's first
+ * input is used to set the device, the frames context and the defaults of
+ * the output parameters; other inputs are merely checked.
+ *
+ * @return 0 on success, AVERROR(EINVAL) if the link has no hardware frames
+ *         context or its format is not AV_PIX_FMT_VULKAN, or the error of
+ *         setting the device/frames references
+ */
+int ff_vk_filter_config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *avctx = inlink->dst;
+    VulkanFilterContext *s = avctx->priv;
+    AVHWFramesContext *in_frames;
+    int ret;
+
+    if (!inlink->hw_frames_ctx) {
+        av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
+               "hardware frames context on the input.\n");
+        return AVERROR(EINVAL);
+    }
+
+    /* Nothing more to do for inputs other than the first one */
+    if (inlink != avctx->inputs[0])
+        return 0;
+
+    in_frames = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
+    if (in_frames->format != AV_PIX_FMT_VULKAN)
+        return AVERROR(EINVAL);
+
+    if ((ret = vulkan_filter_set_device(avctx, in_frames->device_ref)) < 0)
+        return ret;
+    if ((ret = vulkan_filter_set_frames(avctx, inlink->hw_frames_ctx)) < 0)
+        return ret;
+
+    /* Anything the filter left unset is inherited from the input */
+    s->input_format = in_frames->sw_format;
+    if (s->output_format == AV_PIX_FMT_NONE)
+        s->output_format = in_frames->sw_format;
+    if (s->output_width == 0)
+        s->output_width = inlink->w;
+    if (s->output_height == 0)
+        s->output_height = inlink->h;
+
+    return 0;
+}
+
+/**
+ * Output pad configuration for filters that write into the input frames.
+ *
+ * The output link receives a new reference to the frames context stored by
+ * ff_vk_filter_config_input() instead of a freshly allocated one.
+ *
+ * @return 0 on success, AVERROR(EINVAL) if no device or no input frames
+ *         context is available, AVERROR(ENOMEM) if the frames context cannot
+ *         be referenced
+ */
+int ff_vk_filter_config_output_inplace(AVFilterLink *outlink)
+{
+    int err;
+    AVFilterContext *avctx = outlink->src;
+    VulkanFilterContext *s = avctx->priv;
+
+    av_buffer_unref(&outlink->hw_frames_ctx);
+
+    if (!s->device_ref) {
+        if (!avctx->hw_device_ctx) {
+            av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
+                   "Vulkan device.\n");
+            return AVERROR(EINVAL);
+        }
+
+        err = vulkan_filter_set_device(avctx, avctx->hw_device_ctx);
+        if (err < 0)
+            return err;
+    }
+
+    /* In-place operation has nothing to hand out without an input frames
+     * context; referencing a NULL buffer would crash. */
+    if (!s->frames_ref) {
+        av_log(avctx, AV_LOG_ERROR, "In-place Vulkan filtering requires a "
+               "hardware frames context on the input.\n");
+        return AVERROR(EINVAL);
+    }
+
+    outlink->hw_frames_ctx = av_buffer_ref(s->frames_ref);
+    if (!outlink->hw_frames_ctx)
+        return AVERROR(ENOMEM);
+
+    outlink->w = s->output_width;
+    outlink->h = s->output_height;
+
+    return 0;
+}
+
+/**
+ * Output pad configuration for filters that write into newly allocated frames.
+ *
+ * Allocates and initialises a Vulkan frames context on the filter's device
+ * using the output format and dimensions stored in the filter context, then
+ * attaches it to the output link.
+ *
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+int ff_vk_filter_config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *avctx = outlink->src;
+    VulkanFilterContext *ctx = avctx->priv;
+    AVBufferRef *frames_ref;
+    AVHWFramesContext *frames;
+    int ret;
+
+    av_buffer_unref(&outlink->hw_frames_ctx);
+
+    /* Fall back to the filter-level device if no input provided one. */
+    if (ctx->device_ref == NULL) {
+        if (avctx->hw_device_ctx == NULL) {
+            av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
+                   "Vulkan device.\n");
+            return AVERROR(EINVAL);
+        }
+
+        ret = vulkan_filter_set_device(avctx, avctx->hw_device_ctx);
+        if (ret < 0)
+            return ret;
+    }
+
+    frames_ref = av_hwframe_ctx_alloc(ctx->device_ref);
+    if (frames_ref == NULL)
+        return AVERROR(ENOMEM);
+
+    frames            = (AVHWFramesContext*)frames_ref->data;
+    frames->format    = AV_PIX_FMT_VULKAN;
+    frames->sw_format = ctx->output_format;
+    frames->width     = ctx->output_width;
+    frames->height    = ctx->output_height;
+
+    ret = av_hwframe_ctx_init(frames_ref);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to initialise output "
+               "frames: %d.\n", ret);
+        av_buffer_unref(&frames_ref);
+        return ret;
+    }
+
+    /* Ownership of the reference moves to the link. */
+    outlink->hw_frames_ctx = frames_ref;
+    outlink->w = ctx->output_width;
+    outlink->h = ctx->output_height;
+
+    return 0;
+}
+
+/**
+ * Common filter init: resets the output format to "unset" and creates the
+ * shaderc compiler and its compile options (Vulkan 1.1 target; optimised for
+ * performance when HAVE_SHADERC_OPT_PERF is set, for size otherwise).
+ *
+ * @return 0 on success, AVERROR_EXTERNAL if shaderc fails to create either
+ *         object
+ */
+int ff_vk_filter_init(AVFilterContext *avctx)
+{
+    VulkanFilterContext *s = avctx->priv;
+    const shaderc_env_version opt_ver = shaderc_env_version_vulkan_1_1;
+#if HAVE_SHADERC_OPT_PERF
+    const shaderc_optimization_level opt_lvl = shaderc_optimization_level_performance;
+#else
+    const shaderc_optimization_level opt_lvl = shaderc_optimization_level_size;
+#endif
+
+    s->output_format = AV_PIX_FMT_NONE;
+
+    s->sc_compiler = shaderc_compiler_initialize();
+    if (!s->sc_compiler)
+        return AVERROR_EXTERNAL;
+
+    s->sc_opts = shaderc_compile_options_initialize();
+    /* Check the options object that was just created, not the compiler
+     * again, so a failed allocation is not passed on to the setters below. */
+    if (!s->sc_opts)
+        return AVERROR_EXTERNAL;
+
+    shaderc_compile_options_set_target_env(s->sc_opts,
+                                           shaderc_target_env_vulkan,
+                                           opt_ver);
+    shaderc_compile_options_set_optimization_level(s->sc_opts, opt_lvl);
+
+    return 0;
+}
+
+/**
+ * Release everything held by the common Vulkan filter context: the shaderc
+ * objects, shader modules, pipeline and layout, samplers and their YCbCr
+ * conversions, the vertex buffer, descriptor templates/layouts/pool, the
+ * bookkeeping arrays and the device/frames references.
+ */
+void ff_vk_filter_uninit(AVFilterContext *avctx)
+{
+    VulkanFilterContext *s = avctx->priv;
+
+    shaderc_compile_options_release(s->sc_opts);
+    shaderc_compiler_release(s->sc_compiler);
+
+    for (int i = 0; i < s->shaders_num; i++) {
+        SPIRVShader *shd = &s->shaders[i];
+        vkDestroyShaderModule(s->hwctx->act_dev, shd->shader.module,
+                              s->hwctx->alloc);
+    }
+
+    if (s->pipeline != VK_NULL_HANDLE)
+        vkDestroyPipeline(s->hwctx->act_dev, s->pipeline, s->hwctx->alloc);
+    if (s->pipeline_layout != VK_NULL_HANDLE)
+        vkDestroyPipelineLayout(s->hwctx->act_dev, s->pipeline_layout,
+                                s->hwctx->alloc);
+
+    for (int i = 0; i < s->samplers_num; i++) {
+        VulkanSampler *sampler = &s->samplers[i];
+        VK_LOAD_PFN(s->hwctx->inst, vkDestroySamplerYcbcrConversionKHR);
+        vkDestroySampler(s->hwctx->act_dev, sampler->sampler, s->hwctx->alloc);
+        pfn_vkDestroySamplerYcbcrConversionKHR(s->hwctx->act_dev,
+                                               sampler->yuv_conv.conversion,
+                                               s->hwctx->alloc);
+    }
+
+    ff_vk_free_buf(avctx, &s->vbuffer);
+
+    for (int i = 0; i < s->descriptor_sets_num; i++) {
+        /* The template array is only allocated by
+         * ff_vk_init_pipeline_layout(); descriptor sets can already be
+         * registered before that ran (or when it failed early). */
+        if (s->desc_template) {
+            VK_LOAD_PFN(s->hwctx->inst, vkDestroyDescriptorUpdateTemplateKHR);
+            pfn_vkDestroyDescriptorUpdateTemplateKHR(s->hwctx->act_dev,
+                                                     s->desc_template[i],
+                                                     s->hwctx->alloc);
+        }
+        vkDestroyDescriptorSetLayout(s->hwctx->act_dev, s->desc_layout[i],
+                                     s->hwctx->alloc);
+    }
+
+    if (s->desc_pool != VK_NULL_HANDLE)
+        vkDestroyDescriptorPool(s->hwctx->act_dev, s->desc_pool,
+                                s->hwctx->alloc);
+
+    /* Handle arrays allocated by ff_vk_init_pipeline_layout() */
+    av_freep(&s->desc_set);
+    av_freep(&s->desc_template);
+
+    av_freep(&s->desc_layout);
+    av_freep(&s->shaders);
+    av_freep(&s->samplers);
+    av_buffer_unref(&s->device_ref);
+    av_buffer_unref(&s->frames_ref);
+
+    /* Temporary init data, only still allocated if init failed part-way */
+    av_freep(&s->push_consts);
+    av_freep(&s->pool_size_desc);
+    if (s->desc_template_info) {
+        for (int i = 0; i < s->descriptor_sets_num; i++)
+            av_free((void *)s->desc_template_info[i].pDescriptorUpdateEntries);
+        av_freep(&s->desc_template_info);
+    }
+}
+
+/**
+ * Append a new shader to the filter context and start its GLSL source with
+ * the common preamble (#version line and helper macros).
+ *
+ * @param name  identifier stored in the shader; the pointer itself is kept
+ * @param stage pipeline stage stored in the shader's stage create info
+ * @return the new shader, or NULL if the shader array could not be grown (the
+ *         existing array and count are left untouched in that case)
+ */
+SPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, const char *name,
+                               VkShaderStageFlags stage)
+{
+    SPIRVShader *shd, *tmp;
+    VulkanFilterContext *s = avctx->priv;
+
+    /* Grow through a temporary: on failure av_realloc_array() returns NULL
+     * without freeing the old array, so assigning directly would leak it and
+     * leave shaders_num pointing past a NULL array. */
+    tmp = av_realloc_array(s->shaders, s->shaders_num + 1,
+                           sizeof(*s->shaders));
+    if (!tmp)
+        return NULL;
+    s->shaders = tmp;
+
+    shd = &s->shaders[s->shaders_num++];
+    memset(shd, 0, sizeof(*shd));
+    av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED);
+
+    shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+    shd->shader.stage = stage;
+
+    shd->name = name;
+
+    GLSLF(0, #version %i ,460);
+    GLSLC(0, #define AREA(v) ((v).x*(v).y) );
+    GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) );
+    GLSLC(0, );
+
+    return shd;
+}
+
+/**
+ * Store the compute workgroup size in the shader and append the matching
+ * "layout (local_size_* ...) in;" declaration to its GLSL source.
+ *
+ * @param local_size workgroup size in x, y and z
+ */
+void ff_vk_set_compute_shader_sizes(AVFilterContext *avctx, SPIRVShader *shd,
+                                    int local_size[3])
+{
+    int *dst = shd->local_size;
+
+    for (int axis = 0; axis < 3; axis++)
+        dst[axis] = local_size[axis];
+
+    av_bprintf(&shd->src, "layout (local_size_x = %i, "
+               "local_size_y = %i, local_size_z = %i) in;\n",
+               dst[0], dst[1], dst[2]);
+}
+
+/* Log the shader's GLSL source at verbose level with every line prefixed by
+ * its 1-based line number and a tab. */
+static void print_shader(AVFilterContext *avctx, SPIRVShader *shd)
+{
+    int line = 0;
+    const char *p = shd->src.str;
+    const char *start = p;
+    /* Measure once; calling strlen() in the loop condition rescans the whole
+     * source on every iteration. */
+    const size_t len = strlen(p);
+
+    AVBPrint buf;
+    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
+
+    for (size_t i = 0; i < len; i++) {
+        if (p[i] == '\n') {
+            av_bprintf(&buf, "%i\t", ++line);
+            av_bprint_append_data(&buf, start, &p[i] - start + 1);
+            start = &p[i + 1];
+        }
+    }
+
+    /* Do not drop a final line that lacks a terminating newline */
+    if (start < p + len) {
+        av_bprintf(&buf, "%i\t", ++line);
+        av_bprint_append_data(&buf, start, p + len - start);
+    }
+
+    av_log(avctx, AV_LOG_VERBOSE, "Compiling shader %s: \n%s\n",
+           shd->name, buf.str);
+    av_bprint_finalize(&buf, NULL);
+}
+
+/**
+ * Compile the GLSL accumulated in shd to SPIR-V with shaderc and create the
+ * Vulkan shader module from it. The source buffer is finalized (freed) in all
+ * cases.
+ *
+ * @param entry entry point name, also stored as the stage's pName
+ * @return 0 on success, AVERROR_EXTERNAL if compilation or module creation
+ *         fails
+ */
+int ff_vk_compile_shader(AVFilterContext *avctx, SPIRVShader *shd,
+                         const char *entry)
+{
+    VkResult ret;
+    VulkanFilterContext *s = avctx->priv;
+    VkShaderModuleCreateInfo shader_create;
+
+    shaderc_compilation_result_t res;
+    /* Indexed directly by the stage bit value */
+    static const shaderc_shader_kind type_map[] = {
+        [VK_SHADER_STAGE_VERTEX_BIT] = shaderc_vertex_shader,
+        [VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT] = shaderc_tess_control_shader,
+        [VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT] = shaderc_tess_evaluation_shader,
+        [VK_SHADER_STAGE_GEOMETRY_BIT] = shaderc_geometry_shader,
+        [VK_SHADER_STAGE_FRAGMENT_BIT] = shaderc_fragment_shader,
+        [VK_SHADER_STAGE_COMPUTE_BIT] = shaderc_compute_shader,
+    };
+
+    shd->shader.pName = entry;
+
+    print_shader(avctx, shd);
+
+    res = shaderc_compile_into_spv(s->sc_compiler, shd->src.str, shd->src.len,
+                                   type_map[shd->shader.stage], shd->name,
+                                   entry, s->sc_opts);
+    av_bprint_finalize(&shd->src, NULL);
+
+    if (shaderc_result_get_compilation_status(res) !=
+        shaderc_compilation_status_success) {
+        av_log(avctx, AV_LOG_ERROR, "%s", shaderc_result_get_error_message(res));
+        /* The result object must be released on the failure path as well */
+        shaderc_result_release(res);
+        return AVERROR_EXTERNAL;
+    }
+
+    shader_create.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
+    shader_create.pNext = NULL;
+    shader_create.codeSize = shaderc_result_get_length(res);
+    shader_create.flags = 0;
+    shader_create.pCode = (const uint32_t *)shaderc_result_get_bytes(res);
+
+    /* Create with the same allocator ff_vk_filter_uninit() passes to
+     * vkDestroyShaderModule(); the two must be compatible. */
+    ret = vkCreateShaderModule(s->hwctx->act_dev, &shader_create,
+                               s->hwctx->alloc, &shd->shader.module);
+    shaderc_result_release(res);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to create shader module: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    av_log(avctx, AV_LOG_VERBOSE, "Shader linked! Size: %zu bytes\n",
+           shader_create.codeSize);
+
+    return 0;
+}
+
+/* Pick the Vulkan YCbCr model conversion for a set of colour primaries:
+ * BT.470BG selects the 601 model, BT.2020 the 2020 model, and BT.709 as well
+ * as every other value the 709 model. */
+static VkSamplerYcbcrModelConversion conv_primaries(enum AVColorPrimaries color_primaries)
+{
+    if (color_primaries == AVCOL_PRI_BT470BG)
+        return VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601;
+    if (color_primaries == AVCOL_PRI_BT2020)
+        return VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020;
+
+    /* BT.709 itself, and the assumed default for anything else */
+    return VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709;
+}
+
+/**
+ * Create a sampler and append it to the filter context.
+ *
+ * @param input         if non-NULL, a YCbCr conversion is created from this
+ *                      frame's colour properties and chained to the sampler
+ * @param unnorm_coords use unnormalized coordinates; rejected together with
+ *                      a converting sampler
+ * @param filt          filter used for both magnification and minification
+ * @return the new sampler, or NULL on failure
+ */
+const VulkanSampler *ff_vk_init_sampler(AVFilterContext *avctx, AVFrame *input,
+                                        int unnorm_coords, VkFilter filt)
+{
+    VkResult ret;
+    VulkanFilterContext *s = avctx->priv;
+    VulkanSampler *sampler, *tmp;
+
+    /* Shared values are spelled out per member: an initializer must not read
+     * other members of the object it is still initializing, since the
+     * initializer expressions are not ordered relative to each other. */
+    const VkSamplerAddressMode addr_mode = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
+    VkSamplerCreateInfo sampler_info = {
+        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+        .magFilter = filt,
+        .minFilter = filt,
+        .mipmapMode = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST :
+                                      VK_SAMPLER_MIPMAP_MODE_LINEAR,
+        .addressModeU = addr_mode,
+        .addressModeV = addr_mode,
+        .addressModeW = addr_mode,
+        .anisotropyEnable = VK_FALSE,
+        .compareOp = VK_COMPARE_OP_NEVER,
+        .borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
+        .unnormalizedCoordinates = unnorm_coords,
+    };
+
+    /* Reject the invalid combination before a sampler slot is consumed */
+    if (input && unnorm_coords) {
+        av_log(avctx, AV_LOG_ERROR, "Cannot create a converting sampler "
+               "with unnormalized addressing, forbidden by spec!\n");
+        return NULL;
+    }
+
+    /* Grow through a temporary so a failed reallocation neither leaks the
+     * old array nor leaves samplers_num pointing past a NULL array. */
+    tmp = av_realloc_array(s->samplers, s->samplers_num + 1,
+                           sizeof(*s->samplers));
+    if (!tmp)
+        return NULL;
+    s->samplers = tmp;
+
+    sampler = &s->samplers[s->samplers_num++];
+    memset(sampler, 0, sizeof(*sampler));
+
+    sampler->converting = !!input;
+    sampler->yuv_conv.sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO;
+
+    if (input) {
+        VkSamplerYcbcrConversion *conv = &sampler->yuv_conv.conversion;
+        VkComponentMapping comp_map = {
+            .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+        };
+
+        VkSamplerYcbcrConversionCreateInfo c_info = {
+            .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO,
+            .format = av_vkfmt_from_pixfmt(s->input_format),
+            .chromaFilter = VK_FILTER_LINEAR,
+            .ycbcrModel = conv_primaries(input->color_primaries),
+            .ycbcrRange = input->color_range == AVCOL_RANGE_JPEG ?
+                          VK_SAMPLER_YCBCR_RANGE_ITU_FULL :
+                          VK_SAMPLER_YCBCR_RANGE_ITU_NARROW,
+            .xChromaOffset = input->chroma_location == AVCHROMA_LOC_CENTER ?
+                             VK_CHROMA_LOCATION_MIDPOINT :
+                             VK_CHROMA_LOCATION_COSITED_EVEN,
+            .components = comp_map,
+        };
+
+        VK_LOAD_PFN(s->hwctx->inst, vkCreateSamplerYcbcrConversionKHR);
+
+        /* Chain the conversion info so the sampler performs the conversion */
+        sampler_info.pNext = &sampler->yuv_conv;
+
+        ret = pfn_vkCreateSamplerYcbcrConversionKHR(s->hwctx->act_dev, &c_info,
+                                                    s->hwctx->alloc, conv);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to init conversion: %s\n",
+                   ff_vk_ret2str(ret));
+            return NULL;
+        }
+    }
+
+    ret = vkCreateSampler(s->hwctx->act_dev, &sampler_info,
+                          s->hwctx->alloc, &sampler->sampler);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to init sampler: %s\n",
+               ff_vk_ret2str(ret));
+        return NULL;
+    }
+
+    return sampler;
+}
+
+/**
+ * Register a push constant range to be used when the pipeline layout is
+ * created.
+ *
+ * @param offset start of the range
+ * @param size   size of the range
+ * @param stage  shader stage(s) the range is visible to
+ * @return the index of the new range, or AVERROR(ENOMEM); on failure the
+ *         already registered ranges are left untouched
+ */
+int ff_vk_add_push_constant(AVFilterContext *avctx, int offset, int size,
+                            VkShaderStageFlagBits stage)
+{
+    VkPushConstantRange *pc, *tmp;
+    VulkanFilterContext *s = avctx->priv;
+
+    /* Grow through a temporary: a failed av_realloc_array() does not free
+     * the old array, so overwriting the pointer would leak it. */
+    tmp = av_realloc_array(s->push_consts, s->push_consts_num + 1,
+                           sizeof(*s->push_consts));
+    if (!tmp)
+        return AVERROR(ENOMEM);
+    s->push_consts = tmp;
+
+    pc = &s->push_consts[s->push_consts_num++];
+    memset(pc, 0, sizeof(*pc));
+
+    pc->stageFlags = stage;
+    pc->offset = offset;
+    pc->size = size;
+
+    return s->push_consts_num - 1;
+}
+
+/* Per-descriptor-type properties, indexed by VkDescriptorType.
+ * ff_vk_add_descriptor_set() uses them twice: struct_size becomes the stride
+ * of the descriptor update template entry, and the remaining fields select
+ * which parts of the GLSL declaration are printed for a binding. */
+static const struct descriptor_props {
+ size_t struct_size; /* Size of the opaque which updates the descriptor */
+ const char *type; /* GLSL type keyword, NULL if none is printed */
+ int is_uniform; /* Declaration gets the "uniform" keyword */
+ int mem_quali; /* Can use a memory qualifier */
+ int dim_needed; /* Must indicate dimension */
+ int buf_content; /* Must indicate buffer contents */
+} descriptor_props[] = {
+ [VK_DESCRIPTOR_TYPE_SAMPLER] = { sizeof(VkDescriptorImageInfo), "sampler", 1, 0, 0, 0, },
+ [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE] = { sizeof(VkDescriptorImageInfo), "texture", 1, 0, 1, 0, },
+ [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE] = { sizeof(VkDescriptorImageInfo), "image", 1, 1, 1, 0, },
+ [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT] = { sizeof(VkDescriptorImageInfo), "subpassInput", 1, 0, 0, 0, },
+ [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo), "sampler", 1, 0, 1, 0, },
+ [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER] = { sizeof(VkDescriptorBufferInfo), NULL, 1, 0, 0, 1, },
+ [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER] = { sizeof(VkDescriptorBufferInfo), "buffer", 0, 1, 0, 1, },
+ [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), NULL, 1, 0, 0, 1, },
+ [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer", 0, 1, 0, 1, },
+ [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER] = { sizeof(VkBufferView), "samplerBuffer", 1, 0, 0, 0, },
+ [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = { sizeof(VkBufferView), "imageBuffer", 1, 0, 0, 0, },
+};
+
+/**
+ * Register a descriptor set and print its bindings into a shader's source.
+ *
+ * Unless only_print_to_shader is set, this creates the descriptor set layout,
+ * adds the bindings to the per-type descriptor pool sizes and prepares the
+ * update template creation info consumed by ff_vk_init_pipeline_layout().
+ * In both cases one GLSL "layout (set = ..., binding = ...)" declaration per
+ * binding is appended to shd, using the index of the most recently
+ * registered set.
+ *
+ * @param desc array of num bindings; binding i gets binding index i
+ * @param only_print_to_shader skip all Vulkan-side setup and only emit GLSL
+ * @return 0 on success, a negative AVERROR code otherwise; on failure no new
+ *         set is registered
+ */
+int ff_vk_add_descriptor_set(AVFilterContext *avctx, SPIRVShader *shd,
+                             VulkanDescriptorSetBinding *desc, int num,
+                             int only_print_to_shader)
+{
+    int err;
+    VkResult ret;
+    VkDescriptorSetLayout *layout;
+    VkDescriptorUpdateTemplateEntry *des_entries = NULL;
+    VulkanFilterContext *s = avctx->priv;
+
+    if (only_print_to_shader)
+        goto print;
+
+    { /* Make room for the new layout handle */
+        VkDescriptorSetLayout *tmp;
+
+        tmp = av_realloc_array(s->desc_layout, s->descriptor_sets_num + 1,
+                               sizeof(*s->desc_layout));
+        if (!tmp)
+            return AVERROR(ENOMEM);
+        s->desc_layout = tmp;
+    }
+
+    layout = &s->desc_layout[s->descriptor_sets_num];
+    memset(layout, 0, sizeof(*layout));
+
+    { /* Create descriptor set layout descriptions */
+        VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 };
+        VkDescriptorSetLayoutBinding *desc_binding;
+
+        desc_binding = av_mallocz(sizeof(*desc_binding)*num);
+        if (!desc_binding)
+            return AVERROR(ENOMEM);
+
+        for (int i = 0; i < num; i++) {
+            desc_binding[i].binding = i;
+            desc_binding[i].descriptorType = desc[i].type;
+            desc_binding[i].descriptorCount = FFMAX(desc[i].elems, 1);
+            desc_binding[i].stageFlags = desc[i].stages;
+            desc_binding[i].pImmutableSamplers = desc[i].samplers;
+        }
+
+        desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+        desc_create_layout.pBindings = desc_binding;
+        desc_create_layout.bindingCount = num;
+
+        ret = vkCreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
+                                          s->hwctx->alloc, layout);
+        av_free(desc_binding);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set "
+                   "layout: %s\n", ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    /* From here on the layout exists but is not yet counted in
+     * descriptor_sets_num, so every failure goes through the fail label. */
+
+    { /* Pool each descriptor by type and update pool counts */
+        for (int i = 0; i < num; i++) {
+            int j;
+            for (j = 0; j < s->pool_size_desc_num; j++)
+                if (s->pool_size_desc[j].type == desc[i].type)
+                    break;
+            if (j >= s->pool_size_desc_num) {
+                VkDescriptorPoolSize *tmp;
+
+                tmp = av_realloc_array(s->pool_size_desc,
+                                       s->pool_size_desc_num + 1,
+                                       sizeof(*s->pool_size_desc));
+                if (!tmp) {
+                    err = AVERROR(ENOMEM);
+                    goto fail;
+                }
+                s->pool_size_desc = tmp;
+                /* Count the entry only once the array really holds it */
+                s->pool_size_desc_num++;
+                memset(&s->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize));
+            }
+            s->pool_size_desc[j].type = desc[i].type;
+            s->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1);
+        }
+    }
+
+    { /* Create template creation struct */
+        VkDescriptorUpdateTemplateCreateInfo *dt, *tmp;
+
+        /* Freed after descriptor set initialization */
+        des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry));
+        if (!des_entries) {
+            err = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        for (int i = 0; i < num; i++) {
+            des_entries[i].dstBinding = i;
+            des_entries[i].descriptorType = desc[i].type;
+            des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1);
+            des_entries[i].dstArrayElement = 0;
+            /* Update data is read relative to the filter context, which is
+             * what ff_vk_update_descriptor_set() passes as the data pointer */
+            des_entries[i].offset = ((uint8_t *)desc[i].updater) - (uint8_t *)s;
+            des_entries[i].stride = descriptor_props[desc[i].type].struct_size;
+        }
+
+        /* Check the array that was actually reallocated */
+        tmp = av_realloc_array(s->desc_template_info,
+                               s->descriptor_sets_num + 1,
+                               sizeof(*s->desc_template_info));
+        if (!tmp) {
+            err = AVERROR(ENOMEM);
+            goto fail;
+        }
+        s->desc_template_info = tmp;
+
+        dt = &s->desc_template_info[s->descriptor_sets_num];
+        memset(dt, 0, sizeof(*dt));
+
+        dt->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
+        dt->templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
+        dt->descriptorSetLayout = *layout;
+        dt->pDescriptorUpdateEntries = des_entries;
+        dt->descriptorUpdateEntryCount = num;
+    }
+
+    s->descriptor_sets_num++;
+
+print:
+    /* Write shader info */
+    for (int i = 0; i < num; i++) {
+        const struct descriptor_props *prop = &descriptor_props[desc[i].type];
+        GLSLA("layout (set = %i, binding = %i", s->descriptor_sets_num - 1, i);
+
+        if (desc[i].mem_layout)
+            GLSLA(", %s", desc[i].mem_layout);
+        GLSLA(")");
+
+        if (prop->is_uniform)
+            GLSLA(" uniform");
+
+        if (prop->mem_quali && desc[i].mem_quali)
+            GLSLA(" %s", desc[i].mem_quali);
+
+        if (prop->type)
+            GLSLA(" %s", prop->type);
+
+        if (prop->dim_needed)
+            GLSLA("%iD", desc[i].dimensions);
+
+        GLSLA(" %s", desc[i].name);
+
+        if (prop->buf_content)
+            GLSLA(" {\n %s\n}", desc[i].buf_content);
+        else if (desc[i].elems > 0)
+            GLSLA("[%i]", desc[i].elems);
+
+        GLSLA(";\n");
+    }
+
+    return 0;
+
+fail:
+    /* The set was never counted, so nothing else will release these */
+    av_free(des_entries);
+    vkDestroyDescriptorSetLayout(s->hwctx->act_dev, *layout, s->hwctx->alloc);
+    return err;
+}
+
+/**
+ * Update descriptor set set_id through its update template. The filter
+ * context itself is passed as the data pointer the template reads from.
+ */
+void ff_vk_update_descriptor_set(AVFilterContext *avctx, int set_id)
+{
+    VulkanFilterContext *ctx = avctx->priv;
+    AVVulkanDeviceContext *hw = ctx->hwctx;
+
+    VK_LOAD_PFN(hw->inst, vkUpdateDescriptorSetWithTemplateKHR);
+    pfn_vkUpdateDescriptorSetWithTemplateKHR(hw->act_dev,
+                                             ctx->desc_set[set_id],
+                                             ctx->desc_template[set_id], ctx);
+}
+
+/**
+ * Get the image aspect flags used to address a plane of a pixel format.
+ *
+ * @param plane plane index; a negative value selects all planes of a
+ *              multi-plane format
+ * @return VK_IMAGE_ASPECT_COLOR_BIT for single-plane formats, the matching
+ *         VK_IMAGE_ASPECT_PLANE_n_BIT value(s) for multi-plane formats, or 0
+ *         if the format has no planes or the plane index is out of range
+ */
+const enum VkImageAspectFlagBits ff_vk_aspect_flags(enum AVPixelFormat pixfmt,
+                                                    int plane)
+{
+    static const enum VkImageAspectFlagBits m[] = { VK_IMAGE_ASPECT_PLANE_0_BIT,
+                                                    VK_IMAGE_ASPECT_PLANE_1_BIT,
+                                                    VK_IMAGE_ASPECT_PLANE_2_BIT, };
+    const int tot_planes = av_pix_fmt_count_planes(pixfmt);
+    const int max_planes = sizeof(m) / sizeof(m[0]);
+
+    /* Valid plane indices are 0..tot_planes-1 and must also fit the table;
+     * a non-positive count covers both unknown formats and error returns. */
+    if (tot_planes <= 0 || plane >= tot_planes || plane >= max_planes)
+        return 0;
+    if (tot_planes == 1)
+        return VK_IMAGE_ASPECT_COLOR_BIT;
+    if (plane < 0)
+        return m[0] | m[1] | (tot_planes > 2 ? m[2] : 0);
+    return m[plane];
+}
+
+/**
+ * Get the Vulkan format used to represent a single plane of a pixel format.
+ *
+ * Single-plane formats map to their own Vulkan format. Two-plane formats use
+ * a one-channel format for plane 0 and a two-channel format for plane 1.
+ * All other formats use a one-channel format per plane. 8 or 16 bits per
+ * channel are chosen from the component depth.
+ *
+ * @return the plane format, or VK_FORMAT_UNDEFINED if the pixel format has no
+ *         descriptor or plane cannot index its component array
+ */
+const VkFormat ff_vk_plane_rep_fmt(enum AVPixelFormat pixfmt, int plane)
+{
+    const int tot_planes = av_pix_fmt_count_planes(pixfmt);
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pixfmt);
+    int high;
+
+    /* No descriptor (unknown format) or an index outside desc->comp[] */
+    if (!desc || plane < 0 ||
+        plane >= (int)(sizeof(desc->comp) / sizeof(desc->comp[0])))
+        return VK_FORMAT_UNDEFINED;
+
+    high = desc->comp[plane].depth > 8;
+    if (tot_planes == 1) { /* RGB, etc.'s singleplane rep is itself */
+        return av_vkfmt_from_pixfmt(pixfmt);
+    } else if (tot_planes == 2) { /* Must be NV12 or P010 */
+        if (!high)
+            return !plane ? VK_FORMAT_R8_UNORM : VK_FORMAT_R8G8_UNORM;
+        else
+            return !plane ? VK_FORMAT_R16_UNORM : VK_FORMAT_R16G16_UNORM;
+    } else { /* Regular planar YUV */
+        return !high ? VK_FORMAT_R8_UNORM : VK_FORMAT_R16_UNORM;
+    }
+}
+
+/**
+ * Get the GLSL image format qualifier for a pixel format: "rgba16f" when the
+ * first component is deeper than 8 bits, "rgba8" otherwise.
+ */
+const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)
+{
+    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(pixfmt);
+
+    if (pix_desc->comp[0].depth > 8)
+        return "rgba16f";
+    return "rgba8";
+}
+
+/**
+ * Create a 2D image view (one mip level, one array layer) of a frame's image.
+ *
+ * @param v      where the new view handle is written
+ * @param f      frame whose image is viewed
+ * @param fmt    format of the view
+ * @param aspect aspect mask of the view's subresource range
+ * @param map    component mapping of the view
+ * @param pnext  extension chain for the create info, may be NULL
+ * @return 0 on success, AVERROR_EXTERNAL if Vulkan fails to create the view
+ */
+int ff_vk_create_imageview(AVFilterContext *avctx, VkImageView *v, AVVkFrame *f,
+                           VkFormat fmt, enum VkImageAspectFlagBits aspect,
+                           VkComponentMapping map, const void *pnext)
+{
+    VulkanFilterContext *s = avctx->priv;
+    VkImageViewCreateInfo imgview_spawn = {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+        .pNext = pnext,
+        .image = f->img,
+        .viewType = VK_IMAGE_VIEW_TYPE_2D,
+        .format = fmt,
+        .components = map,
+        .subresourceRange = {
+            .aspectMask = aspect,
+            .baseMipLevel = 0,
+            .levelCount = 1,
+            .baseArrayLayer = 0,
+            .layerCount = 1,
+        },
+    };
+
+    VkResult ret = vkCreateImageView(s->hwctx->act_dev, &imgview_spawn,
+                                     s->hwctx->alloc, v);
+    if (ret != VK_SUCCESS) {
+        /* Log against the filter context like every other helper here */
+        av_log(avctx, AV_LOG_ERROR, "Failed to create imageview: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+/**
+ * Destroy an image view on the filter's device, using the device context's
+ * allocator.
+ */
+void ff_vk_destroy_imageview(AVFilterContext *avctx, VkImageView v)
+{
+    VulkanFilterContext *ctx = avctx->priv;
+    AVVulkanDeviceContext *hw = ctx->hwctx;
+
+    vkDestroyImageView(hw->act_dev, v, hw->alloc);
+}
+
+/**
+ * Turn the descriptor sets and push constants registered so far into Vulkan
+ * objects: the descriptor pool, the descriptor sets, the pipeline layout and
+ * one descriptor update template per set. The temporary pool size, push
+ * constant and template description arrays are released along the way.
+ *
+ * @return 0 on success, a negative AVERROR code otherwise
+ */
+int ff_vk_init_pipeline_layout(AVFilterContext *avctx)
+{
+    VkResult ret;
+    VulkanFilterContext *s = avctx->priv;
+
+    { /* Init descriptor set pool */
+        VkDescriptorPoolCreateInfo pool_create_info = {
+            .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+            .poolSizeCount = s->pool_size_desc_num,
+            .pPoolSizes = s->pool_size_desc,
+            .maxSets = s->descriptor_sets_num,
+        };
+
+        ret = vkCreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
+                                     s->hwctx->alloc, &s->desc_pool);
+        av_freep(&s->pool_size_desc);
+        /* Keep the count consistent with the array that was just freed */
+        s->pool_size_desc_num = 0;
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set "
+                   "pool: %s\n", ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    { /* Allocate descriptor sets */
+        VkDescriptorSetAllocateInfo alloc_info = {
+            .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+            .descriptorPool = s->desc_pool,
+            .descriptorSetCount = s->descriptor_sets_num,
+            .pSetLayouts = s->desc_layout,
+        };
+
+        s->desc_set = av_mallocz(s->descriptor_sets_num*sizeof(*s->desc_set));
+        if (!s->desc_set)
+            return AVERROR(ENOMEM);
+
+        ret = vkAllocateDescriptorSets(s->hwctx->act_dev, &alloc_info,
+                                       s->desc_set);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    { /* Finally create the pipeline layout */
+        VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
+            .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+            .setLayoutCount = s->descriptor_sets_num,
+            .pSetLayouts = s->desc_layout,
+            .pushConstantRangeCount = s->push_consts_num,
+            .pPushConstantRanges = s->push_consts,
+        };
+
+        ret = vkCreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout,
+                                     s->hwctx->alloc, &s->pipeline_layout);
+        av_freep(&s->push_consts);
+        s->push_consts_num = 0;
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    { /* Descriptor template (for tightly packed descriptors) */
+        VkDescriptorUpdateTemplateCreateInfo *desc_template_info;
+        VK_LOAD_PFN(s->hwctx->inst, vkCreateDescriptorUpdateTemplateKHR);
+
+        /* Zero-initialised so that templates which were never created are
+         * null handles when ff_vk_filter_uninit() destroys the whole array */
+        s->desc_template = av_mallocz(s->descriptor_sets_num*sizeof(*s->desc_template));
+        if (!s->desc_template)
+            return AVERROR(ENOMEM);
+
+        /* Create update templates for the descriptor sets */
+        for (int i = 0; i < s->descriptor_sets_num; i++) {
+            desc_template_info = &s->desc_template_info[i];
+            desc_template_info->pipelineLayout = s->pipeline_layout;
+            ret = pfn_vkCreateDescriptorUpdateTemplateKHR(s->hwctx->act_dev,
+                                                          desc_template_info,
+                                                          s->hwctx->alloc,
+                                                          &s->desc_template[i]);
+            av_free((void *)desc_template_info->pDescriptorUpdateEntries);
+            /* Clear the freed pointer: on failure desc_template_info stays
+             * allocated and ff_vk_filter_uninit() frees every entry again */
+            desc_template_info->pDescriptorUpdateEntries = NULL;
+            if (ret != VK_SUCCESS) {
+                av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor "
+                       "template: %s\n", ff_vk_ret2str(ret));
+                return AVERROR_EXTERNAL;
+            }
+        }
+
+        av_freep(&s->desc_template_info);
+    }
+
+    return 0;
+}
+
+/**
+ * Create the compute pipeline from the first registered shader whose stage
+ * includes VK_SHADER_STAGE_COMPUTE_BIT, using the filter's pipeline layout.
+ *
+ * @return 0 on success, AVERROR(EINVAL) if there is no compute shader,
+ *         AVERROR_EXTERNAL if Vulkan fails to create the pipeline
+ */
+int ff_vk_init_compute_pipeline(AVFilterContext *avctx)
+{
+    VulkanFilterContext *ctx = avctx->priv;
+    const SPIRVShader *compute = NULL;
+    VkResult res;
+
+    VkComputePipelineCreateInfo pipe = {
+        .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+        .layout = ctx->pipeline_layout,
+    };
+
+    /* Stop at the first shader with the compute stage bit */
+    for (int idx = 0; idx < ctx->shaders_num && !compute; idx++)
+        if (ctx->shaders[idx].shader.stage & VK_SHADER_STAGE_COMPUTE_BIT)
+            compute = &ctx->shaders[idx];
+
+    if (!compute) {
+        av_log(avctx, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n");
+        return AVERROR(EINVAL);
+    }
+    pipe.stage = compute->shader;
+
+    res = vkCreateComputePipelines(ctx->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe,
+                                   ctx->hwctx->alloc, &ctx->pipeline);
+    if (res != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n",
+               ff_vk_ret2str(res));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
diff --git a/libavfilter/vulkan.h b/libavfilter/vulkan.h
new file mode 100644
index 0000000000..cac06f6920
--- /dev/null
+++ b/libavfilter/vulkan.h
@@ -0,0 +1,223 @@
+/*
+ * Vulkan utilities
+ * Copyright (c) 2018 Rostislav Pehlivanov <***@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_VULKAN_COMMON_H
+#define AVFILTER_VULKAN_COMMON_H
+
+#include "avfilter.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/bprint.h"
+#include "libavutil/hwcontext.h"
+#include "libavutil/hwcontext_vulkan.h"
+
+#include <shaderc/shaderc.h>
+
+/* GLSL management macros */
+#define INDENT(N) INDENT_##N
+#define INDENT_0
+#define INDENT_1 INDENT_0 " "
+#define INDENT_2 INDENT_1 INDENT_1
+#define INDENT_3 INDENT_2 INDENT_1
+#define INDENT_4 INDENT_3 INDENT_1
+#define INDENT_5 INDENT_4 INDENT_1
+#define INDENT_6 INDENT_5 INDENT_1
+#define C(N, S) INDENT(N) #S "\n"
+#define GLSLC(N, S) av_bprintf(&shd->src, C(N, S))
+#define GLSLA(...) av_bprintf(&shd->src, __VA_ARGS__)
+#define GLSLF(N, S, ...) av_bprintf(&shd->src, C(N, S), __VA_ARGS__)
+#define GLSLD(D) GLSLC(0, ); \
+ av_bprint_append_data(&shd->src, D, strlen(D)); \
+ GLSLC(0, )
+
+/* Error propagation helper: evaluates x, stores the result in a variable
+ * named "err" and jumps to a label named "fail" if it is negative. Both must
+ * exist in the calling function. */
+#define RET(x) \
+ do { \
+ if ((err = (x)) < 0) \
+ goto fail; \
+ } while (0)
+
+/* Useful for attaching immutable samplers to arrays: expands to a compound
+ * literal array holding four copies of x (x is evaluated four times) */
+#define DUP_SAMPLER_ARRAY4(x) (const VkSampler []){ x, x, x, x, }
+
+/* One shader: its GLSL source while it is being assembled and the Vulkan
+ * stage description that receives the compiled module. */
+typedef struct SPIRVShader {
+ const char *name; /* Name for id/debugging purposes */
+ AVBPrint src; /* GLSL source, appended to by the GLSL* macros */
+ int local_size[3]; /* Compute shader workgroup sizes */
+ VkPipelineShaderStageCreateInfo shader; /* Stage, entry point and module */
+} SPIRVShader;
+
+/* Description of one binding passed to ff_vk_add_descriptor_set(). It is
+ * used both for the Vulkan descriptor set layout and for the GLSL
+ * declaration printed into the shader. */
+typedef struct VulkanDescriptorSetBinding {
+ const char *name; /* Variable name in the shader */
+ VkDescriptorType type;
+ const char *mem_layout; /* Storage images (rgba8, etc.) and buffers (std430, etc.) */
+ const char *mem_quali; /* readonly, writeonly, etc. */
+ const char *buf_content; /* For buffers */
+ uint32_t dimensions; /* Needed for e.g. sampler%iD */
+ uint32_t elems; /* 0 - scalar, 1 or more - vector */
+ VkShaderStageFlags stages; /* Shader stages the binding is visible to */
+ const VkSampler *samplers; /* Immutable samplers, length - #elems */
+ void *updater; /* Update data; stored as an offset from the VulkanFilterContext */
+} VulkanDescriptorSetBinding;
+
+/* A sampler created by ff_vk_init_sampler(), together with its optional
+ * YCbCr conversion. */
+typedef struct VulkanSampler {
+ VkSampler sampler;
+ VkSamplerYcbcrConversionInfo yuv_conv; /* For imageview creation */
+ int converting; /* Indicates whether sampler is a converting one */
+} VulkanSampler;
+
+/* Command submission state set up by ff_vk_create_exec_ctx() and released
+ * by ff_vk_free_exec_ctx(). */
+typedef struct FFVkExecContext {
+ VkCommandPool pool; /* Pool that buf is freed back to */
+ VkCommandBuffer buf;
+ VkQueue queue; /* NOTE(review): presumably the queue buf is submitted to - confirm */
+ VkFence fence;
+} FFVkExecContext;
+
+/* A Vulkan buffer handled by ff_vk_create_buf() / ff_vk_free_buf(). */
+typedef struct FFVkBuffer {
+ VkBuffer buf;
+ VkDeviceMemory mem; /* NOTE(review): presumably the memory backing buf - confirm */
+ VkMemoryPropertyFlagBits flags; /* NOTE(review): presumably the properties of mem - confirm */
+} FFVkBuffer;
+
+/* State shared by the Vulkan filters, accessed through the filter's priv
+ * pointer. Descriptor update data is addressed as an offset from the start
+ * of this struct (see ff_vk_add_descriptor_set()). */
+typedef struct VulkanFilterContext {
+ const AVClass *class;
+
+ AVBufferRef *device_ref; /* Reference to the hardware device in use */
+ AVBufferRef *frames_ref; /* For in-place filtering */
+ AVHWDeviceContext *device; /* Taken from device_ref->data */
+ AVVulkanDeviceContext *hwctx; /* Taken from device->hwctx */
+
+ /* Properties */
+ int output_width; /* 0 until set; defaults to the first input's width */
+ int output_height; /* 0 until set; defaults to the first input's height */
+ enum AVPixelFormat output_format; /* AV_PIX_FMT_NONE until set; defaults to input_format */
+ enum AVPixelFormat input_format; /* sw_format of the first input */
+
+ /* Samplers */
+ VulkanSampler *samplers;
+ int samplers_num;
+
+ /* Shaders */
+ SPIRVShader *shaders;
+ int shaders_num;
+ shaderc_compiler_t sc_compiler;
+ shaderc_compile_options_t sc_opts;
+
+ /* Contexts */
+ VkRenderPass renderpass;
+ VkPipelineLayout pipeline_layout;
+ VkPipeline pipeline;
+
+ /* Descriptors */
+ VkDescriptorSetLayout *desc_layout; /* One per registered descriptor set */
+ VkDescriptorPool desc_pool;
+ VkDescriptorSet *desc_set; /* Allocated by ff_vk_init_pipeline_layout() */
+ VkDescriptorUpdateTemplate *desc_template; /* Allocated by ff_vk_init_pipeline_layout() */
+ int push_consts_num;
+ int descriptor_sets_num;
+ int pool_size_desc_num;
+
+ /* Vertex buffer */
+ FFVkBuffer vbuffer;
+ int num_verts;
+
+ /* Temporary, used to store data in between initialization stages */
+ VkDescriptorUpdateTemplateCreateInfo *desc_template_info;
+ VkDescriptorPoolSize *pool_size_desc;
+ VkPushConstantRange *push_consts;
+ void *scratch; /* Scratch memory used only in functions */
+ unsigned int scratch_size;
+} VulkanFilterContext;
+
+/* Generic memory allocation.
+ * Will align size to the minimum map alignment requirement in case req_flags
+ * has VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT set */
+int ff_vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req,
+ VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+ VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem);
+
+/* Buffer I/O */
+int ff_vk_create_buf(AVFilterContext *avctx, FFVkBuffer *buf, size_t size,
+ VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
+int ff_vk_map_buffers(AVFilterContext *avctx, FFVkBuffer *buf, uint8_t *mem[],
+ int nb_buffers, int invalidate);
+int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers,
+ int flush);
+void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf);
+
+/* Command context init/uninit */
+int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e, int queue);
+void ff_vk_free_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e);
+
+/* Converts Vulkan return values to strings */
+const char *ff_vk_ret2str(VkResult res);
+
+/* Create a Vulkan sampler, if input isn't NULL the sampler will convert to RGB */
+const VulkanSampler *ff_vk_init_sampler(AVFilterContext *avctx, AVFrame *input,
+ int unnorm_coords, VkFilter filt);
+
+/* Gets the single-plane representation format */
+const VkFormat ff_vk_plane_rep_fmt(enum AVPixelFormat pixfmt, int plane);
+/* Gets the glsl format for an image */
+const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
+/* Gets the image aspect flags of a plane */
+const enum VkImageAspectFlagBits ff_vk_aspect_flags(enum AVPixelFormat pixfmt,
+ int plane);
+/* Creates an imageview */
+int ff_vk_create_imageview(AVFilterContext *avctx, VkImageView *v, AVVkFrame *f,
+ VkFormat fmt, enum VkImageAspectFlagBits aspect,
+ VkComponentMapping map, const void *pnext);
+/* Destroys an imageview */
+void ff_vk_destroy_imageview(AVFilterContext *avctx, VkImageView v);
+/* Creates a shader */
+SPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, const char *name,
+ VkShaderStageFlags stage);
+/* For compute shaders, defines the workgroup size */
+void ff_vk_set_compute_shader_sizes(AVFilterContext *avctx, SPIRVShader *shd,
+ int local_size[3]);
+/* Compiles a completed shader into a module */
+int ff_vk_compile_shader(AVFilterContext *avctx, SPIRVShader *shd,
+ const char *entry);
+
+/* Needs to be abstracted so it adds them to a certain pipeline layout */
+int ff_vk_add_descriptor_set(AVFilterContext *avctx, SPIRVShader *shd,
+ VulkanDescriptorSetBinding *desc, int num,
+ int only_print_to_shader);
+int ff_vk_add_push_constant(AVFilterContext *avctx, int offset, int size,
+ VkShaderStageFlagBits stage);
+
+/* Creates a Vulkan pipeline layout */
+int ff_vk_init_pipeline_layout(AVFilterContext *avctx);
+
+/* Creates a compute pipeline */
+int ff_vk_init_compute_pipeline(AVFilterContext *avctx);
+
+/* Updates a given descriptor set after pipeline initialization */
+void ff_vk_update_descriptor_set(AVFilterContext *avctx, int set_id);
+
+/* General lavfi IO functions */
+int ff_vk_filter_query_formats (AVFilterContext *avctx);
+int ff_vk_filter_init (AVFilterContext *avctx);
+int ff_vk_filter_config_input (AVFilterLink *inlink);
+int ff_vk_filter_config_output (AVFilterLink *outlink);
+int ff_vk_filter_config_output_inplace(AVFilterLink *outlink);
+void ff_vk_filter_uninit (AVFilterContext *avctx);
+
+#endif /* AVFILTER_VULKAN_COMMON_H */
--
2.17.0
James Almer
2018-05-23 04:44:29 UTC
Permalink
Post by Rostislav Pehlivanov
This commit adds common code for use in Vulkan filters. It attempts
to ease the burden of writing Vulkan image filtering to a minimum,
which is pretty much a requirement considering how verbose the API is.
It supports both compute and graphic pipelines and manages to abstract
the API to such a level there's no need to call any Vulkan functions
inside the init path of the code. Handling shader descriptors is probably
the bulk of the code, and despite the abstraction, it loses none of the
features for describing shader IO.
In order to produce linkable shaders, it depends on the libshaderc
library (and depends on the latest stable version of it). This allows
for greater performance and flexibility than static built-in shaders
and also eliminates the cumbersome process of interfacing with glslang
to compile GLSL to SPIR-V.
It's based off of the common opencl and provides similar interfaces for
filter pad init and config, with the addition that it also supports
in-place filtering.
---
configure | 12 +-
libavfilter/vulkan.c | 1190 ++++++++++++++++++++++++++++++++++++++++++
libavfilter/vulkan.h | 223 ++++++++
3 files changed, 1423 insertions(+), 2 deletions(-)
create mode 100644 libavfilter/vulkan.c
create mode 100644 libavfilter/vulkan.h
diff --git a/configure b/configure
index 5f4407b753..abcfe32625 100755
--- a/configure
+++ b/configure
--enable-librsvg enable SVG rasterization via librsvg [no]
--enable-librubberband enable rubberband needed for rubberband filter [no]
--enable-librtmp enable RTMP[E] support via librtmp [no]
+ --enable-libshaderc enable GLSL->SPIRV compilation via libshaderc [no]
--enable-libshine enable fixed-point MP3 encoding via libshine [no]
--enable-libsmbclient enable Samba protocol via libsmbclient [no]
--enable-libsnappy enable Snappy compression, needed for hap encoding [no]
@@ -1707,6 +1708,7 @@ EXTERNAL_LIBRARY_LIST="
libpulse
librsvg
librtmp
+ libshaderc
libshine
libsmbclient
libsnappy
@@ -2225,6 +2227,7 @@ HAVE_LIST="
opencl_dxva2
opencl_vaapi_beignet
opencl_vaapi_intel_media
+ shaderc_opt_perf
vulkan_drm_mod
perl
pod2man
@@ -3456,12 +3459,12 @@ avcodec_select="null_bsf"
avdevice_deps="avformat avcodec avutil"
avdevice_suggest="libm"
avfilter_deps="avutil"
-avfilter_suggest="libm"
+avfilter_suggest="libm libshaderc"
If libavfilter/vulkan.c (which I assume uses libshaderc functions) is only
going to be built if one or more of the filters is also built, then this
is unnecessary. The libshaderc dep on each filter is enough.
Post by Rostislav Pehlivanov
avformat_deps="avcodec avutil"
avformat_suggest="libm network zlib"
avresample_deps="avutil"
avresample_suggest="libm"
-avutil_suggest="clock_gettime ffnvcodec libm libdrm libmfx opencl user32 vaapi videotoolbox corefoundation corevideo coremedia bcrypt"
+avutil_suggest="clock_gettime ffnvcodec libm libdrm libmfx opencl vulkan user32 vaapi videotoolbox corefoundation corevideo coremedia bcrypt"
This one belongs in patch 3/8, otherwise hwcontext_vulkan will fail to link.
Post by Rostislav Pehlivanov
postproc_deps="avutil gpl"
postproc_suggest="libm"
swresample_deps="avutil"
@@ -6050,6 +6053,7 @@ enabled libpulse && require_pkg_config libpulse libpulse pulse/pulseaud
enabled librsvg && require_pkg_config librsvg librsvg-2.0 librsvg-2.0/librsvg/rsvg.h rsvg_handle_render_cairo
enabled librtmp && require_pkg_config librtmp librtmp librtmp/rtmp.h RTMP_Socket
enabled librubberband && require_pkg_config librubberband "rubberband >= 1.8.1" rubberband/rubberband-c.h rubberband_new -lstdc++ && append librubberband_extralibs "-lstdc++"
+enabled libshaderc && require libshaderc shaderc/shaderc.h shaderc_compiler_initialize -lshaderc_shared
enabled libshine && require_pkg_config libshine shine shine/layer3.h shine_encode_buffer
enabled libsmbclient && { check_pkg_config libsmbclient smbclient libsmbclient.h smbc_init ||
require libsmbclient libsmbclient.h smbc_init -lsmbclient; }
@@ -6355,6 +6359,10 @@ enabled crystalhd && check_lib crystalhd "stdint.h libcrystalhd/libcrystalhd_if.
enabled vulkan &&
require_pkg_config vulkan "vulkan >= 1.1.73" "vulkan/vulkan.h" vkCreateInstance
+if enabled_all vulkan libshaderc ; then
+ check_cc shaderc_opt_perf shaderc/shaderc.h "int t = shaderc_optimization_level_performance"
+fi
+
if enabled_all vulkan libdrm ; then
check_cpp_condition vulkan_drm_mod vulkan/vulkan.h "defined VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME"
fi
Should be good otherwise.
Rostislav Pehlivanov
2018-05-22 02:46:16 UTC
Permalink
Could be done in-place with the main image but framesync segfaults.

Signed-off-by: Rostislav Pehlivanov <***@gmail.com>
---
configure | 1 +
libavfilter/Makefile | 1 +
libavfilter/allfilters.c | 1 +
libavfilter/vf_overlay_vulkan.c | 461 ++++++++++++++++++++++++++++++++
4 files changed, 464 insertions(+)
create mode 100644 libavfilter/vf_overlay_vulkan.c

diff --git a/configure b/configure
index 3b29cd123a..8806a66668 100755
--- a/configure
+++ b/configure
@@ -3365,6 +3365,7 @@ ocr_filter_deps="libtesseract"
ocv_filter_deps="libopencv"
openclsrc_filter_deps="opencl"
overlay_opencl_filter_deps="opencl"
+overlay_vulkan_filter_deps="vulkan libshaderc"
overlay_qsv_filter_deps="libmfx"
overlay_qsv_filter_select="qsvvpp"
owdenoise_filter_deps="gpl"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index e8c5438c78..bfbd679a44 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -276,6 +276,7 @@ OBJS-$(CONFIG_OSCILLOSCOPE_FILTER) += vf_datascope.o
OBJS-$(CONFIG_OVERLAY_FILTER) += vf_overlay.o framesync.o
OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER) += vf_overlay_opencl.o opencl.o \
opencl/overlay.o framesync.o
+OBJS-$(CONFIG_OVERLAY_VULKAN_FILTER) += vf_overlay_vulkan.o vulkan.o framesync.o
OBJS-$(CONFIG_OVERLAY_QSV_FILTER) += vf_overlay_qsv.o framesync.o
OBJS-$(CONFIG_OWDENOISE_FILTER) += vf_owdenoise.o
OBJS-$(CONFIG_PAD_FILTER) += vf_pad.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index f9dce32f4d..85383f234a 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -267,6 +267,7 @@ extern AVFilter ff_vf_ocv;
extern AVFilter ff_vf_oscilloscope;
extern AVFilter ff_vf_overlay;
extern AVFilter ff_vf_overlay_opencl;
+extern AVFilter ff_vf_overlay_vulkan;
extern AVFilter ff_vf_overlay_qsv;
extern AVFilter ff_vf_owdenoise;
extern AVFilter ff_vf_pad;
diff --git a/libavfilter/vf_overlay_vulkan.c b/libavfilter/vf_overlay_vulkan.c
new file mode 100644
index 0000000000..a7d8cc3cf2
--- /dev/null
+++ b/libavfilter/vf_overlay_vulkan.c
@@ -0,0 +1,461 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "vulkan.h"
+#include "internal.h"
+#include "framesync.h"
+
+typedef struct OverlayVulkanContext { /* private state of the overlay_vulkan filter */
+ VulkanFilterContext vkctx; /* common Vulkan filter state (formats, output size, pipeline, shaders) */
+
+ int initialized; /* set to 1 at the end of init_filter(), cleared in uninit */
+ FFVkExecContext exec; /* provides the command buffer, queue and fence used per frame */
+ FFFrameSync fs; /* pairs frames of the "main" and "overlay" inputs */
+ FFVkBuffer params_buf; /* buffer holding the per-plane overlay offsets (o_offset) */
+
+ /* Shader updaters, must be in the main filter struct */
+ VkDescriptorImageInfo main_images[3]; /* one image view per plane of the main input */
+ VkDescriptorImageInfo overlay_images[3]; /* one image view per plane of the overlay input */
+ VkDescriptorImageInfo output_images[3]; /* one image view per plane of the output frame */
+ VkDescriptorBufferInfo params_desc; /* descriptor pointing at params_buf */
+
+ int overlay_x; /* "x" option: horizontal offset of the overlay */
+ int overlay_y; /* "y" option: vertical offset of the overlay */
+} OverlayVulkanContext;
+
+static const char overlay_noalpha[] = { /* GLSL helper prepended to the compute shader via GLSLD() */
+ C(0, void overlay_noalpha(int i, ivec2 pos) ) /* i: plane index, pos: pixel position */
+ C(0, { )
+ C(1, ivec2 overlay_size = imageSize(overlay_img[i]); )
+ C(1, if ((o_offset[i].x <= pos.x) && (o_offset[i].y <= pos.y) &&
(pos.x < (o_offset[i].x + overlay_size.x)) &&
(pos.y < (o_offset[i].y + overlay_size.y))) { ) /* pos lies inside the overlay rectangle */
+ C(2, vec4 res = imageLoad(overlay_img[i], pos - o_offset[i]); )
+ C(2, imageStore(output_img[i], pos, res); ) /* overlay pixel replaces the main pixel, no blending */
+ C(1, } else { )
+ C(2, vec4 res = imageLoad(main_img[i], pos); )
+ C(2, imageStore(output_img[i], pos, res); ) /* outside the overlay: copy the main pixel */
+ C(1, } )
+ C(0, } )
+};
+
+/**
+ * One-time setup of the overlay compute pipeline; called from
+ * overlay_vulkan_blend() while s->initialized is 0.
+ *
+ * Builds and compiles the "overlay_compute" shader (descriptor set 0: the
+ * main/overlay/output storage images, set 1: the offsets buffer), fills the
+ * offsets buffer from the "x"/"y" options, then creates the execution
+ * context and the compute pipeline.
+ *
+ * @param ctx filter context whose priv data is an OverlayVulkanContext
+ * @return 0 on success, otherwise the error code of the failing helper
+ */
+static av_cold int init_filter(AVFilterContext *ctx)
+{
+    int err;
+    OverlayVulkanContext *s = ctx->priv;
+
+    { /* Create the shader */
+        const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+
+        SPIRVShader *shd = ff_vk_init_shader(ctx, "overlay_compute",
+                                             VK_SHADER_STAGE_COMPUTE_BIT);
+        ff_vk_set_compute_shader_sizes(ctx, shd, (int [3]){ 16, 16, 1 });
+
+        VulkanDescriptorSetBinding desc_i[3] = {
+            {
+                .name       = "main_img",
+                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+                .mem_quali  = "readonly",
+                .dimensions = 2,
+                .elems      = planes,
+                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+                .updater    = s->main_images,
+            },
+            {
+                .name       = "overlay_img",
+                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+                .mem_quali  = "readonly",
+                .dimensions = 2,
+                .elems      = planes,
+                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+                .updater    = s->overlay_images,
+            },
+            {
+                .name       = "output_img",
+                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+                .mem_quali  = "writeonly",
+                .dimensions = 2,
+                .elems      = planes,
+                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+                .updater    = s->output_images,
+            },
+        };
+
+        VulkanDescriptorSetBinding desc_b = {
+            .name        = "params",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .mem_quali   = "readonly",
+            .mem_layout  = "std430",
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .updater     = &s->params_desc,
+            .buf_content = "ivec2 o_offset[3];",
+        };
+
+        RET(ff_vk_add_descriptor_set(ctx, shd, desc_i, 3, 0)); /* set 0 */
+        RET(ff_vk_add_descriptor_set(ctx, shd, &desc_b, 1, 0)); /* set 1 */
+
+        GLSLD(   overlay_noalpha                                          );
+        GLSLC(0, void main()                                              );
+        GLSLC(0, {                                                        );
+        GLSLC(1,     ivec2 pos = ivec2(gl_GlobalInvocationID.xy);         );
+        GLSLF(1,     int planes = %i;                             ,planes);
+        GLSLC(1,     for (int i = 0; i < planes; i++) {                   );
+        GLSLC(2,         overlay_noalpha(i, pos);                         );
+        GLSLC(1,     }                                                    );
+        GLSLC(0, }                                                        );
+
+        RET(ff_vk_compile_shader(ctx, shd, "main"));
+    }
+
+    RET(ff_vk_init_pipeline_layout(ctx));
+
+    {
+        const AVPixFmtDescriptor *desc;
+        /* Mirrors "ivec2 o_offset[3]" in the shader: three (x, y) pairs. */
+        struct {
+            int32_t o_offset[2*3];
+        } *par;
+
+        /* The buffer is bound through a VK_DESCRIPTOR_TYPE_STORAGE_BUFFER
+         * descriptor (desc_b above), so it has to be created with the
+         * storage-buffer usage bit rather than the vertex-buffer one. */
+        err = ff_vk_create_buf(ctx, &s->params_buf,
+                               sizeof(*par),
+                               VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+                               VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+        if (err)
+            return err;
+
+        err = ff_vk_map_buffers(ctx, &s->params_buf, (uint8_t **)&par, 1, 0);
+        if (err)
+            return err;
+
+        desc = av_pix_fmt_desc_get(s->vkctx.output_format);
+
+        /* Plane 0 uses the offsets as given; the other two planes use them
+         * scaled down by the chroma subsampling shifts. */
+        par->o_offset[0] = s->overlay_x;
+        par->o_offset[1] = s->overlay_y;
+        par->o_offset[2] = par->o_offset[0] >> desc->log2_chroma_w;
+        par->o_offset[3] = par->o_offset[1] >> desc->log2_chroma_h;
+        par->o_offset[4] = par->o_offset[0] >> desc->log2_chroma_w;
+        par->o_offset[5] = par->o_offset[1] >> desc->log2_chroma_h;
+
+        err = ff_vk_unmap_buffers(ctx, &s->params_buf, 1, 1);
+        if (err)
+            return err;
+
+        s->params_desc.buffer = s->params_buf.buf;
+        s->params_desc.range  = VK_WHOLE_SIZE;
+
+        ff_vk_update_descriptor_set(ctx, 1);
+    }
+
+    /* Execution context */
+    RET(ff_vk_create_exec_ctx(ctx, &s->exec,
+                              s->vkctx.hwctx->queue_family_comp_index));
+
+    /* The pipeline */
+    RET(ff_vk_init_compute_pipeline(ctx));
+
+    s->initialized = 1;
+
+    return 0;
+
+fail:
+    return err;
+}
+
+/**
+ * Run the overlay compute shader once on a main/overlay frame pair.
+ *
+ * Creates one image view per plane for each of the three frames, updates
+ * descriptor set 0, records a command buffer (layout transitions, pipeline
+ * and descriptor binding, dispatch), submits it and waits on the fence.
+ * Every image view created by this call is destroyed before returning, on
+ * success and on failure alike.
+ *
+ * @param avctx     filter context (priv is an OverlayVulkanContext)
+ * @param out_f     frame to write; data[0] is used as an AVVkFrame
+ * @param main_f    main input frame; data[0] is used as an AVVkFrame
+ * @param overlay_f overlay input frame; data[0] is used as an AVVkFrame
+ * @return a non-negative value on success, a negative AVERROR code on failure
+ */
+static int process_frames(AVFilterContext *avctx, AVFrame *out_f,
+                          AVFrame *main_f, AVFrame *overlay_f)
+{
+    int err = 0;
+    /* Number of image views successfully created so far, per array. */
+    int nb_main = 0, nb_overlay = 0, nb_out = 0;
+    OverlayVulkanContext *s = avctx->priv;
+    int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+
+    AVVkFrame *out     = (AVVkFrame *)out_f->data[0];
+    AVVkFrame *main    = (AVVkFrame *)main_f->data[0];
+    AVVkFrame *overlay = (AVVkFrame *)overlay_f->data[0];
+
+    AVHWFramesContext *main_fc    = (AVHWFramesContext*)main_f->hw_frames_ctx->data;
+    AVHWFramesContext *overlay_fc = (AVHWFramesContext*)overlay_f->hw_frames_ctx->data;
+
+    VkCommandBufferBeginInfo cmd_start = {
+        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+    };
+
+    VkComponentMapping null_map = {
+        .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+        .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+        .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+        .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+    };
+
+    for (int i = 0; i < planes; i++) {
+        RET(ff_vk_create_imageview(avctx, &s->main_images[i].imageView, main,
+                                   ff_vk_plane_rep_fmt(main_fc->sw_format, i),
+                                   ff_vk_aspect_flags(main_fc->sw_format, i),
+                                   null_map, NULL));
+        nb_main++;
+
+        RET(ff_vk_create_imageview(avctx, &s->overlay_images[i].imageView, overlay,
+                                   ff_vk_plane_rep_fmt(overlay_fc->sw_format, i),
+                                   ff_vk_aspect_flags(overlay_fc->sw_format, i),
+                                   null_map, NULL));
+        nb_overlay++;
+
+        RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out,
+                                   ff_vk_plane_rep_fmt(s->vkctx.output_format, i),
+                                   ff_vk_aspect_flags(s->vkctx.output_format, i),
+                                   null_map, NULL));
+        nb_out++;
+
+        s->main_images[i].imageLayout    = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+        s->overlay_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+        s->output_images[i].imageLayout  = VK_IMAGE_LAYOUT_GENERAL;
+    }
+
+    ff_vk_update_descriptor_set(avctx, 0);
+
+    vkBeginCommandBuffer(s->exec.buf, &cmd_start);
+
+    {
+        /* Move both inputs to a shader-readable layout and the output to
+         * the general layout before the compute stage runs. */
+        VkImageMemoryBarrier bar[3] = {
+            {
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .srcAccessMask = 0,
+                .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
+                .oldLayout = main->layout,
+                .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = main->img,
+                .subresourceRange.aspectMask = ff_vk_aspect_flags(main_fc->sw_format, -1),
+                .subresourceRange.levelCount = 1,
+                .subresourceRange.layerCount = 1,
+            },
+            {
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .srcAccessMask = 0,
+                .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
+                .oldLayout = overlay->layout,
+                .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = overlay->img,
+                .subresourceRange.aspectMask = ff_vk_aspect_flags(overlay_fc->sw_format, -1),
+                .subresourceRange.levelCount = 1,
+                .subresourceRange.layerCount = 1,
+            },
+            {
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .srcAccessMask = 0,
+                .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+                .oldLayout = out->layout,
+                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = out->img,
+                .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.output_format, -1),
+                .subresourceRange.levelCount = 1,
+                .subresourceRange.layerCount = 1,
+            },
+        };
+
+        vkCmdPipelineBarrier(s->exec.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                             VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
+                             0, NULL, 0, NULL, 3, bar);
+
+        /* Remember the new layout/access so later users start from it. */
+        main->layout = bar[0].newLayout;
+        main->access = bar[0].dstAccessMask;
+
+        overlay->layout = bar[1].newLayout;
+        overlay->access = bar[1].dstAccessMask;
+
+        out->layout = bar[2].newLayout;
+        out->access = bar[2].dstAccessMask;
+    }
+
+    vkCmdBindPipeline(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline);
+    vkCmdBindDescriptorSets(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline_layout, 0, s->vkctx.descriptor_sets_num, s->vkctx.desc_set, 0, 0);
+    /* One workgroup per local_size tile, rounded up to cover the output. */
+    vkCmdDispatch(s->exec.buf,
+                  FFALIGN(s->vkctx.output_width,  s->vkctx.shaders[0].local_size[0])/s->vkctx.shaders[0].local_size[0],
+                  FFALIGN(s->vkctx.output_height, s->vkctx.shaders[0].local_size[1])/s->vkctx.shaders[0].local_size[1], 1);
+
+    vkEndCommandBuffer(s->exec.buf);
+
+    VkSubmitInfo s_info = {
+        .sType              = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+        .commandBufferCount = 1,
+        .pCommandBuffers    = &s->exec.buf,
+    };
+
+    VkResult ret = vkQueueSubmit(s->exec.queue, 1, &s_info, s->exec.fence);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
+               ff_vk_ret2str(ret));
+        /* Go through the cleanup below instead of leaking the image views. */
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    vkWaitForFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence, VK_TRUE, UINT64_MAX);
+    vkResetFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence);
+
+fail:
+    /* Only destroy what this call created; slots past the counters may
+     * still hold handles from an earlier frame that were already destroyed. */
+    for (int i = 0; i < nb_main; i++)
+        ff_vk_destroy_imageview(avctx, s->main_images[i].imageView);
+    for (int i = 0; i < nb_overlay; i++)
+        ff_vk_destroy_imageview(avctx, s->overlay_images[i].imageView);
+    for (int i = 0; i < nb_out; i++)
+        ff_vk_destroy_imageview(avctx, s->output_images[i].imageView);
+
+    return err;
+}
+
+/**
+ * Framesync event callback (installed as fs.on_event in overlay_vulkan_init).
+ *
+ * Fetches the current main and overlay frames, lazily initialises the
+ * pipeline on first use, renders into a newly allocated output frame and
+ * passes it downstream.
+ *
+ * @param fs framesync context; fs->parent is the filter context
+ * @return 0 if either input frame is missing, the result of
+ *         ff_filter_frame() on success, a negative AVERROR code on failure
+ */
+static int overlay_vulkan_blend(FFFrameSync *fs)
+{
+    int err;
+    AVFilterContext *ctx = fs->parent;
+    OverlayVulkanContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    /* out must start as NULL: several paths reach "fail" before it is
+     * allocated, and av_frame_free() is called on it there. */
+    AVFrame *input_main, *input_overlay, *out = NULL;
+
+    err = ff_framesync_get_frame(fs, 0, &input_main, 0);
+    if (err < 0)
+        goto fail;
+    err = ff_framesync_get_frame(fs, 1, &input_overlay, 0);
+    if (err < 0)
+        goto fail;
+
+    if (!input_main || !input_overlay)
+        return 0;
+
+    if (!s->initialized) {
+        AVHWFramesContext *main_fc = (AVHWFramesContext*)input_main->hw_frames_ctx->data;
+        AVHWFramesContext *overlay_fc = (AVHWFramesContext*)input_overlay->hw_frames_ctx->data;
+        /* The shader declares all three image arrays with one format, so
+         * both inputs need the same software format. */
+        if (main_fc->sw_format != overlay_fc->sw_format) {
+            av_log(ctx, AV_LOG_ERROR, "Mismatching sw formats!\n");
+            return AVERROR(EINVAL);
+        }
+        RET(init_filter(ctx));
+    }
+
+    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!out) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    RET(process_frames(ctx, out, input_main, input_overlay));
+
+    err = av_frame_copy_props(out, input_main);
+    if (err < 0)
+        goto fail;
+
+    return ff_filter_frame(outlink, out);
+
+fail:
+    av_frame_free(&out);
+    return err;
+}
+
+/* Output pad setup: run the common Vulkan output configuration, then
+ * initialise and configure the dual-input framesync. Stops at the first
+ * step that reports a negative value and returns that value. */
+static int overlay_vulkan_config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *avctx = outlink->src;
+    OverlayVulkanContext *ovl = avctx->priv;
+    int ret = ff_vk_filter_config_output(outlink);
+
+    if (ret >= 0)
+        ret = ff_framesync_init_dualinput(&ovl->fs, avctx);
+    if (ret >= 0)
+        ret = ff_framesync_configure(&ovl->fs);
+
+    return ret;
+}
+
+/* Activation callback: hands control to the framesync stored in the
+ * filter's private context and returns its result. */
+static int overlay_vulkan_activate(AVFilterContext *avctx)
+{
+    return ff_framesync_activate(&((OverlayVulkanContext *)avctx->priv)->fs);
+}
+
+/* Filter init: register overlay_vulkan_blend() as the framesync event
+ * handler, then perform the common Vulkan filter initialisation. */
+static av_cold int overlay_vulkan_init(AVFilterContext *avctx)
+{
+    OverlayVulkanContext *ovl = avctx->priv;
+    FFFrameSync *sync = &ovl->fs;
+
+    sync->on_event = overlay_vulkan_blend;
+
+    return ff_vk_filter_init(avctx);
+}
+
+/* Filter teardown: free the execution context, release the common Vulkan
+ * filter state and the framesync, then mark the filter as uninitialised.
+ * NOTE(review): params_buf is not released here - confirm whether
+ * ff_vk_filter_uninit() takes care of it. */
+static void overlay_vulkan_uninit(AVFilterContext *avctx)
+{
+    OverlayVulkanContext *ovl = avctx->priv;
+
+    ff_vk_free_exec_ctx(avctx, &ovl->exec);
+    ff_vk_filter_uninit(avctx);
+    ff_framesync_uninit(&ovl->fs);
+
+    ovl->initialized = 0;
+}
+
+#define OFFSET(x) offsetof(OverlayVulkanContext, x) /* byte offset of field x in the private context */
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) /* flags shared by every option below */
+static const AVOption overlay_vulkan_options[] = { /* user options of overlay_vulkan */
+ { "x", "Set horizontal offset", OFFSET(overlay_x), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, .flags = FLAGS }, /* default 0, range 0..INT_MAX; read once in init_filter() */
+ { "y", "Set vertical offset", OFFSET(overlay_y), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, .flags = FLAGS }, /* default 0, range 0..INT_MAX; read once in init_filter() */
+ { NULL }, /* end-of-list entry */
+};
+
+AVFILTER_DEFINE_CLASS(overlay_vulkan); /* provides overlay_vulkan_class, referenced by the AVFilter definition */
+
+static const AVFilterPad overlay_vulkan_inputs[] = { /* input pads; neither sets filter_frame, frames are consumed via the activate callback */
+ {
+ .name = "main", /* framesync input 0 in overlay_vulkan_blend() */
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = &ff_vk_filter_config_input,
+ },
+ {
+ .name = "overlay", /* framesync input 1 in overlay_vulkan_blend() */
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = &ff_vk_filter_config_input,
+ },
+ { NULL } /* end-of-list entry */
+};
+
+static const AVFilterPad overlay_vulkan_outputs[] = { /* single video output pad */
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = &overlay_vulkan_config_output, /* also initialises the framesync */
+ },
+ { NULL } /* end-of-list entry */
+};
+
+AVFilter ff_vf_overlay_vulkan = { /* filter definition, declared extern in allfilters.c */
+ .name = "overlay_vulkan",
+ .description = NULL_IF_CONFIG_SMALL("Overlay a source on top of another"),
+ .priv_size = sizeof(OverlayVulkanContext),
+ .init = &overlay_vulkan_init,
+ .uninit = &overlay_vulkan_uninit,
+ .query_formats = &ff_vk_filter_query_formats,
+ .activate = &overlay_vulkan_activate, /* drives the framesync, which calls overlay_vulkan_blend() */
+ .inputs = overlay_vulkan_inputs,
+ .outputs = overlay_vulkan_outputs,
+ .priv_class = &overlay_vulkan_class,
+ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
--
2.17.0
Rostislav Pehlivanov
2018-05-22 02:46:15 UTC
Permalink
Can convert to RGB using very fast fixed-function conversions.

Signed-off-by: Rostislav Pehlivanov <***@gmail.com>
---
configure | 1 +
libavfilter/Makefile | 1 +
libavfilter/allfilters.c | 1 +
libavfilter/vf_scale_vulkan.c | 395 ++++++++++++++++++++++++++++++++++
4 files changed, 398 insertions(+)
create mode 100644 libavfilter/vf_scale_vulkan.c

diff --git a/configure b/configure
index eb81cc1ed5..3b29cd123a 100755
--- a/configure
+++ b/configure
@@ -3422,6 +3422,7 @@ zmq_filter_deps="libzmq"
zoompan_filter_deps="swscale"
zscale_filter_deps="libzimg const_nan"
scale_vaapi_filter_deps="vaapi VAProcPipelineParameterBuffer"
+scale_vulkan_filter_deps="vulkan libshaderc"
vpp_qsv_filter_deps="libmfx"
vpp_qsv_filter_select="qsvvpp"

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 976955959c..e8c5438c78 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -313,6 +313,7 @@ OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o vf_scale_cuda.pt
OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale.o
OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o
OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale.o vaapi_vpp.o
+OBJS-$(CONFIG_SCALE_VULKAN_FILTER) += vf_scale_vulkan.o scale.o vulkan.o
OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o scale.o
OBJS-$(CONFIG_SELECT_FILTER) += f_select.o
OBJS-$(CONFIG_SELECTIVECOLOR_FILTER) += vf_selectivecolor.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 7be81e4706..f9dce32f4d 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -304,6 +304,7 @@ extern AVFilter ff_vf_scale_cuda;
extern AVFilter ff_vf_scale_npp;
extern AVFilter ff_vf_scale_qsv;
extern AVFilter ff_vf_scale_vaapi;
+extern AVFilter ff_vf_scale_vulkan;
extern AVFilter ff_vf_scale2ref;
extern AVFilter ff_vf_select;
extern AVFilter ff_vf_selectivecolor;
diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c
new file mode 100644
index 0000000000..4a5647072d
--- /dev/null
+++ b/libavfilter/vf_scale_vulkan.c
@@ -0,0 +1,395 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "vulkan.h"
+#include "scale.h"
+#include "internal.h"
+
+enum ScalerFunc { /* values of the "scaler" option */
+ F_BILINEAR = 0, /* init_filter() selects VK_FILTER_LINEAR for this */
+ F_NEAREST, /* init_filter() selects VK_FILTER_NEAREST for this */
+
+ F_NB, /* number of entries above; init_filter() has no case for it */
+};
+
+typedef struct ScaleVulkanContext { /* private state of the scale_vulkan filter */
+ VulkanFilterContext vkctx; /* common Vulkan filter state (formats, output size, pipeline, shaders) */
+
+ int conv; /* set in config_output: input is non-RGB and differs from the output format */
+ int initialized; /* set to 1 at the end of init_filter(), cleared in uninit */
+ FFVkExecContext exec; /* provides the command buffer, queue and fence used per frame */
+ const VulkanSampler *sampler; /* sampler created in init_filter() via ff_vk_init_sampler() */
+
+ /* Shader updaters, must be in the main filter struct */
+ VkDescriptorImageInfo input_images[3]; /* input image views (only [0] is used when the sampler converts) */
+ VkDescriptorImageInfo output_images[3]; /* one image view per plane of the output frame */
+
+ enum ScalerFunc scaler; /* "scaler" option */
+ char *output_format_string; /* "format" option; may be NULL */
+ char *w_expr; /* "w" option, default "iw" */
+ char *h_expr; /* "h" option, default "ih" */
+} ScaleVulkanContext;
+
+static const char scale_bilinear[] = { /* GLSL helper; init_filter() emits a call to it for both scaler modes */
+ C(0, void scale_bilinear(int idx, ivec2 pos) ) /* idx: plane index, pos: output pixel */
+ C(0, { )
+ C(1, const vec2 npos = (vec2(pos) + 0.5f) / imageSize(output_img[idx]);) /* pixel centre divided by the output size */
+ C(1, imageStore(output_img[idx], pos, texture(input_img[idx], npos)); ) /* sample the input there and store the result */
+ C(0, } )
+};
+
+/**
+ * One-time setup of the scaling compute pipeline; called from
+ * scale_vulkan_filter_frame() while s->initialized is 0.
+ *
+ * Creates the sampler (filter mode chosen by the "scaler" option), builds
+ * and compiles the "scale_compute" shader, then creates the pipeline
+ * layout, the execution context and the compute pipeline.
+ *
+ * @param ctx filter context whose priv data is a ScaleVulkanContext
+ * @param in  first input frame; passed to ff_vk_init_sampler() only when
+ *            s->conv is set
+ * @return 0 on success, AVERROR(EINVAL) for an unknown scaler value,
+ *         AVERROR_EXTERNAL if the sampler cannot be created, otherwise the
+ *         error code of the failing helper
+ */
+static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
+{
+    int err;
+    VkFilter sampler_mode;
+    ScaleVulkanContext *s = ctx->priv;
+
+    switch (s->scaler) {
+    case F_NEAREST:
+        sampler_mode = VK_FILTER_NEAREST;
+        break;
+    case F_BILINEAR:
+        sampler_mode = VK_FILTER_LINEAR;
+        break;
+    default:
+        /* Without this, sampler_mode would be read uninitialised. */
+        av_log(ctx, AV_LOG_ERROR, "Unknown scaler function %i!\n",
+               (int)s->scaler);
+        return AVERROR(EINVAL);
+    }
+
+    /* Create a sampler */
+    s->sampler = ff_vk_init_sampler(ctx, s->conv ? in : NULL, 0, sampler_mode);
+    if (!s->sampler)
+        return AVERROR_EXTERNAL;
+
+    { /* Create the shader */
+        SPIRVShader *shd = ff_vk_init_shader(ctx, "scale_compute",
+                                             VK_SHADER_STAGE_COMPUTE_BIT);
+        ff_vk_set_compute_shader_sizes(ctx, shd, (int [3]){ 16, 16, 1 });
+
+        VulkanDescriptorSetBinding desc_i[2] = {
+            {
+                .name       = "input_img",
+                .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+                .dimensions = 2,
+                /* A converting sampler exposes the input as one image. */
+                .elems      = s->conv ? 1 :
+                              av_pix_fmt_count_planes(s->vkctx.input_format),
+                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+                .updater    = s->input_images,
+                .samplers   = DUP_SAMPLER_ARRAY4(s->sampler->sampler),
+            },
+            {
+                .name       = "output_img",
+                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+                .mem_quali  = "writeonly",
+                .dimensions = 2,
+                .elems      = av_pix_fmt_count_planes(s->vkctx.output_format),
+                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+                .updater    = s->output_images,
+            },
+        };
+
+        RET(ff_vk_add_descriptor_set(ctx, shd, desc_i, 2, 0)); /* set 0 */
+
+        GLSLD(   scale_bilinear                                                  );
+        GLSLC(0, void main()                                                     );
+        GLSLC(0, {                                                               );
+        GLSLF(1,     for (int i = 0; i < %i; i++)               ,desc_i[1].elems);
+
+        /* Both modes use the same shader body; the difference between
+         * nearest and bilinear comes from the sampler's filter mode. */
+        switch (s->scaler) {
+        case F_NEAREST:
+        case F_BILINEAR:
+            GLSLC(2, scale_bilinear(i, ivec2(gl_GlobalInvocationID.xy));         );
+            break;
+        default:
+            break; /* unreachable: rejected by the switch above */
+        }
+
+        GLSLC(0, }                                                               );
+
+        RET(ff_vk_compile_shader(ctx, shd, "main"));
+    }
+
+    RET(ff_vk_init_pipeline_layout(ctx));
+
+    /* Execution context */
+    RET(ff_vk_create_exec_ctx(ctx, &s->exec,
+                              s->vkctx.hwctx->queue_family_comp_index));
+
+    /* The pipeline */
+    RET(ff_vk_init_compute_pipeline(ctx));
+
+    s->initialized = 1;
+
+    return 0;
+
+fail:
+    return err;
+}
+
+/**
+ * Run the scaling compute shader once on a single frame.
+ *
+ * Creates the input image view(s) (one view when the sampler converts,
+ * otherwise one per input plane) and one view per output plane, updates
+ * descriptor set 0, records a command buffer (layout transitions, pipeline
+ * and descriptor binding, dispatch), submits it and waits on the fence.
+ * Every image view created by this call is destroyed before returning, on
+ * success and on failure alike.
+ *
+ * @param avctx filter context (priv is a ScaleVulkanContext)
+ * @param out   destination Vulkan frame
+ * @param in    source Vulkan frame
+ * @return a non-negative value on success, a negative AVERROR code on failure
+ */
+static int process_frames(AVFilterContext *avctx, AVVkFrame *out, AVVkFrame *in)
+{
+    int i, err = 0;
+    /* Number of image views successfully created so far, per array. */
+    int nb_in = 0, nb_out = 0;
+    ScaleVulkanContext *s = avctx->priv;
+
+    VkCommandBufferBeginInfo cmd_start = {
+        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+    };
+
+    VkComponentMapping null_map = {
+        .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+        .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+        .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+        .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+    };
+
+    if (s->sampler->converting) { /* RGB */
+        RET(ff_vk_create_imageview(avctx, &s->input_images[0].imageView, in,
+                                   av_vkfmt_from_pixfmt(s->vkctx.input_format),
+                                   VK_IMAGE_ASPECT_COLOR_BIT,
+                                   null_map, &s->sampler->yuv_conv));
+        nb_in++;
+        s->input_images[0].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+    } else {
+        for (i = 0; i < av_pix_fmt_count_planes(s->vkctx.input_format); i++) {
+            RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in,
+                                       ff_vk_plane_rep_fmt(s->vkctx.input_format, i),
+                                       ff_vk_aspect_flags(s->vkctx.input_format, i),
+                                       null_map, NULL));
+            nb_in++;
+            s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+        }
+    }
+
+    for (i = 0; i < av_pix_fmt_count_planes(s->vkctx.output_format); i++) {
+        RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out,
+                                   ff_vk_plane_rep_fmt(s->vkctx.output_format, i),
+                                   ff_vk_aspect_flags(s->vkctx.output_format, i),
+                                   null_map, NULL));
+        nb_out++;
+        s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
+    }
+
+    ff_vk_update_descriptor_set(avctx, 0);
+
+    vkBeginCommandBuffer(s->exec.buf, &cmd_start);
+
+    {
+        /* Move the input to a shader-readable layout and the output to the
+         * general layout before the compute stage runs. */
+        VkImageMemoryBarrier bar[2] = {
+            {
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .srcAccessMask = 0,
+                .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
+                .oldLayout = in->layout,
+                .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = in->img,
+                .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.input_format, -1),
+                .subresourceRange.levelCount = 1,
+                .subresourceRange.layerCount = 1,
+            },
+            {
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .srcAccessMask = 0,
+                .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+                .oldLayout = out->layout,
+                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = out->img,
+                .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.output_format, -1),
+                .subresourceRange.levelCount = 1,
+                .subresourceRange.layerCount = 1,
+            },
+        };
+
+        vkCmdPipelineBarrier(s->exec.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                             VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
+                             0, NULL, 0, NULL, 2, bar);
+
+        /* Remember the new layout/access so later users start from it. */
+        in->layout = bar[0].newLayout;
+        in->access = bar[0].dstAccessMask;
+
+        out->layout = bar[1].newLayout;
+        out->access = bar[1].dstAccessMask;
+    }
+
+    vkCmdBindPipeline(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline);
+    vkCmdBindDescriptorSets(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline_layout, 0, s->vkctx.descriptor_sets_num, s->vkctx.desc_set, 0, 0);
+    /* One workgroup per local_size tile, rounded up to cover the output. */
+    vkCmdDispatch(s->exec.buf,
+                  FFALIGN(s->vkctx.output_width,  s->vkctx.shaders[0].local_size[0])/s->vkctx.shaders[0].local_size[0],
+                  FFALIGN(s->vkctx.output_height, s->vkctx.shaders[0].local_size[1])/s->vkctx.shaders[0].local_size[1], 1);
+
+    vkEndCommandBuffer(s->exec.buf);
+
+    VkSubmitInfo s_info = {
+        .sType              = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+        .commandBufferCount = 1,
+        .pCommandBuffers    = &s->exec.buf,
+    };
+
+    VkResult ret = vkQueueSubmit(s->exec.queue, 1, &s_info, s->exec.fence);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
+               ff_vk_ret2str(ret));
+        /* Go through the cleanup below instead of leaking the image views. */
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    vkWaitForFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence, VK_TRUE, UINT64_MAX);
+    vkResetFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence);
+
+fail:
+    /* Only destroy what this call created; the input and output view
+     * counts can differ, and unused slots may hold stale handles. */
+    for (i = 0; i < nb_in; i++)
+        ff_vk_destroy_imageview(avctx, s->input_images[i].imageView);
+    for (i = 0; i < nb_out; i++)
+        ff_vk_destroy_imageview(avctx, s->output_images[i].imageView);
+
+    return err;
+}
+
+/* Input pad callback: allocate an output frame, set up the pipeline on the
+ * first call, scale the input into the output, copy the frame properties
+ * and send the result downstream. The input frame is freed on every path;
+ * the output frame is freed on failure. */
+static int scale_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
+{
+    AVFilterContext *ctx = link->dst;
+    ScaleVulkanContext *scale = ctx->priv;
+    AVFilterLink *dst_link = ctx->outputs[0];
+    AVFrame *out;
+    int err;
+
+    out = ff_get_video_buffer(dst_link, dst_link->w, dst_link->h);
+    if (!out) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    if (!scale->initialized)
+        RET(init_filter(ctx, in));
+
+    RET(process_frames(ctx, (AVVkFrame *)out->data[0],
+                            (AVVkFrame *) in->data[0]));
+
+    if ((err = av_frame_copy_props(out, in)) < 0)
+        goto fail;
+
+    av_frame_free(&in);
+
+    return ff_filter_frame(dst_link, out);
+
+fail:
+    av_frame_free(&in);
+    av_frame_free(&out);
+    return err;
+}
+
+/**
+ * Output pad setup: evaluate the output size, resolve the output format,
+ * decide whether a colour conversion is needed and configure the link.
+ *
+ * The output format must either equal the input format or be an RGB format.
+ *
+ * @param outlink output link being configured
+ * @return 0 on success; AVERROR(EINVAL) if the "format" option names an
+ *         unknown pixel format or a format has no descriptor;
+ *         AVERROR_PATCHWELCOME for an unsupported conversion; otherwise the
+ *         error code of the failing helper
+ */
+static int scale_vulkan_config_output(AVFilterLink *outlink)
+{
+    int err;
+    const AVPixFmtDescriptor *desc;
+    AVFilterContext *avctx = outlink->src;
+    ScaleVulkanContext *s  = avctx->priv;
+    AVFilterLink *inlink   = outlink->src->inputs[0];
+
+    err = ff_scale_eval_dimensions(s, s->w_expr, s->h_expr, inlink, outlink,
+                                   &s->vkctx.output_width,
+                                   &s->vkctx.output_height);
+    if (err < 0)
+        return err;
+
+    if (s->output_format_string) {
+        s->vkctx.output_format = av_get_pix_fmt(s->output_format_string);
+        /* An unknown name yields AV_PIX_FMT_NONE, which has no descriptor. */
+        if (s->vkctx.output_format == AV_PIX_FMT_NONE) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid output format: %s\n",
+                   s->output_format_string);
+            return AVERROR(EINVAL);
+        }
+    }
+
+    desc = av_pix_fmt_desc_get(s->vkctx.output_format);
+    if (!desc)
+        return AVERROR(EINVAL);
+
+    if ((s->vkctx.input_format != s->vkctx.output_format) &&
+        !(desc->flags & AV_PIX_FMT_FLAG_RGB)) {
+        /* This aborts configuration, so report it as an error. */
+        av_log(avctx, AV_LOG_ERROR, "Unsupported conversion %s -> %s! "
+               "Currently output format must either match input format or "
+               "must be some supported RGB format!\n",
+               av_get_pix_fmt_name(s->vkctx.input_format),
+               av_get_pix_fmt_name(s->vkctx.output_format));
+        return AVERROR_PATCHWELCOME;
+    }
+
+    desc = av_pix_fmt_desc_get(s->vkctx.input_format);
+    if (!desc)
+        return AVERROR(EINVAL);
+
+    /* A conversion is needed when a non-RGB input goes to another format. */
+    s->conv = !(desc->flags & AV_PIX_FMT_FLAG_RGB) &&
+              (s->vkctx.input_format != s->vkctx.output_format);
+
+    err = ff_vk_filter_config_output(outlink);
+    if (err < 0)
+        return err;
+
+    /* Adjust the sample aspect ratio by the change in frame dimensions. */
+    if (inlink->sample_aspect_ratio.num)
+        outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink->w, outlink->w * inlink->h}, inlink->sample_aspect_ratio);
+    else
+        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
+
+    return 0;
+}
+
+/* Filter teardown: free the execution context, release the common Vulkan
+ * filter state, then mark the filter as uninitialised. */
+static void scale_vulkan_uninit(AVFilterContext *avctx)
+{
+    ScaleVulkanContext *scale = avctx->priv;
+
+    ff_vk_free_exec_ctx(avctx, &scale->exec);
+    ff_vk_filter_uninit(avctx);
+
+    scale->initialized = 0;
+}
+
+/* Byte offset of field x in the private context. */
+#define OFFSET(x) offsetof(ScaleVulkanContext, x)
+/* Flags shared by every option below. */
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+/* User options of scale_vulkan. */
+static const AVOption scale_vulkan_options[] = {
+    { "w", "Output video width",  OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = FLAGS },
+    { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = FLAGS },
+    /* The maximum is the last real scaler: F_NB is only the entry count
+     * and init_filter() has no handling for it. */
+    { "scaler", "Scaler function", OFFSET(scaler), AV_OPT_TYPE_INT, {.i64 = F_BILINEAR}, 0, F_NB - 1, .flags = FLAGS, "scaler" },
+        { "bilinear", "Bilinear interpolation (fastest)", 0, AV_OPT_TYPE_CONST, {.i64 = F_BILINEAR}, 0, 0, .flags = FLAGS, "scaler" },
+        { "nearest",  "Nearest (useful for pixel art)",   0, AV_OPT_TYPE_CONST, {.i64 = F_NEAREST},  0, 0, .flags = FLAGS, "scaler" },
+    { "format", "Output video format (software format of hardware frames)", OFFSET(output_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS },
+    { NULL },
+};
+
+AVFILTER_DEFINE_CLASS(scale_vulkan);
+
+static const AVFilterPad scale_vulkan_inputs[] = { /* single video input pad */
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .filter_frame = &scale_vulkan_filter_frame, /* per-frame entry point of the filter */
+ .config_props = &ff_vk_filter_config_input,
+ },
+ { NULL } /* end-of-list entry */
+};
+
+static const AVFilterPad scale_vulkan_outputs[] = { /* single video output pad */
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = &scale_vulkan_config_output, /* evaluates the output size and format */
+ },
+ { NULL } /* end-of-list entry */
+};
+
+AVFilter ff_vf_scale_vulkan = { /* filter definition, declared extern in allfilters.c */
+ .name = "scale_vulkan",
+ .description = NULL_IF_CONFIG_SMALL("Scale Vulkan frames"),
+ .priv_size = sizeof(ScaleVulkanContext),
+ .init = &ff_vk_filter_init, /* common Vulkan init is used directly, there is no filter-specific init */
+ .uninit = &scale_vulkan_uninit,
+ .query_formats = &ff_vk_filter_query_formats,
+ .inputs = scale_vulkan_inputs,
+ .outputs = scale_vulkan_outputs,
+ .priv_class = &scale_vulkan_class,
+ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
--
2.17.0
Loading...