Discussion:
[FFmpeg-devel] [PATCH 3/8] lavu: add a Vulkan hwcontext
Rostislav Pehlivanov
2018-04-20 04:30:05 UTC
This commit adds a Vulkan hwcontext, currently capable of mapping DRM and
VAAPI frames. Additional functionality can be added later to support
importing D3D11 surfaces as well as exporting to various other APIs.

This context requires the newest stable version of the Vulkan API (1.1),
and once the new extension for DRM surfaces makes it into the spec, it
will be required as well (in order to properly and fully import them).

It makes use of every part of the Vulkan spec in order to ensure the
fastest possible uploading, downloading and mapping of frames. On AMD,
it will also map host memory frames in order to upload to the hardware
very efficiently and with minimal CPU usage.

To be useful for non-RGB images, an implementation supporting the YUV
images extension (VK_KHR_sampler_ycbcr_conversion) is needed. All current
implementations support it, with the exception of AMD, though support is
coming soon to Mesa.
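
Purely as an illustration (not part of this patch), below is a minimal
sketch of how a caller could use the new device type through the existing
public hwcontext API; the helper name, the device index string "0" and the
1280x720 NV12 pool parameters are made-up example values:

    #include <libavutil/frame.h>
    #include <libavutil/hwcontext.h>

    /* Hypothetical helper: create a Vulkan device on the first enumerated
     * GPU and allocate one NV12-backed frame from the internal pool. */
    static int alloc_vulkan_frame(AVFrame **out)
    {
        AVBufferRef *dev_ref = NULL, *frames_ref = NULL;
        AVHWFramesContext *frames;
        int err;

        /* "debug" and "linear_images" are the options this patch parses;
         * they could be passed here through an AVDictionary instead of NULL. */
        err = av_hwdevice_ctx_create(&dev_ref, AV_HWDEVICE_TYPE_VULKAN,
                                     "0", NULL, 0);
        if (err < 0)
            return err;

        frames_ref = av_hwframe_ctx_alloc(dev_ref);
        if (!frames_ref) {
            err = AVERROR(ENOMEM);
            goto end;
        }

        frames            = (AVHWFramesContext *)frames_ref->data;
        frames->format    = AV_PIX_FMT_VULKAN;
        frames->sw_format = AV_PIX_FMT_NV12;
        frames->width     = 1280;
        frames->height    = 720;

        err = av_hwframe_ctx_init(frames_ref);
        if (err < 0)
            goto end;

        *out = av_frame_alloc();
        if (!*out) {
            err = AVERROR(ENOMEM);
            goto end;
        }

        /* On success, (*out)->data[0] points to an AVVkFrame. */
        err = av_hwframe_get_buffer(frames_ref, *out, 0);

    end:
        av_buffer_unref(&frames_ref);
        av_buffer_unref(&dev_ref);
        return err;
    }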

Signed-off-by: Rostislav Pehlivanov <***@gmail.com>
---
configure | 12 +
doc/APIchanges | 3 +
libavutil/Makefile | 3 +
libavutil/hwcontext.c | 4 +
libavutil/hwcontext.h | 1 +
libavutil/hwcontext_internal.h | 1 +
libavutil/hwcontext_vulkan.c | 2125 ++++++++++++++++++++++++++++++++
libavutil/hwcontext_vulkan.h | 133 ++
libavutil/pixdesc.c | 4 +
libavutil/pixfmt.h | 4 +
libavutil/version.h | 2 +-
11 files changed, 2291 insertions(+), 1 deletion(-)
create mode 100644 libavutil/hwcontext_vulkan.c
create mode 100644 libavutil/hwcontext_vulkan.h

diff --git a/configure b/configure
index dee507cb6a..cd88f7eae1 100755
--- a/configure
+++ b/configure
@@ -297,6 +297,7 @@ External library support:
--enable-opengl enable OpenGL rendering [no]
--enable-openssl enable openssl, needed for https support
if gnutls or libtls is not used [no]
+ --enable-vulkan enable Vulkan code [no]
--disable-sndio disable sndio support [autodetect]
--disable-schannel disable SChannel SSP, needed for TLS support on
Windows if openssl and gnutls are not used [autodetect]
@@ -1761,6 +1762,7 @@ HWACCEL_LIBRARY_LIST="
mmal
omx
opencl
+ vulkan
"

DOCUMENT_LIST="
@@ -2217,6 +2219,7 @@ HAVE_LIST="
opencl_dxva2
opencl_vaapi_beignet
opencl_vaapi_intel_media
+ vulkan_drm_mod
perl
pod2man
texi2html
@@ -6322,6 +6325,15 @@ enabled vdpau &&

enabled crystalhd && check_lib crystalhd "stdint.h libcrystalhd/libcrystalhd_if.h" DtsCrystalHDVersion -lcrystalhd

+enabled vulkan &&
+ check_lib vulkan "vulkan/vulkan.h" vkCreateInstance -lvulkan &&
+ check_cpp_condition vulkan vulkan/vulkan.h "defined VK_API_VERSION_1_1"
+
+if enabled_all vulkan libdrm ; then
+ check_cpp_condition vulkan vulkan/vulkan.h "defined VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME" &&
+ enable vulkan_drm_mod
+fi
+
if enabled x86; then
case $target_os in
mingw32*|mingw64*|win32|win64|linux|cygwin*)
diff --git a/doc/APIchanges b/doc/APIchanges
index 4f6ac2a031..05a28473d7 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -15,6 +15,9 @@ libavutil: 2017-10-21

API changes, most recent first:

+2018-04-xx - xxxxxxxxxx - lavu 56.16.100 - hwcontext.h
+ Add AV_HWDEVICE_TYPE_VULKAN and implementation.
+
-------- 8< --------- FFmpeg 4.0 was cut here -------- 8< ---------

2018-04-03 - d6fc031caf - lavu 56.13.100 - pixdesc.h
diff --git a/libavutil/Makefile b/libavutil/Makefile
index a63ba523c9..aa641d78ed 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -42,6 +42,7 @@ HEADERS = adler32.h \
hwcontext_vaapi.h \
hwcontext_videotoolbox.h \
hwcontext_vdpau.h \
+ hwcontext_vulkan.h \
imgutils.h \
intfloat.h \
intreadwrite.h \
@@ -168,6 +169,7 @@ OBJS-$(CONFIG_VAAPI) += hwcontext_vaapi.o
OBJS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.o
OBJS-$(CONFIG_VDPAU) += hwcontext_vdpau.o
OBJS-$(CONFIG_MEDIACODEC) += hwcontext_mediacodec.o
+OBJS-$(CONFIG_VULKAN) += hwcontext_vulkan.o

OBJS += $(COMPAT_OBJS:%=../compat/%)

@@ -183,6 +185,7 @@ SKIPHEADERS-$(CONFIG_OPENCL) += hwcontext_opencl.h
SKIPHEADERS-$(CONFIG_VAAPI) += hwcontext_vaapi.h
SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += hwcontext_videotoolbox.h
SKIPHEADERS-$(CONFIG_VDPAU) += hwcontext_vdpau.h
+SKIPHEADERS-$(CONFIG_VULKAN) += hwcontext_vulkan.h

TESTPROGS = adler32 \
aes \
diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c
index f9ce2f5b13..96623a89c2 100644
--- a/libavutil/hwcontext.c
+++ b/libavutil/hwcontext.c
@@ -58,6 +58,9 @@ static const HWContextType * const hw_table[] = {
#endif
#if CONFIG_MEDIACODEC
&ff_hwcontext_type_mediacodec,
+#endif
+#if CONFIG_VULKAN
+ &ff_hwcontext_type_vulkan,
#endif
NULL,
};
@@ -73,6 +76,7 @@ static const char *const hw_type_names[] = {
[AV_HWDEVICE_TYPE_VDPAU] = "vdpau",
[AV_HWDEVICE_TYPE_VIDEOTOOLBOX] = "videotoolbox",
[AV_HWDEVICE_TYPE_MEDIACODEC] = "mediacodec",
+ [AV_HWDEVICE_TYPE_VULKAN] = "vulkan",
};

enum AVHWDeviceType av_hwdevice_find_type_by_name(const char *name)
diff --git a/libavutil/hwcontext.h b/libavutil/hwcontext.h
index f5a4b62387..f874af9f8f 100644
--- a/libavutil/hwcontext.h
+++ b/libavutil/hwcontext.h
@@ -36,6 +36,7 @@ enum AVHWDeviceType {
AV_HWDEVICE_TYPE_DRM,
AV_HWDEVICE_TYPE_OPENCL,
AV_HWDEVICE_TYPE_MEDIACODEC,
+ AV_HWDEVICE_TYPE_VULKAN,
};

typedef struct AVHWDeviceInternal AVHWDeviceInternal;
diff --git a/libavutil/hwcontext_internal.h b/libavutil/hwcontext_internal.h
index 77dc47ddd6..dba0f39944 100644
--- a/libavutil/hwcontext_internal.h
+++ b/libavutil/hwcontext_internal.h
@@ -172,5 +172,6 @@ extern const HWContextType ff_hwcontext_type_vaapi;
extern const HWContextType ff_hwcontext_type_vdpau;
extern const HWContextType ff_hwcontext_type_videotoolbox;
extern const HWContextType ff_hwcontext_type_mediacodec;
+extern const HWContextType ff_hwcontext_type_vulkan;

#endif /* AVUTIL_HWCONTEXT_INTERNAL_H */
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
new file mode 100644
index 0000000000..ee6505091c
--- /dev/null
+++ b/libavutil/hwcontext_vulkan.c
@@ -0,0 +1,2125 @@
+/*
+ * Vulkan hwcontext
+ * Copyright (c) 2018 Rostislav Pehlivanov <***@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "pixdesc.h"
+#include "avstring.h"
+#include "hwcontext.h"
+#include "hwcontext_internal.h"
+#include "hwcontext_vulkan.h"
+
+#if CONFIG_LIBDRM
+#include <unistd.h> /* lseek */
+#include <xf86drm.h>
+#include <drm_fourcc.h>
+#include "hwcontext_drm.h"
+#if CONFIG_VAAPI
+#include <va/va_drmcommon.h>
+#include "hwcontext_vaapi.h"
+#endif
+#endif
+
+typedef struct VulkanDevicePriv {
+ /* Properties */
+ VkPhysicalDeviceProperties props;
+ VkPhysicalDeviceMemoryProperties mprops;
+
+ /* Debug callback */
+ VkDebugUtilsMessengerEXT debug_ctx;
+
+ /* Image uploading */
+ VkCommandPool cmd_pool;
+ VkCommandBuffer cmd_buf;
+ VkQueue cmd_queue;
+ VkFence cmd_fence;
+
+ /* Extensions */
+ uint64_t extensions;
+
+ /* Settings */
+ int use_linear_images;
+} VulkanDevicePriv;
+
+#define VK_LOAD_PFN(inst, name) PFN_##name pfn_##name = (PFN_##name) \
+ vkGetInstanceProcAddr(inst, #name)
+
+#define DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT | \
+ VK_IMAGE_USAGE_STORAGE_BIT | \
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT | \
+ VK_IMAGE_USAGE_TRANSFER_DST_BIT)
+
+#define ADD_VAL_TO_LIST(list, count, val) \
+ do { \
+ list = av_realloc_array(list, sizeof(*list), ++count); \
+ if (!list) { \
+ err = AVERROR(ENOMEM); \
+ goto end; \
+ } \
+ list[count - 1] = val; \
+ } while(0)
+
+static const VkFormat vk_format_map[AV_PIX_FMT_NB] = {
+ /* Gray */
+ [AV_PIX_FMT_GRAY8] = VK_FORMAT_R8_UNORM,
+ [AV_PIX_FMT_GRAY10] = VK_FORMAT_R10X6_UNORM_PACK16,
+ [AV_PIX_FMT_GRAY12] = VK_FORMAT_R12X4_UNORM_PACK16,
+ [AV_PIX_FMT_GRAY16] = VK_FORMAT_R16_UNORM,
+
+ /* Interleaved */
+ [AV_PIX_FMT_NV12] = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM,
+ [AV_PIX_FMT_P010] = VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,
+ [AV_PIX_FMT_P016] = VK_FORMAT_G16_B16R16_2PLANE_420_UNORM,
+ [AV_PIX_FMT_NV16] = VK_FORMAT_G8_B8R8_2PLANE_422_UNORM,
+ [AV_PIX_FMT_UYVY422] = VK_FORMAT_B8G8R8G8_422_UNORM,
+ [AV_PIX_FMT_YUYV422] = VK_FORMAT_G8B8G8R8_422_UNORM,
+
+ /* 420 */
+ [AV_PIX_FMT_YUV420P] = VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM,
+ [AV_PIX_FMT_YUV420P10] = VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16,
+ [AV_PIX_FMT_YUV420P12] = VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16,
+ [AV_PIX_FMT_YUV420P16] = VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM,
+
+ /* 422 */
+ [AV_PIX_FMT_YUV422P] = VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM,
+ [AV_PIX_FMT_YUV422P10] = VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16,
+ [AV_PIX_FMT_YUV422P12] = VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16,
+ [AV_PIX_FMT_YUV422P16] = VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM,
+
+ /* 444 */
+ [AV_PIX_FMT_YUV444P] = VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM,
+ [AV_PIX_FMT_YUV444P10] = VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16,
+ [AV_PIX_FMT_YUV444P12] = VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16,
+ [AV_PIX_FMT_YUV444P16] = VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM,
+
+ /* RGB */
+ [AV_PIX_FMT_ABGR] = VK_FORMAT_A8B8G8R8_UNORM_PACK32,
+ [AV_PIX_FMT_BGRA] = VK_FORMAT_B8G8R8A8_UNORM,
+ [AV_PIX_FMT_RGBA] = VK_FORMAT_R8G8B8A8_UNORM,
+ [AV_PIX_FMT_RGB24] = VK_FORMAT_R8G8B8_UNORM,
+ [AV_PIX_FMT_BGR24] = VK_FORMAT_B8G8R8_UNORM,
+ [AV_PIX_FMT_RGB48] = VK_FORMAT_R16G16B16_UNORM,
+ [AV_PIX_FMT_RGBA64] = VK_FORMAT_R16G16B16A16_UNORM,
+ [AV_PIX_FMT_RGB565] = VK_FORMAT_R5G6B5_UNORM_PACK16,
+ [AV_PIX_FMT_BGR565] = VK_FORMAT_B5G6R5_UNORM_PACK16,
+ [AV_PIX_FMT_BGR0] = VK_FORMAT_B8G8R8A8_UNORM,
+ [AV_PIX_FMT_0BGR] = VK_FORMAT_A8B8G8R8_UNORM_PACK32,
+ [AV_PIX_FMT_RGB0] = VK_FORMAT_R8G8B8A8_UNORM,
+};
+
+enum VulkanExtensions {
+ EXT_DEDICATED_ALLOC = 1LL << 0, /* VK_KHR_dedicated_allocation */
+ EXT_IMAGE_FORMAT_LIST = 1LL << 1, /* VK_KHR_image_format_list */
+ EXT_EXTERNAL_MEMORY = 1LL << 2, /* VK_KHR_external_memory */
+ EXT_EXTERNAL_HOST_MEMORY = 1LL << 3, /* VK_EXT_external_memory_host */
+ EXT_EXTERNAL_FD_MEMORY = 1LL << 4, /* VK_KHR_external_memory_fd */
+ EXT_EXTERNAL_DMABUF_MEMORY = 1LL << 5, /* VK_EXT_external_memory_dma_buf */
+ EXT_DRM_MODIFIER_FLAGS = 1LL << 6, /* VK_EXT_image_drm_format_modifier */
+ EXT_YUV_IMAGES = 1LL << 7, /* VK_KHR_sampler_ycbcr_conversion */
+
+ EXT_OPTIONAL = 1LL << 62,
+ EXT_REQUIRED = 1LL << 63,
+};
+
+typedef struct VulkanOptExtension {
+ const char *name;
+ uint64_t flag;
+} VulkanOptExtension;
+
+static const VulkanOptExtension optional_instance_exts[] = {
+ { VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME, EXT_EXTERNAL_MEMORY, },
+ { VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME, EXT_REQUIRED },
+};
+
+static const VulkanOptExtension optional_device_exts[] = {
+ { VK_KHR_MAINTENANCE1_EXTENSION_NAME, EXT_REQUIRED },
+ { VK_KHR_MAINTENANCE2_EXTENSION_NAME, EXT_REQUIRED },
+ { VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME, EXT_REQUIRED },
+ { VK_KHR_BIND_MEMORY_2_EXTENSION_NAME, EXT_REQUIRED },
+ { VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, EXT_REQUIRED },
+
+ { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, EXT_OPTIONAL, },
+
+ { VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME, EXT_DEDICATED_ALLOC, },
+ { VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME, EXT_IMAGE_FORMAT_LIST, },
+ { VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME, EXT_EXTERNAL_MEMORY, },
+ { VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, EXT_EXTERNAL_HOST_MEMORY, },
+ { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME, EXT_EXTERNAL_DMABUF_MEMORY, },
+ { VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME, EXT_YUV_IMAGES },
+#if HAVE_VULKAN_DRM_MOD
+ { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, EXT_DRM_MODIFIER_FLAGS, },
+#else
+ { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME, EXT_DRM_MODIFIER_FLAGS, },
+#endif
+};
+
+VkFormat av_vkfmt_from_pixfmt(enum AVPixelFormat p)
+{
+ if ((p >= 0 && p < AV_PIX_FMT_NB) && vk_format_map[p])
+ return vk_format_map[p];
+ return VK_FORMAT_UNDEFINED;
+}
+
+static int vkfmt_is_supported(AVVulkanDeviceContext *hwctx, enum AVPixelFormat p,
+ int linear)
+{
+ VkFormatFeatureFlags flags;
+ VkFormatProperties2 prop = {
+ .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
+ };
+ VkFormat fmt = av_vkfmt_from_pixfmt(p);
+
+ if (fmt == VK_FORMAT_UNDEFINED)
+ return 0;
+
+ vkGetPhysicalDeviceFormatProperties2(hwctx->phys_dev, fmt, &prop);
+ flags = linear ? prop.formatProperties.linearTilingFeatures :
+ prop.formatProperties.optimalTilingFeatures;
+
+ return !!(flags & (VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
+ VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT |
+ VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
+ VK_FORMAT_FEATURE_TRANSFER_DST_BIT));
+}
+
+/* Converts return values to strings */
+static const char *vk_ret2str(VkResult res)
+{
+#define CASE(VAL) case VAL: return #VAL
+ switch (res) {
+ CASE(VK_SUCCESS);
+ CASE(VK_NOT_READY);
+ CASE(VK_TIMEOUT);
+ CASE(VK_EVENT_SET);
+ CASE(VK_EVENT_RESET);
+ CASE(VK_INCOMPLETE);
+ CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
+ CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ CASE(VK_ERROR_INITIALIZATION_FAILED);
+ CASE(VK_ERROR_DEVICE_LOST);
+ CASE(VK_ERROR_MEMORY_MAP_FAILED);
+ CASE(VK_ERROR_LAYER_NOT_PRESENT);
+ CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
+ CASE(VK_ERROR_FEATURE_NOT_PRESENT);
+ CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
+ CASE(VK_ERROR_TOO_MANY_OBJECTS);
+ CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
+ CASE(VK_ERROR_FRAGMENTED_POOL);
+ CASE(VK_ERROR_SURFACE_LOST_KHR);
+ CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
+ CASE(VK_SUBOPTIMAL_KHR);
+ CASE(VK_ERROR_OUT_OF_DATE_KHR);
+ CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
+ CASE(VK_ERROR_VALIDATION_FAILED_EXT);
+ CASE(VK_ERROR_INVALID_SHADER_NV);
+ CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
+ CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ CASE(VK_ERROR_NOT_PERMITTED_EXT);
+ default: return "Unknown error";
+ }
+#undef CASE
+}
+
+static VkBool32 vk_dbg_callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
+ VkDebugUtilsMessageTypeFlagsEXT messageType,
+ const VkDebugUtilsMessengerCallbackDataEXT *data,
+ void *priv)
+{
+ int i, l;
+ AVHWDeviceContext *ctx = priv;
+
+ switch (severity) {
+ case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: l = AV_LOG_VERBOSE; break;
+ case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: l = AV_LOG_INFO; break;
+ case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: l = AV_LOG_WARNING; break;
+ case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT: l = AV_LOG_ERROR; break;
+ default: l = AV_LOG_DEBUG; break;
+ };
+
+ av_log(ctx, l, "%s\n", data->pMessage);
+ for (i = 0; i < data->cmdBufLabelCount; i++)
+ av_log(ctx, l, "\t%i: %s\n", i, data->pCmdBufLabels[i].pLabelName);
+
+ return 0;
+}
+
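+/* Builds the list of instance or device extension names to enable, erroring out if a required one is missing */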
+static int check_extensions(AVHWDeviceContext *ctx, int dev,
+ const char * const **dst, uint32_t *num, int debug)
+{
+ const char *tstr;
+ const char **extension_names = NULL;
+ VulkanDevicePriv *p = ctx->internal->priv;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ int i, j, err = 0, found, extensions_found = 0;
+
+ const char *mod;
+ int optional_exts_num;
+ uint32_t sup_ext_count;
+ VkExtensionProperties *sup_ext;
+ VulkanOptExtension *optional_exts;
+
+ if (!dev) {
+ mod = "instance";
+ optional_exts = optional_instance_exts;
+ optional_exts_num = FF_ARRAY_ELEMS(optional_instance_exts);
+ vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, NULL);
+ sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
+ if (!sup_ext)
+ return AVERROR(ENOMEM);
+ vkEnumerateInstanceExtensionProperties(NULL, &sup_ext_count, sup_ext);
+ } else {
+ mod = "device";
+ optional_exts = optional_device_exts;
+ optional_exts_num = FF_ARRAY_ELEMS(optional_device_exts);
+ vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
+ &sup_ext_count, NULL);
+ sup_ext = av_malloc_array(sup_ext_count, sizeof(VkExtensionProperties));
+ if (!sup_ext)
+ return AVERROR(ENOMEM);
+ vkEnumerateDeviceExtensionProperties(hwctx->phys_dev, NULL,
+ &sup_ext_count, sup_ext);
+ }
+
+ for (i = 0; i < optional_exts_num; i++) {
+ int req = optional_exts[i].flag & EXT_REQUIRED;
+ tstr = optional_exts[i].name;
+
+ found = 0;
+ for (j = 0; j < sup_ext_count; j++) {
+ if (!strcmp(tstr, sup_ext[j].extensionName)) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ int lvl = req ? AV_LOG_ERROR : AV_LOG_VERBOSE;
+ av_log(ctx, lvl, "Extension \"%s\" not found!\n", tstr);
+ if (req) {
+ err = AVERROR(EINVAL);
+ goto end;
+ }
+ continue;
+ }
+ if (!req)
+ p->extensions |= optional_exts[i].flag;
+
+ av_log(ctx, AV_LOG_VERBOSE, "Using %s extension \"%s\"\n", mod, tstr);
+
+ ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
+ }
+
+ if (debug && !dev) {
+ tstr = VK_EXT_DEBUG_UTILS_EXTENSION_NAME;
+ found = 0;
+ for (j = 0; j < sup_ext_count; j++) {
+ if (!strcmp(tstr, sup_ext[j].extensionName)) {
+ found = 1;
+ break;
+ }
+ }
+ if (found) {
+ ADD_VAL_TO_LIST(extension_names, extensions_found, tstr);
+ } else {
+ av_log(ctx, AV_LOG_ERROR, "Debug extension \"%s\" not found!\n",
+ tstr);
+ err = AVERROR(EINVAL);
+ goto end;
+ }
+ }
+
+ *dst = extension_names;
+ *num = extensions_found;
+
+end:
+ av_free(sup_ext);
+ return err;
+}
+
+/* Creates a VkInstance */
+static int create_instance(AVHWDeviceContext *ctx, AVDictionary *opts)
+{
+ int err = 0;
+ VkResult ret;
+ VulkanDevicePriv *p = ctx->internal->priv;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ AVDictionaryEntry *debug_opt = av_dict_get(opts, "debug", NULL, 0);
+ const int debug_mode = debug_opt && strtol(debug_opt->value, NULL, 10);
+ VkApplicationInfo application_info = {
+ .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
+ .pEngineName = "libavutil",
+ .apiVersion = VK_API_VERSION_1_1,
+ .engineVersion = VK_MAKE_VERSION(LIBAVUTIL_VERSION_MAJOR,
+ LIBAVUTIL_VERSION_MINOR,
+ LIBAVUTIL_VERSION_MICRO),
+ };
+ VkInstanceCreateInfo inst_props = {
+ .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+ .pApplicationInfo = &application_info,
+ };
+
+ /* Check for present/missing extensions */
+ err = check_extensions(ctx, 0, &inst_props.ppEnabledExtensionNames,
+ &inst_props.enabledExtensionCount, debug_mode);
+ if (err < 0)
+ return err;
+
+ if (debug_mode) {
+ static const char *layers[] = { "VK_LAYER_LUNARG_standard_validation" };
+ inst_props.ppEnabledLayerNames = layers;
+ inst_props.enabledLayerCount = FF_ARRAY_ELEMS(layers);
+ }
+
+ /* Try to create the instance */
+ ret = vkCreateInstance(&inst_props, hwctx->alloc, &hwctx->inst);
+
+ /* Free used memory */
+ av_free((void *)inst_props.ppEnabledExtensionNames);
+
+ /* Check for errors */
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Instance creation failure: %s\n",
+ vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ if (debug_mode) {
+ VkDebugUtilsMessengerCreateInfoEXT dbg = {
+ .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
+ .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
+ .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
+ VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
+ .pfnUserCallback = vk_dbg_callback,
+ .pUserData = ctx,
+ };
+ VK_LOAD_PFN(hwctx->inst, vkCreateDebugUtilsMessengerEXT);
+
+ pfn_vkCreateDebugUtilsMessengerEXT(hwctx->inst, &dbg,
+ hwctx->alloc, &p->debug_ctx);
+ }
+
+ return 0;
+}
+
+typedef struct VulkanDeviceSelection {
+ const char *name; /* Will use this first unless NULL */
+ uint32_t pci_device; /* Will use this second unless 0x0 */
+ uint32_t vendor_id; /* Last resort to find something deterministic */
+ int index; /* Finally fall back to index */
+} VulkanDeviceSelection;
+
+/* Finds a device */
+static int find_device(AVHWDeviceContext *ctx, VulkanDeviceSelection *select)
+{
+ uint32_t num;
+ VkResult ret;
+ int i, err = 0;
+ VkPhysicalDevice *devices = NULL;
+ VkPhysicalDeviceProperties *prop = NULL;
+ VkPhysicalDevice choice = VK_NULL_HANDLE;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ static const char *dev_types[] = {
+ [VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU] = "integrated",
+ [VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU] = "discrete",
+ [VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU] = "virtual",
+ [VK_PHYSICAL_DEVICE_TYPE_CPU] = "software",
+ [VK_PHYSICAL_DEVICE_TYPE_OTHER] = "unknown",
+ };
+
+ ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, NULL);
+ if (ret != VK_SUCCESS || !num) {
+ av_log(ctx, AV_LOG_ERROR, "No devices found: %s!\n", vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ devices = av_malloc_array(num, sizeof(VkPhysicalDevice));
+ if (!devices)
+ return AVERROR(ENOMEM);
+
+ ret = vkEnumeratePhysicalDevices(hwctx->inst, &num, devices);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Failed enumerating devices: %s\n",
+ vk_ret2str(ret));
+ err = AVERROR_EXTERNAL;
+ goto end;
+ }
+
+ prop = av_malloc_array(num, sizeof(VkPhysicalDeviceProperties));
+ if (!prop) {
+ err = AVERROR(ENOMEM);
+ goto end;
+ }
+
+ av_log(ctx, AV_LOG_VERBOSE, "GPU listing:\n");
+ for (i = 0; i < num; i++) {
+ vkGetPhysicalDeviceProperties(devices[i], &prop[i]);
+ av_log(ctx, AV_LOG_VERBOSE, " %d: %s (%s) (0x%x)\n", i, prop[i].deviceName,
+ dev_types[prop[i].deviceType], prop[i].deviceID);
+ }
+
+ if (select->name) {
+ av_log(ctx, AV_LOG_VERBOSE, "Requested device: %s\n", select->name);
+ for (i = 0; i < num; i++) {
+ if (strcmp(select->name, prop[i].deviceName) == 0) {
+ choice = devices[i];
+ goto end;
+ }
+ }
+ av_log(ctx, AV_LOG_ERROR, "Unable to find device \"%s\"!\n",
+ select->name);
+ err = AVERROR_UNKNOWN;
+ goto end;
+ } else if (select->pci_device) {
+ av_log(ctx, AV_LOG_VERBOSE, "Requested device: 0x%x\n", select->pci_device);
+ for (i = 0; i < num; i++) {
+ if (select->pci_device == prop[i].deviceID) {
+ choice = devices[i];
+ goto end;
+ }
+ }
+ av_log(ctx, AV_LOG_ERROR, "Unable to find device with PCI ID 0x%x!\n",
+ select->pci_device);
+ err = AVERROR(EINVAL);
+ goto end;
+ } else if (select->vendor_id) {
+ av_log(ctx, AV_LOG_VERBOSE, "Requested vendor: 0x%x\n", select->vendor_id);
+ for (i = 0; i < num; i++) {
+ if (select->vendor_id == prop[i].vendorID) {
+ choice = devices[i];
+ goto end;
+ }
+ }
+ av_log(ctx, AV_LOG_ERROR, "Unable to find device with Vendor ID 0x%x!\n",
+ select->vendor_id);
+ err = AVERROR_UNKNOWN;
+ goto end;
+ } else {
+ if (select->index < num) {
+ choice = devices[select->index];
+ goto end;
+ }
+ av_log(ctx, AV_LOG_ERROR, "Unable to find device with index %i!\n",
+ select->index);
+ err = AVERROR_UNKNOWN;
+ goto end;
+ }
+
+end:
+ av_free(devices);
+ av_free(prop);
+ hwctx->phys_dev = choice;
+
+ return err;
+}
+
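+/* Picks the graphics, compute and transfer queue families and fills in the device's queue creation info */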
+static int search_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
+{
+ uint32_t i, num;
+ VkQueueFamilyProperties *qs = NULL;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ int graph_index = -1, comp_index = -1, tx_index = -1;
+ VkDeviceQueueCreateInfo *pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos;
+
+ /* First get the number of queue families */
+ vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL);
+ if (!num) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ /* Then allocate memory */
+ qs = av_malloc_array(num, sizeof(VkQueueFamilyProperties));
+ if (!qs)
+ return AVERROR(ENOMEM);
+
+ /* Finally retrieve the queue families */
+ vkGetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qs);
+
+#define SEARCH_FLAGS(expr, out) \
+ for (i = 0; i < num; i++) { \
+ const VkQueueFlagBits flags = qs[i].queueFlags; \
+ if (expr) { \
+ out = i; \
+ break; \
+ } \
+ }
+
+ if (!hwctx->queue_family_index)
+ SEARCH_FLAGS(flags & VK_QUEUE_GRAPHICS_BIT, graph_index)
+
+ if (!hwctx->queue_family_comp_index)
+ SEARCH_FLAGS((flags & VK_QUEUE_COMPUTE_BIT) && (i != graph_index),
+ comp_index)
+
+ if (!hwctx->queue_family_tx_index)
+ SEARCH_FLAGS((flags & VK_QUEUE_TRANSFER_BIT) && (i != graph_index) &&
+ (i != comp_index), tx_index)
+
+#undef SEARCH_FLAGS
+#define QF_FLAGS(flags) \
+ ((flags) & VK_QUEUE_GRAPHICS_BIT ) ? "(graphics) " : "", \
+ ((flags) & VK_QUEUE_COMPUTE_BIT ) ? "(compute) " : "", \
+ ((flags) & VK_QUEUE_TRANSFER_BIT ) ? "(transfer) " : "", \
+ ((flags) & VK_QUEUE_SPARSE_BINDING_BIT) ? "(sparse) " : ""
+
+ av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for graphics, "
+ "flags: %s%s%s%s\n", graph_index, QF_FLAGS(qs[graph_index].queueFlags));
+
+ hwctx->queue_family_index = graph_index;
+ hwctx->queue_family_tx_index = graph_index;
+ hwctx->queue_family_comp_index = graph_index;
+
+ pc[cd->queueCreateInfoCount++].queueFamilyIndex = graph_index;
+
+ if (comp_index != -1) {
+ av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for compute, "
+ "flags: %s%s%s%s\n", comp_index, QF_FLAGS(qs[comp_index].queueFlags));
+ hwctx->queue_family_tx_index = comp_index;
+ hwctx->queue_family_comp_index = comp_index;
+ pc[cd->queueCreateInfoCount++].queueFamilyIndex = comp_index;
+ }
+
+ if (tx_index != -1) {
+ av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i for transfers, "
+ "flags: %s%s%s%s\n", tx_index, QF_FLAGS(qs[tx_index].queueFlags));
+ hwctx->queue_family_tx_index = tx_index;
+ pc[cd->queueCreateInfoCount++].queueFamilyIndex = tx_index;
+ }
+
+#undef QF_FLAGS
+
+ av_free(qs);
+
+ return 0;
+}
+
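+/* Creates the internal command pool, command buffer, fence and queue used for frame transfers */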
+static int create_exec_ctx(AVHWDeviceContext *ctx)
+{
+ VkResult ret;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VulkanDevicePriv *p = ctx->internal->priv;
+
+ VkCommandPoolCreateInfo cqueue_create = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+ .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+ .queueFamilyIndex = hwctx->queue_family_tx_index,
+ };
+ VkCommandBufferAllocateInfo cbuf_create = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+ .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+ .commandBufferCount = 1,
+ };
+ VkFenceCreateInfo fence_spawn = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
+
+ ret = vkCreateCommandPool(hwctx->act_dev, &cqueue_create,
+ hwctx->alloc, &p->cmd_pool);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Command pool creation failure: %s\n",
+ vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ cbuf_create.commandPool = p->cmd_pool;
+
+ ret = vkAllocateCommandBuffers(hwctx->act_dev, &cbuf_create, &p->cmd_buf);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
+ vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ ret = vkCreateFence(hwctx->act_dev, &fence_spawn,
+ hwctx->alloc, &p->cmd_fence);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
+ vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ vkGetDeviceQueue(hwctx->act_dev, hwctx->queue_family_tx_index, 0,
+ &p->cmd_queue);
+
+ return 0;
+}
+
+static void free_exec_ctx(AVHWDeviceContext *ctx)
+{
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VulkanDevicePriv *p = ctx->internal->priv;
+
+ if (!p)
+ return;
+
+ if (p->cmd_fence != VK_NULL_HANDLE)
+ vkDestroyFence(hwctx->act_dev, p->cmd_fence, hwctx->alloc);
+ if (p->cmd_buf != VK_NULL_HANDLE)
+ vkFreeCommandBuffers(hwctx->act_dev, p->cmd_pool, 1, &p->cmd_buf);
+ if (p->cmd_pool != VK_NULL_HANDLE)
+ vkDestroyCommandPool(hwctx->act_dev, p->cmd_pool, hwctx->alloc);
+}
+
+static void vulkan_device_free(AVHWDeviceContext *ctx)
+{
+ VulkanDevicePriv *p = ctx->internal->priv;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+
+ free_exec_ctx(ctx);
+
+ vkDestroyDevice(hwctx->act_dev, hwctx->alloc);
+
+ if (p && p->debug_ctx != VK_NULL_HANDLE) {
+ VK_LOAD_PFN(hwctx->inst, vkDestroyDebugUtilsMessengerEXT);
+ pfn_vkDestroyDebugUtilsMessengerEXT(hwctx->inst, p->debug_ctx,
+ hwctx->alloc);
+ }
+
+ vkDestroyInstance(hwctx->inst, hwctx->alloc);
+}
+
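+/* Shared device creation path: creates the instance, selects a physical device, sets up queues and creates the logical device */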
+static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
+ VulkanDeviceSelection *dev_select,
+ AVDictionary *opts, int flags)
+{
+ int err = 0;
+ VkResult ret;
+ AVDictionaryEntry *use_linear_images_opt;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VkDeviceQueueCreateInfo queue_create_info[3] = {
+ { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+ .pQueuePriorities = (float []){ 1.0f },
+ .queueCount = 1, },
+ { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+ .pQueuePriorities = (float []){ 1.0f },
+ .queueCount = 1, },
+ { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+ .pQueuePriorities = (float []){ 1.0f },
+ .queueCount = 1, },
+ };
+
+ VkDeviceCreateInfo dev_info = {
+ .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+ .pQueueCreateInfos = queue_create_info,
+ .queueCreateInfoCount = 0,
+ };
+
+ VulkanDevicePriv *p = av_mallocz(sizeof(*p));
+ if (!p) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ ctx->internal->priv = p;
+ ctx->free = vulkan_device_free;
+
+ /* Create an instance if not given one */
+ if (!hwctx->inst && (err = create_instance(ctx, opts)))
+ goto fail;
+
+ /* Find a device (if not given one) */
+ if (!hwctx->phys_dev && (err = find_device(ctx, dev_select)))
+ goto fail;
+
+ vkGetPhysicalDeviceProperties(hwctx->phys_dev, &p->props);
+ av_log(ctx, AV_LOG_VERBOSE, "Using device: %s\n", p->props.deviceName);
+ av_log(ctx, AV_LOG_VERBOSE, "Alignments:\n");
+ av_log(ctx, AV_LOG_VERBOSE, " optimalBufferCopyOffsetAlignment: %"PRIu64"\n",
+ p->props.limits.optimalBufferCopyOffsetAlignment);
+ av_log(ctx, AV_LOG_VERBOSE, " optimalBufferCopyRowPitchAlignment: %"PRIu64"\n",
+ p->props.limits.optimalBufferCopyRowPitchAlignment);
+ av_log(ctx, AV_LOG_VERBOSE, " minMemoryMapAlignment: %"PRIu64"\n",
+ (uint64_t)p->props.limits.minMemoryMapAlignment);
+
+ /* Search queue family */
+ if ((err = search_queue_families(ctx, &dev_info)))
+ goto fail;
+
+ if (!hwctx->act_dev) {
+ err = check_extensions(ctx, 1, &dev_info.ppEnabledExtensionNames,
+ &dev_info.enabledExtensionCount, 0);
+ if (err)
+ goto fail;
+
+ ret = vkCreateDevice(hwctx->phys_dev, &dev_info,
+ hwctx->alloc, &hwctx->act_dev);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Device creation failure: %s\n",
+ vk_ret2str(ret));
+ err = AVERROR_EXTERNAL;
+ goto fail;
+ }
+
+ av_free((void *)dev_info.ppEnabledExtensionNames);
+ }
+
+ /* Tiled images setting, use them by default */
+ use_linear_images_opt = av_dict_get(opts, "linear_images", NULL, 0);
+ if (use_linear_images_opt)
+ p->use_linear_images = strtol(use_linear_images_opt->value, NULL, 10);
+
+ return 0;
+
+fail:
+ av_freep(&ctx->internal->priv);
+ return err;
+}
+
+static int vulkan_device_init(AVHWDeviceContext *ctx)
+{
+ int err;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VulkanDevicePriv *p = ctx->internal->priv;
+
+ /* Create exec context - if there's something invalid this will error out */
+ err = create_exec_ctx(ctx);
+ if (err)
+ return err;
+
+ /* Get device capabilities */
+ vkGetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
+
+ return 0;
+}
+
+static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,
+ AVDictionary *opts, int flags)
+{
+ VulkanDeviceSelection dev_select = { 0 };
+ if (device && device[0]) {
+ if (av_isdigit(device[0]))
+ dev_select.index = strtol(device, NULL, 10);
+ else
+ dev_select.name = device;
+ }
+
+ return vulkan_device_create_internal(ctx, &dev_select, opts, flags);
+}
+
+static int vulkan_device_derive(AVHWDeviceContext *ctx,
+ AVHWDeviceContext *src_ctx, int flags)
+{
+ VulkanDeviceSelection dev_select = { 0 };
+
+ switch(src_ctx->type) {
+#if CONFIG_LIBDRM
+#if CONFIG_VAAPI
+ case AV_HWDEVICE_TYPE_VAAPI: {
+ AVVAAPIDeviceContext *src_hwctx = src_ctx->hwctx;
+ const char *vendor = vaQueryVendorString(src_hwctx->display);
+ if (!vendor) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to get device info from vaapi!\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ if (strstr(vendor, "Intel"))
+ dev_select.vendor_id = 0x8086;
+ if (strstr(vendor, "AMD"))
+ dev_select.vendor_id = 0x1002;
+
+ return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
+ }
+#endif
+ case AV_HWDEVICE_TYPE_DRM: {
+ AVDRMDeviceContext *src_hwctx = src_ctx->hwctx;
+
+ drmDevice *drm_dev_info;
+ int err = drmGetDevice(src_hwctx->fd, &drm_dev_info);
+ if (err) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to get device info from drm fd!\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ dev_select.pci_device = drm_dev_info->deviceinfo.pci->device_id;
+
+ return vulkan_device_create_internal(ctx, &dev_select, NULL, flags);
+ }
+#endif
+ default:
+ return AVERROR(ENOSYS);
+ }
+}
+
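+/* Reports which sw formats are usable and the maximum image dimensions supported by the device */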
+static int vulkan_frames_get_constraints(AVHWDeviceContext *ctx,
+ const void *hwconfig,
+ AVHWFramesConstraints *constraints)
+{
+ int count = 0;
+ enum AVPixelFormat i;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VulkanDevicePriv *p = ctx->internal->priv;
+
+ for (i = 0; i < AV_PIX_FMT_NB; i++)
+ count += vkfmt_is_supported(hwctx, i, p->use_linear_images);
+
+ constraints->valid_sw_formats = av_malloc_array(count + 1,
+ sizeof(enum AVPixelFormat));
+ if (!constraints->valid_sw_formats)
+ return AVERROR(ENOMEM);
+
+ count = 0;
+ for (i = 0; i < AV_PIX_FMT_NB; i++)
+ if (vkfmt_is_supported(hwctx, i, p->use_linear_images))
+ constraints->valid_sw_formats[count++] = i;
+ constraints->valid_sw_formats[count++] = AV_PIX_FMT_NONE;
+
+ constraints->min_width = 0;
+ constraints->min_height = 0;
+ constraints->max_width = p->props.limits.maxImageDimension2D;
+ constraints->max_height = p->props.limits.maxImageDimension2D;
+
+ constraints->valid_hw_formats = av_malloc_array(2, sizeof(enum AVPixelFormat));
+ if (!constraints->valid_hw_formats)
+ return AVERROR(ENOMEM);
+
+ constraints->valid_hw_formats[0] = AV_PIX_FMT_VULKAN;
+ constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
+
+ return 0;
+}
+
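+/* Allocates device memory from the first (and therefore best) memory type matching the requirements and property flags */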
+static int alloc_mem(AVHWDeviceContext *ctx, VkMemoryRequirements *req,
+ VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+ VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
+{
+ VkResult ret;
+ int index = -1;
+ VulkanDevicePriv *p = ctx->internal->priv;
+ AVVulkanDeviceContext *dev_hwctx = ctx->hwctx;
+ VkMemoryAllocateInfo alloc_info = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .pNext = alloc_extension,
+ };
+
+ /* Align if we need to */
+ if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+ req->size = FFALIGN(req->size, p->props.limits.minMemoryMapAlignment);
+
+ alloc_info.allocationSize = req->size;
+
+ /* The vulkan spec requires memory types to be sorted in the "optimal"
+ * order, so the first matching type we find will be the best/fastest one */
+ for (int i = 0; i < p->mprops.memoryTypeCount; i++) {
+ /* The memory type must be supported by the requirements (bitfield) */
+ if (!(req->memoryTypeBits & (1 << i)))
+ continue;
+
+ /* The memory type flags must include our properties */
+ if ((p->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
+ continue;
+
+ /* Found a suitable memory type */
+ index = i;
+ break;
+ }
+
+ if (index < 0) {
+ av_log(ctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
+ req_flags);
+ return AVERROR(EINVAL);
+ }
+
+ alloc_info.memoryTypeIndex = index;
+
+ ret = vkAllocateMemory(dev_hwctx->act_dev, &alloc_info,
+ dev_hwctx->alloc, mem);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
+ vk_ret2str(ret));
+ return AVERROR(ENOMEM);
+ }
+
+ *mem_flags |= p->mprops.memoryTypes[index].propertyFlags;
+
+ return 0;
+}
+
+static void vulkan_frame_free(void *opaque, uint8_t *data)
+{
+ int i;
+ AVVkFrame *f = (AVVkFrame *)data;
+ AVVulkanDeviceContext *hwctx = opaque;
+
+ if (!f)
+ return;
+
+ vkDestroyImage(hwctx->act_dev, f->img, hwctx->alloc);
+ for (i = 0; i < f->mem_count; i++)
+ vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
+
+ av_free(f);
+}
+
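+/* Creates a VkImage (disjoint if requested) and allocates and binds backing memory for it, per-plane when disjoint */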
+static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
+ VkImageTiling tiling, VkImageUsageFlagBits usage,
+ int disjoint, void *create_pnext, void *alloc_pnext,
+ size_t alloc_pnext_stride)
+{
+ int i, err;
+ VkResult ret;
+ AVHWDeviceContext *ctx = hwfc->device_ctx;
+ enum AVPixelFormat format = hwfc->sw_format;
+ VkFormat img_fmt = av_vkfmt_from_pixfmt(format);
+ const int planes = av_pix_fmt_count_planes(format);
+
+ /* Allocated */
+ AVVkFrame *f = NULL;
+
+ /* Contexts */
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VulkanDevicePriv *p = ctx->internal->priv;
+
+ /* For some reason some extensions need function loading */
+ VK_LOAD_PFN(hwctx->inst, vkBindImageMemory2KHR);
+ VK_LOAD_PFN(hwctx->inst, vkGetImageMemoryRequirements2KHR);
+
+ /* Image properties */
+ VkFormat possible_fmts[2];
+ VkImageFormatListCreateInfoKHR img_fmt_list = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR,
+ .pNext = create_pnext,
+ .pViewFormats = possible_fmts,
+ .viewFormatCount = 1,
+ };
+ VkBindImageMemoryInfo bind_info[AV_NUM_DATA_POINTERS] = { { 0 } };
+ VkBindImagePlaneMemoryInfo bind_p_info[AV_NUM_DATA_POINTERS] = { { 0 } };
+ VkImageCreateInfo image_create_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .pNext = create_pnext,
+ .imageType = VK_IMAGE_TYPE_2D,
+ .format = img_fmt,
+ .extent.width = hwfc->width,
+ .extent.height = hwfc->height,
+ .extent.depth = 1,
+ .mipLevels = 1,
+ .arrayLayers = 1,
+ .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT |
+ VK_IMAGE_CREATE_EXTENDED_USAGE_BIT |
+ (disjoint ? VK_IMAGE_CREATE_DISJOINT_BIT : 0),
+ .tiling = tiling,
+ .initialLayout = tiling == VK_IMAGE_TILING_LINEAR ?
+ VK_IMAGE_LAYOUT_PREINITIALIZED :
+ VK_IMAGE_LAYOUT_UNDEFINED,
+ .usage = usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ };
+
+ if (img_fmt == VK_FORMAT_UNDEFINED) {
+ av_log(ctx, AV_LOG_ERROR, "Unsupported image format!\n");
+ return AVERROR(EINVAL);
+ }
+
+ f = av_mallocz(sizeof(*f));
+ if (!f) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to allocate memory for AVVkFrame!\n");
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ /* Needed */
+ f->flags = 0;
+ f->mem_count = disjoint ? planes : 1;
+ f->tiling = image_create_info.tiling;
+ f->layout = image_create_info.initialLayout;
+ f->access = 0;
+
+ possible_fmts[0] = image_create_info.format;
+ /* Mark the formats that a VkImageView can be made of if supported */
+ if ((planes > 1) && (p->extensions & EXT_IMAGE_FORMAT_LIST)) {
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(format);
+ switch (desc->comp[0].depth) {
+ case 8: possible_fmts[1] = VK_FORMAT_R8_UNORM; break;
+ case 10: possible_fmts[1] = VK_FORMAT_R10X6_UNORM_PACK16; break;
+ case 12: possible_fmts[1] = VK_FORMAT_R12X4_UNORM_PACK16; break;
+ case 16: possible_fmts[1] = VK_FORMAT_R16_UNORM; break;
+ }
+ img_fmt_list.viewFormatCount++;
+ image_create_info.pNext = &img_fmt_list;
+ }
+
+ /* Create the image */
+ ret = vkCreateImage(hwctx->act_dev, &image_create_info,
+ hwctx->alloc, &f->img);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Image creation failure: %s\n",
+ vk_ret2str(ret));
+ err = AVERROR(EINVAL);
+ goto fail;
+ }
+
+ for (i = 0; i < f->mem_count; i++) {
+ int use_ded_mem;
+ VkImagePlaneMemoryRequirementsInfo plane_req = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO,
+ .planeAspect = i == 0 ? VK_IMAGE_ASPECT_PLANE_0_BIT :
+ i == 1 ? VK_IMAGE_ASPECT_PLANE_1_BIT :
+ VK_IMAGE_ASPECT_PLANE_2_BIT,
+ };
+ VkImageMemoryRequirementsInfo2 req_desc = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
+ .pNext = disjoint ? &plane_req : NULL,
+ .image = f->img,
+ };
+ VkMemoryDedicatedAllocateInfo ded_alloc = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
+ .pNext = (void *)(((uint8_t *)alloc_pnext) + i*alloc_pnext_stride),
+ };
+ VkMemoryDedicatedRequirements ded_req = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
+ };
+ VkMemoryRequirements2 req = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
+ .pNext = (p->extensions & EXT_DEDICATED_ALLOC) ? &ded_req : NULL,
+ };
+
+ pfn_vkGetImageMemoryRequirements2KHR(hwctx->act_dev, &req_desc, &req);
+
+ /* In case the implementation prefers/requires dedicated allocation */
+ use_ded_mem = ded_req.prefersDedicatedAllocation |
+ ded_req.requiresDedicatedAllocation;
+ if (use_ded_mem)
+ ded_alloc.image = f->img;
+
+ /* Allocate host visible memory */
+ if ((err = alloc_mem(ctx, &req.memoryRequirements,
+ tiling == VK_IMAGE_TILING_LINEAR ?
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+ use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
+ &f->flags, &f->mem[i])))
+ goto fail;
+
+ if (disjoint) {
+ bind_p_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO;
+ bind_p_info[i].planeAspect = plane_req.planeAspect;
+ bind_info[i].pNext = &bind_p_info[i];
+ }
+
+ bind_info[i].sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
+ bind_info[i].image = f->img;
+ bind_info[i].memory = f->mem[i];
+ }
+
+ /* Bind the allocated memory to the image */
+ ret = pfn_vkBindImageMemory2KHR(hwctx->act_dev, f->mem_count, bind_info);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to bind memory: %s\n",
+ vk_ret2str(ret));
+ err = AVERROR_EXTERNAL;
+ goto fail;
+ }
+
+ *frame = f;
+ return 0;
+
+fail:
+ vulkan_frame_free(hwctx, (uint8_t *)f);
+ return err;
+}
+
+/* Checks if an export flag is enabled, and if it is ORs it with *iexp */
+static void check_export_flags(AVHWFramesContext *hwfc,
+ VkExternalMemoryHandleTypeFlagBits *iexp,
+ VkExternalMemoryHandleTypeFlagBits exp)
+{
+ VkResult ret;
+ AVVulkanFramesContext *hwctx = hwfc->hwctx;
+ AVVulkanDeviceContext *dev_hwctx = hwfc->device_ctx->hwctx;
+ VK_LOAD_PFN(dev_hwctx->inst, vkGetPhysicalDeviceImageFormatProperties2);
+ VkImageFormatProperties2 props = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
+ };
+ VkPhysicalDeviceExternalImageFormatInfo enext = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
+ .handleType = exp,
+ };
+ VkPhysicalDeviceImageFormatInfo2 pinfo = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
+ .pNext = &enext,
+ .format = av_vkfmt_from_pixfmt(hwfc->sw_format),
+ .type = VK_IMAGE_TYPE_2D,
+ .tiling = hwctx->tiling,
+ .usage = hwctx->usage,
+ .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT |
+ VK_IMAGE_CREATE_EXTENDED_USAGE_BIT |
+ (hwctx->disjoint ? VK_IMAGE_CREATE_DISJOINT_BIT : 0),
+ };
+ ret = pfn_vkGetPhysicalDeviceImageFormatProperties2(dev_hwctx->phys_dev,
+ &pinfo, &props);
+ if (ret == VK_SUCCESS)
+ *iexp |= exp;
+}
+
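+/* Buffer pool callback: creates an exportable AVVkFrame wrapped in an AVBufferRef */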
+static AVBufferRef *vulkan_pool_alloc(void *opaque, int size)
+{
+ int i, err;
+ AVVkFrame *f;
+ AVBufferRef *avbuf = NULL;
+ AVHWFramesContext *hwfc = opaque;
+ AVVulkanFramesContext *hwctx = hwfc->hwctx;
+ VkExportMemoryAllocateInfo einfo[AV_NUM_DATA_POINTERS];
+ VkExternalMemoryHandleTypeFlags e = 0x0;
+
+ check_export_flags(hwfc, &e, VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
+
+ for (i = 0; i < av_pix_fmt_count_planes(hwfc->sw_format); i++) {
+ einfo[i].sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
+ einfo[i].pNext = hwctx->alloc_pnext[i];
+ einfo[i].handleTypes = e;
+ }
+
+ err = create_frame(hwfc, &f, hwctx->tiling, hwctx->usage,
+ hwctx->disjoint, hwctx->create_pnext,
+ einfo, sizeof(*einfo));
+ if (err)
+ return NULL;
+
+ avbuf = av_buffer_create((uint8_t *)f, sizeof(AVVkFrame),
+ vulkan_frame_free, hwfc->device_ctx->hwctx, 0);
+ if (!avbuf) {
+ vulkan_frame_free(hwfc, (uint8_t *)f);
+ return NULL;
+ }
+
+ return avbuf;
+}
+
+static int vulkan_frames_init(AVHWFramesContext *hwfc)
+{
+ AVVulkanFramesContext *hwctx = hwfc->hwctx;
+ VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
+
+ if (hwfc->pool)
+ return 0;
+
+ /* Default pool flags */
+ hwctx->tiling = hwctx->tiling ? hwctx->tiling : p->use_linear_images ?
+ VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
+ hwctx->usage |= DEFAULT_USAGE_FLAGS;
+
+ hwfc->internal->pool_internal = av_buffer_pool_init2(sizeof(AVVkFrame),
+ hwfc, vulkan_pool_alloc,
+ NULL);
+ if (!hwfc->internal->pool_internal)
+ return AVERROR(ENOMEM);
+
+ return 0;
+}
+
+static int vulkan_get_buffer(AVHWFramesContext *hwfc, AVFrame *frame)
+{
+ frame->buf[0] = av_buffer_pool_get(hwfc->pool);
+ if (!frame->buf[0])
+ return AVERROR(ENOMEM);
+
+ frame->data[0] = frame->buf[0]->data;
+ frame->format = AV_PIX_FMT_VULKAN;
+ frame->width = hwfc->width;
+ frame->height = hwfc->height;
+
+ return 0;
+}
+
+static int vulkan_transfer_get_formats(AVHWFramesContext *hwfc,
+ enum AVHWFrameTransferDirection dir,
+ enum AVPixelFormat **formats)
+{
+ int count = 0;
+ enum AVPixelFormat *pix_fmts = NULL;
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
+
+ /* All formats can be transferred to themselves */
+ count++;
+
+ /* All formats with a luma can have only that channel transferred */
+ count += !(desc->flags & AV_PIX_FMT_FLAG_RGB);
+
+ pix_fmts = av_malloc((count + 1) * sizeof(*pix_fmts));
+ if (!pix_fmts)
+ return AVERROR(ENOMEM);
+
+ count = 0;
+ pix_fmts[count++] = hwfc->sw_format;
+ if (!(desc->flags & AV_PIX_FMT_FLAG_RGB)) {
+ switch (desc->comp[0].depth) {
+ case 8: pix_fmts[count++] = AV_PIX_FMT_GRAY8; break;
+ case 10: pix_fmts[count++] = AV_PIX_FMT_GRAY10; break;
+ case 12: pix_fmts[count++] = AV_PIX_FMT_GRAY12; break;
+ case 16: pix_fmts[count++] = AV_PIX_FMT_GRAY16; break;
+ }
+ }
+ pix_fmts[count++] = AV_PIX_FMT_NONE;
+
+ *formats = pix_fmts;
+
+ return 0;
+}
+
+typedef struct VulkanMapping {
+ AVVkFrame *frame;
+ int flags;
+} VulkanMapping;
+
+static void vulkan_unmap_frame(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
+{
+ int i;
+ VulkanMapping *map = hwmap->priv;
+ AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
+
+ /* Check if buffer needs flushing */
+ if ((map->flags & AV_HWFRAME_MAP_WRITE) &&
+ !(map->frame->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
+ VkResult ret;
+ VkMappedMemoryRange flush_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
+
+ for (i = 0; i < map->frame->mem_count; i++) {
+ flush_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
+ flush_ranges[i].memory = map->frame->mem[i];
+ flush_ranges[i].size = VK_WHOLE_SIZE;
+ }
+
+ ret = vkFlushMappedMemoryRanges(hwctx->act_dev, map->frame->mem_count,
+ flush_ranges);
+ if (ret != VK_SUCCESS) {
+ av_log(hwfc, AV_LOG_ERROR, "Failed to flush memory: %s\n",
+ vk_ret2str(ret));
+ }
+ }
+
+ for (i = 0; i < map->frame->mem_count; i++)
+ vkUnmapMemory(hwctx->act_dev, map->frame->mem[i]);
+
+ av_free(map);
+}
+
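+/* Maps a host-visible AVVkFrame into CPU memory, invalidating non-coherent memory when its contents matter */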
+static int vulkan_map_frame(AVHWFramesContext *hwfc, AVFrame *dst,
+ const AVFrame *src, int flags)
+{
+ int i, err;
+ VkResult ret;
+ AVVkFrame *f = (AVVkFrame *)src->data[0];
+ AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
+ const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
+
+ VulkanMapping *map = av_mallocz(sizeof(VulkanMapping));
+ if (!map)
+ return AVERROR(ENOMEM);
+
+ if (src->format != AV_PIX_FMT_VULKAN) {
+ av_log(hwfc, AV_LOG_ERROR, "Cannot map from pixel format %s!\n",
+ av_get_pix_fmt_name(src->format));
+ err = AVERROR(EINVAL);
+ goto fail;
+ }
+
+ if (!(f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
+ av_log(hwfc, AV_LOG_ERROR, "Unable to map frame, not host visible!\n");
+ err = AVERROR(EINVAL);
+ goto fail;
+ }
+
+ dst->width = src->width;
+ dst->height = src->height;
+
+ for (i = 0; i < f->mem_count; i++) {
+ ret = vkMapMemory(hwctx->act_dev, f->mem[i], 0,
+ VK_WHOLE_SIZE, 0, (void **)&dst->data[i]);
+ if (ret != VK_SUCCESS) {
+ av_log(hwfc, AV_LOG_ERROR, "Failed to map image memory: %s\n",
+ vk_ret2str(ret));
+ err = AVERROR_EXTERNAL;
+ goto fail;
+ }
+ }
+
+ /* For non disjoint memory duplicate them */
+ if (f->mem_count == 1)
+ for (i = 1; i < planes; i++)
+ dst->data[i] = dst->data[0];
+
+ /* Check if the memory contents matter */
+ if (((flags & AV_HWFRAME_MAP_READ) || !(flags & AV_HWFRAME_MAP_OVERWRITE)) &&
+ !(f->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
+ VkMappedMemoryRange map_mem_ranges[AV_NUM_DATA_POINTERS] = { { 0 } };
+ for (i = 0; i < f->mem_count; i++) {
+ map_mem_ranges[i].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
+ map_mem_ranges[i].size = VK_WHOLE_SIZE;
+ map_mem_ranges[i].memory = f->mem[i];
+ }
+
+ ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, f->mem_count,
+ map_mem_ranges);
+ if (ret != VK_SUCCESS) {
+ av_log(hwfc, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
+ vk_ret2str(ret));
+ err = AVERROR_EXTERNAL;
+ goto fail;
+ }
+ }
+
+ for (i = 0; i < planes; i++) {
+ VkImageSubresource sub = {
+ .aspectMask = planes < 2 ? VK_IMAGE_ASPECT_COLOR_BIT :
+ i == 0 ? VK_IMAGE_ASPECT_PLANE_0_BIT :
+ i == 1 ? VK_IMAGE_ASPECT_PLANE_1_BIT :
+ VK_IMAGE_ASPECT_PLANE_2_BIT,
+ };
+ VkSubresourceLayout layout;
+ vkGetImageSubresourceLayout(hwctx->act_dev, f->img, &sub, &layout);
+ dst->data[i] += layout.offset;
+ dst->linesize[i] = layout.rowPitch;
+ }
+
+ map->frame = f;
+ map->flags = flags;
+
+ err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
+ &vulkan_unmap_frame, map);
+ if (err < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ for (i = 0; i < f->mem_count; i++)
+ vkUnmapMemory(hwctx->act_dev, f->mem[i]);
+
+ av_free(map);
+ return err;
+}
+
+#if CONFIG_LIBDRM
+static void vulkan_unmap_from(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
+{
+ int i;
+ VulkanMapping *map = hwmap->priv;
+ AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
+
+ vkDestroyImage(hwctx->act_dev, map->frame->img, hwctx->alloc);
+ for (i = 0; i < map->frame->mem_count; i++)
+ vkFreeMemory(hwctx->act_dev, map->frame->mem[i], hwctx->alloc);
+
+ av_freep(&map->frame);
+}
+
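+/* Imports a DRM PRIME frame by creating a VkImage backed by the DMA-BUF file descriptors */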
+static int vulkan_map_from_drm(AVHWFramesContext *hwfc, AVFrame *dst,
+ const AVFrame *src, int flags)
+{
+ int i, err = 0;
+ VulkanMapping *map = NULL;
+
+ /* Source frame */
+ AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->data[0];
+
+ /* Destination frame */
+ AVVkFrame *f = NULL;
+#if HAVE_VULKAN_DRM_MOD
+ uint64_t modifier_buf[AV_NUM_DATA_POINTERS];
+ VkImageDrmFormatModifierListCreateInfoEXT drm_mod = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT,
+ };
+#endif
+ VkExternalMemoryImageCreateInfo ext_info = {
+ .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
+#if HAVE_VULKAN_DRM_MOD
+ .pNext = &drm_mod,
+#endif
+ .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
+ };
+ VkImportMemoryFdInfoKHR import_desc[AV_NUM_DATA_POINTERS];
+
+ if ((desc->nb_objects > 1) &&
+ (desc->nb_objects != av_pix_fmt_count_planes(hwfc->sw_format))) {
+ av_log(hwfc, AV_LOG_ERROR, "Number of DRM objects doesn't match "
+ "plane count!\n");
+ return AVERROR(EINVAL);
+ }
+
+ for (i = 0; i < desc->nb_objects; i++) {
+ import_desc[i].sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR;
+ import_desc[i].pNext = NULL;
+ import_desc[i].handleType = ext_info.handleTypes;
+ import_desc[i].fd = desc->objects[i].fd;
+#if HAVE_VULKAN_DRM_MOD
+ modifier_buf[i] = desc->objects[i].format_modifier;
+#endif
+ }
+#if HAVE_VULKAN_DRM_MOD
+ drm_mod.pDrmFormatModifiers = modifier_buf;
+ drm_mod.drmFormatModifierCount = desc->nb_objects;
+#endif
+
+ err = create_frame(hwfc, &f,
+#if HAVE_VULKAN_DRM_MOD
+ VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT,
+#else
+ desc->objects[0].format_modifier == DRM_FORMAT_MOD_LINEAR ?
+ VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL,
+#endif
+ DEFAULT_USAGE_FLAGS, desc->nb_objects > 1, &ext_info,
+ import_desc, sizeof(*import_desc));
+ if (err < 0)
+ goto fail;
+
+ /* The unmapping function will free this */
+ dst->data[0] = (uint8_t *)f;
+ dst->width = src->width;
+ dst->height = src->height;
+
+ map = av_mallocz(sizeof(VulkanMapping));
+ if (!map) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ map->frame = f;
+ map->flags = flags;
+
+ err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
+ &vulkan_unmap_from, map);
+ if (err < 0)
+ goto fail;
+
+ av_log(hwfc, AV_LOG_DEBUG, "Mapped DRM object to Vulkan!\n");
+
+ return 0;
+
+fail:
+ vulkan_frame_free(hwfc->device_ctx->hwctx, (uint8_t *)f);
+ av_free(map);
+ return err;
+}
+
+#if CONFIG_VAAPI
+static int vulkan_map_from_vaapi(AVHWFramesContext *dst_fc,
+ AVFrame *dst, const AVFrame *src,
+ int flags)
+{
+ int err;
+ AVFrame *tmp = av_frame_alloc();
+ if (!tmp)
+ return AVERROR(ENOMEM);
+
+ tmp->format = AV_PIX_FMT_DRM_PRIME;
+
+ err = av_hwframe_map(tmp, src, flags);
+ if (err < 0)
+ goto fail;
+
+ err = vulkan_map_from_drm(dst_fc, dst, tmp, flags);
+ if (err < 0)
+ goto fail;
+
+ err = ff_hwframe_map_replace(dst, src);
+
+fail:
+ av_frame_free(&tmp);
+ return err;
+}
+#endif
+#endif
+
+static int vulkan_map_to(AVHWFramesContext *hwfc, AVFrame *dst,
+ const AVFrame *src, int flags)
+{
+ VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
+
+ if (!(p->extensions & EXT_EXTERNAL_MEMORY)) {
+ av_log(hwfc, AV_LOG_ERROR, "Cannot import any external memory, "
+ "VK_KHR_external_memory is unsupported!\n");
+ return AVERROR(ENOSYS);
+ }
+
+ switch (src->format) {
+#if CONFIG_LIBDRM
+#if CONFIG_VAAPI
+ case AV_PIX_FMT_VAAPI:
+ if (p->extensions & EXT_DRM_MODIFIER_FLAGS)
+ return vulkan_map_from_vaapi(hwfc, dst, src, flags);
+#endif
+ case AV_PIX_FMT_DRM_PRIME:
+ if (p->extensions & EXT_DRM_MODIFIER_FLAGS)
+ return vulkan_map_from_drm(hwfc, dst, src, flags);
+#endif
+ default:
+ return AVERROR(ENOSYS);
+ }
+}
+
+#if CONFIG_LIBDRM
+static void vulkan_unmap_to_drm(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
+{
+ int i;
+ AVDRMFrameDescriptor *drm_desc = hwmap->priv;
+
+ for (i = 0; i < drm_desc->nb_objects; i++)
+ close(drm_desc->objects[i].fd);
+
+ av_freep(&drm_desc);
+}
+
+static const uint32_t drm_format_map[AV_PIX_FMT_NB] = {
+ [AV_PIX_FMT_NV12] = DRM_FORMAT_NV12,
+ [AV_PIX_FMT_YUV420P] = DRM_FORMAT_YVU420,
+};
+
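+/* Exports an AVVkFrame to a DRM PRIME descriptor via vkGetMemoryFdKHR */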
+static int vulkan_map_to_drm(AVHWFramesContext *hwfc, AVFrame *dst,
+ const AVFrame *src, int flags)
+{
+ VkResult ret;
+ int i, j, err = 0;
+ AVVkFrame *f = (AVVkFrame *)src->data[0];
+ AVVulkanDeviceContext *hwctx = hwfc->device_ctx->hwctx;
+ VK_LOAD_PFN(hwctx->inst, vkGetMemoryFdKHR);
+#if HAVE_VULKAN_DRM_MOD
+ VkImageDrmFormatModifierPropertiesEXT drm_mod = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_PROPERTIES_EXT,
+ };
+#endif
+
+ AVDRMFrameDescriptor *drm_desc = av_mallocz(sizeof(*drm_desc));
+ if (!drm_desc)
+ return AVERROR(ENOMEM);
+
+#if HAVE_VULKAN_DRM_MOD
+ ret = vkGetImageDrmFormatModifierPropertiesEXT(hwctx->act_dev, f->img,
+ &drm_mod);
+ if (ret != VK_SUCCESS) {
+ av_log(hwfc, AV_LOG_ERROR, "Failed to retrieve DRM format modifier!\n");
+ err = AVERROR_EXTERNAL;
+ goto fail;
+ }
+#endif
+
+ drm_desc->nb_objects = f->mem_count;
+ for (i = 0; i < drm_desc->nb_objects; i++) {
+ VkMemoryGetFdInfoKHR export_info = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
+ .memory = f->mem[i],
+ .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
+ };
+
+ ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
+ &drm_desc->objects[i].fd);
+ if (ret != VK_SUCCESS) {
+ av_log(hwfc, AV_LOG_ERROR, "Unable to export the image as a FD!\n");
+ err = AVERROR_EXTERNAL;
+ goto fail;
+ }
+
+ drm_desc->objects[i].size = lseek(drm_desc->objects[i].fd, 0, SEEK_END);
+#if HAVE_VULKAN_DRM_MOD
+ drm_desc->objects[i].format_modifier = drm_mod.drmFormatModifier;
+#endif
+ }
+
+ drm_desc->nb_layers = 1;
+ for (i = 0; i < drm_desc->nb_layers; i++) {
+ drm_desc->layers[i].format = drm_format_map[hwfc->sw_format];
+ drm_desc->layers[i].nb_planes = av_pix_fmt_count_planes(hwfc->sw_format);
+
+ if (!drm_desc->layers[i].format)
+ goto fail;
+
+ for (j = 0; j < drm_desc->layers[i].nb_planes; j++) {
+ const int disjoint = drm_desc->nb_objects > 1;
+ const int nb_planes = drm_desc->layers[i].nb_planes;
+ VkImageSubresource sub = {
+ .aspectMask = nb_planes < 2 ? VK_IMAGE_ASPECT_COLOR_BIT :
+ j == 0 ? VK_IMAGE_ASPECT_PLANE_0_BIT :
+ j == 1 ? VK_IMAGE_ASPECT_PLANE_1_BIT :
+ VK_IMAGE_ASPECT_PLANE_2_BIT,
+ };
+ VkSubresourceLayout layout;
+ vkGetImageSubresourceLayout(hwctx->act_dev, f->img, &sub, &layout);
+ drm_desc->layers[i].planes[j].object_index = disjoint ? j : 0;
+ drm_desc->layers[i].planes[j].offset = layout.offset;
+ drm_desc->layers[i].planes[j].pitch = layout.rowPitch;
+ }
+ }
+
+ err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
+ &vulkan_unmap_to_drm, drm_desc);
+ if (err < 0)
+ goto fail;
+
+ dst->width = src->width;
+ dst->height = src->height;
+ dst->data[0] = (uint8_t*)drm_desc;
+
+ av_log(hwfc, AV_LOG_DEBUG, "Mapped AVVkFrame to a DRM object!\n");
+
+ return 0;
+
+fail:
+ for (i = 0; i < drm_desc->nb_objects; i++)
+ close(drm_desc->objects[i].fd);
+ av_freep(&drm_desc);
+ return err;
+}
+#endif
+
+static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
+ const AVFrame *src, int flags)
+{
+ VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
+
+ switch (dst->format) {
+#if CONFIG_LIBDRM
+ case AV_PIX_FMT_DRM_PRIME:
+ if (p->extensions & EXT_DRM_MODIFIER_FLAGS)
+ return vulkan_map_to_drm(hwfc, dst, src, flags);
+#endif
+ default:
+ av_log(hwfc, AV_LOG_ERROR, "Unsupported destination pixel format %s!\n",
+ av_get_pix_fmt_name(dst->format));
+ return AVERROR(EINVAL);
+ }
+
+ return 0;
+}
+
+typedef struct ImageBuffer {
+ VkBuffer buf;
+ VkDeviceMemory mem;
+ VkMemoryPropertyFlagBits flags;
+} ImageBuffer;
+
+static int create_buf(AVHWDeviceContext *ctx, ImageBuffer *buf, size_t size,
+ VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags,
+ void *create_pnext, void *alloc_pnext)
+{
+ int err;
+ VkResult ret;
+ VkMemoryRequirements req;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+
+ VkBufferCreateInfo buf_spawn = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = create_pnext,
+ .usage = usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .size = size, /* Gets FFALIGNED during alloc if host visible
+ but should be ok */
+ };
+
+ ret = vkCreateBuffer(hwctx->act_dev, &buf_spawn, hwctx->alloc, &buf->buf);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
+ vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ vkGetBufferMemoryRequirements(hwctx->act_dev, buf->buf, &req);
+
+ err = alloc_mem(ctx, &req, flags, alloc_pnext, &buf->flags, &buf->mem);
+ if (err)
+ return err;
+
+ ret = vkBindBufferMemory(hwctx->act_dev, buf->buf, buf->mem, 0);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
+ vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ return 0;
+}
+
+static void free_buf(AVHWDeviceContext *ctx, ImageBuffer *buf)
+{
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ if (!buf)
+ return;
+
+ vkDestroyBuffer(hwctx->act_dev, buf->buf, hwctx->alloc);
+ vkFreeMemory(hwctx->act_dev, buf->mem, hwctx->alloc);
+}
+
+static int map_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf, uint8_t *mem[],
+ int nb_buffers, int invalidate)
+{
+ int i;
+ VkResult ret;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VkMappedMemoryRange invalidate_ctx[AV_NUM_DATA_POINTERS];
+ int invalidate_count = 0;
+
+ for (i = 0; i < nb_buffers; i++) {
+ ret = vkMapMemory(hwctx->act_dev, buf[i].mem, 0,
+ VK_WHOLE_SIZE, 0, (void **)&mem[i]);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
+ vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ }
+
+ if (!invalidate)
+ return 0;
+
+ for (i = 0; i < nb_buffers; i++) {
+ const VkMappedMemoryRange ival_buf = {
+ .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+ .memory = buf[i].mem,
+ .size = VK_WHOLE_SIZE,
+ };
+ if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+ continue;
+ invalidate_ctx[invalidate_count++] = ival_buf;
+ }
+
+ if (invalidate_count) {
+ ret = vkInvalidateMappedMemoryRanges(hwctx->act_dev, invalidate_count,
+ invalidate_ctx);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
+ vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ }
+
+ return 0;
+}
+
+static int unmap_buffers(AVHWDeviceContext *ctx, ImageBuffer *buf,
+ int nb_buffers, int flush)
+{
+ VkResult ret;
+ int i, err = 0;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VkMappedMemoryRange flush_ctx[AV_NUM_DATA_POINTERS];
+ int flush_count = 0;
+
+ if (flush) {
+ for (i = 0; i < nb_buffers; i++) {
+ const VkMappedMemoryRange flush_buf = {
+ .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+ .memory = buf[i].mem,
+ .size = VK_WHOLE_SIZE,
+ };
+ if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+ continue;
+ flush_ctx[flush_count++] = flush_buf;
+ }
+ }
+
+ if (flush_count) {
+ ret = vkFlushMappedMemoryRanges(hwctx->act_dev, flush_count, flush_ctx);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
+ vk_ret2str(ret));
+ err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
+ }
+ }
+
+ for (i = 0; i < nb_buffers; i++)
+ vkUnmapMemory(hwctx->act_dev, buf[i].mem);
+
+ return err;
+}
+
+static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
+ ImageBuffer *buffer, const int *stride, int w,
+ int h, enum AVPixelFormat pix_fmt, int to_buf)
+{
+ int i;
+ VkResult ret;
+ AVVulkanDeviceContext *hwctx = ctx->hwctx;
+ VulkanDevicePriv *s = ctx->internal->priv;
+
+ const int planes = av_pix_fmt_count_planes(pix_fmt);
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
+
+ VkCommandBufferBeginInfo cmd_start = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ };
+
+ VkSubmitInfo s_info = {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .commandBufferCount = 1,
+ .pCommandBuffers = &s->cmd_buf,
+ };
+
+ vkBeginCommandBuffer(s->cmd_buf, &cmd_start);
+
+ { /* Change the image layout to something more optimal for transfers */
+ VkImageMemoryBarrier bar = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .srcAccessMask = 0,
+ .dstAccessMask = to_buf ? VK_ACCESS_TRANSFER_READ_BIT :
+ VK_ACCESS_TRANSFER_WRITE_BIT,
+ .oldLayout = frame->layout,
+ .newLayout = to_buf ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL :
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = frame->img,
+ .subresourceRange.levelCount = 1,
+ .subresourceRange.layerCount = 1,
+ };
+
+ if (planes == 1) {
+ bar.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+ } else {
+ bar.subresourceRange.aspectMask = VK_IMAGE_ASPECT_PLANE_0_BIT;
+ bar.subresourceRange.aspectMask |= VK_IMAGE_ASPECT_PLANE_1_BIT;
+ if (planes > 2)
+ bar.subresourceRange.aspectMask |= VK_IMAGE_ASPECT_PLANE_2_BIT;
+ }
+
+ vkCmdPipelineBarrier(s->cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ VK_PIPELINE_STAGE_TRANSFER_BIT,
+ 0, 0, NULL, 0, NULL, 1, &bar);
+
+ /* Update to the new layout */
+ frame->layout = bar.newLayout;
+ frame->access = bar.dstAccessMask;
+ }
+
+ /* Schedule a copy for each plane */
+ for (i = 0; i < planes; i++) {
+ VkImageSubresourceLayers sub = {
+ .aspectMask = planes < 2 ? VK_IMAGE_ASPECT_COLOR_BIT :
+ i == 0 ? VK_IMAGE_ASPECT_PLANE_0_BIT :
+ i == 1 ? VK_IMAGE_ASPECT_PLANE_1_BIT :
+ VK_IMAGE_ASPECT_PLANE_2_BIT,
+ .layerCount = 1,
+ };
+ const int p_w = i > 0 ? AV_CEIL_RSHIFT(w, desc->log2_chroma_w) : w;
+ const int p_h = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
+ VkBufferImageCopy buf_reg = {
+ .bufferOffset = 0,
+ /* Buffer stride isn't in bytes, it's in samples, the implementation
+ * uses the image's VkFormat to know how many bytes per sample
+ * the buffer has. So we have to convert by dividing. Stupid. */
+ .bufferRowLength = stride[i] / desc->comp[i].step,
+ .bufferImageHeight = p_h,
+ .imageSubresource = sub,
+ .imageOffset = { 0 },
+ .imageExtent = { p_w, p_h, 1, },
+ };
+ if (to_buf)
+ vkCmdCopyImageToBuffer(s->cmd_buf, frame->img, frame->layout,
+ buffer[i].buf, 1, &buf_reg);
+ else
+ vkCmdCopyBufferToImage(s->cmd_buf, buffer[i].buf, frame->img,
+ frame->layout, 1, &buf_reg);
+ }
+
+ vkEndCommandBuffer(s->cmd_buf);
+
+ ret = vkQueueSubmit(s->cmd_queue, 1, &s_info, s->cmd_fence);
+ if (ret != VK_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
+ vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ } else {
+ vkWaitForFences(hwctx->act_dev, 1, &s->cmd_fence, VK_TRUE, UINT64_MAX);
+ vkResetFences(hwctx->act_dev, 1, &s->cmd_fence);
+ }
+
+ return 0;
+}
+
+static int vulkan_transfer_data_to(AVHWFramesContext *hwfc, AVFrame *dst,
+ const AVFrame *src)
+{
+ int i, err = 0;
+ AVFrame *map = NULL;
+ ImageBuffer buf[3] = { { 0 } };
+ AVVkFrame *f = (AVVkFrame *)dst->data[0];
+ AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
+ VulkanDevicePriv *p = dev_ctx->internal->priv;
+ const int planes = av_pix_fmt_count_planes(src->format);
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(src->format);
+ int map_host = p->extensions & EXT_EXTERNAL_HOST_MEMORY;
+
+ if ((src->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(src->format))) {
+ av_log(hwfc, AV_LOG_ERROR, "Unsupported source pixel format!\n");
+ return AVERROR(EINVAL);
+ }
+
+ if (src->width > hwfc->width || src->height > hwfc->height)
+ return AVERROR(EINVAL);
+
+ /* Path one - image is host visible and linear */
+ if (f->tiling & VK_IMAGE_TILING_LINEAR &&
+ f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
+ map = av_frame_alloc();
+ if (!map)
+ return AVERROR(ENOMEM);
+ map->format = src->format;
+
+ err = vulkan_map_frame(hwfc, map, dst, AV_HWFRAME_MAP_WRITE);
+ if (err)
+ goto end;
+
+ err = av_frame_copy(map, src);
+ goto end;
+ }
+
+ /* Paths two and three - create a buffer per plane; with host memory
+ * import (path three) the source planes get bound directly, no copy */
+ for (i = 0; i < planes; i++) {
+ int h = src->height;
+ int p_height = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
+ size_t size = p_height*src->linesize[i];
+ VkImportMemoryHostPointerInfoEXT import_desc = {
+ .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
+ .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
+ .pHostPointer = src->data[i],
+ };
+ err = create_buf(dev_ctx, &buf[i], size,
+ VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL,
+ map_host ? &import_desc : NULL);
+ if (err)
+ goto end;
+ }
+
+ /* Path two - we can't import host memory, so copy into the buffers first */
+ if (!map_host) {
+ uint8_t *mem[3];
+ if ((err = map_buffers(dev_ctx, buf, mem, planes, 0)))
+ goto end;
+
+ for (i = 0; i < planes; i++) {
+ int h = src->height;
+ int p_height = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
+ memcpy(mem[i], src->data[i], p_height*src->linesize[i]);
+ }
+
+ if ((err = unmap_buffers(dev_ctx, buf, planes, 1)))
+ goto end;
+ }
+
+ /* Copy buffer to image */
+ err = transfer_image_buf(dev_ctx, f, buf, src->linesize,
+ src->width, src->height, src->format, 0);
+
+end:
+ av_frame_free(&map);
+ for (i = 0; i < planes; i++)
+ free_buf(dev_ctx, &buf[i]);
+
+ return err;
+}
+
+static int vulkan_transfer_data_from(AVHWFramesContext *hwfc, AVFrame *dst,
+ const AVFrame *src)
+{
+ int i, err = 0;
+ AVFrame *map = NULL;
+ ImageBuffer buf[3] = { { 0 } };
+ AVVkFrame *f = (AVVkFrame *)src->data[0];
+ AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
+ VulkanDevicePriv *p = dev_ctx->internal->priv;
+ const int planes = av_pix_fmt_count_planes(dst->format);
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(dst->format);
+ const int map_host = p->extensions & EXT_EXTERNAL_HOST_MEMORY;
+
+ if (dst->width > hwfc->width || dst->height > hwfc->height)
+ return AVERROR(EINVAL);
+
+ /* Path one - image is host visible and linear */
+ if (f->tiling & VK_IMAGE_TILING_LINEAR &&
+ f->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
+ map = av_frame_alloc();
+ if (!map)
+ return AVERROR(ENOMEM);
+ map->format = dst->format;
+
+ err = vulkan_map_frame(hwfc, map, src, AV_HWFRAME_MAP_READ);
+ if (err)
+ goto end;
+
+ err = av_frame_copy(dst, map);
+ goto end;
+ }
+
+ /* Path two - create a buffer per plane; with host memory import the
+ * destination planes get bound directly */
+ for (i = 0; i < planes; i++) {
+ VkImportMemoryHostPointerInfoEXT import_desc = {
+ .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
+ .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
+ .pHostPointer = dst->data[i],
+ };
+ int h = dst->height;
+ int p_height = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
+ err = create_buf(dev_ctx, &buf[i], p_height * dst->linesize[i],
+ VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL,
+ map_host ? &import_desc : NULL);
+ if (err)
+ goto end;
+ }
+
+ /* Copy image to buffer */
+ err = transfer_image_buf(dev_ctx, f, buf, dst->linesize,
+ dst->width, dst->height, dst->format, 1);
+ if (err)
+ goto end;
+
+ if (!map_host) {
+ uint8_t *mem[3];
+ if ((err = map_buffers(dev_ctx, buf, mem, planes, 1)))
+ goto end;
+
+ for (i = 0; i < planes; i++) {
+ int h = dst->height;
+ int p_height = i > 0 ? AV_CEIL_RSHIFT(h, desc->log2_chroma_h) : h;
+ memcpy(dst->data[i], mem[i], p_height * dst->linesize[i]);
+ }
+
+ err = unmap_buffers(dev_ctx, buf, planes, 0);
+ }
+
+end:
+ av_frame_free(&map);
+ for (i = 0; i < planes; i++)
+ free_buf(dev_ctx, &buf[i]);
+
+ return err;
+}
+
+const HWContextType ff_hwcontext_type_vulkan = {
+ .type = AV_HWDEVICE_TYPE_VULKAN,
+ .name = "Vulkan",
+
+ .device_hwctx_size = sizeof(AVVulkanDeviceContext),
+ .device_priv_size = sizeof(VulkanDevicePriv),
+ .frames_hwctx_size = sizeof(AVVulkanFramesContext),
+
+ .device_init = &vulkan_device_init,
+ .device_create = &vulkan_device_create,
+ .device_derive = &vulkan_device_derive,
+
+ .frames_get_constraints = &vulkan_frames_get_constraints,
+ .frames_init = vulkan_frames_init,
+ .frames_get_buffer = vulkan_get_buffer,
+
+ .transfer_get_formats = vulkan_transfer_get_formats,
+ .transfer_data_to = vulkan_transfer_data_to,
+ .transfer_data_from = vulkan_transfer_data_from,
+
+ .map_to = vulkan_map_to,
+ .map_from = vulkan_map_from,
+
+ .pix_fmts = (const enum AVPixelFormat[]) {
+ AV_PIX_FMT_VULKAN,
+ AV_PIX_FMT_NONE
+ },
+};
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
new file mode 100644
index 0000000000..342c833a23
--- /dev/null
+++ b/libavutil/hwcontext_vulkan.h
@@ -0,0 +1,133 @@
+/*
+ * Vulkan hwcontext
+ * Copyright (c) 2018 Rostislav Pehlivanov <***@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_HWCONTEXT_VULKAN_H
+#define AVUTIL_HWCONTEXT_VULKAN_H
+
+#include <vulkan/vulkan.h>
+
+/**
+ * @file
+ * API-specific header for AV_HWDEVICE_TYPE_VULKAN.
+ *
+ * For user-allocated pools, AVHWFramesContext.pool must return AVBufferRefs
+ * with the data pointer set to an AVVkFrame.
+ */
+
+/**
+ * Main Vulkan context, allocated as AVHWDeviceContext.hwctx.
+ * All of these fields can be set before init to change what the context uses.
+ */
+typedef struct AVVulkanDeviceContext {
+ /**
+ * Custom memory allocator, else NULL
+ */
+ const VkAllocationCallbacks *alloc;
+ /**
+ * Instance
+ */
+ VkInstance inst;
+ /**
+ * Physical device
+ */
+ VkPhysicalDevice phys_dev;
+ /**
+ * Activated physical device
+ */
+ VkDevice act_dev;
+ /**
+ * Queue family index for graphics
+ */
+ int queue_family_index;
+ /**
+ * Queue family index for transfer ops only. By default, the priority order
+ * is dedicated transfer > dedicated compute > graphics.
+ */
+ int queue_family_tx_index;
+ /**
+ * Queue family index for compute ops. Will be equal to the graphics
+ * one unless a dedicated compute queue is found.
+ */
+ int queue_family_comp_index;
+} AVVulkanDeviceContext;
+
+/**
+ * Allocated as AVHWFramesContext.hwctx, used to set pool-specific options
+ */
+typedef struct AVVulkanFramesContext {
+ /**
+ * Controls the tiling of output frames.
+ */
+ VkImageTiling tiling;
+ /**
+ * Defines extra usage of output frames. This is bitwise OR'd with the
+ * standard usage flags (SAMPLED, STORAGE, TRANSFER_SRC and TRANSFER_DST).
+ */
+ VkImageUsageFlagBits usage;
+ /**
+ * Set to 1 to allocate all planes separately (disjoint images)
+ */
+ int disjoint;
+ /**
+ * Extension data for image creation. By default, if the extension is
+ * available, this will be chained to a VkImageFormatListCreateInfoKHR.
+ */
+ void *create_pnext;
+ /**
+ * Extension data for memory allocation. If the image is disjoint, this
+ * must be one per plane, otherwise just the first entry is used.
+ * This will be chained to VkExportMemoryAllocateInfo, which is used
+ * to make all pool images exportable to other APIs.
+ */
+ void *alloc_pnext[AV_NUM_DATA_POINTERS];
+} AVVulkanFramesContext;
+
+/**
+ * Frame structure. The VkFormat of the image will always match
+ * the pool's sw_format.
+ */
+typedef struct AVVkFrame {
+ VkImage img;
+ VkImageTiling tiling;
+ /**
+ * Always 1 for non-disjoint images, #planes for disjoint
+ */
+ int mem_count;
+ VkDeviceMemory mem[AV_NUM_DATA_POINTERS];
+ /**
+ * OR'd flags for all memory allocated
+ */
+ VkMemoryPropertyFlagBits flags;
+
+ /**
+ * Updated after every barrier
+ */
+ VkAccessFlagBits access;
+ VkImageLayout layout;
+} AVVkFrame;
+
+/**
+ * Converts an AVPixelFormat into a VkFormat. Returns VK_FORMAT_UNDEFINED
+ * if the format is not supported by the hwcontext.
+ */
+VkFormat av_vkfmt_from_pixfmt(enum AVPixelFormat p);
+
+#endif /* AVUTIL_HWCONTEXT_VULKAN_H */
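
As a usage sketch (not part of the patch; error handling is trimmed and the
sizes/sw_format below are arbitrary), allocating a pool of Vulkan frames
through this header works like any other hwcontext:

#include "libavutil/hwcontext.h"
#include "libavutil/hwcontext_vulkan.h"

static int alloc_vulkan_frames(AVBufferRef **out_frames_ref)
{
    int err;
    AVBufferRef *dev_ref = NULL, *frames_ref = NULL;
    AVHWFramesContext *frames;
    AVVulkanFramesContext *vkframes;

    err = av_hwdevice_ctx_create(&dev_ref, AV_HWDEVICE_TYPE_VULKAN,
                                 NULL, NULL, 0);
    if (err < 0)
        return err;

    frames_ref = av_hwframe_ctx_alloc(dev_ref);
    if (!frames_ref) {
        av_buffer_unref(&dev_ref);
        return AVERROR(ENOMEM);
    }

    frames            = (AVHWFramesContext *)frames_ref->data;
    frames->format    = AV_PIX_FMT_VULKAN;
    frames->sw_format = AV_PIX_FMT_NV12;
    frames->width     = 1280;
    frames->height    = 720;

    /* Optional pool settings from hwcontext_vulkan.h */
    vkframes         = frames->hwctx;
    vkframes->tiling = VK_IMAGE_TILING_OPTIMAL;

    err = av_hwframe_ctx_init(frames_ref);
    if (err < 0) {
        av_buffer_unref(&frames_ref);
        av_buffer_unref(&dev_ref);
        return err;
    }

    /* The frames context keeps its own device reference */
    av_buffer_unref(&dev_ref);
    *out_frames_ref = frames_ref;
    return 0;
}

Frames from the pool can then be obtained with av_hwframe_get_buffer() and
uploaded, downloaded or mapped like any other hwaccel format.
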
diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
index 8ed52751c1..e739e05309 100644
--- a/libavutil/pixdesc.c
+++ b/libavutil/pixdesc.c
@@ -1652,6 +1652,10 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
.name = "videotoolbox_vld",
.flags = AV_PIX_FMT_FLAG_HWACCEL,
},
+ [AV_PIX_FMT_VULKAN] = {
+ .name = "vulkan",
+ .flags = AV_PIX_FMT_FLAG_HWACCEL,
+ },
[AV_PIX_FMT_GBRP] = {
.name = "gbrp",
.nb_components = 3,
diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
index e184a56672..a149aa67d9 100644
--- a/libavutil/pixfmt.h
+++ b/libavutil/pixfmt.h
@@ -330,6 +330,10 @@ enum AVPixelFormat {
*/
AV_PIX_FMT_OPENCL,

+ /* Vulkan hardware images,
+ * data[0] contains an AVVkFrame */
+ AV_PIX_FMT_VULKAN,
+
AV_PIX_FMT_NB ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
};

diff --git a/libavutil/version.h b/libavutil/version.h
index 387421775f..23567000a3 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -79,7 +79,7 @@
*/

#define LIBAVUTIL_VERSION_MAJOR 56
-#define LIBAVUTIL_VERSION_MINOR 15
+#define LIBAVUTIL_VERSION_MINOR 16
#define LIBAVUTIL_VERSION_MICRO 100

#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
--
2.17.0
Rostislav Pehlivanov
2018-04-20 04:30:06 UTC
Permalink
This commit adds common code for use in Vulkan filters. It attempts to
reduce the burden of writing Vulkan image filters to a minimum, which is
pretty much a requirement considering how verbose the API is.

It supports both compute and graphics pipelines and abstracts the API to
such a level that there's no need to call any Vulkan functions directly
in the init path of a filter. Handling shader descriptors makes up the
bulk of the code, and despite the abstraction, none of the flexibility
for describing shader IO is lost.

In order to produce linkable shaders, it depends on the libshaderc
library (the latest stable version of it). This allows for greater
performance and flexibility than static built-in shaders and also
eliminates the cumbersome process of interfacing with glslang to
compile GLSL to SPIR-V.

It's based on the common OpenCL filter code and provides similar
interfaces for filter pad init and config, with the addition that it
also supports in-place filtering.
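
For example (again a sketch, reusing the made-up ExampleVulkanContext from
above; filter_frame and options are left out), wiring a filter up looks much
like its OpenCL counterpart:

static const AVFilterPad example_vulkan_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = &ff_vk_filter_config_input,
        /* .filter_frame omitted for brevity */
    },
    { NULL }
};

static const AVFilterPad example_vulkan_outputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = &ff_vk_filter_config_output,
    },
    { NULL }
};

AVFilter ff_vf_example_vulkan = {
    .name           = "example_vulkan",
    .description    = NULL_IF_CONFIG_SMALL("Example Vulkan filter"),
    .priv_size      = sizeof(ExampleVulkanContext),
    .init           = &ff_vk_filter_init,
    .uninit         = &ff_vk_filter_uninit,
    .query_formats  = &ff_vk_filter_query_formats,
    .inputs         = example_vulkan_inputs,
    .outputs        = example_vulkan_outputs,
    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
};

For in-place filtering the output pad would use
ff_vk_filter_config_output_inplace instead, which reuses the input's frames
context rather than allocating a new one.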

Signed-off-by: Rostislav Pehlivanov <***@gmail.com>
---
configure | 11 +-
libavfilter/vulkan.c | 1450 ++++++++++++++++++++++++++++++++++++++++++
libavfilter/vulkan.h | 234 +++++++
3 files changed, 1694 insertions(+), 1 deletion(-)
create mode 100644 libavfilter/vulkan.c
create mode 100644 libavfilter/vulkan.h

diff --git a/configure b/configure
index cd88f7eae1..20fae191ea 100755
--- a/configure
+++ b/configure
@@ -252,6 +252,7 @@ External library support:
--enable-librsvg enable SVG rasterization via librsvg [no]
--enable-librubberband enable rubberband needed for rubberband filter [no]
--enable-librtmp enable RTMP[E] support via librtmp [no]
+ --enable-libshaderc enable GLSL->SPIRV compilation via libshaderc [no]
--enable-libshine enable fixed-point MP3 encoding via libshine [no]
--enable-libsmbclient enable Samba protocol via libsmbclient [no]
--enable-libsnappy enable Snappy compression, needed for hap encoding [no]
@@ -1702,6 +1703,7 @@ EXTERNAL_LIBRARY_LIST="
libpulse
librsvg
librtmp
+ libshaderc
libshine
libsmbclient
libsnappy
@@ -2219,6 +2221,7 @@ HAVE_LIST="
opencl_dxva2
opencl_vaapi_beignet
opencl_vaapi_intel_media
+ shaderc_opt_perf
vulkan_drm_mod
perl
pod2man
@@ -3449,7 +3452,7 @@ avformat_deps="avcodec avutil"
avformat_suggest="libm network zlib"
avresample_deps="avutil"
avresample_suggest="libm"
-avutil_suggest="clock_gettime ffnvcodec libm libdrm libmfx opencl user32 vaapi videotoolbox corefoundation corevideo coremedia bcrypt"
+avutil_suggest="clock_gettime ffnvcodec libm libdrm libmfx opencl vulkan libshaderc user32 vaapi videotoolbox corefoundation corevideo coremedia bcrypt"
postproc_deps="avutil gpl"
postproc_suggest="libm"
swresample_deps="avutil"
@@ -6029,6 +6032,7 @@ enabled libpulse && require_pkg_config libpulse libpulse pulse/pulseaud
enabled librsvg && require_pkg_config librsvg librsvg-2.0 librsvg-2.0/librsvg/rsvg.h rsvg_handle_render_cairo
enabled librtmp && require_pkg_config librtmp librtmp librtmp/rtmp.h RTMP_Socket
enabled librubberband && require_pkg_config librubberband "rubberband >= 1.8.1" rubberband/rubberband-c.h rubberband_new -lstdc++ && append librubberband_extralibs "-lstdc++"
+enabled libshaderc && require libshaderc shaderc/shaderc.h shaderc_compiler_initialize -lshaderc_shared
enabled libshine && require_pkg_config libshine shine shine/layer3.h shine_encode_buffer
enabled libsmbclient && { check_pkg_config libsmbclient smbclient libsmbclient.h smbc_init ||
require libsmbclient libsmbclient.h smbc_init -lsmbclient; }
@@ -6329,6 +6333,11 @@ enabled vulkan &&
check_lib vulkan "vulkan/vulkan.h" vkCreateInstance -lvulkan &&
check_cpp_condition vulkan vulkan/vulkan.h "defined VK_API_VERSION_1_1"

+if enabled_all vulkan libshaderc ; then
+ check_cc shaderc shaderc/shaderc.h "int t = shaderc_optimization_level_performance;"
+ enable shaderc_opt_perf
+fi
+
if enabled_all vulkan libdrm ; then
check_cpp_condition vulkan vulkan/vulkan.h "defined VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME"
enable vulkan_drm_mod
diff --git a/libavfilter/vulkan.c b/libavfilter/vulkan.c
new file mode 100644
index 0000000000..4266791618
--- /dev/null
+++ b/libavfilter/vulkan.c
@@ -0,0 +1,1450 @@
+/*
+ * Vulkan utilities
+ * Copyright (c) 2018 Rostislav Pehlivanov <***@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "formats.h"
+#include "vulkan.h"
+
+#define VK_LOAD_PFN(inst, name) PFN_##name pfn_##name = (PFN_##name) \
+ vkGetInstanceProcAddr(inst, #name)
+
+/* Converts return values to strings */
+const char *ff_vk_ret2str(VkResult res)
+{
+#define CASE(VAL) case VAL: return #VAL
+ switch (res) {
+ CASE(VK_SUCCESS);
+ CASE(VK_NOT_READY);
+ CASE(VK_TIMEOUT);
+ CASE(VK_EVENT_SET);
+ CASE(VK_EVENT_RESET);
+ CASE(VK_INCOMPLETE);
+ CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
+ CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ CASE(VK_ERROR_INITIALIZATION_FAILED);
+ CASE(VK_ERROR_DEVICE_LOST);
+ CASE(VK_ERROR_MEMORY_MAP_FAILED);
+ CASE(VK_ERROR_LAYER_NOT_PRESENT);
+ CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
+ CASE(VK_ERROR_FEATURE_NOT_PRESENT);
+ CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
+ CASE(VK_ERROR_TOO_MANY_OBJECTS);
+ CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
+ CASE(VK_ERROR_FRAGMENTED_POOL);
+ CASE(VK_ERROR_SURFACE_LOST_KHR);
+ CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
+ CASE(VK_SUBOPTIMAL_KHR);
+ CASE(VK_ERROR_OUT_OF_DATE_KHR);
+ CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
+ CASE(VK_ERROR_VALIDATION_FAILED_EXT);
+ CASE(VK_ERROR_INVALID_SHADER_NV);
+ CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
+ CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ CASE(VK_ERROR_NOT_PERMITTED_EXT);
+ default: return "Unknown error";
+ }
+#undef CASE
+}
+
+int ff_vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req,
+ VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+ VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
+{
+ VkResult ret;
+ int index = -1;
+ VkPhysicalDeviceProperties props;
+ VkPhysicalDeviceMemoryProperties mprops;
+ VulkanFilterContext *s = avctx->priv;
+
+ VkMemoryAllocateInfo alloc_info = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .pNext = alloc_extension,
+ };
+
+ vkGetPhysicalDeviceProperties(s->hwctx->phys_dev, &props);
+ vkGetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &mprops);
+
+ /* Align if we need to */
+ if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+ req->size = FFALIGN(req->size, props.limits.minMemoryMapAlignment);
+
+ alloc_info.allocationSize = req->size;
+
+ /* The vulkan spec requires memory types to be sorted in the "optimal"
+ * order, so the first matching type we find will be the best/fastest one */
+ for (int i = 0; i < mprops.memoryTypeCount; i++) {
+ /* The memory type must be supported by the requirements (bitfield) */
+ if (!(req->memoryTypeBits & (1 << i)))
+ continue;
+
+ /* The memory type flags must include our properties */
+ if ((mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
+ continue;
+
+ /* Found a suitable memory type */
+ index = i;
+ break;
+ }
+
+ if (index < 0) {
+ av_log(avctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
+ req_flags);
+ return AVERROR(EINVAL);
+ }
+
+ alloc_info.memoryTypeIndex = index;
+
+ ret = vkAllocateMemory(s->hwctx->act_dev, &alloc_info,
+ s->hwctx->alloc, mem);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR(ENOMEM);
+ }
+
+ *mem_flags |= mprops.memoryTypes[index].propertyFlags;
+
+ return 0;
+}
+
+int ff_vk_create_buf(AVFilterContext *avctx, FFVkBuffer *buf, size_t size,
+ VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
+{
+ int err;
+ VkResult ret;
+ VkMemoryRequirements req;
+ VulkanFilterContext *s = avctx->priv;
+
+ VkBufferCreateInfo buf_spawn = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .pNext = NULL,
+ .usage = usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .size = size, /* Gets FFALIGNED during alloc if host visible
+ but should be ok */
+ };
+
+ ret = vkCreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc, &buf->buf);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ vkGetBufferMemoryRequirements(s->hwctx->act_dev, buf->buf, &req);
+
+ err = ff_vk_alloc_mem(avctx, &req, flags, NULL, &buf->flags, &buf->mem);
+ if (err)
+ return err;
+
+ ret = vkBindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ return 0;
+}
+
+int ff_vk_map_buffers(AVFilterContext *avctx, FFVkBuffer *buf, uint8_t *mem[],
+ int nb_buffers, int invalidate)
+{
+ int i;
+ VkResult ret;
+ VulkanFilterContext *s = avctx->priv;
+ VkMappedMemoryRange *inval_list = NULL;
+ int inval_count = 0;
+
+ for (i = 0; i < nb_buffers; i++) {
+ ret = vkMapMemory(s->hwctx->act_dev, buf[i].mem, 0,
+ VK_WHOLE_SIZE, 0, (void **)&mem[i]);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ }
+
+ if (!invalidate)
+ return 0;
+
+ for (i = 0; i < nb_buffers; i++) {
+ const VkMappedMemoryRange ival_buf = {
+ .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+ .memory = buf[i].mem,
+ .size = VK_WHOLE_SIZE,
+ };
+ if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+ continue;
+ inval_list = av_fast_realloc(s->scratch, &s->scratch_size,
+ (++inval_count)*sizeof(*inval_list));
+ if (!inval_list)
+ return AVERROR(ENOMEM);
+ s->scratch = inval_list; /* av_fast_realloc may have moved the buffer */
+ inval_list[inval_count - 1] = ival_buf;
+ }
+
+ if (inval_count) {
+ ret = vkInvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
+ inval_list);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ }
+
+ return 0;
+}
+
+int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers,
+ int flush)
+{
+ VkResult ret;
+ int i, err = 0;
+ VulkanFilterContext *s = avctx->priv;
+ VkMappedMemoryRange *flush_list = NULL;
+ int flush_count = 0;
+
+ if (flush) {
+ for (i = 0; i < nb_buffers; i++) {
+ const VkMappedMemoryRange flush_buf = {
+ .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+ .memory = buf[i].mem,
+ .size = VK_WHOLE_SIZE,
+ };
+ if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+ continue;
+ flush_list = av_fast_realloc(s->scratch, &s->scratch_size,
+ (++flush_count)*sizeof(*flush_list));
+ if (!flush_list)
+ return AVERROR(ENOMEM);
+ s->scratch = flush_list; /* av_fast_realloc may have moved the buffer */
+ flush_list[flush_count - 1] = flush_buf;
+ }
+ }
+
+ if (flush_count) {
+ ret = vkFlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
+ flush_list);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
+ ff_vk_ret2str(ret));
+ err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
+ }
+ }
+
+ for (i = 0; i < nb_buffers; i++)
+ vkUnmapMemory(s->hwctx->act_dev, buf[i].mem);
+
+ return err;
+}
+
+void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf)
+{
+ VulkanFilterContext *s = avctx->priv;
+ if (!buf)
+ return;
+
+ if (buf->buf != VK_NULL_HANDLE)
+ vkDestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
+ if (buf->mem != VK_NULL_HANDLE)
+ vkFreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
+}
+
+int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e, int queue)
+{
+ VkResult ret;
+ VulkanFilterContext *s = avctx->priv;
+
+ VkCommandPoolCreateInfo cqueue_create = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+ .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+ .queueFamilyIndex = queue,
+ };
+ VkCommandBufferAllocateInfo cbuf_create = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+ .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+ .commandBufferCount = 1,
+ };
+ VkFenceCreateInfo fence_spawn = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
+
+ ret = vkCreateCommandPool(s->hwctx->act_dev, &cqueue_create,
+ s->hwctx->alloc, &e->pool);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Command pool creation failure: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ cbuf_create.commandPool = e->pool;
+
+ ret = vkAllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, &e->buf);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ ret = vkCreateFence(s->hwctx->act_dev, &fence_spawn,
+ s->hwctx->alloc, &e->fence);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ vkGetDeviceQueue(s->hwctx->act_dev, queue, 0, &e->queue);
+
+ return 0;
+}
+
+void ff_vk_free_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e)
+{
+ VulkanFilterContext *s = avctx->priv;
+
+ if (!e)
+ return;
+
+ if (e->fence != VK_NULL_HANDLE)
+ vkDestroyFence(s->hwctx->act_dev, e->fence, s->hwctx->alloc);
+ if (e->buf != VK_NULL_HANDLE)
+ vkFreeCommandBuffers(s->hwctx->act_dev, e->pool, 1, &e->buf);
+ if (e->pool != VK_NULL_HANDLE)
+ vkDestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);
+}
+
+int ff_vk_filter_query_formats(AVFilterContext *avctx)
+{
+ static const enum AVPixelFormat pixel_formats[] = {
+ AV_PIX_FMT_VULKAN, AV_PIX_FMT_NONE,
+ };
+ AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats);
+ if (!pix_fmts)
+ return AVERROR(ENOMEM);
+
+ return ff_set_common_formats(avctx, pix_fmts);
+}
+
+static int vulkan_filter_set_device(AVFilterContext *avctx,
+ AVBufferRef *device)
+{
+ VulkanFilterContext *s = avctx->priv;
+
+ av_buffer_unref(&s->device_ref);
+
+ s->device_ref = av_buffer_ref(device);
+ if (!s->device_ref)
+ return AVERROR(ENOMEM);
+
+ s->device = (AVHWDeviceContext*)s->device_ref->data;
+ s->hwctx = s->device->hwctx;
+
+ return 0;
+}
+
+static int vulkan_filter_set_frames(AVFilterContext *avctx,
+ AVBufferRef *frames)
+{
+ VulkanFilterContext *s = avctx->priv;
+
+ av_buffer_unref(&s->frames_ref);
+
+ s->frames_ref = av_buffer_ref(frames);
+ if (!s->frames_ref)
+ return AVERROR(ENOMEM);
+
+ return 0;
+}
+
+int ff_vk_filter_config_input(AVFilterLink *inlink)
+{
+ int err;
+ AVFilterContext *avctx = inlink->dst;
+ VulkanFilterContext *s = avctx->priv;
+ AVHWFramesContext *input_frames;
+
+ if (!inlink->hw_frames_ctx) {
+ av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
+ "hardware frames context on the input.\n");
+ return AVERROR(EINVAL);
+ }
+
+ /* Extract the device and default output format from the first input. */
+ if (avctx->inputs[0] != inlink)
+ return 0;
+
+ input_frames = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
+ if (input_frames->format != AV_PIX_FMT_VULKAN)
+ return AVERROR(EINVAL);
+
+ err = vulkan_filter_set_device(avctx, input_frames->device_ref);
+ if (err < 0)
+ return err;
+ err = vulkan_filter_set_frames(avctx, inlink->hw_frames_ctx);
+ if (err < 0)
+ return err;
+
+ /* Default output parameters match input parameters. */
+ s->input_format = input_frames->sw_format;
+ if (s->output_format == AV_PIX_FMT_NONE)
+ s->output_format = input_frames->sw_format;
+ if (!s->output_width)
+ s->output_width = inlink->w;
+ if (!s->output_height)
+ s->output_height = inlink->h;
+
+ return 0;
+}
+
+int ff_vk_filter_config_output_inplace(AVFilterLink *outlink)
+{
+ int err;
+ AVFilterContext *avctx = outlink->src;
+ VulkanFilterContext *s = avctx->priv;
+
+ av_buffer_unref(&outlink->hw_frames_ctx);
+
+ if (!s->device_ref) {
+ if (!avctx->hw_device_ctx) {
+ av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
+ "Vulkan device.\n");
+ return AVERROR(EINVAL);
+ }
+
+ err = vulkan_filter_set_device(avctx, avctx->hw_device_ctx);
+ if (err < 0)
+ return err;
+ }
+
+ outlink->hw_frames_ctx = av_buffer_ref(s->frames_ref);
+ if (!outlink->hw_frames_ctx)
+ return AVERROR(ENOMEM);
+ outlink->w = s->output_width;
+ outlink->h = s->output_height;
+
+ return 0;
+}
+
+int ff_vk_filter_config_output(AVFilterLink *outlink)
+{
+ int err;
+ AVFilterContext *avctx = outlink->src;
+ VulkanFilterContext *s = avctx->priv;
+ AVBufferRef *output_frames_ref;
+ AVHWFramesContext *output_frames;
+
+ av_buffer_unref(&outlink->hw_frames_ctx);
+
+ if (!s->device_ref) {
+ if (!avctx->hw_device_ctx) {
+ av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
+ "Vulkan device.\n");
+ return AVERROR(EINVAL);
+ }
+
+ err = vulkan_filter_set_device(avctx, avctx->hw_device_ctx);
+ if (err < 0)
+ return err;
+ }
+
+ output_frames_ref = av_hwframe_ctx_alloc(s->device_ref);
+ if (!output_frames_ref) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
+ output_frames = (AVHWFramesContext*)output_frames_ref->data;
+
+ output_frames->format = AV_PIX_FMT_VULKAN;
+ output_frames->sw_format = s->output_format;
+ output_frames->width = s->output_width;
+ output_frames->height = s->output_height;
+
+ err = av_hwframe_ctx_init(output_frames_ref);
+ if (err < 0) {
+ av_log(avctx, AV_LOG_ERROR, "Failed to initialise output "
+ "frames: %d.\n", err);
+ goto fail;
+ }
+
+ outlink->hw_frames_ctx = output_frames_ref;
+ outlink->w = s->output_width;
+ outlink->h = s->output_height;
+
+ return 0;
+fail:
+ av_buffer_unref(&output_frames_ref);
+ return err;
+}
+
+int ff_vk_filter_init(AVFilterContext *avctx)
+{
+ VulkanFilterContext *s = avctx->priv;
+ const shaderc_env_version opt_ver = shaderc_env_version_vulkan_1_1;
+ const shaderc_optimization_level opt_lvl = shaderc_optimization_level_performance;
+
+ s->output_format = AV_PIX_FMT_NONE;
+
+ s->sc_compiler = shaderc_compiler_initialize();
+ if (!s->sc_compiler)
+ return AVERROR_EXTERNAL;
+
+ s->sc_opts = shaderc_compile_options_initialize();
+ if (!s->sc_opts)
+ return AVERROR_EXTERNAL;
+
+ shaderc_compile_options_set_target_env(s->sc_opts,
+ shaderc_target_env_vulkan,
+ opt_ver);
+ shaderc_compile_options_set_optimization_level(s->sc_opts, opt_lvl);
+
+ return 0;
+}
+
+void ff_vk_filter_uninit(AVFilterContext *avctx)
+{
+ int i;
+ VulkanFilterContext *s = avctx->priv;
+
+ shaderc_compile_options_release(s->sc_opts);
+ shaderc_compiler_release(s->sc_compiler);
+
+ for (i = 0; i < s->shaders_num; i++) {
+ SPIRVShader *shd = &s->shaders[i];
+ vkDestroyShaderModule(s->hwctx->act_dev, shd->shader.module,
+ s->hwctx->alloc);
+ }
+
+ if (s->pipeline != VK_NULL_HANDLE)
+ vkDestroyPipeline(s->hwctx->act_dev, s->pipeline, s->hwctx->alloc);
+ if (s->pipeline_layout != VK_NULL_HANDLE)
+ vkDestroyPipelineLayout(s->hwctx->act_dev, s->pipeline_layout,
+ s->hwctx->alloc);
+
+ for (i = 0; i < s->samplers_num; i++) {
+ VulkanSampler *sampler = &s->samplers[i];
+ VK_LOAD_PFN(s->hwctx->inst, vkDestroySamplerYcbcrConversionKHR);
+ vkDestroySampler(s->hwctx->act_dev, sampler->sampler, s->hwctx->alloc);
+ pfn_vkDestroySamplerYcbcrConversionKHR(s->hwctx->act_dev,
+ sampler->yuv_conv.conversion,
+ s->hwctx->alloc);
+ }
+
+ ff_vk_free_buf(avctx, &s->vbuffer);
+
+ for (i = 0; i < s->descriptor_sets_num; i++) {
+ VK_LOAD_PFN(s->hwctx->inst, vkDestroyDescriptorUpdateTemplateKHR);
+ pfn_vkDestroyDescriptorUpdateTemplateKHR(s->hwctx->act_dev,
+ s->desc_template[i],
+ s->hwctx->alloc);
+ vkDestroyDescriptorSetLayout(s->hwctx->act_dev, s->desc_layout[i],
+ s->hwctx->alloc);
+ }
+
+ if (s->desc_pool != VK_NULL_HANDLE)
+ vkDestroyDescriptorPool(s->hwctx->act_dev, s->desc_pool,
+ s->hwctx->alloc);
+ if (s->renderpass != VK_NULL_HANDLE)
+ vkDestroyRenderPass(s->hwctx->act_dev, s->renderpass,
+ s->hwctx->alloc);
+
+ av_freep(&s->desc_layout);
+ av_freep(&s->pool_size_desc);
+ av_freep(&s->shaders);
+ av_freep(&s->samplers);
+ av_buffer_unref(&s->device_ref);
+ av_buffer_unref(&s->frames_ref);
+
+ /* Only freed in case of failure */
+ av_freep(&s->push_consts);
+ av_freep(&s->pool_size_desc);
+ if (s->desc_template_info) {
+ for (i = 0; i < s->descriptor_sets_num; i++)
+ av_free((void *)s->desc_template_info[i].pDescriptorUpdateEntries);
+ av_freep(&s->desc_template_info);
+ }
+}
+
+SPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, const char *name,
+ VkShaderStageFlags stage)
+{
+ SPIRVShader *shd;
+ VulkanFilterContext *s = avctx->priv;
+
+ s->shaders = av_realloc_array(s->shaders, sizeof(*s->shaders),
+ s->shaders_num + 1);
+ if (!s->shaders)
+ return NULL;
+
+ shd = &s->shaders[s->shaders_num++];
+ memset(shd, 0, sizeof(*shd));
+ av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED);
+
+ shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+ shd->shader.stage = stage;
+
+ shd->name = name;
+
+ GLSLF(0, #version %i ,460);
+ GLSLC(0, #define AREA(v) ((v).x*(v).y) );
+ GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) );
+ GLSLC(0, );
+
+ return shd;
+}
+
+void ff_vk_set_compute_shader_sizes(AVFilterContext *avctx, SPIRVShader *shd,
+ int local_size[3])
+{
+ shd->local_size[0] = local_size[0];
+ shd->local_size[1] = local_size[1];
+ shd->local_size[2] = local_size[2];
+
+ av_bprintf(&shd->src, "layout (local_size_x = %i, "
+ "local_size_y = %i, local_size_z = %i) in;\n",
+ shd->local_size[0], shd->local_size[1], shd->local_size[2]);
+}
+
+static void print_shader(AVFilterContext *avctx, SPIRVShader *shd)
+{
+ int i;
+ int line = 0;
+ const char *p = shd->src.str;
+ const char *start = p;
+
+ AVBPrint buf;
+ av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
+
+ for (i = 0; i < strlen(p); i++) {
+ if (p[i] == '\n') {
+ av_bprintf(&buf, "%i\t", ++line);
+ av_bprint_append_data(&buf, start, &p[i] - start + 1);
+ start = &p[i + 1];
+ }
+ }
+
+ av_log(avctx, AV_LOG_VERBOSE, "Compiling shader %s: \n%s\n",
+ shd->name, buf.str);
+ av_bprint_finalize(&buf, NULL);
+}
+
+int ff_vk_compile_shader(AVFilterContext *avctx, SPIRVShader *shd,
+ const char *entry)
+{
+ VkResult ret;
+ VulkanFilterContext *s = avctx->priv;
+ VkShaderModuleCreateInfo shader_create;
+
+ shaderc_compilation_result_t res;
+ static const shaderc_shader_kind type_map[] = {
+ [VK_SHADER_STAGE_VERTEX_BIT] = shaderc_vertex_shader,
+ [VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT] = shaderc_tess_control_shader,
+ [VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT] = shaderc_tess_evaluation_shader,
+ [VK_SHADER_STAGE_GEOMETRY_BIT] = shaderc_geometry_shader,
+ [VK_SHADER_STAGE_FRAGMENT_BIT] = shaderc_fragment_shader,
+ [VK_SHADER_STAGE_COMPUTE_BIT] = shaderc_compute_shader,
+ };
+
+ shd->shader.pName = entry;
+
+ print_shader(avctx, shd);
+
+ res = shaderc_compile_into_spv(s->sc_compiler, shd->src.str, shd->src.len,
+ type_map[shd->shader.stage], shd->name,
+ entry, s->sc_opts);
+ av_bprint_finalize(&shd->src, NULL);
+
+ if (shaderc_result_get_compilation_status(res) !=
+ shaderc_compilation_status_success) {
+ av_log(avctx, AV_LOG_ERROR, "%s", shaderc_result_get_error_message(res));
+ return AVERROR_EXTERNAL;
+ }
+
+ shader_create.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
+ shader_create.pNext = NULL;
+ shader_create.codeSize = shaderc_result_get_length(res);
+ shader_create.flags = 0;
+ shader_create.pCode = (const uint32_t *)shaderc_result_get_bytes(res);
+
+ ret = vkCreateShaderModule(s->hwctx->act_dev, &shader_create, s->hwctx->alloc,
+ &shd->shader.module);
+ shaderc_result_release(res);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to create shader module: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ av_log(avctx, AV_LOG_VERBOSE, "Shader linked! Size: %zu bytes\n",
+ shader_create.codeSize);
+
+ return 0;
+}
+
+int ff_vk_init_renderpass(AVFilterContext *avctx)
+{
+ VulkanFilterContext *s = avctx->priv;
+
+ VkAttachmentDescription rpass_att[] = {
+ {
+ .format = av_vkfmt_from_pixfmt(s->output_format),
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+ .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
+ .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+ .finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
+ },
+ };
+
+ VkSubpassDescription rpass_sub_desc[] = {
+ {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .colorAttachmentCount = 1,
+ .pColorAttachments = (VkAttachmentReference[]) {
+ { 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL },
+ },
+ .pDepthStencilAttachment = NULL,
+ .preserveAttachmentCount = 0,
+ }
+ };
+
+ VkRenderPassCreateInfo renderpass_spawn = {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .pAttachments = rpass_att,
+ .attachmentCount = FF_ARRAY_ELEMS(rpass_att),
+ .pSubpasses = rpass_sub_desc,
+ .subpassCount = FF_ARRAY_ELEMS(rpass_sub_desc),
+ };
+
+ VkResult ret = vkCreateRenderPass(s->hwctx->act_dev, &renderpass_spawn,
+ s->hwctx->alloc, &s->renderpass);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Renderpass init failure: %s\n", ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ return 0;
+}
+
+static VkSamplerYcbcrModelConversion conv_primaries(enum AVColorPrimaries color_primaries)
+{
+ switch(color_primaries) {
+ case AVCOL_PRI_BT470BG:
+ return VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601;
+ case AVCOL_PRI_BT709:
+ return VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709;
+ case AVCOL_PRI_BT2020:
+ return VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020;
+ }
+ /* Just assume it's 709 */
+ return VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709;
+}
+
+const VulkanSampler *ff_vk_init_sampler(AVFilterContext *avctx, AVFrame *input,
+ int unnorm_coords, VkFilter filt)
+{
+ VkResult ret;
+ VulkanFilterContext *s = avctx->priv;
+ VulkanSampler *sampler;
+
+ VkSamplerCreateInfo sampler_info = {
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .magFilter = filt,
+ .minFilter = filt,
+ .mipmapMode = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST :
+ VK_SAMPLER_MIPMAP_MODE_LINEAR,
+ .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+ .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+ .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+ .anisotropyEnable = VK_FALSE,
+ .compareOp = VK_COMPARE_OP_NEVER,
+ .borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
+ .unnormalizedCoordinates = unnorm_coords,
+ };
+
+ s->samplers = av_realloc_array(s->samplers, sizeof(*s->samplers),
+ s->samplers_num + 1);
+ if (!s->samplers)
+ return NULL;
+
+ sampler = &s->samplers[s->samplers_num++];
+ memset(sampler, 0, sizeof(*sampler));
+
+ sampler->converting = !!input;
+ sampler->yuv_conv.sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO;
+
+ if (input) {
+ VkSamplerYcbcrConversion *conv = &sampler->yuv_conv.conversion;
+ VkComponentMapping comp_map = {
+ .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+ };
+
+ VkSamplerYcbcrConversionCreateInfo c_info = {
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO,
+ .format = av_vkfmt_from_pixfmt(s->input_format),
+ .chromaFilter = VK_FILTER_LINEAR,
+ .ycbcrModel = conv_primaries(input->color_primaries),
+ .ycbcrRange = input->color_range == AVCOL_RANGE_JPEG ?
+ VK_SAMPLER_YCBCR_RANGE_ITU_FULL :
+ VK_SAMPLER_YCBCR_RANGE_ITU_NARROW,
+ .xChromaOffset = input->chroma_location == AVCHROMA_LOC_CENTER ?
+ VK_CHROMA_LOCATION_MIDPOINT :
+ VK_CHROMA_LOCATION_COSITED_EVEN,
+ .components = comp_map,
+ };
+
+ VK_LOAD_PFN(s->hwctx->inst, vkCreateSamplerYcbcrConversionKHR);
+
+ sampler_info.pNext = &sampler->yuv_conv;
+
+ if (unnorm_coords) {
+ av_log(avctx, AV_LOG_ERROR, "Cannot create a converting sampler "
+ "with unnormalized addressing, forbidden by spec!\n");
+ return NULL;
+ }
+
+ ret = pfn_vkCreateSamplerYcbcrConversionKHR(s->hwctx->act_dev, &c_info,
+ s->hwctx->alloc, conv);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to init conversion: %s\n",
+ ff_vk_ret2str(ret));
+ return NULL;
+ }
+ }
+
+ ret = vkCreateSampler(s->hwctx->act_dev, &sampler_info,
+ s->hwctx->alloc, &sampler->sampler);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to init sampler: %s\n",
+ ff_vk_ret2str(ret));
+ return NULL;
+ }
+
+ return sampler;
+}
+
+/* A 3x2 matrix, with the translation part separate. */
+struct transform {
+ /* row-major, e.g. in mathematical notation:
+ * | m[0][0] m[0][1] |
+ * | m[1][0] m[1][1] | */
+ float m[2][2];
+ float t[2];
+};
+
+/* Standard parallel 2D projection, except y1 < y0 means that the coordinate
+ * system is flipped, not the projection. */
+static inline void transform_ortho(struct transform *t, float x0, float x1,
+ float y0, float y1)
+{
+ if (y1 < y0) {
+ float tmp = y0;
+ y0 = tmp - y1;
+ y1 = tmp;
+ }
+
+ t->m[0][0] = 2.0f / (x1 - x0);
+ t->m[0][1] = 0.0f;
+ t->m[1][0] = 0.0f;
+ t->m[1][1] = 2.0f / (y1 - y0);
+ t->t[0] = -(x1 + x0) / (x1 - x0);
+ t->t[1] = -(y1 + y0) / (y1 - y0);
+}
+
+/* This treats m as an affine transformation; in other words, the translation
+ * part t[n] gets added to the output. */
+static inline void transform_vec(struct transform t, float *x, float *y)
+{
+ float vx = *x, vy = *y;
+ *x = vx * t.m[0][0] + vy * t.m[0][1] + t.t[0];
+ *y = vx * t.m[1][0] + vy * t.m[1][1] + t.t[1];
+}
+
+/* Vertex buffer structure */
+struct vertex {
+ struct {
+ float x, y;
+ } position;
+ struct {
+ float x, y;
+ } texcoord[4];
+};
+
+int ff_vk_init_simple_vbuffer(AVFilterContext *avctx)
+{
+ struct vertex *va;
+ struct transform t;
+ VulkanFilterContext *s = avctx->priv;
+
+ int i, n, err, vp_w = s->output_width, vp_h = s->output_height;
+ float x[2] = { 0, vp_w };
+ float y[2] = { 0, vp_h };
+
+ s->num_verts = 4;
+
+ err = ff_vk_create_buf(avctx, &s->vbuffer,
+ sizeof(struct vertex)*s->num_verts,
+ VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+ if (err)
+ return err;
+
+ err = ff_vk_map_buffers(avctx, &s->vbuffer, (uint8_t **)&va, 1, 0);
+ if (err)
+ return err;
+
+ transform_ortho(&t, 0, vp_w, 0, vp_h);
+ transform_vec(t, &x[0], &y[0]);
+ transform_vec(t, &x[1], &y[1]);
+
+ for (n = 0; n < s->num_verts; n++) {
+ struct vertex *v = &va[n];
+ v->position.x = x[n / 2];
+ v->position.y = y[n % 2];
+ for (i = 0; i < 4; i++) {
+ struct transform tr = { { { 0 } } };
+ float tx = (n / 2) * vp_w;
+ float ty = (n % 2) * vp_h;
+ tr.m[0][0] = 1.0f;
+ tr.m[1][1] = 1.0f;
+ transform_vec(tr, &tx, &ty);
+ v->texcoord[i].x = tx / vp_w;
+ v->texcoord[i].y = ty / vp_h;
+ }
+ }
+
+ err = ff_vk_unmap_buffers(avctx, &s->vbuffer, 1, 1);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+int ff_vk_add_push_constant(AVFilterContext *avctx, int offset, int size,
+ VkShaderStageFlagBits stage)
+{
+ VkPushConstantRange *pc;
+ VulkanFilterContext *s = avctx->priv;
+
+ s->push_consts = av_realloc_array(s->push_consts, sizeof(*s->push_consts),
+ s->push_consts_num + 1);
+ if (!s->push_consts)
+ return AVERROR(ENOMEM);
+
+ pc = &s->push_consts[s->push_consts_num++];
+ memset(pc, 0, sizeof(*pc));
+
+ pc->stageFlags = stage;
+ pc->offset = offset;
+ pc->size = size;
+
+ return s->push_consts_num - 1;
+}
+
+static const struct descriptor_props {
+ size_t struct_size; /* Size of the opaque which updates the descriptor */
+ const char *type;
+ int is_uniform;
+ int mem_quali; /* Can use a memory qualifier */
+ int dim_needed; /* Must indicate dimension */
+ int buf_content; /* Must indicate buffer contents */
+} descriptor_props[] = {
+ [VK_DESCRIPTOR_TYPE_SAMPLER] = { sizeof(VkDescriptorImageInfo), "sampler", 1, 0, 0, 0, },
+ [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE] = { sizeof(VkDescriptorImageInfo), "texture", 1, 0, 1, 0, },
+ [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE] = { sizeof(VkDescriptorImageInfo), "image", 1, 1, 1, 0, },
+ [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT] = { sizeof(VkDescriptorImageInfo), "subpassInput", 1, 0, 0, 0, },
+ [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo), "sampler", 1, 0, 1, 0, },
+ [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER] = { sizeof(VkDescriptorBufferInfo), NULL, 1, 0, 0, 1, },
+ [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER] = { sizeof(VkDescriptorBufferInfo), "buffer", 0, 1, 0, 1, },
+ [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), NULL, 1, 0, 0, 1, },
+ [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer", 0, 1, 0, 1, },
+ [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER] = { sizeof(VkBufferView), "samplerBuffer", 1, 0, 0, 0, },
+ [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = { sizeof(VkBufferView), "imageBuffer", 1, 0, 0, 0, },
+};
+
+int ff_vk_add_descriptor_set(AVFilterContext *avctx, SPIRVShader *shd,
+ VulkanDescriptorSetBinding *desc, int num,
+ int only_print_to_shader)
+{
+ int i, j;
+ VkResult ret;
+ VkDescriptorSetLayout *layout;
+ VulkanFilterContext *s = avctx->priv;
+
+ if (only_print_to_shader)
+ goto print;
+
+ s->desc_layout = av_realloc_array(s->desc_layout, sizeof(*s->desc_layout),
+ s->descriptor_sets_num + 1);
+ if (!s->desc_layout)
+ return AVERROR(ENOMEM);
+
+ layout = &s->desc_layout[s->descriptor_sets_num];
+ memset(layout, 0, sizeof(*layout));
+
+ { /* Create descriptor set layout descriptions */
+ VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 };
+ VkDescriptorSetLayoutBinding *desc_binding;
+
+ desc_binding = av_mallocz(sizeof(*desc_binding)*num);
+ if (!desc_binding)
+ return AVERROR(ENOMEM);
+
+ for (i = 0; i < num; i++) {
+ desc_binding[i].binding = i;
+ desc_binding[i].descriptorType = desc[i].type;
+ desc_binding[i].descriptorCount = FFMAX(desc[i].elems, 1);
+ desc_binding[i].stageFlags = desc[i].stages;
+ desc_binding[i].pImmutableSamplers = desc[i].samplers;
+ }
+
+ desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+ desc_create_layout.pBindings = desc_binding;
+ desc_create_layout.bindingCount = num;
+
+ ret = vkCreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
+ s->hwctx->alloc, layout);
+ av_free(desc_binding);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set "
+ "layout: %s\n", ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ }
+
+ { /* Pool each descriptor by type and update pool counts */
+ for (i = 0; i < num; i++) {
+ for (j = 0; j < s->pool_size_desc_num; j++)
+ if (s->pool_size_desc[j].type == desc[i].type)
+ break;
+ if (j >= s->pool_size_desc_num) {
+ s->pool_size_desc = av_realloc_array(s->pool_size_desc,
+ sizeof(*s->pool_size_desc),
+ ++s->pool_size_desc_num);
+ if (!s->pool_size_desc)
+ return AVERROR(ENOMEM);
+ memset(&s->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize));
+ }
+ s->pool_size_desc[j].type = desc[i].type;
+ s->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1);
+ }
+ }
+
+ { /* Create template creation struct */
+ VkDescriptorUpdateTemplateCreateInfo *dt;
+ VkDescriptorUpdateTemplateEntry *des_entries;
+
+ /* Freed after descriptor set initialization */
+ des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry));
+ if (!des_entries)
+ return AVERROR(ENOMEM);
+
+ for (i = 0; i < num; i++) {
+ des_entries[i].dstBinding = i;
+ des_entries[i].descriptorType = desc[i].type;
+ des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1);
+ des_entries[i].dstArrayElement = 0;
+ des_entries[i].offset = ((uint8_t *)desc[i].updater) - (uint8_t *)s;
+ des_entries[i].stride = descriptor_props[desc[i].type].struct_size;
+ }
+
+ s->desc_template_info = av_realloc_array(s->desc_template_info,
+ sizeof(*s->desc_template_info),
+ s->descriptor_sets_num + 1);
+ if (!s->desc_template_info)
+ return AVERROR(ENOMEM);
+
+ dt = &s->desc_template_info[s->descriptor_sets_num];
+ memset(dt, 0, sizeof(*dt));
+
+ dt->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
+ dt->templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
+ dt->descriptorSetLayout = *layout;
+ dt->pDescriptorUpdateEntries = des_entries;
+ dt->descriptorUpdateEntryCount = num;
+ }
+
+ s->descriptor_sets_num++;
+
+print:
+ /* Write shader info */
+ for (i = 0; i < num; i++) {
+ const struct descriptor_props *prop = &descriptor_props[desc[i].type];
+ GLSLA("layout (set = %i, binding = %i", s->descriptor_sets_num - 1, i);
+
+ if (desc[i].mem_layout)
+ GLSLA(", %s", desc[i].mem_layout);
+ GLSLA(")");
+
+ if (prop->is_uniform)
+ GLSLA(" uniform");
+
+ if (prop->mem_quali && desc[i].mem_quali)
+ GLSLA(" %s", desc[i].mem_quali);
+
+ if (prop->type)
+ GLSLA(" %s", prop->type);
+
+ if (prop->dim_needed)
+ GLSLA("%iD", desc[i].dimensions);
+
+ GLSLA(" %s", desc[i].name);
+
+ if (prop->buf_content)
+ GLSLA(" {\n %s\n}", desc[i].buf_content);
+ else if (desc[i].elems > 0)
+ GLSLA("[%i]", desc[i].elems);
+
+ GLSLA(";\n");
+ }
+
+ return 0;
+}
+
+void ff_vk_update_descriptor_set(AVFilterContext *avctx, int set_id)
+{
+ VulkanFilterContext *s = avctx->priv;
+
+ VK_LOAD_PFN(s->hwctx->inst, vkUpdateDescriptorSetWithTemplateKHR);
+ pfn_vkUpdateDescriptorSetWithTemplateKHR(s->hwctx->act_dev,
+ s->desc_set[set_id],
+ s->desc_template[set_id], s);
+}
+
+const enum VkImageAspectFlagBits ff_vk_aspect_flags(enum AVPixelFormat pixfmt,
+ int plane)
+{
+ const int tot_planes = av_pix_fmt_count_planes(pixfmt);
+ static const enum VkImageAspectFlagBits m[] = { VK_IMAGE_ASPECT_PLANE_0_BIT,
+ VK_IMAGE_ASPECT_PLANE_1_BIT,
+ VK_IMAGE_ASPECT_PLANE_2_BIT, };
+ if (!tot_planes || (plane >= tot_planes))
+ return 0;
+ if (tot_planes == 1)
+ return VK_IMAGE_ASPECT_COLOR_BIT;
+ if (plane < 0)
+ return m[0] | m[1] | (tot_planes > 2 ? m[2] : 0);
+ return m[plane];
+}
+
+const VkFormat ff_vk_plane_rep_fmt(enum AVPixelFormat pixfmt, int plane)
+{
+ const int tot_planes = av_pix_fmt_count_planes(pixfmt);
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pixfmt);
+ const int high = desc->comp[plane].depth > 8;
+ if (tot_planes == 1) { /* RGB, etc.'s singleplane rep is itself */
+ return av_vkfmt_from_pixfmt(pixfmt);
+ } else if (tot_planes == 2) { /* Must be NV12 or P010 */
+ if (!high)
+ return !plane ? VK_FORMAT_R8_UNORM : VK_FORMAT_R8G8_UNORM;
+ else
+ return !plane ? VK_FORMAT_R16_UNORM : VK_FORMAT_R16G16_UNORM;
+ } else { /* Regular planar YUV */
+ return !high ? VK_FORMAT_R8_UNORM : VK_FORMAT_R16_UNORM;
+ }
+}
+
+int ff_vk_create_imageview(AVFilterContext *avctx, VkImageView *v, AVVkFrame *f,
+ VkFormat fmt, enum VkImageAspectFlagBits aspect,
+ VkComponentMapping map, const void *pnext)
+{
+ VulkanFilterContext *s = avctx->priv;
+ VkImageViewCreateInfo imgview_spawn = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .pNext = pnext,
+ .image = f->img,
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = fmt,
+ .components = map,
+ .subresourceRange = {
+ .aspectMask = aspect,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ };
+
+ VkResult ret = vkCreateImageView(s->hwctx->act_dev, &imgview_spawn,
+ s->hwctx->alloc, v);
+ if (ret != VK_SUCCESS) {
+ av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ return 0;
+}
+
+void ff_vk_destroy_imageview(AVFilterContext *avctx, VkImageView v)
+{
+ VulkanFilterContext *s = avctx->priv;
+ vkDestroyImageView(s->hwctx->act_dev, v, s->hwctx->alloc);
+}
+
+int ff_vk_init_pipeline_layout(AVFilterContext *avctx)
+{
+ int i;
+ VkResult ret;
+ VulkanFilterContext *s = avctx->priv;
+
+ { /* Init descriptor set pool */
+ VkDescriptorPoolCreateInfo pool_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+ .poolSizeCount = s->pool_size_desc_num,
+ .pPoolSizes = s->pool_size_desc,
+ .maxSets = s->descriptor_sets_num,
+ };
+
+ ret = vkCreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
+ s->hwctx->alloc, &s->desc_pool);
+ av_freep(&s->pool_size_desc);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set "
+ "pool: %s\n", ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ }
+
+ { /* Allocate descriptor sets */
+ VkDescriptorSetAllocateInfo alloc_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ .descriptorPool = s->desc_pool,
+ .descriptorSetCount = s->descriptor_sets_num,
+ .pSetLayouts = s->desc_layout,
+ };
+
+ s->desc_set = av_malloc(s->descriptor_sets_num*sizeof(*s->desc_set));
+ if (!s->desc_set)
+ return AVERROR(ENOMEM);
+
+ ret = vkAllocateDescriptorSets(s->hwctx->act_dev, &alloc_info,
+ s->desc_set);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ }
+
+ { /* Finally create the pipeline layout */
+ VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = s->descriptor_sets_num,
+ .pSetLayouts = s->desc_layout,
+ .pushConstantRangeCount = s->push_consts_num,
+ .pPushConstantRanges = s->push_consts,
+ };
+
+ ret = vkCreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout,
+ s->hwctx->alloc, &s->pipeline_layout);
+ av_freep(&s->push_consts);
+ s->push_consts_num = 0;
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ }
+
+ { /* Descriptor template (for tightly packed descriptors) */
+ VK_LOAD_PFN(s->hwctx->inst, vkCreateDescriptorUpdateTemplateKHR);
+ VkDescriptorUpdateTemplateCreateInfo *desc_template_info;
+
+ s->desc_template = av_malloc(s->descriptor_sets_num*sizeof(*s->desc_template));
+ if (!s->desc_template)
+ return AVERROR(ENOMEM);
+
+ /* Create update templates for the descriptor sets */
+ for (i = 0; i < s->descriptor_sets_num; i++) {
+ desc_template_info = &s->desc_template_info[i];
+ desc_template_info->pipelineLayout = s->pipeline_layout;
+ ret = pfn_vkCreateDescriptorUpdateTemplateKHR(s->hwctx->act_dev,
+ desc_template_info,
+ s->hwctx->alloc,
+ &s->desc_template[i]);
+ av_free((void *)desc_template_info->pDescriptorUpdateEntries);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor "
+ "template: %s\n", ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+ }
+
+ av_freep(&s->desc_template_info);
+ }
+
+ return 0;
+}
+
+int ff_vk_init_compute_pipeline(AVFilterContext *avctx)
+{
+ int i;
+ VkResult ret;
+ VulkanFilterContext *s = avctx->priv;
+
+ VkComputePipelineCreateInfo pipe = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .layout = s->pipeline_layout,
+ };
+
+ for (i = 0; i < s->shaders_num; i++) {
+ if (s->shaders[i].shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) {
+ pipe.stage = s->shaders[i].shader;
+ break;
+ }
+ }
+ if (i == s->shaders_num) {
+ av_log(avctx, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n");
+ return AVERROR(EINVAL);
+ }
+
+ ret = vkCreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe,
+ s->hwctx->alloc, &s->pipeline);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ return 0;
+}
+
+int ff_vk_init_graphics_pipeline(AVFilterContext *avctx)
+{
+ VkResult ret;
+ VulkanFilterContext *s = avctx->priv;
+
+ VkVertexInputBindingDescription vbind_desc = {
+ .binding = 0,
+ .stride = sizeof(struct vertex),
+ .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
+ };
+
+ VkVertexInputAttributeDescription vatt_desc[4] = { { 0 } };
+ for (int i = 0; i < 4; i++) {
+ VkVertexInputAttributeDescription *att = &vatt_desc[i];
+ att->location = i;
+ att->binding = 0;
+        att->format = VK_FORMAT_R32G32_SFLOAT;
+ att->offset = i*2*sizeof(float);
+ }
+
+ VkPipelineVertexInputStateCreateInfo vpipe_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexAttributeDescriptionCount = FF_ARRAY_ELEMS(vatt_desc),
+ .pVertexAttributeDescriptions = vatt_desc,
+ .vertexBindingDescriptionCount = 1,
+ .pVertexBindingDescriptions = &vbind_desc,
+ };
+
+ VkPipelineDynamicStateCreateInfo dynamic_states = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 2,
+ .pDynamicStates = (VkDynamicState []) {
+ VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR,
+ },
+ };
+
+ VkPipelineInputAssemblyStateCreateInfo spawn_input_asm = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = VK_FALSE,
+ };
+
+ VkRect2D scissor = { .extent = { .width = s->output_width, .height = s->output_height } };
+ VkViewport viewport = { .width = s->output_width, .height = s->output_height, .maxDepth = 1.0f };
+
+ VkPipelineViewportStateCreateInfo spawn_viewport = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .pViewports = &viewport,
+ .scissorCount = 1,
+ .pScissors = &scissor,
+ };
+
+ VkPipelineRasterizationStateCreateInfo rasterizer = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .depthClampEnable = VK_FALSE,
+ .rasterizerDiscardEnable = VK_FALSE,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .lineWidth = 1.0f,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+ .depthBiasEnable = VK_FALSE,
+ };
+
+ VkPipelineMultisampleStateCreateInfo multisampling = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .sampleShadingEnable = VK_FALSE,
+ .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
+ .minSampleShading = 1.0f,
+ .alphaToCoverageEnable = VK_FALSE,
+ .alphaToOneEnable = VK_FALSE,
+ };
+
+ VkPipelineColorBlendAttachmentState col_blend_att = {
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
+ .blendEnable = VK_FALSE,
+ .srcColorBlendFactor = VK_BLEND_FACTOR_ONE,
+ .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .colorBlendOp = VK_BLEND_OP_ADD,
+ .srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE,
+ .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
+ .alphaBlendOp = VK_BLEND_OP_ADD,
+ };
+
+ VkPipelineColorBlendStateCreateInfo col_blend = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .logicOpEnable = VK_FALSE,
+ .logicOp = VK_LOGIC_OP_COPY,
+ .attachmentCount = 1,
+ .pAttachments = &col_blend_att,
+ };
+
+ VkGraphicsPipelineCreateInfo spawn_pipeline = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .pVertexInputState = &vpipe_info,
+ .stageCount = s->shaders_num,
+ //.pStages = s->shaders, TODO
+ .renderPass = s->renderpass,
+ .subpass = 0,
+ .layout = s->pipeline_layout,
+ .pDynamicState = &dynamic_states,
+ .pInputAssemblyState = &spawn_input_asm,
+ .pViewportState = &spawn_viewport,
+ .pRasterizationState = &rasterizer,
+ .pMultisampleState = &multisampling,
+ .pColorBlendState = &col_blend,
+ };
+
+ ret = vkCreateGraphicsPipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1,
+ &spawn_pipeline,
+ s->hwctx->alloc, &s->pipeline);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to init pipeline: %s\n", ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ }
+
+ return 0;
+}
diff --git a/libavfilter/vulkan.h b/libavfilter/vulkan.h
new file mode 100644
index 0000000000..45a13d4932
--- /dev/null
+++ b/libavfilter/vulkan.h
@@ -0,0 +1,234 @@
+/*
+ * Vulkan utilities
+ * Copyright (c) 2018 Rostislav Pehlivanov <***@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_VULKAN_H
+#define AVFILTER_VULKAN_H
+
+#include "avfilter.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/bprint.h"
+#include "libavutil/hwcontext.h"
+#include "libavutil/hwcontext_vulkan.h"
+
+#include <shaderc/shaderc.h>
+
+/* GLSL management macros */
+#define INDENT(N) INDENT_##N
+#define INDENT_0
+#define INDENT_1 INDENT_0 " "
+#define INDENT_2 INDENT_1 INDENT_1
+#define INDENT_3 INDENT_2 INDENT_1
+#define INDENT_4 INDENT_3 INDENT_1
+#define INDENT_5 INDENT_4 INDENT_1
+#define INDENT_6 INDENT_5 INDENT_1
+#define C(N, S) INDENT(N) #S "\n"
+#define GLSLC(N, S) av_bprintf(&shd->src, C(N, S))
+#define GLSLA(...) av_bprintf(&shd->src, __VA_ARGS__)
+#define GLSLF(N, S, ...) av_bprintf(&shd->src, C(N, S), __VA_ARGS__)
+#define GLSLD(D) do { GLSLC(0, );                                      \
+                      av_bprint_append_data(&shd->src, D, strlen(D));  \
+                      GLSLC(0, ); } while (0)
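+
+/* For instance (illustrative only, assuming a SPIRVShader *shd is in scope),
+ * a shader body can be assembled with:
+ *     GLSLC(0, void main()            );
+ *     GLSLC(0, {                      );
+ *     GLSLF(1,     vec4 v = vec4(%f); ,0.5f);
+ *     GLSLC(0, }                      );
+ * where each macro appends a line of GLSL to shd->src. */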
+
+/* Helper, pretty much every Vulkan return value needs to be checked */
+#define RET(x) \
+ do { \
+ if ((err = (x)) < 0) \
+ goto fail; \
+ } while (0)
+
+/* Useful for attaching immutable samplers to arrays */
+#define DUP_SAMPLER_ARRAY4(x) (const VkSampler []){ x, x, x, x, }
+
+typedef struct SPIRVShader {
+ const char *name; /* Name for id/debugging purposes */
+ AVBPrint src;
+ int local_size[3]; /* Compute shader workgroup sizes */
+ VkPipelineShaderStageCreateInfo shader;
+} SPIRVShader;
+
+typedef struct VulkanDescriptorSetBinding {
+ const char *name;
+ VkDescriptorType type;
+ const char *mem_layout; /* Storage images (rgba8, etc.) and buffers (std430, etc.) */
+ const char *mem_quali; /* readonly, writeonly, etc. */
+ const char *buf_content; /* For buffers */
+ uint32_t dimensions; /* Needed for e.g. sampler%iD */
+ uint32_t elems; /* 0 - scalar, 1 or more - vector */
+ VkShaderStageFlags stages;
+ const VkSampler *samplers; /* Immutable samplers, length - #elems */
+ void *updater;
+} VulkanDescriptorSetBinding;
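+
+/* Illustrative example of a binding, as used by the filters built on this
+ * header: a read-only 2D rgba8 storage image whose descriptor is updated
+ * through a VkDescriptorImageInfo kept in the filter context:
+ *     { .name = "input_img", .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ *       .mem_layout = "rgba8", .mem_quali = "readonly", .dimensions = 2,
+ *       .elems = 1, .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ *       .updater = input_images } */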
+
+typedef struct VulkanSampler {
+ VkSampler sampler;
+ VkSamplerYcbcrConversionInfo yuv_conv; /* For imageview creation */
+ int converting; /* Indicates whether sampler is a converting one */
+} VulkanSampler;
+
+typedef struct FFVkExecContext {
+ VkCommandPool pool;
+ VkCommandBuffer buf;
+ VkQueue queue;
+ VkFence fence;
+} FFVkExecContext;
+
+typedef struct FFVkBuffer {
+ VkBuffer buf;
+ VkDeviceMemory mem;
+ VkMemoryPropertyFlagBits flags;
+} FFVkBuffer;
+
+typedef struct VulkanFilterContext {
+ const AVClass *class;
+
+ AVBufferRef *device_ref;
+ AVBufferRef *frames_ref; /* For in-place filtering */
+ AVHWDeviceContext *device;
+ AVVulkanDeviceContext *hwctx;
+
+ /* Properties */
+ int output_width;
+ int output_height;
+ enum AVPixelFormat output_format;
+ enum AVPixelFormat input_format;
+
+ /* Samplers */
+ VulkanSampler *samplers;
+ int samplers_num;
+
+ /* Shaders */
+ SPIRVShader *shaders;
+ int shaders_num;
+ shaderc_compiler_t sc_compiler;
+ shaderc_compile_options_t sc_opts;
+
+ /* Contexts */
+ VkRenderPass renderpass;
+ VkPipelineLayout pipeline_layout;
+ VkPipeline pipeline;
+
+ /* Descriptors */
+ VkDescriptorSetLayout *desc_layout;
+ VkDescriptorPool desc_pool;
+ VkDescriptorSet *desc_set;
+ VkDescriptorUpdateTemplate *desc_template;
+ int push_consts_num;
+ int descriptor_sets_num;
+ int pool_size_desc_num;
+
+ /* Vertex buffer */
+ FFVkBuffer vbuffer;
+ int num_verts;
+
+ /* Temporary, used to store data in between initialization stages */
+ VkDescriptorUpdateTemplateCreateInfo *desc_template_info;
+ VkDescriptorPoolSize *pool_size_desc;
+ VkPushConstantRange *push_consts;
+ void *scratch; /* Scratch memory used only in functions */
+ unsigned int scratch_size;
+} VulkanFilterContext;
+
+/* Generic memory allocation.
+ * Will align size to the minimum map alignment requirement in case req_flags
+ * has VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT set */
+int ff_vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req,
+ VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+ VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem);
+
+/* Buffer I/O */
+int ff_vk_create_buf(AVFilterContext *avctx, FFVkBuffer *buf, size_t size,
+ VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
+int ff_vk_map_buffers(AVFilterContext *avctx, FFVkBuffer *buf, uint8_t *mem[],
+ int nb_buffers, int invalidate);
+int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers,
+ int flush);
+void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf);
+
+/* Command context init/uninit */
+int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e, int queue);
+void ff_vk_free_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e);
+
+/* Converts Vulkan return values to strings */
+const char *ff_vk_ret2str(VkResult res);
+
+/* Create a Vulkan sampler, if input isn't NULL the sampler will convert to RGB */
+const VulkanSampler *ff_vk_init_sampler(AVFilterContext *avctx, AVFrame *input,
+ int unnorm_coords, VkFilter filt);
+
+/* Gets the single-plane representation format */
+VkFormat ff_vk_plane_rep_fmt(enum AVPixelFormat pixfmt, int plane);
+/* Gets the image aspect flags of a plane */
+enum VkImageAspectFlagBits ff_vk_aspect_flags(enum AVPixelFormat pixfmt,
+                                              int plane);
+/* Creates an imageview */
+int ff_vk_create_imageview(AVFilterContext *avctx, VkImageView *v, AVVkFrame *f,
+ VkFormat fmt, enum VkImageAspectFlagBits aspect,
+ VkComponentMapping map, const void *pnext);
+/* Destroys an imageview */
+void ff_vk_destroy_imageview(AVFilterContext *avctx, VkImageView v);
+/* Creates a shader */
+SPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, const char *name,
+ VkShaderStageFlags stage);
+/* For compute shaders, defines the workgroup size */
+void ff_vk_set_compute_shader_sizes(AVFilterContext *avctx, SPIRVShader *shd,
+ int local_size[3]);
+/* Compiles a completed shader into a module */
+int ff_vk_compile_shader(AVFilterContext *avctx, SPIRVShader *shd,
+ const char *entry);
+
+/* Adds a descriptor set; needs to be abstracted so sets are added to a
+ * specific pipeline layout */
+int ff_vk_add_descriptor_set(AVFilterContext *avctx, SPIRVShader *shd,
+ VulkanDescriptorSetBinding *desc, int num,
+ int only_print_to_shader);
+int ff_vk_add_push_constant(AVFilterContext *avctx, int offset, int size,
+ VkShaderStageFlagBits stage);
+
+/* Creates a Vulkan pipeline layout */
+int ff_vk_init_pipeline_layout(AVFilterContext *avctx);
+
+/* Creates a compute pipeline */
+int ff_vk_init_compute_pipeline(AVFilterContext *avctx);
+
+/* Creates a Vulkan renderpass */
+int ff_vk_init_renderpass(AVFilterContext *avctx);
+/* Creates a graphics pipeline */
+int ff_vk_init_graphics_pipeline(AVFilterContext *avctx);
+/* Init a simple vertex buffer (4 vertices, a rectangle matching the video) */
+int ff_vk_init_simple_vbuffer(AVFilterContext *avctx);
+/* Updates a given descriptor set after pipeline initialization */
+void ff_vk_update_descriptor_set(AVFilterContext *avctx, int set_id);
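+
+/* The helpers above are typically used in this order (as the filters in this
+ * set do): ff_vk_init_shader() -> ff_vk_add_descriptor_set() ->
+ * ff_vk_compile_shader() -> ff_vk_init_pipeline_layout() ->
+ * ff_vk_init_compute_pipeline() at init time, then per frame
+ * ff_vk_create_imageview() for every plane, ff_vk_update_descriptor_set(),
+ * and command buffer recording/submission on an FFVkExecContext. */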
+
+/* General lavfi IO functions */
+int ff_vk_filter_query_formats (AVFilterContext *avctx);
+int ff_vk_filter_init (AVFilterContext *avctx);
+int ff_vk_filter_config_input (AVFilterLink *inlink);
+int ff_vk_filter_config_output (AVFilterLink *outlink);
+int ff_vk_filter_config_output_inplace(AVFilterLink *outlink);
+void ff_vk_filter_uninit (AVFilterContext *avctx);
+
+#endif /* AVFILTER_VULKAN_H */
--
2.17.0
Rostislav Pehlivanov
2018-04-20 04:30:07 UTC
Permalink
Signed-off-by: Rostislav Pehlivanov <***@gmail.com>
---
configure | 1 +
libavfilter/Makefile | 1 +
libavfilter/allfilters.c | 1 +
libavfilter/vf_avgblur_vulkan.c | 343 ++++++++++++++++++++++++++++++++
4 files changed, 346 insertions(+)
create mode 100644 libavfilter/vf_avgblur_vulkan.c

diff --git a/configure b/configure
index 20fae191ea..99128d74dc 100755
--- a/configure
+++ b/configure
@@ -3299,6 +3299,7 @@ ass_filter_deps="libass"
atempo_filter_deps="avcodec"
atempo_filter_select="rdft"
avgblur_opencl_filter_deps="opencl"
+avgblur_vulkan_filter_deps="vulkan libshaderc"
azmq_filter_deps="libzmq"
blackframe_filter_deps="gpl"
boxblur_filter_deps="gpl"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 7fc3de3bb2..915c4009af 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -143,6 +143,7 @@ OBJS-$(CONFIG_ATADENOISE_FILTER) += vf_atadenoise.o
OBJS-$(CONFIG_AVGBLUR_FILTER) += vf_avgblur.o
OBJS-$(CONFIG_AVGBLUR_OPENCL_FILTER) += vf_avgblur_opencl.o opencl.o \
opencl/avgblur.o
+OBJS-$(CONFIG_AVGBLUR_VULKAN_FILTER) += vf_avgblur_vulkan.o vulkan.o
OBJS-$(CONFIG_BBOX_FILTER) += bbox.o vf_bbox.o
OBJS-$(CONFIG_BENCH_FILTER) += f_bench.o
OBJS-$(CONFIG_BITPLANENOISE_FILTER) += vf_bitplanenoise.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index bd55463bfc..65f1628249 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -136,6 +136,7 @@ extern AVFilter ff_vf_ass;
extern AVFilter ff_vf_atadenoise;
extern AVFilter ff_vf_avgblur;
extern AVFilter ff_vf_avgblur_opencl;
+extern AVFilter ff_vf_avgblur_vulkan;
extern AVFilter ff_vf_bbox;
extern AVFilter ff_vf_bench;
extern AVFilter ff_vf_bitplanenoise;
diff --git a/libavfilter/vf_avgblur_vulkan.c b/libavfilter/vf_avgblur_vulkan.c
new file mode 100644
index 0000000000..dc913ed60f
--- /dev/null
+++ b/libavfilter/vf_avgblur_vulkan.c
@@ -0,0 +1,343 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "vulkan.h"
+#include "internal.h"
+
+typedef struct AvgBlurVulkanContext {
+ VulkanFilterContext vkctx;
+
+ int initialized;
+ FFVkExecContext exec;
+
+ /* Shader updators, must be in the main filter struct */
+ VkDescriptorImageInfo input_images[3];
+ VkDescriptorImageInfo output_images[3];
+
+ int size_x;
+ int size_y;
+ int planes;
+} AvgBlurVulkanContext;
+
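+/* The kernel below first cooperatively loads a (workgroup + 2*FILTER_RADIUS)
+ * sized tile of the plane into shared memory, then each invocation averages a
+ * (2*FILTER_RADIUS + 1)^2 window out of that cache and stores the result. */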
+static const char blur_kernel[] = {
+ C(0, #define CACHE_SIZE (ivec2(gl_WorkGroupSize) + FILTER_RADIUS*2) )
+ C(0, shared vec4 cache[AREA(CACHE_SIZE)]; )
+ C(0, )
+ C(0, void blur_kernel(int idx, ivec2 pos) )
+ C(0, { )
+ C(1, ivec2 d; )
+ C(1, const ivec2 s = CACHE_SIZE; )
+ C(1, const ivec2 w = ivec2(gl_WorkGroupSize); )
+ C(1, const ivec2 l = ivec2(gl_LocalInvocationID.xy); )
+ C(1, )
+ C(1, for (d.y = l.y; d.y < s.y; d.y += w.y) { )
+ C(2, for (d.x = l.x; d.x < s.x; d.x += w.x) { )
+ C(3, const ivec2 np = pos + d - l - FILTER_RADIUS; )
+ C(3, cache[d.y*s.x + d.x] = imageLoad(input_img[idx], np); )
+ C(2, } )
+ C(1, } )
+ C(0, )
+ C(1, barrier(); )
+ C(0, )
+ C(1, vec4 avg = vec4(0.0f); )
+ C(1, ivec2 start = ivec2(0); )
+ C(1, ivec2 end = FILTER_RADIUS*2 + 1; )
+ C(1, for (d.y = start.y; d.y < end.y; d.y++) )
+ C(2, for (d.x = start.x; d.x < end.x; d.x++) )
+ C(3, avg += cache[(l.y + d.y)*s.x + l.x + d.x]; )
+ C(0, )
+ C(1, avg /= AREA(end - start); )
+ C(1, imageStore(output_img[idx], pos, avg); )
+ C(0, } )
+};
+
+static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
+{
+ int err;
+ AvgBlurVulkanContext *s = ctx->priv;
+
+ { /* Create the shader */
+ const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+
+ SPIRVShader *shd = ff_vk_init_shader(ctx, "avgblur_compute",
+ VK_SHADER_STAGE_COMPUTE_BIT);
+ ff_vk_set_compute_shader_sizes(ctx, shd, (int [3]){ 16, 16, 1 });
+
+ VulkanDescriptorSetBinding desc_i[2] = {
+ {
+ .name = "input_img",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = "rgba8",
+ .mem_quali = "readonly",
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .updater = s->input_images,
+ },
+ {
+ .name = "output_img",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = "rgba8",
+ .mem_quali = "writeonly",
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .updater = s->output_images,
+ },
+ };
+
+ RET(ff_vk_add_descriptor_set(ctx, shd, desc_i, 2, 0)); /* set 0 */
+
+ GLSLF(0, #define FILTER_RADIUS ivec2(%i, %i), s->size_x, s->size_y);
+ GLSLD( blur_kernel );
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ GLSLF(1, for (int i = 0; i < %i; i++) { ,planes);
+ GLSLC(2, if (!IS_WITHIN(pos, imageSize(input_img[i]))) { );
+ GLSLC(3, barrier(); );
+ GLSLC(3, continue; );
+ GLSLC(2, } );
+ GLSLC(2, else barrier(); ); /* Workaround */
+ GLSLF(2, if ((0x%x & (1 << i)) != 0) { ,s->planes);
+ GLSLC(3, blur_kernel(i, pos); );
+ GLSLC(2, } else { );
+ GLSLC(3, const vec4 val = imageLoad(input_img[i], pos); );
+ GLSLC(3, imageStore(output_img[i], pos, val); );
+ GLSLC(2, } );
+ GLSLC(1, } );
+ GLSLC(0, } );
+
+ RET(ff_vk_compile_shader(ctx, shd, "main"));
+ }
+
+ RET(ff_vk_init_pipeline_layout(ctx));
+
+ /* Execution context */
+ RET(ff_vk_create_exec_ctx(ctx, &s->exec,
+ s->vkctx.hwctx->queue_family_comp_index));
+
+ /* The pipeline */
+ RET(ff_vk_init_compute_pipeline(ctx));
+
+ s->initialized = 1;
+
+ return 0;
+
+fail:
+ return err;
+}
+
+static int process_frames(AVFilterContext *avctx, AVVkFrame *out, AVVkFrame *in)
+{
+ int err;
+ AvgBlurVulkanContext *s = avctx->priv;
+ int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+
+ VkCommandBufferBeginInfo cmd_start = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ };
+
+ VkComponentMapping null_map = {
+ .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+ };
+
+ for (int i = 0; i < planes; i++) {
+ RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in,
+ ff_vk_plane_rep_fmt(s->vkctx.input_format, i),
+ ff_vk_aspect_flags(s->vkctx.input_format, i),
+ null_map, NULL));
+
+ RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out,
+ ff_vk_plane_rep_fmt(s->vkctx.output_format, i),
+ ff_vk_aspect_flags(s->vkctx.output_format, i),
+ null_map, NULL));
+
+ s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+        s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
+ }
+
+ ff_vk_update_descriptor_set(avctx, 0);
+
+ vkBeginCommandBuffer(s->exec.buf, &cmd_start);
+
+ {
+ VkImageMemoryBarrier bar[2] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .srcAccessMask = 0,
+ .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
+ .oldLayout = in->layout,
+ .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = in->img,
+ .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.input_format, -1),
+ .subresourceRange.levelCount = 1,
+ .subresourceRange.layerCount = 1,
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .srcAccessMask = 0,
+ .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+ .oldLayout = out->layout,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = out->img,
+ .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.input_format, -1),
+ .subresourceRange.levelCount = 1,
+ .subresourceRange.layerCount = 1,
+ },
+ };
+
+ vkCmdPipelineBarrier(s->exec.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
+ 0, NULL, 0, NULL, 2, bar);
+
+ in->layout = bar[0].newLayout;
+ in->access = bar[0].dstAccessMask;
+
+ out->layout = bar[1].newLayout;
+ out->access = bar[1].dstAccessMask;
+ }
+
+ vkCmdBindPipeline(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline);
+ vkCmdBindDescriptorSets(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline_layout, 0, s->vkctx.descriptor_sets_num, s->vkctx.desc_set, 0, 0);
+ vkCmdDispatch(s->exec.buf,
+ FFALIGN(s->vkctx.output_width, s->vkctx.shaders[0].local_size[0])/s->vkctx.shaders[0].local_size[0],
+ FFALIGN(s->vkctx.output_height, s->vkctx.shaders[0].local_size[1])/s->vkctx.shaders[0].local_size[1], 1);
+
+ vkEndCommandBuffer(s->exec.buf);
+
+ VkSubmitInfo s_info = {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .commandBufferCount = 1,
+ .pCommandBuffers = &s->exec.buf,
+ };
+
+ VkResult ret = vkQueueSubmit(s->exec.queue, 1, &s_info, s->exec.fence);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
+ ff_vk_ret2str(ret));
+        err = AVERROR_EXTERNAL;
+        goto fail;
+ } else {
+ vkWaitForFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence, VK_TRUE, UINT64_MAX);
+ vkResetFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence);
+ }
+
+fail:
+
+ for (int i = 0; i < planes; i++) {
+ ff_vk_destroy_imageview(avctx, s->input_images[i].imageView);
+ ff_vk_destroy_imageview(avctx, s->output_images[i].imageView);
+ }
+
+ return err;
+}
+
+static int avgblur_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
+{
+ int err;
+ AVFilterContext *ctx = link->dst;
+ AvgBlurVulkanContext *s = ctx->priv;
+ AVFilterLink *outlink = ctx->outputs[0];
+
+ AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+ if (!out) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ if (!s->initialized)
+ RET(init_filter(ctx, in));
+
+ RET(process_frames(ctx, (AVVkFrame *)out->data[0],
+ (AVVkFrame *) in->data[0]));
+
+ err = av_frame_copy_props(out, in);
+ if (err < 0)
+ goto fail;
+
+ av_frame_free(&in);
+
+ return ff_filter_frame(outlink, out);
+
+fail:
+ av_frame_free(&in);
+ av_frame_free(&out);
+ return err;
+}
+
+static void avgblur_vulkan_uninit(AVFilterContext *avctx)
+{
+ AvgBlurVulkanContext *s = avctx->priv;
+
+ ff_vk_free_exec_ctx(avctx, &s->exec);
+ ff_vk_filter_uninit(avctx);
+
+ s->initialized = 0;
+}
+
+#define OFFSET(x) offsetof(AvgBlurVulkanContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+static const AVOption avgblur_vulkan_options[] = {
+ { "sizeX", "Set horizontal radius", OFFSET(size_x), AV_OPT_TYPE_INT, {.i64 = 2}, 0, 32, .flags = FLAGS },
+ { "planes", "Set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT, {.i64 = 0xF}, 0, 0xF, .flags = FLAGS },
+ { "sizeY", "Set vertical radius", OFFSET(size_y), AV_OPT_TYPE_INT, {.i64 = 2}, 0, 32, .flags = FLAGS },
+ { NULL },
+};
+
+AVFILTER_DEFINE_CLASS(avgblur_vulkan);
+
+static const AVFilterPad avgblur_vulkan_inputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .filter_frame = &avgblur_vulkan_filter_frame,
+ .config_props = &ff_vk_filter_config_input,
+ },
+ { NULL }
+};
+
+static const AVFilterPad avgblur_vulkan_outputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = &ff_vk_filter_config_output,
+ },
+ { NULL }
+};
+
+AVFilter ff_vf_avgblur_vulkan = {
+ .name = "avgblur_vulkan",
+ .description = NULL_IF_CONFIG_SMALL("Apply avgblur mask to input video"),
+ .priv_size = sizeof(AvgBlurVulkanContext),
+ .init = &ff_vk_filter_init,
+ .uninit = &avgblur_vulkan_uninit,
+ .query_formats = &ff_vk_filter_query_formats,
+ .inputs = avgblur_vulkan_inputs,
+ .outputs = avgblur_vulkan_outputs,
+ .priv_class = &avgblur_vulkan_class,
+ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
--
2.17.0
Rostislav Pehlivanov
2018-04-20 04:30:08 UTC
Permalink
For YUV images the effect is only approximated (the chroma planes are scaled
radially), while RGB images are handled properly (the red and blue channels
are sampled at offset positions).
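
Roughly, with FILTER_DIST = 1 + dist/100 and p the pixel position normalized
to [-1, 1], the shader samples the red and blue channels at
p -/+ p*(FILTER_DIST - 1) for RGB frames, and samples each chroma plane at
p/FILTER_DIST for YUV frames (see distort_rgb() and distort_chroma() below).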

Signed-off-by: Rostislav Pehlivanov <***@gmail.com>
---
configure | 1 +
libavfilter/Makefile | 1 +
libavfilter/allfilters.c | 1 +
libavfilter/vf_chromaticaberration_vulkan.c | 342 ++++++++++++++++++++
4 files changed, 345 insertions(+)
create mode 100644 libavfilter/vf_chromaticaberration_vulkan.c

diff --git a/configure b/configure
index 99128d74dc..b93bf0b00f 100755
--- a/configure
+++ b/configure
@@ -3304,6 +3304,7 @@ azmq_filter_deps="libzmq"
blackframe_filter_deps="gpl"
boxblur_filter_deps="gpl"
bs2b_filter_deps="libbs2b"
+chromaticaberration_vulkan_filter_deps="vulkan libshaderc"
colormatrix_filter_deps="gpl"
convolution_opencl_filter_deps="opencl"
convolve_filter_deps="avcodec"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 915c4009af..b4c10daed6 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -153,6 +153,7 @@ OBJS-$(CONFIG_BLEND_FILTER) += vf_blend.o framesync.o
OBJS-$(CONFIG_BOXBLUR_FILTER) += vf_boxblur.o
OBJS-$(CONFIG_BWDIF_FILTER) += vf_bwdif.o
OBJS-$(CONFIG_CHROMAKEY_FILTER) += vf_chromakey.o
+OBJS-$(CONFIG_CHROMATICABERRATION_VULKAN_FILTER) += vf_chromaticaberration_vulkan.o vulkan.o
OBJS-$(CONFIG_CIESCOPE_FILTER) += vf_ciescope.o
OBJS-$(CONFIG_CODECVIEW_FILTER) += vf_codecview.o
OBJS-$(CONFIG_COLORBALANCE_FILTER) += vf_colorbalance.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 65f1628249..a4b59f43ac 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -146,6 +146,7 @@ extern AVFilter ff_vf_blend;
extern AVFilter ff_vf_boxblur;
extern AVFilter ff_vf_bwdif;
extern AVFilter ff_vf_chromakey;
+extern AVFilter ff_vf_chromaticaberration_vulkan;
extern AVFilter ff_vf_ciescope;
extern AVFilter ff_vf_codecview;
extern AVFilter ff_vf_colorbalance;
diff --git a/libavfilter/vf_chromaticaberration_vulkan.c b/libavfilter/vf_chromaticaberration_vulkan.c
new file mode 100644
index 0000000000..e814d2442b
--- /dev/null
+++ b/libavfilter/vf_chromaticaberration_vulkan.c
@@ -0,0 +1,342 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "vulkan.h"
+#include "internal.h"
+
+typedef struct ChromaticAberrationVulkanContext {
+ VulkanFilterContext vkctx;
+
+ int initialized;
+ FFVkExecContext exec;
+
+ /* Shader updators, must be in the main filter struct */
+ VkDescriptorImageInfo input_images[3];
+ VkDescriptorImageInfo output_images[3];
+
+ float dist_x;
+ float dist_y;
+} ChromaticAberrationVulkanContext;
+
+static const char distort_chroma_kernel[] = {
+ C(0, void distort_rgb(ivec2 size, ivec2 pos) )
+ C(0, { )
+ C(1, const vec2 p = ((vec2(pos)/vec2(size)) - 0.5f)*2.0f; )
+ C(1, const vec2 o = p * (FILTER_DIST - 1.0f); )
+ C(0, )
+ C(1, vec4 res; )
+ C(1, res.r = texture(input_img[0], ((p - o)/2.0f) + 0.5f).r; )
+ C(1, res.g = texture(input_img[0], (( p)/2.0f) + 0.5f).g; )
+ C(1, res.b = texture(input_img[0], ((p + o)/2.0f) + 0.5f).b; )
+ C(1, res.a = texture(input_img[0], (( p)/2.0f) + 0.5f).a; )
+ C(1, imageStore(output_img[0], pos, res); )
+ C(0, } )
+ C(0, )
+ C(0, void distort_chroma(int idx, ivec2 size, ivec2 pos) )
+ C(0, { )
+ C(1, vec2 p = ((vec2(pos)/vec2(size)) - 0.5f)*2.0f; )
+ C(1, float d = sqrt(p.x*p.x + p.y*p.y); )
+ C(1, p *= d / (d*FILTER_DIST); )
+ C(1, vec4 res = texture(input_img[idx], (p/2.0f) + 0.5f); )
+ C(1, imageStore(output_img[idx], pos, res); )
+ C(0, } )
+};
+
+static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
+{
+ int err;
+ ChromaticAberrationVulkanContext *s = ctx->priv;
+
+ /* Create a sampler */
+ const VulkanSampler *sampler = ff_vk_init_sampler(ctx, NULL, 0,
+ VK_FILTER_LINEAR);
+ if (!sampler)
+ return AVERROR_EXTERNAL;
+
+ { /* Create the shader */
+ const float dist_x = (s->dist_x / 100.0f) + 1.0f;
+ const float dist_y = (s->dist_y / 100.0f) + 1.0f;
+ const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+
+ SPIRVShader *shd = ff_vk_init_shader(ctx, "chromaticaberration_compute",
+ VK_SHADER_STAGE_COMPUTE_BIT);
+ ff_vk_set_compute_shader_sizes(ctx, shd, (int [3]){ 16, 16, 1 });
+
+ VulkanDescriptorSetBinding desc_i[2] = {
+ {
+ .name = "input_img",
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .updater = s->input_images,
+ .samplers = DUP_SAMPLER_ARRAY4(sampler->sampler),
+ },
+ {
+ .name = "output_img",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = "rgba8",
+ .mem_quali = "writeonly",
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .updater = s->output_images,
+ },
+ };
+
+ RET(ff_vk_add_descriptor_set(ctx, shd, desc_i, 2, 0)); /* set 0 */
+
+ GLSLF(0, #define FILTER_DIST vec2(%f, %f) ,dist_x, dist_y);
+ GLSLD( distort_chroma_kernel );
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ GLSLF(1, int planes = %i; ,planes);
+ GLSLC(1, for (int i = 0; i < planes; i++) { );
+ GLSLC(2, ivec2 size = imageSize(output_img[i]); );
+ GLSLC(2, if (!IS_WITHIN(pos, size)) );
+ GLSLC(3, continue; );
+ GLSLC(2, if (planes == 1) { );
+ GLSLC(3, distort_rgb(size, pos); );
+ GLSLC(2, } else if (i > 0) { );
+ GLSLC(3, distort_chroma(i, size, pos); );
+ GLSLC(2, } else { );
+ GLSLC(3, vec2 npos = vec2(pos)/vec2(size); );
+ GLSLC(3, vec4 res = texture(input_img[i], npos); );
+ GLSLC(3, imageStore(output_img[i], pos, res); );
+ GLSLC(2, } );
+ GLSLC(1, } );
+ GLSLC(0, } );
+
+ RET(ff_vk_compile_shader(ctx, shd, "main"));
+ }
+
+ RET(ff_vk_init_pipeline_layout(ctx));
+
+ /* Execution context */
+ RET(ff_vk_create_exec_ctx(ctx, &s->exec,
+ s->vkctx.hwctx->queue_family_comp_index));
+
+ /* The pipeline */
+ RET(ff_vk_init_compute_pipeline(ctx));
+
+ s->initialized = 1;
+
+ return 0;
+
+fail:
+ return err;
+}
+
+static int process_frames(AVFilterContext *avctx, AVVkFrame *out, AVVkFrame *in)
+{
+ int err;
+ ChromaticAberrationVulkanContext *s = avctx->priv;
+ int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+
+ VkCommandBufferBeginInfo cmd_start = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ };
+
+ VkComponentMapping null_map = {
+ .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+ };
+
+ for (int i = 0; i < planes; i++) {
+ RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in,
+ ff_vk_plane_rep_fmt(s->vkctx.input_format, i),
+ ff_vk_aspect_flags(s->vkctx.input_format, i),
+ null_map, NULL));
+
+ RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out,
+ ff_vk_plane_rep_fmt(s->vkctx.output_format, i),
+ ff_vk_aspect_flags(s->vkctx.output_format, i),
+ null_map, NULL));
+
+ s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+ s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
+ }
+
+ ff_vk_update_descriptor_set(avctx, 0);
+
+ vkBeginCommandBuffer(s->exec.buf, &cmd_start);
+
+ {
+ VkImageMemoryBarrier bar[2] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .srcAccessMask = 0,
+ .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
+ .oldLayout = in->layout,
+ .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = in->img,
+ .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.input_format, -1),
+ .subresourceRange.levelCount = 1,
+ .subresourceRange.layerCount = 1,
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .srcAccessMask = 0,
+ .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+ .oldLayout = out->layout,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = out->img,
+ .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.input_format, -1),
+ .subresourceRange.levelCount = 1,
+ .subresourceRange.layerCount = 1,
+ },
+ };
+
+ vkCmdPipelineBarrier(s->exec.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
+ 0, NULL, 0, NULL, 2, bar);
+
+ in->layout = bar[0].newLayout;
+ in->access = bar[0].dstAccessMask;
+
+ out->layout = bar[1].newLayout;
+ out->access = bar[1].dstAccessMask;
+ }
+
+ vkCmdBindPipeline(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline);
+ vkCmdBindDescriptorSets(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline_layout, 0, s->vkctx.descriptor_sets_num, s->vkctx.desc_set, 0, 0);
+ vkCmdDispatch(s->exec.buf,
+ FFALIGN(s->vkctx.output_width, s->vkctx.shaders[0].local_size[0])/s->vkctx.shaders[0].local_size[0],
+ FFALIGN(s->vkctx.output_height, s->vkctx.shaders[0].local_size[1])/s->vkctx.shaders[0].local_size[1], 1);
+
+ vkEndCommandBuffer(s->exec.buf);
+
+ VkSubmitInfo s_info = {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .commandBufferCount = 1,
+ .pCommandBuffers = &s->exec.buf,
+ };
+
+ VkResult ret = vkQueueSubmit(s->exec.queue, 1, &s_info, s->exec.fence);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
+ ff_vk_ret2str(ret));
+        err = AVERROR_EXTERNAL;
+        goto fail;
+ } else {
+ vkWaitForFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence, VK_TRUE, UINT64_MAX);
+ vkResetFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence);
+ }
+
+fail:
+
+ for (int i = 0; i < planes; i++) {
+ ff_vk_destroy_imageview(avctx, s->input_images[i].imageView);
+ ff_vk_destroy_imageview(avctx, s->output_images[i].imageView);
+ }
+
+ return err;
+}
+
+static int chromaticaberration_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
+{
+ int err;
+ AVFilterContext *ctx = link->dst;
+ ChromaticAberrationVulkanContext *s = ctx->priv;
+ AVFilterLink *outlink = ctx->outputs[0];
+
+ AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+ if (!out) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ if (!s->initialized)
+ RET(init_filter(ctx, in));
+
+ RET(process_frames(ctx, (AVVkFrame *)out->data[0],
+ (AVVkFrame *) in->data[0]));
+
+ err = av_frame_copy_props(out, in);
+ if (err < 0)
+ goto fail;
+
+ av_frame_free(&in);
+
+ return ff_filter_frame(outlink, out);
+
+fail:
+ av_frame_free(&in);
+ av_frame_free(&out);
+ return err;
+}
+
+static void chromaticaberration_vulkan_uninit(AVFilterContext *avctx)
+{
+ ChromaticAberrationVulkanContext *s = avctx->priv;
+
+ ff_vk_free_exec_ctx(avctx, &s->exec);
+ ff_vk_filter_uninit(avctx);
+
+ s->initialized = 0;
+}
+
+#define OFFSET(x) offsetof(ChromaticAberrationVulkanContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+static const AVOption chromaticaberration_vulkan_options[] = {
+ { "dist_x", "Set horizontal distortion amount", OFFSET(dist_x), AV_OPT_TYPE_FLOAT, {.dbl = 0.0f}, 0.0f, 10.0f, .flags = FLAGS },
+ { "dist_y", "Set vertical distortion amount", OFFSET(dist_y), AV_OPT_TYPE_FLOAT, {.dbl = 0.0f}, 0.0f, 10.0f, .flags = FLAGS },
+ { NULL },
+};
+
+AVFILTER_DEFINE_CLASS(chromaticaberration_vulkan);
+
+static const AVFilterPad chromaticaberration_vulkan_inputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .filter_frame = &chromaticaberration_vulkan_filter_frame,
+ .config_props = &ff_vk_filter_config_input,
+ },
+ { NULL }
+};
+
+static const AVFilterPad chromaticaberration_vulkan_outputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = &ff_vk_filter_config_output,
+ },
+ { NULL }
+};
+
+AVFilter ff_vf_chromaticaberration_vulkan = {
+ .name = "chromaticaberration_vulkan",
+ .description = NULL_IF_CONFIG_SMALL("Offset chroma of input video"),
+ .priv_size = sizeof(ChromaticAberrationVulkanContext),
+ .init = &ff_vk_filter_init,
+ .uninit = &chromaticaberration_vulkan_uninit,
+ .query_formats = &ff_vk_filter_query_formats,
+ .inputs = chromaticaberration_vulkan_inputs,
+ .outputs = chromaticaberration_vulkan_outputs,
+ .priv_class = &chromaticaberration_vulkan_class,
+ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
--
2.17.0
Rostislav Pehlivanov
2018-04-20 04:30:09 UTC
Permalink
Could be done in-place with the main image, but framesync segfaults.
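
For reference, the overlay position is passed to the shader through a small
storage buffer of per-plane offsets (ivec2 o_offset[3]); the luma offset is
used as-is and the chroma offsets are halved, which assumes 2x subsampled
chroma (see init_filter() below).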

Signed-off-by: Rostislav Pehlivanov <***@gmail.com>
---
configure | 1 +
libavfilter/Makefile | 1 +
libavfilter/allfilters.c | 1 +
libavfilter/vf_overlay_vulkan.c | 458 ++++++++++++++++++++++++++++++++
4 files changed, 461 insertions(+)
create mode 100644 libavfilter/vf_overlay_vulkan.c

diff --git a/configure b/configure
index b93bf0b00f..6c7b225099 100755
--- a/configure
+++ b/configure
@@ -3354,6 +3354,7 @@ ocr_filter_deps="libtesseract"
ocv_filter_deps="libopencv"
openclsrc_filter_deps="opencl"
overlay_opencl_filter_deps="opencl"
+overlay_vulkan_filter_deps="vulkan libshaderc"
overlay_qsv_filter_deps="libmfx"
overlay_qsv_filter_select="qsvvpp"
owdenoise_filter_deps="gpl"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index b4c10daed6..0e825412cb 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -271,6 +271,7 @@ OBJS-$(CONFIG_OSCILLOSCOPE_FILTER) += vf_datascope.o
OBJS-$(CONFIG_OVERLAY_FILTER) += vf_overlay.o framesync.o
OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER) += vf_overlay_opencl.o opencl.o \
opencl/overlay.o framesync.o
+OBJS-$(CONFIG_OVERLAY_VULKAN_FILTER)         += vf_overlay_vulkan.o vulkan.o framesync.o
OBJS-$(CONFIG_OVERLAY_QSV_FILTER) += vf_overlay_qsv.o
OBJS-$(CONFIG_OWDENOISE_FILTER) += vf_owdenoise.o
OBJS-$(CONFIG_PAD_FILTER) += vf_pad.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index a4b59f43ac..cbaeb0c3a0 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -262,6 +262,7 @@ extern AVFilter ff_vf_ocv;
extern AVFilter ff_vf_oscilloscope;
extern AVFilter ff_vf_overlay;
extern AVFilter ff_vf_overlay_opencl;
+extern AVFilter ff_vf_overlay_vulkan;
extern AVFilter ff_vf_overlay_qsv;
extern AVFilter ff_vf_owdenoise;
extern AVFilter ff_vf_pad;
diff --git a/libavfilter/vf_overlay_vulkan.c b/libavfilter/vf_overlay_vulkan.c
new file mode 100644
index 0000000000..549e84e308
--- /dev/null
+++ b/libavfilter/vf_overlay_vulkan.c
@@ -0,0 +1,458 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "vulkan.h"
+#include "internal.h"
+#include "framesync.h"
+
+typedef struct OverlayVulkanContext {
+ VulkanFilterContext vkctx;
+
+ int initialized;
+ FFVkExecContext exec;
+ FFFrameSync fs;
+ FFVkBuffer params_buf;
+
+ /* Shader updators, must be in the main filter struct */
+ VkDescriptorImageInfo main_images[3];
+ VkDescriptorImageInfo overlay_images[3];
+ VkDescriptorImageInfo output_images[3];
+ VkDescriptorBufferInfo params_desc;
+
+ int overlay_x;
+ int overlay_y;
+} OverlayVulkanContext;
+
+static const char overlay_noalpha[] = {
+ C(0, void overlay_noalpha(int i, ivec2 pos) )
+ C(0, { )
+ C(1, ivec2 overlay_size = imageSize(overlay_img[i]); )
+ C(1, if ((o_offset[i].x <= pos.x) && (o_offset[i].y <= pos.y) &&
+ (pos.x < (o_offset[i].x + overlay_size.x)) &&
+ (pos.y < (o_offset[i].y + overlay_size.y))) { )
+ C(2, vec4 res = imageLoad(overlay_img[i], pos - o_offset[i]); )
+ C(2, imageStore(output_img[i], pos, res); )
+ C(1, } else { )
+ C(2, vec4 res = imageLoad(main_img[i], pos); )
+ C(2, imageStore(output_img[i], pos, res); )
+ C(1, } )
+ C(0, } )
+};
+
+static av_cold int init_filter(AVFilterContext *ctx)
+{
+ int err;
+ OverlayVulkanContext *s = ctx->priv;
+
+ { /* Create the shader */
+ const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+
+ SPIRVShader *shd = ff_vk_init_shader(ctx, "overlay_compute",
+ VK_SHADER_STAGE_COMPUTE_BIT);
+ ff_vk_set_compute_shader_sizes(ctx, shd, (int [3]){ 16, 16, 1 });
+
+ VulkanDescriptorSetBinding desc_i[3] = {
+ {
+ .name = "main_img",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = "rgba8",
+ .mem_quali = "readonly",
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .updater = s->main_images,
+ },
+ {
+ .name = "overlay_img",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = "rgba8",
+ .mem_quali = "readonly",
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .updater = s->overlay_images,
+ },
+ {
+ .name = "output_img",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = "rgba8",
+ .mem_quali = "writeonly",
+ .dimensions = 2,
+ .elems = planes,
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .updater = s->output_images,
+ },
+ };
+
+ VulkanDescriptorSetBinding desc_b = {
+ .name = "params",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .mem_quali = "readonly",
+ .mem_layout = "std430",
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .updater = &s->params_desc,
+ .buf_content = "ivec2 o_offset[3];",
+ };
+
+ RET(ff_vk_add_descriptor_set(ctx, shd, desc_i, 3, 0)); /* set 0 */
+ RET(ff_vk_add_descriptor_set(ctx, shd, &desc_b, 1, 0)); /* set 1 */
+
+ GLSLD( overlay_noalpha );
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLC(1, ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
+ GLSLF(1, int planes = %i; ,planes);
+ GLSLC(1, for (int i = 0; i < planes; i++) { );
+ GLSLC(2, overlay_noalpha(i, pos); );
+ GLSLC(1, } );
+ GLSLC(0, } );
+
+ RET(ff_vk_compile_shader(ctx, shd, "main"));
+ }
+
+ RET(ff_vk_init_pipeline_layout(ctx));
+
+ {
+ struct {
+ int32_t o_offset[2*3];
+ } *par;
+
+ err = ff_vk_create_buf(ctx, &s->params_buf,
+ sizeof(*par),
+                               VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+ if (err)
+ return err;
+
+ err = ff_vk_map_buffers(ctx, &s->params_buf, (uint8_t **)&par, 1, 0);
+ if (err)
+ return err;
+
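+        /* Plane 0 gets the full-resolution offset, planes 1 and 2 get it
+         * halved (this assumes 2x subsampled chroma, e.g. yuv420p) */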
+ par->o_offset[0] = s->overlay_x;
+ par->o_offset[1] = s->overlay_y;
+ par->o_offset[2] = par->o_offset[0]/2;
+ par->o_offset[3] = par->o_offset[1]/2;
+ par->o_offset[4] = par->o_offset[0]/2;
+ par->o_offset[5] = par->o_offset[1]/2;
+
+ err = ff_vk_unmap_buffers(ctx, &s->params_buf, 1, 1);
+ if (err)
+ return err;
+
+ s->params_desc.buffer = s->params_buf.buf;
+ s->params_desc.range = VK_WHOLE_SIZE;
+
+ ff_vk_update_descriptor_set(ctx, 1);
+ }
+
+ /* Execution context */
+ RET(ff_vk_create_exec_ctx(ctx, &s->exec,
+ s->vkctx.hwctx->queue_family_comp_index));
+
+ /* The pipeline */
+ RET(ff_vk_init_compute_pipeline(ctx));
+
+ s->initialized = 1;
+
+ return 0;
+
+fail:
+ return err;
+}
+
+static int process_frames(AVFilterContext *avctx, AVFrame *out_f,
+ AVFrame *main_f, AVFrame *overlay_f)
+{
+ int err;
+ OverlayVulkanContext *s = avctx->priv;
+ int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+
+ AVVkFrame *out = (AVVkFrame *)out_f->data[0];
+ AVVkFrame *main = (AVVkFrame *)main_f->data[0];
+ AVVkFrame *overlay = (AVVkFrame *)overlay_f->data[0];
+
+ AVHWFramesContext *main_fc = (AVHWFramesContext*)main_f->hw_frames_ctx->data;
+ AVHWFramesContext *overlay_fc = (AVHWFramesContext*)overlay_f->hw_frames_ctx->data;
+
+ VkCommandBufferBeginInfo cmd_start = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ };
+
+ VkComponentMapping null_map = {
+ .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+ };
+
+ for (int i = 0; i < planes; i++) {
+ RET(ff_vk_create_imageview(avctx, &s->main_images[i].imageView, main,
+ ff_vk_plane_rep_fmt(main_fc->sw_format, i),
+ ff_vk_aspect_flags(main_fc->sw_format, i),
+ null_map, NULL));
+
+ RET(ff_vk_create_imageview(avctx, &s->overlay_images[i].imageView, overlay,
+ ff_vk_plane_rep_fmt(overlay_fc->sw_format, i),
+ ff_vk_aspect_flags(overlay_fc->sw_format, i),
+ null_map, NULL));
+
+ RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out,
+ ff_vk_plane_rep_fmt(s->vkctx.output_format, i),
+ ff_vk_aspect_flags(s->vkctx.output_format, i),
+ null_map, NULL));
+
+ s->main_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+ s->overlay_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+ s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
+ }
+
+ ff_vk_update_descriptor_set(avctx, 0);
+
+ vkBeginCommandBuffer(s->exec.buf, &cmd_start);
+
+ {
+ VkImageMemoryBarrier bar[3] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .srcAccessMask = 0,
+ .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
+ .oldLayout = main->layout,
+ .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = main->img,
+ .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.input_format, -1),
+ .subresourceRange.levelCount = 1,
+ .subresourceRange.layerCount = 1,
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .srcAccessMask = 0,
+ .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
+ .oldLayout = overlay->layout,
+ .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = overlay->img,
+ .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.input_format, -1),
+ .subresourceRange.levelCount = 1,
+ .subresourceRange.layerCount = 1,
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .srcAccessMask = 0,
+ .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+ .oldLayout = out->layout,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = out->img,
+ .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.input_format, -1),
+ .subresourceRange.levelCount = 1,
+ .subresourceRange.layerCount = 1,
+ },
+ };
+
+ vkCmdPipelineBarrier(s->exec.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
+ 0, NULL, 0, NULL, 3, bar);
+
+ main->layout = bar[0].newLayout;
+ main->access = bar[0].dstAccessMask;
+
+ overlay->layout = bar[1].newLayout;
+ overlay->access = bar[1].dstAccessMask;
+
+ out->layout = bar[2].newLayout;
+ out->access = bar[2].dstAccessMask;
+ }
+
+ vkCmdBindPipeline(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline);
+ vkCmdBindDescriptorSets(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline_layout, 0, s->vkctx.descriptor_sets_num, s->vkctx.desc_set, 0, 0);
+ vkCmdDispatch(s->exec.buf,
+ FFALIGN(s->vkctx.output_width, s->vkctx.shaders[0].local_size[0])/s->vkctx.shaders[0].local_size[0],
+ FFALIGN(s->vkctx.output_height, s->vkctx.shaders[0].local_size[1])/s->vkctx.shaders[0].local_size[1], 1);
+
+ vkEndCommandBuffer(s->exec.buf);
+
+ VkSubmitInfo s_info = {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .commandBufferCount = 1,
+ .pCommandBuffers = &s->exec.buf,
+ };
+
+ VkResult ret = vkQueueSubmit(s->exec.queue, 1, &s_info, s->exec.fence);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
+ ff_vk_ret2str(ret));
+        err = AVERROR_EXTERNAL;
+        goto fail;
+ } else {
+ vkWaitForFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence, VK_TRUE, UINT64_MAX);
+ vkResetFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence);
+ }
+
+fail:
+
+ for (int i = 0; i < planes; i++) {
+ ff_vk_destroy_imageview(avctx, s->main_images[i].imageView);
+ ff_vk_destroy_imageview(avctx, s->overlay_images[i].imageView);
+ ff_vk_destroy_imageview(avctx, s->output_images[i].imageView);
+ }
+
+ return err;
+}
+
+static int overlay_vulkan_blend(FFFrameSync *fs)
+{
+ int err;
+ AVFilterContext *ctx = fs->parent;
+ OverlayVulkanContext *s = ctx->priv;
+ AVFilterLink *outlink = ctx->outputs[0];
+ AVFrame *input_main, *input_overlay, *out;
+
+ err = ff_framesync_get_frame(fs, 0, &input_main, 0);
+ if (err < 0)
+ goto fail;
+ err = ff_framesync_get_frame(fs, 1, &input_overlay, 0);
+ if (err < 0)
+ goto fail;
+
+ if (!input_main || !input_overlay)
+ return 0;
+
+ if (!s->initialized) {
+ AVHWFramesContext *main_fc = (AVHWFramesContext*)input_main->hw_frames_ctx->data;
+ AVHWFramesContext *overlay_fc = (AVHWFramesContext*)input_overlay->hw_frames_ctx->data;
+ if (main_fc->sw_format != overlay_fc->sw_format) {
+ av_log(ctx, AV_LOG_ERROR, "Mismatching sw formats!\n");
+ return AVERROR(EINVAL);
+ }
+ RET(init_filter(ctx));
+ }
+
+ out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+ if (!out) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ RET(process_frames(ctx, out, input_main, input_overlay));
+
+ err = av_frame_copy_props(out, input_main);
+ if (err < 0)
+ goto fail;
+
+ return ff_filter_frame(outlink, out);
+
+fail:
+ av_frame_free(&out);
+ return err;
+}
+
+static int overlay_vulkan_config_output(AVFilterLink *outlink)
+{
+ int err;
+ AVFilterContext *avctx = outlink->src;
+ OverlayVulkanContext *s = avctx->priv;
+
+ err = ff_vk_filter_config_output(outlink);
+ if (err < 0)
+ return err;
+
+ err = ff_framesync_init_dualinput(&s->fs, avctx);
+ if (err < 0)
+ return err;
+
+ return ff_framesync_configure(&s->fs);
+}
+
+static int overlay_vulkan_activate(AVFilterContext *avctx)
+{
+ OverlayVulkanContext *s = avctx->priv;
+
+ return ff_framesync_activate(&s->fs);
+}
+
+static av_cold int overlay_vulkan_init(AVFilterContext *avctx)
+{
+ OverlayVulkanContext *s = avctx->priv;
+
+ s->fs.on_event = &overlay_vulkan_blend;
+
+ return ff_vk_filter_init(avctx);
+}
+
+static void overlay_vulkan_uninit(AVFilterContext *avctx)
+{
+ OverlayVulkanContext *s = avctx->priv;
+
+ ff_vk_free_exec_ctx(avctx, &s->exec);
+ ff_vk_filter_uninit(avctx);
+ ff_framesync_uninit(&s->fs);
+
+ s->initialized = 0;
+}
+
+#define OFFSET(x) offsetof(OverlayVulkanContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+static const AVOption overlay_vulkan_options[] = {
+ { "x", "Set horizontal offset", OFFSET(overlay_x), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, .flags = FLAGS },
+ { "y", "Set vertical offset", OFFSET(overlay_y), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, .flags = FLAGS },
+ { NULL },
+};
+
+AVFILTER_DEFINE_CLASS(overlay_vulkan);
+
+static const AVFilterPad overlay_vulkan_inputs[] = {
+ {
+ .name = "main",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = &ff_vk_filter_config_input,
+ },
+ {
+ .name = "overlay",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = &ff_vk_filter_config_input,
+ },
+ { NULL }
+};
+
+static const AVFilterPad overlay_vulkan_outputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = &overlay_vulkan_config_output,
+ },
+ { NULL }
+};
+
+AVFilter ff_vf_overlay_vulkan = {
+ .name = "overlay_vulkan",
+ .description = NULL_IF_CONFIG_SMALL("Overlay a source on top of another"),
+ .priv_size = sizeof(OverlayVulkanContext),
+ .init = &overlay_vulkan_init,
+ .uninit = &overlay_vulkan_uninit,
+ .query_formats = &ff_vk_filter_query_formats,
+ .activate = &overlay_vulkan_activate,
+ .inputs = overlay_vulkan_inputs,
+ .outputs = overlay_vulkan_outputs,
+ .priv_class = &overlay_vulkan_class,
+ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
--
2.17.0
Rostislav Pehlivanov
2018-04-20 04:30:10 UTC
Permalink
Can convert to RGB using very fast fixed-function conversions.

Signed-off-by: Rostislav Pehlivanov <***@gmail.com>
---
configure | 1 +
libavfilter/Makefile | 1 +
libavfilter/allfilters.c | 1 +
libavfilter/vf_scale_vulkan.c | 386 ++++++++++++++++++++++++++++++++++
4 files changed, 389 insertions(+)
create mode 100644 libavfilter/vf_scale_vulkan.c

diff --git a/configure b/configure
index 6c7b225099..90d574bac3 100755
--- a/configure
+++ b/configure
@@ -3411,6 +3411,7 @@ zmq_filter_deps="libzmq"
zoompan_filter_deps="swscale"
zscale_filter_deps="libzimg const_nan"
scale_vaapi_filter_deps="vaapi VAProcPipelineParameterBuffer"
+scale_vulkan_filter_deps="vulkan libshaderc"
vpp_qsv_filter_deps="libmfx"
vpp_qsv_filter_select="qsvvpp"

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 0e825412cb..a6aab47ddf 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -309,6 +309,7 @@ OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o vf_scale_cuda.pt
OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale.o
OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o
OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale.o vaapi_vpp.o
+OBJS-$(CONFIG_SCALE_VULKAN_FILTER) += vf_scale_vulkan.o scale.o vulkan.o
OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o scale.o
OBJS-$(CONFIG_SELECT_FILTER) += f_select.o
OBJS-$(CONFIG_SELECTIVECOLOR_FILTER) += vf_selectivecolor.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index cbaeb0c3a0..293d5e3e6e 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -300,6 +300,7 @@ extern AVFilter ff_vf_scale_cuda;
extern AVFilter ff_vf_scale_npp;
extern AVFilter ff_vf_scale_qsv;
extern AVFilter ff_vf_scale_vaapi;
+extern AVFilter ff_vf_scale_vulkan;
extern AVFilter ff_vf_scale2ref;
extern AVFilter ff_vf_select;
extern AVFilter ff_vf_selectivecolor;
diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c
new file mode 100644
index 0000000000..5bcdc0b3d8
--- /dev/null
+++ b/libavfilter/vf_scale_vulkan.c
@@ -0,0 +1,386 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "vulkan.h"
+#include "scale.h"
+#include "internal.h"
+
+enum ScalerFunc {
+ F_BILINEAR = 0,
+ F_NEAREST,
+
+ F_NB,
+};
+
+typedef struct ScaleVulkanContext {
+ VulkanFilterContext vkctx;
+
+ int initialized;
+ FFVkExecContext exec;
+ const VulkanSampler *sampler;
+
+ /* Shader updaters, must be in the main filter struct */
+ VkDescriptorImageInfo input_images[3];
+ VkDescriptorImageInfo output_images[3];
+
+ enum ScalerFunc scaler;
+ char *output_format_string;
+ char *w_expr;
+ char *h_expr;
+} ScaleVulkanContext;
+
+static const char scale_bilinear[] = {
+ C(0, void scale_bilinear(int idx, ivec2 pos) )
+ C(0, { )
+ C(1, const vec2 npos = (vec2(pos) + 0.5f) / imageSize(output_img[idx]);)
+ C(1, imageStore(output_img[idx], pos, texture(input_img[idx], npos)); )
+ C(0, } )
+};
+
+static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
+{
+ int err;
+ ScaleVulkanContext *s = ctx->priv;
+ const int conv = s->vkctx.input_format != s->vkctx.output_format;
+ VkFilter sampler_mode;
+
+ switch (s->scaler) {
+ case F_NEAREST:
+ sampler_mode = VK_FILTER_NEAREST;
+ break;
+ case F_BILINEAR:
+ sampler_mode = VK_FILTER_LINEAR;
+ break;
+ };
+
+ /* Create a sampler */
+ s->sampler = ff_vk_init_sampler(ctx, conv ? in : NULL, 0, sampler_mode);
+ if (!s->sampler)
+ return AVERROR_EXTERNAL;
+
+ { /* Create the shader */
+ SPIRVShader *shd = ff_vk_init_shader(ctx, "scale_compute",
+ VK_SHADER_STAGE_COMPUTE_BIT);
+ ff_vk_set_compute_shader_sizes(ctx, shd, (int [3]){ 16, 16, 1 });
+
+ VulkanDescriptorSetBinding desc_i[2] = {
+ {
+ .name = "input_img",
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .dimensions = 2,
+ .elems = conv ? 1 :
+ av_pix_fmt_count_planes(s->vkctx.input_format),
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .updater = s->input_images,
+ .samplers = DUP_SAMPLER_ARRAY4(s->sampler->sampler),
+ },
+ {
+ .name = "output_img",
+ .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .mem_layout = "rgba8",
+ .mem_quali = "writeonly",
+ .dimensions = 2,
+ .elems = av_pix_fmt_count_planes(s->vkctx.output_format),
+ .stages = VK_SHADER_STAGE_COMPUTE_BIT,
+ .updater = s->output_images,
+ },
+ };
+
+ RET(ff_vk_add_descriptor_set(ctx, shd, desc_i, 2, 0)); /* set 0 */
+
+ GLSLD( scale_bilinear );
+ GLSLC(0, void main() );
+ GLSLC(0, { );
+ GLSLF(1, for (int i = 0; i < %i; i++) ,desc_i[1].elems);
+
+ switch (s->scaler) {
+ case F_NEAREST:
+ case F_BILINEAR:
+ GLSLC(2, scale_bilinear(i, ivec2(gl_GlobalInvocationID.xy)); );
+ break;
+ };
+
+ GLSLC(0, } );
+
+ RET(ff_vk_compile_shader(ctx, shd, "main"));
+ }
+
+ RET(ff_vk_init_pipeline_layout(ctx));
+
+ /* Execution context */
+ RET(ff_vk_create_exec_ctx(ctx, &s->exec,
+ s->vkctx.hwctx->queue_family_comp_index));
+
+ /* The pipeline */
+ RET(ff_vk_init_compute_pipeline(ctx));
+
+ s->initialized = 1;
+
+ return 0;
+
+fail:
+ return err;
+}
+
+static int process_frames(AVFilterContext *avctx, AVVkFrame *out, AVVkFrame *in)
+{
+ int i, err;
+ ScaleVulkanContext *s = avctx->priv;
+ int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+
+ VkCommandBufferBeginInfo cmd_start = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ };
+
+ VkComponentMapping null_map = {
+ .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+ .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+ };
+
+ if (s->sampler->converting) { /* RGB */
+ RET(ff_vk_create_imageview(avctx, &s->input_images[0].imageView, in,
+ av_vkfmt_from_pixfmt(s->vkctx.input_format),
+ VK_IMAGE_ASPECT_COLOR_BIT,
+ null_map, &s->sampler->yuv_conv));
+ s->input_images[0].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+ } else {
+ for (i = 0; i < av_pix_fmt_count_planes(s->vkctx.input_format); i++) {
+ RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in,
+ ff_vk_plane_rep_fmt(s->vkctx.input_format, i),
+ ff_vk_aspect_flags(s->vkctx.input_format, i),
+ null_map, NULL));
+ s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+ }
+ }
+
+ for (i = 0; i < av_pix_fmt_count_planes(s->vkctx.output_format); i++) {
+ RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out,
+ ff_vk_plane_rep_fmt(s->vkctx.output_format, i),
+ ff_vk_aspect_flags(s->vkctx.output_format, i),
+ null_map, NULL));
+ s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
+ }
+
+ ff_vk_update_descriptor_set(avctx, 0);
+
+ vkBeginCommandBuffer(s->exec.buf, &cmd_start);
+
+ {
+ VkImageMemoryBarrier bar[2] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .srcAccessMask = 0,
+ .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
+ .oldLayout = in->layout,
+ .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = in->img,
+ .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.input_format, -1),
+ .subresourceRange.levelCount = 1,
+ .subresourceRange.layerCount = 1,
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .srcAccessMask = 0,
+ .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+ .oldLayout = out->layout,
+ .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = out->img,
+ .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.output_format, -1),
+ .subresourceRange.levelCount = 1,
+ .subresourceRange.layerCount = 1,
+ },
+ };
+
+ vkCmdPipelineBarrier(s->exec.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
+ 0, NULL, 0, NULL, 2, bar);
+
+ in->layout = bar[0].newLayout;
+ in->access = bar[0].dstAccessMask;
+
+ out->layout = bar[1].newLayout;
+ out->access = bar[1].dstAccessMask;
+ }
+
+ vkCmdBindPipeline(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline);
+ vkCmdBindDescriptorSets(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline_layout, 0, s->vkctx.descriptor_sets_num, s->vkctx.desc_set, 0, 0);
+ vkCmdDispatch(s->exec.buf,
+ FFALIGN(s->vkctx.output_width, s->vkctx.shaders[0].local_size[0])/s->vkctx.shaders[0].local_size[0],
+ FFALIGN(s->vkctx.output_height, s->vkctx.shaders[0].local_size[1])/s->vkctx.shaders[0].local_size[1], 1);
+
+ vkEndCommandBuffer(s->exec.buf);
+
+ VkSubmitInfo s_info = {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .commandBufferCount = 1,
+ .pCommandBuffers = &s->exec.buf,
+ };
+
+ VkResult ret = vkQueueSubmit(s->exec.queue, 1, &s_info, s->exec.fence);
+ if (ret != VK_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
+ ff_vk_ret2str(ret));
+ return AVERROR_EXTERNAL;
+ } else {
+ vkWaitForFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence, VK_TRUE, UINT64_MAX);
+ vkResetFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence);
+ }
+
+fail:
+
+ for (i = 0; i < planes; i++) {
+ ff_vk_destroy_imageview(avctx, s->input_images[i].imageView);
+ ff_vk_destroy_imageview(avctx, s->output_images[i].imageView);
+ }
+
+ return err;
+}
+
+static int scale_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
+{
+ int err;
+ AVFilterContext *ctx = link->dst;
+ ScaleVulkanContext *s = ctx->priv;
+ AVFilterLink *outlink = ctx->outputs[0];
+
+ AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+ if (!out) {
+ err = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ if (!s->initialized)
+ RET(init_filter(ctx, in));
+
+ RET(process_frames(ctx, (AVVkFrame *)out->data[0],
+ (AVVkFrame *) in->data[0]));
+
+ err = av_frame_copy_props(out, in);
+ if (err < 0)
+ goto fail;
+
+ av_frame_free(&in);
+
+ return ff_filter_frame(outlink, out);
+
+fail:
+ av_frame_free(&in);
+ av_frame_free(&out);
+ return err;
+}
+
+static int scale_vulkan_config_output(AVFilterLink *outlink)
+{
+ int err;
+ AVFilterContext *avctx = outlink->src;
+ ScaleVulkanContext *s = avctx->priv;
+ AVFilterLink *inlink = outlink->src->inputs[0];
+
+ err = ff_scale_eval_dimensions(s, s->w_expr, s->h_expr, inlink, outlink,
+ &s->vkctx.output_width,
+ &s->vkctx.output_height);
+ if (err < 0)
+ return err;
+
+ if (s->output_format_string)
+ s->vkctx.output_format = av_get_pix_fmt(s->output_format_string);
+
+ if ((s->vkctx.input_format != s->vkctx.output_format) &&
+ (s->vkctx.output_format != AV_PIX_FMT_RGBA)) {
+ av_log(avctx, AV_LOG_WARNING, "Unsupported conversion %s -> %s! "
+ "Currently input format must match output format or output "
+ "format must be \"rgba\"!\n",
+ av_get_pix_fmt_name(s->vkctx.input_format),
+ av_get_pix_fmt_name(s->vkctx.output_format));
+ return AVERROR(EINVAL);
+ }
+
+ err = ff_vk_filter_config_output(outlink);
+ if (err < 0)
+ return err;
+
+ if (inlink->sample_aspect_ratio.num)
+ outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink->w, outlink->w * inlink->h}, inlink->sample_aspect_ratio);
+ else
+ outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
+
+ return 0;
+}
+
+static void scale_vulkan_uninit(AVFilterContext *avctx)
+{
+ ScaleVulkanContext *s = avctx->priv;
+
+ ff_vk_free_exec_ctx(avctx, &s->exec);
+ ff_vk_filter_uninit(avctx);
+
+ s->initialized = 0;
+}
+
+#define OFFSET(x) offsetof(ScaleVulkanContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+static const AVOption scale_vulkan_options[] = {
+ { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = FLAGS },
+ { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = FLAGS },
+ { "scaler", "Scaler function", OFFSET(scaler), AV_OPT_TYPE_INT, {.i64 = F_BILINEAR}, 0, F_NB, .flags = FLAGS, "scaler" },
+ { "bilinear", "Bilinear interpolation (fastest)", 0, AV_OPT_TYPE_CONST, {.i64 = F_BILINEAR}, 0, 0, .flags = FLAGS, "scaler" },
+ { "nearest", "Nearest (useful for pixel art)", 0, AV_OPT_TYPE_CONST, {.i64 = F_NEAREST}, 0, 0, .flags = FLAGS, "scaler" },
+ { "format", "Output video format (software format of hardware frames)", OFFSET(output_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS },
+ { NULL },
+};
+
+AVFILTER_DEFINE_CLASS(scale_vulkan);
+
+static const AVFilterPad scale_vulkan_inputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .filter_frame = &scale_vulkan_filter_frame,
+ .config_props = &ff_vk_filter_config_input,
+ },
+ { NULL }
+};
+
+static const AVFilterPad scale_vulkan_outputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = &scale_vulkan_config_output,
+ },
+ { NULL }
+};
+
+AVFilter ff_vf_scale_vulkan = {
+ .name = "scale_vulkan",
+ .description = NULL_IF_CONFIG_SMALL("Scale Vulkan frames"),
+ .priv_size = sizeof(ScaleVulkanContext),
+ .init = &ff_vk_filter_init,
+ .uninit = &scale_vulkan_uninit,
+ .query_formats = &ff_vk_filter_query_formats,
+ .inputs = scale_vulkan_inputs,
+ .outputs = scale_vulkan_outputs,
+ .priv_class = &scale_vulkan_class,
+ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
--
2.17.0
Rostislav Pehlivanov
2018-04-20 04:30:03 UTC
Permalink
Used to fix unmapping when no direct interop exists between APIs.

Signed-off-by: Rostislav Pehlivanov <***@gmail.com>
---
libavutil/hwcontext.c | 7 +++++++
libavutil/hwcontext_internal.h | 5 +++++
2 files changed, 12 insertions(+)

diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c
index 70c556ecac..f9ce2f5b13 100644
--- a/libavutil/hwcontext.c
+++ b/libavutil/hwcontext.c
@@ -871,3 +871,10 @@ fail:
av_buffer_unref(&dst_ref);
return ret;
}
+
+int ff_hwframe_map_replace(AVFrame *dst, const AVFrame *src)
+{
+ HWMapDescriptor *hwmap = (HWMapDescriptor*)dst->buf[0]->data;
+ av_frame_unref(hwmap->source);
+ return av_frame_ref(hwmap->source, src);
+}
diff --git a/libavutil/hwcontext_internal.h b/libavutil/hwcontext_internal.h
index 332062ddaa..77dc47ddd6 100644
--- a/libavutil/hwcontext_internal.h
+++ b/libavutil/hwcontext_internal.h
@@ -156,6 +156,11 @@ int ff_hwframe_map_create(AVBufferRef *hwframe_ref,
HWMapDescriptor *hwmap),
void *priv);

+/**
+ * Replace the current hwmap of dst with the one from src, used for indirect
+ * mappings like VAAPI->(DRM)->OpenCL/Vulkan where a direct interop is missing
+ */
+int ff_hwframe_map_replace(AVFrame *dst, const AVFrame *src);

extern const HWContextType ff_hwcontext_type_cuda;
extern const HWContextType ff_hwcontext_type_d3d11va;
--
2.17.0
Rostislav Pehlivanov
2018-04-20 04:30:04 UTC
Permalink
Signed-off-by: Rostislav Pehlivanov <***@gmail.com>
---
libavutil/hwcontext_opencl.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/libavutil/hwcontext_opencl.c b/libavutil/hwcontext_opencl.c
index 43b5c5ae0c..1d18da37bf 100644
--- a/libavutil/hwcontext_opencl.c
+++ b/libavutil/hwcontext_opencl.c
@@ -2171,10 +2171,7 @@ static int opencl_map_from_vaapi(AVHWFramesContext *dst_fc,
if (err < 0)
goto fail;

- // Adjust the map descriptor so that unmap works correctly.
- hwmap = (HWMapDescriptor*)dst->buf[0]->data;
- av_frame_unref(hwmap->source);
- err = av_frame_ref(hwmap->source, src);
+ err = ff_hwframe_map_replace(dst, src);

fail:
av_frame_free(&tmp);
--
2.17.0
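For context on how these two small patches fit together: an indirect mapping goes source -> intermediate -> destination, and ff_hwframe_map_replace() then re-points the map descriptor of the destination at the original source so that unmapping releases the right frame. A minimal sketch of that shape (the map_indirect() helper and its flags argument are hypothetical, error handling is abridged; the real call site shown in this series is the OpenCL hunk above):

/* Sketch of an indirect mapping path such as VAAPI -> DRM -> OpenCL/Vulkan.
 * map_indirect() and "flags" are hypothetical; error handling is abridged. */
#include "libavutil/error.h"
#include "libavutil/frame.h"
#include "libavutil/hwcontext.h"
#include "libavutil/hwcontext_internal.h"
#include "libavutil/pixfmt.h"

static int map_indirect(AVFrame *dst, const AVFrame *src, int flags)
{
    int err;
    AVFrame *tmp = av_frame_alloc();

    if (!tmp)
        return AVERROR(ENOMEM);
    tmp->format = AV_PIX_FMT_DRM_PRIME;

    /* First hop: map the source frame into the intermediate representation. */
    err = av_hwframe_map(tmp, src, flags);
    if (err < 0)
        goto fail;

    /* Second hop: map the intermediate frame into the destination API. */
    err = av_hwframe_map(dst, tmp, flags);
    if (err < 0)
        goto fail;

    /* Re-point dst's map descriptor at the original source so that
     * unmapping dst releases src rather than the temporary frame. */
    err = ff_hwframe_map_replace(dst, src);

fail:
    av_frame_free(&tmp);
    return err;
}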
Michael Niedermayer
2018-04-20 09:05:03 UTC
Permalink
The plan is to eventually be able to apply effects and encode entirely
on the GPU.
hwcontext_internal: add ff_hwframe_map_replace
hwcontext_opencl: use ff_hwframe_map_replace()
lavu: add a Vulkan hwcontext
lavfi: add common Vulkan filtering code
lavfi: add a Vulkan avgblur filter
lavfi: add a Vulkan chromatic aberration filter
lavfi: add a Vulkan overlay filter
lavfi: add a Vulkan scale filter
This seems to break the build here on Ubuntu Linux x86-64.

That's just with my normal build, no attempt to enable any Vulkan stuff.

AR libavdevice/libavdevice.a
CC libavfilter/vf_chromaticaberration_vulkan.o
In file included from libavfilter/vulkan.h:29:0,
from libavfilter/vf_chromaticaberration_vulkan.c:20:
./libavutil/hwcontext_vulkan.h:25:27: fatal error: vulkan/vulkan.h: No such file or directory
#include <vulkan/vulkan.h>
^
compilation terminated.
make: *** [libavfilter/vf_chromaticaberration_vulkan.o] Error 1



[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

If you fake or manipulate statistics in a paper in physics you will never
get a job again.
If you fake or manipulate statistics in a paper in medicine you will get
a job for life at the pharma industry.
Rostislav Pehlivanov
2018-04-20 11:59:07 UTC
Permalink
Post by Michael Niedermayer
The plan is to eventually be able to apply effects and encode entirely
on the GPU.
hwcontext_internal: add ff_hwframe_map_replace
hwcontext_opencl: use ff_hwframe_map_replace()
lavu: add a Vulkan hwcontext
lavfi: add common Vulkan filtering code
lavfi: add a Vulkan avgblur filter
lavfi: add a Vulkan chromatic aberration filter
lavfi: add a Vulkan overlay filter
lavfi: add a Vulkan scale filter
This seems to break build here on ubuntu linux x86-64
Thats just with my normal build, no attempt to enable any vulkan stuff
AR libavdevice/libavdevice.a
CC libavfilter/vf_chromaticaberration_vulkan.o
In file included from libavfilter/vulkan.h:29:0,
./libavutil/hwcontext_vulkan.h:25:27: fatal error: vulkan/vulkan.h: No
such file or directory
#include <vulkan/vulkan.h>
^
compilation terminated.
make: *** [libavfilter/vf_chromaticaberration_vulkan.o] Error 1
[...]
Thanks for testing, had a typo in configure:
-chromaticabberation_vulkan_filter_deps="vulkan libshaderc"
+chromaticaberration_vulkan_filter_deps="vulkan libshaderc"
Rostislav Pehlivanov
2018-04-21 03:46:13 UTC
Permalink
Post by Rostislav Pehlivanov
This commit adds a Vulkan hwcontext, currently capable of mapping DRM and
VAAPI frames but additional functionality can be added later to support
importing of D3D11 surfaces as well as exporting to various other APIs.
This context requires the newest stable version of the Vulkan API,
and once the new extension for DRM surfaces makes it in will also require
it (in order to properly and fully import them).
It makes use of every part of the Vulkan spec in order to ensure fastest
possible uploading, downloading and mapping of frames. On AMD, it will
also make use of mapping host memory frames in order to upload
very efficiently and with minimal CPU to hardware.
To be useful for non-RGB images an implementation with the YUV images
extension is needed. All current implementations support that with the
exception of AMD, though support is coming soon for Mesa.
---
configure | 12 +
doc/APIchanges | 3 +
libavutil/Makefile | 3 +
libavutil/hwcontext.c | 4 +
libavutil/hwcontext.h | 1 +
libavutil/hwcontext_internal.h | 1 +
libavutil/hwcontext_vulkan.c | 2125 ++++++++++++++++++++++++++++++++
libavutil/hwcontext_vulkan.h | 133 ++
libavutil/pixdesc.c | 4 +
libavutil/pixfmt.h | 4 +
libavutil/version.h | 2 +-
11 files changed, 2291 insertions(+), 1 deletion(-)
create mode 100644 libavutil/hwcontext_vulkan.c
create mode 100644 libavutil/hwcontext_vulkan.h
diff --git a/configure b/configure
index dee507cb6a..cd88f7eae1 100755
--- a/configure
+++ b/configure
--enable-opengl enable OpenGL rendering [no]
--enable-openssl enable openssl, needed for https support
if gnutls or libtls is not used [no]
+ --enable-vulkan enable Vulkan code [no]
--disable-sndio disable sndio support [autodetect]
--disable-schannel disable SChannel SSP, needed for TLS support on
Windows if openssl and gnutls are not used [autodetect]
@@ -1761,6 +1762,7 @@ HWACCEL_LIBRARY_LIST="
mmal
omx
opencl
+ vulkan
"
DOCUMENT_LIST="
@@ -2217,6 +2219,7 @@ HAVE_LIST="
opencl_dxva2
opencl_vaapi_beignet
opencl_vaapi_intel_media
+ vulkan_drm_mod
perl
pod2man
texi2html
@@ -6322,6 +6325,15 @@ enabled vdpau &&
enabled crystalhd && check_lib crystalhd "stdint.h libcrystalhd/libcrystalhd_if.h" DtsCrystalHDVersion -lcrystalhd
+enabled vulkan &&
+ check_lib vulkan "vulkan/vulkan.h" vkCreateInstance -lvulkan &&
+ check_cpp_condition vulkan vulkan/vulkan.h "defined VK_API_VERSION_1_1"
+
+if enabled_all vulkan libdrm ; then
+ check_cpp_condition vulkan vulkan/vulkan.h "defined VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME"
+ enable vulkan_drm_mod
+fi
If anyone wants to test this, keep in mind there's a mistake here: it
should be VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME and not
VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME. I have it on good authority
that the spec will get changed soon, so this check will be gone.
Also a change from the WIP patch is this supports forward mapping to DRM.
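For anyone patching this locally, the corrected configure condition amounts to the compile-time check below (a sketch only; whether the macro is present depends on the installed Vulkan headers, since the extension had not yet landed in the stable spec at the time):

/* What the corrected check_cpp_condition amounts to: only enable
 * vulkan_drm_mod when the headers expose the DRM format modifier extension.
 * VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME is the macro an updated
 * vulkan/vulkan.h would provide once the extension is published. */
#include <vulkan/vulkan.h>

#if !defined(VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME)
#error "Vulkan headers lack VK_EXT_image_drm_format_modifier"
#endif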
Carl Eugen Hoyos
2018-04-21 20:24:14 UTC
Permalink
Post by Rostislav Pehlivanov
+ [AV_PIX_FMT_P010] = VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,
+ [AV_PIX_FMT_YUV420P10] = VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16,
I don't think both can be correct (unless "PACK16" has no meaning).

Carl Eugen
Rostislav Pehlivanov
2018-04-21 21:33:41 UTC
Permalink
Post by Carl Eugen Hoyos
Post by Rostislav Pehlivanov
+ [AV_PIX_FMT_P010] = VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,
+ [AV_PIX_FMT_YUV420P10] = VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16,
I don't think both can be correct (unless "PACK16" has no meaning).
Carl Eugen
They're both correct and work.
Carl Eugen Hoyos
2018-04-21 22:29:23 UTC
Permalink
Post by Rostislav Pehlivanov
Post by Carl Eugen Hoyos
Post by Rostislav Pehlivanov
+ [AV_PIX_FMT_P010] = VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,
+ [AV_PIX_FMT_YUV420P10] = VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16,
I don't think both can be correct (unless "PACK16" has no meaning).
They're both correct and work.
That's really strange...
(Could this be a bug in the driver?)

Thank you for testing, Carl Eugen
Mark Thompson
2018-04-22 11:46:41 UTC
Permalink
Post by Carl Eugen Hoyos
Post by Rostislav Pehlivanov
Post by Carl Eugen Hoyos
Post by Rostislav Pehlivanov
+ [AV_PIX_FMT_P010] = VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,
+ [AV_PIX_FMT_YUV420P10] = VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16,
I don't think both can be correct (unless "PACK16" has no meaning).
They're both correct and work.
That's really strange...
(Could this be a bug in the driver?)
Sounds like it must be a bug somewhere.

The Vulkan specification says:

"""
VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16 specifies a unsigned normalized multi-planar format that has a 10-bit G component in the top 10 bits of each 16-bit word of plane 0, and a two-component, 32-bit BR plane 1 consisting of a 10-bit B component in the top 10 bits of the word in bytes 0..1, and a 10-bit R component in the top 10 bits of the word in bytes 2..3, the bottom 6 bits of each word set to 0.

VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16 specifies a unsigned normalized multi-planar format that has a 10-bit G component in the top 10 bits of each 16-bit word of plane 0, a 10-bit B component in the top 10 bits of each 16-bit word of plane 1, and a 10-bit R component in the top 10 bits of each 16-bit word of plane 2, with the bottom 6 bits of each word set to 0.
"""

Which I think makes it pretty clear that VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16 is indeed P010 but VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16 isn't YUV420P10 because they pack the 10 bits at different ends of the 16-bit value. If a driver is getting that wrong then it should be reported to the vendor.

I don't see any formats at all in the Vulkan specification which put the value at the low end of a containing word, but I might not be looking in the right place?

- Mark


(Vaguely related, because it made me look it up, it appears that the device will always match host-endianness:

After talking about the numeric types,
"""
The representation and endianness of these types on the host must match the representation and endianness of the same types on every physical device supported."
"""

I don't know what that actually means for little-endian graphics cards (e.g. AMD/Nvidia) in big-endian machines (e.g. POWER) - maybe Vulkan just doesn't support that, or maybe the driver can fix it up somehow - but we don't need to think about it at all.)
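To make the packing difference described above concrete, here is a small sketch (not part of the patch set) of repacking an LSB-aligned 10-bit plane into the MSB-aligned layout the VK_FORMAT_G10X6_* formats specify; P010 already stores its samples this way, which is why it maps cleanly:

#include <stddef.h>
#include <stdint.h>

/* yuv420p10 keeps each 10-bit sample in the low 10 bits of a 16-bit word;
 * the VK_FORMAT_G10X6_* formats want it in the top 10 bits, low 6 bits zero. */
static void repack_lsb10_to_msb10(uint16_t *dst, const uint16_t *src, size_t n)
{
    for (size_t i = 0; i < n; i++)
        dst[i] = (uint16_t)((src[i] & 0x3FF) << 6);
}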
Rostislav Pehlivanov
2018-04-22 16:21:52 UTC
Permalink
Post by Mark Thompson
Post by Carl Eugen Hoyos
Post by Rostislav Pehlivanov
Post by Carl Eugen Hoyos
Post by Rostislav Pehlivanov
+ [AV_PIX_FMT_P010] =
VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,
+ [AV_PIX_FMT_YUV420P10] =
VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16,
I don't think both can be correct (unless "PACK16" has no meaning).
They're both correct and work.
That's really strange...
(Could this be a bug in the driver?)
Sounds like it must be a bug somewhere.
"""
VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16 specifies a unsigned
normalized multi-planar format that has a 10-bit G component in the top 10
bits of each 16-bit word of plane 0, and a two-component, 32-bit BR plane 1
consisting of a 10-bit B component in the top 10 bits of the word in bytes
0..1, and a 10-bit R component in the top 10 bits of the word in bytes
2..3, the bottom 6 bits of each word set to 0.
VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16 specifies a unsigned
normalized multi-planar format that has a 10-bit G component in the top 10
bits of each 16-bit word of plane 0, a 10-bit B component in the top 10
bits of each 16-bit word of plane 1, and a 10-bit R component in the top 10
bits of each 16-bit word of plane 2, with the bottom 6 bits of each word
set to 0.
"""
Which I think makes it pretty clear that VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16
is indeed P010 but VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16
isn't YUV420P10 because they pack the 10 bits at different ends of the
16-bit value. If a driver is getting that wrong then it should be reported
to the vendor.
I don't see any formats at all in the Vulkan specification which put the
value at the low end of a containing word, but I might not be looking in
the right place?
- Mark
(Vaguely related, because it made me look it up, it appears that the
After talking about the numeric types,
"""
The representation and endianness of these types on the host must match
the representation and endianness of the same types on every physical
device supported."
"""
I don't know what that actually means for little-endian graphics cards
(e.g. AMD/Nvidia) in big-endian machines (e.g. POWER) - maybe Vulkan just
doesn't support that, or maybe the driver can fix it up somehow - but we
don't need to think about it at all.)
Something's weird:
Rostislav Pehlivanov
2018-04-22 16:28:23 UTC
Permalink
Post by Mark Thompson
Post by Carl Eugen Hoyos
Post by Rostislav Pehlivanov
Post by Carl Eugen Hoyos
Post by Rostislav Pehlivanov
+ [AV_PIX_FMT_P010] =
VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,
+ [AV_PIX_FMT_YUV420P10] =
VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16,
I don't think both can be correct (unless "PACK16" has no meaning).
They're both correct and work.
That's really strange...
(Could this be a bug in the driver?)
Sounds like it must be a bug somewhere.
"""
VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16 specifies a unsigned
normalized multi-planar format that has a 10-bit G component in the top 10
bits of each 16-bit word of plane 0, and a two-component, 32-bit BR plane 1
consisting of a 10-bit B component in the top 10 bits of the word in bytes
0..1, and a 10-bit R component in the top 10 bits of the word in bytes
2..3, the bottom 6 bits of each word set to 0.
VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16 specifies a
unsigned normalized multi-planar format that has a 10-bit G component in
the top 10 bits of each 16-bit word of plane 0, a 10-bit B component in the
top 10 bits of each 16-bit word of plane 1, and a 10-bit R component in the
top 10 bits of each 16-bit word of plane 2, with the bottom 6 bits of each
word set to 0.
"""
Which I think makes it pretty clear that VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16
is indeed P010 but VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16
isn't YUV420P10 because they pack the 10 bits at different ends of the
16-bit value. If a driver is getting that wrong then it should be reported
to the vendor.
I don't see any formats at all in the Vulkan specification which put the
value at the low end of a containing word, but I might not be looking in
the right place?
- Mark
(Vaguely related, because it made me look it up, it appears that the
After talking about the numeric types,
"""
The representation and endianness of these types on the host must match
the representation and endianness of the same types on every physical
device supported."
"""
I don't know what that actually means for little-endian graphics cards
(e.g. AMD/Nvidia) in big-endian machines (e.g. POWER) - maybe Vulkan just
doesn't support that, or maybe the driver can fix it up somehow - but we
don't need to think about it at all.)
Sorry, pushed the wrong button.

For this filter chain:
format=<SRC_FORMAT>,hwupload,scale_vulkan=w=1024:h=-1:format=rgba,hwdownload,format=rgba

This is what happens for each SRC_FORMAT:
NVIDIA 960M with binary drivers:
p010 - works fine
yuv420p10le - mostly green screen with some minor variations, enough to
make out the original video
yuv420p10be - works fine

Intel 530:
p010 - works fine
yuv420p10le - works fine
yuv420p10be - works fine

I'm not entirely sure what to make of that. How does the Intel deal with
formats with different endianness when there's no way to indicate endianness
at all? Why does Nvidia deal with big endian when you said it's little
endian?
Mark Thompson
2018-04-22 16:36:37 UTC
Permalink
Post by Rostislav Pehlivanov
Post by Mark Thompson
Post by Carl Eugen Hoyos
Post by Rostislav Pehlivanov
Post by Carl Eugen Hoyos
Post by Rostislav Pehlivanov
+ [AV_PIX_FMT_P010] =
VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,
+ [AV_PIX_FMT_YUV420P10] =
VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16,
I don't think both can be correct (unless "PACK16" has no meaning).
They're both correct and work.
That's really strange...
(Could this be a bug in the driver?)
Sounds like it must be a bug somewhere.
"""
VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16 specifies a unsigned
normalized multi-planar format that has a 10-bit G component in the top 10
bits of each 16-bit word of plane 0, and a two-component, 32-bit BR plane 1
consisting of a 10-bit B component in the top 10 bits of the word in bytes
0..1, and a 10-bit R component in the top 10 bits of the word in bytes
2..3, the bottom 6 bits of each word set to 0.
VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16 specifies a
unsigned normalized multi-planar format that has a 10-bit G component in
the top 10 bits of each 16-bit word of plane 0, a 10-bit B component in the
top 10 bits of each 16-bit word of plane 1, and a 10-bit R component in the
top 10 bits of each 16-bit word of plane 2, with the bottom 6 bits of each
word set to 0.
"""
Which I think makes it pretty clear that VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16
is indeed P010 but VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16
isn't YUV420P10 because they pack the 10 bits at different ends of the
16-bit value. If a driver is getting that wrong then it should be reported
to the vendor.
I don't see any formats at all in the Vulkan specification which put the
value at the low end of a containing word, but I might not be looking in
the right place?
- Mark
(Vaguely related, because it made me look it up, it appears that the
After talking about the numeric types,
"""
The representation and endianness of these types on the host must match
the representation and endianness of the same types on every physical
device supported."
"""
I don't know what that actually means for little-endian graphics cards
(e.g. AMD/Nvidia) in big-endian machines (e.g. POWER) - maybe Vulkan just
doesn't support that, or maybe the driver can fix it up somehow - but we
don't need to think about it at all.)
Sorry, pushed the wrong button.
format=<SRC_FORMAT>,hwupload,scale_vulkan=w=1024:h=-1:format=rgba,hwdownload,format=rgba
p010 - works fine
yuv420p10le - mostly green screen with some minor variations, enough to
make out the original video
yuv420p10be - works fine
p010 - works fine
yuv420p10le - works fine
yuv420p10be - works fine
I'm not entirely sure what to make of that. How does the intel deal with
formats with different endianess when there's no way to indicate endianess
at all? Why does nvidia deal with big endian when you said its little
endian?
hwupload checks the supported formats with get_constraints and only exposes the supported ones to lavfi query_formats. Probably some auto-conversion is happening for the big-endian formats? Maybe on Intel the three-plane format also isn't supported, and so auto-conversion happens there too?

I think the green screen is what we would expect from the above analysis, since all you would be getting is the high 4 bits of each component in the low 4 bits of the output.

- Mark
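A quick back-of-the-envelope check of that explanation (my arithmetic, assuming LSB-aligned data is sampled as if it were MSB-aligned): the shader effectively sees v >> 6, so a mid-grey chroma value collapses towards zero, which after YUV-to-RGB conversion comes out strongly green:

#include <stdio.h>

int main(void)
{
    /* A mid-grey 10-bit chroma sample (512) stored LSB-aligned, as in yuv420p10. */
    unsigned word = 512;

    /* A sampler expecting the MSB-aligned layout of the VK_FORMAT_*_3PACK16
     * formats effectively reads only the top 10 bits of the 16-bit word. */
    unsigned misread = word >> 6; /* 512 >> 6 == 8 */

    printf("intended %u/1023, sampled as %u/1023\n", word, misread);
    return 0;
}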
Mark Thompson
2018-04-22 16:51:47 UTC
Permalink
Post by Mark Thompson
Post by Rostislav Pehlivanov
Post by Mark Thompson
Post by Carl Eugen Hoyos
Post by Rostislav Pehlivanov
Post by Carl Eugen Hoyos
Post by Rostislav Pehlivanov
+ [AV_PIX_FMT_P010] =
VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,
+ [AV_PIX_FMT_YUV420P10] =
VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16,
I don't think both can be correct (unless "PACK16" has no meaning).
They're both correct and work.
That's really strange...
(Could this be a bug in the driver?)
Sounds like it must be a bug somewhere.
"""
VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16 specifies a unsigned
normalized multi-planar format that has a 10-bit G component in the top 10
bits of each 16-bit word of plane 0, and a two-component, 32-bit BR plane 1
consisting of a 10-bit B component in the top 10 bits of the word in bytes
0..1, and a 10-bit R component in the top 10 bits of the word in bytes
2..3, the bottom 6 bits of each word set to 0.
VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16 specifies a
unsigned normalized multi-planar format that has a 10-bit G component in
the top 10 bits of each 16-bit word of plane 0, a 10-bit B component in the
top 10 bits of each 16-bit word of plane 1, and a 10-bit R component in the
top 10 bits of each 16-bit word of plane 2, with the bottom 6 bits of each
word set to 0.
"""
Which I think makes it pretty clear that VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16
is indeed P010 but VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16
isn't YUV420P10 because they pack the 10 bits at different ends of the
16-bit value. If a driver is getting that wrong then it should be reported
to the vendor.
I don't see any formats at all in the Vulkan specification which put the
value at the low end of a containing word, but I might not be looking in
the right place?
- Mark
(Vaguely related, because it made me look it up, it appears that the
After talking about the numeric types,
"""
The representation and endianness of these types on the host must match
the representation and endianness of the same types on every physical
device supported."
"""
I don't know what that actually means for little-endian graphics cards
(e.g. AMD/Nvidia) in big-endian machines (e.g. POWER) - maybe Vulkan just
doesn't support that, or maybe the driver can fix it up somehow - but we
don't need to think about it at all.)
Sorry, pushed the wrong button.
format=<SRC_FORMAT>,hwupload,scale_vulkan=w=1024:h=-1:format=rgba,hwdownload,format=rgba
p010 - works fine
yuv420p10le - mostly green screen with some minor variations, enough to
make out the original video
yuv420p10be - works fine
p010 - works fine
yuv420p10le - works fine
yuv420p10be - works fine
I'm not entirely sure what to make of that. How does the intel deal with
formats with different endianess when there's no way to indicate endianess
at all? Why does nvidia deal with big endian when you said its little
endian?
hwupload checks the supported formats with get_constraints and only exposes the supported ones to lavfi query_formats. Probably some auto-conversion is happening for the big-endian formats? Maybe on Intel the three-plane format also isn't supported, and so auto-conversion happens there too?
I think the green screen is what we would expect from the above analysis, since all you would be getting is the high 4 bits of each component in the low 4 bits of the output.
Assuming that by Intel you mean Mesa rather than Windows blob, <https://cgit.freedesktop.org/mesa/mesa/tree/src/intel/vulkan/anv_formats.c#n329> says that none of these formats are supported (P010 or YUV420P10). On that driver it would be converting to something else in software for all of them.

- Mark
Rostislav Pehlivanov
2018-04-22 16:59:33 UTC
Permalink
Post by Mark Thompson
Assuming that by Intel you mean Mesa rather than Windows blob,
<https://cgit.freedesktop.org/mesa/mesa/tree/src/intel/vulkan/anv_formats.c#n329>
says that none of these formats are supported (P010 or YUV420P10). On that
driver it would be converting to something else in software for all of them.
Ah, seems like it does.
I've locally removed YUV420P10 from the list.
Rostislav Pehlivanov
2018-04-22 17:12:27 UTC
Permalink
Post by Rostislav Pehlivanov
Ah, seems like it does.
I've locally removed YUV420P10 from the list.
I looked over the supported formats; it seems all the 4:x:x 10- and 12-bit ones
were defined as bottom-zeroed, so I removed AV_PIX_FMT_YUV420P10,
AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV422P12,
AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV444P12.

All the other 3-plane ones fit in exactly 8 or 16 bits.
