ffv1enc_vulkan: refactor code to support sharing with decoder

The shaders were written to support sharing, but needed slight
tweaking.
This commit is contained in:
Lynne
2024-12-24 15:57:22 +09:00
parent d3d2e254eb
commit b2ebe9884e
8 changed files with 304 additions and 189 deletions

View File

@@ -18,7 +18,6 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/crc.h"
#include "libavutil/mem.h"
#include "libavutil/vulkan.h"
#include "libavutil/vulkan_spirv.h"
@@ -32,6 +31,7 @@
#include "ffv1.h"
#include "ffv1enc.h"
#include "ffv1_vulkan.h"
/* Parallel Golomb alignment */
#define LG_ALIGN_W 32
@@ -122,28 +122,10 @@ extern const char *ff_source_ffv1_enc_setup_comp;
extern const char *ff_source_ffv1_enc_comp;
extern const char *ff_source_ffv1_enc_rgb_comp;
typedef struct FFv1VkRCTParameters {
int offset;
uint8_t bits;
uint8_t planar_rgb;
uint8_t transparency;
uint8_t padding[1];
} FFv1VkRCTParameters;
typedef struct FFv1VkResetParameters {
VkDeviceAddress slice_state;
uint32_t plane_state_size;
uint32_t context_count;
uint8_t codec_planes;
uint8_t key_frame;
uint8_t padding[3];
} FFv1VkResetParameters;
typedef struct FFv1VkParameters {
VkDeviceAddress slice_state;
VkDeviceAddress scratch_data;
VkDeviceAddress out_data;
uint64_t slice_size_max;
int32_t sar[2];
uint32_t chroma_shift[2];
@@ -151,6 +133,7 @@ typedef struct FFv1VkParameters {
uint32_t plane_state_size;
uint32_t context_count;
uint32_t crcref;
uint32_t slice_size_max;
uint8_t bits_per_raw_sample;
uint8_t context_model;
@@ -175,7 +158,6 @@ static void add_push_data(FFVulkanShader *shd)
GLSLC(1, u8buf slice_state; );
GLSLC(1, u8buf scratch_data; );
GLSLC(1, u8buf out_data; );
GLSLC(1, uint64_t slice_size_max; );
GLSLC(0, );
GLSLC(1, ivec2 sar; );
GLSLC(1, uvec2 chroma_shift; );
@@ -183,6 +165,7 @@ static void add_push_data(FFVulkanShader *shd)
GLSLC(1, uint plane_state_size; );
GLSLC(1, uint context_count; );
GLSLC(1, uint32_t crcref; );
GLSLC(1, uint32_t slice_size_max; );
GLSLC(0, );
GLSLC(1, uint8_t bits_per_raw_sample; );
GLSLC(1, uint8_t context_model; );
@@ -492,7 +475,6 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
.slice_state = slice_data_buf->address + f->slice_count*256,
.scratch_data = tmp_data_buf->address,
.out_data = out_data_buf->address,
.slice_size_max = out_data_buf->size / f->slice_count,
.bits_per_raw_sample = f->bits_per_raw_sample,
.sar[0] = pict->sample_aspect_ratio.num,
.sar[1] = pict->sample_aspect_ratio.den,
@@ -501,6 +483,7 @@ static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
.plane_state_size = plane_state_size,
.context_count = context_count,
.crcref = f->crcref,
.slice_size_max = out_data_buf->size / f->slice_count,
.context_model = fv->ctx.context_model,
.version = f->version,
.micro_version = f->micro_version,
@@ -966,7 +949,6 @@ static void define_shared_code(AVCodecContext *avctx, FFVulkanShader *shd)
GLSLF(0, #define TYPE int%i_t ,smp_bits);
GLSLF(0, #define VTYPE2 i%ivec2 ,smp_bits);
GLSLF(0, #define VTYPE3 i%ivec3 ,smp_bits);
GLSLD(ff_source_common_comp);
GLSLD(ff_source_rangecoder_comp);
if (f->ac == AC_GOLOMB_RICE)
@@ -993,6 +975,10 @@ static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
1, 1, 1,
0));
/* Common codec header */
GLSLD(ff_source_common_comp);
add_push_data(shd);
av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
@@ -1038,8 +1024,6 @@ static int init_setup_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
};
RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 2, 0, 0));
add_push_data(shd);
GLSLD(ff_source_ffv1_enc_setup_comp);
RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main",
@@ -1074,6 +1058,22 @@ static int init_reset_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
wg_dim, 1, 1,
0));
/* Common codec header */
GLSLD(ff_source_common_comp);
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
GLSLC(1, u8buf slice_state; );
GLSLC(1, uint plane_state_size; );
GLSLC(1, uint context_count; );
GLSLC(1, uint8_t codec_planes; );
GLSLC(1, uint8_t key_frame; );
GLSLC(1, uint8_t version; );
GLSLC(1, uint8_t micro_version; );
GLSLC(1, uint8_t padding[1]; );
GLSLC(0, }; );
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters),
VK_SHADER_STAGE_COMPUTE_BIT);
av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
@@ -1110,17 +1110,6 @@ static int init_reset_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
};
RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 1, 0, 0));
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
GLSLC(1, u8buf slice_state; );
GLSLC(1, uint plane_state_size; );
GLSLC(1, uint context_count; );
GLSLC(1, uint8_t codec_planes; );
GLSLC(1, uint8_t key_frame; );
GLSLC(1, uint8_t padding[3]; );
GLSLC(0, }; );
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters),
VK_SHADER_STAGE_COMPUTE_BIT);
GLSLD(ff_source_ffv1_reset_comp);
RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main",
@@ -1164,6 +1153,22 @@ static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
wg_count, wg_count, 1,
0));
/* Common codec header */
GLSLD(ff_source_common_comp);
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
GLSLC(1, int offset; );
GLSLC(1, uint8_t bits; );
GLSLC(1, uint8_t planar_rgb; );
GLSLC(1, uint8_t color_planes; );
GLSLC(1, uint8_t transparency; );
GLSLC(1, uint8_t version; );
GLSLC(1, uint8_t micro_version; );
GLSLC(1, uint8_t padding[2]; );
GLSLC(0, }; );
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTParameters),
VK_SHADER_STAGE_COMPUTE_BIT);
av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
@@ -1220,16 +1225,6 @@ static int init_rct_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
};
RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0));
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
GLSLC(1, int offset; );
GLSLC(1, uint8_t bits; );
GLSLC(1, uint8_t planar_rgb; );
GLSLC(1, uint8_t transparency; );
GLSLC(1, uint8_t padding[1]; );
GLSLC(0, }; );
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTParameters),
VK_SHADER_STAGE_COMPUTE_BIT);
GLSLD(ff_source_ffv1_enc_rct_comp);
RET(spv->compile_shader(&fv->s, spv, shd, &spv_data, &spv_len, "main",
@@ -1268,6 +1263,11 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
1, 1, 1,
0));
/* Common codec header */
GLSLD(ff_source_common_comp);
add_push_data(shd);
av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
@@ -1328,8 +1328,6 @@ static int init_encode_shader(AVCodecContext *avctx, FFVkSPIRVCompiler *spv)
};
RET(ff_vk_shader_add_descriptor_set(&fv->s, shd, desc_set, 3, 0, 0));
add_push_data(shd);
/* Assemble the shader body */
GLSLD(ff_source_ffv1_enc_common_comp);
@@ -1356,110 +1354,6 @@ fail:
return err;
}
static int init_state_transition_data(AVCodecContext *avctx)
{
int err;
VulkanEncodeFFv1Context *fv = avctx->priv_data;
uint8_t *buf_mapped;
size_t buf_len = 512*sizeof(uint8_t);
RET(ff_vk_create_buf(&fv->s, &fv->rangecoder_static_buf,
buf_len,
NULL, NULL,
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
RET(ff_vk_map_buffer(&fv->s, &fv->rangecoder_static_buf,
&buf_mapped, 0));
for (int i = 1; i < 256; i++) {
buf_mapped[256 + i] = fv->ctx.state_transition[i];
buf_mapped[256 - i] = 256 - (int)fv->ctx.state_transition[i];
}
RET(ff_vk_unmap_buffer(&fv->s, &fv->rangecoder_static_buf, 1));
/* Update descriptors */
RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
&fv->setup, 0, 0, 0,
&fv->rangecoder_static_buf,
0, fv->rangecoder_static_buf.size,
VK_FORMAT_UNDEFINED));
RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
&fv->enc, 0, 0, 0,
&fv->rangecoder_static_buf,
0, fv->rangecoder_static_buf.size,
VK_FORMAT_UNDEFINED));
fail:
return err;
}
static int init_quant_table_data(AVCodecContext *avctx)
{
int err;
VulkanEncodeFFv1Context *fv = avctx->priv_data;
int16_t *buf_mapped;
size_t buf_len = MAX_QUANT_TABLES*
MAX_CONTEXT_INPUTS*
MAX_QUANT_TABLE_SIZE*sizeof(int16_t);
RET(ff_vk_create_buf(&fv->s, &fv->quant_buf,
buf_len,
NULL, NULL,
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
RET(ff_vk_map_buffer(&fv->s, &fv->quant_buf, (void *)&buf_mapped, 0));
memcpy(buf_mapped, fv->ctx.quant_tables,
sizeof(fv->ctx.quant_tables));
RET(ff_vk_unmap_buffer(&fv->s, &fv->quant_buf, 1));
RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
&fv->enc, 0, 1, 0,
&fv->quant_buf,
0, fv->quant_buf.size,
VK_FORMAT_UNDEFINED));
fail:
return err;
}
static int init_crc_table_data(AVCodecContext *avctx)
{
int err;
VulkanEncodeFFv1Context *fv = avctx->priv_data;
uint32_t *buf_mapped;
size_t buf_len = 256*sizeof(int32_t);
RET(ff_vk_create_buf(&fv->s, &fv->crc_tab_buf,
buf_len,
NULL, NULL,
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
RET(ff_vk_map_buffer(&fv->s, &fv->crc_tab_buf, (void *)&buf_mapped, 0));
memcpy(buf_mapped, av_crc_get_table(AV_CRC_32_IEEE), buf_len);
RET(ff_vk_unmap_buffer(&fv->s, &fv->crc_tab_buf, 1));
RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
&fv->enc, 0, 2, 0,
&fv->crc_tab_buf,
0, fv->crc_tab_buf.size,
VK_FORMAT_UNDEFINED));
fail:
return err;
}
static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
{
int err;
@@ -1703,20 +1597,50 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
spv->uninit(&spv);
/* Range coder data */
err = init_state_transition_data(avctx);
err = ff_ffv1_vk_init_state_transition_data(&fv->s,
&fv->rangecoder_static_buf,
f);
if (err < 0)
return err;
/* Quantization table data */
err = init_quant_table_data(avctx);
err = ff_ffv1_vk_init_quant_table_data(&fv->s,
&fv->quant_buf,
f);
if (err < 0)
return err;
/* CRC table buffer */
err = init_crc_table_data(avctx);
err = ff_ffv1_vk_init_crc_table_data(&fv->s,
&fv->crc_tab_buf,
f);
if (err < 0)
return err;
/* Update setup global descriptors */
RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
&fv->setup, 0, 0, 0,
&fv->rangecoder_static_buf,
0, fv->rangecoder_static_buf.size,
VK_FORMAT_UNDEFINED));
/* Update encode global descriptors */
RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
&fv->enc, 0, 0, 0,
&fv->rangecoder_static_buf,
0, fv->rangecoder_static_buf.size,
VK_FORMAT_UNDEFINED));
RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
&fv->enc, 0, 1, 0,
&fv->quant_buf,
0, fv->quant_buf.size,
VK_FORMAT_UNDEFINED));
RET(ff_vk_shader_update_desc_buffer(&fv->s, &fv->exec_pool.contexts[0],
&fv->enc, 0, 2, 0,
&fv->crc_tab_buf,
0, fv->crc_tab_buf.size,
VK_FORMAT_UNDEFINED));
/* Temporary frame */
fv->frame = av_frame_alloc();
if (!fv->frame)
@@ -1735,7 +1659,8 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
if (!fv->buf_regions)
return AVERROR(ENOMEM);
return 0;
fail:
return err;
}
static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx)