mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2025-05-17 23:17:41 +08:00
vulkan_ffv1: cache only 2 lines when decoding RGB
This reduces the intermediate VRAM used for RGB decoding by a factor of 100 for 6k video. This also speeds the decoder up by 16% for 4k RGB24 and 31% for 6k video. This is equivalent to what the software decoder does, but with fewer pointers.
This commit is contained in:
@ -14,8 +14,7 @@ OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += vulkan/common.o \
|
||||
OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/common.o \
|
||||
vulkan/rangecoder.o vulkan/ffv1_vlc.o \
|
||||
vulkan/ffv1_common.o vulkan/ffv1_reset.o \
|
||||
vulkan/ffv1_dec_setup.o vulkan/ffv1_dec.o \
|
||||
vulkan/ffv1_dec_rct.o
|
||||
vulkan/ffv1_dec_setup.o vulkan/ffv1_dec.o
|
||||
|
||||
VULKAN = $(subst $(SRC_PATH)/,,$(wildcard $(SRC_PATH)/libavcodec/vulkan/*.comp))
|
||||
.SECONDARY: $(VULKAN:.comp=.c)
|
||||
|
@ -20,23 +20,69 @@
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
ivec2 get_pred(ivec2 pos, ivec2 off, int p, int sw, uint8_t quant_table_idx)
|
||||
/*
 * LADDR(p): maps a slice-relative sample offset to the offset actually used
 * when addressing the dec[] images.
 *
 *  - RGB:     the y component is masked with RGB_LBUF, so only
 *             RGB_LINECACHE rows are ever addressed. The mask acts as a
 *             wrap-around because RGB_LINECACHE is a power of two.
 *  - non-RGB: identity, the offset is used unchanged.
 */
#ifdef RGB
#define RGB_LINECACHE 2
#define RGB_LBUF (RGB_LINECACHE - 1)
#define LADDR(p) (ivec2((p).x, ((p).y & RGB_LBUF)))
#else
#define LADDR(p) (p)
#endif
|
||||
|
||||
#ifdef RGB
|
||||
/*
 * RGB variant: derive the context index and the predicted value for the
 * sample at slice-relative offset `off` in plane `p`.
 *
 *   sp              base position added to every LADDR()-wrapped offset
 *   off             offset of the sample inside the slice
 *   p               index into dec[]
 *   sw              line width, used to clamp the top-right neighbour
 *   quant_table_idx which quant_table[] set to use
 *
 * Returns ivec2(context, prediction).
 *
 * No explicit border checks are done here: out-of-slice neighbours resolve,
 * through LADDR(), to cache rows that are expected to have been zeroed
 * before decoding starts.
 */
ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx)
{
    /* In the first column, the "left" neighbours are redirected one column
     * to the right and one row up. */
    const ivec2 border1 = (off.x == 0) ? ivec2(1, -1) : ivec2(0, 0);

    const ivec2 o_topleft  = off + ivec2(-1, -1) + border1;
    const ivec2 o_top      = off + ivec2(0, -1);
    const ivec2 o_topright = off + ivec2(min(1, sw - off.x - 1), -1);
    const ivec2 o_left     = off + ivec2(-1, 0) + border1;

    /* Previous-row neighbours: top-left, top, top-right */
    VTYPE3 top = VTYPE3(TYPE(imageLoad(dec[p], sp + LADDR(o_topleft))[0]),
                        TYPE(imageLoad(dec[p], sp + LADDR(o_top))[0]),
                        TYPE(imageLoad(dec[p], sp + LADDR(o_topright))[0]));

    /* Left neighbour. For off == (0, 0) this resolves to offset (0, -1),
     * i.e. the previous row of the same slice, which reads back as zero
     * as long as the cache was cleared; hence no special case. */
    TYPE cur = TYPE(imageLoad(dec[p], sp + LADDR(o_left))[0]);

    int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] +
               quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
               quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK];

    /* The two extra context inputs are only evaluated when their tables
     * are in use. */
    const bool extended_ctx = (quant_table[quant_table_idx][3][127] != 0) ||
                              (quant_table[quant_table_idx][4][127] != 0);
    if (extended_ctx) {
        /* Sample two to the left; stays zero in the first column */
        TYPE cur2 = TYPE(0);
        if (off.x > 0) {
            const ivec2 border2 = (off.x == 1) ? ivec2(1, -1) : ivec2(0, 0);
            cur2 = TYPE(imageLoad(dec[p], sp + LADDR(off + ivec2(-2, 0) + border2))[0]);
        }
        base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];

        /* Sample two rows up: with the wrapped row addressing it lives at
         * the slot the current sample is about to overwrite. */
        TYPE top2 = TYPE(imageLoad(dec[p], sp + LADDR(off))[0]);
        base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK];
    }

    /* context, prediction */
    return ivec2(base, predict(cur, VTYPE2(top)));
}
|
||||
#else
|
||||
ivec2 get_pred(ivec2 sp, ivec2 off, int p, int sw, uint8_t quant_table_idx)
|
||||
{
|
||||
const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0);
|
||||
sp += off;
|
||||
|
||||
VTYPE3 top = VTYPE3(TYPE(0),
|
||||
TYPE(0),
|
||||
TYPE(0));
|
||||
if (off.y > 0 && off != ivec2(0, 1))
|
||||
top[0] = TYPE(imageLoad(dst[p], pos + ivec2(-1, -1) + yoff_border1)[0]);
|
||||
top[0] = TYPE(imageLoad(dec[p], sp + ivec2(-1, -1) + yoff_border1)[0]);
|
||||
if (off.y > 0) {
|
||||
top[1] = TYPE(imageLoad(dst[p], pos + ivec2(0, -1))[0]);
|
||||
top[2] = TYPE(imageLoad(dst[p], pos + ivec2(min(1, sw - off.x - 1), -1))[0]);
|
||||
top[1] = TYPE(imageLoad(dec[p], sp + ivec2(0, -1))[0]);
|
||||
top[2] = TYPE(imageLoad(dec[p], sp + ivec2(min(1, sw - off.x - 1), -1))[0]);
|
||||
}
|
||||
|
||||
TYPE cur = TYPE(0);
|
||||
if (off != ivec2(0, 0))
|
||||
cur = TYPE(imageLoad(dst[p], pos + ivec2(-1, 0) + yoff_border1)[0]);
|
||||
cur = TYPE(imageLoad(dec[p], sp + ivec2(-1, 0) + yoff_border1)[0]);
|
||||
|
||||
int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] +
|
||||
quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
|
||||
@ -47,19 +93,20 @@ ivec2 get_pred(ivec2 pos, ivec2 off, int p, int sw, uint8_t quant_table_idx)
|
||||
TYPE cur2 = TYPE(0);
|
||||
if (off.x > 0 && off != ivec2(1, 0)) {
|
||||
const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0);
|
||||
cur2 = TYPE(imageLoad(dst[p], pos + ivec2(-2, 0) + yoff_border2)[0]);
|
||||
cur2 = TYPE(imageLoad(dec[p], sp + ivec2(-2, 0) + yoff_border2)[0]);
|
||||
}
|
||||
base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];
|
||||
|
||||
TYPE top2 = TYPE(0);
|
||||
if (off.y > 1)
|
||||
top2 = TYPE(imageLoad(dst[p], pos + ivec2(0, -2))[0]);
|
||||
top2 = TYPE(imageLoad(dec[p], sp + ivec2(0, -2))[0]);
|
||||
base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK];
|
||||
}
|
||||
|
||||
/* context, prediction */
|
||||
return ivec2(base, predict(cur, VTYPE2(top)));
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef GOLOMB
|
||||
int get_isymbol(inout RangeCoder c, uint64_t state)
|
||||
@ -89,11 +136,8 @@ int get_isymbol(inout RangeCoder c, uint64_t state)
|
||||
return get_rac(c, state - 11 + min(e, 10)) ? -a : a;
|
||||
}
|
||||
|
||||
void decode_line_pcm(inout SliceContext sc, int y, int p, int bits)
|
||||
void decode_line_pcm(inout SliceContext sc, ivec2 sp, int w, int y, int p, int bits)
|
||||
{
|
||||
ivec2 sp = sc.slice_pos;
|
||||
int w = sc.slice_dim.x;
|
||||
|
||||
#ifndef RGB
|
||||
if (p > 0 && p < 3) {
|
||||
w >>= chroma_shift.x;
|
||||
@ -106,16 +150,14 @@ void decode_line_pcm(inout SliceContext sc, int y, int p, int bits)
|
||||
for (int i = (bits - 1); i >= 0; i--)
|
||||
v |= uint(get_rac_equi(sc.c)) << i;
|
||||
|
||||
imageStore(dst[p], sp + ivec2(x, y), uvec4(v));
|
||||
imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(v));
|
||||
}
|
||||
}
|
||||
|
||||
void decode_line(inout SliceContext sc, uint64_t state,
|
||||
int y, int p, int bits, const int run_index)
|
||||
void decode_line(inout SliceContext sc, ivec2 sp, int w,
|
||||
int y, int p, int bits, uint64_t state,
|
||||
const int run_index)
|
||||
{
|
||||
ivec2 sp = sc.slice_pos;
|
||||
int w = sc.slice_dim.x;
|
||||
|
||||
#ifndef RGB
|
||||
if (p > 0 && p < 3) {
|
||||
w >>= chroma_shift.x;
|
||||
@ -124,7 +166,7 @@ void decode_line(inout SliceContext sc, uint64_t state,
|
||||
#endif
|
||||
|
||||
for (int x = 0; x < w; x++) {
|
||||
ivec2 pr = get_pred(sp + ivec2(x, y), ivec2(x, y), p, w,
|
||||
ivec2 pr = get_pred(sp, ivec2(x, y), p, w,
|
||||
sc.quant_table_idx[p]);
|
||||
|
||||
int diff = get_isymbol(sc.c, state + CONTEXT_SIZE*abs(pr[0]));
|
||||
@ -132,18 +174,16 @@ void decode_line(inout SliceContext sc, uint64_t state,
|
||||
diff = -diff;
|
||||
|
||||
uint v = zero_extend(pr[1] + diff, bits);
|
||||
imageStore(dst[p], sp + ivec2(x, y), uvec4(v));
|
||||
imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(v));
|
||||
}
|
||||
}
|
||||
|
||||
#else /* GOLOMB */
|
||||
|
||||
void decode_line(inout SliceContext sc, uint64_t state,
|
||||
int y, int p, int bits, inout int run_index)
|
||||
void decode_line(inout SliceContext sc, ivec2 sp, int w,
|
||||
int y, int p, int bits, uint64_t state,
|
||||
inout int run_index)
|
||||
{
|
||||
ivec2 sp = sc.slice_pos;
|
||||
int w = sc.slice_dim.x;
|
||||
|
||||
#ifndef RGB
|
||||
if (p > 0 && p < 3) {
|
||||
w >>= chroma_shift.x;
|
||||
@ -157,7 +197,7 @@ void decode_line(inout SliceContext sc, uint64_t state,
|
||||
for (int x = 0; x < w; x++) {
|
||||
ivec2 pos = sp + ivec2(x, y);
|
||||
int diff;
|
||||
ivec2 pr = get_pred(sp + ivec2(x, y), ivec2(x, y), p, w,
|
||||
ivec2 pr = get_pred(sp, ivec2(x, y), p, w,
|
||||
sc.quant_table_idx[p]);
|
||||
|
||||
VlcState sb = VlcState(state + VLC_STATE_SIZE*abs(pr[0]));
|
||||
@ -202,7 +242,44 @@ void decode_line(inout SliceContext sc, uint64_t state,
|
||||
diff = -diff;
|
||||
|
||||
uint v = zero_extend(pr[1] + diff, bits);
|
||||
imageStore(dst[p], sp + ivec2(x, y), uvec4(v));
|
||||
imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(v));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef RGB
|
||||
/*
 * Applies the RCT arithmetic to one decoded sample and reorders the
 * result for output.
 *
 *   pix       input sample; .b and .r carry rct_offset as a bias
 *   rct_coef  weights: .x multiplies the r difference, .y the b difference
 *
 * Returns the sample with its components permuted through fmt_lut[].
 * The alpha component is passed through untouched.
 */
ivec4 transform_sample(ivec4 pix, ivec2 rct_coef)
{
    /* Strip the bias from the two difference channels */
    const int db = pix.b - rct_offset;
    const int dr = pix.r - rct_offset;

    /* Recover green first; blue and red are then green plus their difference */
    const int g = pix.g - ((db*rct_coef.y + dr*rct_coef.x) >> 2);

    const ivec4 rgba = ivec4(dr + g, g, db + g, pix.a);

    return ivec4(rgba[fmt_lut[0]], rgba[fmt_lut[1]],
                 rgba[fmt_lut[2]], rgba[fmt_lut[3]]);
}
|
||||
|
||||
/*
 * Copies one decoded line from the dec[] images to the dst[] images,
 * optionally running transform_sample() on every sample.
 *
 *   sc         slice context; slice_pos locates the line in dst[],
 *              slice_rct_coef is forwarded to transform_sample()
 *   sp         base position of the slice inside dec[]
 *   w          number of samples in the line
 *   y          line index inside the slice
 *   apply_rct  when false, samples are stored as loaded
 */
void writeout_rgb(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct)
{
    for (int x = 0; x < w; x++) {
        ivec2 lpos = sp + LADDR(ivec2(x, y));
        ivec2 pos = sc.slice_pos + ivec2(x, y);

        /* Zero-initialised: without transparency the alpha component is
         * never loaded, yet the full vector is still permuted and stored.
         * Leaving it uninitialised would store an undefined value.
         * NOTE(review): zero is a neutral fill - confirm it matches the
         * software decoder wherever that channel is visible. */
        ivec4 pix = ivec4(0);

        /* dec[] plane order is g, b, r, (a) */
        pix.r = int(imageLoad(dec[2], lpos)[0]);
        pix.g = int(imageLoad(dec[0], lpos)[0]);
        pix.b = int(imageLoad(dec[1], lpos)[0]);
        if (transparency != 0)
            pix.a = int(imageLoad(dec[3], lpos)[0]);

        if (apply_rct)
            pix = transform_sample(pix, sc.slice_rct_coef);

        /* dst[0] always receives the whole vector; with planar output the
         * remaining planes each get their own component. */
        imageStore(dst[0], pos, pix);
        if (planar_rgb != 0) {
            for (int i = 1; i < color_planes; i++)
                imageStore(dst[i], pos, ivec4(pix[i]));
        }
    }
}
|
||||
#endif
|
||||
@ -210,6 +287,8 @@ void decode_line(inout SliceContext sc, uint64_t state,
|
||||
void decode_slice(inout SliceContext sc, const uint slice_idx)
|
||||
{
|
||||
int run_index = 0;
|
||||
int w = sc.slice_dim.x;
|
||||
ivec2 sp = sc.slice_pos;
|
||||
|
||||
#ifndef RGB
|
||||
int bits = bits_per_raw_sample;
|
||||
@ -217,6 +296,8 @@ void decode_slice(inout SliceContext sc, const uint slice_idx)
|
||||
int bits = 9;
|
||||
if (bits != 8 || sc.slice_coding_mode != 0)
|
||||
bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1);
|
||||
|
||||
sp.y = int(gl_WorkGroupID.y)*RGB_LINECACHE;
|
||||
#endif
|
||||
|
||||
/* PCM coding */
|
||||
@ -229,12 +310,14 @@ void decode_slice(inout SliceContext sc, const uint slice_idx)
|
||||
h >>= chroma_shift.y;
|
||||
|
||||
for (int y = 0; y < h; y++)
|
||||
decode_line_pcm(sc, y, p, bits);
|
||||
decode_line_pcm(sc, sp, w, y, p, bits);
|
||||
}
|
||||
#else
|
||||
for (int y = 0; y < sc.slice_dim.y; y++) {
|
||||
for (int p = 0; p < color_planes; p++)
|
||||
decode_line_pcm(sc, y, p, bits);
|
||||
decode_line_pcm(sc, sp, w, y, p, bits);
|
||||
|
||||
writeout_rgb(sc, sp, w, y, false);
|
||||
}
|
||||
#endif
|
||||
} else
|
||||
@ -242,8 +325,9 @@ void decode_slice(inout SliceContext sc, const uint slice_idx)
|
||||
/* Arithmetic coding */
|
||||
#endif
|
||||
{
|
||||
uint64_t slice_state_off = uint64_t(slice_state) +
|
||||
slice_idx*plane_state_size*codec_planes;
|
||||
u64vec4 slice_state_off = (uint64_t(slice_state) +
|
||||
slice_idx*plane_state_size*codec_planes) +
|
||||
plane_state_size*uvec4(0, 1, 1, 2);
|
||||
|
||||
#ifndef RGB
|
||||
for (int p = 0; p < planes; p++) {
|
||||
@ -252,18 +336,16 @@ void decode_slice(inout SliceContext sc, const uint slice_idx)
|
||||
h >>= chroma_shift.y;
|
||||
|
||||
for (int y = 0; y < h; y++)
|
||||
decode_line(sc, slice_state_off, y, p, bits, run_index);
|
||||
|
||||
/* For the second chroma plane, reuse the first plane's state */
|
||||
if (p != 1)
|
||||
slice_state_off += plane_state_size;
|
||||
decode_line(sc, sp, w, y, p, bits,
|
||||
slice_state_off[p], run_index);
|
||||
}
|
||||
#else
|
||||
for (int y = 0; y < sc.slice_dim.y; y++) {
|
||||
for (int p = 0; p < color_planes; p++)
|
||||
decode_line(sc,
|
||||
slice_state_off + plane_state_size*((p + 1) >> 1),
|
||||
y, p, bits, run_index);
|
||||
decode_line(sc, sp, w, y, p, bits,
|
||||
slice_state_off[p], run_index);
|
||||
|
||||
writeout_rgb(sc, sp, w, y, true);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -1,88 +0,0 @@
|
||||
/*
|
||||
* FFv1 codec
|
||||
*
|
||||
* Copyright (c) 2025 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
 * Copies a slice from src[] to dst[] without any colour transform.
 *
 * Each invocation starts at its gl_LocalInvocationID inside the slice and
 * advances by the workgroup size in both directions until the slice end
 * (slice_pos + slice_dim) is reached.
 */
void bypass_block(in SliceContext sc)
{
    ivec2 start = ivec2(gl_LocalInvocationID) + sc.slice_pos;
    ivec2 end = sc.slice_pos + sc.slice_dim;

    for (uint y = start.y; y < end.y; y += gl_WorkGroupSize.y) {
        for (uint x = start.x; x < end.x; x += gl_WorkGroupSize.x) {
            ivec2 pos = ivec2(x, y);

            /* Zero-initialised: only the first color_planes components are
             * loaded below, but the whole vector is stored to dst[0].
             * Leaving the rest uninitialised would store undefined values. */
            ivec4 pix = ivec4(0);
            for (int i = 0; i < color_planes; i++)
                pix[i] = int(imageLoad(src[i], pos)[0]);

            /* dst[0] receives the whole vector; with planar output the
             * remaining planes each get their own component. */
            imageStore(dst[0], pos, pix);
            if (planar_rgb != 0) {
                for (int i = 1; i < color_planes; i++)
                    imageStore(dst[i], pos, ivec4(pix[i]));
            }
        }
    }
}
|
||||
|
||||
/*
 * Loads the sample at `pos` from src[], applies the RCT arithmetic,
 * permutes the components through fmt_lut[] and stores the result
 * to dst[].
 *
 *   pos       image position of the sample (same in src[] and dst[])
 *   rct_coef  weights: .x multiplies the r difference, .y the b difference
 */
void transform_sample(ivec2 pos, ivec2 rct_coef)
{
    /* Zero-initialised: without transparency the alpha component is never
     * loaded, yet it is still permuted and stored below. Leaving it
     * uninitialised would store an undefined value. */
    ivec4 pix = ivec4(0);

    /* src[] plane order is g, b, r, (a) */
    pix.r = int(imageLoad(src[2], pos)[0]);
    pix.g = int(imageLoad(src[0], pos)[0]);
    pix.b = int(imageLoad(src[1], pos)[0]);
    if (transparency != 0)
        pix.a = int(imageLoad(src[3], pos)[0]);

    /* Strip the bias from the difference channels, recover green, then
     * rebuild blue and red from it. */
    pix.b -= offset;
    pix.r -= offset;
    pix.g -= (pix.b*rct_coef.y + pix.r*rct_coef.x) >> 2;
    pix.b += pix.g;
    pix.r += pix.g;

    pix = ivec4(pix[fmt_lut[0]], pix[fmt_lut[1]],
                pix[fmt_lut[2]], pix[fmt_lut[3]]);

    /* dst[0] receives the whole vector; with planar output the remaining
     * planes each get their own component. */
    imageStore(dst[0], pos, pix);
    if (planar_rgb != 0) {
        for (int i = 1; i < color_planes; i++)
            imageStore(dst[i], pos, ivec4(pix[i]));
    }
}
|
||||
|
||||
/*
 * Runs transform_sample() over every sample of a slice.
 *
 * Each invocation begins at its gl_LocalInvocationID inside the slice and
 * steps by the workgroup size in x and y until slice_pos + slice_dim.
 */
void transform_block(in SliceContext sc)
{
    const ivec2 origin = sc.slice_pos;
    const ivec2 first = ivec2(gl_LocalInvocationID) + origin;
    const ivec2 limit = origin + sc.slice_dim;

    for (uint row = first.y; row < limit.y; row += gl_WorkGroupSize.y) {
        for (uint col = first.x; col < limit.x; col += gl_WorkGroupSize.x) {
            transform_sample(ivec2(col, row), sc.slice_rct_coef);
        }
    }
}
|
||||
|
||||
/*
 * Entry point: one workgroup per slice. The slice index is the
 * row-major position of the workgroup in the dispatch grid.
 * Slices with slice_coding_mode == 1 are copied verbatim, all others
 * go through the colour transform.
 */
void main()
{
    const uint idx = gl_WorkGroupID.x + gl_NumWorkGroups.x*gl_WorkGroupID.y;

    if (slice_ctx[idx].slice_coding_mode != 1) {
        transform_block(slice_ctx[idx]);
        return;
    }

    bypass_block(slice_ctx[idx]);
}
|
@ -33,7 +33,6 @@ extern const char *ff_source_ffv1_common_comp;
|
||||
extern const char *ff_source_ffv1_dec_setup_comp;
|
||||
extern const char *ff_source_ffv1_reset_comp;
|
||||
extern const char *ff_source_ffv1_dec_comp;
|
||||
extern const char *ff_source_ffv1_dec_rct_comp;
|
||||
|
||||
const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = {
|
||||
.codec_id = AV_CODEC_ID_FFV1,
|
||||
@ -66,7 +65,6 @@ typedef struct FFv1VulkanDecodeContext {
|
||||
FFVulkanShader setup;
|
||||
FFVulkanShader reset[2]; /* AC/Golomb */
|
||||
FFVulkanShader decode[2][2][2]; /* 16/32 bit, AC/Golomb, Normal/RGB */
|
||||
FFVulkanShader rct[2]; /* 16/32 bit */
|
||||
|
||||
FFVkBuffer rangecoder_static_buf;
|
||||
FFVkBuffer quant_buf;
|
||||
@ -85,11 +83,13 @@ typedef struct FFv1VkParameters {
|
||||
VkDeviceAddress slice_state;
|
||||
VkDeviceAddress scratch_data;
|
||||
|
||||
int fmt_lut[4];
|
||||
uint32_t img_size[2];
|
||||
uint32_t chroma_shift[2];
|
||||
|
||||
uint32_t plane_state_size;
|
||||
uint32_t crcref;
|
||||
int rct_offset;
|
||||
|
||||
uint8_t bits_per_raw_sample;
|
||||
uint8_t quant_table_count;
|
||||
@ -100,6 +100,7 @@ typedef struct FFv1VkParameters {
|
||||
uint8_t codec_planes;
|
||||
uint8_t color_planes;
|
||||
uint8_t transparency;
|
||||
uint8_t planar_rgb;
|
||||
uint8_t colorspace;
|
||||
uint8_t ec;
|
||||
uint8_t golomb;
|
||||
@ -116,11 +117,13 @@ static void add_push_data(FFVulkanShader *shd)
|
||||
GLSLC(1, u8buf slice_state; );
|
||||
GLSLC(1, u8buf scratch_data; );
|
||||
GLSLC(0, );
|
||||
GLSLC(1, ivec4 fmt_lut; );
|
||||
GLSLC(1, uvec2 img_size; );
|
||||
GLSLC(1, uvec2 chroma_shift; );
|
||||
GLSLC(0, );
|
||||
GLSLC(1, uint plane_state_size; );
|
||||
GLSLC(1, uint32_t crcref; );
|
||||
GLSLC(1, int rct_offset; );
|
||||
GLSLC(0, );
|
||||
GLSLC(1, uint8_t bits_per_raw_sample; );
|
||||
GLSLC(1, uint8_t quant_table_count; );
|
||||
@ -131,6 +134,7 @@ static void add_push_data(FFVulkanShader *shd)
|
||||
GLSLC(1, uint8_t codec_planes; );
|
||||
GLSLC(1, uint8_t color_planes; );
|
||||
GLSLC(1, uint8_t transparency; );
|
||||
GLSLC(1, uint8_t planar_rgb; );
|
||||
GLSLC(1, uint8_t colorspace; );
|
||||
GLSLC(1, uint8_t ec; );
|
||||
GLSLC(1, uint8_t golomb; );
|
||||
@ -349,11 +353,17 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
||||
return err;
|
||||
|
||||
if (is_rgb) {
|
||||
RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, vp->dpb_frame,
|
||||
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
|
||||
RET(ff_vk_create_imageviews(&ctx->s, exec, rct_image_views,
|
||||
vp->dpb_frame, FF_VK_REP_NATIVE));
|
||||
RET(ff_vk_exec_add_dep_frame(&ctx->s, exec, vp->dpb_frame,
|
||||
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||
VK_PIPELINE_STAGE_2_CLEAR_BIT));
|
||||
ff_vk_frame_barrier(&ctx->s, exec, decode_dst, img_bar, &nb_img_bar,
|
||||
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||
VK_ACCESS_2_TRANSFER_WRITE_BIT,
|
||||
VK_IMAGE_LAYOUT_GENERAL,
|
||||
VK_QUEUE_FAMILY_IGNORED);
|
||||
}
|
||||
|
||||
if (!(f->picture.f->flags & AV_FRAME_FLAG_KEY)) {
|
||||
@ -391,6 +401,8 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
||||
|
||||
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
|
||||
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
|
||||
.pImageMemoryBarriers = img_bar,
|
||||
.imageMemoryBarrierCount = nb_img_bar,
|
||||
.pBufferMemoryBarriers = buf_bar,
|
||||
.bufferMemoryBarrierCount = nb_buf_bar,
|
||||
});
|
||||
@ -431,6 +443,7 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
||||
|
||||
.plane_state_size = fp->plane_state_size,
|
||||
.crcref = f->crcref,
|
||||
.rct_offset = 1 << bits,
|
||||
|
||||
.bits_per_raw_sample = bits,
|
||||
.quant_table_count = f->quant_table_count,
|
||||
@ -441,11 +454,23 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
||||
.codec_planes = f->plane_count,
|
||||
.color_planes = color_planes,
|
||||
.transparency = f->transparency,
|
||||
.planar_rgb = ff_vk_mt_is_np_rgb(sw_format) &&
|
||||
(ff_vk_count_images((AVVkFrame *)f->picture.f->data[0]) > 1),
|
||||
.colorspace = f->colorspace,
|
||||
.ec = f->ec,
|
||||
.golomb = f->ac == AC_GOLOMB_RICE,
|
||||
.check_crc = !!(avctx->err_recognition & AV_EF_CRCCHECK),
|
||||
};
|
||||
|
||||
/* For some reason the C FFv1 encoder/decoder treats these differently */
|
||||
if (sw_format == AV_PIX_FMT_GBRP10 || sw_format == AV_PIX_FMT_GBRP12 ||
|
||||
sw_format == AV_PIX_FMT_GBRP14)
|
||||
memcpy(pd.fmt_lut, (int [4]) { 2, 1, 0, 3 }, 4*sizeof(int));
|
||||
else if (sw_format == AV_PIX_FMT_X2BGR10)
|
||||
memcpy(pd.fmt_lut, (int [4]) { 0, 2, 1, 3 }, 4*sizeof(int));
|
||||
else
|
||||
ff_vk_set_perm(sw_format, pd.fmt_lut, 0);
|
||||
|
||||
for (int i = 0; i < MAX_QUANT_TABLES; i++)
|
||||
pd.context_count[i] = f->context_count[i];
|
||||
|
||||
@ -455,6 +480,18 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
||||
|
||||
vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);
|
||||
|
||||
if (is_rgb) {
|
||||
AVVkFrame *vkf = (AVVkFrame *)vp->dpb_frame->data[0];
|
||||
for (int i = 0; i < color_planes; i++)
|
||||
vk->CmdClearColorImage(exec->buf, vkf->img[i], VK_IMAGE_LAYOUT_GENERAL,
|
||||
&((VkClearColorValue) { 0 }),
|
||||
1, &((VkImageSubresourceRange) {
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.levelCount = 1,
|
||||
.layerCount = 1,
|
||||
}));
|
||||
}
|
||||
|
||||
/* Reset shader */
|
||||
reset_shader = &fv->reset[f->ac == AC_GOLOMB_RICE];
|
||||
ff_vk_shader_update_desc_buffer(&ctx->s, exec, reset_shader,
|
||||
@ -493,12 +530,15 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
||||
};
|
||||
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
|
||||
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
|
||||
.pImageMemoryBarriers = img_bar,
|
||||
.imageMemoryBarrierCount = nb_img_bar,
|
||||
.pBufferMemoryBarriers = buf_bar,
|
||||
.bufferMemoryBarrierCount = nb_buf_bar,
|
||||
});
|
||||
slice_state->stage = buf_bar[0].dstStageMask;
|
||||
slice_state->access = buf_bar[0].dstAccessMask;
|
||||
nb_buf_bar = 0;
|
||||
nb_img_bar = 0;
|
||||
|
||||
vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices,
|
||||
f->plane_count);
|
||||
@ -515,6 +555,12 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
||||
1, 1,
|
||||
VK_IMAGE_LAYOUT_GENERAL,
|
||||
VK_NULL_HANDLE);
|
||||
if (is_rgb)
|
||||
ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader,
|
||||
f->picture.f, vp->view.out,
|
||||
1, 2,
|
||||
VK_IMAGE_LAYOUT_GENERAL,
|
||||
VK_NULL_HANDLE);
|
||||
|
||||
ff_vk_exec_bind_shader(&ctx->s, exec, decode_shader);
|
||||
ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader,
|
||||
@ -537,12 +583,20 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
||||
};
|
||||
|
||||
/* Input frame barrier */
|
||||
ff_vk_frame_barrier(&ctx->s, exec, decode_dst, img_bar, &nb_img_bar,
|
||||
ff_vk_frame_barrier(&ctx->s, exec, f->picture.f, img_bar, &nb_img_bar,
|
||||
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
||||
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
|
||||
VK_ACCESS_SHADER_WRITE_BIT |
|
||||
(!is_rgb ? VK_ACCESS_SHADER_READ_BIT : 0),
|
||||
VK_IMAGE_LAYOUT_GENERAL,
|
||||
VK_QUEUE_FAMILY_IGNORED);
|
||||
if (is_rgb)
|
||||
ff_vk_frame_barrier(&ctx->s, exec, vp->dpb_frame, img_bar, &nb_img_bar,
|
||||
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
||||
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
|
||||
VK_IMAGE_LAYOUT_GENERAL,
|
||||
VK_QUEUE_FAMILY_IGNORED);
|
||||
|
||||
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
|
||||
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
|
||||
@ -558,74 +612,6 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
||||
|
||||
vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);
|
||||
|
||||
/* RCT */
|
||||
if (is_rgb) {
|
||||
FFVulkanShader *rct_shader = &fv->rct[f->use32bit];
|
||||
FFv1VkRCTParameters pd_rct;
|
||||
|
||||
ff_vk_shader_update_desc_buffer(&ctx->s, exec, rct_shader,
|
||||
1, 0, 0,
|
||||
slice_state,
|
||||
0, fp->slice_data_size*f->slice_count,
|
||||
VK_FORMAT_UNDEFINED);
|
||||
ff_vk_shader_update_img_array(&ctx->s, exec, rct_shader,
|
||||
decode_dst, decode_dst_view,
|
||||
1, 1,
|
||||
VK_IMAGE_LAYOUT_GENERAL,
|
||||
VK_NULL_HANDLE);
|
||||
ff_vk_shader_update_img_array(&ctx->s, exec, rct_shader,
|
||||
f->picture.f, vp->view.out,
|
||||
1, 2,
|
||||
VK_IMAGE_LAYOUT_GENERAL,
|
||||
VK_NULL_HANDLE);
|
||||
|
||||
ff_vk_exec_bind_shader(&ctx->s, exec, rct_shader);
|
||||
|
||||
pd_rct = (FFv1VkRCTParameters) {
|
||||
.offset = 1 << bits,
|
||||
.bits = bits,
|
||||
.planar_rgb = ff_vk_mt_is_np_rgb(sw_format) &&
|
||||
(ff_vk_count_images((AVVkFrame *)f->picture.f->data[0]) > 1),
|
||||
.color_planes = color_planes,
|
||||
.transparency = f->transparency,
|
||||
};
|
||||
|
||||
/* For some reason the C FFv1 encoder/decoder treats these differently */
|
||||
if (sw_format == AV_PIX_FMT_GBRP10 || sw_format == AV_PIX_FMT_GBRP12 ||
|
||||
sw_format == AV_PIX_FMT_GBRP14)
|
||||
memcpy(pd_rct.fmt_lut, (int [4]) { 2, 1, 0, 3 }, 4*sizeof(int));
|
||||
else if (sw_format == AV_PIX_FMT_X2BGR10)
|
||||
memcpy(pd_rct.fmt_lut, (int [4]) { 0, 2, 1, 3 }, 4*sizeof(int));
|
||||
else
|
||||
ff_vk_set_perm(sw_format, pd_rct.fmt_lut, 0);
|
||||
|
||||
ff_vk_shader_update_push_const(&ctx->s, exec, rct_shader,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
0, sizeof(pd_rct), &pd_rct);
|
||||
|
||||
ff_vk_frame_barrier(&ctx->s, exec, decode_dst, img_bar, &nb_img_bar,
|
||||
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
||||
VK_ACCESS_SHADER_READ_BIT,
|
||||
VK_IMAGE_LAYOUT_GENERAL,
|
||||
VK_QUEUE_FAMILY_IGNORED);
|
||||
ff_vk_frame_barrier(&ctx->s, exec, f->picture.f, img_bar, &nb_img_bar,
|
||||
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
|
||||
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
|
||||
VK_ACCESS_SHADER_WRITE_BIT,
|
||||
VK_IMAGE_LAYOUT_GENERAL,
|
||||
VK_QUEUE_FAMILY_IGNORED);
|
||||
|
||||
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
|
||||
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
|
||||
.pImageMemoryBarriers = img_bar,
|
||||
.imageMemoryBarrierCount = nb_img_bar,
|
||||
});
|
||||
nb_img_bar = 0;
|
||||
|
||||
vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);
|
||||
}
|
||||
|
||||
err = ff_vk_exec_submit(&ctx->s, exec);
|
||||
if (err < 0)
|
||||
return err;
|
||||
@ -845,7 +831,9 @@ fail:
|
||||
|
||||
static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
|
||||
FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
|
||||
FFVulkanShader *shd, AVHWFramesContext *frames_ctx,
|
||||
FFVulkanShader *shd,
|
||||
AVHWFramesContext *dec_frames_ctx,
|
||||
AVHWFramesContext *out_frames_ctx,
|
||||
int use32bit, int ac, int rgb)
|
||||
{
|
||||
int err;
|
||||
@ -910,127 +898,28 @@ static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
|
||||
.buf_elems = f->max_slice_count,
|
||||
},
|
||||
{
|
||||
.name = "dst",
|
||||
.name = "dec",
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.dimensions = 2,
|
||||
.mem_layout = ff_vk_shader_rep_fmt(frames_ctx->sw_format,
|
||||
.mem_layout = ff_vk_shader_rep_fmt(dec_frames_ctx->sw_format,
|
||||
FF_VK_REP_NATIVE),
|
||||
.elems = av_pix_fmt_count_planes(frames_ctx->sw_format),
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
},
|
||||
};
|
||||
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 0, 0));
|
||||
|
||||
GLSLD(ff_source_ffv1_dec_comp);
|
||||
|
||||
RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
|
||||
&spv_opaque));
|
||||
RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
|
||||
|
||||
RET(ff_vk_shader_register_exec(s, pool, shd));
|
||||
|
||||
fail:
|
||||
if (spv_opaque)
|
||||
spv->free_shader(spv, &spv_opaque);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int init_rct_shader(FFV1Context *f, FFVulkanContext *s,
|
||||
FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
|
||||
FFVulkanShader *shd, int use32bit,
|
||||
AVHWFramesContext *src_ctx, AVHWFramesContext *dst_ctx)
|
||||
{
|
||||
int err;
|
||||
FFVulkanDescriptorSetBinding *desc_set;
|
||||
|
||||
uint8_t *spv_data;
|
||||
size_t spv_len;
|
||||
void *spv_opaque = NULL;
|
||||
int wg_count = sqrt(s->props.properties.limits.maxComputeWorkGroupInvocations);
|
||||
|
||||
RET(ff_vk_shader_init(s, shd, "ffv1_rct",
|
||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
(const char *[]) { "GL_EXT_buffer_reference",
|
||||
"GL_EXT_buffer_reference2" }, 2,
|
||||
wg_count, wg_count, 1,
|
||||
0));
|
||||
|
||||
/* Common codec header */
|
||||
GLSLD(ff_source_common_comp);
|
||||
|
||||
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
|
||||
GLSLC(1, ivec4 fmt_lut; );
|
||||
GLSLC(1, int offset; );
|
||||
GLSLC(1, uint8_t bits; );
|
||||
GLSLC(1, uint8_t planar_rgb; );
|
||||
GLSLC(1, uint8_t color_planes; );
|
||||
GLSLC(1, uint8_t transparency; );
|
||||
GLSLC(1, uint8_t version; );
|
||||
GLSLC(1, uint8_t micro_version; );
|
||||
GLSLC(1, uint8_t padding[2]; );
|
||||
GLSLC(0, }; );
|
||||
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkRCTParameters),
|
||||
VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
|
||||
av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
|
||||
av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
|
||||
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
|
||||
|
||||
desc_set = (FFVulkanDescriptorSetBinding []) {
|
||||
{
|
||||
.name = "rangecoder_static_buf",
|
||||
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.mem_layout = "scalar",
|
||||
.buf_content = "uint8_t zero_one_state[512];",
|
||||
},
|
||||
{
|
||||
.name = "quant_buf",
|
||||
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.mem_layout = "scalar",
|
||||
.buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
|
||||
"[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
|
||||
},
|
||||
};
|
||||
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 1, 0));
|
||||
|
||||
define_shared_code(shd, use32bit);
|
||||
|
||||
desc_set = (FFVulkanDescriptorSetBinding []) {
|
||||
{
|
||||
.name = "slice_data_buf",
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.mem_quali = "readonly",
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.buf_content = "SliceContext slice_ctx",
|
||||
.buf_elems = f->max_slice_count,
|
||||
},
|
||||
{
|
||||
.name = "src",
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.dimensions = 2,
|
||||
.mem_layout = ff_vk_shader_rep_fmt(src_ctx->sw_format,
|
||||
FF_VK_REP_NATIVE),
|
||||
.mem_quali = "readonly",
|
||||
.elems = av_pix_fmt_count_planes(src_ctx->sw_format),
|
||||
.elems = av_pix_fmt_count_planes(dec_frames_ctx->sw_format),
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
},
|
||||
{
|
||||
.name = "dst",
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.dimensions = 2,
|
||||
.mem_layout = ff_vk_shader_rep_fmt(dst_ctx->sw_format,
|
||||
.mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format,
|
||||
FF_VK_REP_NATIVE),
|
||||
.mem_quali = "writeonly",
|
||||
.elems = av_pix_fmt_count_planes(dst_ctx->sw_format),
|
||||
.elems = av_pix_fmt_count_planes(out_frames_ctx->sw_format),
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
},
|
||||
};
|
||||
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 0, 0));
|
||||
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2 + rgb, 0, 0));
|
||||
|
||||
GLSLD(ff_source_ffv1_dec_rct_comp);
|
||||
GLSLD(ff_source_ffv1_dec_comp);
|
||||
|
||||
RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
|
||||
&spv_opaque));
|
||||
@ -1051,6 +940,7 @@ static int init_indirect(AVCodecContext *avctx, FFVulkanContext *s,
|
||||
int err;
|
||||
AVHWFramesContext *frames_ctx;
|
||||
AVVulkanFramesContext *vk_frames;
|
||||
FFV1Context *f = avctx->priv_data;
|
||||
|
||||
*dst = av_hwframe_ctx_alloc(s->device_ref);
|
||||
if (!(*dst))
|
||||
@ -1059,13 +949,14 @@ static int init_indirect(AVCodecContext *avctx, FFVulkanContext *s,
|
||||
frames_ctx = (AVHWFramesContext *)((*dst)->data);
|
||||
frames_ctx->format = AV_PIX_FMT_VULKAN;
|
||||
frames_ctx->sw_format = sw_format;
|
||||
frames_ctx->width = FFALIGN(s->frames->width, 32);
|
||||
frames_ctx->height = FFALIGN(s->frames->height, 32);
|
||||
frames_ctx->width = s->frames->width;
|
||||
frames_ctx->height = f->num_v_slices*2;
|
||||
|
||||
vk_frames = frames_ctx->hwctx;
|
||||
vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
vk_frames->usage = VK_IMAGE_USAGE_STORAGE_BIT;
|
||||
vk_frames->img_flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
|
||||
vk_frames->usage = VK_IMAGE_USAGE_STORAGE_BIT |
|
||||
VK_IMAGE_USAGE_TRANSFER_DST_BIT;
|
||||
|
||||
err = av_hwframe_ctx_init(*dst);
|
||||
if (err < 0) {
|
||||
@ -1095,9 +986,6 @@ static void vk_decode_ffv1_uninit(FFVulkanDecodeShared *ctx)
|
||||
for (int k = 0; k < 2; k++) /* Normal/RGB */
|
||||
ff_vk_shader_free(&ctx->s, &fv->decode[i][j][k]);
|
||||
|
||||
for (int i = 0; i < 2; i++) /* 16/32 bit */
|
||||
ff_vk_shader_free(&ctx->s, &fv->rct[i]);
|
||||
|
||||
ff_vk_free_buf(&ctx->s, &fv->quant_buf);
|
||||
ff_vk_free_buf(&ctx->s, &fv->rangecoder_static_buf);
|
||||
ff_vk_free_buf(&ctx->s, &fv->crc_tab_buf);
|
||||
@ -1165,12 +1053,13 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx)
|
||||
for (int i = 0; i < 2; i++) { /* 16/32 bit */
|
||||
for (int j = 0; j < 2; j++) { /* AC/Golomb */
|
||||
for (int k = 0; k < 2; k++) { /* Normal/RGB */
|
||||
AVHWFramesContext *frames_ctx;
|
||||
frames_ctx = k ? (AVHWFramesContext *)fv->intermediate_frames_ref[i]->data :
|
||||
(AVHWFramesContext *)avctx->hw_frames_ctx->data;
|
||||
AVHWFramesContext *dec_frames_ctx;
|
||||
dec_frames_ctx = k ? (AVHWFramesContext *)fv->intermediate_frames_ref[i]->data :
|
||||
(AVHWFramesContext *)avctx->hw_frames_ctx->data;
|
||||
err = init_decode_shader(f, &ctx->s, &ctx->exec_pool,
|
||||
spv, &fv->decode[i][j][k],
|
||||
frames_ctx,
|
||||
dec_frames_ctx,
|
||||
(AVHWFramesContext *)avctx->hw_frames_ctx->data,
|
||||
i,
|
||||
!j ? AC_RANGE_CUSTOM_TAB : AC_GOLOMB_RICE,
|
||||
k);
|
||||
@ -1180,16 +1069,6 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx)
|
||||
}
|
||||
}
|
||||
|
||||
/* RCT shaders */
|
||||
for (int i = 0; i < 2; i++) { /* 16/32 bit */
|
||||
err = init_rct_shader(f, &ctx->s, &ctx->exec_pool,
|
||||
spv, &fv->rct[i], i,
|
||||
(AVHWFramesContext *)fv->intermediate_frames_ref[i]->data,
|
||||
(AVHWFramesContext *)avctx->hw_frames_ctx->data);
|
||||
if (err < 0)
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Range coder data */
|
||||
err = ff_ffv1_vk_init_state_transition_data(&ctx->s,
|
||||
&fv->rangecoder_static_buf,
|
||||
|
@ -147,6 +147,7 @@ typedef uint64_t FFVulkanExtensions;
|
||||
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdPipelineBarrier) \
|
||||
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdCopyBufferToImage) \
|
||||
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdCopyImageToBuffer) \
|
||||
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdClearColorImage) \
|
||||
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdCopyBuffer) \
|
||||
\
|
||||
/* Buffer */ \
|
||||
|
Reference in New Issue
Block a user