Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 14 additions & 9 deletions src/cdef_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
let uv_stride: ptrdiff_t = BD::pxstride(f.cur.stride[1]);

let cdef_line_buf = BD::cast_pixel_slice_mut(&mut f.lf.cdef_line_buf);
let lr_line_buf = BD::cast_pixel_slice(&f.lf.lr_line_buf);

let mut bit = false;
for by in (by_start..by_end).step_by(2) {
Expand Down Expand Up @@ -328,7 +329,7 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
} else {
offset = (sby * ((4 as c_int) << sb128) - 4) as isize * y_stride
+ (bx * 4) as isize;
top = f.lf.lr_lpf_line[0].cast::<BD::Pixel>().offset(offset);
top = lr_line_buf.as_ptr().add(f.lf.lr_lpf_line[0]).offset(offset);
}
bot = bptrs[0].offset(8 * y_stride as isize);
st_y = false;
Expand All @@ -347,7 +348,7 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
} else {
let line = sby * ((4 as c_int) << sb128) + 4 * sb128 + 2;
offset = line as isize * y_stride + (bx * 4) as isize;
bot = f.lf.lr_lpf_line[0].cast::<BD::Pixel>().offset(offset);
bot = lr_line_buf.as_ptr().add(f.lf.lr_lpf_line[0]).offset(offset);
}
st_y = false;
} else {
Expand Down Expand Up @@ -421,8 +422,10 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
let line_0 = sby * ((4 as c_int) << sb128) - 4;
offset = line_0 as isize * uv_stride
+ (bx * 4 >> ss_hor) as isize;
top =
f.lf.lr_lpf_line[pl].cast::<BD::Pixel>().offset(offset);
top = lr_line_buf
.as_ptr()
.add(f.lf.lr_lpf_line[pl])
.offset(offset);
}
bot = bptrs[pl].offset(((8 >> ss_ver) * uv_stride) as isize);
st_uv = false;
Expand All @@ -441,11 +444,13 @@ pub(crate) unsafe fn rav1d_cdef_brow<BD: BitDepth>(
.add(f.lf.cdef_lpf_line[pl])
.offset(offset);
} else {
let line_1 = sby * ((4 as c_int) << sb128) + 4 * sb128 + 2;
offset = line_1 as isize * uv_stride
+ (bx * 4 >> ss_hor) as isize;
bot =
f.lf.lr_lpf_line[pl].cast::<BD::Pixel>().offset(offset);
let line = sby * ((4 as c_int) << sb128) + 4 * sb128 + 2;
offset =
line as isize * uv_stride + (bx * 4 >> ss_hor) as isize;
bot = lr_line_buf
.as_ptr()
.add(f.lf.lr_lpf_line[pl])
.offset(offset);
}
st_uv = false;
} else {
Expand Down
54 changes: 24 additions & 30 deletions src/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4446,38 +4446,32 @@ pub(crate) unsafe fn rav1d_decode_frame_init(
};
y_stride = f.sr_cur.p.stride[0];
uv_stride = f.sr_cur.p.stride[1];
if y_stride * num_lines as isize != f.lf.lr_buf_plane_sz[0] as isize
|| uv_stride * num_lines as isize * 2 != f.lf.lr_buf_plane_sz[1] as isize
{
// lr simd may overread the input, so slightly over-allocate the lpf buffer
let mut alloc_sz: usize = 128;
alloc_sz += y_stride.unsigned_abs() * num_lines as usize;
alloc_sz += uv_stride.unsigned_abs() * num_lines as usize * 2;
// TODO: Fallible allocation
// On allocation failure set `f.lf.lr_buf_plane_sz` to 0.
f.lf.lr_line_buf.resize(alloc_sz, 0);
let mut ptr = f.lf.lr_line_buf.as_mut_ptr();

ptr = ptr.offset(64);
if y_stride < 0 {
f.lf.lr_lpf_line[0] =
ptr.offset(-(y_stride * (num_lines as isize - 1))) as *mut DynPixel;
} else {
f.lf.lr_lpf_line[0] = ptr as *mut DynPixel;
}
ptr = ptr.offset(y_stride.abs() * num_lines as isize);
if uv_stride < 0 {
f.lf.lr_lpf_line[1] =
ptr.offset(-(uv_stride * (num_lines as isize * 1 - 1))) as *mut DynPixel;
f.lf.lr_lpf_line[2] =
ptr.offset(-(uv_stride * (num_lines as isize * 2 - 1))) as *mut DynPixel;
} else {
f.lf.lr_lpf_line[1] = ptr as *mut DynPixel;
f.lf.lr_lpf_line[2] = ptr.offset(uv_stride * num_lines as isize) as *mut DynPixel;
}
// lr simd may overread the input, so slightly over-allocate the lpf buffer
let mut alloc_sz: usize = 128;
alloc_sz += y_stride.unsigned_abs() * num_lines as usize;
alloc_sz += uv_stride.unsigned_abs() * num_lines as usize * 2;
// TODO: Fallible allocation
f.lf.lr_line_buf.resize(alloc_sz, 0);

let y_stride_px = bpc.pxstride(y_stride);
let uv_stride_px = bpc.pxstride(uv_stride);

f.lf.lr_buf_plane_sz[0] = y_stride as c_int * num_lines;
f.lf.lr_buf_plane_sz[1] = uv_stride as c_int * num_lines * 2;
let mut offset = bpc.pxstride(64usize);
if y_stride < 0 {
f.lf.lr_lpf_line[0] = offset.wrapping_add_signed(-(y_stride_px * (num_lines as isize - 1)));
} else {
f.lf.lr_lpf_line[0] = offset;
}
offset = offset.wrapping_add_signed(y_stride_px.abs() * num_lines as isize);
if uv_stride < 0 {
f.lf.lr_lpf_line[1] =
offset.wrapping_add_signed(-(uv_stride_px * (num_lines as isize * 1 - 1)));
f.lf.lr_lpf_line[2] =
offset.wrapping_add_signed(-(uv_stride_px * (num_lines as isize * 2 - 1)));
} else {
f.lf.lr_lpf_line[1] = offset;
f.lf.lr_lpf_line[2] = offset.wrapping_add_signed(uv_stride_px * num_lines as isize);
}

// update allocation for loopfilter masks
Expand Down
7 changes: 3 additions & 4 deletions src/internal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -455,16 +455,15 @@ pub struct Rav1dFrameContext_lf {
pub level: Vec<[u8; 4]>,
pub mask: Vec<Av1Filter>, /* len = w*h */
pub lr_mask: Vec<Av1Restoration>,
pub lr_buf_plane_sz: [c_int; 2], /* (stride*sbh*4) << sb128 if n_tc > 1, else stride*4 */
pub lim_lut: Align16<Av1FilterLUT>,
pub last_sharpness: c_int,
pub lvl: [[[[u8; 2]; 8]; 4]; 8], /* [8 seg_id][4 dir][8 ref][2 is_gmv] */
pub tx_lpf_right_edge: TxLpfRightEdge,
pub cdef_line_buf: AlignedVec32<u8>, /* AlignedVec32<DynPixel> */
pub lr_line_buf: AlignedVec64<u8>,
pub cdef_line: [[usize; 3]; 2], /* [2 pre/post][3 plane] */
pub cdef_lpf_line: [usize; 3], /* plane */
pub lr_lpf_line: [*mut DynPixel; 3], /* plane */
pub cdef_line: [[usize; 3]; 2], /* [2 pre/post][3 plane] */
pub cdef_lpf_line: [usize; 3], /* plane */
pub lr_lpf_line: [usize; 3], /* plane */

// in-loop filter per-frame state keeping
pub start_of_tile_row: *mut u8,
Expand Down
35 changes: 11 additions & 24 deletions src/lf_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,35 +155,22 @@ pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
let seq_hdr = &***f.seq_hdr.as_ref().unwrap();
let tt_off = have_tt * sby * ((4 as c_int) << seq_hdr.sb128);

let lr_plane_sz = &f.lf.lr_buf_plane_sz;
let y_stride = BD::pxstride(lr_stride[0]);
let uv_stride = BD::pxstride(lr_stride[1]);
let y_span = lr_plane_sz[0] as isize - y_stride;
let uv_span = lr_plane_sz[1] as isize / 2 - uv_stride;

let dst: [&mut [BD::Pixel]; 3] = [
slice::from_raw_parts_mut(
(f.lf.lr_lpf_line[0] as *mut BD::Pixel).offset(cmp::min(y_span, 0)),
lr_plane_sz[0] as usize,
),
slice::from_raw_parts_mut(
(f.lf.lr_lpf_line[1] as *mut BD::Pixel).offset(cmp::min(uv_span, 0)),
lr_plane_sz[1] as usize / 2,
),
slice::from_raw_parts_mut(
(f.lf.lr_lpf_line[2] as *mut BD::Pixel).offset(cmp::min(uv_span, 0)),
lr_plane_sz[1] as usize / 2,
),
];
let dst_offset: [usize; 2] = [
(tt_off as isize * y_stride - cmp::min(y_span, 0)) as usize,
(tt_off as isize * uv_stride - cmp::min(uv_span, 0)) as usize,
let y_offset = (tt_off as isize * y_stride) as usize;
let uv_offset = (tt_off as isize * uv_stride) as usize;
let dst_offset = [
f.lf.lr_lpf_line[0] + y_offset,
f.lf.lr_lpf_line[1] + uv_offset,
f.lf.lr_lpf_line[2] + uv_offset,
];

// TODO Also check block level restore type to reduce copying.
let restore_planes = f.lf.restore_planes;

let cdef_line_buf = BD::cast_pixel_slice_mut(&mut f.lf.cdef_line_buf);
let lr_line_buf = BD::cast_pixel_slice_mut(&mut f.lf.lr_line_buf);

if seq_hdr.cdef != 0 || restore_planes & LR_RESTORE_Y as c_int != 0 {
let h = f.cur.p.h;
Expand All @@ -193,7 +180,7 @@ pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
if restore_planes & LR_RESTORE_Y as c_int != 0 || resize == 0 {
backup_lpf::<BD>(
c,
dst[0],
lr_line_buf,
dst_offset[0],
lr_stride[0],
src[0],
Expand Down Expand Up @@ -265,7 +252,7 @@ pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
if restore_planes & LR_RESTORE_U as c_int != 0 || resize == 0 {
backup_lpf::<BD>(
c,
dst[1],
lr_line_buf,
dst_offset[1],
lr_stride[1],
src[1],
Expand Down Expand Up @@ -325,8 +312,8 @@ pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
if restore_planes & LR_RESTORE_V as c_int != 0 || resize == 0 {
backup_lpf::<BD>(
c,
dst[2],
dst_offset[1],
lr_line_buf,
dst_offset[2],
lr_stride[1],
src[2],
(src_offset[1] as isize - offset_uv as isize * BD::pxstride(src_stride[1]))
Expand Down
15 changes: 7 additions & 8 deletions src/lr_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ use libc::ptrdiff_t;
use std::cmp;
use std::ffi::c_int;
use std::ffi::c_uint;
use std::slice;

pub type LrRestorePlanes = c_uint;
pub const LR_RESTORE_V: LrRestorePlanes = 4;
Expand Down Expand Up @@ -47,12 +46,8 @@ unsafe fn lr_stripe<BD: BitDepth>(
let sby = y + (if y != 0 { 8 << ss_ver } else { 0 }) >> 6 - ss_ver + seq_hdr.sb128;
let have_tt = (c.tc.len() > 1) as c_int;
let lpf_stride = BD::pxstride(stride);
let lpf_plane_sz = BD::pxstride(f.lf.lr_buf_plane_sz[(plane != 0) as usize] as isize);
let mut lpf_offset = cmp::max(lpf_stride - lpf_plane_sz, 0);
Comment on lines -50 to -51
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@fbossen I ended up removing lpf_plane_sz and lpf_offset in order to make the calculation here the same as the original C. You had added those in #746 to handle negative strides I think. Do we still need those with the fixed pxstride? Was there a reason why we needed those values in Rust but not in the original C?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason I added lpf_plane_sz and lpf_offset is that I defined lpf as a slice that begins at the memory section being effectively used and needed to compute the memory location of the beginning of the slice (and cover the case of a negative stride). If, instead of defining a new slice lpf, you use lr_line_buf, then removing lpf_plane_sz and lpf_offset looks appropriate.

let lpf = &slice::from_raw_parts(
(f.lf.lr_lpf_line[plane as usize] as *const BD::Pixel).offset(-lpf_offset),
lpf_plane_sz.unsigned_abs(),
);
let lr_line_buf = BD::cast_pixel_slice(&f.lf.lr_line_buf);
let mut lpf_offset = f.lf.lr_lpf_line[plane as usize] as isize;
lpf_offset += (have_tt * (sby * (4 << seq_hdr.sb128) - 4)) as isize * lpf_stride + x as isize;
// The first stripe of the frame is shorter by 8 luma pixel rows.
let mut stripe_h = cmp::min(64 - 8 * (y == 0) as c_int >> ss_ver, row_h - y);
Expand Down Expand Up @@ -102,7 +97,11 @@ unsafe fn lr_stripe<BD: BitDepth>(
p.as_mut_ptr().add(p_offset).cast(),
stride,
left.as_ptr().cast(),
lpf.as_ptr().offset(lpf_offset).cast(),
// NOTE: The calculated pointer may point to before the beginning of
// `lr_line_buf`, so we must use `.wrapping_offset` here.
// `.offset` requires the resulting pointer to be in bounds of the allocation, whereas
// `.wrapping_offset` does not; it defers that requirement until the pointer is dereferenced.
lr_line_buf.as_ptr().wrapping_offset(lpf_offset).cast(),
unit_w,
stripe_h,
&mut params,
Expand Down