From 64c507e7e4f79fd526192485e16cb88547172461 Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Tue, 30 Jan 2024 03:26:52 -0500 Subject: [PATCH 01/26] `fn rav1d_copy_lpf`: use slice instead of pointer for `src_stride` --- src/lf_apply.rs | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index d73475d32..90c5bfeac 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -181,7 +181,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( let frame_hdr = &***f.frame_hdr.as_ref().unwrap(); let resize = (frame_hdr.size.width[0] != frame_hdr.size.width[1]) as c_int; let offset = 8 * (sby != 0) as c_int; - let src_stride: *const ptrdiff_t = (f.cur.stride).as_mut_ptr(); + let src_stride = &f.cur.stride; let lr_stride: *const ptrdiff_t = (f.sr_cur.p.stride).as_mut_ptr(); let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); let tt_off = have_tt * sby * ((4 as c_int) << seq_hdr.sb128); @@ -207,10 +207,9 @@ pub(crate) unsafe fn rav1d_copy_lpf( c, dst[0], *lr_stride.offset(0), - (*src.offset(0)).offset( - -(offset as isize * BD::pxstride(*src_stride.offset(0) as usize) as isize), - ), - *src_stride.offset(0), + (*src.offset(0)) + .offset(-(offset as isize * BD::pxstride(src_stride[0] as usize) as isize)), + src_stride[0], 0, seq_hdr.sb128, y_stripe, @@ -228,18 +227,17 @@ pub(crate) unsafe fn rav1d_copy_lpf( } if have_tt != 0 && resize != 0 { let cdef_off_y: ptrdiff_t = - (sby * 4) as isize * BD::pxstride(*src_stride.offset(0) as usize) as isize; + (sby * 4) as isize * BD::pxstride(src_stride[0] as usize) as isize; backup_lpf::( c, cdef_line_buf .as_mut_ptr() .add(f.lf.cdef_lpf_line[0]) .offset(cdef_off_y), - *src_stride.offset(0), - (*src.offset(0)).offset( - -offset as isize * BD::pxstride(*src_stride.offset(0) as usize) as isize, - ), - *src_stride.offset(0), + src_stride[0], + (*src.offset(0)) + .offset(-offset as isize * BD::pxstride(src_stride[0] as usize) as isize), + src_stride[0], 0, seq_hdr.sb128, 
y_stripe, @@ -267,7 +265,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( let offset_uv = offset >> ss_ver; let y_stripe_0 = (sby << 6 - ss_ver + seq_hdr.sb128) - offset_uv; let cdef_off_uv: ptrdiff_t = - sby as isize * 4 * BD::pxstride(*src_stride.offset(1) as usize) as isize; + sby as isize * 4 * BD::pxstride(src_stride[1] as usize) as isize; if seq_hdr.cdef != 0 || restore_planes & LR_RESTORE_U as c_int != 0 { if restore_planes & LR_RESTORE_U as c_int != 0 || resize == 0 { backup_lpf::( @@ -275,9 +273,9 @@ pub(crate) unsafe fn rav1d_copy_lpf( dst[1], *lr_stride.offset(1), (*src.offset(1)).offset( - -offset_uv as isize * BD::pxstride(*src_stride.offset(1) as usize) as isize, + -offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize, ), - *src_stride.offset(1), + src_stride[1], ss_ver, seq_hdr.sb128, y_stripe_0, @@ -300,11 +298,11 @@ pub(crate) unsafe fn rav1d_copy_lpf( .as_mut_ptr() .add(f.lf.cdef_lpf_line[1]) .offset(cdef_off_uv), - *src_stride.offset(1), + src_stride[1], (*src.offset(1)).offset( - -offset_uv as isize * BD::pxstride(*src_stride.offset(1) as usize) as isize, + -offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize, ), - *src_stride.offset(1), + src_stride[1], ss_ver, seq_hdr.sb128, y_stripe_0, @@ -328,9 +326,9 @@ pub(crate) unsafe fn rav1d_copy_lpf( dst[2], *lr_stride.offset(1), (*src.offset(2)).offset( - -offset_uv as isize * BD::pxstride(*src_stride.offset(1) as usize) as isize, + -offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize, ), - *src_stride.offset(1), + src_stride[1], ss_ver, seq_hdr.sb128, y_stripe_0, @@ -353,11 +351,11 @@ pub(crate) unsafe fn rav1d_copy_lpf( .as_mut_ptr() .add(f.lf.cdef_lpf_line[2]) .offset(cdef_off_uv), - *src_stride.offset(1), + src_stride[1], (*src.offset(2)).offset( - -offset_uv as isize * BD::pxstride(*src_stride.offset(1) as usize) as isize, + -offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize, ), - *src_stride.offset(1), + src_stride[1], ss_ver, seq_hdr.sb128, 
y_stripe_0, From 5c37e6c4a8c6dcc466f0810b4d2714fe2bb4359f Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Tue, 30 Jan 2024 03:30:07 -0500 Subject: [PATCH 02/26] `fn rav1d_copy_lpf`: use slice instead of pointer for `lr_stride` --- src/lf_apply.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 90c5bfeac..9cd8fc705 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -182,16 +182,16 @@ pub(crate) unsafe fn rav1d_copy_lpf( let resize = (frame_hdr.size.width[0] != frame_hdr.size.width[1]) as c_int; let offset = 8 * (sby != 0) as c_int; let src_stride = &f.cur.stride; - let lr_stride: *const ptrdiff_t = (f.sr_cur.p.stride).as_mut_ptr(); + let lr_stride = &f.sr_cur.p.stride; let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); let tt_off = have_tt * sby * ((4 as c_int) << seq_hdr.sb128); let dst: [*mut BD::Pixel; 3] = [ (f.lf.lr_lpf_line[0] as *mut BD::Pixel) - .offset(tt_off as isize * BD::pxstride(*lr_stride.offset(0) as usize) as isize), + .offset(tt_off as isize * BD::pxstride(lr_stride[0] as usize) as isize), (f.lf.lr_lpf_line[1] as *mut BD::Pixel) - .offset(tt_off as isize * BD::pxstride(*lr_stride.offset(1) as usize) as isize), + .offset(tt_off as isize * BD::pxstride(lr_stride[1] as usize) as isize), (f.lf.lr_lpf_line[2] as *mut BD::Pixel) - .offset(tt_off as isize * BD::pxstride(*lr_stride.offset(1) as usize) as isize), + .offset(tt_off as isize * BD::pxstride(lr_stride[1] as usize) as isize), ]; let restore_planes = f.lf.restore_planes; @@ -206,7 +206,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( backup_lpf::( c, dst[0], - *lr_stride.offset(0), + lr_stride[0], (*src.offset(0)) .offset(-(offset as isize * BD::pxstride(src_stride[0] as usize) as isize)), src_stride[0], @@ -271,7 +271,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( backup_lpf::( c, dst[1], - *lr_stride.offset(1), + lr_stride[1], (*src.offset(1)).offset( -offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize, ), @@ -324,7 
+324,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( backup_lpf::( c, dst[2], - *lr_stride.offset(1), + lr_stride[1], (*src.offset(2)).offset( -offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize, ), From d76972592a0075b748833a61bf61645a4e50ecf9 Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Tue, 30 Jan 2024 03:35:52 -0500 Subject: [PATCH 03/26] `fn rav1d_copy_lpf`: make `src` parameter a slice reference --- src/lf_apply.rs | 16 +++++++--------- src/recon.rs | 2 +- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 9cd8fc705..9ebd954c8 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -174,7 +174,7 @@ unsafe fn backup_lpf( pub(crate) unsafe fn rav1d_copy_lpf( c: &Rav1dContext, f: &mut Rav1dFrameData, - src: *const *mut BD::Pixel, + src: &[*mut BD::Pixel; 3], sby: c_int, ) { let have_tt = (c.tc.len() > 1) as c_int; @@ -207,8 +207,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( c, dst[0], lr_stride[0], - (*src.offset(0)) - .offset(-(offset as isize * BD::pxstride(src_stride[0] as usize) as isize)), + src[0].offset(-(offset as isize * BD::pxstride(src_stride[0] as usize) as isize)), src_stride[0], 0, seq_hdr.sb128, @@ -235,8 +234,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( .add(f.lf.cdef_lpf_line[0]) .offset(cdef_off_y), src_stride[0], - (*src.offset(0)) - .offset(-offset as isize * BD::pxstride(src_stride[0] as usize) as isize), + src[0].offset(-offset as isize * BD::pxstride(src_stride[0] as usize) as isize), src_stride[0], 0, seq_hdr.sb128, @@ -272,7 +270,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( c, dst[1], lr_stride[1], - (*src.offset(1)).offset( + src[1].offset( -offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize, ), src_stride[1], @@ -299,7 +297,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( .add(f.lf.cdef_lpf_line[1]) .offset(cdef_off_uv), src_stride[1], - (*src.offset(1)).offset( + src[1].offset( -offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize, ), src_stride[1], @@ 
-325,7 +323,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( c, dst[2], lr_stride[1], - (*src.offset(2)).offset( + src[2].offset( -offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize, ), src_stride[1], @@ -352,7 +350,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( .add(f.lf.cdef_lpf_line[2]) .offset(cdef_off_uv), src_stride[1], - (*src.offset(2)).offset( + src[2].offset( -offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize, ), src_stride[1], diff --git a/src/recon.rs b/src/recon.rs index 4c5c54984..9c044a022 100644 --- a/src/recon.rs +++ b/src/recon.rs @@ -4540,7 +4540,7 @@ pub(crate) unsafe fn rav1d_filter_sbrow_deblock_rows( rav1d_loopfilter_sbrow_rows::(f, &p, mask_offset as usize, sby); } if cdef != 0 || f.lf.restore_planes != 0 { - rav1d_copy_lpf::(c, &mut *f, p.as_ptr(), sby); + rav1d_copy_lpf::(c, &mut *f, &p, sby); } } From 602b043bedc7a40a0be607a5ec08d87159dbfdbc Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Tue, 30 Jan 2024 04:07:24 -0500 Subject: [PATCH 04/26] `fn rav1d_loopfilter_sbrow_cols`: use slice reference instead of raw pointer --- src/lf_apply.rs | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 9ebd954c8..844751076 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -608,48 +608,46 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( let bx4: c_int = if x & is_sb64 != 0 { 16 } else { 0 }; let cbx4 = bx4 >> ss_hor; x >>= is_sb64; - let y_hmask: *mut [u16; 2] = - ((*lflvl.offset(x as isize)).filter_y[0][bx4 as usize]).as_mut_ptr(); + let y_hmask: &mut [[u16; 2]; 3] = + &mut (*lflvl.offset(x as isize)).filter_y[0][bx4 as usize]; let mut y = starty4 as c_uint; let mut mask = (1 as c_uint) << y; while y < endy4 { let sidx = (mask >= 0x10000) as c_int; let smask: c_uint = mask >> (sidx << 4); - let idx = 2 as c_int - * ((*y_hmask.offset(2))[sidx as usize] as c_uint & smask != 0) as c_int - + ((*y_hmask.offset(1))[sidx as usize] as 
c_uint & smask != 0) as c_int; - let ref mut fresh0 = (*y_hmask.offset(2))[sidx as usize]; + let idx = 2 as c_int * (y_hmask[2][sidx as usize] as c_uint & smask != 0) as c_int + + (y_hmask[1][sidx as usize] as c_uint & smask != 0) as c_int; + let ref mut fresh0 = y_hmask[2][sidx as usize]; *fresh0 = (*fresh0 as c_uint & !smask) as u16; - let ref mut fresh1 = (*y_hmask.offset(1))[sidx as usize]; + let ref mut fresh1 = y_hmask[1][sidx as usize]; *fresh1 = (*fresh1 as c_uint & !smask) as u16; - let ref mut fresh2 = (*y_hmask.offset(0))[sidx as usize]; + let ref mut fresh2 = y_hmask[0][sidx as usize]; *fresh2 = (*fresh2 as c_uint & !smask) as u16; - let ref mut fresh3 = (*y_hmask.offset(cmp::min( + let ref mut fresh3 = y_hmask[cmp::min( idx, lpf_y[y.wrapping_sub(starty4 as c_uint) as usize] as c_int, - ) as isize))[sidx as usize]; + ) as usize][sidx as usize]; *fresh3 = (*fresh3 as c_uint | smask) as u16; y = y.wrapping_add(1); mask <<= 1; } if f.cur.p.layout != Rav1dPixelLayout::I400 { - let uv_hmask: *mut [u16; 2] = - ((*lflvl.offset(x as isize)).filter_uv[0][cbx4 as usize]).as_mut_ptr(); + let uv_hmask: &mut [[u16; 2]; 2] = + &mut (*lflvl.offset(x as isize)).filter_uv[0][cbx4 as usize]; let mut y_0: c_uint = (starty4 >> ss_ver) as c_uint; let mut uv_mask: c_uint = ((1 as c_int) << y_0) as c_uint; while y_0 < uv_endy4 { let sidx_0 = (uv_mask >= vmax) as c_int; let smask_0: c_uint = uv_mask >> (sidx_0 << 4 - ss_ver); - let idx_0 = - ((*uv_hmask.offset(1))[sidx_0 as usize] as c_uint & smask_0 != 0) as c_int; - let ref mut fresh4 = (*uv_hmask.offset(1))[sidx_0 as usize]; + let idx_0 = (uv_hmask[1][sidx_0 as usize] as c_uint & smask_0 != 0) as c_int; + let ref mut fresh4 = uv_hmask[1][sidx_0 as usize]; *fresh4 = (*fresh4 as c_uint & !smask_0) as u16; - let ref mut fresh5 = (*uv_hmask.offset(0))[sidx_0 as usize]; + let ref mut fresh5 = uv_hmask[0][sidx_0 as usize]; *fresh5 = (*fresh5 as c_uint & !smask_0) as u16; - let ref mut fresh6 = (*uv_hmask.offset(cmp::min( + let 
ref mut fresh6 = uv_hmask[cmp::min( idx_0, lpf_uv[y_0.wrapping_sub((starty4 >> ss_ver) as c_uint) as usize] as c_int, - ) as isize))[sidx_0 as usize]; + ) as usize][sidx_0 as usize]; *fresh6 = (*fresh6 as c_uint | smask_0) as u16; y_0 = y_0.wrapping_add(1); uv_mask <<= 1; From 87342bdaeb40124bbc4b7047408c42535b999a13 Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Tue, 30 Jan 2024 04:28:20 -0500 Subject: [PATCH 05/26] `fn rav1d_loopfilter_sbrow_cols`: clean up loops Use `for` instead of `while` --- src/lf_apply.rs | 40 ++++++++++++++-------------------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 844751076..d3bf58126 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -610,9 +610,8 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( x >>= is_sb64; let y_hmask: &mut [[u16; 2]; 3] = &mut (*lflvl.offset(x as isize)).filter_y[0][bx4 as usize]; - let mut y = starty4 as c_uint; - let mut mask = (1 as c_uint) << y; - while y < endy4 { + for y in starty4 as u32..endy4 as u32 { + let mask: u32 = 1 << y; let sidx = (mask >= 0x10000) as c_int; let smask: c_uint = mask >> (sidx << 4); let idx = 2 as c_int * (y_hmask[2][sidx as usize] as c_uint & smask != 0) as c_int @@ -628,15 +627,12 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( lpf_y[y.wrapping_sub(starty4 as c_uint) as usize] as c_int, ) as usize][sidx as usize]; *fresh3 = (*fresh3 as c_uint | smask) as u16; - y = y.wrapping_add(1); - mask <<= 1; } if f.cur.p.layout != Rav1dPixelLayout::I400 { let uv_hmask: &mut [[u16; 2]; 2] = &mut (*lflvl.offset(x as isize)).filter_uv[0][cbx4 as usize]; - let mut y_0: c_uint = (starty4 >> ss_ver) as c_uint; - let mut uv_mask: c_uint = ((1 as c_int) << y_0) as c_uint; - while y_0 < uv_endy4 { + for y in (starty4 >> ss_ver) as u32..uv_endy4 { + let uv_mask: u32 = 1 << y; let sidx_0 = (uv_mask >= vmax) as c_int; let smask_0: c_uint = uv_mask >> (sidx_0 << 4 - ss_ver); let idx_0 = (uv_hmask[1][sidx_0 as usize] as 
c_uint & smask_0 != 0) as c_int; @@ -646,11 +642,9 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( *fresh5 = (*fresh5 as c_uint & !smask_0) as u16; let ref mut fresh6 = uv_hmask[cmp::min( idx_0, - lpf_uv[y_0.wrapping_sub((starty4 >> ss_ver) as c_uint) as usize] as c_int, + lpf_uv[y.wrapping_sub((starty4 >> ss_ver) as c_uint) as usize] as c_int, ) as usize][sidx_0 as usize]; *fresh6 = (*fresh6 as c_uint | smask_0) as u16; - y_0 = y_0.wrapping_add(1); - uv_mask <<= 1; } } lpf_y = &lpf_y[halign as usize..]; @@ -664,11 +658,10 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( let y_vmask: *mut [u16; 2] = ((*lflvl.offset(x as isize)).filter_y[1][starty4 as usize]).as_mut_ptr(); let w: c_uint = cmp::min(32 as c_int, f.w4 - (x << 5)) as c_uint; - let mut mask_0: c_uint = 1; - let mut i: c_uint = 0; - while i < w { - let sidx_1 = (mask_0 >= 0x10000 as c_uint) as c_int; - let smask_1: c_uint = mask_0 >> (sidx_1 << 4); + for i in 0..w { + let mask: u32 = 1 << i; + let sidx_1 = (mask >= 0x10000 as c_uint) as c_int; + let smask_1: c_uint = mask >> (sidx_1 << 4); let idx_1 = 2 as c_int * ((*y_vmask.offset(2))[sidx_1 as usize] as c_uint & smask_1 != 0) as c_int + ((*y_vmask.offset(1))[sidx_1 as usize] as c_uint & smask_1 != 0) as c_int; @@ -682,19 +675,16 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( .offset(cmp::min(idx_1, (*a).tx_lpf_y[i as usize] as c_int) as isize)) [sidx_1 as usize]; *fresh10 = (*fresh10 as c_uint | smask_1) as u16; - mask_0 <<= 1; - i = i.wrapping_add(1); } if f.cur.p.layout != Rav1dPixelLayout::I400 { let cw: c_uint = w.wrapping_add(ss_hor as c_uint) >> ss_hor; let uv_vmask: *mut [u16; 2] = ((*lflvl.offset(x as isize)).filter_uv[1] [(starty4 >> ss_ver) as usize]) .as_mut_ptr(); - let mut uv_mask_0: c_uint = 1; - let mut i_0: c_uint = 0; - while i_0 < cw { - let sidx_2 = (uv_mask_0 >= hmax) as c_int; - let smask_2: c_uint = uv_mask_0 >> (sidx_2 << 4 - ss_hor); + for i in 0..cw { + let uv_mask: u32 = 1 << i; + let sidx_2 = (uv_mask >= hmax) 
as c_int; + let smask_2: c_uint = uv_mask >> (sidx_2 << 4 - ss_hor); let idx_2 = ((*uv_vmask.offset(1))[sidx_2 as usize] as c_uint & smask_2 != 0) as c_int; let ref mut fresh11 = (*uv_vmask.offset(1))[sidx_2 as usize]; @@ -702,11 +692,9 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( let ref mut fresh12 = (*uv_vmask.offset(0))[sidx_2 as usize]; *fresh12 = (*fresh12 as c_uint & !smask_2) as u16; let ref mut fresh13 = (*uv_vmask - .offset(cmp::min(idx_2, (*a).tx_lpf_uv[i_0 as usize] as c_int) as isize)) + .offset(cmp::min(idx_2, (*a).tx_lpf_uv[i as usize] as c_int) as isize)) [sidx_2 as usize]; *fresh13 = (*fresh13 as c_uint | smask_2) as u16; - uv_mask_0 <<= 1; - i_0 = i_0.wrapping_add(1); } } a = a.offset(1); From 7935e6ec25c3a2943a310db313b5a454649e11ea Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Tue, 30 Jan 2024 04:36:20 -0500 Subject: [PATCH 06/26] `fn rav1d_loopfilter_sbrow_cols`: avoid repeated casts to `usize` --- src/lf_apply.rs | 56 ++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index d3bf58126..468bd03e3 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -612,20 +612,20 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( &mut (*lflvl.offset(x as isize)).filter_y[0][bx4 as usize]; for y in starty4 as u32..endy4 as u32 { let mask: u32 = 1 << y; - let sidx = (mask >= 0x10000) as c_int; + let sidx = (mask >= 0x10000) as usize; let smask: c_uint = mask >> (sidx << 4); - let idx = 2 as c_int * (y_hmask[2][sidx as usize] as c_uint & smask != 0) as c_int - + (y_hmask[1][sidx as usize] as c_uint & smask != 0) as c_int; - let ref mut fresh0 = y_hmask[2][sidx as usize]; + let idx = 2 as c_int * (y_hmask[2][sidx] as c_uint & smask != 0) as c_int + + (y_hmask[1][sidx] as c_uint & smask != 0) as c_int; + let ref mut fresh0 = y_hmask[2][sidx]; *fresh0 = (*fresh0 as c_uint & !smask) as u16; - let ref mut fresh1 = y_hmask[1][sidx as usize]; + let ref mut 
fresh1 = y_hmask[1][sidx]; *fresh1 = (*fresh1 as c_uint & !smask) as u16; - let ref mut fresh2 = y_hmask[0][sidx as usize]; + let ref mut fresh2 = y_hmask[0][sidx]; *fresh2 = (*fresh2 as c_uint & !smask) as u16; let ref mut fresh3 = y_hmask[cmp::min( idx, lpf_y[y.wrapping_sub(starty4 as c_uint) as usize] as c_int, - ) as usize][sidx as usize]; + ) as usize][sidx]; *fresh3 = (*fresh3 as c_uint | smask) as u16; } if f.cur.p.layout != Rav1dPixelLayout::I400 { @@ -633,17 +633,17 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( &mut (*lflvl.offset(x as isize)).filter_uv[0][cbx4 as usize]; for y in (starty4 >> ss_ver) as u32..uv_endy4 { let uv_mask: u32 = 1 << y; - let sidx_0 = (uv_mask >= vmax) as c_int; - let smask_0: c_uint = uv_mask >> (sidx_0 << 4 - ss_ver); - let idx_0 = (uv_hmask[1][sidx_0 as usize] as c_uint & smask_0 != 0) as c_int; - let ref mut fresh4 = uv_hmask[1][sidx_0 as usize]; + let sidx = (uv_mask >= vmax) as usize; + let smask_0: c_uint = uv_mask >> (sidx << 4 - ss_ver); + let idx_0 = (uv_hmask[1][sidx] as c_uint & smask_0 != 0) as c_int; + let ref mut fresh4 = uv_hmask[1][sidx]; *fresh4 = (*fresh4 as c_uint & !smask_0) as u16; - let ref mut fresh5 = uv_hmask[0][sidx_0 as usize]; + let ref mut fresh5 = uv_hmask[0][sidx]; *fresh5 = (*fresh5 as c_uint & !smask_0) as u16; let ref mut fresh6 = uv_hmask[cmp::min( idx_0, lpf_uv[y.wrapping_sub((starty4 >> ss_ver) as c_uint) as usize] as c_int, - ) as usize][sidx_0 as usize]; + ) as usize][sidx]; *fresh6 = (*fresh6 as c_uint | smask_0) as u16; } } @@ -660,20 +660,19 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( let w: c_uint = cmp::min(32 as c_int, f.w4 - (x << 5)) as c_uint; for i in 0..w { let mask: u32 = 1 << i; - let sidx_1 = (mask >= 0x10000 as c_uint) as c_int; - let smask_1: c_uint = mask >> (sidx_1 << 4); + let sidx = (mask >= 0x10000) as usize; + let smask_1: c_uint = mask >> (sidx << 4); let idx_1 = 2 as c_int - * ((*y_vmask.offset(2))[sidx_1 as usize] as c_uint & smask_1 != 0) as c_int - 
+ ((*y_vmask.offset(1))[sidx_1 as usize] as c_uint & smask_1 != 0) as c_int; - let ref mut fresh7 = (*y_vmask.offset(2))[sidx_1 as usize]; + * ((*y_vmask.offset(2))[sidx] as c_uint & smask_1 != 0) as c_int + + ((*y_vmask.offset(1))[sidx] as c_uint & smask_1 != 0) as c_int; + let ref mut fresh7 = (*y_vmask.offset(2))[sidx]; *fresh7 = (*fresh7 as c_uint & !smask_1) as u16; - let ref mut fresh8 = (*y_vmask.offset(1))[sidx_1 as usize]; + let ref mut fresh8 = (*y_vmask.offset(1))[sidx]; *fresh8 = (*fresh8 as c_uint & !smask_1) as u16; - let ref mut fresh9 = (*y_vmask.offset(0))[sidx_1 as usize]; + let ref mut fresh9 = (*y_vmask.offset(0))[sidx]; *fresh9 = (*fresh9 as c_uint & !smask_1) as u16; let ref mut fresh10 = (*y_vmask - .offset(cmp::min(idx_1, (*a).tx_lpf_y[i as usize] as c_int) as isize)) - [sidx_1 as usize]; + .offset(cmp::min(idx_1, (*a).tx_lpf_y[i as usize] as c_int) as isize))[sidx]; *fresh10 = (*fresh10 as c_uint | smask_1) as u16; } if f.cur.p.layout != Rav1dPixelLayout::I400 { @@ -683,17 +682,16 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( .as_mut_ptr(); for i in 0..cw { let uv_mask: u32 = 1 << i; - let sidx_2 = (uv_mask >= hmax) as c_int; - let smask_2: c_uint = uv_mask >> (sidx_2 << 4 - ss_hor); - let idx_2 = - ((*uv_vmask.offset(1))[sidx_2 as usize] as c_uint & smask_2 != 0) as c_int; - let ref mut fresh11 = (*uv_vmask.offset(1))[sidx_2 as usize]; + let sidx = (uv_mask >= hmax) as usize; + let smask_2: c_uint = uv_mask >> (sidx << 4 - ss_hor); + let idx_2 = ((*uv_vmask.offset(1))[sidx] as c_uint & smask_2 != 0) as c_int; + let ref mut fresh11 = (*uv_vmask.offset(1))[sidx]; *fresh11 = (*fresh11 as c_uint & !smask_2) as u16; - let ref mut fresh12 = (*uv_vmask.offset(0))[sidx_2 as usize]; + let ref mut fresh12 = (*uv_vmask.offset(0))[sidx]; *fresh12 = (*fresh12 as c_uint & !smask_2) as u16; let ref mut fresh13 = (*uv_vmask .offset(cmp::min(idx_2, (*a).tx_lpf_uv[i as usize] as c_int) as isize)) - [sidx_2 as usize]; + [sidx]; *fresh13 = (*fresh13 as 
c_uint | smask_2) as u16; } } From 6e15020ad39d38ae4fef9bcbb2f4c082d922c576 Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Tue, 30 Jan 2024 07:21:11 -0500 Subject: [PATCH 07/26] `fn rav1d_loopfilter_sbrow_cols`: clean up masks Define masks as `u16` and simplify code to look more like original C code. --- src/lf_apply.rs | 79 ++++++++++++++++++++----------------------------- 1 file changed, 32 insertions(+), 47 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 468bd03e3..cf463cb56 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -613,20 +613,16 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( for y in starty4 as u32..endy4 as u32 { let mask: u32 = 1 << y; let sidx = (mask >= 0x10000) as usize; - let smask: c_uint = mask >> (sidx << 4); - let idx = 2 as c_int * (y_hmask[2][sidx] as c_uint & smask != 0) as c_int - + (y_hmask[1][sidx] as c_uint & smask != 0) as c_int; - let ref mut fresh0 = y_hmask[2][sidx]; - *fresh0 = (*fresh0 as c_uint & !smask) as u16; - let ref mut fresh1 = y_hmask[1][sidx]; - *fresh1 = (*fresh1 as c_uint & !smask) as u16; - let ref mut fresh2 = y_hmask[0][sidx]; - *fresh2 = (*fresh2 as c_uint & !smask) as u16; - let ref mut fresh3 = y_hmask[cmp::min( + let smask = (mask >> (sidx << 4)) as u16; + let idx = 2 * (y_hmask[2][sidx] & smask != 0) as usize + + (y_hmask[1][sidx] & smask != 0) as usize; + y_hmask[2][sidx] &= !smask; + y_hmask[1][sidx] &= !smask; + y_hmask[0][sidx] &= !smask; + y_hmask[cmp::min( idx, - lpf_y[y.wrapping_sub(starty4 as c_uint) as usize] as c_int, - ) as usize][sidx]; - *fresh3 = (*fresh3 as c_uint | smask) as u16; + lpf_y[y.wrapping_sub(starty4 as c_uint) as usize] as usize, + )][sidx] |= smask; } if f.cur.p.layout != Rav1dPixelLayout::I400 { let uv_hmask: &mut [[u16; 2]; 2] = @@ -634,17 +630,14 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( for y in (starty4 >> ss_ver) as u32..uv_endy4 { let uv_mask: u32 = 1 << y; let sidx = (uv_mask >= vmax) as usize; - let smask_0: c_uint = uv_mask >> 
(sidx << 4 - ss_ver); - let idx_0 = (uv_hmask[1][sidx] as c_uint & smask_0 != 0) as c_int; - let ref mut fresh4 = uv_hmask[1][sidx]; - *fresh4 = (*fresh4 as c_uint & !smask_0) as u16; - let ref mut fresh5 = uv_hmask[0][sidx]; - *fresh5 = (*fresh5 as c_uint & !smask_0) as u16; - let ref mut fresh6 = uv_hmask[cmp::min( - idx_0, - lpf_uv[y.wrapping_sub((starty4 >> ss_ver) as c_uint) as usize] as c_int, - ) as usize][sidx]; - *fresh6 = (*fresh6 as c_uint | smask_0) as u16; + let smask = (uv_mask >> (sidx << 4 - ss_ver)) as u16; + let idx = (uv_hmask[1][sidx] & smask != 0) as usize; + uv_hmask[1][sidx] &= !smask; + uv_hmask[0][sidx] &= !smask; + uv_hmask[cmp::min( + idx, + lpf_uv[y.wrapping_sub((starty4 >> ss_ver) as u32) as usize] as usize, + ) as usize][sidx] |= smask; } } lpf_y = &lpf_y[halign as usize..]; @@ -661,19 +654,14 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( for i in 0..w { let mask: u32 = 1 << i; let sidx = (mask >= 0x10000) as usize; - let smask_1: c_uint = mask >> (sidx << 4); - let idx_1 = 2 as c_int - * ((*y_vmask.offset(2))[sidx] as c_uint & smask_1 != 0) as c_int - + ((*y_vmask.offset(1))[sidx] as c_uint & smask_1 != 0) as c_int; - let ref mut fresh7 = (*y_vmask.offset(2))[sidx]; - *fresh7 = (*fresh7 as c_uint & !smask_1) as u16; - let ref mut fresh8 = (*y_vmask.offset(1))[sidx]; - *fresh8 = (*fresh8 as c_uint & !smask_1) as u16; - let ref mut fresh9 = (*y_vmask.offset(0))[sidx]; - *fresh9 = (*fresh9 as c_uint & !smask_1) as u16; - let ref mut fresh10 = (*y_vmask - .offset(cmp::min(idx_1, (*a).tx_lpf_y[i as usize] as c_int) as isize))[sidx]; - *fresh10 = (*fresh10 as c_uint | smask_1) as u16; + let smask = (mask >> (sidx << 4)) as u16; + let idx = 2 * ((*y_vmask.offset(2))[sidx] & smask != 0) as usize + + ((*y_vmask.offset(1))[sidx] & smask != 0) as usize; + (*y_vmask.offset(2))[sidx] &= !smask; + (*y_vmask.offset(1))[sidx] &= !smask; + (*y_vmask.offset(0))[sidx] &= !smask; + (*y_vmask.offset(cmp::min(idx, (*a).tx_lpf_y[i as usize] as usize) 
as isize)) + [sidx] |= smask; } if f.cur.p.layout != Rav1dPixelLayout::I400 { let cw: c_uint = w.wrapping_add(ss_hor as c_uint) >> ss_hor; @@ -683,16 +671,13 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( for i in 0..cw { let uv_mask: u32 = 1 << i; let sidx = (uv_mask >= hmax) as usize; - let smask_2: c_uint = uv_mask >> (sidx << 4 - ss_hor); - let idx_2 = ((*uv_vmask.offset(1))[sidx] as c_uint & smask_2 != 0) as c_int; - let ref mut fresh11 = (*uv_vmask.offset(1))[sidx]; - *fresh11 = (*fresh11 as c_uint & !smask_2) as u16; - let ref mut fresh12 = (*uv_vmask.offset(0))[sidx]; - *fresh12 = (*fresh12 as c_uint & !smask_2) as u16; - let ref mut fresh13 = (*uv_vmask - .offset(cmp::min(idx_2, (*a).tx_lpf_uv[i as usize] as c_int) as isize)) - [sidx]; - *fresh13 = (*fresh13 as c_uint | smask_2) as u16; + let smask = (uv_mask >> (sidx << 4 - ss_hor)) as u16; + let idx = ((*uv_vmask.offset(1))[sidx] & smask != 0) as usize; + (*uv_vmask.offset(1))[sidx] &= !smask; + (*uv_vmask.offset(0))[sidx] &= !smask; + (*uv_vmask + .offset(cmp::min(idx, (*a).tx_lpf_uv[i as usize] as usize) as isize)) + [sidx] |= smask; } } a = a.offset(1); From 3063ae11698df65ba3bba5b04987dee3817d4247 Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Tue, 30 Jan 2024 07:29:38 -0500 Subject: [PATCH 08/26] Use slice instead of raw pointer --- src/lf_apply.rs | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index cf463cb56..4bfcd56e2 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -648,36 +648,32 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( let mut a: *const BlockContext; a = &mut *(f.a).offset((f.sb128w * (start_of_tile_row - 1)) as isize) as *mut BlockContext; for x in 0..f.sb128w { - let y_vmask: *mut [u16; 2] = - ((*lflvl.offset(x as isize)).filter_y[1][starty4 as usize]).as_mut_ptr(); + let y_vmask: &mut [[u16; 2]; 3] = + &mut (*lflvl.offset(x as isize)).filter_y[1][starty4 as usize]; let w: c_uint 
= cmp::min(32 as c_int, f.w4 - (x << 5)) as c_uint; for i in 0..w { let mask: u32 = 1 << i; let sidx = (mask >= 0x10000) as usize; let smask = (mask >> (sidx << 4)) as u16; - let idx = 2 * ((*y_vmask.offset(2))[sidx] & smask != 0) as usize - + ((*y_vmask.offset(1))[sidx] & smask != 0) as usize; - (*y_vmask.offset(2))[sidx] &= !smask; - (*y_vmask.offset(1))[sidx] &= !smask; - (*y_vmask.offset(0))[sidx] &= !smask; - (*y_vmask.offset(cmp::min(idx, (*a).tx_lpf_y[i as usize] as usize) as isize)) - [sidx] |= smask; + let idx = 2 * (y_vmask[2][sidx] & smask != 0) as usize + + (y_vmask[1][sidx] & smask != 0) as usize; + y_vmask[2][sidx] &= !smask; + y_vmask[1][sidx] &= !smask; + y_vmask[0][sidx] &= !smask; + y_vmask[cmp::min(idx, (*a).tx_lpf_y[i as usize] as usize)][sidx] |= smask; } if f.cur.p.layout != Rav1dPixelLayout::I400 { let cw: c_uint = w.wrapping_add(ss_hor as c_uint) >> ss_hor; - let uv_vmask: *mut [u16; 2] = ((*lflvl.offset(x as isize)).filter_uv[1] - [(starty4 >> ss_ver) as usize]) - .as_mut_ptr(); + let uv_vmask: &mut [[u16; 2]; 2] = + &mut (*lflvl.offset(x as isize)).filter_uv[1][(starty4 >> ss_ver) as usize]; for i in 0..cw { let uv_mask: u32 = 1 << i; let sidx = (uv_mask >= hmax) as usize; let smask = (uv_mask >> (sidx << 4 - ss_hor)) as u16; - let idx = ((*uv_vmask.offset(1))[sidx] & smask != 0) as usize; - (*uv_vmask.offset(1))[sidx] &= !smask; - (*uv_vmask.offset(0))[sidx] &= !smask; - (*uv_vmask - .offset(cmp::min(idx, (*a).tx_lpf_uv[i as usize] as usize) as isize)) - [sidx] |= smask; + let idx = (uv_vmask[1][sidx] & smask != 0) as usize; + uv_vmask[1][sidx] &= !smask; + uv_vmask[0][sidx] &= !smask; + uv_vmask[cmp::min(idx, (*a).tx_lpf_uv[i as usize] as usize)][sidx] |= smask; } } a = a.offset(1); From 61f83f10e8b5a4f30fccee9a513dc630432a983a Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Tue, 30 Jan 2024 07:59:55 -0500 Subject: [PATCH 09/26] `fn rav1d_loopfilter_sbrow_cols`: make `lpf_y` and `lpf_uv` slices --- src/lf_apply.rs | 13 +++++-------- 1 
file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 4bfcd56e2..a7fcec083 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -586,7 +586,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( let starty4 = (sby & is_sb64) << 4; let sbsz = 32 >> is_sb64; let sbl2 = 5 - is_sb64; - let halign = f.bh + 31 & !(31 as c_int); + let halign = (f.bh + 31 & !31) as usize; let ss_ver = (f.cur.p.layout == Rav1dPixelLayout::I420) as c_int; let ss_hor = (f.cur.p.layout != Rav1dPixelLayout::I444) as c_int; let vmask = 16 >> ss_ver; @@ -619,10 +619,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( y_hmask[2][sidx] &= !smask; y_hmask[1][sidx] &= !smask; y_hmask[0][sidx] &= !smask; - y_hmask[cmp::min( - idx, - lpf_y[y.wrapping_sub(starty4 as c_uint) as usize] as usize, - )][sidx] |= smask; + y_hmask[cmp::min(idx, lpf_y[(y - starty4 as u32) as usize] as usize)][sidx] |= smask; } if f.cur.p.layout != Rav1dPixelLayout::I400 { let uv_hmask: &mut [[u16; 2]; 2] = @@ -636,12 +633,12 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( uv_hmask[0][sidx] &= !smask; uv_hmask[cmp::min( idx, - lpf_uv[y.wrapping_sub((starty4 >> ss_ver) as u32) as usize] as usize, + lpf_uv[(y - (starty4 >> ss_ver) as u32) as usize] as usize, ) as usize][sidx] |= smask; } } - lpf_y = &lpf_y[halign as usize..]; - lpf_uv = &lpf_uv[(halign >> ss_ver) as usize..]; + lpf_y = &lpf_y[halign..]; + lpf_uv = &lpf_uv[(halign >> ss_ver)..]; tile_col += 1; } if start_of_tile_row != 0 { From f8daee4dfea3ccbb17c48b13da6b39182246de5f Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Tue, 30 Jan 2024 08:14:00 -0500 Subject: [PATCH 10/26] Clean up types and casts --- src/lf_apply.rs | 44 +++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index a7fcec083..fadc8d3e8 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -421,7 +421,7 @@ unsafe fn filter_plane_cols_y( dst.add(x * 4).cast(), ls, 
hmask.as_mut_ptr(), - &lvl[x as usize], + lvl[x..].as_ptr(), b4_stride, &f.lf.lim_lut.0, endy4 - starty4, @@ -583,7 +583,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( let mut have_left; let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); let is_sb64 = (seq_hdr.sb128 == 0) as c_int; - let starty4 = (sby & is_sb64) << 4; + let starty4 = ((sby & is_sb64) as u32) << 4; let sbsz = 32 >> is_sb64; let sbl2 = 5 - is_sb64; let halign = (f.bh + 31 & !31) as usize; @@ -593,8 +593,8 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( let hmask = 16 >> ss_hor; let vmax = (1 as c_uint) << vmask; let hmax = (1 as c_uint) << hmask; - let endy4 = (starty4 + cmp::min(f.h4 - sby * sbsz, sbsz)) as c_uint; - let uv_endy4: c_uint = endy4.wrapping_add(ss_ver as c_uint) >> ss_ver; + let endy4 = starty4 + cmp::min(f.h4 - sby * sbsz, sbsz) as u32; + let uv_endy4 = (endy4 + ss_ver as u32) >> ss_ver; let (lpf_y, lpf_uv) = f.lf.tx_lpf_right_edge.get(); let mut lpf_y = &lpf_y[(sby << sbl2) as usize..]; let mut lpf_uv = &lpf_uv[(sby << sbl2 - ss_ver) as usize..]; @@ -610,7 +610,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( x >>= is_sb64; let y_hmask: &mut [[u16; 2]; 3] = &mut (*lflvl.offset(x as isize)).filter_y[0][bx4 as usize]; - for y in starty4 as u32..endy4 as u32 { + for y in starty4..endy4 { let mask: u32 = 1 << y; let sidx = (mask >= 0x10000) as usize; let smask = (mask >> (sidx << 4)) as u16; @@ -619,22 +619,20 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( y_hmask[2][sidx] &= !smask; y_hmask[1][sidx] &= !smask; y_hmask[0][sidx] &= !smask; - y_hmask[cmp::min(idx, lpf_y[(y - starty4 as u32) as usize] as usize)][sidx] |= smask; + y_hmask[cmp::min(idx, lpf_y[(y - starty4) as usize] as usize)][sidx] |= smask; } if f.cur.p.layout != Rav1dPixelLayout::I400 { let uv_hmask: &mut [[u16; 2]; 2] = &mut (*lflvl.offset(x as isize)).filter_uv[0][cbx4 as usize]; - for y in (starty4 >> ss_ver) as u32..uv_endy4 { + for y in starty4 >> ss_ver..uv_endy4 { let uv_mask: u32 = 1 << y; let 
sidx = (uv_mask >= vmax) as usize; let smask = (uv_mask >> (sidx << 4 - ss_ver)) as u16; let idx = (uv_hmask[1][sidx] & smask != 0) as usize; uv_hmask[1][sidx] &= !smask; uv_hmask[0][sidx] &= !smask; - uv_hmask[cmp::min( - idx, - lpf_uv[(y - (starty4 >> ss_ver) as u32) as usize] as usize, - ) as usize][sidx] |= smask; + uv_hmask[cmp::min(idx, lpf_uv[(y - (starty4 >> ss_ver)) as usize] as usize)] + [sidx] |= smask; } } lpf_y = &lpf_y[halign..]; @@ -642,12 +640,12 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( tile_col += 1; } if start_of_tile_row != 0 { - let mut a: *const BlockContext; - a = &mut *(f.a).offset((f.sb128w * (start_of_tile_row - 1)) as isize) as *mut BlockContext; + let mut a: &[BlockContext] = slice::from_raw_parts(f.a, f.a_sz as usize); + a = &a[(f.sb128w * (start_of_tile_row - 1)) as usize..]; for x in 0..f.sb128w { let y_vmask: &mut [[u16; 2]; 3] = &mut (*lflvl.offset(x as isize)).filter_y[1][starty4 as usize]; - let w: c_uint = cmp::min(32 as c_int, f.w4 - (x << 5)) as c_uint; + let w = cmp::min(32, f.w4 - (x << 5)) as u32; for i in 0..w { let mask: u32 = 1 << i; let sidx = (mask >= 0x10000) as usize; @@ -657,7 +655,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( y_vmask[2][sidx] &= !smask; y_vmask[1][sidx] &= !smask; y_vmask[0][sidx] &= !smask; - y_vmask[cmp::min(idx, (*a).tx_lpf_y[i as usize] as usize)][sidx] |= smask; + y_vmask[cmp::min(idx, a[0].tx_lpf_y[i as usize] as usize)][sidx] |= smask; } if f.cur.p.layout != Rav1dPixelLayout::I400 { let cw: c_uint = w.wrapping_add(ss_hor as c_uint) >> ss_hor; @@ -670,10 +668,10 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( let idx = (uv_vmask[1][sidx] & smask != 0) as usize; uv_vmask[1][sidx] &= !smask; uv_vmask[0][sidx] &= !smask; - uv_vmask[cmp::min(idx, (*a).tx_lpf_uv[i as usize] as usize)][sidx] |= smask; + uv_vmask[cmp::min(idx, a[0].tx_lpf_uv[i as usize] as usize)][sidx] |= smask; } } - a = a.offset(1); + a = &a[1..]; } } let mut ptr: *mut BD::Pixel; @@ -689,8 +687,8 @@ 
pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( &(*lflvl.offset(x as isize)).filter_y[0], ptr, f.cur.stride[0], - cmp::min(32 as c_int, f.w4 - x * 32), - starty4, + cmp::min(32, f.w4 - x * 32), + starty4 as c_int, endy4 as c_int, ); have_left = true; @@ -710,11 +708,11 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( level_ptr, f.b4_stride, &(*lflvl.offset(x as isize)).filter_uv[0], - &mut *p[1].offset(uv_off as isize), - &mut *p[2].offset(uv_off as isize), + &mut *p[1].offset(uv_off), + &mut *p[2].offset(uv_off), f.cur.stride[1], - cmp::min(32 as c_int, f.w4 - x * 32) + ss_hor >> ss_hor, - starty4 >> ss_ver, + cmp::min(32, f.w4 - x * 32) + ss_hor >> ss_hor, + starty4 as c_int >> ss_ver, uv_endy4 as c_int, ss_ver, ); From 689fa499404efd7a748334b2cae97eb6da629885 Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Tue, 30 Jan 2024 10:31:52 -0500 Subject: [PATCH 11/26] Use references to `Rav1dDSPContext` object --- src/lf_apply.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index fadc8d3e8..b368aa4aa 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -398,7 +398,7 @@ unsafe fn filter_plane_cols_y( starty4: c_int, endy4: c_int, ) { - let dsp: *const Rav1dDSPContext = f.dsp; + let dsp: &Rav1dDSPContext = &*f.dsp; for x in 0..w as usize { if !(!have_left && x == 0) { let mut hmask: [u32; 4] = [0; 4]; @@ -444,7 +444,7 @@ unsafe fn filter_plane_rows_y( starty4: c_int, endy4: c_int, ) { - let dsp: *const Rav1dDSPContext = f.dsp; + let dsp: &Rav1dDSPContext = &*f.dsp; for (y, lvl) in (starty4..endy4).zip(lvl.chunks(b4_stride as usize)) { if !(!have_top && y == 0) { let vmask: [u32; 4] = [ @@ -483,7 +483,7 @@ unsafe fn filter_plane_cols_uv( endy4: c_int, ss_ver: c_int, ) { - let dsp: *const Rav1dDSPContext = f.dsp; + let dsp: &Rav1dDSPContext = &*f.dsp; for x in 0..w { if !(!have_left && x == 0) { let mut hmask: [u32; 3] = [0; 3]; @@ -538,7 +538,7 @@ unsafe fn filter_plane_rows_uv( endy4: c_int, ss_hor: 
c_int, ) { - let dsp: *const Rav1dDSPContext = f.dsp; + let dsp: &Rav1dDSPContext = &*f.dsp; let mut off_l: ptrdiff_t = 0; for (y, lvl) in (starty4..endy4).zip(lvl.chunks(b4_stride as usize)) { if !(!have_top && y == 0) { From b0c447adeb6baa2cafb1297c1f72395df94a3f69 Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Tue, 30 Jan 2024 16:19:50 -0500 Subject: [PATCH 12/26] `fn rav1d_loopfilter_sbrow_cols`: pass slices of samples as argument Parameter `p` is changed to be an array of slices. A parameter `p_offset` is added to indicate the location of the "origin" sample within the slice. Note that the offset is the same for both chroma components. Hence `p_offset` has one fewer entries than `p`. --- src/lf_apply.rs | 19 ++++++++++++------- src/recon.rs | 37 ++++++++++++++++++++++++++++++------- 2 files changed, 42 insertions(+), 14 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index b368aa4aa..90c6d2d19 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -574,7 +574,8 @@ unsafe fn filter_plane_rows_uv( pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( f: &mut Rav1dFrameData, - p: &[*mut BD::Pixel; 3], + p: &mut [&mut [BD::Pixel]; 3], + p_offset: &[usize; 2], lflvl_offset: usize, sby: c_int, start_of_tile_row: c_int, @@ -674,9 +675,9 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( a = &a[1..]; } } - let mut ptr: *mut BD::Pixel; + let mut slice: &mut [BD::Pixel]; let mut level_ptr = &f.lf.level[(f.b4_stride * sby as isize * sbsz as isize) as usize..]; - ptr = p[0]; + slice = p[0]; have_left = false; for x in 0..f.sb128w { filter_plane_cols_y::( @@ -685,15 +686,15 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( level_ptr, f.b4_stride, &(*lflvl.offset(x as isize)).filter_y[0], - ptr, + slice.as_mut_ptr().offset(p_offset[0] as isize), f.cur.stride[0], cmp::min(32, f.w4 - x * 32), starty4 as c_int, endy4 as c_int, ); have_left = true; - ptr = ptr.offset(128); level_ptr = &level_ptr[32..]; + slice = &mut slice[128..]; } if 
frame_hdr.loopfilter.level_u == 0 && frame_hdr.loopfilter.level_v == 0 { return; @@ -708,8 +709,12 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( level_ptr, f.b4_stride, &(*lflvl.offset(x as isize)).filter_uv[0], - &mut *p[1].offset(uv_off), - &mut *p[2].offset(uv_off), + p[1][uv_off as usize..] + .as_mut_ptr() + .offset(p_offset[1] as isize), + p[2][uv_off as usize..] + .as_mut_ptr() + .offset(p_offset[1] as isize), f.cur.stride[1], cmp::min(32, f.w4 - x * 32) + ss_hor >> ss_hor, starty4 as c_int >> ss_ver, diff --git a/src/recon.rs b/src/recon.rs index 9c044a022..429927487 100644 --- a/src/recon.rs +++ b/src/recon.rs @@ -4496,18 +4496,41 @@ pub(crate) unsafe fn rav1d_filter_sbrow_deblock_cols( } let y = sby * f.sb_step * 4; let ss_ver = (f.cur.p.layout as c_uint == Rav1dPixelLayout::I420 as c_int as c_uint) as c_int; - let p: [*mut BD::Pixel; 3] = [ - (f.lf.p[0] as *mut BD::Pixel).offset((y as isize * BD::pxstride(f.cur.stride[0])) as isize), - (f.lf.p[1] as *mut BD::Pixel) - .offset((y as isize * BD::pxstride(f.cur.stride[1]) >> ss_ver) as isize), - (f.lf.p[2] as *mut BD::Pixel) - .offset((y as isize * BD::pxstride(f.cur.stride[1]) >> ss_ver) as isize), + let ss_hor = (f.cur.p.layout as c_uint != Rav1dPixelLayout::I444 as c_int as c_uint) as c_int; + + let datay_stride = BD::pxstride((*f).cur.stride[0]); + let datay_width = (*f).cur.p.w; + let datay_height = (*f).cur.p.h; + let datay_diff = (datay_height - 1) as isize * datay_stride; + let datauv_stride = BD::pxstride((*f).cur.stride[1]); + let datauv_width = datay_width >> ss_hor; + let datauv_height = datay_height >> ss_ver; + let datauv_diff = (datauv_height - 1) as isize * datauv_stride; + + let mut p: [&mut [BD::Pixel]; 3] = [ + slice::from_raw_parts_mut( + (f.lf.p[0] as *mut BD::Pixel).offset(cmp::min(datay_diff, 0)), + datay_diff.unsigned_abs() + datay_width as usize, + ), + slice::from_raw_parts_mut( + (f.lf.p[1] as *mut BD::Pixel).offset(cmp::min(datauv_diff, 0)), + datauv_diff.unsigned_abs() + 
datauv_width as usize, + ), + slice::from_raw_parts_mut( + (f.lf.p[2] as *mut BD::Pixel).offset(cmp::min(datauv_diff, 0)), + datauv_diff.unsigned_abs() + datauv_width as usize, + ), + ]; + let p_offset: [usize; 2] = [ + (cmp::max(0, -datay_diff) + y as isize * datay_stride) as usize, + (cmp::max(0, -datauv_diff) + y as isize * datauv_stride >> ss_ver) as usize, ]; let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); let mask_offset = (sby >> (seq_hdr.sb128 == 0) as c_int) * f.sb128w; rav1d_loopfilter_sbrow_cols::( f, - &p, + &mut p, + &p_offset, mask_offset as usize, sby, *(f.lf.start_of_tile_row).offset(sby as isize) as c_int, From 7666d9eca0e98fc5b6671f9100599c37fc732352 Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Wed, 31 Jan 2024 07:32:00 -0500 Subject: [PATCH 13/26] `fn filter_plane_cols_y/uv`: make `dst`/`u`/`v` arguments a slice Offsets `dst_offset` and `uv_offset` are added to point to "origin" position within slice --- src/lf_apply.rs | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 90c6d2d19..1b5914900 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -392,7 +392,8 @@ unsafe fn filter_plane_cols_y( lvl: &[[u8; 4]], b4_stride: ptrdiff_t, mask: &[[[u16; 2]; 3]; 32], - dst: *mut BD::Pixel, + dst: &mut [BD::Pixel], + dst_offset: usize, ls: ptrdiff_t, w: c_int, starty4: c_int, @@ -418,7 +419,7 @@ unsafe fn filter_plane_cols_y( } // hmask[3] = 0; already initialized above (*dsp).lf.loop_filter_sb[0][0]( - dst.add(x * 4).cast(), + dst.as_mut_ptr().add(dst_offset + x * 4).cast(), ls, hmask.as_mut_ptr(), lvl[x..].as_ptr(), @@ -475,8 +476,9 @@ unsafe fn filter_plane_cols_uv( lvl: &[[u8; 4]], b4_stride: ptrdiff_t, mask: &[[[u16; 2]; 2]; 32], - u: *mut BD::Pixel, - v: *mut BD::Pixel, + u: &mut [BD::Pixel], + v: &mut [BD::Pixel], + uv_offset: usize, ls: ptrdiff_t, w: c_int, starty4: c_int, @@ -484,7 +486,7 @@ unsafe fn filter_plane_cols_uv( ss_ver: c_int, ) { let dsp: 
&Rav1dDSPContext = &*f.dsp; - for x in 0..w { + for x in 0..w as usize { if !(!have_left && x == 0) { let mut hmask: [u32; 3] = [0; 3]; if starty4 == 0 { @@ -500,7 +502,7 @@ unsafe fn filter_plane_cols_uv( } // hmask[2] = 0; Already initialized to 0 above (*dsp).lf.loop_filter_sb[1][0]( - u.offset((x * 4) as isize).cast(), + u.as_mut_ptr().add(uv_offset + x * 4).cast(), ls, hmask.as_mut_ptr(), unaligned_lvl_slice(&lvl[x as usize..], 2).as_ptr(), @@ -510,7 +512,7 @@ unsafe fn filter_plane_cols_uv( f.bitdepth_max, ); (*dsp).lf.loop_filter_sb[1][0]( - v.offset((x * 4) as isize).cast(), + v.as_mut_ptr().add(uv_offset + x * 4).cast(), ls, hmask.as_mut_ptr(), unaligned_lvl_slice(&lvl[x as usize..], 3).as_ptr(), @@ -675,9 +677,8 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( a = &a[1..]; } } - let mut slice: &mut [BD::Pixel]; let mut level_ptr = &f.lf.level[(f.b4_stride * sby as isize * sbsz as isize) as usize..]; - slice = p[0]; + let mut offset = p_offset[0]; have_left = false; for x in 0..f.sb128w { filter_plane_cols_y::( @@ -686,7 +687,8 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( level_ptr, f.b4_stride, &(*lflvl.offset(x as isize)).filter_y[0], - slice.as_mut_ptr().offset(p_offset[0] as isize), + p[0], + offset, f.cur.stride[0], cmp::min(32, f.w4 - x * 32), starty4 as c_int, @@ -694,13 +696,14 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( ); have_left = true; level_ptr = &level_ptr[32..]; - slice = &mut slice[128..]; + offset += 128; } if frame_hdr.loopfilter.level_u == 0 && frame_hdr.loopfilter.level_v == 0 { return; } - let mut uv_off: ptrdiff_t = 0; let mut level_ptr = &f.lf.level[(f.b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..]; + let (pu, pv) = p[1..].split_at_mut(1); + let mut uv_off = p_offset[1]; have_left = false; for x in 0..f.sb128w { filter_plane_cols_uv::( @@ -709,12 +712,9 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( level_ptr, f.b4_stride, &(*lflvl.offset(x as isize)).filter_uv[0], - p[1][uv_off as usize..] 
- .as_mut_ptr() - .offset(p_offset[1] as isize), - p[2][uv_off as usize..] - .as_mut_ptr() - .offset(p_offset[1] as isize), + pu[0], + pv[0], + uv_off, f.cur.stride[1], cmp::min(32, f.w4 - x * 32) + ss_hor >> ss_hor, starty4 as c_int >> ss_ver, From 78ee731ad4a92619f2a202e11bb112b09b7c3ab6 Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Thu, 1 Feb 2024 05:05:15 -0500 Subject: [PATCH 14/26] `lf_apply.rs`: use slices instead of raw pointers Use slices for parameter of `rav1d_loopfilter_sbrow_rows` and `rav1d_copy_lpf` --- src/lf_apply.rs | 48 ++++++++++++++++++++++++++++++------------------ src/recon.rs | 38 ++++++++++++++++++++++++++++++-------- 2 files changed, 60 insertions(+), 26 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 1b5914900..63710e125 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -174,7 +174,8 @@ unsafe fn backup_lpf( pub(crate) unsafe fn rav1d_copy_lpf( c: &Rav1dContext, f: &mut Rav1dFrameData, - src: &[*mut BD::Pixel; 3], + src: &[&mut [BD::Pixel]; 3], + src_offset: &[usize; 2], sby: c_int, ) { let have_tt = (c.tc.len() > 1) as c_int; @@ -207,7 +208,10 @@ pub(crate) unsafe fn rav1d_copy_lpf( c, dst[0], lr_stride[0], - src[0].offset(-(offset as isize * BD::pxstride(src_stride[0] as usize) as isize)), + src[0].as_ptr().offset( + src_offset[0] as isize + - offset as isize * BD::pxstride(src_stride[0] as usize) as isize, + ), src_stride[0], 0, seq_hdr.sb128, @@ -234,7 +238,10 @@ pub(crate) unsafe fn rav1d_copy_lpf( .add(f.lf.cdef_lpf_line[0]) .offset(cdef_off_y), src_stride[0], - src[0].offset(-offset as isize * BD::pxstride(src_stride[0] as usize) as isize), + src[0].as_ptr().offset( + src_offset[0] as isize + - offset as isize * BD::pxstride(src_stride[0] as usize) as isize, + ), src_stride[0], 0, seq_hdr.sb128, @@ -270,8 +277,9 @@ pub(crate) unsafe fn rav1d_copy_lpf( c, dst[1], lr_stride[1], - src[1].offset( - -offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize, + src[1].as_ptr().offset( + src_offset[1] as 
isize + - offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize, ), src_stride[1], ss_ver, @@ -297,8 +305,9 @@ pub(crate) unsafe fn rav1d_copy_lpf( .add(f.lf.cdef_lpf_line[1]) .offset(cdef_off_uv), src_stride[1], - src[1].offset( - -offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize, + src[1].as_ptr().offset( + src_offset[1] as isize + - offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize, ), src_stride[1], ss_ver, @@ -323,8 +332,9 @@ pub(crate) unsafe fn rav1d_copy_lpf( c, dst[2], lr_stride[1], - src[2].offset( - -offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize, + src[2].as_ptr().offset( + src_offset[1] as isize + - offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize, ), src_stride[1], ss_ver, @@ -350,8 +360,9 @@ pub(crate) unsafe fn rav1d_copy_lpf( .add(f.lf.cdef_lpf_line[2]) .offset(cdef_off_uv), src_stride[1], - src[2].offset( - -offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize, + src[2].as_ptr().offset( + src_offset[1] as isize + - offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize, ), src_stride[1], ss_ver, @@ -729,7 +740,8 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( f: &mut Rav1dFrameData, - p: &[*mut BD::Pixel; 3], + p: &mut [&mut [BD::Pixel]; 3], + p_offset: &[usize; 2], lflvl_offset: usize, sby: c_int, ) { @@ -746,9 +758,9 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( let endy4: c_uint = (starty4 + cmp::min(f.h4 - sby * sbsz, sbsz)) as c_uint; let uv_endy4: c_uint = endy4.wrapping_add(ss_ver as c_uint) >> ss_ver; - let mut ptr: *mut BD::Pixel; + let mut slice: &mut [BD::Pixel]; let mut level_ptr = &f.lf.level[(f.b4_stride * sby as isize * sbsz as isize) as usize..]; - ptr = p[0]; + slice = p[0]; for x in 0..f.sb128w { filter_plane_rows_y::( f, @@ -756,13 +768,13 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( level_ptr, f.b4_stride, &(*lflvl.offset(x as isize)).filter_y[1], - 
ptr, + slice.as_mut_ptr().offset(p_offset[0] as isize), f.cur.stride[0], cmp::min(32, f.w4 - x * 32), starty4, endy4 as c_int, ); - ptr = ptr.offset(128); + slice = &mut slice[128..]; level_ptr = &level_ptr[32..]; } @@ -780,8 +792,8 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( level_ptr, f.b4_stride, &(*lflvl.offset(x as isize)).filter_uv[1], - &mut *p[1].offset(uv_off as isize), - &mut *p[2].offset(uv_off as isize), + p[1][uv_off as usize..].as_mut_ptr().add(p_offset[1]), + p[2][uv_off as usize..].as_mut_ptr().add(p_offset[1]), f.cur.stride[1], cmp::min(32 as c_int, f.w4 - x * 32) + ss_hor >> ss_hor, starty4 >> ss_ver, diff --git a/src/recon.rs b/src/recon.rs index 429927487..4b04d766a 100644 --- a/src/recon.rs +++ b/src/recon.rs @@ -4545,12 +4545,34 @@ pub(crate) unsafe fn rav1d_filter_sbrow_deblock_rows( ) { let y = sby * f.sb_step * 4; let ss_ver = (f.cur.p.layout as c_uint == Rav1dPixelLayout::I420 as c_int as c_uint) as c_int; - let p: [*mut BD::Pixel; 3] = [ - (f.lf.p[0] as *mut BD::Pixel).offset((y as isize * BD::pxstride(f.cur.stride[0])) as isize), - (f.lf.p[1] as *mut BD::Pixel) - .offset((y as isize * BD::pxstride(f.cur.stride[1]) >> ss_ver) as isize), - (f.lf.p[2] as *mut BD::Pixel) - .offset((y as isize * BD::pxstride(f.cur.stride[1]) >> ss_ver) as isize), + let ss_hor = (f.cur.p.layout as c_uint != Rav1dPixelLayout::I444 as c_int as c_uint) as c_int; + + let datay_stride = BD::pxstride(f.cur.stride[0]); + let datay_width = f.cur.p.w; + let datay_height = f.cur.p.h; + let datay_diff = (datay_height - 1) as isize * datay_stride; + let datauv_stride = BD::pxstride(f.cur.stride[1]); + let datauv_width = datay_width >> ss_hor; + let datauv_height = datay_height >> ss_ver; + let datauv_diff = (datauv_height - 1) as isize * datauv_stride; + + let mut p: [&mut [BD::Pixel]; 3] = [ + slice::from_raw_parts_mut( + (f.lf.p[0] as *mut BD::Pixel).offset(cmp::min(datay_diff, 0)), + datay_diff.unsigned_abs() + datay_width as usize, + ), + 
slice::from_raw_parts_mut( + (f.lf.p[1] as *mut BD::Pixel).offset(cmp::min(datauv_diff, 0)), + datauv_diff.unsigned_abs() + datauv_width as usize, + ), + slice::from_raw_parts_mut( + (f.lf.p[2] as *mut BD::Pixel).offset(cmp::min(datauv_diff, 0)), + datauv_diff.unsigned_abs() + datauv_width as usize, + ), + ]; + let p_offset: [usize; 2] = [ + (cmp::max(0, -datay_diff) + y as isize * datay_stride) as usize, + (cmp::max(0, -datauv_diff) + y as isize * datauv_stride >> ss_ver) as usize, ]; let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); let sb128 = seq_hdr.sb128; @@ -4560,10 +4582,10 @@ pub(crate) unsafe fn rav1d_filter_sbrow_deblock_rows( if c.inloop_filters.contains(Rav1dInloopFilterType::DEBLOCK) && (frame_hdr.loopfilter.level_y[0] != 0 || frame_hdr.loopfilter.level_y[1] != 0) { - rav1d_loopfilter_sbrow_rows::(f, &p, mask_offset as usize, sby); + rav1d_loopfilter_sbrow_rows::(f, &mut p, &p_offset, mask_offset as usize, sby); } if cdef != 0 || f.lf.restore_planes != 0 { - rav1d_copy_lpf::(c, &mut *f, &p, sby); + rav1d_copy_lpf::(c, f, &p, &p_offset, sby); } } From f0fd40ba9d89a86d07b683f2749312e8036e9a5a Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Thu, 1 Feb 2024 16:30:36 -0500 Subject: [PATCH 15/26] `fn backup_lpf`: use slice as parameter Dimensions of slice initially created had to be adjusted to account for memory allocation constraints (e.g., sizes of multiples of 128). Not doing so leads to invalid access when picture dimensions are odd numbers. 
--- src/lf_apply.rs | 83 +++++++++++++++++++++++++++---------------------- src/recon.rs | 11 ++++--- 2 files changed, 51 insertions(+), 43 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 63710e125..45643249b 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -24,7 +24,8 @@ unsafe fn backup_lpf( c: &Rav1dContext, mut dst: *mut BD::Pixel, dst_stride: ptrdiff_t, - mut src: *const BD::Pixel, + src: &[BD::Pixel], + mut src_offset: usize, src_stride: ptrdiff_t, ss_ver: c_int, sb128: c_int, @@ -48,7 +49,8 @@ unsafe fn backup_lpf( }; // The first stripe of the frame is shorter by 8 luma pixel rows. let mut stripe_h = ((64 as c_int) << (cdef_backup & sb128)) - 8 * (row == 0) as c_int >> ss_ver; - src = src.offset((stripe_h - 2) as isize * BD::pxstride(src_stride as usize) as isize); + src_offset = src_offset + .wrapping_add_signed((stripe_h - 2) as isize * BD::pxstride(src_stride as usize) as isize); if c.tc.len() == 1 { if row != 0 { let top = (4 as c_int) << sb128; @@ -118,7 +120,7 @@ unsafe fn backup_lpf( ((*dsp).mc.resize)( dst.cast(), dst_stride, - src.cast(), + src.as_ptr().add(src_offset).cast(), src_stride, dst_w, n_lines, @@ -129,7 +131,9 @@ unsafe fn backup_lpf( ); row += stripe_h; // unmodified stripe_h for the 1st stripe stripe_h = 64 >> ss_ver; - src = src.offset(stripe_h as isize * BD::pxstride(src_stride as usize) as isize); + src_offset = (src_offset as isize + + stripe_h as isize * BD::pxstride(src_stride as usize) as isize) + as usize; dst = dst.offset(n_lines as isize * BD::pxstride(dst_stride as usize) as isize); if n_lines == 3 { BD::pixel_copy( @@ -150,23 +154,26 @@ unsafe fn backup_lpf( for i in 0..4 { BD::pixel_copy( slice::from_raw_parts_mut(dst, src_w as usize), - slice::from_raw_parts( - if i == n_lines_0 { + if i == n_lines_0 { + slice::from_raw_parts( &mut *dst.offset(-(BD::pxstride(dst_stride as usize) as isize)) - as *const BD::Pixel as *const BD::Pixel - } else { - src - }, - src_w as usize, - ), + as *const BD::Pixel as 
*const BD::Pixel, + src_w as usize, + ) + } else { + &src[src_offset..] + }, src_w as usize, ); dst = dst.offset(BD::pxstride(dst_stride as usize) as isize); - src = src.offset(BD::pxstride(src_stride as usize) as isize); + src_offset = + (src_offset as isize + BD::pxstride(src_stride as usize) as isize) as usize; } row += stripe_h; // unmodified stripe_h for the 1st stripe stripe_h = 64 >> ss_ver; - src = src.offset((stripe_h - 4) as isize * BD::pxstride(src_stride as usize) as isize); + src_offset = src_offset.wrapping_add_signed( + (stripe_h - 4) as isize * BD::pxstride(src_stride as usize) as isize, + ); } }; } @@ -208,10 +215,10 @@ pub(crate) unsafe fn rav1d_copy_lpf( c, dst[0], lr_stride[0], - src[0].as_ptr().offset( - src_offset[0] as isize - - offset as isize * BD::pxstride(src_stride[0] as usize) as isize, - ), + src[0], + (src_offset[0] as isize + - offset as isize * BD::pxstride(src_stride[0] as usize) as isize) + as usize, src_stride[0], 0, seq_hdr.sb128, @@ -238,10 +245,10 @@ pub(crate) unsafe fn rav1d_copy_lpf( .add(f.lf.cdef_lpf_line[0]) .offset(cdef_off_y), src_stride[0], - src[0].as_ptr().offset( - src_offset[0] as isize - - offset as isize * BD::pxstride(src_stride[0] as usize) as isize, - ), + src[0], + (src_offset[0] as isize + - offset as isize * BD::pxstride(src_stride[0] as usize) as isize) + as usize, src_stride[0], 0, seq_hdr.sb128, @@ -277,10 +284,10 @@ pub(crate) unsafe fn rav1d_copy_lpf( c, dst[1], lr_stride[1], - src[1].as_ptr().offset( - src_offset[1] as isize - - offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize, - ), + src[1], + (src_offset[1] as isize + - offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize) + as usize, src_stride[1], ss_ver, seq_hdr.sb128, @@ -305,10 +312,10 @@ pub(crate) unsafe fn rav1d_copy_lpf( .add(f.lf.cdef_lpf_line[1]) .offset(cdef_off_uv), src_stride[1], - src[1].as_ptr().offset( - src_offset[1] as isize - - offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize, - 
), + src[1], + (src_offset[1] as isize + - offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize) + as usize, src_stride[1], ss_ver, seq_hdr.sb128, @@ -332,10 +339,10 @@ pub(crate) unsafe fn rav1d_copy_lpf( c, dst[2], lr_stride[1], - src[2].as_ptr().offset( - src_offset[1] as isize - - offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize, - ), + src[2], + (src_offset[1] as isize + - offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize) + as usize, src_stride[1], ss_ver, seq_hdr.sb128, @@ -360,10 +367,10 @@ pub(crate) unsafe fn rav1d_copy_lpf( .add(f.lf.cdef_lpf_line[2]) .offset(cdef_off_uv), src_stride[1], - src[2].as_ptr().offset( - src_offset[1] as isize - - offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize, - ), + src[2], + (src_offset[1] as isize + - offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize) + as usize, src_stride[1], ss_ver, seq_hdr.sb128, diff --git a/src/recon.rs b/src/recon.rs index 4b04d766a..37dbc8424 100644 --- a/src/recon.rs +++ b/src/recon.rs @@ -11,6 +11,7 @@ use crate::include::dav1d::dav1d::Rav1dInloopFilterType; use crate::include::dav1d::headers::Rav1dPixelLayout; use crate::include::dav1d::headers::Rav1dWarpedMotionParams; use crate::include::dav1d::headers::RAV1D_WM_TYPE_TRANSLATION; +use crate::include::dav1d::picture::RAV1D_PICTURE_ALIGNMENT; use crate::src::cdef_apply::rav1d_cdef_brow; use crate::src::ctx::CaseSet; use crate::src::env::get_uv_inter_txtp; @@ -4548,8 +4549,8 @@ pub(crate) unsafe fn rav1d_filter_sbrow_deblock_rows( let ss_hor = (f.cur.p.layout as c_uint != Rav1dPixelLayout::I444 as c_int as c_uint) as c_int; let datay_stride = BD::pxstride(f.cur.stride[0]); - let datay_width = f.cur.p.w; - let datay_height = f.cur.p.h; + let datay_width = f.cur.p.w + 127 & !127; + let datay_height = f.cur.p.h + 127 & !127; let datay_diff = (datay_height - 1) as isize * datay_stride; let datauv_stride = BD::pxstride(f.cur.stride[1]); let datauv_width = datay_width 
>> ss_hor; @@ -4559,15 +4560,15 @@ pub(crate) unsafe fn rav1d_filter_sbrow_deblock_rows( let mut p: [&mut [BD::Pixel]; 3] = [ slice::from_raw_parts_mut( (f.lf.p[0] as *mut BD::Pixel).offset(cmp::min(datay_diff, 0)), - datay_diff.unsigned_abs() + datay_width as usize, + datay_diff.unsigned_abs() + datay_width as usize + RAV1D_PICTURE_ALIGNMENT, ), slice::from_raw_parts_mut( (f.lf.p[1] as *mut BD::Pixel).offset(cmp::min(datauv_diff, 0)), - datauv_diff.unsigned_abs() + datauv_width as usize, + datauv_diff.unsigned_abs() + datauv_width as usize + RAV1D_PICTURE_ALIGNMENT, ), slice::from_raw_parts_mut( (f.lf.p[2] as *mut BD::Pixel).offset(cmp::min(datauv_diff, 0)), - datauv_diff.unsigned_abs() + datauv_width as usize, + datauv_diff.unsigned_abs() + datauv_width as usize + RAV1D_PICTURE_ALIGNMENT, ), ]; let p_offset: [usize; 2] = [ From 68f0817acb31732396a444f2a7e66755d5598b61 Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Tue, 6 Feb 2024 10:43:21 -0500 Subject: [PATCH 16/26] Define inner blocks for computation of `p` and `p_offset` Also rename variables, dropping `data` prefix. 
--- src/recon.rs | 110 +++++++++++++++++++++++++++------------------------ 1 file changed, 58 insertions(+), 52 deletions(-) diff --git a/src/recon.rs b/src/recon.rs index 37dbc8424..f73c6c967 100644 --- a/src/recon.rs +++ b/src/recon.rs @@ -4499,33 +4499,36 @@ pub(crate) unsafe fn rav1d_filter_sbrow_deblock_cols( let ss_ver = (f.cur.p.layout as c_uint == Rav1dPixelLayout::I420 as c_int as c_uint) as c_int; let ss_hor = (f.cur.p.layout as c_uint != Rav1dPixelLayout::I444 as c_int as c_uint) as c_int; - let datay_stride = BD::pxstride((*f).cur.stride[0]); - let datay_width = (*f).cur.p.w; - let datay_height = (*f).cur.p.h; - let datay_diff = (datay_height - 1) as isize * datay_stride; - let datauv_stride = BD::pxstride((*f).cur.stride[1]); - let datauv_width = datay_width >> ss_hor; - let datauv_height = datay_height >> ss_ver; - let datauv_diff = (datauv_height - 1) as isize * datauv_stride; + let (mut p, p_offset) = { + let y_stride = BD::pxstride((*f).cur.stride[0]); + let y_width = (*f).cur.p.w + 127 & !127; + let y_height = (*f).cur.p.h + 127 & !127; + let y_span = (y_height - 1) as isize * y_stride; + let uv_stride = BD::pxstride((*f).cur.stride[1]); + let uv_width = y_width >> ss_hor; + let uv_height = y_height >> ss_ver; + let uv_span = (uv_height - 1) as isize * uv_stride; - let mut p: [&mut [BD::Pixel]; 3] = [ - slice::from_raw_parts_mut( - (f.lf.p[0] as *mut BD::Pixel).offset(cmp::min(datay_diff, 0)), - datay_diff.unsigned_abs() + datay_width as usize, - ), - slice::from_raw_parts_mut( - (f.lf.p[1] as *mut BD::Pixel).offset(cmp::min(datauv_diff, 0)), - datauv_diff.unsigned_abs() + datauv_width as usize, - ), - slice::from_raw_parts_mut( - (f.lf.p[2] as *mut BD::Pixel).offset(cmp::min(datauv_diff, 0)), - datauv_diff.unsigned_abs() + datauv_width as usize, - ), - ]; - let p_offset: [usize; 2] = [ - (cmp::max(0, -datay_diff) + y as isize * datay_stride) as usize, - (cmp::max(0, -datauv_diff) + y as isize * datauv_stride >> ss_ver) as usize, - ]; + let p: 
[&mut [BD::Pixel]; 3] = [ + slice::from_raw_parts_mut( + (f.lf.p[0] as *mut BD::Pixel).offset(cmp::min(y_span, 0)), + y_span.unsigned_abs() + y_width as usize + RAV1D_PICTURE_ALIGNMENT, + ), + slice::from_raw_parts_mut( + (f.lf.p[1] as *mut BD::Pixel).offset(cmp::min(uv_span, 0)), + uv_span.unsigned_abs() + uv_width as usize + RAV1D_PICTURE_ALIGNMENT, + ), + slice::from_raw_parts_mut( + (f.lf.p[2] as *mut BD::Pixel).offset(cmp::min(uv_span, 0)), + uv_span.unsigned_abs() + uv_width as usize + RAV1D_PICTURE_ALIGNMENT, + ), + ]; + let p_offset: [usize; 2] = [ + (cmp::max(0, -y_span) + y as isize * y_stride) as usize, + (cmp::max(0, -uv_span) + y as isize * uv_stride >> ss_ver) as usize, + ]; + (p, p_offset) + }; let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); let mask_offset = (sby >> (seq_hdr.sb128 == 0) as c_int) * f.sb128w; rav1d_loopfilter_sbrow_cols::( @@ -4548,33 +4551,36 @@ pub(crate) unsafe fn rav1d_filter_sbrow_deblock_rows( let ss_ver = (f.cur.p.layout as c_uint == Rav1dPixelLayout::I420 as c_int as c_uint) as c_int; let ss_hor = (f.cur.p.layout as c_uint != Rav1dPixelLayout::I444 as c_int as c_uint) as c_int; - let datay_stride = BD::pxstride(f.cur.stride[0]); - let datay_width = f.cur.p.w + 127 & !127; - let datay_height = f.cur.p.h + 127 & !127; - let datay_diff = (datay_height - 1) as isize * datay_stride; - let datauv_stride = BD::pxstride(f.cur.stride[1]); - let datauv_width = datay_width >> ss_hor; - let datauv_height = datay_height >> ss_ver; - let datauv_diff = (datauv_height - 1) as isize * datauv_stride; + let (mut p, p_offset) = { + let y_stride = BD::pxstride((*f).cur.stride[0]); + let y_width = (*f).cur.p.w + 127 & !127; + let y_height = (*f).cur.p.h + 127 & !127; + let y_span = (y_height - 1) as isize * y_stride; + let uv_stride = BD::pxstride((*f).cur.stride[1]); + let uv_width = y_width >> ss_hor; + let uv_height = y_height >> ss_ver; + let uv_span = (uv_height - 1) as isize * uv_stride; - let mut p: [&mut [BD::Pixel]; 3] = [ - 
slice::from_raw_parts_mut( - (f.lf.p[0] as *mut BD::Pixel).offset(cmp::min(datay_diff, 0)), - datay_diff.unsigned_abs() + datay_width as usize + RAV1D_PICTURE_ALIGNMENT, - ), - slice::from_raw_parts_mut( - (f.lf.p[1] as *mut BD::Pixel).offset(cmp::min(datauv_diff, 0)), - datauv_diff.unsigned_abs() + datauv_width as usize + RAV1D_PICTURE_ALIGNMENT, - ), - slice::from_raw_parts_mut( - (f.lf.p[2] as *mut BD::Pixel).offset(cmp::min(datauv_diff, 0)), - datauv_diff.unsigned_abs() + datauv_width as usize + RAV1D_PICTURE_ALIGNMENT, - ), - ]; - let p_offset: [usize; 2] = [ - (cmp::max(0, -datay_diff) + y as isize * datay_stride) as usize, - (cmp::max(0, -datauv_diff) + y as isize * datauv_stride >> ss_ver) as usize, - ]; + let p: [&mut [BD::Pixel]; 3] = [ + slice::from_raw_parts_mut( + (f.lf.p[0] as *mut BD::Pixel).offset(cmp::min(y_span, 0)), + y_span.unsigned_abs() + y_width as usize + RAV1D_PICTURE_ALIGNMENT, + ), + slice::from_raw_parts_mut( + (f.lf.p[1] as *mut BD::Pixel).offset(cmp::min(uv_span, 0)), + uv_span.unsigned_abs() + uv_width as usize + RAV1D_PICTURE_ALIGNMENT, + ), + slice::from_raw_parts_mut( + (f.lf.p[2] as *mut BD::Pixel).offset(cmp::min(uv_span, 0)), + uv_span.unsigned_abs() + uv_width as usize + RAV1D_PICTURE_ALIGNMENT, + ), + ]; + let p_offset: [usize; 2] = [ + (cmp::max(0, -y_span) + y as isize * y_stride) as usize, + (cmp::max(0, -uv_span) + y as isize * uv_stride >> ss_ver) as usize, + ]; + (p, p_offset) + }; let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); let sb128 = seq_hdr.sb128; let cdef = seq_hdr.cdef; From 789c78fb9dabbee81aadf0ccee35f3620fa51ca0 Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Tue, 6 Feb 2024 11:58:41 -0500 Subject: [PATCH 17/26] `lflvl`: use slice instead of raw pointer --- src/lf_apply.rs | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 45643249b..89849c457 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -600,7 +600,7 @@ pub(crate) 
unsafe fn rav1d_loopfilter_sbrow_cols( sby: c_int, start_of_tile_row: c_int, ) { - let lflvl = f.lf.mask[lflvl_offset..].as_mut_ptr(); + let lflvl = &mut f.lf.mask[lflvl_offset..]; let mut have_left; let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); let is_sb64 = (seq_hdr.sb128 == 0) as c_int; @@ -629,8 +629,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( let bx4: c_int = if x & is_sb64 != 0 { 16 } else { 0 }; let cbx4 = bx4 >> ss_hor; x >>= is_sb64; - let y_hmask: &mut [[u16; 2]; 3] = - &mut (*lflvl.offset(x as isize)).filter_y[0][bx4 as usize]; + let y_hmask: &mut [[u16; 2]; 3] = &mut lflvl[x as usize].filter_y[0][bx4 as usize]; for y in starty4..endy4 { let mask: u32 = 1 << y; let sidx = (mask >= 0x10000) as usize; @@ -643,8 +642,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( y_hmask[cmp::min(idx, lpf_y[(y - starty4) as usize] as usize)][sidx] |= smask; } if f.cur.p.layout != Rav1dPixelLayout::I400 { - let uv_hmask: &mut [[u16; 2]; 2] = - &mut (*lflvl.offset(x as isize)).filter_uv[0][cbx4 as usize]; + let uv_hmask: &mut [[u16; 2]; 2] = &mut lflvl[x as usize].filter_uv[0][cbx4 as usize]; for y in starty4 >> ss_ver..uv_endy4 { let uv_mask: u32 = 1 << y; let sidx = (uv_mask >= vmax) as usize; @@ -664,8 +662,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( let mut a: &[BlockContext] = slice::from_raw_parts(f.a, f.a_sz as usize); a = &a[(f.sb128w * (start_of_tile_row - 1)) as usize..]; for x in 0..f.sb128w { - let y_vmask: &mut [[u16; 2]; 3] = - &mut (*lflvl.offset(x as isize)).filter_y[1][starty4 as usize]; + let y_vmask: &mut [[u16; 2]; 3] = &mut lflvl[x as usize].filter_y[1][starty4 as usize]; let w = cmp::min(32, f.w4 - (x << 5)) as u32; for i in 0..w { let mask: u32 = 1 << i; @@ -681,7 +678,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( if f.cur.p.layout != Rav1dPixelLayout::I400 { let cw: c_uint = w.wrapping_add(ss_hor as c_uint) >> ss_hor; let uv_vmask: &mut [[u16; 2]; 2] = - &mut (*lflvl.offset(x as isize)).filter_uv[1][(starty4 >> 
ss_ver) as usize]; + &mut lflvl[x as usize].filter_uv[1][(starty4 >> ss_ver) as usize]; for i in 0..cw { let uv_mask: u32 = 1 << i; let sidx = (uv_mask >= hmax) as usize; @@ -695,6 +692,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( a = &a[1..]; } } + let lflvl = &f.lf.mask[lflvl_offset..]; let mut level_ptr = &f.lf.level[(f.b4_stride * sby as isize * sbsz as isize) as usize..]; let mut offset = p_offset[0]; have_left = false; @@ -704,7 +702,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( have_left, level_ptr, f.b4_stride, - &(*lflvl.offset(x as isize)).filter_y[0], + &lflvl[x as usize].filter_y[0], p[0], offset, f.cur.stride[0], @@ -729,7 +727,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( have_left, level_ptr, f.b4_stride, - &(*lflvl.offset(x as isize)).filter_uv[0], + &lflvl[x as usize].filter_uv[0], pu[0], pv[0], uv_off, @@ -752,7 +750,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( lflvl_offset: usize, sby: c_int, ) { - let lflvl = f.lf.mask[lflvl_offset..].as_mut_ptr(); + let lflvl = &f.lf.mask[lflvl_offset..]; // Don't filter outside the frame let have_top = sby > 0; @@ -774,7 +772,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( have_top, level_ptr, f.b4_stride, - &(*lflvl.offset(x as isize)).filter_y[1], + &lflvl[x as usize].filter_y[1], slice.as_mut_ptr().offset(p_offset[0] as isize), f.cur.stride[0], cmp::min(32, f.w4 - x * 32), @@ -798,7 +796,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( have_top, level_ptr, f.b4_stride, - &(*lflvl.offset(x as isize)).filter_uv[1], + &lflvl[x as usize].filter_uv[1], p[1][uv_off as usize..].as_mut_ptr().add(p_offset[1]), p[2][uv_off as usize..].as_mut_ptr().add(p_offset[1]), f.cur.stride[1], From ad49d8d2210f4d19034ced891f84b4a66e424f0e Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Tue, 6 Feb 2024 17:39:14 -0500 Subject: [PATCH 18/26] `fn backup_lpf`: use slice instead of raw pointer for `dst` `dst_offset` is also added to enable accessing elements in the slice that 
precede the reference position that was pointed to by `dst` --- src/lf_apply.rs | 224 ++++++++++++++++++++++++++++-------------------- 1 file changed, 129 insertions(+), 95 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 89849c457..6261133c1 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -22,7 +22,8 @@ use std::slice; // stripe with the top of the next super block row. unsafe fn backup_lpf( c: &Rav1dContext, - mut dst: *mut BD::Pixel, + dst: &mut [BD::Pixel], + mut dst_offset: usize, dst_stride: ptrdiff_t, src: &[BD::Pixel], mut src_offset: usize, @@ -56,69 +57,59 @@ unsafe fn backup_lpf( let top = (4 as c_int) << sb128; // Copy the top part of the stored loop filtered pixels from the // previous sb row needed above the first stripe of this sb row. - BD::pixel_copy( - slice::from_raw_parts_mut( - &mut *dst.offset(BD::pxstride(dst_stride as usize * 0) as isize) - as *mut BD::Pixel, - dst_w as usize, - ), - slice::from_raw_parts( - &mut *dst.offset(BD::pxstride(dst_stride as usize * top as usize) as isize) - as *mut BD::Pixel, - dst_w as usize, - ), - dst_w as usize, - ); - BD::pixel_copy( - slice::from_raw_parts_mut( - &mut *dst.offset(BD::pxstride(dst_stride as usize * 1) as isize) - as *mut BD::Pixel, - dst_w as usize, - ), - slice::from_raw_parts( - &mut *dst - .offset(BD::pxstride(dst_stride as usize * (top + 1) as usize) as isize) - as *mut BD::Pixel, - dst_w as usize, - ), - dst_w as usize, - ); - BD::pixel_copy( - slice::from_raw_parts_mut( - &mut *dst.offset(BD::pxstride(dst_stride as usize * 2) as isize) - as *mut BD::Pixel, - dst_w as usize, - ), - slice::from_raw_parts( - &mut *dst - .offset(BD::pxstride(dst_stride as usize * (top + 2) as usize) as isize) - as *mut BD::Pixel, - dst_w as usize, - ), - dst_w as usize, - ); - BD::pixel_copy( - slice::from_raw_parts_mut( - &mut *dst.offset(BD::pxstride(dst_stride as usize * 3) as isize) - as *mut BD::Pixel, - dst_w as usize, - ), - slice::from_raw_parts( - &mut *dst - 
.offset(BD::pxstride(dst_stride as usize * (top + 3) as usize) as isize) - as *mut BD::Pixel, - dst_w as usize, - ), - dst_w as usize, - ); + if dst_stride < 0 { + let mut dst_offset_delta1 = 3 * BD::pxstride(-dst_stride as usize); + let dst_tmp = dst.split_at_mut(dst_offset - dst_offset_delta1); + let mut dst_offset_delta0 = + dst_offset - top as usize * BD::pxstride(-dst_stride as usize); + dst_tmp.1[dst_offset_delta1..][..dst_w as usize] + .copy_from_slice(&dst_tmp.0[dst_offset_delta0..][..dst_w as usize]); + dst_offset_delta0 -= BD::pxstride(-dst_stride as usize); + dst_offset_delta1 -= BD::pxstride(-dst_stride as usize); + dst_tmp.1[dst_offset_delta1..][..dst_w as usize] + .copy_from_slice(&dst_tmp.0[dst_offset_delta0..][..dst_w as usize]); + dst_offset_delta0 -= BD::pxstride(-dst_stride as usize); + dst_offset_delta1 -= BD::pxstride(-dst_stride as usize); + dst_tmp.1[dst_offset_delta1..][..dst_w as usize] + .copy_from_slice(&dst_tmp.0[dst_offset_delta0..][..dst_w as usize]); + dst_offset_delta0 -= BD::pxstride(-dst_stride as usize); + dst_offset_delta1 -= BD::pxstride(-dst_stride as usize); + dst_tmp.1[dst_offset_delta1..][..dst_w as usize] + .copy_from_slice(&dst_tmp.0[dst_offset_delta0..][..dst_w as usize]); + } else { + let mut dst_offset_delta = 0; + let dst_tmp = dst.split_at_mut( + (dst_offset as isize + + top as isize * BD::pxstride(dst_stride as usize) as isize) + as usize, + ); + dst_tmp.0[dst_offset + dst_offset_delta..][..dst_w as usize] + .copy_from_slice(&dst_tmp.1[dst_offset_delta..][..dst_w as usize]); + dst_offset_delta = (dst_offset_delta as isize + + BD::pxstride(dst_stride as usize) as isize) + as usize; + dst_tmp.0[dst_offset + dst_offset_delta..][..dst_w as usize] + .copy_from_slice(&dst_tmp.1[dst_offset_delta..][..dst_w as usize]); + dst_offset_delta = (dst_offset_delta as isize + + BD::pxstride(dst_stride as usize) as isize) + as usize; + dst_tmp.0[dst_offset + dst_offset_delta..][..dst_w as usize] + 
.copy_from_slice(&dst_tmp.1[dst_offset_delta..][..dst_w as usize]); + dst_offset_delta = (dst_offset_delta as isize + + BD::pxstride(dst_stride as usize) as isize) + as usize; + dst_tmp.0[dst_offset + dst_offset_delta..][..dst_w as usize] + .copy_from_slice(&dst_tmp.1[dst_offset_delta..][..dst_w as usize]); + } } - dst = dst.offset(4 * BD::pxstride(dst_stride as usize) as isize); + dst_offset = + (dst_offset as isize + 4 * BD::pxstride(dst_stride as usize) as isize) as usize; } if lr_backup != 0 && frame_hdr.size.width[0] != frame_hdr.size.width[1] { while row + stripe_h <= row_h { let n_lines = 4 - (row + stripe_h + 1 == h) as c_int; ((*dsp).mc.resize)( - dst.cast(), + dst.as_mut_ptr().add(dst_offset).cast(), dst_stride, src.as_ptr().add(src_offset).cast(), src_stride, @@ -134,38 +125,42 @@ unsafe fn backup_lpf( src_offset = (src_offset as isize + stripe_h as isize * BD::pxstride(src_stride as usize) as isize) as usize; - dst = dst.offset(n_lines as isize * BD::pxstride(dst_stride as usize) as isize); + dst_offset = (dst_offset as isize + + n_lines as isize * BD::pxstride(dst_stride as usize) as isize) + as usize; + if n_lines == 3 { - BD::pixel_copy( - slice::from_raw_parts_mut(dst, dst_w as usize), - slice::from_raw_parts( - &mut *dst.offset(-(BD::pxstride(dst_stride as usize) as isize)) - as *mut BD::Pixel, - dst_w as usize, - ), - dst_w as usize, - ); - dst = dst.offset(BD::pxstride(dst_stride as usize) as isize); + if dst_stride < 0 { + let dst_tmp = dst.split_at_mut(dst_offset + BD::pxstride(-dst_stride as usize)); + dst_tmp.0[dst_offset..][..dst_w as usize] + .copy_from_slice(&dst_tmp.1[..dst_w as usize]); + } else { + let dst_tmp = dst.split_at_mut(dst_offset); + dst_tmp.1[..dst_w as usize].copy_from_slice( + &dst_tmp.0[dst_offset - BD::pxstride(dst_stride as usize)..] 
+ [..dst_w as usize], + ); + } + dst_offset = + (dst_offset as isize + BD::pxstride(dst_stride as usize) as isize) as usize; } } } else { while row + stripe_h <= row_h { let n_lines_0 = 4 - (row + stripe_h + 1 == h) as c_int; for i in 0..4 { + let dst_tmp = dst.split_at_mut(dst_offset); BD::pixel_copy( - slice::from_raw_parts_mut(dst, src_w as usize), + dst_tmp.1, if i == n_lines_0 { - slice::from_raw_parts( - &mut *dst.offset(-(BD::pxstride(dst_stride as usize) as isize)) - as *const BD::Pixel as *const BD::Pixel, - src_w as usize, - ) + &dst_tmp.0[dst_offset - BD::pxstride(dst_stride as usize)..] } else { &src[src_offset..] }, src_w as usize, ); - dst = dst.offset(BD::pxstride(dst_stride as usize) as isize); + dst_offset = + (dst_offset as isize + BD::pxstride(dst_stride as usize) as isize) as usize; src_offset = (src_offset as isize + BD::pxstride(src_stride as usize) as isize) as usize; } @@ -193,14 +188,32 @@ pub(crate) unsafe fn rav1d_copy_lpf( let lr_stride = &f.sr_cur.p.stride; let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); let tt_off = have_tt * sby * ((4 as c_int) << seq_hdr.sb128); - let dst: [*mut BD::Pixel; 3] = [ - (f.lf.lr_lpf_line[0] as *mut BD::Pixel) - .offset(tt_off as isize * BD::pxstride(lr_stride[0] as usize) as isize), - (f.lf.lr_lpf_line[1] as *mut BD::Pixel) - .offset(tt_off as isize * BD::pxstride(lr_stride[1] as usize) as isize), - (f.lf.lr_lpf_line[2] as *mut BD::Pixel) - .offset(tt_off as isize * BD::pxstride(lr_stride[1] as usize) as isize), + + let lr_plane_sz = &f.lf.lr_buf_plane_sz; + let y_stride = BD::pxstride(lr_stride[0] as usize) as isize; + let uv_stride = BD::pxstride(lr_stride[1] as usize) as isize; + let y_span = lr_plane_sz[0] as isize - y_stride; + let uv_span = lr_plane_sz[1] as isize / 2 - uv_stride; + + let dst: [&mut [BD::Pixel]; 3] = [ + slice::from_raw_parts_mut( + (f.lf.lr_lpf_line[0] as *mut BD::Pixel).offset(cmp::min(y_span, 0)), + lr_plane_sz[0] as usize, + ), + slice::from_raw_parts_mut( + (f.lf.lr_lpf_line[1] 
as *mut BD::Pixel).offset(cmp::min(uv_span, 0)), + lr_plane_sz[1] as usize / 2, + ), + slice::from_raw_parts_mut( + (f.lf.lr_lpf_line[2] as *mut BD::Pixel).offset(cmp::min(uv_span, 0)), + lr_plane_sz[1] as usize / 2, + ), ]; + let dst_offset: [usize; 2] = [ + (tt_off as isize * y_stride - cmp::min(y_span, 0)) as usize, + (tt_off as isize * uv_stride - cmp::min(uv_span, 0)) as usize, + ]; + let restore_planes = f.lf.restore_planes; let cdef_line_buf = BD::cast_pixel_slice_mut(&mut f.lf.cdef_line_buf); @@ -214,6 +227,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( backup_lpf::( c, dst[0], + dst_offset[0], lr_stride[0], src[0], (src_offset[0] as isize @@ -238,12 +252,18 @@ pub(crate) unsafe fn rav1d_copy_lpf( if have_tt != 0 && resize != 0 { let cdef_off_y: ptrdiff_t = (sby * 4) as isize * BD::pxstride(src_stride[0] as usize) as isize; + let cdef_plane_y_sz = 4 * f.sbh as isize * y_stride; + let y_span = cdef_plane_y_sz - y_stride; backup_lpf::( c, - cdef_line_buf - .as_mut_ptr() - .add(f.lf.cdef_lpf_line[0]) - .offset(cdef_off_y), + slice::from_raw_parts_mut( + cdef_line_buf + .as_mut_ptr() + .add(f.lf.cdef_lpf_line[0]) + .offset(cmp::min(y_span, 0)), + cdef_plane_y_sz.unsigned_abs(), + ), + (cdef_off_y - cmp::min(y_span, 0)) as usize, src_stride[0], src[0], (src_offset[0] as isize @@ -283,6 +303,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( backup_lpf::( c, dst[1], + dst_offset[1], lr_stride[1], src[1], (src_offset[1] as isize @@ -305,12 +326,18 @@ pub(crate) unsafe fn rav1d_copy_lpf( ); } if have_tt != 0 && resize != 0 { + let cdef_plane_uv_sz = 4 * f.sbh as isize * uv_stride; + let uv_span = cdef_plane_uv_sz - uv_stride; backup_lpf::( c, - cdef_line_buf - .as_mut_ptr() - .add(f.lf.cdef_lpf_line[1]) - .offset(cdef_off_uv), + slice::from_raw_parts_mut( + cdef_line_buf + .as_mut_ptr() + .add(f.lf.cdef_lpf_line[1]) + .offset(cmp::min(uv_span, 0)), + cdef_plane_uv_sz.unsigned_abs(), + ), + (cdef_off_uv - cmp::min(uv_span, 0)) as usize, src_stride[1], src[1], (src_offset[1] as 
isize @@ -338,6 +365,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( backup_lpf::( c, dst[2], + dst_offset[1], lr_stride[1], src[2], (src_offset[1] as isize @@ -360,12 +388,18 @@ pub(crate) unsafe fn rav1d_copy_lpf( ); } if have_tt != 0 && resize != 0 { + let cdef_plane_uv_sz = 4 * f.sbh as isize * uv_stride; + let uv_span = cdef_plane_uv_sz - uv_stride; backup_lpf::( c, - cdef_line_buf - .as_mut_ptr() - .add(f.lf.cdef_lpf_line[2]) - .offset(cdef_off_uv), + slice::from_raw_parts_mut( + cdef_line_buf + .as_mut_ptr() + .add(f.lf.cdef_lpf_line[2]) + .offset(cmp::min(uv_span, 0)), + cdef_plane_uv_sz.unsigned_abs(), + ), + (cdef_off_uv - cmp::min(uv_span, 0)) as usize, src_stride[1], src[2], (src_offset[1] as isize From e4d296625629e366b4e642054be701014672fb93 Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Tue, 6 Feb 2024 18:02:13 -0500 Subject: [PATCH 19/26] `fn filter_plane_rows_y/uv`: use slice instead of raw pointer Use slice for pixel data. Add an offset parameter to indicate reference index within slice. 
--- src/lf_apply.rs | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 6261133c1..679341c2d 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -491,7 +491,8 @@ unsafe fn filter_plane_rows_y( lvl: &[[u8; 4]], b4_stride: ptrdiff_t, mask: &[[[u16; 2]; 3]; 32], - mut dst: *mut BD::Pixel, + dst: &mut [BD::Pixel], + mut dst_offset: usize, ls: ptrdiff_t, w: c_int, starty4: c_int, @@ -507,7 +508,7 @@ unsafe fn filter_plane_rows_y( 0, ]; (*dsp).lf.loop_filter_sb[0][1]( - dst.cast(), + dst.as_mut_ptr().add(dst_offset).cast(), ls, vmask.as_ptr(), unaligned_lvl_slice(&lvl[0..], 1).as_ptr(), @@ -517,7 +518,7 @@ unsafe fn filter_plane_rows_y( f.bitdepth_max, ); } - dst = dst.offset(4 * BD::pxstride(ls as usize) as isize); + dst_offset = (dst_offset as isize + 4 * BD::pxstride(ls as usize) as isize) as usize; } } @@ -584,8 +585,9 @@ unsafe fn filter_plane_rows_uv( lvl: &[[u8; 4]], b4_stride: ptrdiff_t, mask: &[[[u16; 2]; 2]; 32], - u: *mut BD::Pixel, - v: *mut BD::Pixel, + u: &mut [BD::Pixel], + v: &mut [BD::Pixel], + uv_offset: usize, ls: ptrdiff_t, w: c_int, starty4: c_int, @@ -593,7 +595,7 @@ unsafe fn filter_plane_rows_uv( ss_hor: c_int, ) { let dsp: &Rav1dDSPContext = &*f.dsp; - let mut off_l: ptrdiff_t = 0; + let mut off_l = uv_offset as ptrdiff_t; for (y, lvl) in (starty4..endy4).zip(lvl.chunks(b4_stride as usize)) { if !(!have_top && y == 0) { let vmask: [u32; 3] = [ @@ -602,7 +604,7 @@ unsafe fn filter_plane_rows_uv( 0, ]; (*dsp).lf.loop_filter_sb[1][1]( - u.offset(off_l as isize).cast(), + u.as_mut_ptr().offset(off_l).cast(), ls, vmask.as_ptr(), unaligned_lvl_slice(&lvl[0..], 2).as_ptr(), @@ -612,7 +614,7 @@ unsafe fn filter_plane_rows_uv( f.bitdepth_max, ); (*dsp).lf.loop_filter_sb[1][1]( - v.offset(off_l as isize).cast(), + v.as_mut_ptr().offset(off_l).cast(), ls, vmask.as_ptr(), unaligned_lvl_slice(&lvl[0..], 3).as_ptr(), @@ -797,9 +799,7 @@ pub(crate) unsafe fn 
rav1d_loopfilter_sbrow_rows( let endy4: c_uint = (starty4 + cmp::min(f.h4 - sby * sbsz, sbsz)) as c_uint; let uv_endy4: c_uint = endy4.wrapping_add(ss_ver as c_uint) >> ss_ver; - let mut slice: &mut [BD::Pixel]; let mut level_ptr = &f.lf.level[(f.b4_stride * sby as isize * sbsz as isize) as usize..]; - slice = p[0]; for x in 0..f.sb128w { filter_plane_rows_y::( f, @@ -807,13 +807,13 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( level_ptr, f.b4_stride, &lflvl[x as usize].filter_y[1], - slice.as_mut_ptr().offset(p_offset[0] as isize), + p[0], + p_offset[0] + 128 * x as usize, f.cur.stride[0], cmp::min(32, f.w4 - x * 32), starty4, endy4 as c_int, ); - slice = &mut slice[128..]; level_ptr = &level_ptr[32..]; } @@ -822,8 +822,9 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( return; } - let mut uv_off: ptrdiff_t = 0; + let mut uv_off: usize = 0; let mut level_ptr = &f.lf.level[(f.b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..]; + let uv = p[1..].split_at_mut(1); for x in 0..f.sb128w { filter_plane_rows_uv::( f, @@ -831,8 +832,9 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( level_ptr, f.b4_stride, &lflvl[x as usize].filter_uv[1], - p[1][uv_off as usize..].as_mut_ptr().add(p_offset[1]), - p[2][uv_off as usize..].as_mut_ptr().add(p_offset[1]), + uv.0[0], + uv.1[0], + p_offset[1] + uv_off, f.cur.stride[1], cmp::min(32 as c_int, f.w4 - x * 32) + ss_hor >> ss_hor, starty4 >> ss_ver, From f54cb43586fd2ee01f47c583fbbbd18620747d7f Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Mon, 26 Feb 2024 08:44:40 -0500 Subject: [PATCH 20/26] Address comments from review --- src/lf_apply.rs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 679341c2d..574ae42e3 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -50,8 +50,9 @@ unsafe fn backup_lpf( }; // The first stripe of the frame is shorter by 8 luma pixel rows. 
let mut stripe_h = ((64 as c_int) << (cdef_backup & sb128)) - 8 * (row == 0) as c_int >> ss_ver; - src_offset = src_offset - .wrapping_add_signed((stripe_h - 2) as isize * BD::pxstride(src_stride as usize) as isize); + src_offset = (src_offset as isize + + (stripe_h - 2) as isize * BD::pxstride(src_stride as usize) as isize) + as usize; if c.tc.len() == 1 { if row != 0 { let top = (4 as c_int) << sb128; @@ -754,7 +755,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( return; } let mut level_ptr = &f.lf.level[(f.b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..]; - let (pu, pv) = p[1..].split_at_mut(1); + let [_, pu, pv] = p; let mut uv_off = p_offset[1]; have_left = false; for x in 0..f.sb128w { @@ -764,8 +765,8 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( level_ptr, f.b4_stride, &lflvl[x as usize].filter_uv[0], - pu[0], - pv[0], + pu, + pv, uv_off, f.cur.stride[1], cmp::min(32, f.w4 - x * 32) + ss_hor >> ss_hor, @@ -824,7 +825,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( let mut uv_off: usize = 0; let mut level_ptr = &f.lf.level[(f.b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..]; - let uv = p[1..].split_at_mut(1); + let [_, pu, pv] = p; for x in 0..f.sb128w { filter_plane_rows_uv::( f, @@ -832,8 +833,8 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_rows( level_ptr, f.b4_stride, &lflvl[x as usize].filter_uv[1], - uv.0[0], - uv.1[0], + pu, + pv, p_offset[1] + uv_off, f.cur.stride[1], cmp::min(32 as c_int, f.w4 - x * 32) + ss_hor >> ss_hor, From 060574de0da84673918c87fd80c571a20092f878 Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Mon, 26 Feb 2024 08:52:11 -0500 Subject: [PATCH 21/26] Copy comments from C code --- src/lf_apply.rs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 574ae42e3..da9f019a6 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -215,6 +215,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( (tt_off as isize * uv_stride - 
cmp::min(uv_span, 0)) as usize, ]; + // TODO Also check block level restore type to reduce copying. let restore_planes = f.lf.restore_planes; let cdef_line_buf = BD::cast_pixel_slice_mut(&mut f.lf.cdef_line_buf); @@ -453,6 +454,8 @@ unsafe fn filter_plane_cols_y( endy4: c_int, ) { let dsp: &Rav1dDSPContext = &*f.dsp; + + // filter edges between columns (e.g. block1 | block2) for x in 0..w as usize { if !(!have_left && x == 0) { let mut hmask: [u32; 4] = [0; 4]; @@ -500,6 +503,10 @@ unsafe fn filter_plane_rows_y( endy4: c_int, ) { let dsp: &Rav1dDSPContext = &*f.dsp; + + // block1 + // filter edges between rows (e.g. ------) + // block2 for (y, lvl) in (starty4..endy4).zip(lvl.chunks(b4_stride as usize)) { if !(!have_top && y == 0) { let vmask: [u32; 4] = [ @@ -540,6 +547,8 @@ unsafe fn filter_plane_cols_uv( ss_ver: c_int, ) { let dsp: &Rav1dDSPContext = &*f.dsp; + + // filter edges between columns (e.g. block1 | block2) for x in 0..w as usize { if !(!have_left && x == 0) { let mut hmask: [u32; 3] = [0; 3]; @@ -597,6 +606,10 @@ unsafe fn filter_plane_rows_uv( ) { let dsp: &Rav1dDSPContext = &*f.dsp; let mut off_l = uv_offset as ptrdiff_t; + + // block1 + // filter edges between rows (e.g. 
------) + // block2 for (y, lvl) in (starty4..endy4).zip(lvl.chunks(b4_stride as usize)) { if !(!have_top && y == 0) { let vmask: [u32; 3] = [ @@ -638,7 +651,7 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( start_of_tile_row: c_int, ) { let lflvl = &mut f.lf.mask[lflvl_offset..]; - let mut have_left; + let mut have_left; // Don't filter outside the frame let seq_hdr = &***f.seq_hdr.as_ref().unwrap(); let is_sb64 = (seq_hdr.sb128 == 0) as c_int; let starty4 = ((sby & is_sb64) as u32) << 4; @@ -657,6 +670,8 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( let mut lpf_y = &lpf_y[(sby << sbl2) as usize..]; let mut lpf_uv = &lpf_uv[(sby << sbl2 - ss_ver) as usize..]; let frame_hdr = &***f.frame_hdr.as_ref().unwrap(); + + // fix lpf strength at tile col boundaries let mut tile_col = 1; loop { let mut x = frame_hdr.tiling.col_start_sb[tile_col as usize] as c_int; @@ -695,6 +710,8 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols( lpf_uv = &lpf_uv[(halign >> ss_ver)..]; tile_col += 1; } + + // fix lpf strength at tile row boundaries if start_of_tile_row != 0 { let mut a: &[BlockContext] = slice::from_raw_parts(f.a, f.a_sz as usize); a = &a[(f.sb128w * (start_of_tile_row - 1)) as usize..]; From 793a00f5259495d80ecde8c1ef59b7cae23c9dca Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Wed, 28 Feb 2024 16:50:49 -0500 Subject: [PATCH 22/26] Avoid unnecessary type casting when invoking `BD::pxstride` --- src/lf_apply.rs | 100 ++++++++++++++++++------------------------------ 1 file changed, 38 insertions(+), 62 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index da9f019a6..80fc34efd 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -50,61 +50,51 @@ unsafe fn backup_lpf( }; // The first stripe of the frame is shorter by 8 luma pixel rows. 
let mut stripe_h = ((64 as c_int) << (cdef_backup & sb128)) - 8 * (row == 0) as c_int >> ss_ver; - src_offset = (src_offset as isize - + (stripe_h - 2) as isize * BD::pxstride(src_stride as usize) as isize) - as usize; + src_offset = + (src_offset as isize + (stripe_h - 2) as isize * BD::pxstride(src_stride)) as usize; if c.tc.len() == 1 { if row != 0 { let top = (4 as c_int) << sb128; // Copy the top part of the stored loop filtered pixels from the // previous sb row needed above the first stripe of this sb row. if dst_stride < 0 { - let mut dst_offset_delta1 = 3 * BD::pxstride(-dst_stride as usize); + let mut dst_offset_delta1 = 3 * BD::pxstride(-dst_stride) as usize; let dst_tmp = dst.split_at_mut(dst_offset - dst_offset_delta1); let mut dst_offset_delta0 = - dst_offset - top as usize * BD::pxstride(-dst_stride as usize); + dst_offset - top as usize * BD::pxstride(-dst_stride) as usize; dst_tmp.1[dst_offset_delta1..][..dst_w as usize] .copy_from_slice(&dst_tmp.0[dst_offset_delta0..][..dst_w as usize]); - dst_offset_delta0 -= BD::pxstride(-dst_stride as usize); - dst_offset_delta1 -= BD::pxstride(-dst_stride as usize); + dst_offset_delta0 -= BD::pxstride(-dst_stride) as usize; + dst_offset_delta1 -= BD::pxstride(-dst_stride) as usize; dst_tmp.1[dst_offset_delta1..][..dst_w as usize] .copy_from_slice(&dst_tmp.0[dst_offset_delta0..][..dst_w as usize]); - dst_offset_delta0 -= BD::pxstride(-dst_stride as usize); - dst_offset_delta1 -= BD::pxstride(-dst_stride as usize); + dst_offset_delta0 -= BD::pxstride(-dst_stride) as usize; + dst_offset_delta1 -= BD::pxstride(-dst_stride) as usize; dst_tmp.1[dst_offset_delta1..][..dst_w as usize] .copy_from_slice(&dst_tmp.0[dst_offset_delta0..][..dst_w as usize]); - dst_offset_delta0 -= BD::pxstride(-dst_stride as usize); - dst_offset_delta1 -= BD::pxstride(-dst_stride as usize); + dst_offset_delta0 -= BD::pxstride(-dst_stride) as usize; + dst_offset_delta1 -= BD::pxstride(-dst_stride) as usize; 
dst_tmp.1[dst_offset_delta1..][..dst_w as usize] .copy_from_slice(&dst_tmp.0[dst_offset_delta0..][..dst_w as usize]); } else { let mut dst_offset_delta = 0; let dst_tmp = dst.split_at_mut( - (dst_offset as isize - + top as isize * BD::pxstride(dst_stride as usize) as isize) - as usize, + (dst_offset as isize + top as isize * BD::pxstride(dst_stride)) as usize, ); dst_tmp.0[dst_offset + dst_offset_delta..][..dst_w as usize] .copy_from_slice(&dst_tmp.1[dst_offset_delta..][..dst_w as usize]); - dst_offset_delta = (dst_offset_delta as isize - + BD::pxstride(dst_stride as usize) as isize) - as usize; + dst_offset_delta = (dst_offset_delta as isize + BD::pxstride(dst_stride)) as usize; dst_tmp.0[dst_offset + dst_offset_delta..][..dst_w as usize] .copy_from_slice(&dst_tmp.1[dst_offset_delta..][..dst_w as usize]); - dst_offset_delta = (dst_offset_delta as isize - + BD::pxstride(dst_stride as usize) as isize) - as usize; + dst_offset_delta = (dst_offset_delta as isize + BD::pxstride(dst_stride)) as usize; dst_tmp.0[dst_offset + dst_offset_delta..][..dst_w as usize] .copy_from_slice(&dst_tmp.1[dst_offset_delta..][..dst_w as usize]); - dst_offset_delta = (dst_offset_delta as isize - + BD::pxstride(dst_stride as usize) as isize) - as usize; + dst_offset_delta = (dst_offset_delta as isize + BD::pxstride(dst_stride)) as usize; dst_tmp.0[dst_offset + dst_offset_delta..][..dst_w as usize] .copy_from_slice(&dst_tmp.1[dst_offset_delta..][..dst_w as usize]); } } - dst_offset = - (dst_offset as isize + 4 * BD::pxstride(dst_stride as usize) as isize) as usize; + dst_offset = (dst_offset as isize + 4 * BD::pxstride(dst_stride)) as usize; } if lr_backup != 0 && frame_hdr.size.width[0] != frame_hdr.size.width[1] { while row + stripe_h <= row_h { @@ -123,27 +113,24 @@ unsafe fn backup_lpf( ); row += stripe_h; // unmodified stripe_h for the 1st stripe stripe_h = 64 >> ss_ver; - src_offset = (src_offset as isize - + stripe_h as isize * BD::pxstride(src_stride as usize) as isize) - as usize; 
- dst_offset = (dst_offset as isize - + n_lines as isize * BD::pxstride(dst_stride as usize) as isize) - as usize; + src_offset = + (src_offset as isize + stripe_h as isize * BD::pxstride(src_stride)) as usize; + dst_offset = + (dst_offset as isize + n_lines as isize * BD::pxstride(dst_stride)) as usize; if n_lines == 3 { if dst_stride < 0 { - let dst_tmp = dst.split_at_mut(dst_offset + BD::pxstride(-dst_stride as usize)); + let dst_tmp = dst.split_at_mut(dst_offset + BD::pxstride(-dst_stride) as usize); dst_tmp.0[dst_offset..][..dst_w as usize] .copy_from_slice(&dst_tmp.1[..dst_w as usize]); } else { let dst_tmp = dst.split_at_mut(dst_offset); dst_tmp.1[..dst_w as usize].copy_from_slice( - &dst_tmp.0[dst_offset - BD::pxstride(dst_stride as usize)..] + &dst_tmp.0[dst_offset - BD::pxstride(dst_stride) as usize..] [..dst_w as usize], ); } - dst_offset = - (dst_offset as isize + BD::pxstride(dst_stride as usize) as isize) as usize; + dst_offset = (dst_offset as isize + BD::pxstride(dst_stride)) as usize; } } } else { @@ -154,22 +141,19 @@ unsafe fn backup_lpf( BD::pixel_copy( dst_tmp.1, if i == n_lines_0 { - &dst_tmp.0[dst_offset - BD::pxstride(dst_stride as usize)..] + &dst_tmp.0[dst_offset - BD::pxstride(dst_stride) as usize..] } else { &src[src_offset..] 
}, src_w as usize, ); - dst_offset = - (dst_offset as isize + BD::pxstride(dst_stride as usize) as isize) as usize; - src_offset = - (src_offset as isize + BD::pxstride(src_stride as usize) as isize) as usize; + dst_offset = (dst_offset as isize + BD::pxstride(dst_stride)) as usize; + src_offset = (src_offset as isize + BD::pxstride(src_stride)) as usize; } row += stripe_h; // unmodified stripe_h for the 1st stripe stripe_h = 64 >> ss_ver; - src_offset = src_offset.wrapping_add_signed( - (stripe_h - 4) as isize * BD::pxstride(src_stride as usize) as isize, - ); + src_offset = + src_offset.wrapping_add_signed((stripe_h - 4) as isize * BD::pxstride(src_stride)); } }; } @@ -191,8 +175,8 @@ pub(crate) unsafe fn rav1d_copy_lpf( let tt_off = have_tt * sby * ((4 as c_int) << seq_hdr.sb128); let lr_plane_sz = &f.lf.lr_buf_plane_sz; - let y_stride = BD::pxstride(lr_stride[0] as usize) as isize; - let uv_stride = BD::pxstride(lr_stride[1] as usize) as isize; + let y_stride = BD::pxstride(lr_stride[0]); + let uv_stride = BD::pxstride(lr_stride[1]); let y_span = lr_plane_sz[0] as isize - y_stride; let uv_span = lr_plane_sz[1] as isize / 2 - uv_stride; @@ -232,9 +216,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( dst_offset[0], lr_stride[0], src[0], - (src_offset[0] as isize - - offset as isize * BD::pxstride(src_stride[0] as usize) as isize) - as usize, + (src_offset[0] as isize - offset as isize * BD::pxstride(src_stride[0])) as usize, src_stride[0], 0, seq_hdr.sb128, @@ -252,8 +234,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( ); } if have_tt != 0 && resize != 0 { - let cdef_off_y: ptrdiff_t = - (sby * 4) as isize * BD::pxstride(src_stride[0] as usize) as isize; + let cdef_off_y: ptrdiff_t = (sby * 4) as isize * BD::pxstride(src_stride[0]); let cdef_plane_y_sz = 4 * f.sbh as isize * y_stride; let y_span = cdef_plane_y_sz - y_stride; backup_lpf::( @@ -298,8 +279,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( let row_h_0 = cmp::min((sby + 1) << 6 - ss_ver + seq_hdr.sb128, h_0 - 1); let 
offset_uv = offset >> ss_ver; let y_stripe_0 = (sby << 6 - ss_ver + seq_hdr.sb128) - offset_uv; - let cdef_off_uv: ptrdiff_t = - sby as isize * 4 * BD::pxstride(src_stride[1] as usize) as isize; + let cdef_off_uv: ptrdiff_t = sby as isize * 4 * BD::pxstride(src_stride[1]); if seq_hdr.cdef != 0 || restore_planes & LR_RESTORE_U as c_int != 0 { if restore_planes & LR_RESTORE_U as c_int != 0 || resize == 0 { backup_lpf::( @@ -308,8 +288,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( dst_offset[1], lr_stride[1], src[1], - (src_offset[1] as isize - - offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize) + (src_offset[1] as isize - offset_uv as isize * BD::pxstride(src_stride[1])) as usize, src_stride[1], ss_ver, @@ -342,8 +321,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( (cdef_off_uv - cmp::min(uv_span, 0)) as usize, src_stride[1], src[1], - (src_offset[1] as isize - - offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize) + (src_offset[1] as isize - offset_uv as isize * BD::pxstride(src_stride[1])) as usize, src_stride[1], ss_ver, @@ -370,8 +348,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( dst_offset[1], lr_stride[1], src[2], - (src_offset[1] as isize - - offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize) + (src_offset[1] as isize - offset_uv as isize * BD::pxstride(src_stride[1])) as usize, src_stride[1], ss_ver, @@ -404,8 +381,7 @@ pub(crate) unsafe fn rav1d_copy_lpf( (cdef_off_uv - cmp::min(uv_span, 0)) as usize, src_stride[1], src[2], - (src_offset[1] as isize - - offset_uv as isize * BD::pxstride(src_stride[1] as usize) as isize) + (src_offset[1] as isize - offset_uv as isize * BD::pxstride(src_stride[1])) as usize, src_stride[1], ss_ver, @@ -526,7 +502,7 @@ unsafe fn filter_plane_rows_y( f.bitdepth_max, ); } - dst_offset = (dst_offset as isize + 4 * BD::pxstride(ls as usize) as isize) as usize; + dst_offset = (dst_offset as isize + 4 * BD::pxstride(ls)) as usize; } } @@ -638,7 +614,7 @@ unsafe fn filter_plane_rows_uv( 
f.bitdepth_max, ); } - off_l += 4 * BD::pxstride(ls as usize) as isize; + off_l += 4 * BD::pxstride(ls); } } From 47fb8fa176002044bcafc288a292266ca669c365 Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Mon, 4 Mar 2024 07:55:36 -0500 Subject: [PATCH 23/26] Reduce number of casts for variable `top` --- src/lf_apply.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 80fc34efd..0b2f95d03 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -54,14 +54,13 @@ unsafe fn backup_lpf( (src_offset as isize + (stripe_h - 2) as isize * BD::pxstride(src_stride)) as usize; if c.tc.len() == 1 { if row != 0 { - let top = (4 as c_int) << sb128; + let top = 4 << sb128; // Copy the top part of the stored loop filtered pixels from the // previous sb row needed above the first stripe of this sb row. if dst_stride < 0 { let mut dst_offset_delta1 = 3 * BD::pxstride(-dst_stride) as usize; let dst_tmp = dst.split_at_mut(dst_offset - dst_offset_delta1); - let mut dst_offset_delta0 = - dst_offset - top as usize * BD::pxstride(-dst_stride) as usize; + let mut dst_offset_delta0 = dst_offset - (top * BD::pxstride(-dst_stride)) as usize; dst_tmp.1[dst_offset_delta1..][..dst_w as usize] .copy_from_slice(&dst_tmp.0[dst_offset_delta0..][..dst_w as usize]); dst_offset_delta0 -= BD::pxstride(-dst_stride) as usize; @@ -79,7 +78,7 @@ unsafe fn backup_lpf( } else { let mut dst_offset_delta = 0; let dst_tmp = dst.split_at_mut( - (dst_offset as isize + top as isize * BD::pxstride(dst_stride)) as usize, + (dst_offset as isize + (top * BD::pxstride(dst_stride))) as usize, ); dst_tmp.0[dst_offset + dst_offset_delta..][..dst_w as usize] .copy_from_slice(&dst_tmp.1[dst_offset_delta..][..dst_w as usize]); From d63937783b57b715dd4eb311be661caa322ffd89 Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Mon, 4 Mar 2024 08:52:05 -0500 Subject: [PATCH 24/26] `fn backup_lpf`: clean up pixel copy --- src/lf_apply.rs | 47 
+++++++++++++++-------------------------------- 1 file changed, 15 insertions(+), 32 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 0b2f95d03..213b40b50 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -55,42 +55,25 @@ unsafe fn backup_lpf( if c.tc.len() == 1 { if row != 0 { let top = 4 << sb128; + let px_abs_stride = BD::pxstride(dst_stride.unsigned_abs()); + let top_size = top * px_abs_stride; // Copy the top part of the stored loop filtered pixels from the // previous sb row needed above the first stripe of this sb row. - if dst_stride < 0 { - let mut dst_offset_delta1 = 3 * BD::pxstride(-dst_stride) as usize; - let dst_tmp = dst.split_at_mut(dst_offset - dst_offset_delta1); - let mut dst_offset_delta0 = dst_offset - (top * BD::pxstride(-dst_stride)) as usize; - dst_tmp.1[dst_offset_delta1..][..dst_w as usize] - .copy_from_slice(&dst_tmp.0[dst_offset_delta0..][..dst_w as usize]); - dst_offset_delta0 -= BD::pxstride(-dst_stride) as usize; - dst_offset_delta1 -= BD::pxstride(-dst_stride) as usize; - dst_tmp.1[dst_offset_delta1..][..dst_w as usize] - .copy_from_slice(&dst_tmp.0[dst_offset_delta0..][..dst_w as usize]); - dst_offset_delta0 -= BD::pxstride(-dst_stride) as usize; - dst_offset_delta1 -= BD::pxstride(-dst_stride) as usize; - dst_tmp.1[dst_offset_delta1..][..dst_w as usize] - .copy_from_slice(&dst_tmp.0[dst_offset_delta0..][..dst_w as usize]); - dst_offset_delta0 -= BD::pxstride(-dst_stride) as usize; - dst_offset_delta1 -= BD::pxstride(-dst_stride) as usize; - dst_tmp.1[dst_offset_delta1..][..dst_w as usize] - .copy_from_slice(&dst_tmp.0[dst_offset_delta0..][..dst_w as usize]); + let (dst, dst_top) = if dst_stride < 0 { + let dst = &mut dst[dst_offset - top_size - 3 * px_abs_stride..]; + let (dst_top, dst) = dst.split_at_mut(top_size); + (dst, dst_top) } else { - let mut dst_offset_delta = 0; - let dst_tmp = dst.split_at_mut( - (dst_offset as isize + (top * BD::pxstride(dst_stride))) as usize, + let dst = &mut dst[dst_offset..]; + 
dst.split_at_mut(top_size) + }; + + for i in 0..4 { + BD::pixel_copy( + &mut dst[i * px_abs_stride..], + &dst_top[i * px_abs_stride..], + dst_w as usize, ); - dst_tmp.0[dst_offset + dst_offset_delta..][..dst_w as usize] - .copy_from_slice(&dst_tmp.1[dst_offset_delta..][..dst_w as usize]); - dst_offset_delta = (dst_offset_delta as isize + BD::pxstride(dst_stride)) as usize; - dst_tmp.0[dst_offset + dst_offset_delta..][..dst_w as usize] - .copy_from_slice(&dst_tmp.1[dst_offset_delta..][..dst_w as usize]); - dst_offset_delta = (dst_offset_delta as isize + BD::pxstride(dst_stride)) as usize; - dst_tmp.0[dst_offset + dst_offset_delta..][..dst_w as usize] - .copy_from_slice(&dst_tmp.1[dst_offset_delta..][..dst_w as usize]); - dst_offset_delta = (dst_offset_delta as isize + BD::pxstride(dst_stride)) as usize; - dst_tmp.0[dst_offset + dst_offset_delta..][..dst_w as usize] - .copy_from_slice(&dst_tmp.1[dst_offset_delta..][..dst_w as usize]); } } dst_offset = (dst_offset as isize + 4 * BD::pxstride(dst_stride)) as usize; From c509c302f05a9f3e5eba2d9c246543c400017359 Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Tue, 5 Mar 2024 07:38:52 -0500 Subject: [PATCH 25/26] Remove explicit dereference of `dsp` --- src/lf_apply.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index 213b40b50..b478c73c5 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -432,7 +432,7 @@ unsafe fn filter_plane_cols_y( hmask[2] = mask[x][2][1] as u32; } // hmask[3] = 0; already initialized above - (*dsp).lf.loop_filter_sb[0][0]( + dsp.lf.loop_filter_sb[0][0]( dst.as_mut_ptr().add(dst_offset + x * 4).cast(), ls, hmask.as_mut_ptr(), @@ -473,7 +473,7 @@ unsafe fn filter_plane_rows_y( mask[y as usize][2][0] as u32 | (mask[y as usize][2][1] as u32) << 16, 0, ]; - (*dsp).lf.loop_filter_sb[0][1]( + dsp.lf.loop_filter_sb[0][1]( dst.as_mut_ptr().add(dst_offset).cast(), ls, vmask.as_ptr(), @@ -522,7 +522,7 @@ unsafe fn filter_plane_cols_uv( 
hmask[1] = mask[x as usize][1][1] as u32; } // hmask[2] = 0; Already initialized to 0 above - (*dsp).lf.loop_filter_sb[1][0]( + dsp.lf.loop_filter_sb[1][0]( u.as_mut_ptr().add(uv_offset + x * 4).cast(), ls, hmask.as_mut_ptr(), @@ -532,7 +532,7 @@ unsafe fn filter_plane_cols_uv( endy4 - starty4, f.bitdepth_max, ); - (*dsp).lf.loop_filter_sb[1][0]( + dsp.lf.loop_filter_sb[1][0]( v.as_mut_ptr().add(uv_offset + x * 4).cast(), ls, hmask.as_mut_ptr(), @@ -575,7 +575,7 @@ unsafe fn filter_plane_rows_uv( mask[y as usize][1][0] as u32 | (mask[y as usize][1][1] as u32) << (16 >> ss_hor), 0, ]; - (*dsp).lf.loop_filter_sb[1][1]( + dsp.lf.loop_filter_sb[1][1]( u.as_mut_ptr().offset(off_l).cast(), ls, vmask.as_ptr(), @@ -585,7 +585,7 @@ unsafe fn filter_plane_rows_uv( w, f.bitdepth_max, ); - (*dsp).lf.loop_filter_sb[1][1]( + dsp.lf.loop_filter_sb[1][1]( v.as_mut_ptr().offset(off_l).cast(), ls, vmask.as_ptr(), From 5a24dbd82dd05b7677083d3bfb7705749c9463da Mon Sep 17 00:00:00 2001 From: Frank Bossen Date: Tue, 5 Mar 2024 08:17:18 -0500 Subject: [PATCH 26/26] Clean up use of `split_at_mut` and pixel copy --- src/lf_apply.rs | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/src/lf_apply.rs b/src/lf_apply.rs index b478c73c5..d621c70ce 100644 --- a/src/lf_apply.rs +++ b/src/lf_apply.rs @@ -101,34 +101,33 @@ unsafe fn backup_lpf( (dst_offset as isize + n_lines as isize * BD::pxstride(dst_stride)) as usize; if n_lines == 3 { - if dst_stride < 0 { - let dst_tmp = dst.split_at_mut(dst_offset + BD::pxstride(-dst_stride) as usize); - dst_tmp.0[dst_offset..][..dst_w as usize] - .copy_from_slice(&dst_tmp.1[..dst_w as usize]); + let dst_abs_px_stride = BD::pxstride(dst_stride.unsigned_abs()); + let (src_tmp, dst_tmp) = if dst_stride < 0 { + let (dst_tmp, src_tmp) = dst[dst_offset..].split_at_mut(dst_abs_px_stride); + (src_tmp, dst_tmp) } else { - let dst_tmp = dst.split_at_mut(dst_offset); - dst_tmp.1[..dst_w as 
usize].copy_from_slice( - &dst_tmp.0[dst_offset - BD::pxstride(dst_stride) as usize..] - [..dst_w as usize], - ); - } + dst[dst_offset - dst_abs_px_stride..].split_at_mut(dst_abs_px_stride) + }; + BD::pixel_copy(dst_tmp, src_tmp, dst_w as usize); dst_offset = (dst_offset as isize + BD::pxstride(dst_stride)) as usize; } } } else { while row + stripe_h <= row_h { - let n_lines_0 = 4 - (row + stripe_h + 1 == h) as c_int; + let n_lines = 4 - (row + stripe_h + 1 == h) as c_int; for i in 0..4 { - let dst_tmp = dst.split_at_mut(dst_offset); - BD::pixel_copy( - dst_tmp.1, - if i == n_lines_0 { - &dst_tmp.0[dst_offset - BD::pxstride(dst_stride) as usize..] - } else { - &src[src_offset..] - }, - src_w as usize, - ); + let dst_abs_px_stride = BD::pxstride(dst_stride.unsigned_abs()); + let (src_tmp, dst_tmp) = if i != n_lines { + (&src[src_offset..], &mut dst[dst_offset..]) + } else if dst_stride < 0 { + let (dst_tmp, src_tmp) = dst[dst_offset..].split_at_mut(dst_abs_px_stride); + (&*src_tmp, dst_tmp) + } else { + let (src_tmp, dst_tmp) = + dst[dst_offset - dst_abs_px_stride..].split_at_mut(dst_abs_px_stride); + (&*src_tmp, dst_tmp) + }; + BD::pixel_copy(dst_tmp, src_tmp, src_w as usize); dst_offset = (dst_offset as isize + BD::pxstride(dst_stride)) as usize; src_offset = (src_offset as isize + BD::pxstride(src_stride)) as usize; }