Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 14 additions & 24 deletions polyval/src/field_element.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,28 +37,38 @@ cfg_if! {
// aarch64
mod autodetect;
mod armv8;
pub(crate) use autodetect::{InitToken, detect_intrinsics};
pub(crate) use autodetect::{InitToken, has_intrinsics};
} else if #[cfg(all(
any(target_arch = "x86_64", target_arch = "x86"),
not(polyval_backend = "soft")
))] {
// x86/x86-64
mod autodetect;
mod x86;
pub(crate) use autodetect::{InitToken, detect_intrinsics};
pub(crate) use autodetect::{InitToken, has_intrinsics};
} else {
// Pure Rust fallback implementation for other targets
// "soft" fallback implementation for other targets written in pure Rust
use universal_hash::array::{Array, ArraySize};

pub(crate) type InitToken = ();
pub(crate) fn detect_intrinsics() -> (InitToken, bool) {
pub(crate) fn has_intrinsics() -> (InitToken, bool) {
((), false)
}

impl FieldElement {
/// Default degree of parallelism, i.e. how many powers of `H` to compute.
pub const DEFAULT_PARALLELISM: usize = 8;

/// Stand-in for `powers_of_h` on targets that only have the "soft" backend.
///
/// The `InitToken` argument is ignored; all work is forwarded to `soft::powers_of_h`,
/// which exists so that `Polyval::h` keeps working even though `proc_par_blocks`
/// isn't supported here.
#[inline]
pub(crate) fn powers_of_h<const N: usize>(self, _has_intrinsics: InitToken) -> [Self; N] {
    soft::powers_of_h::<N>(self)
}

/// Process an individual block.
pub(crate) fn proc_block(
h: FieldElement,
Expand All @@ -83,26 +93,6 @@ cfg_if! {
}
}

impl FieldElement {
    /// Build the table of the first `N` powers of `self`, highest power first
    /// (the final element is `self` itself).
    #[inline]
    #[allow(dead_code)] // We may not use this in some configurations
    pub(crate) fn powers_of_h<const N: usize>(self) -> [Self; N] {
        // TODO: improve pipelining by using more square operations?
        let mut table = [Self::default(); N];
        let mut slots = table.iter_mut().rev();

        // The final slot holds `h`; every earlier slot is `h` times the slot after it.
        if let Some(last) = slots.next() {
            *last = self;
        }
        let mut acc = self;
        for slot in slots {
            let mut next = self;
            next *= acc;
            *slot = next;
            acc = next;
        }
        table
    }
}

impl Debug for FieldElement {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "FieldElement(")?;
Expand Down
27 changes: 24 additions & 3 deletions polyval/src/field_element/autodetect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,37 @@ use crate::Block;
use universal_hash::array::{Array, ArraySize};

#[cfg(target_arch = "aarch64")]
cpufeatures::new!(mul_intrinsics, "aes"); // `aes` implies PMULL
cpufeatures::new!(detect_intrinsics, "aes"); // `aes` implies PMULL
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
cpufeatures::new!(mul_intrinsics, "pclmulqdq");
cpufeatures::new!(detect_intrinsics, "pclmulqdq");

pub(crate) use mul_intrinsics::{InitToken, init_get as detect_intrinsics};
pub(crate) use detect_intrinsics::{InitToken, init_get as has_intrinsics};

impl FieldElement {
/// Default degree of parallelism, i.e. how many powers of `H` to compute.
pub const DEFAULT_PARALLELISM: usize = 8;

/// Compute the first N powers of h, in reverse order.
///
/// When `has_intrinsics.get()` reports true, each power is produced with
/// `intrinsics::polymul`; otherwise the work is delegated to `soft::powers_of_h`.
#[inline]
pub(crate) fn powers_of_h<const N: usize>(self, has_intrinsics: InitToken) -> [Self; N] {
    if !has_intrinsics.get() {
        return soft::powers_of_h(self);
    }

    // TODO: improve pipelining by using more square operations?
    let mut table = [Self::default(); N];
    let mut slots = table.iter_mut().rev();

    // The final slot holds `h`; every earlier slot is `h` times the slot after it.
    if let Some(last) = slots.next() {
        *last = self;
    }
    let mut acc = self;
    for slot in slots {
        // SAFETY: NOTE(review) - this path only runs after `has_intrinsics.get()` returned
        // true; presumably that is the precondition of `intrinsics::polymul` - confirm.
        let next: Self = unsafe { intrinsics::polymul(self.into(), acc.into()) }.into();
        *slot = next;
        acc = next;
    }
    table
}

/// Process an individual block.
pub(crate) fn proc_block(
h: FieldElement,
Expand Down
11 changes: 11 additions & 0 deletions polyval/src/field_element/soft.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,15 @@ use core::{
use soft_impl::{karatsuba, mont_reduce};
use universal_hash::array::{Array, ArraySize};

/// Stub implementation which only makes `PolyvalGeneric::h` work.
///
/// Returns an array whose final element is `h` and whose remaining elements are
/// `FieldElement::default()`. For `N == 0` the empty array is returned rather than
/// panicking on `N - 1`.
// TODO(tarcieri): actually implement this optimization?
#[inline]
pub(super) fn powers_of_h<const N: usize>(h: FieldElement) -> [FieldElement; N] {
    let mut ret = [FieldElement::default(); N];
    // `last_mut` yields `None` for a zero-length array, unlike indexing with `N - 1`.
    if let Some(last) = ret.last_mut() {
        *last = h;
    }
    ret
}

/// Perform carryless multiplication of `y` by `h` and return the result.
#[inline]
pub(super) fn polymul(y: FieldElement, h: FieldElement) -> FieldElement {
Expand All @@ -46,13 +55,15 @@ pub(super) fn polymul(y: FieldElement, h: FieldElement) -> FieldElement {

/// Absorb one block: add `x` into the accumulator `y`, then multiply the sum by `h`
/// via `polymul`.
// TODO(tarcieri): implement `proc_par_blocks` for soft backend?
#[inline]
pub(super) fn proc_block(h: FieldElement, y: FieldElement, x: &Block) -> FieldElement {
    polymul(y + FieldElement::from(x), h)
}

/// Process multiple blocks.
// TODO(tarcieri): optimized implementation?
#[inline]
pub(super) fn proc_par_blocks<const N: usize, U: ArraySize>(
powers_of_h: &[FieldElement; N],
mut y: FieldElement,
Expand Down
8 changes: 4 additions & 4 deletions polyval/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ pub use crate::mulx::mulx;
pub use universal_hash;

use core::fmt::{self, Debug};
use field_element::{FieldElement, InitToken, detect_intrinsics};
use field_element::{FieldElement, InitToken, has_intrinsics};
use universal_hash::{
KeyInit, ParBlocks, Reset, UhfBackend, UhfClosure, UniversalHash,
array::{Array, ArraySize},
Expand Down Expand Up @@ -76,11 +76,11 @@ impl<const N: usize> PolyvalGeneric<N> {
/// Initialize POLYVAL with the given `H` field element and initial block.
#[must_use]
pub fn new_with_init_block(h: &Key, init_block: u128) -> Self {
let (token, _has_intrinsics) = detect_intrinsics();
let has_intrinsics = has_intrinsics().0;
Self {
powers_of_h: FieldElement::from(h).powers_of_h(),
powers_of_h: FieldElement::from(h).powers_of_h(has_intrinsics),
y: init_block.into(),
has_intrinsics: token,
has_intrinsics,
}
}

Expand Down