From de674aa381a6a7098c91fcebb78d87943d8276eb Mon Sep 17 00:00:00 2001 From: Markus <66058642+mhovd@users.noreply.github.com> Date: Wed, 14 Jan 2026 10:19:30 +0100 Subject: [PATCH 1/4] chore: Rename modules and structures --- src/data/mod.rs | 1 + src/data/parser/mod.rs | 3 +- src/data/parser/pmetrics.rs | 64 ++------ src/data/{parser/normalized.rs => row.rs} | 185 ++++++++++++---------- src/error/mod.rs | 4 +- src/lib.rs | 2 +- 6 files changed, 123 insertions(+), 136 deletions(-) rename src/data/{parser/normalized.rs => row.rs} (81%) diff --git a/src/data/mod.rs b/src/data/mod.rs index 813c13fd..bd1690bc 100644 --- a/src/data/mod.rs +++ b/src/data/mod.rs @@ -35,6 +35,7 @@ pub mod error_model; pub mod event; pub mod parser; pub mod residual_error; +pub mod row; pub mod structs; pub use covariate::*; pub use error_model::*; diff --git a/src/data/parser/mod.rs b/src/data/parser/mod.rs index 613edc69..90ca87f0 100644 --- a/src/data/parser/mod.rs +++ b/src/data/parser/mod.rs @@ -1,5 +1,4 @@ -pub mod normalized; pub mod pmetrics; -pub use normalized::{build_data, NormalizedRow, NormalizedRowBuilder}; +pub use crate::data::row::{build_data, DataRow, DataRowBuilder}; pub use pmetrics::*; diff --git a/src/data/parser/pmetrics.rs b/src/data/parser/pmetrics.rs index 8886561e..8149d10a 100644 --- a/src/data/parser/pmetrics.rs +++ b/src/data/parser/pmetrics.rs @@ -4,45 +4,11 @@ use serde::de::{MapAccess, Visitor}; use serde::{de, Deserialize, Deserializer, Serialize}; use std::collections::HashMap; +use crate::data::row::build_data; +use crate::data::row::DataError; +use crate::data::row::DataRow; use std::fmt; use std::str::FromStr; -use thiserror::Error; - -/// Custom error type for the module -#[allow(private_interfaces)] -#[derive(Error, Debug, Clone)] -pub enum PmetricsError { - /// Error encountered when reading CSV data - #[error("CSV error: {0}")] - CSVError(String), - /// Error during data deserialization - #[error("Parse error: {0}")] - SerdeError(String), - /// Encountered an unknown EVID value - #[error("Unknown EVID: {evid} for ID {id} at time {time}")] - UnknownEvid { evid: isize, id: String, time: f64 }, - /// Required observation value (OUT) is missing - #[error("Observation OUT is missing for {id} at time {time}")] - MissingObservationOut { id: String, time: f64 }, - /// Required observation output equation (OUTEQ) is missing - #[error("Observation OUTEQ is missing in for {id} at time {time}")] - MissingObservationOuteq { id: String, time: f64 }, - /// Required infusion dose amount is missing - #[error("Infusion amount (DOSE) is missing for {id} at time {time}")] - MissingInfusionDose { id: String, time: f64 }, - /// Required infusion input compartment is missing - #[error("Infusion compartment (INPUT) is missing for {id} at time {time}")] - MissingInfusionInput { id: String, time: f64 }, - /// Required infusion duration is missing - #[error("Infusion duration (DUR) is missing for {id} at time {time}")] - MissingInfusionDur { id: String, time: f64 }, - /// Required bolus dose amount is missing - #[error("Bolus amount (DOSE) is missing for {id} at time {time}")] - MissingBolusDose { id: String, time: f64 }, - /// Required bolus input compartment is missing - #[error("Bolus compartment (INPUT) is missing for {id} at time {time}")] - MissingBolusInput { id: String, time: f64 }, -} /// Read a Pmetrics datafile and convert it to a [Data] object /// @@ -56,7 +22,7 @@ pub enum PmetricsError { /// /// # Returns /// -/// * `Result` - A result containing either the parsed [Data] object or an error +/// * `Result` - A result containing either the parsed [Data] object or an error /// /// # Example /// @@ -78,32 +44,32 @@ pub enum PmetricsError { /// /// For specific column definitions, see the `Row` struct. #[allow(dead_code)] -pub fn read_pmetrics(path: impl Into) -> Result { +pub fn read_pmetrics(path: impl Into) -> Result { let path = path.into(); let mut reader = csv::ReaderBuilder::new() .comment(Some(b'#')) .has_headers(true) .from_path(&path) - .map_err(|e| PmetricsError::CSVError(e.to_string()))?; + .map_err(|e| DataError::CSVError(e.to_string()))?; // Convert headers to lowercase let headers = reader .headers() - .map_err(|e| PmetricsError::CSVError(e.to_string()))? + .map_err(|e| DataError::CSVError(e.to_string()))? .iter() .map(|h| h.to_lowercase()) .collect::>(); reader.set_headers(csv::StringRecord::from(headers)); - // Parse CSV rows and convert to NormalizedRows - let mut normalized_rows: Vec = Vec::new(); + // Parse CSV rows and convert to DataRows + let mut normalized_rows: Vec = Vec::new(); for row_result in reader.deserialize() { - let row: Row = row_result.map_err(|e| PmetricsError::CSVError(e.to_string()))?; - normalized_rows.push(row.to_normalized()); + let row: Row = row_result.map_err(|e| DataError::CSVError(e.to_string()))?; + normalized_rows.push(row.to_datarow()); } // Use the shared build_data logic - super::normalized::build_data(normalized_rows) + build_data(normalized_rows) } /// A [Row] represents a row in the Pmetrics data format @@ -158,9 +124,9 @@ struct Row { } impl Row { - /// Convert this Row to a NormalizedRow for parsing - fn to_normalized(&self) -> super::normalized::NormalizedRow { - super::normalized::NormalizedRow { + /// Convert this Row to a DataRow for parsing + fn to_datarow(&self) -> DataRow { + DataRow { id: self.id.clone(), time: self.time, evid: self.evid as i32, diff --git a/src/data/parser/normalized.rs b/src/data/row.rs similarity index 81% rename from src/data/parser/normalized.rs rename to src/data/row.rs index 72ba1a16..e73f19aa 100644 --- a/src/data/parser/normalized.rs +++ b/src/data/row.rs @@ -1,26 +1,12 @@ -//! Normalized row representation for flexible data parsing -//! -//! This module provides a format-agnostic intermediate representation that decouples -//! column naming/mapping from event creation logic. Any data source (CSV with custom -//! columns, Excel, DataFrames) can construct [`NormalizedRow`] instances, then use -//! [`NormalizedRow::into_events()`] to get properly parsed pharmsol Events. -//! -//! # Design Philosophy -//! -//! The key insight is separating two concerns: -//! 1. **Row Normalization** - Transform arbitrary input formats into a standard representation -//! 2. **Event Creation** - Convert normalized rows into pharmsol Events (with ADDL expansion, etc.) -//! -//! This allows any consumer (GUI applications, scripts, other tools) to bring their own -//! "column mapping" while reusing parsing logic. +//! Row representation of [Data] for flexible parsing //! //! # Example //! //! ```rust -//! use pharmsol::data::parser::NormalizedRow; +//! use pharmsol::data::parser::DataRow; //! //! // Create a dosing row with ADDL expansion -//! let row = NormalizedRow::builder("subject_1", 0.0) +//! let row = DataRow::builder("subject_1", 0.0) //! .evid(1) //! .dose(100.0) //! .input(1) @@ -33,15 +19,15 @@ //! ``` //! -use super::PmetricsError; use crate::data::*; use std::collections::HashMap; +use thiserror::Error; /// A format-agnostic representation of a single data row /// /// This struct represents the canonical fields needed to create pharmsol Events. /// Consumers construct this from their source data (regardless of column names), -/// then call [`into_events()`](NormalizedRow::into_events) to get properly parsed +/// then call [`into_events()`](DataRow::into_events) to get properly parsed /// Events with full ADDL expansion, EVID handling, censoring, etc. /// /// # Fields @@ -54,17 +40,17 @@ use std::collections::HashMap; /// # Example /// /// ```rust -/// use pharmsol::data::parser::NormalizedRow; +/// use pharmsol::data::parser::DataRow; /// /// // Observation row -/// let obs = NormalizedRow::builder("pt1", 1.0) +/// let obs = DataRow::builder("pt1", 1.0) /// .evid(0) /// .out(25.5) /// .outeq(1) /// .build(); /// /// // Dosing row with negative ADDL (doses before time 0) -/// let dose = NormalizedRow::builder("pt1", 0.0) +/// let dose = DataRow::builder("pt1", 0.0) /// .evid(1) /// .dose(100.0) /// .input(1) @@ -77,7 +63,7 @@ use std::collections::HashMap; /// assert_eq!(events.len(), 11); /// ``` #[derive(Debug, Clone, Default)] -pub struct NormalizedRow { +pub struct DataRow { /// Subject identifier (required) pub id: String, /// Event time (required) @@ -112,8 +98,8 @@ pub struct NormalizedRow { pub covariates: HashMap, } -impl NormalizedRow { - /// Create a new builder for constructing a NormalizedRow +impl DataRow { + /// Create a new builder for constructing a DataRow /// /// # Arguments /// @@ -123,16 +109,16 @@ impl NormalizedRow { /// # Example /// /// ```rust - /// use pharmsol::data::parser::NormalizedRow; + /// use pharmsol::data::parser::DataRow; /// - /// let row = NormalizedRow::builder("patient_001", 0.0) + /// let row = DataRow::builder("patient_001", 0.0) /// .evid(1) /// .dose(100.0) /// .input(1) /// .build(); /// ``` - pub fn builder(id: impl Into, time: f64) -> NormalizedRowBuilder { - NormalizedRowBuilder::new(id, time) + pub fn builder(id: impl Into, time: f64) -> DataRowBuilder { + DataRowBuilder::new(id, time) } /// Get error polynomial if all coefficients are present @@ -165,16 +151,16 @@ impl NormalizedRow { /// /// # Errors /// - /// Returns [`PmetricsError`] if required fields are missing for the given EVID: + /// Returns [`DataError`] if required fields are missing for the given EVID: /// - EVID=0: Requires `outeq` /// - EVID=1: Requires `dose` and `input`; if `dur > 0`, it's an infusion /// /// # Example /// /// ```rust - /// use pharmsol::data::parser::NormalizedRow; + /// use pharmsol::data::parser::DataRow; /// - /// let row = NormalizedRow::builder("pt1", 0.0) + /// let row = DataRow::builder("pt1", 0.0) /// .evid(1) /// .dose(100.0) /// .input(1) @@ -188,7 +174,7 @@ impl NormalizedRow { /// let times: Vec = events.iter().map(|e| e.time()).collect(); /// assert_eq!(times, vec![24.0, 48.0, 0.0]); /// ``` - pub fn into_events(self) -> Result, PmetricsError> { + pub fn into_events(self) -> Result, DataError> { let mut events: Vec = Vec::new(); match self.evid { @@ -198,7 +184,7 @@ impl NormalizedRow { self.time, self.out, self.outeq - .ok_or_else(|| PmetricsError::MissingObservationOuteq { + .ok_or_else(|| DataError::MissingObservationOuteq { id: self.id.clone(), time: self.time, })? @@ -212,7 +198,7 @@ impl NormalizedRow { // Dosing event (1) or reset with dose (4) let input_0indexed = self .input - .ok_or_else(|| PmetricsError::MissingBolusInput { + .ok_or_else(|| DataError::MissingBolusInput { id: self.id.clone(), time: self.time, })? @@ -222,13 +208,12 @@ impl NormalizedRow { // Infusion Event::Infusion(Infusion::new( self.time, - self.dose - .ok_or_else(|| PmetricsError::MissingInfusionDose { - id: self.id.clone(), - time: self.time, - })?, + self.dose.ok_or_else(|| DataError::MissingInfusionDose { + id: self.id.clone(), + time: self.time, + })?, input_0indexed, - self.dur.ok_or_else(|| PmetricsError::MissingInfusionDur { + self.dur.ok_or_else(|| DataError::MissingInfusionDur { id: self.id.clone(), time: self.time, })?, @@ -238,7 +223,7 @@ impl NormalizedRow { // Bolus Event::Bolus(Bolus::new( self.time, - self.dose.ok_or_else(|| PmetricsError::MissingBolusDose { + self.dose.ok_or_else(|| DataError::MissingBolusDose { id: self.id.clone(), time: self.time, })?, @@ -265,7 +250,7 @@ impl NormalizedRow { events.push(event); } _ => { - return Err(PmetricsError::UnknownEvid { + return Err(DataError::UnknownEvid { evid: self.evid as isize, id: self.id.clone(), time: self.time, @@ -299,15 +284,15 @@ impl NormalizedRow { } } -/// Builder for constructing NormalizedRow with a fluent API +/// Builder for constructing DataRow with a fluent API /// /// # Example /// /// ```rust -/// use pharmsol::data::parser::NormalizedRow; +/// use pharmsol::data::parser::DataRow; /// use pharmsol::data::Censor; /// -/// let row = NormalizedRow::builder("patient_001", 1.5) +/// let row = DataRow::builder("patient_001", 1.5) /// .evid(0) /// .out(25.5) /// .outeq(1) @@ -317,11 +302,11 @@ impl NormalizedRow { /// .build(); /// ``` #[derive(Debug, Clone)] -pub struct NormalizedRowBuilder { - row: NormalizedRow, +pub struct DataRowBuilder { + row: DataRow, } -impl NormalizedRowBuilder { +impl DataRowBuilder { /// Create a new builder with required fields /// /// # Arguments @@ -330,7 +315,7 @@ impl NormalizedRowBuilder { /// * `time` - Event time pub fn new(id: impl Into, time: f64) -> Self { Self { - row: NormalizedRow { + row: DataRow { id: id.into(), time, evid: 0, // Default to observation @@ -442,47 +427,47 @@ impl NormalizedRowBuilder { self } - /// Build the NormalizedRow - pub fn build(self) -> NormalizedRow { + /// Build the DataRow + pub fn build(self) -> DataRow { self.row } } -/// Build a [Data] object from an iterator of [NormalizedRow]s +/// Build a [Data] object from an iterator of [DataRow]s /// /// This function handles all the complex assembly logic: /// - Groups rows by subject ID /// - Splits into occasions at EVID=4 boundaries -/// - Converts rows to events via [`NormalizedRow::into_events()`] +/// - Converts rows to events via [`DataRow::into_events()`] /// - Builds covariates from row covariate data /// /// # Example /// /// ```rust -/// use pharmsol::data::parser::{NormalizedRow, build_data}; +/// use pharmsol::data::parser::{DataRow, build_data}; /// /// let rows = vec![ /// // Subject 1, Occasion 0 -/// NormalizedRow::builder("pt1", 0.0) +/// DataRow::builder("pt1", 0.0) /// .evid(1).dose(100.0).input(1).build(), -/// NormalizedRow::builder("pt1", 1.0) +/// DataRow::builder("pt1", 1.0) /// .evid(0).out(50.0).outeq(1).build(), /// // Subject 1, Occasion 1 (EVID=4 starts new occasion) -/// NormalizedRow::builder("pt1", 24.0) +/// DataRow::builder("pt1", 24.0) /// .evid(4).dose(100.0).input(1).build(), -/// NormalizedRow::builder("pt1", 25.0) +/// DataRow::builder("pt1", 25.0) /// .evid(0).out(48.0).outeq(1).build(), /// // Subject 2 -/// NormalizedRow::builder("pt2", 0.0) +/// DataRow::builder("pt2", 0.0) /// .evid(1).dose(50.0).input(1).build(), /// ]; /// /// let data = build_data(rows).unwrap(); /// assert_eq!(data.subjects().len(), 2); /// ``` -pub fn build_data(rows: impl IntoIterator) -> Result { +pub fn build_data(rows: impl IntoIterator) -> Result { // Group rows by subject ID - let mut rows_map: std::collections::HashMap> = + let mut rows_map: std::collections::HashMap> = std::collections::HashMap::new(); for row in rows { rows_map.entry(row.id.clone()).or_default().push(row); @@ -498,7 +483,7 @@ pub fn build_data(rows: impl IntoIterator) -> Result = Vec::new(); + let mut block_rows_vec: Vec<&[DataRow]> = Vec::new(); let mut start = 0; for &split_index in &split_indices { if start < split_index { @@ -558,13 +543,49 @@ pub fn build_data(rows: impl IntoIterator) -> Result Date: Wed, 14 Jan 2026 10:27:54 +0100 Subject: [PATCH 2/4] Update src/error/mod.rs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/error/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/error/mod.rs b/src/error/mod.rs index 37e86c93..a7629021 100644 --- a/src/error/mod.rs +++ b/src/error/mod.rs @@ -15,7 +15,7 @@ pub enum PharmsolError { CovariateError(#[from] CovariateError), #[error("Shape error: {0}")] NdarrayShapeError(#[from] ShapeError), - #[error("Error parsing Pmetrics datafile: {0}")] + #[error("Error parsing data: {0}")] DataError(#[from] DataError), #[error("Diffsol error: {0}")] DiffsolError(String), From cec1e5917523b70eb8a1cb316b6fb7bba7b4c3a9 Mon Sep 17 00:00:00 2001 From: Markus Hovd Date: Wed, 14 Jan 2026 10:28:27 +0100 Subject: [PATCH 3/4] Update src/data/parser/mod.rs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/data/parser/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/parser/mod.rs b/src/data/parser/mod.rs index 90ca87f0..7bfde3ca 100644 --- a/src/data/parser/mod.rs +++ b/src/data/parser/mod.rs @@ -1,4 +1,4 @@ pub mod pmetrics; -pub use crate::data::row::{build_data, DataRow, DataRowBuilder}; +pub use crate::data::row::{build_data, DataError, DataRow, DataRowBuilder}; pub use pmetrics::*; From 00d618008a4097657b754eb9f5305c0d421bd511 Mon Sep 17 00:00:00 2001 From: Markus <66058642+mhovd@users.noreply.github.com> Date: Wed, 14 Jan 2026 10:29:32 +0100 Subject: [PATCH 4/4] Name changes --- src/data/parser/pmetrics.rs | 6 +++--- src/data/row.rs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/data/parser/pmetrics.rs b/src/data/parser/pmetrics.rs index 8149d10a..60ba3060 100644 --- a/src/data/parser/pmetrics.rs +++ b/src/data/parser/pmetrics.rs @@ -62,14 +62,14 @@ pub fn read_pmetrics(path: impl Into) -> Result { reader.set_headers(csv::StringRecord::from(headers)); // Parse CSV rows and convert to DataRows - let mut normalized_rows: Vec = Vec::new(); + let mut data_rows: Vec = Vec::new(); for row_result in reader.deserialize() { let row: Row = row_result.map_err(|e| DataError::CSVError(e.to_string()))?; - normalized_rows.push(row.to_datarow()); + data_rows.push(row.to_datarow()); } // Use the shared build_data logic - build_data(normalized_rows) + build_data(data_rows) } /// A [Row] represents a row in the Pmetrics data format diff --git a/src/data/row.rs b/src/data/row.rs index e73f19aa..d45105a5 100644 --- a/src/data/row.rs +++ b/src/data/row.rs @@ -129,7 +129,7 @@ impl DataRow { } } - /// Convert this normalized row into pharmsol Events + /// Convert this row into pharmsol Events /// /// This method contains all the complex parsing logic: /// - EVID interpretation (0=observation, 1=dose, 4=reset)