diff --git a/examples/sde.rs b/examples/sde.rs index 95fb4133..2513a889 100644 --- a/examples/sde.rs +++ b/examples/sde.rs @@ -1,4 +1,4 @@ -use pharmsol::{prelude::data::read_pmetrics, *}; +use pharmsol::*; fn one_c_ode() -> ODE { equation::ODE::new( @@ -161,7 +161,7 @@ fn main() { let ode = three_c_ode(); let sde = three_c_sde(); - let data = read_pmetrics("../PMcore/examples/vanco_sde/data.csv").unwrap(); + let data = Data::read_pmetrics("../PMcore/examples/vanco_sde/data.csv").unwrap(); let subject = data.get_subject("51").unwrap(); let ode_predictions = ode.estimate_predictions(subject, &spp_ode); diff --git a/src/data/mod.rs b/src/data/mod.rs index b1022204..f5e31586 100644 --- a/src/data/mod.rs +++ b/src/data/mod.rs @@ -30,7 +30,7 @@ pub mod builder; pub mod covariate; pub mod error_model; pub mod event; -pub(crate) mod parse_pmetrics; +pub mod parser; pub mod structs; pub use covariate::*; pub use error_model::*; diff --git a/src/data/parse_pmetrics.rs b/src/data/parse_pmetrics.rs deleted file mode 100644 index 9a72ab2e..00000000 --- a/src/data/parse_pmetrics.rs +++ /dev/null @@ -1,532 +0,0 @@ -use crate::data::*; -use serde::de::{MapAccess, Visitor}; -use serde::{de, Deserialize, Deserializer, Serialize}; -use std::collections::HashMap; - -use std::fmt; -use std::str::FromStr; -use thiserror::Error; - -/// Custom error type for the module -#[allow(private_interfaces)] -#[derive(Error, Debug)] -pub enum PmetricsError { - /// Error encountered when reading CSV data - #[error("CSV error: {0}")] - ReadError(#[from] csv::Error), - /// Error during data deserialization - #[error("Parse error: {0}")] - SerdeError(#[from] serde::de::value::Error), - /// Encountered an unknown EVID value - #[error("Unknown EVID: {evid} for ID {id} at time {time}")] - UnknownEvid { evid: isize, id: String, time: f64 }, - /// Required observation value (OUT) is missing - #[error("Observation OUT is missing for {id} at time {time}")] - MissingObservationOut { id: String, time: f64 }, - /// Required observation output equation (OUTEQ) is missing - #[error("Observation OUTEQ is missing in for {id} at time {time}")] - MissingObservationOuteq { id: String, time: f64 }, - /// Required infusion dose amount is missing - #[error("Infusion amount (DOSE) is missing for {id} at time {time}")] - MissingInfusionDose { id: String, time: f64 }, - /// Required infusion input compartment is missing - #[error("Infusion compartment (INPUT) is missing for {id} at time {time}")] - MissingInfusionInput { id: String, time: f64 }, - /// Required infusion duration is missing - #[error("Infusion duration (DUR) is missing for {id} at time {time}")] - MissingInfusionDur { id: String, time: f64 }, - /// Required bolus dose amount is missing - #[error("Bolus amount (DOSE) is missing for {id} at time {time}")] - MissingBolusDose { id: String, time: f64 }, - /// Required bolus input compartment is missing - #[error("Bolus compartment (INPUT) is missing for {id} at time {time}")] - MissingBolusInput { id: String, time: f64 }, -} - -/// Read a Pmetrics datafile and convert it to a [Data] object -/// -/// This function parses a Pmetrics-formatted CSV file and constructs a [Data] object containing the structured -/// pharmacokinetic/pharmacodynamic data. The function handles various data formats including doses, observations, -/// and covariates. -/// -/// # Arguments -/// -/// * `path` - The path to the Pmetrics CSV file -/// -/// # Returns -/// -/// * `Result` - A result containing either the parsed [Data] object or an error -/// -/// # Example -/// -/// ```rust,no_run -/// use pharmsol::prelude::data::read_pmetrics; -/// -/// let data = read_pmetrics("path/to/pmetrics_data.csv").unwrap(); -/// println!("Number of subjects: {}", data.get_subjects().len()); -/// ``` -/// -/// # Format details -/// -/// The Pmetrics format expects columns like ID, TIME, EVID, DOSE, DUR, etc. The function will: -/// - Convert all headers to lowercase for case-insensitivity -/// - Group rows by subject ID -/// - Create occasions based on EVID=4 events -/// - Parse covariates and create appropriate interpolations -/// - Handle additional doses via ADDL and II fields -/// -/// For specific column definitions, see the [Row] struct. -#[allow(dead_code)] -pub fn read_pmetrics(path: impl Into) -> Result { - let path = path.into(); - - let mut reader = csv::ReaderBuilder::new() - .comment(Some(b'#')) - .has_headers(true) - .from_path(path)?; - - // Convert headers to lowercase - let headers = reader - .headers()? - .iter() - .map(|h| h.to_lowercase()) - .collect::>(); - reader.set_headers(csv::StringRecord::from(headers)); - - // This is the object we are building, which can be converted to [Data] - let mut subjects: Vec = Vec::new(); - - // Read the datafile into a hashmap of rows by ID - let mut rows_map: HashMap> = HashMap::new(); - for row_result in reader.deserialize() { - let row: Row = row_result?; - - rows_map.entry(row.id.clone()).or_default().push(row); - } - - // For each ID, we ultimately create a [Subject] object - for (id, rows) in rows_map { - // Split rows into vectors of rows, creating the occasions - let split_indices: Vec = rows - .iter() - .enumerate() - .filter_map(|(i, row)| if row.evid == 4 { Some(i) } else { None }) - .collect(); - - let mut block_rows_vec = Vec::new(); - let mut start = 0; - for &split_index in &split_indices { - let end = split_index; - if start < rows.len() { - block_rows_vec.push(&rows[start..end]); - } - start = end; - } - - if start < rows.len() { - block_rows_vec.push(&rows[start..]); - } - - let block_rows: Vec> = block_rows_vec.iter().map(|block| block.to_vec()).collect(); - let mut occasions: Vec = Vec::new(); - for (block_index, rows) in block_rows.clone().iter().enumerate() { - // Collector for all events - let mut events: Vec = Vec::new(); - // Collector for covariates - let mut covariates = Covariates::new(); - - // Parse events - for row in rows.clone() { - match row.parse_events() { - Ok(ev) => events.extend(ev), - Err(e) => { - // dbg!(&row); - // dbg!(&e); - return Err(e); - } - } - } - - // Parse covariates - let mut cloned_rows = rows.clone(); - cloned_rows.retain(|row| !row.covs.is_empty()); - - // Collect all covariates by name - let mut observed_covariates: HashMap)>> = HashMap::new(); - for row in &cloned_rows { - for (key, value) in &row.covs { - if let Some(val) = value { - observed_covariates - .entry(key.clone()) - .or_default() - .push((row.time, Some(*val))); - } - } - } - - // Create segments for each covariate - for (key, mut occurrences) in observed_covariates { - occurrences.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap()); - let is_fixed = key.ends_with('!'); - - // If it's a fixed covariate, modify the name to remove "!" - let name = if is_fixed { - key.trim_end_matches('!').to_string() - } else { - key.clone() - }; - - let mut covariate = Covariate::new(name.clone(), vec![]); - - // If only one occurence, add a single segment to infinity - if occurrences.len() == 1 { - let (time, value) = occurrences[0]; - covariate.add_segment(CovariateSegment::new( - time, - f64::INFINITY, - InterpolationMethod::CarryForward { - value: value.unwrap(), - }, - )); - covariates.add_covariate(name, covariate); - continue; - } - - let mut last_value = None; - for i in 0..occurrences.len() { - let (time, value) = occurrences[i]; - let next_occurrence = occurrences.get(i + 1); - let to_time = - next_occurrence.map_or(f64::INFINITY, |&(next_time, _)| next_time); - - if is_fixed { - // Use CarryForward for fixed covariates - covariate.add_segment(CovariateSegment::new( - time, - to_time, - InterpolationMethod::CarryForward { - value: value.unwrap(), - }, - )); - } else if let Some((next_time, next_value)) = next_occurrence { - if let Some(current_value) = value { - if *next_time == time { - covariate.add_segment(CovariateSegment::new( - time, - *next_time, - InterpolationMethod::CarryForward { - value: current_value, - }, - )); - } else { - let slope = - (next_value.unwrap() - current_value) / (next_time - time); - covariate.add_segment(CovariateSegment::new( - time, - *next_time, - InterpolationMethod::Linear { - slope, - intercept: current_value - slope * time, - }, - )); - } - - last_value = Some((next_time, next_value)); - } - } else if let Some((last_time, last_value)) = last_value { - // Extend the last linear segment to infinity if no more segments are available - covariate.add_segment(CovariateSegment::new( - *last_time, - f64::INFINITY, - InterpolationMethod::CarryForward { - value: last_value.unwrap(), - }, - )); - } - } - covariates.add_covariate(name, covariate) - } - // Create the block - let mut occasion = Occasion::new(events, covariates, block_index); - occasion.sort(); - occasions.push(occasion); - } - - let subject = Subject::new(id, occasions); - subjects.push(subject); - } - - // Sort subjects alphabetically by ID to get consistent ordering - subjects.sort_by(|a, b| a.id().cmp(b.id())); - let data = Data::new(subjects); - - Ok(data) -} - -/// A [Row] represents a row in the Pmetrics data format -#[derive(Deserialize, Debug, Serialize, Default, Clone)] -#[serde(rename_all = "lowercase")] -struct Row { - /// Subject ID - id: String, - /// Event type - evid: isize, - /// Event time - time: f64, - /// Infusion duration - #[serde(deserialize_with = "deserialize_option_f64")] - dur: Option, - /// Dose amount - #[serde(deserialize_with = "deserialize_option_f64")] - dose: Option, - /// Additional doses - #[serde(deserialize_with = "deserialize_option_isize")] - addl: Option, - /// Dosing interval - #[serde(deserialize_with = "deserialize_option_f64")] - ii: Option, - /// Input compartment - #[serde(deserialize_with = "deserialize_option_usize")] - input: Option, - /// Observed value - #[serde(deserialize_with = "deserialize_option_f64")] - out: Option, - /// Corresponding output equation for the observation - #[serde(deserialize_with = "deserialize_option_usize")] - outeq: Option, - /// First element of the error polynomial - #[serde(deserialize_with = "deserialize_option_f64")] - c0: Option, - /// Second element of the error polynomial - #[serde(deserialize_with = "deserialize_option_f64")] - c1: Option, - /// Third element of the error polynomial - #[serde(deserialize_with = "deserialize_option_f64")] - c2: Option, - /// Fourth element of the error polynomial - #[serde(deserialize_with = "deserialize_option_f64")] - c3: Option, - /// All other columns are covariates - #[serde(deserialize_with = "deserialize_covs", flatten)] - covs: HashMap>, -} - -impl Row { - /// Get the error polynomial coefficients - fn get_errorpoly(&self) -> Option<(f64, f64, f64, f64)> { - match (self.c0, self.c1, self.c2, self.c3) { - (Some(c0), Some(c1), Some(c2), Some(c3)) => Some((c0, c1, c2, c3)), - _ => None, - } - } - fn parse_events(self) -> Result, PmetricsError> { - let mut events: Vec = Vec::new(); - - match self.evid { - 0 => events.push(Event::Observation(Observation::new( - self.time, - self.out - .ok_or_else(|| PmetricsError::MissingObservationOut { - id: self.id.clone(), - time: self.time, - })?, - self.outeq - .ok_or_else(|| PmetricsError::MissingObservationOuteq { - id: self.id.clone(), - time: self.time, - })? - - 1, - self.get_errorpoly(), - self.out == Some(-99.0), - ))), - 1 | 4 => { - let event = if self.dur.unwrap_or(0.0) > 0.0 { - Event::Infusion(Infusion::new( - self.time, - self.dose - .ok_or_else(|| PmetricsError::MissingInfusionDose { - id: self.id.clone(), - time: self.time, - })?, - self.input - .ok_or_else(|| PmetricsError::MissingInfusionInput { - id: self.id.clone(), - time: self.time, - })? - - 1, - self.dur.ok_or_else(|| PmetricsError::MissingInfusionDur { - id: self.id.clone(), - time: self.time, - })?, - )) - } else { - Event::Bolus(Bolus::new( - self.time, - self.dose.ok_or_else(|| PmetricsError::MissingBolusDose { - id: self.id.clone(), - time: self.time, - })?, - self.input.ok_or(PmetricsError::MissingBolusInput { - id: self.id, - time: self.time, - })? - 1, - )) - }; - if self.addl.is_some() - && self.ii.is_some() - && self.addl.unwrap_or(0) != 0 - && self.ii.unwrap_or(0.0) > 0.0 - { - let mut ev = event.clone(); - let interval = &self.ii.unwrap().abs(); - let repetitions = &self.addl.unwrap().abs(); - let direction = &self.addl.unwrap().signum(); - - for _ in 0..*repetitions { - ev.inc_time((*direction as f64) * interval); - events.push(ev.clone()); - } - } - events.push(event); - } - _ => { - return Err(PmetricsError::UnknownEvid { - evid: self.evid, - id: self.id.clone(), - time: self.time, - }); - } - }; - Ok(events) - } -} - -/// Deserialize Option from a string -fn deserialize_option<'de, T, D>(deserializer: D) -> Result, D::Error> -where - D: Deserializer<'de>, - T: FromStr, - T::Err: std::fmt::Display, -{ - let s: String = Deserialize::deserialize(deserializer)?; - if s.is_empty() || s == "." { - Ok(None) - } else { - T::from_str(&s).map(Some).map_err(serde::de::Error::custom) - } -} - -fn deserialize_option_f64<'de, D>(deserializer: D) -> Result, D::Error> -where - D: Deserializer<'de>, -{ - deserialize_option::(deserializer) -} - -fn deserialize_option_usize<'de, D>(deserializer: D) -> Result, D::Error> -where - D: Deserializer<'de>, -{ - deserialize_option::(deserializer) -} - -fn deserialize_option_isize<'de, D>(deserializer: D) -> Result, D::Error> -where - D: Deserializer<'de>, -{ - deserialize_option::(deserializer) -} - -fn deserialize_covs<'de, D>(deserializer: D) -> Result>, D::Error> -where - D: Deserializer<'de>, -{ - struct CovsVisitor; - - impl<'de> Visitor<'de> for CovsVisitor { - type Value = HashMap>; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str( - "a map of string keys to optionally floating-point numbers or placeholders", - ) - } - - fn visit_map(self, mut map: M) -> Result - where - M: MapAccess<'de>, - { - let mut covs = HashMap::new(); - while let Some((key, value)) = map.next_entry::()? { - let opt_value = match value { - serde_json::Value::String(s) => match s.as_str() { - "" => None, - "." => None, - _ => match s.parse::() { - Ok(val) => Some(val), - Err(_) => { - return Err(de::Error::custom( - "expected a floating-point number or empty string", - )) - } - }, - }, - serde_json::Value::Number(n) => Some(n.as_f64().unwrap()), - _ => return Err(de::Error::custom("expected a string or number")), - }; - covs.insert(key, opt_value); - } - Ok(covs) - } - } - - deserializer.deserialize_map(CovsVisitor) -} - -#[cfg(test)] -mod tests { - - use super::*; - - #[test] - fn test_addl() { - let data = read_pmetrics("src/tests/data/addl_test.csv"); - - assert!(data.is_ok(), "Failed to parse data"); - - let data = data.unwrap(); - let subjects = data.get_subjects(); - let first_subject = subjects.first().unwrap(); - let second_subject = subjects.get(1).unwrap(); - let s1_occasions = first_subject.occasions(); - let s2_occasions = second_subject.occasions(); - let first_scenario = s1_occasions.first().unwrap(); - let second_scenario = s2_occasions.first().unwrap(); - - let s1_times = first_scenario - .events() - .iter() - .map(|e| e.time()) - .collect::>(); - - // Negative ADDL, observations shifted forward - - assert_eq!( - s1_times, - vec![-120.0, -108.0, -96.0, -84.0, -72.0, -60.0, -48.0, -36.0, -24.0, -12.0, 0.0, 9.0] - ); - - let s2_times = second_scenario - .events() - .iter() - .map(|e| e.time()) - .collect::>(); - - // Positive ADDL, no shift in observations - - assert_eq!( - s2_times, - vec![0.0, 9.0, 12.0, 24.0, 36.0, 48.0, 60.0, 72.0, 84.0, 96.0, 108.0, 120.0] - ); - } -} diff --git a/src/data/parser/mod.rs b/src/data/parser/mod.rs new file mode 100644 index 00000000..9d716ab9 --- /dev/null +++ b/src/data/parser/mod.rs @@ -0,0 +1,7 @@ +pub mod nonmem; +pub mod pmetrics; + +// Expose the PmetricsError type +pub use pmetrics::PmetricsError; +// Expose the main loading functions +pub use pmetrics::{from_csv as load_pmetrics_csv, from_reader as load_pmetrics_reader}; diff --git a/src/data/parser/nonmem.rs b/src/data/parser/nonmem.rs new file mode 100644 index 00000000..02993042 --- /dev/null +++ b/src/data/parser/nonmem.rs @@ -0,0 +1 @@ +// Parse NONMEM data files into the [Data] struct diff --git a/src/data/parser/pmetrics.rs b/src/data/parser/pmetrics.rs new file mode 100644 index 00000000..413d40d6 --- /dev/null +++ b/src/data/parser/pmetrics.rs @@ -0,0 +1,611 @@ +use crate::data::*; +use serde::de::{MapAccess, Visitor}; +use serde::{de, Deserialize, Deserializer}; +use std::collections::HashMap; +use std::fmt; +use std::fs::File; +use std::io::Read; +use std::path::Path; +use std::str::FromStr; +use thiserror::Error; + +/// Defines the structure of data in the Pmetrics data format +#[derive(Debug, Clone)] +struct PmetricsRow { + // Subject ID + id: String, + // Event ID (EVID) + evid: usize, + // Time of the event + time: f64, + // Duration of the event (optional) + dur: Option, + // Dose amount (optional) + dose: Option, + // Additional doses (optional) + addl: Option, + // Dosing interval (optional) + ii: Option, + // Input type (optional) + input: Option, + // Output value (optional) + out: Option, + // Output equation (optional) + outeq: Option, + // Error polynomial coefficients (optional) + c0: Option, + c1: Option, + c2: Option, + c3: Option, + // Covariates is provided as a HashMap, with keys (name, time) and value (f64) + covs: HashMap, +} + +// Define specific errors for Pmetrics parsing +#[derive(Error, Debug, PartialEq)] +pub enum PmetricsError { + #[error("Missing required field: {0}")] + MissingField(String), + #[error("Invalid value for field {field}: {message}")] + InvalidValue { field: String, message: String }, + #[error("CSV parsing error: {0}")] + CsvError(String), // Wrap underlying CSV errors + #[error("Unknown EVID: {0}")] + UnknownEvid(usize), +} + +// Implement From for PmetricsError +impl From for PmetricsError { + fn from(err: csv::Error) -> Self { + PmetricsError::CsvError(err.to_string()) + } +} + +struct PmetricsVisitor; + +impl<'de> Visitor<'de> for PmetricsVisitor { + type Value = PmetricsRow; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a map representing a Pmetrics CSV row") + } + + fn visit_map(self, mut map: M) -> Result + where + M: MapAccess<'de>, + { + let mut id: Option = None; + let mut evid: Option = None; + let mut time: Option = None; + let mut dur: Option = None; + let mut dose: Option = None; + let mut addl: Option = None; + let mut ii: Option = None; + let mut input: Option = None; + let mut out: Option = None; + let mut outeq: Option = None; + let mut c0: Option = None; + let mut c1: Option = None; + let mut c2: Option = None; + let mut c3: Option = None; + let mut covs: HashMap = HashMap::new(); + + // Helper to parse optional fields that might be empty strings + fn parse_optional<'de, T, E>(value: Option<&str>) -> Result, E> + where + T: FromStr, + E: de::Error, + T::Err: fmt::Display, + { + // Treat both empty strings and "." as empty values + match value { + Some(s) if !s.is_empty() && s != "." => s + .parse::() + .map(Some) + .map_err(|e| E::custom(format!("Failed to parse value '{}': {}", s, e))), + _ => Ok(None), + } + } + + while let Some((key, value)) = map.next_entry::>()? { + let value_str = value.as_deref(); // Get Option<&str> + match key.to_lowercase().as_str() { + "id" => id = value_str.map(String::from), + "evid" => evid = parse_optional(value_str)?, + "time" => time = parse_optional(value_str)?, + "dur" => dur = parse_optional(value_str)?, + "dose" => dose = parse_optional(value_str)?, + "addl" => addl = parse_optional(value_str)?, + "ii" => ii = parse_optional(value_str)?, + "input" => input = parse_optional(value_str)?, + "out" => out = parse_optional(value_str)?, + "outeq" => outeq = parse_optional(value_str)?, + "c0" => c0 = parse_optional(value_str)?, + "c1" => c1 = parse_optional(value_str)?, + "c2" => c2 = parse_optional(value_str)?, + "c3" => c3 = parse_optional(value_str)?, + + // Collect any other columns as covariates + other_key => { + if let Some(val_float) = parse_optional::(value_str)? { + covs.insert(other_key.to_string(), val_float); + } + // Decide how to handle non-f64 covariates if needed + // else { warn!("Covariate '{}' has non-numeric value '{:?}', skipping", other_key, value_str); } + } + } + } + + // Check for required fields + let id = id.ok_or_else(|| de::Error::missing_field("id"))?; + let evid = evid.ok_or_else(|| de::Error::missing_field("evid"))?; + let time = time.ok_or_else(|| de::Error::missing_field("time"))?; + + Ok(PmetricsRow { + id, + evid, + time, + dur, + dose, + addl, + ii, + input, + out, + outeq, + c0, + c1, + c2, + c3, + covs, + }) + } +} + +// Implement Deserialize manually using the visitor +impl<'de> Deserialize<'de> for PmetricsRow { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_map(PmetricsVisitor) + } +} + +// Conversion from PmetricsRow to Vec, returning PmetricsError on failure +impl TryFrom for Vec { + type Error = PmetricsError; + + fn try_from(row: PmetricsRow) -> Result { + let covariates = if row.covs.is_empty() { + None + } else { + Some(row.covs) + }; + match row.evid { + // Observation Event (EVID 0) + 0 => { + let time = row.time; + let value = row + .out + .ok_or_else(|| PmetricsError::MissingField("out".to_string()))?; + let outeq = row + .outeq + .ok_or_else(|| PmetricsError::MissingField("outeq".to_string()))?; + + // Create the errorpoly tuple + let errorpoly = match (row.c0, row.c1, row.c2, row.c3) { + (Some(c0), Some(c1), Some(c2), Some(c3)) => Some((c0, c1, c2, c3)), + _ => None, + }; + + // If out == -99, set ignore to TRUE + let ignore = if value == -99.0 { true } else { false }; + + let obs = Observation::new(time, value, outeq, errorpoly, ignore); + Ok(vec![Event::Observation(obs)]) + } + // Dosing Event (EVID 1 or EVID 4) + 1 | 4 => { + // Minimum information + let time = row.time; + let amount = row + .dose + .ok_or_else(|| PmetricsError::MissingField("dose".to_string()))?; + let input = row + .input + .ok_or_else(|| PmetricsError::MissingField("input".to_string()))?; + + // If dur is None or 0, set to None + let dur = if let Some(duration) = row.dur { + if duration > 0.0 { + Some(duration) + } else { + None + } + } else { + None + }; + + // For addl and ii, both must be provided, otherwise they are both none. + // ii must be > 0.0 + let ii = if let Some(interval) = row.ii { + if interval > 0.0 { + Some(interval) + } else { + None + } + } else { + None + }; + let addl = if let Some(additional) = row.addl { + if additional > 0 { + Some(additional) + } else { + None + } + } else { + None + }; + + let mut events: Vec = Vec::new(); + + match addl { + Some(addl) => { + // Create additional events + for i in 0..addl { + let time = time + (i as f64) * ii.unwrap_or(0.0); + // If duration is some, we have an infusion + match dur { + Some(duration) => { + let infusion = Infusion::new(time, amount, input, duration); + events.push(Event::Infusion(infusion)); + } + None => { + // If duration is None, we have a bolus + let bolus = Bolus::new(time, amount, input); + events.push(Event::Bolus(bolus)); + } + } + } + } + None => { + match dur { + Some(duration) => { + let infusion = Infusion::new(time, amount, input, duration); + events.push(Event::Infusion(infusion)); + } + None => { + // If duration is None, we have a bolus + let bolus = Bolus::new(time, amount, input); + events.push(Event::Bolus(bolus)); + } + } + } + } + + Ok(events) + } + // Unknown EVID + _ => Err(PmetricsError::UnknownEvid(row.evid)), + } + } +} + +/// Deserialize PmetricsRow data from a CSV file into the Data struct +/// +/// # Arguments +/// +/// * `path` - Path to the CSV file containing Pmetrics data +/// +/// # Returns +/// +/// Result with the deserialized Data struct or a PmetricsError +pub fn from_csv>(path: P) -> Result { + let file = File::open(path).map_err(|e| PmetricsError::CsvError(e.to_string()))?; + from_reader(file) +} + +/// Deserialize PmetricsRow data from a reader into the Data struct +/// +/// # Arguments +/// +/// * `rdr` - Any type that implements Read trait containing Pmetrics CSV data +/// +/// # Returns +/// +/// Result with the deserialized Data struct or a PmetricsError +pub fn from_reader(rdr: R) -> Result { + let mut csv_reader = csv::ReaderBuilder::new() + .has_headers(true) + .trim(csv::Trim::All) + .from_reader(rdr); + + // Prepare a HashMap to store subjects with their occasions + let mut subjects_map: HashMap< + String, + Vec<(usize, Vec, HashMap>)>, + > = HashMap::new(); + + // Track occasion indices for each subject + let mut subject_occasions: HashMap = HashMap::new(); + + // Process each row in the CSV file + for record_result in csv_reader.deserialize() { + let record: PmetricsRow = record_result?; + let subject_id = record.id.clone(); + + // Check if this is a new occasion based on EVID=4 + if record.evid == 4 { + // Increment the occasion index for this subject if EVID=4 + let current_idx = subject_occasions.get(&subject_id).copied().unwrap_or(0); + let new_idx = current_idx + 1; + subject_occasions.insert(subject_id.clone(), new_idx); + } else if !subject_occasions.contains_key(&subject_id) { + // First record for this subject + subject_occasions.insert(subject_id.clone(), 0); + } + + // Get the occasion for this subject + let occasion_idx = subject_occasions[&subject_id]; + + // Convert the PmetricsRow to events + let events = Vec::::try_from(record.clone())?; + + // Get or create the subject's data + let subject_data = subjects_map + .entry(subject_id.clone()) + .or_insert_with(Vec::new); + + // Find the occasion for this subject + let occasion_data = if let Some(occ) = subject_data + .iter_mut() + .find(|(idx, _, _)| *idx == occasion_idx) + { + occ + } else { + // Create a new occasion for this subject + subject_data.push((occasion_idx, Vec::new(), HashMap::new())); + subject_data.last_mut().unwrap() + }; + + // Add the events to the occasion + occasion_data.1.extend(events); + + // Process covariates + for (name, value) in record.covs { + let time_values = occasion_data.2.entry(name).or_insert_with(Vec::new); + time_values.push((record.time, value)); + } + } + + // Convert the collected data into Subject and Occasion objects + let mut data_subjects = Vec::new(); + + for (subject_id, occasions) in subjects_map { + let mut subject_occasions = Vec::new(); + + for (idx, events, covariate_map) in occasions { + // Convert covariate map to Covariates structure + let mut covariates = Covariates::new(); + + // Process each covariate name and its time-value pairs + for (name, mut time_points) in covariate_map { + if time_points.is_empty() { + continue; + } + + let mut covariate = Covariate::new(name.clone(), Vec::new()); + + // Sort time points by time + time_points.sort_by(|(t1, _), (t2, _)| t1.partial_cmp(t2).unwrap()); + + // Create segments between time points with linear interpolation + for i in 0..time_points.len() - 1 { + let (t1, v1) = time_points[i]; + let (t2, v2) = time_points[i + 1]; + + // Calculate linear interpolation parameters + let slope = (v2 - v1) / (t2 - t1); + let intercept = v1 - slope * t1; + + // Create and add the segment to the covariate + let segment = CovariateSegment::new( + t1, + t2, + InterpolationMethod::Linear { slope, intercept }, + ); + covariate.add_segment(segment); + } + + // Add a carry-forward segment for the last time point + if let Some(&(last_time, last_value)) = time_points.last() { + let segment = CovariateSegment::new( + last_time, + f64::INFINITY, + InterpolationMethod::CarryForward { value: last_value }, + ); + covariate.add_segment(segment); + } + + // Add the completed covariate to the covariates collection + covariates.add_covariate(name, covariate); + } + + let mut occasion = Occasion::new(events, covariates, idx); + occasion.sort(); + subject_occasions.push(occasion); + } + + let subject = Subject::new(subject_id, subject_occasions); + data_subjects.push(subject); + } + + Ok(Data::new(data_subjects)) +} + +impl Data { + pub fn read_pmetrics>(path: P) -> Result { + from_csv(path) + } + + pub fn read_pmetrics_csv_from_reader(rdr: R) -> Result { + from_reader(rdr) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Cursor; + + #[test] + fn test_parse_dose() { + let data = r#" + id,evid,time,dur,dose,addl,ii,input,out,outeq,c0,c1,c2,c3 + 1,1,0.0,0.0,100.0,0,0,1,0.0,1,0.0,0.0,0.0,0.0 + 1,2,24.0,,100.0,,24,,1.5,,, + 2,1,12.0,,50.0,,12,,2.5,,, + "#; + + let parsed_data: Vec = serde_json::from_str(data).unwrap(); + dbg!(&parsed_data); + assert_eq!(parsed_data.len(), 3); + } + + #[test] + fn test_deserialize_data_from_csv() { + // Create a sample CSV in Pmetrics format + let csv_data = "\ +ID,EVID,TIME,DUR,DOSE,ADDL,II,INPUT,OUT,OUTEQ,weight,age,C0,C1,C2,C3 +1,1,0.0,0,100.0,0,0,1,.,.,70,30,.,.,.,. +1,0,1.0,.,.,.,.,.,10.0,1,70,30,0.1,0,0,0 +1,0,2.0,.,.,.,.,.,8.0,1,72,30,0.1,0,0,0 +1,1,24.0,0,100.0,0,0,1,.,.,75,30,.,.,.,. +1,0,25.0,.,.,.,.,.,12.0,1,75,30,0.1,0,0,0 +1,0,26.0,.,.,.,.,.,9.0,1,75,30,0.1,0,0,0 +2,1,0.0,1.5,150.0,0,0,1,.,.,65,25,.,.,.,. +2,0,2.0,.,.,.,.,.,15.0,1,65,25,0.1,0,0,0 +2,0,4.0,.,.,.,.,.,12.0,1,67,25,0.1,0,0,0 +2,1,24.0,1.5,150.0,0,0,1,.,.,68,25,.,.,.,. +2,0,26.0,.,.,.,.,.,18.0,1,68,25,0.1,0,0,0 +2,0,28.0,.,.,.,.,.,14.0,1,68,25,0.1,0,0,0"; + + // Create a reader from the CSV data + let cursor = Cursor::new(csv_data); + + // Deserialize the data + let data = from_reader(cursor).unwrap(); + + // Verify the structure + assert_eq!(data.len(), 2, "Should have 2 subjects"); + + // Check subject 1 + let subject1 = data.get_subject("1").unwrap(); + assert_eq!( + subject1.occasions().len(), + 1, + "Subject 1 should have 1 occasion" + ); + + let occasion = subject1.occasions()[0]; + + // Check events + let events = occasion.events(); + assert_eq!(events.len(), 6, "Subject 1 should have 6 events"); + + // Check covariates + let covariates = occasion.get_covariates().unwrap(); + assert!( + covariates.get_covariate("weight").is_some(), + "Subject 1 should have weight covariate" + ); + assert!( + covariates.get_covariate("age").is_some(), + "Subject 1 should have age covariate" + ); + + // Check subject 2 + let subject2 = data.get_subject("2").unwrap(); + assert_eq!( + subject2.occasions().len(), + 1, + "Subject 2 should have 1 occasion" + ); + + let occasion = subject2.occasions()[0]; + + // Check events + let events = occasion.events(); + assert_eq!(events.len(), 6, "Subject 2 should have 6 events"); + + // Check covariates + let covariates = occasion.get_covariates().unwrap(); + assert!( + covariates.get_covariate("weight").is_some(), + "Subject 2 should have weight covariate" + ); + assert!( + covariates.get_covariate("age").is_some(), + "Subject 2 should have age covariate" + ); + + // Check specific covariate values + let weight_cov = covariates.get_covariate("weight").unwrap(); + assert_eq!( + weight_cov.interpolate(0.0).unwrap(), + 65.0, + "Subject 2 weight at time 0.0 should be 65.0" + ); + assert_eq!( + weight_cov.interpolate(24.0).unwrap(), + 68.0, + "Subject 2 weight at time 24.0 should be 68.0" + ); + } + + #[test] + fn test_deserialize_data_from_csv_occasions() { + // Create a sample CSV in Pmetrics format + let csv_data = "\ +ID,EVID,TIME,DUR,DOSE,ADDL,II,INPUT,OUT,OUTEQ,weight,age,C0,C1,C2,C3 +1,1,0.0,0,100.0,0,0,1,.,.,70,30,.,.,.,. +1,0,1.0,.,.,.,.,.,10.0,1,70,30,0.1,0,0,0 +1,0,2.0,.,.,.,.,.,8.0,1,72,30,0.1,0,0,0 +1,1,24.0,0,100.0,0,0,1,.,.,75,30,.,.,.,. +1,0,25.0,.,.,.,.,.,12.0,1,75,30,0.1,0,0,0 +1,0,26.0,.,.,.,.,.,9.0,1,75,30,0.1,0,0,0 +1,4,0.0,1.5,150.0,0,0,1,.,.,65,25,.,.,.,. +1,0,2.0,.,.,.,.,.,15.0,1,65,25,0.1,0,0,0 +1,0,4.0,.,.,.,.,.,12.0,1,67,25,0.1,0,0,0 +1,1,24.0,1.5,150.0,0,0,1,.,.,68,25,.,.,.,. +1,0,26.0,.,.,.,.,.,18.0,1,68,25,0.1,0,0,0 +1,0,28.0,.,.,.,.,.,14.0,1,68,25,0.1,0,0,0"; + + // Create a reader from the CSV data + let cursor = Cursor::new(csv_data); + + // Deserialize the data + let data = from_reader(cursor).unwrap(); + + // Verify the structure + assert_eq!(data.len(), 1, "Should have 1 subject"); + + let subject = data.get_subject("1").unwrap(); + assert_eq!( + subject.occasions().len(), + 2, + "Subject 1 should have 2 occasions" + ); + + assert_eq!( + subject.occasions()[0].events().len(), + 6, + "Occasion 1 should have 6 events" + ); + assert_eq!( + subject.occasions()[1].events().len(), + 6, + "Occasion 2 should have 6 events" + ); + } +} diff --git a/src/lib.rs b/src/lib.rs index c72d0dde..8c86bf0b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,8 +15,7 @@ pub use std::collections::HashMap; pub mod prelude { pub mod data { pub use crate::data::{ - error_model::ErrorModel, parse_pmetrics::read_pmetrics, Covariates, Data, Event, - Occasion, Subject, + error_model::ErrorModel, Covariates, Data, Event, Occasion, Subject, }; } pub mod simulator {