diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4d87368..aa44d11 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,9 +7,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Install Rust - uses: hecrj/setup-rust-action@v1 + uses: dtolnay/rust-toolchain@stable with: - rust-version: stable + components: clippy, rustfmt - uses: actions/checkout@v3 - name: Check formatting run: cargo fmt -- --check @@ -23,7 +23,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest] - rust: [stable, 1.70.0] + rust: [stable, 1.81.0] steps: - name: Install Rust uses: hecrj/setup-rust-action@v1 diff --git a/CHANGELOG.md b/CHANGELOG.md index 8bd8fbf..f5097c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ Versioning](https://semver.org/spec/v2.0.0.html). ### Changed +- Requires Rust 1.81 or newer. - Updated arrow to version 54. ## [0.15.0] - 2024-12-10 diff --git a/Cargo.toml b/Cargo.toml index f3f5f6a..b8c17ea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ authors = [ "Sehkone Kim ", ] edition = "2021" -rust-version = "1.70" +rust-version = "1.81" description = "Data structures to handle large, structured data." homepage = "https://github.com/petabi/structured" repository = "https://github.com/petabi/structured" diff --git a/src/csv/reader.rs b/src/csv/reader.rs index 31e5d82..97abd17 100644 --- a/src/csv/reader.rs +++ b/src/csv/reader.rs @@ -1,4 +1,8 @@ -use crate::record; +use std::fmt; +use std::io::{BufRead, BufReader, Read}; +use std::str::{self, FromStr}; +use std::sync::Arc; + use arrow::array::{Array, BinaryBuilder, PrimitiveBuilder, StringBuilder}; use arrow::datatypes::{ ArrowPrimitiveType, DataType, Field, Float64Type, Int64Type, Schema, UInt32Type, @@ -6,10 +10,8 @@ use arrow::datatypes::{ use arrow::error::ArrowError; use csv_core::ReadRecordResult; use serde::{Deserialize, Serialize}; -use std::fmt; -use std::io::{BufRead, BufReader, Read}; -use std::str::{self, FromStr}; -use std::sync::Arc; + +use crate::record; pub struct Record { fields: Vec, @@ -34,7 +36,7 @@ impl Record { outlen += nout; endlen += nend; match res { - ReadRecordResult::InputEmpty => continue, + ReadRecordResult::InputEmpty => {} ReadRecordResult::OutputFull => { fields.resize(std::cmp::max(4, fields.len().checked_mul(2).unwrap()), 0); } @@ -72,7 +74,7 @@ impl Record { outlen += nout; endlen += nend; match res { - ReadRecordResult::InputEmpty => continue, + ReadRecordResult::InputEmpty => {} ReadRecordResult::OutputFull => { fields.resize(std::cmp::max(4, fields.len().checked_mul(2).unwrap()), 0); } @@ -471,20 +473,22 @@ pub fn infer_schema(reader: &mut BufReader) -> Result (Vec>, Vec) { let c0_v: Vec = vec![1, 3, 3, 5, 2, 1, 3]; let c1_v: Vec<_> = vec!["111a qwer", "b", "c", "d", "b", "111a qwer", "111a qwer"]; let c2_v: Vec = vec![ - Ipv4Addr::new(127, 0, 0, 1), + Ipv4Addr::LOCALHOST, Ipv4Addr::new(127, 0, 0, 2), Ipv4Addr::new(127, 0, 0, 3), Ipv4Addr::new(127, 0, 0, 4), diff --git a/src/record.rs b/src/record.rs index e3c59c6..aaf7776 100644 --- a/src/record.rs +++ b/src/record.rs @@ -1,8 +1,9 @@ //! Definitions to help handling CSV data as a set of records. -use arrow::array::Array; use std::sync::Arc; +use arrow::array::Array; + /// A batch of multi-field data. #[derive(Clone)] pub struct Batch { diff --git a/src/stats.rs b/src/stats.rs index c3d2a87..e4479bd 100644 --- a/src/stats.rs +++ b/src/stats.rs @@ -1,14 +1,15 @@ -use arrow::datatypes::{Float64Type, Int64Type, UInt32Type, UInt64Type}; -use chrono::{DateTime, NaiveDateTime}; -use num_traits::ToPrimitive; -use serde::{Deserialize, Serialize}; -use statistical::{mean, population_standard_deviation}; use std::collections::HashMap; use std::fmt; use std::hash::Hash; use std::iter::Iterator; use std::net::{IpAddr, Ipv4Addr}; +use arrow::datatypes::{Float64Type, Int64Type, UInt32Type, UInt64Type}; +use chrono::{DateTime, NaiveDateTime}; +use num_traits::ToPrimitive; +use serde::{Deserialize, Serialize}; +use statistical::{mean, population_standard_deviation}; + use crate::table::{Column, ColumnType}; const MAX_TIME_INTERVAL: u32 = 86_400; // one day in seconds @@ -628,11 +629,12 @@ where #[cfg(test)] mod tests { - use super::*; - use crate::Column; use arrow::datatypes::Int64Type; use chrono::NaiveDate; + use super::*; + use crate::Column; + #[test] fn test_convert_time_intervals() { let c4_v: Vec = vec![ diff --git a/src/table.rs b/src/table.rs index 546d6fa..03f5a29 100644 --- a/src/table.rs +++ b/src/table.rs @@ -1,3 +1,10 @@ +use std::collections::HashMap; +use std::iter::{Flatten, Iterator}; +use std::marker::PhantomData; +use std::slice; +use std::sync::Arc; +use std::vec; + use arrow::array::{ Array, BinaryArray, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, PrimitiveArray, PrimitiveBuilder, StringArray, UInt16Array, UInt32Array, UInt64Array, @@ -6,12 +13,6 @@ use arrow::array::{ use arrow::datatypes::{ArrowPrimitiveType, DataType, Int64Type, Schema, TimeUnit}; use num_traits::ToPrimitive; use serde::{Deserialize, Serialize}; -use std::collections::HashMap; -use std::iter::{Flatten, Iterator}; -use std::marker::PhantomData; -use std::slice; -use std::sync::Arc; -use std::vec; use strum_macros::EnumString; use crate::stats::{ @@ -36,7 +37,6 @@ pub enum ColumnType { } impl From for DataType { - #[must_use] fn from(ct: ColumnType) -> Self { match ct { ColumnType::Int64 => Self::Int64, @@ -105,7 +105,7 @@ where } /// Returns an `Iterator` for columns. - pub fn columns(&self) -> slice::Iter { + pub fn columns(&self) -> slice::Iter<'_, Column> { self.columns.iter() } @@ -485,7 +485,6 @@ impl Column { } impl PartialEq for Column { - #[must_use] fn eq(&self, other: &Self) -> bool { let data_type = match (self.arrays.first(), other.arrays.first()) { (Some(x_arr), Some(y_arr)) => { @@ -569,7 +568,6 @@ impl PartialEq for Column { } impl From> for Column { - #[must_use] fn from(array: Arc) -> Self { let len = array.len(); Self { @@ -658,13 +656,15 @@ impl<'a> Iterator for StringIter<'a, '_> { #[cfg(test)] mod tests { - use super::*; - use crate::Column; + use std::hash::{Hash, Hasher}; + use std::net::{IpAddr, Ipv4Addr}; + use ahash::AHasher; use arrow::datatypes::{Field, Float64Type, UInt32Type, UInt64Type}; use chrono::NaiveDate; - use std::hash::{Hash, Hasher}; - use std::net::{IpAddr, Ipv4Addr}; + + use super::*; + use crate::Column; fn hash(seq: &str) -> u64 { let mut hasher = AHasher::default(); @@ -815,7 +815,7 @@ mod tests { let c0_v: Vec = vec![1, 3, 3, 5, 2, 1, 3]; let c1_v: Vec<_> = vec!["111a qwer", "b", "c", "d", "b", "111a qwer", "111a qwer"]; let c2_v: Vec = vec![ - Ipv4Addr::new(127, 0, 0, 1).into(), + Ipv4Addr::LOCALHOST.into(), Ipv4Addr::new(127, 0, 0, 2).into(), Ipv4Addr::new(127, 0, 0, 3).into(), Ipv4Addr::new(127, 0, 0, 4).into(),