Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.13"]
python-version: ["3.12", "3.13"]

steps:
- name: Checkout repo
Expand All @@ -20,19 +20,32 @@ jobs:
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install R and dependencies
- name: Install R and dependencies (Python 3.13 only)
if: matrix.python-version == '3.13'
run: |
sudo apt-get update
sudo apt-get install -y r-base r-base-dev libtirpc-dev
- name: Install R packages
- name: Install R packages (Python 3.13 only)
if: matrix.python-version == '3.13'
run: |
sudo Rscript -e 'install.packages("StatMatch", repos="https://cloud.r-project.org")'
sudo Rscript -e 'install.packages("clue", repos="https://cloud.r-project.org")'
- name: Install Python dependencies
- name: Install full dependencies (Python 3.13)
if: matrix.python-version == '3.13'
run: |
uv pip install -e ".[dev,docs,matching,images]" --system
- name: Run tests with coverage
- name: Install minimal dependencies (Python 3.12)
if: matrix.python-version == '3.12'
run: |
uv pip install -e ".[dev]" --system
- name: Run full tests with coverage (Python 3.13)
if: matrix.python-version == '3.13'
run: make test
- name: Run smoke test only (Python 3.12)
if: matrix.python-version == '3.12'
run: |
python -m pytest tests/test_smoke_qrf.py -v
python -m pytest tests/test_basic.py -v
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
Expand Down
25 changes: 19 additions & 6 deletions .github/workflows/pr_code_changes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
strategy:
matrix:
os: [ ubuntu-latest ]
python-version: ["3.13"]
python-version: ["3.12", "3.13"]
fail-fast: false
runs-on: ${{ matrix.os }}
steps:
Expand All @@ -39,22 +39,35 @@ jobs:
- name: Install slim version
run: |
uv pip install -e "." --system
- name: Install R and dependencies
- name: Install R and dependencies (Python 3.13 only)
if: matrix.python-version == '3.13'
run: |
sudo apt-get update
sudo apt-get install -y r-base r-base-dev libtirpc-dev
- name: Install R packages
- name: Install R packages (Python 3.13 only)
if: matrix.python-version == '3.13'
run: |
sudo Rscript -e 'install.packages("StatMatch", repos="https://cloud.r-project.org")'
sudo Rscript -e 'install.packages("clue", repos="https://cloud.r-project.org")'
- name: Install Python dependencies
- name: Install full test dependencies (Python 3.13)
if: matrix.python-version == '3.13'
run: |
uv pip install -e ".[dev,matching]" --system
- name: Run tests with coverage
- name: Install minimal test dependencies (Python 3.12)
if: matrix.python-version == '3.12'
run: |
uv pip install -e ".[dev]" --system
- name: Run full tests with coverage (Python 3.13)
if: matrix.python-version == '3.13'
run: make test
- name: Run smoke test only (Python 3.12)
if: matrix.python-version == '3.12'
run: |
python -m pytest tests/test_smoke_qrf.py -v
python -m pytest tests/test_basic.py -v
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
file: ./coverage.xml
fail_ci_if_error: false
verbose: true
verbose: true
15 changes: 15 additions & 0 deletions changelog_entry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
- bump: minor
changes:
added:
- Support for Python 3.12 alongside Python 3.13
- Python 3.12 to CI/CD test matrix for comprehensive testing
- Graceful handling of optional Matching module when R dependencies are unavailable
changed:
- Python version requirement from ">=3.13,<3.14" to ">=3.12,<3.14"
- Black formatter target versions to include both py312 and py313
- GitHub Actions workflows to test against both Python 3.12 and 3.13
- Python 3.12 CI tests to run minimal smoke test only (QRF basic functionality)
fixed:
- Issue where predict() returns DataFrame instead of Dict for single quantile in autoimpute
- Import errors when Matching module is not available due to missing R dependencies
- Unconditional import of rpy2-dependent modules in utils package causing test failures
21 changes: 16 additions & 5 deletions microimpute/comparisons/autoimpute.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,14 @@
VALIDATE_CONFIG,
)
from microimpute.evaluations import cross_validate_model
from microimpute.models import *
from microimpute.models import OLS, QRF, Imputer, ImputerResults, QuantReg

try:
from microimpute.models import Matching

HAS_MATCHING = True
except ImportError:
HAS_MATCHING = False
from microimpute.utils.data import preprocess_data

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -284,7 +291,9 @@ def autoimpute(

if not models:
# If no models are provided, use default models
model_classes: List[Type[Imputer]] = [QRF, OLS, QuantReg, Matching]
model_classes: List[Type[Imputer]] = [QRF, OLS, QuantReg]
if HAS_MATCHING:
model_classes.append(Matching)
else:
model_classes = models

Expand Down Expand Up @@ -485,6 +494,10 @@ def evaluate_model(
imputing_data, quantiles=[imputation_q]
)

# Handle case where predict returns a DataFrame directly (single quantile)
if isinstance(imputations, pd.DataFrame):
imputations = {imputation_q: imputations}

if normalize_data:
# Unnormalize the imputations
mean = pd.Series(
Expand All @@ -511,9 +524,7 @@ def evaluate_model(
main_progress.set_description("Complete")
main_progress.close()

median_imputations = final_imputations[
0.5
] # this may not work if we change the value of imputation_q
median_imputations = final_imputations[imputation_q]
# Add the imputed variables to the receiver data
try:
missing_imputed_vars = []
Expand Down
8 changes: 7 additions & 1 deletion microimpute/evaluations/cross_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,13 @@ def cross_validate_model(
RuntimeError: If cross-validation fails.
"""
# Set up parallel processing
n_jobs: Optional[int] = -1
# Disable parallel processing for Matching (R/rpy2 doesn't work well with multiprocessing)
if Matching is not None and model_class == Matching:
n_jobs: Optional[int] = 1 # Sequential processing for R-based models
else:
n_jobs: Optional[int] = (
-1
) # Parallel processing for Python-only models

try:
# Validate predictor and imputed variable columns exist
Expand Down
8 changes: 7 additions & 1 deletion microimpute/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,10 @@

from .data import preprocess_data
from .logging_utils import configure_logging
from .statmatch_hotdeck import nnd_hotdeck_using_rpy2

# Optional import for R-based functions
try:
from .statmatch_hotdeck import nnd_hotdeck_using_rpy2
except ImportError:
# rpy2 is not available, matching functionality will be limited
nnd_hotdeck_using_rpy2 = None
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ authors = [
{ name = "María Juaristi", email = "juaristi@uni.minerva.edu" },
{ name = "Nikhil Woodruff", email = "nikhil.woodruff@outlook.com" }
]
requires-python = ">=3.13,<3.14"
requires-python = ">=3.12,<3.14"
dependencies = [
"numpy>=2.0.0,<3.0.0",
"pandas>=2.2.0,<3.0.0",
Expand Down Expand Up @@ -72,4 +72,4 @@ line_length = 79

[tool.black]
line-length = 79
target-version = ["py313"]
target-version = ["py312", "py313"]
16 changes: 12 additions & 4 deletions tests/test_autoimpute.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@
from microimpute.comparisons.autoimpute import autoimpute
from microimpute.visualizations.plotting import *

try:
from microimpute.models import Matching

HAS_MATCHING = True
except ImportError:
HAS_MATCHING = False


def test_autoimpute_basic() -> None:
"""Test that autoimpute returns expected data structures."""
Expand All @@ -26,15 +33,16 @@ def test_autoimpute_basic() -> None:
predictors = ["age", "sex", "bmi", "bp"]
imputed_variables = ["s1", "bool"]

hyperparams = {"QRF": {"n_estimators": 100}}
if HAS_MATCHING:
hyperparams["Matching"] = {"constrained": True}

results = autoimpute(
donor_data=diabetes_donor,
receiver_data=diabetes_receiver,
predictors=predictors,
imputed_variables=imputed_variables,
hyperparameters={
"QRF": {"n_estimators": 100},
"Matching": {"constrained": True},
},
hyperparameters=hyperparams,
log_level="INFO",
)

Expand Down
17 changes: 14 additions & 3 deletions tests/test_quantile_comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,14 @@

from microimpute.comparisons import *
from microimpute.config import RANDOM_STATE, VALID_YEARS
from microimpute.models import *
from microimpute.models import Imputer, OLS, QRF, QuantReg

try:
from microimpute.models import Matching

HAS_MATCHING = True
except ImportError:
HAS_MATCHING = False
from microimpute.visualizations.plotting import *
from microimpute.utils.data import preprocess_data

Expand All @@ -41,7 +48,9 @@ def test_quantile_comparison_diabetes() -> None:

Y_test: pd.DataFrame = X_test[imputed_variables]

model_classes: List[Type[Imputer]] = [QRF, OLS, QuantReg, Matching]
model_classes: List[Type[Imputer]] = [QRF, OLS, QuantReg]
if HAS_MATCHING:
model_classes.append(Matching)
method_imputations = get_imputations(
model_classes, X_train, X_test, predictors, imputed_variables
)
Expand Down Expand Up @@ -96,7 +105,9 @@ def test_quantile_comparison_scf() -> None:

Y_test: pd.DataFrame = X_test[IMPUTED_VARIABLES]

model_classes: List[Type[Imputer]] = [QRF, OLS, QuantReg, Matching]
model_classes: List[Type[Imputer]] = [QRF, OLS, QuantReg]
if HAS_MATCHING:
model_classes.append(Matching)
method_imputations = get_imputations(
model_classes, X_train, X_test, PREDICTORS, IMPUTED_VARIABLES
)
Expand Down
64 changes: 64 additions & 0 deletions tests/test_smoke_qrf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""
Minimal smoke test for Python 3.12 compatibility.
Tests only the core QRF functionality that PolicyEngine actually uses.
"""

import pandas as pd
import numpy as np
from microimpute.models.qrf import QRF


def test_qrf_basic_usage():
    """Smoke-test the QRF fit/predict round trip on a small synthetic frame.

    Builds 100 rows of random demographic-style predictors plus a noisy
    ``benefits`` target, fits a ``QRF`` on them single-threaded, predicts
    for the first ten rows, and checks that the result carries a complete,
    NaN-free ``benefits`` entry.
    """
    # Seed the global NumPy RNG so the synthetic data is reproducible.
    np.random.seed(42)
    n_samples = 100

    # Draw the random columns up front, in a fixed order, so the generated
    # values depend only on the seed above.
    age = np.random.randint(18, 80, n_samples)
    income = np.random.randint(10000, 100000, n_samples)
    household_size = np.random.randint(1, 6, n_samples)
    noise = np.random.normal(0, 1000, n_samples)

    X_train = pd.DataFrame(
        {"age": age, "income": income, "household_size": household_size}
    )
    # Target is 10% of income plus Gaussian noise.
    X_train["benefits"] = X_train["income"] * 0.1 + noise

    predictors = ["age", "income", "household_size"]
    imputed_variables = ["benefits"]

    # Constructor options mirror the configuration this smoke test targets;
    # log_level="ERROR" keeps the test output quiet.
    qrf_options = {
        "log_level": "ERROR",
        "memory_efficient": True,
        "batch_size": 10,
        "cleanup_interval": 5,
    }
    qrf = QRF(**qrf_options)

    # Fit on a single thread.
    fitted_model = qrf.fit(
        X_train=X_train,
        predictors=predictors,
        imputed_variables=imputed_variables,
        n_jobs=1,
    )

    # Predict for a ten-row slice of the training data.
    X_test = X_train.iloc[:10].copy()
    predictions = fitted_model.predict(X_test=X_test)

    # The target must be present, complete, and free of missing values.
    assert "benefits" in predictions, "Should have predictions for 'benefits'"
    benefit_predictions = predictions["benefits"]
    assert len(benefit_predictions) == len(
        X_test
    ), "Should have predictions for all test samples"
    assert (
        not benefit_predictions.isna().any()
    ), "Should not have NaN predictions"

    print("✓ QRF smoke test passed")


# Allow running this smoke test directly as a script, without pytest.
if __name__ == "__main__":
    test_qrf_basic_usage()