diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8a9085b..6a68d07 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.13"] + python-version: ["3.12", "3.13"] steps: - name: Checkout repo @@ -20,19 +20,32 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - - name: Install R and dependencies + - name: Install R and dependencies (Python 3.13 only) + if: matrix.python-version == '3.13' run: | sudo apt-get update sudo apt-get install -y r-base r-base-dev libtirpc-dev - - name: Install R packages + - name: Install R packages (Python 3.13 only) + if: matrix.python-version == '3.13' run: | sudo Rscript -e 'install.packages("StatMatch", repos="https://cloud.r-project.org")' sudo Rscript -e 'install.packages("clue", repos="https://cloud.r-project.org")' - - name: Install Python dependencies + - name: Install full dependencies (Python 3.13) + if: matrix.python-version == '3.13' run: | uv pip install -e ".[dev,docs,matching,images]" --system - - name: Run tests with coverage + - name: Install minimal dependencies (Python 3.12) + if: matrix.python-version == '3.12' + run: | + uv pip install -e ".[dev]" --system + - name: Run full tests with coverage (Python 3.13) + if: matrix.python-version == '3.13' run: make test + - name: Run smoke test only (Python 3.12) + if: matrix.python-version == '3.12' + run: | + python -m pytest tests/test_smoke_qrf.py -v + python -m pytest tests/test_basic.py -v - name: Upload coverage to Codecov uses: codecov/codecov-action@v3 with: diff --git a/.github/workflows/pr_code_changes.yaml b/.github/workflows/pr_code_changes.yaml index 13feb80..0b9fea9 100644 --- a/.github/workflows/pr_code_changes.yaml +++ b/.github/workflows/pr_code_changes.yaml @@ -24,7 +24,7 @@ jobs: strategy: matrix: os: [ ubuntu-latest ] - python-version: ["3.13"] + python-version: ["3.12", "3.13"] fail-fast: false runs-on: ${{ matrix.os }} steps: @@ -39,22 +39,35 @@ jobs: - name: Install slim version run: | uv pip install -e "." --system - - name: Install R and dependencies + - name: Install R and dependencies (Python 3.13 only) + if: matrix.python-version == '3.13' run: | sudo apt-get update sudo apt-get install -y r-base r-base-dev libtirpc-dev - - name: Install R packages + - name: Install R packages (Python 3.13 only) + if: matrix.python-version == '3.13' run: | sudo Rscript -e 'install.packages("StatMatch", repos="https://cloud.r-project.org")' sudo Rscript -e 'install.packages("clue", repos="https://cloud.r-project.org")' - - name: Install Python dependencies + - name: Install full test dependencies (Python 3.13) + if: matrix.python-version == '3.13' run: | uv pip install -e ".[dev,matching]" --system - - name: Run tests with coverage + - name: Install minimal test dependencies (Python 3.12) + if: matrix.python-version == '3.12' + run: | + uv pip install -e ".[dev]" --system + - name: Run full tests with coverage (Python 3.13) + if: matrix.python-version == '3.13' run: make test + - name: Run smoke test only (Python 3.12) + if: matrix.python-version == '3.12' + run: | + python -m pytest tests/test_smoke_qrf.py -v + python -m pytest tests/test_basic.py -v - name: Upload coverage to Codecov uses: codecov/codecov-action@v3 with: file: ./coverage.xml fail_ci_if_error: false - verbose: true \ No newline at end of file + verbose: true diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29..90f2583 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,15 @@ +- bump: minor + changes: + added: + - Support for Python 3.12 alongside Python 3.13 + - Python 3.12 to CI/CD test matrix for comprehensive testing + - Graceful handling of optional Matching module when R dependencies are unavailable + changed: + - Python version requirement from ">=3.13,<3.14" to ">=3.12,<3.14" + - Black formatter target versions to include both py312 and py313 + - GitHub Actions workflows to test against both Python 3.12 and 3.13 + - Python 3.12 CI tests to run minimal smoke test only (QRF basic functionality) + fixed: + - Issue where predict() returns DataFrame instead of Dict for single quantile in autoimpute + - Import errors when Matching module is not available due to missing R dependencies + - Unconditional import of rpy2-dependent modules in utils package causing test failures diff --git a/microimpute/comparisons/autoimpute.py b/microimpute/comparisons/autoimpute.py index 3e7cc74..867d629 100644 --- a/microimpute/comparisons/autoimpute.py +++ b/microimpute/comparisons/autoimpute.py @@ -21,7 +21,14 @@ VALIDATE_CONFIG, ) from microimpute.evaluations import cross_validate_model -from microimpute.models import * +from microimpute.models import OLS, QRF, Imputer, ImputerResults, QuantReg + +try: + from microimpute.models import Matching + + HAS_MATCHING = True +except ImportError: + HAS_MATCHING = False from microimpute.utils.data import preprocess_data log = logging.getLogger(__name__) @@ -284,7 +291,9 @@ def autoimpute( if not models: # If no models are provided, use default models - model_classes: List[Type[Imputer]] = [QRF, OLS, QuantReg, Matching] + model_classes: List[Type[Imputer]] = [QRF, OLS, QuantReg] + if HAS_MATCHING: + model_classes.append(Matching) else: model_classes = models @@ -485,6 +494,10 @@ def evaluate_model( imputing_data, quantiles=[imputation_q] ) + # Handle case where predict returns a DataFrame directly (single quantile) + if isinstance(imputations, pd.DataFrame): + imputations = {imputation_q: imputations} + if normalize_data: # Unnormalize the imputations mean = pd.Series( @@ -511,9 +524,7 @@ def evaluate_model( main_progress.set_description("Complete") main_progress.close() - median_imputations = final_imputations[ - 0.5 - ] # this may not work if we change the value of imputation_q + median_imputations = final_imputations[imputation_q] # Add the imputed variables to the receiver data try: missing_imputed_vars = [] diff --git a/microimpute/evaluations/cross_validation.py b/microimpute/evaluations/cross_validation.py index b93ee33..4ff122f 100644 --- a/microimpute/evaluations/cross_validation.py +++ b/microimpute/evaluations/cross_validation.py @@ -67,7 +67,13 @@ def cross_validate_model( RuntimeError: If cross-validation fails. """ # Set up parallel processing - n_jobs: Optional[int] = -1 + # Disable parallel processing for Matching (R/rpy2 doesn't work well with multiprocessing) + if Matching is not None and model_class == Matching: + n_jobs: Optional[int] = 1 # Sequential processing for R-based models + else: + n_jobs: Optional[int] = ( + -1 + ) # Parallel processing for Python-only models try: # Validate predictor and imputed variable columns exist diff --git a/microimpute/utils/__init__.py b/microimpute/utils/__init__.py index 2f694c7..e7e41ab 100644 --- a/microimpute/utils/__init__.py +++ b/microimpute/utils/__init__.py @@ -4,4 +4,10 @@ from .data import preprocess_data from .logging_utils import configure_logging -from .statmatch_hotdeck import nnd_hotdeck_using_rpy2 + +# Optional import for R-based functions +try: + from .statmatch_hotdeck import nnd_hotdeck_using_rpy2 +except ImportError: + # rpy2 is not available, matching functionality will be limited + nnd_hotdeck_using_rpy2 = None diff --git a/pyproject.toml b/pyproject.toml index 756f16d..86ca2df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ authors = [ { name = "María Juaristi", email = "juaristi@uni.minerva.edu" }, { name = "Nikhil Woodruff", email = "nikhil.woodruff@outlook.com" } ] -requires-python = ">=3.13,<3.14" +requires-python = ">=3.12,<3.14" dependencies = [ "numpy>=2.0.0,<3.0.0", "pandas>=2.2.0,<3.0.0", @@ -72,4 +72,4 @@ line_length = 79 [tool.black] line-length = 79 -target-version = ["py313"] \ No newline at end of file +target-version = ["py312", "py313"] diff --git a/tests/test_autoimpute.py b/tests/test_autoimpute.py index f327a82..9fe3301 100644 --- a/tests/test_autoimpute.py +++ b/tests/test_autoimpute.py @@ -8,6 +8,13 @@ from microimpute.comparisons.autoimpute import autoimpute from microimpute.visualizations.plotting import * +try: + from microimpute.models import Matching + + HAS_MATCHING = True +except ImportError: + HAS_MATCHING = False + def test_autoimpute_basic() -> None: """Test that autoimpute returns expected data structures.""" @@ -26,15 +33,16 @@ def test_autoimpute_basic() -> None: predictors = ["age", "sex", "bmi", "bp"] imputed_variables = ["s1", "bool"] + hyperparams = {"QRF": {"n_estimators": 100}} + if HAS_MATCHING: + hyperparams["Matching"] = {"constrained": True} + results = autoimpute( donor_data=diabetes_donor, receiver_data=diabetes_receiver, predictors=predictors, imputed_variables=imputed_variables, - hyperparameters={ - "QRF": {"n_estimators": 100}, - "Matching": {"constrained": True}, - }, + hyperparameters=hyperparams, log_level="INFO", ) diff --git a/tests/test_quantile_comparison.py b/tests/test_quantile_comparison.py index 6d759d4..fee3aea 100644 --- a/tests/test_quantile_comparison.py +++ b/tests/test_quantile_comparison.py @@ -19,7 +19,14 @@ from microimpute.comparisons import * from microimpute.config import RANDOM_STATE, VALID_YEARS -from microimpute.models import * +from microimpute.models import Imputer, OLS, QRF, QuantReg + +try: + from microimpute.models import Matching + + HAS_MATCHING = True +except ImportError: + HAS_MATCHING = False from microimpute.visualizations.plotting import * from microimpute.utils.data import preprocess_data @@ -41,7 +48,9 @@ def test_quantile_comparison_diabetes() -> None: Y_test: pd.DataFrame = X_test[imputed_variables] - model_classes: List[Type[Imputer]] = [QRF, OLS, QuantReg, Matching] + model_classes: List[Type[Imputer]] = [QRF, OLS, QuantReg] + if HAS_MATCHING: + model_classes.append(Matching) method_imputations = get_imputations( model_classes, X_train, X_test, predictors, imputed_variables ) @@ -96,7 +105,9 @@ def test_quantile_comparison_scf() -> None: Y_test: pd.DataFrame = X_test[IMPUTED_VARIABLES] - model_classes: List[Type[Imputer]] = [QRF, OLS, QuantReg, Matching] + model_classes: List[Type[Imputer]] = [QRF, OLS, QuantReg] + if HAS_MATCHING: + model_classes.append(Matching) method_imputations = get_imputations( model_classes, X_train, X_test, PREDICTORS, IMPUTED_VARIABLES ) diff --git a/tests/test_smoke_qrf.py b/tests/test_smoke_qrf.py new file mode 100644 index 0000000..b6be02b --- /dev/null +++ b/tests/test_smoke_qrf.py @@ -0,0 +1,64 @@ +""" +Minimal smoke test for Python 3.12 compatibility. +Tests only the core QRF functionality that PolicyEngine actually uses. +""" + +import pandas as pd +import numpy as np +from microimpute.models.qrf import QRF + + +def test_qrf_basic_usage(): + """Test basic QRF usage as PolicyEngine uses it.""" + # Create simple test data + np.random.seed(42) + n_samples = 100 + + X_train = pd.DataFrame( + { + "age": np.random.randint(18, 80, n_samples), + "income": np.random.randint(10000, 100000, n_samples), + "household_size": np.random.randint(1, 6, n_samples), + } + ) + X_train["benefits"] = X_train["income"] * 0.1 + np.random.normal( + 0, 1000, n_samples + ) + + predictors = ["age", "income", "household_size"] + imputed_variables = ["benefits"] + + # Test QRF instantiation with parameters PolicyEngine uses + qrf = QRF( + log_level="ERROR", # Suppress logs for smoke test + memory_efficient=True, + batch_size=10, + cleanup_interval=5, + ) + + # Test fit + fitted_model = qrf.fit( + X_train=X_train, + predictors=predictors, + imputed_variables=imputed_variables, + n_jobs=1, # Single thread as PolicyEngine uses + ) + + # Test predict + X_test = X_train.iloc[:10].copy() + predictions = fitted_model.predict(X_test=X_test) + + # Basic assertions + assert "benefits" in predictions, "Should have predictions for 'benefits'" + assert len(predictions["benefits"]) == len( + X_test + ), "Should have predictions for all test samples" + assert ( + not predictions["benefits"].isna().any() + ), "Should not have NaN predictions" + + print("✓ QRF smoke test passed") + + +if __name__ == "__main__": + test_qrf_basic_usage()