2 changes: 1 addition & 1 deletion .github/workflows/main.yml
@@ -51,7 +51,7 @@ jobs:
        run: |
          python examples/pipeline.py
      - name: Upload microimputation results
-        if: always()
+        if: always() && matrix.python-version == '3.13'
        uses: actions/upload-artifact@v4
        with:
          name: microimputation-results-${{ github.sha }}
2 changes: 1 addition & 1 deletion .github/workflows/pr_code_changes.yaml
@@ -70,7 +70,7 @@ jobs:
        run: |
          python examples/pipeline.py
      - name: Upload microimputation results
-        if: always()
+        if: always() && matrix.python-version == '3.13'
        uses: actions/upload-artifact@v4
        with:
          name: microimputation-results-${{ github.sha }}
1 change: 1 addition & 0 deletions .gitignore
@@ -76,6 +76,7 @@ celerybeat.pid

# Ignore Data Files
*.csv
+!microimputation-dashboard/**/*.csv
*.jpg
*.html
*.h5
46 changes: 43 additions & 3 deletions README.md
@@ -1,7 +1,47 @@
# Microimpute

-Microimpute enables variable imputation through different statistical methods. It facilitates comparison and benchmarking across methods through quantile loss calculations.
+Microimpute enables variable imputation through a variety of statistical methods. By providing a consistent interface across imputation techniques, it lets researchers and data scientists compare and benchmark approaches, using quantile loss and log loss calculations to identify the method that produces the most accurate results.

-To install, run pip install microimpute.
+## Features

-For image export functionality (PNG/JPG), install with: pip install microimpute[images]
+### Multiple imputation methods
+- **Statistical Matching**: Distance-based matching for finding similar observations
+- **Ordinary Least Squares (OLS)**: Linear regression-based imputation
+- **Quantile Regression**: Distribution-aware regression imputation
+- **Quantile Random Forests (QRF)**: Non-parametric forest-based approach
+
+### Automated method selection
+- **AutoImpute**: Automatically compares and selects the best imputation method for your data
+- **Cross-validation**: Built-in evaluation using quantile loss (numerical) and log loss (categorical); both metrics are sketched below
+- **Variable type support**: Handles numerical, categorical, and boolean variables
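As a hedged aside (not part of the diff): the two metrics named in the cross-validation bullet follow the standard pinball-loss and log-loss definitions. A minimal numpy sketch of those formulas — microimpute's internal implementation may differ:

```python
import numpy as np

def quantile_loss(y_true, y_pred, q):
    """Pinball loss at quantile q: asymmetric penalty for under-/over-prediction."""
    error = y_true - y_pred
    return np.mean(np.maximum(q * error, (q - 1) * error))

def log_loss(y_onehot, proba, eps=1e-15):
    """Categorical log loss from one-hot labels and predicted class probabilities."""
    proba = np.clip(proba, eps, 1 - eps)
    return -np.mean(np.sum(y_onehot * np.log(proba), axis=1))

# Example: evaluate a median (q = 0.5) prediction
y = np.array([1.0, 2.0, 3.0])
median_pred = np.array([1.5, 2.0, 2.5])
print(quantile_loss(y, median_pred, q=0.5))  # 0.1667
```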

+### Developer-friendly design
+- **Consistent API**: Standardized `fit()` and `predict()` interface across all models
+- **Extensible architecture**: Easy to implement custom imputation methods
+- **Weighted data handling**: Preserve data distributions with sample weights
+- **Input validation**: Automatic parameter and data validation
+
+### Interactive dashboard
+- **Visual exploration**: Analyze imputation results through interactive charts at https://microimpute-dashboard.vercel.app/
+- **GitHub integration**: Load artifacts directly from CI/CD workflows
+- **Multiple data sources**: File upload, URL loading, and sample data
+
+## Installation
+
+```bash
+pip install microimpute
+```
+
+For image export functionality (PNG/JPG), install with:
+
+```bash
+pip install microimpute[images]
+```
+
+## Examples and documentation
+
+For detailed examples and interactive notebooks, see the [documentation](https://policyengine.github.io/microimpute/).
+
+## Contributing
+
+Contributions to the project are welcome. Please feel free to submit a Pull Request with your improvements.
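To make the "consistent API" bullet concrete, here is a minimal sketch of the `fit()`/`predict()` convention the README describes. The `MeanImputer` class and its parameter names are toy stand-ins invented for illustration; microimpute's actual model classes and signatures are not shown in this diff and may differ:

```python
import pandas as pd

class MeanImputer:
    """Toy imputer following the fit/predict convention (predictors unused here)."""

    def fit(self, X_train: pd.DataFrame, predictors: list, imputed_variables: list):
        # Learn one statistic per target variable from the donor data.
        self.means_ = X_train[imputed_variables].mean()
        return self

    def predict(self, X_test: pd.DataFrame) -> pd.DataFrame:
        # Impute every target variable with its donor-data mean.
        return pd.DataFrame({v: [m] * len(X_test) for v, m in self.means_.items()})

donor = pd.DataFrame({"age": [25, 40, 60], "income": [30_000, 52_000, 41_000]})
receiver = pd.DataFrame({"age": [33, 51]})
imputed = MeanImputer().fit(donor, ["age"], ["income"]).predict(receiver)
print(imputed)  # income imputed as 41000.0 for both receiver rows
```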
5 changes: 5 additions & 0 deletions changelog_entry.yaml
@@ -0,0 +1,5 @@
- bump: minor
  changes:
    added:
      - Links to dashboard in README.md and documentation.
      - First dashboard visualizations.
16 changes: 16 additions & 0 deletions docs/index.md
@@ -9,3 +9,19 @@ The framework currently supports the following imputation methods:
- Quantile Regression

This is a work in progress that may evolve over time, including new statistical imputation methods and features.

+## Microimputation dashboard
+
+Users can visualize imputation and benchmarking results at https://microimpute-dashboard.vercel.app/.
+
+To use the dashboard for visualization, CSV files must contain the following columns in this exact order:
+- `type`: Type of metric (e.g., "benchmark_loss", "distribution_distance", "predictor_correlation")
+- `method`: Imputation method name (e.g., "QRF", "OLS", "QuantReg", "Matching")
+- `variable`: Variable being imputed or analyzed
+- `quantile`: Quantile level (numeric value, "mean", or "N/A")
+- `metric_name`: Name of the metric (e.g., "quantile_loss", "log_loss")
+- `metric_value`: Numeric value of the metric
+- `split`: Data split indicator (e.g., "train", "test", "full")
+- `additional_info`: JSON-formatted string with additional metadata
+
+The `format_csv()` function from `microimpute.utils` automatically formats imputation and benchmarking results into the correct structure for dashboard visualization. It accepts the outputs of various analysis functions (autoimpute results, comparison metrics, distribution comparisons, etc.) and returns a properly formatted DataFrame.
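As an illustration, a dashboard-ready frame can also be assembled by hand in the required column order. Since `format_csv()`'s exact signature isn't shown here, this sketch builds the DataFrame directly; the metric value and metadata are made-up placeholders:

```python
import json
import pandas as pd

# One benchmark-loss row in the dashboard's required column order.
row = {
    "type": "benchmark_loss",
    "method": "QRF",
    "variable": "s1",
    "quantile": 0.5,
    "metric_name": "quantile_loss",
    "metric_value": 0.132,  # illustrative value
    "split": "test",
    "additional_info": json.dumps({"n_obs": 221}),  # illustrative metadata
}
df = pd.DataFrame([row])
df.to_csv("microimpute_results.csv", index=False)
```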
19 changes: 18 additions & 1 deletion examples/pipeline.py
@@ -75,9 +75,22 @@ def run_full_pipeline(output_path="microimpute_results.csv"):
    donor_data = diabetes_data.iloc[donor_indices].reset_index(drop=True)
    receiver_data = diabetes_data.iloc[receiver_indices].reset_index(drop=True)

+    # Create a categorical risk_factor variable based on cholesterol levels (s4)
+    # Categorize into low, medium, high based on s4 values
+    def categorize_risk(s4_value):
+        if s4_value < -0.02:
+            return "low"
+        elif s4_value < 0.02:
+            return "medium"
+        else:
+            return "high"
+
+    donor_data["risk_factor"] = donor_data["s4"].apply(categorize_risk)
+    receiver_data["risk_factor"] = receiver_data["s4"].apply(categorize_risk)

    # Define predictors and variables to impute
    predictors = ["age", "sex", "bmi", "bp"]
-    imputed_variables = ["s1", "s4"]
+    imputed_variables = ["s1", "s4", "risk_factor"]

    # Remove imputed variables from receiver data
    receiver_data_without_targets = receiver_data.drop(
@@ -88,6 +101,8 @@ def run_full_pipeline(output_path="microimpute_results.csv"):
print(f"Receiver data shape: {receiver_data_without_targets.shape}")
print(f"Predictors: {predictors}")
print(f"Variables to impute: {imputed_variables}")
print(f"Risk factor distribution in donor data:")
print(donor_data["risk_factor"].value_counts())
print()

# ========================================================================
@@ -245,6 +260,8 @@ def run_full_pipeline(output_path="microimpute_results.csv"):
print(f" - Best imputation method: {best_method_name}")
print(f" - Number of predictors analyzed: {len(predictors)}")
print(f" - Number of imputed variables: {len(imputed_variables)}")
print(f" - Numerical variables: s1, s4")
print(f" - Categorical variables: risk_factor")
print()
print("Output CSV contains:")
for result_type in formatted_df["type"].unique():
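Side note on the `categorize_risk` helper added in the diff above: the same three-way binning could be written vectorized with `pandas.cut`. A self-contained sketch, not part of the PR — the sample `s4` values are invented for illustration:

```python
import numpy as np
import pandas as pd

donor_data = pd.DataFrame({"s4": [-0.05, 0.0, 0.03]})  # invented sample values

# Same thresholds as categorize_risk:
# (-inf, -0.02) -> low, [-0.02, 0.02) -> medium, [0.02, inf) -> high
donor_data["risk_factor"] = pd.cut(
    donor_data["s4"],
    bins=[-np.inf, -0.02, 0.02, np.inf],
    labels=["low", "medium", "high"],
    right=False,
).astype(str)  # cast to str to match the dtype the apply() version produces
print(donor_data)  # rows binned as low, medium, high
```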
117 changes: 117 additions & 0 deletions microimputation-dashboard/app/api/github/artifacts/route.ts
@@ -0,0 +1,117 @@
import { NextRequest, NextResponse } from 'next/server';

export async function GET(request: NextRequest) {
const searchParams = request.nextUrl.searchParams;
const repo = searchParams.get('repo');
const commitSha = searchParams.get('commit');

if (!repo || !commitSha) {
return NextResponse.json(
{ error: 'Missing repo or commit parameter' },
{ status: 400 }
);
}

const githubToken = process.env.GITHUB_TOKEN;
if (!githubToken) {
return NextResponse.json(
{ error: 'GitHub token not configured on server' },
{ status: 500 }
);
}

try {
const [owner, repoName] = repo.split('/');

// Get workflow runs for the commit
const runsResponse = await fetch(
`https://api.github.com/repos/${owner}/${repoName}/actions/runs?head_sha=${commitSha}`,
{
headers: {
Authorization: `Bearer ${githubToken}`,
Accept: 'application/vnd.github.v3+json',
'User-Agent': 'PolicyEngine-Dashboard/1.0',
},
}
);

if (!runsResponse.ok) {
return NextResponse.json(
{ error: `GitHub API error: ${runsResponse.status}` },
{ status: runsResponse.status }
);
}

const runsData = await runsResponse.json();
const runs = runsData.workflow_runs;

if (!runs || runs.length === 0) {
return NextResponse.json([]);
}

// Collect all imputation artifacts from completed runs
const allArtifacts = [];

for (const run of runs) {
if (run.status !== 'completed') continue;

try {
const artifactsResponse = await fetch(
`https://api.github.com/repos/${owner}/${repoName}/actions/runs/${run.id}/artifacts`,
{
headers: {
Authorization: `Bearer ${githubToken}`,
Accept: 'application/vnd.github.v3+json',
'User-Agent': 'PolicyEngine-Dashboard/1.0',
},
}
);

if (!artifactsResponse.ok) continue;

const artifactsData = await artifactsResponse.json();
const artifacts = artifactsData.artifacts;

// Filter for imputation artifacts
const imputationArtifacts = artifacts.filter(
(artifact: { name: string }) =>
artifact.name.toLowerCase().includes('impute') ||
artifact.name
.toLowerCase()
.includes('imputation') ||
artifact.name.toLowerCase().includes('result') ||
artifact.name.toLowerCase().includes('.csv')
);

allArtifacts.push(...imputationArtifacts);
} catch {
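        // Skip runs whose artifacts cannot be fetched; other runs may still contribute.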
continue;
}
}

// Remove duplicates and sort by creation date (newest first)
const uniqueArtifacts = allArtifacts
.filter(
(artifact: { name: string }, index: number, self: Array<{ name: string }>) =>
index ===
self.findIndex((a: { name: string }) => a.name === artifact.name)
)
.sort(
(a: { created_at: string }, b: { created_at: string }) =>
new Date(b.created_at).getTime() -
new Date(a.created_at).getTime()
);

return NextResponse.json(uniqueArtifacts);
} catch (error) {
return NextResponse.json(
{
error:
error instanceof Error
? error.message
: 'Unknown error',
},
{ status: 500 }
);
}
}
77 changes: 77 additions & 0 deletions microimputation-dashboard/app/api/github/branches/route.ts
@@ -0,0 +1,77 @@
import { NextRequest, NextResponse } from 'next/server';

export async function GET(request: NextRequest) {
const searchParams = request.nextUrl.searchParams;
const repo = searchParams.get('repo');

if (!repo) {
return NextResponse.json(
{ error: 'Missing repo parameter' },
{ status: 400 }
);
}

const githubToken = process.env.GITHUB_TOKEN;
if (!githubToken) {
return NextResponse.json(
{ error: 'GitHub token not configured on server' },
{ status: 500 }
);
}

try {
const allBranches = [];
let page = 1;
const perPage = 100;

while (true) {
const response = await fetch(
`https://api.github.com/repos/${repo}/branches?per_page=${perPage}&page=${page}`,
{
headers: {
Authorization: `Bearer ${githubToken}`,
Accept: 'application/vnd.github.v3+json',
'User-Agent': 'PolicyEngine-Dashboard/1.0',
},
}
);

if (!response.ok) {
return NextResponse.json(
{ error: `GitHub API error: ${response.status}` },
{ status: response.status }
);
}

const branches = await response.json();

if (branches.length === 0) {
break;
}

allBranches.push(...branches);

if (branches.length < perPage) {
break;
}

page++;

if (page > 10) {
break;
}
}

return NextResponse.json(allBranches);
} catch (error) {
return NextResponse.json(
{
error:
error instanceof Error
? error.message
: 'Unknown error',
},
{ status: 500 }
);
}
}
55 changes: 55 additions & 0 deletions microimputation-dashboard/app/api/github/commits/route.ts
@@ -0,0 +1,55 @@
import { NextRequest, NextResponse } from 'next/server';

export async function GET(request: NextRequest) {
const searchParams = request.nextUrl.searchParams;
const repo = searchParams.get('repo');
const branch = searchParams.get('branch');

if (!repo || !branch) {
return NextResponse.json(
{ error: 'Missing repo or branch parameter' },
{ status: 400 }
);
}

const githubToken = process.env.GITHUB_TOKEN;
if (!githubToken) {
return NextResponse.json(
{ error: 'GitHub token not configured on server' },
{ status: 500 }
);
}

try {
const response = await fetch(
`https://api.github.com/repos/${repo}/commits?sha=${branch}&per_page=20`,
{
headers: {
Authorization: `Bearer ${githubToken}`,
Accept: 'application/vnd.github.v3+json',
'User-Agent': 'PolicyEngine-Dashboard/1.0',
},
}
);

if (!response.ok) {
return NextResponse.json(
{ error: `GitHub API error: ${response.status}` },
{ status: response.status }
);
}

const commits = await response.json();
return NextResponse.json(commits);
} catch (error) {
return NextResponse.json(
{
error:
error instanceof Error
? error.message
: 'Unknown error',
},
{ status: 500 }
);
}
}
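Taken together, the three routes support a branch → commit → artifact drill-down. A hypothetical client flow against a locally running dashboard — this assumes `next dev` on its default port 3000, and the response fields (`name`, `sha`) come from the GitHub API objects the routes pass through; the endpoint paths and query parameters are exactly those defined above:

```python
import requests

BASE = "http://localhost:3000/api/github"
REPO = "PolicyEngine/microimpute"

# 1. List branches, 2. list recent commits on one branch, 3. fetch its artifacts.
branches = requests.get(f"{BASE}/branches", params={"repo": REPO}).json()
commits = requests.get(
    f"{BASE}/commits", params={"repo": REPO, "branch": branches[0]["name"]}
).json()
artifacts = requests.get(
    f"{BASE}/artifacts", params={"repo": REPO, "commit": commits[0]["sha"]}
).json()
print([a["name"] for a in artifacts])  # imputation artifacts, newest first
```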