From 25982932c565413cba4ce408d96f448a63f2d5e9 Mon Sep 17 00:00:00 2001 From: juaristi22 Date: Wed, 22 Oct 2025 14:41:29 +0800 Subject: [PATCH 01/12] add dashboard link to documentation --- .github/workflows/main.yml | 2 +- .github/workflows/pr_code_changes.yaml | 2 +- README.md | 46 +- changelog_entry.yaml | 4 + docs/index.md | 16 + .../app/api/github/artifacts/route.ts | 117 +++++ .../app/api/github/branches/route.ts | 77 ++++ .../app/api/github/commits/route.ts | 55 +++ .../app/api/github/download/route.ts | 59 +++ microimputation-dashboard/app/page.tsx | 1 + .../components/FileUpload.tsx | 430 +++++------------- microimputation-dashboard/package.json | 2 +- 12 files changed, 478 insertions(+), 333 deletions(-) create mode 100644 microimputation-dashboard/app/api/github/artifacts/route.ts create mode 100644 microimputation-dashboard/app/api/github/branches/route.ts create mode 100644 microimputation-dashboard/app/api/github/commits/route.ts create mode 100644 microimputation-dashboard/app/api/github/download/route.ts diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6f5b968..6a82c25 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -51,7 +51,7 @@ jobs: run: | python examples/pipeline.py - name: Upload microimputation results - if: always() + if: always() && matrix.python-version == '3.13' uses: actions/upload-artifact@v4 with: name: microimputation-results-${{ github.sha }} diff --git a/.github/workflows/pr_code_changes.yaml b/.github/workflows/pr_code_changes.yaml index a2118b2..c8e2ce8 100644 --- a/.github/workflows/pr_code_changes.yaml +++ b/.github/workflows/pr_code_changes.yaml @@ -70,7 +70,7 @@ jobs: run: | python examples/pipeline.py - name: Upload microimputation results - if: always() + if: always() && matrix.python-version == '3.13' uses: actions/upload-artifact@v4 with: name: microimputation-results-${{ github.sha }} diff --git a/README.md b/README.md index 63cf263..e9b83bb 100644 --- a/README.md +++ 
b/README.md @@ -1,7 +1,47 @@ # Microimpute -Microimpute enables variable imputation through different statistical methods. It facilitates comparison and benchmarking across methods through quantile loss calculations. +Microimpute enables variable imputation through a variety of statistical methods. By providing a consistent interface across different imputation techniques, it allows researchers and data scientists to easily compare and benchmark different approaches using quantile loss and log loss calculations to determine the method providing the most accurate results. -To install, run pip install microimpute. +## Features -For image export functionality (PNG/JPG), install with: pip install microimpute[images] +### Multiple imputation methods +- **Statistical Matching**: Distance-based matching for finding similar observations +- **Ordinary Least Squares (OLS)**: Linear regression-based imputation +- **Quantile Regression**: Distribution-aware regression imputation +- **Quantile Random Forests (QRF)**: Non-parametric forest-based approach + +### Automated method selection +- **AutoImpute**: Automatically compares and selects the best imputation method for your data +- **Cross-validation**: Built-in evaluation using quantile loss (numerical) and log loss (categorical) +- **Variable type support**: Handles numerical, categorical, and boolean variables + +### Developer-friendly design +- **Consistent API**: Standardized `fit()` and `predict()` interface across all models +- **Extensible architecture**: Easy to implement custom imputation methods +- **Weighted data handling**: Preserve data distributions with sample weights +- **Input validation**: Automatic parameter and data validation + +### Interactive dashboard +- **Visual exploration**: Analyze imputation results through interactive charts at https://microimpute-dashboard.vercel.app/ +- **GitHub integration**: Load artifacts directly from CI/CD workflows +- **Multiple data sources**: File upload, URL loading and 
sample data + +## Installation + +```bash +pip install microimpute +``` + +For image export functionality (PNG/JPG), install with: + +```bash +pip install microimpute[images] +``` + +## Examples and documentation + +For detailed examples and interactive notebooks, see the [documentation](https://policyengine.github.io/microimpute/). + +## Contributing + +Contributions to the project are welcome. Please feel free to submit a Pull Request with your improvements. diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29..9670cc6 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: minor + changes: + added: + - Links to dashboard in README.md and documentation. diff --git a/docs/index.md b/docs/index.md index dfa8a83..391c20c 100644 --- a/docs/index.md +++ b/docs/index.md @@ -9,3 +9,19 @@ The framework currently supports the following imputation methods: - Quantile Regression This is a work in progress that may evolve over time, including new statistical imputation methods and features. + +## Microimputation dashboard + +Users can visualize imputation and benchmarking results at https://microimpute-dashboard.vercel.app/. 
+ +To use the dashboard for visualization, CSV files must contain the following columns in this exact order: +- `type`: Type of metric (e.g., "benchmark_loss", "distribution_distance", "predictor_correlation") +- `method`: Imputation method name (e.g., "QRF", "OLS", "QuantReg", "Matching") +- `variable`: Variable being imputed or analyzed +- `quantile`: Quantile level (numeric value, "mean", or "N/A") +- `metric_name`: Name of the metric (e.g., "quantile_loss", "log_loss") +- `metric_value`: Numeric value of the metric +- `split`: Data split indicator (e.g., "train", "test", "full") +- `additional_info`: JSON-formatted string with additional metadata + +Users can use the `format_csv()` function from `microimpute.utils` to automatically format imputation and benchmarking results into the correct structure for dashboard visualization. This function accepts outputs from various analysis functions (autoimpute results, comparison metrics, distribution comparisons, etc.) and returns a properly formatted DataFrame. 
\ No newline at end of file diff --git a/microimputation-dashboard/app/api/github/artifacts/route.ts b/microimputation-dashboard/app/api/github/artifacts/route.ts new file mode 100644 index 0000000..d071169 --- /dev/null +++ b/microimputation-dashboard/app/api/github/artifacts/route.ts @@ -0,0 +1,117 @@ +import { NextRequest, NextResponse } from 'next/server'; + +export async function GET(request: NextRequest) { + const searchParams = request.nextUrl.searchParams; + const repo = searchParams.get('repo'); + const commitSha = searchParams.get('commit'); + + if (!repo || !commitSha) { + return NextResponse.json( + { error: 'Missing repo or commit parameter' }, + { status: 400 } + ); + } + + const githubToken = process.env.GITHUB_TOKEN; + if (!githubToken) { + return NextResponse.json( + { error: 'GitHub token not configured on server' }, + { status: 500 } + ); + } + + try { + const [owner, repoName] = repo.split('/'); + + // Get workflow runs for the commit + const runsResponse = await fetch( + `https://api.github.com/repos/${owner}/${repoName}/actions/runs?head_sha=${commitSha}`, + { + headers: { + Authorization: `Bearer ${githubToken}`, + Accept: 'application/vnd.github.v3+json', + 'User-Agent': 'PolicyEngine-Dashboard/1.0', + }, + } + ); + + if (!runsResponse.ok) { + return NextResponse.json( + { error: `GitHub API error: ${runsResponse.status}` }, + { status: runsResponse.status } + ); + } + + const runsData = await runsResponse.json(); + const runs = runsData.workflow_runs; + + if (!runs || runs.length === 0) { + return NextResponse.json([]); + } + + // Collect all imputation artifacts from completed runs + const allArtifacts = []; + + for (const run of runs) { + if (run.status !== 'completed') continue; + + try { + const artifactsResponse = await fetch( + `https://api.github.com/repos/${owner}/${repoName}/actions/runs/${run.id}/artifacts`, + { + headers: { + Authorization: `Bearer ${githubToken}`, + Accept: 'application/vnd.github.v3+json', + 'User-Agent': 
'PolicyEngine-Dashboard/1.0', + }, + } + ); + + if (!artifactsResponse.ok) continue; + + const artifactsData = await artifactsResponse.json(); + const artifacts = artifactsData.artifacts; + + // Filter for imputation artifacts + const imputationArtifacts = artifacts.filter( + (artifact: { name: string }) => + artifact.name.toLowerCase().includes('impute') || + artifact.name + .toLowerCase() + .includes('imputation') || + artifact.name.toLowerCase().includes('result') || + artifact.name.toLowerCase().includes('.csv') + ); + + allArtifacts.push(...imputationArtifacts); + } catch { + continue; + } + } + + // Remove duplicates and sort by creation date (newest first) + const uniqueArtifacts = allArtifacts + .filter( + (artifact: { name: string }, index: number, self: any[]) => + index === + self.findIndex((a: { name: string }) => a.name === artifact.name) + ) + .sort( + (a: { created_at: string }, b: { created_at: string }) => + new Date(b.created_at).getTime() - + new Date(a.created_at).getTime() + ); + + return NextResponse.json(uniqueArtifacts); + } catch (error) { + return NextResponse.json( + { + error: + error instanceof Error + ? 
error.message + : 'Unknown error', + }, + { status: 500 } + ); + } +} diff --git a/microimputation-dashboard/app/api/github/branches/route.ts b/microimputation-dashboard/app/api/github/branches/route.ts new file mode 100644 index 0000000..02a6bbd --- /dev/null +++ b/microimputation-dashboard/app/api/github/branches/route.ts @@ -0,0 +1,77 @@ +import { NextRequest, NextResponse } from 'next/server'; + +export async function GET(request: NextRequest) { + const searchParams = request.nextUrl.searchParams; + const repo = searchParams.get('repo'); + + if (!repo) { + return NextResponse.json( + { error: 'Missing repo parameter' }, + { status: 400 } + ); + } + + const githubToken = process.env.GITHUB_TOKEN; + if (!githubToken) { + return NextResponse.json( + { error: 'GitHub token not configured on server' }, + { status: 500 } + ); + } + + try { + const allBranches = []; + let page = 1; + const perPage = 100; + + while (true) { + const response = await fetch( + `https://api.github.com/repos/${repo}/branches?per_page=${perPage}&page=${page}`, + { + headers: { + Authorization: `Bearer ${githubToken}`, + Accept: 'application/vnd.github.v3+json', + 'User-Agent': 'PolicyEngine-Dashboard/1.0', + }, + } + ); + + if (!response.ok) { + return NextResponse.json( + { error: `GitHub API error: ${response.status}` }, + { status: response.status } + ); + } + + const branches = await response.json(); + + if (branches.length === 0) { + break; + } + + allBranches.push(...branches); + + if (branches.length < perPage) { + break; + } + + page++; + + if (page > 10) { + break; + } + } + + return NextResponse.json(allBranches); + } catch (error) { + return NextResponse.json( + { + error: + error instanceof Error + ? 
error.message + : 'Unknown error', + }, + { status: 500 } + ); + } +} diff --git a/microimputation-dashboard/app/api/github/commits/route.ts b/microimputation-dashboard/app/api/github/commits/route.ts new file mode 100644 index 0000000..9847aee --- /dev/null +++ b/microimputation-dashboard/app/api/github/commits/route.ts @@ -0,0 +1,55 @@ +import { NextRequest, NextResponse } from 'next/server'; + +export async function GET(request: NextRequest) { + const searchParams = request.nextUrl.searchParams; + const repo = searchParams.get('repo'); + const branch = searchParams.get('branch'); + + if (!repo || !branch) { + return NextResponse.json( + { error: 'Missing repo or branch parameter' }, + { status: 400 } + ); + } + + const githubToken = process.env.GITHUB_TOKEN; + if (!githubToken) { + return NextResponse.json( + { error: 'GitHub token not configured on server' }, + { status: 500 } + ); + } + + try { + const response = await fetch( + `https://api.github.com/repos/${repo}/commits?sha=${branch}&per_page=20`, + { + headers: { + Authorization: `Bearer ${githubToken}`, + Accept: 'application/vnd.github.v3+json', + 'User-Agent': 'PolicyEngine-Dashboard/1.0', + }, + } + ); + + if (!response.ok) { + return NextResponse.json( + { error: `GitHub API error: ${response.status}` }, + { status: response.status } + ); + } + + const commits = await response.json(); + return NextResponse.json(commits); + } catch (error) { + return NextResponse.json( + { + error: + error instanceof Error + ? 
error.message + : 'Unknown error', + }, + { status: 500 } + ); + } +} diff --git a/microimputation-dashboard/app/api/github/download/route.ts b/microimputation-dashboard/app/api/github/download/route.ts new file mode 100644 index 0000000..4938bab --- /dev/null +++ b/microimputation-dashboard/app/api/github/download/route.ts @@ -0,0 +1,59 @@ +import { NextRequest, NextResponse } from 'next/server'; + +export async function GET(request: NextRequest) { + const searchParams = request.nextUrl.searchParams; + const url = searchParams.get('url'); + + if (!url) { + return NextResponse.json( + { error: 'Missing url parameter' }, + { status: 400 } + ); + } + + const githubToken = process.env.GITHUB_TOKEN; + if (!githubToken) { + return NextResponse.json( + { error: 'GitHub token not configured on server' }, + { status: 500 } + ); + } + + try { + const downloadResponse = await fetch(url, { + headers: { + Authorization: `Bearer ${githubToken}`, + Accept: 'application/vnd.github.v3+json', + 'User-Agent': 'PolicyEngine-Dashboard/1.0', + }, + }); + + if (!downloadResponse.ok) { + return NextResponse.json( + { error: `GitHub API error: ${downloadResponse.status}` }, + { status: downloadResponse.status } + ); + } + + // Get the artifact ZIP as an ArrayBuffer + const zipBuffer = await downloadResponse.arrayBuffer(); + + // Return the ZIP file as a response + return new NextResponse(zipBuffer, { + headers: { + 'Content-Type': 'application/zip', + 'Content-Length': zipBuffer.byteLength.toString(), + }, + }); + } catch (error) { + return NextResponse.json( + { + error: + error instanceof Error + ? error.message + : 'Unknown error', + }, + { status: 500 } + ); + } +} diff --git a/microimputation-dashboard/app/page.tsx b/microimputation-dashboard/app/page.tsx index 2b31bbf..c24753a 100644 --- a/microimputation-dashboard/app/page.tsx +++ b/microimputation-dashboard/app/page.tsx @@ -92,6 +92,7 @@ function HomeContent() { } }; + return (
{/* Header */} diff --git a/microimputation-dashboard/components/FileUpload.tsx b/microimputation-dashboard/components/FileUpload.tsx index 581a2f3..24e8bd2 100644 --- a/microimputation-dashboard/components/FileUpload.tsx +++ b/microimputation-dashboard/components/FileUpload.tsx @@ -75,61 +75,32 @@ export default function FileUpload({ const [selectedSecondArtifact, setSelectedSecondArtifact] = useState(''); // Helper function to load a single artifact from deeplink parameters - const loadArtifactFromDeeplink = useCallback(async (artifactInfo: GitHubArtifactInfo, githubToken: string): Promise => { - // First, get the artifacts for the specific commit - const [owner, repo] = artifactInfo.repo.split('/'); - const runsResponse = await fetch(`https://api.github.com/repos/${owner}/${repo}/actions/runs?head_sha=${artifactInfo.commit}`, { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - }); - - if (!runsResponse.ok) { - throw new Error(`Failed to fetch workflow runs: ${runsResponse.status} ${runsResponse.statusText}`); + const loadArtifactFromDeeplink = useCallback(async (artifactInfo: GitHubArtifactInfo): Promise => { + // Get artifacts for the specific commit using API route + const artifactsResponse = await fetch( + `/api/github/artifacts?repo=${encodeURIComponent(artifactInfo.repo)}&commit=${encodeURIComponent(artifactInfo.commit)}` + ); + + if (!artifactsResponse.ok) { + throw new Error(`Failed to fetch artifacts: ${artifactsResponse.status}`); } - const runsData = await runsResponse.json(); - const completedRuns = runsData.workflow_runs.filter((run: { status: string }) => run.status === 'completed'); - - if (completedRuns.length === 0) { - throw new Error('No completed workflow runs found for this commit'); - } + const artifacts = await artifactsResponse.json(); // Find the artifact by name - let targetArtifact = null; - for (const run of completedRuns) { - const 
artifactsResponse = await fetch(`https://api.github.com/repos/${owner}/${repo}/actions/runs/${run.id}/artifacts`, { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - }); - - if (artifactsResponse.ok) { - const artifactsData = await artifactsResponse.json(); - targetArtifact = artifactsData.artifacts.find((artifact: { name: string }) => artifact.name === artifactInfo.artifact); - if (targetArtifact) break; - } - } + const targetArtifact = artifacts.find((artifact: { name: string }) => artifact.name === artifactInfo.artifact); if (!targetArtifact) { throw new Error(`Artifact "${artifactInfo.artifact}" not found for commit ${artifactInfo.commit}`); } - // Download and extract the artifact - const downloadResponse = await fetch(targetArtifact.archive_download_url, { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - }); + // Download and extract the artifact using API route + const downloadResponse = await fetch( + `/api/github/download?url=${encodeURIComponent(targetArtifact.archive_download_url)}` + ); if (!downloadResponse.ok) { - throw new Error(`Failed to download artifact: ${downloadResponse.status} ${downloadResponse.statusText}`); + throw new Error(`Failed to download artifact: ${downloadResponse.status}`); } const zipBuffer = await downloadResponse.arrayBuffer(); @@ -155,12 +126,6 @@ export default function FileUpload({ // Load GitHub artifacts directly from deeplink parameters const loadDeeplinkArtifacts = useCallback(async (primary: GitHubArtifactInfo, secondary?: GitHubArtifactInfo) => { - const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN; - if (!githubToken) { - setError('GitHub token not configured. 
Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.'); - return; - } - setIsLoading(true); setError(''); @@ -168,11 +133,11 @@ export default function FileUpload({ setError('🔄 Loading data from GitHub artifacts...'); // Load primary artifact - const primaryData = await loadArtifactFromDeeplink(primary, githubToken); + const primaryData = await loadArtifactFromDeeplink(primary); if (secondary && onCompareLoad) { // Load secondary artifact for comparison - const secondaryData = await loadArtifactFromDeeplink(secondary, githubToken); + const secondaryData = await loadArtifactFromDeeplink(secondary); // Generate display names with commit info const primaryDisplayName = `${primary.repo}@${primary.branch} (${primary.commit.substring(0, 7)}) - ${primary.artifact}`; @@ -436,14 +401,24 @@ export default function FileUpload({ } let url: URL; + let finalUrl = urlInput.trim(); + try { - url = new URL(urlInput.trim()); + url = new URL(finalUrl); } catch { setError('Invalid URL format. Please enter a valid URL (e.g., https://example.com/data.csv).'); return; } - if (!url.pathname.toLowerCase().endsWith('.csv') && !urlInput.toLowerCase().includes('csv')) { + // Handle Google Drive URLs + if (url.hostname === 'drive.google.com') { + setError( + 'Google Drive links are not supported due to CORS restrictions. Please download the file and use the "Drop file" tab instead or host the file on a different public server.' + ); + return; + } + + if (!url.pathname.toLowerCase().endsWith('.csv') && !finalUrl.toLowerCase().includes('csv')) { setError('URL should point to a CSV file. 
Please ensure the URL ends with .csv or contains CSV data.'); return; } @@ -460,7 +435,7 @@ export default function FileUpload({ const controller = new AbortController(); const timeoutId = setTimeout(() => controller.abort(), 30_000); // 30 s timeout - const response = await fetch(urlInput.trim(), { + const response = await fetch(finalUrl, { signal: controller.signal, headers: { Accept: 'text/csv, text/plain, */*' } }); @@ -557,62 +532,23 @@ export default function FileUpload({ return; } - const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN; - if (!githubToken) { - setError('GitHub token not configured. Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.'); - return; - } - setIsLoadingGithubData(true); setError(''); try { - // Fetch all branches with pagination support - const allBranches: GitHubBranch[] = []; - let page = 1; - const perPage = 100; // Maximum allowed by GitHub API - - while (true) { - const response = await fetch(`https://api.github.com/repos/${githubRepo}/branches?per_page=${perPage}&page=${page}`, { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - }); - - if (!response.ok) { - if (response.status === 404) { - throw new Error('Repository not found. Please check the repository name and ensure it is accessible.'); - } else if (response.status === 403) { - throw new Error('Access forbidden. 
Please check your GitHub token permissions or repository access.'); - } - throw new Error(`Failed to fetch branches: ${response.status} ${response.statusText}`); - } - - const branches: GitHubBranch[] = await response.json(); - - if (branches.length === 0) { - // No more branches to fetch - break; - } + const response = await fetch(`/api/github/branches?repo=${encodeURIComponent(githubRepo)}`); - allBranches.push(...branches); - - // If we got fewer branches than requested, we've reached the end - if (branches.length < perPage) { - break; - } - - page++; - - // Safety check to prevent infinite loops (GitHub repos rarely have more than 1000 branches) - if (page > 10) { - console.warn('Stopped fetching branches after 10 pages (1000 branches) to prevent excessive API calls'); - break; + if (!response.ok) { + if (response.status === 404) { + throw new Error('Repository not found. Please check the repository name and ensure it is accessible.'); + } else if (response.status === 403) { + throw new Error('Access forbidden. Please check your GitHub token permissions or repository access.'); } + const errorData = await response.json(); + throw new Error(errorData.error || `Failed to fetch branches: ${response.status}`); } + const allBranches: GitHubBranch[] = await response.json(); setGithubBranches(allBranches); // Auto-select main/master branch if available @@ -631,28 +567,20 @@ export default function FileUpload({ async function fetchGithubCommits(branch: string) { if (!githubRepo.trim() || !branch) return; - const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN; - if (!githubToken) { - setError('GitHub token not configured. 
Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.'); - return; - } - setIsLoadingGithubData(true); try { - const response = await fetch(`https://api.github.com/repos/${githubRepo}/commits?sha=${branch}&per_page=20`, { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - }); + const response = await fetch( + `/api/github/commits?repo=${encodeURIComponent(githubRepo)}&branch=${encodeURIComponent(branch)}` + ); + if (!response.ok) { if (response.status === 404) { throw new Error('Branch not found or repository is private.'); } else if (response.status === 403) { throw new Error('Access forbidden. Please check your GitHub token permissions or repository access.'); } - throw new Error(`Failed to fetch commits: ${response.status} ${response.statusText}`); + const errorData = await response.json(); + throw new Error(errorData.error || `Failed to fetch commits: ${response.status}`); } const commits: GitHubCommit[] = await response.json(); @@ -673,98 +601,32 @@ export default function FileUpload({ async function fetchGithubArtifacts(commitSha: string) { if (!githubRepo.trim() || !commitSha) return; - const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN; - if (!githubToken) { - setError('GitHub token not configured. 
Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.'); - return; - } - setIsLoadingGithubData(true); setAvailableArtifacts([]); setSelectedArtifact(''); try { - const [owner, repo] = githubRepo.split('/'); - - // Get workflow runs for the commit - const runsResponse = await fetch( - `https://api.github.com/repos/${owner}/${repo}/actions/runs?head_sha=${commitSha}`, - { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - } + const response = await fetch( + `/api/github/artifacts?repo=${encodeURIComponent(githubRepo)}&commit=${encodeURIComponent(commitSha)}` ); - if (!runsResponse.ok) { - if (runsResponse.status === 403) { + if (!response.ok) { + if (response.status === 403) { throw new Error(`GitHub API rate limit exceeded or token permissions insufficient (403). Please try again later or check your token permissions.`); - } else if (runsResponse.status === 404) { + } else if (response.status === 404) { throw new Error(`Repository or commit not found (404). 
Please check the repository name and commit SHA.`); - } else { - throw new Error(`Failed to fetch workflow runs: ${runsResponse.status} ${runsResponse.statusText}`); } + const errorData = await response.json(); + throw new Error(errorData.error || `Failed to fetch artifacts: ${response.status}`); } - const runsData = await runsResponse.json(); - const runs = runsData.workflow_runs; - - if (!runs || runs.length === 0) { - setError('No workflow runs found for this commit.'); - return; - } - - // Collect all imputation artifacts from completed runs - const allArtifacts: GitHubArtifact[] = []; - - for (const run of runs) { - if (run.status !== 'completed') continue; + const uniqueArtifacts: GitHubArtifact[] = await response.json(); - try { - const artifactsResponse = await fetch( - `https://api.github.com/repos/${owner}/${repo}/actions/runs/${run.id}/artifacts`, - { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - } - ); - - if (!artifactsResponse.ok) continue; - - const artifactsData = await artifactsResponse.json(); - const artifacts = artifactsData.artifacts; - - // Filter for imputation artifacts - const imputationArtifacts = artifacts.filter((artifact: GitHubArtifact) => - artifact.name.toLowerCase().includes('impute') || - artifact.name.toLowerCase().includes('imputation') || - artifact.name.toLowerCase().includes('result') || - artifact.name.toLowerCase().includes('.csv') - ); - - allArtifacts.push(...imputationArtifacts); - } catch { - continue; - } - } - - if (allArtifacts.length === 0) { + if (uniqueArtifacts.length === 0) { setError('No imputation artifacts found for this commit.'); return; } - // Remove duplicates and sort by creation date (newest first) - const uniqueArtifacts = allArtifacts - .filter((artifact, index, self) => - index === self.findIndex(a => a.name === artifact.name) - ) - .sort((a, b) => new Date(b.created_at).getTime() - new 
Date(a.created_at).getTime()); - setAvailableArtifacts(uniqueArtifacts); // Auto-select the first artifact @@ -791,25 +653,15 @@ export default function FileUpload({ return; } - const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN; - if (!githubToken) { - setError('GitHub token not configured. Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.'); - return; - } - setIsLoading(true); setError(''); try { setError('🔄 Downloading and extracting CSV from artifact...'); - const downloadResponse = await fetch(artifact.archive_download_url, { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - }); + const downloadResponse = await fetch( + `/api/github/download?url=${encodeURIComponent(artifact.archive_download_url)}` + ); if (!downloadResponse.ok) { throw new Error(`Failed to download artifact: ${downloadResponse.status}`); @@ -887,28 +739,20 @@ export default function FileUpload({ async function fetchSecondBranchCommits(branch: string) { if (!githubRepo.trim() || !branch) return; - const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN; - if (!githubToken) { - setError('GitHub token not configured. Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.'); - return; - } - setIsLoadingGithubData(true); try { - const response = await fetch(`https://api.github.com/repos/${githubRepo}/commits?sha=${branch}&per_page=20`, { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - }); + const response = await fetch( + `/api/github/commits?repo=${encodeURIComponent(githubRepo)}&branch=${encodeURIComponent(branch)}` + ); + if (!response.ok) { if (response.status === 404) { throw new Error('Branch not found or repository is private.'); } else if (response.status === 403) { throw new Error('Access forbidden. 
Please check your GitHub token permissions or repository access.'); } - throw new Error(`Failed to fetch commits: ${response.status} ${response.statusText}`); + const errorData = await response.json(); + throw new Error(errorData.error || `Failed to fetch commits: ${response.status}`); } const commits: GitHubCommit[] = await response.json(); @@ -929,98 +773,32 @@ export default function FileUpload({ async function fetchSecondArtifacts(commitSha: string) { if (!githubRepo.trim() || !commitSha) return; - const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN; - if (!githubToken) { - setError('GitHub token not configured. Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.'); - return; - } - setIsLoadingGithubData(true); setSecondArtifacts([]); setSelectedSecondArtifact(''); try { - const [owner, repo] = githubRepo.split('/'); - - // Get workflow runs for the commit - const runsResponse = await fetch( - `https://api.github.com/repos/${owner}/${repo}/actions/runs?head_sha=${commitSha}`, - { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - } + const response = await fetch( + `/api/github/artifacts?repo=${encodeURIComponent(githubRepo)}&commit=${encodeURIComponent(commitSha)}` ); - if (!runsResponse.ok) { - if (runsResponse.status === 403) { + if (!response.ok) { + if (response.status === 403) { throw new Error(`GitHub API rate limit exceeded or token permissions insufficient (403). Please try again later or check your token permissions.`); - } else if (runsResponse.status === 404) { + } else if (response.status === 404) { throw new Error(`Repository or commit not found (404). 
Please check the repository name and commit SHA.`); - } else { - throw new Error(`Failed to fetch workflow runs: ${runsResponse.status} ${runsResponse.statusText}`); } + const errorData = await response.json(); + throw new Error(errorData.error || `Failed to fetch artifacts: ${response.status}`); } - const runsData = await runsResponse.json(); - const runs = runsData.workflow_runs; - - if (!runs || runs.length === 0) { - setError('No workflow runs found for this commit.'); - return; - } - - // Collect all imputation artifacts from completed runs - const allArtifacts: GitHubArtifact[] = []; - - for (const run of runs) { - if (run.status !== 'completed') continue; - - try { - const artifactsResponse = await fetch( - `https://api.github.com/repos/${owner}/${repo}/actions/runs/${run.id}/artifacts`, - { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - } - ); + const uniqueArtifacts: GitHubArtifact[] = await response.json(); - if (!artifactsResponse.ok) continue; - - const artifactsData = await artifactsResponse.json(); - const artifacts = artifactsData.artifacts; - - // Filter for imputation artifacts - const imputationArtifacts = artifacts.filter((artifact: GitHubArtifact) => - artifact.name.toLowerCase().includes('impute') || - artifact.name.toLowerCase().includes('imputation') || - artifact.name.toLowerCase().includes('result') || - artifact.name.toLowerCase().includes('.csv') - ); - - allArtifacts.push(...imputationArtifacts); - } catch { - continue; - } - } - - if (allArtifacts.length === 0) { + if (uniqueArtifacts.length === 0) { setError('No imputation artifacts found for this commit.'); return; } - // Remove duplicates and sort by creation date (newest first) - const uniqueArtifacts = allArtifacts - .filter((artifact, index, self) => - index === self.findIndex(a => a.name === artifact.name) - ) - .sort((a, b) => new Date(b.created_at).getTime() - new 
Date(a.created_at).getTime()); - setSecondArtifacts(uniqueArtifacts); // Auto-select the first artifact @@ -1049,12 +827,6 @@ export default function FileUpload({ return; } - const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN; - if (!githubToken) { - setError('GitHub token not configured. Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.'); - return; - } - setIsLoading(true); setError(''); @@ -1063,20 +835,8 @@ export default function FileUpload({ // Download both artifacts const [firstDownload, secondDownload] = await Promise.all([ - fetch(firstArtifact.archive_download_url, { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - }), - fetch(secondArtifact.archive_download_url, { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - }) + fetch(`/api/github/download?url=${encodeURIComponent(firstArtifact.archive_download_url)}`), + fetch(`/api/github/download?url=${encodeURIComponent(secondArtifact.archive_download_url)}`) ]); if (!firstDownload.ok || !secondDownload.ok) { @@ -1176,7 +936,11 @@ export default function FileUpload({ {/* Tab navigation */}
+ ))} + +
+ ); +} diff --git a/microimputation-dashboard/types/imputation.ts b/microimputation-dashboard/types/imputation.ts index c6163bc..799862d 100644 --- a/microimputation-dashboard/types/imputation.ts +++ b/microimputation-dashboard/types/imputation.ts @@ -1,13 +1,13 @@ // Type definitions for microimputation data export interface ImputationDataPoint { - // Add fields based on what microimpute outputs - // These are placeholder fields that will be updated based on actual CSV structure - id?: string; - variable?: string; - original_value?: number; - imputed_value?: number; - method?: string; - confidence?: number; + type: string; // e.g., "benchmark_loss", "distribution_distance", "predictor_correlation" + method: string; // e.g., "QRF", "OLS", "QuantReg", "Matching" + variable: string; // e.g., "quantile_loss_mean_all", "log_loss_mean_all", or actual variable names + quantile: string | number; // numeric (0.05, 0.1, etc.), "mean", or "N/A" + metric_name: string; // e.g., "quantile_loss", "log_loss" + metric_value: number | null; // numeric value of the metric + split: string; // e.g., "train", "test", "full" + additional_info: string; // JSON-formatted string with metadata [key: string]: any; // Allow additional fields } diff --git a/microimputation-dashboard/utils/colors.ts b/microimputation-dashboard/utils/colors.ts new file mode 100644 index 0000000..9e7e870 --- /dev/null +++ b/microimputation-dashboard/utils/colors.ts @@ -0,0 +1,29 @@ +// Consistent color mapping for imputation methods across all charts +// Using Plotly color palette for consistency with Python visualizations + +export const METHOD_COLORS: Record = { + QRF: '#636EFA', // Plotly blue + OLS: '#EF553B', // Plotly red + QuantReg: '#00CC96', // Plotly teal + Matching: '#AB63FA', // Plotly purple + // Add more methods as needed +}; + +export const FALLBACK_COLORS = [ + '#FFA15A', // Plotly orange + '#19D3F3', // Plotly cyan + '#FF6692', // Plotly pink + '#B6E880', // Plotly lime + '#FF97FF', // Plotly 
magenta + '#FECB52', // Plotly yellow +]; + +/** + * Get color for a method, using predefined colors or fallback palette + */ +export function getMethodColor(method: string, index: number = 0): string { + if (method in METHOD_COLORS) { + return METHOD_COLORS[method]; + } + return FALLBACK_COLORS[index % FALLBACK_COLORS.length]; +} From ff998d361bf28893a18093e92d2505f00ef92c9f Mon Sep 17 00:00:00 2001 From: juaristi22 Date: Wed, 22 Oct 2025 15:24:37 +0800 Subject: [PATCH 03/12] fix linting --- changelog_entry.yaml | 1 + .../app/api/github/artifacts/route.ts | 2 +- .../components/BenchmarkLossCharts.tsx | 5 ++--- microimputation-dashboard/components/FileUpload.tsx | 2 +- .../components/PerVariableCharts.tsx | 8 ++++---- .../components/VisualizationTabs.tsx | 2 -- 6 files changed, 9 insertions(+), 11 deletions(-) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index 9670cc6..39bbe07 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -2,3 +2,4 @@ changes: added: - Links to dashboard in README.md and documentation. + - First dashboard visualizations. 
diff --git a/microimputation-dashboard/app/api/github/artifacts/route.ts b/microimputation-dashboard/app/api/github/artifacts/route.ts index d071169..1ab5163 100644 --- a/microimputation-dashboard/app/api/github/artifacts/route.ts +++ b/microimputation-dashboard/app/api/github/artifacts/route.ts @@ -92,7 +92,7 @@ export async function GET(request: NextRequest) { // Remove duplicates and sort by creation date (newest first) const uniqueArtifacts = allArtifacts .filter( - (artifact: { name: string }, index: number, self: any[]) => + (artifact: { name: string }, index: number, self: Array<{ name: string }>) => index === self.findIndex((a: { name: string }) => a.name === artifact.name) ) diff --git a/microimputation-dashboard/components/BenchmarkLossCharts.tsx b/microimputation-dashboard/components/BenchmarkLossCharts.tsx index c057e93..70a00c3 100644 --- a/microimputation-dashboard/components/BenchmarkLossCharts.tsx +++ b/microimputation-dashboard/components/BenchmarkLossCharts.tsx @@ -11,7 +11,6 @@ import { Tooltip, Legend, ResponsiveContainer, - ReferenceLine, } from 'recharts'; import { ImputationDataPoint } from '@/types/imputation'; import { getMethodColor } from '@/utils/colors'; @@ -60,7 +59,7 @@ export default function BenchmarkLossCharts({ data }: BenchmarkLossChartsProps) if (quantileLossData.length === 0) return []; // Group by quantile - const quantileMap = new Map>(); + const quantileMap = new Map>(); quantileLossData.forEach(d => { const quantile = Number(d.quantile); @@ -72,7 +71,7 @@ export default function BenchmarkLossCharts({ data }: BenchmarkLossChartsProps) }); return Array.from(quantileMap.values()).sort( - (a, b) => parseFloat(a.quantile) - parseFloat(b.quantile) + (a, b) => parseFloat(a.quantile as string) - parseFloat(b.quantile as string) ); }, [quantileLossData]); diff --git a/microimputation-dashboard/components/FileUpload.tsx b/microimputation-dashboard/components/FileUpload.tsx index 52eb223..6ac9f63 100644 --- 
a/microimputation-dashboard/components/FileUpload.tsx +++ b/microimputation-dashboard/components/FileUpload.tsx @@ -401,7 +401,7 @@ export default function FileUpload({ } let url: URL; - let finalUrl = urlInput.trim(); + const finalUrl = urlInput.trim(); try { url = new URL(finalUrl); diff --git a/microimputation-dashboard/components/PerVariableCharts.tsx b/microimputation-dashboard/components/PerVariableCharts.tsx index 7f7302e..01f5933 100644 --- a/microimputation-dashboard/components/PerVariableCharts.tsx +++ b/microimputation-dashboard/components/PerVariableCharts.tsx @@ -50,7 +50,7 @@ export default function PerVariableCharts({ typeof d.quantile === 'number' && d.quantile >= 0 && d.quantile <= 1 ); - const quantileMap = new Map>(); + const quantileMap = new Map>(); numericData.forEach((d) => { const quantile = Number(d.quantile); @@ -62,7 +62,7 @@ export default function PerVariableCharts({ }); return Array.from(quantileMap.values()).sort( - (a, b) => parseFloat(a.quantile) - parseFloat(b.quantile) + (a, b) => parseFloat(a.quantile as string) - parseFloat(b.quantile as string) ); }, [variableData, metricType]); @@ -104,7 +104,7 @@ export default function PerVariableCharts({ {metricType === 'quantile_loss' && quantileChartData.length > 0 && (

- Quantile Loss by Method for "{variable}" + Quantile Loss by Method for "{variable}"

@@ -153,7 +153,7 @@ export default function PerVariableCharts({ {metricType === 'log_loss' && logLossChartData.length > 0 && (

- Log Loss by Method for "{variable}" + Log Loss by Method for "{variable}"

diff --git a/microimputation-dashboard/components/VisualizationTabs.tsx b/microimputation-dashboard/components/VisualizationTabs.tsx index 5dfbef1..25c9736 100644 --- a/microimputation-dashboard/components/VisualizationTabs.tsx +++ b/microimputation-dashboard/components/VisualizationTabs.tsx @@ -1,7 +1,5 @@ 'use client'; -import { useState } from 'react'; - interface Tab { id: string; label: string; From 90f43023250a7f586ef239de1664e15d01e2dc77 Mon Sep 17 00:00:00 2001 From: juaristi22 Date: Fri, 24 Oct 2025 12:14:48 +0800 Subject: [PATCH 04/12] fix all loading pages --- .gitignore | 1 + microimputation-dashboard/app/page.tsx | 51 +-- .../components/FileUpload.tsx | 390 ++---------------- .../components/VisualizationDashboard.tsx | 15 +- .../public/microimputation_results.csv | 294 +++++++++++++ microimputation-dashboard/utils/deeplinks.ts | 97 +---- 6 files changed, 348 insertions(+), 500 deletions(-) create mode 100644 microimputation-dashboard/public/microimputation_results.csv diff --git a/.gitignore b/.gitignore index 555168e..a790fe1 100644 --- a/.gitignore +++ b/.gitignore @@ -76,6 +76,7 @@ celerybeat.pid # Ignore Data Files *.csv +!microimputation-dashboard/**/*.csv *.jpg *.html *.h5 diff --git a/microimputation-dashboard/app/page.tsx b/microimputation-dashboard/app/page.tsx index c24753a..8905f4e 100644 --- a/microimputation-dashboard/app/page.tsx +++ b/microimputation-dashboard/app/page.tsx @@ -13,18 +13,7 @@ function HomeContent() { const [fileName, setFileName] = useState(''); const [showDashboard, setShowDashboard] = useState(false); const [isLoadingFromDeeplink, setIsLoadingFromDeeplink] = useState(false); - const [githubArtifactInfo, setGithubArtifactInfo] = useState<{ - primary: GitHubArtifactInfo | null; - secondary?: GitHubArtifactInfo | null; - } | null>(null); - - // Comparison mode state - const [comparisonData, setComparisonData] = useState<{ - data1: ImputationDataPoint[]; - filename1: string; - data2: ImputationDataPoint[]; - filename2: 
string; - } | null>(null); + const [githubArtifactInfo, setGithubArtifactInfo] = useState(null); const searchParams = useSearchParams(); const deeplinkParams = parseDeeplinkParams(searchParams); @@ -40,32 +29,14 @@ function HomeContent() { const parsedData = parseImputationCSV(csvContent); setData(parsedData); setFileName(filename); - setComparisonData(null); // Clear comparison data when loading single file } catch (error) { console.error('Error parsing CSV:', error); alert('Failed to parse CSV file. Please check the file format.'); } }; - const handleCompareLoad = (content1: string, filename1: string, content2: string, filename2: string) => { - try { - const data1 = parseImputationCSV(content1); - const data2 = parseImputationCSV(content2); - setComparisonData({ - data1, - filename1, - data2, - filename2 - }); - setData([]); // Clear single data when loading comparison - } catch (error) { - console.error('Error parsing comparison CSVs:', error); - alert('Failed to parse one or both CSV files. 
Please check the file formats.'); - } - }; - const handleViewDashboard = () => { - if (data.length > 0 || comparisonData) { + if (data.length > 0) { setShowDashboard(true); } }; @@ -74,21 +45,20 @@ function HomeContent() { setShowDashboard(false); setData([]); setFileName(''); - setComparisonData(null); setGithubArtifactInfo(null); }; - const handleDeeplinkLoadComplete = (primary: GitHubArtifactInfo | null, secondary?: GitHubArtifactInfo | null) => { + const handleDeeplinkLoadComplete = (primary: GitHubArtifactInfo | null) => { setIsLoadingFromDeeplink(false); if (primary) { - setGithubArtifactInfo({ primary, secondary: secondary || undefined }); + setGithubArtifactInfo(primary); setShowDashboard(true); } }; - const handleGithubLoad = (primary: GitHubArtifactInfo | null, secondary?: GitHubArtifactInfo | null) => { + const handleGithubLoad = (primary: GitHubArtifactInfo | null) => { if (primary) { - setGithubArtifactInfo({ primary, secondary: secondary || undefined }); + setGithubArtifactInfo(primary); } }; @@ -125,7 +95,6 @@ function HomeContent() { ) : ( )} diff --git a/microimputation-dashboard/components/FileUpload.tsx b/microimputation-dashboard/components/FileUpload.tsx index 6ac9f63..a9bf564 100644 --- a/microimputation-dashboard/components/FileUpload.tsx +++ b/microimputation-dashboard/components/FileUpload.tsx @@ -8,11 +8,10 @@ import { DeeplinkParams, GitHubArtifactInfo } from '@/utils/deeplinks'; interface FileUploadProps { onFileLoad: (content: string, filename: string) => void; onViewDashboard: () => void; - onCompareLoad?: (content1: string, filename1: string, content2: string, filename2: string) => void; deeplinkParams?: DeeplinkParams | null; isLoadingFromDeeplink?: boolean; - onDeeplinkLoadComplete?: (primary: GitHubArtifactInfo | null, secondary?: GitHubArtifactInfo | null | undefined) => void; - onGithubLoad?: (primary: GitHubArtifactInfo | null, secondary?: GitHubArtifactInfo | null) => void; + onDeeplinkLoadComplete?: (primary: 
GitHubArtifactInfo | null) => void; + onGithubLoad?: (primary: GitHubArtifactInfo | null) => void; } interface GitHubCommit { @@ -43,7 +42,6 @@ interface GitHubArtifact { export default function FileUpload({ onFileLoad, onViewDashboard, - onCompareLoad, deeplinkParams, isLoadingFromDeeplink, onDeeplinkLoadComplete, @@ -66,14 +64,6 @@ export default function FileUpload({ const [selectedArtifact, setSelectedArtifact] = useState(''); const [isLoadingGithubData, setIsLoadingGithubData] = useState(false); - // Comparison mode state - const [comparisonMode, setComparisonMode] = useState(false); - const [selectedSecondBranch, setSelectedSecondBranch] = useState(''); - const [secondCommits, setSecondCommits] = useState([]); - const [selectedSecondCommit, setSelectedSecondCommit] = useState(''); - const [secondArtifacts, setSecondArtifacts] = useState([]); - const [selectedSecondArtifact, setSelectedSecondArtifact] = useState(''); - // Helper function to load a single artifact from deeplink parameters const loadArtifactFromDeeplink = useCallback(async (artifactInfo: GitHubArtifactInfo): Promise => { // Get artifacts for the specific commit using API route @@ -125,7 +115,7 @@ export default function FileUpload({ }, []); // Load GitHub artifacts directly from deeplink parameters - const loadDeeplinkArtifacts = useCallback(async (primary: GitHubArtifactInfo, secondary?: GitHubArtifactInfo) => { + const loadDeeplinkArtifacts = useCallback(async (primary: GitHubArtifactInfo) => { setIsLoading(true); setError(''); @@ -135,26 +125,14 @@ export default function FileUpload({ // Load primary artifact const primaryData = await loadArtifactFromDeeplink(primary); - if (secondary && onCompareLoad) { - // Load secondary artifact for comparison - const secondaryData = await loadArtifactFromDeeplink(secondary); - - // Generate display names with commit info - const primaryDisplayName = `${primary.repo}@${primary.branch} (${primary.commit.substring(0, 7)}) - ${primary.artifact}`; - const 
secondaryDisplayName = `${secondary.repo}@${secondary.branch} (${secondary.commit.substring(0, 7)}) - ${secondary.artifact}`; - - onCompareLoad(primaryData, primaryDisplayName, secondaryData, secondaryDisplayName); - setLoadedFile(`Comparison: ${primaryDisplayName} vs ${secondaryDisplayName}`); - } else { - // Single artifact load - const displayName = `${primary.repo}@${primary.branch} (${primary.commit.substring(0, 7)}) - ${primary.artifact}`; - onFileLoad(primaryData, displayName); - setLoadedFile(displayName); - } + // Single artifact load + const displayName = `${primary.repo}@${primary.branch} (${primary.commit.substring(0, 7)}) - ${primary.artifact}`; + onFileLoad(primaryData, displayName); + setLoadedFile(displayName); // Notify parent component that deeplink loading is complete if (onDeeplinkLoadComplete) { - onDeeplinkLoadComplete(primary, secondary); + onDeeplinkLoadComplete(primary); } setError(''); @@ -166,35 +144,19 @@ export default function FileUpload({ } finally { setIsLoading(false); } - }, [onFileLoad, onCompareLoad, onDeeplinkLoadComplete, loadArtifactFromDeeplink]); + }, [onFileLoad, onDeeplinkLoadComplete, loadArtifactFromDeeplink]); // Handle deeplink loading on mount useEffect(() => { - if (deeplinkParams && isLoadingFromDeeplink) { + if (deeplinkParams && isLoadingFromDeeplink && deeplinkParams.primary) { setActiveTab('github'); + setGithubRepo(deeplinkParams.primary.repo); + setSelectedBranch(deeplinkParams.primary.branch); + setSelectedCommit(deeplinkParams.primary.commit); + setSelectedArtifact(deeplinkParams.primary.artifact); - if (deeplinkParams.mode === 'comparison' && deeplinkParams.primary && deeplinkParams.secondary) { - setComparisonMode(true); - setGithubRepo(deeplinkParams.primary.repo); - setSelectedBranch(deeplinkParams.primary.branch); - setSelectedCommit(deeplinkParams.primary.commit); - setSelectedArtifact(deeplinkParams.primary.artifact); - setSelectedSecondBranch(deeplinkParams.secondary.branch); - 
setSelectedSecondCommit(deeplinkParams.secondary.commit); - setSelectedSecondArtifact(deeplinkParams.secondary.artifact); - - // Auto-load comparison data - loadDeeplinkArtifacts(deeplinkParams.primary, deeplinkParams.secondary); - } else if (deeplinkParams.primary) { - setComparisonMode(false); - setGithubRepo(deeplinkParams.primary.repo); - setSelectedBranch(deeplinkParams.primary.branch); - setSelectedCommit(deeplinkParams.primary.commit); - setSelectedArtifact(deeplinkParams.primary.artifact); - - // Auto-load single artifact data - loadDeeplinkArtifacts(deeplinkParams.primary); - } + // Auto-load artifact data + loadDeeplinkArtifacts(deeplinkParams.primary); } }, [deeplinkParams, isLoadingFromDeeplink, loadDeeplinkArtifacts]); @@ -540,7 +502,7 @@ export default function FileUpload({ if (!response.ok) { if (response.status === 404) { - throw new Error('Repository not found. Please check the repository name and ensure it is accessible.'); + throw new Error('Repository not found. Please check the repository name and ensure it is publicly accessible.'); } else if (response.status === 403) { throw new Error('Access forbidden. Please check your GitHub token permissions or repository access.'); } @@ -716,7 +678,7 @@ export default function FileUpload({ commit: selectedCommit, artifact: artifact.name }; - onGithubLoad(artifactInfo, null); + onGithubLoad(artifactInfo); } // Clear the GitHub state since we successfully loaded the file @@ -736,183 +698,6 @@ export default function FileUpload({ } } - async function fetchSecondBranchCommits(branch: string) { - if (!githubRepo.trim() || !branch) return; - - setIsLoadingGithubData(true); - try { - const response = await fetch( - `/api/github/commits?repo=${encodeURIComponent(githubRepo)}&branch=${encodeURIComponent(branch)}` - ); - - if (!response.ok) { - if (response.status === 404) { - throw new Error('Branch not found or repository is private.'); - } else if (response.status === 403) { - throw new Error('Access forbidden. 
Please check your GitHub token permissions or repository access.'); - } - const errorData = await response.json(); - throw new Error(errorData.error || `Failed to fetch commits: ${response.status}`); - } - - const commits: GitHubCommit[] = await response.json(); - setSecondCommits(commits); - - // Auto-select latest commit and fetch its artifacts - if (commits.length > 0) { - setSelectedSecondCommit(commits[0].sha); - await fetchSecondArtifacts(commits[0].sha); - } - } catch (err) { - setError(`GitHub API error: ${err instanceof Error ? err.message : 'Unknown error'}`); - } finally { - setIsLoadingGithubData(false); - } - } - - async function fetchSecondArtifacts(commitSha: string) { - if (!githubRepo.trim() || !commitSha) return; - - setIsLoadingGithubData(true); - setSecondArtifacts([]); - setSelectedSecondArtifact(''); - - try { - const response = await fetch( - `/api/github/artifacts?repo=${encodeURIComponent(githubRepo)}&commit=${encodeURIComponent(commitSha)}` - ); - - if (!response.ok) { - if (response.status === 403) { - throw new Error(`GitHub API rate limit exceeded or token permissions insufficient (403). Please try again later or check your token permissions.`); - } else if (response.status === 404) { - throw new Error(`Repository or commit not found (404). Please check the repository name and commit SHA.`); - } - const errorData = await response.json(); - throw new Error(errorData.error || `Failed to fetch artifacts: ${response.status}`); - } - - const uniqueArtifacts: GitHubArtifact[] = await response.json(); - - if (uniqueArtifacts.length === 0) { - setError('No imputation artifacts found for this commit.'); - return; - } - - setSecondArtifacts(uniqueArtifacts); - - // Auto-select the first artifact - if (uniqueArtifacts.length > 0) { - setSelectedSecondArtifact(uniqueArtifacts[0].id.toString()); - } - - } catch (err) { - setError(`Failed to fetch artifacts: ${err instanceof Error ? 
err.message : 'Unknown error'}`); - } finally { - setIsLoadingGithubData(false); - } - } - - async function loadComparisonData() { - if (!selectedArtifact || !selectedSecondArtifact || !onCompareLoad) { - setError('Please select artifacts from both commits to compare'); - return; - } - - const firstArtifact = availableArtifacts.find(a => a.id.toString() === selectedArtifact); - const secondArtifact = secondArtifacts.find(a => a.id.toString() === selectedSecondArtifact); - - if (!firstArtifact || !secondArtifact) { - setError('Selected artifacts not found'); - return; - } - - setIsLoading(true); - setError(''); - - try { - setError('🔄 Downloading and extracting CSV files for comparison...'); - - // Download both artifacts - const [firstDownload, secondDownload] = await Promise.all([ - fetch(`/api/github/download?url=${encodeURIComponent(firstArtifact.archive_download_url)}`), - fetch(`/api/github/download?url=${encodeURIComponent(secondArtifact.archive_download_url)}`) - ]); - - if (!firstDownload.ok || !secondDownload.ok) { - throw new Error('Failed to download one or both artifacts'); - } - - // Extract CSVs from both artifacts - const [firstZipBuffer, secondZipBuffer] = await Promise.all([ - firstDownload.arrayBuffer(), - secondDownload.arrayBuffer() - ]); - - const firstZip = new JSZip(); - const secondZip = new JSZip(); - const [firstZipContent, secondZipContent] = await Promise.all([ - firstZip.loadAsync(firstZipBuffer), - secondZip.loadAsync(secondZipBuffer) - ]); - - // Find CSV files in both ZIPs - const firstCsvFiles = Object.keys(firstZipContent.files).filter(filename => - filename.toLowerCase().endsWith('.csv') && !firstZipContent.files[filename].dir - ); - const secondCsvFiles = Object.keys(secondZipContent.files).filter(filename => - filename.toLowerCase().endsWith('.csv') && !secondZipContent.files[filename].dir - ); - - if (firstCsvFiles.length === 0 || secondCsvFiles.length === 0) { - throw new Error('No CSV files found in one or both artifacts'); - 
} - - // Extract CSV content - const [firstCsvContent, secondCsvContent] = await Promise.all([ - firstZipContent.files[firstCsvFiles[0]].async('text'), - secondZipContent.files[secondCsvFiles[0]].async('text') - ]); - - // Create display names with commit info - const firstCommitShort = selectedCommit.slice(0, 8); - const secondCommitShort = selectedSecondCommit.slice(0, 8); - - const firstBranchInfo = selectedBranch !== selectedSecondBranch ? ` (${selectedBranch})` : ''; - const secondBranchInfo = selectedBranch !== selectedSecondBranch ? ` (${selectedSecondBranch})` : ''; - - const firstName = `${firstCsvFiles[0]} @ ${firstCommitShort}${firstBranchInfo}`; - const secondName = `${secondCsvFiles[0]} @ ${secondCommitShort}${secondBranchInfo}`; - - // Load into comparison mode - onCompareLoad(firstCsvContent, firstName, secondCsvContent, secondName); - - // Notify parent component about GitHub artifact info for sharing - if (onGithubLoad) { - const primaryArtifactInfo: GitHubArtifactInfo = { - repo: githubRepo, - branch: selectedBranch, - commit: selectedCommit, - artifact: firstArtifact.name - }; - const secondaryArtifactInfo: GitHubArtifactInfo = { - repo: githubRepo, - branch: selectedSecondBranch, - commit: selectedSecondCommit, - artifact: secondArtifact.name - }; - onGithubLoad(primaryArtifactInfo, secondaryArtifactInfo); - } - - setError(''); - - } catch (extractError) { - console.error('Comparison extraction error:', extractError); - setError(`❌ Failed to extract comparison data: ${extractError instanceof Error ? extractError.message : 'Unknown error'}`); - } finally { - setIsLoading(false); - } - } return (
@@ -922,8 +707,14 @@ export default function FileUpload({
{error && ( -
-

{error}

+
+

{error}

)} @@ -1121,34 +912,6 @@ export default function FileUpload({
- {/* Comparison Mode Toggle */} -
- -

- Enable this to compare imputation results between different branches or commits -

-
- {/* Branch Selection */} {githubBranches.length > 0 && (
@@ -1236,109 +999,14 @@ export default function FileUpload({
)} - {/* Second Selection for Comparison */} - {comparisonMode && githubBranches.length > 0 && ( -
-

Second Imputation Run (for comparison)

- - {/* Second Branch Selection */} -
- - -
- - {/* Second Commit Selection */} - {secondCommits.length > 0 && ( -
- - - {selectedSecondCommit && ( -

- {secondCommits.find(c => c.sha === selectedSecondCommit)?.commit.author.date && - new Date(secondCommits.find(c => c.sha === selectedSecondCommit)!.commit.author.date).toLocaleString() - } -

- )} -
- )} - - {/* Second Artifact Selection */} - {secondArtifacts.length > 0 && ( -
- - - {selectedSecondArtifact && ( -

- {secondArtifacts.find(a => a.id.toString() === selectedSecondArtifact)?.created_at && - `Created: ${new Date(secondArtifacts.find(a => a.id.toString() === selectedSecondArtifact)!.created_at).toLocaleString()}` - } -

- )} -
- )} -
- )} - {/* Load Button */} - {selectedArtifact && (!comparisonMode || selectedSecondArtifact) && ( + {selectedArtifact && ( )} diff --git a/microimputation-dashboard/components/VisualizationDashboard.tsx b/microimputation-dashboard/components/VisualizationDashboard.tsx index ffdf3cd..6e046dd 100644 --- a/microimputation-dashboard/components/VisualizationDashboard.tsx +++ b/microimputation-dashboard/components/VisualizationDashboard.tsx @@ -10,20 +10,12 @@ import VisualizationTabs from './VisualizationTabs'; interface VisualizationDashboardProps { data: ImputationDataPoint[]; fileName: string; - comparisonData?: { - data: ImputationDataPoint[]; - filename: string; - }; - githubArtifactInfo?: { - primary: GitHubArtifactInfo | null; - secondary?: GitHubArtifactInfo | null; - } | null; + githubArtifactInfo?: GitHubArtifactInfo | null; } export default function VisualizationDashboard({ data, fileName, - comparisonData, }: VisualizationDashboardProps) { const [activeTab, setActiveTab] = useState('overview'); @@ -145,11 +137,6 @@ export default function VisualizationDashboard({ Categorical variables: {dataAnalysis.categoricalVars.length}

)} - {comparisonData && ( -

- Comparison file: {comparisonData.filename} ({comparisonData.data.length} records) -

- )}
diff --git a/microimputation-dashboard/public/microimputation_results.csv b/microimputation-dashboard/public/microimputation_results.csv new file mode 100644 index 0000000..9ef58a5 --- /dev/null +++ b/microimputation-dashboard/public/microimputation_results.csv @@ -0,0 +1,294 @@ +type,method,variable,quantile,metric_name,metric_value,split,additional_info +benchmark_loss,QRF,quantile_loss_mean_all,0.05,quantile_loss,0.0019931334519540313,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.1,quantile_loss,0.003676832529510976,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.15,quantile_loss,0.005051964323131603,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.2,quantile_loss,0.007150055272652542,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.25,quantile_loss,0.007272266767142256,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.3,quantile_loss,0.008540254219041532,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.35,quantile_loss,0.007621934560531267,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.4,quantile_loss,0.00916139860523047,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.45,quantile_loss,0.008137742361262587,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.5,quantile_loss,0.008501949409742712,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.55,quantile_loss,0.006943288815511526,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.6,quantile_loss,0.006674970777747578,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.65,quantile_loss,0.006300488362370938,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.7,quantile_loss,0.005762547086974565,train,"{""n_variables"": 2}" 
+benchmark_loss,QRF,quantile_loss_mean_all,0.75,quantile_loss,0.005910973227513356,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.8,quantile_loss,0.005674293245652812,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.85,quantile_loss,0.0045532698330881145,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.9,quantile_loss,0.003777490459909232,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.95,quantile_loss,0.0022857221356438738,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.05,quantile_loss,0.004976762851613436,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.1,quantile_loss,0.007958901707037789,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.15,quantile_loss,0.011348256884255009,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.2,quantile_loss,0.014479045607066829,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.25,quantile_loss,0.01804729212868635,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.3,quantile_loss,0.020713966247658862,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.35,quantile_loss,0.022742931417508282,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.4,quantile_loss,0.02258721277917371,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.45,quantile_loss,0.023351581275292608,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.5,quantile_loss,0.023625607730980425,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.55,quantile_loss,0.023186470004863385,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.6,quantile_loss,0.022905432991342744,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.65,quantile_loss,0.022230731438156376,test,"{""n_variables"": 2}" 
+benchmark_loss,QRF,quantile_loss_mean_all,0.7,quantile_loss,0.02057637886132135,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.75,quantile_loss,0.01912773338390523,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.8,quantile_loss,0.01739101077609373,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.85,quantile_loss,0.013723503511700213,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.9,quantile_loss,0.010967503180152355,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.95,quantile_loss,0.0071899999204732045,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,mean,quantile_loss,0.006052135549716419,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,mean,quantile_loss,0.017217385405120095,test,"{""n_variables"": 2}" +benchmark_loss,QRF,log_loss_mean_all,0.05,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.1,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.15,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.2,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.25,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.3,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.35,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.4,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.45,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.5,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.55,log_loss,2.614446654065684,train,"{""n_variables"": 1}" 
+benchmark_loss,QRF,log_loss_mean_all,0.6,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.65,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.7,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.75,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.8,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.85,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.9,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.95,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.05,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.1,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.15,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.2,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.25,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.3,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.35,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.4,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.45,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.5,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.55,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.6,log_loss,6.156691078407442,test,"{""n_variables"": 1}" 
+benchmark_loss,QRF,log_loss_mean_all,0.65,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.7,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.75,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.8,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.85,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.9,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.95,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,mean,log_loss,2.6144466540656834,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,mean,log_loss,6.156691078407443,test,"{""n_variables"": 1}" +benchmark_loss,OLS,quantile_loss_mean_all,0.05,quantile_loss,0.003793097970980961,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.1,quantile_loss,0.0064813273535348865,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.15,quantile_loss,0.008718559399878228,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.2,quantile_loss,0.010655610008580571,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.25,quantile_loss,0.012256033388805257,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.3,quantile_loss,0.013565140770903024,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.35,quantile_loss,0.014611889242340062,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.4,quantile_loss,0.015425975031196375,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.45,quantile_loss,0.01605944525518228,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.5,quantile_loss,0.01647854556609527,train,"{""n_variables"": 2}" 
+benchmark_loss,OLS,quantile_loss_mean_all,0.55,quantile_loss,0.01661220911122457,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.6,quantile_loss,0.01645760847896342,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.65,quantile_loss,0.016061546015992825,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.7,quantile_loss,0.015395729893732578,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.75,quantile_loss,0.014398366337920352,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.8,quantile_loss,0.012997538438059541,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.85,quantile_loss,0.011142832423357984,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.9,quantile_loss,0.008676966523108521,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.95,quantile_loss,0.0053039200554893294,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.05,quantile_loss,0.003875743024333408,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.1,quantile_loss,0.006636892853346667,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.15,quantile_loss,0.008964045456920146,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.2,quantile_loss,0.010925268697485854,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.25,quantile_loss,0.012586171018988979,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.3,quantile_loss,0.013971078532560128,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.35,quantile_loss,0.015078371048174203,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.4,quantile_loss,0.015960673093506517,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.45,quantile_loss,0.016611314804344936,test,"{""n_variables"": 2}" 
+benchmark_loss,OLS,quantile_loss_mean_all,0.5,quantile_loss,0.017023427694261576,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.55,quantile_loss,0.017162262823655853,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.6,quantile_loss,0.016994331164241493,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.65,quantile_loss,0.016563877768320707,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.7,quantile_loss,0.01584349745574873,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.75,quantile_loss,0.014830479499612956,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.8,quantile_loss,0.013414418494530157,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.85,quantile_loss,0.011604173888267462,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.9,quantile_loss,0.009116693282835488,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.95,quantile_loss,0.005653910575626517,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,mean,quantile_loss,0.012373281119228738,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,mean,quantile_loss,0.012779822693513777,test,"{""n_variables"": 2}" +benchmark_loss,OLS,log_loss_mean_all,0.05,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.1,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.15,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.2,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.25,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.3,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" 
+benchmark_loss,OLS,log_loss_mean_all,0.35,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.4,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.45,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.5,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.55,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.6,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.65,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.7,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.75,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.8,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.85,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.9,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.95,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.05,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.1,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.15,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.2,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.25,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.3,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.35,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" 
+benchmark_loss,OLS,log_loss_mean_all,0.4,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.45,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.5,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.55,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.6,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.65,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.7,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.75,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.8,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.85,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.9,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.95,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,mean,log_loss,0.9858032860627663,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,mean,log_loss,1.0101334442421654,test,"{""n_variables"": 1}" +benchmark_loss,QuantReg,quantile_loss_mean_all,mean,quantile_loss,,train,"{""n_variables"": 0}" +benchmark_loss,QuantReg,quantile_loss_mean_all,mean,quantile_loss,,test,"{""n_variables"": 0}" +benchmark_loss,QuantReg,log_loss_mean_all,mean,log_loss,,train,"{""n_variables"": 0}" +benchmark_loss,QuantReg,log_loss_mean_all,mean,log_loss,,test,"{""n_variables"": 0}" +benchmark_loss,Matching,quantile_loss_mean_all,0.05,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.1,quantile_loss,0.0,train,"{""n_variables"": 2}" 
+benchmark_loss,Matching,quantile_loss_mean_all,0.15,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.2,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.25,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.3,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.35,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.4,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.45,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.5,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.55,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.6,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.65,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.7,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.75,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.8,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.85,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.9,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.95,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.05,quantile_loss,0.022039382576829306,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.1,quantile_loss,0.022147598558993443,test,"{""n_variables"": 2}" 
+benchmark_loss,Matching,quantile_loss_mean_all,0.15,quantile_loss,0.022255814541157576,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.2,quantile_loss,0.022364030523321716,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.25,quantile_loss,0.022472246505485852,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.3,quantile_loss,0.02258046248764999,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.35,quantile_loss,0.022688678469814125,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.4,quantile_loss,0.02279689445197826,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.45,quantile_loss,0.0229051104341424,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.5,quantile_loss,0.02301332641630653,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.55,quantile_loss,0.02312154239847067,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.6,quantile_loss,0.023229758380634808,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.65,quantile_loss,0.023337974362798945,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.7,quantile_loss,0.023446190344963078,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.75,quantile_loss,0.023554406327127214,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.8,quantile_loss,0.02366262230929135,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.85,quantile_loss,0.023770838291455487,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.9,quantile_loss,0.023879054273619627,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.95,quantile_loss,0.02398727025578376,test,"{""n_variables"": 2}" 
+benchmark_loss,Matching,quantile_loss_mean_all,mean,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,mean,quantile_loss,0.02301332641630653,test,"{""n_variables"": 2}" +benchmark_loss,Matching,log_loss_mean_all,0.05,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.1,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.15,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.2,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.25,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.3,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.35,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.4,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.45,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.5,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.55,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.6,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.65,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.7,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.75,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.8,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" 
+benchmark_loss,Matching,log_loss_mean_all,0.85,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.9,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.95,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.05,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.1,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.15,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.2,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.25,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.3,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.35,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.4,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.45,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.5,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.55,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.6,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.65,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.7,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.75,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.8,log_loss,19.713195542915205,test,"{""n_variables"": 1}" 
+benchmark_loss,Matching,log_loss_mean_all,0.85,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.9,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.95,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,mean,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,mean,log_loss,19.71319554291521,test,"{""n_variables"": 1}" +distribution_distance,OLSResults,s1,N/A,wasserstein_distance,0.024660387596042915,full,{} +distribution_distance,OLSResults,s4,N/A,wasserstein_distance,0.020422408337066628,full,{} +distribution_distance,OLSResults,risk_factor,N/A,kl_divergence,6.033154649748202,full,{} +predictor_correlation,N/A,age,N/A,pearson,0.22798247938962524,full,"{""predictor2"": ""sex""}" +predictor_correlation,N/A,age,N/A,pearson,0.2501624011317938,full,"{""predictor2"": ""bmi""}" +predictor_correlation,N/A,age,N/A,pearson,0.3534419190240634,full,"{""predictor2"": ""bp""}" +predictor_correlation,N/A,sex,N/A,pearson,0.08064137058331985,full,"{""predictor2"": ""bmi""}" +predictor_correlation,N/A,sex,N/A,pearson,0.26905206635937173,full,"{""predictor2"": ""bp""}" +predictor_correlation,N/A,bmi,N/A,pearson,0.43643800885198847,full,"{""predictor2"": ""bp""}" +predictor_correlation,N/A,age,N/A,spearman,0.2309511404045441,full,"{""predictor2"": ""sex""}" +predictor_correlation,N/A,age,N/A,spearman,0.2559475199483165,full,"{""predictor2"": ""bmi""}" +predictor_correlation,N/A,age,N/A,spearman,0.3697134718827012,full,"{""predictor2"": ""bp""}" +predictor_correlation,N/A,sex,N/A,spearman,0.10617808669020486,full,"{""predictor2"": ""bmi""}" +predictor_correlation,N/A,sex,N/A,spearman,0.3010250570740961,full,"{""predictor2"": ""bp""}" +predictor_correlation,N/A,bmi,N/A,spearman,0.4472525722966459,full,"{""predictor2"": ""bp""}" 
+predictor_correlation,N/A,age,N/A,mutual_info,0.0,full,"{""predictor2"": ""sex""}" +predictor_correlation,N/A,age,N/A,mutual_info,0.01992537029466096,full,"{""predictor2"": ""bmi""}" +predictor_correlation,N/A,age,N/A,mutual_info,0.02051274401878463,full,"{""predictor2"": ""bp""}" +predictor_correlation,N/A,sex,N/A,mutual_info,0.024974606868113543,full,"{""predictor2"": ""bmi""}" +predictor_correlation,N/A,sex,N/A,mutual_info,0.06825522582483731,full,"{""predictor2"": ""bp""}" +predictor_correlation,N/A,bmi,N/A,mutual_info,0.012362825726377452,full,"{""predictor2"": ""bp""}" +predictor_target_mi,N/A,age,N/A,mutual_info,0.007275692599840133,full,"{""target"": ""s1""}" +predictor_target_mi,N/A,age,N/A,mutual_info,0.017843189636206607,full,"{""target"": ""s4""}" +predictor_target_mi,N/A,age,N/A,mutual_info,0.0527351415960489,full,"{""target"": ""risk_factor""}" +predictor_target_mi,N/A,sex,N/A,mutual_info,0.002833409209270051,full,"{""target"": ""s1""}" +predictor_target_mi,N/A,sex,N/A,mutual_info,0.022502563145111076,full,"{""target"": ""s4""}" +predictor_target_mi,N/A,sex,N/A,mutual_info,0.03936347909918465,full,"{""target"": ""risk_factor""}" +predictor_target_mi,N/A,bmi,N/A,mutual_info,0.00712501381273246,full,"{""target"": ""s1""}" +predictor_target_mi,N/A,bmi,N/A,mutual_info,0.042828982564792506,full,"{""target"": ""s4""}" +predictor_target_mi,N/A,bmi,N/A,mutual_info,0.09273531468209292,full,"{""target"": ""risk_factor""}" +predictor_target_mi,N/A,bp,N/A,mutual_info,0.007630872443873875,full,"{""target"": ""s1""}" +predictor_target_mi,N/A,bp,N/A,mutual_info,0.009030290161605078,full,"{""target"": ""s4""}" +predictor_target_mi,N/A,bp,N/A,mutual_info,0.012933835727825435,full,"{""target"": ""risk_factor""}" +predictor_importance,OLSResults,sex,N/A,relative_impact,30.680328353256453,test,"{""removed_predictor"": ""sex""}" +predictor_importance,OLSResults,sex,N/A,loss_increase,0.7393423418942775,test,"{""removed_predictor"": ""sex""}" 
+predictor_importance,OLSResults,bmi,N/A,relative_impact,0.03247893420537184,test,"{""removed_predictor"": ""bmi""}" +predictor_importance,OLSResults,bmi,N/A,loss_increase,0.000782685602355393,test,"{""removed_predictor"": ""bmi""}" +predictor_importance,OLSResults,age,N/A,relative_impact,0.00454596377561416,test,"{""removed_predictor"": ""age""}" +predictor_importance,OLSResults,age,N/A,loss_increase,0.00010954978921118297,test,"{""removed_predictor"": ""age""}" +predictor_importance,OLSResults,bp,N/A,relative_impact,-0.004826970371974454,test,"{""removed_predictor"": ""bp""}" +predictor_importance,OLSResults,bp,N/A,loss_increase,-0.0001163215575132881,test,"{""removed_predictor"": ""bp""}" +progressive_inclusion,OLSResults,N/A,N/A,cumulative_improvement,0.0,test,"{""step"": 1, ""predictor_added"": ""sex"", ""predictors"": [""sex""]}" +progressive_inclusion,OLSResults,N/A,N/A,marginal_improvement,0.0,test,"{""step"": 1, ""predictor_added"": ""sex""}" +progressive_inclusion,OLSResults,N/A,N/A,cumulative_improvement,0.0010940165047195194,test,"{""step"": 2, ""predictor_added"": ""bmi"", ""predictors"": [""sex"", ""bmi""]}" +progressive_inclusion,OLSResults,N/A,N/A,marginal_improvement,0.0010940165047195194,test,"{""step"": 2, ""predictor_added"": ""bmi""}" +progressive_inclusion,OLSResults,N/A,N/A,cumulative_improvement,0.0012175858369123382,test,"{""step"": 3, ""predictor_added"": ""age"", ""predictors"": [""sex"", ""bmi"", ""age""]}" +progressive_inclusion,OLSResults,N/A,N/A,marginal_improvement,0.00012356933219281885,test,"{""step"": 3, ""predictor_added"": ""age""}" +progressive_inclusion,OLSResults,N/A,N/A,cumulative_improvement,0.0011012642793990501,test,"{""step"": 4, ""predictor_added"": ""bp"", ""predictors"": [""sex"", ""bmi"", ""age"", ""bp""]}" +progressive_inclusion,OLSResults,N/A,N/A,marginal_improvement,-0.0001163215575132881,test,"{""step"": 4, ""predictor_added"": ""bp""}" diff --git a/microimputation-dashboard/utils/deeplinks.ts 
b/microimputation-dashboard/utils/deeplinks.ts index 55b7af3..31d48bf 100644 --- a/microimputation-dashboard/utils/deeplinks.ts +++ b/microimputation-dashboard/utils/deeplinks.ts @@ -8,103 +8,36 @@ export interface GitHubArtifactInfo { } export interface DeeplinkParams { - mode?: 'single' | 'comparison'; primary?: GitHubArtifactInfo; - secondary?: GitHubArtifactInfo; } export function parseDeeplinkParams(searchParams: URLSearchParams): DeeplinkParams | null { - const mode = searchParams.get('mode') || 'single'; - - const primaryRepo = searchParams.get('repo'); - const primaryBranch = searchParams.get('branch'); - const primaryCommit = searchParams.get('commit'); - const primaryArtifact = searchParams.get('artifact'); - - if (!primaryRepo || !primaryBranch || !primaryCommit || !primaryArtifact) { - // Check for comparison mode parameters - const repo1 = searchParams.get('repo1'); - const branch1 = searchParams.get('branch1'); - const commit1 = searchParams.get('commit1'); - const artifact1 = searchParams.get('artifact1'); - - const repo2 = searchParams.get('repo2'); - const branch2 = searchParams.get('branch2'); - const commit2 = searchParams.get('commit2'); - const artifact2 = searchParams.get('artifact2'); - - if (repo1 && branch1 && commit1 && artifact1 && repo2 && branch2 && commit2 && artifact2) { - return { - mode: 'comparison', - primary: { - repo: repo1, - branch: branch1, - commit: commit1, - artifact: artifact1, - }, - secondary: { - repo: repo2, - branch: branch2, - commit: commit2, - artifact: artifact2, - }, - }; - } + const repo = searchParams.get('repo'); + const branch = searchParams.get('branch'); + const commit = searchParams.get('commit'); + const artifact = searchParams.get('artifact'); + if (!repo || !branch || !commit || !artifact) { return null; } - const params: DeeplinkParams = { - mode: mode as 'single' | 'comparison', + return { primary: { - repo: primaryRepo, - branch: primaryBranch, - commit: primaryCommit, - artifact: primaryArtifact, + 
repo, + branch, + commit, + artifact, }, }; - - // Check for secondary parameters for comparison mode - const secondaryRepo = searchParams.get('repo2') || primaryRepo; - const secondaryBranch = searchParams.get('branch2'); - const secondaryCommit = searchParams.get('commit2'); - const secondaryArtifact = searchParams.get('artifact2'); - - if (secondaryBranch && secondaryCommit && secondaryArtifact) { - params.mode = 'comparison'; - params.secondary = { - repo: secondaryRepo, - branch: secondaryBranch, - commit: secondaryCommit, - artifact: secondaryArtifact, - }; - } - - return params; } -export function createShareableUrl(baseUrl: string, artifactInfo: GitHubArtifactInfo, secondaryInfo?: GitHubArtifactInfo): string { +export function createShareableUrl(baseUrl: string, artifactInfo: GitHubArtifactInfo): string { const url = new URL(baseUrl); - if (secondaryInfo) { - // Comparison mode - url.searchParams.set('mode', 'comparison'); - url.searchParams.set('repo1', artifactInfo.repo); - url.searchParams.set('branch1', artifactInfo.branch); - url.searchParams.set('commit1', artifactInfo.commit); - url.searchParams.set('artifact1', artifactInfo.artifact); - url.searchParams.set('repo2', secondaryInfo.repo); - url.searchParams.set('branch2', secondaryInfo.branch); - url.searchParams.set('commit2', secondaryInfo.commit); - url.searchParams.set('artifact2', secondaryInfo.artifact); - } else { - // Single mode - url.searchParams.set('mode', 'single'); - url.searchParams.set('repo', artifactInfo.repo); - url.searchParams.set('branch', artifactInfo.branch); - url.searchParams.set('commit', artifactInfo.commit); - url.searchParams.set('artifact', artifactInfo.artifact); - } + url.searchParams.set('repo', artifactInfo.repo); + url.searchParams.set('branch', artifactInfo.branch); + url.searchParams.set('commit', artifactInfo.commit); + url.searchParams.set('artifact', artifactInfo.artifact); return url.toString(); } \ No newline at end of file From 
d3637644b98795091468da38a66b61e02506d97a Mon Sep 17 00:00:00 2001 From: juaristi22 Date: Fri, 24 Oct 2025 13:07:05 +0800 Subject: [PATCH 05/12] add share dashboard button for deeplinks --- microimputation-dashboard/app/page.tsx | 25 +--- .../components/FileUpload.tsx | 35 +++--- .../components/VisualizationDashboard.tsx | 109 ++++++++++++++---- microimputation-dashboard/utils/deeplinks.ts | 21 ++-- 4 files changed, 123 insertions(+), 67 deletions(-) diff --git a/microimputation-dashboard/app/page.tsx b/microimputation-dashboard/app/page.tsx index 8905f4e..a8bc1b6 100644 --- a/microimputation-dashboard/app/page.tsx +++ b/microimputation-dashboard/app/page.tsx @@ -65,30 +65,6 @@ function HomeContent() { return (
- {/* Header */} -
-
-
-
-

- Microimpute Dashboard -

- - Beta - -
- {showDashboard && ( - - )} -
-
-
- {/* Main content */}
{!showDashboard ? ( @@ -105,6 +81,7 @@ function HomeContent() { data={data} fileName={fileName} githubArtifactInfo={githubArtifactInfo} + onBackToUpload={handleBackToUpload} /> )}
diff --git a/microimputation-dashboard/components/FileUpload.tsx b/microimputation-dashboard/components/FileUpload.tsx index a9bf564..fad0ec7 100644 --- a/microimputation-dashboard/components/FileUpload.tsx +++ b/microimputation-dashboard/components/FileUpload.tsx @@ -700,12 +700,20 @@ export default function FileUpload({ return ( -
-
-

Load imputation data

-

Choose how you would like to load your CSV file

+
+ {/* Page Title */} +
+

Microimpute Dashboard

+

Microimputation quality and model benchmarking assessment

+ {/* Upload Card */} +
+
+

Load imputation data

+

Choose how you would like to load your CSV file

+
+ {error && (
)} - {/* Global loading indicator */} - {(isLoading || isLoadingGithubData) && ( -
-
-

- {isLoadingGithubData ? 'Loading GitHub data...' : 'Loading file...'} -

-
- )} + {/* Global loading indicator */} + {(isLoading || isLoadingGithubData) && ( +
+
+

+ {isLoadingGithubData ? 'Loading GitHub data...' : 'Loading file...'} +

+
+ )} +
); } \ No newline at end of file diff --git a/microimputation-dashboard/components/VisualizationDashboard.tsx b/microimputation-dashboard/components/VisualizationDashboard.tsx index 6e046dd..16ce893 100644 --- a/microimputation-dashboard/components/VisualizationDashboard.tsx +++ b/microimputation-dashboard/components/VisualizationDashboard.tsx @@ -2,23 +2,41 @@ import { useMemo, useState } from 'react'; import { ImputationDataPoint } from '@/types/imputation'; -import { GitHubArtifactInfo } from '@/utils/deeplinks'; +import { GitHubArtifactInfo, createShareableUrl } from '@/utils/deeplinks'; import BenchmarkLossCharts from './BenchmarkLossCharts'; import PerVariableCharts from './PerVariableCharts'; import VisualizationTabs from './VisualizationTabs'; +import { Share } from 'lucide-react'; interface VisualizationDashboardProps { data: ImputationDataPoint[]; fileName: string; githubArtifactInfo?: GitHubArtifactInfo | null; + onBackToUpload: () => void; } export default function VisualizationDashboard({ data, fileName, + githubArtifactInfo, + onBackToUpload, }: VisualizationDashboardProps) { const [activeTab, setActiveTab] = useState('overview'); + // Handle sharing the dashboard via deeplink + const handleShare = async () => { + if (!githubArtifactInfo) return; + + try { + const shareUrl = createShareableUrl(githubArtifactInfo); + await navigator.clipboard.writeText(shareUrl); + alert('Shareable URL copied to clipboard!'); + } catch (err) { + console.error('Failed to copy URL:', err); + alert('Failed to copy URL to clipboard'); + } + }; + // Analyze data structure and available visualizations const dataAnalysis = useMemo(() => { const types = new Set(data.map(d => d.type)); @@ -95,12 +113,32 @@ export default function VisualizationDashboard({ if (!dataAnalysis.hasBenchmarkLoss) { return (
-
-

Visualization Dashboard

-
-

- Successfully loaded: {fileName} ({data.length} records) -

+ {/* Header */} +
+
+
+

Microimpute Dashboard

+

+ Loaded: {fileName} +

+
+
+ {githubArtifactInfo && ( + + )} + +
@@ -118,24 +156,53 @@ export default function VisualizationDashboard({ return (
{/* Header */} +
+
+
+

Microimpute Dashboard

+

+ Loaded: {fileName} +

+
+
+ {githubArtifactInfo && ( + + )} + +
+
+
+ + {/* Data Info */}
-

Visualization Dashboard

-
-

- Successfully loaded: {fileName} -

-

- Records: {data.length} -

+

Dataset Overview

+
+
+

Total Records

+

{data.length}

+
{dataAnalysis.numericalVars.length > 0 && ( -

- Numerical variables: {dataAnalysis.numericalVars.length} -

+
+

Numerical Variables

+

{dataAnalysis.numericalVars.length}

+
)} {dataAnalysis.categoricalVars.length > 0 && ( -

- Categorical variables: {dataAnalysis.categoricalVars.length} -

+
+

Categorical Variables

+

{dataAnalysis.categoricalVars.length}

+
)}
diff --git a/microimputation-dashboard/utils/deeplinks.ts b/microimputation-dashboard/utils/deeplinks.ts index 31d48bf..5ce99b9 100644 --- a/microimputation-dashboard/utils/deeplinks.ts +++ b/microimputation-dashboard/utils/deeplinks.ts @@ -31,13 +31,16 @@ export function parseDeeplinkParams(searchParams: URLSearchParams): DeeplinkPara }; } -export function createShareableUrl(baseUrl: string, artifactInfo: GitHubArtifactInfo): string { - const url = new URL(baseUrl); - - url.searchParams.set('repo', artifactInfo.repo); - url.searchParams.set('branch', artifactInfo.branch); - url.searchParams.set('commit', artifactInfo.commit); - url.searchParams.set('artifact', artifactInfo.artifact); - - return url.toString(); +export function createShareableUrl(artifactInfo: GitHubArtifactInfo): string { + const baseUrl = typeof window !== 'undefined' + ? `${window.location.protocol}//${window.location.host}${window.location.pathname}` + : ''; + + const urlParams = new URLSearchParams(); + urlParams.set('repo', artifactInfo.repo); + urlParams.set('branch', artifactInfo.branch); + urlParams.set('commit', artifactInfo.commit); + urlParams.set('artifact', artifactInfo.artifact); + + return `${baseUrl}?${urlParams.toString()}`; } \ No newline at end of file From a7f756a1e1d733c7ec678bbb992f0a689117e4d1 Mon Sep 17 00:00:00 2001 From: juaristi22 Date: Fri, 24 Oct 2025 14:57:37 +0800 Subject: [PATCH 06/12] add train/test split viz --- microimputation-dashboard/app/globals.css | 6 +- microimputation-dashboard/app/layout.tsx | 18 +- .../components/BenchmarkLossCharts.tsx | 573 +++++++++++++++--- 3 files changed, 511 insertions(+), 86 deletions(-) diff --git a/microimputation-dashboard/app/globals.css b/microimputation-dashboard/app/globals.css index a2dc41e..86b4953 100644 --- a/microimputation-dashboard/app/globals.css +++ b/microimputation-dashboard/app/globals.css @@ -8,8 +8,8 @@ @theme inline { --color-background: var(--background); --color-foreground: var(--foreground); - 
--font-sans: var(--font-geist-sans); - --font-mono: var(--font-geist-mono); + --font-sans: var(--font-roboto-serif); + --font-mono: var(--font-roboto-mono); } @media (prefers-color-scheme: dark) { @@ -22,5 +22,5 @@ body { background: var(--background); color: var(--foreground); - font-family: Arial, Helvetica, sans-serif; + font-family: var(--font-roboto-serif), ui-serif, Georgia, Cambria, "Times New Roman", Times, serif; } diff --git a/microimputation-dashboard/app/layout.tsx b/microimputation-dashboard/app/layout.tsx index f7fa87e..a1be77b 100644 --- a/microimputation-dashboard/app/layout.tsx +++ b/microimputation-dashboard/app/layout.tsx @@ -1,20 +1,22 @@ import type { Metadata } from "next"; -import { Geist, Geist_Mono } from "next/font/google"; +import { Roboto_Serif, Roboto_Mono } from "next/font/google"; import "./globals.css"; -const geistSans = Geist({ - variable: "--font-geist-sans", +const robotoSerif = Roboto_Serif({ + variable: "--font-roboto-serif", subsets: ["latin"], + weight: ["300", "400", "500", "600", "700"], }); -const geistMono = Geist_Mono({ - variable: "--font-geist-mono", +const robotoMono = Roboto_Mono({ + variable: "--font-roboto-mono", subsets: ["latin"], + weight: ["300", "400", "500", "600", "700"], }); export const metadata: Metadata = { - title: "Create Next App", - description: "Generated by create next app", + title: "Microimpute Dashboard", + description: "Microimputation quality and model benchmarking assessment", }; export default function RootLayout({ @@ -25,7 +27,7 @@ export default function RootLayout({ return ( {children} diff --git a/microimputation-dashboard/components/BenchmarkLossCharts.tsx b/microimputation-dashboard/components/BenchmarkLossCharts.tsx index 70a00c3..88b579d 100644 --- a/microimputation-dashboard/components/BenchmarkLossCharts.tsx +++ b/microimputation-dashboard/components/BenchmarkLossCharts.tsx @@ -1,6 +1,6 @@ 'use client'; -import { useMemo } from 'react'; +import { useMemo, useState } from 'react'; 
import { BarChart, Bar, @@ -25,6 +25,9 @@ export default function BenchmarkLossCharts({ data }: BenchmarkLossChartsProps) return data.filter(d => d.type === 'benchmark_loss'); }, [data]); + // State for selected method in train/test comparison + const [selectedMethod, setSelectedMethod] = useState(''); + // Check if we have benchmark loss data const hasBenchmarkData = benchmarkData.length > 0; @@ -99,90 +102,510 @@ export default function BenchmarkLossCharts({ data }: BenchmarkLossChartsProps) })); }, [logLossData]); + // Determine best performing model + const bestModel = useMemo(() => { + if (methods.length === 0) return null; + + // Calculate average quantile loss per method (test only) + const quantileLossAvg = new Map(); + // Count unique variables per method for quantile loss + const quantileVarCounts = new Map>(); + + if (quantileLossData.length > 0) { + const methodSums = new Map(); + quantileLossData.forEach(d => { + if (d.metric_value !== null) { + if (!methodSums.has(d.method)) { + methodSums.set(d.method, { sum: 0, count: 0 }); + } + const entry = methodSums.get(d.method)!; + entry.sum += d.metric_value; + entry.count += 1; + + // Track unique variables + if (!quantileVarCounts.has(d.method)) { + quantileVarCounts.set(d.method, new Set()); + } + quantileVarCounts.get(d.method)!.add(d.variable); + } + }); + methodSums.forEach((value, method) => { + quantileLossAvg.set(method, value.sum / value.count); + }); + } + + // Calculate average log loss per method (test only, already have this in logLossChartData) + const logLossAvg = new Map(); + // Count unique variables per method for log loss + const logLossVarCounts = new Map>(); + + logLossData.forEach(d => { + if (d.metric_value !== null) { + if (!logLossVarCounts.has(d.method)) { + logLossVarCounts.set(d.method, new Set()); + } + logLossVarCounts.get(d.method)!.add(d.variable); + } + }); + + logLossChartData.forEach(({ method, value }) => { + logLossAvg.set(method, value); + }); + + // Rank methods by 
each metric (lower is better, so rank 1 is best) + const rankMethods = (avgMap: Map): Map => { + const sorted = Array.from(avgMap.entries()).sort((a, b) => a[1] - b[1]); + const ranks = new Map(); + sorted.forEach(([method], index) => { + ranks.set(method, index + 1); + }); + return ranks; + }; + + const quantileRanks = rankMethods(quantileLossAvg); + const logLossRanks = rankMethods(logLossAvg); + + // Calculate weighted combined rank (weighted by number of variables of each type) + // This matches autoimpute's select_best_model_dual_metrics approach + const combinedRanks = new Map(); + methods.forEach(method => { + const qRank = quantileRanks.get(method); + const lRank = logLossRanks.get(method); + const nQuantileVars = quantileVarCounts.get(method)?.size || 0; + const nLogLossVars = logLossVarCounts.get(method)?.size || 0; + const totalVars = nQuantileVars + nLogLossVars; + + if (totalVars > 0) { + let weightedRank = 0; + if (qRank !== undefined) { + weightedRank += nQuantileVars * qRank; + } + if (lRank !== undefined) { + weightedRank += nLogLossVars * lRank; + } + combinedRanks.set(method, weightedRank / totalVars); + } else { + combinedRanks.set(method, Infinity); + } + }); + + // Find best method (lowest combined rank) + let bestMethod = ''; + let bestRank = Infinity; + combinedRanks.forEach((rank, method) => { + if (rank < bestRank) { + bestRank = rank; + bestMethod = method; + } + }); + + // Calculate train/test ratios for the best method + let quantileTrainTestRatio: number | undefined; + let logLossTrainTestRatio: number | undefined; + + // Quantile loss train/test ratio + const bestQuantileTrain = benchmarkData.filter( + d => d.method === bestMethod && d.metric_name === 'quantile_loss' && d.split === 'train' && d.metric_value !== null + ); + const bestQuantileTest = benchmarkData.filter( + d => d.method === bestMethod && d.metric_name === 'quantile_loss' && d.split === 'test' && d.metric_value !== null + ); + + if (bestQuantileTrain.length > 0 && 
bestQuantileTest.length > 0) { + const trainAvg = bestQuantileTrain.reduce((sum, d) => sum + d.metric_value!, 0) / bestQuantileTrain.length; + const testAvg = bestQuantileTest.reduce((sum, d) => sum + d.metric_value!, 0) / bestQuantileTest.length; + quantileTrainTestRatio = testAvg / trainAvg; + } + + // Log loss train/test ratio + const bestLogLossTrain = benchmarkData.filter( + d => d.method === bestMethod && d.metric_name === 'log_loss' && d.split === 'train' && d.metric_value !== null + ); + const bestLogLossTest = benchmarkData.filter( + d => d.method === bestMethod && d.metric_name === 'log_loss' && d.split === 'test' && d.metric_value !== null + ); + + if (bestLogLossTrain.length > 0 && bestLogLossTest.length > 0) { + const trainAvg = bestLogLossTrain.reduce((sum, d) => sum + d.metric_value!, 0) / bestLogLossTrain.length; + const testAvg = bestLogLossTest.reduce((sum, d) => sum + d.metric_value!, 0) / bestLogLossTest.length; + logLossTrainTestRatio = testAvg / trainAvg; + } + + return { + method: bestMethod, + quantileLoss: quantileLossAvg.get(bestMethod), + logLoss: logLossAvg.get(bestMethod), + quantileTrainTestRatio, + logLossTrainTestRatio, + }; + }, [methods, quantileLossData, logLossChartData, benchmarkData]); + + // Set default selected method to best model + useMemo(() => { + if (bestModel && bestModel.method && !selectedMethod) { + setSelectedMethod(bestModel.method); + } + }, [bestModel, selectedMethod]); + + // Prepare train/test comparison data for selected method + const trainTestData = useMemo(() => { + if (!selectedMethod) return { quantile: [], logLoss: [] }; + + // Quantile loss train vs test + const quantileTrainTest: Array<{ quantile: string; train: number | null; test: number | null }> = []; + const quantileData = benchmarkData.filter( + d => d.method === selectedMethod && d.metric_name === 'quantile_loss' + ); + + if (quantileData.length > 0) { + const quantileMap = new Map(); + + quantileData.forEach(d => { + const q = typeof d.quantile 
=== 'number' ? d.quantile.toFixed(2) : String(d.quantile || ''); + // Skip 'mean' quantiles + if (q.toLowerCase().includes('mean')) return; + + if (!quantileMap.has(q)) { + quantileMap.set(q, { train: null, test: null }); + } + const entry = quantileMap.get(q)!; + if (d.split === 'train') entry.train = d.metric_value; + if (d.split === 'test') entry.test = d.metric_value; + }); + + quantileMap.forEach((value, quantile) => { + quantileTrainTest.push({ quantile, ...value }); + }); + + quantileTrainTest.sort((a, b) => parseFloat(a.quantile) - parseFloat(b.quantile)); + } + + // Log loss train vs test (average across variables) + const logLossTrainTest: Array<{ category: string; train: number; test: number }> = []; + const logData = benchmarkData.filter( + d => d.method === selectedMethod && d.metric_name === 'log_loss' && d.metric_value !== null + ); + + if (logData.length > 0) { + const trainVals: number[] = []; + const testVals: number[] = []; + + logData.forEach(d => { + if (d.split === 'train') trainVals.push(d.metric_value!); + if (d.split === 'test') testVals.push(d.metric_value!); + }); + + if (trainVals.length > 0 || testVals.length > 0) { + const trainAvg = trainVals.length > 0 ? trainVals.reduce((a, b) => a + b, 0) / trainVals.length : 0; + const testAvg = testVals.length > 0 ? 
testVals.reduce((a, b) => a + b, 0) / testVals.length : 0; + + logLossTrainTest.push({ + category: 'Average', + train: trainAvg, + test: testAvg, + }); + } + } + + return { + quantile: quantileTrainTest, + logLoss: logLossTrainTest, + }; + }, [selectedMethod, benchmarkData]); + + const hasQuantileTrainTest = trainTestData.quantile.length > 0; + const hasLogLossTrainTest = trainTestData.logLoss.length > 0; + + // Filter methods that have train/test data + const methodsWithData = useMemo(() => { + const validMethods = new Set(); + + methods.forEach(method => { + const methodQuantileData = benchmarkData.filter( + d => d.method === method && d.metric_name === 'quantile_loss' && d.metric_value !== null + ); + const methodLogLossData = benchmarkData.filter( + d => d.method === method && d.metric_name === 'log_loss' && d.metric_value !== null + ); + + if (methodQuantileData.length > 0 || methodLogLossData.length > 0) { + validMethods.add(method); + } + }); + + return Array.from(validMethods); + }, [methods, benchmarkData]); + + const methodsWithoutData = methods.filter(m => !methodsWithData.includes(m)); + if (!hasBenchmarkData) { return null; } return ( -
- {/* Quantile Loss Comparison */} - {quantileChartData.length > 0 && ( -
-

- Test Quantile Loss Across Quantiles for Different Imputation Methods -

- - - - - - value.toFixed(6)} - /> - - {methods.map((method, index) => ( - - ))} - - +
+

+ Benchmarking imputation methods +

+ + {/* Best Model Highlight */} + {bestModel && bestModel.method && ( +
+
+
+ + + +
+
+

+ Best performing model: {bestModel.method} +

+

Based on combined performance across all metrics

+
+
+
+ {bestModel.quantileLoss !== undefined && ( +
+
+ Avg. quantile loss (test): + {bestModel.quantileLoss.toFixed(6)} +
+ {bestModel.quantileTrainTestRatio !== undefined && ( + 1.1 ? 'text-amber-600' : 'text-gray-700'}`}> + Train/test ratio: {bestModel.quantileTrainTestRatio.toFixed(3)} + + )} +
+ )} + {bestModel.logLoss !== undefined && ( +
+
+ Avg. log loss (test): + {bestModel.logLoss.toFixed(6)} +
+ {bestModel.logLossTrainTestRatio !== undefined && ( + 1.1 ? 'text-amber-600' : 'text-gray-700'}`}> + Train/test ratio: {bestModel.logLossTrainTestRatio.toFixed(3)} + + )} +
+ )} +
)} - {/* Log Loss Comparison */} - {logLossChartData.length > 0 && ( -
-

- Log Loss Comparison Across Methods -

- - - - - - [value.toFixed(6), 'Log Loss']} - /> - - {logLossChartData.map((entry, index) => ( - - ))} - - - + {/* Note about methods without data */} + {methodsWithoutData.length > 0 && ( +
+

+ Note: {methodsWithoutData.length === 1 ? 'The following method does' : 'The following methods do'} not appear in visualizations because {methodsWithoutData.length === 1 ? 'it does' : 'they do'} not support imputation of the selected variables due to variable types: {methodsWithoutData.join(', ')} +

)} + +
+ {/* Quantile Loss Comparison */} + {quantileChartData.length > 0 && ( +
+

+ Test quantile loss across quantiles for different imputation methods +

+ + + + + + value.toFixed(6)} + /> + + {methods.map((method, index) => ( + + ))} + + +
+

+ Quantile loss measures how well the imputation method predicts different quantiles of the distribution for numerical variables, creating an asymmetric loss function that penalizes under-prediction more heavily for higher quantiles and over-prediction more heavily for lower quantiles. +
+ Lower values indicate better performance. +

+
+
+ )} + + {/* Log Loss Comparison */} + {logLossChartData.length > 0 && ( +
+

+ Test log loss across different imputation methods +

+ + + + + + [value.toFixed(6), 'Log loss']} + /> + + {logLossChartData.map((entry, index) => ( + + ))} + + + +
+

+ Log loss measures how well the imputation method predicts categorical and boolean variables by evaluating the accuracy of predicted probabilities. It heavily penalizes confident misclassifications, such that a perfect classifier would have a log loss of 0, while worse predictions yield increasingly higher values. +

+
+
+ )} + + {/* Train/Test Overfitting Assessment */} + {(hasQuantileTrainTest || hasLogLossTrainTest) && methods.length > 0 && ( +
+
+

+ Train vs test performance +

+

+ Compare training and test set performance to assess potential overfitting or underfitting. +

+ + {/* Method Selector */} +
+ + +
+
+ +
+ {/* Quantile Loss Train/Test */} + {hasQuantileTrainTest && ( +
+

Quantile loss: train vs test

+ + + + + + value.toFixed(6)} + /> + + + + + +
+ )} + + {/* Log Loss Train/Test */} + {hasLogLossTrainTest && ( +
+

Log loss: train vs test

+ + + + + + value.toFixed(6)} + /> + + + + + +
+ )} +
+ +
+

+ Overfitting assessment: When test performance (green bars) is significantly worse than train performance (cyan bars), it suggests the model may be overfitting to the training data and not generalizing well to unseen data. If both train and test performances are poor, the model may be underfitting and failing to capture underlying patterns. +
+ Healthy performance is indicated by similar train and test metrics, with both being reasonably low. +

+
+
+ )} +
); } From ba77ba69fd64a0f597f0047f015a3aefda18fd51 Mon Sep 17 00:00:00 2001 From: juaristi22 Date: Fri, 24 Oct 2025 21:26:04 +0800 Subject: [PATCH 07/12] adding predictor correlation to dashboard --- .../components/PredictorCorrelationMatrix.tsx | 466 ++++++++++++++++++ .../components/VisualizationDashboard.tsx | 15 +- 2 files changed, 480 insertions(+), 1 deletion(-) create mode 100644 microimputation-dashboard/components/PredictorCorrelationMatrix.tsx diff --git a/microimputation-dashboard/components/PredictorCorrelationMatrix.tsx b/microimputation-dashboard/components/PredictorCorrelationMatrix.tsx new file mode 100644 index 0000000..331a5cc --- /dev/null +++ b/microimputation-dashboard/components/PredictorCorrelationMatrix.tsx @@ -0,0 +1,466 @@ +'use client'; + +import { useMemo, useState } from 'react'; +import { ImputationDataPoint } from '@/types/imputation'; + +interface PredictorCorrelationMatrixProps { + data: ImputationDataPoint[]; +} + +interface CorrelationData { + predictor1: string; + predictor2: string; + value: number; +} + +export default function PredictorCorrelationMatrix({ data }: PredictorCorrelationMatrixProps) { + // Filter for predictor_correlation data + const correlationData = useMemo(() => { + return data.filter(d => d.type === 'predictor_correlation'); + }, [data]); + + // Filter for predictor-target mutual information data + const predictorTargetMIData = useMemo(() => { + return data.filter(d => d.type === 'predictor_target_mi'); + }, [data]); + + // Check available correlation metrics + const availableMetrics = useMemo(() => { + const metrics = new Set(correlationData.map(d => d.metric_name)); + return Array.from(metrics); + }, [correlationData]); + + // State for selected correlation metric + const [selectedMetric, setSelectedMetric] = useState(''); + + // Set default metric to pearson if available, otherwise first available + useMemo(() => { + if (!selectedMetric && availableMetrics.length > 0) { + 
setSelectedMetric(availableMetrics.includes('pearson') ? 'pearson' : availableMetrics[0]); + } + }, [availableMetrics, selectedMetric]); + + // Build correlation matrix data + const { predictors, matrixData } = useMemo(() => { + if (!selectedMetric) return { predictors: [], matrixData: new Map>() }; + + // Filter data for selected metric + const metricData = correlationData.filter(d => d.metric_name === selectedMetric); + + // Extract all unique predictors + const predSet = new Set(); + const correlations: CorrelationData[] = []; + + metricData.forEach(d => { + const pred1 = d.variable; + let pred2: string | undefined; + + try { + const additionalInfo = typeof d.additional_info === 'string' + ? JSON.parse(d.additional_info) + : d.additional_info; + pred2 = additionalInfo?.predictor2; + } catch (e) { + console.error('Failed to parse additional_info:', e); + } + + if (pred1 && pred2) { + predSet.add(pred1); + predSet.add(pred2); + correlations.push({ + predictor1: pred1, + predictor2: pred2, + value: d.metric_value ?? 
0, + }); + } + }); + + const predictorList = Array.from(predSet).sort(); + + // Build symmetric matrix + const matrix = new Map>(); + + predictorList.forEach(p => { + matrix.set(p, new Map()); + }); + + // Add diagonal (1.0 for self-correlation) + predictorList.forEach(p => { + matrix.get(p)!.set(p, 1.0); + }); + + // Add correlations (symmetric) + correlations.forEach(({ predictor1, predictor2, value }) => { + matrix.get(predictor1)!.set(predictor2, value); + matrix.get(predictor2)!.set(predictor1, value); + }); + + return { predictors: predictorList, matrixData: matrix }; + }, [correlationData, selectedMetric]); + + // Build predictor-target mutual information matrix + const { predictorsList, targetsList, miMatrixData } = useMemo(() => { + if (predictorTargetMIData.length === 0) { + return { predictorsList: [], targetsList: [], miMatrixData: new Map>() }; + } + + const predSet = new Set(); + const targSet = new Set(); + const miValues: Array<{ predictor: string; target: string; value: number }> = []; + + predictorTargetMIData.forEach(d => { + const predictor = d.variable; + let target: string | undefined; + + try { + const additionalInfo = typeof d.additional_info === 'string' + ? 
JSON.parse(d.additional_info) + : d.additional_info; + target = additionalInfo?.target; + } catch (e) { + console.error('Failed to parse additional_info:', e); + } + + if (predictor && target && d.metric_value !== null) { + predSet.add(predictor); + targSet.add(target); + miValues.push({ + predictor, + target, + value: d.metric_value, + }); + } + }); + + const predList = Array.from(predSet).sort(); + const targList = Array.from(targSet).sort(); + + // Build matrix + const matrix = new Map>(); + predList.forEach(p => { + matrix.set(p, new Map()); + }); + + miValues.forEach(({ predictor, target, value }) => { + matrix.get(predictor)!.set(target, value); + }); + + return { predictorsList: predList, targetsList: targList, miMatrixData: matrix }; + }, [predictorTargetMIData]); + + const hasPredictorTargetMI = predictorsList.length > 0 && targetsList.length > 0; + + if (correlationData.length === 0 || predictors.length === 0) { + return null; + } + + // Helper function to get color based on correlation value + const getColor = (value: number): string => { + // Scale from -1 to 1 + // Negative: red shades, Positive: blue shades, Zero: white + if (value === 1.0) return '#1e40af'; // Dark blue for diagonal + if (value >= 0.7) return '#3b82f6'; // Blue + if (value >= 0.4) return '#60a5fa'; // Light blue + if (value >= 0.2) return '#93c5fd'; // Very light blue + if (value >= -0.2) return '#f3f4f6'; // Nearly white + if (value >= -0.4) return '#fca5a5'; // Light red + if (value >= -0.7) return '#f87171'; // Red + return '#ef4444'; // Dark red + }; + + // Helper function to get color based on mutual information value (0 to ~1) + const getMIColor = (value: number): string => { + // Scale from 0 (white) to high values (dark purple) + if (value >= 0.15) return '#581c87'; // Dark purple + if (value >= 0.10) return '#7c3aed'; // Purple + if (value >= 0.07) return '#a78bfa'; // Light purple + if (value >= 0.04) return '#c4b5fd'; // Very light purple + if (value >= 0.02) return 
'#ddd6fe'; // Almost white purple + return '#f3f4f6'; // Nearly white + }; + + const cellSize = 80; // Size of each cell in pixels + + return ( +
+
+

+ Predictor correlation analysis +

+

+ Correlation matrix showing relationships between predictor variables +

+ + {/* Metric Selector */} + {availableMetrics.length > 1 && ( +
+ + +
+ )} +
+ + {/* Correlation Matrix */} +
+
+
+ {/* Top-left empty cell */} +
+ + {/* Column headers */} + {predictors.map((pred, idx) => ( +
+
+ {pred} +
+
+ ))} + + {/* Rows */} + {predictors.map((pred1, rowIdx) => ( + <> + {/* Row header */} +
+ {pred1} +
+ + {/* Correlation cells */} + {predictors.map((pred2, colIdx) => { + const value = matrixData.get(pred1)?.get(pred2) ?? 0; + // Use purple scale for mutual_info, blue/red scale for correlations + const bgColor = selectedMetric === 'mutual_info' ? getMIColor(value) : getColor(value); + const textColor = selectedMetric === 'mutual_info' + ? (value > 0.07 ? '#ffffff' : '#000000') + : (Math.abs(value) > 0.5 ? '#ffffff' : '#000000'); + + return ( +
+ {selectedMetric === 'mutual_info' ? value.toFixed(3) : value.toFixed(2)} +
+ ); + })} + + ))} +
+
+
+ + {/* Legend - only for correlation metrics (not mutual_info) */} + {selectedMetric !== 'mutual_info' && ( +
+

+ Interpretation: Correlation values range from -1 to 1. Positive values (blue) indicate variables that increase together, negative values (red) indicate variables that move in opposite directions, and values near 0 (white) indicate little to no linear relationship. +

+
+ Color scale: +
+
+
+
+
+
+
+
+
+ + ◄ Negative + | + Positive ► + +
+
+

+ Pearson vs Spearman: Pearson correlation measures linear relationships between variables and is sensitive to outliers. Spearman correlation measures monotonic relationships (whether variables consistently increase or decrease together) by ranking the data first, making it more robust to outliers and non-linear but monotonic relationships. Use Pearson for linear relationships and Spearman when the relationship may be non-linear or when data contains outliers. +

+
+
+ )} + + {/* Predictor-Target Mutual Information Section */} +
+

+ Predictor-imputed variable mutual information +

+ + {hasPredictorTargetMI ? ( + <> +

+ Mutual information between predictor variables and imputed target variables +

+ + {/* MI Matrix */} +
+
+
+ {/* Top-left empty cell */} +
+ + {/* Column headers (targets) */} + {targetsList.map((target, idx) => ( +
+
+ {target} +
+
+ ))} + + {/* Rows */} + {predictorsList.map((predictor, rowIdx) => ( + <> + {/* Row header */} +
+ {predictor} +
+ + {/* MI cells */} + {targetsList.map((target, colIdx) => { + const value = miMatrixData.get(predictor)?.get(target) ?? 0; + const bgColor = getMIColor(value); + const textColor = value > 0.07 ? '#ffffff' : '#000000'; + + return ( +
+ {value.toFixed(3)} +
+ ); + })} + + ))} +
+
+
+ + ) : null} + + {/* Explanation box - always shown */} +
+

+ What is mutual information? Mutual information measures how much information one variable provides about another. Unlike correlation, it captures both linear and non-linear relationships between variables. Values range from 0 (independent variables) to higher positive values (strong dependency). +

+

+ Why measure it for imputed variables? Mutual information between predictors and imputed variables reveals which predictors are most informative for imputation. High mutual information indicates that a predictor strongly influences the imputed variable's distribution, making it crucial for accurate imputation. This helps validate that your imputation models are using the most relevant predictors and can identify when key predictive relationships exist in your data. +

+ + {/* Color scale within explanation box */} +
+
+ Color scale: +
+
+
+
+
+
+
+
+ + Weak + + Strong ► + +
+
+
+ + {/* Message when no predictor-target data is available */} + {!hasPredictorTargetMI && ( +
+

+ Note: No predictor-imputed variable mutual information data was found in this CSV file. It is recommended to include this data in your analysis to understand which predictors are most informative for imputing each variable. This helps validate that your imputation models are leveraging the most relevant predictive relationships in your data. +

+
+ )} +
+
+ ); +} diff --git a/microimputation-dashboard/components/VisualizationDashboard.tsx b/microimputation-dashboard/components/VisualizationDashboard.tsx index 16ce893..d98be55 100644 --- a/microimputation-dashboard/components/VisualizationDashboard.tsx +++ b/microimputation-dashboard/components/VisualizationDashboard.tsx @@ -6,6 +6,7 @@ import { GitHubArtifactInfo, createShareableUrl } from '@/utils/deeplinks'; import BenchmarkLossCharts from './BenchmarkLossCharts'; import PerVariableCharts from './PerVariableCharts'; import VisualizationTabs from './VisualizationTabs'; +import PredictorCorrelationMatrix from './PredictorCorrelationMatrix'; import { Share } from 'lucide-react'; interface VisualizationDashboardProps { @@ -88,7 +89,7 @@ export default function VisualizationDashboard({ const tabsList = []; if (dataAnalysis.hasBenchmarkLoss) { - tabsList.push({ id: 'overview', label: 'Overview' }); + tabsList.push({ id: 'overview', label: 'Model benchmarking' }); } if (dataAnalysis.numericalVars.length > 0) { @@ -107,6 +108,13 @@ export default function VisualizationDashboard({ }); } + if (dataAnalysis.hasPredictorCorrelation) { + tabsList.push({ + id: 'correlation', + label: 'Predictor correlation', + }); + } + return tabsList; }, [dataAnalysis]); @@ -254,6 +262,11 @@ export default function VisualizationDashboard({ ))}
)} + + {/* Predictor Correlation Tab */} + {activeTab === 'correlation' && ( + + )}
); From 961ab5a335c1747281cc5ec654a8e5140a53183b Mon Sep 17 00:00:00 2001 From: juaristi22 Date: Fri, 24 Oct 2025 21:50:29 +0800 Subject: [PATCH 08/12] adding predictor selection to dashboard --- .../PredictorOrderingRobustness.tsx | 387 ++++++++++++++++++ .../components/VisualizationDashboard.tsx | 14 + 2 files changed, 401 insertions(+) create mode 100644 microimputation-dashboard/components/PredictorOrderingRobustness.tsx diff --git a/microimputation-dashboard/components/PredictorOrderingRobustness.tsx b/microimputation-dashboard/components/PredictorOrderingRobustness.tsx new file mode 100644 index 0000000..e784a24 --- /dev/null +++ b/microimputation-dashboard/components/PredictorOrderingRobustness.tsx @@ -0,0 +1,387 @@ +'use client'; + +import { useMemo } from 'react'; +import { ImputationDataPoint } from '@/types/imputation'; +import { BarChart, Bar, XAxis, YAxis, CartesianGrid, Tooltip, Legend, ResponsiveContainer, Cell } from 'recharts'; + +interface PredictorOrderingRobustnessProps { + data: ImputationDataPoint[]; +} + +interface ProgressiveStep { + step: number; + predictorAdded: string; + predictors: string[]; + cumulativeImprovement: number; + marginalImprovement: number; +} + +interface PredictorImportance { + predictor: string; + relativeImpact: number; + lossIncrease: number; +} + +export default function PredictorOrderingRobustness({ data }: PredictorOrderingRobustnessProps) { + // Filter for progressive inclusion data + const progressiveInclusionData = useMemo(() => { + return data.filter(d => d.type === 'progressive_inclusion'); + }, [data]); + + // Filter for predictor importance data + const predictorImportanceData = useMemo(() => { + return data.filter(d => d.type === 'predictor_importance'); + }, [data]); + + // Parse progressive inclusion steps + const progressiveSteps = useMemo(() => { + const stepData: ProgressiveStep[] = []; + const cumulativeData = progressiveInclusionData.filter( + d => d.metric_name === 'cumulative_improvement' + ); + + 
cumulativeData.forEach(d => { + try { + const additionalInfo = typeof d.additional_info === 'string' + ? JSON.parse(d.additional_info) + : d.additional_info; + + const step = additionalInfo?.step; + const predictorAdded = additionalInfo?.predictor_added; + const predictors = additionalInfo?.predictors || []; + + if (step !== undefined && predictorAdded) { + // Find corresponding marginal improvement + const marginalData = progressiveInclusionData.find( + m => m.metric_name === 'marginal_improvement' && + JSON.parse(typeof m.additional_info === 'string' ? m.additional_info : JSON.stringify(m.additional_info))?.step === step + ); + + stepData.push({ + step, + predictorAdded, + predictors, + cumulativeImprovement: d.metric_value ?? 0, + marginalImprovement: marginalData?.metric_value ?? 0, + }); + } + } catch (e) { + console.error('Failed to parse progressive inclusion data:', e); + } + }); + + return stepData.sort((a, b) => a.step - b.step); + }, [progressiveInclusionData]); + + // Parse predictor importance + const importanceData = useMemo(() => { + const importanceMap = new Map(); + + predictorImportanceData.forEach(d => { + try { + const additionalInfo = typeof d.additional_info === 'string' + ? JSON.parse(d.additional_info) + : d.additional_info; + + const predictor = additionalInfo?.removed_predictor || d.variable; + + if (predictor) { + if (!importanceMap.has(predictor)) { + importanceMap.set(predictor, { + predictor, + relativeImpact: 0, + lossIncrease: 0, + }); + } + + const entry = importanceMap.get(predictor)!; + if (d.metric_name === 'relative_impact') { + entry.relativeImpact = d.metric_value ?? 0; + } else if (d.metric_name === 'loss_increase') { + entry.lossIncrease = d.metric_value ?? 
0; + } + } + } catch (e) { + console.error('Failed to parse predictor importance data:', e); + } + }); + + return Array.from(importanceMap.values()).sort( + (a, b) => Math.abs(b.relativeImpact) - Math.abs(a.relativeImpact) + ); + }, [predictorImportanceData]); + + const hasProgressiveData = progressiveSteps.length > 0; + const hasImportanceData = importanceData.length > 0; + + if (!hasProgressiveData && !hasImportanceData) { + return null; + } + + // Find best combination (highest cumulative improvement) + const bestCombination = progressiveSteps.reduce((best, current) => + current.cumulativeImprovement > best.cumulativeImprovement ? current : best, + progressiveSteps[0] + ); + + return ( +
+
+

+ Predictor selection and robustness +

+

+ Analysis of predictor combinations and their impact on model performance +

+
+ + {/* Progressive Inclusion Section */} + {hasProgressiveData && ( +
+

+ Predictor addition order +

+ + {/* Explanation */} +
+

+ How this works: This analysis adds predictors one at a time, + choosing the predictor that improves performance the most at each step. This + step-by-step approach is efficient but doesn't test + every possible combination of predictors. +

+

+ Reading the chart: The bars show cumulative improvement from + baseline as predictors are added. Larger improvements indicate more valuable + predictor combinations. +

+
+ + {/* Best Combination Highlight */} + {bestCombination && ( +
+

+ Best predictor combination +

+
+
+

+ Predictors:{' '} + + {bestCombination.predictors.join(' → ')} + +

+

+ Cumulative improvement:{' '} + + {(bestCombination.cumulativeImprovement * 100).toFixed(3)}% + +

+
+
+
+ )} + + {/* Step-by-step visualization */} +
+ {progressiveSteps.map((step) => { + const isPositive = step.marginalImprovement >= 0; + const isBest = step.step === bestCombination?.step; + + return ( +
+
+
+
+ {step.step} +
+
+ +
+
+ Add: + + {step.predictorAdded} + +
+ +
+
+ Marginal improvement: +
+
+
+
+ + {isPositive ? '+' : ''}{(step.marginalImprovement * 100).toFixed(3)}% + +
+
+ +
+ Cumulative improvement: +
+
+
+
+ + {(step.cumulativeImprovement * 100).toFixed(3)}% + +
+
+
+ +
+ Current predictors: {step.predictors.join(' → ')} +
+
+
+
+ ); + })} +
+
+ )} + + {/* Predictor Importance Section */} + {hasImportanceData && ( +
+

+ Predictor robustness check +

+ + {/* Explanation */} +
+

+ What this shows: This analysis measures how much performance + degrades when each predictor is removed. Predictors that cause large performance + drops when removed are critical to the model's accuracy. +

+

+ Reading the chart: Positive values (bars pointing right) indicate + performance worsens when the predictor is removed, meaning the predictor is helpful. + Negative values suggest removing the predictor might actually improve performance. +

+
+ + {/* Bar chart */} +
+ + + + `${val.toFixed(1)}%`} tick={{ fill: '#000000' }} /> + + { + if (name === 'relativeImpact') { + return [`${value.toFixed(3)}%`, 'Relative Impact']; + } + return [value.toFixed(6), 'Loss Increase']; + }} + /> + + + {importanceData.map((entry, index) => ( + = 0 ? '#ef4444' : '#22c55e'} + /> + ))} + + + +
+ + {/* Detailed table */} +
+ + + + + + + + + + + {importanceData.map((item) => { + const isHelpful = item.relativeImpact > 1; + const isCritical = item.relativeImpact > 10; + const isHarmful = item.relativeImpact < -1; + + let assessment = 'Minimal impact'; + let assessmentColor = 'text-gray-600'; + + if (isCritical) { + assessment = 'Critical predictor'; + assessmentColor = 'text-red-700 font-semibold'; + } else if (isHelpful) { + assessment = 'Helpful predictor'; + assessmentColor = 'text-orange-600'; + } else if (isHarmful) { + assessment = 'Consider removing'; + assessmentColor = 'text-green-600'; + } + + return ( + + + + + + + ); + })} + +
+ Predictor + + Impact when removed + + Loss increase + + Assessment +
+ {item.predictor} + + = 0 ? 'text-red-600' : 'text-green-600'}> + {item.relativeImpact >= 0 ? '+' : ''}{item.relativeImpact.toFixed(3)}% + + + {item.lossIncrease >= 0 ? '+' : ''}{item.lossIncrease.toFixed(6)} + + {assessment} +
+
+
+ )} +
+ ); +} diff --git a/microimputation-dashboard/components/VisualizationDashboard.tsx b/microimputation-dashboard/components/VisualizationDashboard.tsx index d98be55..23ba060 100644 --- a/microimputation-dashboard/components/VisualizationDashboard.tsx +++ b/microimputation-dashboard/components/VisualizationDashboard.tsx @@ -7,6 +7,7 @@ import BenchmarkLossCharts from './BenchmarkLossCharts'; import PerVariableCharts from './PerVariableCharts'; import VisualizationTabs from './VisualizationTabs'; import PredictorCorrelationMatrix from './PredictorCorrelationMatrix'; +import PredictorOrderingRobustness from './PredictorOrderingRobustness'; import { Share } from 'lucide-react'; interface VisualizationDashboardProps { @@ -78,6 +79,7 @@ export default function VisualizationDashboard({ hasBenchmarkLoss, hasDistributionDistance: types.has('distribution_distance'), hasPredictorCorrelation: types.has('predictor_correlation'), + hasPredictorOrdering: types.has('progressive_inclusion') || types.has('predictor_importance'), numericalVars, categoricalVars, hasPerVariableData: numericalVars.length > 0 || categoricalVars.length > 0, @@ -115,6 +117,13 @@ export default function VisualizationDashboard({ }); } + if (dataAnalysis.hasPredictorOrdering) { + tabsList.push({ + id: 'ordering', + label: 'Predictor selection', + }); + } + return tabsList; }, [dataAnalysis]); @@ -267,6 +276,11 @@ export default function VisualizationDashboard({ {activeTab === 'correlation' && ( )} + + {/* Predictor Ordering and Robustness Tab */} + {activeTab === 'ordering' && ( + + )}
); From 8f942879277c64bdab277ae0e31845e946aedad9 Mon Sep 17 00:00:00 2001 From: juaristi22 Date: Fri, 24 Oct 2025 22:41:40 +0800 Subject: [PATCH 09/12] adding distribution metrics to dashboard --- .../components/ImputationResults.tsx | 315 ++++++++++++++++++ .../PredictorOrderingRobustness.tsx | 6 +- .../components/VisualizationDashboard.tsx | 245 ++++++++++++-- 3 files changed, 542 insertions(+), 24 deletions(-) create mode 100644 microimputation-dashboard/components/ImputationResults.tsx diff --git a/microimputation-dashboard/components/ImputationResults.tsx b/microimputation-dashboard/components/ImputationResults.tsx new file mode 100644 index 0000000..ce3ab42 --- /dev/null +++ b/microimputation-dashboard/components/ImputationResults.tsx @@ -0,0 +1,315 @@ +'use client'; + +import { useMemo } from 'react'; +import { ImputationDataPoint } from '@/types/imputation'; +import { BarChart, Bar, XAxis, YAxis, CartesianGrid, Tooltip, Legend, ResponsiveContainer, Cell } from 'recharts'; + +interface ImputationResultsProps { + data: ImputationDataPoint[]; +} + +interface DistributionMetric { + variable: string; + method: string; + metricName: string; + value: number; +} + +export default function ImputationResults({ data }: ImputationResultsProps) { + // Filter for distribution distance data + const distributionData = useMemo(() => { + return data.filter(d => d.type === 'distribution_distance'); + }, [data]); + + // Group by metric type + const { wassersteinData, klDivergenceData } = useMemo(() => { + const wasserstein: DistributionMetric[] = []; + const klDiv: DistributionMetric[] = []; + + distributionData.forEach(d => { + const metric: DistributionMetric = { + variable: d.variable, + method: d.method, + metricName: d.metric_name, + value: d.metric_value ?? 
0, + }; + + if (d.metric_name === 'wasserstein_distance') { + wasserstein.push(metric); + } else if (d.metric_name === 'kl_divergence') { + klDiv.push(metric); + } + }); + + // Sort by value (ascending - lower is better) + wasserstein.sort((a, b) => a.value - b.value); + klDiv.sort((a, b) => a.value - b.value); + + return { + wassersteinData: wasserstein, + klDivergenceData: klDiv + }; + }, [distributionData]); + + const hasWasserstein = wassersteinData.length > 0; + const hasKLDivergence = klDivergenceData.length > 0; + + if (!hasWasserstein && !hasKLDivergence) { + return null; + } + + // Color function based on value quality (lower is better) + const getWassersteinColor = (value: number): string => { + if (value < 0.01) return '#16a34a'; // Dark green - excellent + if (value < 0.05) return '#22c55e'; // Green - good + if (value < 0.1) return '#eab308'; // Yellow - moderate + if (value < 0.2) return '#f97316'; // Orange - fair + return '#ef4444'; // Red - poor + }; + + const getKLColor = (value: number): string => { + if (value < 0.1) return '#16a34a'; // Dark green - excellent + if (value < 0.5) return '#22c55e'; // Green - good + if (value < 1.0) return '#eab308'; // Yellow - moderate + if (value < 5.0) return '#f97316'; // Orange - fair + return '#ef4444'; // Red - poor + }; + + return ( +
+
+

+ Imputation results +

+

+ Distributional quality metrics comparing imputed values to true values +

+
+ + {/* Wasserstein Distance Section */} + {hasWasserstein && ( +
+

+ Numerical variables (Wasserstein distance) +

+ + {/* Explanation */} +
+

+ What is Wasserstein distance? Also known as "Earth Mover's Distance," + this metric measures how much "work" is needed to transform one probability distribution + into another. Think of it as the minimum cost to rearrange one pile of dirt to match + another pile's shape. +

+

+ Why use it for imputation? Wasserstein distance is ideal for numerical + variables because it considers the actual distances between values, not just whether + they match exactly. A value of 0 means perfect imputation, and larger values indicate + greater differences between imputed and true distributions. +

+

+ Interpretation: Values closer to 0 are better. Generally, values below + 0.05 indicate good imputation quality, while values above 0.2 suggest significant + distributional differences. +

+
+ + {/* Bar chart */} +
+ + + + + + [value.toFixed(6), 'Wasserstein Distance']} + /> + + + {wassersteinData.map((entry, index) => ( + + ))} + + + +
+ + {/* Detailed table */} +
+ + + + + + + + + + {wassersteinData.map((item) => { + let assessment = ''; + let assessmentColor = ''; + + if (item.value < 0.01) { + assessment = 'Excellent'; + assessmentColor = 'text-green-700 font-semibold'; + } else if (item.value < 0.05) { + assessment = 'Good'; + assessmentColor = 'text-green-600'; + } else if (item.value < 0.1) { + assessment = 'Moderate'; + assessmentColor = 'text-yellow-600'; + } else if (item.value < 0.2) { + assessment = 'Fair'; + assessmentColor = 'text-orange-600'; + } else { + assessment = 'Poor'; + assessmentColor = 'text-red-600 font-semibold'; + } + + return ( + + + + + + ); + })} + +
+ Variable + + Wasserstein Distance + + Quality Assessment +
+ {item.variable} + + {item.value.toFixed(6)} + + {assessment} +
+
+
+ )} + + {/* KL Divergence Section */} + {hasKLDivergence && ( +
+

+ Categorical variables (KL-divergence) +

+ + {/* Explanation */} +
+

+ What is KL-divergence? Kullback-Leibler divergence measures how much + one probability distribution differs from another. It quantifies the "information lost" + when using the imputed distribution to approximate the true distribution. +

+

+ Why use it for categorical variables? KL-divergence is particularly + useful for categorical data because it compares probability distributions across + categories. It's sensitive to differences in how probabilities are distributed across + all possible categories. +

+

+ Interpretation: A value of 0 means perfect match. Values below 0.5 + indicate good imputation, while values above 5.0 suggest substantial distributional + differences. Note that KL-divergence is not symmetric and can range from 0 to infinity. +

+
+ + {/* Bar chart */} +
+ + + + + + [value.toFixed(6), 'KL-Divergence']} + /> + + + {klDivergenceData.map((entry, index) => ( + + ))} + + + +
+ + {/* Detailed table */} +
+ + + + + + + + + + {klDivergenceData.map((item) => { + let assessment = ''; + let assessmentColor = ''; + + if (item.value < 0.1) { + assessment = 'Excellent'; + assessmentColor = 'text-green-700 font-semibold'; + } else if (item.value < 0.5) { + assessment = 'Good'; + assessmentColor = 'text-green-600'; + } else if (item.value < 1.0) { + assessment = 'Moderate'; + assessmentColor = 'text-yellow-600'; + } else if (item.value < 5.0) { + assessment = 'Fair'; + assessmentColor = 'text-orange-600'; + } else { + assessment = 'Poor'; + assessmentColor = 'text-red-600 font-semibold'; + } + + return ( + + + + + + ); + })} + +
+ Variable + + KL-Divergence + + Quality Assessment +
+ {item.variable} + + {item.value.toFixed(6)} + + {assessment} +
+
+
+ )} +
+ ); +} diff --git a/microimputation-dashboard/components/PredictorOrderingRobustness.tsx b/microimputation-dashboard/components/PredictorOrderingRobustness.tsx index e784a24..65ceee0 100644 --- a/microimputation-dashboard/components/PredictorOrderingRobustness.tsx +++ b/microimputation-dashboard/components/PredictorOrderingRobustness.tsx @@ -148,7 +148,7 @@ export default function PredictorOrderingRobustness({ data }: PredictorOrderingR How this works: This analysis adds predictors one at a time, choosing the predictor that improves performance the most at each step. This step-by-step approach is efficient but doesn't test - every possible combination of predictors. + every possible combination of predictors. Note that this analysis may differ depending on the model type passed when using the `progressive_predictor_inclusion` function that produced these results.

Reading the chart: The bars show cumulative improvement from @@ -176,6 +176,10 @@ export default function PredictorOrderingRobustness({ data }: PredictorOrderingR {(bestCombination.cumulativeImprovement * 100).toFixed(3)}% + {' '} + + (relative to the first predictor added, which was the best single predictor) +

diff --git a/microimputation-dashboard/components/VisualizationDashboard.tsx b/microimputation-dashboard/components/VisualizationDashboard.tsx index 23ba060..2071999 100644 --- a/microimputation-dashboard/components/VisualizationDashboard.tsx +++ b/microimputation-dashboard/components/VisualizationDashboard.tsx @@ -8,6 +8,7 @@ import PerVariableCharts from './PerVariableCharts'; import VisualizationTabs from './VisualizationTabs'; import PredictorCorrelationMatrix from './PredictorCorrelationMatrix'; import PredictorOrderingRobustness from './PredictorOrderingRobustness'; +import ImputationResults from './ImputationResults'; import { Share } from 'lucide-react'; interface VisualizationDashboardProps { @@ -75,14 +76,215 @@ export default function VisualizationDashboard({ categoricalVars.push(...Array.from(llVars)); } + // Check for actual distribution distance data (wasserstein or kl_divergence) + const distributionData = data.filter(d => d.type === 'distribution_distance'); + const hasWasserstein = distributionData.some(d => d.metric_name === 'wasserstein_distance' && d.metric_value !== null); + const hasKLDivergence = distributionData.some(d => d.metric_name === 'kl_divergence' && d.metric_value !== null); + const hasDistributionDistance = hasWasserstein || hasKLDivergence; + + // Check for predictor correlation data + const correlationData = data.filter(d => d.type === 'predictor_correlation'); + const hasPredictorCorrelation = correlationData.length > 0 && correlationData.some(d => d.metric_value !== null); + + // Check for predictor ordering/importance data + const progressiveData = data.filter(d => d.type === 'progressive_inclusion'); + const importanceData = data.filter(d => d.type === 'predictor_importance'); + const hasPredictorOrdering = (progressiveData.length > 0 && progressiveData.some(d => d.metric_value !== null)) || + (importanceData.length > 0 && importanceData.some(d => d.metric_value !== null)); + + // Find imputed variables (from 
distribution_distance data) + const imputedVars = new Set(); + distributionData.forEach(d => { + if (d.variable && d.metric_value !== null) { + imputedVars.add(d.variable); + } + }); + + // Calculate best performing model + let bestModel = ''; + let avgLoss = 0; + + if (hasBenchmarkLoss) { + const benchmarkData = data.filter(d => d.type === 'benchmark_loss'); + const methods = Array.from(new Set(benchmarkData.map(d => d.method))); + + // Calculate weighted combined rank for each method (same logic as BenchmarkLossCharts) + const quantileRanks = new Map(); + const logLossRanks = new Map(); + const quantileVarCounts = new Map>(); + const logLossVarCounts = new Map>(); + + methods.forEach(method => { + const quantileData = benchmarkData.filter( + d => d.method === method && d.metric_name === 'quantile_loss' && d.split === 'test' && + d.quantile === 'mean' && !d.variable.includes('_mean_all') && d.metric_value !== null + ); + const logLossData = benchmarkData.filter( + d => d.method === method && d.metric_name === 'log_loss' && d.split === 'test' && + d.quantile === 'mean' && !d.variable.includes('_mean_all') && d.metric_value !== null + ); + + if (quantileData.length > 0) { + const avgQuantile = quantileData.reduce((sum, d) => sum + (d.metric_value ?? 0), 0) / quantileData.length; + quantileRanks.set(method, avgQuantile); + quantileVarCounts.set(method, new Set(quantileData.map(d => d.variable))); + } + + if (logLossData.length > 0) { + const avgLogLoss = logLossData.reduce((sum, d) => sum + (d.metric_value ?? 
0), 0) / logLossData.length; + logLossRanks.set(method, avgLogLoss); + logLossVarCounts.set(method, new Set(logLossData.map(d => d.variable))); + } + }); + + // Rank methods by their average losses + const rankedQuantile = Array.from(quantileRanks.entries()).sort((a, b) => a[1] - b[1]); + const rankedLogLoss = Array.from(logLossRanks.entries()).sort((a, b) => a[1] - b[1]); + + const combinedRanks = new Map(); + methods.forEach(method => { + const qRank = rankedQuantile.findIndex(([m]) => m === method) + 1; + const lRank = rankedLogLoss.findIndex(([m]) => m === method) + 1; + const nQuantileVars = quantileVarCounts.get(method)?.size || 0; + const nLogLossVars = logLossVarCounts.get(method)?.size || 0; + const totalVars = nQuantileVars + nLogLossVars; + + if (totalVars > 0) { + let weightedRank = 0; + if (qRank > 0) { + weightedRank += nQuantileVars * qRank; + } + if (lRank > 0) { + weightedRank += nLogLossVars * lRank; + } + combinedRanks.set(method, weightedRank / totalVars); + } + }); + + const sortedMethods = Array.from(combinedRanks.entries()).sort((a, b) => a[1] - b[1]); + if (sortedMethods.length > 0) { + bestModel = sortedMethods[0][0]; + + // Calculate average loss for best model + const bestMethodData = benchmarkData.filter( + d => d.method === bestModel && d.split === 'test' && + d.quantile === 'mean' && !d.variable.includes('_mean_all') && d.metric_value !== null + ); + if (bestMethodData.length > 0) { + avgLoss = bestMethodData.reduce((sum, d) => sum + (d.metric_value ?? 
0), 0) / bestMethodData.length; + } + } + } + + // Calculate quality scores by variable for model performance + let modelExcellent = 0; + let modelGood = 0; + let modelPoor = 0; + let modelScore = 0; + let modelQuality = ''; + + if (hasBenchmarkLoss && bestModel) { + const benchmarkData = data.filter(d => d.type === 'benchmark_loss'); + const bestModelVars = benchmarkData.filter( + d => d.method === bestModel && d.split === 'test' && + d.quantile === 'mean' && !d.variable.includes('_mean_all') && d.metric_value !== null + ); + + bestModelVars.forEach(d => { + const loss = d.metric_value ?? 0; + if (loss < 0.02) modelExcellent++; + else if (loss < 0.05) modelGood++; + else modelPoor++; + }); + + const totalModelVars = modelExcellent + modelGood + modelPoor; + if (totalModelVars > 0) { + modelScore = ((modelExcellent * 100) + (modelGood * 75)) / totalModelVars; + if (modelScore >= 90) modelQuality = 'Excellent'; + else if (modelScore >= 70) modelQuality = 'Good'; + else modelQuality = 'Needs improvement'; + } + } + + // Calculate quality scores by variable for distributional accuracy + let distExcellent = 0; + let distGood = 0; + let distPoor = 0; + let distScore = 0; + let distQuality = ''; + + distributionData.forEach(d => { + const value = d.metric_value ?? 
0; + // Different thresholds for Wasserstein vs KL-divergence + if (d.metric_name === 'wasserstein_distance') { + if (value < 0.01) distExcellent++; + else if (value < 0.05) distGood++; + else distPoor++; + } else if (d.metric_name === 'kl_divergence') { + if (value < 0.1) distExcellent++; + else if (value < 0.5) distGood++; + else distPoor++; + } + }); + + const totalDistVars = distExcellent + distGood + distPoor; + if (totalDistVars > 0) { + distScore = ((distExcellent * 100) + (distGood * 75)) / totalDistVars; + if (distScore >= 90) distQuality = 'Excellent'; + else if (distScore >= 70) distQuality = 'Good'; + else distQuality = 'Needs improvement'; + } + + // Calculate overall quality (weighted average) + let overallScore = 0; + let overallQuality = ''; + let overallColor = ''; + const hasModelScore = modelScore > 0; + const hasDistScore = distScore > 0; + + if (hasModelScore && hasDistScore) { + overallScore = (modelScore + distScore) / 2; + } else if (hasModelScore) { + overallScore = modelScore; + } else if (hasDistScore) { + overallScore = distScore; + } + + if (overallScore >= 90) { + overallQuality = 'Excellent quality'; + overallColor = 'text-green-700 bg-green-50 border-green-500'; + } else if (overallScore >= 70) { + overallQuality = 'Good quality'; + overallColor = 'text-yellow-700 bg-yellow-50 border-yellow-500'; + } else if (overallScore > 0) { + overallQuality = 'Needs improvement'; + overallColor = 'text-red-700 bg-red-50 border-red-500'; + } + return { hasBenchmarkLoss, - hasDistributionDistance: types.has('distribution_distance'), - hasPredictorCorrelation: types.has('predictor_correlation'), - hasPredictorOrdering: types.has('progressive_inclusion') || types.has('predictor_importance'), + hasDistributionDistance, + hasPredictorCorrelation, + hasPredictorOrdering, numericalVars, categoricalVars, hasPerVariableData: numericalVars.length > 0 || categoricalVars.length > 0, + imputedVars: Array.from(imputedVars).sort(), + bestModel, + avgLoss, + 
overallScore, + overallQuality, + overallColor, + modelScore, + modelQuality, + modelExcellent, + modelGood, + modelPoor, + distScore, + distQuality, + distExcellent, + distGood, + distPoor, }; }, [data]); @@ -94,6 +296,13 @@ export default function VisualizationDashboard({ tabsList.push({ id: 'overview', label: 'Model benchmarking' }); } + if (dataAnalysis.hasDistributionDistance) { + tabsList.push({ + id: 'imputation', + label: 'Imputation results', + }); + } + if (dataAnalysis.numericalVars.length > 0) { tabsList.push({ id: 'numerical', @@ -201,27 +410,12 @@ export default function VisualizationDashboard({
- {/* Data Info */} + {/* Imputation Summary */}
-

Dataset Overview

-
-
-

Total Records

-

{data.length}

-
- {dataAnalysis.numericalVars.length > 0 && ( -
-

Numerical Variables

-

{dataAnalysis.numericalVars.length}

-
- )} - {dataAnalysis.categoricalVars.length > 0 && ( -
-

Categorical Variables

-

{dataAnalysis.categoricalVars.length}

-
- )} -
+

Imputation summary

+

+ Assessment of the quality of the imputations produced by the best-performing (or the only selected) model +

{/* Tabs Navigation */} @@ -281,6 +475,11 @@ export default function VisualizationDashboard({ {activeTab === 'ordering' && ( )} + + {/* Imputation Results Tab */} + {activeTab === 'imputation' && ( + + )}
); From e93bfa375a452d56eda5944f1a5b1b49f72eec58 Mon Sep 17 00:00:00 2001 From: juaristi22 Date: Fri, 24 Oct 2025 23:43:18 +0800 Subject: [PATCH 10/12] add summary and fix deployment --- .../components/BenchmarkLossCharts.tsx | 2 +- .../components/ImputationResults.tsx | 6 +- .../components/VisualizationDashboard.tsx | 201 ++++++++++++++---- 3 files changed, 161 insertions(+), 48 deletions(-) diff --git a/microimputation-dashboard/components/BenchmarkLossCharts.tsx b/microimputation-dashboard/components/BenchmarkLossCharts.tsx index 88b579d..8cf9e5c 100644 --- a/microimputation-dashboard/components/BenchmarkLossCharts.tsx +++ b/microimputation-dashboard/components/BenchmarkLossCharts.tsx @@ -238,7 +238,7 @@ export default function BenchmarkLossCharts({ data }: BenchmarkLossChartsProps) quantileTrainTestRatio, logLossTrainTestRatio, }; - }, [methods, quantileLossData, logLossChartData, benchmarkData]); + }, [methods, quantileLossData, logLossData, logLossChartData, benchmarkData]); // Set default selected method to best model useMemo(() => { diff --git a/microimputation-dashboard/components/ImputationResults.tsx b/microimputation-dashboard/components/ImputationResults.tsx index ce3ab42..6eceeca 100644 --- a/microimputation-dashboard/components/ImputationResults.tsx +++ b/microimputation-dashboard/components/ImputationResults.tsx @@ -96,10 +96,10 @@ export default function ImputationResults({ data }: ImputationResultsProps) { {/* Explanation */}

- What is Wasserstein distance? Also known as "Earth Mover's Distance," - this metric measures how much "work" is needed to transform one probability distribution + What is Wasserstein distance? Also known as "Earth Mover's Distance", + this metric measures how much "work" is needed to transform one probability distribution into another. Think of it as the minimum cost to rearrange one pile of dirt to match - another pile's shape. + another pile's shape.

Why use it for imputation? Wasserstein distance is ideal for numerical diff --git a/microimputation-dashboard/components/VisualizationDashboard.tsx b/microimputation-dashboard/components/VisualizationDashboard.tsx index 2071999..f620925 100644 --- a/microimputation-dashboard/components/VisualizationDashboard.tsx +++ b/microimputation-dashboard/components/VisualizationDashboard.tsx @@ -49,6 +49,11 @@ export default function VisualizationDashboard({ const numericalVars: string[] = []; const categoricalVars: string[] = []; + // Get all unique methods from benchmark data + const allMethods = hasBenchmarkLoss + ? Array.from(new Set(data.filter(d => d.type === 'benchmark_loss').map(d => d.method))) + : []; + if (hasBenchmarkLoss) { const benchmarkData = data.filter(d => d.type === 'benchmark_loss'); @@ -100,7 +105,7 @@ export default function VisualizationDashboard({ } }); - // Calculate best performing model + // Calculate best performing model (same logic as BenchmarkLossCharts) let bestModel = ''; let avgLoss = 0; @@ -108,72 +113,117 @@ export default function VisualizationDashboard({ const benchmarkData = data.filter(d => d.type === 'benchmark_loss'); const methods = Array.from(new Set(benchmarkData.map(d => d.method))); - // Calculate weighted combined rank for each method (same logic as BenchmarkLossCharts) - const quantileRanks = new Map(); - const logLossRanks = new Map(); + // Filter quantile and log loss data (matching BenchmarkLossCharts logic) + const quantileLossData = benchmarkData.filter( + d => d.metric_name === 'quantile_loss' && + d.split === 'test' && + typeof d.quantile === 'number' && + d.quantile >= 0 && + d.quantile <= 1 + ); + + const logLossData = benchmarkData.filter( + d => d.metric_name === 'log_loss' && + d.split === 'test' && + d.metric_value !== null + ); + + // Calculate average quantile loss per method + const quantileLossAvg = new Map(); const quantileVarCounts = new Map>(); + + if (quantileLossData.length > 0) { + const methodSums = new 
Map(); + quantileLossData.forEach(d => { + if (d.metric_value !== null) { + if (!methodSums.has(d.method)) { + methodSums.set(d.method, { sum: 0, count: 0 }); + } + const entry = methodSums.get(d.method)!; + entry.sum += d.metric_value; + entry.count += 1; + + if (!quantileVarCounts.has(d.method)) { + quantileVarCounts.set(d.method, new Set()); + } + quantileVarCounts.get(d.method)!.add(d.variable); + } + }); + methodSums.forEach((value, method) => { + quantileLossAvg.set(method, value.sum / value.count); + }); + } + + // Calculate average log loss per method + const logLossAvg = new Map(); const logLossVarCounts = new Map>(); - methods.forEach(method => { - const quantileData = benchmarkData.filter( - d => d.method === method && d.metric_name === 'quantile_loss' && d.split === 'test' && - d.quantile === 'mean' && !d.variable.includes('_mean_all') && d.metric_value !== null - ); - const logLossData = benchmarkData.filter( - d => d.method === method && d.metric_name === 'log_loss' && d.split === 'test' && - d.quantile === 'mean' && !d.variable.includes('_mean_all') && d.metric_value !== null - ); - - if (quantileData.length > 0) { - const avgQuantile = quantileData.reduce((sum, d) => sum + (d.metric_value ?? 
0), 0) / quantileData.length; - quantileRanks.set(method, avgQuantile); - quantileVarCounts.set(method, new Set(quantileData.map(d => d.variable))); - } + if (logLossData.length > 0) { + const methodSums = new Map(); + logLossData.forEach(d => { + if (d.metric_value !== null) { + if (!methodSums.has(d.method)) { + methodSums.set(d.method, { sum: 0, count: 0 }); + } + const entry = methodSums.get(d.method)!; + entry.sum += d.metric_value; + entry.count += 1; + + if (!logLossVarCounts.has(d.method)) { + logLossVarCounts.set(d.method, new Set()); + } + logLossVarCounts.get(d.method)!.add(d.variable); + } + }); + methodSums.forEach((value, method) => { + logLossAvg.set(method, value.sum / value.count); + }); + } - if (logLossData.length > 0) { - const avgLogLoss = logLossData.reduce((sum, d) => sum + (d.metric_value ?? 0), 0) / logLossData.length; - logLossRanks.set(method, avgLogLoss); - logLossVarCounts.set(method, new Set(logLossData.map(d => d.variable))); - } - }); + // Rank methods by each metric (lower is better) + const rankMethods = (avgMap: Map): Map => { + const sorted = Array.from(avgMap.entries()).sort((a, b) => a[1] - b[1]); + const ranks = new Map(); + sorted.forEach(([method], index) => { + ranks.set(method, index + 1); + }); + return ranks; + }; - // Rank methods by their average losses - const rankedQuantile = Array.from(quantileRanks.entries()).sort((a, b) => a[1] - b[1]); - const rankedLogLoss = Array.from(logLossRanks.entries()).sort((a, b) => a[1] - b[1]); + const quantileRanks = rankMethods(quantileLossAvg); + const logLossRanks = rankMethods(logLossAvg); + // Calculate weighted combined rank const combinedRanks = new Map(); methods.forEach(method => { - const qRank = rankedQuantile.findIndex(([m]) => m === method) + 1; - const lRank = rankedLogLoss.findIndex(([m]) => m === method) + 1; + const qRank = quantileRanks.get(method); + const lRank = logLossRanks.get(method); const nQuantileVars = quantileVarCounts.get(method)?.size || 0; const 
nLogLossVars = logLossVarCounts.get(method)?.size || 0; const totalVars = nQuantileVars + nLogLossVars; if (totalVars > 0) { let weightedRank = 0; - if (qRank > 0) { + if (qRank !== undefined) { weightedRank += nQuantileVars * qRank; } - if (lRank > 0) { + if (lRank !== undefined) { weightedRank += nLogLossVars * lRank; } combinedRanks.set(method, weightedRank / totalVars); + } else { + combinedRanks.set(method, Infinity); } }); - const sortedMethods = Array.from(combinedRanks.entries()).sort((a, b) => a[1] - b[1]); - if (sortedMethods.length > 0) { - bestModel = sortedMethods[0][0]; - - // Calculate average loss for best model - const bestMethodData = benchmarkData.filter( - d => d.method === bestModel && d.split === 'test' && - d.quantile === 'mean' && !d.variable.includes('_mean_all') && d.metric_value !== null - ); - if (bestMethodData.length > 0) { - avgLoss = bestMethodData.reduce((sum, d) => sum + (d.metric_value ?? 0), 0) / bestMethodData.length; + // Find best method (lowest combined rank) + let bestRank = Infinity; + combinedRanks.forEach((rank, method) => { + if (rank < bestRank) { + bestRank = rank; + bestModel = method; } - } + }); } // Calculate quality scores by variable for model performance @@ -285,6 +335,7 @@ export default function VisualizationDashboard({ distExcellent, distGood, distPoor, + allMethods, }; }, [data]); @@ -416,6 +467,68 @@ export default function VisualizationDashboard({

Assessment of the quality of the imputations produced by the best-performing (or the only selected) model

+ +
+ {/* Imputed Variables Section */} +
+

+ Imputed Variables +

+ {dataAnalysis.imputedVars.length > 0 ? ( +
+

+ {dataAnalysis.imputedVars.length} variable{dataAnalysis.imputedVars.length !== 1 ? 's' : ''} imputed +

+
    + {dataAnalysis.imputedVars.map((variable) => ( +
  • + {variable} +
  • + ))} +
+
+ ) : ( +

+ No imputed variable information available in the CSV +

+ )} +
+ + {/* Best Model Section */} +
+

+ {dataAnalysis.allMethods.length === 1 ? 'Imputation Model' : 'Best Performing Model'} +

+ {dataAnalysis.bestModel ? ( +
+
+ + {dataAnalysis.bestModel} + + {dataAnalysis.allMethods.length === 1 && ( + + Only model + + )} + {dataAnalysis.allMethods.length > 1 && ( + + Best of {dataAnalysis.allMethods.length} + + )} +
+ {dataAnalysis.allMethods.length > 1 && ( +

+ Selected based on combined performance across all cross-validation loss metrics +

+ )} +
+ ) : ( +

+ No model information available in the CSV +

+ )} +
+
{/* Tabs Navigation */} From 4e6ab42fb885e23cd7dded6db1081506fa9eeb31 Mon Sep 17 00:00:00 2001 From: juaristi22 Date: Fri, 24 Oct 2025 23:46:41 +0800 Subject: [PATCH 11/12] replace quotes to fix deployment --- microimputation-dashboard/components/ImputationResults.tsx | 4 ++-- .../components/PredictorCorrelationMatrix.tsx | 2 +- .../components/PredictorOrderingRobustness.tsx | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/microimputation-dashboard/components/ImputationResults.tsx b/microimputation-dashboard/components/ImputationResults.tsx index 6eceeca..598b507 100644 --- a/microimputation-dashboard/components/ImputationResults.tsx +++ b/microimputation-dashboard/components/ImputationResults.tsx @@ -210,13 +210,13 @@ export default function ImputationResults({ data }: ImputationResultsProps) {

What is KL-divergence? Kullback-Leibler divergence measures how much - one probability distribution differs from another. It quantifies the "information lost" + one probability distribution differs from another. It quantifies the "information lost" when using the imputed distribution to approximate the true distribution.

Why use it for categorical variables? KL-divergence is particularly useful for categorical data because it compares probability distributions across - categories. It's sensitive to differences in how probabilities are distributed across + categories. It's sensitive to differences in how probabilities are distributed across all possible categories.

diff --git a/microimputation-dashboard/components/PredictorCorrelationMatrix.tsx b/microimputation-dashboard/components/PredictorCorrelationMatrix.tsx index 331a5cc..6a47799 100644 --- a/microimputation-dashboard/components/PredictorCorrelationMatrix.tsx +++ b/microimputation-dashboard/components/PredictorCorrelationMatrix.tsx @@ -428,7 +428,7 @@ export default function PredictorCorrelationMatrix({ data }: PredictorCorrelatio What is mutual information? Mutual information measures how much information one variable provides about another. Unlike correlation, it captures both linear and non-linear relationships between variables. Values range from 0 (independent variables) to higher positive values (strong dependency).

- Why measure it for imputed variables? Mutual information between predictors and imputed variables reveals which predictors are most informative for imputation. High mutual information indicates that a predictor strongly influences the imputed variable's distribution, making it crucial for accurate imputation. This helps validate that your imputation models are using the most relevant predictors and can identify when key predictive relationships exist in your data. + Why measure it for imputed variables? Mutual information between predictors and imputed variables reveals which predictors are most informative for imputation. High mutual information indicates that a predictor strongly influences the imputed variable's distribution, making it crucial for accurate imputation. This helps validate that your imputation models are using the most relevant predictors and can identify when key predictive relationships exist in your data.

{/* Color scale within explanation box */} diff --git a/microimputation-dashboard/components/PredictorOrderingRobustness.tsx b/microimputation-dashboard/components/PredictorOrderingRobustness.tsx index 65ceee0..fea48da 100644 --- a/microimputation-dashboard/components/PredictorOrderingRobustness.tsx +++ b/microimputation-dashboard/components/PredictorOrderingRobustness.tsx @@ -147,7 +147,7 @@ export default function PredictorOrderingRobustness({ data }: PredictorOrderingR

How this works: This analysis adds predictors one at a time, choosing the predictor that improves performance the most at each step. This - step-by-step approach is efficient but doesn't test + step-by-step approach is efficient but doesn't test every possible combination of predictors. Note that this analysis may differ depending on the model type passed when using the `progressive_predictor_inclusion` function that produced these results.

@@ -282,7 +282,7 @@ export default function PredictorOrderingRobustness({ data }: PredictorOrderingR

What this shows: This analysis measures how much performance degrades when each predictor is removed. Predictors that cause large performance - drops when removed are critical to the model's accuracy. + drops when removed are critical to the model's accuracy.

Reading the chart: Positive values (bars pointing right) indicate From 70fed2baf6645c102f721c4110d77db005e5a798 Mon Sep 17 00:00:00 2001 From: juaristi22 Date: Fri, 24 Oct 2025 23:49:06 +0800 Subject: [PATCH 12/12] remove unused variable --- microimputation-dashboard/components/VisualizationDashboard.tsx | 2 -- 1 file changed, 2 deletions(-) diff --git a/microimputation-dashboard/components/VisualizationDashboard.tsx b/microimputation-dashboard/components/VisualizationDashboard.tsx index f620925..67a1d88 100644 --- a/microimputation-dashboard/components/VisualizationDashboard.tsx +++ b/microimputation-dashboard/components/VisualizationDashboard.tsx @@ -107,7 +107,6 @@ export default function VisualizationDashboard({ // Calculate best performing model (same logic as BenchmarkLossCharts) let bestModel = ''; - let avgLoss = 0; if (hasBenchmarkLoss) { const benchmarkData = data.filter(d => d.type === 'benchmark_loss'); @@ -321,7 +320,6 @@ export default function VisualizationDashboard({ hasPerVariableData: numericalVars.length > 0 || categoricalVars.length > 0, imputedVars: Array.from(imputedVars).sort(), bestModel, - avgLoss, overallScore, overallQuality, overallColor,