diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 6f5b968..6a82c25 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -51,7 +51,7 @@ jobs: run: | python examples/pipeline.py - name: Upload microimputation results - if: always() + if: always() && matrix.python-version == '3.13' uses: actions/upload-artifact@v4 with: name: microimputation-results-${{ github.sha }} diff --git a/.github/workflows/pr_code_changes.yaml b/.github/workflows/pr_code_changes.yaml index a2118b2..c8e2ce8 100644 --- a/.github/workflows/pr_code_changes.yaml +++ b/.github/workflows/pr_code_changes.yaml @@ -70,7 +70,7 @@ jobs: run: | python examples/pipeline.py - name: Upload microimputation results - if: always() + if: always() && matrix.python-version == '3.13' uses: actions/upload-artifact@v4 with: name: microimputation-results-${{ github.sha }} diff --git a/.gitignore b/.gitignore index 555168e..a790fe1 100644 --- a/.gitignore +++ b/.gitignore @@ -76,6 +76,7 @@ celerybeat.pid # Ignore Data Files *.csv +!microimputation-dashboard/**/*.csv *.jpg *.html *.h5 diff --git a/README.md b/README.md index 63cf263..e9b83bb 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,47 @@ # Microimpute -Microimpute enables variable imputation through different statistical methods. It facilitates comparison and benchmarking across methods through quantile loss calculations. +Microimpute enables variable imputation through a variety of statistical methods. By providing a consistent interface across different imputation techniques, it allows researchers and data scientists to easily compare and benchmark different approaches, using quantile loss and log loss calculations to determine which method provides the most accurate results. -To install, run pip install microimpute. +## Features -For image export functionality (PNG/JPG), install with: pip install microimpute[images] +### Multiple imputation methods +- **Statistical Matching**: Distance-based matching for finding similar observations +- **Ordinary Least Squares (OLS)**: Linear regression-based imputation +- **Quantile Regression**: Distribution-aware regression imputation +- **Quantile Random Forests (QRF)**: Non-parametric forest-based approach + +### Automated method selection +- **AutoImpute**: Automatically compares and selects the best imputation method for your data +- **Cross-validation**: Built-in evaluation using quantile loss (numerical) and log loss (categorical) +- **Variable type support**: Handles numerical, categorical, and boolean variables + +### Developer-friendly design +- **Consistent API**: Standardized `fit()` and `predict()` interface across all models +- **Extensible architecture**: Easy to implement custom imputation methods +- **Weighted data handling**: Preserve data distributions with sample weights +- **Input validation**: Automatic parameter and data validation + +### Interactive dashboard +- **Visual exploration**: Analyze imputation results through interactive charts at https://microimpute-dashboard.vercel.app/ +- **GitHub integration**: Load artifacts directly from CI/CD workflows +- **Multiple data sources**: File upload, URL loading, and sample data + +## Installation + +```bash +pip install microimpute +``` + +For image export functionality (PNG/JPG), install with: + +```bash +pip install microimpute[images] +``` + +## Examples and documentation + +For detailed examples and interactive notebooks, see the [documentation](https://policyengine.github.io/microimpute/).
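A minimal quick-start can make the standardized `fit()`/`predict()` interface listed under Features concrete. The import path, class name, and keyword arguments in this sketch are assumptions for illustration only, since the README above does not spell out exact signatures; consult the documentation for the actual API.

```python
# Hedged sketch of the fit()/predict() workflow described in the Features section.
# ASSUMPTIONS: the `microimpute.models.QRF` import path and the
# `predictors` / `imputed_variables` keyword arguments are illustrative guesses,
# not confirmed API; see the project documentation for real usage.
import pandas as pd

from microimpute.models import QRF  # assumed import path

donor = pd.DataFrame(
    {
        "age": [34, 51, 42, 28],
        "bmi": [22.4, 29.1, 25.7, 21.0],
        "income": [41_000, 63_500, 52_300, 38_900],
    }
)
receiver = pd.DataFrame({"age": [45, 31], "bmi": [27.2, 23.5]})

model = QRF()
fitted = model.fit(
    donor,
    predictors=["age", "bmi"],
    imputed_variables=["income"],
)
# predict() returns imputed values for the receiver observations.
imputed = fitted.predict(receiver)
print(imputed)
```

Because every model exposes the same two calls, swapping QRF for OLS, Quantile Regression, or Statistical Matching in a script like this should only change the class being instantiated.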
+ +## Contributing + +Contributions to the project are welcome. Please feel free to submit a Pull Request with your improvements. diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29..39bbe07 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,5 @@ +- bump: minor + changes: + added: + - Links to dashboard in README.md and documentation. + - First dashboard visualizations. diff --git a/docs/index.md b/docs/index.md index dfa8a83..391c20c 100644 --- a/docs/index.md +++ b/docs/index.md @@ -9,3 +9,19 @@ The framework currently supports the following imputation methods: - Quantile Regression This is a work in progress that may evolve over time, including new statistical imputation methods and features. + +## Microimputation dashboard + +Users can visualize imputation and benchmarking results at https://microimpute-dashboard.vercel.app/. + +To use the dashboard for visualization, CSV files must contain the following columns in this exact order: +- `type`: Type of metric (e.g., "benchmark_loss", "distribution_distance", "predictor_correlation") +- `method`: Imputation method name (e.g., "QRF", "OLS", "QuantReg", "Matching") +- `variable`: Variable being imputed or analyzed +- `quantile`: Quantile level (numeric value, "mean", or "N/A") +- `metric_name`: Name of the metric (e.g., "quantile_loss", "log_loss") +- `metric_value`: Numeric value of the metric +- `split`: Data split indicator (e.g., "train", "test", "full") +- `additional_info`: JSON-formatted string with additional metadata + +The `format_csv()` function from `microimpute.utils` automatically formats imputation and benchmarking results into the correct structure for dashboard visualization. This function accepts outputs from various analysis functions (autoimpute results, comparison metrics, distribution comparisons, etc.) and returns a properly formatted DataFrame.
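To make the required column layout concrete, the short sketch below assembles a dashboard-ready file by hand with pandas. Only the column names and their order come from the list above; the row values are invented examples, and in normal use `format_csv()` produces this structure automatically.

```python
# Hedged sketch: building a CSV with the exact column order the dashboard expects
# (type, method, variable, quantile, metric_name, metric_value, split, additional_info).
# The row values below are invented purely for illustration.
import json

import pandas as pd

rows = [
    {
        "type": "benchmark_loss",
        "method": "QRF",
        "variable": "income",
        "quantile": 0.5,
        "metric_name": "quantile_loss",
        "metric_value": 0.0123,
        "split": "test",
        "additional_info": json.dumps({"n_observations": 1000}),
    },
    {
        "type": "benchmark_loss",
        "method": "QRF",
        "variable": "risk_factor",
        "quantile": "N/A",
        "metric_name": "log_loss",
        "metric_value": 0.4821,
        "split": "test",
        "additional_info": json.dumps({}),
    },
]

columns = [
    "type", "method", "variable", "quantile",
    "metric_name", "metric_value", "split", "additional_info",
]
df = pd.DataFrame(rows, columns=columns)  # enforce the exact column order
df.to_csv("microimpute_results.csv", index=False)
```

A file assembled manually like this should load in the dashboard as long as it follows the documented column order, but passing results through `format_csv()` remains the supported route.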
\ No newline at end of file diff --git a/examples/pipeline.py b/examples/pipeline.py index c9dc90e..da848d8 100644 --- a/examples/pipeline.py +++ b/examples/pipeline.py @@ -75,9 +75,22 @@ def run_full_pipeline(output_path="microimpute_results.csv"): donor_data = diabetes_data.iloc[donor_indices].reset_index(drop=True) receiver_data = diabetes_data.iloc[receiver_indices].reset_index(drop=True) + # Create a categorical risk_factor variable based on cholesterol levels (s4) + # Categorize into low, medium, high based on s4 values + def categorize_risk(s4_value): + if s4_value < -0.02: + return "low" + elif s4_value < 0.02: + return "medium" + else: + return "high" + + donor_data["risk_factor"] = donor_data["s4"].apply(categorize_risk) + receiver_data["risk_factor"] = receiver_data["s4"].apply(categorize_risk) + # Define predictors and variables to impute predictors = ["age", "sex", "bmi", "bp"] - imputed_variables = ["s1", "s4"] + imputed_variables = ["s1", "s4", "risk_factor"] # Remove imputed variables from receiver data receiver_data_without_targets = receiver_data.drop( @@ -88,6 +101,8 @@ def run_full_pipeline(output_path="microimpute_results.csv"): print(f"Receiver data shape: {receiver_data_without_targets.shape}") print(f"Predictors: {predictors}") print(f"Variables to impute: {imputed_variables}") + print(f"Risk factor distribution in donor data:") + print(donor_data["risk_factor"].value_counts()) print() # ======================================================================== @@ -245,6 +260,8 @@ def run_full_pipeline(output_path="microimpute_results.csv"): print(f" - Best imputation method: {best_method_name}") print(f" - Number of predictors analyzed: {len(predictors)}") print(f" - Number of imputed variables: {len(imputed_variables)}") + print(f" - Numerical variables: s1, s4") + print(f" - Categorical variables: risk_factor") print() print("Output CSV contains:") for result_type in formatted_df["type"].unique(): diff --git a/microimputation-dashboard/app/api/github/artifacts/route.ts b/microimputation-dashboard/app/api/github/artifacts/route.ts new file mode 100644 index 0000000..1ab5163 --- /dev/null +++ b/microimputation-dashboard/app/api/github/artifacts/route.ts @@ -0,0 +1,117 @@ +import { NextRequest, NextResponse } from 'next/server'; + +export async function GET(request: NextRequest) { + const searchParams = request.nextUrl.searchParams; + const repo = searchParams.get('repo'); + const commitSha = searchParams.get('commit'); + + if (!repo || !commitSha) { + return NextResponse.json( + { error: 'Missing repo or commit parameter' }, + { status: 400 } + ); + } + + const githubToken = process.env.GITHUB_TOKEN; + if (!githubToken) { + return NextResponse.json( + { error: 'GitHub token not configured on server' }, + { status: 500 } + ); + } + + try { + const [owner, repoName] = repo.split('/'); + + // Get workflow runs for the commit + const runsResponse = await fetch( + `https://api.github.com/repos/${owner}/${repoName}/actions/runs?head_sha=${commitSha}`, + { + headers: { + Authorization: `Bearer ${githubToken}`, + Accept: 'application/vnd.github.v3+json', + 'User-Agent': 'PolicyEngine-Dashboard/1.0', + }, + } + ); + + if (!runsResponse.ok) { + return NextResponse.json( + { error: `GitHub API error: ${runsResponse.status}` }, + { status: runsResponse.status } + ); + } + + const runsData = await runsResponse.json(); + const runs = runsData.workflow_runs; + + if (!runs || runs.length === 0) { + return NextResponse.json([]); + } + + // Collect all imputation artifacts from 
completed runs + const allArtifacts = []; + + for (const run of runs) { + if (run.status !== 'completed') continue; + + try { + const artifactsResponse = await fetch( + `https://api.github.com/repos/${owner}/${repoName}/actions/runs/${run.id}/artifacts`, + { + headers: { + Authorization: `Bearer ${githubToken}`, + Accept: 'application/vnd.github.v3+json', + 'User-Agent': 'PolicyEngine-Dashboard/1.0', + }, + } + ); + + if (!artifactsResponse.ok) continue; + + const artifactsData = await artifactsResponse.json(); + const artifacts = artifactsData.artifacts; + + // Filter for imputation artifacts + const imputationArtifacts = artifacts.filter( + (artifact: { name: string }) => + artifact.name.toLowerCase().includes('impute') || + artifact.name + .toLowerCase() + .includes('imputation') || + artifact.name.toLowerCase().includes('result') || + artifact.name.toLowerCase().includes('.csv') + ); + + allArtifacts.push(...imputationArtifacts); + } catch { + continue; + } + } + + // Remove duplicates and sort by creation date (newest first) + const uniqueArtifacts = allArtifacts + .filter( + (artifact: { name: string }, index: number, self: Array<{ name: string }>) => + index === + self.findIndex((a: { name: string }) => a.name === artifact.name) + ) + .sort( + (a: { created_at: string }, b: { created_at: string }) => + new Date(b.created_at).getTime() - + new Date(a.created_at).getTime() + ); + + return NextResponse.json(uniqueArtifacts); + } catch (error) { + return NextResponse.json( + { + error: + error instanceof Error + ? error.message + : 'Unknown error', + }, + { status: 500 } + ); + } +} diff --git a/microimputation-dashboard/app/api/github/branches/route.ts b/microimputation-dashboard/app/api/github/branches/route.ts new file mode 100644 index 0000000..02a6bbd --- /dev/null +++ b/microimputation-dashboard/app/api/github/branches/route.ts @@ -0,0 +1,77 @@ +import { NextRequest, NextResponse } from 'next/server'; + +export async function GET(request: NextRequest) { + const searchParams = request.nextUrl.searchParams; + const repo = searchParams.get('repo'); + + if (!repo) { + return NextResponse.json( + { error: 'Missing repo parameter' }, + { status: 400 } + ); + } + + const githubToken = process.env.GITHUB_TOKEN; + if (!githubToken) { + return NextResponse.json( + { error: 'GitHub token not configured on server' }, + { status: 500 } + ); + } + + try { + const allBranches = []; + let page = 1; + const perPage = 100; + + while (true) { + const response = await fetch( + `https://api.github.com/repos/${repo}/branches?per_page=${perPage}&page=${page}`, + { + headers: { + Authorization: `Bearer ${githubToken}`, + Accept: 'application/vnd.github.v3+json', + 'User-Agent': 'PolicyEngine-Dashboard/1.0', + }, + } + ); + + if (!response.ok) { + return NextResponse.json( + { error: `GitHub API error: ${response.status}` }, + { status: response.status } + ); + } + + const branches = await response.json(); + + if (branches.length === 0) { + break; + } + + allBranches.push(...branches); + + if (branches.length < perPage) { + break; + } + + page++; + + if (page > 10) { + break; + } + } + + return NextResponse.json(allBranches); + } catch (error) { + return NextResponse.json( + { + error: + error instanceof Error + ? 
error.message + : 'Unknown error', + }, + { status: 500 } + ); + } +} diff --git a/microimputation-dashboard/app/api/github/commits/route.ts b/microimputation-dashboard/app/api/github/commits/route.ts new file mode 100644 index 0000000..9847aee --- /dev/null +++ b/microimputation-dashboard/app/api/github/commits/route.ts @@ -0,0 +1,55 @@ +import { NextRequest, NextResponse } from 'next/server'; + +export async function GET(request: NextRequest) { + const searchParams = request.nextUrl.searchParams; + const repo = searchParams.get('repo'); + const branch = searchParams.get('branch'); + + if (!repo || !branch) { + return NextResponse.json( + { error: 'Missing repo or branch parameter' }, + { status: 400 } + ); + } + + const githubToken = process.env.GITHUB_TOKEN; + if (!githubToken) { + return NextResponse.json( + { error: 'GitHub token not configured on server' }, + { status: 500 } + ); + } + + try { + const response = await fetch( + `https://api.github.com/repos/${repo}/commits?sha=${branch}&per_page=20`, + { + headers: { + Authorization: `Bearer ${githubToken}`, + Accept: 'application/vnd.github.v3+json', + 'User-Agent': 'PolicyEngine-Dashboard/1.0', + }, + } + ); + + if (!response.ok) { + return NextResponse.json( + { error: `GitHub API error: ${response.status}` }, + { status: response.status } + ); + } + + const commits = await response.json(); + return NextResponse.json(commits); + } catch (error) { + return NextResponse.json( + { + error: + error instanceof Error + ? error.message + : 'Unknown error', + }, + { status: 500 } + ); + } +} diff --git a/microimputation-dashboard/app/api/github/download/route.ts b/microimputation-dashboard/app/api/github/download/route.ts new file mode 100644 index 0000000..4938bab --- /dev/null +++ b/microimputation-dashboard/app/api/github/download/route.ts @@ -0,0 +1,59 @@ +import { NextRequest, NextResponse } from 'next/server'; + +export async function GET(request: NextRequest) { + const searchParams = request.nextUrl.searchParams; + const url = searchParams.get('url'); + + if (!url) { + return NextResponse.json( + { error: 'Missing url parameter' }, + { status: 400 } + ); + } + + const githubToken = process.env.GITHUB_TOKEN; + if (!githubToken) { + return NextResponse.json( + { error: 'GitHub token not configured on server' }, + { status: 500 } + ); + } + + try { + const downloadResponse = await fetch(url, { + headers: { + Authorization: `Bearer ${githubToken}`, + Accept: 'application/vnd.github.v3+json', + 'User-Agent': 'PolicyEngine-Dashboard/1.0', + }, + }); + + if (!downloadResponse.ok) { + return NextResponse.json( + { error: `GitHub API error: ${downloadResponse.status}` }, + { status: downloadResponse.status } + ); + } + + // Get the artifact ZIP as an ArrayBuffer + const zipBuffer = await downloadResponse.arrayBuffer(); + + // Return the ZIP file as a response + return new NextResponse(zipBuffer, { + headers: { + 'Content-Type': 'application/zip', + 'Content-Length': zipBuffer.byteLength.toString(), + }, + }); + } catch (error) { + return NextResponse.json( + { + error: + error instanceof Error + ? 
error.message + : 'Unknown error', + }, + { status: 500 } + ); + } +} diff --git a/microimputation-dashboard/app/globals.css b/microimputation-dashboard/app/globals.css index a2dc41e..86b4953 100644 --- a/microimputation-dashboard/app/globals.css +++ b/microimputation-dashboard/app/globals.css @@ -8,8 +8,8 @@ @theme inline { --color-background: var(--background); --color-foreground: var(--foreground); - --font-sans: var(--font-geist-sans); - --font-mono: var(--font-geist-mono); + --font-sans: var(--font-roboto-serif); + --font-mono: var(--font-roboto-mono); } @media (prefers-color-scheme: dark) { @@ -22,5 +22,5 @@ body { background: var(--background); color: var(--foreground); - font-family: Arial, Helvetica, sans-serif; + font-family: var(--font-roboto-serif), ui-serif, Georgia, Cambria, "Times New Roman", Times, serif; } diff --git a/microimputation-dashboard/app/layout.tsx b/microimputation-dashboard/app/layout.tsx index f7fa87e..a1be77b 100644 --- a/microimputation-dashboard/app/layout.tsx +++ b/microimputation-dashboard/app/layout.tsx @@ -1,20 +1,22 @@ import type { Metadata } from "next"; -import { Geist, Geist_Mono } from "next/font/google"; +import { Roboto_Serif, Roboto_Mono } from "next/font/google"; import "./globals.css"; -const geistSans = Geist({ - variable: "--font-geist-sans", +const robotoSerif = Roboto_Serif({ + variable: "--font-roboto-serif", subsets: ["latin"], + weight: ["300", "400", "500", "600", "700"], }); -const geistMono = Geist_Mono({ - variable: "--font-geist-mono", +const robotoMono = Roboto_Mono({ + variable: "--font-roboto-mono", subsets: ["latin"], + weight: ["300", "400", "500", "600", "700"], }); export const metadata: Metadata = { - title: "Create Next App", - description: "Generated by create next app", + title: "Microimpute Dashboard", + description: "Microimputation quality and model benchmarking assessment", }; export default function RootLayout({ @@ -25,7 +27,7 @@ export default function RootLayout({ return ( {children} diff --git a/microimputation-dashboard/app/page.tsx b/microimputation-dashboard/app/page.tsx index 2b31bbf..a8bc1b6 100644 --- a/microimputation-dashboard/app/page.tsx +++ b/microimputation-dashboard/app/page.tsx @@ -13,18 +13,7 @@ function HomeContent() { const [fileName, setFileName] = useState(''); const [showDashboard, setShowDashboard] = useState(false); const [isLoadingFromDeeplink, setIsLoadingFromDeeplink] = useState(false); - const [githubArtifactInfo, setGithubArtifactInfo] = useState<{ - primary: GitHubArtifactInfo | null; - secondary?: GitHubArtifactInfo | null; - } | null>(null); - - // Comparison mode state - const [comparisonData, setComparisonData] = useState<{ - data1: ImputationDataPoint[]; - filename1: string; - data2: ImputationDataPoint[]; - filename2: string; - } | null>(null); + const [githubArtifactInfo, setGithubArtifactInfo] = useState(null); const searchParams = useSearchParams(); const deeplinkParams = parseDeeplinkParams(searchParams); @@ -40,32 +29,14 @@ function HomeContent() { const parsedData = parseImputationCSV(csvContent); setData(parsedData); setFileName(filename); - setComparisonData(null); // Clear comparison data when loading single file } catch (error) { console.error('Error parsing CSV:', error); alert('Failed to parse CSV file. 
Please check the file format.'); } }; - const handleCompareLoad = (content1: string, filename1: string, content2: string, filename2: string) => { - try { - const data1 = parseImputationCSV(content1); - const data2 = parseImputationCSV(content2); - setComparisonData({ - data1, - filename1, - data2, - filename2 - }); - setData([]); // Clear single data when loading comparison - } catch (error) { - console.error('Error parsing comparison CSVs:', error); - alert('Failed to parse one or both CSV files. Please check the file formats.'); - } - }; - const handleViewDashboard = () => { - if (data.length > 0 || comparisonData) { + if (data.length > 0) { setShowDashboard(true); } }; @@ -74,57 +45,32 @@ function HomeContent() { setShowDashboard(false); setData([]); setFileName(''); - setComparisonData(null); setGithubArtifactInfo(null); }; - const handleDeeplinkLoadComplete = (primary: GitHubArtifactInfo | null, secondary?: GitHubArtifactInfo | null) => { + const handleDeeplinkLoadComplete = (primary: GitHubArtifactInfo | null) => { setIsLoadingFromDeeplink(false); if (primary) { - setGithubArtifactInfo({ primary, secondary: secondary || undefined }); + setGithubArtifactInfo(primary); setShowDashboard(true); } }; - const handleGithubLoad = (primary: GitHubArtifactInfo | null, secondary?: GitHubArtifactInfo | null) => { + const handleGithubLoad = (primary: GitHubArtifactInfo | null) => { if (primary) { - setGithubArtifactInfo({ primary, secondary: secondary || undefined }); + setGithubArtifactInfo(primary); } }; + return (
- {/* Header */} -
-
-
-
-

- Microimpute Dashboard -

- - Beta - -
- {showDashboard && ( - - )} -
-
-
- {/* Main content */}
{!showDashboard ? ( ) : ( )}
diff --git a/microimputation-dashboard/components/BenchmarkLossCharts.tsx b/microimputation-dashboard/components/BenchmarkLossCharts.tsx new file mode 100644 index 0000000..8cf9e5c --- /dev/null +++ b/microimputation-dashboard/components/BenchmarkLossCharts.tsx @@ -0,0 +1,611 @@ +'use client'; + +import { useMemo, useState } from 'react'; +import { + BarChart, + Bar, + Cell, + XAxis, + YAxis, + CartesianGrid, + Tooltip, + Legend, + ResponsiveContainer, +} from 'recharts'; +import { ImputationDataPoint } from '@/types/imputation'; +import { getMethodColor } from '@/utils/colors'; + +interface BenchmarkLossChartsProps { + data: ImputationDataPoint[]; +} + +export default function BenchmarkLossCharts({ data }: BenchmarkLossChartsProps) { + // Filter for benchmark_loss data + const benchmarkData = useMemo(() => { + return data.filter(d => d.type === 'benchmark_loss'); + }, [data]); + + // State for selected method in train/test comparison + const [selectedMethod, setSelectedMethod] = useState(''); + + // Check if we have benchmark loss data + const hasBenchmarkData = benchmarkData.length > 0; + + // Separate quantile loss and log loss data + const { quantileLossData, logLossData, methods } = useMemo(() => { + const quantile = benchmarkData.filter( + d => d.metric_name === 'quantile_loss' && + d.split === 'test' && + typeof d.quantile === 'number' && + d.quantile >= 0 && + d.quantile <= 1 + ); + + const logLoss = benchmarkData.filter( + d => d.metric_name === 'log_loss' && + d.split === 'test' && + d.metric_value !== null + ); + + // Get unique methods + const uniqueMethods = Array.from(new Set(benchmarkData.map(d => d.method))); + + return { + quantileLossData: quantile, + logLossData: logLoss, + methods: uniqueMethods, + }; + }, [benchmarkData]); + + // Transform quantile loss data for grouped bar chart + const quantileChartData = useMemo(() => { + if (quantileLossData.length === 0) return []; + + // Group by quantile + const quantileMap = new Map>(); + + quantileLossData.forEach(d => { + const quantile = Number(d.quantile); + if (!quantileMap.has(quantile)) { + quantileMap.set(quantile, { quantile: quantile.toFixed(2) }); + } + const entry = quantileMap.get(quantile)!; + entry[d.method] = d.metric_value; + }); + + return Array.from(quantileMap.values()).sort( + (a, b) => parseFloat(a.quantile as string) - parseFloat(b.quantile as string) + ); + }, [quantileLossData]); + + // Transform log loss data for bar chart + const logLossChartData = useMemo(() => { + if (logLossData.length === 0) return []; + + // Average log loss per method + const methodMap = new Map(); + + logLossData.forEach(d => { + if (d.metric_value !== null) { + if (!methodMap.has(d.method)) { + methodMap.set(d.method, { sum: 0, count: 0 }); + } + const entry = methodMap.get(d.method)!; + entry.sum += d.metric_value; + entry.count += 1; + } + }); + + return Array.from(methodMap.entries()).map(([method, { sum, count }]) => ({ + method, + value: sum / count, + })); + }, [logLossData]); + + // Determine best performing model + const bestModel = useMemo(() => { + if (methods.length === 0) return null; + + // Calculate average quantile loss per method (test only) + const quantileLossAvg = new Map(); + // Count unique variables per method for quantile loss + const quantileVarCounts = new Map>(); + + if (quantileLossData.length > 0) { + const methodSums = new Map(); + quantileLossData.forEach(d => { + if (d.metric_value !== null) { + if (!methodSums.has(d.method)) { + methodSums.set(d.method, { sum: 0, count: 0 }); + } + const entry = 
methodSums.get(d.method)!; + entry.sum += d.metric_value; + entry.count += 1; + + // Track unique variables + if (!quantileVarCounts.has(d.method)) { + quantileVarCounts.set(d.method, new Set()); + } + quantileVarCounts.get(d.method)!.add(d.variable); + } + }); + methodSums.forEach((value, method) => { + quantileLossAvg.set(method, value.sum / value.count); + }); + } + + // Calculate average log loss per method (test only, already have this in logLossChartData) + const logLossAvg = new Map(); + // Count unique variables per method for log loss + const logLossVarCounts = new Map>(); + + logLossData.forEach(d => { + if (d.metric_value !== null) { + if (!logLossVarCounts.has(d.method)) { + logLossVarCounts.set(d.method, new Set()); + } + logLossVarCounts.get(d.method)!.add(d.variable); + } + }); + + logLossChartData.forEach(({ method, value }) => { + logLossAvg.set(method, value); + }); + + // Rank methods by each metric (lower is better, so rank 1 is best) + const rankMethods = (avgMap: Map): Map => { + const sorted = Array.from(avgMap.entries()).sort((a, b) => a[1] - b[1]); + const ranks = new Map(); + sorted.forEach(([method], index) => { + ranks.set(method, index + 1); + }); + return ranks; + }; + + const quantileRanks = rankMethods(quantileLossAvg); + const logLossRanks = rankMethods(logLossAvg); + + // Calculate weighted combined rank (weighted by number of variables of each type) + // This matches autoimpute's select_best_model_dual_metrics approach + const combinedRanks = new Map(); + methods.forEach(method => { + const qRank = quantileRanks.get(method); + const lRank = logLossRanks.get(method); + const nQuantileVars = quantileVarCounts.get(method)?.size || 0; + const nLogLossVars = logLossVarCounts.get(method)?.size || 0; + const totalVars = nQuantileVars + nLogLossVars; + + if (totalVars > 0) { + let weightedRank = 0; + if (qRank !== undefined) { + weightedRank += nQuantileVars * qRank; + } + if (lRank !== undefined) { + weightedRank += nLogLossVars * lRank; + } + combinedRanks.set(method, weightedRank / totalVars); + } else { + combinedRanks.set(method, Infinity); + } + }); + + // Find best method (lowest combined rank) + let bestMethod = ''; + let bestRank = Infinity; + combinedRanks.forEach((rank, method) => { + if (rank < bestRank) { + bestRank = rank; + bestMethod = method; + } + }); + + // Calculate train/test ratios for the best method + let quantileTrainTestRatio: number | undefined; + let logLossTrainTestRatio: number | undefined; + + // Quantile loss train/test ratio + const bestQuantileTrain = benchmarkData.filter( + d => d.method === bestMethod && d.metric_name === 'quantile_loss' && d.split === 'train' && d.metric_value !== null + ); + const bestQuantileTest = benchmarkData.filter( + d => d.method === bestMethod && d.metric_name === 'quantile_loss' && d.split === 'test' && d.metric_value !== null + ); + + if (bestQuantileTrain.length > 0 && bestQuantileTest.length > 0) { + const trainAvg = bestQuantileTrain.reduce((sum, d) => sum + d.metric_value!, 0) / bestQuantileTrain.length; + const testAvg = bestQuantileTest.reduce((sum, d) => sum + d.metric_value!, 0) / bestQuantileTest.length; + quantileTrainTestRatio = testAvg / trainAvg; + } + + // Log loss train/test ratio + const bestLogLossTrain = benchmarkData.filter( + d => d.method === bestMethod && d.metric_name === 'log_loss' && d.split === 'train' && d.metric_value !== null + ); + const bestLogLossTest = benchmarkData.filter( + d => d.method === bestMethod && d.metric_name === 'log_loss' && d.split === 'test' && 
d.metric_value !== null + ); + + if (bestLogLossTrain.length > 0 && bestLogLossTest.length > 0) { + const trainAvg = bestLogLossTrain.reduce((sum, d) => sum + d.metric_value!, 0) / bestLogLossTrain.length; + const testAvg = bestLogLossTest.reduce((sum, d) => sum + d.metric_value!, 0) / bestLogLossTest.length; + logLossTrainTestRatio = testAvg / trainAvg; + } + + return { + method: bestMethod, + quantileLoss: quantileLossAvg.get(bestMethod), + logLoss: logLossAvg.get(bestMethod), + quantileTrainTestRatio, + logLossTrainTestRatio, + }; + }, [methods, quantileLossData, logLossData, logLossChartData, benchmarkData]); + + // Set default selected method to best model + useMemo(() => { + if (bestModel && bestModel.method && !selectedMethod) { + setSelectedMethod(bestModel.method); + } + }, [bestModel, selectedMethod]); + + // Prepare train/test comparison data for selected method + const trainTestData = useMemo(() => { + if (!selectedMethod) return { quantile: [], logLoss: [] }; + + // Quantile loss train vs test + const quantileTrainTest: Array<{ quantile: string; train: number | null; test: number | null }> = []; + const quantileData = benchmarkData.filter( + d => d.method === selectedMethod && d.metric_name === 'quantile_loss' + ); + + if (quantileData.length > 0) { + const quantileMap = new Map(); + + quantileData.forEach(d => { + const q = typeof d.quantile === 'number' ? d.quantile.toFixed(2) : String(d.quantile || ''); + // Skip 'mean' quantiles + if (q.toLowerCase().includes('mean')) return; + + if (!quantileMap.has(q)) { + quantileMap.set(q, { train: null, test: null }); + } + const entry = quantileMap.get(q)!; + if (d.split === 'train') entry.train = d.metric_value; + if (d.split === 'test') entry.test = d.metric_value; + }); + + quantileMap.forEach((value, quantile) => { + quantileTrainTest.push({ quantile, ...value }); + }); + + quantileTrainTest.sort((a, b) => parseFloat(a.quantile) - parseFloat(b.quantile)); + } + + // Log loss train vs test (average across variables) + const logLossTrainTest: Array<{ category: string; train: number; test: number }> = []; + const logData = benchmarkData.filter( + d => d.method === selectedMethod && d.metric_name === 'log_loss' && d.metric_value !== null + ); + + if (logData.length > 0) { + const trainVals: number[] = []; + const testVals: number[] = []; + + logData.forEach(d => { + if (d.split === 'train') trainVals.push(d.metric_value!); + if (d.split === 'test') testVals.push(d.metric_value!); + }); + + if (trainVals.length > 0 || testVals.length > 0) { + const trainAvg = trainVals.length > 0 ? trainVals.reduce((a, b) => a + b, 0) / trainVals.length : 0; + const testAvg = testVals.length > 0 ? 
testVals.reduce((a, b) => a + b, 0) / testVals.length : 0; + + logLossTrainTest.push({ + category: 'Average', + train: trainAvg, + test: testAvg, + }); + } + } + + return { + quantile: quantileTrainTest, + logLoss: logLossTrainTest, + }; + }, [selectedMethod, benchmarkData]); + + const hasQuantileTrainTest = trainTestData.quantile.length > 0; + const hasLogLossTrainTest = trainTestData.logLoss.length > 0; + + // Filter methods that have train/test data + const methodsWithData = useMemo(() => { + const validMethods = new Set(); + + methods.forEach(method => { + const methodQuantileData = benchmarkData.filter( + d => d.method === method && d.metric_name === 'quantile_loss' && d.metric_value !== null + ); + const methodLogLossData = benchmarkData.filter( + d => d.method === method && d.metric_name === 'log_loss' && d.metric_value !== null + ); + + if (methodQuantileData.length > 0 || methodLogLossData.length > 0) { + validMethods.add(method); + } + }); + + return Array.from(validMethods); + }, [methods, benchmarkData]); + + const methodsWithoutData = methods.filter(m => !methodsWithData.includes(m)); + + if (!hasBenchmarkData) { + return null; + } + + return ( +
+

+ Benchmarking imputation methods +

+ + {/* Best Model Highlight */} + {bestModel && bestModel.method && ( +
+
+
+ + + +
+
+

+ Best performing model: {bestModel.method} +

+

Based on combined performance across all metrics

+
+
+
+ {bestModel.quantileLoss !== undefined && ( +
+
+ Avg. quantile loss (test): + {bestModel.quantileLoss.toFixed(6)} +
+ {bestModel.quantileTrainTestRatio !== undefined && ( + 1.1 ? 'text-amber-600' : 'text-gray-700'}`}> + Train/test ratio: {bestModel.quantileTrainTestRatio.toFixed(3)} + + )} +
+ )} + {bestModel.logLoss !== undefined && ( +
+
+ Avg. log loss (test): + {bestModel.logLoss.toFixed(6)} +
+ {bestModel.logLossTrainTestRatio !== undefined && ( + 1.1 ? 'text-amber-600' : 'text-gray-700'}`}> + Train/test ratio: {bestModel.logLossTrainTestRatio.toFixed(3)} + + )} +
+ )} +
+
+ )} + + {/* Note about methods without data */} + {methodsWithoutData.length > 0 && ( +
+

+ Note: {methodsWithoutData.length === 1 ? 'The following method does' : 'The following methods do'} not appear in visualizations because {methodsWithoutData.length === 1 ? 'it does' : 'they do'} not support imputation of the selected variables due to variable types: {methodsWithoutData.join(', ')} +

+
+ )} + +
+ {/* Quantile Loss Comparison */} + {quantileChartData.length > 0 && ( +
+

+ Test quantile loss across quantiles for different imputation methods +

+ + + + + + value.toFixed(6)} + /> + + {methods.map((method, index) => ( + + ))} + + +
+

+ Quantile loss measures how well the imputation method predicts different quantiles of the distribution for numerical variables, creating an asymmetric loss function that penalizes under-prediction more heavily for higher quantiles and over-prediction more heavily for lower quantiles. +
+ Lower values indicate better performance. +

+
+
+ )} + + {/* Log Loss Comparison */} + {logLossChartData.length > 0 && ( +
+

+ Test log loss across different imputation methods +

+ + + + + + [value.toFixed(6), 'Log loss']} + /> + + {logLossChartData.map((entry, index) => ( + + ))} + + + +
+

+ Log loss measures how well the imputation method predicts categorical and boolean variables by evaluating the accuracy of predicted probabilities. It heavily penalizes confident misclassifications, such that a perfect classifier would have a log loss of 0, while worse predictions yield increasingly higher values. +

+
+
+ )} + + {/* Train/Test Overfitting Assessment */} + {(hasQuantileTrainTest || hasLogLossTrainTest) && methods.length > 0 && ( +
+
+

+ Train vs test performance +

+

+ Compare training and test set performance to assess potential overfitting or underfitting. +

+ + {/* Method Selector */} +
+ + +
+
+ +
+ {/* Quantile Loss Train/Test */} + {hasQuantileTrainTest && ( +
+

Quantile loss: train vs test

+ + + + + + value.toFixed(6)} + /> + + + + + +
+ )} + + {/* Log Loss Train/Test */} + {hasLogLossTrainTest && ( +
+

Log loss: train vs test

+ + + + + + value.toFixed(6)} + /> + + + + + +
+ )} +
+ +
+

+ Overfitting assessment: When test performance (green bars) is significantly worse than train performance (cyan bars), it suggests the model may be overfitting to the training data and not generalizing well to unseen data. If both train and test performances are poor, the model may be underfitting and failing to capture underlying patterns. +
+ Healthy performance is indicated by similar train and test metrics, with both being reasonably low. +

+
+
+ )} +
+
+ ); +} diff --git a/microimputation-dashboard/components/FileUpload.tsx b/microimputation-dashboard/components/FileUpload.tsx index 581a2f3..fad0ec7 100644 --- a/microimputation-dashboard/components/FileUpload.tsx +++ b/microimputation-dashboard/components/FileUpload.tsx @@ -8,11 +8,10 @@ import { DeeplinkParams, GitHubArtifactInfo } from '@/utils/deeplinks'; interface FileUploadProps { onFileLoad: (content: string, filename: string) => void; onViewDashboard: () => void; - onCompareLoad?: (content1: string, filename1: string, content2: string, filename2: string) => void; deeplinkParams?: DeeplinkParams | null; isLoadingFromDeeplink?: boolean; - onDeeplinkLoadComplete?: (primary: GitHubArtifactInfo | null, secondary?: GitHubArtifactInfo | null | undefined) => void; - onGithubLoad?: (primary: GitHubArtifactInfo | null, secondary?: GitHubArtifactInfo | null) => void; + onDeeplinkLoadComplete?: (primary: GitHubArtifactInfo | null) => void; + onGithubLoad?: (primary: GitHubArtifactInfo | null) => void; } interface GitHubCommit { @@ -43,7 +42,6 @@ interface GitHubArtifact { export default function FileUpload({ onFileLoad, onViewDashboard, - onCompareLoad, deeplinkParams, isLoadingFromDeeplink, onDeeplinkLoadComplete, @@ -66,70 +64,33 @@ export default function FileUpload({ const [selectedArtifact, setSelectedArtifact] = useState(''); const [isLoadingGithubData, setIsLoadingGithubData] = useState(false); - // Comparison mode state - const [comparisonMode, setComparisonMode] = useState(false); - const [selectedSecondBranch, setSelectedSecondBranch] = useState(''); - const [secondCommits, setSecondCommits] = useState([]); - const [selectedSecondCommit, setSelectedSecondCommit] = useState(''); - const [secondArtifacts, setSecondArtifacts] = useState([]); - const [selectedSecondArtifact, setSelectedSecondArtifact] = useState(''); - // Helper function to load a single artifact from deeplink parameters - const loadArtifactFromDeeplink = useCallback(async (artifactInfo: GitHubArtifactInfo, githubToken: string): Promise => { - // First, get the artifacts for the specific commit - const [owner, repo] = artifactInfo.repo.split('/'); - const runsResponse = await fetch(`https://api.github.com/repos/${owner}/${repo}/actions/runs?head_sha=${artifactInfo.commit}`, { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - }); - - if (!runsResponse.ok) { - throw new Error(`Failed to fetch workflow runs: ${runsResponse.status} ${runsResponse.statusText}`); + const loadArtifactFromDeeplink = useCallback(async (artifactInfo: GitHubArtifactInfo): Promise => { + // Get artifacts for the specific commit using API route + const artifactsResponse = await fetch( + `/api/github/artifacts?repo=${encodeURIComponent(artifactInfo.repo)}&commit=${encodeURIComponent(artifactInfo.commit)}` + ); + + if (!artifactsResponse.ok) { + throw new Error(`Failed to fetch artifacts: ${artifactsResponse.status}`); } - const runsData = await runsResponse.json(); - const completedRuns = runsData.workflow_runs.filter((run: { status: string }) => run.status === 'completed'); - - if (completedRuns.length === 0) { - throw new Error('No completed workflow runs found for this commit'); - } + const artifacts = await artifactsResponse.json(); // Find the artifact by name - let targetArtifact = null; - for (const run of completedRuns) { - const artifactsResponse = await 
fetch(`https://api.github.com/repos/${owner}/${repo}/actions/runs/${run.id}/artifacts`, { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - }); - - if (artifactsResponse.ok) { - const artifactsData = await artifactsResponse.json(); - targetArtifact = artifactsData.artifacts.find((artifact: { name: string }) => artifact.name === artifactInfo.artifact); - if (targetArtifact) break; - } - } + const targetArtifact = artifacts.find((artifact: { name: string }) => artifact.name === artifactInfo.artifact); if (!targetArtifact) { throw new Error(`Artifact "${artifactInfo.artifact}" not found for commit ${artifactInfo.commit}`); } - // Download and extract the artifact - const downloadResponse = await fetch(targetArtifact.archive_download_url, { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - }); + // Download and extract the artifact using API route + const downloadResponse = await fetch( + `/api/github/download?url=${encodeURIComponent(targetArtifact.archive_download_url)}` + ); if (!downloadResponse.ok) { - throw new Error(`Failed to download artifact: ${downloadResponse.status} ${downloadResponse.statusText}`); + throw new Error(`Failed to download artifact: ${downloadResponse.status}`); } const zipBuffer = await downloadResponse.arrayBuffer(); @@ -154,13 +115,7 @@ export default function FileUpload({ }, []); // Load GitHub artifacts directly from deeplink parameters - const loadDeeplinkArtifacts = useCallback(async (primary: GitHubArtifactInfo, secondary?: GitHubArtifactInfo) => { - const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN; - if (!githubToken) { - setError('GitHub token not configured. 
Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.'); - return; - } - + const loadDeeplinkArtifacts = useCallback(async (primary: GitHubArtifactInfo) => { setIsLoading(true); setError(''); @@ -168,28 +123,16 @@ export default function FileUpload({ setError('🔄 Loading data from GitHub artifacts...'); // Load primary artifact - const primaryData = await loadArtifactFromDeeplink(primary, githubToken); - - if (secondary && onCompareLoad) { - // Load secondary artifact for comparison - const secondaryData = await loadArtifactFromDeeplink(secondary, githubToken); + const primaryData = await loadArtifactFromDeeplink(primary); - // Generate display names with commit info - const primaryDisplayName = `${primary.repo}@${primary.branch} (${primary.commit.substring(0, 7)}) - ${primary.artifact}`; - const secondaryDisplayName = `${secondary.repo}@${secondary.branch} (${secondary.commit.substring(0, 7)}) - ${secondary.artifact}`; - - onCompareLoad(primaryData, primaryDisplayName, secondaryData, secondaryDisplayName); - setLoadedFile(`Comparison: ${primaryDisplayName} vs ${secondaryDisplayName}`); - } else { - // Single artifact load - const displayName = `${primary.repo}@${primary.branch} (${primary.commit.substring(0, 7)}) - ${primary.artifact}`; - onFileLoad(primaryData, displayName); - setLoadedFile(displayName); - } + // Single artifact load + const displayName = `${primary.repo}@${primary.branch} (${primary.commit.substring(0, 7)}) - ${primary.artifact}`; + onFileLoad(primaryData, displayName); + setLoadedFile(displayName); // Notify parent component that deeplink loading is complete if (onDeeplinkLoadComplete) { - onDeeplinkLoadComplete(primary, secondary); + onDeeplinkLoadComplete(primary); } setError(''); @@ -201,35 +144,19 @@ export default function FileUpload({ } finally { setIsLoading(false); } - }, [onFileLoad, onCompareLoad, onDeeplinkLoadComplete, loadArtifactFromDeeplink]); + }, [onFileLoad, onDeeplinkLoadComplete, loadArtifactFromDeeplink]); // Handle deeplink loading on mount useEffect(() => { - if (deeplinkParams && isLoadingFromDeeplink) { + if (deeplinkParams && isLoadingFromDeeplink && deeplinkParams.primary) { setActiveTab('github'); + setGithubRepo(deeplinkParams.primary.repo); + setSelectedBranch(deeplinkParams.primary.branch); + setSelectedCommit(deeplinkParams.primary.commit); + setSelectedArtifact(deeplinkParams.primary.artifact); - if (deeplinkParams.mode === 'comparison' && deeplinkParams.primary && deeplinkParams.secondary) { - setComparisonMode(true); - setGithubRepo(deeplinkParams.primary.repo); - setSelectedBranch(deeplinkParams.primary.branch); - setSelectedCommit(deeplinkParams.primary.commit); - setSelectedArtifact(deeplinkParams.primary.artifact); - setSelectedSecondBranch(deeplinkParams.secondary.branch); - setSelectedSecondCommit(deeplinkParams.secondary.commit); - setSelectedSecondArtifact(deeplinkParams.secondary.artifact); - - // Auto-load comparison data - loadDeeplinkArtifacts(deeplinkParams.primary, deeplinkParams.secondary); - } else if (deeplinkParams.primary) { - setComparisonMode(false); - setGithubRepo(deeplinkParams.primary.repo); - setSelectedBranch(deeplinkParams.primary.branch); - setSelectedCommit(deeplinkParams.primary.commit); - setSelectedArtifact(deeplinkParams.primary.artifact); - - // Auto-load single artifact data - loadDeeplinkArtifacts(deeplinkParams.primary); - } + // Auto-load artifact data + loadDeeplinkArtifacts(deeplinkParams.primary); } }, [deeplinkParams, isLoadingFromDeeplink, loadDeeplinkArtifacts]); @@ -436,14 +363,24 @@ 
export default function FileUpload({ } let url: URL; + const finalUrl = urlInput.trim(); + try { - url = new URL(urlInput.trim()); + url = new URL(finalUrl); } catch { setError('Invalid URL format. Please enter a valid URL (e.g., https://example.com/data.csv).'); return; } - if (!url.pathname.toLowerCase().endsWith('.csv') && !urlInput.toLowerCase().includes('csv')) { + // Handle Google Drive URLs + if (url.hostname === 'drive.google.com') { + setError( + 'Google Drive links are not supported due to CORS restrictions. Please download the file and use the "Drop file" tab instead or host the file on a different public server.' + ); + return; + } + + if (!url.pathname.toLowerCase().endsWith('.csv') && !finalUrl.toLowerCase().includes('csv')) { setError('URL should point to a CSV file. Please ensure the URL ends with .csv or contains CSV data.'); return; } @@ -460,7 +397,7 @@ export default function FileUpload({ const controller = new AbortController(); const timeoutId = setTimeout(() => controller.abort(), 30_000); // 30 s timeout - const response = await fetch(urlInput.trim(), { + const response = await fetch(finalUrl, { signal: controller.signal, headers: { Accept: 'text/csv, text/plain, */*' } }); @@ -557,62 +494,23 @@ export default function FileUpload({ return; } - const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN; - if (!githubToken) { - setError('GitHub token not configured. Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.'); - return; - } - setIsLoadingGithubData(true); setError(''); try { - // Fetch all branches with pagination support - const allBranches: GitHubBranch[] = []; - let page = 1; - const perPage = 100; // Maximum allowed by GitHub API - - while (true) { - const response = await fetch(`https://api.github.com/repos/${githubRepo}/branches?per_page=${perPage}&page=${page}`, { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - }); + const response = await fetch(`/api/github/branches?repo=${encodeURIComponent(githubRepo)}`); if (!response.ok) { if (response.status === 404) { - throw new Error('Repository not found. Please check the repository name and ensure it is accessible.'); + throw new Error('Repository not found. Please check the repository name and ensure it is publicly accessible.'); } else if (response.status === 403) { throw new Error('Access forbidden. 
Please check your GitHub token permissions or repository access.'); } - throw new Error(`Failed to fetch branches: ${response.status} ${response.statusText}`); - } - - const branches: GitHubBranch[] = await response.json(); - - if (branches.length === 0) { - // No more branches to fetch - break; - } - - allBranches.push(...branches); - - // If we got fewer branches than requested, we've reached the end - if (branches.length < perPage) { - break; + const errorData = await response.json(); + throw new Error(errorData.error || `Failed to fetch branches: ${response.status}`); } - page++; - - // Safety check to prevent infinite loops (GitHub repos rarely have more than 1000 branches) - if (page > 10) { - console.warn('Stopped fetching branches after 10 pages (1000 branches) to prevent excessive API calls'); - break; - } - } - + const allBranches: GitHubBranch[] = await response.json(); setGithubBranches(allBranches); // Auto-select main/master branch if available @@ -631,28 +529,20 @@ export default function FileUpload({ async function fetchGithubCommits(branch: string) { if (!githubRepo.trim() || !branch) return; - const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN; - if (!githubToken) { - setError('GitHub token not configured. Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.'); - return; - } - setIsLoadingGithubData(true); try { - const response = await fetch(`https://api.github.com/repos/${githubRepo}/commits?sha=${branch}&per_page=20`, { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - }); + const response = await fetch( + `/api/github/commits?repo=${encodeURIComponent(githubRepo)}&branch=${encodeURIComponent(branch)}` + ); + if (!response.ok) { if (response.status === 404) { throw new Error('Branch not found or repository is private.'); } else if (response.status === 403) { throw new Error('Access forbidden. Please check your GitHub token permissions or repository access.'); } - throw new Error(`Failed to fetch commits: ${response.status} ${response.statusText}`); + const errorData = await response.json(); + throw new Error(errorData.error || `Failed to fetch commits: ${response.status}`); } const commits: GitHubCommit[] = await response.json(); @@ -673,98 +563,32 @@ export default function FileUpload({ async function fetchGithubArtifacts(commitSha: string) { if (!githubRepo.trim() || !commitSha) return; - const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN; - if (!githubToken) { - setError('GitHub token not configured. Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.'); - return; - } - setIsLoadingGithubData(true); setAvailableArtifacts([]); setSelectedArtifact(''); try { - const [owner, repo] = githubRepo.split('/'); - - // Get workflow runs for the commit - const runsResponse = await fetch( - `https://api.github.com/repos/${owner}/${repo}/actions/runs?head_sha=${commitSha}`, - { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - } + const response = await fetch( + `/api/github/artifacts?repo=${encodeURIComponent(githubRepo)}&commit=${encodeURIComponent(commitSha)}` ); - if (!runsResponse.ok) { - if (runsResponse.status === 403) { + if (!response.ok) { + if (response.status === 403) { throw new Error(`GitHub API rate limit exceeded or token permissions insufficient (403). 
Please try again later or check your token permissions.`); - } else if (runsResponse.status === 404) { + } else if (response.status === 404) { throw new Error(`Repository or commit not found (404). Please check the repository name and commit SHA.`); - } else { - throw new Error(`Failed to fetch workflow runs: ${runsResponse.status} ${runsResponse.statusText}`); } + const errorData = await response.json(); + throw new Error(errorData.error || `Failed to fetch artifacts: ${response.status}`); } - const runsData = await runsResponse.json(); - const runs = runsData.workflow_runs; - - if (!runs || runs.length === 0) { - setError('No workflow runs found for this commit.'); - return; - } + const uniqueArtifacts: GitHubArtifact[] = await response.json(); - // Collect all imputation artifacts from completed runs - const allArtifacts: GitHubArtifact[] = []; - - for (const run of runs) { - if (run.status !== 'completed') continue; - - try { - const artifactsResponse = await fetch( - `https://api.github.com/repos/${owner}/${repo}/actions/runs/${run.id}/artifacts`, - { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - } - ); - - if (!artifactsResponse.ok) continue; - - const artifactsData = await artifactsResponse.json(); - const artifacts = artifactsData.artifacts; - - // Filter for imputation artifacts - const imputationArtifacts = artifacts.filter((artifact: GitHubArtifact) => - artifact.name.toLowerCase().includes('impute') || - artifact.name.toLowerCase().includes('imputation') || - artifact.name.toLowerCase().includes('result') || - artifact.name.toLowerCase().includes('.csv') - ); - - allArtifacts.push(...imputationArtifacts); - } catch { - continue; - } - } - - if (allArtifacts.length === 0) { + if (uniqueArtifacts.length === 0) { setError('No imputation artifacts found for this commit.'); return; } - // Remove duplicates and sort by creation date (newest first) - const uniqueArtifacts = allArtifacts - .filter((artifact, index, self) => - index === self.findIndex(a => a.name === artifact.name) - ) - .sort((a, b) => new Date(b.created_at).getTime() - new Date(a.created_at).getTime()); - setAvailableArtifacts(uniqueArtifacts); // Auto-select the first artifact @@ -791,25 +615,15 @@ export default function FileUpload({ return; } - const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN; - if (!githubToken) { - setError('GitHub token not configured. 
Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.'); - return; - } - setIsLoading(true); setError(''); try { setError('🔄 Downloading and extracting CSV from artifact...'); - const downloadResponse = await fetch(artifact.archive_download_url, { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - }); + const downloadResponse = await fetch( + `/api/github/download?url=${encodeURIComponent(artifact.archive_download_url)}` + ); if (!downloadResponse.ok) { throw new Error(`Failed to download artifact: ${downloadResponse.status}`); @@ -864,7 +678,7 @@ export default function FileUpload({ commit: selectedCommit, artifact: artifact.name }; - onGithubLoad(artifactInfo, null); + onGithubLoad(artifactInfo); } // Clear the GitHub state since we successfully loaded the file @@ -884,286 +698,31 @@ export default function FileUpload({ } } - async function fetchSecondBranchCommits(branch: string) { - if (!githubRepo.trim() || !branch) return; - - const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN; - if (!githubToken) { - setError('GitHub token not configured. Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.'); - return; - } - - setIsLoadingGithubData(true); - try { - const response = await fetch(`https://api.github.com/repos/${githubRepo}/commits?sha=${branch}&per_page=20`, { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - }); - if (!response.ok) { - if (response.status === 404) { - throw new Error('Branch not found or repository is private.'); - } else if (response.status === 403) { - throw new Error('Access forbidden. Please check your GitHub token permissions or repository access.'); - } - throw new Error(`Failed to fetch commits: ${response.status} ${response.statusText}`); - } - - const commits: GitHubCommit[] = await response.json(); - setSecondCommits(commits); - - // Auto-select latest commit and fetch its artifacts - if (commits.length > 0) { - setSelectedSecondCommit(commits[0].sha); - await fetchSecondArtifacts(commits[0].sha); - } - } catch (err) { - setError(`GitHub API error: ${err instanceof Error ? err.message : 'Unknown error'}`); - } finally { - setIsLoadingGithubData(false); - } - } - - async function fetchSecondArtifacts(commitSha: string) { - if (!githubRepo.trim() || !commitSha) return; - - const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN; - if (!githubToken) { - setError('GitHub token not configured. Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.'); - return; - } - - setIsLoadingGithubData(true); - setSecondArtifacts([]); - setSelectedSecondArtifact(''); - - try { - const [owner, repo] = githubRepo.split('/'); - - // Get workflow runs for the commit - const runsResponse = await fetch( - `https://api.github.com/repos/${owner}/${repo}/actions/runs?head_sha=${commitSha}`, - { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - } - ); - - if (!runsResponse.ok) { - if (runsResponse.status === 403) { - throw new Error(`GitHub API rate limit exceeded or token permissions insufficient (403). Please try again later or check your token permissions.`); - } else if (runsResponse.status === 404) { - throw new Error(`Repository or commit not found (404). 
Please check the repository name and commit SHA.`); - } else { - throw new Error(`Failed to fetch workflow runs: ${runsResponse.status} ${runsResponse.statusText}`); - } - } - - const runsData = await runsResponse.json(); - const runs = runsData.workflow_runs; - - if (!runs || runs.length === 0) { - setError('No workflow runs found for this commit.'); - return; - } - - // Collect all imputation artifacts from completed runs - const allArtifacts: GitHubArtifact[] = []; - - for (const run of runs) { - if (run.status !== 'completed') continue; - - try { - const artifactsResponse = await fetch( - `https://api.github.com/repos/${owner}/${repo}/actions/runs/${run.id}/artifacts`, - { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - } - ); - - if (!artifactsResponse.ok) continue; - - const artifactsData = await artifactsResponse.json(); - const artifacts = artifactsData.artifacts; - - // Filter for imputation artifacts - const imputationArtifacts = artifacts.filter((artifact: GitHubArtifact) => - artifact.name.toLowerCase().includes('impute') || - artifact.name.toLowerCase().includes('imputation') || - artifact.name.toLowerCase().includes('result') || - artifact.name.toLowerCase().includes('.csv') - ); - - allArtifacts.push(...imputationArtifacts); - } catch { - continue; - } - } - - if (allArtifacts.length === 0) { - setError('No imputation artifacts found for this commit.'); - return; - } - - // Remove duplicates and sort by creation date (newest first) - const uniqueArtifacts = allArtifacts - .filter((artifact, index, self) => - index === self.findIndex(a => a.name === artifact.name) - ) - .sort((a, b) => new Date(b.created_at).getTime() - new Date(a.created_at).getTime()); - - setSecondArtifacts(uniqueArtifacts); - - // Auto-select the first artifact - if (uniqueArtifacts.length > 0) { - setSelectedSecondArtifact(uniqueArtifacts[0].id.toString()); - } - - } catch (err) { - setError(`Failed to fetch artifacts: ${err instanceof Error ? err.message : 'Unknown error'}`); - } finally { - setIsLoadingGithubData(false); - } - } - - async function loadComparisonData() { - if (!selectedArtifact || !selectedSecondArtifact || !onCompareLoad) { - setError('Please select artifacts from both commits to compare'); - return; - } - - const firstArtifact = availableArtifacts.find(a => a.id.toString() === selectedArtifact); - const secondArtifact = secondArtifacts.find(a => a.id.toString() === selectedSecondArtifact); - - if (!firstArtifact || !secondArtifact) { - setError('Selected artifacts not found'); - return; - } - - const githubToken = process.env.NEXT_PUBLIC_GITHUB_TOKEN; - if (!githubToken) { - setError('GitHub token not configured. 
Please set NEXT_PUBLIC_GITHUB_TOKEN environment variable.'); - return; - } - - setIsLoading(true); - setError(''); - - try { - setError('🔄 Downloading and extracting CSV files for comparison...'); - - // Download both artifacts - const [firstDownload, secondDownload] = await Promise.all([ - fetch(firstArtifact.archive_download_url, { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - }), - fetch(secondArtifact.archive_download_url, { - headers: { - 'Authorization': `Bearer ${githubToken}`, - 'Accept': 'application/vnd.github.v3+json', - 'User-Agent': 'PolicyEngine-Dashboard/1.0' - } - }) - ]); - - if (!firstDownload.ok || !secondDownload.ok) { - throw new Error('Failed to download one or both artifacts'); - } - - // Extract CSVs from both artifacts - const [firstZipBuffer, secondZipBuffer] = await Promise.all([ - firstDownload.arrayBuffer(), - secondDownload.arrayBuffer() - ]); - - const firstZip = new JSZip(); - const secondZip = new JSZip(); - const [firstZipContent, secondZipContent] = await Promise.all([ - firstZip.loadAsync(firstZipBuffer), - secondZip.loadAsync(secondZipBuffer) - ]); - - // Find CSV files in both ZIPs - const firstCsvFiles = Object.keys(firstZipContent.files).filter(filename => - filename.toLowerCase().endsWith('.csv') && !firstZipContent.files[filename].dir - ); - const secondCsvFiles = Object.keys(secondZipContent.files).filter(filename => - filename.toLowerCase().endsWith('.csv') && !secondZipContent.files[filename].dir - ); - - if (firstCsvFiles.length === 0 || secondCsvFiles.length === 0) { - throw new Error('No CSV files found in one or both artifacts'); - } - - // Extract CSV content - const [firstCsvContent, secondCsvContent] = await Promise.all([ - firstZipContent.files[firstCsvFiles[0]].async('text'), - secondZipContent.files[secondCsvFiles[0]].async('text') - ]); - - // Create display names with commit info - const firstCommitShort = selectedCommit.slice(0, 8); - const secondCommitShort = selectedSecondCommit.slice(0, 8); - - const firstBranchInfo = selectedBranch !== selectedSecondBranch ? ` (${selectedBranch})` : ''; - const secondBranchInfo = selectedBranch !== selectedSecondBranch ? ` (${selectedSecondBranch})` : ''; - - const firstName = `${firstCsvFiles[0]} @ ${firstCommitShort}${firstBranchInfo}`; - const secondName = `${secondCsvFiles[0]} @ ${secondCommitShort}${secondBranchInfo}`; - - // Load into comparison mode - onCompareLoad(firstCsvContent, firstName, secondCsvContent, secondName); - - // Notify parent component about GitHub artifact info for sharing - if (onGithubLoad) { - const primaryArtifactInfo: GitHubArtifactInfo = { - repo: githubRepo, - branch: selectedBranch, - commit: selectedCommit, - artifact: firstArtifact.name - }; - const secondaryArtifactInfo: GitHubArtifactInfo = { - repo: githubRepo, - branch: selectedSecondBranch, - commit: selectedSecondCommit, - artifact: secondArtifact.name - }; - onGithubLoad(primaryArtifactInfo, secondaryArtifactInfo); - } - - setError(''); - - } catch (extractError) { - console.error('Comparison extraction error:', extractError); - setError(`❌ Failed to extract comparison data: ${extractError instanceof Error ? extractError.message : 'Unknown error'}`); - } finally { - setIsLoading(false); - } - } return ( -
-
-

Load imputation data

-

Choose how you would like to load your CSV file

+
+ {/* Page Title */} +
+

Microimpute Dashboard

+

Microimputation quality and model benchmarking assessment

+ {/* Upload Card */} +
+
+

Load imputation data

+

Choose how you would like to load your CSV file

+
+ {error && ( -
-

{error}

+
+

{error}

)} @@ -1176,7 +735,11 @@ export default function FileUpload({ {/* Tab navigation */}
- {/* Comparison Mode Toggle */} -
- -

- Enable this to compare imputation results between different branches or commits -

-
- {/* Branch Selection */} {githubBranches.length > 0 && (
@@ -1386,7 +933,7 @@ export default function FileUpload({ setSelectedBranch(e.target.value); fetchGithubCommits(e.target.value); }} - className="w-full px-3 py-2 border border-gray-300 rounded-md focus:outline-none focus:ring-2 focus:ring-blue-500" + className="w-full px-3 py-2 border border-gray-300 rounded-md focus:outline-none focus:ring-2 focus:ring-blue-500 text-gray-900" > {githubBranches.map((branch) => ( @@ -1411,7 +958,7 @@ export default function FileUpload({ setSelectedCommit(e.target.value); fetchGithubArtifacts(e.target.value); }} - className="w-full px-3 py-2 border border-gray-300 rounded-md focus:outline-none focus:ring-2 focus:ring-blue-500" + className="w-full px-3 py-2 border border-gray-300 rounded-md focus:outline-none focus:ring-2 focus:ring-blue-500 text-gray-900" > {githubCommits.map((commit) => ( @@ -1441,7 +988,7 @@ export default function FileUpload({ id="github-artifact" value={selectedArtifact} onChange={(e) => setSelectedArtifact(e.target.value)} - className="w-full px-3 py-2 border border-gray-300 rounded-md focus:outline-none focus:ring-2 focus:ring-blue-500" + className="w-full px-3 py-2 border border-gray-300 rounded-md focus:outline-none focus:ring-2 focus:ring-blue-500 text-gray-900" > {availableArtifacts.map((artifact) => ( @@ -1460,109 +1007,14 @@ export default function FileUpload({
)} - {/* Second Selection for Comparison */} - {comparisonMode && githubBranches.length > 0 && ( -
-

Second Imputation Run (for comparison)

- - {/* Second Branch Selection */} -
- - -
- - {/* Second Commit Selection */} - {secondCommits.length > 0 && ( -
- - - {selectedSecondCommit && ( -

- {secondCommits.find(c => c.sha === selectedSecondCommit)?.commit.author.date && - new Date(secondCommits.find(c => c.sha === selectedSecondCommit)!.commit.author.date).toLocaleString() - } -

- )} -
- )} - - {/* Second Artifact Selection */} - {secondArtifacts.length > 0 && ( -
- - - {selectedSecondArtifact && ( -

- {secondArtifacts.find(a => a.id.toString() === selectedSecondArtifact)?.created_at && - `Created: ${new Date(secondArtifacts.find(a => a.id.toString() === selectedSecondArtifact)!.created_at).toLocaleString()}` - } -

- )} -
- )} -
- )} - {/* Load Button */} - {selectedArtifact && (!comparisonMode || selectedSecondArtifact) && ( + {selectedArtifact && ( )} @@ -1593,15 +1045,16 @@ export default function FileUpload({
)} - {/* Global loading indicator */} - {(isLoading || isLoadingGithubData) && ( -
-
-

- {isLoadingGithubData ? 'Loading GitHub data...' : 'Loading file...'} -

-
- )} + {/* Global loading indicator */} + {(isLoading || isLoadingGithubData) && ( +
+
+

+ {isLoadingGithubData ? 'Loading GitHub data...' : 'Loading file...'} +

+
+ )} +
); } \ No newline at end of file diff --git a/microimputation-dashboard/components/ImputationResults.tsx b/microimputation-dashboard/components/ImputationResults.tsx new file mode 100644 index 0000000..598b507 --- /dev/null +++ b/microimputation-dashboard/components/ImputationResults.tsx @@ -0,0 +1,315 @@ +'use client'; + +import { useMemo } from 'react'; +import { ImputationDataPoint } from '@/types/imputation'; +import { BarChart, Bar, XAxis, YAxis, CartesianGrid, Tooltip, Legend, ResponsiveContainer, Cell } from 'recharts'; + +interface ImputationResultsProps { + data: ImputationDataPoint[]; +} + +interface DistributionMetric { + variable: string; + method: string; + metricName: string; + value: number; +} + +export default function ImputationResults({ data }: ImputationResultsProps) { + // Filter for distribution distance data + const distributionData = useMemo(() => { + return data.filter(d => d.type === 'distribution_distance'); + }, [data]); + + // Group by metric type + const { wassersteinData, klDivergenceData } = useMemo(() => { + const wasserstein: DistributionMetric[] = []; + const klDiv: DistributionMetric[] = []; + + distributionData.forEach(d => { + const metric: DistributionMetric = { + variable: d.variable, + method: d.method, + metricName: d.metric_name, + value: d.metric_value ?? 0, + }; + + if (d.metric_name === 'wasserstein_distance') { + wasserstein.push(metric); + } else if (d.metric_name === 'kl_divergence') { + klDiv.push(metric); + } + }); + + // Sort by value (ascending - lower is better) + wasserstein.sort((a, b) => a.value - b.value); + klDiv.sort((a, b) => a.value - b.value); + + return { + wassersteinData: wasserstein, + klDivergenceData: klDiv + }; + }, [distributionData]); + + const hasWasserstein = wassersteinData.length > 0; + const hasKLDivergence = klDivergenceData.length > 0; + + if (!hasWasserstein && !hasKLDivergence) { + return null; + } + + // Color function based on value quality (lower is better) + const getWassersteinColor = (value: number): string => { + if (value < 0.01) return '#16a34a'; // Dark green - excellent + if (value < 0.05) return '#22c55e'; // Green - good + if (value < 0.1) return '#eab308'; // Yellow - moderate + if (value < 0.2) return '#f97316'; // Orange - fair + return '#ef4444'; // Red - poor + }; + + const getKLColor = (value: number): string => { + if (value < 0.1) return '#16a34a'; // Dark green - excellent + if (value < 0.5) return '#22c55e'; // Green - good + if (value < 1.0) return '#eab308'; // Yellow - moderate + if (value < 5.0) return '#f97316'; // Orange - fair + return '#ef4444'; // Red - poor + }; + + return ( +
+
+

+ Imputation results +

+

+ Distributional quality metrics comparing imputed values to true values +

+
+ + {/* Wasserstein Distance Section */} + {hasWasserstein && ( +
+

+ Numerical variables (Wasserstein distance) +

+ + {/* Explanation */} +
+

+ What is Wasserstein distance? Also known as "Earth Mover's Distance", + this metric measures how much "work" is needed to transform one probability distribution + into another. Think of it as the minimum cost to rearrange one pile of dirt to match + another pile's shape. +

+

+ Why use it for imputation? Wasserstein distance is ideal for numerical + variables because it considers the actual distances between values, not just whether + they match exactly. A value of 0 means perfect imputation, and larger values indicate + greater differences between imputed and true distributions. +

+

+ Interpretation: Values closer to 0 are better. Generally, values below + 0.05 indicate good imputation quality, while values above 0.2 suggest significant + distributional differences. +

+
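For intuition about what these numbers mean, the 1-D Wasserstein distance can be sketched in a few lines of TypeScript. This is an illustrative helper only (the `wasserstein1d` name is hypothetical and the actual values are computed upstream by microimpute); for two equal-length samples it reduces to the mean absolute difference between the sorted values.

```typescript
// Illustrative sketch: Wasserstein-1 distance between two empirical
// distributions with the same number of observations. After sorting, it is
// the mean absolute difference between corresponding order statistics.
function wasserstein1d(imputed: number[], truth: number[]): number {
  if (imputed.length !== truth.length || imputed.length === 0) {
    throw new Error('Expected two non-empty samples of equal length');
  }
  const a = [...imputed].sort((x, y) => x - y);
  const b = [...truth].sort((x, y) => x - y);
  let total = 0;
  for (let i = 0; i < a.length; i++) {
    total += Math.abs(a[i] - b[i]);
  }
  return total / a.length; // 0 means the two distributions coincide
}
```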
+ + {/* Bar chart */} +
+ + + + + + [value.toFixed(6), 'Wasserstein Distance']} + /> + + + {wassersteinData.map((entry, index) => ( + + ))} + + + +
+ + {/* Detailed table */} +
+ + + + + + + + + + {wassersteinData.map((item) => { + let assessment = ''; + let assessmentColor = ''; + + if (item.value < 0.01) { + assessment = 'Excellent'; + assessmentColor = 'text-green-700 font-semibold'; + } else if (item.value < 0.05) { + assessment = 'Good'; + assessmentColor = 'text-green-600'; + } else if (item.value < 0.1) { + assessment = 'Moderate'; + assessmentColor = 'text-yellow-600'; + } else if (item.value < 0.2) { + assessment = 'Fair'; + assessmentColor = 'text-orange-600'; + } else { + assessment = 'Poor'; + assessmentColor = 'text-red-600 font-semibold'; + } + + return ( + + + + + + ); + })} + +
+ Variable + + Wasserstein Distance + + Quality Assessment +
+ {item.variable} + + {item.value.toFixed(6)} + + {assessment} +
+
+
+ )} + + {/* KL Divergence Section */} + {hasKLDivergence && ( +
+

+ Categorical variables (KL-divergence) +

+ + {/* Explanation */} +
+

+ What is KL-divergence? Kullback-Leibler divergence measures how much + one probability distribution differs from another. It quantifies the "information lost" + when using the imputed distribution to approximate the true distribution. +

+

+ Why use it for categorical variables? KL-divergence is particularly + useful for categorical data because it compares probability distributions across + categories. It's sensitive to differences in how probabilities are distributed across + all possible categories. +

+

+ Interpretation: A value of 0 means perfect match. Values below 0.5 + indicate good imputation, while values above 5.0 suggest substantial distributional + differences. Note that KL-divergence is not symmetric and can range from 0 to infinity. +

+
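Similarly, a minimal sketch of the discrete KL-divergence these rows are assumed to hold; the helper and its epsilon smoothing are illustrative assumptions, not the library's implementation.

```typescript
// Illustrative sketch: KL-divergence D(P || Q) for two categorical
// distributions given as category -> probability maps. A small epsilon guards
// against zero probabilities in Q (an ad hoc smoothing choice).
function klDivergence(
  p: Map<string, number>,
  q: Map<string, number>,
  eps = 1e-9
): number {
  let kl = 0;
  for (const [category, pProb] of p) {
    if (pProb <= 0) continue; // terms with p(x) = 0 contribute nothing
    const qProb = q.get(category) ?? 0;
    kl += pProb * Math.log(pProb / Math.max(qProb, eps));
  }
  return kl; // 0 means identical distributions; note D(P||Q) !== D(Q||P)
}
```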
+ + {/* Bar chart */} +
+ + + + + + [value.toFixed(6), 'KL-Divergence']} + /> + + + {klDivergenceData.map((entry, index) => ( + + ))} + + + +
+ + {/* Detailed table */} +
+ + + + + + + + + + {klDivergenceData.map((item) => { + let assessment = ''; + let assessmentColor = ''; + + if (item.value < 0.1) { + assessment = 'Excellent'; + assessmentColor = 'text-green-700 font-semibold'; + } else if (item.value < 0.5) { + assessment = 'Good'; + assessmentColor = 'text-green-600'; + } else if (item.value < 1.0) { + assessment = 'Moderate'; + assessmentColor = 'text-yellow-600'; + } else if (item.value < 5.0) { + assessment = 'Fair'; + assessmentColor = 'text-orange-600'; + } else { + assessment = 'Poor'; + assessmentColor = 'text-red-600 font-semibold'; + } + + return ( + + + + + + ); + })} + +
+ Variable + + KL-Divergence + + Quality Assessment +
+ {item.variable} + + {item.value.toFixed(6)} + + {assessment} +
+
+
+ )} +
+ ); +} diff --git a/microimputation-dashboard/components/PerVariableCharts.tsx b/microimputation-dashboard/components/PerVariableCharts.tsx new file mode 100644 index 0000000..01f5933 --- /dev/null +++ b/microimputation-dashboard/components/PerVariableCharts.tsx @@ -0,0 +1,191 @@ +'use client'; + +import { useMemo } from 'react'; +import { + BarChart, + Bar, + Cell, + XAxis, + YAxis, + CartesianGrid, + Tooltip, + Legend, + ResponsiveContainer, +} from 'recharts'; +import { ImputationDataPoint } from '@/types/imputation'; +import { getMethodColor } from '@/utils/colors'; + +interface PerVariableChartsProps { + data: ImputationDataPoint[]; + variable: string; + metricType: 'quantile_loss' | 'log_loss'; +} + +export default function PerVariableCharts({ + data, + variable, + metricType, +}: PerVariableChartsProps) { + // Filter data for this specific variable + const variableData = useMemo(() => { + return data.filter( + (d) => + d.type === 'benchmark_loss' && + d.variable === variable && + d.metric_name === metricType && + d.split === 'test' + ); + }, [data, variable, metricType]); + + const methods = useMemo(() => { + return Array.from(new Set(variableData.map((d) => d.method))); + }, [variableData]); + + // For numerical variables (quantile_loss), show quantile breakdown + const quantileChartData = useMemo(() => { + if (metricType !== 'quantile_loss') return []; + + const numericData = variableData.filter( + (d) => + typeof d.quantile === 'number' && d.quantile >= 0 && d.quantile <= 1 + ); + + const quantileMap = new Map>(); + + numericData.forEach((d) => { + const quantile = Number(d.quantile); + if (!quantileMap.has(quantile)) { + quantileMap.set(quantile, { quantile: quantile.toFixed(2) }); + } + const entry = quantileMap.get(quantile)!; + entry[d.method] = d.metric_value; + }); + + return Array.from(quantileMap.values()).sort( + (a, b) => parseFloat(a.quantile as string) - parseFloat(b.quantile as string) + ); + }, [variableData, metricType]); + + // For categorical variables (log_loss), show simple bar comparison + const logLossChartData = useMemo(() => { + if (metricType !== 'log_loss') return []; + + const methodMap = new Map(); + + variableData.forEach((d) => { + if (d.metric_value !== null) { + if (!methodMap.has(d.method)) { + methodMap.set(d.method, { sum: 0, count: 0 }); + } + const entry = methodMap.get(d.method)!; + entry.sum += d.metric_value; + entry.count += 1; + } + }); + + return Array.from(methodMap.entries()).map( + ([method, { sum, count }]) => ({ + method, + value: sum / count, + }) + ); + }, [variableData, metricType]); + + if (variableData.length === 0) { + return ( +
+ No data available for variable: {variable} +
+ ); + } + + return ( +
+ {metricType === 'quantile_loss' && quantileChartData.length > 0 && ( +
+

+ Quantile Loss by Method for "{variable}" +

+ + + + + + value.toFixed(6)} + /> + + {methods.map((method, index) => ( + + ))} + + +
+ )} + + {metricType === 'log_loss' && logLossChartData.length > 0 && ( +
+

+ Log Loss by Method for "{variable}" +

+ + + + + + [value.toFixed(6), 'Log Loss']} + /> + + {logLossChartData.map((entry, index) => ( + + ))} + + + +
+ )} +
+ ); +} diff --git a/microimputation-dashboard/components/PredictorCorrelationMatrix.tsx b/microimputation-dashboard/components/PredictorCorrelationMatrix.tsx new file mode 100644 index 0000000..6a47799 --- /dev/null +++ b/microimputation-dashboard/components/PredictorCorrelationMatrix.tsx @@ -0,0 +1,466 @@ +'use client'; + +import { useMemo, useState } from 'react'; +import { ImputationDataPoint } from '@/types/imputation'; + +interface PredictorCorrelationMatrixProps { + data: ImputationDataPoint[]; +} + +interface CorrelationData { + predictor1: string; + predictor2: string; + value: number; +} + +export default function PredictorCorrelationMatrix({ data }: PredictorCorrelationMatrixProps) { + // Filter for predictor_correlation data + const correlationData = useMemo(() => { + return data.filter(d => d.type === 'predictor_correlation'); + }, [data]); + + // Filter for predictor-target mutual information data + const predictorTargetMIData = useMemo(() => { + return data.filter(d => d.type === 'predictor_target_mi'); + }, [data]); + + // Check available correlation metrics + const availableMetrics = useMemo(() => { + const metrics = new Set(correlationData.map(d => d.metric_name)); + return Array.from(metrics); + }, [correlationData]); + + // State for selected correlation metric + const [selectedMetric, setSelectedMetric] = useState(''); + + // Set default metric to pearson if available, otherwise first available + useMemo(() => { + if (!selectedMetric && availableMetrics.length > 0) { + setSelectedMetric(availableMetrics.includes('pearson') ? 'pearson' : availableMetrics[0]); + } + }, [availableMetrics, selectedMetric]); + + // Build correlation matrix data + const { predictors, matrixData } = useMemo(() => { + if (!selectedMetric) return { predictors: [], matrixData: new Map>() }; + + // Filter data for selected metric + const metricData = correlationData.filter(d => d.metric_name === selectedMetric); + + // Extract all unique predictors + const predSet = new Set(); + const correlations: CorrelationData[] = []; + + metricData.forEach(d => { + const pred1 = d.variable; + let pred2: string | undefined; + + try { + const additionalInfo = typeof d.additional_info === 'string' + ? JSON.parse(d.additional_info) + : d.additional_info; + pred2 = additionalInfo?.predictor2; + } catch (e) { + console.error('Failed to parse additional_info:', e); + } + + if (pred1 && pred2) { + predSet.add(pred1); + predSet.add(pred2); + correlations.push({ + predictor1: pred1, + predictor2: pred2, + value: d.metric_value ?? 
0, + }); + } + }); + + const predictorList = Array.from(predSet).sort(); + + // Build symmetric matrix + const matrix = new Map>(); + + predictorList.forEach(p => { + matrix.set(p, new Map()); + }); + + // Add diagonal (1.0 for self-correlation) + predictorList.forEach(p => { + matrix.get(p)!.set(p, 1.0); + }); + + // Add correlations (symmetric) + correlations.forEach(({ predictor1, predictor2, value }) => { + matrix.get(predictor1)!.set(predictor2, value); + matrix.get(predictor2)!.set(predictor1, value); + }); + + return { predictors: predictorList, matrixData: matrix }; + }, [correlationData, selectedMetric]); + + // Build predictor-target mutual information matrix + const { predictorsList, targetsList, miMatrixData } = useMemo(() => { + if (predictorTargetMIData.length === 0) { + return { predictorsList: [], targetsList: [], miMatrixData: new Map>() }; + } + + const predSet = new Set(); + const targSet = new Set(); + const miValues: Array<{ predictor: string; target: string; value: number }> = []; + + predictorTargetMIData.forEach(d => { + const predictor = d.variable; + let target: string | undefined; + + try { + const additionalInfo = typeof d.additional_info === 'string' + ? JSON.parse(d.additional_info) + : d.additional_info; + target = additionalInfo?.target; + } catch (e) { + console.error('Failed to parse additional_info:', e); + } + + if (predictor && target && d.metric_value !== null) { + predSet.add(predictor); + targSet.add(target); + miValues.push({ + predictor, + target, + value: d.metric_value, + }); + } + }); + + const predList = Array.from(predSet).sort(); + const targList = Array.from(targSet).sort(); + + // Build matrix + const matrix = new Map>(); + predList.forEach(p => { + matrix.set(p, new Map()); + }); + + miValues.forEach(({ predictor, target, value }) => { + matrix.get(predictor)!.set(target, value); + }); + + return { predictorsList: predList, targetsList: targList, miMatrixData: matrix }; + }, [predictorTargetMIData]); + + const hasPredictorTargetMI = predictorsList.length > 0 && targetsList.length > 0; + + if (correlationData.length === 0 || predictors.length === 0) { + return null; + } + + // Helper function to get color based on correlation value + const getColor = (value: number): string => { + // Scale from -1 to 1 + // Negative: red shades, Positive: blue shades, Zero: white + if (value === 1.0) return '#1e40af'; // Dark blue for diagonal + if (value >= 0.7) return '#3b82f6'; // Blue + if (value >= 0.4) return '#60a5fa'; // Light blue + if (value >= 0.2) return '#93c5fd'; // Very light blue + if (value >= -0.2) return '#f3f4f6'; // Nearly white + if (value >= -0.4) return '#fca5a5'; // Light red + if (value >= -0.7) return '#f87171'; // Red + return '#ef4444'; // Dark red + }; + + // Helper function to get color based on mutual information value (0 to ~1) + const getMIColor = (value: number): string => { + // Scale from 0 (white) to high values (dark purple) + if (value >= 0.15) return '#581c87'; // Dark purple + if (value >= 0.10) return '#7c3aed'; // Purple + if (value >= 0.07) return '#a78bfa'; // Light purple + if (value >= 0.04) return '#c4b5fd'; // Very light purple + if (value >= 0.02) return '#ddd6fe'; // Almost white purple + return '#f3f4f6'; // Nearly white + }; + + const cellSize = 80; // Size of each cell in pixels + + return ( +
+
+

+ Predictor correlation analysis +

+

+ Correlation matrix showing relationships between predictor variables +

+ + {/* Metric Selector */} + {availableMetrics.length > 1 && ( +
+ + +
+ )} +
+ + {/* Correlation Matrix */} +
+
+
+ {/* Top-left empty cell */} +
+ + {/* Column headers */} + {predictors.map((pred, idx) => ( +
+
+ {pred} +
+
+ ))} + + {/* Rows */} + {predictors.map((pred1, rowIdx) => ( + <> + {/* Row header */} +
+ {pred1} +
+ + {/* Correlation cells */} + {predictors.map((pred2, colIdx) => { + const value = matrixData.get(pred1)?.get(pred2) ?? 0; + // Use purple scale for mutual_info, blue/red scale for correlations + const bgColor = selectedMetric === 'mutual_info' ? getMIColor(value) : getColor(value); + const textColor = selectedMetric === 'mutual_info' + ? (value > 0.07 ? '#ffffff' : '#000000') + : (Math.abs(value) > 0.5 ? '#ffffff' : '#000000'); + + return ( +
+ {selectedMetric === 'mutual_info' ? value.toFixed(3) : value.toFixed(2)} +
+ ); + })} + + ))} +
+
+
+ + {/* Legend - only for correlation metrics (not mutual_info) */} + {selectedMetric !== 'mutual_info' && ( +
+

+ Interpretation: Correlation values range from -1 to 1. Positive values (blue) indicate variables that increase together, negative values (red) indicate variables that move in opposite directions, and values near 0 (white) indicate little to no linear relationship. +

+
+ Color scale: +
+
+
+
+
+
+
+
+
+ + ◄ Negative + | + Positive ► + +
+
+

+ Pearson vs Spearman: Pearson correlation measures linear relationships between variables and is sensitive to outliers. Spearman correlation first ranks the data and then measures monotonic relationships (whether variables consistently increase or decrease together), which makes it more robust to outliers and to relationships that are monotonic but not linear. Use Pearson when you expect a roughly linear relationship, and Spearman when the relationship may be non-linear or the data contain outliers. +

+
+
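To make the Pearson/Spearman distinction concrete, here is a small TypeScript sketch with simplified tie handling (ordinal ranks, no averaging); these helpers are illustrative only and are not part of the dashboard, which reads precomputed `predictor_correlation` values.

```typescript
// Illustrative sketch: Pearson correlation, and Spearman as Pearson applied
// to ranks. Ties are broken by position rather than averaged, for brevity.
function pearson(x: number[], y: number[]): number {
  const n = x.length;
  const mx = x.reduce((s, v) => s + v, 0) / n;
  const my = y.reduce((s, v) => s + v, 0) / n;
  let num = 0;
  let dx = 0;
  let dy = 0;
  for (let i = 0; i < n; i++) {
    num += (x[i] - mx) * (y[i] - my);
    dx += (x[i] - mx) ** 2;
    dy += (y[i] - my) ** 2;
  }
  return num / Math.sqrt(dx * dy);
}

function toRanks(values: number[]): number[] {
  const order = values.map((v, i) => [v, i] as const).sort((a, b) => a[0] - b[0]);
  const ranks = new Array<number>(values.length);
  order.forEach(([, originalIndex], rank) => {
    ranks[originalIndex] = rank + 1;
  });
  return ranks;
}

function spearman(x: number[], y: number[]): number {
  return pearson(toRanks(x), toRanks(y)); // rank first, then correlate
}
```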
+ )} + + {/* Predictor-Target Mutual Information Section */} +
+

+ Predictor-imputed variable mutual information +

+ + {hasPredictorTargetMI ? ( + <> +

+ Mutual information between predictor variables and imputed target variables +

+ + {/* MI Matrix */} +
+
+
+ {/* Top-left empty cell */} +
+ + {/* Column headers (targets) */} + {targetsList.map((target, idx) => ( +
+
+ {target} +
+
+ ))} + + {/* Rows */} + {predictorsList.map((predictor, rowIdx) => ( + <> + {/* Row header */} +
+ {predictor} +
+ + {/* MI cells */} + {targetsList.map((target, colIdx) => { + const value = miMatrixData.get(predictor)?.get(target) ?? 0; + const bgColor = getMIColor(value); + const textColor = value > 0.07 ? '#ffffff' : '#000000'; + + return ( +
+ {value.toFixed(3)} +
+ ); + })} + + ))} +
+
+
+ + ) : null} + + {/* Explanation box - always shown */} +
+

+ What is mutual information? Mutual information measures how much information one variable provides about another. Unlike correlation, it captures both linear and non-linear relationships between variables. Values range from 0 (independent variables) to higher positive values (strong dependency). +

+

+ Why measure it for imputed variables? Mutual information between predictors and imputed variables reveals which predictors are most informative for imputation. High mutual information indicates that a predictor strongly influences the imputed variable's distribution, making it crucial for accurate imputation. This helps validate that your imputation models are using the most relevant predictors and can identify when key predictive relationships exist in your data. +
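A minimal sketch of how mutual information between two discrete variables can be estimated from paired observations; the helper is illustrative only, since the dashboard reads precomputed `predictor_target_mi` values rather than computing them.

```typescript
// Illustrative sketch: empirical mutual information (in nats) between two
// discrete variables observed as paired samples.
function mutualInformation(x: string[], y: string[]): number {
  const n = x.length;
  const joint = new Map<string, number>();
  const px = new Map<string, number>();
  const py = new Map<string, number>();
  for (let i = 0; i < n; i++) {
    const key = `${x[i]}\u0000${y[i]}`;
    joint.set(key, (joint.get(key) ?? 0) + 1);
    px.set(x[i], (px.get(x[i]) ?? 0) + 1);
    py.set(y[i], (py.get(y[i]) ?? 0) + 1);
  }
  let mi = 0;
  for (const [key, count] of joint) {
    const [xi, yi] = key.split('\u0000');
    const pxy = count / n;
    mi += pxy * Math.log(pxy / ((px.get(xi)! / n) * (py.get(yi)! / n)));
  }
  return mi; // 0 indicates independence; larger values indicate dependency
}
```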

+ + {/* Color scale within explanation box */} +
+
+ Color scale: +
+
+
+
+
+
+
+
+ + Weak + + Strong ► + +
+
+
+ + {/* Message when no predictor-target data is available */} + {!hasPredictorTargetMI && ( +
+

+ Note: No predictor-imputed variable mutual information data was found in this CSV file. It is recommended to include this data in your analysis to understand which predictors are most informative for imputing each variable. This helps validate that your imputation models are leveraging the most relevant predictive relationships in your data. +

+
+ )} +
+
+ ); +} diff --git a/microimputation-dashboard/components/PredictorOrderingRobustness.tsx b/microimputation-dashboard/components/PredictorOrderingRobustness.tsx new file mode 100644 index 0000000..fea48da --- /dev/null +++ b/microimputation-dashboard/components/PredictorOrderingRobustness.tsx @@ -0,0 +1,391 @@ +'use client'; + +import { useMemo } from 'react'; +import { ImputationDataPoint } from '@/types/imputation'; +import { BarChart, Bar, XAxis, YAxis, CartesianGrid, Tooltip, Legend, ResponsiveContainer, Cell } from 'recharts'; + +interface PredictorOrderingRobustnessProps { + data: ImputationDataPoint[]; +} + +interface ProgressiveStep { + step: number; + predictorAdded: string; + predictors: string[]; + cumulativeImprovement: number; + marginalImprovement: number; +} + +interface PredictorImportance { + predictor: string; + relativeImpact: number; + lossIncrease: number; +} + +export default function PredictorOrderingRobustness({ data }: PredictorOrderingRobustnessProps) { + // Filter for progressive inclusion data + const progressiveInclusionData = useMemo(() => { + return data.filter(d => d.type === 'progressive_inclusion'); + }, [data]); + + // Filter for predictor importance data + const predictorImportanceData = useMemo(() => { + return data.filter(d => d.type === 'predictor_importance'); + }, [data]); + + // Parse progressive inclusion steps + const progressiveSteps = useMemo(() => { + const stepData: ProgressiveStep[] = []; + const cumulativeData = progressiveInclusionData.filter( + d => d.metric_name === 'cumulative_improvement' + ); + + cumulativeData.forEach(d => { + try { + const additionalInfo = typeof d.additional_info === 'string' + ? JSON.parse(d.additional_info) + : d.additional_info; + + const step = additionalInfo?.step; + const predictorAdded = additionalInfo?.predictor_added; + const predictors = additionalInfo?.predictors || []; + + if (step !== undefined && predictorAdded) { + // Find corresponding marginal improvement + const marginalData = progressiveInclusionData.find( + m => m.metric_name === 'marginal_improvement' && + JSON.parse(typeof m.additional_info === 'string' ? m.additional_info : JSON.stringify(m.additional_info))?.step === step + ); + + stepData.push({ + step, + predictorAdded, + predictors, + cumulativeImprovement: d.metric_value ?? 0, + marginalImprovement: marginalData?.metric_value ?? 0, + }); + } + } catch (e) { + console.error('Failed to parse progressive inclusion data:', e); + } + }); + + return stepData.sort((a, b) => a.step - b.step); + }, [progressiveInclusionData]); + + // Parse predictor importance + const importanceData = useMemo(() => { + const importanceMap = new Map(); + + predictorImportanceData.forEach(d => { + try { + const additionalInfo = typeof d.additional_info === 'string' + ? JSON.parse(d.additional_info) + : d.additional_info; + + const predictor = additionalInfo?.removed_predictor || d.variable; + + if (predictor) { + if (!importanceMap.has(predictor)) { + importanceMap.set(predictor, { + predictor, + relativeImpact: 0, + lossIncrease: 0, + }); + } + + const entry = importanceMap.get(predictor)!; + if (d.metric_name === 'relative_impact') { + entry.relativeImpact = d.metric_value ?? 0; + } else if (d.metric_name === 'loss_increase') { + entry.lossIncrease = d.metric_value ?? 
0; + } + } + } catch (e) { + console.error('Failed to parse predictor importance data:', e); + } + }); + + return Array.from(importanceMap.values()).sort( + (a, b) => Math.abs(b.relativeImpact) - Math.abs(a.relativeImpact) + ); + }, [predictorImportanceData]); + + const hasProgressiveData = progressiveSteps.length > 0; + const hasImportanceData = importanceData.length > 0; + + if (!hasProgressiveData && !hasImportanceData) { + return null; + } + + // Find best combination (highest cumulative improvement) + const bestCombination = progressiveSteps.reduce((best, current) => + current.cumulativeImprovement > best.cumulativeImprovement ? current : best, + progressiveSteps[0] + ); + + return ( +
+
+

+ Predictor selection and robustness +

+

+ Analysis of predictor combinations and their impact on model performance +

+
+ + {/* Progressive Inclusion Section */} + {hasProgressiveData && ( +
+

+ Predictor addition order +

+ + {/* Explanation */} +
+

+ How this works: This analysis adds predictors one at a time, choosing at each step the predictor that improves performance the most. This greedy, step-by-step approach is efficient but does not test every possible combination of predictors. Note that the results may also differ depending on the model type passed to the `progressive_predictor_inclusion` function that produced them. +

+

+ Reading the chart: The bars show cumulative improvement from + baseline as predictors are added. Larger improvements indicate more valuable + predictor combinations. +

+
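The greedy loop this explanation describes can be sketched as follows; the `evaluate` callback is a hypothetical stand-in for the cross-validated loss that microimpute's `progressive_predictor_inclusion` computes, so this illustrates the selection logic rather than the actual implementation.

```typescript
// Illustrative sketch: greedy forward selection. At each step, add the
// remaining predictor whose inclusion yields the lowest loss.
function greedyForwardSelection(
  candidates: string[],
  evaluate: (predictors: string[]) => number // lower is better (hypothetical)
): { predictors: string[]; loss: number }[] {
  const selected: string[] = [];
  const remaining = new Set(candidates);
  const steps: { predictors: string[]; loss: number }[] = [];

  while (remaining.size > 0) {
    let bestPredictor: string | null = null;
    let bestLoss = Infinity;
    for (const candidate of remaining) {
      const loss = evaluate([...selected, candidate]);
      if (loss < bestLoss) {
        bestLoss = loss;
        bestPredictor = candidate;
      }
    }
    if (bestPredictor === null) break;
    selected.push(bestPredictor);
    remaining.delete(bestPredictor);
    steps.push({ predictors: [...selected], loss: bestLoss });
  }
  return steps; // one entry per step, in the order predictors were added
}
```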
+ + {/* Best Combination Highlight */} + {bestCombination && ( +
+

+ Best predictor combination +

+
+
+

+ Predictors:{' '} + + {bestCombination.predictors.join(' → ')} + +

+

+ Cumulative improvement:{' '} + + {(bestCombination.cumulativeImprovement * 100).toFixed(3)}% + + {' '} + + (relative to the first predictor added, which was the best single predictor) + +

+
+
+
+ )} + + {/* Step-by-step visualization */} +
+ {progressiveSteps.map((step) => { + const isPositive = step.marginalImprovement >= 0; + const isBest = step.step === bestCombination?.step; + + return ( +
+
+
+
+ {step.step} +
+
+ +
+
+ Add: + + {step.predictorAdded} + +
+ +
+
+ Marginal improvement: +
+
+
+
+ + {isPositive ? '+' : ''}{(step.marginalImprovement * 100).toFixed(3)}% + +
+
+ +
+ Cumulative improvement: +
+
+
+
+ + {(step.cumulativeImprovement * 100).toFixed(3)}% + +
+
+
+ +
+ Current predictors: {step.predictors.join(' → ')} +
+
+
+
+ ); + })} +
+
+ )} + + {/* Predictor Importance Section */} + {hasImportanceData && ( +
+

+ Predictor robustness check +

+ + {/* Explanation */} +
+

+ What this shows: This analysis measures how much performance + degrades when each predictor is removed. Predictors that cause large performance + drops when removed are critical to the model's accuracy. +

+

+ Reading the chart: Positive values (bars pointing right) indicate + performance worsens when the predictor is removed, meaning the predictor is helpful. + Negative values suggest removing the predictor might actually improve performance. +

+
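The leave-one-out scheme behind these numbers can be sketched like this, again with a hypothetical `evaluate` loss callback; the dashboard itself only reads the `relative_impact` and `loss_increase` values from the CSV.

```typescript
// Illustrative sketch: leave-one-out predictor importance. Remove each
// predictor in turn and record how much the loss increases relative to the
// full model (positive = the predictor was helping).
function leaveOneOutImportance(
  predictors: string[],
  evaluate: (predictors: string[]) => number // lower is better (hypothetical)
): { predictor: string; lossIncrease: number; relativeImpact: number }[] {
  const fullLoss = evaluate(predictors);
  return predictors.map(predictor => {
    const reducedLoss = evaluate(predictors.filter(p => p !== predictor));
    const lossIncrease = reducedLoss - fullLoss;
    return {
      predictor,
      lossIncrease,
      relativeImpact: fullLoss !== 0 ? (lossIncrease / fullLoss) * 100 : 0,
    };
  });
}
```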
+ + {/* Bar chart */} +
+ + + + `${val.toFixed(1)}%`} tick={{ fill: '#000000' }} /> + + { + if (name === 'relativeImpact') { + return [`${value.toFixed(3)}%`, 'Relative Impact']; + } + return [value.toFixed(6), 'Loss Increase']; + }} + /> + + + {importanceData.map((entry, index) => ( + = 0 ? '#ef4444' : '#22c55e'} + /> + ))} + + + +
+ + {/* Detailed table */} +
+ + + + + + + + + + + {importanceData.map((item) => { + const isHelpful = item.relativeImpact > 1; + const isCritical = item.relativeImpact > 10; + const isHarmful = item.relativeImpact < -1; + + let assessment = 'Minimal impact'; + let assessmentColor = 'text-gray-600'; + + if (isCritical) { + assessment = 'Critical predictor'; + assessmentColor = 'text-red-700 font-semibold'; + } else if (isHelpful) { + assessment = 'Helpful predictor'; + assessmentColor = 'text-orange-600'; + } else if (isHarmful) { + assessment = 'Consider removing'; + assessmentColor = 'text-green-600'; + } + + return ( + + + + + + + ); + })} + +
+ Predictor + + Impact when removed + + Loss increase + + Assessment +
+ {item.predictor} + + = 0 ? 'text-red-600' : 'text-green-600'}> + {item.relativeImpact >= 0 ? '+' : ''}{item.relativeImpact.toFixed(3)}% + + + {item.lossIncrease >= 0 ? '+' : ''}{item.lossIncrease.toFixed(6)} + + {assessment} +
+
+
+ )} +
+ ); +} diff --git a/microimputation-dashboard/components/VisualizationDashboard.tsx b/microimputation-dashboard/components/VisualizationDashboard.tsx index f5806da..67a1d88 100644 --- a/microimputation-dashboard/components/VisualizationDashboard.tsx +++ b/microimputation-dashboard/components/VisualizationDashboard.tsx @@ -1,49 +1,597 @@ 'use client'; +import { useMemo, useState } from 'react'; import { ImputationDataPoint } from '@/types/imputation'; -import { GitHubArtifactInfo } from '@/utils/deeplinks'; +import { GitHubArtifactInfo, createShareableUrl } from '@/utils/deeplinks'; +import BenchmarkLossCharts from './BenchmarkLossCharts'; +import PerVariableCharts from './PerVariableCharts'; +import VisualizationTabs from './VisualizationTabs'; +import PredictorCorrelationMatrix from './PredictorCorrelationMatrix'; +import PredictorOrderingRobustness from './PredictorOrderingRobustness'; +import ImputationResults from './ImputationResults'; +import { Share } from 'lucide-react'; interface VisualizationDashboardProps { data: ImputationDataPoint[]; fileName: string; - comparisonData?: { - data: ImputationDataPoint[]; - filename: string; - }; - githubArtifactInfo?: { - primary: GitHubArtifactInfo | null; - secondary?: GitHubArtifactInfo | null; - } | null; + githubArtifactInfo?: GitHubArtifactInfo | null; + onBackToUpload: () => void; } export default function VisualizationDashboard({ data, fileName, - comparisonData, - githubArtifactInfo + githubArtifactInfo, + onBackToUpload, }: VisualizationDashboardProps) { + const [activeTab, setActiveTab] = useState('overview'); + + // Handle sharing the dashboard via deeplink + const handleShare = async () => { + if (!githubArtifactInfo) return; + + try { + const shareUrl = createShareableUrl(githubArtifactInfo); + await navigator.clipboard.writeText(shareUrl); + alert('Shareable URL copied to clipboard!'); + } catch (err) { + console.error('Failed to copy URL:', err); + alert('Failed to copy URL to clipboard'); + } + }; + + // Analyze data structure and available visualizations + const dataAnalysis = useMemo(() => { + const types = new Set(data.map(d => d.type)); + const hasBenchmarkLoss = types.has('benchmark_loss'); + + // Extract individual variables (not aggregates) + const numericalVars: string[] = []; + const categoricalVars: string[] = []; + + // Get all unique methods from benchmark data + const allMethods = hasBenchmarkLoss + ? 
Array.from(new Set(data.filter(d => d.type === 'benchmark_loss').map(d => d.method))) + : []; + + if (hasBenchmarkLoss) { + const benchmarkData = data.filter(d => d.type === 'benchmark_loss'); + + // Find variables with quantile_loss (numerical) + const qlVars = new Set( + benchmarkData + .filter(d => + d.metric_name === 'quantile_loss' && + !d.variable.includes('_mean_all') + ) + .map(d => d.variable) + ); + numericalVars.push(...Array.from(qlVars)); + + // Find variables with log_loss (categorical) + const llVars = new Set( + benchmarkData + .filter(d => + d.metric_name === 'log_loss' && + !d.variable.includes('_mean_all') && + d.metric_value !== null + ) + .map(d => d.variable) + ); + categoricalVars.push(...Array.from(llVars)); + } + + // Check for actual distribution distance data (wasserstein or kl_divergence) + const distributionData = data.filter(d => d.type === 'distribution_distance'); + const hasWasserstein = distributionData.some(d => d.metric_name === 'wasserstein_distance' && d.metric_value !== null); + const hasKLDivergence = distributionData.some(d => d.metric_name === 'kl_divergence' && d.metric_value !== null); + const hasDistributionDistance = hasWasserstein || hasKLDivergence; + + // Check for predictor correlation data + const correlationData = data.filter(d => d.type === 'predictor_correlation'); + const hasPredictorCorrelation = correlationData.length > 0 && correlationData.some(d => d.metric_value !== null); + + // Check for predictor ordering/importance data + const progressiveData = data.filter(d => d.type === 'progressive_inclusion'); + const importanceData = data.filter(d => d.type === 'predictor_importance'); + const hasPredictorOrdering = (progressiveData.length > 0 && progressiveData.some(d => d.metric_value !== null)) || + (importanceData.length > 0 && importanceData.some(d => d.metric_value !== null)); + + // Find imputed variables (from distribution_distance data) + const imputedVars = new Set(); + distributionData.forEach(d => { + if (d.variable && d.metric_value !== null) { + imputedVars.add(d.variable); + } + }); + + // Calculate best performing model (same logic as BenchmarkLossCharts) + let bestModel = ''; + + if (hasBenchmarkLoss) { + const benchmarkData = data.filter(d => d.type === 'benchmark_loss'); + const methods = Array.from(new Set(benchmarkData.map(d => d.method))); + + // Filter quantile and log loss data (matching BenchmarkLossCharts logic) + const quantileLossData = benchmarkData.filter( + d => d.metric_name === 'quantile_loss' && + d.split === 'test' && + typeof d.quantile === 'number' && + d.quantile >= 0 && + d.quantile <= 1 + ); + + const logLossData = benchmarkData.filter( + d => d.metric_name === 'log_loss' && + d.split === 'test' && + d.metric_value !== null + ); + + // Calculate average quantile loss per method + const quantileLossAvg = new Map(); + const quantileVarCounts = new Map>(); + + if (quantileLossData.length > 0) { + const methodSums = new Map(); + quantileLossData.forEach(d => { + if (d.metric_value !== null) { + if (!methodSums.has(d.method)) { + methodSums.set(d.method, { sum: 0, count: 0 }); + } + const entry = methodSums.get(d.method)!; + entry.sum += d.metric_value; + entry.count += 1; + + if (!quantileVarCounts.has(d.method)) { + quantileVarCounts.set(d.method, new Set()); + } + quantileVarCounts.get(d.method)!.add(d.variable); + } + }); + methodSums.forEach((value, method) => { + quantileLossAvg.set(method, value.sum / value.count); + }); + } + + // Calculate average log loss per method + const logLossAvg = new 
Map(); + const logLossVarCounts = new Map>(); + + if (logLossData.length > 0) { + const methodSums = new Map(); + logLossData.forEach(d => { + if (d.metric_value !== null) { + if (!methodSums.has(d.method)) { + methodSums.set(d.method, { sum: 0, count: 0 }); + } + const entry = methodSums.get(d.method)!; + entry.sum += d.metric_value; + entry.count += 1; + + if (!logLossVarCounts.has(d.method)) { + logLossVarCounts.set(d.method, new Set()); + } + logLossVarCounts.get(d.method)!.add(d.variable); + } + }); + methodSums.forEach((value, method) => { + logLossAvg.set(method, value.sum / value.count); + }); + } + + // Rank methods by each metric (lower is better) + const rankMethods = (avgMap: Map): Map => { + const sorted = Array.from(avgMap.entries()).sort((a, b) => a[1] - b[1]); + const ranks = new Map(); + sorted.forEach(([method], index) => { + ranks.set(method, index + 1); + }); + return ranks; + }; + + const quantileRanks = rankMethods(quantileLossAvg); + const logLossRanks = rankMethods(logLossAvg); + + // Calculate weighted combined rank + const combinedRanks = new Map(); + methods.forEach(method => { + const qRank = quantileRanks.get(method); + const lRank = logLossRanks.get(method); + const nQuantileVars = quantileVarCounts.get(method)?.size || 0; + const nLogLossVars = logLossVarCounts.get(method)?.size || 0; + const totalVars = nQuantileVars + nLogLossVars; + + if (totalVars > 0) { + let weightedRank = 0; + if (qRank !== undefined) { + weightedRank += nQuantileVars * qRank; + } + if (lRank !== undefined) { + weightedRank += nLogLossVars * lRank; + } + combinedRanks.set(method, weightedRank / totalVars); + } else { + combinedRanks.set(method, Infinity); + } + }); + + // Find best method (lowest combined rank) + let bestRank = Infinity; + combinedRanks.forEach((rank, method) => { + if (rank < bestRank) { + bestRank = rank; + bestModel = method; + } + }); + } + + // Calculate quality scores by variable for model performance + let modelExcellent = 0; + let modelGood = 0; + let modelPoor = 0; + let modelScore = 0; + let modelQuality = ''; + + if (hasBenchmarkLoss && bestModel) { + const benchmarkData = data.filter(d => d.type === 'benchmark_loss'); + const bestModelVars = benchmarkData.filter( + d => d.method === bestModel && d.split === 'test' && + d.quantile === 'mean' && !d.variable.includes('_mean_all') && d.metric_value !== null + ); + + bestModelVars.forEach(d => { + const loss = d.metric_value ?? 0; + if (loss < 0.02) modelExcellent++; + else if (loss < 0.05) modelGood++; + else modelPoor++; + }); + + const totalModelVars = modelExcellent + modelGood + modelPoor; + if (totalModelVars > 0) { + modelScore = ((modelExcellent * 100) + (modelGood * 75)) / totalModelVars; + if (modelScore >= 90) modelQuality = 'Excellent'; + else if (modelScore >= 70) modelQuality = 'Good'; + else modelQuality = 'Needs improvement'; + } + } + + // Calculate quality scores by variable for distributional accuracy + let distExcellent = 0; + let distGood = 0; + let distPoor = 0; + let distScore = 0; + let distQuality = ''; + + distributionData.forEach(d => { + const value = d.metric_value ?? 
0; + // Different thresholds for Wasserstein vs KL-divergence + if (d.metric_name === 'wasserstein_distance') { + if (value < 0.01) distExcellent++; + else if (value < 0.05) distGood++; + else distPoor++; + } else if (d.metric_name === 'kl_divergence') { + if (value < 0.1) distExcellent++; + else if (value < 0.5) distGood++; + else distPoor++; + } + }); + + const totalDistVars = distExcellent + distGood + distPoor; + if (totalDistVars > 0) { + distScore = ((distExcellent * 100) + (distGood * 75)) / totalDistVars; + if (distScore >= 90) distQuality = 'Excellent'; + else if (distScore >= 70) distQuality = 'Good'; + else distQuality = 'Needs improvement'; + } + + // Calculate overall quality (weighted average) + let overallScore = 0; + let overallQuality = ''; + let overallColor = ''; + const hasModelScore = modelScore > 0; + const hasDistScore = distScore > 0; + + if (hasModelScore && hasDistScore) { + overallScore = (modelScore + distScore) / 2; + } else if (hasModelScore) { + overallScore = modelScore; + } else if (hasDistScore) { + overallScore = distScore; + } + + if (overallScore >= 90) { + overallQuality = 'Excellent quality'; + overallColor = 'text-green-700 bg-green-50 border-green-500'; + } else if (overallScore >= 70) { + overallQuality = 'Good quality'; + overallColor = 'text-yellow-700 bg-yellow-50 border-yellow-500'; + } else if (overallScore > 0) { + overallQuality = 'Needs improvement'; + overallColor = 'text-red-700 bg-red-50 border-red-500'; + } + + return { + hasBenchmarkLoss, + hasDistributionDistance, + hasPredictorCorrelation, + hasPredictorOrdering, + numericalVars, + categoricalVars, + hasPerVariableData: numericalVars.length > 0 || categoricalVars.length > 0, + imputedVars: Array.from(imputedVars).sort(), + bestModel, + overallScore, + overallQuality, + overallColor, + modelScore, + modelQuality, + modelExcellent, + modelGood, + modelPoor, + distScore, + distQuality, + distExcellent, + distGood, + distPoor, + allMethods, + }; + }, [data]); + + // Build tabs based on available data + const tabs = useMemo(() => { + const tabsList = []; + + if (dataAnalysis.hasBenchmarkLoss) { + tabsList.push({ id: 'overview', label: 'Model benchmarking' }); + } + + if (dataAnalysis.hasDistributionDistance) { + tabsList.push({ + id: 'imputation', + label: 'Imputation results', + }); + } + + if (dataAnalysis.numericalVars.length > 0) { + tabsList.push({ + id: 'numerical', + label: 'Numerical Variables', + count: dataAnalysis.numericalVars.length, + }); + } + + if (dataAnalysis.categoricalVars.length > 0) { + tabsList.push({ + id: 'categorical', + label: 'Categorical Variables', + count: dataAnalysis.categoricalVars.length, + }); + } + + if (dataAnalysis.hasPredictorCorrelation) { + tabsList.push({ + id: 'correlation', + label: 'Predictor correlation', + }); + } + + if (dataAnalysis.hasPredictorOrdering) { + tabsList.push({ + id: 'ordering', + label: 'Predictor selection', + }); + } + + return tabsList; + }, [dataAnalysis]); + + if (!dataAnalysis.hasBenchmarkLoss) { + return ( +
+ {/* Header */} +
+
+
+

Microimpute Dashboard

+

+ Loaded: {fileName} +

+
+
+ {githubArtifactInfo && ( + + )} + +
+
+
+
+
+

No visualization data found

+

+ Upload a CSV file with benchmark_loss data to see visualizations. +

+
+
+
+ ); + } + return ( -
-
-

Visualization Dashboard

-

Coming Soon...

-

- The visualization components for microimputation results will be implemented here. -

-
-

- Successfully loaded: {fileName} -

-

- Records: {data.length} -

- {comparisonData && ( -

- Comparison file: {comparisonData.filename} ({comparisonData.data.length} records) +

+ {/* Header */} +
+
+
+

Microimpute Dashboard

+

+ Loaded: {fileName}

- )} +
+
+ {githubArtifactInfo && ( + + )} + +
+ + {/* Imputation Summary */} +
+

Imputation summary

+

+ Assessment of the quality of the imputations produced by the best-performing model (or the only model selected) +

+ +
+ {/* Imputed Variables Section */} +
+

+ Imputed Variables +

+ {dataAnalysis.imputedVars.length > 0 ? ( +
+

+ {dataAnalysis.imputedVars.length} variable{dataAnalysis.imputedVars.length !== 1 ? 's' : ''} imputed +

+
    + {dataAnalysis.imputedVars.map((variable) => ( +
  • + {variable} +
  • + ))} +
+
+ ) : ( +

+ No imputed variable information available in the CSV +

+ )} +
+ + {/* Best Model Section */} +
+

+ {dataAnalysis.allMethods.length === 1 ? 'Imputation Model' : 'Best Performing Model'} +

+ {dataAnalysis.bestModel ? ( +
+
+ + {dataAnalysis.bestModel} + + {dataAnalysis.allMethods.length === 1 && ( + + Only model + + )} + {dataAnalysis.allMethods.length > 1 && ( + + Best of {dataAnalysis.allMethods.length} + + )} +
+ {dataAnalysis.allMethods.length > 1 && ( +

+ Selected based on combined performance across all cross-validation loss metrics +

+ )} +
+ ) : ( +

+ No model information available in the CSV +

+ )} +
+
+
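The `quantile_loss` rows that feed this ranking are assumed to be pinball losses; a minimal sketch of that loss at a single quantile level is shown below (the exact computation lives in microimpute, not in the dashboard).

```typescript
// Illustrative sketch: pinball (quantile) loss at level tau, averaged over
// observations. Under-predictions are weighted by tau, over-predictions by
// (1 - tau); lower is better and 0 means perfect predictions.
function pinballLoss(actual: number[], predicted: number[], tau: number): number {
  let total = 0;
  for (let i = 0; i < actual.length; i++) {
    const diff = actual[i] - predicted[i];
    total += diff >= 0 ? tau * diff : (tau - 1) * diff;
  }
  return total / actual.length;
}
```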
+ + {/* Tabs Navigation */} + {tabs.length > 1 && ( +
+ +
+ )} + + {/* Tab Content */} +
+ {/* Overview Tab */} + {activeTab === 'overview' && ( + + )} + + {/* Numerical Variables Tab */} + {activeTab === 'numerical' && ( +
+ {dataAnalysis.numericalVars.map((variable) => ( +
+ +
+ ))} +
+ )} + + {/* Categorical Variables Tab */} + {activeTab === 'categorical' && ( +
+ {dataAnalysis.categoricalVars.map((variable) => ( +
+ +
+ ))} +
+ )} + + {/* Predictor Correlation Tab */} + {activeTab === 'correlation' && ( + + )} + + {/* Predictor Ordering and Robustness Tab */} + {activeTab === 'ordering' && ( + + )} + + {/* Imputation Results Tab */} + {activeTab === 'imputation' && ( + + )} +
); } \ No newline at end of file diff --git a/microimputation-dashboard/components/VisualizationTabs.tsx b/microimputation-dashboard/components/VisualizationTabs.tsx new file mode 100644 index 0000000..25c9736 --- /dev/null +++ b/microimputation-dashboard/components/VisualizationTabs.tsx @@ -0,0 +1,56 @@ +'use client'; + +interface Tab { + id: string; + label: string; + count?: number; +} + +interface VisualizationTabsProps { + tabs: Tab[]; + activeTab: string; + onTabChange: (tabId: string) => void; +} + +export default function VisualizationTabs({ + tabs, + activeTab, + onTabChange, +}: VisualizationTabsProps) { + return ( +
+ +
+ ); +} diff --git a/microimputation-dashboard/package.json b/microimputation-dashboard/package.json index ab76b36..83f379e 100644 --- a/microimputation-dashboard/package.json +++ b/microimputation-dashboard/package.json @@ -4,7 +4,7 @@ "private": true, "scripts": { "dev": "next dev --turbopack", - "build": "next build --turbopack", + "build": "next build", "start": "next start", "lint": "eslint" }, diff --git a/microimputation-dashboard/public/microimputation_results.csv b/microimputation-dashboard/public/microimputation_results.csv new file mode 100644 index 0000000..9ef58a5 --- /dev/null +++ b/microimputation-dashboard/public/microimputation_results.csv @@ -0,0 +1,294 @@ +type,method,variable,quantile,metric_name,metric_value,split,additional_info +benchmark_loss,QRF,quantile_loss_mean_all,0.05,quantile_loss,0.0019931334519540313,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.1,quantile_loss,0.003676832529510976,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.15,quantile_loss,0.005051964323131603,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.2,quantile_loss,0.007150055272652542,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.25,quantile_loss,0.007272266767142256,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.3,quantile_loss,0.008540254219041532,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.35,quantile_loss,0.007621934560531267,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.4,quantile_loss,0.00916139860523047,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.45,quantile_loss,0.008137742361262587,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.5,quantile_loss,0.008501949409742712,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.55,quantile_loss,0.006943288815511526,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.6,quantile_loss,0.006674970777747578,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.65,quantile_loss,0.006300488362370938,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.7,quantile_loss,0.005762547086974565,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.75,quantile_loss,0.005910973227513356,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.8,quantile_loss,0.005674293245652812,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.85,quantile_loss,0.0045532698330881145,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.9,quantile_loss,0.003777490459909232,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.95,quantile_loss,0.0022857221356438738,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.05,quantile_loss,0.004976762851613436,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.1,quantile_loss,0.007958901707037789,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.15,quantile_loss,0.011348256884255009,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.2,quantile_loss,0.014479045607066829,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.25,quantile_loss,0.01804729212868635,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.3,quantile_loss,0.020713966247658862,test,"{""n_variables"": 2}" 
+benchmark_loss,QRF,quantile_loss_mean_all,0.35,quantile_loss,0.022742931417508282,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.4,quantile_loss,0.02258721277917371,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.45,quantile_loss,0.023351581275292608,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.5,quantile_loss,0.023625607730980425,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.55,quantile_loss,0.023186470004863385,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.6,quantile_loss,0.022905432991342744,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.65,quantile_loss,0.022230731438156376,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.7,quantile_loss,0.02057637886132135,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.75,quantile_loss,0.01912773338390523,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.8,quantile_loss,0.01739101077609373,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.85,quantile_loss,0.013723503511700213,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.9,quantile_loss,0.010967503180152355,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,0.95,quantile_loss,0.0071899999204732045,test,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,mean,quantile_loss,0.006052135549716419,train,"{""n_variables"": 2}" +benchmark_loss,QRF,quantile_loss_mean_all,mean,quantile_loss,0.017217385405120095,test,"{""n_variables"": 2}" +benchmark_loss,QRF,log_loss_mean_all,0.05,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.1,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.15,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.2,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.25,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.3,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.35,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.4,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.45,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.5,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.55,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.6,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.65,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.7,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.75,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.8,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.85,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.9,log_loss,2.614446654065684,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.95,log_loss,2.614446654065684,train,"{""n_variables"": 1}" 
+benchmark_loss,QRF,log_loss_mean_all,0.05,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.1,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.15,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.2,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.25,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.3,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.35,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.4,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.45,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.5,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.55,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.6,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.65,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.7,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.75,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.8,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.85,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.9,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,0.95,log_loss,6.156691078407442,test,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,mean,log_loss,2.6144466540656834,train,"{""n_variables"": 1}" +benchmark_loss,QRF,log_loss_mean_all,mean,log_loss,6.156691078407443,test,"{""n_variables"": 1}" +benchmark_loss,OLS,quantile_loss_mean_all,0.05,quantile_loss,0.003793097970980961,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.1,quantile_loss,0.0064813273535348865,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.15,quantile_loss,0.008718559399878228,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.2,quantile_loss,0.010655610008580571,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.25,quantile_loss,0.012256033388805257,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.3,quantile_loss,0.013565140770903024,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.35,quantile_loss,0.014611889242340062,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.4,quantile_loss,0.015425975031196375,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.45,quantile_loss,0.01605944525518228,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.5,quantile_loss,0.01647854556609527,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.55,quantile_loss,0.01661220911122457,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.6,quantile_loss,0.01645760847896342,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.65,quantile_loss,0.016061546015992825,train,"{""n_variables"": 2}" 
+benchmark_loss,OLS,quantile_loss_mean_all,0.7,quantile_loss,0.015395729893732578,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.75,quantile_loss,0.014398366337920352,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.8,quantile_loss,0.012997538438059541,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.85,quantile_loss,0.011142832423357984,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.9,quantile_loss,0.008676966523108521,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.95,quantile_loss,0.0053039200554893294,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.05,quantile_loss,0.003875743024333408,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.1,quantile_loss,0.006636892853346667,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.15,quantile_loss,0.008964045456920146,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.2,quantile_loss,0.010925268697485854,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.25,quantile_loss,0.012586171018988979,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.3,quantile_loss,0.013971078532560128,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.35,quantile_loss,0.015078371048174203,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.4,quantile_loss,0.015960673093506517,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.45,quantile_loss,0.016611314804344936,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.5,quantile_loss,0.017023427694261576,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.55,quantile_loss,0.017162262823655853,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.6,quantile_loss,0.016994331164241493,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.65,quantile_loss,0.016563877768320707,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.7,quantile_loss,0.01584349745574873,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.75,quantile_loss,0.014830479499612956,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.8,quantile_loss,0.013414418494530157,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.85,quantile_loss,0.011604173888267462,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.9,quantile_loss,0.009116693282835488,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,0.95,quantile_loss,0.005653910575626517,test,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,mean,quantile_loss,0.012373281119228738,train,"{""n_variables"": 2}" +benchmark_loss,OLS,quantile_loss_mean_all,mean,quantile_loss,0.012779822693513777,test,"{""n_variables"": 2}" +benchmark_loss,OLS,log_loss_mean_all,0.05,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.1,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.15,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.2,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.25,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" 
+benchmark_loss,OLS,log_loss_mean_all,0.3,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.35,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.4,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.45,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.5,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.55,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.6,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.65,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.7,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.75,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.8,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.85,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.9,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.95,log_loss,0.9858032860627665,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.05,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.1,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.15,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.2,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.25,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.3,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.35,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.4,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.45,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.5,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.55,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.6,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.65,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.7,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.75,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.8,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.85,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.9,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,0.95,log_loss,1.0101334442421657,test,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,mean,log_loss,0.9858032860627663,train,"{""n_variables"": 1}" +benchmark_loss,OLS,log_loss_mean_all,mean,log_loss,1.0101334442421654,test,"{""n_variables"": 1}" 
+benchmark_loss,QuantReg,quantile_loss_mean_all,mean,quantile_loss,,train,"{""n_variables"": 0}" +benchmark_loss,QuantReg,quantile_loss_mean_all,mean,quantile_loss,,test,"{""n_variables"": 0}" +benchmark_loss,QuantReg,log_loss_mean_all,mean,log_loss,,train,"{""n_variables"": 0}" +benchmark_loss,QuantReg,log_loss_mean_all,mean,log_loss,,test,"{""n_variables"": 0}" +benchmark_loss,Matching,quantile_loss_mean_all,0.05,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.1,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.15,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.2,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.25,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.3,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.35,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.4,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.45,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.5,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.55,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.6,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.65,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.7,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.75,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.8,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.85,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.9,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.95,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.05,quantile_loss,0.022039382576829306,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.1,quantile_loss,0.022147598558993443,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.15,quantile_loss,0.022255814541157576,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.2,quantile_loss,0.022364030523321716,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.25,quantile_loss,0.022472246505485852,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.3,quantile_loss,0.02258046248764999,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.35,quantile_loss,0.022688678469814125,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.4,quantile_loss,0.02279689445197826,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.45,quantile_loss,0.0229051104341424,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.5,quantile_loss,0.02301332641630653,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.55,quantile_loss,0.02312154239847067,test,"{""n_variables"": 2}" 
+benchmark_loss,Matching,quantile_loss_mean_all,0.6,quantile_loss,0.023229758380634808,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.65,quantile_loss,0.023337974362798945,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.7,quantile_loss,0.023446190344963078,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.75,quantile_loss,0.023554406327127214,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.8,quantile_loss,0.02366262230929135,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.85,quantile_loss,0.023770838291455487,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.9,quantile_loss,0.023879054273619627,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,0.95,quantile_loss,0.02398727025578376,test,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,mean,quantile_loss,0.0,train,"{""n_variables"": 2}" +benchmark_loss,Matching,quantile_loss_mean_all,mean,quantile_loss,0.02301332641630653,test,"{""n_variables"": 2}" +benchmark_loss,Matching,log_loss_mean_all,0.05,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.1,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.15,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.2,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.25,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.3,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.35,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.4,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.45,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.5,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.55,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.6,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.65,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.7,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.75,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.8,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.85,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.9,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.95,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.05,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.1,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.15,log_loss,19.713195542915205,test,"{""n_variables"": 1}" 
+benchmark_loss,Matching,log_loss_mean_all,0.2,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.25,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.3,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.35,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.4,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.45,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.5,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.55,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.6,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.65,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.7,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.75,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.8,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.85,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.9,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,0.95,log_loss,19.713195542915205,test,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,mean,log_loss,2.2204460492503136e-16,train,"{""n_variables"": 1}" +benchmark_loss,Matching,log_loss_mean_all,mean,log_loss,19.71319554291521,test,"{""n_variables"": 1}" +distribution_distance,OLSResults,s1,N/A,wasserstein_distance,0.024660387596042915,full,{} +distribution_distance,OLSResults,s4,N/A,wasserstein_distance,0.020422408337066628,full,{} +distribution_distance,OLSResults,risk_factor,N/A,kl_divergence,6.033154649748202,full,{} +predictor_correlation,N/A,age,N/A,pearson,0.22798247938962524,full,"{""predictor2"": ""sex""}" +predictor_correlation,N/A,age,N/A,pearson,0.2501624011317938,full,"{""predictor2"": ""bmi""}" +predictor_correlation,N/A,age,N/A,pearson,0.3534419190240634,full,"{""predictor2"": ""bp""}" +predictor_correlation,N/A,sex,N/A,pearson,0.08064137058331985,full,"{""predictor2"": ""bmi""}" +predictor_correlation,N/A,sex,N/A,pearson,0.26905206635937173,full,"{""predictor2"": ""bp""}" +predictor_correlation,N/A,bmi,N/A,pearson,0.43643800885198847,full,"{""predictor2"": ""bp""}" +predictor_correlation,N/A,age,N/A,spearman,0.2309511404045441,full,"{""predictor2"": ""sex""}" +predictor_correlation,N/A,age,N/A,spearman,0.2559475199483165,full,"{""predictor2"": ""bmi""}" +predictor_correlation,N/A,age,N/A,spearman,0.3697134718827012,full,"{""predictor2"": ""bp""}" +predictor_correlation,N/A,sex,N/A,spearman,0.10617808669020486,full,"{""predictor2"": ""bmi""}" +predictor_correlation,N/A,sex,N/A,spearman,0.3010250570740961,full,"{""predictor2"": ""bp""}" +predictor_correlation,N/A,bmi,N/A,spearman,0.4472525722966459,full,"{""predictor2"": ""bp""}" +predictor_correlation,N/A,age,N/A,mutual_info,0.0,full,"{""predictor2"": ""sex""}" +predictor_correlation,N/A,age,N/A,mutual_info,0.01992537029466096,full,"{""predictor2"": ""bmi""}" 
+predictor_correlation,N/A,age,N/A,mutual_info,0.02051274401878463,full,"{""predictor2"": ""bp""}"
+predictor_correlation,N/A,sex,N/A,mutual_info,0.024974606868113543,full,"{""predictor2"": ""bmi""}"
+predictor_correlation,N/A,sex,N/A,mutual_info,0.06825522582483731,full,"{""predictor2"": ""bp""}"
+predictor_correlation,N/A,bmi,N/A,mutual_info,0.012362825726377452,full,"{""predictor2"": ""bp""}"
+predictor_target_mi,N/A,age,N/A,mutual_info,0.007275692599840133,full,"{""target"": ""s1""}"
+predictor_target_mi,N/A,age,N/A,mutual_info,0.017843189636206607,full,"{""target"": ""s4""}"
+predictor_target_mi,N/A,age,N/A,mutual_info,0.0527351415960489,full,"{""target"": ""risk_factor""}"
+predictor_target_mi,N/A,sex,N/A,mutual_info,0.002833409209270051,full,"{""target"": ""s1""}"
+predictor_target_mi,N/A,sex,N/A,mutual_info,0.022502563145111076,full,"{""target"": ""s4""}"
+predictor_target_mi,N/A,sex,N/A,mutual_info,0.03936347909918465,full,"{""target"": ""risk_factor""}"
+predictor_target_mi,N/A,bmi,N/A,mutual_info,0.00712501381273246,full,"{""target"": ""s1""}"
+predictor_target_mi,N/A,bmi,N/A,mutual_info,0.042828982564792506,full,"{""target"": ""s4""}"
+predictor_target_mi,N/A,bmi,N/A,mutual_info,0.09273531468209292,full,"{""target"": ""risk_factor""}"
+predictor_target_mi,N/A,bp,N/A,mutual_info,0.007630872443873875,full,"{""target"": ""s1""}"
+predictor_target_mi,N/A,bp,N/A,mutual_info,0.009030290161605078,full,"{""target"": ""s4""}"
+predictor_target_mi,N/A,bp,N/A,mutual_info,0.012933835727825435,full,"{""target"": ""risk_factor""}"
+predictor_importance,OLSResults,sex,N/A,relative_impact,30.680328353256453,test,"{""removed_predictor"": ""sex""}"
+predictor_importance,OLSResults,sex,N/A,loss_increase,0.7393423418942775,test,"{""removed_predictor"": ""sex""}"
+predictor_importance,OLSResults,bmi,N/A,relative_impact,0.03247893420537184,test,"{""removed_predictor"": ""bmi""}"
+predictor_importance,OLSResults,bmi,N/A,loss_increase,0.000782685602355393,test,"{""removed_predictor"": ""bmi""}"
+predictor_importance,OLSResults,age,N/A,relative_impact,0.00454596377561416,test,"{""removed_predictor"": ""age""}"
+predictor_importance,OLSResults,age,N/A,loss_increase,0.00010954978921118297,test,"{""removed_predictor"": ""age""}"
+predictor_importance,OLSResults,bp,N/A,relative_impact,-0.004826970371974454,test,"{""removed_predictor"": ""bp""}"
+predictor_importance,OLSResults,bp,N/A,loss_increase,-0.0001163215575132881,test,"{""removed_predictor"": ""bp""}"
+progressive_inclusion,OLSResults,N/A,N/A,cumulative_improvement,0.0,test,"{""step"": 1, ""predictor_added"": ""sex"", ""predictors"": [""sex""]}"
+progressive_inclusion,OLSResults,N/A,N/A,marginal_improvement,0.0,test,"{""step"": 1, ""predictor_added"": ""sex""}"
+progressive_inclusion,OLSResults,N/A,N/A,cumulative_improvement,0.0010940165047195194,test,"{""step"": 2, ""predictor_added"": ""bmi"", ""predictors"": [""sex"", ""bmi""]}"
+progressive_inclusion,OLSResults,N/A,N/A,marginal_improvement,0.0010940165047195194,test,"{""step"": 2, ""predictor_added"": ""bmi""}"
+progressive_inclusion,OLSResults,N/A,N/A,cumulative_improvement,0.0012175858369123382,test,"{""step"": 3, ""predictor_added"": ""age"", ""predictors"": [""sex"", ""bmi"", ""age""]}"
+progressive_inclusion,OLSResults,N/A,N/A,marginal_improvement,0.00012356933219281885,test,"{""step"": 3, ""predictor_added"": ""age""}"
+progressive_inclusion,OLSResults,N/A,N/A,cumulative_improvement,0.0011012642793990501,test,"{""step"": 4, ""predictor_added"": ""bp"", ""predictors"": [""sex"", ""bmi"", ""age"", ""bp""]}"
+progressive_inclusion,OLSResults,N/A,N/A,marginal_improvement,-0.0001163215575132881,test,"{""step"": 4, ""predictor_added"": ""bp""}"
diff --git a/microimputation-dashboard/types/imputation.ts b/microimputation-dashboard/types/imputation.ts
index c6163bc..799862d 100644
--- a/microimputation-dashboard/types/imputation.ts
+++ b/microimputation-dashboard/types/imputation.ts
@@ -1,13 +1,13 @@
 // Type definitions for microimputation data
 
 export interface ImputationDataPoint {
-  // Add fields based on what microimpute outputs
-  // These are placeholder fields that will be updated based on actual CSV structure
-  id?: string;
-  variable?: string;
-  original_value?: number;
-  imputed_value?: number;
-  method?: string;
-  confidence?: number;
+  type: string; // e.g., "benchmark_loss", "distribution_distance", "predictor_correlation"
+  method: string; // e.g., "QRF", "OLS", "QuantReg", "Matching"
+  variable: string; // e.g., "quantile_loss_mean_all", "log_loss_mean_all", or actual variable names
+  quantile: string | number; // numeric (0.05, 0.1, etc.), "mean", or "N/A"
+  metric_name: string; // e.g., "quantile_loss", "log_loss"
+  metric_value: number | null; // numeric value of the metric
+  split: string; // e.g., "train", "test", "full"
+  additional_info: string; // JSON-formatted string with metadata
   [key: string]: any; // Allow additional fields
 }
diff --git a/microimputation-dashboard/utils/colors.ts b/microimputation-dashboard/utils/colors.ts
new file mode 100644
index 0000000..9e7e870
--- /dev/null
+++ b/microimputation-dashboard/utils/colors.ts
@@ -0,0 +1,29 @@
+// Consistent color mapping for imputation methods across all charts
+// Using Plotly color palette for consistency with Python visualizations
+
+export const METHOD_COLORS: Record<string, string> = {
+  QRF: '#636EFA', // Plotly blue
+  OLS: '#EF553B', // Plotly red
+  QuantReg: '#00CC96', // Plotly teal
+  Matching: '#AB63FA', // Plotly purple
+  // Add more methods as needed
+};
+
+export const FALLBACK_COLORS = [
+  '#FFA15A', // Plotly orange
+  '#19D3F3', // Plotly cyan
+  '#FF6692', // Plotly pink
+  '#B6E880', // Plotly lime
+  '#FF97FF', // Plotly magenta
+  '#FECB52', // Plotly yellow
+];
+
+/**
+ * Get color for a method, using predefined colors or fallback palette
+ */
+export function getMethodColor(method: string, index: number = 0): string {
+  if (method in METHOD_COLORS) {
+    return METHOD_COLORS[method];
+  }
+  return FALLBACK_COLORS[index % FALLBACK_COLORS.length];
+}
diff --git a/microimputation-dashboard/utils/deeplinks.ts b/microimputation-dashboard/utils/deeplinks.ts
index 55b7af3..5ce99b9 100644
--- a/microimputation-dashboard/utils/deeplinks.ts
+++ b/microimputation-dashboard/utils/deeplinks.ts
@@ -8,103 +8,39 @@ export interface GitHubArtifactInfo {
 }
 
 export interface DeeplinkParams {
-  mode?: 'single' | 'comparison';
   primary?: GitHubArtifactInfo;
-  secondary?: GitHubArtifactInfo;
 }
 
 export function parseDeeplinkParams(searchParams: URLSearchParams): DeeplinkParams | null {
-  const mode = searchParams.get('mode') || 'single';
-
-  const primaryRepo = searchParams.get('repo');
-  const primaryBranch = searchParams.get('branch');
-  const primaryCommit = searchParams.get('commit');
-  const primaryArtifact = searchParams.get('artifact');
-
-  if (!primaryRepo || !primaryBranch || !primaryCommit || !primaryArtifact) {
-    // Check for comparison mode parameters
-    const repo1 = searchParams.get('repo1');
-    const branch1 = searchParams.get('branch1');
-    const commit1 = searchParams.get('commit1');
-    const artifact1 = searchParams.get('artifact1');
-
-    const repo2 = searchParams.get('repo2');
-    const branch2 = searchParams.get('branch2');
-    const commit2 = searchParams.get('commit2');
-    const artifact2 = searchParams.get('artifact2');
-
-    if (repo1 && branch1 && commit1 && artifact1 && repo2 && branch2 && commit2 && artifact2) {
-      return {
-        mode: 'comparison',
-        primary: {
-          repo: repo1,
-          branch: branch1,
-          commit: commit1,
-          artifact: artifact1,
-        },
-        secondary: {
-          repo: repo2,
-          branch: branch2,
-          commit: commit2,
-          artifact: artifact2,
-        },
-      };
-    }
+  const repo = searchParams.get('repo');
+  const branch = searchParams.get('branch');
+  const commit = searchParams.get('commit');
+  const artifact = searchParams.get('artifact');
+
+  if (!repo || !branch || !commit || !artifact) {
     return null;
   }
 
-  const params: DeeplinkParams = {
-    mode: mode as 'single' | 'comparison',
+  return {
     primary: {
-      repo: primaryRepo,
-      branch: primaryBranch,
-      commit: primaryCommit,
-      artifact: primaryArtifact,
+      repo,
+      branch,
+      commit,
+      artifact,
     },
   };
-
-  // Check for secondary parameters for comparison mode
-  const secondaryRepo = searchParams.get('repo2') || primaryRepo;
-  const secondaryBranch = searchParams.get('branch2');
-  const secondaryCommit = searchParams.get('commit2');
-  const secondaryArtifact = searchParams.get('artifact2');
-
-  if (secondaryBranch && secondaryCommit && secondaryArtifact) {
-    params.mode = 'comparison';
-    params.secondary = {
-      repo: secondaryRepo,
-      branch: secondaryBranch,
-      commit: secondaryCommit,
-      artifact: secondaryArtifact,
-    };
-  }
-
-  return params;
 }
 
-export function createShareableUrl(baseUrl: string, artifactInfo: GitHubArtifactInfo, secondaryInfo?: GitHubArtifactInfo): string {
-  const url = new URL(baseUrl);
+export function createShareableUrl(artifactInfo: GitHubArtifactInfo): string {
+  const baseUrl = typeof window !== 'undefined'
+    ? `${window.location.protocol}//${window.location.host}${window.location.pathname}`
+    : '';
 
-  if (secondaryInfo) {
-    // Comparison mode
-    url.searchParams.set('mode', 'comparison');
-    url.searchParams.set('repo1', artifactInfo.repo);
-    url.searchParams.set('branch1', artifactInfo.branch);
-    url.searchParams.set('commit1', artifactInfo.commit);
-    url.searchParams.set('artifact1', artifactInfo.artifact);
-    url.searchParams.set('repo2', secondaryInfo.repo);
-    url.searchParams.set('branch2', secondaryInfo.branch);
-    url.searchParams.set('commit2', secondaryInfo.commit);
-    url.searchParams.set('artifact2', secondaryInfo.artifact);
-  } else {
-    // Single mode
-    url.searchParams.set('mode', 'single');
-    url.searchParams.set('repo', artifactInfo.repo);
-    url.searchParams.set('branch', artifactInfo.branch);
-    url.searchParams.set('commit', artifactInfo.commit);
-    url.searchParams.set('artifact', artifactInfo.artifact);
-  }
+  const urlParams = new URLSearchParams();
+  urlParams.set('repo', artifactInfo.repo);
+  urlParams.set('branch', artifactInfo.branch);
+  urlParams.set('commit', artifactInfo.commit);
+  urlParams.set('artifact', artifactInfo.artifact);
 
-  return url.toString();
+  return `${baseUrl}?${urlParams.toString()}`;
 }
\ No newline at end of file
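
The files changed above define the CSV schema the dashboard consumes (ImputationDataPoint), the shared per-method color palette, and single-artifact deeplinks. The sketch below is not part of this diff; it shows one plausible way the pieces fit together, typing a row from the benchmark CSV, resolving its chart color, and building a shareable artifact URL. Import paths and the repo/branch/commit/artifact values are illustrative placeholders.

// Hypothetical usage sketch (not part of this PR); import paths assume the
// microimputation-dashboard layout shown in the diff above.
import { ImputationDataPoint } from '../types/imputation';
import { getMethodColor } from '../utils/colors';
import { createShareableUrl, GitHubArtifactInfo } from '../utils/deeplinks';

// One row of the benchmark CSV, in the column order the dashboard expects:
// type, method, variable, quantile, metric_name, metric_value, split, additional_info.
const row: ImputationDataPoint = {
  type: 'benchmark_loss',
  method: 'QRF',
  variable: 'quantile_loss_mean_all',
  quantile: 0.5,
  metric_name: 'quantile_loss',
  metric_value: 0.023625607730980425,
  split: 'test',
  additional_info: '{"n_variables": 2}',
};

// additional_info is stored as a JSON string, so parse it before reading metadata.
const info = JSON.parse(row.additional_info) as Record<string, unknown>;

// Chart series for a method reuse the shared palette; unknown methods fall back
// to FALLBACK_COLORS via the optional index argument.
const seriesColor = getMethodColor(row.method);

// A deeplink needs only repo, branch, commit, and artifact name (placeholder values).
const artifact: GitHubArtifactInfo = {
  repo: 'owner/repo',
  branch: 'main',
  commit: '0123abc',
  artifact: 'microimputation-results',
};

console.log(seriesColor, info, createShareableUrl(artifact));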