diff --git a/.gitignore b/.gitignore index 038b81e..15c8118 100644 --- a/.gitignore +++ b/.gitignore @@ -16,7 +16,7 @@ yarn-error.log* .env.development .env.production .env.test - +.CLAUDE.md # Runtime data pids *.pid diff --git a/src/common/interfaces/client.interface.ts b/src/common/interfaces/client.interface.ts index 7da164e..0e5af99 100644 --- a/src/common/interfaces/client.interface.ts +++ b/src/common/interfaces/client.interface.ts @@ -1,27 +1,7 @@ import { PodStatus } from "./podstatus.interface"; - - -export interface DiagnoseRequest { - podName: string; - namespace: string; - logs: string[]; - events?: string[]; - phase?: string; - containerState?: PodStatus; // refine if needed -} - -export interface CredentialsFile { - authenticated: boolean; - token: string; - user_id: string; - first_name?: string; -} - -export interface HelmReleaseInfo { - releaseName: string; - confidence: number; -} +// Re-export from helm-release-resolver for consistency +export type { HelmReleaseInfo, HelmReleaseEvidence } from '../../core/helm-release-resolver'; export interface StackComponent { podName: string; @@ -30,60 +10,27 @@ export interface StackComponent { logs: string[]; } +export interface StackAnalysisPayload { + primaryPod: string; + helmRelease: string; + namespace: string; + timestamp: string; + components: StackComponent[]; +} - -export interface StackDiagnosisResult { +export interface StackAnalysisResponse { stackOverview: { - summary: string; - components: Array<{ - name: string; - role: string; - status: 'healthy' | 'degraded' | 'down'; - restartCount: number; - issues: string[]; - }>; + totalPods: number; + failingPods: number; + healthyPods: number; + namespace: string; + helmRelease: string; }; rootCauseAnalysis: { - primaryCause: { - component: string; - issue: string; - evidence: string[]; - startTime: string | null; - containerState: string; - }; - failureCascade: Array<{ - step: number; - component: string; - effect: string; - evidence: string; - timestamp: string; - }>; - }; - recommendations: { - immediateFix: { - description: string; - commands: string[]; - actions: string[]; - }; - preventRecurrence: { - description: string; - helmValues: Record; - configChanges: string[]; - }; - improvements: string[]; - }; - verification: { - commands: Array<{ - description: string; - command: string; - expectedOutput: string; - }>; - healthyLogPatterns: string[]; + primaryCause: string; + confidence: number; + evidence: string[]; }; - debuggingCommands: Array<{ - description: string; - command: string; - }>; - confidence: number; - alternativeCauses: string[]; + recommendations: string[]; + analysis: string; } \ No newline at end of file diff --git a/src/common/interfaces/containerStatus.interface.ts b/src/common/interfaces/containerStatus.interface.ts index dcfced0..1e69006 100644 --- a/src/common/interfaces/containerStatus.interface.ts +++ b/src/common/interfaces/containerStatus.interface.ts @@ -3,4 +3,5 @@ export interface ContainerStatusSummary { type: 'init' | 'main'; state: string; reason?: string; + message?: string; } diff --git a/src/core/client.ts b/src/core/client.ts index 75fa9b7..1fdd69b 100644 --- a/src/core/client.ts +++ b/src/core/client.ts @@ -1,8 +1,6 @@ -import { promisify } from "util"; -import { gzip } from "zlib"; import axios from 'axios'; import { printErrorAndExit } from "../utils/utils"; -import { DiagnoseRequest, HelmReleaseInfo } from "../common/interfaces/client.interface"; +import { StackAnalysisPayload, StackAnalysisResponse } from "../common/interfaces/client.interface"; import { DEFAULT_API_URL } from "./config"; import { TokenStorage } from "./token-storage"; @@ -12,80 +10,27 @@ interface RefreshTokenResponseDto { expiresIn: number; } -const gzipAsync = promisify(gzip); - -export async function runFurtherDiagnosis(payload: DiagnoseRequest): Promise { - try { - const compressed = await gzipAsync(Buffer.from(JSON.stringify(payload))); - - const tokenStorage = new TokenStorage(); - const token = await tokenStorage.getValidAccessToken(); - if (!token) { - console.warn('No valid authentication token available. Please ensure cluster is registered.'); - } - - const response = await axios.post(`${DEFAULT_API_URL}/diagnose`, compressed, { - headers: { - 'Content-Type': 'application/json', - 'Content-Encoding': 'gzip', - Authorization: `Bearer ${token}`, - }, - }); - - return response.data; - } catch (error: any) { - printErrorAndExit(error.response?.data.message ?? 'External request failed'); - } -} - -export async function parsePodManifest(manifest: any): Promise { - try { - const tokenStorage = new TokenStorage(); - const token = await tokenStorage.getValidAccessToken(); - if (!token) { - console.warn('No valid authentication token available. Please ensure cluster is registered.'); - } - - const response = await fetch(`${DEFAULT_API_URL}/diagnose/parse-pod-manifest`, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - Authorization: `Bearer ${token}`, - }, - body: JSON.stringify(manifest), - }); - - return await response.json() as HelmReleaseInfo; - } catch (error) { - printErrorAndExit('Error parsing pod manifest'); - throw error; // This will never execute but satisfies TypeScript - } -} - -export async function runStackAnalysis(compressedPayload: Buffer): Promise { +export async function runStackAnalysis(payload: StackAnalysisPayload): Promise { try { const tokenStorage = new TokenStorage(); - const token = await tokenStorage.getValidAccessToken(); - if (!token) { - console.warn('No valid authentication token available. Please ensure cluster is registered.'); - } - const response = await axios.post( - `${DEFAULT_API_URL}/diagnose/analyze-stack`, - compressedPayload, - { - headers: { - 'Content-Type': 'application/json', - 'Content-Encoding': 'gzip', - Authorization: `Bearer ${token}`, + return await tokenStorage.makeAuthenticatedRequest(async (token) => { + const response = await axios.post( + `${DEFAULT_API_URL}/daemon/analyze-stack`, + payload, + { + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}`, + }, }, - }, - ); + ); - return response.data.analysis || response.data; // depends on backend response shape + return response.data as StackAnalysisResponse; + }); } catch (error: any) { - printErrorAndExit(error.response?.data.message ?? 'Failed to analyze stack'); - throw error; // This will never execute but satisfies TypeScript + console.error(`❌ Failed to analyze stack: ${error.response?.data?.message || error.message}`); + return null; } } @@ -115,7 +60,6 @@ export async function getUserClusterTokens( orgId: string ): Promise { try { - const response = await axios.get( `${DEFAULT_API_URL}/auth/cluster/tokens?clusterId=${clusterId}&orgId=${orgId}`, { @@ -128,13 +72,13 @@ export async function getUserClusterTokens( return response.data; } catch (error: any) { console.error(`📡 getUserClusterTokens error:`, error.response?.data || error.message); - } + } } export async function getDaemonInfo(): Promise { try { const tokenStorage = new TokenStorage(); - + return await tokenStorage.makeAuthenticatedRequest(async (token) => { const response = await axios.get( `${DEFAULT_API_URL}/daemon/me`, @@ -147,38 +91,8 @@ export async function getDaemonInfo(): Promise { ); return response.data; }); - } catch (error: any) { - console.error(error.response?.data.message ?? 'Failed to get daemon info'); - return null; - } -} - -export async function reportPodFailure(failureData: { - podName: string; - namespace: string; - logs: string[]; - events?: string[]; - phase?: string; - containerState?: any; -}): Promise { - try { - const tokenStorage = new TokenStorage(); - - return await tokenStorage.makeAuthenticatedRequest(async (token) => { - const response = await axios.post( - `${DEFAULT_API_URL}/daemon/diagnose-pod`, - failureData, - { - headers: { - 'Content-Type': 'application/json', - Authorization: `Bearer ${token}`, - }, - } - ); - return response.data; - }); } catch (error: any) { - console.error(`❌ Failed to report pod failure: ${error.response?.data?.message || error.message}`); + console.error(error.response?.data.message ?? 'Failed to get daemon info'); return null; } } diff --git a/src/core/diagnosis.ts b/src/core/diagnosis.ts index d44e0e7..c601ca3 100644 --- a/src/core/diagnosis.ts +++ b/src/core/diagnosis.ts @@ -1,4 +1,4 @@ -import { CoreV1Event, V1Pod } from '@kubernetes/client-node'; +import { CoreV1Event, V1Pod, V1ContainerStatus } from '@kubernetes/client-node'; import chalk from 'chalk'; import { ContainerStatusSummary } from "../common/interfaces/containerStatus.interface"; @@ -7,6 +7,7 @@ import { getCoreV1 } from "./kube"; import { PodStatus } from '../common/interfaces/podstatus.interface'; import { parseContainerState, printErrorAndExit } from '../utils/utils'; import { StackComponent } from '../common/interfaces/client.interface'; +import { resolveHelmRelease } from './helm-release-resolver'; import rules from '../assets/rules.json' @@ -50,11 +51,11 @@ async function getPodStatus(podName: string, namespace: string): Promise { + initContainers.forEach((initContainer: V1ContainerStatus) => { containerStates.push(parseContainerState(initContainer, 'init')); }); - mainContainers.forEach((mainContainer) => { + mainContainers.forEach((mainContainer: V1ContainerStatus) => { containerStates.push(parseContainerState(mainContainer, 'main')); }); @@ -73,16 +74,54 @@ async function getPodStatus(podName: string, namespace: string): Promise { +export async function getPodEvents(podName: string, namespace: string, retryCount = 0): Promise { const coreV1 = getCoreV1(); + const MAX_RETRIES = 2; + const RETRY_DELAY_MS = 500; try { - const res = await coreV1.listNamespacedEvent({ namespace, limit: 10, timeoutSeconds: 10 }); + // Get the pod to find its owner references + const pod = await coreV1.readNamespacedPod({ name: podName, namespace }); + const ownerRefs = pod.metadata?.ownerReferences || []; + + // Collect event targets: pod + its owners (ReplicaSet, StatefulSet, etc.) + const targets = [podName, ...ownerRefs.map(ref => ref.name)]; + + // Fetch events for all targets in parallel + const eventPromises = targets.map(async (targetName) => { + try { + const res = await coreV1.listNamespacedEvent({ + namespace, + fieldSelector: `involvedObject.name=${targetName}`, + limit: 30, + timeoutSeconds: 10 + }); + return res.items; + } catch { + return []; + } + }); + + const allEventArrays = await Promise.all(eventPromises); + const allEvents: CoreV1Event[] = allEventArrays.flat(); - const events: CoreV1Event[] = res.items; + // If no events found and we haven't exhausted retries, wait and try again + // (Events may not have propagated to API yet) + if (allEvents.length === 0 && retryCount < MAX_RETRIES) { + await new Promise(resolve => setTimeout(resolve, RETRY_DELAY_MS)); + return getPodEvents(podName, namespace, retryCount + 1); + } + + // Dedupe by event UID, sort by time, take top 20 + const seenUids = new Set(); + const uniqueEvents = allEvents.filter(e => { + const uid = e.metadata?.uid || `${e.involvedObject?.name}-${e.reason}-${e.message}`; + if (seenUids.has(uid)) return false; + seenUids.add(uid); + return true; + }); - const filteredEvents = events - .filter((event) => event.involvedObject?.name === podName) + const sortedEvents = uniqueEvents .sort((a, b) => { const aTime = new Date(a.lastTimestamp || a.eventTime || '').getTime(); const bTime = new Date(b.lastTimestamp || b.eventTime || '').getTime(); @@ -90,13 +129,17 @@ export async function getPodEvents(podName: string, namespace: string): Promise< }) .slice(0, 20); - return filteredEvents.map((e) => e.message || '(no message)'); + return sortedEvents.map((e) => { + const type = e.type || 'Unknown'; + const reason = e.reason || ''; + const message = e.message || '(no message)'; + const source = e.involvedObject?.name !== podName ? `(${e.involvedObject?.kind})` : ''; + return `[${type}] ${reason}: ${message} ${source}`.trim(); + }); } catch (err) { - console.log(err); - - console.error(`\n ${chalk.red(`Error fetching events for pod ${podName}`)}`); - - printErrorAndExit(`Error fetching events for pod ${podName}`); + // Don't crash on event fetch failure - just return empty array + console.error(`⚠️ Failed to fetch events for pod ${podName}: ${err}`); + return []; } } @@ -233,8 +276,8 @@ export async function diagnoseStack(podName: string, namespace: string): Promise const startTime = performance.now(); try { - // Step 1: Extract Helm release - const releaseInfo = await extractHelmRelease(podName, namespace); + // Step 1: Resolve Helm release (hybrid: local labels + backend fallback) + const releaseInfo = await resolveHelmRelease(podName, namespace); if (!releaseInfo.releaseName || releaseInfo.confidence < 0.7) { // Single pod analysis @@ -274,53 +317,6 @@ export async function diagnoseStack(podName: string, namespace: string): Promise } } -/** - * Extracts Helm release information from a pod using local heuristics. - * @param podName - * @param namespace - * @returns - */ -async function extractHelmRelease(podName: string, namespace: string): Promise<{ releaseName: string; confidence: number }> { - const coreV1 = getCoreV1(); - - try { - const pod = await coreV1.readNamespacedPod({ name: podName, namespace }); - const labels = pod.metadata?.labels || {}; - const _annotations = pod.metadata?.annotations || {}; - - let releaseName = ''; - let confidence = 0; - - // Check Helm v3 labels (most common) - if (labels['app.kubernetes.io/managed-by'] === 'Helm') { - releaseName = labels['app.kubernetes.io/instance'] || ''; - confidence = 0.9; - } - // Check legacy Helm v2 labels - else if (labels['heritage'] === 'Tiller') { - releaseName = labels['release'] || ''; - confidence = 0.8; - } - // Check app labels that might indicate Helm deployment - else if (labels['helm.sh/chart']) { - releaseName = labels['app.kubernetes.io/instance'] || labels['app'] || ''; - confidence = 0.7; - } - // Fallback: try to infer from pod name patterns - else { - const podNameParts = podName.split('-'); - if (podNameParts.length >= 2) { - // Common pattern: release-name-component-hash - releaseName = podNameParts.slice(0, -2).join('-'); - confidence = 0.5; - } - } - - return { releaseName, confidence }; - } catch (error) { - return { releaseName: '', confidence: 0 }; - } -} /** * @@ -495,8 +491,8 @@ export async function getStackDataForBackend(podName: string, namespace: string) stackComponents?: { releaseName: string; confidence: number; components: any[] }; }> { try { - // Extract Helm release - const releaseInfo = await extractHelmRelease(podName, namespace); + // Resolve Helm release (hybrid: local labels + backend fallback) + const releaseInfo = await resolveHelmRelease(podName, namespace); // Always collect primary pod data const [status, events, logs] = await Promise.all([ diff --git a/src/core/helm-release-resolver.ts b/src/core/helm-release-resolver.ts new file mode 100644 index 0000000..6613e2e --- /dev/null +++ b/src/core/helm-release-resolver.ts @@ -0,0 +1,304 @@ +import { V1Pod } from '@kubernetes/client-node'; +import { getCoreV1 } from './kube'; +import { TokenStorage } from './token-storage'; +import axios from 'axios'; +import { DEFAULT_API_URL } from './config'; + +/** + * Evidence supporting the release detection + */ +export interface HelmReleaseEvidence { + labelFound?: string; // e.g., "app.kubernetes.io/instance=myapp" + annotationFound?: string; // e.g., "meta.helm.sh/release-name=myapp" + namingPattern?: string; // e.g., "pod name follows {release}-{component}-{hash} pattern" +} + +/** + * Result of Helm release resolution + */ +export interface HelmReleaseInfo { + releaseName: string; + confidence: number; + detectionMethod: 'label' | 'annotation' | 'naming_convention' | 'owner_reference' | 'none'; + evidence: HelmReleaseEvidence; +} + +/** + * Configuration for the resolver + */ +export interface ResolverConfig { + /** Minimum confidence to accept local result (default: 0.7) */ + localConfidenceThreshold: number; + /** Whether to use backend LLM fallback (default: true) */ + enableBackendFallback: boolean; + /** Timeout for backend calls in ms (default: 5000) */ + backendTimeoutMs: number; +} + +const DEFAULT_CONFIG: ResolverConfig = { + localConfidenceThreshold: 0.7, + enableBackendFallback: true, + backendTimeoutMs: 5000, +}; + +/** + * HelmReleaseResolver - Resolves Helm release names from pods + * + * Uses a hybrid approach: + * 1. First tries fast local label-based extraction + * 2. Falls back to backend LLM inference if confidence is low + * + * @example + * ```ts + * const resolver = new HelmReleaseResolver(); + * const release = await resolver.resolve('my-pod', 'default'); + * console.log(release.releaseName, release.confidence); + * ``` + */ +export class HelmReleaseResolver { + private config: ResolverConfig; + + constructor(config: Partial = {}) { + this.config = { ...DEFAULT_CONFIG, ...config }; + } + + /** + * Resolve Helm release name for a pod + */ + async resolve(podName: string, namespace: string): Promise { + // Step 1: Try local extraction first (fast, no network) + const localResult = await this.extractFromLabels(podName, namespace); + + if (localResult.confidence >= this.config.localConfidenceThreshold) { + return localResult; + } + + // Step 2: Fallback to backend LLM if enabled and local confidence is low + if (this.config.enableBackendFallback) { + const backendResult = await this.inferFromBackend(podName, namespace); + if (backendResult && backendResult.confidence > localResult.confidence) { + return backendResult; + } + } + + // Step 3: Return best local result (even if low confidence) + return localResult; + } + + /** + * Extract Helm release from pod labels (fast, local) + */ + private async extractFromLabels(podName: string, namespace: string): Promise { + try { + const coreV1 = getCoreV1(); + const pod = await coreV1.readNamespacedPod({ name: podName, namespace }); + return this.parseLabels(pod, podName); + } catch { + return this.fallbackFromPodName(podName); + } + } + + /** + * Parse Helm release info from pod labels and annotations + */ + private parseLabels(pod: V1Pod, podName: string): HelmReleaseInfo { + const labels = pod.metadata?.labels || {}; + const annotations = pod.metadata?.annotations || {}; + + // Check annotations first (meta.helm.sh/release-name is authoritative) + if (annotations['meta.helm.sh/release-name']) { + return { + releaseName: annotations['meta.helm.sh/release-name'], + confidence: 0.98, + detectionMethod: 'annotation', + evidence: { annotationFound: `meta.helm.sh/release-name=${annotations['meta.helm.sh/release-name']}` } + }; + } + + // Helm v3 (most common) - highest confidence + if (labels['app.kubernetes.io/managed-by'] === 'Helm') { + const releaseName = labels['app.kubernetes.io/instance'] || ''; + if (releaseName) { + return { + releaseName, + confidence: 0.95, + detectionMethod: 'label', + evidence: { labelFound: `app.kubernetes.io/instance=${releaseName}` } + }; + } + } + + // Helm v2 (legacy Tiller) + if (labels['heritage'] === 'Tiller') { + const releaseName = labels['release'] || ''; + if (releaseName) { + return { + releaseName, + confidence: 0.85, + detectionMethod: 'label', + evidence: { labelFound: `release=${releaseName}` } + }; + } + } + + // Chart label present (partial Helm metadata) + if (labels['helm.sh/chart']) { + const releaseName = labels['app.kubernetes.io/instance'] || labels['app'] || ''; + if (releaseName) { + return { + releaseName, + confidence: 0.75, + detectionMethod: 'label', + evidence: { labelFound: `helm.sh/chart=${labels['helm.sh/chart']}` } + }; + } + } + + // ArgoCD managed (common in GitOps) + if (labels['argocd.argoproj.io/instance']) { + return { + releaseName: labels['argocd.argoproj.io/instance'], + confidence: 0.8, + detectionMethod: 'label', + evidence: { labelFound: `argocd.argoproj.io/instance=${labels['argocd.argoproj.io/instance']}` } + }; + } + + // Flux managed + if (labels['helm.toolkit.fluxcd.io/name']) { + return { + releaseName: labels['helm.toolkit.fluxcd.io/name'], + confidence: 0.8, + detectionMethod: 'label', + evidence: { labelFound: `helm.toolkit.fluxcd.io/name=${labels['helm.toolkit.fluxcd.io/name']}` } + }; + } + + // App label as last resort before pod name inference + if (labels['app'] || labels['app.kubernetes.io/name']) { + const releaseName = labels['app'] || labels['app.kubernetes.io/name']; + return { + releaseName, + confidence: 0.6, + detectionMethod: 'label', + evidence: { labelFound: `app=${releaseName}` } + }; + } + + return this.fallbackFromPodName(podName); + } + + /** + * Infer release name from pod naming pattern + */ + private fallbackFromPodName(podName: string): HelmReleaseInfo { + const parts = podName.split('-'); + + // Pattern: release-name-component-replicaset-pod (e.g., nginx-app-web-7d4f8b9c6-x2k4p) + if (parts.length >= 4) { + const releaseName = parts.slice(0, -2).join('-'); + return { + releaseName, + confidence: 0.4, + detectionMethod: 'naming_convention', + evidence: { namingPattern: 'pod name follows {release}-{component}-{hash} pattern' } + }; + } + + // Pattern: release-name-hash (e.g., nginx-7d4f8b9c6) + if (parts.length >= 2) { + const releaseName = parts.slice(0, -1).join('-'); + return { + releaseName, + confidence: 0.3, + detectionMethod: 'naming_convention', + evidence: { namingPattern: 'pod name follows {release}-{hash} pattern' } + }; + } + + return { + releaseName: podName, + confidence: 0.1, + detectionMethod: 'none', + evidence: {} + }; + } + + /** + * Call backend LLM to infer Helm release (slower, more accurate for edge cases) + */ + private async inferFromBackend(podName: string, namespace: string): Promise { + try { + const coreV1 = getCoreV1(); + const pod = await coreV1.readNamespacedPod({ name: podName, namespace }); + + // Build manifest matching backend's ParsePodManifestDto + const manifest = { + metadata: { + name: pod.metadata?.name, + labels: pod.metadata?.labels || {}, + annotations: pod.metadata?.annotations || {}, + ownerReferences: pod.metadata?.ownerReferences?.map(ref => ref.name) || [], + }, + spec: { + containers: pod.spec?.containers?.map(c => ({ name: c.name, image: c.image })) || [], + }, + }; + + const tokenStorage = new TokenStorage(); + const result = await tokenStorage.makeAuthenticatedRequest(async (token) => { + const response = await axios.post( + `${DEFAULT_API_URL}/daemon/parse-manifest`, + manifest, + { + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}`, + }, + timeout: this.config.backendTimeoutMs, + } + ); + return response.data; + }); + + if (result?.releaseName) { + // Backend returns the full HelmReleaseInfo structure + return { + releaseName: result.releaseName, + confidence: result.confidence ?? 0.8, + detectionMethod: result.detectionMethod ?? 'none', + evidence: result.evidence ?? {} + }; + } + + return null; + } catch { + // Silently fail - local extraction is the fallback + return null; + } + } +} + +/** + * Singleton instance for convenience + */ +let defaultResolver: HelmReleaseResolver | null = null; + +export function getHelmReleaseResolver(config?: Partial): HelmReleaseResolver { + if (!defaultResolver || config) { + defaultResolver = new HelmReleaseResolver(config); + } + return defaultResolver; +} + +/** + * Quick helper for one-off resolution + */ +export async function resolveHelmRelease( + podName: string, + namespace: string, + config?: Partial +): Promise { + const resolver = getHelmReleaseResolver(config); + return resolver.resolve(podName, namespace); +} diff --git a/src/core/watchdog.ts b/src/core/watchdog.ts index f88d888..9111e50 100644 --- a/src/core/watchdog.ts +++ b/src/core/watchdog.ts @@ -18,7 +18,8 @@ import { import { TokenStorage } from './token-storage'; import { initKube } from './kube'; import { printErrorAndExit } from '../utils/utils'; -import { reportPodFailure } from './client'; +import { runStackAnalysis } from './client'; +import { StackAnalysisPayload, StackComponent } from '../common/interfaces/client.interface'; @@ -1090,71 +1091,56 @@ export class KubernetesPodWatchdog extends EventEmitter { } /** - * Report failure to backend with comprehensive stack data aggregated into single pod format - * + * Report failure to backend with comprehensive stack analysis + * * @private */ private async reportFailureToBackend(failureEvent: PodFailureEvent): Promise { try { - // Collecting stack data for backend reporting - // Collect comprehensive stack data for backend reporting const stackData = await getStackDataForBackend( - failureEvent.metadata.podName, + failureEvent.metadata.podName, failureEvent.metadata.namespace ); - - let aggregatedLogs = stackData.primaryPod.logs; - let aggregatedEvents = stackData.primaryPod.events; - - // If we have stack components, aggregate all their logs and events + + // Build components array for stack analysis + const components: StackComponent[] = []; + + // Add primary pod as first component + components.push({ + podName: stackData.primaryPod.name, + status: { + phase: failureEvent.podSnapshot.phase, + containerStates: stackData.primaryPod.containerStates || [] + }, + events: stackData.primaryPod.events, + logs: stackData.primaryPod.logs + }); + + // Add stack components if available if (stackData.stackComponents) { - const totalComponents = stackData.stackComponents.components.length; - - // Aggregate logs from all stack components - const allStackLogs: string[] = []; - const allStackEvents: string[] = []; - - // Add header for stack context (only in logs, not events) - allStackLogs.push(`=== STACK ANALYSIS: ${stackData.stackComponents.releaseName} (${totalComponents} components) ===`); - - // Add primary pod data first - allStackLogs.push(`--- PRIMARY POD: ${stackData.primaryPod.name} ---`); - allStackLogs.push(...stackData.primaryPod.logs); - allStackEvents.push(...stackData.primaryPod.events); - - // Add data from all other stack components stackData.stackComponents.components.forEach(comp => { if (comp.podName !== stackData.primaryPod.name) { - allStackLogs.push(`--- COMPONENT: ${comp.podName} ---`); - allStackLogs.push(...comp.logs); - // Only add actual Kubernetes events, not custom messages - allStackEvents.push(...comp.events); + components.push({ + podName: comp.podName, + status: comp.status, + events: comp.events, + logs: comp.logs + }); } }); - - aggregatedLogs = allStackLogs; - aggregatedEvents = allStackEvents; - - // Stack data collected - no verbose logging } - // Prepare failure data in existing API format but with comprehensive stack data - const failureData = { - podName: failureEvent.metadata.podName, + // Build the stack analysis payload + const payload: StackAnalysisPayload = { + primaryPod: failureEvent.metadata.podName, + helmRelease: stackData.stackComponents?.releaseName || failureEvent.metadata.podName, namespace: failureEvent.metadata.namespace, - logs: aggregatedLogs.length > 0 ? aggregatedLogs : ['No logs available'], - events: aggregatedEvents.length > 0 ? aggregatedEvents : ['No events available'], - phase: failureEvent.podSnapshot.phase, - containerState: { - phase: failureEvent.podSnapshot.phase, - containerStates: stackData.primaryPod.containerStates || [] - } + timestamp: failureEvent.metadata.timestamp.toISOString(), + components }; - await reportPodFailure(failureData); - - // Backend reporting completed + await runStackAnalysis(payload); } catch (error) { console.error(`❌ Failed to report failure to backend: ${error}`); } diff --git a/src/utils/utils.ts b/src/utils/utils.ts index 560f4ff..6ff45c5 100644 --- a/src/utils/utils.ts +++ b/src/utils/utils.ts @@ -59,6 +59,7 @@ export function parseContainerState( type, state: `Waiting: ${state.waiting.reason || 'Unknown'}`, reason: state.waiting.reason, + message: state.waiting.message, }; } @@ -68,6 +69,7 @@ export function parseContainerState( type, state: `Terminated: ${state.terminated.reason || 'Unknown'}`, reason: state.terminated.reason, + message: state.terminated.message, }; }