diff --git a/ChangeLog.md b/ChangeLog.md
deleted file mode 100644
index f0cf6fa..0000000
--- a/ChangeLog.md
+++ /dev/null
@@ -1,12 +0,0 @@
-# FTIO ChangeLog
-
-## Version 0.0.2
-- Set the default plot unit to Bytes or Bytes/s rather than MB or MB/s
-- Adjusted the plot script to automatically detect the best unit for the y-axis and scale the values accordingly
-
-
-## Version 0.0.1
-
-- Speed-up with Msgpack
-- Added autocorrelation to FTIO
-- Added 4 new outlier detection methods
\ No newline at end of file
diff --git a/README.md b/README.md
index a668ea9..8fb4a50 100644
--- a/README.md
+++ b/README.md
@@ -348,6 +348,7 @@ Distributed under the BSD 3-Clause License. See [LICENCE](./LICENSE) for more in
 Authors:
 - Ahmad Tarraf
+- Amine Aherbil
 
 This work is a result of cooperation between the Technical University of Darmstadt and INRIA in the scope of the [EuroHPC ADMIRE project](https://admire-eurohpc.eu/).
diff --git a/ftio/freq/_dft.py b/ftio/freq/_dft.py
index 30f39be..ab9ccff 100644
--- a/ftio/freq/_dft.py
+++ b/ftio/freq/_dft.py
@@ -79,6 +79,8 @@ def dft_fast(b: np.ndarray) -> np.ndarray:
     - np.ndarray, DFT of the input signal.
     """
     N = len(b)
+    if N == 0:
+        return np.array([])
     X = np.repeat(complex(0, 0), N)  # np.zeros(N)
     for k in range(0, N):
         for n in range(0, N):
@@ -98,6 +100,8 @@ def numpy_dft(b: np.ndarray) -> np.ndarray:
     Returns:
     - np.ndarray, DFT of the input signal.
     """
+    if len(b) == 0:
+        return np.array([])
     return np.fft.fft(b)
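Note: both guards above return an empty array instead of failing on empty input — `np.fft.fft` raises a `ValueError` ("Invalid number of FFT data points") for zero-length arrays. A minimal standalone sketch of the guarded behavior (plain numpy, mirroring `numpy_dft` rather than importing FTIO):

```python
import numpy as np

def numpy_dft(b: np.ndarray) -> np.ndarray:
    # Guard: np.fft.fft raises ValueError on zero-length input.
    if len(b) == 0:
        return np.array([])
    return np.fft.fft(b)

print(numpy_dft(np.array([])))          # [] instead of a ValueError
print(numpy_dft(np.array([1.0, 0.0])))  # [1.+0.j 1.+0.j]
```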
diff --git a/ftio/freq/_dft_workflow.py b/ftio/freq/_dft_workflow.py
index 381e44f..d52d0e6 100644
--- a/ftio/freq/_dft_workflow.py
+++ b/ftio/freq/_dft_workflow.py
@@ -46,6 +46,10 @@ def ftio_dft(
     - analysis_figures (AnalysisFigures): Data and plot figures.
     - share (SharedSignalData): Contains shared information, including sampled bandwidth and total bytes.
     """
+    # Suppress numpy warnings for empty array operations
+    import warnings
+    warnings.filterwarnings('ignore', category=RuntimeWarning, module='numpy')
+
     #! Default values for variables
     share = SharedSignalData()
     prediction = Prediction(args.transformation)
@@ -75,42 +79,66 @@ def ftio_dft(
     n = len(b_sampled)
     frequencies = args.freq * np.arange(0, n) / n
     X = dft(b_sampled)
-    X = X * np.exp(
-        -2j * np.pi * frequencies * time_stamps[0]
-    )  # Correct phase offset due to start time t0
+
+    # Safety check for empty time_stamps array
+    if len(time_stamps) > 0:
+        X = X * np.exp(
+            -2j * np.pi * frequencies * time_stamps[0]
+        )  # Correct phase offset due to start time t0
+    # If time_stamps is empty, skip phase correction
+
     amp = abs(X)
     phi = np.arctan2(X.imag, X.real)
     conf = np.zeros(len(amp))
     # welch(bandwidth,freq)
 
     #! Find the dominant frequency
-    (dominant_index, conf[1 : int(n / 2) + 1], outlier_text) = outlier_detection(
-        amp, frequencies, args
-    )
+    # Safety check for empty arrays
+    if n > 0:
+        (dominant_index, conf[1 : int(n / 2) + 1], outlier_text) = outlier_detection(
+            amp, frequencies, args
+        )
 
-    # Ignore DC offset
-    conf[0] = np.inf
-    if n % 2 == 0:
-        conf[int(n / 2) + 1 :] = np.flip(conf[1 : int(n / 2)])
+        # Ignore DC offset
+        conf[0] = np.inf
+        if n % 2 == 0:
+            conf[int(n / 2) + 1 :] = np.flip(conf[1 : int(n / 2)])
+        else:
+            conf[int(n / 2) + 1 :] = np.flip(conf[1 : int(n / 2) + 1])
     else:
-        conf[int(n / 2) + 1 :] = np.flip(conf[1 : int(n / 2) + 1])
+        # Handle empty data case
+        dominant_index = np.array([])
+        outlier_text = "No data available for outlier detection"
 
     #! Assign data
-    prediction.dominant_freq = frequencies[dominant_index]
-    prediction.conf = conf[dominant_index]
-    if args.periodicity_detection is not None:
-        prediction.periodicity = conf[dominant_index]
-    prediction.amp = amp[dominant_index]
-    prediction.phi = phi[dominant_index]
-    prediction.t_start = time_stamps[0]
-    prediction.t_end = time_stamps[-1]
+    if n > 0 and len(dominant_index) > 0:
+        prediction.dominant_freq = frequencies[dominant_index]
+        prediction.conf = conf[dominant_index]
+        if args.periodicity_detection is not None:
+            prediction.periodicity = conf[dominant_index]
+        prediction.amp = amp[dominant_index]
+        prediction.phi = phi[dominant_index]
+    else:
+        # Handle empty data case
+        prediction.dominant_freq = np.array([])
+        prediction.conf = np.array([])
+        prediction.amp = np.array([])
+        prediction.phi = np.array([])
+
+    # Safety check for empty time_stamps
+    if len(time_stamps) > 0:
+        prediction.t_start = time_stamps[0]
+        prediction.t_end = time_stamps[-1]
+    else:
+        prediction.t_start = 0.0
+        prediction.t_end = 0.0
     prediction.freq = args.freq
     prediction.ranks = ranks
     prediction.total_bytes = total_bytes
     prediction.n_samples = n
 
     #! Save up to n_freq from the top candidates
-    if args.n_freq > 0:
+    if args.n_freq > 0 and n > 0:
         arr = amp[0 : int(np.ceil(n / 2))]
         top_candidates = np.argsort(-arr)  # from max to min
         n_freq = int(min(len(arr), args.n_freq))
@@ -124,7 +152,11 @@ def ftio_dft(
 
     periodicity_score = new_periodicity_scores(amp, b_sampled, prediction, args)
 
-    t_sampled = time_stamps[0] + np.arange(0, n) * 1 / args.freq
+    # Safety check for empty time_stamps
+    if len(time_stamps) > 0 and args.freq > 0:
+        t_sampled = time_stamps[0] + np.arange(0, n) * 1 / args.freq
+    else:
+        t_sampled = np.arange(0, n) * (1 / args.freq if args.freq > 0 else 1.0)
     #! Fourier fit if set
     if args.fourier_fit:
        fourier_fit(args, prediction, analysis_figures, b_sampled, t_sampled)
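Note: the phase factor guarded above is the DFT time-shift property, X'_k = X_k * exp(-2j*pi*f_k*t0), which re-references all phases to t = 0 instead of the window start t0; with empty `time_stamps` there is no t0, so the correction is skipped. A self-contained check (synthetic signal, illustrative values only, not FTIO code):

```python
import numpy as np

freq = 10.0                        # sampling frequency in Hz, as in args.freq
n = 64
t0 = 3.2                           # window start time
t = t0 + np.arange(n) / freq       # sample timestamps, as in t_sampled
b = np.sin(2 * np.pi * 0.625 * t)  # 0.625 Hz signal, exactly DFT bin 4

frequencies = freq * np.arange(n) / n
X = np.fft.fft(b) * np.exp(-2j * np.pi * frequencies * t[0])  # phase correction
k = np.argmax(np.abs(X[1 : n // 2])) + 1
print(frequencies[k])                    # 0.625
print(np.arctan2(X[k].imag, X[k].real))  # ~ -pi/2 for a sine, independent of t0
```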
""" + if len(t) == 0: + return np.empty(0), 0, " " + if args is not None: freq = args.freq memory_limit = args.memory_limit * 1000**3 # args.memory_limit GB @@ -53,9 +56,6 @@ def sample_data( f"Frequency step: {1/ duration if duration > 0 else 0:.3e} Hz\n" ) - if len(t) == 0: - return np.empty(0), 0, " " - # Calculate recommended frequency: if freq == -1: # Auto-detect frequency based on smallest time delta diff --git a/ftio/freq/time_window.py b/ftio/freq/time_window.py index 0ec3e82..86a3a2f 100644 --- a/ftio/freq/time_window.py +++ b/ftio/freq/time_window.py @@ -33,12 +33,20 @@ def data_in_time_window( indices = np.where(time_b >= args.ts) time_b = time_b[indices] bandwidth = bandwidth[indices] - total_bytes = int( - np.sum(bandwidth * (np.concatenate([time_b[1:], time_b[-1:]]) - time_b)) - ) - text += f"[green]Start time set to {args.ts:.2f}[/] s\n" + + if len(time_b) > 0: + total_bytes = int( + np.sum(bandwidth * (np.concatenate([time_b[1:], time_b[-1:]]) - time_b)) + ) + text += f"[green]Start time set to {args.ts:.2f}[/] s\n" + else: + total_bytes = 0 + text += f"[red]Warning: No data after start time {args.ts:.2f}[/] s\n" else: - text += f"Start time: [cyan]{time_b[0]:.2f}[/] s \n" + if len(time_b) > 0: + text += f"Start time: [cyan]{time_b[0]:.2f}[/] s \n" + else: + text += f"[red]Warning: No data available[/]\n" # shorten data according to end time if args.te: @@ -50,7 +58,10 @@ def data_in_time_window( ) text += f"[green]End time set to {args.te:.2f}[/] s\n" else: - text += f"End time: [cyan]{time_b[-1]:.2f}[/] s\n" + if len(time_b) > 0: + text += f"End time: [cyan]{time_b[-1]:.2f}[/] s\n" + else: + text += f"[red]Warning: No data in time window[/]\n" # ignored bytes ignored_bytes = ignored_bytes - total_bytes diff --git a/ftio/parse/args.py b/ftio/parse/args.py index b184da5..acb5c9e 100644 --- a/ftio/parse/args.py +++ b/ftio/parse/args.py @@ -257,6 +257,14 @@ def parse_args(argv: list, name="") -> argparse.Namespace: help="specifies the number of hits needed to adapt the time window. A hit occurs once a dominant frequency is found", ) parser.set_defaults(hits=3) + parser.add_argument( + "--algorithm", + dest="algorithm", + type=str, + choices=["adwin", "cusum", "ph"], + help="change point detection algorithm to use. 'adwin' (default) uses Adaptive Windowing with automatic window sizing and mathematical guarantees. 'cusum' uses Cumulative Sum detection for rapid change detection. 
diff --git a/ftio/prediction/change_point_detection.py b/ftio/prediction/change_point_detection.py
new file mode 100644
index 0000000..a096c81
--- /dev/null
+++ b/ftio/prediction/change_point_detection.py
@@ -0,0 +1,1064 @@
+"""Change point detection algorithms for FTIO online predictor."""
+
+from __future__ import annotations
+
+import numpy as np
+import math
+from typing import List, Tuple, Optional, Dict, Any
+from multiprocessing import Lock
+from rich.console import Console
+from ftio.prediction.helper import get_dominant
+from ftio.freq.prediction import Prediction
+
+
+class ChangePointDetector:
+    """ADWIN detector for I/O pattern changes with automatic window sizing."""
+
+    def __init__(self, delta: float = 0.05, shared_resources=None, show_init: bool = True, verbose: bool = False):
+        """Initialize ADWIN detector with confidence parameter delta (default: 0.05)."""
+        self.delta = min(max(delta, 1e-12), 1 - 1e-12)
+        self.shared_resources = shared_resources
+        self.verbose = verbose
+
+        if shared_resources and not shared_resources.adwin_initialized.value:
+            if hasattr(shared_resources, 'adwin_lock'):
+                with shared_resources.adwin_lock:
+                    if not shared_resources.adwin_initialized.value:
+                        shared_resources.adwin_frequencies[:] = []
+                        shared_resources.adwin_timestamps[:] = []
+                        shared_resources.adwin_total_samples.value = 0
+                        shared_resources.adwin_change_count.value = 0
+                        shared_resources.adwin_last_change_time.value = 0.0
+                        shared_resources.adwin_initialized.value = True
+            else:
+                if not shared_resources.adwin_initialized.value:
+                    shared_resources.adwin_frequencies[:] = []
+                    shared_resources.adwin_timestamps[:] = []
+                    shared_resources.adwin_total_samples.value = 0
+                    shared_resources.adwin_change_count.value = 0
+                    shared_resources.adwin_last_change_time.value = 0.0
+                    shared_resources.adwin_initialized.value = True
+
+        if shared_resources is None:
+            self.frequencies: List[float] = []
+            self.timestamps: List[float] = []
+            self.total_samples = 0
+            self.change_count = 0
+            self.last_change_time: Optional[float] = None
+
+        self.last_change_point: Optional[int] = None
+        self.min_window_size = 2
+        self.console = Console()
+
+        if show_init:
+            self.console.print(f"[green][ADWIN] Initialized with δ={delta:.3f} "
+                               f"({(1-delta)*100:.0f}% confidence) "
+                               f"[Process-safe: {shared_resources is not None}][/]")
+
+    def _get_frequencies(self):
+        if self.shared_resources:
+            return self.shared_resources.adwin_frequencies
+        return self.frequencies
+
+    def _get_timestamps(self):
+        if self.shared_resources:
+            return self.shared_resources.adwin_timestamps
+        return self.timestamps
+
+    def _get_total_samples(self):
+        if self.shared_resources:
+            return self.shared_resources.adwin_total_samples.value
+        return self.total_samples
+
+    def _set_total_samples(self, value):
+        if self.shared_resources:
+            self.shared_resources.adwin_total_samples.value = value
+        else:
+            self.total_samples = value
+
+    def _get_change_count(self):
+        if self.shared_resources:
+            return self.shared_resources.adwin_change_count.value
+        return self.change_count
+
+    def _set_change_count(self, value):
+        if self.shared_resources:
+            self.shared_resources.adwin_change_count.value = value
+        else:
+            self.change_count = value
+
+    def _get_last_change_time(self):
+        if self.shared_resources:
+            return self.shared_resources.adwin_last_change_time.value if 
self.shared_resources.adwin_last_change_time.value > 0 else None + return self.last_change_time + + def _set_last_change_time(self, value): + if self.shared_resources: + self.shared_resources.adwin_last_change_time.value = value if value is not None else 0.0 + else: + self.last_change_time = value + + def _reset_window(self): + frequencies = self._get_frequencies() + timestamps = self._get_timestamps() + + if self.shared_resources: + del frequencies[:] + del timestamps[:] + self._set_total_samples(0) + self._set_last_change_time(None) + else: + self.frequencies.clear() + self.timestamps.clear() + self._set_total_samples(0) + self._set_last_change_time(None) + + self.console.print("[dim yellow][ADWIN] Window cleared: No frequency data to analyze[/]") + + def add_prediction(self, prediction: Prediction, timestamp: float) -> Optional[Tuple[int, float]]: + + freq = get_dominant(prediction) + + if np.isnan(freq) or freq <= 0: + self.console.print("[yellow][ADWIN] No frequency found - resetting window history[/]") + self._reset_window() + return None + + if self.shared_resources and hasattr(self.shared_resources, 'adwin_lock'): + with self.shared_resources.adwin_lock: + return self._add_prediction_synchronized(prediction, timestamp, freq) + else: + return self._add_prediction_local(prediction, timestamp, freq) + + def _add_prediction_synchronized(self, prediction: Prediction, timestamp: float, freq: float) -> Optional[Tuple[int, float]]: + frequencies = self._get_frequencies() + timestamps = self._get_timestamps() + + frequencies.append(freq) + timestamps.append(timestamp) + self._set_total_samples(self._get_total_samples() + 1) + + if len(frequencies) < self.min_window_size: + return None + + change_point = self._detect_change() + + if change_point is not None: + exact_change_timestamp = timestamps[change_point] + + self._process_change_point(change_point) + self._set_change_count(self._get_change_count() + 1) + + return (change_point, exact_change_timestamp) + + return None + + def _add_prediction_local(self, prediction: Prediction, timestamp: float, freq: float) -> Optional[Tuple[int, float]]: + frequencies = self._get_frequencies() + timestamps = self._get_timestamps() + + frequencies.append(freq) + timestamps.append(timestamp) + self._set_total_samples(self._get_total_samples() + 1) + + if len(frequencies) < self.min_window_size: + return None + + change_point = self._detect_change() + + if change_point is not None: + exact_change_timestamp = timestamps[change_point] + + self._process_change_point(change_point) + self._set_change_count(self._get_change_count() + 1) + + return (change_point, exact_change_timestamp) + + return None + + def _detect_change(self) -> Optional[int]: + + frequencies = self._get_frequencies() + timestamps = self._get_timestamps() + n = len(frequencies) + + if n < 2 * self.min_window_size: + return None + + for cut in range(self.min_window_size, n - self.min_window_size + 1): + if self._test_cut_point(cut): + self.console.print(f"[blue][ADWIN] Change detected at position {cut}/{n}, " + f"time={timestamps[cut]:.3f}s[/]") + return cut + + return None + + def _test_cut_point(self, cut: int) -> bool: + + frequencies = self._get_frequencies() + n = len(frequencies) + + left_data = frequencies[:cut] + n0 = len(left_data) + mean0 = np.mean(left_data) + + right_data = frequencies[cut:] + n1 = len(right_data) + mean1 = np.mean(right_data) + + if n0 <= 0 or n1 <= 0: + return False + + n_harmonic = (n0 * n1) / (n0 + n1) + + try: + + confidence_term = math.log(2.0 / self.delta) 
/ (2.0 * n_harmonic)
+            threshold = math.sqrt(2.0 * confidence_term)
+
+        except (ValueError, ZeroDivisionError):
+            # Keep confidence_term defined for the verbose output below
+            confidence_term = 0.0
+            threshold = 0.05
+
+        mean_diff = abs(mean1 - mean0)
+
+        if self.verbose:
+            self.console.print(f"[dim blue][ADWIN DEBUG] Cut={cut}:[/]")
+            self.console.print(f" [dim]• Left window: {n0} samples, mean={mean0:.3f}Hz[/]")
+            self.console.print(f" [dim]• Right window: {n1} samples, mean={mean1:.3f}Hz[/]")
+            self.console.print(f" [dim]• Mean difference: |{mean1:.3f} - {mean0:.3f}| = {mean_diff:.3f}[/]")
+            self.console.print(f" [dim]• Harmonic mean: {n_harmonic:.1f}[/]")
+            self.console.print(f" [dim]• Confidence term: log(2/{self.delta}) / (2×{n_harmonic:.1f}) = {confidence_term:.6f}[/]")
+            self.console.print(f" [dim]• Threshold: √(2×{confidence_term:.6f}) = {threshold:.3f}[/]")
+            self.console.print(f" [dim]• Test: {mean_diff:.3f} > {threshold:.3f} ? {'CHANGE!' if mean_diff > threshold else 'No change'}[/]")
+
+        return mean_diff > threshold
+
+    def _process_change_point(self, change_point: int):
+        frequencies = self._get_frequencies()
+        timestamps = self._get_timestamps()
+
+        self.last_change_point = change_point
+        change_time = timestamps[change_point]
+        self._set_last_change_time(change_time)
+
+        old_window_size = len(frequencies)
+        old_freq = np.mean(frequencies[:change_point]) if change_point > 0 else 0
+
+        if self.shared_resources:
+            del frequencies[:change_point]
+            del timestamps[:change_point]
+            new_frequencies = frequencies
+            new_timestamps = timestamps
+        else:
+            self.frequencies = frequencies[change_point:]
+            self.timestamps = timestamps[change_point:]
+            new_frequencies = self.frequencies
+            new_timestamps = self.timestamps
+
+        new_window_size = len(new_frequencies)
+        new_freq = np.mean(new_frequencies) if new_frequencies else 0
+
+        freq_change = abs(new_freq - old_freq) / old_freq * 100 if old_freq > 0 else 0
+        time_span = new_timestamps[-1] - new_timestamps[0] if len(new_timestamps) > 1 else 0
+
+        self.console.print(f"[green][ADWIN] Window adapted: "
+                           f"{old_window_size} → {new_window_size} samples[/]")
+        self.console.print(f"[green][ADWIN] Frequency shift: "
+                           f"{old_freq:.3f} → {new_freq:.3f} Hz ({freq_change:.1f}%)[/]")
+        self.console.print(f"[green][ADWIN] New window span: {time_span:.2f} seconds[/]")
+
+    def get_adaptive_start_time(self, current_prediction: Prediction) -> float:
+        timestamps = self._get_timestamps()
+
+        if len(timestamps) == 0:
+            return current_prediction.t_start
+
+        last_change_time = self._get_last_change_time()
+        if last_change_time is not None:
+            exact_change_start = last_change_time
+
+            min_window = 0.5
+            max_lookback = 10.0
+
+            window_span = current_prediction.t_end - exact_change_start
+
+            if window_span < min_window:
+                adaptive_start = max(0, current_prediction.t_end - min_window)
+                self.console.print(f"[yellow][ADWIN] Change point too recent, using min window: "
+                                   f"{adaptive_start:.6f}s[/]")
+            elif window_span > max_lookback:
+                adaptive_start = max(0, current_prediction.t_end - max_lookback)
+                self.console.print(f"[yellow][ADWIN] Change point too old, using max lookback: "
+                                   f"{adaptive_start:.6f}s[/]")
+            else:
+                adaptive_start = exact_change_start
+                self.console.print(f"[green][ADWIN] Using EXACT change point timestamp: "
+                                   f"{adaptive_start:.6f}s (window span: {window_span:.3f}s)[/]")
+
+            return adaptive_start
+
+        window_start = timestamps[0]
+
+        min_start = current_prediction.t_end - 10.0
+        max_start = current_prediction.t_end - 0.5
+
+        adaptive_start = max(min_start, min(window_start, max_start))
+
+        return adaptive_start
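Note: `_test_cut_point` above applies ADWIN's Hoeffding-style bound: with m the harmonic mean of the two sub-window sizes, a cut is significant when |mean1 − mean0| > sqrt(2 · ln(2/δ) / (2m)). A standalone numeric sketch of the same test (illustrative values):

```python
import math

def adwin_threshold(n0: int, n1: int, delta: float = 0.05) -> float:
    m = (n0 * n1) / (n0 + n1)  # harmonic-mean window size, as in _test_cut_point
    confidence_term = math.log(2.0 / delta) / (2.0 * m)
    return math.sqrt(2.0 * confidence_term)

# Two 10-sample halves at 95% confidence give a threshold of ~0.859 Hz,
# so a 1 Hz jump in the mean dominant frequency is flagged.
eps = adwin_threshold(10, 10)
print(f"{eps:.3f}")          # 0.859
print(abs(3.0 - 2.0) > eps)  # True -> change detected
```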
+    def get_window_stats(self) -> Dict[str, Any]:
+        """Get current ADWIN window statistics for debugging and logging."""
+        frequencies = self._get_frequencies()
+        timestamps = self._get_timestamps()
+
+        if not frequencies:
+            return {
+                "size": 0, "mean": 0.0, "std": 0.0,
+                "range": [0.0, 0.0], "time_span": 0.0,
+                "total_samples": self._get_total_samples(),
+                "change_count": self._get_change_count()
+            }
+
+        return {
+            "size": len(frequencies),
+            "mean": np.mean(frequencies),
+            "std": np.std(frequencies),
+            "range": [float(np.min(frequencies)), float(np.max(frequencies))],
+            "time_span": float(timestamps[-1] - timestamps[0]) if len(timestamps) > 1 else 0.0,
+            "total_samples": self._get_total_samples(),
+            "change_count": self._get_change_count()
+        }
+
+    def should_adapt_window(self) -> bool:
+        """Check if window adaptation should be triggered."""
+        return self.last_change_point is not None
+
+    def log_change_point(self, counter: int, old_freq: float, new_freq: float) -> str:
+        last_change_time = self._get_last_change_time()
+        if last_change_time is None:
+            return ""
+
+        freq_change_pct = abs(new_freq - old_freq) / old_freq * 100 if old_freq > 0 else 0
+        stats = self.get_window_stats()
+
+        log_msg = (
+            f"[red bold][CHANGE_POINT] t_s={last_change_time:.3f} sec[/]\n"
+            f"[purple][PREDICTOR] (#{counter}):[/][yellow] "
+            f"ADWIN detected pattern change: {old_freq:.3f} → {new_freq:.3f} Hz "
+            f"({freq_change_pct:.1f}% change)[/]\n"
+            f"[purple][PREDICTOR] (#{counter}):[/][yellow] "
+            f"Adaptive window: {stats['size']} samples, "
+            f"span={stats['time_span']:.1f}s, "
+            f"changes={stats['change_count']}/{stats['total_samples']}[/]\n"
+            f"[dim blue]ADWIN ANALYSIS: Statistical significance detected using Hoeffding bounds[/]\n"
+            f"[dim blue]Window split analysis found mean difference > confidence threshold[/]\n"
+            f"[dim blue]Confidence level: {(1-self.delta)*100:.0f}% (δ={self.delta:.3f})[/]"
+        )
+
+        self.last_change_point = None
+
+        return log_msg
+
+    def get_change_point_time(self, shared_resources=None) -> Optional[float]:
+        return self._get_last_change_time()
+
+
+def detect_pattern_change_adwin(shared_resources, current_prediction: Prediction,
+                                detector: ChangePointDetector, counter: int) -> Tuple[bool, Optional[str], float]:
+    change_point = detector.add_prediction(current_prediction, current_prediction.t_end)
+
+    if change_point is not None:
+        change_idx, change_time = change_point
+
+        current_freq = get_dominant(current_prediction)
+
+        old_freq = current_freq
+        frequencies = detector._get_frequencies()
+        if len(frequencies) > 1:
+            window_stats = detector.get_window_stats()
+            old_freq = max(0.1, window_stats["mean"] * 0.9)
+
+        log_msg = detector.log_change_point(counter, old_freq, current_freq)
+
+        new_start_time = detector.get_adaptive_start_time(current_prediction)
+
+        try:
+            from ftio.prediction.online_analysis import get_socket_logger
+            logger = get_socket_logger()
+            logger.send_log("change_point", "ADWIN Change Point Detected", {
+                'exact_time': change_time,
+                'old_freq': old_freq,
+                'new_freq': current_freq,
+                'adaptive_start': new_start_time,
+                'counter': counter
+            })
+        except ImportError:
+            pass
+
+        return True, log_msg, new_start_time
+
+    return False, None, current_prediction.t_start
+
+
+class CUSUMDetector:
+    """Adaptive-Variance CUSUM detector with variance-based threshold adaptation."""
+
+    def __init__(self, window_size: int = 50, shared_resources=None, show_init: bool = True, verbose: bool = False):
+        """Initialize AV-CUSUM detector with rolling window size (default: 50)."""
self.window_size = window_size + self.shared_resources = shared_resources + self.show_init = show_init + self.verbose = verbose + + self.sum_pos = 0.0 + self.sum_neg = 0.0 + self.reference = None + self.initialized = False + + self.adaptive_threshold = 0.0 + self.adaptive_drift = 0.0 + self.rolling_std = 0.0 + self.frequency_buffer = [] + + self.console = Console() + + def _update_adaptive_parameters(self, freq: float): + """Calculate thresholds automatically from data standard deviation.""" + import numpy as np + + if self.shared_resources and hasattr(self.shared_resources, 'cusum_frequencies'): + if hasattr(self.shared_resources, 'cusum_lock'): + with self.shared_resources.cusum_lock: + all_freqs = list(self.shared_resources.cusum_frequencies) + recent_freqs = all_freqs[-self.window_size-1:-1] if len(all_freqs) > 1 else [] + else: + all_freqs = list(self.shared_resources.cusum_frequencies) + recent_freqs = all_freqs[-self.window_size-1:-1] if len(all_freqs) > 1 else [] + else: + self.frequency_buffer.append(freq) + if len(self.frequency_buffer) > self.window_size: + self.frequency_buffer.pop(0) + recent_freqs = self.frequency_buffer[:-1] if len(self.frequency_buffer) > 1 else [] + + if self.verbose: + self.console.print(f"[dim magenta][CUSUM DEBUG] Buffer for σ calculation (excluding current): {[f'{f:.3f}' for f in recent_freqs]} (len={len(recent_freqs)})[/]") + + if len(recent_freqs) >= 3: + freqs = np.array(recent_freqs) + self.rolling_std = np.std(freqs) + + + std_factor = max(self.rolling_std, 0.01) + + self.adaptive_threshold = 2.0 * std_factor + self.adaptive_drift = 0.5 * std_factor + + if self.verbose: + self.console.print(f"[dim cyan][CUSUM] σ={self.rolling_std:.3f}, " + f"h_t={self.adaptive_threshold:.3f} (2σ threshold), " + f"k_t={self.adaptive_drift:.3f} (0.5σ drift)[/]") + + def _reset_cusum_state(self): + """Reset CUSUM state when no frequency is detected.""" + self.sum_pos = 0.0 + self.sum_neg = 0.0 + self.reference = None + self.initialized = False + + self.frequency_buffer.clear() + self.rolling_std = 0.0 + self.adaptive_threshold = 0.0 + self.adaptive_drift = 0.0 + + if self.shared_resources: + if hasattr(self.shared_resources, 'cusum_lock'): + with self.shared_resources.cusum_lock: + del self.shared_resources.cusum_frequencies[:] + del self.shared_resources.cusum_timestamps[:] + else: + del self.shared_resources.cusum_frequencies[:] + del self.shared_resources.cusum_timestamps[:] + + self.console.print("[dim yellow][CUSUM] State cleared: Starting fresh when frequency resumes[/]") + + def add_frequency(self, freq: float, timestamp: float = None) -> Tuple[bool, Dict[str, Any]]: + + if np.isnan(freq) or freq <= 0: + self.console.print("[yellow][AV-CUSUM] No frequency found - resetting algorithm state[/]") + self._reset_cusum_state() + return False, {} + + if self.shared_resources: + if hasattr(self.shared_resources, 'cusum_lock'): + with self.shared_resources.cusum_lock: + self.shared_resources.cusum_frequencies.append(freq) + self.shared_resources.cusum_timestamps.append(timestamp or 0.0) + else: + self.shared_resources.cusum_frequencies.append(freq) + self.shared_resources.cusum_timestamps.append(timestamp or 0.0) + + self._update_adaptive_parameters(freq) + + if not self.initialized: + min_init_samples = 3 + if self.shared_resources and len(self.shared_resources.cusum_frequencies) >= min_init_samples: + first_freqs = list(self.shared_resources.cusum_frequencies)[:min_init_samples] + self.reference = np.mean(first_freqs) + self.initialized = True + if self.show_init: + 
self.console.print(f"[yellow][AV-CUSUM] Reference established: {self.reference:.3f} Hz "
+                                       f"(from first {min_init_samples} observations: {[f'{f:.3f}' for f in first_freqs]})[/]")
+            else:
+                current_count = len(self.shared_resources.cusum_frequencies) if self.shared_resources else 0
+                self.console.print(f"[dim yellow][AV-CUSUM] Collecting calibration data ({current_count}/{min_init_samples})[/]")
+                return False, {}
+
+        deviation = freq - self.reference
+
+        # Keep the pre-update sums so the debug output below labels them correctly
+        old_sum_pos = self.sum_pos
+        old_sum_neg = self.sum_neg
+        new_sum_pos = max(0, self.sum_pos + deviation - self.adaptive_drift)
+        new_sum_neg = max(0, self.sum_neg - deviation - self.adaptive_drift)
+
+        self.sum_pos = new_sum_pos
+        self.sum_neg = new_sum_neg
+
+        if self.verbose:
+            current_window_size = len(self.shared_resources.cusum_frequencies) if self.shared_resources else 0
+
+            self.console.print(f"[dim yellow][AV-CUSUM DEBUG] Observation #{current_window_size}:[/]")
+            self.console.print(f" [dim]• Current freq: {freq:.3f} Hz[/]")
+            self.console.print(f" [dim]• Reference: {self.reference:.3f} Hz[/]")
+            self.console.print(f" [dim]• Deviation: {freq:.3f} - {self.reference:.3f} = {deviation:.3f}[/]")
+            self.console.print(f" [dim]• Adaptive drift: {self.adaptive_drift:.3f} (k_t = 0.5×σ, σ={self.rolling_std:.3f})[/]")
+            self.console.print(f" [dim]• Sum_pos before: {old_sum_pos:.3f}[/]")
+            self.console.print(f" [dim]• Sum_neg before: {old_sum_neg:.3f}[/]")
+            self.console.print(f" [dim]• Sum_pos calculation: max(0, {old_sum_pos:.3f} + {deviation:.3f} - {self.adaptive_drift:.3f}) = {new_sum_pos:.3f}[/]")
+            self.console.print(f" [dim]• Sum_neg calculation: max(0, {old_sum_neg:.3f} - {deviation:.3f} - {self.adaptive_drift:.3f}) = {new_sum_neg:.3f}[/]")
+            self.console.print(f" [dim]• Adaptive threshold: {self.adaptive_threshold:.3f} (h_t = 2.0×σ, σ={self.rolling_std:.3f})[/]")
+            self.console.print(f" [dim]• Upward change test: {self.sum_pos:.3f} > {self.adaptive_threshold:.3f} = {'UPWARD CHANGE!' if self.sum_pos > self.adaptive_threshold else 'No change'}[/]")
+            self.console.print(f" [dim]• Downward change test: {self.sum_neg:.3f} > {self.adaptive_threshold:.3f} = {'DOWNWARD CHANGE!' if self.sum_neg > self.adaptive_threshold else 'No change'}[/]")
+
+        if self.shared_resources and hasattr(self.shared_resources, 'cusum_frequencies'):
+            sample_count = len(self.shared_resources.cusum_frequencies)
+        else:
+            sample_count = len(self.frequency_buffer)
+
+        if sample_count < 3 or self.adaptive_threshold <= 0:
+            return False, {}
+
+        upward_change = self.sum_pos > self.adaptive_threshold
+        downward_change = self.sum_neg > self.adaptive_threshold
+        change_detected = upward_change or downward_change
+
+        change_info = {
+            'timestamp': timestamp,
+            'frequency': freq,
+            'reference': self.reference,
+            'sum_pos': self.sum_pos,
+            'sum_neg': self.sum_neg,
+            'threshold': self.adaptive_threshold,
+            'rolling_std': self.rolling_std,
+            'deviation': deviation,
+            'change_type': 'increase' if upward_change else 'decrease' if downward_change else 'none'
+        }
+
+        if change_detected:
+            change_type = change_info['change_type']
+            change_percent = abs(deviation / self.reference * 100) if self.reference != 0 else 0
+
+            self.console.print(f"[bold yellow][AV-CUSUM] CHANGE DETECTED! "
+                               f"{self.reference:.3f}Hz → {freq:.3f}Hz "
+                               f"({change_percent:.1f}% {change_type})[/]")
+            self.console.print(f"[yellow][AV-CUSUM] Sum_pos={self.sum_pos:.2f}, Sum_neg={self.sum_neg:.2f}, "
+                               f"Adaptive_Threshold={self.adaptive_threshold:.2f}[/]")
+            self.console.print(f"[dim yellow]AV-CUSUM ANALYSIS: Cumulative sum exceeded adaptive threshold {self.adaptive_threshold:.2f}[/]")
+            self.console.print(f"[dim yellow]Detection method: {'Positive sum (upward trend)' if upward_change else 'Negative sum (downward trend)'}[/]")
+            self.console.print(f"[dim yellow]Adaptive drift: {self.adaptive_drift:.3f} (σ={self.rolling_std:.3f})[/]")
+
+            old_reference = self.reference
+            self.reference = freq
+            self.console.print(f"[cyan][CUSUM] Reference updated: {old_reference:.3f} → {self.reference:.3f} Hz "
+                               f"({change_percent:.1f}% change)[/]")
+
+            self.sum_pos = 0.0
+            self.sum_neg = 0.0
+
+            if self.shared_resources:
+                if hasattr(self.shared_resources, 'cusum_lock'):
+                    with self.shared_resources.cusum_lock:
+                        old_window_size = len(self.shared_resources.cusum_frequencies)
+
+                        current_freq_list = [freq]
+                        current_timestamp_list = [timestamp or 0.0]
+
+                        self.shared_resources.cusum_frequencies[:] = current_freq_list
+                        self.shared_resources.cusum_timestamps[:] = current_timestamp_list
+
+                        self.console.print(f"[green][CUSUM] CHANGE POINT ADAPTATION: Discarded {old_window_size-1} past samples, "
+                                           f"starting fresh from current detection[/]")
+                        self.console.print(f"[green][CUSUM] WINDOW RESET: {old_window_size} → {len(self.shared_resources.cusum_frequencies)} samples[/]")
+
+                        self.shared_resources.cusum_change_count.value += 1
+                else:
+                    old_window_size = len(self.shared_resources.cusum_frequencies)
+                    current_freq_list = [freq]
+                    current_timestamp_list = [timestamp or 0.0]
+                    self.shared_resources.cusum_frequencies[:] = current_freq_list
+                    self.shared_resources.cusum_timestamps[:] = current_timestamp_list
+                    self.console.print(f"[green][CUSUM] CHANGE POINT ADAPTATION: Discarded {old_window_size-1} past samples[/]")
+                    self.shared_resources.cusum_change_count.value += 1
+
+        return change_detected, change_info
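Note: `add_frequency` above implements the standard two-sided CUSUM recursion S+ = max(0, S+ + (x − μ0) − k) and S− = max(0, S− − (x − μ0) − k), with drift k = 0.5σ and threshold h = 2σ estimated from the rolling window. A compact standalone sketch of the same recursion (synthetic data, no shared-memory plumbing):

```python
import numpy as np

def cusum(xs, reference, k, h):
    s_pos = s_neg = 0.0
    for i, x in enumerate(xs):
        dev = x - reference
        s_pos = max(0.0, s_pos + dev - k)  # accumulates upward drift
        s_neg = max(0.0, s_neg - dev - k)  # accumulates downward drift
        if s_pos > h or s_neg > h:
            return i                       # first flagged sample
    return None

# Dominant frequency shifts from ~2 Hz to ~3 Hz at sample 5.
freqs = [2.0, 2.1, 1.9, 2.0, 2.1, 3.0, 3.1, 3.0]
sigma = np.std(freqs[:5])  # ~0.075
print(cusum(freqs, reference=2.02, k=0.5 * sigma, h=2.0 * sigma))  # 5
```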
" + f"{self.reference:.3f}Hz → {freq:.3f}Hz " + f"({change_percent:.1f}% {change_type})[/]") + self.console.print(f"[yellow][AV-CUSUM] Sum_pos={self.sum_pos:.2f}, Sum_neg={self.sum_neg:.2f}, " + f"Adaptive_Threshold={self.adaptive_threshold:.2f}[/]") + self.console.print(f"[dim yellow]AV-CUSUM ANALYSIS: Cumulative sum exceeded adaptive threshold {self.adaptive_threshold:.2f}[/]") + self.console.print(f"[dim yellow]Detection method: {'Positive sum (upward trend)' if upward_change else 'Negative sum (downward trend)'}[/]") + self.console.print(f"[dim yellow]Adaptive drift: {self.adaptive_drift:.3f} (σ={self.rolling_std:.3f})[/]") + + old_reference = self.reference + self.reference = freq + self.console.print(f"[cyan][CUSUM] Reference updated: {old_reference:.3f} → {self.reference:.3f} Hz " + f"({change_percent:.1f}% change)[/]") + + self.sum_pos = 0.0 + self.sum_neg = 0.0 + + if self.shared_resources: + if hasattr(self.shared_resources, 'cusum_lock'): + with self.shared_resources.cusum_lock: + old_window_size = len(self.shared_resources.cusum_frequencies) + + current_freq_list = [freq] + current_timestamp_list = [timestamp or 0.0] + + self.shared_resources.cusum_frequencies[:] = current_freq_list + self.shared_resources.cusum_timestamps[:] = current_timestamp_list + + self.console.print(f"[green][CUSUM] CHANGE POINT ADAPTATION: Discarded {old_window_size-1} past samples, " + f"starting fresh from current detection[/]") + self.console.print(f"[green][CUSUM] WINDOW RESET: {old_window_size} → {len(self.shared_resources.cusum_frequencies)} samples[/]") + + self.shared_resources.cusum_change_count.value += 1 + else: + old_window_size = len(self.shared_resources.cusum_frequencies) + current_freq_list = [freq] + current_timestamp_list = [timestamp or 0.0] + self.shared_resources.cusum_frequencies[:] = current_freq_list + self.shared_resources.cusum_timestamps[:] = current_timestamp_list + self.console.print(f"[green][CUSUM] CHANGE POINT ADAPTATION: Discarded {old_window_size-1} past samples[/]") + self.shared_resources.cusum_change_count.value += 1 + + return change_detected, change_info + + +def detect_pattern_change_cusum( + shared_resources, + current_prediction: Prediction, + detector: CUSUMDetector, + counter: int +) -> Tuple[bool, Optional[str], float]: + + + current_freq = get_dominant(current_prediction) + current_time = current_prediction.t_end + + if np.isnan(current_freq): + detector._reset_cusum_state() + return False, None, current_prediction.t_start + + change_detected, change_info = detector.add_frequency(current_freq, current_time) + + if not change_detected: + return False, None, current_prediction.t_start + + change_type = change_info['change_type'] + reference = change_info['reference'] + threshold = change_info['threshold'] + sum_pos = change_info['sum_pos'] + sum_neg = change_info['sum_neg'] + + magnitude = abs(current_freq - reference) + percent_change = (magnitude / reference * 100) if reference > 0 else 0 + + log_msg = ( + f"[bold red][CUSUM] CHANGE DETECTED! 
" + f"{reference:.1f}Hz → {current_freq:.1f}Hz " + f"(Δ={magnitude:.1f}Hz, {percent_change:.1f}% {change_type}) " + f"at sample {len(shared_resources.cusum_frequencies)}, time={current_time:.3f}s[/]\n" + f"[red][CUSUM] CUSUM stats: sum_pos={sum_pos:.2f}, sum_neg={sum_neg:.2f}, " + f"threshold={threshold}[/]\n" + f"[red][CUSUM] Cumulative sum exceeded threshold -> Starting fresh analysis[/]" + ) + + if percent_change > 100: + min_window_size = 0.5 + + elif percent_change > 50: + min_window_size = 1.0 + else: + min_window_size = 2.0 + + new_start_time = max(0, current_time - min_window_size) + + try: + from ftio.prediction.online_analysis import get_socket_logger + logger = get_socket_logger() + logger.send_log("change_point", "CUSUM Change Point Detected", { + 'algorithm': 'CUSUM', + 'detection_time': current_time, + 'change_type': change_type, + 'frequency': current_freq, + 'reference': reference, + 'magnitude': magnitude, + 'percent_change': percent_change, + 'threshold': threshold, + 'counter': counter + }) + except ImportError: + pass + + return True, log_msg, new_start_time + + +class SelfTuningPageHinkleyDetector: + """Self-Tuning Page-Hinkley detector with adaptive running mean baseline.""" + + def __init__(self, window_size: int = 10, shared_resources=None, show_init: bool = True, verbose: bool = False): + """Initialize STPH detector with rolling window size (default: 10).""" + self.window_size = window_size + self.shared_resources = shared_resources + self.show_init = show_init + self.verbose = verbose + self.console = Console() + + self.adaptive_threshold = 0.0 + self.adaptive_delta = 0.0 + self.rolling_std = 0.0 + self.frequency_buffer = [] + + self.cumulative_sum_pos = 0.0 + self.cumulative_sum_neg = 0.0 + self.reference_mean = 0.0 + self.sum_of_samples = 0.0 + self.sample_count = 0 + + if shared_resources and hasattr(shared_resources, 'pagehinkley_state'): + try: + state = dict(shared_resources.pagehinkley_state) + if state.get('initialized', False): + self.cumulative_sum_pos = state.get('cumulative_sum_pos', 0.0) + self.cumulative_sum_neg = state.get('cumulative_sum_neg', 0.0) + self.reference_mean = state.get('reference_mean', 0.0) + self.sum_of_samples = state.get('sum_of_samples', 0.0) + self.sample_count = state.get('sample_count', 0) + if self.verbose: + self.console.print(f"[green][PH DEBUG] Restored state: cusum_pos={self.cumulative_sum_pos:.3f}, cusum_neg={self.cumulative_sum_neg:.3f}, ref_mean={self.reference_mean:.3f}[/]") + else: + self._initialize_fresh_state() + except Exception as e: + if self.verbose: + self.console.print(f"[red][PH DEBUG] State restore failed: {e}[/]") + self._initialize_fresh_state() + else: + self._initialize_fresh_state() + + def _update_adaptive_parameters(self, freq: float): + """Calculate thresholds automatically from data standard deviation.""" + import numpy as np + + + if self.shared_resources and hasattr(self.shared_resources, 'pagehinkley_frequencies'): + if hasattr(self.shared_resources, 'ph_lock'): + with self.shared_resources.ph_lock: + all_freqs = list(self.shared_resources.pagehinkley_frequencies) + recent_freqs = all_freqs[-self.window_size-1:-1] if len(all_freqs) > 1 else [] + else: + all_freqs = list(self.shared_resources.pagehinkley_frequencies) + recent_freqs = all_freqs[-self.window_size-1:-1] if len(all_freqs) > 1 else [] + else: + self.frequency_buffer.append(freq) + if len(self.frequency_buffer) > self.window_size: + self.frequency_buffer.pop(0) + recent_freqs = self.frequency_buffer[:-1] if len(self.frequency_buffer) > 
1 else [] + + if len(recent_freqs) >= 3: + freqs = np.array(recent_freqs) + self.rolling_std = np.std(freqs) + + + std_factor = max(self.rolling_std, 0.01) + + self.adaptive_threshold = 2.0 * std_factor + self.adaptive_delta = 0.5 * std_factor + + if self.verbose: + self.console.print(f"[dim magenta][Page-Hinkley] σ={self.rolling_std:.3f}, " + f"λ_t={self.adaptive_threshold:.3f} (2σ threshold), " + f"δ_t={self.adaptive_delta:.3f} (0.5σ delta)[/]") + + def _reset_pagehinkley_state(self): + """Reset Page-Hinkley state when no frequency is detected.""" + self.cumulative_sum_pos = 0.0 + self.cumulative_sum_neg = 0.0 + self.reference_mean = 0.0 + self.sum_of_samples = 0.0 + self.sample_count = 0 + + self.frequency_buffer.clear() + self.rolling_std = 0.0 + self.adaptive_threshold = 0.0 + self.adaptive_delta = 0.0 + + if self.shared_resources: + if hasattr(self.shared_resources, 'pagehinkley_lock'): + with self.shared_resources.pagehinkley_lock: + if hasattr(self.shared_resources, 'pagehinkley_frequencies'): + del self.shared_resources.pagehinkley_frequencies[:] + if hasattr(self.shared_resources, 'pagehinkley_timestamps'): + del self.shared_resources.pagehinkley_timestamps[:] + if hasattr(self.shared_resources, 'pagehinkley_state'): + self.shared_resources.pagehinkley_state.clear() + else: + if hasattr(self.shared_resources, 'pagehinkley_frequencies'): + del self.shared_resources.pagehinkley_frequencies[:] + if hasattr(self.shared_resources, 'pagehinkley_timestamps'): + del self.shared_resources.pagehinkley_timestamps[:] + if hasattr(self.shared_resources, 'pagehinkley_state'): + self.shared_resources.pagehinkley_state.clear() + + self.console.print("[dim yellow][STPH] State cleared: Starting fresh when frequency resumes[/]") + + def _initialize_fresh_state(self): + """Initialize fresh Page-Hinkley state.""" + self.cumulative_sum_pos = 0.0 + self.cumulative_sum_neg = 0.0 + self.reference_mean = 0.0 + self.sum_of_samples = 0.0 + self.sample_count = 0 + + def reset(self, current_freq: float = None): + + self.cumulative_sum_pos = 0.0 + self.cumulative_sum_neg = 0.0 + + if current_freq is not None: + self.reference_mean = current_freq + self.sum_of_samples = current_freq + self.sample_count = 1 + else: + self.reference_mean = 0.0 + self.sum_of_samples = 0.0 + self.sample_count = 0 + + if self.shared_resources: + if hasattr(self.shared_resources, 'pagehinkley_lock'): + with self.shared_resources.pagehinkley_lock: + if hasattr(self.shared_resources, 'pagehinkley_state'): + self.shared_resources.pagehinkley_state.update({ + 'cumulative_sum_pos': 0.0, + 'cumulative_sum_neg': 0.0, + 'reference_mean': self.reference_mean, + 'sum_of_samples': self.sum_of_samples, + 'sample_count': self.sample_count, + 'initialized': True + }) + + + if hasattr(self.shared_resources, 'pagehinkley_frequencies'): + if current_freq is not None: + self.shared_resources.pagehinkley_frequencies[:] = [current_freq] + else: + del self.shared_resources.pagehinkley_frequencies[:] + if hasattr(self.shared_resources, 'pagehinkley_timestamps'): + if current_freq is not None: + last_timestamp = self.shared_resources.pagehinkley_timestamps[-1] if len(self.shared_resources.pagehinkley_timestamps) > 0 else 0.0 + self.shared_resources.pagehinkley_timestamps[:] = [last_timestamp] + else: + del self.shared_resources.pagehinkley_timestamps[:] + else: + if hasattr(self.shared_resources, 'pagehinkley_state'): + self.shared_resources.pagehinkley_state.update({ + 'cumulative_sum_pos': 0.0, + 'cumulative_sum_neg': 0.0, + 'reference_mean': 
self.reference_mean,
+                        'sum_of_samples': self.sum_of_samples,
+                        'sample_count': self.sample_count,
+                        'initialized': True
+                    })
+                if hasattr(self.shared_resources, 'pagehinkley_frequencies'):
+                    if current_freq is not None:
+                        self.shared_resources.pagehinkley_frequencies[:] = [current_freq]
+                    else:
+                        del self.shared_resources.pagehinkley_frequencies[:]
+                if hasattr(self.shared_resources, 'pagehinkley_timestamps'):
+                    if current_freq is not None:
+                        last_timestamp = self.shared_resources.pagehinkley_timestamps[-1] if len(self.shared_resources.pagehinkley_timestamps) > 0 else 0.0
+                        self.shared_resources.pagehinkley_timestamps[:] = [last_timestamp]
+                    else:
+                        del self.shared_resources.pagehinkley_timestamps[:]
+
+        if current_freq is not None:
+            self.console.print(f"[cyan][PH] Internal state reset with new reference: {current_freq:.3f} Hz[/]")
+        else:
+            self.console.print("[cyan][PH] Internal state reset: Page-Hinkley parameters reinitialized[/]")
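Note: `add_frequency` below is a two-sided variant of the Page-Hinkley test; unlike the CUSUM detector, the reference is the running mean of all samples since the last reset, with δ = 0.5σ as the minimum detectable change and λ = 2σ as the alarm threshold. A standalone sketch of the recursion (synthetic data):

```python
import numpy as np

def page_hinkley(xs, delta, lam):
    total = 0.0
    s_pos = s_neg = 0.0
    for i, x in enumerate(xs):
        total += x
        mean = total / (i + 1)  # running reference mean
        s_pos = max(0.0, s_pos + (x - mean - delta))
        s_neg = max(0.0, s_neg + (mean - x - delta))
        if s_pos > lam or s_neg > lam:
            return i            # first flagged sample
    return None

freqs = [2.0, 2.1, 1.9, 2.0, 2.1, 3.0, 3.1, 3.2]
sigma = np.std(freqs[:5])  # ~0.075
print(page_hinkley(freqs, delta=0.5 * sigma, lam=2.0 * sigma))  # 5
```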
+    def add_frequency(self, freq: float, timestamp: float = None) -> Tuple[bool, float, Dict[str, Any]]:
+        if np.isnan(freq) or freq <= 0:
+            self.console.print("[yellow][STPH] No frequency found - resetting Page-Hinkley state[/]")
+            self._reset_pagehinkley_state()
+            return False, 0.0, {}
+
+        self._update_adaptive_parameters(freq)
+
+        if self.shared_resources:
+            if hasattr(self.shared_resources, 'pagehinkley_lock'):
+                with self.shared_resources.pagehinkley_lock:
+                    self.shared_resources.pagehinkley_frequencies.append(freq)
+                    self.shared_resources.pagehinkley_timestamps.append(timestamp or 0.0)
+            else:
+                self.shared_resources.pagehinkley_frequencies.append(freq)
+                self.shared_resources.pagehinkley_timestamps.append(timestamp or 0.0)
+
+        if self.sample_count == 0:
+            self.sample_count = 1
+            self.reference_mean = freq
+            self.sum_of_samples = freq
+            if self.show_init:
+                self.console.print(f"[yellow][STPH] Reference mean initialized: {self.reference_mean:.3f} Hz[/]")
+        else:
+            self.sample_count += 1
+            self.sum_of_samples += freq
+            self.reference_mean = self.sum_of_samples / self.sample_count
+
+        pos_difference = freq - self.reference_mean - self.adaptive_delta
+        old_cumsum_pos = self.cumulative_sum_pos
+        self.cumulative_sum_pos = max(0, self.cumulative_sum_pos + pos_difference)
+
+        neg_difference = self.reference_mean - freq - self.adaptive_delta
+        old_cumsum_neg = self.cumulative_sum_neg
+        self.cumulative_sum_neg = max(0, self.cumulative_sum_neg + neg_difference)
+
+        if self.verbose:
+            self.console.print(f"[dim magenta][STPH DEBUG] Sample #{self.sample_count}:[/]")
+            self.console.print(f" [dim]• Current freq: {freq:.3f} Hz[/]")
+            self.console.print(f" [dim]• Reference mean: {self.reference_mean:.3f} Hz[/]")
+            self.console.print(f" [dim]• Adaptive delta: {self.adaptive_delta:.3f}[/]")
+            self.console.print(f" [dim]• Positive difference: {freq:.3f} - {self.reference_mean:.3f} - {self.adaptive_delta:.3f} = {pos_difference:.3f}[/]")
+            self.console.print(f" [dim]• Sum_pos = max(0, {old_cumsum_pos:.3f} + {pos_difference:.3f}) = {self.cumulative_sum_pos:.3f}[/]")
+            self.console.print(f" [dim]• Negative difference: {self.reference_mean:.3f} - {freq:.3f} - {self.adaptive_delta:.3f} = {neg_difference:.3f}[/]")
+            self.console.print(f" [dim]• Sum_neg = max(0, {old_cumsum_neg:.3f} + {neg_difference:.3f}) = {self.cumulative_sum_neg:.3f}[/]")
+            self.console.print(f" [dim]• Adaptive threshold: {self.adaptive_threshold:.3f}[/]")
+            self.console.print(f" [dim]• Upward change test: {self.cumulative_sum_pos:.3f} > {self.adaptive_threshold:.3f} = {'UPWARD CHANGE!' if self.cumulative_sum_pos > self.adaptive_threshold else 'No change'}[/]")
+            self.console.print(f" [dim]• Downward change test: {self.cumulative_sum_neg:.3f} > {self.adaptive_threshold:.3f} = {'DOWNWARD CHANGE!' if self.cumulative_sum_neg > self.adaptive_threshold else 'No change'}[/]")
+
+        if self.shared_resources and hasattr(self.shared_resources, 'pagehinkley_state'):
+            if hasattr(self.shared_resources, 'pagehinkley_lock'):
+                with self.shared_resources.pagehinkley_lock:
+                    self.shared_resources.pagehinkley_state.update({
+                        'cumulative_sum_pos': self.cumulative_sum_pos,
+                        'cumulative_sum_neg': self.cumulative_sum_neg,
+                        'reference_mean': self.reference_mean,
+                        'sum_of_samples': self.sum_of_samples,
+                        'sample_count': self.sample_count,
+                        'initialized': True
+                    })
+            else:
+                self.shared_resources.pagehinkley_state.update({
+                    'cumulative_sum_pos': self.cumulative_sum_pos,
+                    'cumulative_sum_neg': self.cumulative_sum_neg,
+                    'reference_mean': self.reference_mean,
+                    'sum_of_samples': self.sum_of_samples,
+                    'sample_count': self.sample_count,
+                    'initialized': True
+                })
+
+        if self.shared_resources and hasattr(self.shared_resources, 'pagehinkley_frequencies'):
+            sample_count = len(self.shared_resources.pagehinkley_frequencies)
+        else:
+            sample_count = len(self.frequency_buffer)
+
+        if sample_count < 3 or self.adaptive_threshold <= 0:
+            return False, 0.0, {}
+
+        upward_change = self.cumulative_sum_pos > self.adaptive_threshold
+        downward_change = self.cumulative_sum_neg > self.adaptive_threshold
+        change_detected = upward_change or downward_change
+
+        if upward_change:
+            change_type = "increase"
+            triggering_sum = self.cumulative_sum_pos
+        elif downward_change:
+            change_type = "decrease"
+            triggering_sum = self.cumulative_sum_neg
+        else:
+            change_type = "none"
+            triggering_sum = max(self.cumulative_sum_pos, self.cumulative_sum_neg)
+
+        if change_detected:
+            magnitude = abs(freq - self.reference_mean)
+            percent_change = (magnitude / self.reference_mean * 100) if self.reference_mean > 0 else 0
+
+            self.console.print(f"[bold magenta][STPH] CHANGE DETECTED! 
" + f"{self.reference_mean:.3f}Hz → {freq:.3f}Hz " + f"({percent_change:.1f}% {change_type})[/]") + self.console.print(f"[magenta][STPH] Sum_pos={self.cumulative_sum_pos:.2f}, Sum_neg={self.cumulative_sum_neg:.2f}, " + f"Adaptive_Threshold={self.adaptive_threshold:.3f} (σ={self.rolling_std:.3f})[/]") + self.console.print(f"[dim magenta]STPH ANALYSIS: Cumulative sum exceeded adaptive threshold {self.adaptive_threshold:.2f}[/]") + self.console.print(f"[dim magenta]Detection method: {'Positive sum (upward trend)' if upward_change else 'Negative sum (downward trend)'}[/]") + self.console.print(f"[dim magenta]Adaptive minimum detectable change: {self.adaptive_delta:.3f}[/]") + + if self.shared_resources and hasattr(self.shared_resources, 'pagehinkley_change_count'): + if hasattr(self.shared_resources, 'pagehinkley_lock'): + with self.shared_resources.pagehinkley_lock: + self.shared_resources.pagehinkley_change_count.value += 1 + else: + self.shared_resources.pagehinkley_change_count.value += 1 + + current_window_size = len(self.shared_resources.pagehinkley_frequencies) if self.shared_resources else self.sample_count + + metadata = { + 'cumulative_sum_pos': self.cumulative_sum_pos, + 'cumulative_sum_neg': self.cumulative_sum_neg, + 'triggering_sum': triggering_sum, + 'change_type': change_type, + 'reference_mean': self.reference_mean, + 'frequency': freq, + 'window_size': current_window_size, + 'threshold': self.adaptive_threshold, + 'adaptive_delta': self.adaptive_delta, + 'rolling_std': self.rolling_std + } + + return change_detected, triggering_sum, metadata + + +def detect_pattern_change_pagehinkley( + shared_resources, + current_prediction: Prediction, + detector: SelfTuningPageHinkleyDetector, + counter: int +) -> Tuple[bool, Optional[str], float]: + + import numpy as np + + current_freq = get_dominant(current_prediction) + current_time = current_prediction.t_end + + if current_freq is None or np.isnan(current_freq): + detector._reset_pagehinkley_state() + return False, None, current_prediction.t_start + + change_detected, triggering_sum, metadata = detector.add_frequency(current_freq, current_time) + + if change_detected: + detector.reset(current_freq=current_freq) + + change_type = metadata.get("change_type", "unknown") + frequency = metadata.get("frequency", current_freq) + reference_mean = metadata.get("reference_mean", 0.0) + window_size = metadata.get("window_size", 0) + + magnitude = abs(frequency - reference_mean) + percent_change = (magnitude / reference_mean * 100) if reference_mean > 0 else 0 + + direction_arrow = "increasing" if change_type == "increase" else "decreasing" if change_type == "decrease" else "stable" + log_message = ( + f"[bold red][Page-Hinkley] PAGE-HINKLEY CHANGE DETECTED! 
{direction_arrow} "
+            f"{reference_mean:.1f}Hz → {frequency:.1f}Hz "
+            f"(Δ={magnitude:.1f}Hz, {percent_change:.1f}% {change_type}) "
+            f"at sample {window_size}, time={current_time:.3f}s[/]\n"
+            f"[red][Page-Hinkley] Page-Hinkley stats: sum_pos={metadata.get('cumulative_sum_pos', 0):.2f}, "
+            f"sum_neg={metadata.get('cumulative_sum_neg', 0):.2f}, threshold={detector.adaptive_threshold:.3f}[/]\n"
+            f"[red][Page-Hinkley] Cumulative sum exceeded threshold -> Starting fresh analysis[/]"
+        )
+
+        adaptive_start_time = current_time
+        if hasattr(shared_resources, 'pagehinkley_last_change_time'):
+            shared_resources.pagehinkley_last_change_time.value = current_time
+
+        logger = shared_resources.logger if hasattr(shared_resources, 'logger') else None
+        if logger:
+            logger.send_log("change_point", "Page-Hinkley Change Point Detected", {
+                'algorithm': 'PageHinkley',
+                'frequency': frequency,
+                'reference_mean': reference_mean,
+                'magnitude': magnitude,
+                'percent_change': percent_change,
+                'triggering_sum': triggering_sum,
+                'change_type': change_type,
+                'position': window_size,
+                'timestamp': current_time,
+                'threshold': detector.adaptive_threshold,
+                'delta': detector.adaptive_delta,
+                'prediction_counter': counter
+            })
+
+        return True, log_message, adaptive_start_time
+
+    return False, None, current_prediction.t_start
diff --git a/ftio/prediction/online_analysis.py b/ftio/prediction/online_analysis.py
index 839ac85..c797fb9 100644
--- a/ftio/prediction/online_analysis.py
+++ b/ftio/prediction/online_analysis.py
@@ -1,10 +1,10 @@
-"""Performs the analysis for prediction. This includes the calculation of ftio and parsing of the data into a queue"""
-
 from __future__ import annotations
 
 from argparse import Namespace
-
 import numpy as np
+import socket
+import json
+import time
 from rich.console import Console
 
 from ftio.cli import ftio_core
@@ -13,53 +13,186 @@
 from ftio.plot.units import set_unit
 from ftio.prediction.helper import get_dominant
 from ftio.prediction.shared_resources import SharedResources
+from ftio.prediction.change_point_detection import ChangePointDetector, detect_pattern_change_adwin, CUSUMDetector, detect_pattern_change_cusum, SelfTuningPageHinkleyDetector, detect_pattern_change_pagehinkley
+
+
+class SocketLogger:
+    """Forward predictor log records to an optional GUI over TCP as newline-delimited JSON."""
+
+    def __init__(self, host='localhost', port=9999):
+        self.host = host
+        self.port = port
+        self.socket = None
+        self.connected = False
+        self._connect()
+
+    def _connect(self):
+        """Attempt to connect to the GUI server"""
+        try:
+            self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            self.socket.settimeout(1.0)  # 1 second timeout
+            self.socket.connect((self.host, self.port))
+            self.connected = True
+            print(f"[INFO] Connected to GUI server at {self.host}:{self.port}")
+        except (socket.error, ConnectionRefusedError, socket.timeout) as e:
+            self.connected = False
+            if self.socket:
+                self.socket.close()
+                self.socket = None
+            print(f"[WARNING] Failed to connect to GUI server at {self.host}:{self.port}: {e}")
+            print("[WARNING] GUI logging disabled - messages will only appear in console")
+
+    def send_log(self, log_type: str, message: str, data: dict = None):
+        if not self.connected:
+            return
+
+        try:
+            log_data = {
+                'timestamp': time.time(),
+                'type': log_type,
+                'message': message,
+                'data': data or {}
+            }
+
+            json_data = json.dumps(log_data) + '\n'
+            self.socket.send(json_data.encode('utf-8'))
+
+        except (socket.error, BrokenPipeError, ConnectionResetError) as e:
+            print(f"[WARNING] Failed to send to GUI: {e}")
+            self.connected = False
+            if self.socket:
+                self.socket.close()
+                self.socket = None
+
+    def close(self):
+        if self.socket:
+            self.socket.close()
+            self.socket = None
+        self.connected = False
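Note: `SocketLogger` emits newline-delimited JSON over TCP. A minimal line-oriented receiver that a GUI could run on localhost:9999 to consume these records (test sketch, an assumed counterpart — not part of this patch):

```python
import json
import socket

srv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
srv.bind(("localhost", 9999))
srv.listen(1)
conn, _ = srv.accept()

buf = b""
while True:
    chunk = conn.recv(4096)
    if not chunk:
        break
    buf += chunk
    while b"\n" in buf:            # one JSON record per line
        line, buf = buf.split(b"\n", 1)
        record = json.loads(line)  # keys: 'timestamp', 'type', 'message', 'data'
        print(record["type"], record["message"])

conn.close()
srv.close()
```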
+
+
+_socket_logger = None
+
+def get_socket_logger():
+    global _socket_logger
+    if _socket_logger is None:
+        _socket_logger = SocketLogger()
+    return _socket_logger
+
+def strip_rich_formatting(text: str) -> str:
+    import re
+
+    clean_text = re.sub(r'\[/?(?:purple|blue|green|yellow|red|bold|dim|/)\]', '', text)
+    clean_text = re.sub(r'\[(?:purple|blue|green|yellow|red|bold|dim)\[', '[', clean_text)
+
+    return clean_text
+
+def log_to_gui_and_console(console: Console, message: str, log_type: str = "info", data: dict = None):
+    logger = get_socket_logger()
+    clean_message = strip_rich_formatting(message)
+
+    console.print(message)
+
+    logger.send_log(log_type, clean_message, data)
+
+
+def get_change_detector(shared_resources: SharedResources, algorithm: str = "adwin"):
+    console = Console()
+    algo = (algorithm or "adwin").lower()
+    global _local_detector_cache
+    if '_local_detector_cache' not in globals():
+        _local_detector_cache = {}
+    detector_key = f"{algo}_detector"
+    init_flag_attr = f"{algo}_initialized"
 
-def ftio_process(shared_resources: SharedResources, args: list[str], msgs=None) -> None:
-    """Perform a single prediction
+    if detector_key in _local_detector_cache:
+        return _local_detector_cache[detector_key]
+
+    init_flag = getattr(shared_resources, init_flag_attr)
+    show_init_message = not init_flag.value
 
-    Args:
-        shared_resources (SharedResources): shared resources among processes
-        args (list[str]): additional arguments passed to ftio
-    """
+    if algo == "cusum":
+        detector = CUSUMDetector(window_size=50, shared_resources=shared_resources, show_init=show_init_message, verbose=True)
+    elif algo == "ph":
+        detector = SelfTuningPageHinkleyDetector(shared_resources=shared_resources, show_init=show_init_message, verbose=True)
+    else:
+        detector = ChangePointDetector(delta=0.05, shared_resources=shared_resources, show_init=show_init_message, verbose=True)
+
+    _local_detector_cache[detector_key] = detector
+    init_flag.value = True
+    return detector
+
+def ftio_process(shared_resources: SharedResources, args: list[str], msgs=None) -> None:
     console = Console()
-    console.print(f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] Started")
+    pred_id = shared_resources.count.value
+    start_msg = f"[purple][PREDICTOR] (#{pred_id}):[/] Started"
+    log_to_gui_and_console(console, start_msg, "predictor_start", {"count": pred_id})
 
-    # Modify the arguments
     args.extend(["-e", "no"])
     args.extend(["-ts", f"{shared_resources.start_time.value:.2f}"])
 
-    # perform prediction
-    prediction, parsed_args = ftio_core.main(args, msgs)
-    if not prediction:
-        console.print("[yellow]Terminating prediction (no data passed) [/]")
-        console.print(
-            f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] Stopped"
-        )
-        exit(0)
+    prediction_list, parsed_args = ftio_core.main(args, msgs)
+    if not prediction_list:
+        log_to_gui_and_console(console,
+            "[yellow]Terminating prediction (no data passed)[/]",
+            "termination", {"reason": "no_data"})
+        return
 
-    if not isinstance(prediction, list) or len(prediction) != 1:
-        raise ValueError(
-            "[red][PREDICTOR] (#{shared_resources.count.value}):[/] predictor should be called on exactly on file"
-        )
-
-    # get the prediction
-    prediction = prediction[-1]
-    # plot_bar_with_rich(shared_resources.t_app,shared_resources.b_app, width_percentage=0.9)
+    prediction = prediction_list[-1]
+    freq = get_dominant(prediction) or 0.0
 
-    
# get data - freq = get_dominant(prediction) # just get a single dominant value - - # save prediction results save_data(prediction, shared_resources) - # display results text = display_result(freq, prediction, shared_resources) - - # data analysis to decrease window thus change start_time text += window_adaptation(parsed_args, prediction, freq, shared_resources) + is_change_point = "[CHANGE_POINT]" in text + change_point_info = None + if is_change_point: + import re + t_match = re.search(r"t_s=([0-9.]+)", text) + f_match = re.search(r"change:\s*([0-9.]+)\s*→\s*([0-9.]+)", text) + change_point_info = { + "prediction_id": pred_id, + "timestamp": float(prediction.t_end), + "old_frequency": float(f_match.group(1)) if f_match else 0.0, + "new_frequency": float(f_match.group(2)) if f_match else freq, + "start_time": float(t_match.group(1)) if t_match else float(prediction.t_start) + } + candidates = [ + {"frequency": f, "confidence": c} + for f, c in zip(prediction.dominant_freq, prediction.conf) + ] + if candidates: + best = max(candidates, key=lambda c: c["confidence"]) + dominant_freq = best["frequency"] + dominant_period = 1.0 / dominant_freq if dominant_freq > 0 else 0.0 + confidence = best["confidence"] + else: + dominant_freq = dominant_period = confidence = 0.0 + + structured_prediction = { + "prediction_id": pred_id, + "timestamp": str(time.time()), + "dominant_freq": dominant_freq, + "dominant_period": dominant_period, + "confidence": confidence, + "candidates": candidates, + "time_window": (float(prediction.t_start), float(prediction.t_end)), + "total_bytes": str(prediction.total_bytes), + "bytes_transferred": str(prediction.total_bytes), + "current_hits": int(shared_resources.hits.value), + "periodic_probability": 0.0, + "frequency_range": (0.0, 0.0), + "period_range": (0.0, 0.0), + "is_change_point": is_change_point, + "change_point": change_point_info, + } + + get_socket_logger().send_log("prediction", "FTIO structured prediction", structured_prediction) + log_to_gui_and_console(console, text, "prediction_log", {"count": pred_id, "freq": dominant_freq}) + + shared_resources.count.value += 1 - # print text - console.print(text) def window_adaptation( @@ -68,33 +201,84 @@ def window_adaptation( freq: float, shared_resources: SharedResources, ) -> str: - """modifies the start time if conditions are true - - Args: - args (argparse): command line arguments - prediction (Prediction): result from FTIO - freq (float|Nan): dominant frequency - shared_resources (SharedResources): shared resources among processes - text (str): text to display - - Returns: - str: _description_ - """ - # average data/data processing text = "" t_s = prediction.t_start t_e = prediction.t_end total_bytes = prediction.total_bytes - # Hits + prediction_count = shared_resources.count.value + text += f"Prediction #{prediction_count}\n" + text += hits(args, prediction, shared_resources) - # time window adaptation - if not np.isnan(freq): - n_phases = (t_e - t_s) * freq - avr_bytes = int(total_bytes / float(n_phases)) - unit, order = set_unit(avr_bytes, "B") - avr_bytes = order * avr_bytes + algorithm = args.algorithm + + detector = get_change_detector(shared_resources, algorithm) + if algorithm == "cusum": + change_detected, change_log, adaptive_start_time = detect_pattern_change_cusum( + shared_resources, prediction, detector, shared_resources.count.value + ) + elif algorithm == "ph": + change_detected, change_log, adaptive_start_time = detect_pattern_change_pagehinkley( + shared_resources, prediction, detector, 
shared_resources.count.value + ) + else: + change_detected, change_log, adaptive_start_time = detect_pattern_change_adwin( + shared_resources, prediction, detector, shared_resources.count.value + ) + + if np.isnan(freq): + if algorithm == "cusum": + cusum_samples = len(shared_resources.cusum_frequencies) + cusum_changes = shared_resources.cusum_change_count.value + text += f"[dim][CUSUM STATE: {cusum_samples} samples, {cusum_changes} changes detected so far][/]\n" + if cusum_samples > 0: + last_freq = shared_resources.cusum_frequencies[-1] + text += f"[dim][LAST KNOWN FREQ: {last_freq:.3f} Hz][/]\n" + elif algorithm == "ph": + ph_samples = len(shared_resources.pagehinkley_frequencies) + ph_changes = shared_resources.pagehinkley_change_count.value + text += f"[dim][PAGE-HINKLEY STATE: {ph_samples} samples, {ph_changes} changes detected so far][/]\n" + if ph_samples > 0: + last_freq = shared_resources.pagehinkley_frequencies[-1] + text += f"[dim][LAST KNOWN FREQ: {last_freq:.3f} Hz][/]\n" + else: # ADWIN + adwin_samples = len(shared_resources.adwin_frequencies) + adwin_changes = shared_resources.adwin_change_count.value + text += f"[dim][ADWIN STATE: {adwin_samples} samples, {adwin_changes} changes detected so far][/]\n" + if adwin_samples > 0: + last_freq = shared_resources.adwin_frequencies[-1] + text += f"[dim][LAST KNOWN FREQ: {last_freq:.3f} Hz][/]\n" + + if change_detected and change_log: + text += f"{change_log}\n" + min_window_size = 1.0 + safe_adaptive_start = min(adaptive_start_time, t_e - min_window_size) + + if safe_adaptive_start >= 0 and (t_e - safe_adaptive_start) >= min_window_size: + t_s = safe_adaptive_start + algorithm_name = algorithm.upper() + text += f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/][green] {algorithm_name} adapted window to start at {t_s:.3f}s (window size: {t_e - t_s:.3f}s)[/]\n" + else: + t_s = max(0, t_e - min_window_size) + algorithm_name = algorithm.upper() + text += f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/][yellow] {algorithm_name} adaptation would create unsafe window, using conservative {min_window_size}s window[/]\n" + if not np.isnan(freq) and freq > 0: + time_window = t_e - t_s + if time_window > 0: + n_phases = time_window * freq + if n_phases > 0: + avr_bytes = int(total_bytes / float(n_phases)) + unit, order = set_unit(avr_bytes, "B") + avr_bytes = order * avr_bytes + else: + n_phases = 0 + avr_bytes = 0 + unit = "B" + else: + n_phases = 0 + avr_bytes = 0 + unit = "B" # FIXME this needs to compensate for a smaller windows if not args.window_adaptation: text += ( f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] Average transferred {avr_bytes:.0f} {unit}\n" ) - # adaptive time window - if "frequency_hits" in args.window_adaptation: + if "frequency_hits" in args.window_adaptation and not change_detected: if shared_resources.hits.value > args.hits: if ( True - ): # np.abs(avr_bytes - (total_bytes-aggregated_bytes.value)) < 100: + ): tmp = t_e - 3 * 1 / freq t_s = tmp if tmp > 0 else 0 text += f"[bold purple][PREDICTOR] (#{shared_resources.count.value}):[/][green]Adjusting start time to {t_s} sec\n[/]" else: - t_s = 0 - if shared_resources.hits.value == 0: - text += f"[purple][PREDICTOR] 
(#{shared_resources.count.value}):[/][red bold] Resetting start time to {t_s} sec\n[/]" - elif "data" in args.window_adaptation and len(shared_resources.data) > 0: + if not change_detected: + t_s = 0 + if shared_resources.hits.value == 0: + text += f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/][red bold] Resetting start time to {t_s} sec\n[/]" + elif "data" in args.window_adaptation and len(shared_resources.data) > 0 and not change_detected: text += f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/][green]Trying time window adaptation: {shared_resources.count.value:.0f} =? { args.hits * shared_resources.hits.value:.0f}\n[/]" if shared_resources.count.value == args.hits * shared_resources.hits.value: - # t_s = shared_resources.data[-shared_resources.count.value]['t_start'] - # text += f'[bold purple][PREDICTOR] (#{shared_resources.count.value}):[/][green]Adjusting start time to t_start {t_s} sec\n[/]' if len(shared_resources.t_flush) > 0: print(shared_resources.t_flush) index = int(args.hits * shared_resources.hits.value - 1) t_s = shared_resources.t_flush[index] text += f"[bold purple][PREDICTOR] (#{shared_resources.count.value}):[/][green]Adjusting start time to t_flush[{index}] {t_s} sec\n[/]" - # TODO 1: Make sanity check -- see if the same number of bytes was transferred - # TODO 2: Train a model to validate the predictions? + if not np.isnan(freq) and freq > 0: # freq > 0 guards the 1/freq period computed below + if algorithm == "cusum": + samples = len(shared_resources.cusum_frequencies) + changes = shared_resources.cusum_change_count.value + recent_freqs = list(shared_resources.cusum_frequencies)[-5:] + elif algorithm == "ph": + samples = len(shared_resources.pagehinkley_frequencies) + changes = shared_resources.pagehinkley_change_count.value + recent_freqs = list(shared_resources.pagehinkley_frequencies)[-5:] + else: # ADWIN + samples = len(shared_resources.adwin_frequencies) + changes = shared_resources.adwin_change_count.value + recent_freqs = list(shared_resources.adwin_frequencies)[-5:] + + success_rate = (samples / prediction_count) * 100 if prediction_count > 0 else 0 + + text += f"\n[bold cyan]{algorithm.upper()} ANALYSIS (Prediction #{prediction_count})[/]\n" + text += f"[cyan]Frequency detections: {samples}/{prediction_count} ({success_rate:.1f}% success)[/]\n" + text += f"[cyan]Pattern changes detected: {changes}[/]\n" + text += f"[cyan]Current frequency: {freq:.3f} Hz ({1/freq:.2f}s period)[/]\n" + + if samples > 1: + text += f"[cyan]Recent freq history: {[f'{f:.3f}Hz' for f in recent_freqs]}[/]\n" + + if len(recent_freqs) >= 2: + trend = "increasing" if recent_freqs[-1] > recent_freqs[-2] else "decreasing" if recent_freqs[-1] < recent_freqs[-2] else "stable" + text += f"[cyan]Frequency trend: {trend}[/]\n" + + text += f"[cyan]{algorithm.upper()} window size: {samples} samples[/]\n" + + text += f"[bold cyan]{'='*50}[/]\n\n" + + text += f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] Ended" shared_resources.start_time.value = t_s return text def save_data(prediction, shared_resources) -> None: - """Put all data from `prediction` in a `queue`. The total bytes are as well saved here. 
- - Args: - prediction (dict): result from FTIO - shared_resources (SharedResources): shared resources among processes - """ - # safe total transferred bytes shared_resources.aggregated_bytes.value += prediction.total_bytes - # save data shared_resources.queue.put( { "phase": shared_resources.count.value, @@ -157,7 +362,6 @@ def save_data(prediction, shared_resources) -> None: "total_bytes": prediction.total_bytes, "ranks": prediction.ranks, "freq": prediction.freq, - # 'hits': shared_resources.hits.value, } ) @@ -165,43 +369,29 @@ def save_data(prediction, shared_resources) -> None: def display_result( freq: float, prediction: Prediction, shared_resources: SharedResources ) -> str: - """Displays the results from FTIO - - Args: - freq (float): dominant frequency - prediction (Prediction): prediction setting from FTIO - shared_resources (SharedResources): shared resources among processes - - Returns: - str: text to print to console - """ text = "" - # Dominant frequency if not np.isnan(freq): text = f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] Dominant freq {freq:.3f} Hz ({1/freq if freq != 0 else 0:.2f} sec)\n" + else: + text = f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] No dominant frequency found\n" - # Candidates - text += ( - f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] Freq candidates: \n" - ) - for i, f_d in enumerate(prediction.dominant_freq): - text += ( - f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] {i}) " - f"{f_d:.2f} Hz -- conf {prediction.conf[i]:.2f}\n" - ) + if len(prediction.dominant_freq) > 0: + text += f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] Freq candidates ({len(prediction.dominant_freq)} found): \n" + for i, f_d in enumerate(prediction.dominant_freq): + text += ( + f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] {i}) " + f"{f_d:.2f} Hz -- conf {prediction.conf[i]:.2f}\n" + ) + else: + text += f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] No frequency candidates detected\n" - # time window text += f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] Time window {prediction.t_end-prediction.t_start:.3f} sec ([{prediction.t_start:.3f},{prediction.t_end:.3f}] sec)\n" - # total bytes total_bytes = shared_resources.aggregated_bytes.value - # total_bytes = prediction.total_bytes unit, order = set_unit(total_bytes, "B") total_bytes = order * total_bytes text += f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] Total bytes {total_bytes:.0f} {unit}\n" - # Bytes since last time - # tmp = abs(prediction.total_bytes -shared_resources.aggregated_bytes.value) tmp = abs(shared_resources.aggregated_bytes.value) unit, order = set_unit(tmp, "B") tmp = order * tmp diff --git a/ftio/prediction/probability_analysis.py b/ftio/prediction/probability_analysis.py index d7498f0..092f6c9 100644 --- a/ftio/prediction/probability_analysis.py +++ b/ftio/prediction/probability_analysis.py @@ -1,28 +1,12 @@ import numpy as np from rich.console import Console - import ftio.prediction.group as gp from ftio.prediction.helper import get_dominant from ftio.prediction.probability import Probability +from ftio.prediction.change_point_detection import ChangePointDetector -def find_probability(data: list[dict], method: str = "db", counter: int = -1) -> list: - """Calculates the conditional probability that expresses - how probable the frequency (event A) is given that the signal - is periodic occurred (probability B). 
- According to Bayes' Theorem, P(A|B) = P(B|A)*P(A)/P(B) - P(B|A): Probability that the signal is periodic given that it has a frequency A --> 1 - P(A): Probability that the signal has the frequency A - P(B): Probability that the signal has is periodic - - Args: - data (dict): contacting predictions - method (str): method to group the predictions (step or db) - counter (int): number of predictions already executed - - Returns: - out (dict): probability of predictions in ranges - """ +def find_probability(data: list[dict], method: str = "db", counter: int = -1) -> list: + """Compute, via Bayes' theorem P(A|B) = P(B|A)*P(A)/P(B), how probable each dominant frequency A is given that the signal is periodic (B); predictions are grouped by `method` ("step" or "db").""" p_b = 0 p_a = [] p_a_given_b = 0 @@ -56,12 +40,9 @@ def find_probability(data: list[dict], method: str = "db", counter: int = -1) -> f_min = np.inf f_max = 0 for pred in grouped_prediction: - # print(pred) - # print(f"index is {group}, group is {pred['group']}") if group == pred["group"]: f_min = min(get_dominant(pred), f_min) f_max = max(get_dominant(pred), f_max) - # print(f"group: {group}, pred_group: {pred['group']}, freq: {get_dominant(pred):.3f}, f_min: {f_min:.3f}, f_max:{f_max:.3f}") p_a += 1 p_a = p_a / len(data) if len(data) > 0 else 0 @@ -73,3 +54,41 @@ def find_probability(data: list[dict], method: str = "db", counter: int = -1) -> out.append(prob) return out + + +def detect_pattern_change(shared_resources, prediction, detector, count): + """Feed `prediction` into `detector` and return (change_detected, change_log, window_start_time).""" + try: + freq = get_dominant(prediction) # imported at module level above + + if hasattr(detector, 'verbose') and detector.verbose: + console = Console() + console.print(f"[cyan][DEBUG] Change point detection called for prediction #{count}, freq={freq:.3f} Hz[/]") + console.print(f"[cyan][DEBUG] Detector calibrated: {detector.is_calibrated}, samples: {len(detector.frequencies)}[/]") + + current_time = prediction.t_end + result = detector.add_prediction(prediction, current_time) + + if hasattr(detector, 'verbose') and detector.verbose: + console = Console() + console.print(f"[cyan][DEBUG] Detector result: {result}[/]") + + if result is not None: + change_point_idx, change_point_time = result + + if hasattr(detector, 'verbose') and detector.verbose: + console = Console() + console.print(f"[green][DEBUG] CHANGE POINT DETECTED! 
Index: {change_point_idx}, Time: {change_point_time:.3f}[/]") + + change_log = f"[red bold][CHANGE_POINT] t_s={change_point_time:.3f} sec[/]" + change_log += f"\n[purple][PREDICTOR] (#{count}):[/][yellow] Adapting analysis window to start at t_s={change_point_time:.3f}[/]" + + return True, change_log, change_point_time + + return False, "", prediction.t_start + + except Exception as e: + console = Console() + console.print(f"[red]Change point detection error: {e}[/]") + return False, "", prediction.t_start \ No newline at end of file diff --git a/ftio/prediction/shared_resources.py b/ftio/prediction/shared_resources.py index 45b21f9..636c565 100644 --- a/ftio/prediction/shared_resources.py +++ b/ftio/prediction/shared_resources.py @@ -9,25 +9,80 @@ def __init__(self): def _init_shared_resources(self): """Initialize the shared resources.""" - # Queue for FTIO data + self.queue = self.manager.Queue() - # list of dicts with all predictions so far + + self.data = self.manager.list() - # Total bytes transferred so far + self.aggregated_bytes = self.manager.Value("d", 0.0) - # Hits indicating how often a dominant frequency was found + self.hits = self.manager.Value("d", 0.0) - # Start time window for ftio + self.start_time = self.manager.Value("d", 0.0) - # Number of prediction + self.count = self.manager.Value("i", 0) - # Bandwidth and time appended between predictions + self.b_app = self.manager.list() self.t_app = self.manager.list() - # For triggering cargo + self.sync_trigger = self.manager.Queue() - # saves when the dada ti received from gkfs + self.t_flush = self.manager.list() + + + self.adwin_frequencies = self.manager.list() + self.adwin_timestamps = self.manager.list() + self.adwin_total_samples = self.manager.Value("i", 0) + self.adwin_change_count = self.manager.Value("i", 0) + self.adwin_last_change_time = self.manager.Value("d", 0.0) + self.adwin_initialized = self.manager.Value("b", False) + + + self.adwin_lock = self.manager.Lock() + + + self.cusum_frequencies = self.manager.list() + self.cusum_timestamps = self.manager.list() + self.cusum_change_count = self.manager.Value("i", 0) + self.cusum_last_change_time = self.manager.Value("d", 0.0) + self.cusum_initialized = self.manager.Value("b", False) + + + self.cusum_lock = self.manager.Lock() + + + self.pagehinkley_frequencies = self.manager.list() + self.pagehinkley_timestamps = self.manager.list() + self.pagehinkley_change_count = self.manager.Value("i", 0) + self.pagehinkley_last_change_time = self.manager.Value("d", 0.0) + self.pagehinkley_initialized = self.manager.Value("b", False) + + + self.pagehinkley_state = self.manager.dict({ 'cumulative_sum_pos': 0.0, 'cumulative_sum_neg': 0.0, 'reference_mean': 0.0, 'sum_of_samples': 0.0, 'sample_count': 0, 'initialized': False }) + + + self.pagehinkley_lock = self.manager.Lock() + + + self.detector_frequencies = self.manager.list() + self.detector_timestamps = self.manager.list() + self.detector_is_calibrated = self.manager.Value("b", False) + self.detector_reference_freq = self.manager.Value("d", 0.0) + self.detector_sensitivity = self.manager.Value("d", 0.0) + self.detector_threshold_factor = self.manager.Value("d", 0.0) + + + # Short alias used by get_change_detector for the Page-Hinkley detector; adwin_initialized and cusum_initialized are already created above + self.ph_initialized = self.manager.Value("b", False) def restart(self): """Restart the manager and reinitialize shared resources.""" diff --git a/ftio/prediction/tasks.py b/ftio/prediction/tasks.py index 73d74cb..c260ec0 100644 
--- a/ftio/prediction/tasks.py +++ b/ftio/prediction/tasks.py @@ -70,23 +70,7 @@ def ftio_metric_task_save( show: bool = False, ) -> None: prediction = ftio_metric_task(metric, arrays, argv, ranks, show) - # freq = get_dominant(prediction) #just get a single dominant value if prediction: - # data.append( - # { - # "metric": f"{metric}", - # "dominant_freq": prediction.dominant_freq, - # "conf": prediction.conf, - # "amp": prediction.amp, - # "phi": prediction.phi, - # "t_start": prediction.t_start, - # "t_end": prediction.t_end, - # "total_bytes": prediction.total_bytes, - # "ranks": prediction.ranks, - # "freq": prediction.freq, - # "top_freq": prediction.top_freqs, - # } - # ) prediction.metric = metric data.append(prediction) else: diff --git a/gui/dashboard.py b/gui/dashboard.py new file mode 100644 index 0000000..50d280b --- /dev/null +++ b/gui/dashboard.py @@ -0,0 +1,500 @@ +""" +Main Dash application for FTIO prediction visualization +""" +import dash +from dash import dcc, html, Input, Output, State, callback_context +import plotly.graph_objects as go +import threading +import time +from datetime import datetime +import logging + +from gui.data_models import PredictionDataStore +from gui.socket_listener import SocketListener +from gui.visualizations import FrequencyTimelineViz, CosineWaveViz, DashboardViz + + +class FTIODashApp: + """Main Dash application for FTIO prediction visualization""" + + def __init__(self, host='localhost', port=8050, socket_port=9999): + self.app = dash.Dash(__name__) + self.host = host + self.port = port + self.socket_port = socket_port + + + self.data_store = PredictionDataStore() + self.selected_prediction_id = None + self.auto_update = True + self.last_update = time.time() + + self.socket_listener = SocketListener( + port=socket_port, + data_callback=self._on_data_received + ) + + + self._setup_layout() + self._setup_callbacks() + + + self.socket_thread = self.socket_listener.start_in_thread() + + print(f"FTIO Dashboard starting on http://{host}:{port}") + print(f"Socket listener on port {socket_port}") + + def _setup_layout(self): + """Setup the Dash app layout""" + + self.app.layout = html.Div([ + + html.Div([ + html.H1("FTIO Prediction Visualizer", + style={'textAlign': 'center', 'color': '#2c3e50', 'marginBottom': '20px'}), + html.Div([ + html.P(f"Socket listening on port {self.socket_port}", + style={'textAlign': 'center', 'color': '#7f8c8d', 'margin': '0'}), + html.P(id='connection-status', children="Waiting for predictions...", + style={'textAlign': 'center', 'color': '#e74c3c', 'margin': '0'}) + ]) + ], style={'marginBottom': '30px'}), + + + html.Div([ + html.Div([ + html.Label("View Mode:"), + dcc.Dropdown( + id='view-mode', + options=[ + {'label': 'Dashboard (Merged Cosine Wave)', 'value': 'dashboard'}, + {'label': 'Individual Prediction (Single Wave)', 'value': 'cosine'} + ], + value='dashboard', + style={'width': '250px'} + ) + ], style={'display': 'inline-block', 'marginRight': '20px'}), + + html.Div([ + html.Label("Select Prediction:"), + dcc.Dropdown( + id='prediction-selector', + options=[], + value=None, + placeholder="Select prediction for cosine view", + style={'width': '250px'} + ) + ], style={'display': 'inline-block', 'marginRight': '20px'}), + + html.Div([ + html.Button("Clear Data", id='clear-button', n_clicks=0, + style={'backgroundColor': '#e74c3c', 'color': 'white', + 'border': 'none', 'padding': '8px 16px', 'cursor': 'pointer'}), + html.Button("Auto Update", id='auto-update-button', n_clicks=0, + style={'backgroundColor': 
'#27ae60', 'color': 'white', + 'border': 'none', 'padding': '8px 16px', 'cursor': 'pointer', + 'marginLeft': '10px'}) + ], style={'display': 'inline-block'}) + + ], style={'textAlign': 'center', 'marginBottom': '20px', 'padding': '20px', + 'backgroundColor': '#ecf0f1', 'borderRadius': '5px'}), + + + html.Div(id='stats-bar', style={'marginBottom': '20px'}), + + + html.Div(id='main-viz', style={'height': '600px'}), + + + html.Div([ + html.Hr(), + html.H3("All Predictions", style={'color': '#2c3e50', 'marginTop': '30px'}), + html.Div( + id='recent-predictions-table', + style={ + 'maxHeight': '400px', + 'overflowY': 'auto', + 'border': '1px solid #ddd', + 'borderRadius': '8px', + 'padding': '10px', + 'backgroundColor': '#f9f9f9' + } + ) + ], style={'marginTop': '20px'}), + + + dcc.Interval( + id='interval-component', + interval=2000, # Update every 2 seconds + n_intervals=0 + ), + + + dcc.Store(id='data-store-trigger') + ]) + + def _setup_callbacks(self): + """Setup Dash callbacks""" + + @self.app.callback( + [Output('main-viz', 'children'), + Output('prediction-selector', 'options'), + Output('prediction-selector', 'value'), + Output('connection-status', 'children'), + Output('connection-status', 'style'), + Output('stats-bar', 'children')], + [Input('interval-component', 'n_intervals'), + Input('view-mode', 'value'), + Input('prediction-selector', 'value'), + Input('clear-button', 'n_clicks')], + [State('auto-update-button', 'n_clicks')] + ) + def update_visualization(n_intervals, view_mode, selected_pred_id, clear_clicks, auto_clicks): + + + ctx = callback_context + if ctx.triggered and ctx.triggered[0]['prop_id'] == 'clear-button.n_clicks': + if clear_clicks > 0: + self.data_store.clear_data() + self.selected_prediction_id = None + + + pred_options = [] + pred_value = selected_pred_id + + if self.data_store.predictions: + pred_options = [ + {'label': f"Prediction #{p.prediction_id} ({p.dominant_freq:.2f} Hz)", + 'value': p.prediction_id} + for p in self.data_store.predictions[-50:] # Last 50 predictions + ] + + + if pred_value is None and self.data_store.predictions: + pred_value = self.data_store.predictions[-1].prediction_id + + + if self.data_store.predictions: + status_text = f"Connected - {len(self.data_store.predictions)} predictions received" + status_style = {'textAlign': 'center', 'color': '#27ae60', 'margin': '0'} + else: + status_text = "Waiting for predictions..." 
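+                # Red placeholder text until the socket listener delivers the first prediction; the green "Connected" summary in the branch above takes over afterwards.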
+ status_style = {'textAlign': 'center', 'color': '#e74c3c', 'margin': '0'} + + + stats_bar = self._create_stats_bar() + + + if view_mode == 'cosine' and pred_value is not None: + fig = CosineWaveViz.create_cosine_plot(self.data_store, pred_value) + viz_component = dcc.Graph(figure=fig, style={'height': '600px'}) + + elif view_mode == 'dashboard': + + fig = self._create_cosine_timeline_plot(self.data_store) + viz_component = dcc.Graph(figure=fig, style={'height': '600px'}) + + else: + viz_component = html.Div([ + html.H3("Select a view mode and prediction to visualize", + style={'textAlign': 'center', 'color': '#7f8c8d', 'marginTop': '200px'}) + ]) + + return viz_component, pred_options, pred_value, status_text, status_style, stats_bar + + @self.app.callback( + Output('recent-predictions-table', 'children'), + [Input('interval-component', 'n_intervals')] + ) + def update_recent_predictions_table(n_intervals): + """Update the recent predictions table""" + + if not self.data_store.predictions: + return html.P("No predictions yet", style={'textAlign': 'center', 'color': '#7f8c8d'}) + + + recent_preds = self.data_store.predictions + + + seen_ids = set() + unique_preds = [] + for pred in reversed(recent_preds): # Newest first + if pred.prediction_id not in seen_ids: + seen_ids.add(pred.prediction_id) + unique_preds.append(pred) + + + rows = [] + for i, pred in enumerate(unique_preds): + + row_style = { + 'backgroundColor': '#ffffff' if i % 2 == 0 else '#f8f9fa', + 'padding': '8px', + 'borderBottom': '1px solid #dee2e6' + } + + + if pred.dominant_freq == 0 or pred.dominant_freq is None: + + row = html.Tr([ + html.Td(f"#{pred.prediction_id}", style={'fontWeight': 'bold', 'color': '#999'}), + html.Td("—", style={'color': '#999', 'textAlign': 'center', 'fontStyle': 'italic'}), + html.Td("No pattern detected", style={'color': '#999', 'fontStyle': 'italic'}) + ], style=row_style) + else: + + change_point_text = "" + if pred.is_change_point and pred.change_point: + cp = pred.change_point + change_point_text = f"🔴 {cp.old_frequency:.2f} → {cp.new_frequency:.2f} Hz" + + row = html.Tr([ + html.Td(f"#{pred.prediction_id}", style={'fontWeight': 'bold', 'color': '#495057'}), + html.Td(f"{pred.dominant_freq:.2f} Hz", style={'color': '#007bff'}), + html.Td(change_point_text, style={'color': 'red' if pred.is_change_point else 'black'}) + ], style=row_style) + + rows.append(row) + + + table = html.Table([ + html.Thead([ + html.Tr([ + html.Th("ID", style={'backgroundColor': '#6c757d', 'color': 'white', 'padding': '12px'}), + html.Th("Frequency", style={'backgroundColor': '#6c757d', 'color': 'white', 'padding': '12px'}), + html.Th("Change Point", style={'backgroundColor': '#6c757d', 'color': 'white', 'padding': '12px'}) + ]) + ]), + html.Tbody(rows) + ], style={ + 'width': '100%', + 'borderCollapse': 'collapse', + 'marginTop': '10px', + 'boxShadow': '0 2px 4px rgba(0,0,0,0.1)', + 'borderRadius': '8px', + 'overflow': 'hidden' + }) + + return table + + def _create_stats_bar(self): + """Create statistics bar component""" + + if not self.data_store.predictions: + return html.Div() + + + total_preds = len(self.data_store.predictions) + total_changes = len(self.data_store.change_points) + latest_pred = self.data_store.predictions[-1] + + stats_items = [ + html.Div([ + html.H4(str(total_preds), style={'margin': '0', 'color': '#2c3e50'}), + html.P("Total Predictions", style={'margin': '0', 'fontSize': '12px', 'color': '#7f8c8d'}) + ], style={'textAlign': 'center', 'flex': '1'}), + + html.Div([ + 
html.H4(str(total_changes), style={'margin': '0', 'color': '#e74c3c'}), + html.P("Change Points", style={'margin': '0', 'fontSize': '12px', 'color': '#7f8c8d'}) + ], style={'textAlign': 'center', 'flex': '1'}), + + html.Div([ + html.H4(f"{latest_pred.dominant_freq:.2f} Hz", style={'margin': '0', 'color': '#27ae60'}), + html.P("Latest Frequency", style={'margin': '0', 'fontSize': '12px', 'color': '#7f8c8d'}) + ], style={'textAlign': 'center', 'flex': '1'}), + + html.Div([ + html.H4(f"{latest_pred.confidence:.1f}%", style={'margin': '0', 'color': '#3498db'}), + html.P("Latest Confidence", style={'margin': '0', 'fontSize': '12px', 'color': '#7f8c8d'}) + ], style={'textAlign': 'center', 'flex': '1'}) + ] + + return html.Div(stats_items, style={ + 'display': 'flex', + 'justifyContent': 'space-around', + 'backgroundColor': '#f8f9fa', + 'padding': '15px', + 'borderRadius': '5px', + 'border': '1px solid #dee2e6' + }) + + def _on_data_received(self, data): + """Callback when new data is received from socket""" + print(f"[DEBUG] Dashboard received data: {data}") + + if data['type'] == 'prediction': + prediction_data = data['data'] + self.data_store.add_prediction(prediction_data) + + print(f"[DEBUG] Added prediction #{prediction_data.prediction_id}: " + f"{prediction_data.dominant_freq:.2f} Hz " + f"({'CHANGE POINT' if prediction_data.is_change_point else 'normal'})") + + self.last_update = time.time() + else: + print(f"[DEBUG] Received non-prediction data: type={data.get('type')}") + + def _create_cosine_timeline_plot(self, data_store): + """Create single continuous cosine wave showing I/O pattern evolution""" + import plotly.graph_objs as go + import numpy as np + + if not data_store.predictions: + fig = go.Figure() + fig.add_annotation( + x=0.5, y=0.5, + text="Waiting for predictions...", + showarrow=False, + font=dict(size=16, color="gray") + ) + fig.update_layout( + xaxis=dict(visible=False), + yaxis=dict(visible=False), + title="I/O Pattern Timeline (Continuous Cosine Wave)" + ) + return fig + + + last_3_predictions = data_store.get_latest_predictions(3) + + + sorted_predictions = sorted(last_3_predictions, key=lambda p: p.time_window[0]) + + + global_time = [] + global_cosine = [] + cumulative_time = 0.0 + segment_info = [] # For change point markers + + for pred in sorted_predictions: + t_start, t_end = pred.time_window + duration = max(0.001, t_end - t_start) # Ensure positive duration + freq = pred.dominant_freq + + + if freq == 0 or freq is None: + + num_points = 100 + t_local = np.linspace(0, duration, num_points) + t_global = cumulative_time + t_local + + + global_time.extend(t_global.tolist()) + global_cosine.extend([None] * num_points) # None creates a gap + else: + + num_points = max(100, int(freq * duration * 50)) # 50 points per cycle + + + t_local = np.linspace(0, duration, num_points) + + + cosine_segment = np.cos(2 * np.pi * freq * t_local) + + + t_global = cumulative_time + t_local + + + global_time.extend(t_global.tolist()) + global_cosine.extend(cosine_segment.tolist()) + + + segment_start = cumulative_time + segment_end = cumulative_time + duration + segment_info.append((segment_start, segment_end, pred)) + + + cumulative_time += duration + + fig = go.Figure() + + + fig.add_trace(go.Scatter( + x=global_time, + y=global_cosine, + mode='lines', + name='I/O Pattern Evolution', + line=dict(color='#1f77b4', width=2), + connectgaps=False, # DON'T connect across None values - creates visible gaps + hovertemplate="I/O Pattern
" + + "Time: %{x:.3f} s
" + + "Amplitude: %{y:.3f}" + )) + + + for seg_start, seg_end, pred in segment_info: + if pred.dominant_freq == 0 or pred.dominant_freq is None: + fig.add_vrect( + x0=seg_start, + x1=seg_end, + fillcolor="gray", + opacity=0.15, + layer="below", + line_width=0, + annotation_text="No pattern", + annotation_position="top" + ) + + + for seg_start, seg_end, pred in segment_info: + if pred.is_change_point and pred.change_point: + marker_time = seg_start # Mark at the START of the changed segment + + + fig.add_vline( + x=marker_time, + line_dash="solid", + line_color="red", + line_width=4, + opacity=0.8 + ) + + + fig.add_annotation( + x=marker_time, + y=1.1, + text=f"🔴 CHANGE
{pred.change_point.old_frequency:.2f}→{pred.change_point.new_frequency:.2f} Hz", + showarrow=True, + arrowhead=2, + arrowsize=1, + arrowwidth=2, + arrowcolor="red", + ax=0, + ay=-40, + font=dict(size=12, color="red", family="Arial Black"), + bgcolor="rgba(255,255,255,0.9)", + bordercolor="red", + borderwidth=2 + ) + + + fig.update_layout( + title="I/O Pattern Timeline (Continuous Evolution)", + xaxis_title="Time (s) - Concatenated Segments", + yaxis_title="I/O Pattern Amplitude", + showlegend=True, + height=600, + hovermode='x unified', + yaxis=dict(range=[-1.2, 1.2]), + uirevision='constant' # Prevents full page refresh - keeps zoom/pan state + ) + + return fig + + def run(self, debug=False): + """Run the Dash application""" + try: + self.app.run(host=self.host, port=self.port, debug=debug) + except KeyboardInterrupt: + print("\nShutting down FTIO Dashboard...") + self.socket_listener.stop_server() + except Exception as e: + print(f"Error running dashboard: {e}") + self.socket_listener.stop_server() + + +if __name__ == "__main__": + + dashboard = FTIODashApp(host='localhost', port=8050, socket_port=9999) + dashboard.run(debug=False) diff --git a/gui/data_models.py b/gui/data_models.py new file mode 100644 index 0000000..d2e1a30 --- /dev/null +++ b/gui/data_models.py @@ -0,0 +1,128 @@ +""" +Data models for storing and managing prediction data from FTIO +""" +from dataclasses import dataclass +from typing import List, Optional, Dict, Any +import numpy as np +from datetime import datetime + + +@dataclass +class FrequencyCandidate: + """Individual frequency candidate with confidence""" + frequency: float + confidence: float + + +@dataclass +class ChangePoint: + """ADWIN detected change point information""" + prediction_id: int + timestamp: float + old_frequency: float + new_frequency: float + frequency_change_percent: float + sample_number: int + cut_position: int + total_samples: int + + +@dataclass +class PredictionData: + """Single prediction instance data""" + prediction_id: int + timestamp: str + dominant_freq: float + dominant_period: float + confidence: float + candidates: List[FrequencyCandidate] + time_window: tuple # (start, end) in seconds + total_bytes: str + bytes_transferred: str + current_hits: int + periodic_probability: float + frequency_range: tuple # (min_freq, max_freq) + period_range: tuple # (min_period, max_period) + is_change_point: bool = False + change_point: Optional[ChangePoint] = None + sample_number: Optional[int] = None + + +class PredictionDataStore: + """Manages all prediction data and provides query methods""" + + def __init__(self): + self.predictions: List[PredictionData] = [] + self.change_points: List[ChangePoint] = [] + self.current_prediction_id = -1 + + def add_prediction(self, prediction: PredictionData): + """Add a new prediction to the store""" + self.predictions.append(prediction) + if prediction.is_change_point and prediction.change_point: + self.change_points.append(prediction.change_point) + + def get_prediction_by_id(self, pred_id: int) -> Optional[PredictionData]: + """Get prediction by ID""" + for pred in self.predictions: + if pred.prediction_id == pred_id: + return pred + return None + + def get_frequency_timeline(self) -> tuple: + """Get data for frequency timeline plot""" + if not self.predictions: + return [], [], [] + + pred_ids = [p.prediction_id for p in self.predictions] + frequencies = [p.dominant_freq for p in self.predictions] + confidences = [p.confidence for p in self.predictions] + + return pred_ids, frequencies, confidences + 
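+    # Sketch of intended use (illustrative): the three parallel lists returned above feed a plotly trace directly, e.g. go.Scatter(x=pred_ids, y=frequencies, customdata=confidences, mode='lines+markers'), which is what FrequencyTimelineViz does in gui/visualizations.py.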
+ def get_candidate_frequencies(self) -> Dict[int, List[FrequencyCandidate]]: + """Get all candidate frequencies by prediction ID""" + candidates_dict = {} + for pred in self.predictions: + if pred.candidates: + candidates_dict[pred.prediction_id] = pred.candidates + return candidates_dict + + def get_change_points_for_timeline(self) -> tuple: + """Get change point data for timeline visualization""" + if not self.change_points: + return [], [], [] + + pred_ids = [cp.prediction_id for cp in self.change_points] + frequencies = [cp.new_frequency for cp in self.change_points] + labels = [f"{cp.old_frequency:.2f} → {cp.new_frequency:.2f} Hz" + for cp in self.change_points] + + return pred_ids, frequencies, labels + + def generate_cosine_wave(self, prediction_id: int, num_points: int = 1000) -> tuple: + """Generate cosine wave data for a specific prediction - DOMINANT FREQUENCY ONLY""" + pred = self.get_prediction_by_id(prediction_id) + if not pred: + return [], [], [] + + start_time, end_time = pred.time_window + duration = end_time - start_time + + t_relative = np.linspace(0, duration, num_points) + + primary_wave = np.cos(2 * np.pi * pred.dominant_freq * t_relative) + + candidate_waves = [] + + return t_relative, primary_wave, candidate_waves + + def get_latest_predictions(self, n: int = 50) -> List[PredictionData]: + """Get the latest N predictions""" + return self.predictions[-n:] if len(self.predictions) >= n else self.predictions + + def clear_data(self): + """Clear all stored data""" + self.predictions.clear() + self.change_points.clear() + self.current_prediction_id = -1 diff --git a/gui/requirements.txt b/gui/requirements.txt new file mode 100644 index 0000000..620d95a --- /dev/null +++ b/gui/requirements.txt @@ -0,0 +1,5 @@ +# GUI Dependencies for FTIO Dashboard +dash>=2.14.0 +plotly>=5.15.0 +pandas>=1.5.0 +numpy>=1.24.0 diff --git a/gui/run_dashboard.py b/gui/run_dashboard.py new file mode 100755 index 0000000..dc5b4f7 --- /dev/null +++ b/gui/run_dashboard.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 +""" +Launcher script for FTIO GUI Dashboard +""" +import sys +import os +import argparse + +# Add the parent directory to Python path so we can import from ftio +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from gui.dashboard import FTIODashApp + + +def main(): + parser = argparse.ArgumentParser(description='FTIO Prediction GUI Dashboard') + parser.add_argument('--host', default='localhost', help='Dashboard host (default: localhost)') + parser.add_argument('--port', type=int, default=8050, help='Dashboard port (default: 8050)') + parser.add_argument('--socket-port', type=int, default=9999, help='Socket listener port (default: 9999)') + parser.add_argument('--debug', action='store_true', help='Run in debug mode') + + args = parser.parse_args() + + print("=" * 60) + print("FTIO Prediction GUI Dashboard") + print("=" * 60) + print(f"Dashboard URL: http://{args.host}:{args.port}") + print(f"Socket listener: {args.socket_port}") + print("") + print("Instructions:") + print("1. Start this dashboard") + print("2. Run your FTIO predictor with socket logging enabled") + print("3. 
Watch real-time predictions and change points in the browser") + print("") + print("Press Ctrl+C to stop") + print("=" * 60) + + try: + dashboard = FTIODashApp( + host=args.host, + port=args.port, + socket_port=args.socket_port + ) + dashboard.run(debug=args.debug) + except KeyboardInterrupt: + print("\nDashboard stopped by user") + except Exception as e: + print(f"Error: {e}") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/gui/socket_listener.py b/gui/socket_listener.py new file mode 100644 index 0000000..ad0b0c2 --- /dev/null +++ b/gui/socket_listener.py @@ -0,0 +1,377 @@ +""" +Socket listener for receiving FTIO prediction logs and parsing them into structured data +""" +import socket +import json +import threading +import re +import logging +from typing import Optional, Callable +from gui.data_models import PredictionData, ChangePoint, FrequencyCandidate, PredictionDataStore + + +class LogParser: + """Parses FTIO prediction log messages into structured data""" + + def __init__(self): + self.patterns = { + 'prediction_start': re.compile(r'\[PREDICTOR\]\s+\(#(\d+)\):\s+Started'), + 'prediction_end': re.compile(r'\[PREDICTOR\]\s+\(#(\d+)\):\s+Ended'), + 'dominant_freq': re.compile(r'\[PREDICTOR\]\s+\(#(\d+)\):\s+Dominant freq\s+([\d.]+)\s+Hz\s+\(([\d.]+)\s+sec\)'), + 'freq_candidates': re.compile(r'\[PREDICTOR\]\s+\(#(\d+)\):\s+\d+\)\s+([\d.]+)\s+Hz\s+--\s+conf\s+([\d.]+)'), + 'time_window': re.compile(r'\[PREDICTOR\]\s+\(#(\d+)\):\s+Time window\s+([\d.]+)\s+sec\s+\(\[([\d.]+),([\d.]+)\]\s+sec\)'), + 'total_bytes': re.compile(r'\[PREDICTOR\]\s+\(#(\d+)\):\s+Total bytes\s+(.+)'), + 'bytes_transferred': re.compile(r'\[PREDICTOR\]\s+\(#(\d+)\):\s+Bytes transferred since last time\s+(.+)'), + 'current_hits': re.compile(r'\[PREDICTOR\]\s+\(#(\d+)\):\s+Current hits\s+([\d.]+)'), + 'periodic_prob': re.compile(r'\[PREDICTOR\]\s+P\(periodic\)\s+=\s+([\d.]+)%'), + 'freq_range': re.compile(r'\[PREDICTOR\]\s+P\(\[([\d.]+),([\d.]+)\]\s+Hz\)\s+=\s+([\d.]+)%'), + 'period_range': re.compile(r'\[PREDICTOR\]\s+\|->\s+\[([\d.]+),([\d.]+)\]\s+Hz\s+=\s+\[([\d.]+),([\d.]+)\]\s+sec'), + 'change_point': re.compile(r'\[ADWIN\]\s+Change detected at cut\s+(\d+)/(\d+)!'), + 'exact_change_point': re.compile(r'EXACT CHANGE POINT detected at\s+([\d.]+)\s+seconds!'), + 'frequency_shift': re.compile(r'\[ADWIN\]\s+Frequency shift:\s+([\d.]+)\s+→\s+([\d.]+)\s+Hz\s+\(([\d.]+)%\)'), + 'sample_number': re.compile(r'\[ADWIN\]\s+Sample\s+#(\d+):\s+freq=([\d.]+)\s+Hz'), + 'ph_change': re.compile(r'\[Page-Hinkley\]\s+PAGE-HINKLEY CHANGE DETECTED!\s+\w+\s+([\d.]+)Hz\s+→\s+([\d.]+)Hz.*?at sample\s+(\d+),\s+time=([\d.]+)s'), + 'stph_change': re.compile(r'\[STPH\]\s+CHANGE DETECTED!\s+([\d.]+)Hz\s+→\s+([\d.]+)Hz\s+\(([\d.]+)%'), + 'cusum_change': re.compile(r'\[AV-CUSUM\]\s+CHANGE DETECTED!\s+([\d.]+)Hz\s+→\s+([\d.]+)Hz\s+\(([\d.]+)%'), + 'cusum_change_alt': re.compile(r'\[CUSUM\]\s+CHANGE DETECTED!\s+([\d.]+)Hz\s+→\s+([\d.]+)Hz.*?time=([\d.]+)s'), + } + + self.current_prediction = None + self.current_change_point = None + self.candidates_buffer = [] + + def parse_log_message(self, message: str) -> Optional[dict]: + + match = self.patterns['prediction_start'].search(message) + if match: + pred_id = int(match.group(1)) + self.current_prediction = { + 'prediction_id': pred_id, + 'candidates': [], + 'is_change_point': False, + 'change_point': None, + 'timestamp': '', + 'sample_number': None + } + self.candidates_buffer = [] + return None + + if not self.current_prediction: + return None + + pred_id = 
self.current_prediction['prediction_id'] + + match = self.patterns['dominant_freq'].search(message) + if match and int(match.group(1)) == pred_id: + self.current_prediction['dominant_freq'] = float(match.group(2)) + self.current_prediction['dominant_period'] = float(match.group(3)) + + match = self.patterns['freq_candidates'].search(message) + if match and int(match.group(1)) == pred_id: + freq = float(match.group(2)) + conf = float(match.group(3)) + self.candidates_buffer.append(FrequencyCandidate(freq, conf)) + + match = self.patterns['time_window'].search(message) + if match and int(match.group(1)) == pred_id: + self.current_prediction['time_window'] = (float(match.group(3)), float(match.group(4))) + + match = self.patterns['total_bytes'].search(message) + if match and int(match.group(1)) == pred_id: + self.current_prediction['total_bytes'] = match.group(2).strip() + + match = self.patterns['bytes_transferred'].search(message) + if match and int(match.group(1)) == pred_id: + self.current_prediction['bytes_transferred'] = match.group(2).strip() + + match = self.patterns['current_hits'].search(message) + if match and int(match.group(1)) == pred_id: + self.current_prediction['current_hits'] = int(float(match.group(2))) + + match = self.patterns['periodic_prob'].search(message) + if match: + self.current_prediction['periodic_probability'] = float(match.group(1)) + + match = self.patterns['freq_range'].search(message) + if match: + self.current_prediction['frequency_range'] = (float(match.group(1)), float(match.group(2))) + self.current_prediction['confidence'] = float(match.group(3)) + + match = self.patterns['period_range'].search(message) + if match: + self.current_prediction['period_range'] = (float(match.group(3)), float(match.group(4))) + + match = self.patterns['change_point'].search(message) + if match: + self.current_change_point = { + 'cut_position': int(match.group(1)), + 'total_samples': int(match.group(2)), + 'prediction_id': pred_id + } + self.current_prediction['is_change_point'] = True + + match = self.patterns['exact_change_point'].search(message) + if match and self.current_change_point: + self.current_change_point['timestamp'] = float(match.group(1)) + + match = self.patterns['frequency_shift'].search(message) + if match and self.current_change_point: + self.current_change_point['old_frequency'] = float(match.group(1)) + self.current_change_point['new_frequency'] = float(match.group(2)) + self.current_change_point['frequency_change_percent'] = float(match.group(3)) + + match = self.patterns['sample_number'].search(message) + if match: + self.current_prediction['sample_number'] = int(match.group(1)) + + match = self.patterns['ph_change'].search(message) + if match: + self.current_change_point = { + 'old_frequency': float(match.group(1)), + 'new_frequency': float(match.group(2)), + 'cut_position': int(match.group(3)), + 'total_samples': int(match.group(3)), + 'timestamp': float(match.group(4)), + 'frequency_change_percent': abs((float(match.group(2)) - float(match.group(1))) / float(match.group(1)) * 100) if float(match.group(1)) > 0 else 0, + 'prediction_id': pred_id + } + self.current_prediction['is_change_point'] = True + + match = self.patterns['stph_change'].search(message) + if match: + if not self.current_change_point: + self.current_change_point = {'prediction_id': pred_id} + self.current_change_point['old_frequency'] = float(match.group(1)) + self.current_change_point['new_frequency'] = float(match.group(2)) + self.current_change_point['frequency_change_percent'] = 
float(match.group(3)) + self.current_prediction['is_change_point'] = True + + match = self.patterns['cusum_change'].search(message) + if match: + if not self.current_change_point: + self.current_change_point = {'prediction_id': pred_id} + self.current_change_point['old_frequency'] = float(match.group(1)) + self.current_change_point['new_frequency'] = float(match.group(2)) + self.current_change_point['frequency_change_percent'] = float(match.group(3)) + self.current_prediction['is_change_point'] = True + + match = self.patterns['cusum_change_alt'].search(message) + if match: + if not self.current_change_point: + self.current_change_point = {'prediction_id': pred_id} + self.current_change_point['old_frequency'] = float(match.group(1)) + self.current_change_point['new_frequency'] = float(match.group(2)) + self.current_change_point['timestamp'] = float(match.group(3)) + self.current_change_point['frequency_change_percent'] = abs((float(match.group(2)) - float(match.group(1))) / float(match.group(1)) * 100) if float(match.group(1)) > 0 else 0 + self.current_prediction['is_change_point'] = True + + # Check for prediction end + match = self.patterns['prediction_end'].search(message) + if match and int(match.group(1)) == pred_id: + self.current_prediction['candidates'] = self.candidates_buffer.copy() + + if self.current_prediction['is_change_point'] and self.current_change_point: + change_point = ChangePoint( + prediction_id=pred_id, + timestamp=self.current_change_point.get('timestamp', 0.0), + old_frequency=self.current_change_point.get('old_frequency', 0.0), + new_frequency=self.current_change_point.get('new_frequency', 0.0), + frequency_change_percent=self.current_change_point.get('frequency_change_percent', 0.0), + sample_number=self.current_prediction.get('sample_number', 0), + cut_position=self.current_change_point.get('cut_position', 0), + total_samples=self.current_change_point.get('total_samples', 0) + ) + self.current_prediction['change_point'] = change_point + + prediction_data = PredictionData( + prediction_id=pred_id, + timestamp=self.current_prediction.get('timestamp', ''), + dominant_freq=self.current_prediction.get('dominant_freq', 0.0), + dominant_period=self.current_prediction.get('dominant_period', 0.0), + confidence=self.current_prediction.get('confidence', 0.0), + candidates=self.current_prediction['candidates'], + time_window=self.current_prediction.get('time_window', (0.0, 0.0)), + total_bytes=self.current_prediction.get('total_bytes', ''), + bytes_transferred=self.current_prediction.get('bytes_transferred', ''), + current_hits=self.current_prediction.get('current_hits', 0), + periodic_probability=self.current_prediction.get('periodic_probability', 0.0), + frequency_range=self.current_prediction.get('frequency_range', (0.0, 0.0)), + period_range=self.current_prediction.get('period_range', (0.0, 0.0)), + is_change_point=self.current_prediction['is_change_point'], + change_point=self.current_prediction['change_point'], + sample_number=self.current_prediction.get('sample_number') + ) + + self.current_prediction = None + self.current_change_point = None + self.candidates_buffer = [] + + return {'type': 'prediction', 'data': prediction_data} + + return None + + +class SocketListener: + """Listens for socket connections and processes FTIO prediction logs""" + + def __init__(self, host='localhost', port=9999, data_callback: Optional[Callable] = None): + self.host = host + self.port = port + self.data_callback = data_callback + self.parser = LogParser() + self.running = False + 
self.server_socket = None + self.client_connections = [] + + def start_server(self): + try: + self.server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + + print(f"Attempting to bind to {self.host}:{self.port}") + self.server_socket.bind((self.host, self.port)) + self.server_socket.listen(5) + self.running = True + + print(f" Socket server successfully listening on {self.host}:{self.port}") + + while self.running: + try: + client_socket, address = self.server_socket.accept() + print(f" Client connected from {address}") + + client_thread = threading.Thread( + target=self._handle_client, + args=(client_socket, address) + ) + client_thread.daemon = True + client_thread.start() + + except socket.error as e: + if self.running: + print(f"Error accepting client connection: {e}") + break + except KeyboardInterrupt: + print(" Socket server interrupted") + break + + except OSError as e: + if e.errno == 98: # Address already in use + print(f"Port {self.port} is already in use! Please use a different port or kill the process using it.") + else: + print(f"OS Error starting socket server: {e}") + self.running = False + except Exception as e: + print(f"Unexpected error starting socket server: {e}") + import traceback + traceback.print_exc() + self.running = False + finally: + self.stop_server() + + def _handle_client(self, client_socket, address): + try: + while self.running: + try: + data = client_socket.recv(4096).decode('utf-8') + if not data: + break + + try: + message_data = json.loads(data) + + if message_data.get('type') == 'prediction' and 'data' in message_data: + print(f"[DEBUG] Direct prediction data received: #{message_data['data']['prediction_id']}") + + pred_data = message_data['data'] + + candidates = [] + for cand in pred_data.get('candidates', []): + candidates.append(FrequencyCandidate( + frequency=cand['frequency'], + confidence=cand['confidence'] + )) + + change_point = None + if pred_data.get('is_change_point') and pred_data.get('change_point'): + cp_data = pred_data['change_point'] + change_point = ChangePoint( + prediction_id=cp_data['prediction_id'], + timestamp=cp_data['timestamp'], + old_frequency=cp_data['old_frequency'], + new_frequency=cp_data['new_frequency'], + frequency_change_percent=cp_data['frequency_change_percent'], + sample_number=cp_data['sample_number'], + cut_position=cp_data['cut_position'], + total_samples=cp_data['total_samples'] + ) + + prediction_data = PredictionData( + prediction_id=pred_data['prediction_id'], + timestamp=pred_data['timestamp'], + dominant_freq=pred_data['dominant_freq'], + dominant_period=pred_data['dominant_period'], + confidence=pred_data['confidence'], + candidates=candidates, + time_window=tuple(pred_data['time_window']), + total_bytes=pred_data['total_bytes'], + bytes_transferred=pred_data['bytes_transferred'], + current_hits=pred_data['current_hits'], + periodic_probability=pred_data['periodic_probability'], + frequency_range=tuple(pred_data['frequency_range']), + period_range=tuple(pred_data['period_range']), + is_change_point=pred_data['is_change_point'], + change_point=change_point, + sample_number=pred_data.get('sample_number') + ) + + if self.data_callback: + self.data_callback({'type': 'prediction', 'data': prediction_data}) + + else: + log_message = message_data.get('message', '') + + parsed_data = self.parser.parse_log_message(log_message) + + if parsed_data and self.data_callback: + self.data_callback(parsed_data) + + except 
json.JSONDecodeError: + # Handle plain text messages + parsed_data = self.parser.parse_log_message(data.strip()) + if parsed_data and self.data_callback: + self.data_callback(parsed_data) + + except socket.error: + break + + except Exception as e: + logging.error(f"Error handling client {address}: {e}") + finally: + try: + client_socket.close() + print(f"Client {address} disconnected") + except: + pass + + def stop_server(self): + self.running = False + if self.server_socket: + try: + self.server_socket.close() + except: + pass + + for client_socket in self.client_connections: + try: + client_socket.close() + except: + pass + self.client_connections.clear() + print("Socket server stopped") + + def start_in_thread(self): + server_thread = threading.Thread(target=self.start_server) + server_thread.daemon = True + server_thread.start() + return server_thread diff --git a/gui/visualizations.py b/gui/visualizations.py new file mode 100644 index 0000000..d713899 --- /dev/null +++ b/gui/visualizations.py @@ -0,0 +1,314 @@ +""" +Plotly/Dash visualization components for FTIO prediction data +""" +import plotly.graph_objects as go +import plotly.express as px +from plotly.subplots import make_subplots +import numpy as np +from typing import List, Tuple, Dict +from gui.data_models import PredictionData, ChangePoint, PredictionDataStore + + +class FrequencyTimelineViz: + """Creates frequency timeline visualization""" + + @staticmethod + def create_timeline_plot(data_store: PredictionDataStore, title="FTIO Frequency Timeline"): + """Create main frequency timeline plot""" + + pred_ids, frequencies, confidences = data_store.get_frequency_timeline() + + if not pred_ids: + fig = go.Figure() + fig.add_annotation( + text="No prediction data available", + x=0.5, y=0.5, + xref="paper", yref="paper", + showarrow=False, + font=dict(size=16, color="gray") + ) + fig.update_layout( + title=title, + xaxis_title="Prediction Index", + yaxis_title="Frequency (Hz)", + height=500 + ) + return fig + + fig = go.Figure() + + fig.add_trace(go.Scatter( + x=pred_ids, + y=frequencies, + mode='lines+markers', + name='Dominant Frequency', + line=dict(color='blue', width=2), + marker=dict( + size=8, + opacity=[conf/100.0 for conf in confidences], + color='blue', + line=dict(width=1, color='darkblue') + ), + hovertemplate="Prediction #%{x}
" + + "Frequency: %{y:.2f} Hz
" + + "Confidence: %{customdata:.1f}%", + customdata=confidences + )) + + candidates_dict = data_store.get_candidate_frequencies() + for pred_id, candidates in candidates_dict.items(): + for candidate in candidates: + if candidate.frequency != data_store.get_prediction_by_id(pred_id).dominant_freq: + fig.add_trace(go.Scatter( + x=[pred_id], + y=[candidate.frequency], + mode='markers', + name=f'Candidate (conf: {candidate.confidence:.2f})', + marker=dict( + size=6, + opacity=candidate.confidence, + color='orange', + symbol='diamond' + ), + showlegend=False, + hovertemplate=f"Candidate Frequency
" + + f"Frequency: {candidate.frequency:.2f} Hz
" + + f"Confidence: {candidate.confidence:.2f}" + )) + + cp_pred_ids, cp_frequencies, cp_labels = data_store.get_change_points_for_timeline() + + if cp_pred_ids: + fig.add_trace(go.Scatter( + x=cp_pred_ids, + y=cp_frequencies, + mode='markers', + name='Change Points', + marker=dict( + size=12, + color='red', + symbol='diamond', + line=dict(width=2, color='darkred') + ), + hovertemplate="Change Point
" + + "Prediction #%{x}
" + + "%{customdata}", + customdata=cp_labels + )) + + for pred_id, freq, label in zip(cp_pred_ids, cp_frequencies, cp_labels): + fig.add_vline( + x=pred_id, + line_dash="dash", + line_color="red", + opacity=0.7, + annotation_text=label, + annotation_position="top" + ) + + fig.update_layout( + title=dict( + text=title, + font=dict(size=18, color='darkblue') + ), + xaxis=dict( + title="Prediction Index", + showgrid=True, + gridcolor='lightgray', + tickmode='linear' + ), + yaxis=dict( + title="Frequency (Hz)", + showgrid=True, + gridcolor='lightgray' + ), + hovermode='closest', + height=500, + margin=dict(l=60, r=60, t=80, b=60), + plot_bgcolor='white', + showlegend=True, + legend=dict( + x=0.02, + y=0.98, + bgcolor='rgba(255, 255, 255, 0.8)', + bordercolor='gray', + borderwidth=1 + ) + ) + + return fig + + +class CosineWaveViz: + """Creates cosine wave visualization for individual predictions""" + + @staticmethod + def create_cosine_plot(data_store: PredictionDataStore, prediction_id: int, + title=None, num_points=1000): + """Create cosine wave plot for a specific prediction""" + + prediction = data_store.get_prediction_by_id(prediction_id) + if not prediction: + fig = go.Figure() + fig.add_annotation( + text=f"Prediction #{prediction_id} not found", + x=0.5, y=0.5, + xref="paper", yref="paper", + showarrow=False, + font=dict(size=16, color="gray") + ) + fig.update_layout( + title=f"Cosine Wave - Prediction #{prediction_id}", + xaxis_title="Time (s)", + yaxis_title="Amplitude", + height=400 + ) + return fig + + t, primary_wave, candidate_waves = data_store.generate_cosine_wave( + prediction_id, num_points + ) + + if title is None: + title = (f"Cosine Wave - Prediction #{prediction_id} " + f"(f = {prediction.dominant_freq:.2f} Hz)") + + fig = go.Figure() + + fig.add_trace(go.Scatter( + x=t, + y=primary_wave, + mode='lines', + name=f'I/O Pattern: {prediction.dominant_freq:.2f} Hz', + line=dict(color='#1f77b4', width=3), + hovertemplate="I/O Pattern
" + + "Time: %{x:.3f} s
" + + "Amplitude: %{y:.3f}
" + + f"Frequency: {prediction.dominant_freq:.2f} Hz" + )) + + if prediction.is_change_point and prediction.change_point: + cp_time = prediction.change_point.timestamp + start_time, end_time = prediction.time_window + if start_time <= cp_time <= end_time: + cp_relative = cp_time - start_time + fig.add_vline( + x=cp_relative, + line_dash="dash", + line_color="red", + line_width=3, + opacity=0.8, + annotation_text=(f"Change Point
" + f"{prediction.change_point.old_frequency:.2f} → " + f"{prediction.change_point.new_frequency:.2f} Hz"), + annotation_position="top" + ) + + start_time, end_time = prediction.time_window + duration = end_time - start_time + fig.update_layout( + title=dict( + text=title, + font=dict(size=16, color='darkblue') + ), + xaxis=dict( + title=f"Time (s) - Duration: {duration:.2f}s", + range=[0, duration], + showgrid=True, + gridcolor='lightgray' + ), + yaxis=dict( + title="Amplitude", + range=[-1.2, 1.2], + showgrid=True, + gridcolor='lightgray' + ), + height=400, + margin=dict(l=60, r=60, t=60, b=60), + plot_bgcolor='white', + showlegend=True, + legend=dict( + x=0.02, + y=0.98, + bgcolor='rgba(255, 255, 255, 0.8)', + bordercolor='gray', + borderwidth=1 + ) + ) + + return fig + + +class DashboardViz: + """Creates comprehensive dashboard visualization""" + + @staticmethod + def create_dashboard(data_store: PredictionDataStore, selected_prediction_id=None): + """Create comprehensive dashboard with multiple views""" + + fig = make_subplots( + rows=2, cols=2, + subplot_titles=( + "Frequency Timeline", + "Latest Predictions", + "Cosine Wave View", + "Statistics" + ), + specs=[ + [{"colspan": 2}, None], + [{}, {}] + ], + row_heights=[0.6, 0.4], + vertical_spacing=0.1 + ) + + timeline_fig = FrequencyTimelineViz.create_timeline_plot(data_store) + for trace in timeline_fig.data: + fig.add_trace(trace, row=1, col=1) + + if selected_prediction_id is not None: + cosine_fig = CosineWaveViz.create_cosine_plot(data_store, selected_prediction_id) + for trace in cosine_fig.data: + fig.add_trace(trace, row=2, col=1) + + stats = DashboardViz._calculate_stats(data_store) + fig.add_trace(go.Bar( + x=list(stats.keys()), + y=list(stats.values()), + name="Statistics", + marker_color='lightblue' + ), row=2, col=2) + + fig.update_layout( + height=800, + title_text="FTIO Prediction Dashboard", + showlegend=True + ) + + fig.update_xaxes(title_text="Prediction Index", row=1, col=1) + fig.update_yaxes(title_text="Frequency (Hz)", row=1, col=1) + fig.update_xaxes(title_text="Time (s)", row=2, col=1) + fig.update_yaxes(title_text="Amplitude", row=2, col=1) + fig.update_xaxes(title_text="Metric", row=2, col=2) + fig.update_yaxes(title_text="Value", row=2, col=2) + + return fig + + @staticmethod + def _calculate_stats(data_store: PredictionDataStore) -> Dict[str, float]: + """Calculate basic statistics from prediction data""" + if not data_store.predictions: + return {} + + frequencies = [p.dominant_freq for p in data_store.predictions] + confidences = [p.confidence for p in data_store.predictions] + + stats = { + 'Total Predictions': len(data_store.predictions), + 'Change Points': len(data_store.change_points), + 'Avg Frequency': np.mean(frequencies), + 'Avg Confidence': np.mean(confidences), + 'Freq Std Dev': np.std(frequencies) + } + + return stats