diff --git a/ChangeLog.md b/ChangeLog.md
deleted file mode 100644
index f0cf6fa..0000000
--- a/ChangeLog.md
+++ /dev/null
@@ -1,12 +0,0 @@
-# FTIO ChangeLog
-
-## Version 0.0.2
-- Set the default plot unit to Bytes or Bytes/s rather than MB or MB/s
-- Adjusted the plot script to automatically detect the best unit for the y-axis and scale the values accordingly
-
-
-## Version 0.0.1
-
-- Speed-up with Msgpack
-- Added autocorrelation to FTIO
-- Added 4 new outlier detection methods
\ No newline at end of file
diff --git a/README.md b/README.md
index a668ea9..8fb4a50 100644
--- a/README.md
+++ b/README.md
@@ -348,6 +348,7 @@ Distributed under the BSD 3-Clause License. See [LICENCE](./LICENSE) for more in
Authors:
- Ahmad Tarraf
+- Amine Aherbil
This work is a result of cooperation between the Technical University of Darmstadt and INRIA in the scope of
the [EuroHPC ADMIRE project](https://admire-eurohpc.eu/).
diff --git a/ftio/freq/_dft.py b/ftio/freq/_dft.py
index 30f39be..ab9ccff 100644
--- a/ftio/freq/_dft.py
+++ b/ftio/freq/_dft.py
@@ -79,6 +79,8 @@ def dft_fast(b: np.ndarray) -> np.ndarray:
- np.ndarray, DFT of the input signal.
"""
N = len(b)
+ if N == 0:
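+        # Empty signal: return early with an empty spectrum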
+ return np.array([])
X = np.repeat(complex(0, 0), N) # np.zeros(N)
for k in range(0, N):
for n in range(0, N):
@@ -98,6 +100,8 @@ def numpy_dft(b: np.ndarray) -> np.ndarray:
Returns:
- np.ndarray, DFT of the input signal.
"""
+ if len(b) == 0:
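+        # np.fft.fft raises ValueError on zero-length input, so return early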
+ return np.array([])
return np.fft.fft(b)
diff --git a/ftio/freq/_dft_workflow.py b/ftio/freq/_dft_workflow.py
index 381e44f..d52d0e6 100644
--- a/ftio/freq/_dft_workflow.py
+++ b/ftio/freq/_dft_workflow.py
@@ -46,6 +46,10 @@ def ftio_dft(
- analysis_figures (AnalysisFigures): Data and plot figures.
- share (SharedSignalData): Contains shared information, including sampled bandwidth and total bytes.
"""
+    # Suppress numpy RuntimeWarnings (e.g. "mean of empty slice") raised by
+    # empty-array operations; note that this filter applies process-wide
+    import warnings
+    warnings.filterwarnings('ignore', category=RuntimeWarning, module='numpy')
+
#! Default values for variables
share = SharedSignalData()
prediction = Prediction(args.transformation)
@@ -75,42 +79,66 @@ def ftio_dft(
n = len(b_sampled)
frequencies = args.freq * np.arange(0, n) / n
X = dft(b_sampled)
- X = X * np.exp(
- -2j * np.pi * frequencies * time_stamps[0]
- ) # Correct phase offset due to start time t0
+
+ # Safety check for empty time_stamps array
+ if len(time_stamps) > 0:
+ X = X * np.exp(
+ -2j * np.pi * frequencies * time_stamps[0]
+ ) # Correct phase offset due to start time t0
+ # If time_stamps is empty, skip phase correction
+
amp = abs(X)
phi = np.arctan2(X.imag, X.real)
conf = np.zeros(len(amp))
# welch(bandwidth,freq)
#! Find the dominant frequency
- (dominant_index, conf[1 : int(n / 2) + 1], outlier_text) = outlier_detection(
- amp, frequencies, args
- )
+ # Safety check for empty arrays
+ if n > 0:
+ (dominant_index, conf[1 : int(n / 2) + 1], outlier_text) = outlier_detection(
+ amp, frequencies, args
+ )
- # Ignore DC offset
- conf[0] = np.inf
- if n % 2 == 0:
- conf[int(n / 2) + 1 :] = np.flip(conf[1 : int(n / 2)])
+ # Ignore DC offset
+ conf[0] = np.inf
+ if n % 2 == 0:
+ conf[int(n / 2) + 1 :] = np.flip(conf[1 : int(n / 2)])
+ else:
+ conf[int(n / 2) + 1 :] = np.flip(conf[1 : int(n / 2) + 1])
else:
- conf[int(n / 2) + 1 :] = np.flip(conf[1 : int(n / 2) + 1])
+ # Handle empty data case
+ dominant_index = np.array([])
+ outlier_text = "No data available for outlier detection"
#! Assign data
- prediction.dominant_freq = frequencies[dominant_index]
- prediction.conf = conf[dominant_index]
- if args.periodicity_detection is not None:
- prediction.periodicity = conf[dominant_index]
- prediction.amp = amp[dominant_index]
- prediction.phi = phi[dominant_index]
- prediction.t_start = time_stamps[0]
- prediction.t_end = time_stamps[-1]
+ if n > 0 and len(dominant_index) > 0:
+ prediction.dominant_freq = frequencies[dominant_index]
+ prediction.conf = conf[dominant_index]
+ if args.periodicity_detection is not None:
+ prediction.periodicity = conf[dominant_index]
+ prediction.amp = amp[dominant_index]
+ prediction.phi = phi[dominant_index]
+ else:
+ # Handle empty data case
+ prediction.dominant_freq = np.array([])
+ prediction.conf = np.array([])
+ prediction.amp = np.array([])
+ prediction.phi = np.array([])
+
+ # Safety check for empty time_stamps
+ if len(time_stamps) > 0:
+ prediction.t_start = time_stamps[0]
+ prediction.t_end = time_stamps[-1]
+ else:
+ prediction.t_start = 0.0
+ prediction.t_end = 0.0
prediction.freq = args.freq
prediction.ranks = ranks
prediction.total_bytes = total_bytes
prediction.n_samples = n
#! Save up to n_freq from the top candidates
- if args.n_freq > 0:
+ if args.n_freq > 0 and n > 0:
arr = amp[0 : int(np.ceil(n / 2))]
top_candidates = np.argsort(-arr) # from max to min
n_freq = int(min(len(arr), args.n_freq))
@@ -124,7 +152,11 @@ def ftio_dft(
periodicity_score = new_periodicity_scores(amp, b_sampled, prediction, args)
- t_sampled = time_stamps[0] + np.arange(0, n) * 1 / args.freq
+    # Safety check for empty time_stamps or a non-positive sampling frequency
+    if len(time_stamps) > 0 and args.freq > 0:
+        t_sampled = time_stamps[0] + np.arange(0, n) * 1 / args.freq
+    else:
+        # Fall back to t0 = 0 and a unit time step when no valid frequency exists
+        t_sampled = np.arange(0, n) * (1 / args.freq if args.freq > 0 else 1.0)
#! Fourier fit if set
if args.fourier_fit:
fourier_fit(args, prediction, analysis_figures, b_sampled, t_sampled)
diff --git a/ftio/freq/discretize.py b/ftio/freq/discretize.py
index 7780945..39a7e3c 100644
--- a/ftio/freq/discretize.py
+++ b/ftio/freq/discretize.py
@@ -38,6 +38,9 @@ def sample_data(
Raises:
RuntimeError: If no data is found in the sampled bandwidth.
"""
+ if len(t) == 0:
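+        # Nothing to sample; return the usual (samples, freq, text) triple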
+ return np.empty(0), 0, " "
+
if args is not None:
freq = args.freq
memory_limit = args.memory_limit * 1000**3 # args.memory_limit GB
@@ -53,9 +56,6 @@ def sample_data(
f"Frequency step: {1/ duration if duration > 0 else 0:.3e} Hz\n"
)
- if len(t) == 0:
- return np.empty(0), 0, " "
-
# Calculate recommended frequency:
if freq == -1:
# Auto-detect frequency based on smallest time delta
diff --git a/ftio/freq/time_window.py b/ftio/freq/time_window.py
index 0ec3e82..86a3a2f 100644
--- a/ftio/freq/time_window.py
+++ b/ftio/freq/time_window.py
@@ -33,12 +33,20 @@ def data_in_time_window(
indices = np.where(time_b >= args.ts)
time_b = time_b[indices]
bandwidth = bandwidth[indices]
- total_bytes = int(
- np.sum(bandwidth * (np.concatenate([time_b[1:], time_b[-1:]]) - time_b))
- )
- text += f"[green]Start time set to {args.ts:.2f}[/] s\n"
+
+ if len(time_b) > 0:
+ total_bytes = int(
+ np.sum(bandwidth * (np.concatenate([time_b[1:], time_b[-1:]]) - time_b))
+ )
+ text += f"[green]Start time set to {args.ts:.2f}[/] s\n"
+ else:
+ total_bytes = 0
+ text += f"[red]Warning: No data after start time {args.ts:.2f}[/] s\n"
else:
- text += f"Start time: [cyan]{time_b[0]:.2f}[/] s \n"
+ if len(time_b) > 0:
+ text += f"Start time: [cyan]{time_b[0]:.2f}[/] s \n"
+ else:
+ text += f"[red]Warning: No data available[/]\n"
# shorten data according to end time
if args.te:
@@ -50,7 +58,10 @@ def data_in_time_window(
)
text += f"[green]End time set to {args.te:.2f}[/] s\n"
else:
- text += f"End time: [cyan]{time_b[-1]:.2f}[/] s\n"
+ if len(time_b) > 0:
+ text += f"End time: [cyan]{time_b[-1]:.2f}[/] s\n"
+ else:
+ text += f"[red]Warning: No data in time window[/]\n"
# ignored bytes
ignored_bytes = ignored_bytes - total_bytes
diff --git a/ftio/parse/args.py b/ftio/parse/args.py
index b184da5..acb5c9e 100644
--- a/ftio/parse/args.py
+++ b/ftio/parse/args.py
@@ -257,6 +257,14 @@ def parse_args(argv: list, name="") -> argparse.Namespace:
help="specifies the number of hits needed to adapt the time window. A hit occurs once a dominant frequency is found",
)
parser.set_defaults(hits=3)
+ parser.add_argument(
+ "--algorithm",
+ dest="algorithm",
+ type=str,
+ choices=["adwin", "cusum", "ph"],
+ help="change point detection algorithm to use. 'adwin' (default) uses Adaptive Windowing with automatic window sizing and mathematical guarantees. 'cusum' uses Cumulative Sum detection for rapid change detection. 'ph' uses Page-Hinkley test for sequential change point detection.",
+ )
+ parser.set_defaults(algorithm="adwin")
parser.add_argument(
"-v",
"--verbose",
diff --git a/ftio/prediction/change_point_detection.py b/ftio/prediction/change_point_detection.py
new file mode 100644
index 0000000..a096c81
--- /dev/null
+++ b/ftio/prediction/change_point_detection.py
@@ -0,0 +1,1064 @@
+"""Change point detection algorithms for FTIO online predictor."""
+
+from __future__ import annotations
+
+import numpy as np
+import math
+from typing import List, Tuple, Optional, Dict, Any
+from contextlib import nullcontext
+from rich.console import Console
+from ftio.prediction.helper import get_dominant
+from ftio.freq.prediction import Prediction
+
+
+class ChangePointDetector:
+ """ADWIN detector for I/O pattern changes with automatic window sizing."""
+
+ def __init__(self, delta: float = 0.05, shared_resources=None, show_init: bool = True, verbose: bool = False):
+ """Initialize ADWIN detector with confidence parameter delta (default: 0.05)."""
+ self.delta = min(max(delta, 1e-12), 1 - 1e-12)
+ self.shared_resources = shared_resources
+ self.verbose = verbose
+
+        if shared_resources and not shared_resources.adwin_initialized.value:
+            # Double-checked init; use the lock when available, otherwise proceed unsynchronized
+            lock = getattr(shared_resources, 'adwin_lock', None) or nullcontext()
+            with lock:
+                if not shared_resources.adwin_initialized.value:
+                    shared_resources.adwin_frequencies[:] = []
+                    shared_resources.adwin_timestamps[:] = []
+                    shared_resources.adwin_total_samples.value = 0
+                    shared_resources.adwin_change_count.value = 0
+                    shared_resources.adwin_last_change_time.value = 0.0
+                    shared_resources.adwin_initialized.value = True
+
+ if shared_resources is None:
+ self.frequencies: List[float] = []
+ self.timestamps: List[float] = []
+ self.total_samples = 0
+ self.change_count = 0
+ self.last_change_time: Optional[float] = None
+
+ self.last_change_point: Optional[int] = None
+ self.min_window_size = 2
+ self.console = Console()
+
+ if show_init:
+ self.console.print(f"[green][ADWIN] Initialized with δ={delta:.3f} "
+ f"({(1-delta)*100:.0f}% confidence) "
+ f"[Process-safe: {shared_resources is not None}][/]")
+
+ def _get_frequencies(self):
+ if self.shared_resources:
+ return self.shared_resources.adwin_frequencies
+ return self.frequencies
+
+ def _get_timestamps(self):
+ if self.shared_resources:
+ return self.shared_resources.adwin_timestamps
+ return self.timestamps
+
+ def _get_total_samples(self):
+ if self.shared_resources:
+ return self.shared_resources.adwin_total_samples.value
+ return self.total_samples
+
+ def _set_total_samples(self, value):
+ if self.shared_resources:
+ self.shared_resources.adwin_total_samples.value = value
+ else:
+ self.total_samples = value
+
+ def _get_change_count(self):
+ if self.shared_resources:
+ return self.shared_resources.adwin_change_count.value
+ return self.change_count
+
+ def _set_change_count(self, value):
+ if self.shared_resources:
+ self.shared_resources.adwin_change_count.value = value
+ else:
+ self.change_count = value
+
+ def _get_last_change_time(self):
+ if self.shared_resources:
+ return self.shared_resources.adwin_last_change_time.value if self.shared_resources.adwin_last_change_time.value > 0 else None
+ return self.last_change_time
+
+ def _set_last_change_time(self, value):
+ if self.shared_resources:
+ self.shared_resources.adwin_last_change_time.value = value if value is not None else 0.0
+ else:
+ self.last_change_time = value
+
+    def _reset_window(self):
+        """Clear all window state (used when no dominant frequency is found)."""
+        frequencies = self._get_frequencies()
+        timestamps = self._get_timestamps()
+
+        # `del list[:]` works for both plain lists and multiprocessing list proxies
+        del frequencies[:]
+        del timestamps[:]
+        self._set_total_samples(0)
+        self._set_last_change_time(None)
+
+        self.console.print("[dim yellow][ADWIN] Window cleared: No frequency data to analyze[/]")
+
+    def add_prediction(self, prediction: Prediction, timestamp: float) -> Optional[Tuple[int, float]]:
+        """Add the prediction's dominant frequency and test for a change point.
+
+        Returns (change_index, change_timestamp) if a change was detected, else None.
+        """
+        freq = get_dominant(prediction)
+
+        if np.isnan(freq) or freq <= 0:
+            self.console.print("[yellow][ADWIN] No frequency found - resetting window history[/]")
+            self._reset_window()
+            return None
+
+        if self.shared_resources and hasattr(self.shared_resources, 'adwin_lock'):
+            with self.shared_resources.adwin_lock:
+                return self._record_observation(timestamp, freq)
+        return self._record_observation(timestamp, freq)
+
+    def _record_observation(self, timestamp: float, freq: float) -> Optional[Tuple[int, float]]:
+        """Append one observation and run the ADWIN cut test (caller holds any lock)."""
+        frequencies = self._get_frequencies()
+        timestamps = self._get_timestamps()
+
+        frequencies.append(freq)
+        timestamps.append(timestamp)
+        self._set_total_samples(self._get_total_samples() + 1)
+
+        if len(frequencies) < self.min_window_size:
+            return None
+
+        change_point = self._detect_change()
+        if change_point is not None:
+            exact_change_timestamp = timestamps[change_point]
+            self._process_change_point(change_point)
+            self._set_change_count(self._get_change_count() + 1)
+            return (change_point, exact_change_timestamp)
+
+        return None
+
+    def _detect_change(self) -> Optional[int]:
+        """Scan all admissible cut points; return the first statistically significant one."""
+ frequencies = self._get_frequencies()
+ timestamps = self._get_timestamps()
+ n = len(frequencies)
+
+ if n < 2 * self.min_window_size:
+ return None
+
+ for cut in range(self.min_window_size, n - self.min_window_size + 1):
+ if self._test_cut_point(cut):
+ self.console.print(f"[blue][ADWIN] Change detected at position {cut}/{n}, "
+ f"time={timestamps[cut]:.3f}s[/]")
+ return cut
+
+ return None
+
+    def _test_cut_point(self, cut: int) -> bool:
+        """Check whether the sub-windows left and right of `cut` have significantly different means."""
+        frequencies = self._get_frequencies()
+
+        left_data = frequencies[:cut]
+        right_data = frequencies[cut:]
+        n0 = len(left_data)
+        n1 = len(right_data)
+
+        if n0 <= 0 or n1 <= 0:
+            return False
+
+        mean0 = np.mean(left_data)
+        mean1 = np.mean(right_data)
+
+        n_harmonic = (n0 * n1) / (n0 + n1)
+
+        try:
+            # Hoeffding-style bound at confidence level 1 - delta
+            confidence_term = math.log(2.0 / self.delta) / (2.0 * n_harmonic)
+            threshold = math.sqrt(2.0 * confidence_term)
+        except (ValueError, ZeroDivisionError):
+            threshold = 0.05
+
+ mean_diff = abs(mean1 - mean0)
+
+ if self.verbose:
+ self.console.print(f"[dim blue][ADWIN DEBUG] Cut={cut}:[/]")
+ self.console.print(f" [dim]• Left window: {n0} samples, mean={mean0:.3f}Hz[/]")
+ self.console.print(f" [dim]• Right window: {n1} samples, mean={mean1:.3f}Hz[/]")
+ self.console.print(f" [dim]• Mean difference: |{mean1:.3f} - {mean0:.3f}| = {mean_diff:.3f}[/]")
+ self.console.print(f" [dim]• Harmonic mean: {n_harmonic:.1f}[/]")
+ self.console.print(f" [dim]• Confidence term: log(2/{self.delta}) / (2×{n_harmonic:.1f}) = {confidence_term:.6f}[/]")
+ self.console.print(f" [dim]• Threshold: √(2×{confidence_term:.6f}) = {threshold:.3f}[/]")
+ self.console.print(f" [dim]• Test: {mean_diff:.3f} > {threshold:.3f} ? {'CHANGE!' if mean_diff > threshold else 'No change'}[/]")
+
+ return mean_diff > threshold
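+        # Worked example (illustrative): with delta = 0.05 and two 10-sample
+        # sub-windows, n_harmonic = 100/20 = 5, confidence_term =
+        # ln(2/0.05)/(2*5) ≈ 0.369, and threshold = sqrt(2*0.369) ≈ 0.86 Hz,
+        # so the two sub-window means must differ by roughly 0.86 Hz to split here.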
+
+    def _process_change_point(self, change_point: int):
+        """Drop all samples before the change point and report the window adaptation."""
+ frequencies = self._get_frequencies()
+ timestamps = self._get_timestamps()
+
+ self.last_change_point = change_point
+ change_time = timestamps[change_point]
+ self._set_last_change_time(change_time)
+
+ old_window_size = len(frequencies)
+ old_freq = np.mean(frequencies[:change_point]) if change_point > 0 else 0
+
+ if self.shared_resources:
+ del frequencies[:change_point]
+ del timestamps[:change_point]
+ new_frequencies = frequencies
+ new_timestamps = timestamps
+ else:
+ self.frequencies = frequencies[change_point:]
+ self.timestamps = timestamps[change_point:]
+ new_frequencies = self.frequencies
+ new_timestamps = self.timestamps
+
+ new_window_size = len(new_frequencies)
+ new_freq = np.mean(new_frequencies) if new_frequencies else 0
+
+ freq_change = abs(new_freq - old_freq) / old_freq * 100 if old_freq > 0 else 0
+ time_span = new_timestamps[-1] - new_timestamps[0] if len(new_timestamps) > 1 else 0
+
+ self.console.print(f"[green][ADWIN] Window adapted: "
+ f"{old_window_size} → {new_window_size} samples[/]")
+ self.console.print(f"[green][ADWIN] Frequency shift: "
+ f"{old_freq:.3f} → {new_freq:.3f} Hz ({freq_change:.1f}%)[/]")
+ self.console.print(f"[green][ADWIN] New window span: {time_span:.2f} seconds[/]")
+
+    def get_adaptive_start_time(self, current_prediction: Prediction) -> float:
+        """Return an adapted analysis start time, anchored at the last change point when sensible."""
+ timestamps = self._get_timestamps()
+
+ if len(timestamps) == 0:
+ return current_prediction.t_start
+
+ last_change_time = self._get_last_change_time()
+ if last_change_time is not None:
+ exact_change_start = last_change_time
+
+ min_window = 0.5
+ max_lookback = 10.0
+
+ window_span = current_prediction.t_end - exact_change_start
+
+ if window_span < min_window:
+ adaptive_start = max(0, current_prediction.t_end - min_window)
+ self.console.print(f"[yellow][ADWIN] Change point too recent, using min window: "
+ f"{adaptive_start:.6f}s[/]")
+ elif window_span > max_lookback:
+ adaptive_start = max(0, current_prediction.t_end - max_lookback)
+ self.console.print(f"[yellow][ADWIN] Change point too old, using max lookback: "
+ f"{adaptive_start:.6f}s[/]")
+ else:
+ adaptive_start = exact_change_start
+ self.console.print(f"[green][ADWIN] Using EXACT change point timestamp: "
+ f"{adaptive_start:.6f}s (window span: {window_span:.3f}s)[/]")
+
+ return adaptive_start
+
+ window_start = timestamps[0]
+
+ min_start = current_prediction.t_end - 10.0
+ max_start = current_prediction.t_end - 0.5
+
+ adaptive_start = max(min_start, min(window_start, max_start))
+
+ return adaptive_start
+
+ def get_window_stats(self) -> Dict[str, Any]:
+ """Get current ADWIN window statistics for debugging and logging."""
+ frequencies = self._get_frequencies()
+ timestamps = self._get_timestamps()
+
+ if not frequencies:
+ return {
+ "size": 0, "mean": 0.0, "std": 0.0,
+ "range": [0.0, 0.0], "time_span": 0.0,
+ "total_samples": self._get_total_samples(),
+ "change_count": self._get_change_count()
+ }
+
+ return {
+ "size": len(frequencies),
+ "mean": np.mean(frequencies),
+ "std": np.std(frequencies),
+ "range": [float(np.min(frequencies)), float(np.max(frequencies))],
+ "time_span": float(timestamps[-1] - timestamps[0]) if len(timestamps) > 1 else 0.0,
+ "total_samples": self._get_total_samples(),
+ "change_count": self._get_change_count()
+ }
+
+ def should_adapt_window(self) -> bool:
+ """Check if window adaptation should be triggered."""
+ return self.last_change_point is not None
+
+    def log_change_point(self, counter: int, old_freq: float, new_freq: float) -> str:
+        """Build the rich log message for the last change point; returns "" if none is pending."""
+ last_change_time = self._get_last_change_time()
+ if last_change_time is None:
+ return ""
+
+ freq_change_pct = abs(new_freq - old_freq) / old_freq * 100 if old_freq > 0 else 0
+ stats = self.get_window_stats()
+
+ log_msg = (
+ f"[red bold][CHANGE_POINT] t_s={last_change_time:.3f} sec[/]\n"
+ f"[purple][PREDICTOR] (#{counter}):[/][yellow] "
+ f"ADWIN detected pattern change: {old_freq:.3f} → {new_freq:.3f} Hz "
+ f"({freq_change_pct:.1f}% change)[/]\n"
+ f"[purple][PREDICTOR] (#{counter}):[/][yellow] "
+ f"Adaptive window: {stats['size']} samples, "
+ f"span={stats['time_span']:.1f}s, "
+ f"changes={stats['change_count']}/{stats['total_samples']}[/]\n"
+ f"[dim blue]ADWIN ANALYSIS: Statistical significance detected using Hoeffding bounds[/]\n"
+ f"[dim blue]Window split analysis found mean difference > confidence threshold[/]\n"
+ f"[dim blue]Confidence level: {(1-self.delta)*100:.0f}% (δ={self.delta:.3f})[/]"
+ )
+
+        # Clear the pending change point now that it has been reported
+        self.last_change_point = None
+
+ return log_msg
+
+    def get_change_point_time(self, shared_resources=None) -> Optional[float]:
+        """Return the timestamp of the last detected change point, if any."""
+        return self._get_last_change_time()
+
+def detect_pattern_change_adwin(shared_resources, current_prediction: Prediction,
+                             detector: ChangePointDetector, counter: int) -> Tuple[bool, Optional[str], float]:
+    """Run the ADWIN detector on the latest prediction; returns (changed, log_msg, new_start_time)."""
+ change_point = detector.add_prediction(current_prediction, current_prediction.t_end)
+
+ if change_point is not None:
+ change_idx, change_time = change_point
+
+ current_freq = get_dominant(current_prediction)
+
+ old_freq = current_freq
+ frequencies = detector._get_frequencies()
+ if len(frequencies) > 1:
+            window_stats = detector.get_window_stats()
+            # Heuristic: approximate the pre-change frequency from the window mean
+            old_freq = max(0.1, window_stats["mean"] * 0.9)
+
+ log_msg = detector.log_change_point(counter, old_freq, current_freq)
+
+ new_start_time = detector.get_adaptive_start_time(current_prediction)
+
+ try:
+ from ftio.prediction.online_analysis import get_socket_logger
+ logger = get_socket_logger()
+ logger.send_log("change_point", "ADWIN Change Point Detected", {
+ 'exact_time': change_time,
+ 'old_freq': old_freq,
+ 'new_freq': current_freq,
+ 'adaptive_start': new_start_time,
+ 'counter': counter
+ })
+ except ImportError:
+ pass
+
+ return True, log_msg, new_start_time
+
+ return False, None, current_prediction.t_start
+
+
+class CUSUMDetector:
+ """Adaptive-Variance CUSUM detector with variance-based threshold adaptation."""
+
+ def __init__(self, window_size: int = 50, shared_resources=None, show_init: bool = True, verbose: bool = False):
+ """Initialize AV-CUSUM detector with rolling window size (default: 50)."""
+ self.window_size = window_size
+ self.shared_resources = shared_resources
+ self.show_init = show_init
+ self.verbose = verbose
+
+ self.sum_pos = 0.0
+ self.sum_neg = 0.0
+ self.reference = None
+ self.initialized = False
+
+ self.adaptive_threshold = 0.0
+ self.adaptive_drift = 0.0
+ self.rolling_std = 0.0
+ self.frequency_buffer = []
+
+ self.console = Console()
+
+    def _update_adaptive_parameters(self, freq: float):
+        """Calculate thresholds automatically from the rolling standard deviation."""
+        if self.shared_resources and hasattr(self.shared_resources, 'cusum_frequencies'):
+            lock = getattr(self.shared_resources, 'cusum_lock', None) or nullcontext()
+            with lock:
+                all_freqs = list(self.shared_resources.cusum_frequencies)
+            recent_freqs = all_freqs[-self.window_size-1:-1] if len(all_freqs) > 1 else []
+        else:
+            self.frequency_buffer.append(freq)
+            if len(self.frequency_buffer) > self.window_size:
+                self.frequency_buffer.pop(0)
+            recent_freqs = self.frequency_buffer[:-1] if len(self.frequency_buffer) > 1 else []
+
+ if self.verbose:
+ self.console.print(f"[dim magenta][CUSUM DEBUG] Buffer for σ calculation (excluding current): {[f'{f:.3f}' for f in recent_freqs]} (len={len(recent_freqs)})[/]")
+
+ if len(recent_freqs) >= 3:
+ freqs = np.array(recent_freqs)
+            self.rolling_std = np.std(freqs)
+            # Floor σ so a perfectly flat window still yields a non-zero threshold
+            std_factor = max(self.rolling_std, 0.01)
+
+ self.adaptive_threshold = 2.0 * std_factor
+ self.adaptive_drift = 0.5 * std_factor
+
+ if self.verbose:
+ self.console.print(f"[dim cyan][CUSUM] σ={self.rolling_std:.3f}, "
+ f"h_t={self.adaptive_threshold:.3f} (2σ threshold), "
+ f"k_t={self.adaptive_drift:.3f} (0.5σ drift)[/]")
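+        # Worked example (illustrative): a rolling σ of 0.2 Hz gives
+        # h_t = 2σ = 0.4 and k_t = 0.5σ = 0.1, so a single observation must
+        # deviate by more than ~0.5 Hz (h_t + k_t) to trigger immediately.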
+
+ def _reset_cusum_state(self):
+ """Reset CUSUM state when no frequency is detected."""
+ self.sum_pos = 0.0
+ self.sum_neg = 0.0
+ self.reference = None
+ self.initialized = False
+
+ self.frequency_buffer.clear()
+ self.rolling_std = 0.0
+ self.adaptive_threshold = 0.0
+ self.adaptive_drift = 0.0
+
+        if self.shared_resources:
+            lock = getattr(self.shared_resources, 'cusum_lock', None) or nullcontext()
+            with lock:
+                del self.shared_resources.cusum_frequencies[:]
+                del self.shared_resources.cusum_timestamps[:]
+
+ self.console.print("[dim yellow][CUSUM] State cleared: Starting fresh when frequency resumes[/]")
+
+    def add_frequency(self, freq: float, timestamp: Optional[float] = None) -> Tuple[bool, Dict[str, Any]]:
+        """Update the CUSUM statistics with a new frequency; returns (change_detected, change_info)."""
+ if np.isnan(freq) or freq <= 0:
+ self.console.print("[yellow][AV-CUSUM] No frequency found - resetting algorithm state[/]")
+ self._reset_cusum_state()
+ return False, {}
+
+        if self.shared_resources:
+            lock = getattr(self.shared_resources, 'cusum_lock', None) or nullcontext()
+            with lock:
+                self.shared_resources.cusum_frequencies.append(freq)
+                self.shared_resources.cusum_timestamps.append(timestamp or 0.0)
+
+ self._update_adaptive_parameters(freq)
+
+        if not self.initialized:
+            min_init_samples = 3
+            # In shared mode read the shared list; in local mode fall back to the
+            # rolling buffer so the reference is also established without SharedResources
+            observed = list(self.shared_resources.cusum_frequencies) if self.shared_resources else self.frequency_buffer
+            if len(observed) >= min_init_samples:
+                first_freqs = observed[:min_init_samples]
+                self.reference = np.mean(first_freqs)
+                self.initialized = True
+                if self.show_init:
+                    self.console.print(f"[yellow][AV-CUSUM] Reference established: {self.reference:.3f} Hz "
+                                     f"(from first {min_init_samples} observations: {[f'{f:.3f}' for f in first_freqs]})[/]")
+            else:
+                self.console.print(f"[dim yellow][AV-CUSUM] Collecting calibration data ({len(observed)}/{min_init_samples})[/]")
+            return False, {}
+
+ deviation = freq - self.reference
+
+        # CUSUM recursions with adaptive drift k_t (old sums kept for the debug log)
+        old_sum_pos = self.sum_pos
+        old_sum_neg = self.sum_neg
+        self.sum_pos = max(0, old_sum_pos + deviation - self.adaptive_drift)
+        self.sum_neg = max(0, old_sum_neg - deviation - self.adaptive_drift)
+
+        if self.verbose:
+            current_window_size = len(self.shared_resources.cusum_frequencies) if self.shared_resources else len(self.frequency_buffer)
+
+            self.console.print(f"[dim yellow][AV-CUSUM DEBUG] Observation #{current_window_size}:[/]")
+            self.console.print(f" [dim]• Current freq: {freq:.3f} Hz[/]")
+            self.console.print(f" [dim]• Reference: {self.reference:.3f} Hz[/]")
+            self.console.print(f" [dim]• Deviation: {freq:.3f} - {self.reference:.3f} = {deviation:.3f}[/]")
+            self.console.print(f" [dim]• Adaptive drift: {self.adaptive_drift:.3f} (k_t = 0.5×σ, σ={self.rolling_std:.3f})[/]")
+            self.console.print(f" [dim]• Sum_pos: max(0, {old_sum_pos:.3f} + {deviation:.3f} - {self.adaptive_drift:.3f}) = {self.sum_pos:.3f}[/]")
+            self.console.print(f" [dim]• Sum_neg: max(0, {old_sum_neg:.3f} - {deviation:.3f} - {self.adaptive_drift:.3f}) = {self.sum_neg:.3f}[/]")
+            self.console.print(f" [dim]• Adaptive threshold: {self.adaptive_threshold:.3f} (h_t = 2.0×σ)[/]")
+            self.console.print(f" [dim]• Upward change test: {self.sum_pos:.3f} > {self.adaptive_threshold:.3f} ? {'UPWARD CHANGE!' if self.sum_pos > self.adaptive_threshold else 'No change'}[/]")
+            self.console.print(f" [dim]• Downward change test: {self.sum_neg:.3f} > {self.adaptive_threshold:.3f} ? {'DOWNWARD CHANGE!' if self.sum_neg > self.adaptive_threshold else 'No change'}[/]")
+
+ if self.shared_resources and hasattr(self.shared_resources, 'cusum_frequencies'):
+ sample_count = len(self.shared_resources.cusum_frequencies)
+ else:
+ sample_count = len(self.frequency_buffer)
+
+ if sample_count < 3 or self.adaptive_threshold <= 0:
+ return False, {}
+
+ upward_change = self.sum_pos > self.adaptive_threshold
+ downward_change = self.sum_neg > self.adaptive_threshold
+ change_detected = upward_change or downward_change
+
+ change_info = {
+ 'timestamp': timestamp,
+ 'frequency': freq,
+ 'reference': self.reference,
+ 'sum_pos': self.sum_pos,
+ 'sum_neg': self.sum_neg,
+ 'threshold': self.adaptive_threshold,
+ 'rolling_std': self.rolling_std,
+ 'deviation': deviation,
+ 'change_type': 'increase' if upward_change else 'decrease' if downward_change else 'none'
+ }
+
+ if change_detected:
+ change_type = change_info['change_type']
+ change_percent = abs(deviation / self.reference * 100) if self.reference != 0 else 0
+
+ self.console.print(f"[bold yellow][AV-CUSUM] CHANGE DETECTED! "
+ f"{self.reference:.3f}Hz → {freq:.3f}Hz "
+ f"({change_percent:.1f}% {change_type})[/]")
+ self.console.print(f"[yellow][AV-CUSUM] Sum_pos={self.sum_pos:.2f}, Sum_neg={self.sum_neg:.2f}, "
+ f"Adaptive_Threshold={self.adaptive_threshold:.2f}[/]")
+ self.console.print(f"[dim yellow]AV-CUSUM ANALYSIS: Cumulative sum exceeded adaptive threshold {self.adaptive_threshold:.2f}[/]")
+ self.console.print(f"[dim yellow]Detection method: {'Positive sum (upward trend)' if upward_change else 'Negative sum (downward trend)'}[/]")
+ self.console.print(f"[dim yellow]Adaptive drift: {self.adaptive_drift:.3f} (σ={self.rolling_std:.3f})[/]")
+
+ old_reference = self.reference
+ self.reference = freq
+ self.console.print(f"[cyan][CUSUM] Reference updated: {old_reference:.3f} → {self.reference:.3f} Hz "
+ f"({change_percent:.1f}% change)[/]")
+
+ self.sum_pos = 0.0
+ self.sum_neg = 0.0
+
+            if self.shared_resources:
+                lock = getattr(self.shared_resources, 'cusum_lock', None) or nullcontext()
+                with lock:
+                    old_window_size = len(self.shared_resources.cusum_frequencies)
+
+                    # Keep only the detection sample; discard pre-change history
+                    self.shared_resources.cusum_frequencies[:] = [freq]
+                    self.shared_resources.cusum_timestamps[:] = [timestamp or 0.0]
+
+                    self.console.print(f"[green][CUSUM] CHANGE POINT ADAPTATION: Discarded {old_window_size-1} past samples, "
+                                     f"starting fresh from current detection[/]")
+                    self.console.print(f"[green][CUSUM] WINDOW RESET: {old_window_size} → {len(self.shared_resources.cusum_frequencies)} samples[/]")
+
+                    self.shared_resources.cusum_change_count.value += 1
+
+ return change_detected, change_info
+
+
+def detect_pattern_change_cusum(
+ shared_resources,
+ current_prediction: Prediction,
+ detector: CUSUMDetector,
+ counter: int
+) -> Tuple[bool, Optional[str], float]:
+    """Run the AV-CUSUM detector on the latest prediction; returns (changed, log_msg, new_start_time)."""
+ current_freq = get_dominant(current_prediction)
+ current_time = current_prediction.t_end
+
+ if np.isnan(current_freq):
+ detector._reset_cusum_state()
+ return False, None, current_prediction.t_start
+
+ change_detected, change_info = detector.add_frequency(current_freq, current_time)
+
+ if not change_detected:
+ return False, None, current_prediction.t_start
+
+ change_type = change_info['change_type']
+ reference = change_info['reference']
+ threshold = change_info['threshold']
+ sum_pos = change_info['sum_pos']
+ sum_neg = change_info['sum_neg']
+
+ magnitude = abs(current_freq - reference)
+ percent_change = (magnitude / reference * 100) if reference > 0 else 0
+
+ log_msg = (
+ f"[bold red][CUSUM] CHANGE DETECTED! "
+ f"{reference:.1f}Hz → {current_freq:.1f}Hz "
+ f"(Δ={magnitude:.1f}Hz, {percent_change:.1f}% {change_type}) "
+ f"at sample {len(shared_resources.cusum_frequencies)}, time={current_time:.3f}s[/]\n"
+ f"[red][CUSUM] CUSUM stats: sum_pos={sum_pos:.2f}, sum_neg={sum_neg:.2f}, "
+ f"threshold={threshold}[/]\n"
+ f"[red][CUSUM] Cumulative sum exceeded threshold -> Starting fresh analysis[/]"
+ )
+
+    # Adapt the retained window to the severity of the change
+    if percent_change > 100:
+        min_window_size = 0.5
+    elif percent_change > 50:
+        min_window_size = 1.0
+    else:
+        min_window_size = 2.0
+
+ new_start_time = max(0, current_time - min_window_size)
+
+ try:
+ from ftio.prediction.online_analysis import get_socket_logger
+ logger = get_socket_logger()
+ logger.send_log("change_point", "CUSUM Change Point Detected", {
+ 'algorithm': 'CUSUM',
+ 'detection_time': current_time,
+ 'change_type': change_type,
+ 'frequency': current_freq,
+ 'reference': reference,
+ 'magnitude': magnitude,
+ 'percent_change': percent_change,
+ 'threshold': threshold,
+ 'counter': counter
+ })
+ except ImportError:
+ pass
+
+ return True, log_msg, new_start_time
+
+
+class SelfTuningPageHinkleyDetector:
+ """Self-Tuning Page-Hinkley detector with adaptive running mean baseline."""
+
+ def __init__(self, window_size: int = 10, shared_resources=None, show_init: bool = True, verbose: bool = False):
+ """Initialize STPH detector with rolling window size (default: 10)."""
+ self.window_size = window_size
+ self.shared_resources = shared_resources
+ self.show_init = show_init
+ self.verbose = verbose
+ self.console = Console()
+
+ self.adaptive_threshold = 0.0
+ self.adaptive_delta = 0.0
+ self.rolling_std = 0.0
+ self.frequency_buffer = []
+
+ self.cumulative_sum_pos = 0.0
+ self.cumulative_sum_neg = 0.0
+ self.reference_mean = 0.0
+ self.sum_of_samples = 0.0
+ self.sample_count = 0
+
+ if shared_resources and hasattr(shared_resources, 'pagehinkley_state'):
+ try:
+ state = dict(shared_resources.pagehinkley_state)
+ if state.get('initialized', False):
+ self.cumulative_sum_pos = state.get('cumulative_sum_pos', 0.0)
+ self.cumulative_sum_neg = state.get('cumulative_sum_neg', 0.0)
+ self.reference_mean = state.get('reference_mean', 0.0)
+ self.sum_of_samples = state.get('sum_of_samples', 0.0)
+ self.sample_count = state.get('sample_count', 0)
+ if self.verbose:
+ self.console.print(f"[green][PH DEBUG] Restored state: cusum_pos={self.cumulative_sum_pos:.3f}, cusum_neg={self.cumulative_sum_neg:.3f}, ref_mean={self.reference_mean:.3f}[/]")
+ else:
+ self._initialize_fresh_state()
+ except Exception as e:
+ if self.verbose:
+ self.console.print(f"[red][PH DEBUG] State restore failed: {e}[/]")
+ self._initialize_fresh_state()
+ else:
+ self._initialize_fresh_state()
+
+    def _update_adaptive_parameters(self, freq: float):
+        """Calculate thresholds automatically from the rolling standard deviation."""
+        if self.shared_resources and hasattr(self.shared_resources, 'pagehinkley_frequencies'):
+            lock = getattr(self.shared_resources, 'pagehinkley_lock', None) or nullcontext()
+            with lock:
+                all_freqs = list(self.shared_resources.pagehinkley_frequencies)
+            recent_freqs = all_freqs[-self.window_size-1:-1] if len(all_freqs) > 1 else []
+        else:
+            self.frequency_buffer.append(freq)
+            if len(self.frequency_buffer) > self.window_size:
+                self.frequency_buffer.pop(0)
+            recent_freqs = self.frequency_buffer[:-1] if len(self.frequency_buffer) > 1 else []
+
+ if len(recent_freqs) >= 3:
+ freqs = np.array(recent_freqs)
+            self.rolling_std = np.std(freqs)
+            # Floor σ so a perfectly flat window still yields a non-zero threshold
+            std_factor = max(self.rolling_std, 0.01)
+
+ self.adaptive_threshold = 2.0 * std_factor
+ self.adaptive_delta = 0.5 * std_factor
+
+ if self.verbose:
+ self.console.print(f"[dim magenta][Page-Hinkley] σ={self.rolling_std:.3f}, "
+ f"λ_t={self.adaptive_threshold:.3f} (2σ threshold), "
+ f"δ_t={self.adaptive_delta:.3f} (0.5σ delta)[/]")
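+        # Worked example (illustrative): a rolling σ of 0.2 Hz yields
+        # λ_t = 2σ = 0.4 and δ_t = 0.5σ = 0.1, so deviations from the running
+        # mean must accumulate past 0.4 before a change is declared.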
+
+ def _reset_pagehinkley_state(self):
+ """Reset Page-Hinkley state when no frequency is detected."""
+ self.cumulative_sum_pos = 0.0
+ self.cumulative_sum_neg = 0.0
+ self.reference_mean = 0.0
+ self.sum_of_samples = 0.0
+ self.sample_count = 0
+
+ self.frequency_buffer.clear()
+ self.rolling_std = 0.0
+ self.adaptive_threshold = 0.0
+ self.adaptive_delta = 0.0
+
+        if self.shared_resources:
+            lock = getattr(self.shared_resources, 'pagehinkley_lock', None) or nullcontext()
+            with lock:
+                if hasattr(self.shared_resources, 'pagehinkley_frequencies'):
+                    del self.shared_resources.pagehinkley_frequencies[:]
+                if hasattr(self.shared_resources, 'pagehinkley_timestamps'):
+                    del self.shared_resources.pagehinkley_timestamps[:]
+                if hasattr(self.shared_resources, 'pagehinkley_state'):
+                    self.shared_resources.pagehinkley_state.clear()
+
+ self.console.print("[dim yellow][STPH] State cleared: Starting fresh when frequency resumes[/]")
+
+ def _initialize_fresh_state(self):
+ """Initialize fresh Page-Hinkley state."""
+ self.cumulative_sum_pos = 0.0
+ self.cumulative_sum_neg = 0.0
+ self.reference_mean = 0.0
+ self.sum_of_samples = 0.0
+ self.sample_count = 0
+
+    def reset(self, current_freq: Optional[float] = None):
+        """Reset the cumulative sums, optionally re-seeding the reference with the current frequency."""
+ self.cumulative_sum_pos = 0.0
+ self.cumulative_sum_neg = 0.0
+
+ if current_freq is not None:
+ self.reference_mean = current_freq
+ self.sum_of_samples = current_freq
+ self.sample_count = 1
+ else:
+ self.reference_mean = 0.0
+ self.sum_of_samples = 0.0
+ self.sample_count = 0
+
+        if self.shared_resources:
+            lock = getattr(self.shared_resources, 'pagehinkley_lock', None) or nullcontext()
+            with lock:
+                if hasattr(self.shared_resources, 'pagehinkley_state'):
+                    self.shared_resources.pagehinkley_state.update({
+                        'cumulative_sum_pos': 0.0,
+                        'cumulative_sum_neg': 0.0,
+                        'reference_mean': self.reference_mean,
+                        'sum_of_samples': self.sum_of_samples,
+                        'sample_count': self.sample_count,
+                        'initialized': True
+                    })
+                if hasattr(self.shared_resources, 'pagehinkley_frequencies'):
+                    if current_freq is not None:
+                        self.shared_resources.pagehinkley_frequencies[:] = [current_freq]
+                    else:
+                        del self.shared_resources.pagehinkley_frequencies[:]
+                if hasattr(self.shared_resources, 'pagehinkley_timestamps'):
+                    if current_freq is not None:
+                        last_timestamp = self.shared_resources.pagehinkley_timestamps[-1] if len(self.shared_resources.pagehinkley_timestamps) > 0 else 0.0
+                        self.shared_resources.pagehinkley_timestamps[:] = [last_timestamp]
+                    else:
+                        del self.shared_resources.pagehinkley_timestamps[:]
+
+ if current_freq is not None:
+ self.console.print(f"[cyan][PH] Internal state reset with new reference: {current_freq:.3f} Hz[/]")
+ else:
+ self.console.print(f"[cyan][PH] Internal state reset: Page-Hinkley parameters reinitialized[/]")
+
+    def add_frequency(self, freq: float, timestamp: Optional[float] = None) -> Tuple[bool, float, Dict[str, Any]]:
+        """Update the Page-Hinkley statistics with a new frequency; returns (change_detected, triggering_sum, metadata)."""
+ if np.isnan(freq) or freq <= 0:
+ self.console.print("[yellow][STPH] No frequency found - resetting Page-Hinkley state[/]")
+ self._reset_pagehinkley_state()
+ return False, 0.0, {}
+
+ self._update_adaptive_parameters(freq)
+
+        if self.shared_resources:
+            lock = getattr(self.shared_resources, 'pagehinkley_lock', None) or nullcontext()
+            with lock:
+                self.shared_resources.pagehinkley_frequencies.append(freq)
+                self.shared_resources.pagehinkley_timestamps.append(timestamp or 0.0)
+
+ if self.sample_count == 0:
+ self.sample_count = 1
+ self.reference_mean = freq
+ self.sum_of_samples = freq
+ if self.show_init:
+ self.console.print(f"[yellow][STPH] Reference mean initialized: {self.reference_mean:.3f} Hz[/]")
+ else:
+ self.sample_count += 1
+ self.sum_of_samples += freq
+ self.reference_mean = self.sum_of_samples / self.sample_count
+
+ pos_difference = freq - self.reference_mean - self.adaptive_delta
+ old_cumsum_pos = self.cumulative_sum_pos
+ self.cumulative_sum_pos = max(0, self.cumulative_sum_pos + pos_difference)
+
+ neg_difference = self.reference_mean - freq - self.adaptive_delta
+ old_cumsum_neg = self.cumulative_sum_neg
+ self.cumulative_sum_neg = max(0, self.cumulative_sum_neg + neg_difference)
+
+ if self.verbose:
+ self.console.print(f"[dim magenta][STPH DEBUG] Sample #{self.sample_count}:[/]")
+ self.console.print(f" [dim]• Current freq: {freq:.3f} Hz[/]")
+ self.console.print(f" [dim]• Reference mean: {self.reference_mean:.3f} Hz[/]")
+ self.console.print(f" [dim]• Adaptive delta: {self.adaptive_delta:.3f}[/]")
+ self.console.print(f" [dim]• Positive difference: {freq:.3f} - {self.reference_mean:.3f} - {self.adaptive_delta:.3f} = {pos_difference:.3f}[/]")
+ self.console.print(f" [dim]• Sum_pos = max(0, {old_cumsum_pos:.3f} + {pos_difference:.3f}) = {self.cumulative_sum_pos:.3f}[/]")
+ self.console.print(f" [dim]• Negative difference: {self.reference_mean:.3f} - {freq:.3f} - {self.adaptive_delta:.3f} = {neg_difference:.3f}[/]")
+ self.console.print(f" [dim]• Sum_neg = max(0, {old_cumsum_neg:.3f} + {neg_difference:.3f}) = {self.cumulative_sum_neg:.3f}[/]")
+ self.console.print(f" [dim]• Adaptive threshold: {self.adaptive_threshold:.3f}[/]")
+ self.console.print(f" [dim]• Upward change test: {self.cumulative_sum_pos:.3f} > {self.adaptive_threshold:.3f} = {'UPWARD CHANGE!' if self.cumulative_sum_pos > self.adaptive_threshold else 'No change'}[/]")
+ self.console.print(f" [dim]• Downward change test: {self.cumulative_sum_neg:.3f} > {self.adaptive_threshold:.3f} = {'DOWNWARD CHANGE!' if self.cumulative_sum_neg > self.adaptive_threshold else 'No change'}[/]")
+
+        if self.shared_resources and hasattr(self.shared_resources, 'pagehinkley_state'):
+            lock = getattr(self.shared_resources, 'pagehinkley_lock', None) or nullcontext()
+            with lock:
+                self.shared_resources.pagehinkley_state.update({
+                    'cumulative_sum_pos': self.cumulative_sum_pos,
+                    'cumulative_sum_neg': self.cumulative_sum_neg,
+                    'reference_mean': self.reference_mean,
+                    'sum_of_samples': self.sum_of_samples,
+                    'sample_count': self.sample_count,
+                    'initialized': True
+                })
+
+ if self.shared_resources and hasattr(self.shared_resources, 'pagehinkley_frequencies'):
+ sample_count = len(self.shared_resources.pagehinkley_frequencies)
+ else:
+ sample_count = len(self.frequency_buffer)
+
+ if sample_count < 3 or self.adaptive_threshold <= 0:
+ return False, 0.0, {}
+
+ upward_change = self.cumulative_sum_pos > self.adaptive_threshold
+ downward_change = self.cumulative_sum_neg > self.adaptive_threshold
+ change_detected = upward_change or downward_change
+
+ if upward_change:
+ change_type = "increase"
+ triggering_sum = self.cumulative_sum_pos
+ elif downward_change:
+ change_type = "decrease"
+ triggering_sum = self.cumulative_sum_neg
+ else:
+ change_type = "none"
+ triggering_sum = max(self.cumulative_sum_pos, self.cumulative_sum_neg)
+
+ if change_detected:
+ magnitude = abs(freq - self.reference_mean)
+ percent_change = (magnitude / self.reference_mean * 100) if self.reference_mean > 0 else 0
+
+ self.console.print(f"[bold magenta][STPH] CHANGE DETECTED! "
+ f"{self.reference_mean:.3f}Hz → {freq:.3f}Hz "
+ f"({percent_change:.1f}% {change_type})[/]")
+ self.console.print(f"[magenta][STPH] Sum_pos={self.cumulative_sum_pos:.2f}, Sum_neg={self.cumulative_sum_neg:.2f}, "
+ f"Adaptive_Threshold={self.adaptive_threshold:.3f} (σ={self.rolling_std:.3f})[/]")
+ self.console.print(f"[dim magenta]STPH ANALYSIS: Cumulative sum exceeded adaptive threshold {self.adaptive_threshold:.2f}[/]")
+ self.console.print(f"[dim magenta]Detection method: {'Positive sum (upward trend)' if upward_change else 'Negative sum (downward trend)'}[/]")
+ self.console.print(f"[dim magenta]Adaptive minimum detectable change: {self.adaptive_delta:.3f}[/]")
+
+            if self.shared_resources and hasattr(self.shared_resources, 'pagehinkley_change_count'):
+                lock = getattr(self.shared_resources, 'pagehinkley_lock', None) or nullcontext()
+                with lock:
+                    self.shared_resources.pagehinkley_change_count.value += 1
+
+ current_window_size = len(self.shared_resources.pagehinkley_frequencies) if self.shared_resources else self.sample_count
+
+ metadata = {
+ 'cumulative_sum_pos': self.cumulative_sum_pos,
+ 'cumulative_sum_neg': self.cumulative_sum_neg,
+ 'triggering_sum': triggering_sum,
+ 'change_type': change_type,
+ 'reference_mean': self.reference_mean,
+ 'frequency': freq,
+ 'window_size': current_window_size,
+ 'threshold': self.adaptive_threshold,
+ 'adaptive_delta': self.adaptive_delta,
+ 'rolling_std': self.rolling_std
+ }
+
+ return change_detected, triggering_sum, metadata
+
+
+def detect_pattern_change_pagehinkley(
+ shared_resources,
+ current_prediction: Prediction,
+ detector: SelfTuningPageHinkleyDetector,
+ counter: int
+) -> Tuple[bool, Optional[str], float]:
+    """Run the Page-Hinkley detector on the latest prediction; returns (changed, log_msg, new_start_time)."""
+
+ current_freq = get_dominant(current_prediction)
+ current_time = current_prediction.t_end
+
+ if current_freq is None or np.isnan(current_freq):
+ detector._reset_pagehinkley_state()
+ return False, None, current_prediction.t_start
+
+ change_detected, triggering_sum, metadata = detector.add_frequency(current_freq, current_time)
+
+ if change_detected:
+ detector.reset(current_freq=current_freq)
+
+ change_type = metadata.get("change_type", "unknown")
+ frequency = metadata.get("frequency", current_freq)
+ reference_mean = metadata.get("reference_mean", 0.0)
+ window_size = metadata.get("window_size", 0)
+
+ magnitude = abs(frequency - reference_mean)
+ percent_change = (magnitude / reference_mean * 100) if reference_mean > 0 else 0
+
+ direction_arrow = "increasing" if change_type == "increase" else "decreasing" if change_type == "decrease" else "stable"
+ log_message = (
+ f"[bold red][Page-Hinkley] PAGE-HINKLEY CHANGE DETECTED! {direction_arrow} "
+ f"{reference_mean:.1f}Hz → {frequency:.1f}Hz "
+ f"(Δ={magnitude:.1f}Hz, {percent_change:.1f}% {change_type}) "
+ f"at sample {window_size}, time={current_time:.3f}s[/]\n"
+ f"[red][Page-Hinkley] Page-Hinkley stats: sum_pos={metadata.get('cumulative_sum_pos', 0):.2f}, "
+ f"sum_neg={metadata.get('cumulative_sum_neg', 0):.2f}, threshold={detector.adaptive_threshold:.3f}[/]\n"
+ f"[red][Page-Hinkley] Cumulative sum exceeded threshold -> Starting fresh analysis[/]"
+ )
+
+ adaptive_start_time = current_time
+ if hasattr(shared_resources, 'pagehinkley_last_change_time'):
+ shared_resources.pagehinkley_last_change_time.value = current_time
+
+ logger = shared_resources.logger if hasattr(shared_resources, 'logger') else None
+ if logger:
+ logger.send_log("change_point", "Page-Hinkley Change Point Detected", {
+ 'algorithm': 'PageHinkley',
+ 'frequency': frequency,
+ 'reference_mean': reference_mean,
+ 'magnitude': magnitude,
+ 'percent_change': percent_change,
+ 'triggering_sum': triggering_sum,
+ 'change_type': change_type,
+ 'position': window_size,
+ 'timestamp': current_time,
+ 'threshold': detector.adaptive_threshold,
+ 'delta': detector.adaptive_delta,
+ 'prediction_counter': counter
+ })
+
+ return True, log_message, adaptive_start_time
+
+ return False, None, current_prediction.t_start
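+
+
+if __name__ == "__main__":
+    # Minimal local-mode sketch (illustrative values, no SharedResources):
+    # feed a synthetic dominant-frequency series with a step change and
+    # watch the AV-CUSUM detector flag the jump from ~2 Hz to ~4 Hz.
+    demo = CUSUMDetector(window_size=10, shared_resources=None, show_init=True, verbose=False)
+    series = [2.0, 2.1, 1.9, 2.0, 2.05, 4.0, 4.1, 3.9, 4.0]
+    for i, f in enumerate(series):
+        changed, info = demo.add_frequency(f, timestamp=float(i))
+        if changed:
+            print(f"change at t={i}: {info['change_type']} towards {f} Hz")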
diff --git a/ftio/prediction/online_analysis.py b/ftio/prediction/online_analysis.py
index 839ac85..c797fb9 100644
--- a/ftio/prediction/online_analysis.py
+++ b/ftio/prediction/online_analysis.py
@@ -1,10 +1,10 @@
-"""Performs the analysis for prediction. This includes the calculation of ftio and parsing of the data into a queue"""
-
from __future__ import annotations
from argparse import Namespace
-
import numpy as np
+import socket
+import json
+import time
from rich.console import Console
from ftio.cli import ftio_core
@@ -13,53 +13,186 @@
from ftio.plot.units import set_unit
from ftio.prediction.helper import get_dominant
from ftio.prediction.shared_resources import SharedResources
+from ftio.prediction.change_point_detection import ChangePointDetector, detect_pattern_change_adwin, CUSUMDetector, detect_pattern_change_cusum, SelfTuningPageHinkleyDetector, detect_pattern_change_pagehinkley
+
+
+class SocketLogger:
+    """Best-effort TCP logger that streams JSON lines to an optional GUI server."""
+ def __init__(self, host='localhost', port=9999):
+ self.host = host
+ self.port = port
+ self.socket = None
+ self.connected = False
+ self._connect()
+
+ def _connect(self):
+ """Attempt to connect to the GUI server"""
+ try:
+ self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ self.socket.settimeout(1.0) # 1 second timeout
+ self.socket.connect((self.host, self.port))
+ self.connected = True
+ print(f"[INFO] Connected to GUI server at {self.host}:{self.port}")
+ except (socket.error, ConnectionRefusedError, socket.timeout) as e:
+ self.connected = False
+ if self.socket:
+ self.socket.close()
+ self.socket = None
+ print(f"[WARNING] Failed to connect to GUI server at {self.host}:{self.port}: {e}")
+ print(f"[WARNING] GUI logging disabled - messages will only appear in console")
+
+ def send_log(self, log_type: str, message: str, data: dict = None):
+ if not self.connected:
+ return
+
+ try:
+ log_data = {
+ 'timestamp': time.time(),
+ 'type': log_type,
+ 'message': message,
+ 'data': data or {}
+ }
+
+ json_data = json.dumps(log_data) + '\n'
+            self.socket.sendall(json_data.encode('utf-8'))
+
+ except (socket.error, BrokenPipeError, ConnectionResetError) as e:
+ print(f"[WARNING] Failed to send to GUI: {e}")
+ self.connected = False
+ if self.socket:
+ self.socket.close()
+ self.socket = None
+
+ def close(self):
+ if self.socket:
+ self.socket.close()
+ self.socket = None
+ self.connected = False
+
+
+_socket_logger = None
+
+def get_socket_logger():
+ global _socket_logger
+ if _socket_logger is None:
+ _socket_logger = SocketLogger()
+ return _socket_logger
+
+def strip_rich_formatting(text: str) -> str:
+    """Remove rich markup tags (e.g. [green], [dim yellow], [/]) before sending text to the GUI."""
+    import re
+
+    # Match the bare closing tag "[/]" and any lowercase style tag, including
+    # compound ones such as "[bold yellow]"; uppercase labels like "[ADWIN]" survive
+    return re.sub(r'\[/\]|\[/?[a-z][a-z ]*\]', '', text)
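+
+# Example: strip_rich_formatting("[purple][PREDICTOR] (#1):[/] Started")
+# returns "[PREDICTOR] (#1): Started" (uppercase bracketed labels are kept).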
+
+def log_to_gui_and_console(console: Console, message: str, log_type: str = "info", data: dict = None):
+    """Print a rich-formatted message to the console and forward a plain-text copy to the GUI."""
+ logger = get_socket_logger()
+ clean_message = strip_rich_formatting(message)
+
+ console.print(message)
+
+ logger.send_log(log_type, clean_message, data)
+
+
+_local_detector_cache: dict = {}
+
+def get_change_detector(shared_resources: SharedResources, algorithm: str = "adwin"):
+    """Return this process's detector for the chosen algorithm, creating it on first use."""
+    algo = (algorithm or "adwin").lower()
+    detector_key = f"{algo}_detector"
+    init_flag_attr = f"{algo}_initialized"
-def ftio_process(shared_resources: SharedResources, args: list[str], msgs=None) -> None:
- """Perform a single prediction
+ if detector_key in _local_detector_cache:
+ return _local_detector_cache[detector_key]
+
+ init_flag = getattr(shared_resources, init_flag_attr)
+ show_init_message = not init_flag.value
- Args:
- shared_resources (SharedResources): shared resources among processes
- args (list[str]): additional arguments passed to ftio
- """
+ if algo == "cusum":
+ detector = CUSUMDetector(window_size=50, shared_resources=shared_resources, show_init=show_init_message, verbose=True)
+ elif algo == "ph":
+ detector = SelfTuningPageHinkleyDetector(shared_resources=shared_resources, show_init=show_init_message, verbose=True)
+ else:
+ detector = ChangePointDetector(delta=0.05, shared_resources=shared_resources, show_init=show_init_message, verbose=True)
+
+ _local_detector_cache[detector_key] = detector
+ init_flag.value = True
+ return detector
+
+def ftio_process(shared_resources: SharedResources, args: list[str], msgs=None) -> None:
+    """Perform a single prediction, run change point detection, and stream the results."""
console = Console()
- console.print(f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] Started")
+ pred_id = shared_resources.count.value
+ start_msg = f"[purple][PREDICTOR] (#{pred_id}):[/] Started"
+ log_to_gui_and_console(console, start_msg, "predictor_start", {"count": pred_id})
- # Modify the arguments
args.extend(["-e", "no"])
args.extend(["-ts", f"{shared_resources.start_time.value:.2f}"])
- # perform prediction
- prediction, parsed_args = ftio_core.main(args, msgs)
- if not prediction:
- console.print("[yellow]Terminating prediction (no data passed) [/]")
- console.print(
- f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] Stopped"
- )
- exit(0)
+ prediction_list, parsed_args = ftio_core.main(args, msgs)
+ if not prediction_list:
+ log_to_gui_and_console(console,
+ "[yellow]Terminating prediction (no data passed)[/]",
+ "termination", {"reason": "no_data"})
+ return
- if not isinstance(prediction, list) or len(prediction) != 1:
- raise ValueError(
- "[red][PREDICTOR] (#{shared_resources.count.value}):[/] predictor should be called on exactly on file"
- )
-
- # get the prediction
- prediction = prediction[-1]
- # plot_bar_with_rich(shared_resources.t_app,shared_resources.b_app, width_percentage=0.9)
+ prediction = prediction_list[-1]
+    # get_dominant may return NaN when no periodic pattern exists; `or 0.0`
+    # would not catch NaN (NaN is truthy), so keep the raw value for isnan checks
+    freq = get_dominant(prediction)
- # get data
- freq = get_dominant(prediction) # just get a single dominant value
-
- # save prediction results
save_data(prediction, shared_resources)
- # display results
text = display_result(freq, prediction, shared_resources)
-
- # data analysis to decrease window thus change start_time
text += window_adaptation(parsed_args, prediction, freq, shared_resources)
+ is_change_point = "[CHANGE_POINT]" in text
+ change_point_info = None
+ if is_change_point:
+ import re
+ t_match = re.search(r"t_s=([0-9.]+)", text)
+ f_match = re.search(r"change:\s*([0-9.]+)\s*→\s*([0-9.]+)", text)
+ change_point_info = {
+ "prediction_id": pred_id,
+ "timestamp": float(prediction.t_end),
+ "old_frequency": float(f_match.group(1)) if f_match else 0.0,
+ "new_frequency": float(f_match.group(2)) if f_match else freq,
+ "start_time": float(t_match.group(1)) if t_match else float(prediction.t_start)
+ }
+ candidates = [
+ {"frequency": f, "confidence": c}
+ for f, c in zip(prediction.dominant_freq, prediction.conf)
+ ]
+ if candidates:
+ best = max(candidates, key=lambda c: c["confidence"])
+ dominant_freq = best["frequency"]
+ dominant_period = 1.0 / dominant_freq if dominant_freq > 0 else 0.0
+ confidence = best["confidence"]
+ else:
+ dominant_freq = dominant_period = confidence = 0.0
+
+ structured_prediction = {
+ "prediction_id": pred_id,
+ "timestamp": str(time.time()),
+ "dominant_freq": dominant_freq,
+ "dominant_period": dominant_period,
+ "confidence": confidence,
+ "candidates": candidates,
+ "time_window": (float(prediction.t_start), float(prediction.t_end)),
+ "total_bytes": str(prediction.total_bytes),
+ "bytes_transferred": str(prediction.total_bytes),
+ "current_hits": int(shared_resources.hits.value),
+ "periodic_probability": 0.0,
+ "frequency_range": (0.0, 0.0),
+ "period_range": (0.0, 0.0),
+ "is_change_point": is_change_point,
+ "change_point": change_point_info,
+ }
+
+ get_socket_logger().send_log("prediction", "FTIO structured prediction", structured_prediction)
+ log_to_gui_and_console(console, text, "prediction_log", {"count": pred_id, "freq": dominant_freq})
+
+ shared_resources.count.value += 1
- # print text
- console.print(text)
def window_adaptation(
@@ -68,33 +201,84 @@ def window_adaptation(
freq: float,
shared_resources: SharedResources,
) -> str:
- """modifies the start time if conditions are true
-
- Args:
- args (argparse): command line arguments
- prediction (Prediction): result from FTIO
- freq (float|Nan): dominant frequency
- shared_resources (SharedResources): shared resources among processes
- text (str): text to display
-
- Returns:
- str: _description_
- """
- # average data/data processing
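+    """Adapt the start time of the analysis window.
+
+    Combines change point detection (CUSUM, Page-Hinkley, or ADWIN) with the
+    hit- and data-based adaptation modes, and returns the text to display.
+    """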
text = ""
t_s = prediction.t_start
t_e = prediction.t_end
total_bytes = prediction.total_bytes
- # Hits
+ prediction_count = shared_resources.count.value
+ text += f"Prediction #{prediction_count}\n"
+
text += hits(args, prediction, shared_resources)
- # time window adaptation
- if not np.isnan(freq):
- n_phases = (t_e - t_s) * freq
- avr_bytes = int(total_bytes / float(n_phases))
- unit, order = set_unit(avr_bytes, "B")
- avr_bytes = order * avr_bytes
+ algorithm = args.algorithm
+
+ detector = get_change_detector(shared_resources, algorithm)
+ if algorithm == "cusum":
+ change_detected, change_log, adaptive_start_time = detect_pattern_change_cusum(
+ shared_resources, prediction, detector, shared_resources.count.value
+ )
+ elif algorithm == "ph":
+ change_detected, change_log, adaptive_start_time = detect_pattern_change_pagehinkley(
+ shared_resources, prediction, detector, shared_resources.count.value
+ )
+ else:
+ change_detected, change_log, adaptive_start_time = detect_pattern_change_adwin(
+ shared_resources, prediction, detector, shared_resources.count.value
+ )
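+    # All three helpers share the same return contract:
+    # (change_detected: bool, change_log: str, adaptive_start_time: float)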
+
+ if np.isnan(freq):
+ if algorithm == "cusum":
+ cusum_samples = len(shared_resources.cusum_frequencies)
+ cusum_changes = shared_resources.cusum_change_count.value
+ text += f"[dim][CUSUM STATE: {cusum_samples} samples, {cusum_changes} changes detected so far][/]\n"
+ if cusum_samples > 0:
+                last_freq = shared_resources.cusum_frequencies[-1]  # non-empty: guarded by cusum_samples > 0
+ text += f"[dim][LAST KNOWN FREQ: {last_freq:.3f} Hz][/]\n"
+ elif algorithm == "ph":
+ ph_samples = len(shared_resources.pagehinkley_frequencies)
+ ph_changes = shared_resources.pagehinkley_change_count.value
+ text += f"[dim][PAGE-HINKLEY STATE: {ph_samples} samples, {ph_changes} changes detected so far][/]\n"
+ if ph_samples > 0:
+                last_freq = shared_resources.pagehinkley_frequencies[-1]  # non-empty: guarded by ph_samples > 0
+ text += f"[dim][LAST KNOWN FREQ: {last_freq:.3f} Hz][/]\n"
+ else: # ADWIN
+ adwin_samples = len(shared_resources.adwin_frequencies)
+ adwin_changes = shared_resources.adwin_change_count.value
+ text += f"[dim][ADWIN STATE: {adwin_samples} samples, {adwin_changes} changes detected so far][/]\n"
+ if adwin_samples > 0:
+                last_freq = shared_resources.adwin_frequencies[-1]  # non-empty: guarded by adwin_samples > 0
+ text += f"[dim][LAST KNOWN FREQ: {last_freq:.3f} Hz][/]\n"
+
+ if change_detected and change_log:
+ text += f"{change_log}\n"
+ min_window_size = 1.0
+ safe_adaptive_start = min(adaptive_start_time, t_e - min_window_size)
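+        # e.g. t_e = 12.0 s and adaptive_start_time = 11.7 s clamp to 11.0 s,
+        # so at least min_window_size (1 s) of signal remains in the window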
+
+ if safe_adaptive_start >= 0 and (t_e - safe_adaptive_start) >= min_window_size:
+ t_s = safe_adaptive_start
+            algorithm_name = algorithm.upper()  # args.algorithm was read unconditionally above
+ text += f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/][green] {algorithm_name} adapted window to start at {t_s:.3f}s (window size: {t_e - t_s:.3f}s)[/]\n"
+ else:
+ t_s = max(0, t_e - min_window_size)
+            algorithm_name = algorithm.upper()  # args.algorithm was read unconditionally above
+ text += f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/][yellow] {algorithm_name} adaptation would create unsafe window, using conservative {min_window_size}s window[/]\n"
+ if not np.isnan(freq) and freq > 0:
+ time_window = t_e - t_s
+ if time_window > 0:
+ n_phases = time_window * freq
+ if n_phases > 0:
+ avr_bytes = int(total_bytes / float(n_phases))
+ unit, order = set_unit(avr_bytes, "B")
+ avr_bytes = order * avr_bytes
+ else:
+ n_phases = 0
+ avr_bytes = 0
+ unit = "B"
+ else:
+ n_phases = 0
+ avr_bytes = 0
+ unit = "B"
# FIXME this needs to compensate for a smaller windows
if not args.window_adaptation:
@@ -103,48 +287,69 @@ def window_adaptation(
f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] Average transferred {avr_bytes:.0f} {unit}\n"
)
- # adaptive time window
- if "frequency_hits" in args.window_adaptation:
+ if "frequency_hits" in args.window_adaptation and not change_detected:
if shared_resources.hits.value > args.hits:
if (
True
- ): # np.abs(avr_bytes - (total_bytes-aggregated_bytes.value)) < 100:
+ ):
tmp = t_e - 3 * 1 / freq
t_s = tmp if tmp > 0 else 0
text += f"[bold purple][PREDICTOR] (#{shared_resources.count.value}):[/][green]Adjusting start time to {t_s} sec\n[/]"
else:
- t_s = 0
- if shared_resources.hits.value == 0:
- text += f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/][red bold] Resetting start time to {t_s} sec\n[/]"
- elif "data" in args.window_adaptation and len(shared_resources.data) > 0:
+ if not change_detected:
+ t_s = 0
+ if shared_resources.hits.value == 0:
+ text += f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/][red bold] Resetting start time to {t_s} sec\n[/]"
+ elif "data" in args.window_adaptation and len(shared_resources.data) > 0 and not change_detected:
text += f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/][green]Trying time window adaptation: {shared_resources.count.value:.0f} =? { args.hits * shared_resources.hits.value:.0f}\n[/]"
if shared_resources.count.value == args.hits * shared_resources.hits.value:
- # t_s = shared_resources.data[-shared_resources.count.value]['t_start']
- # text += f'[bold purple][PREDICTOR] (#{shared_resources.count.value}):[/][green]Adjusting start time to t_start {t_s} sec\n[/]'
if len(shared_resources.t_flush) > 0:
print(shared_resources.t_flush)
index = int(args.hits * shared_resources.hits.value - 1)
t_s = shared_resources.t_flush[index]
text += f"[bold purple][PREDICTOR] (#{shared_resources.count.value}):[/][green]Adjusting start time to t_flush[{index}] {t_s} sec\n[/]"
- # TODO 1: Make sanity check -- see if the same number of bytes was transferred
- # TODO 2: Train a model to validate the predictions?
+ if not np.isnan(freq):
+ if algorithm == "cusum":
+ samples = len(shared_resources.cusum_frequencies)
+ changes = shared_resources.cusum_change_count.value
+            recent_freqs = list(shared_resources.cusum_frequencies)[-5:]
+ elif algorithm == "ph":
+ samples = len(shared_resources.pagehinkley_frequencies)
+ changes = shared_resources.pagehinkley_change_count.value
+            recent_freqs = list(shared_resources.pagehinkley_frequencies)[-5:]
+ else: # ADWIN
+ samples = len(shared_resources.adwin_frequencies)
+ changes = shared_resources.adwin_change_count.value
+            recent_freqs = list(shared_resources.adwin_frequencies)[-5:]
+
+ success_rate = (samples / prediction_count) * 100 if prediction_count > 0 else 0
+
+ text += f"\n[bold cyan]{algorithm.upper()} ANALYSIS (Prediction #{prediction_count})[/]\n"
+ text += f"[cyan]Frequency detections: {samples}/{prediction_count} ({success_rate:.1f}% success)[/]\n"
+ text += f"[cyan]Pattern changes detected: {changes}[/]\n"
+ text += f"[cyan]Current frequency: {freq:.3f} Hz ({1/freq:.2f}s period)[/]\n"
+
+ if samples > 1:
+ text += f"[cyan]Recent freq history: {[f'{f:.3f}Hz' for f in recent_freqs]}[/]\n"
+
+ if len(recent_freqs) >= 2:
+ trend = "increasing" if recent_freqs[-1] > recent_freqs[-2] else "decreasing" if recent_freqs[-1] < recent_freqs[-2] else "stable"
+ text += f"[cyan]Frequency trend: {trend}[/]\n"
+
+ text += f"[cyan]{algorithm.upper()} window size: {samples} samples[/]\n"
+ text += f"[cyan]{algorithm.upper()} changes detected: {changes}[/]\n"
+
+ text += f"[bold cyan]{'='*50}[/]\n\n"
+
text += f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] Ended"
shared_resources.start_time.value = t_s
return text
def save_data(prediction, shared_resources) -> None:
- """Put all data from `prediction` in a `queue`. The total bytes are as well saved here.
-
- Args:
- prediction (dict): result from FTIO
- shared_resources (SharedResources): shared resources among processes
- """
- # safe total transferred bytes
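+    """Queue the prediction results and accumulate the total transferred bytes."""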
shared_resources.aggregated_bytes.value += prediction.total_bytes
- # save data
shared_resources.queue.put(
{
"phase": shared_resources.count.value,
@@ -157,7 +362,6 @@ def save_data(prediction, shared_resources) -> None:
"total_bytes": prediction.total_bytes,
"ranks": prediction.ranks,
"freq": prediction.freq,
- # 'hits': shared_resources.hits.value,
}
)
@@ -165,43 +369,29 @@ def save_data(prediction, shared_resources) -> None:
def display_result(
freq: float, prediction: Prediction, shared_resources: SharedResources
) -> str:
- """Displays the results from FTIO
-
- Args:
- freq (float): dominant frequency
- prediction (Prediction): prediction setting from FTIO
- shared_resources (SharedResources): shared resources among processes
-
- Returns:
- str: text to print to console
- """
text = ""
- # Dominant frequency
if not np.isnan(freq):
text = f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] Dominant freq {freq:.3f} Hz ({1/freq if freq != 0 else 0:.2f} sec)\n"
+ else:
+ text = f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] No dominant frequency found\n"
- # Candidates
- text += (
- f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] Freq candidates: \n"
- )
- for i, f_d in enumerate(prediction.dominant_freq):
- text += (
- f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] {i}) "
- f"{f_d:.2f} Hz -- conf {prediction.conf[i]:.2f}\n"
- )
+ if len(prediction.dominant_freq) > 0:
+ text += f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] Freq candidates ({len(prediction.dominant_freq)} found): \n"
+ for i, f_d in enumerate(prediction.dominant_freq):
+ text += (
+ f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] {i}) "
+ f"{f_d:.2f} Hz -- conf {prediction.conf[i]:.2f}\n"
+ )
+ else:
+ text += f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] No frequency candidates detected\n"
- # time window
text += f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] Time window {prediction.t_end-prediction.t_start:.3f} sec ([{prediction.t_start:.3f},{prediction.t_end:.3f}] sec)\n"
- # total bytes
total_bytes = shared_resources.aggregated_bytes.value
- # total_bytes = prediction.total_bytes
unit, order = set_unit(total_bytes, "B")
total_bytes = order * total_bytes
text += f"[purple][PREDICTOR] (#{shared_resources.count.value}):[/] Total bytes {total_bytes:.0f} {unit}\n"
- # Bytes since last time
- # tmp = abs(prediction.total_bytes -shared_resources.aggregated_bytes.value)
tmp = abs(shared_resources.aggregated_bytes.value)
unit, order = set_unit(tmp, "B")
tmp = order * tmp
diff --git a/ftio/prediction/probability_analysis.py b/ftio/prediction/probability_analysis.py
index d7498f0..092f6c9 100644
--- a/ftio/prediction/probability_analysis.py
+++ b/ftio/prediction/probability_analysis.py
@@ -1,28 +1,12 @@
import numpy as np
from rich.console import Console
-
import ftio.prediction.group as gp
from ftio.prediction.helper import get_dominant
from ftio.prediction.probability import Probability
+from ftio.prediction.change_point_detection import ChangePointDetector
-def find_probability(data: list[dict], method: str = "db", counter: int = -1) -> list:
- """Calculates the conditional probability that expresses
- how probable the frequency (event A) is given that the signal
- is periodic occurred (probability B).
- According to Bayes' Theorem, P(A|B) = P(B|A)*P(A)/P(B)
- P(B|A): Probability that the signal is periodic given that it has a frequency A --> 1
- P(A): Probability that the signal has the frequency A
- P(B): Probability that the signal has is periodic
-
- Args:
- data (dict): contacting predictions
- method (str): method to group the predictions (step or db)
- counter (int): number of predictions already executed
-
- Returns:
- out (dict): probability of predictions in ranges
- """
+def find_probability(data: list[dict], method: str = "db", counter: int = -1) -> list:
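+    """Estimate the conditional probability P(A|B) that the signal has
+    frequency A given that it is periodic (B), via Bayes' theorem with
+    P(B|A) = 1, grouping the predictions with the given method ("step" or "db").
+    """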
p_b = 0
p_a = []
p_a_given_b = 0
@@ -56,12 +40,9 @@ def find_probability(data: list[dict], method: str = "db", counter: int = -1) ->
f_min = np.inf
f_max = 0
for pred in grouped_prediction:
- # print(pred)
- # print(f"index is {group}, group is {pred['group']}")
if group == pred["group"]:
f_min = min(get_dominant(pred), f_min)
f_max = max(get_dominant(pred), f_max)
- # print(f"group: {group}, pred_group: {pred['group']}, freq: {get_dominant(pred):.3f}, f_min: {f_min:.3f}, f_max:{f_max:.3f}")
p_a += 1
p_a = p_a / len(data) if len(data) > 0 else 0
@@ -73,3 +54,41 @@ def find_probability(data: list[dict], method: str = "db", counter: int = -1) ->
out.append(prob)
return out
+
+
+def detect_pattern_change(shared_resources, prediction, detector, count):
+    """Feed the latest prediction into a change point detector.
+
+    Returns a tuple (change_detected, change_log, adaptive_start_time). When no
+    change is found or an error occurs, the window start stays at prediction.t_start.
+    """
+    console = Console()
+    try:
+        # get_dominant is imported at module level
+        freq = get_dominant(prediction)
+        verbose = getattr(detector, 'verbose', False)
+
+        if verbose:
+            console.print(f"[cyan][DEBUG] Change point detection called for prediction #{count}, freq={freq:.3f} Hz[/]")
+            console.print(f"[cyan][DEBUG] Detector calibrated: {detector.is_calibrated}, samples: {len(detector.frequencies)}[/]")
+
+        current_time = prediction.t_end
+        result = detector.add_prediction(prediction, current_time)
+
+        if verbose:
+            console.print(f"[cyan][DEBUG] Detector result: {result}[/]")
+
+        if result is not None:
+            change_point_idx, change_point_time = result
+            if verbose:
+                console.print(f"[green][DEBUG] CHANGE POINT DETECTED! Index: {change_point_idx}, Time: {change_point_time:.3f}[/]")
+
+            change_log = f"[red bold][CHANGE_POINT] t_s={change_point_time:.3f} sec[/]"
+            change_log += f"\n[purple][PREDICTOR] (#{count}):[/][yellow] Adapting analysis window to start at t_s={change_point_time:.3f}[/]"
+            return True, change_log, change_point_time
+
+        return False, "", prediction.t_start
+
+    except Exception as e:
+        console.print(f"[red]Change point detection error: {e}[/]")
+        return False, "", prediction.t_start
\ No newline at end of file
diff --git a/ftio/prediction/shared_resources.py b/ftio/prediction/shared_resources.py
index 45b21f9..636c565 100644
--- a/ftio/prediction/shared_resources.py
+++ b/ftio/prediction/shared_resources.py
@@ -9,25 +9,80 @@ def __init__(self):
def _init_shared_resources(self):
"""Initialize the shared resources."""
- # Queue for FTIO data
+
self.queue = self.manager.Queue()
- # list of dicts with all predictions so far
+
self.data = self.manager.list()
- # Total bytes transferred so far
+
self.aggregated_bytes = self.manager.Value("d", 0.0)
- # Hits indicating how often a dominant frequency was found
+
self.hits = self.manager.Value("d", 0.0)
- # Start time window for ftio
+
self.start_time = self.manager.Value("d", 0.0)
- # Number of prediction
+
self.count = self.manager.Value("i", 0)
- # Bandwidth and time appended between predictions
+
self.b_app = self.manager.list()
self.t_app = self.manager.list()
- # For triggering cargo
+
self.sync_trigger = self.manager.Queue()
- # saves when the dada ti received from gkfs
+
self.t_flush = self.manager.list()
+
+        # ADWIN change detector state
+        self.adwin_frequencies = self.manager.list()
+ self.adwin_timestamps = self.manager.list()
+ self.adwin_total_samples = self.manager.Value("i", 0)
+ self.adwin_change_count = self.manager.Value("i", 0)
+ self.adwin_last_change_time = self.manager.Value("d", 0.0)
+ self.adwin_initialized = self.manager.Value("b", False)
+
+        self.adwin_lock = self.manager.Lock()
+
+        # CUSUM change detector state
+        self.cusum_frequencies = self.manager.list()
+ self.cusum_timestamps = self.manager.list()
+ self.cusum_change_count = self.manager.Value("i", 0)
+ self.cusum_last_change_time = self.manager.Value("d", 0.0)
+ self.cusum_initialized = self.manager.Value("b", False)
+
+        self.cusum_lock = self.manager.Lock()
+
+        # Page-Hinkley change detector state
+        self.pagehinkley_frequencies = self.manager.list()
+ self.pagehinkley_timestamps = self.manager.list()
+ self.pagehinkley_change_count = self.manager.Value("i", 0)
+ self.pagehinkley_last_change_time = self.manager.Value("d", 0.0)
+ self.pagehinkley_initialized = self.manager.Value("b", False)
+
+        # Page-Hinkley running statistics shared across processes
+        self.pagehinkley_state = self.manager.dict({
+ 'cumulative_sum_pos': 0.0,
+ 'cumulative_sum_neg': 0.0,
+ 'reference_mean': 0.0,
+ 'sum_of_samples': 0.0,
+ 'sample_count': 0,
+ 'initialized': False
+ })
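+        # Illustrative Page-Hinkley update over this shared state (the detector
+        # implementation lives elsewhere; delta and lamb are assumed tuning
+        # constants, x is the newest frequency sample):
+        #   sample_count += 1; sum_of_samples += x
+        #   reference_mean = sum_of_samples / sample_count
+        #   cumulative_sum_pos = max(0.0, cumulative_sum_pos + x - reference_mean - delta)
+        #   cumulative_sum_neg = max(0.0, cumulative_sum_neg + reference_mean - x - delta)
+        #   a change is flagged once either cumulative sum exceeds lamb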
+
+        self.pagehinkley_lock = self.manager.Lock()
+
+        # Generic detector calibration state
+        self.detector_frequencies = self.manager.list()
+ self.detector_timestamps = self.manager.list()
+ self.detector_is_calibrated = self.manager.Value("b", False)
+ self.detector_reference_freq = self.manager.Value("d", 0.0)
+ self.detector_sensitivity = self.manager.Value("d", 0.0)
+ self.detector_threshold_factor = self.manager.Value("d", 0.0)
+
+        # Page-Hinkley initialization flag (the ADWIN and CUSUM flags are created above)
+        self.ph_initialized = self.manager.Value("b", False)
def restart(self):
"""Restart the manager and reinitialize shared resources."""
diff --git a/ftio/prediction/tasks.py b/ftio/prediction/tasks.py
index 73d74cb..c260ec0 100644
--- a/ftio/prediction/tasks.py
+++ b/ftio/prediction/tasks.py
@@ -70,23 +70,7 @@ def ftio_metric_task_save(
show: bool = False,
) -> None:
prediction = ftio_metric_task(metric, arrays, argv, ranks, show)
- # freq = get_dominant(prediction) #just get a single dominant value
if prediction:
- # data.append(
- # {
- # "metric": f"{metric}",
- # "dominant_freq": prediction.dominant_freq,
- # "conf": prediction.conf,
- # "amp": prediction.amp,
- # "phi": prediction.phi,
- # "t_start": prediction.t_start,
- # "t_end": prediction.t_end,
- # "total_bytes": prediction.total_bytes,
- # "ranks": prediction.ranks,
- # "freq": prediction.freq,
- # "top_freq": prediction.top_freqs,
- # }
- # )
prediction.metric = metric
data.append(prediction)
else:
diff --git a/gui/dashboard.py b/gui/dashboard.py
new file mode 100644
index 0000000..50d280b
--- /dev/null
+++ b/gui/dashboard.py
@@ -0,0 +1,500 @@
+"""
+Main Dash application for FTIO prediction visualization
+"""
+import dash
+from dash import dcc, html, Input, Output, State, callback_context
+import plotly.graph_objects as go
+import time
+import numpy as np
+
+from gui.data_models import PredictionDataStore
+from gui.socket_listener import SocketListener
+from gui.visualizations import FrequencyTimelineViz, CosineWaveViz, DashboardViz
+
+
+class FTIODashApp:
+ """Main Dash application for FTIO prediction visualization"""
+
+ def __init__(self, host='localhost', port=8050, socket_port=9999):
+ self.app = dash.Dash(__name__)
+ self.host = host
+ self.port = port
+ self.socket_port = socket_port
+
+
+ self.data_store = PredictionDataStore()
+ self.selected_prediction_id = None
+ self.auto_update = True
+ self.last_update = time.time()
+
+ self.socket_listener = SocketListener(
+ port=socket_port,
+ data_callback=self._on_data_received
+ )
+
+
+ self._setup_layout()
+ self._setup_callbacks()
+
+
+ self.socket_thread = self.socket_listener.start_in_thread()
+
+ print(f"FTIO Dashboard starting on http://{host}:{port}")
+ print(f"Socket listener on port {socket_port}")
+
+ def _setup_layout(self):
+ """Setup the Dash app layout"""
+
+ self.app.layout = html.Div([
+
+ html.Div([
+ html.H1("FTIO Prediction Visualizer",
+ style={'textAlign': 'center', 'color': '#2c3e50', 'marginBottom': '20px'}),
+ html.Div([
+ html.P(f"Socket listening on port {self.socket_port}",
+ style={'textAlign': 'center', 'color': '#7f8c8d', 'margin': '0'}),
+ html.P(id='connection-status', children="Waiting for predictions...",
+ style={'textAlign': 'center', 'color': '#e74c3c', 'margin': '0'})
+ ])
+ ], style={'marginBottom': '30px'}),
+
+
+ html.Div([
+ html.Div([
+ html.Label("View Mode:"),
+ dcc.Dropdown(
+ id='view-mode',
+ options=[
+ {'label': 'Dashboard (Merged Cosine Wave)', 'value': 'dashboard'},
+ {'label': 'Individual Prediction (Single Wave)', 'value': 'cosine'}
+ ],
+ value='dashboard',
+ style={'width': '250px'}
+ )
+ ], style={'display': 'inline-block', 'marginRight': '20px'}),
+
+ html.Div([
+ html.Label("Select Prediction:"),
+ dcc.Dropdown(
+ id='prediction-selector',
+ options=[],
+ value=None,
+ placeholder="Select prediction for cosine view",
+ style={'width': '250px'}
+ )
+ ], style={'display': 'inline-block', 'marginRight': '20px'}),
+
+ html.Div([
+ html.Button("Clear Data", id='clear-button', n_clicks=0,
+ style={'backgroundColor': '#e74c3c', 'color': 'white',
+ 'border': 'none', 'padding': '8px 16px', 'cursor': 'pointer'}),
+ html.Button("Auto Update", id='auto-update-button', n_clicks=0,
+ style={'backgroundColor': '#27ae60', 'color': 'white',
+ 'border': 'none', 'padding': '8px 16px', 'cursor': 'pointer',
+ 'marginLeft': '10px'})
+ ], style={'display': 'inline-block'})
+
+ ], style={'textAlign': 'center', 'marginBottom': '20px', 'padding': '20px',
+ 'backgroundColor': '#ecf0f1', 'borderRadius': '5px'}),
+
+
+ html.Div(id='stats-bar', style={'marginBottom': '20px'}),
+
+
+ html.Div(id='main-viz', style={'height': '600px'}),
+
+
+ html.Div([
+ html.Hr(),
+ html.H3("All Predictions", style={'color': '#2c3e50', 'marginTop': '30px'}),
+ html.Div(
+ id='recent-predictions-table',
+ style={
+ 'maxHeight': '400px',
+ 'overflowY': 'auto',
+ 'border': '1px solid #ddd',
+ 'borderRadius': '8px',
+ 'padding': '10px',
+ 'backgroundColor': '#f9f9f9'
+ }
+ )
+ ], style={'marginTop': '20px'}),
+
+
+ dcc.Interval(
+ id='interval-component',
+ interval=2000, # Update every 2 seconds
+ n_intervals=0
+ ),
+
+
+ dcc.Store(id='data-store-trigger')
+ ])
+
+ def _setup_callbacks(self):
+ """Setup Dash callbacks"""
+
+ @self.app.callback(
+ [Output('main-viz', 'children'),
+ Output('prediction-selector', 'options'),
+ Output('prediction-selector', 'value'),
+ Output('connection-status', 'children'),
+ Output('connection-status', 'style'),
+ Output('stats-bar', 'children')],
+ [Input('interval-component', 'n_intervals'),
+ Input('view-mode', 'value'),
+ Input('prediction-selector', 'value'),
+ Input('clear-button', 'n_clicks')],
+ [State('auto-update-button', 'n_clicks')]
+ )
+ def update_visualization(n_intervals, view_mode, selected_pred_id, clear_clicks, auto_clicks):
+            """Refresh the main view, selector options, status line, and stats bar."""
+ ctx = callback_context
+ if ctx.triggered and ctx.triggered[0]['prop_id'] == 'clear-button.n_clicks':
+ if clear_clicks > 0:
+ self.data_store.clear_data()
+ self.selected_prediction_id = None
+
+
+ pred_options = []
+ pred_value = selected_pred_id
+
+ if self.data_store.predictions:
+ pred_options = [
+ {'label': f"Prediction #{p.prediction_id} ({p.dominant_freq:.2f} Hz)",
+ 'value': p.prediction_id}
+ for p in self.data_store.predictions[-50:] # Last 50 predictions
+ ]
+
+
+ if pred_value is None and self.data_store.predictions:
+ pred_value = self.data_store.predictions[-1].prediction_id
+
+
+ if self.data_store.predictions:
+ status_text = f"Connected - {len(self.data_store.predictions)} predictions received"
+ status_style = {'textAlign': 'center', 'color': '#27ae60', 'margin': '0'}
+ else:
+ status_text = "Waiting for predictions..."
+ status_style = {'textAlign': 'center', 'color': '#e74c3c', 'margin': '0'}
+
+
+ stats_bar = self._create_stats_bar()
+
+
+ if view_mode == 'cosine' and pred_value is not None:
+ fig = CosineWaveViz.create_cosine_plot(self.data_store, pred_value)
+ viz_component = dcc.Graph(figure=fig, style={'height': '600px'})
+
+ elif view_mode == 'dashboard':
+
+ fig = self._create_cosine_timeline_plot(self.data_store)
+ viz_component = dcc.Graph(figure=fig, style={'height': '600px'})
+
+ else:
+ viz_component = html.Div([
+ html.H3("Select a view mode and prediction to visualize",
+ style={'textAlign': 'center', 'color': '#7f8c8d', 'marginTop': '200px'})
+ ])
+
+ return viz_component, pred_options, pred_value, status_text, status_style, stats_bar
+
+ @self.app.callback(
+ Output('recent-predictions-table', 'children'),
+ [Input('interval-component', 'n_intervals')]
+ )
+ def update_recent_predictions_table(n_intervals):
+ """Update the recent predictions table"""
+
+ if not self.data_store.predictions:
+ return html.P("No predictions yet", style={'textAlign': 'center', 'color': '#7f8c8d'})
+
+
+ recent_preds = self.data_store.predictions
+
+
+ seen_ids = set()
+ unique_preds = []
+ for pred in reversed(recent_preds): # Newest first
+ if pred.prediction_id not in seen_ids:
+ seen_ids.add(pred.prediction_id)
+ unique_preds.append(pred)
+
+
+ rows = []
+ for i, pred in enumerate(unique_preds):
+
+ row_style = {
+ 'backgroundColor': '#ffffff' if i % 2 == 0 else '#f8f9fa',
+ 'padding': '8px',
+ 'borderBottom': '1px solid #dee2e6'
+ }
+
+
+ if pred.dominant_freq == 0 or pred.dominant_freq is None:
+
+ row = html.Tr([
+ html.Td(f"#{pred.prediction_id}", style={'fontWeight': 'bold', 'color': '#999'}),
+ html.Td("—", style={'color': '#999', 'textAlign': 'center', 'fontStyle': 'italic'}),
+ html.Td("No pattern detected", style={'color': '#999', 'fontStyle': 'italic'})
+ ], style=row_style)
+ else:
+
+ change_point_text = ""
+ if pred.is_change_point and pred.change_point:
+ cp = pred.change_point
+ change_point_text = f"🔴 {cp.old_frequency:.2f} → {cp.new_frequency:.2f} Hz"
+
+ row = html.Tr([
+ html.Td(f"#{pred.prediction_id}", style={'fontWeight': 'bold', 'color': '#495057'}),
+ html.Td(f"{pred.dominant_freq:.2f} Hz", style={'color': '#007bff'}),
+ html.Td(change_point_text, style={'color': 'red' if pred.is_change_point else 'black'})
+ ], style=row_style)
+
+ rows.append(row)
+
+
+ table = html.Table([
+ html.Thead([
+ html.Tr([
+ html.Th("ID", style={'backgroundColor': '#6c757d', 'color': 'white', 'padding': '12px'}),
+ html.Th("Frequency", style={'backgroundColor': '#6c757d', 'color': 'white', 'padding': '12px'}),
+ html.Th("Change Point", style={'backgroundColor': '#6c757d', 'color': 'white', 'padding': '12px'})
+ ])
+ ]),
+ html.Tbody(rows)
+ ], style={
+ 'width': '100%',
+ 'borderCollapse': 'collapse',
+ 'marginTop': '10px',
+ 'boxShadow': '0 2px 4px rgba(0,0,0,0.1)',
+ 'borderRadius': '8px',
+ 'overflow': 'hidden'
+ })
+
+ return table
+
+ def _create_stats_bar(self):
+ """Create statistics bar component"""
+
+ if not self.data_store.predictions:
+ return html.Div()
+
+
+ total_preds = len(self.data_store.predictions)
+ total_changes = len(self.data_store.change_points)
+ latest_pred = self.data_store.predictions[-1]
+
+ stats_items = [
+ html.Div([
+ html.H4(str(total_preds), style={'margin': '0', 'color': '#2c3e50'}),
+ html.P("Total Predictions", style={'margin': '0', 'fontSize': '12px', 'color': '#7f8c8d'})
+ ], style={'textAlign': 'center', 'flex': '1'}),
+
+ html.Div([
+ html.H4(str(total_changes), style={'margin': '0', 'color': '#e74c3c'}),
+ html.P("Change Points", style={'margin': '0', 'fontSize': '12px', 'color': '#7f8c8d'})
+ ], style={'textAlign': 'center', 'flex': '1'}),
+
+ html.Div([
+ html.H4(f"{latest_pred.dominant_freq:.2f} Hz", style={'margin': '0', 'color': '#27ae60'}),
+ html.P("Latest Frequency", style={'margin': '0', 'fontSize': '12px', 'color': '#7f8c8d'})
+ ], style={'textAlign': 'center', 'flex': '1'}),
+
+ html.Div([
+ html.H4(f"{latest_pred.confidence:.1f}%", style={'margin': '0', 'color': '#3498db'}),
+ html.P("Latest Confidence", style={'margin': '0', 'fontSize': '12px', 'color': '#7f8c8d'})
+ ], style={'textAlign': 'center', 'flex': '1'})
+ ]
+
+ return html.Div(stats_items, style={
+ 'display': 'flex',
+ 'justifyContent': 'space-around',
+ 'backgroundColor': '#f8f9fa',
+ 'padding': '15px',
+ 'borderRadius': '5px',
+ 'border': '1px solid #dee2e6'
+ })
+
+ def _on_data_received(self, data):
+ """Callback when new data is received from socket"""
+ print(f"[DEBUG] Dashboard received data: {data}")
+
+ if data['type'] == 'prediction':
+ prediction_data = data['data']
+ self.data_store.add_prediction(prediction_data)
+
+ print(f"[DEBUG] Added prediction #{prediction_data.prediction_id}: "
+ f"{prediction_data.dominant_freq:.2f} Hz "
+ f"({'CHANGE POINT' if prediction_data.is_change_point else 'normal'})")
+
+ self.last_update = time.time()
+ else:
+ print(f"[DEBUG] Received non-prediction data: type={data.get('type')}")
+
+ def _create_cosine_timeline_plot(self, data_store):
+ """Create single continuous cosine wave showing I/O pattern evolution"""
+
+ if not data_store.predictions:
+ fig = go.Figure()
+ fig.add_annotation(
+ x=0.5, y=0.5,
+ text="Waiting for predictions...",
+ showarrow=False,
+ font=dict(size=16, color="gray")
+ )
+ fig.update_layout(
+ xaxis=dict(visible=False),
+ yaxis=dict(visible=False),
+ title="I/O Pattern Timeline (Continuous Cosine Wave)"
+ )
+ return fig
+
+
+ last_3_predictions = data_store.get_latest_predictions(3)
+
+
+ sorted_predictions = sorted(last_3_predictions, key=lambda p: p.time_window[0])
+
+
+ global_time = []
+ global_cosine = []
+ cumulative_time = 0.0
+ segment_info = [] # For change point markers
+
+ for pred in sorted_predictions:
+ t_start, t_end = pred.time_window
+ duration = max(0.001, t_end - t_start) # Ensure positive duration
+ freq = pred.dominant_freq
+
+
+ if freq == 0 or freq is None:
+
+ num_points = 100
+ t_local = np.linspace(0, duration, num_points)
+ t_global = cumulative_time + t_local
+
+
+ global_time.extend(t_global.tolist())
+ global_cosine.extend([None] * num_points) # None creates a gap
+ else:
+
+ num_points = max(100, int(freq * duration * 50)) # 50 points per cycle
+
+
+ t_local = np.linspace(0, duration, num_points)
+
+
+ cosine_segment = np.cos(2 * np.pi * freq * t_local)
+
+
+ t_global = cumulative_time + t_local
+
+
+ global_time.extend(t_global.tolist())
+ global_cosine.extend(cosine_segment.tolist())
+
+
+ segment_start = cumulative_time
+ segment_end = cumulative_time + duration
+ segment_info.append((segment_start, segment_end, pred))
+
+
+ cumulative_time += duration
+
+ fig = go.Figure()
+
+
+ fig.add_trace(go.Scatter(
+ x=global_time,
+ y=global_cosine,
+ mode='lines',
+ name='I/O Pattern Evolution',
+ line=dict(color='#1f77b4', width=2),
+ connectgaps=False, # DON'T connect across None values - creates visible gaps
+ hovertemplate="I/O Pattern
" +
+ "Time: %{x:.3f} s
" +
+ "Amplitude: %{y:.3f}"
+ ))
+
+
+ for seg_start, seg_end, pred in segment_info:
+ if pred.dominant_freq == 0 or pred.dominant_freq is None:
+ fig.add_vrect(
+ x0=seg_start,
+ x1=seg_end,
+ fillcolor="gray",
+ opacity=0.15,
+ layer="below",
+ line_width=0,
+ annotation_text="No pattern",
+ annotation_position="top"
+ )
+
+
+ for seg_start, seg_end, pred in segment_info:
+ if pred.is_change_point and pred.change_point:
+ marker_time = seg_start # Mark at the START of the changed segment
+
+
+ fig.add_vline(
+ x=marker_time,
+ line_dash="solid",
+ line_color="red",
+ line_width=4,
+ opacity=0.8
+ )
+
+
+ fig.add_annotation(
+ x=marker_time,
+ y=1.1,
+ text=f"🔴 CHANGE
{pred.change_point.old_frequency:.2f}→{pred.change_point.new_frequency:.2f} Hz",
+ showarrow=True,
+ arrowhead=2,
+ arrowsize=1,
+ arrowwidth=2,
+ arrowcolor="red",
+ ax=0,
+ ay=-40,
+ font=dict(size=12, color="red", family="Arial Black"),
+ bgcolor="rgba(255,255,255,0.9)",
+ bordercolor="red",
+ borderwidth=2
+ )
+
+
+ fig.update_layout(
+ title="I/O Pattern Timeline (Continuous Evolution)",
+ xaxis_title="Time (s) - Concatenated Segments",
+ yaxis_title="I/O Pattern Amplitude",
+ showlegend=True,
+ height=600,
+ hovermode='x unified',
+ yaxis=dict(range=[-1.2, 1.2]),
+            uirevision='constant'  # keep user zoom/pan state across periodic updates
+ )
+
+ return fig
+
+ def run(self, debug=False):
+ """Run the Dash application"""
+ try:
+ self.app.run(host=self.host, port=self.port, debug=debug)
+ except KeyboardInterrupt:
+ print("\nShutting down FTIO Dashboard...")
+ self.socket_listener.stop_server()
+ except Exception as e:
+ print(f"Error running dashboard: {e}")
+ self.socket_listener.stop_server()
+
+
+if __name__ == "__main__":
+
+ dashboard = FTIODashApp(host='localhost', port=8050, socket_port=9999)
+ dashboard.run(debug=False)
diff --git a/gui/data_models.py b/gui/data_models.py
new file mode 100644
index 0000000..d2e1a30
--- /dev/null
+++ b/gui/data_models.py
@@ -0,0 +1,128 @@
+"""
+Data models for storing and managing prediction data from FTIO
+"""
+from dataclasses import dataclass
+from typing import List, Optional, Dict
+import numpy as np
+
+
+@dataclass
+class FrequencyCandidate:
+ """Individual frequency candidate with confidence"""
+ frequency: float
+ confidence: float
+
+
+@dataclass
+class ChangePoint:
+ """ADWIN detected change point information"""
+ prediction_id: int
+ timestamp: float
+ old_frequency: float
+ new_frequency: float
+ frequency_change_percent: float
+ sample_number: int
+ cut_position: int
+ total_samples: int
+
+
+@dataclass
+class PredictionData:
+ """Single prediction instance data"""
+ prediction_id: int
+ timestamp: str
+ dominant_freq: float
+ dominant_period: float
+ confidence: float
+ candidates: List[FrequencyCandidate]
+ time_window: tuple # (start, end) in seconds
+ total_bytes: str
+ bytes_transferred: str
+ current_hits: int
+ periodic_probability: float
+ frequency_range: tuple # (min_freq, max_freq)
+ period_range: tuple # (min_period, max_period)
+ is_change_point: bool = False
+ change_point: Optional[ChangePoint] = None
+ sample_number: Optional[int] = None
+
+
+class PredictionDataStore:
+ """Manages all prediction data and provides query methods"""
+
+ def __init__(self):
+ self.predictions: List[PredictionData] = []
+ self.change_points: List[ChangePoint] = []
+ self.current_prediction_id = -1
+
+ def add_prediction(self, prediction: PredictionData):
+ """Add a new prediction to the store"""
+ self.predictions.append(prediction)
+ if prediction.is_change_point and prediction.change_point:
+ self.change_points.append(prediction.change_point)
+
+ def get_prediction_by_id(self, pred_id: int) -> Optional[PredictionData]:
+ """Get prediction by ID"""
+ for pred in self.predictions:
+ if pred.prediction_id == pred_id:
+ return pred
+ return None
+
+ def get_frequency_timeline(self) -> tuple:
+ """Get data for frequency timeline plot"""
+ if not self.predictions:
+ return [], [], []
+
+ pred_ids = [p.prediction_id for p in self.predictions]
+ frequencies = [p.dominant_freq for p in self.predictions]
+ confidences = [p.confidence for p in self.predictions]
+
+ return pred_ids, frequencies, confidences
+
+ def get_candidate_frequencies(self) -> Dict[int, List[FrequencyCandidate]]:
+ """Get all candidate frequencies by prediction ID"""
+ candidates_dict = {}
+ for pred in self.predictions:
+ if pred.candidates:
+ candidates_dict[pred.prediction_id] = pred.candidates
+ return candidates_dict
+
+ def get_change_points_for_timeline(self) -> tuple:
+ """Get change point data for timeline visualization"""
+ if not self.change_points:
+ return [], [], []
+
+ pred_ids = [cp.prediction_id for cp in self.change_points]
+ frequencies = [cp.new_frequency for cp in self.change_points]
+ labels = [f"{cp.old_frequency:.2f} → {cp.new_frequency:.2f} Hz"
+ for cp in self.change_points]
+
+ return pred_ids, frequencies, labels
+
+ def generate_cosine_wave(self, prediction_id: int, num_points: int = 1000) -> tuple:
+ """Generate cosine wave data for a specific prediction - DOMINANT FREQUENCY ONLY"""
+ pred = self.get_prediction_by_id(prediction_id)
+ if not pred:
+ return [], [], []
+
+ start_time, end_time = pred.time_window
+ duration = end_time - start_time
+
+ t_relative = np.linspace(0, duration, num_points)
+
+ primary_wave = np.cos(2 * np.pi * pred.dominant_freq * t_relative)
+
+ candidate_waves = []
+
+ return t_relative, primary_wave, candidate_waves
+
+ def get_latest_predictions(self, n: int = 50) -> List[PredictionData]:
+ """Get the latest N predictions"""
+ return self.predictions[-n:] if len(self.predictions) >= n else self.predictions
+
+ def clear_data(self):
+ """Clear all stored data"""
+ self.predictions.clear()
+ self.change_points.clear()
+ self.current_prediction_id = -1
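+
+
+# Illustrative usage of the store (all field values are made up):
+#   store = PredictionDataStore()
+#   store.add_prediction(PredictionData(
+#       prediction_id=0, timestamp="", dominant_freq=0.5, dominant_period=2.0,
+#       confidence=87.0, candidates=[FrequencyCandidate(0.5, 0.87)],
+#       time_window=(0.0, 10.0), total_bytes="100 MB", bytes_transferred="100 MB",
+#       current_hits=1, periodic_probability=92.5, frequency_range=(0.4, 0.6),
+#       period_range=(1.7, 2.5)))
+#   ids, freqs, confs = store.get_frequency_timeline()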
diff --git a/gui/requirements.txt b/gui/requirements.txt
new file mode 100644
index 0000000..620d95a
--- /dev/null
+++ b/gui/requirements.txt
@@ -0,0 +1,5 @@
+# GUI Dependencies for FTIO Dashboard
+dash>=2.14.0
+plotly>=5.15.0
+pandas>=1.5.0
+numpy>=1.24.0
diff --git a/gui/run_dashboard.py b/gui/run_dashboard.py
new file mode 100755
index 0000000..dc5b4f7
--- /dev/null
+++ b/gui/run_dashboard.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+"""
+Launcher script for FTIO GUI Dashboard
+"""
+import sys
+import os
+import argparse
+
+# Add the parent directory to Python path so we can import from ftio
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from gui.dashboard import FTIODashApp
+
+
+def main():
+ parser = argparse.ArgumentParser(description='FTIO Prediction GUI Dashboard')
+ parser.add_argument('--host', default='localhost', help='Dashboard host (default: localhost)')
+ parser.add_argument('--port', type=int, default=8050, help='Dashboard port (default: 8050)')
+ parser.add_argument('--socket-port', type=int, default=9999, help='Socket listener port (default: 9999)')
+ parser.add_argument('--debug', action='store_true', help='Run in debug mode')
+
+ args = parser.parse_args()
+
+ print("=" * 60)
+ print("FTIO Prediction GUI Dashboard")
+ print("=" * 60)
+ print(f"Dashboard URL: http://{args.host}:{args.port}")
+ print(f"Socket listener: {args.socket_port}")
+ print("")
+ print("Instructions:")
+ print("1. Start this dashboard")
+ print("2. Run your FTIO predictor with socket logging enabled")
+ print("3. Watch real-time predictions and change points in the browser")
+ print("")
+ print("Press Ctrl+C to stop")
+ print("=" * 60)
+
+ try:
+ dashboard = FTIODashApp(
+ host=args.host,
+ port=args.port,
+ socket_port=args.socket_port
+ )
+ dashboard.run(debug=args.debug)
+ except KeyboardInterrupt:
+ print("\nDashboard stopped by user")
+ except Exception as e:
+ print(f"Error: {e}")
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/gui/socket_listener.py b/gui/socket_listener.py
new file mode 100644
index 0000000..ad0b0c2
--- /dev/null
+++ b/gui/socket_listener.py
@@ -0,0 +1,377 @@
+"""
+Socket listener for receiving FTIO prediction logs and parsing them into structured data
+"""
+import socket
+import json
+import threading
+import re
+import logging
+from typing import Optional, Callable
+from gui.data_models import PredictionData, ChangePoint, FrequencyCandidate
+
+
+class LogParser:
+ """Parses FTIO prediction log messages into structured data"""
+
+ def __init__(self):
+ self.patterns = {
+ 'prediction_start': re.compile(r'\[PREDICTOR\]\s+\(#(\d+)\):\s+Started'),
+ 'prediction_end': re.compile(r'\[PREDICTOR\]\s+\(#(\d+)\):\s+Ended'),
+ 'dominant_freq': re.compile(r'\[PREDICTOR\]\s+\(#(\d+)\):\s+Dominant freq\s+([\d.]+)\s+Hz\s+\(([\d.]+)\s+sec\)'),
+ 'freq_candidates': re.compile(r'\[PREDICTOR\]\s+\(#(\d+)\):\s+\d+\)\s+([\d.]+)\s+Hz\s+--\s+conf\s+([\d.]+)'),
+ 'time_window': re.compile(r'\[PREDICTOR\]\s+\(#(\d+)\):\s+Time window\s+([\d.]+)\s+sec\s+\(\[([\d.]+),([\d.]+)\]\s+sec\)'),
+ 'total_bytes': re.compile(r'\[PREDICTOR\]\s+\(#(\d+)\):\s+Total bytes\s+(.+)'),
+ 'bytes_transferred': re.compile(r'\[PREDICTOR\]\s+\(#(\d+)\):\s+Bytes transferred since last time\s+(.+)'),
+ 'current_hits': re.compile(r'\[PREDICTOR\]\s+\(#(\d+)\):\s+Current hits\s+([\d.]+)'),
+ 'periodic_prob': re.compile(r'\[PREDICTOR\]\s+P\(periodic\)\s+=\s+([\d.]+)%'),
+ 'freq_range': re.compile(r'\[PREDICTOR\]\s+P\(\[([\d.]+),([\d.]+)\]\s+Hz\)\s+=\s+([\d.]+)%'),
+ 'period_range': re.compile(r'\[PREDICTOR\]\s+\|->\s+\[([\d.]+),([\d.]+)\]\s+Hz\s+=\s+\[([\d.]+),([\d.]+)\]\s+sec'),
+ 'change_point': re.compile(r'\[ADWIN\]\s+Change detected at cut\s+(\d+)/(\d+)!'),
+ 'exact_change_point': re.compile(r'EXACT CHANGE POINT detected at\s+([\d.]+)\s+seconds!'),
+ 'frequency_shift': re.compile(r'\[ADWIN\]\s+Frequency shift:\s+([\d.]+)\s+→\s+([\d.]+)\s+Hz\s+\(([\d.]+)%\)'),
+ 'sample_number': re.compile(r'\[ADWIN\]\s+Sample\s+#(\d+):\s+freq=([\d.]+)\s+Hz'),
+ 'ph_change': re.compile(r'\[Page-Hinkley\]\s+PAGE-HINKLEY CHANGE DETECTED!\s+\w+\s+([\d.]+)Hz\s+→\s+([\d.]+)Hz.*?at sample\s+(\d+),\s+time=([\d.]+)s'),
+ 'stph_change': re.compile(r'\[STPH\]\s+CHANGE DETECTED!\s+([\d.]+)Hz\s+→\s+([\d.]+)Hz\s+\(([\d.]+)%'),
+ 'cusum_change': re.compile(r'\[AV-CUSUM\]\s+CHANGE DETECTED!\s+([\d.]+)Hz\s+→\s+([\d.]+)Hz\s+\(([\d.]+)%'),
+ 'cusum_change_alt': re.compile(r'\[CUSUM\]\s+CHANGE DETECTED!\s+([\d.]+)Hz\s+→\s+([\d.]+)Hz.*?time=([\d.]+)s'),
+ }
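+
+        # Illustrative log lines these patterns are written against
+        # (formats inferred from the predictor output; values made up):
+        #   "[PREDICTOR] (#3): Dominant freq 0.500 Hz (2.00 sec)"
+        #   "[PREDICTOR] (#3): 0) 0.50 Hz -- conf 0.87"
+        #   "[PREDICTOR] (#3): Time window 10.000 sec ([0.000,10.000] sec)"
+        #   "[ADWIN] Frequency shift: 0.50 → 1.00 Hz (100.0%)"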
+
+ self.current_prediction = None
+ self.current_change_point = None
+ self.candidates_buffer = []
+
+ def parse_log_message(self, message: str) -> Optional[dict]:
+        """Parse one log line; returns {'type': 'prediction', 'data': ...} once a prediction block completes, else None."""
+ match = self.patterns['prediction_start'].search(message)
+ if match:
+ pred_id = int(match.group(1))
+ self.current_prediction = {
+ 'prediction_id': pred_id,
+ 'candidates': [],
+ 'is_change_point': False,
+ 'change_point': None,
+ 'timestamp': '',
+ 'sample_number': None
+ }
+ self.candidates_buffer = []
+ return None
+
+ if not self.current_prediction:
+ return None
+
+ pred_id = self.current_prediction['prediction_id']
+
+ match = self.patterns['dominant_freq'].search(message)
+ if match and int(match.group(1)) == pred_id:
+ self.current_prediction['dominant_freq'] = float(match.group(2))
+ self.current_prediction['dominant_period'] = float(match.group(3))
+
+ match = self.patterns['freq_candidates'].search(message)
+ if match and int(match.group(1)) == pred_id:
+ freq = float(match.group(2))
+ conf = float(match.group(3))
+ self.candidates_buffer.append(FrequencyCandidate(freq, conf))
+
+ match = self.patterns['time_window'].search(message)
+ if match and int(match.group(1)) == pred_id:
+ self.current_prediction['time_window'] = (float(match.group(3)), float(match.group(4)))
+
+ match = self.patterns['total_bytes'].search(message)
+ if match and int(match.group(1)) == pred_id:
+ self.current_prediction['total_bytes'] = match.group(2).strip()
+
+ match = self.patterns['bytes_transferred'].search(message)
+ if match and int(match.group(1)) == pred_id:
+ self.current_prediction['bytes_transferred'] = match.group(2).strip()
+
+ match = self.patterns['current_hits'].search(message)
+ if match and int(match.group(1)) == pred_id:
+ self.current_prediction['current_hits'] = int(float(match.group(2)))
+
+ match = self.patterns['periodic_prob'].search(message)
+ if match:
+ self.current_prediction['periodic_probability'] = float(match.group(1))
+
+ match = self.patterns['freq_range'].search(message)
+ if match:
+ self.current_prediction['frequency_range'] = (float(match.group(1)), float(match.group(2)))
+ self.current_prediction['confidence'] = float(match.group(3))
+
+ match = self.patterns['period_range'].search(message)
+ if match:
+ self.current_prediction['period_range'] = (float(match.group(3)), float(match.group(4)))
+
+ match = self.patterns['change_point'].search(message)
+ if match:
+ self.current_change_point = {
+ 'cut_position': int(match.group(1)),
+ 'total_samples': int(match.group(2)),
+ 'prediction_id': pred_id
+ }
+ self.current_prediction['is_change_point'] = True
+
+ match = self.patterns['exact_change_point'].search(message)
+ if match and self.current_change_point:
+ self.current_change_point['timestamp'] = float(match.group(1))
+
+ match = self.patterns['frequency_shift'].search(message)
+ if match and self.current_change_point:
+ self.current_change_point['old_frequency'] = float(match.group(1))
+ self.current_change_point['new_frequency'] = float(match.group(2))
+ self.current_change_point['frequency_change_percent'] = float(match.group(3))
+
+ match = self.patterns['sample_number'].search(message)
+ if match:
+ self.current_prediction['sample_number'] = int(match.group(1))
+
+ match = self.patterns['ph_change'].search(message)
+ if match:
+ self.current_change_point = {
+ 'old_frequency': float(match.group(1)),
+ 'new_frequency': float(match.group(2)),
+ 'cut_position': int(match.group(3)),
+ 'total_samples': int(match.group(3)),
+ 'timestamp': float(match.group(4)),
+ 'frequency_change_percent': abs((float(match.group(2)) - float(match.group(1))) / float(match.group(1)) * 100) if float(match.group(1)) > 0 else 0,
+ 'prediction_id': pred_id
+ }
+ self.current_prediction['is_change_point'] = True
+
+ match = self.patterns['stph_change'].search(message)
+ if match:
+ if not self.current_change_point:
+ self.current_change_point = {'prediction_id': pred_id}
+ self.current_change_point['old_frequency'] = float(match.group(1))
+ self.current_change_point['new_frequency'] = float(match.group(2))
+ self.current_change_point['frequency_change_percent'] = float(match.group(3))
+ self.current_prediction['is_change_point'] = True
+
+ match = self.patterns['cusum_change'].search(message)
+ if match:
+ if not self.current_change_point:
+ self.current_change_point = {'prediction_id': pred_id}
+ self.current_change_point['old_frequency'] = float(match.group(1))
+ self.current_change_point['new_frequency'] = float(match.group(2))
+ self.current_change_point['frequency_change_percent'] = float(match.group(3))
+ self.current_prediction['is_change_point'] = True
+
+ match = self.patterns['cusum_change_alt'].search(message)
+ if match:
+ if not self.current_change_point:
+ self.current_change_point = {'prediction_id': pred_id}
+ self.current_change_point['old_frequency'] = float(match.group(1))
+ self.current_change_point['new_frequency'] = float(match.group(2))
+ self.current_change_point['timestamp'] = float(match.group(3))
+ self.current_change_point['frequency_change_percent'] = abs((float(match.group(2)) - float(match.group(1))) / float(match.group(1)) * 100) if float(match.group(1)) > 0 else 0
+ self.current_prediction['is_change_point'] = True
+
+ # Check for prediction end
+ match = self.patterns['prediction_end'].search(message)
+ if match and int(match.group(1)) == pred_id:
+ self.current_prediction['candidates'] = self.candidates_buffer.copy()
+
+ if self.current_prediction['is_change_point'] and self.current_change_point:
+ change_point = ChangePoint(
+ prediction_id=pred_id,
+ timestamp=self.current_change_point.get('timestamp', 0.0),
+ old_frequency=self.current_change_point.get('old_frequency', 0.0),
+ new_frequency=self.current_change_point.get('new_frequency', 0.0),
+ frequency_change_percent=self.current_change_point.get('frequency_change_percent', 0.0),
+ sample_number=self.current_prediction.get('sample_number', 0),
+ cut_position=self.current_change_point.get('cut_position', 0),
+ total_samples=self.current_change_point.get('total_samples', 0)
+ )
+ self.current_prediction['change_point'] = change_point
+
+ prediction_data = PredictionData(
+ prediction_id=pred_id,
+ timestamp=self.current_prediction.get('timestamp', ''),
+ dominant_freq=self.current_prediction.get('dominant_freq', 0.0),
+ dominant_period=self.current_prediction.get('dominant_period', 0.0),
+ confidence=self.current_prediction.get('confidence', 0.0),
+ candidates=self.current_prediction['candidates'],
+ time_window=self.current_prediction.get('time_window', (0.0, 0.0)),
+ total_bytes=self.current_prediction.get('total_bytes', ''),
+ bytes_transferred=self.current_prediction.get('bytes_transferred', ''),
+ current_hits=self.current_prediction.get('current_hits', 0),
+ periodic_probability=self.current_prediction.get('periodic_probability', 0.0),
+ frequency_range=self.current_prediction.get('frequency_range', (0.0, 0.0)),
+ period_range=self.current_prediction.get('period_range', (0.0, 0.0)),
+ is_change_point=self.current_prediction['is_change_point'],
+ change_point=self.current_prediction['change_point'],
+ sample_number=self.current_prediction.get('sample_number')
+ )
+
+ self.current_prediction = None
+ self.current_change_point = None
+ self.candidates_buffer = []
+
+ return {'type': 'prediction', 'data': prediction_data}
+
+ return None
+
+
+class SocketListener:
+ """Listens for socket connections and processes FTIO prediction logs"""
+
+ def __init__(self, host='localhost', port=9999, data_callback: Optional[Callable] = None):
+ self.host = host
+ self.port = port
+ self.data_callback = data_callback
+ self.parser = LogParser()
+ self.running = False
+ self.server_socket = None
+ self.client_connections = []
+
+ def start_server(self):
+ try:
+ self.server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ self.server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+
+ print(f"Attempting to bind to {self.host}:{self.port}")
+ self.server_socket.bind((self.host, self.port))
+ self.server_socket.listen(5)
+ self.running = True
+
+ print(f" Socket server successfully listening on {self.host}:{self.port}")
+
+ while self.running:
+ try:
+ client_socket, address = self.server_socket.accept()
+ print(f" Client connected from {address}")
+
+ client_thread = threading.Thread(
+ target=self._handle_client,
+ args=(client_socket, address)
+ )
+ client_thread.daemon = True
+ client_thread.start()
+
+ except socket.error as e:
+ if self.running:
+ print(f"Error accepting client connection: {e}")
+ break
+ except KeyboardInterrupt:
+ print(" Socket server interrupted")
+ break
+
+ except OSError as e:
+ if e.errno == 98: # Address already in use
+ print(f"Port {self.port} is already in use! Please use a different port or kill the process using it.")
+ else:
+ print(f"OS Error starting socket server: {e}")
+ self.running = False
+ except Exception as e:
+ print(f"Unexpected error starting socket server: {e}")
+ import traceback
+ traceback.print_exc()
+ self.running = False
+ finally:
+ self.stop_server()
+
+ def _handle_client(self, client_socket, address):
+ try:
+ while self.running:
+ try:
+ data = client_socket.recv(4096).decode('utf-8')
+ if not data:
+ break
+
+ try:
+ message_data = json.loads(data)
+
+ if message_data.get('type') == 'prediction' and 'data' in message_data:
+ print(f"[DEBUG] Direct prediction data received: #{message_data['data']['prediction_id']}")
+
+ pred_data = message_data['data']
+
+ candidates = []
+ for cand in pred_data.get('candidates', []):
+ candidates.append(FrequencyCandidate(
+ frequency=cand['frequency'],
+ confidence=cand['confidence']
+ ))
+
+ change_point = None
+ if pred_data.get('is_change_point') and pred_data.get('change_point'):
+ cp_data = pred_data['change_point']
+ change_point = ChangePoint(
+ prediction_id=cp_data['prediction_id'],
+ timestamp=cp_data['timestamp'],
+ old_frequency=cp_data['old_frequency'],
+ new_frequency=cp_data['new_frequency'],
+ frequency_change_percent=cp_data['frequency_change_percent'],
+ sample_number=cp_data['sample_number'],
+ cut_position=cp_data['cut_position'],
+ total_samples=cp_data['total_samples']
+ )
+
+ prediction_data = PredictionData(
+ prediction_id=pred_data['prediction_id'],
+ timestamp=pred_data['timestamp'],
+ dominant_freq=pred_data['dominant_freq'],
+ dominant_period=pred_data['dominant_period'],
+ confidence=pred_data['confidence'],
+ candidates=candidates,
+ time_window=tuple(pred_data['time_window']),
+ total_bytes=pred_data['total_bytes'],
+ bytes_transferred=pred_data['bytes_transferred'],
+ current_hits=pred_data['current_hits'],
+ periodic_probability=pred_data['periodic_probability'],
+ frequency_range=tuple(pred_data['frequency_range']),
+ period_range=tuple(pred_data['period_range']),
+ is_change_point=pred_data['is_change_point'],
+ change_point=change_point,
+ sample_number=pred_data.get('sample_number')
+ )
+
+ if self.data_callback:
+ self.data_callback({'type': 'prediction', 'data': prediction_data})
+
+ else:
+ log_message = message_data.get('message', '')
+
+ parsed_data = self.parser.parse_log_message(log_message)
+
+ if parsed_data and self.data_callback:
+ self.data_callback(parsed_data)
+
+ except json.JSONDecodeError:
+ # Handle plain text messages
+ parsed_data = self.parser.parse_log_message(data.strip())
+ if parsed_data and self.data_callback:
+ self.data_callback(parsed_data)
+
+ except socket.error:
+ break
+
+ except Exception as e:
+ logging.error(f"Error handling client {address}: {e}")
+ finally:
+ try:
+ client_socket.close()
+ print(f"Client {address} disconnected")
+            except OSError:
+ pass
+
+ def stop_server(self):
+ self.running = False
+ if self.server_socket:
+ try:
+ self.server_socket.close()
+            except OSError:
+ pass
+
+ for client_socket in self.client_connections:
+ try:
+ client_socket.close()
+            except OSError:
+ pass
+ self.client_connections.clear()
+ print("Socket server stopped")
+
+ def start_in_thread(self):
+ server_thread = threading.Thread(target=self.start_server)
+ server_thread.daemon = True
+ server_thread.start()
+ return server_thread
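+
+
+# Minimal client-side smoke test (assumes the listener is up on localhost:9999
+# and that a complete JSON object arrives in a single recv; illustrative only):
+#   import socket, json
+#   s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+#   s.connect(("localhost", 9999))
+#   s.sendall(json.dumps({"message": "[PREDICTOR] (#0): Started"}).encode("utf-8"))
+#   s.close()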
diff --git a/gui/visualizations.py b/gui/visualizations.py
new file mode 100644
index 0000000..d713899
--- /dev/null
+++ b/gui/visualizations.py
@@ -0,0 +1,314 @@
+"""
+Plotly/Dash visualization components for FTIO prediction data
+"""
+import plotly.graph_objects as go
+import plotly.express as px
+from plotly.subplots import make_subplots
+import numpy as np
+from typing import List, Tuple, Dict
+from gui.data_models import PredictionData, ChangePoint, PredictionDataStore
+
+
+class FrequencyTimelineViz:
+ """Creates frequency timeline visualization"""
+
+ @staticmethod
+ def create_timeline_plot(data_store: PredictionDataStore, title="FTIO Frequency Timeline"):
+ """Create main frequency timeline plot"""
+
+ pred_ids, frequencies, confidences = data_store.get_frequency_timeline()
+
+ if not pred_ids:
+ fig = go.Figure()
+ fig.add_annotation(
+ text="No prediction data available",
+ x=0.5, y=0.5,
+ xref="paper", yref="paper",
+ showarrow=False,
+ font=dict(size=16, color="gray")
+ )
+ fig.update_layout(
+ title=title,
+ xaxis_title="Prediction Index",
+ yaxis_title="Frequency (Hz)",
+ height=500
+ )
+ return fig
+
+ fig = go.Figure()
+
+ fig.add_trace(go.Scatter(
+ x=pred_ids,
+ y=frequencies,
+ mode='lines+markers',
+ name='Dominant Frequency',
+ line=dict(color='blue', width=2),
+ marker=dict(
+ size=8,
+ opacity=[conf/100.0 for conf in confidences],
+ color='blue',
+ line=dict(width=1, color='darkblue')
+ ),
+ hovertemplate="Prediction #%{x}
" +
+ "Frequency: %{y:.2f} Hz
" +
+ "Confidence: %{customdata:.1f}%",
+ customdata=confidences
+ ))
+
+ candidates_dict = data_store.get_candidate_frequencies()
+ for pred_id, candidates in candidates_dict.items():
+ for candidate in candidates:
+ if candidate.frequency != data_store.get_prediction_by_id(pred_id).dominant_freq:
+ fig.add_trace(go.Scatter(
+ x=[pred_id],
+ y=[candidate.frequency],
+ mode='markers',
+ name=f'Candidate (conf: {candidate.confidence:.2f})',
+ marker=dict(
+ size=6,
+ opacity=candidate.confidence,
+ color='orange',
+ symbol='diamond'
+ ),
+ showlegend=False,
+ hovertemplate=f"Candidate Frequency
" +
+ f"Frequency: {candidate.frequency:.2f} Hz
" +
+ f"Confidence: {candidate.confidence:.2f}"
+ ))
+
+ cp_pred_ids, cp_frequencies, cp_labels = data_store.get_change_points_for_timeline()
+
+ if cp_pred_ids:
+ fig.add_trace(go.Scatter(
+ x=cp_pred_ids,
+ y=cp_frequencies,
+ mode='markers',
+ name='Change Points',
+ marker=dict(
+ size=12,
+ color='red',
+ symbol='diamond',
+ line=dict(width=2, color='darkred')
+ ),
+ hovertemplate="Change Point
" +
+ "Prediction #%{x}
" +
+ "%{customdata}",
+ customdata=cp_labels
+ ))
+
+ for pred_id, freq, label in zip(cp_pred_ids, cp_frequencies, cp_labels):
+ fig.add_vline(
+ x=pred_id,
+ line_dash="dash",
+ line_color="red",
+ opacity=0.7,
+ annotation_text=label,
+ annotation_position="top"
+ )
+
+ fig.update_layout(
+ title=dict(
+ text=title,
+ font=dict(size=18, color='darkblue')
+ ),
+ xaxis=dict(
+ title="Prediction Index",
+ showgrid=True,
+ gridcolor='lightgray',
+ tickmode='linear'
+ ),
+ yaxis=dict(
+ title="Frequency (Hz)",
+ showgrid=True,
+ gridcolor='lightgray'
+ ),
+ hovermode='closest',
+ height=500,
+ margin=dict(l=60, r=60, t=80, b=60),
+ plot_bgcolor='white',
+ showlegend=True,
+ legend=dict(
+ x=0.02,
+ y=0.98,
+ bgcolor='rgba(255, 255, 255, 0.8)',
+ bordercolor='gray',
+ borderwidth=1
+ )
+ )
+
+ return fig
+
+
+class CosineWaveViz:
+ """Creates cosine wave visualization for individual predictions"""
+
+ @staticmethod
+ def create_cosine_plot(data_store: PredictionDataStore, prediction_id: int,
+ title=None, num_points=1000):
+ """Create cosine wave plot for a specific prediction"""
+
+ prediction = data_store.get_prediction_by_id(prediction_id)
+ if not prediction:
+ fig = go.Figure()
+ fig.add_annotation(
+ text=f"Prediction #{prediction_id} not found",
+ x=0.5, y=0.5,
+ xref="paper", yref="paper",
+ showarrow=False,
+ font=dict(size=16, color="gray")
+ )
+ fig.update_layout(
+ title=f"Cosine Wave - Prediction #{prediction_id}",
+ xaxis_title="Time (s)",
+ yaxis_title="Amplitude",
+ height=400
+ )
+ return fig
+
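+        # generate_cosine_wave is expected to return sample times, the
+        # dominant-frequency cosine, and per-candidate waves; only the
+        # primary wave is drawn below.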
+ t, primary_wave, candidate_waves = data_store.generate_cosine_wave(
+ prediction_id, num_points
+ )
+
+ if title is None:
+ title = (f"Cosine Wave - Prediction #{prediction_id} "
+ f"(f = {prediction.dominant_freq:.2f} Hz)")
+
+ fig = go.Figure()
+
+ fig.add_trace(go.Scatter(
+ x=t,
+ y=primary_wave,
+ mode='lines',
+ name=f'I/O Pattern: {prediction.dominant_freq:.2f} Hz',
+ line=dict(color='#1f77b4', width=3),
+ hovertemplate="I/O Pattern
" +
+ "Time: %{x:.3f} s
" +
+ "Amplitude: %{y:.3f}
" +
+ f"Frequency: {prediction.dominant_freq:.2f} Hz"
+ ))
+
+ if prediction.is_change_point and prediction.change_point:
+ cp_time = prediction.change_point.timestamp
+ start_time, end_time = prediction.time_window
+ if start_time <= cp_time <= end_time:
+ cp_relative = cp_time - start_time
+ fig.add_vline(
+ x=cp_relative,
+ line_dash="dash",
+ line_color="red",
+ line_width=3,
+ opacity=0.8,
+ annotation_text=(f"Change Point
"
+ f"{prediction.change_point.old_frequency:.2f} → "
+ f"{prediction.change_point.new_frequency:.2f} Hz"),
+ annotation_position="top"
+ )
+
+ start_time, end_time = prediction.time_window
+ duration = end_time - start_time
+ fig.update_layout(
+ title=dict(
+ text=title,
+ font=dict(size=16, color='darkblue')
+ ),
+ xaxis=dict(
+ title=f"Time (s) - Duration: {duration:.2f}s",
+ range=[0, duration],
+ showgrid=True,
+ gridcolor='lightgray'
+ ),
+ yaxis=dict(
+ title="Amplitude",
+ range=[-1.2, 1.2],
+ showgrid=True,
+ gridcolor='lightgray'
+ ),
+ height=400,
+ margin=dict(l=60, r=60, t=60, b=60),
+ plot_bgcolor='white',
+ showlegend=True,
+ legend=dict(
+ x=0.02,
+ y=0.98,
+ bgcolor='rgba(255, 255, 255, 0.8)',
+ bordercolor='gray',
+ borderwidth=1
+ )
+ )
+
+ return fig
+
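+# Minimal usage sketch (hypothetical `store` as above):
+#
+#   fig = CosineWaveViz.create_cosine_plot(store, prediction_id=3)
+#   fig.show()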
+
+class DashboardViz:
+ """Creates comprehensive dashboard visualization"""
+
+ @staticmethod
+ def create_dashboard(data_store: PredictionDataStore, selected_prediction_id=None):
+ """Create comprehensive dashboard with multiple views"""
+
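+        # 2x2 grid: the timeline spans the full top row via colspan=2; the
+        # bottom row holds the cosine view and the summary statistics. Note
+        # that add_trace copies only trace data, not layout shapes, so the
+        # timeline's change-point vlines are not carried over.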
+ fig = make_subplots(
+ rows=2, cols=2,
+            subplot_titles=(
+                "Frequency Timeline",
+                "Cosine Wave View",
+                "Statistics"
+            ),
+ specs=[
+ [{"colspan": 2}, None],
+ [{}, {}]
+ ],
+ row_heights=[0.6, 0.4],
+ vertical_spacing=0.1
+ )
+
+ timeline_fig = FrequencyTimelineViz.create_timeline_plot(data_store)
+ for trace in timeline_fig.data:
+ fig.add_trace(trace, row=1, col=1)
+
+ if selected_prediction_id is not None:
+ cosine_fig = CosineWaveViz.create_cosine_plot(data_store, selected_prediction_id)
+ for trace in cosine_fig.data:
+ fig.add_trace(trace, row=2, col=1)
+
+ stats = DashboardViz._calculate_stats(data_store)
+ fig.add_trace(go.Bar(
+ x=list(stats.keys()),
+ y=list(stats.values()),
+ name="Statistics",
+ marker_color='lightblue'
+ ), row=2, col=2)
+
+ fig.update_layout(
+ height=800,
+ title_text="FTIO Prediction Dashboard",
+ showlegend=True
+ )
+
+ fig.update_xaxes(title_text="Prediction Index", row=1, col=1)
+ fig.update_yaxes(title_text="Frequency (Hz)", row=1, col=1)
+ fig.update_xaxes(title_text="Time (s)", row=2, col=1)
+ fig.update_yaxes(title_text="Amplitude", row=2, col=1)
+ fig.update_xaxes(title_text="Metric", row=2, col=2)
+ fig.update_yaxes(title_text="Value", row=2, col=2)
+
+ return fig
+
+ @staticmethod
+ def _calculate_stats(data_store: PredictionDataStore) -> Dict[str, float]:
+ """Calculate basic statistics from prediction data"""
+ if not data_store.predictions:
+ return {}
+
+ frequencies = [p.dominant_freq for p in data_store.predictions]
+ confidences = [p.confidence for p in data_store.predictions]
+
+ stats = {
+ 'Total Predictions': len(data_store.predictions),
+ 'Change Points': len(data_store.change_points),
+ 'Avg Frequency': np.mean(frequencies),
+ 'Avg Confidence': np.mean(confidences),
+ 'Freq Std Dev': np.std(frequencies)
+ }
+
+ return stats
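+
+
+# Minimal usage sketch (hypothetical; `store` is a populated
+# PredictionDataStore as above):
+#
+#   fig = DashboardViz.create_dashboard(store, selected_prediction_id=0)
+#   fig.show()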