From c7885c5bfad985a3462fe548e1dd50c3d098a530 Mon Sep 17 00:00:00 2001 From: Paul Lizer Date: Fri, 16 Jan 2026 10:20:10 -0500 Subject: [PATCH 1/8] retention execution logging bug fix --- application/single_app/config.py | 2 +- application/single_app/functions_retention_policy.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/application/single_app/config.py b/application/single_app/config.py index 650393e6..18b81bad 100644 --- a/application/single_app/config.py +++ b/application/single_app/config.py @@ -88,7 +88,7 @@ EXECUTOR_TYPE = 'thread' EXECUTOR_MAX_WORKERS = 30 SESSION_TYPE = 'filesystem' -VERSION = "0.235.012" +VERSION = "0.235.014" SECRET_KEY = os.getenv('SECRET_KEY', 'dev-secret-key-change-in-production') diff --git a/application/single_app/functions_retention_policy.py b/application/single_app/functions_retention_policy.py index 02ba2eea..5ba9c286 100644 --- a/application/single_app/functions_retention_policy.py +++ b/application/single_app/functions_retention_policy.py @@ -447,11 +447,14 @@ def delete_aged_conversations(retention_days, workspace_type='personal', user_id cutoff_iso = cutoff_date.isoformat() # Query for aged conversations + # Handle cases where last_activity_at is null, undefined, or older than cutoff query = f""" SELECT c.id, c.title, c.last_activity_at, c.{partition_field} FROM c WHERE c.{partition_field} = @partition_value - AND (c.last_activity_at < @cutoff_date OR IS_NULL(c.last_activity_at)) + AND (c.last_activity_at < @cutoff_date + OR NOT IS_DEFINED(c.last_activity_at) + OR IS_NULL(c.last_activity_at)) """ parameters = [ @@ -535,7 +538,7 @@ def delete_aged_conversations(retention_days, workspace_type='personal', user_id is_bulk_operation=True, group_id=conversation_item.get('group_id'), public_workspace_id=conversation_item.get('public_workspace_id'), - deletion_reason='retention_policy' + additional_context={'deletion_reason': 'retention_policy'} ) # Delete conversation @@ -597,11 +600,14 @@ def 
delete_aged_documents(retention_days, workspace_type='personal', user_id=Non cutoff_iso = cutoff_date.isoformat() # Query for aged documents + # Handle cases where last_activity_at is null, undefined, or older than cutoff query = f""" SELECT c.id, c.file_name, c.title, c.last_activity_at, c.{partition_field}, c.user_id FROM c WHERE c.{partition_field} = @partition_value - AND (c.last_activity_at < @cutoff_date OR IS_NULL(c.last_activity_at)) + AND (c.last_activity_at < @cutoff_date + OR NOT IS_DEFINED(c.last_activity_at) + OR IS_NULL(c.last_activity_at)) """ parameters = [ From a3e210a0c7c32e068c7c67d3614a7504bec29ab7 Mon Sep 17 00:00:00 2001 From: Paul Lizer Date: Fri, 16 Jan 2026 10:20:40 -0500 Subject: [PATCH 2/8] debug timer reset with admin save bug fix --- .../route_frontend_admin_settings.py | 106 +++++++++++++----- 1 file changed, 76 insertions(+), 30 deletions(-) diff --git a/application/single_app/route_frontend_admin_settings.py b/application/single_app/route_frontend_admin_settings.py index 838a565c..45e69fdc 100644 --- a/application/single_app/route_frontend_admin_settings.py +++ b/application/single_app/route_frontend_admin_settings.py @@ -413,24 +413,47 @@ def admin_settings(): if debug_timer_value < min_val or debug_timer_value > max_val: debug_timer_value = min(max(debug_timer_value, min_val), max_val) + # Get existing timer settings to check if they've changed + existing_debug_timer_enabled = settings.get('debug_logging_timer_enabled', False) + existing_debug_timer_value = settings.get('debug_timer_value', 1) + existing_debug_timer_unit = settings.get('debug_timer_unit', 'hours') + existing_debug_logging_enabled = settings.get('enable_debug_logging', False) + existing_debug_turnoff_time = settings.get('debug_logging_turnoff_time') + + # Determine if timer settings have changed + timer_settings_changed = ( + debug_logging_timer_enabled != existing_debug_timer_enabled or + debug_timer_value != existing_debug_timer_value or + debug_timer_unit != 
existing_debug_timer_unit + ) + debug_logging_newly_enabled = enable_debug_logging and not existing_debug_logging_enabled + # Calculate debug logging turnoff time if timer is enabled and debug logging is on if enable_debug_logging and debug_logging_timer_enabled: - now = datetime.now() - - if debug_timer_unit == 'minutes': - delta = timedelta(minutes=debug_timer_value) - elif debug_timer_unit == 'hours': - delta = timedelta(hours=debug_timer_value) - elif debug_timer_unit == 'days': - delta = timedelta(days=debug_timer_value) - elif debug_timer_unit == 'weeks': - delta = timedelta(weeks=debug_timer_value) + # Only recalculate turnoff time if: + # 1. Timer settings have changed (value, unit, or enabled state), OR + # 2. Debug logging was just enabled, OR + # 3. No existing turnoff time exists + if timer_settings_changed or debug_logging_newly_enabled or not existing_debug_turnoff_time: + now = datetime.now() + + if debug_timer_unit == 'minutes': + delta = timedelta(minutes=debug_timer_value) + elif debug_timer_unit == 'hours': + delta = timedelta(hours=debug_timer_value) + elif debug_timer_unit == 'days': + delta = timedelta(days=debug_timer_value) + elif debug_timer_unit == 'weeks': + delta = timedelta(weeks=debug_timer_value) + else: + delta = timedelta(hours=1) # default fallback + + debug_logging_turnoff_time = now + delta + # Convert to ISO string for JSON serialization + debug_logging_turnoff_time_str = debug_logging_turnoff_time.isoformat() else: - delta = timedelta(hours=1) # default fallback - - debug_logging_turnoff_time = now + delta - # Convert to ISO string for JSON serialization - debug_logging_turnoff_time_str = debug_logging_turnoff_time.isoformat() + # Preserve existing turnoff time + debug_logging_turnoff_time_str = existing_debug_turnoff_time else: debug_logging_turnoff_time_str = None @@ -446,25 +469,48 @@ def admin_settings(): if file_timer_value < min_val or file_timer_value > max_val: file_timer_value = min(max(file_timer_value, min_val), 
max_val) + # Get existing file timer settings to check if they've changed + existing_file_timer_enabled = settings.get('file_processing_logs_timer_enabled', False) + existing_file_timer_value = settings.get('file_timer_value', 1) + existing_file_timer_unit = settings.get('file_timer_unit', 'hours') + existing_file_processing_logs_enabled = settings.get('enable_file_processing_logs', False) + existing_file_turnoff_time = settings.get('file_processing_logs_turnoff_time') + + # Determine if timer settings have changed + file_timer_settings_changed = ( + file_processing_logs_timer_enabled != existing_file_timer_enabled or + file_timer_value != existing_file_timer_value or + file_timer_unit != existing_file_timer_unit + ) + file_processing_logs_newly_enabled = enable_file_processing_logs and not existing_file_processing_logs_enabled + # Calculate file processing logs turnoff time if timer is enabled and file processing logs are on enable_file_processing_logs = form_data.get('enable_file_processing_logs') == 'on' if enable_file_processing_logs and file_processing_logs_timer_enabled: - now = datetime.now() - - if file_timer_unit == 'minutes': - delta = timedelta(minutes=file_timer_value) - elif file_timer_unit == 'hours': - delta = timedelta(hours=file_timer_value) - elif file_timer_unit == 'days': - delta = timedelta(days=file_timer_value) - elif file_timer_unit == 'weeks': - delta = timedelta(weeks=file_timer_value) + # Only recalculate turnoff time if: + # 1. Timer settings have changed (value, unit, or enabled state), OR + # 2. File processing logs was just enabled, OR + # 3. 
No existing turnoff time exists + if file_timer_settings_changed or file_processing_logs_newly_enabled or not existing_file_turnoff_time: + now = datetime.now() + + if file_timer_unit == 'minutes': + delta = timedelta(minutes=file_timer_value) + elif file_timer_unit == 'hours': + delta = timedelta(hours=file_timer_value) + elif file_timer_unit == 'days': + delta = timedelta(days=file_timer_value) + elif file_timer_unit == 'weeks': + delta = timedelta(weeks=file_timer_value) + else: + delta = timedelta(hours=1) # default fallback + + file_processing_logs_turnoff_time = now + delta + # Convert to ISO string for JSON serialization + file_processing_logs_turnoff_time_str = file_processing_logs_turnoff_time.isoformat() else: - delta = timedelta(hours=1) # default fallback - - file_processing_logs_turnoff_time = now + delta - # Convert to ISO string for JSON serialization - file_processing_logs_turnoff_time_str = file_processing_logs_turnoff_time.isoformat() + # Preserve existing turnoff time + file_processing_logs_turnoff_time_str = existing_file_turnoff_time else: file_processing_logs_turnoff_time_str = None From fa841c3da56c7f5d51cde62534ccb008c1216290 Mon Sep 17 00:00:00 2001 From: Paul Lizer Date: Fri, 16 Jan 2026 10:20:49 -0500 Subject: [PATCH 3/8] Create test_debug_logging_timer_preservation.py --- .../test_debug_logging_timer_preservation.py | 333 ++++++++++++++++++ 1 file changed, 333 insertions(+) create mode 100644 functional_tests/test_debug_logging_timer_preservation.py diff --git a/functional_tests/test_debug_logging_timer_preservation.py b/functional_tests/test_debug_logging_timer_preservation.py new file mode 100644 index 00000000..b05a31b2 --- /dev/null +++ b/functional_tests/test_debug_logging_timer_preservation.py @@ -0,0 +1,333 @@ +#!/usr/bin/env python3 +""" +Functional test for Debug Logging Timer Preservation Fix. 
+Version: 0.235.014 +Implemented in: 0.235.014 + +This test ensures that the debug logging and file processing logs turnoff times +are preserved when saving admin settings, rather than being recalculated on every save. + +The bug was that every time admin settings were saved, the turnoff time was recalculated +from "now + delta" instead of preserving the existing turnoff time if the timer settings +(value, unit, enabled state) hadn't changed. + +Root cause: route_frontend_admin_settings.py always recalculated turnoff times instead +of checking if timer settings changed. + +Fix: Only recalculate turnoff time if: +1. Timer settings have changed (value, unit, or enabled state), OR +2. The logging was just enabled, OR +3. No existing turnoff time exists +""" + +import sys +import os +from datetime import datetime, timedelta + +# Add parent directory to path for imports +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'application', 'single_app')) + + +def test_timer_recalculation_logic(): + """ + Test the logic that determines when turnoff time should be recalculated. + This simulates the conditions checked in route_frontend_admin_settings.py. 
+ """ + print("๐Ÿงช Testing timer recalculation logic...") + + # Scenario 1: Settings unchanged - should preserve existing time + print("\n๐Ÿ“‹ Scenario 1: No timer settings changed - should preserve existing time") + existing_settings = { + 'enable_debug_logging': True, + 'debug_logging_timer_enabled': True, + 'debug_timer_value': 1, + 'debug_timer_unit': 'weeks', + 'debug_logging_turnoff_time': '2026-01-22T11:05:44.417753' + } + new_form_data = { + 'enable_debug_logging': True, + 'debug_logging_timer_enabled': True, + 'debug_timer_value': 1, + 'debug_timer_unit': 'weeks' + } + + timer_settings_changed = ( + new_form_data['debug_logging_timer_enabled'] != existing_settings.get('debug_logging_timer_enabled', False) or + new_form_data['debug_timer_value'] != existing_settings.get('debug_timer_value', 1) or + new_form_data['debug_timer_unit'] != existing_settings.get('debug_timer_unit', 'hours') + ) + debug_logging_newly_enabled = new_form_data['enable_debug_logging'] and not existing_settings.get('enable_debug_logging', False) + existing_turnoff_time = existing_settings.get('debug_logging_turnoff_time') + + should_recalculate = timer_settings_changed or debug_logging_newly_enabled or not existing_turnoff_time + + if should_recalculate: + print("โŒ FAILED: Should NOT recalculate - settings unchanged") + return False + else: + print("โœ… PASSED: Correctly determined to preserve existing turnoff time") + + # Scenario 2: Timer value changed - should recalculate + print("\n๐Ÿ“‹ Scenario 2: Timer value changed - should recalculate") + new_form_data_changed_value = { + 'enable_debug_logging': True, + 'debug_logging_timer_enabled': True, + 'debug_timer_value': 2, # Changed from 1 to 2 + 'debug_timer_unit': 'weeks' + } + + timer_settings_changed = ( + new_form_data_changed_value['debug_logging_timer_enabled'] != existing_settings.get('debug_logging_timer_enabled', False) or + new_form_data_changed_value['debug_timer_value'] != existing_settings.get('debug_timer_value', 1) 
or + new_form_data_changed_value['debug_timer_unit'] != existing_settings.get('debug_timer_unit', 'hours') + ) + debug_logging_newly_enabled = new_form_data_changed_value['enable_debug_logging'] and not existing_settings.get('enable_debug_logging', False) + + should_recalculate = timer_settings_changed or debug_logging_newly_enabled or not existing_turnoff_time + + if should_recalculate: + print("โœ… PASSED: Correctly determined to recalculate (timer value changed)") + else: + print("โŒ FAILED: Should recalculate when timer value changed") + return False + + # Scenario 3: Timer unit changed - should recalculate + print("\n๐Ÿ“‹ Scenario 3: Timer unit changed - should recalculate") + new_form_data_changed_unit = { + 'enable_debug_logging': True, + 'debug_logging_timer_enabled': True, + 'debug_timer_value': 1, + 'debug_timer_unit': 'days' # Changed from 'weeks' to 'days' + } + + timer_settings_changed = ( + new_form_data_changed_unit['debug_logging_timer_enabled'] != existing_settings.get('debug_logging_timer_enabled', False) or + new_form_data_changed_unit['debug_timer_value'] != existing_settings.get('debug_timer_value', 1) or + new_form_data_changed_unit['debug_timer_unit'] != existing_settings.get('debug_timer_unit', 'hours') + ) + debug_logging_newly_enabled = new_form_data_changed_unit['enable_debug_logging'] and not existing_settings.get('enable_debug_logging', False) + + should_recalculate = timer_settings_changed or debug_logging_newly_enabled or not existing_turnoff_time + + if should_recalculate: + print("โœ… PASSED: Correctly determined to recalculate (timer unit changed)") + else: + print("โŒ FAILED: Should recalculate when timer unit changed") + return False + + # Scenario 4: Debug logging newly enabled - should recalculate + print("\n๐Ÿ“‹ Scenario 4: Debug logging newly enabled - should recalculate") + existing_settings_logging_off = { + 'enable_debug_logging': False, # Was off + 'debug_logging_timer_enabled': True, + 'debug_timer_value': 1, + 
'debug_timer_unit': 'weeks', + 'debug_logging_turnoff_time': None # No turnoff time when disabled + } + new_form_data_enable = { + 'enable_debug_logging': True, # Now on + 'debug_logging_timer_enabled': True, + 'debug_timer_value': 1, + 'debug_timer_unit': 'weeks' + } + + timer_settings_changed = ( + new_form_data_enable['debug_logging_timer_enabled'] != existing_settings_logging_off.get('debug_logging_timer_enabled', False) or + new_form_data_enable['debug_timer_value'] != existing_settings_logging_off.get('debug_timer_value', 1) or + new_form_data_enable['debug_timer_unit'] != existing_settings_logging_off.get('debug_timer_unit', 'hours') + ) + debug_logging_newly_enabled = new_form_data_enable['enable_debug_logging'] and not existing_settings_logging_off.get('enable_debug_logging', False) + existing_turnoff_time_off = existing_settings_logging_off.get('debug_logging_turnoff_time') + + should_recalculate = timer_settings_changed or debug_logging_newly_enabled or not existing_turnoff_time_off + + if should_recalculate: + print("โœ… PASSED: Correctly determined to recalculate (debug logging newly enabled)") + else: + print("โŒ FAILED: Should recalculate when debug logging is newly enabled") + return False + + # Scenario 5: Timer enabled changed - should recalculate + print("\n๐Ÿ“‹ Scenario 5: Timer enabled changed (was off, now on) - should recalculate") + existing_settings_timer_off = { + 'enable_debug_logging': True, + 'debug_logging_timer_enabled': False, # Was off + 'debug_timer_value': 1, + 'debug_timer_unit': 'weeks', + 'debug_logging_turnoff_time': None # No turnoff time when timer disabled + } + new_form_data_timer_on = { + 'enable_debug_logging': True, + 'debug_logging_timer_enabled': True, # Now on + 'debug_timer_value': 1, + 'debug_timer_unit': 'weeks' + } + + timer_settings_changed = ( + new_form_data_timer_on['debug_logging_timer_enabled'] != existing_settings_timer_off.get('debug_logging_timer_enabled', False) or + 
new_form_data_timer_on['debug_timer_value'] != existing_settings_timer_off.get('debug_timer_value', 1) or + new_form_data_timer_on['debug_timer_unit'] != existing_settings_timer_off.get('debug_timer_unit', 'hours') + ) + debug_logging_newly_enabled = new_form_data_timer_on['enable_debug_logging'] and not existing_settings_timer_off.get('enable_debug_logging', False) + existing_turnoff_time_timer_off = existing_settings_timer_off.get('debug_logging_turnoff_time') + + should_recalculate = timer_settings_changed or debug_logging_newly_enabled or not existing_turnoff_time_timer_off + + if should_recalculate: + print("โœ… PASSED: Correctly determined to recalculate (timer enabled state changed)") + else: + print("โŒ FAILED: Should recalculate when timer enabled state changed") + return False + + print("\nโœ… All timer recalculation logic tests passed!") + return True + + +def test_file_processing_logs_timer_preservation(): + """ + Test the same preservation logic for file processing logs timer. 
+ """ + print("\n๐Ÿงช Testing file processing logs timer preservation logic...") + + # Scenario: Settings unchanged - should preserve existing time + print("\n๐Ÿ“‹ Scenario: No file timer settings changed - should preserve existing time") + existing_settings = { + 'enable_file_processing_logs': True, + 'file_processing_logs_timer_enabled': True, + 'file_timer_value': 24, + 'file_timer_unit': 'hours', + 'file_processing_logs_turnoff_time': '2026-01-17T10:30:00.000000' + } + new_form_data = { + 'enable_file_processing_logs': True, + 'file_processing_logs_timer_enabled': True, + 'file_timer_value': 24, + 'file_timer_unit': 'hours' + } + + file_timer_settings_changed = ( + new_form_data['file_processing_logs_timer_enabled'] != existing_settings.get('file_processing_logs_timer_enabled', False) or + new_form_data['file_timer_value'] != existing_settings.get('file_timer_value', 1) or + new_form_data['file_timer_unit'] != existing_settings.get('file_timer_unit', 'hours') + ) + file_processing_logs_newly_enabled = new_form_data['enable_file_processing_logs'] and not existing_settings.get('enable_file_processing_logs', False) + existing_file_turnoff_time = existing_settings.get('file_processing_logs_turnoff_time') + + should_recalculate = file_timer_settings_changed or file_processing_logs_newly_enabled or not existing_file_turnoff_time + + if should_recalculate: + print("โŒ FAILED: Should NOT recalculate - file timer settings unchanged") + return False + else: + print("โœ… PASSED: Correctly determined to preserve existing file turnoff time") + + print("\nโœ… File processing logs timer preservation test passed!") + return True + + +def test_edge_cases(): + """ + Test edge cases for timer preservation. 
+ """ + print("\n๐Ÿงช Testing edge cases...") + + # Edge case 1: No existing turnoff time (should always recalculate) + print("\n๐Ÿ“‹ Edge case 1: No existing turnoff time - should recalculate") + existing_settings = { + 'enable_debug_logging': True, + 'debug_logging_timer_enabled': True, + 'debug_timer_value': 1, + 'debug_timer_unit': 'weeks', + 'debug_logging_turnoff_time': None # No existing time + } + new_form_data = { + 'enable_debug_logging': True, + 'debug_logging_timer_enabled': True, + 'debug_timer_value': 1, + 'debug_timer_unit': 'weeks' + } + + timer_settings_changed = ( + new_form_data['debug_logging_timer_enabled'] != existing_settings.get('debug_logging_timer_enabled', False) or + new_form_data['debug_timer_value'] != existing_settings.get('debug_timer_value', 1) or + new_form_data['debug_timer_unit'] != existing_settings.get('debug_timer_unit', 'hours') + ) + debug_logging_newly_enabled = new_form_data['enable_debug_logging'] and not existing_settings.get('enable_debug_logging', False) + existing_turnoff_time = existing_settings.get('debug_logging_turnoff_time') + + should_recalculate = timer_settings_changed or debug_logging_newly_enabled or not existing_turnoff_time + + if should_recalculate: + print("โœ… PASSED: Correctly determined to recalculate (no existing turnoff time)") + else: + print("โŒ FAILED: Should recalculate when no existing turnoff time") + return False + + # Edge case 2: Empty string turnoff time (should recalculate) + print("\n๐Ÿ“‹ Edge case 2: Empty string turnoff time - should recalculate") + existing_settings_empty = { + 'enable_debug_logging': True, + 'debug_logging_timer_enabled': True, + 'debug_timer_value': 1, + 'debug_timer_unit': 'weeks', + 'debug_logging_turnoff_time': '' # Empty string + } + + existing_turnoff_time_empty = existing_settings_empty.get('debug_logging_turnoff_time') + should_recalculate = timer_settings_changed or debug_logging_newly_enabled or not existing_turnoff_time_empty + + if should_recalculate: + 
print("โœ… PASSED: Correctly determined to recalculate (empty string turnoff time)") + else: + print("โŒ FAILED: Should recalculate when turnoff time is empty string") + return False + + print("\nโœ… All edge case tests passed!") + return True + + +if __name__ == "__main__": + print("=" * 70) + print("Debug Logging Timer Preservation Fix - Functional Test") + print("Version: 0.235.014") + print("=" * 70) + + tests = [ + ("Timer Recalculation Logic", test_timer_recalculation_logic), + ("File Processing Logs Timer Preservation", test_file_processing_logs_timer_preservation), + ("Edge Cases", test_edge_cases), + ] + + results = [] + for test_name, test_func in tests: + print(f"\n{'=' * 70}") + print(f"Running: {test_name}") + print("=" * 70) + try: + result = test_func() + results.append((test_name, result)) + except Exception as e: + print(f"โŒ EXCEPTION in {test_name}: {e}") + import traceback + traceback.print_exc() + results.append((test_name, False)) + + print("\n" + "=" * 70) + print("TEST SUMMARY") + print("=" * 70) + + passed = sum(1 for _, r in results if r) + total = len(results) + + for test_name, result in results: + status = "โœ… PASSED" if result else "โŒ FAILED" + print(f" {status}: {test_name}") + + print(f"\n๐Ÿ“Š Results: {passed}/{total} tests passed") + + if passed == total: + print("\n๐ŸŽ‰ All tests passed! The fix is working correctly.") + sys.exit(0) + else: + print("\nโš ๏ธ Some tests failed. 
Please review the fix.") + sys.exit(1) From 4db385b7015fb89e70531f73ac2e46cf05df8c3a Mon Sep 17 00:00:00 2001 From: Paul Lizer Date: Fri, 16 Jan 2026 10:23:31 -0500 Subject: [PATCH 4/8] fixed file processing logic prevent runtime execution --- application/single_app/config.py | 2 +- application/single_app/route_frontend_admin_settings.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/application/single_app/config.py b/application/single_app/config.py index 18b81bad..a9a0e5d6 100644 --- a/application/single_app/config.py +++ b/application/single_app/config.py @@ -88,7 +88,7 @@ EXECUTOR_TYPE = 'thread' EXECUTOR_MAX_WORKERS = 30 SESSION_TYPE = 'filesystem' -VERSION = "0.235.014" +VERSION = "0.235.015" SECRET_KEY = os.getenv('SECRET_KEY', 'dev-secret-key-change-in-production') diff --git a/application/single_app/route_frontend_admin_settings.py b/application/single_app/route_frontend_admin_settings.py index 45e69fdc..da45c965 100644 --- a/application/single_app/route_frontend_admin_settings.py +++ b/application/single_app/route_frontend_admin_settings.py @@ -462,6 +462,7 @@ def admin_settings(): file_timer_value = int(form_data.get('file_timer_value', 1)) file_timer_unit = form_data.get('file_timer_unit', 'hours') file_processing_logs_turnoff_time = None + enable_file_processing_logs = form_data.get('enable_file_processing_logs') == 'on' # Validate file timer values if file_timer_unit in timer_limits: @@ -485,7 +486,6 @@ def admin_settings(): file_processing_logs_newly_enabled = enable_file_processing_logs and not existing_file_processing_logs_enabled # Calculate file processing logs turnoff time if timer is enabled and file processing logs are on - enable_file_processing_logs = form_data.get('enable_file_processing_logs') == 'on' if enable_file_processing_logs and file_processing_logs_timer_enabled: # Only recalculate turnoff time if: # 1. 
Timer settings have changed (value, unit, or enabled state), OR From 7f506e7451325c190d9ffc5a4624f69818df3e4f Mon Sep 17 00:00:00 2001 From: Paul Lizer Date: Fri, 16 Jan 2026 10:58:46 -0500 Subject: [PATCH 5/8] fixed bug processing execution against personal documents --- application/single_app/config.py | 2 +- .../single_app/functions_retention_policy.py | 107 ++++++++++++++---- 2 files changed, 87 insertions(+), 22 deletions(-) diff --git a/application/single_app/config.py b/application/single_app/config.py index a9a0e5d6..547a8df9 100644 --- a/application/single_app/config.py +++ b/application/single_app/config.py @@ -88,7 +88,7 @@ EXECUTOR_TYPE = 'thread' EXECUTOR_MAX_WORKERS = 30 SESSION_TYPE = 'filesystem' -VERSION = "0.235.015" +VERSION = "0.235.021" SECRET_KEY = os.getenv('SECRET_KEY', 'dev-secret-key-change-in-production') diff --git a/application/single_app/functions_retention_policy.py b/application/single_app/functions_retention_policy.py index 5ba9c286..9d9ab72d 100644 --- a/application/single_app/functions_retention_policy.py +++ b/application/single_app/functions_retention_policy.py @@ -196,6 +196,8 @@ def process_personal_retention(): if conversation_retention_days == 'none' and document_retention_days == 'none': continue + debug_print(f"Processing retention for user {user_id}: conversations={conversation_retention_days} days, documents={document_retention_days} days") + user_deletion_summary = { 'user_id': user_id, 'conversations_deleted': 0, @@ -447,14 +449,14 @@ def delete_aged_conversations(retention_days, workspace_type='personal', user_id cutoff_iso = cutoff_date.isoformat() # Query for aged conversations - # Handle cases where last_activity_at is null, undefined, or older than cutoff + # Check for null/undefined FIRST to avoid comparing null values with dates query = f""" SELECT c.id, c.title, c.last_activity_at, c.{partition_field} FROM c WHERE c.{partition_field} = @partition_value - AND (c.last_activity_at < @cutoff_date - OR NOT 
IS_DEFINED(c.last_activity_at) - OR IS_NULL(c.last_activity_at)) + AND (NOT IS_DEFINED(c.last_activity_at) + OR IS_NULL(c.last_activity_at) + OR (IS_DEFINED(c.last_activity_at) AND NOT IS_NULL(c.last_activity_at) AND c.last_activity_at < @cutoff_date)) """ parameters = [ @@ -462,11 +464,18 @@ def delete_aged_conversations(retention_days, workspace_type='personal', user_id {"name": "@cutoff_date", "value": cutoff_iso} ] - aged_conversations = list(container.query_items( - query=query, - parameters=parameters, - enable_cross_partition_query=True - )) + debug_print(f"Querying aged conversations: workspace_type={workspace_type}, partition_field={partition_field}, partition_value={partition_value}, cutoff_date={cutoff_iso}, retention_days={retention_days}") + + try: + aged_conversations = list(container.query_items( + query=query, + parameters=parameters, + enable_cross_partition_query=True + )) + debug_print(f"Found {len(aged_conversations)} aged conversations for {workspace_type} workspace") + except Exception as query_error: + debug_print(f"Error querying aged conversations for {workspace_type} (partition_value={partition_value}): {query_error}") + return {'count': 0, 'details': []} deleted_details = [] @@ -596,18 +605,18 @@ def delete_aged_documents(retention_days, workspace_type='personal', user_id=Non deletion_user_id = user_id # Calculate cutoff date + # Documents use format like '2026-01-08T21:49:15Z' so we match that format cutoff_date = datetime.now(timezone.utc) - timedelta(days=retention_days) - cutoff_iso = cutoff_date.isoformat() + cutoff_iso = cutoff_date.strftime('%Y-%m-%dT%H:%M:%SZ') # Query for aged documents - # Handle cases where last_activity_at is null, undefined, or older than cutoff + # Documents use 'last_updated' field (not 'last_activity_at' like conversations) + # Use simple date comparison - documents always have last_updated field query = f""" - SELECT c.id, c.file_name, c.title, c.last_activity_at, c.{partition_field}, c.user_id + SELECT 
c.id, c.file_name, c.title, c.last_updated, c.user_id FROM c WHERE c.{partition_field} = @partition_value - AND (c.last_activity_at < @cutoff_date - OR NOT IS_DEFINED(c.last_activity_at) - OR IS_NULL(c.last_activity_at)) + AND c.last_updated < @cutoff_date """ parameters = [ @@ -615,11 +624,67 @@ def delete_aged_documents(retention_days, workspace_type='personal', user_id=Non {"name": "@cutoff_date", "value": cutoff_iso} ] - aged_documents = list(container.query_items( - query=query, - parameters=parameters, - enable_cross_partition_query=True - )) + debug_print(f"Querying aged documents: workspace_type={workspace_type}, partition_field={partition_field}, partition_value={partition_value}, cutoff_date={cutoff_iso}, retention_days={retention_days}") + debug_print(f"Document query: {query.strip()}") + + try: + aged_documents = list(container.query_items( + query=query, + parameters=parameters, + enable_cross_partition_query=True + )) + debug_print(f"Found {len(aged_documents)} aged documents for {workspace_type} workspace") + except Exception as query_error: + debug_print(f"Error querying aged documents for {workspace_type} (partition_value={partition_value}): {query_error}") + # Try progressive diagnostic queries to isolate the issue + try: + # Test 1: Simple query without date + simple_query = f"SELECT c.id, c.file_name, c.last_updated FROM c WHERE c.{partition_field} = @partition_value" + simple_params = [{"name": "@partition_value", "value": partition_value}] + debug_print(f"Diagnostic test 1: Simple query without date filter...") + test_docs = list(container.query_items( + query=simple_query, + parameters=simple_params, + enable_cross_partition_query=True + )) + debug_print(f" Test 1 PASSED - found {len(test_docs)} documents") + for doc in test_docs[:3]: + debug_print(f" Document {doc.get('id')}: last_updated={doc.get('last_updated')} (type: {type(doc.get('last_updated')).__name__})") + + # Test 2: Simple date comparison only (for docs with last_updated) + 
date_query = f"SELECT c.id, c.last_updated FROM c WHERE c.{partition_field} = @partition_value AND c.last_updated < @cutoff_date" + date_params = [ + {"name": "@partition_value", "value": partition_value}, + {"name": "@cutoff_date", "value": cutoff_iso} + ] + debug_print(f"Diagnostic test 2: Date comparison query...") + try: + date_docs = list(container.query_items( + query=date_query, + parameters=date_params, + enable_cross_partition_query=True + )) + debug_print(f" Test 2 PASSED - found {len(date_docs)} documents older than cutoff") + except Exception as date_error: + debug_print(f" Test 2 FAILED: {date_error}") + + # Test 3: IS_DEFINED check only + defined_query = f"SELECT c.id FROM c WHERE c.{partition_field} = @partition_value AND NOT IS_DEFINED(c.last_updated)" + defined_params = [{"name": "@partition_value", "value": partition_value}] + debug_print(f"Diagnostic test 3: NOT IS_DEFINED check...") + try: + defined_docs = list(container.query_items( + query=defined_query, + parameters=defined_params, + enable_cross_partition_query=True + )) + debug_print(f" Test 3 PASSED - found {len(defined_docs)} documents without last_updated") + except Exception as defined_error: + debug_print(f" Test 3 FAILED: {defined_error}") + + except Exception as simple_error: + debug_print(f"Diagnostic queries failed: {simple_error}") + return {'count': 0, 'details': []} deleted_details = [] @@ -640,7 +705,7 @@ def delete_aged_documents(retention_days, workspace_type='personal', user_id=Non 'id': document_id, 'file_name': file_name, 'title': title, - 'last_activity_at': doc.get('last_activity_at') + 'last_updated': doc.get('last_updated') }) debug_print(f"Deleted document {document_id} ({file_name}) due to retention policy") From 69abaa0a89f97a3dc9bf266009eec8e0e99b532e Mon Sep 17 00:00:00 2001 From: Paul Lizer Date: Fri, 16 Jan 2026 10:59:35 -0500 Subject: [PATCH 6/8] removed test logging --- application/single_app/config.py | 2 +- .../single_app/functions_retention_policy.py | 49 
------------------- 2 files changed, 1 insertion(+), 50 deletions(-) diff --git a/application/single_app/config.py b/application/single_app/config.py index 547a8df9..486a1457 100644 --- a/application/single_app/config.py +++ b/application/single_app/config.py @@ -88,7 +88,7 @@ EXECUTOR_TYPE = 'thread' EXECUTOR_MAX_WORKERS = 30 SESSION_TYPE = 'filesystem' -VERSION = "0.235.021" +VERSION = "0.235.022" SECRET_KEY = os.getenv('SECRET_KEY', 'dev-secret-key-change-in-production') diff --git a/application/single_app/functions_retention_policy.py b/application/single_app/functions_retention_policy.py index 9d9ab72d..76296c19 100644 --- a/application/single_app/functions_retention_policy.py +++ b/application/single_app/functions_retention_policy.py @@ -625,7 +625,6 @@ def delete_aged_documents(retention_days, workspace_type='personal', user_id=Non ] debug_print(f"Querying aged documents: workspace_type={workspace_type}, partition_field={partition_field}, partition_value={partition_value}, cutoff_date={cutoff_iso}, retention_days={retention_days}") - debug_print(f"Document query: {query.strip()}") try: aged_documents = list(container.query_items( @@ -636,54 +635,6 @@ def delete_aged_documents(retention_days, workspace_type='personal', user_id=Non debug_print(f"Found {len(aged_documents)} aged documents for {workspace_type} workspace") except Exception as query_error: debug_print(f"Error querying aged documents for {workspace_type} (partition_value={partition_value}): {query_error}") - # Try progressive diagnostic queries to isolate the issue - try: - # Test 1: Simple query without date - simple_query = f"SELECT c.id, c.file_name, c.last_updated FROM c WHERE c.{partition_field} = @partition_value" - simple_params = [{"name": "@partition_value", "value": partition_value}] - debug_print(f"Diagnostic test 1: Simple query without date filter...") - test_docs = list(container.query_items( - query=simple_query, - parameters=simple_params, - enable_cross_partition_query=True - )) - 
debug_print(f" Test 1 PASSED - found {len(test_docs)} documents") - for doc in test_docs[:3]: - debug_print(f" Document {doc.get('id')}: last_updated={doc.get('last_updated')} (type: {type(doc.get('last_updated')).__name__})") - - # Test 2: Simple date comparison only (for docs with last_updated) - date_query = f"SELECT c.id, c.last_updated FROM c WHERE c.{partition_field} = @partition_value AND c.last_updated < @cutoff_date" - date_params = [ - {"name": "@partition_value", "value": partition_value}, - {"name": "@cutoff_date", "value": cutoff_iso} - ] - debug_print(f"Diagnostic test 2: Date comparison query...") - try: - date_docs = list(container.query_items( - query=date_query, - parameters=date_params, - enable_cross_partition_query=True - )) - debug_print(f" Test 2 PASSED - found {len(date_docs)} documents older than cutoff") - except Exception as date_error: - debug_print(f" Test 2 FAILED: {date_error}") - - # Test 3: IS_DEFINED check only - defined_query = f"SELECT c.id FROM c WHERE c.{partition_field} = @partition_value AND NOT IS_DEFINED(c.last_updated)" - defined_params = [{"name": "@partition_value", "value": partition_value}] - debug_print(f"Diagnostic test 3: NOT IS_DEFINED check...") - try: - defined_docs = list(container.query_items( - query=defined_query, - parameters=defined_params, - enable_cross_partition_query=True - )) - debug_print(f" Test 3 PASSED - found {len(defined_docs)} documents without last_updated") - except Exception as defined_error: - debug_print(f" Test 3 FAILED: {defined_error}") - - except Exception as simple_error: - debug_print(f"Diagnostic queries failed: {simple_error}") return {'count': 0, 'details': []} deleted_details = [] From 316fb6bf08bacdadd6796e10ebfccf0726c78830 Mon Sep 17 00:00:00 2001 From: Paul Lizer Date: Fri, 16 Jan 2026 11:07:03 -0500 Subject: [PATCH 7/8] Retention Policy Document Deletion Fix --- .../RETENTION_POLICY_DOCUMENT_DELETION_FIX.md | 149 ++++++++++++++++++ docs/explanation/release_notes.md | 13 ++ 2 
files changed, 162 insertions(+) create mode 100644 docs/explanation/fixes/v0.235.022/RETENTION_POLICY_DOCUMENT_DELETION_FIX.md diff --git a/docs/explanation/fixes/v0.235.022/RETENTION_POLICY_DOCUMENT_DELETION_FIX.md b/docs/explanation/fixes/v0.235.022/RETENTION_POLICY_DOCUMENT_DELETION_FIX.md new file mode 100644 index 00000000..e010a0ef --- /dev/null +++ b/docs/explanation/fixes/v0.235.022/RETENTION_POLICY_DOCUMENT_DELETION_FIX.md @@ -0,0 +1,149 @@ +# Retention Policy Document Deletion Fix + +**Version Implemented:** 0.235.022 + +## Problem Statement + +The retention policy execution was failing when attempting to delete aged documents, while conversation deletion worked correctly. The error manifested as: + +``` +[DEBUG] [INFO]: Error querying aged documents for personal (partition_value=1d6312bd-3eaa-4586-8b74-e90eee126f78): (BadRequest) One of the input values is invalid. +``` + +This prevented the automated cleanup of old documents based on user-configured retention policies. + +## Root Cause Analysis + +Investigation revealed **four distinct issues** causing the document deletion to fail: + +### Issue 1: Wrong Field Name +Documents use `last_updated` as the timestamp field, but the retention policy was querying for `last_activity_at` (which is used by conversations). + +**Document schema:** +```json +{ + "upload_date": "2025-11-20T15:17:57Z", + "last_updated": "2025-11-20T15:54:22Z" +} +``` + +**Incorrect query:** +```sql +WHERE c.last_activity_at < @cutoff_date +``` + +### Issue 2: Date Format Mismatch +Documents store timestamps in `YYYY-MM-DDTHH:MM:SSZ` format, but the query was using Python's `.isoformat()` which produces `+00:00` suffix with microseconds. + +- **Document format:** `2026-01-08T21:49:15Z` +- **Query format:** `2026-01-15T15:49:09.828460+00:00` + +Cosmos DB string comparison failed due to format differences. + +### Issue 3: Duplicate Column in SELECT +The query included both `c.{partition_field}` and `c.user_id` in the SELECT clause. 
When `partition_field='user_id'`, this created a duplicate column causing query errors. + +**Problematic query:** +```sql +SELECT c.id, c.file_name, c.title, c.last_updated, c.user_id, c.user_id +``` + +### Issue 4: Incorrect Activity Logging Parameter +The `log_conversation_deletion()` function was called with `deletion_reason='retention_policy'`, but this parameter doesn't exist in the function signature. It should use `additional_context` instead. + +## Solution Implementation + +### File Modified: `functions_retention_policy.py` + +#### Fix 1: Correct Field Name +Changed document queries to use `last_updated` instead of `last_activity_at`: + +```python +# Query for aged documents +# Documents use 'last_updated' field (not 'last_activity_at' like conversations) +query = f""" + SELECT c.id, c.file_name, c.title, c.last_updated, c.user_id + FROM c + WHERE c.{partition_field} = @partition_value + AND c.last_updated < @cutoff_date +""" +``` + +#### Fix 2: Correct Date Format +Changed from `.isoformat()` to `.strftime()` to match document timestamp format: + +```python +# Documents use format like '2026-01-08T21:49:15Z' so we match that format +cutoff_date = datetime.now(timezone.utc) - timedelta(days=retention_days) +cutoff_iso = cutoff_date.strftime('%Y-%m-%dT%H:%M:%SZ') +``` + +#### Fix 3: Remove Duplicate Column +Simplified SELECT to avoid duplicate columns: + +```python +SELECT c.id, c.file_name, c.title, c.last_updated, c.user_id +``` + +#### Fix 4: Correct Activity Logging Parameter +Changed from invalid parameter to proper `additional_context`: + +```python +# Before (incorrect) +log_conversation_deletion( + ... + deletion_reason='retention_policy' +) + +# After (correct) +log_conversation_deletion( + ... 
+ additional_context={'deletion_reason': 'retention_policy'} +) +``` + +### Additional Improvements + +#### Enhanced Debug Logging +Added comprehensive debug logging to aid future troubleshooting: + +```python +debug_print(f"Processing retention for user {user_id}: conversations={conversation_retention_days} days, documents={document_retention_days} days") +debug_print(f"Querying aged documents: workspace_type={workspace_type}, partition_field={partition_field}, partition_value={partition_value}, cutoff_date={cutoff_iso}, retention_days={retention_days}") +debug_print(f"Found {len(aged_documents)} aged documents for {workspace_type} workspace") +``` + +## Testing & Validation + +After the fix, retention policy execution completed successfully: + +``` +[DEBUG] [INFO]: Querying aged documents: workspace_type=personal, partition_field=user_id, partition_value=1d6312bd-3eaa-4586-8b74-e90eee126f78, cutoff_date=2026-01-15T15:58:09Z, retention_days=1 +[DEBUG] [INFO]: Found 1 aged documents for personal workspace +[DEBUG] [INFO]: [DELETE DOCUMENT] Starting deletion for document: 36a030b2-57b2-426b-8aa9-6f49eed5f8a6 +[DEBUG] [INFO]: Logged document deletion transaction: 36a030b2-57b2-426b-8aa9-6f49eed5f8a6 +Successfully deleted blob at 1d6312bd-3eaa-4586-8b74-e90eee126f78/test.pdf +[DEBUG] [INFO]: Deleted document 36a030b2-57b2-426b-8aa9-6f49eed5f8a6 (test.pdf) due to retention policy +[DEBUG] [INFO]: Notification created: 5a92235d-d408-4449-9c72-0951ed198688 [personal] [system_announcement] +[DEBUG] [INFO]: Retention policy execution completed: {'success': True, ... 
'documents': 1, 'users_affected': 1 ...} +``` + +## Files Changed + +| File | Changes | +|------|---------| +| `functions_retention_policy.py` | Fixed field name, date format, duplicate columns, activity logging | +| `config.py` | Version bump to 0.235.022 | + +## Impact + +- **Retention Policy:** Now correctly deletes aged documents based on user settings +- **Activity Logging:** Document deletions are properly logged with deletion reason +- **User Notifications:** Users receive notifications when documents are deleted by retention policy +- **Blob Storage:** Associated blob files are correctly removed + +## Related Components + +- Conversation retention (uses `last_activity_at` - unchanged) +- Group workspace retention (shares same document deletion logic) +- Public workspace retention (shares same document deletion logic) diff --git a/docs/explanation/release_notes.md b/docs/explanation/release_notes.md index ac99efa6..a8f22184 100644 --- a/docs/explanation/release_notes.md +++ b/docs/explanation/release_notes.md @@ -1,6 +1,19 @@ # Feature Release +### **(v0.235.022)** + +#### Bug Fixes + +* **Retention Policy Document Deletion Fix** + * Fixed critical bug where retention policy execution failed when attempting to delete aged documents, while conversation deletion worked correctly. + * **Root Cause 1**: Documents use `last_updated` field, but query was looking for `last_activity_at` (used by conversations). + * **Root Cause 2**: Date format mismatch - documents store `YYYY-MM-DDTHH:MM:SSZ` but query used Python's `.isoformat()` with `+00:00` suffix. + * **Root Cause 3**: Duplicate column in SELECT clause when `partition_field='user_id'` caused query errors. + * **Root Cause 4**: Activity logging called with incorrect `deletion_reason` parameter instead of `additional_context`. + * **Files Modified**: `functions_retention_policy.py` (query field names, date format, SELECT clause, activity logging). 
+ * (Ref: `delete_aged_documents()`, retention policy execution, Cosmos DB queries) + ### **(v0.235.012)** #### Bug Fixes From 7721dd9a7fc93cfff098c2f5d2ffa8a9ebb870aa Mon Sep 17 00:00:00 2001 From: Paul Lizer Date: Sat, 17 Jan 2026 15:07:25 -0500 Subject: [PATCH 8/8] Improve execution logic for retention policy Now uses retention_policy_next_run timestamp - Compares current time against the stored next scheduled time. If current time >= next scheduled time, it runs. Reduced check interval from 1 hour to 5 minutes - More responsive scheduling, ensures it catches the scheduled time promptly. Better fallback logic - If next_run can't be parsed, falls back to checking last_run (23-hour threshold). Runs immediately if never run before - If there's no last_run or next_run, it will execute on the first check. --- application/single_app/app.py | 67 +++++++++----- application/single_app/config.py | 2 +- .../single_app/functions_retention_policy.py | 40 ++++++--- .../RETENTION_POLICY_DOCUMENT_DELETION_FIX.md | 2 +- .../RETENTION_POLICY_SCHEDULER_FIX.md | 87 +++++++++++++++++++ docs/explanation/release_notes.md | 11 ++- 6 files changed, 173 insertions(+), 36 deletions(-) rename docs/explanation/fixes/{v0.235.022 => v0.235.025}/RETENTION_POLICY_DOCUMENT_DELETION_FIX.md (99%) create mode 100644 docs/explanation/fixes/v0.235.025/RETENTION_POLICY_SCHEDULER_FIX.md diff --git a/application/single_app/app.py b/application/single_app/app.py index c1dc124f..df4244c2 100644 --- a/application/single_app/app.py +++ b/application/single_app/app.py @@ -309,14 +309,34 @@ def check_retention_policy(): if personal_enabled or group_enabled or public_enabled: current_time = datetime.now(timezone.utc) - execution_hour = settings.get('retention_policy_execution_hour', 2) - # Check if we're in the execution hour - if current_time.hour == execution_hour: - # Check if we haven't run today yet + # Check if next scheduled run time has passed + next_run = settings.get('retention_policy_next_run') + 
should_run = False + + if next_run: + try: + next_run_dt = datetime.fromisoformat(next_run) + # Run if we've passed the scheduled time + if current_time >= next_run_dt: + should_run = True + except Exception as parse_error: + print(f"Error parsing next_run timestamp: {parse_error}") + # If we can't parse, fall back to checking last_run + last_run = settings.get('retention_policy_last_run') + if last_run: + try: + last_run_dt = datetime.fromisoformat(last_run) + # Run if last run was more than 23 hours ago + if (current_time - last_run_dt).total_seconds() > (23 * 3600): + should_run = True + except: + should_run = True + else: + should_run = True + else: + # No next_run set, check last_run instead last_run = settings.get('retention_policy_last_run') - should_run = False - if last_run: try: last_run_dt = datetime.fromisoformat(last_run) @@ -326,29 +346,30 @@ def check_retention_policy(): except: should_run = True else: + # Never run before, execute now should_run = True + + if should_run: + print(f"Executing scheduled retention policy at {current_time.isoformat()}") + from functions_retention_policy import execute_retention_policy + results = execute_retention_policy(manual_execution=False) - if should_run: - print(f"Executing scheduled retention policy at {current_time.isoformat()}") - from functions_retention_policy import execute_retention_policy - results = execute_retention_policy(manual_execution=False) - - if results.get('success'): - print(f"Retention policy execution completed: " - f"{results['personal']['conversations']} personal conversations, " - f"{results['personal']['documents']} personal documents, " - f"{results['group']['conversations']} group conversations, " - f"{results['group']['documents']} group documents, " - f"{results['public']['conversations']} public conversations, " - f"{results['public']['documents']} public documents deleted.") - else: - print(f"Retention policy execution failed: {results.get('errors')}") + if results.get('success'): + 
print(f"Retention policy execution completed: " + f"{results['personal']['conversations']} personal conversations, " + f"{results['personal']['documents']} personal documents, " + f"{results['group']['conversations']} group conversations, " + f"{results['group']['documents']} group documents, " + f"{results['public']['conversations']} public conversations, " + f"{results['public']['documents']} public documents deleted.") + else: + print(f"Retention policy execution failed: {results.get('errors')}") except Exception as e: print(f"Error in retention policy check: {e}") - # Check every hour - time.sleep(3600) + # Check every 5 minutes for more responsive scheduling + time.sleep(300) # Start the retention policy check thread retention_thread = threading.Thread(target=check_retention_policy, daemon=True) diff --git a/application/single_app/config.py b/application/single_app/config.py index 486a1457..2224a49e 100644 --- a/application/single_app/config.py +++ b/application/single_app/config.py @@ -88,7 +88,7 @@ EXECUTOR_TYPE = 'thread' EXECUTOR_MAX_WORKERS = 30 SESSION_TYPE = 'filesystem' -VERSION = "0.235.022" +VERSION = "0.235.025" SECRET_KEY = os.getenv('SECRET_KEY', 'dev-secret-key-change-in-production') diff --git a/application/single_app/functions_retention_policy.py b/application/single_app/functions_retention_policy.py index 76296c19..6c59ef64 100644 --- a/application/single_app/functions_retention_policy.py +++ b/application/single_app/functions_retention_policy.py @@ -18,6 +18,7 @@ from functions_activity_logging import log_conversation_deletion, log_conversation_archival from functions_notifications import create_notification, create_group_notification, create_public_workspace_notification from functions_debug import debug_print +from functions_appinsights import log_event from datetime import datetime, timezone, timedelta @@ -36,6 +37,7 @@ def get_all_user_settings(): )) return users except Exception as e: + log_event("get_all_user_settings_error", {"error": 
str(e)}) debug_print(f"Error fetching all user settings: {e}") return [] @@ -55,6 +57,7 @@ def get_all_groups(): )) return groups except Exception as e: + log_event("get_all_groups_error", {"error": str(e)}) debug_print(f"Error fetching all groups: {e}") return [] @@ -74,6 +77,7 @@ def get_all_public_workspaces(): )) return workspaces except Exception as e: + log_event("get_all_public_workspaces_error", {"error": str(e)}) debug_print(f"Error fetching all public workspaces: {e}") return [] @@ -156,6 +160,7 @@ def execute_retention_policy(workspace_scopes=None, manual_execution=False): return results except Exception as e: + log_event("execute_retention_policy_error", {"error": str(e), "workspace_scopes": workspace_scopes, "manual_execution": manual_execution}) debug_print(f"Error executing retention policy: {e}") results['success'] = False results['errors'].append(str(e)) @@ -218,6 +223,7 @@ def process_personal_retention(): user_deletion_summary['conversation_details'] = conv_results['details'] results['conversations'] += conv_results['count'] except Exception as e: + log_event("process_personal_retention_conversations_error", {"error": str(e), "user_id": user_id}) debug_print(f"Error processing conversations for user {user_id}: {e}") # Process documents @@ -232,6 +238,7 @@ def process_personal_retention(): user_deletion_summary['document_details'] = doc_results['details'] results['documents'] += doc_results['count'] except Exception as e: + log_event("process_personal_retention_documents_error", {"error": str(e), "user_id": user_id}) debug_print(f"Error processing documents for user {user_id}: {e}") # Send notification if anything was deleted @@ -243,6 +250,7 @@ def process_personal_retention(): return results except Exception as e: + log_event("process_personal_retention_error", {"error": str(e)}) debug_print(f"Error in process_personal_retention: {e}") return results @@ -301,6 +309,7 @@ def process_group_retention(): 
group_deletion_summary['conversation_details'] = conv_results['details'] results['conversations'] += conv_results['count'] except Exception as e: + log_event("process_group_retention_conversations_error", {"error": str(e), "group_id": group_id}) debug_print(f"Error processing conversations for group {group_id}: {e}") # Process documents @@ -315,6 +324,7 @@ def process_group_retention(): group_deletion_summary['document_details'] = doc_results['details'] results['documents'] += doc_results['count'] except Exception as e: + log_event("process_group_retention_documents_error", {"error": str(e), "group_id": group_id}) debug_print(f"Error processing documents for group {group_id}: {e}") # Send notification if anything was deleted @@ -326,6 +336,7 @@ def process_group_retention(): return results except Exception as e: + log_event("process_group_retention_error", {"error": str(e)}) debug_print(f"Error in process_group_retention: {e}") return results @@ -384,6 +395,7 @@ def process_public_retention(): workspace_deletion_summary['conversation_details'] = conv_results['details'] results['conversations'] += conv_results['count'] except Exception as e: + log_event("process_public_retention_conversations_error", {"error": str(e), "public_workspace_id": workspace_id}) debug_print(f"Error processing conversations for public workspace {workspace_id}: {e}") # Process documents @@ -398,6 +410,7 @@ def process_public_retention(): workspace_deletion_summary['document_details'] = doc_results['details'] results['documents'] += doc_results['count'] except Exception as e: + log_event("process_public_retention_documents_error", {"error": str(e), "public_workspace_id": workspace_id}) debug_print(f"Error processing documents for public workspace {workspace_id}: {e}") # Send notification if anything was deleted @@ -409,6 +422,7 @@ def process_public_retention(): return results except Exception as e: + log_event("process_public_retention_error", {"error": str(e)}) debug_print(f"Error in 
process_public_retention: {e}") return results @@ -474,16 +488,17 @@ def delete_aged_conversations(retention_days, workspace_type='personal', user_id )) debug_print(f"Found {len(aged_conversations)} aged conversations for {workspace_type} workspace") except Exception as query_error: + log_event("delete_aged_conversations_query_error", {"error": str(query_error), "workspace_type": workspace_type, "partition_value": partition_value}) debug_print(f"Error querying aged conversations for {workspace_type} (partition_value={partition_value}): {query_error}") return {'count': 0, 'details': []} deleted_details = [] for conv in aged_conversations: - conversation_id = conv.get('id') - conversation_title = conv.get('title', 'Untitled') - try: + conversation_id = conv.get('id') + conversation_title = conv.get('title', 'Untitled') + # Read full conversation for archiving/logging conversation_item = container.read_item( item=conversation_id, @@ -565,7 +580,9 @@ def delete_aged_conversations(retention_days, workspace_type='personal', user_id debug_print(f"Deleted conversation {conversation_id} ({conversation_title}) due to retention policy") except Exception as e: - debug_print(f"Error deleting conversation {conversation_id}: {e}") + conv_id = conv.get('id', 'unknown') if conv else 'unknown' + log_event("delete_aged_conversations_deletion_error", {"error": str(e), "conversation_id": conv_id, "workspace_type": workspace_type}) + debug_print(f"Error deleting conversation {conv_id}: {e}") return { 'count': len(deleted_details), @@ -634,18 +651,19 @@ def delete_aged_documents(retention_days, workspace_type='personal', user_id=Non )) debug_print(f"Found {len(aged_documents)} aged documents for {workspace_type} workspace") except Exception as query_error: + log_event("delete_aged_documents_query_error", {"error": str(query_error), "workspace_type": workspace_type, "partition_value": partition_value}) debug_print(f"Error querying aged documents for {workspace_type} 
(partition_value={partition_value}): {query_error}") return {'count': 0, 'details': []} deleted_details = [] for doc in aged_documents: - document_id = doc.get('id') - file_name = doc.get('file_name', 'Unknown') - title = doc.get('title', file_name) - doc_user_id = doc.get('user_id') or deletion_user_id - try: + document_id = doc.get('id') + file_name = doc.get('file_name', 'Unknown') + title = doc.get('title', file_name) + doc_user_id = doc.get('user_id') or deletion_user_id + # Delete document chunks from search index delete_document_chunks(document_id, group_id, public_workspace_id) @@ -662,7 +680,9 @@ def delete_aged_documents(retention_days, workspace_type='personal', user_id=Non debug_print(f"Deleted document {document_id} ({file_name}) due to retention policy") except Exception as e: - debug_print(f"Error deleting document {document_id}: {e}") + doc_id = doc.get('id', 'unknown') if doc else 'unknown' + log_event("delete_aged_documents_deletion_error", {"error": str(e), "document_id": doc_id, "workspace_type": workspace_type}) + debug_print(f"Error deleting document {doc_id}: {e}") return { 'count': len(deleted_details), diff --git a/docs/explanation/fixes/v0.235.022/RETENTION_POLICY_DOCUMENT_DELETION_FIX.md b/docs/explanation/fixes/v0.235.025/RETENTION_POLICY_DOCUMENT_DELETION_FIX.md similarity index 99% rename from docs/explanation/fixes/v0.235.022/RETENTION_POLICY_DOCUMENT_DELETION_FIX.md rename to docs/explanation/fixes/v0.235.025/RETENTION_POLICY_DOCUMENT_DELETION_FIX.md index e010a0ef..cacb8b68 100644 --- a/docs/explanation/fixes/v0.235.022/RETENTION_POLICY_DOCUMENT_DELETION_FIX.md +++ b/docs/explanation/fixes/v0.235.025/RETENTION_POLICY_DOCUMENT_DELETION_FIX.md @@ -1,6 +1,6 @@ # Retention Policy Document Deletion Fix -**Version Implemented:** 0.235.022 +**Version Implemented:** 0.235.025 ## Problem Statement diff --git a/docs/explanation/fixes/v0.235.025/RETENTION_POLICY_SCHEDULER_FIX.md 
b/docs/explanation/fixes/v0.235.025/RETENTION_POLICY_SCHEDULER_FIX.md new file mode 100644 index 00000000..36ae844d --- /dev/null +++ b/docs/explanation/fixes/v0.235.025/RETENTION_POLICY_SCHEDULER_FIX.md @@ -0,0 +1,87 @@ +# Retention Policy Scheduler Fix + +## Issue Description + +The automated retention policy scheduler was not executing at the scheduled time. Users would see the "Next Scheduled Execution" time pass without the policy running. + +## Root Cause Analysis + +The background scheduler in `app.py` had multiple issues preventing reliable execution: + +1. **Hour-matching approach was unreliable**: The scheduler only ran if the check happened exactly during the execution hour (e.g., 2 AM). With 1-hour sleep intervals, it could easily miss the entire window if the thread's check cycle didn't align with the execution hour. + +2. **Check interval too long**: Checking every hour (3600 seconds) meant poor responsiveness and high probability of missing the scheduled time. + +3. **No use of stored next_run timestamp**: The code ignored the `retention_policy_next_run` setting that was being saved, instead relying solely on hour matching. + +4. **No catch-up logic**: If the scheduled time passed while the app was down or during a sleep cycle, there was no mechanism to run the missed execution. + +## Files Modified + +| File | Changes | +|------|---------| +| [app.py](../../../application/single_app/app.py) | Rewrote `check_retention_policy()` background task | +| [config.py](../../../application/single_app/config.py) | Version bump to 0.235.025 | + +## Technical Details + +### Before (Problematic Code) +```python +# Check if we're in the execution hour +if current_time.hour == execution_hour: + # Check if we haven't run today yet + last_run = settings.get('retention_policy_last_run') + # ... 
run if last_run > 23 hours ago + +# Check every hour +time.sleep(3600) +``` + +### After (Fixed Code) +```python +# Check if next scheduled run time has passed +next_run = settings.get('retention_policy_next_run') +if next_run: + next_run_dt = datetime.fromisoformat(next_run) + # Run if we've passed the scheduled time + if current_time >= next_run_dt: + should_run = True + +# Check every 5 minutes for more responsive scheduling +time.sleep(300) +``` + +### Key Improvements + +1. **Uses `retention_policy_next_run` timestamp**: Compares current time against the stored next scheduled execution time. If current time >= scheduled time, it runs. + +2. **Reduced check interval**: Changed from 1 hour to 5 minutes (300 seconds) for more responsive scheduling. + +3. **Better fallback logic**: If `next_run` can't be parsed, falls back to checking `last_run` with a 23-hour threshold. + +4. **Immediate execution for missed schedules**: If the scheduled time has already passed, the policy runs on the next check cycle. + +5. **Runs immediately if never run before**: If there's no `last_run` or `next_run`, it will execute on the first check. + +## Testing Approach + +1. Enable retention policy for personal workspaces +2. Set execution hour to current hour or a past hour +3. Restart the application +4. Verify the retention policy executes within 5 minutes +5. 
Confirm `Last Execution` and `Next Scheduled Execution` timestamps update correctly + +## Impact Analysis + +- **Positive**: Retention policies now execute reliably at scheduled times +- **Positive**: Missed executions are caught up on next app start or check cycle +- **Consideration**: Slightly higher CPU usage due to 5-minute checks vs 1-hour checks (negligible impact) + +## Version Information + +- **Fixed in version**: 0.235.025 +- **Issue introduced**: Original implementation + +## Related Changes + +- [RETENTION_POLICY_DOCUMENT_DELETION_FIX.md](RETENTION_POLICY_DOCUMENT_DELETION_FIX.md) - Related retention policy fixes in same version diff --git a/docs/explanation/release_notes.md b/docs/explanation/release_notes.md index a8f22184..02d7139a 100644 --- a/docs/explanation/release_notes.md +++ b/docs/explanation/release_notes.md @@ -1,7 +1,7 @@ # Feature Release -### **(v0.235.022)** +### **(v0.235.025)** #### Bug Fixes @@ -14,6 +14,15 @@ * **Files Modified**: `functions_retention_policy.py` (query field names, date format, SELECT clause, activity logging). * (Ref: `delete_aged_documents()`, retention policy execution, Cosmos DB queries) +* **Retention Policy Scheduler Fix** + * Fixed automated retention policy scheduler not executing at the scheduled time. + * **Root Cause 1**: Hour-matching approach was unreliable - only ran if check happened exactly during the execution hour (e.g., 2 AM), but 1-hour sleep intervals could miss the entire window. + * **Root Cause 2**: Check interval too long (1 hour) meant poor responsiveness and high probability of missing scheduled time. + * **Root Cause 3**: Code ignored the stored `retention_policy_next_run` timestamp, instead relying solely on hour matching. + * **Solution**: Now uses `retention_policy_next_run` timestamp for comparison, reduced check interval from 1 hour to 5 minutes, added fallback logic for missed executions. + * **Files Modified**: `app.py` (`check_retention_policy()` background task). 
+ * (Ref: retention policy scheduler, background task, scheduled execution) + ### **(v0.235.012)** #### Bug Fixes