From d324fa668f83caf7d2a397a9b91450c74a4b3904 Mon Sep 17 00:00:00 2001 From: Serge Huber Date: Fri, 29 Aug 2025 16:59:38 +0200 Subject: [PATCH 1/3] [UNOMI-878] Add advanced scheduler service and task persistence capabilities - Introduced a new, robust scheduler service with extensive task management and cluster-aware features. - Added support for task persistence, recovery, metrics tracking, and execution history. - Implemented additional configuration options for scheduler properties such as `nodeId`, `purgeTaskEnabled`, and `lockTimeout`. - Enhanced task lifecycle management with new components including `TaskExecutionManager`, `TaskRecoveryManager`, and `TaskHistoryManager`. - Updated API and documentation to reflect changes. --- .../main/java/org/apache/unomi/api/Item.java | 66 + .../unomi/api/services/SchedulerService.java | 386 +++- .../apache/unomi/api/tasks/ScheduledTask.java | 873 ++++++++ .../apache/unomi/api/tasks/TaskExecutor.java | 139 ++ .../impl/cluster/ClusterServiceImpl.java | 58 +- .../PersistenceSchedulerProvider.java | 385 ++++ .../impl/scheduler/SchedulerConstants.java | 49 + .../impl/scheduler/SchedulerProvider.java | 95 + .../impl/scheduler/SchedulerServiceImpl.java | 1987 ++++++++++++++++- .../impl/scheduler/TaskExecutionManager.java | 504 +++++ .../impl/scheduler/TaskExecutorRegistry.java | 149 ++ .../impl/scheduler/TaskHistoryManager.java | 167 ++ .../impl/scheduler/TaskLockManager.java | 353 +++ .../impl/scheduler/TaskMetricsManager.java | 93 + .../impl/scheduler/TaskRecoveryManager.java | 333 +++ .../impl/scheduler/TaskStateManager.java | 311 +++ .../impl/scheduler/TaskValidationManager.java | 200 ++ .../OSGI-INF/blueprint/blueprint.xml | 103 +- .../resources/org.apache.unomi.services.cfg | 9 + 19 files changed, 6181 insertions(+), 79 deletions(-) create mode 100644 api/src/main/java/org/apache/unomi/api/tasks/ScheduledTask.java create mode 100644 api/src/main/java/org/apache/unomi/api/tasks/TaskExecutor.java create mode 100644 
services/src/main/java/org/apache/unomi/services/impl/scheduler/PersistenceSchedulerProvider.java create mode 100644 services/src/main/java/org/apache/unomi/services/impl/scheduler/SchedulerConstants.java create mode 100644 services/src/main/java/org/apache/unomi/services/impl/scheduler/SchedulerProvider.java create mode 100644 services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskExecutionManager.java create mode 100644 services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskExecutorRegistry.java create mode 100644 services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskHistoryManager.java create mode 100644 services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskLockManager.java create mode 100644 services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskMetricsManager.java create mode 100644 services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskRecoveryManager.java create mode 100644 services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskStateManager.java create mode 100644 services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskValidationManager.java diff --git a/api/src/main/java/org/apache/unomi/api/Item.java b/api/src/main/java/org/apache/unomi/api/Item.java index de283ebe9a..842dc66a25 100644 --- a/api/src/main/java/org/apache/unomi/api/Item.java +++ b/api/src/main/java/org/apache/unomi/api/Item.java @@ -21,6 +21,7 @@ import org.slf4j.LoggerFactory; import java.io.Serializable; +import java.util.Date; import java.util.HashMap; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; @@ -66,11 +67,20 @@ public static String getItemType(Class clazz) { protected Long version; protected Map systemMetadata = new HashMap<>(); + // Audit metadata fields + private String createdBy; + private String lastModifiedBy; + private Date creationDate; + private Date lastModificationDate; + private String sourceInstanceId; + private Date lastSyncDate; + public 
Item() { this.itemType = getItemType(this.getClass()); if (itemType == null) { LOGGER.error("Item implementations must provide a public String constant named ITEM_TYPE to uniquely identify this Item for the persistence service."); } + initializeAuditMetadata(); } public Item(String itemId) { @@ -78,6 +88,11 @@ public Item(String itemId) { this.itemId = itemId; } + private void initializeAuditMetadata() { + this.creationDate = new Date(); + this.lastModificationDate = this.creationDate; + this.version = 0L; + } /** * Retrieves the Item's identifier used to uniquely identify this Item when persisted or when referred to. An Item's identifier must be unique among Items with the same type. @@ -150,4 +165,55 @@ public Object getSystemMetadata(String key) { public void setSystemMetadata(String key, Object value) { systemMetadata.put(key, value); } + + // Audit metadata getters and setters + public String getCreatedBy() { + return createdBy; + } + + public void setCreatedBy(String createdBy) { + this.createdBy = createdBy; + } + + public String getLastModifiedBy() { + return lastModifiedBy; + } + + public void setLastModifiedBy(String lastModifiedBy) { + this.lastModifiedBy = lastModifiedBy; + this.lastModificationDate = new Date(); + this.version++; + } + + public Date getCreationDate() { + return creationDate; + } + + public void setCreationDate(Date creationDate) { + this.creationDate = creationDate; + } + + public Date getLastModificationDate() { + return lastModificationDate; + } + + public void setLastModificationDate(Date lastModificationDate) { + this.lastModificationDate = lastModificationDate; + } + + public String getSourceInstanceId() { + return sourceInstanceId; + } + + public void setSourceInstanceId(String sourceInstanceId) { + this.sourceInstanceId = sourceInstanceId; + } + + public Date getLastSyncDate() { + return lastSyncDate; + } + + public void setLastSyncDate(Date lastSyncDate) { + this.lastSyncDate = lastSyncDate; + } } diff --git 
a/api/src/main/java/org/apache/unomi/api/services/SchedulerService.java b/api/src/main/java/org/apache/unomi/api/services/SchedulerService.java index 1458bf746b..0842c087eb 100644 --- a/api/src/main/java/org/apache/unomi/api/services/SchedulerService.java +++ b/api/src/main/java/org/apache/unomi/api/services/SchedulerService.java @@ -17,28 +17,390 @@ package org.apache.unomi.api.services; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.ScheduledExecutorService; +import org.apache.unomi.api.PartialList; +import org.apache.unomi.api.tasks.ScheduledTask; +import org.apache.unomi.api.tasks.TaskExecutor; + +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; /** - * A service to centralize scheduling of tasks instead of using Timers or executors in each service + * Service for scheduling and managing tasks in a cluster-aware manner. + * This service provides comprehensive task scheduling capabilities including: + * * - * https://stackoverflow.com/questions/409932/java-timer-vs-executorservice + * The service supports both single-node and clustered environments, ensuring + * tasks are executed reliably and efficiently across the cluster. */ public interface SchedulerService { /** - * Use this method to get a {@link ScheduledExecutorService} - * and execute your task with it instead of using {@link java.util.Timer} + * Creates a new scheduled task. + * This method provides full control over task configuration including + * execution timing, persistence, and parallel execution settings. + * The task can be either persistent (stored in persistence service and + * visible across the cluster) or non-persistent (stored only in memory + * on the local node). 
* - * @return {@link ScheduledExecutorService} + * @param taskType unique identifier for the task type + * @param parameters task-specific parameters + * @param initialDelay delay before first execution + * @param period period between executions (0 for one-shot tasks) + * @param timeUnit time unit for delay and period + * @param fixedRate whether to use fixed rate (true) or fixed delay (false) + * @param oneShot whether this is a one-time task + * @param allowParallelExecution whether parallel execution is allowed + * @param persistent whether to store the task in persistence service (true) or only in memory (false) + * @return the created task instance + * @throws IllegalArgumentException if task configuration is invalid */ - ScheduledExecutorService getScheduleExecutorService(); + ScheduledTask createTask(String taskType, + Map parameters, + long initialDelay, + long period, + TimeUnit timeUnit, + boolean fixedRate, + boolean oneShot, + boolean allowParallelExecution, + boolean persistent); /** - * Same as getScheduleExecutorService but use a shared pool of ScheduledExecutor instead of single one. - * Use this service is your tasks can be run in parallel of the others. - * @return {@link ScheduledExecutorService} + * Schedules an existing task for execution. + * The task will be validated and scheduled according to its configuration. + * For periodic tasks, this sets up recurring execution. + * + * @param task the task to schedule + * @throws IllegalArgumentException if task configuration is invalid */ - ScheduledExecutorService getSharedScheduleExecutorService(); + void scheduleTask(ScheduledTask task); + + /** + * Cancels a scheduled task. + * This will stop any current execution and prevent future executions. + * The task remains in storage but is marked as cancelled. + * + * @param taskId the task ID to cancel + */ + void cancelTask(String taskId); + + /** + * Gets all tasks from both storage and memory. 
+ * This provides a complete view of all tasks in the system, + * both persistent and in-memory. + * + * @return combined list of all tasks + */ + List getAllTasks(); + + /** + * Gets a task by ID from either storage or memory. + * This will search both persistent storage and in-memory tasks. + * + * @param taskId the task ID + * @return the task or null if not found + */ + ScheduledTask getTask(String taskId); + + /** + * Gets all tasks stored in memory. + * These are non-persistent tasks that exist only on this node. + * + * @return list of all in-memory tasks + */ + List getMemoryTasks(); + + /** + * Gets all tasks from persistent storage. + * These tasks are visible across the cluster. + * + * @return list of all persistent tasks + */ + List getPersistentTasks(); + + /** + * Registers a task executor. + * The executor will be used to execute tasks of its declared type. + * + * @param executor the executor to register + */ + void registerTaskExecutor(TaskExecutor executor); + + /** + * Unregisters a task executor. + * Tasks of this type will no longer be executed on this node. + * + * @param executor the executor to unregister + */ + void unregisterTaskExecutor(TaskExecutor executor); + + /** + * Checks if this node is a task executor node. + * Executor nodes are responsible for executing tasks in the cluster. + * + * @return true if this node executes tasks + */ + boolean isExecutorNode(); + + /** + * Gets the node ID of this scheduler instance. + * This ID uniquely identifies this node in the cluster. + * + * @return the node ID + */ + String getNodeId(); + + /** + * Gets tasks with the specified status. + * This allows filtering tasks by their current state. + * The results include both persistent and in-memory tasks. 
+ * + * @param status the task status to filter by + * @param offset the starting offset for pagination + * @param size the maximum number of tasks to return + * @param sortBy optional sort field (null for default sorting) + * @return partial list of matching tasks + */ + PartialList getTasksByStatus(ScheduledTask.TaskStatus status, int offset, int size, String sortBy); + + /** + * Gets tasks for a specific executor type. + * This allows filtering tasks by their type. + * The results include both persistent and in-memory tasks. + * + * @param taskType the task type to filter by + * @param offset the starting offset for pagination + * @param size the maximum number of tasks to return + * @param sortBy optional sort field (null for default sorting) + * @return partial list of matching tasks + */ + PartialList getTasksByType(String taskType, int offset, int size, String sortBy); + + /** + * Retries a failed task. + * The task will be rescheduled for execution with optional + * failure count reset. The task must be in FAILED status + * for this operation to succeed. + * + * @param taskId the task ID to retry + * @param resetFailureCount whether to reset the failure count to 0 + */ + void retryTask(String taskId, boolean resetFailureCount); + + /** + * Resumes a crashed task from its last checkpoint. + * This attempts to continue execution from where the task + * left off before crashing. The task must be in CRASHED status + * and have checkpoint data available for this operation to succeed. + * + * @param taskId the task ID to resume + */ + void resumeTask(String taskId); + + /** + * Checks for crashed tasks from other nodes and attempts recovery. + * This is part of the cluster's self-healing mechanism. + */ + void recoverCrashedTasks(); + + /** + * Saves changes to an existing task. + * This persists the task state and configuration changes to storage. 
+ * + * @param task the task to save + * @return true if the save was successful, false otherwise + */ + boolean saveTask(ScheduledTask task); + + /** + * Creates a simple recurring task with default settings. + * This is a convenience method for services that just need periodic execution. + * The task will use fixed rate scheduling and allow parallel execution. + * The created task will be automatically scheduled for execution. + * + * @param taskType unique identifier for the task type + * @param period time between executions (must be > 0) + * @param timeUnit unit for the period + * @param runnable the code to execute + * @param persistent whether to store in persistence service (true) or only in memory (false) + * @return the created and scheduled task + * @throws IllegalArgumentException if period <= 0 or timeUnit is null + */ + ScheduledTask createRecurringTask(String taskType, long period, TimeUnit timeUnit, Runnable runnable, boolean persistent); + + /** + * Creates a new task builder for fluent task creation. + * The builder pattern provides a more readable way to configure tasks + * with optional parameters. + * Example usage: + *
+     * <pre>
+     * schedulerService.newTask("myTask")
+     *     .withPeriod(1, TimeUnit.HOURS)
+     *     .withSimpleExecutor(() -> doSomething())
+     *     .schedule();
+     * </pre>
+ * + * @param taskType unique identifier for the task type + * @return a builder to configure and create the task + */ + TaskBuilder newTask(String taskType); + + /** + * Gets the value of a specific metric. + * @param metric The metric name + * @return The current value of the metric + */ + long getMetric(String metric); + + /** + * Resets all metrics to zero. + */ + void resetMetrics(); + + /** + * Gets all metrics as a map. + * @return Map of metric names to their current values + */ + Map getAllMetrics(); + + List findTasksByStatus(ScheduledTask.TaskStatus taskStatus); + + /** + * Builder interface for fluent task creation. + * This interface provides methods to configure all aspects of a task + * in a readable manner. + */ + interface TaskBuilder { + /** + * Sets task parameters. + * @param parameters task-specific parameters + */ + TaskBuilder withParameters(Map parameters); + + /** + * Sets initial execution delay. + * @param initialDelay delay before first execution + * @param timeUnit time unit for delay + */ + TaskBuilder withInitialDelay(long initialDelay, TimeUnit timeUnit); + + /** + * Sets execution period. + * @param period time between executions + * @param timeUnit time unit for period + */ + TaskBuilder withPeriod(long period, TimeUnit timeUnit); + + /** + * Uses fixed delay scheduling. + * Period is measured from completion of one execution to start of next. + */ + TaskBuilder withFixedDelay(); + + /** + * Uses fixed rate scheduling. + * Period is measured from start of one execution to start of next. + */ + TaskBuilder withFixedRate(); + + /** + * Makes this a one-shot task. + * Task will execute once and then be disabled. + */ + TaskBuilder asOneShot(); + + /** + * Disallows parallel execution. + * Task will use locking to ensure only one instance runs at a time. + */ + TaskBuilder disallowParallelExecution(); + + /** + * Sets the task executor. 
+ * @param executor the executor to handle this task + */ + TaskBuilder withExecutor(TaskExecutor executor); + + /** + * Sets a simple runnable as the executor. + * @param runnable the code to execute + */ + TaskBuilder withSimpleExecutor(Runnable runnable); + + /** + * Makes this a non-persistent task. + * Task will only exist in memory on this node. + */ + TaskBuilder nonPersistent(); + + /** + * Runs the task on all nodes in the cluster rather than just executor nodes. + * This is helpful for distributed cache refreshes or local data maintenance. + */ + TaskBuilder runOnAllNodes(); + + /** + * Marks this task as a system task. + * System tasks are created during system initialization and should be + * preserved across restarts rather than being recreated. + * + * @return this builder for method chaining + */ + TaskBuilder asSystemTask(); + + /** + * Sets the maximum number of retry attempts after failures. + * For one-shot tasks: + * - When a task fails, it will be automatically retried up to this many times + * - Each retry attempt occurs after waiting for retryDelay + * - After reaching this limit, the task remains in FAILED state until manually retried + * + * For periodic tasks: + * - Retries only apply within a single scheduled execution + * - If retries are exhausted, the task will still attempt its next scheduled execution + * - The next scheduled execution resets the failure count + * + * A value of 0 means no automatic retries in either case. + * + * @param maxRetries maximum number of retries (must be >= 0) + * @throws IllegalArgumentException if maxRetries is negative + */ + TaskBuilder withMaxRetries(int maxRetries); + + /** + * Sets the delay between retry attempts. 
+ * For one-shot tasks: + * - This delay is applied between each retry attempt after a failure + * - Helps prevent rapid-fire retries that could overload the system + * + * For periodic tasks: + * - This delay is used between retry attempts within a single scheduled execution + * - Does not affect the task's configured period/scheduling + * + * @param delay delay duration (must be >= 0) + * @param unit time unit for delay + * @throws IllegalArgumentException if delay is negative + */ + TaskBuilder withRetryDelay(long delay, TimeUnit unit); + + /** + * Sets the task dependencies. + * The task will not execute until all dependencies have completed. + * @param taskIds IDs of tasks this task depends on + */ + TaskBuilder withDependencies(String... taskIds); + + /** + * Creates and schedules the task with current configuration. + * @return the created and scheduled task + */ + ScheduledTask schedule(); + } } diff --git a/api/src/main/java/org/apache/unomi/api/tasks/ScheduledTask.java b/api/src/main/java/org/apache/unomi/api/tasks/ScheduledTask.java new file mode 100644 index 0000000000..6f08143615 --- /dev/null +++ b/api/src/main/java/org/apache/unomi/api/tasks/ScheduledTask.java @@ -0,0 +1,873 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.unomi.api.tasks; + +import org.apache.unomi.api.Item; + +import java.io.Serializable; +import java.util.Date; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +/** + * Represents a persistent scheduled task that can be executed across a cluster. + * This class provides a comprehensive model for task scheduling and execution with features including: + *
+ * <ul>
+ *   <li>Task lifecycle management through states (SCHEDULED, WAITING, RUNNING, etc.)</li>
+ *   <li>Lock management for cluster coordination</li>
+ *   <li>Execution history and checkpoint data for recovery</li>
+ *   <li>Support for one-shot and periodic execution</li>
+ *   <li>Task dependencies and parallel execution control</li>
+ *   <li>Cluster-wide task distribution</li>
+ * </ul>
+ */ +public class ScheduledTask extends Item implements Serializable { + + public static final String ITEM_TYPE = "scheduledTask"; + + /** + * Enumeration of possible task states in its lifecycle. + * Tasks transition between these states based on execution progress and cluster conditions. + */ + public enum TaskStatus { + /** Task is scheduled but not yet running */ + SCHEDULED, + /** Task is waiting for a lock to be released or dependencies to complete */ + WAITING, + /** Task is currently executing */ + RUNNING, + /** Task has completed successfully */ + COMPLETED, + /** Task failed with an error */ + FAILED, + /** Task was explicitly cancelled */ + CANCELLED, + /** Task crashed due to node failure or other unexpected conditions */ + CRASHED + } + + private String taskType; + private Map parameters; + private String executingNodeId; // The ID of the node currently executing this task + /** + * The initial delay before first execution, in the specified time unit. + */ + private long initialDelay; + private long period; + private TimeUnit timeUnit; + private boolean fixedRate; + /** + * Gets the date of the last execution attempt. + * + * @return the last execution date or null if never executed + */ + private Date lastExecutionDate; + /** + * Gets the node ID that last executed this task. + * + * @return the ID of the last executing node + */ + private String lastExecutedBy; + /** + * Gets the error message from the last failed execution. + * + * @return the last error message or null if no error + */ + private String lastError; + private boolean enabled; + private String lockOwner; + /** + * Gets the date when the current lock was acquired. + * + * @return the lock acquisition date or null if unlocked + */ + private Date lockDate; + private boolean oneShot; + private boolean allowParallelExecution; + /** + * Gets the current task status. 
+ * + * @return the current status + */ + private TaskStatus status; + private Map statusDetails; + /** + * Gets the next scheduled execution date for periodic tasks. + * + * @return the next scheduled execution date or null if not scheduled + */ + private Date nextScheduledExecution; + /** + * Gets the number of consecutive execution failures. + * + * @return the failure count + */ + private int failureCount; + /** + * Gets the number of successful executions. + * + * @return the success count + */ + private int successCount; + /** + * Gets the maximum number of retry attempts after failures. + * For one-shot tasks: + * - When a task fails, it will be automatically retried up to this many times + * - Each retry attempt occurs after waiting for retryDelay + * - After reaching this limit, the task remains in FAILED state until manually retried + * + * For periodic tasks: + * - Retries only apply within a single scheduled execution + * - If retries are exhausted, the task will still attempt its next scheduled execution + * - The next scheduled execution resets the failure count + * + * A value of 0 means no automatic retries in either case. + * + * @return the maximum retry count + */ + private int maxRetries; + /** + * Gets the delay between retry attempts. + * For one-shot tasks: + * - This delay is applied between each retry attempt after a failure + * - Helps prevent rapid-fire retries that could overload the system + * + * For periodic tasks: + * - This delay is used between retry attempts within a single scheduled execution + * - Does not affect the task's configured period/scheduling + * + * @return the retry delay in milliseconds + */ + private long retryDelay; + /** + * Gets the name of the current execution step. + * This is used to track progress through multi-step tasks. + * + * @return the current step name or null if not set + */ + private String currentStep; + /** + * Gets the checkpoint data for task resumption. 
+ * This data allows a task to resume from where it left off after a crash. + * + * @return map of checkpoint data or null if no checkpoint + */ + private Map checkpointData; + private boolean persistent = true; // By default tasks are persistent + private boolean runOnAllNodes = false; // By default tasks run on a single node + /** + * Indicates if this is a system task that should not be recreated on startup. + * System tasks are created by the system during initialization and should be + * preserved across restarts. + */ + private boolean systemTask = false; // By default tasks are not system tasks + /** + * Gets the task type that this task is waiting for a lock on. + * This is used when tasks of the same type cannot run in parallel. + * + * @return the task type being waited on or null if not waiting + */ + private String waitingForTaskType; + private Set dependsOn = new HashSet<>(); // Set of task IDs this task depends on + private Set waitingOnTasks = new HashSet<>(); // Set of task IDs this task is currently waiting on + + public ScheduledTask() { + super(); + this.status = TaskStatus.SCHEDULED; + this.failureCount = 0; + this.maxRetries = 3; + this.retryDelay = 60000; // 1 minute default retry delay + } + + /** + * Gets the task type identifier. + * The task type determines which executor will handle this task. + * + * @return the task type identifier + */ + public String getTaskType() { + return taskType; + } + + /** + * Sets the task type identifier. + * + * @param taskType the task type identifier + */ + public void setTaskType(String taskType) { + this.taskType = taskType; + } + + /** + * Gets the task parameters. + * These parameters are passed to the task executor during execution. + * + * @return map of task parameters + */ + public Map getParameters() { + return parameters; + } + + /** + * Sets the task parameters. 
+ * + * @param parameters map of task parameters + */ + public void setParameters(Map parameters) { + this.parameters = parameters; + } + + /** + * Gets the initial delay before first execution. + * + * @return the initial delay in the specified time unit + */ + public long getInitialDelay() { + return initialDelay; + } + + /** + * Sets the initial delay before first execution. + * + * @param initialDelay the initial delay in the specified time unit + */ + public void setInitialDelay(long initialDelay) { + this.initialDelay = initialDelay; + } + + /** + * Gets the period between successive task executions. + * A period of 0 indicates a one-time task and will automatically set oneShot=true. + * + * @return the period between executions in the specified time unit + */ + public long getPeriod() { + return period; + } + + /** + * Sets the period for task execution. + * A period of 0 indicates a one-time task and will automatically set oneShot=true. + * A positive period indicates a recurring task and is incompatible with oneShot=true. + * + * @param period the period between successive task executions + * @throws IllegalArgumentException if period is negative or if period > 0 and oneShot=true + */ + public void setPeriod(long period) { + if (period < 0) { + throw new IllegalArgumentException("Period cannot be negative"); + } + if (period > 0 && oneShot) { + throw new IllegalArgumentException("One-shot tasks cannot have a period"); + } + this.period = period; + if (period == 0) { + this.oneShot = true; + } + } + + /** + * Gets the time unit for delay and period values. + * + * @return the time unit used for scheduling + */ + public TimeUnit getTimeUnit() { + return timeUnit; + } + + /** + * Sets the time unit for delay and period values. + * + * @param timeUnit the time unit to use for scheduling + */ + public void setTimeUnit(TimeUnit timeUnit) { + this.timeUnit = timeUnit; + } + + /** + * Gets whether this task uses fixed-rate scheduling. 
+ * If true, executions are scheduled at fixed intervals from the start time. + * If false, executions are scheduled at fixed delays from completion. + * + * @return true if using fixed-rate scheduling + */ + public boolean isFixedRate() { + return fixedRate; + } + + /** + * Sets whether this task uses fixed-rate scheduling. + * + * @param fixedRate true to use fixed-rate scheduling, false for fixed-delay + */ + public void setFixedRate(boolean fixedRate) { + this.fixedRate = fixedRate; + } + + /** + * Gets the date of the last execution attempt. + * + * @return the last execution date or null if never executed + */ + public Date getLastExecutionDate() { + return lastExecutionDate; + } + + /** + * Sets the date of the last execution attempt. + * + * @param lastExecutionDate the last execution date + */ + public void setLastExecutionDate(Date lastExecutionDate) { + this.lastExecutionDate = lastExecutionDate; + } + + /** + * Gets the node ID that last executed this task. + * + * @return the ID of the last executing node + */ + public String getLastExecutedBy() { + return lastExecutedBy; + } + + /** + * Sets the node ID that last executed this task. + * + * @param lastExecutedBy the ID of the executing node + */ + public void setLastExecutedBy(String lastExecutedBy) { + this.lastExecutedBy = lastExecutedBy; + } + + /** + * Gets the error message from the last failed execution. + * + * @return the last error message or null if no error + */ + public String getLastError() { + return lastError; + } + + /** + * Sets the error message from a failed execution. + * + * @param lastError the error message + */ + public void setLastError(String lastError) { + this.lastError = lastError; + } + + /** + * Gets whether this task is enabled. + * Disabled tasks will not be executed. + * + * @return true if the task is enabled + */ + public boolean isEnabled() { + return enabled; + } + + /** + * Sets whether this task is enabled. 
+ * + * @param enabled true to enable the task, false to disable + */ + public void setEnabled(boolean enabled) { + this.enabled = enabled; + } + + /** + * Gets the ID of the node that currently holds the execution lock. + * + * @return the current lock owner's node ID or null if unlocked + */ + public String getLockOwner() { + return lockOwner; + } + + /** + * Sets the ID of the node that holds the execution lock. + * + * @param lockOwner the lock owner's node ID + */ + public void setLockOwner(String lockOwner) { + this.lockOwner = lockOwner; + } + + /** + * Gets the date when the current lock was acquired. + * + * @return the lock acquisition date or null if unlocked + */ + public Date getLockDate() { + return lockDate; + } + + /** + * Sets the date when the current lock was acquired. + * + * @param lockDate the lock acquisition date + */ + public void setLockDate(Date lockDate) { + this.lockDate = lockDate; + } + + /** + * Returns whether this task should execute only once. + * Tasks with period=0 are automatically marked as one-shot tasks. + * + * @return true if the task should execute only once + */ + public boolean isOneShot() { + return oneShot; + } + + /** + * Sets whether this task should execute only once. + * Setting oneShot=true is incompatible with a period > 0. + * + * @param oneShot true if the task should execute only once + * @throws IllegalArgumentException if oneShot=true and period > 0 + */ + public void setOneShot(boolean oneShot) { + if (oneShot && period > 0) { + throw new IllegalArgumentException("One-shot tasks cannot have a period"); + } + this.oneShot = oneShot; + } + + /** + * Gets whether parallel execution is allowed for this task. + * If true, multiple instances of this task can run simultaneously. + * If false, the task uses locking to ensure only one instance runs at a time. 
+ * + * @return true if parallel execution is allowed + */ + public boolean isAllowParallelExecution() { + return allowParallelExecution; + } + + /** + * Sets whether parallel execution is allowed for this task. + * + * @param allowParallelExecution true to allow parallel execution + */ + public void setAllowParallelExecution(boolean allowParallelExecution) { + this.allowParallelExecution = allowParallelExecution; + } + + /** + * Gets the current task status. + * + * @return the current status + */ + public TaskStatus getStatus() { + return status; + } + + /** + * Sets the task status. + * Status transitions should be validated before setting. + * + * @param status the new status + */ + public void setStatus(TaskStatus status) { + this.status = status; + } + + /** + * Gets additional details about the task's current status. + * This may include execution progress, history, or other metadata. + * + * @return map of status details + */ + public Map getStatusDetails() { + return statusDetails; + } + + /** + * Sets additional details about the task's current status. + * + * @param statusDetails map of status details + */ + public void setStatusDetails(Map statusDetails) { + this.statusDetails = statusDetails; + } + + /** + * Gets the next scheduled execution date for periodic tasks. + * + * @return the next scheduled execution date or null if not scheduled + */ + public Date getNextScheduledExecution() { + return nextScheduledExecution; + } + + /** + * Sets the next scheduled execution date. + * + * @param nextScheduledExecution the next execution date + */ + public void setNextScheduledExecution(Date nextScheduledExecution) { + this.nextScheduledExecution = nextScheduledExecution; + } + + /** + * Gets the number of consecutive execution failures. + * + * @return the failure count + */ + public int getFailureCount() { + return failureCount; + } + + /** + * Sets the number of consecutive execution failures. 
+ * + * @param failureCount the new failure count + */ + public void setFailureCount(int failureCount) { + this.failureCount = failureCount; + } + + /** + * Gets the number of successful executions. + * + * @return the success count + */ + public int getSuccessCount() { + return successCount; + } + + /** + * Sets the number of successful executions. + * + * @param successCount the new success count + */ + public void setSuccessCount(int successCount) { + this.successCount = successCount; + } + + /** + * Gets the maximum number of retry attempts after failures. + * For one-shot tasks: + * - When a task fails, it will be automatically retried up to this many times + * - Each retry attempt occurs after waiting for retryDelay + * - After reaching this limit, the task remains in FAILED state until manually retried + * + * For periodic tasks: + * - Retries only apply within a single scheduled execution + * - If retries are exhausted, the task will still attempt its next scheduled execution + * - The next scheduled execution resets the failure count + * + * A value of 0 means no automatic retries in either case. + * + * @return the maximum retry count + */ + public int getMaxRetries() { + return maxRetries; + } + + /** + * Sets the maximum number of retry attempts after failures. + * + * @param maxRetries the maximum retry count + */ + public void setMaxRetries(int maxRetries) { + this.maxRetries = maxRetries; + } + + /** + * Gets the delay between retry attempts. + * For one-shot tasks: + * - This delay is applied between each retry attempt after a failure + * - Helps prevent rapid-fire retries that could overload the system + * + * For periodic tasks: + * - This delay is used between retry attempts within a single scheduled execution + * - Does not affect the task's configured period/scheduling + * + * @return the retry delay in milliseconds + */ + public long getRetryDelay() { + return retryDelay; + } + + /** + * Sets the delay between retry attempts. 
+     *
+     * @param retryDelay the retry delay in milliseconds
+     */
+    public void setRetryDelay(long retryDelay) {
+        this.retryDelay = retryDelay;
+    }
+
+    /**
+     * Gets the name of the current execution step.
+     * This is used to track progress through multi-step tasks.
+     *
+     * @return the current step name or null if not set
+     */
+    public String getCurrentStep() {
+        return currentStep;
+    }
+
+    /**
+     * Sets the name of the current execution step.
+     *
+     * @param currentStep the current step name
+     */
+    public void setCurrentStep(String currentStep) {
+        this.currentStep = currentStep;
+    }
+
+    /**
+     * Gets the checkpoint data for task resumption.
+     * This data allows a task to resume from where it left off after a crash.
+     *
+     * @return map of checkpoint data or null if no checkpoint
+     */
+    public Map getCheckpointData() {
+        return checkpointData;
+    }
+
+    /**
+     * Sets the checkpoint data for task resumption.
+     *
+     * @param checkpointData map of checkpoint data
+     */
+    public void setCheckpointData(Map checkpointData) {
+        this.checkpointData = checkpointData;
+    }
+
+    /**
+     * Gets whether this task is stored persistently.
+     * Persistent tasks survive system restarts and are visible across the cluster.
+     * Non-persistent tasks exist only in memory on a single node.
+     *
+     * @return true if the task is persistent
+     */
+    public boolean isPersistent() {
+        return persistent;
+    }
+    /**
+     * Sets whether this task is stored persistently.
+     * This setter only records the flag; it performs no validation and triggers no storage operation itself.
+     *
+     * @param persistent true to mark the task as persistent, false to mark it as non-persistent
+     */
+    public void setPersistent(boolean persistent) {
+        this.persistent = persistent;
+    }
+
+    /**
+     * Gets whether this task should run on all cluster nodes.
+     * If false, the task runs only on executor nodes.
+     *
+     * @return true if the task should run on all nodes
+     */
+    public boolean isRunOnAllNodes() {
+        return runOnAllNodes;
+    }
+
+    /**
+     * Sets whether this task should run on all cluster nodes.
+ * + * @param runOnAllNodes true to run on all nodes, false for executor nodes only + */ + public void setRunOnAllNodes(boolean runOnAllNodes) { + this.runOnAllNodes = runOnAllNodes; + } + + /** + * Gets whether this task is a system task. + * System tasks are created by the system during initialization and should be + * preserved across restarts rather than being recreated. + * + * @return true if the task is a system task + */ + public boolean isSystemTask() { + return systemTask; + } + + /** + * Sets whether this task is a system task. + * + * @param systemTask true to mark the task as a system task + */ + public void setSystemTask(boolean systemTask) { + this.systemTask = systemTask; + } + + /** + * Gets the task type that this task is waiting for a lock on. + * This is used when tasks of the same type cannot run in parallel. + * + * @return the task type being waited on or null if not waiting + */ + public String getWaitingForTaskType() { + return waitingForTaskType; + } + + /** + * Sets the task type that this task is waiting for a lock on. + * + * @param waitingForTaskType the task type to wait for + */ + public void setWaitingForTaskType(String waitingForTaskType) { + this.waitingForTaskType = waitingForTaskType; + } + + /** + * Gets the set of task IDs that this task depends on. + * The task will not execute until all dependencies have completed. + * + * @return set of dependency task IDs + */ + public Set getDependsOn() { + return dependsOn; + } + + /** + * Sets the set of task IDs that this task depends on. + * + * @param dependsOn set of dependency task IDs + */ + public void setDependsOn(Set dependsOn) { + this.dependsOn = dependsOn; + } + + /** + * Gets the set of task IDs that this task is currently waiting on. + * This represents the subset of dependencies that have not yet completed. 
+     *
+     * @return set of task IDs being waited on
+     */
+    public Set getWaitingOnTasks() {
+        return waitingOnTasks;
+    }
+
+    /**
+     * Sets the set of task IDs that this task is currently waiting on.
+     * The given set is stored as-is (no copy is made), replacing any previous set.
+     *
+     * @param waitingOnTasks set of task IDs to wait on
+     */
+    public void setWaitingOnTasks(Set waitingOnTasks) {
+        this.waitingOnTasks = waitingOnTasks;
+    }
+
+    /**
+     * Adds a task dependency.
+     * The task will not execute until all dependencies have completed.
+     * If the dependency set is currently null, a new HashSet is created before the ID is added.
+     *
+     * @param taskId ID of the task to depend on
+     */
+    public void addDependency(String taskId) {
+        if (dependsOn == null) {
+            dependsOn = new HashSet<>();
+        }
+        dependsOn.add(taskId);
+    }
+
+    /**
+     * Removes a task dependency.
+     * Does nothing when the dependency set is null; removing an ID that is not present
+     * leaves the set unchanged.
+     *
+     * @param taskId ID of the task to remove from dependencies
+     */
+    public void removeDependency(String taskId) {
+        if (dependsOn != null) {
+            dependsOn.remove(taskId);
+        }
+    }
+
+    /**
+     * Adds a task to the set of tasks being waited on.
+     * If the waiting set is currently null, a new HashSet is created before the ID is added.
+     *
+     * @param taskId ID of the task to wait on
+     */
+    public void addWaitingOnTask(String taskId) {
+        if (waitingOnTasks == null) {
+            waitingOnTasks = new HashSet<>();
+        }
+        waitingOnTasks.add(taskId);
+    }
+
+    /**
+     * Removes a task from the set of tasks being waited on.
+     * Does nothing when the waiting set is null; the set itself is kept even if it becomes empty.
+     *
+     * @param taskId ID of the task to stop waiting on
+     */
+    public void removeWaitingOnTask(String taskId) {
+        if (waitingOnTasks != null) {
+            waitingOnTasks.remove(taskId);
+        }
+    }
+
+    /**
+     * Gets the ID of the node currently executing this task.
+     * This is different from lockOwner as it specifically indicates which node
+     * is actively executing the task, not just holding the lock.
+     *
+     * @return the ID of the executing node or null if not being executed
+     */
+    public String getExecutingNodeId() {
+        return executingNodeId;
+    }
+
+    /**
+     * Sets the ID of the node currently executing this task.
+ * + * @param executingNodeId the ID of the executing node + */ + public void setExecutingNodeId(String executingNodeId) { + this.executingNodeId = executingNodeId; + } + + @Override + public String toString() { + return "ScheduledTask{" + + "taskType='" + taskType + '\'' + + ", parameters=" + parameters + + ", executingNodeId='" + executingNodeId + '\'' + + ", initialDelay=" + initialDelay + + ", period=" + period + + ", timeUnit=" + timeUnit + + ", fixedRate=" + fixedRate + + ", lastExecutionDate=" + lastExecutionDate + + ", lastExecutedBy='" + lastExecutedBy + '\'' + + ", lastError='" + lastError + '\'' + + ", enabled=" + enabled + + ", lockOwner='" + lockOwner + '\'' + + ", lockDate=" + lockDate + + ", oneShot=" + oneShot + + ", allowParallelExecution=" + allowParallelExecution + + ", status=" + status + + ", statusDetails=" + statusDetails + + ", nextScheduledExecution=" + nextScheduledExecution + + ", failureCount=" + failureCount + + ", successCount=" + successCount + + ", maxRetries=" + maxRetries + + ", retryDelay=" + retryDelay + + ", currentStep='" + currentStep + '\'' + + ", checkpointData=" + checkpointData + + ", persistent=" + persistent + + ", runOnAllNodes=" + runOnAllNodes + + ", systemTask=" + systemTask + + ", waitingForTaskType='" + waitingForTaskType + '\'' + + ", dependsOn=" + dependsOn + + ", waitingOnTasks=" + waitingOnTasks + + '}'; + } +} diff --git a/api/src/main/java/org/apache/unomi/api/tasks/TaskExecutor.java b/api/src/main/java/org/apache/unomi/api/tasks/TaskExecutor.java new file mode 100644 index 0000000000..1c1f775055 --- /dev/null +++ b/api/src/main/java/org/apache/unomi/api/tasks/TaskExecutor.java @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.unomi.api.tasks; + +import java.util.Map; + +/** + * Interface for task executors that can execute scheduled tasks. + * Task executors are responsible for the actual execution of tasks and provide: + *
<ul>
+ *   <li>Task type identification</li>
+ *   <li>Task execution logic</li>
+ *   <li>Optional task resumption capabilities</li>
+ *   <li>Progress and status reporting through callbacks</li>
+ * </ul>
+ * + * Implementations should be thread-safe as they may be called concurrently + * from multiple threads to execute different tasks of the same type. + */ +public interface TaskExecutor { + + /** + * Gets the type of tasks this executor can handle. + * The task type is used to match tasks with their appropriate executor. + * Each executor must have a unique task type. + * + * @return the task type string identifier + */ + String getTaskType(); + + /** + * Executes a scheduled task. + * This method contains the core execution logic for the task. + * The implementation should: + *
<ul>
+ *   <li>Use the task parameters to perform the required work</li>
+ *   <li>Report progress through the status callback</li>
+ *   <li>Handle errors appropriately</li>
+ *   <li>Call callback.complete() on successful completion</li>
+ *   <li>Call callback.fail() if execution fails</li>
+ * </ul>
+ * + * @param task the task to execute + * @param statusCallback callback to update task status during execution + * @throws Exception if task execution fails + */ + void execute(ScheduledTask task, TaskStatusCallback statusCallback) throws Exception; + + /** + * Checks if this executor can resume a crashed task from its checkpoint. + * Implementations should examine the task's checkpoint data to determine + * if resumption is possible. + * + * @param task the crashed task + * @return true if the task can be resumed from its checkpoint + */ + default boolean canResume(ScheduledTask task) { + return false; + } + + /** + * Resumes a crashed task from its checkpoint. + * This method is called instead of execute() when resuming a crashed task. + * The default implementation simply calls execute(), but implementations + * can override this to provide custom resumption logic. + * + * @param task the crashed task + * @param statusCallback callback to update task status + * @throws Exception if task resumption fails + */ + default void resume(ScheduledTask task, TaskStatusCallback statusCallback) throws Exception { + execute(task, statusCallback); + } + + /** + * Callback interface for task status updates. + * This interface allows executors to report progress and status changes + * during task execution. + */ + interface TaskStatusCallback { + /** + * Updates the current step of the task. + * Use this to indicate progress through different phases of execution. + * + * @param step the current step name + * @param details optional step details as key-value pairs + */ + void updateStep(String step, Map details); + + /** + * Saves a checkpoint for the task. + * Checkpoints allow long-running tasks to be resumed after crashes. + * The checkpoint data should contain sufficient information to + * resume execution from this point. 
+ * + * @param checkpointData the checkpoint data as key-value pairs + */ + void checkpoint(Map checkpointData); + + /** + * Updates task status details. + * Use this to provide additional information about the task's + * current state or progress. + * + * @param details the status details as key-value pairs + */ + void updateStatusDetails(Map details); + + /** + * Marks task as completed. + * This should be called when the task has successfully finished + * all its work. + */ + void complete(); + + /** + * Marks task as failed. + * This should be called when the task encounters an error that + * prevents successful completion. + * + * @param error the error message describing the failure + */ + void fail(String error); + } +} diff --git a/services/src/main/java/org/apache/unomi/services/impl/cluster/ClusterServiceImpl.java b/services/src/main/java/org/apache/unomi/services/impl/cluster/ClusterServiceImpl.java index e71973ce35..f741d8d5ff 100644 --- a/services/src/main/java/org/apache/unomi/services/impl/cluster/ClusterServiceImpl.java +++ b/services/src/main/java/org/apache/unomi/services/impl/cluster/ClusterServiceImpl.java @@ -25,6 +25,7 @@ import org.apache.unomi.api.conditions.Condition; import org.apache.unomi.api.conditions.ConditionType; import org.apache.unomi.api.services.ClusterService; +import org.apache.unomi.api.services.SchedulerService; import org.apache.unomi.lifecycle.BundleWatcher; import org.apache.unomi.persistence.spi.PersistenceService; import org.slf4j.Logger; @@ -48,8 +49,7 @@ public class ClusterServiceImpl implements ClusterService { private String publicAddress; private String internalAddress; - //private SchedulerService schedulerService; /* Wait for PR UNOMI-878 to reactivate that code - private ScheduledExecutorService scheduledExecutorService = Executors.newScheduledThreadPool(3); + private SchedulerService schedulerService; private String nodeId; private long nodeStartTime; private long nodeStatisticsUpdateFrequency = 10000; @@ 
-58,8 +58,8 @@ public class ClusterServiceImpl implements ClusterService { private volatile List cachedClusterNodes = Collections.emptyList(); private BundleWatcher bundleWatcher; - private ScheduledFuture updateSystemStatsFuture; - private ScheduledFuture cleanupStaleNodesFuture; + private String updateSystemStatsTaskId; + private String cleanupStaleNodesTaskId; /** * Max time to wait for persistence service (in milliseconds) @@ -140,7 +140,6 @@ public void setNodeStatisticsUpdateFrequency(long nodeStatisticsUpdateFrequency) this.nodeStatisticsUpdateFrequency = nodeStatisticsUpdateFrequency; } - /* Wait for PR UNOMI-878 to reactivate that code public void setSchedulerService(SchedulerService schedulerService) { this.schedulerService = schedulerService; @@ -151,21 +150,17 @@ public void setSchedulerService(SchedulerService schedulerService) { initializeScheduledTasks(); } } - */ - /* Wait for PR UNOMI-878 to reactivate that code /** * Unbind method for the scheduler service, called by the OSGi framework when the service is unregistered * @param schedulerService The scheduler service being unregistered */ - /* public void unsetSchedulerService(SchedulerService schedulerService) { if (this.schedulerService == schedulerService) { LOGGER.info("SchedulerService was unset"); this.schedulerService = null; } } - */ public void setNodeId(String nodeId) { this.nodeId = nodeId; @@ -196,16 +191,12 @@ public void init() { // Register this node in the persistence service registerNodeInPersistence(); - /* Wait for PR UNOMI-878 to reactivate that code - /* // Only initialize scheduled tasks if scheduler service is available if (schedulerService != null) { initializeScheduledTasks(); } else { LOGGER.warn("SchedulerService not available during ClusterService initialization. Scheduled tasks will not be registered. 
They will be registered when SchedulerService becomes available."); } - */ - initializeScheduledTasks(); LOGGER.info("Cluster service initialized with node ID: {}", nodeId); } @@ -233,10 +224,7 @@ public void run() { } } }; - /* Wait for PR UNOMI-878 to reactivate that code - schedulerService.createRecurringTask("clusterNodeStatisticsUpdate", nodeStatisticsUpdateFrequency, TimeUnit.MILLISECONDS, statisticsTask, false); - */ - updateSystemStatsFuture = scheduledExecutorService.scheduleAtFixedRate(statisticsTask, 100, nodeStatisticsUpdateFrequency, TimeUnit.MILLISECONDS); + updateSystemStatsTaskId = schedulerService.createRecurringTask("clusterNodeStatisticsUpdate", nodeStatisticsUpdateFrequency, TimeUnit.MILLISECONDS, statisticsTask, false).getItemId(); // Schedule cleanup of stale nodes TimerTask cleanupTask = new TimerTask() { @@ -249,10 +237,7 @@ public void run() { } } }; - /* Wait for PR UNOMI-878 to reactivate that code - schedulerService.createRecurringTask("clusterStaleNodesCleanup", 60000, TimeUnit.MILLISECONDS, cleanupTask, false); - */ - cleanupStaleNodesFuture = scheduledExecutorService.scheduleAtFixedRate(cleanupTask, 100, 60000, TimeUnit.MILLISECONDS); + cleanupStaleNodesTaskId = schedulerService.createRecurringTask("clusterStaleNodesCleanup", 60000, TimeUnit.MILLISECONDS, cleanupTask, false).getItemId(); LOGGER.info("Cluster service scheduled tasks initialized"); } @@ -262,34 +247,11 @@ public void destroy() { shutdownNow = true; // Cancel scheduled tasks - if (updateSystemStatsFuture != null) { - boolean successfullyCancelled = updateSystemStatsFuture.cancel(false); - if (!successfullyCancelled) { - LOGGER.warn("Failed to cancel scheduled task: clusterNodeStatisticsUpdate"); - } else { - LOGGER.info("Scheduled task: clusterNodeStatisticsUpdate cancelled"); - } + if (updateSystemStatsTaskId != null) { + schedulerService.cancelTask(updateSystemStatsTaskId); } - if (cleanupStaleNodesFuture != null) { - boolean successfullyCancelled = 
cleanupStaleNodesFuture.cancel(false); - if (!successfullyCancelled) { - LOGGER.warn("Failed to cancel scheduled task: cleanupStaleNodesFuture"); - } else { - LOGGER.info("Scheduled task: cleanupStaleNodesFuture cancelled"); - } - } - if (scheduledExecutorService != null) { - scheduledExecutorService.shutdownNow(); - try { - boolean successfullyTerminated = scheduledExecutorService.awaitTermination(10, TimeUnit.SECONDS); - if (!successfullyTerminated) { - LOGGER.warn("Failed to terminate scheduled tasks after 10 seconds..."); - } else { - LOGGER.info("Scheduled tasks terminated"); - } - } catch (InterruptedException e) { - LOGGER.error("Error waiting for scheduled tasks to terminate", e); - } + if (cleanupStaleNodesTaskId != null) { + schedulerService.cancelTask(cleanupStaleNodesTaskId); } // Remove node from persistence service diff --git a/services/src/main/java/org/apache/unomi/services/impl/scheduler/PersistenceSchedulerProvider.java b/services/src/main/java/org/apache/unomi/services/impl/scheduler/PersistenceSchedulerProvider.java new file mode 100644 index 0000000000..3003181eeb --- /dev/null +++ b/services/src/main/java/org/apache/unomi/services/impl/scheduler/PersistenceSchedulerProvider.java @@ -0,0 +1,385 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.unomi.services.impl.scheduler; + +import org.apache.unomi.api.ClusterNode; +import org.apache.unomi.api.PartialList; +import org.apache.unomi.api.conditions.Condition; +import org.apache.unomi.api.conditions.ConditionType; +import org.apache.unomi.api.services.ClusterService; +import org.apache.unomi.api.tasks.ScheduledTask; +import org.apache.unomi.persistence.spi.PersistenceService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; + +public class PersistenceSchedulerProvider implements SchedulerProvider { + + private static final Logger LOGGER = LoggerFactory.getLogger(PersistenceSchedulerProvider.class.getName()); + + static { + SchedulerProvider.PROPERTY_CONDITION_TYPE.setItemId("propertyCondition"); + SchedulerProvider.PROPERTY_CONDITION_TYPE.setItemType(ConditionType.ITEM_TYPE); + SchedulerProvider.PROPERTY_CONDITION_TYPE.setVersion(1L); + SchedulerProvider.PROPERTY_CONDITION_TYPE.setConditionEvaluator("propertyConditionEvaluator"); + SchedulerProvider.PROPERTY_CONDITION_TYPE.setQueryBuilder("propertyConditionESQueryBuilder"); + }; + + static { + SchedulerProvider.BOOLEAN_CONDITION_TYPE.setItemId("booleanCondition"); + SchedulerProvider.BOOLEAN_CONDITION_TYPE.setItemType(ConditionType.ITEM_TYPE); + SchedulerProvider.BOOLEAN_CONDITION_TYPE.setVersion(1L); + SchedulerProvider.BOOLEAN_CONDITION_TYPE.setQueryBuilder("booleanConditionESQueryBuilder"); + SchedulerProvider.BOOLEAN_CONDITION_TYPE.setConditionEvaluator("booleanConditionEvaluator"); + }; + + private PersistenceService persistenceService; + private boolean executorNode; + private String nodeId; + private long completedTaskTtlDays; + private TaskLockManager lockManager; + private ClusterService clusterService; + + public void setPersistenceService(PersistenceService persistenceService) { + this.persistenceService = persistenceService; + } + + 
public void setExecutorNode(boolean executorNode) { + this.executorNode = executorNode; + } + + public void setNodeId(String nodeId) { + this.nodeId = nodeId; + } + + public void setCompletedTaskTtlDays(long completedTaskTtlDays) { + this.completedTaskTtlDays = completedTaskTtlDays; + } + + public void setLockManager(TaskLockManager lockManager) { + this.lockManager = lockManager; + } + + public void setClusterService(ClusterService clusterService) { + this.clusterService = clusterService; + } + + public void unsetClusterService(ClusterService clusterService) { + this.clusterService = null; + } + + public void postConstruct() { + + } + + public void preDestroy() { + try { + List tasks = findTasksByLockOwner(nodeId); + for (ScheduledTask task : tasks) { + try { + lockManager.releaseLock(task); + } catch (Exception e) { + LOGGER.debug("Error releasing lock for task {} during shutdown: {}", task.getItemId(), e.getMessage()); + } + } + LOGGER.debug("Task locks released"); + } catch (Exception e) { + LOGGER.warn("Error finding locked tasks during shutdown: {}", e.getMessage()); + } + } + + @Override + public List findTasksByLockOwner(String owner) { + try { + Condition condition = new Condition(SchedulerProvider.PROPERTY_CONDITION_TYPE); + condition.setParameter("propertyName", "lockOwner"); + condition.setParameter("comparisonOperator", "equals"); + condition.setParameter("propertyValue", owner); + return persistenceService.query(condition, null, ScheduledTask.class, 0, -1).getList(); + } catch (Exception e) { + LOGGER.error("Error finding tasks by lock owner: {}", e.getMessage()); + return new ArrayList<>(); + } + } + + @Override + public List findEnabledScheduledOrWaitingTasks() { + try { + Condition enabledCondition = new Condition(SchedulerProvider.PROPERTY_CONDITION_TYPE); + enabledCondition.setParameter("propertyName", "enabled"); + enabledCondition.setParameter("comparisonOperator", "equals"); + enabledCondition.setParameter("propertyValue", "true"); + + 
Condition statusCondition = new Condition(SchedulerProvider.PROPERTY_CONDITION_TYPE); + statusCondition.setParameter("propertyName", "status"); + statusCondition.setParameter("comparisonOperator", "in"); + statusCondition.setParameter("propertyValues", Arrays.asList( + ScheduledTask.TaskStatus.SCHEDULED, + ScheduledTask.TaskStatus.WAITING + )); + + Condition andCondition = new Condition(SchedulerProvider.BOOLEAN_CONDITION_TYPE); + andCondition.setParameter("operator", "and"); + andCondition.setParameter("subConditions", Arrays.asList(enabledCondition, statusCondition)); + + return persistenceService.query(andCondition, "creationDate:asc", ScheduledTask.class, 0, -1).getList(); + } catch (Exception e) { + LOGGER.error("Error finding enabled scheduled or waiting tasks: {}", e.getMessage()); + return new ArrayList<>(); + } + } + + @Override + public List findTasksByTypeAndStatus(String taskType, ScheduledTask.TaskStatus status) { + try { + Condition typeCondition = new Condition(SchedulerProvider.PROPERTY_CONDITION_TYPE); + typeCondition.setParameter("propertyName", "taskType"); + typeCondition.setParameter("comparisonOperator", "equals"); + typeCondition.setParameter("propertyValue", taskType); + + Condition statusCondition = new Condition(SchedulerProvider.PROPERTY_CONDITION_TYPE); + statusCondition.setParameter("propertyName", "status"); + statusCondition.setParameter("comparisonOperator", "equals"); + statusCondition.setParameter("propertyValue", status.toString()); + + Condition andCondition = new Condition(SchedulerProvider.BOOLEAN_CONDITION_TYPE); + andCondition.setParameter("operator", "and"); + andCondition.setParameter("subConditions", Arrays.asList(typeCondition, statusCondition)); + + return persistenceService.query(andCondition, null, ScheduledTask.class, 0, -1).getList(); + } catch (Exception e) { + LOGGER.error("Error finding tasks by type and status: {}", e.getMessage()); + return new ArrayList<>(); + } + } + + @Override + public ScheduledTask 
getTask(String taskId) { + try { + return persistenceService.load(taskId, ScheduledTask.class); + } catch (Exception e) { + LOGGER.error("Error loading task {}: {}", taskId, e.getMessage()); + return null; + } + } + + @Override + public List getAllTasks() { + try { + return persistenceService.getAllItems(ScheduledTask.class, 0, -1, null).getList(); + } catch (Exception e) { + LOGGER.error("Error getting persistent tasks: {}", e.getMessage()); + return new ArrayList<>(); + } + } + + @Override + public PartialList getTasksByStatus(ScheduledTask.TaskStatus status, int offset, int size, String sortBy) { + try { + Condition condition = new Condition(SchedulerProvider.PROPERTY_CONDITION_TYPE); + condition.setParameter("propertyName", "status"); + condition.setParameter("comparisonOperator", "equals"); + condition.setParameter("propertyValue", status.toString()); + return persistenceService.query(condition, sortBy, ScheduledTask.class, offset, size); + } catch (Exception e) { + LOGGER.error("Error getting tasks by status: {}", e.getMessage()); + return new PartialList(new ArrayList<>(), 0, 0, 0, PartialList.Relation.EQUAL); + } + } + + @Override + public PartialList getTasksByType(String taskType, int offset, int size, String sortBy) { + try { + Condition condition = new Condition(SchedulerProvider.PROPERTY_CONDITION_TYPE); + condition.setParameter("propertyName", "taskType"); + condition.setParameter("comparisonOperator", "equals"); + condition.setParameter("propertyValue", taskType); + return persistenceService.query(condition, sortBy, ScheduledTask.class, offset, size); + } catch (Exception e) { + LOGGER.error("Error getting tasks by type: {}", e.getMessage()); + return new PartialList(new ArrayList<>(), 0, 0, 0, PartialList.Relation.EQUAL); + } + } + + @Override + public void purgeOldTasks() { + if (!executorNode) { + LOGGER.debug("Not an executor node, skipping purge"); + return; + } + + try { + LOGGER.debug("Starting purge of old completed tasks with TTL: {} days", 
completedTaskTtlDays); + long purgeBeforeTime = System.currentTimeMillis() - (completedTaskTtlDays * 24 * 60 * 60 * 1000); + Date purgeBeforeDate = new Date(purgeBeforeTime); + + Condition statusCondition = new Condition(SchedulerProvider.PROPERTY_CONDITION_TYPE); + statusCondition.setParameter("propertyName", "status"); + statusCondition.setParameter("comparisonOperator", "equals"); + statusCondition.setParameter("propertyValue", ScheduledTask.TaskStatus.COMPLETED.toString()); + + Condition dateCondition = new Condition(SchedulerProvider.PROPERTY_CONDITION_TYPE); + dateCondition.setParameter("propertyName", "lastExecutionDate"); + dateCondition.setParameter("comparisonOperator", "lessThanOrEqualTo"); + dateCondition.setParameter("propertyValueDate", purgeBeforeDate); + + Condition andCondition = new Condition(SchedulerProvider.BOOLEAN_CONDITION_TYPE); + andCondition.setParameter("operator", "and"); + andCondition.setParameter("subConditions", Arrays.asList(statusCondition, dateCondition)); + + persistenceService.removeByQuery(andCondition, ScheduledTask.class); + LOGGER.debug("Completed purge of old tasks before date: {}", purgeBeforeDate); + } catch (Exception e) { + LOGGER.error("Error purging old tasks", e); + } + } + + @Override + public boolean saveTask(ScheduledTask task) { + if (task == null) { + return false; + } + + if (task.isPersistent()) { + try { + persistenceService.save(task); + LOGGER.debug("Saved task {} to persistence", task.getItemId()); + return true; + } catch (Exception e) { + LOGGER.error("Error saving task {} to persistence", task.getItemId(), e); + return false; + } + } else { + LOGGER.error("Can't handle in-memory task saving !"); + return false; + } + } + + @Override + public List getActiveNodes() { + Set activeNodes = new HashSet<>(); + + // Add this node + activeNodes.add(nodeId); + + // Use ClusterService if available to get cluster nodes + if (clusterService != null) { + try { + List clusterNodes = clusterService.getClusterNodes(); + 
if (clusterNodes != null && !clusterNodes.isEmpty()) { + // Consider nodes with recent heartbeats as active + long cutoffTime = System.currentTimeMillis() - (5 * 60 * 1000); // 5 minutes threshold + + for (ClusterNode node : clusterNodes) { + if (node.getLastHeartbeat() > cutoffTime) { + activeNodes.add(node.getItemId()); + } + } + + LOGGER.debug("Detected active cluster nodes via ClusterService: {}", activeNodes); + return new ArrayList<>(activeNodes); + } + } catch (Exception e) { + LOGGER.warn("Error retrieving cluster nodes from ClusterService: {}", e.getMessage()); + if (LOGGER.isDebugEnabled()) { + LOGGER.debug("Error details:", e); + } + } + } + + // Fallback: Look for other active nodes by checking tasks with recent locks + try { + // Create a condition to find tasks with recent locks + Condition recentLocksCondition = new Condition(); + recentLocksCondition.setConditionType(SchedulerProvider.PROPERTY_CONDITION_TYPE); + Map parameters = new HashMap<>(); + parameters.put("propertyName", "lockDate"); + parameters.put("comparisonOperator", "exists"); + recentLocksCondition.setParameterValues(parameters); + + // Query for tasks with lock information + List recentlyLockedTasks = persistenceService.query(recentLocksCondition, "lockDate", ScheduledTask.class); + + // Get current time for filtering + long fiveMinutesAgo = System.currentTimeMillis() - (5 * 60 * 1000); + + // Extract unique node IDs from lock owners with recent locks + for (ScheduledTask task : recentlyLockedTasks) { + if (task.getLockOwner() != null && task.getLockDate() != null && + task.getLockDate().getTime() > fiveMinutesAgo) { + activeNodes.add(task.getLockOwner()); + } + } + } catch (Exception e) { + // If we can't determine active nodes, just fall back to this node only + LOGGER.warn("Error detecting active cluster nodes: {}", e.getMessage()); + if (LOGGER.isDebugEnabled()) { + LOGGER.debug("Error details:", e); + } + } + + LOGGER.debug("Detected active cluster nodes: {}", activeNodes); + 
return new ArrayList<>(activeNodes); + } + + @Override + public void refreshTasks() { + try { + persistenceService.refreshIndex(ScheduledTask.class); + } catch (Exception e) { + LOGGER.error("Error refreshing task indices", e); + } + } + + @Override + public List findTasksByStatus(ScheduledTask.TaskStatus status) { + try { + Condition statusCondition = new Condition(SchedulerProvider.PROPERTY_CONDITION_TYPE); + statusCondition.setParameter("propertyName", "status"); + statusCondition.setParameter("comparisonOperator", "equals"); + statusCondition.setParameter("propertyValue", status); + + return persistenceService.query(statusCondition, null, ScheduledTask.class, 0, -1).getList(); + } catch (Exception e) { + LOGGER.error("Failed to find tasks by status: {}", e.getMessage()); + return Collections.emptyList(); + } + } + + @Override + public List findLockedTasks() { + Condition lockCondition = new Condition(SchedulerProvider.PROPERTY_CONDITION_TYPE); + lockCondition.setParameter("propertyName", "lockOwner"); + lockCondition.setParameter("comparisonOperator", "exists"); + + Condition statusCondition = new Condition(SchedulerProvider.PROPERTY_CONDITION_TYPE); + statusCondition.setParameter("propertyName", "status"); + statusCondition.setParameter("comparisonOperator", "in"); + statusCondition.setParameter("propertyValues", Arrays.asList( + ScheduledTask.TaskStatus.SCHEDULED, + ScheduledTask.TaskStatus.WAITING + )); + + Condition andCondition = new Condition(SchedulerProvider.BOOLEAN_CONDITION_TYPE); + andCondition.setParameter("operator", "and"); + andCondition.setParameter("subConditions", Arrays.asList(lockCondition, statusCondition)); + + return persistenceService.query(andCondition, null, ScheduledTask.class, 0, -1).getList(); + } + +} diff --git a/services/src/main/java/org/apache/unomi/services/impl/scheduler/SchedulerConstants.java b/services/src/main/java/org/apache/unomi/services/impl/scheduler/SchedulerConstants.java new file mode 100644 index 
0000000000..6b7e9f54f9 --- /dev/null +++ b/services/src/main/java/org/apache/unomi/services/impl/scheduler/SchedulerConstants.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.unomi.services.impl.scheduler; + +import org.apache.unomi.api.conditions.ConditionType; + +/** + * Constants used across scheduler implementation classes. 
+ */ +public final class SchedulerConstants { + private SchedulerConstants() { + // Prevent instantiation + } + + public static final ConditionType PROPERTY_CONDITION_TYPE = new ConditionType(); + public static final ConditionType BOOLEAN_CONDITION_TYPE = new ConditionType(); + + static { + PROPERTY_CONDITION_TYPE.setItemId("propertyCondition"); + PROPERTY_CONDITION_TYPE.setItemType(ConditionType.ITEM_TYPE); + PROPERTY_CONDITION_TYPE.setConditionEvaluator("propertyConditionEvaluator"); + PROPERTY_CONDITION_TYPE.setQueryBuilder("propertyConditionESQueryBuilder"); + + BOOLEAN_CONDITION_TYPE.setItemId("booleanCondition"); + BOOLEAN_CONDITION_TYPE.setItemType(ConditionType.ITEM_TYPE); + BOOLEAN_CONDITION_TYPE.setConditionEvaluator("booleanConditionEvaluator"); + BOOLEAN_CONDITION_TYPE.setQueryBuilder("booleanConditionESQueryBuilder"); + } + + // Task execution constants + public static final int MAX_HISTORY_SIZE = 10; + public static final long DEFAULT_LOCK_TIMEOUT = 5 * 60 * 1000; // 5 minutes + public static final int MIN_THREAD_POOL_SIZE = 4; + public static final long TASK_CHECK_INTERVAL = 1000; // 1 second +} diff --git a/services/src/main/java/org/apache/unomi/services/impl/scheduler/SchedulerProvider.java b/services/src/main/java/org/apache/unomi/services/impl/scheduler/SchedulerProvider.java new file mode 100644 index 0000000000..2995e751f0 --- /dev/null +++ b/services/src/main/java/org/apache/unomi/services/impl/scheduler/SchedulerProvider.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.unomi.services.impl.scheduler; + +import org.apache.unomi.api.PartialList; +import org.apache.unomi.api.conditions.ConditionType; +import org.apache.unomi.api.tasks.ScheduledTask; + +import java.util.List; + +/** + * Interface for scheduler providers that handle task execution with different storage strategies. + * + * Providers implement different approaches to task storage and execution: + * - Memory providers for fast, non-persistent tasks + * - Persistence providers for durable, cluster-aware tasks + * + * Each provider is responsible for: + * - Task lifecycle management within its domain + * - Appropriate locking mechanisms + * - Provider-specific capabilities and limitations + */ +public interface SchedulerProvider { + + ConditionType PROPERTY_CONDITION_TYPE = new ConditionType(); + ConditionType BOOLEAN_CONDITION_TYPE = new ConditionType(); + + List findTasksByLockOwner(String owner); + + List findEnabledScheduledOrWaitingTasks(); + + List findTasksByTypeAndStatus(String taskType, ScheduledTask.TaskStatus status); + + ScheduledTask getTask(String taskId); + + List getAllTasks(); + + PartialList getTasksByStatus(ScheduledTask.TaskStatus status, int offset, int size, String sortBy); + + PartialList getTasksByType(String taskType, int offset, int size, String sortBy); + + void purgeOldTasks(); + + /** + * Saves a task to the persistence service if it's persistent. 
+ * @param task The task to save + * @return true if the task was successfully saved, false otherwise + */ + boolean saveTask(ScheduledTask task); + + /** + * Returns the list of currently active cluster nodes. + * This is used for node affinity in the distributed locking mechanism. + * + * This method is designed to handle the case when ClusterService is not available (null), + * which can happen during startup when services are being initialized in a particular order, + * or in standalone mode. When ClusterService is null, this method will return just the current + * node, effectively making this a single-node operation. + * + * @return List of active node IDs + */ + List getActiveNodes(); + + /** + * Refreshes the task indices to ensure up-to-date view. + * This is used by the distributed locking mechanism to ensure + * all nodes see the latest task state. + */ + void refreshTasks(); + + /** + * Finds tasks by status + */ + List findTasksByStatus(ScheduledTask.TaskStatus status); + + /** + * Finds tasks with locks + */ + List findLockedTasks(); +} diff --git a/services/src/main/java/org/apache/unomi/services/impl/scheduler/SchedulerServiceImpl.java b/services/src/main/java/org/apache/unomi/services/impl/scheduler/SchedulerServiceImpl.java index 29e13b21e4..c28eaaf728 100644 --- a/services/src/main/java/org/apache/unomi/services/impl/scheduler/SchedulerServiceImpl.java +++ b/services/src/main/java/org/apache/unomi/services/impl/scheduler/SchedulerServiceImpl.java @@ -17,49 +17,1346 @@ package org.apache.unomi.services.impl.scheduler; +import org.apache.unomi.api.PartialList; import org.apache.unomi.api.services.SchedulerService; +import org.apache.unomi.api.tasks.ScheduledTask; +import org.apache.unomi.api.tasks.ScheduledTask.TaskStatus; +import org.apache.unomi.api.tasks.TaskExecutor; +import org.osgi.framework.BundleContext; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.annotation.PostConstruct; +import javax.annotation.PreDestroy; 
import java.time.Duration; import java.time.ZonedDateTime; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; +import java.util.*; +import java.util.concurrent.*; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Collectors; /** + * Implementation of the SchedulerService that provides task scheduling and execution capabilities. + * This implementation supports: + * - Persistent and in-memory tasks + * - Single-node and cluster execution + * - Task dependencies and waiting queues + * - Lock management and crash recovery + * - Execution history and metrics tracking + * - Pending operations queue for initialization + * + * Task Lifecycle: + * 1. SCHEDULED: Initial state, task is ready to execute + * 2. WAITING: Task is waiting for dependencies or lock + * 3. RUNNING: Task is currently executing + * 4. COMPLETED/FAILED/CANCELLED/CRASHED: Terminal states + * + * Lock Management: + * - Tasks can be configured to allow/disallow parallel execution + * - Locks are managed differently for persistent and in-memory tasks + * - Lock timeout mechanism prevents deadlocks + * + * Clustering Support: + * - Tasks can be configured to run on specific nodes or all nodes + * - Lock ownership prevents duplicate execution + * - Crash recovery handles node failures + * + * Pending Operations: + * - Operations that require subservices are queued during initialization + * - Operations are executed once all required services are available + * - Supports different operation types with appropriate handling + * * @author dgaillard */ public class SchedulerServiceImpl implements SchedulerService { private static final Logger LOGGER = LoggerFactory.getLogger(SchedulerServiceImpl.class.getName()); + private static final long DEFAULT_LOCK_TIMEOUT = 5 * 60 * 1000; // 5 minutes + private static final long DEFAULT_COMPLETED_TASK_TTL_DAYS = 30; // 30 days default retention for completed tasks + private static final boolean 
DEFAULT_PURGE_TASK_ENABLED = true; + private static final int MIN_THREAD_POOL_SIZE = 4; + private static final int PENDING_OPERATIONS_QUEUE_SIZE = 1000; + private static final int MAX_RETRY_ATTEMPTS = 10; + private static final long MAX_RETRY_AGE_MS = 5 * 60 * 1000; // 5 minutes + + private String nodeId; + private boolean executorNode; + private int threadPoolSize = MIN_THREAD_POOL_SIZE; + private long lockTimeout = DEFAULT_LOCK_TIMEOUT; + private long completedTaskTtlDays = DEFAULT_COMPLETED_TASK_TTL_DAYS; + private boolean purgeTaskEnabled = DEFAULT_PURGE_TASK_ENABLED; + private ScheduledTask taskPurgeTask; + private volatile boolean shutdownNow = false; + + private final Map nonPersistentTasks = new ConcurrentHashMap<>(); + private final AtomicBoolean running = new AtomicBoolean(false); + private final Map> waitingNonPersistentTasks = new ConcurrentHashMap<>(); + private final AtomicBoolean checkTasksRunning = new AtomicBoolean(false); + + // Manager instances - will be injected by Blueprint + private TaskStateManager stateManager; + private TaskLockManager lockManager; + private TaskExecutionManager executionManager; + private TaskRecoveryManager recoveryManager; + private TaskMetricsManager metricsManager; + private TaskHistoryManager historyManager; + private TaskValidationManager validationManager; + private TaskExecutorRegistry executorRegistry; + + private BundleContext bundleContext; + private SchedulerProvider persistenceProvider; + + private final AtomicBoolean servicesInitialized = new AtomicBoolean(false); + private final CountDownLatch servicesInitializedLatch = new CountDownLatch(1); + + // Pending operations queue + private final Queue pendingOperations = new ConcurrentLinkedQueue<>(); + private final AtomicBoolean processingPendingOperations = new AtomicBoolean(false); + + /** + * Finds all persistent tasks that are currently locked (i.e., have a lock owner and are not expired). 
+ * This is used by the recovery manager to detect tasks that may need to be recovered if their lock has expired. + */ + public List findLockedTasks() { + List lockedTasks = new ArrayList<>(); + + // Check persistent tasks + if (persistenceProvider != null) { + try { + List persistentLockedTasks = persistenceProvider.getAllTasks().stream() + .filter(task -> task.getLockOwner() != null + && task.getStatus() != TaskStatus.COMPLETED + && task.getStatus() != TaskStatus.CANCELLED) + .collect(Collectors.toList()); + lockedTasks.addAll(persistentLockedTasks); + } catch (Exception e) { + LOGGER.error("Error while finding locked persistent tasks", e); + } + } + + // Check non-persistent tasks + List nonPersistentLockedTasks = nonPersistentTasks.values().stream() + .filter(task -> task.getLockOwner() != null + && task.getStatus() != TaskStatus.COMPLETED + && task.getStatus() != TaskStatus.CANCELLED) + .collect(Collectors.toList()); + lockedTasks.addAll(nonPersistentLockedTasks); + + return lockedTasks; + } + + /** + * Enum defining the types of pending operations that can be queued + */ + private enum OperationType { + REGISTER_TASK_EXECUTOR, + UNREGISTER_TASK_EXECUTOR, + SCHEDULE_TASK, + CANCEL_TASK, + RETRY_TASK, + RESUME_TASK, + RECOVER_CRASHED_TASKS, + INITIALIZE_TASK_PURGE + } + + /** + * Represents a pending operation that needs to be executed once services are available + */ + private static class PendingOperation { + private final OperationType type; + private final Object[] parameters; + private final long timestamp; + private final String description; + private int retryCount = 0; + + public PendingOperation(OperationType type, String description, Object... 
parameters) { + this.type = type; + this.parameters = parameters; + this.timestamp = System.currentTimeMillis(); + this.description = description; + } + + public OperationType getType() { + return type; + } + + public Object[] getParameters() { + return parameters; + } + + public long getTimestamp() { + return timestamp; + } + + public String getDescription() { + return description; + } + + public int getRetryCount() { + return retryCount; + } + + public void incrementRetryCount() { + retryCount++; + } + + public boolean isExpired() { + return System.currentTimeMillis() - timestamp > MAX_RETRY_AGE_MS; + } + + @Override + public String toString() { + return String.format("PendingOperation{type=%s, description='%s', timestamp=%d, retries=%d}", + type, description, timestamp, retryCount); + } + } + + /** + * Enum defining valid task state transitions. + * This ensures tasks move through states in a controlled manner. + * Invalid transitions will throw IllegalStateException. + */ + private enum TaskTransition { + SCHEDULE(TaskStatus.SCHEDULED, EnumSet.of(TaskStatus.WAITING, TaskStatus.RUNNING)), + EXECUTE(TaskStatus.RUNNING, EnumSet.of(TaskStatus.SCHEDULED, TaskStatus.CRASHED, TaskStatus.WAITING)), + COMPLETE(TaskStatus.COMPLETED, EnumSet.of(TaskStatus.RUNNING)), + FAIL(TaskStatus.FAILED, EnumSet.of(TaskStatus.RUNNING)), + CRASH(TaskStatus.CRASHED, EnumSet.of(TaskStatus.RUNNING)), + WAIT(TaskStatus.WAITING, EnumSet.of(TaskStatus.SCHEDULED, TaskStatus.RUNNING)); + + private final TaskStatus endState; + private final Set validStartStates; + + TaskTransition(TaskStatus endState, Set validStartStates) { + this.endState = endState; + this.validStartStates = validStartStates; + } + + /** + * Checks if a state transition is valid + * @param from Current task state + * @param to Target task state + * @return true if transition is valid + */ + public static boolean isValidTransition(TaskStatus from, TaskStatus to) { + return Arrays.stream(values()) + .filter(t -> t.endState == 
to) + .anyMatch(t -> t.validStartStates.contains(from)); + } + } + + /** + * Checks if all required services are initialized and available + * @return true if services are ready, false otherwise + */ + private boolean areServicesReady() { + return servicesInitialized.get() && + executionManager != null && + !shutdownNow; + } + + /** + * Checks if all required services are initialized and available, including persistence provider if required + * @param requirePersistenceProvider Whether the operation requires persistence provider to be available + * @return true if services are ready, false otherwise + */ + private boolean areServicesReady(boolean requirePersistenceProvider) { + boolean basicServicesReady = areServicesReady(); + if (!basicServicesReady) { + return false; + } + + if (requirePersistenceProvider && persistenceProvider == null) { + return false; + } + + return true; + } + + /** + * Queues an operation to be executed once services are available + * @param type The type of operation + * @param description Human-readable description of the operation + * @param parameters The parameters for the operation + */ + private void queuePendingOperation(OperationType type, String description, Object... parameters) { + queuePendingOperation(type, description, false, parameters); + } + + /** + * Queues an operation to be executed once services are available + * @param type The type of operation + * @param description Human-readable description of the operation + * @param requirePersistenceProvider Whether the operation requires persistence provider to be available + * @param parameters The parameters for the operation + */ + private void queuePendingOperation(OperationType type, String description, boolean requirePersistenceProvider, Object... 
parameters) { + if (shutdownNow) { + LOGGER.debug("Shutdown in progress, dropping pending operation: {}", description); + return; + } + + PendingOperation operation = new PendingOperation(type, description, parameters); + pendingOperations.offer(operation); + LOGGER.debug("Queued pending operation: {} (requires persistence: {})", operation, requirePersistenceProvider); + + // Try to process pending operations if services are ready + if (areServicesReady(requirePersistenceProvider)) { + processPendingOperations(); + } + } + + /** + * Processes all pending operations that were queued before services were ready + */ + private void processPendingOperations() { + if (!processingPendingOperations.compareAndSet(false, true)) { + return; // Already processing + } + + try { + if (!areServicesReady()) { + return; // Services not ready yet + } + + LOGGER.info("Processing {} pending operations", pendingOperations.size()); + int processedCount = 0; + int errorCount = 0; + int skippedCount = 0; + + while (!pendingOperations.isEmpty() && !shutdownNow) { + PendingOperation operation = pendingOperations.poll(); + if (operation == null) { + break; + } + + // Check if operation has exceeded retry limits or timeout + if (operation.getRetryCount() >= MAX_RETRY_ATTEMPTS) { + errorCount++; + LOGGER.error("Operation {} exceeded maximum retry attempts ({}), dropping operation", + operation.getDescription(), MAX_RETRY_ATTEMPTS); + continue; + } + + if (operation.isExpired()) { + errorCount++; + LOGGER.error("Operation {} exceeded maximum age ({}ms), dropping operation", + operation.getDescription(), MAX_RETRY_AGE_MS); + continue; + } + + // Check if this operation requires persistence provider and if it's available + boolean requiresPersistence = requiresPersistenceProvider(operation); + if (requiresPersistence && persistenceProvider == null) { + // Re-queue the operation if persistence provider is not available + operation.incrementRetryCount(); + pendingOperations.offer(operation); + 
skippedCount++; + LOGGER.debug("Skipping operation {} - persistence provider not available, will retry later (attempt {})", + operation.getDescription(), operation.getRetryCount()); + + // Check if all remaining operations require persistence + boolean allRemainingRequirePersistence = checkIfAllRemainingOperationsRequirePersistence(); + if (allRemainingRequirePersistence) { + LOGGER.debug("All remaining operations require persistence provider, breaking out of processing loop"); + break; + } else { + LOGGER.debug("Some remaining operations don't require persistence, continuing to process them"); + continue; + } + } + + try { + executePendingOperation(operation); + processedCount++; + LOGGER.debug("Successfully processed pending operation: {}", operation.getDescription()); + } catch (Exception e) { + errorCount++; + LOGGER.error("Error processing pending operation: {}", operation.getDescription(), e); + } + } + + if (processedCount > 0 || errorCount > 0 || skippedCount > 0) { + LOGGER.info("Processed {} pending operations ({} successful, {} errors, {} skipped due to missing persistence)", + processedCount + errorCount + skippedCount, processedCount, errorCount, skippedCount); + } + } finally { + processingPendingOperations.set(false); + } + } + + /** + * Determines if an operation type requires the persistence provider to be available + * @param operation The pending operation + * @return true if the operation requires persistence provider, false otherwise + */ + private boolean requiresPersistenceProvider(PendingOperation operation) { + switch (operation.getType()) { + case SCHEDULE_TASK: + // Check if the task is persistent + if (operation.getParameters().length > 0) { + ScheduledTask task = (ScheduledTask) operation.getParameters()[0]; + return task != null && task.isPersistent(); + } + return false; + case INITIALIZE_TASK_PURGE: + // Task purge creates a persistent system task + return true; + case RECOVER_CRASHED_TASKS: + // Recovery may need to access 
persistent tasks + return true; + default: + // Other operations don't require persistence provider + return false; + } + } + + /** + * Executes a specific pending operation + * @param operation The operation to execute + */ + private void executePendingOperation(PendingOperation operation) { + switch (operation.getType()) { + case REGISTER_TASK_EXECUTOR: + TaskExecutor executor = (TaskExecutor) operation.getParameters()[0]; + executorRegistry.registerExecutor(executor); + break; + + case UNREGISTER_TASK_EXECUTOR: + TaskExecutor executorToUnregister = (TaskExecutor) operation.getParameters()[0]; + executorRegistry.unregisterExecutor(executorToUnregister); + break; + + case SCHEDULE_TASK: + ScheduledTask task = (ScheduledTask) operation.getParameters()[0]; + scheduleTaskInternal(task); + break; + + case CANCEL_TASK: + String taskId = (String) operation.getParameters()[0]; + cancelTaskInternal(taskId); + break; + + case RETRY_TASK: + String retryTaskId = (String) operation.getParameters()[0]; + boolean resetFailureCount = (Boolean) operation.getParameters()[1]; + retryTaskInternal(retryTaskId, resetFailureCount); + break; + + case RESUME_TASK: + String resumeTaskId = (String) operation.getParameters()[0]; + resumeTaskInternal(resumeTaskId); + break; + + case RECOVER_CRASHED_TASKS: + recoveryManager.recoverCrashedTasks(); + break; + + case INITIALIZE_TASK_PURGE: + initializeTaskPurgeInternal(); + break; + + default: + LOGGER.warn("Unknown pending operation type: {}", operation.getType()); + } + } + + /** + * Updates task state with validation and persistence + * @param task The task to update + * @param newStatus The new status to set + * @param error Optional error message for failed states + * @throws IllegalStateException if the state transition is invalid + */ + private void updateTaskState(ScheduledTask task, TaskStatus newStatus, String error) { + TaskStatus currentStatus = task.getStatus(); + if (!TaskTransition.isValidTransition(currentStatus, newStatus)) { + 
throw new IllegalStateException( + String.format("Invalid state transition from %s to %s for task %s", + currentStatus, newStatus, task.getItemId())); + } + + task.setStatus(newStatus); + if (error != null) { + task.setLastError(error); + } + + // Clear or update related state fields + if (newStatus == TaskStatus.COMPLETED || newStatus == TaskStatus.FAILED) { + task.setLockOwner(null); + task.setLockDate(null); + task.setWaitingForTaskType(null); + task.setCurrentStep(null); + // Update last execution date for completed/failed tasks + task.setLastExecutionDate(new Date()); + } else if (newStatus == TaskStatus.CRASHED) { + // For crashed tasks, preserve state for recovery + task.setCurrentStep("CRASHED"); + // Keep checkpoint data and lock info for potential resume + Map details = task.getStatusDetails(); + if (details == null) { + details = new HashMap<>(); + task.setStatusDetails(details); + } + details.put("crashTime", new Date()); + details.put("crashedNode", task.getLockOwner()); + } else if (newStatus == TaskStatus.WAITING) { + task.setLockOwner(null); + task.setLockDate(null); + } else if (newStatus == TaskStatus.RUNNING) { + // Update status details for running tasks + Map details = task.getStatusDetails(); + if (details == null) { + details = new HashMap<>(); + task.setStatusDetails(details); + } + details.put("startTime", new Date()); + details.put("executingNode", nodeId); + } + + saveTask(task); + LOGGER.debug("Task {} state changed from {} to {}", task.getItemId(), currentStatus, newStatus); + } + + + private final ScheduledFuture DUMMY_FUTURE = new ScheduledFuture() { + @Override + public long getDelay(TimeUnit unit) { + return 0; + } + + @Override + public int compareTo(Delayed o) { + return 0; + } + + @Override + public boolean cancel(boolean mayInterruptIfRunning) { + return true; + } + + @Override + public boolean isCancelled() { + return false; + } + + @Override + public boolean isDone() { + return true; + } + + @Override + public Object get() { + 
return null; + } + + @Override + public Object get(long timeout, TimeUnit unit) { + return null; + } + }; + + public SchedulerServiceImpl() { + } + + public void setBundleContext(BundleContext bundleContext) { + this.bundleContext = bundleContext; + } + + // Setter methods for Blueprint dependency injection + public void setStateManager(TaskStateManager stateManager) { + this.stateManager = stateManager; + } + + public void setLockManager(TaskLockManager lockManager) { + this.lockManager = lockManager; + } + + public void setExecutionManager(TaskExecutionManager executionManager) { + this.executionManager = executionManager; + } + + public void setRecoveryManager(TaskRecoveryManager recoveryManager) { + this.recoveryManager = recoveryManager; + } + + public void setMetricsManager(TaskMetricsManager metricsManager) { + this.metricsManager = metricsManager; + } + + public void setHistoryManager(TaskHistoryManager historyManager) { + this.historyManager = historyManager; + } + + public void setValidationManager(TaskValidationManager validationManager) { + this.validationManager = validationManager; + } + + public void setExecutorRegistry(TaskExecutorRegistry executorRegistry) { + this.executorRegistry = executorRegistry; + } + + public void setPersistenceProvider(SchedulerProvider persistenceProvider) { + this.persistenceProvider = persistenceProvider; + LOGGER.info("PersistenceSchedulerProvider bound to SchedulerService"); + + // Clear any expired operations first + clearExpiredOperations(); + + // Process any pending operations that were waiting for the persistence provider + if (servicesInitialized.get() && !pendingOperations.isEmpty()) { + LOGGER.info("Processing {} pending operations that were waiting for persistence provider", pendingOperations.size()); + processPendingOperations(); + } + } + + /** + * Checks if all remaining operations in the queue require the persistence provider + * @return true if all remaining operations require persistence, false otherwise 
+ */ + private boolean checkIfAllRemainingOperationsRequirePersistence() { + if (pendingOperations.isEmpty()) { + return true; // No operations left, so technically all remaining require persistence + } + + // Create a temporary list to hold operations while we check them + List tempOperations = new ArrayList<>(); + boolean allRequirePersistence = true; + int totalOperations = 0; + int operationsRequiringPersistence = 0; + + // Check all operations in the queue + PendingOperation operation; + while ((operation = pendingOperations.poll()) != null) { + tempOperations.add(operation); + totalOperations++; + if (requiresPersistenceProvider(operation)) { + operationsRequiringPersistence++; + } else { + allRequirePersistence = false; + } + } + + // Put all operations back in the queue + for (PendingOperation op : tempOperations) { + pendingOperations.offer(op); + } + + LOGGER.debug("Queue analysis: {} total operations, {} require persistence, all require persistence: {}", + totalOperations, operationsRequiringPersistence, allRequirePersistence); + + return allRequirePersistence; + } + + /** + * Clears expired operations from the pending operations queue + * This prevents accumulation of stale operations that can't be processed + */ + private void clearExpiredOperations() { + if (pendingOperations.isEmpty()) { + return; + } - private final ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor(); - private ScheduledExecutorService sharedScheduler; - private int threadPoolSize; + int originalSize = pendingOperations.size(); + List validOperations = new ArrayList<>(); + PendingOperation operation; + while ((operation = pendingOperations.poll()) != null) { + if (operation.isExpired()) { + LOGGER.warn("Clearing expired operation: {} (age: {}ms)", + operation.getDescription(), System.currentTimeMillis() - operation.getTimestamp()); + } else { + validOperations.add(operation); + } + } + + // Re-add valid operations + for (PendingOperation validOperation 
: validOperations) { + pendingOperations.offer(validOperation); + } + + int clearedCount = originalSize - validOperations.size(); + if (clearedCount > 0) { + LOGGER.info("Cleared {} expired operations from pending queue", clearedCount); + } + } + + public void unsetPersistenceProvider(SchedulerProvider persistenceProvider) { + this.persistenceProvider = null; + LOGGER.info("PersistenceSchedulerProvider unbound from SchedulerService"); + } + + /** + * Purges old completed tasks based on the configured TTL. + * This method delegates to the persistence provider. + */ + public void purgeOldTasks() { + if (persistenceProvider != null) { + persistenceProvider.purgeOldTasks(); + } + } + + @PostConstruct public void postConstruct() { - sharedScheduler = Executors.newScheduledThreadPool(threadPoolSize); - LOGGER.info("Scheduler service initialized."); + if (bundleContext == null) { + LOGGER.error("BundleContext is null, cannot initialize service trackers"); + return; + } + + // Validate that all required managers are injected + if (stateManager == null || lockManager == null || executionManager == null || + recoveryManager == null || metricsManager == null || historyManager == null || + validationManager == null || executorRegistry == null) { + LOGGER.error("Required managers not injected by Blueprint"); + return; + } + + // Set the scheduler service reference in managers that need it + lockManager.setSchedulerService(this); + executionManager.setSchedulerService(this); + recoveryManager.setSchedulerService(this); + + if (executorNode) { + running.set(true); + // Start task checking thread using the execution manager + executionManager.startTaskChecker(this::checkTasks); + // Queue task purge initialization instead of calling directly + queuePendingOperation(OperationType.INITIALIZE_TASK_PURGE, "Initialize task purge"); + } + + if (nodeId == null) { + nodeId = UUID.randomUUID().toString(); + } + + LOGGER.info("Scheduler service initialized. 
Node ID: {}, Executor node: {}, Thread pool size: {}", + nodeId, executorNode, Math.max(MIN_THREAD_POOL_SIZE, threadPoolSize)); + + // Mark services as initialized and process any pending operations + servicesInitialized.set(true); + servicesInitializedLatch.countDown(); + + // Process any pending operations that were queued during initialization + processPendingOperations(); } + @PreDestroy public void preDestroy() { - sharedScheduler.shutdown(); - scheduler.shutdown(); - LOGGER.info("Scheduler service shutdown."); + /** + * Explicit shutdown sequence to handle the Aries Blueprint bug. + * We ensure services are shut down in the correct order: + * 1. Set shutdown flag first to prevent new operations + * 2. Clear pending operations queue + * 3. Release task locks and cancel tasks + * 4. Shutdown execution manager + * 5. Release manager references + * 6. Clear task collections + * 7. Close service trackers in reverse order of dependency + * + * This explicit shutdown sequence prevents the deadlocks and timeout issues + * that occur with Blueprint's default shutdown behavior. 
+ */ + shutdownNow = true; // Set shutdown flag before other operations + running.set(false); + + LOGGER.info("SchedulerService preDestroy: beginning shutdown process"); + + // Clear pending operations queue + int pendingCount = pendingOperations.size(); + if (pendingCount > 0) { + pendingOperations.clear(); + LOGGER.info("Cleared {} pending operations during shutdown", pendingCount); + } + + // Notify all managers about shutdown + if (recoveryManager != null) { + try { + recoveryManager.prepareForShutdown(); + LOGGER.debug("Recovery manager prepared for shutdown"); + } catch (Exception e) { + LOGGER.debug("Error preparing recovery manager for shutdown: {}", e.getMessage()); + } + } + + if (taskPurgeTask != null) { + try { + cancelTask(taskPurgeTask.getItemId()); + LOGGER.debug("Task purge cancelled"); + } catch (Exception e) { + LOGGER.debug("Error cancelling purge task during shutdown: {}", e.getMessage()); + } + } + + // Shutdown execution manager + try { + if (executionManager != null) { + executionManager.shutdown(); + LOGGER.debug("Execution manager shutdown completed"); + } + } catch (Exception e) { + LOGGER.debug("Error shutting down execution manager: {}", e.getMessage()); + } + + // Release all manager references + this.recoveryManager = null; + this.executionManager = null; + this.lockManager = null; + this.stateManager = null; + this.historyManager = null; + this.validationManager = null; + + // Clear task collections + try { + this.metricsManager.resetMetrics(); + this.executorRegistry.clear(); + this.nonPersistentTasks.clear(); + this.waitingNonPersistentTasks.clear(); + LOGGER.debug("Task collections cleared"); + } catch (Exception e) { + LOGGER.debug("Error clearing task collections: {}", e.getMessage()); + } + + LOGGER.info("SchedulerService shutdown completed"); } - public void setThreadPoolSize(int threadPoolSize) { - this.threadPoolSize = threadPoolSize; + void checkTasks() { + if (shutdownNow || !running.get() || checkTasksRunning.get() || 
!executorNode) { + return; + } + + if (!checkTasksRunning.compareAndSet(false, true)) { + return; + } + + try { + // Skip task processing during shutdown + if (shutdownNow) { + return; + } + + // Clear expired operations periodically to prevent accumulation + clearExpiredOperations(); + + // Check for crashed tasks first + recoveryManager.recoverCrashedTasks(); + + List tasks = new ArrayList<>(); + // Get all enabled tasks that are either scheduled or waiting + if (persistenceProvider != null) { + List persistentTasks = persistenceProvider.findEnabledScheduledOrWaitingTasks(); + if (persistentTasks == null) { + LOGGER.debug("No tasks found or persistence service unavailable"); + } else { + tasks.addAll(persistentTasks); + } + } + + // Also check in-memory tasks + List inMemoryTasks = nonPersistentTasks.values().stream() + .filter(task -> task.isEnabled() && + (task.getStatus() == TaskStatus.SCHEDULED || + task.getStatus() == TaskStatus.WAITING)) + .collect(Collectors.toList()); + + // Add in-memory tasks to the list of tasks to check + if (!inMemoryTasks.isEmpty() && tasks != null) { + LOGGER.debug("Node {} found {} in-memory tasks to check", nodeId, inMemoryTasks.size()); + tasks.addAll(inMemoryTasks); + } + + if (tasks.isEmpty()) { + return; + } + + LOGGER.debug("Node {} found {} total tasks to check", nodeId, tasks.size()); + + // Sort and group tasks + sortTasksByPriority(tasks); + Map> tasksByType = groupTasksByType(tasks); + + // Process each task type + for (Map.Entry> entry : tasksByType.entrySet()) { + if (shutdownNow) return; + processTaskGroup(entry.getKey(), entry.getValue()); + } + } catch (Exception e) { + LOGGER.error("Error checking tasks", e); + } finally { + checkTasksRunning.set(false); + } + } + + private void sortTasksByPriority(List tasks) { + tasks.sort((t1, t2) -> { + // First by status (WAITING before SCHEDULED) + int statusCompare = Boolean.compare( + t1.getStatus() == TaskStatus.WAITING, + t2.getStatus() == TaskStatus.WAITING + ); + if 
(statusCompare != 0) return -statusCompare; + + // Then by creation date + int dateCompare = t1.getCreationDate().compareTo(t2.getCreationDate()); + if (dateCompare != 0) return dateCompare; + + // Finally by next execution date + Date next1 = t1.getNextScheduledExecution(); + Date next2 = t2.getNextScheduledExecution(); + if (next1 == null) return next2 == null ? 0 : -1; + if (next2 == null) return 1; + return next1.compareTo(next2); + }); + } + + private Map> groupTasksByType(List tasks) { + Map> tasksByType = new HashMap<>(); + for (ScheduledTask task : tasks) { + tasksByType.computeIfAbsent(task.getTaskType(), k -> new ArrayList<>()).add(task); + } + return tasksByType; + } + + private void processTaskGroup(String taskType, List tasks) { + TaskExecutor executor = executorRegistry.getExecutor(taskType); + if (executor == null) { + return; + } + + // Check if any task of this type is running with a valid lock + boolean hasRunningTask = hasRunningTaskOfType(taskType); + if (!hasRunningTask) { + // Get the first task that should execute + for (ScheduledTask task : tasks) { + if (shouldExecuteTask(task)) { + // All tasks here are persistent since they come from persistence service query + executionManager.executeTask(task, executor); + break; + } + } + } + } + + /** + * Schedules a task for execution based on its configuration + */ + private void scheduleTaskExecution(ScheduledTask task, TaskExecutor executor) { + if (!task.isEnabled()) { + LOGGER.debug("Task {} is disabled, skipping scheduling", task.getItemId()); + return; + } + + // Don't schedule tasks that are already running + if (task.getStatus() == TaskStatus.RUNNING) { + LOGGER.debug("Task {} is already running, skipping scheduling", task.getItemId()); + return; + } + + // Create task wrapper that will execute the task + Runnable taskWrapper = () -> executionManager.executeTask(task, executor); + + if (!task.isPersistent()) { + // For in-memory tasks, schedule directly with the execution manager + 
executionManager.scheduleTask(task, taskWrapper); + } else { + // For persistent tasks, calculate next execution time and update state + stateManager.calculateNextExecutionTime(task); + if (task.getStatus() != TaskStatus.SCHEDULED) { + stateManager.updateTaskState(task, TaskStatus.SCHEDULED, null, nodeId); + } + updateTaskInPersistence(task); + + // If task is ready to execute now, execute it + if (isTaskDueForExecution(task)) { + executionManager.executeTask(task, executor); + } + } + } + + private boolean hasRunningTaskOfType(String taskType) { + // Check non-persistent tasks first (faster - local map lookup) + boolean hasNonPersistentRunningTask = nonPersistentTasks.values().stream() + .anyMatch(task -> taskType.equals(task.getTaskType()) && + task.getStatus() == TaskStatus.RUNNING && + !lockManager.isLockExpired(task)); + + if (hasNonPersistentRunningTask) { + return true; + } + + // Check persistent tasks (slower - database query) + if (persistenceProvider != null) { + List runningTasks = persistenceProvider.findTasksByTypeAndStatus(taskType, TaskStatus.RUNNING); + return runningTasks.stream().anyMatch(task -> !lockManager.isLockExpired(task)); + } + + return false; + } + + private boolean shouldExecuteTask(ScheduledTask task) { + try { + validationManager.validateExecutionPrerequisites(task, nodeId); + } catch (IllegalStateException e) { + LOGGER.debug("Task {} not ready for execution: {}", task.getItemId(), e.getMessage()); + return false; + } + + // Check if task should run on this node + if (!task.isRunOnAllNodes() && !executorNode) { + return false; + } + + // Check task dependencies + if (task.getDependsOn() != null && !task.getDependsOn().isEmpty()) { + Map dependencies = new HashMap<>(); + for (String dependencyId : task.getDependsOn()) { + ScheduledTask dependency = getTask(dependencyId); + if (dependency != null) { + dependencies.put(dependencyId, dependency); + } + } + if (!stateManager.canRescheduleTask(task, dependencies)) { + return false; + } + 
} + + // For waiting tasks, they are already ordered by creation date + if (task.getStatus() == TaskStatus.WAITING) { + return true; + } + + // For scheduled tasks, check execution timing + if (task.getStatus() == TaskStatus.SCHEDULED) { + return isTaskDueForExecution(task); + } + + return false; + } + + private boolean isTaskDueForExecution(ScheduledTask task) { + // For one-shot tasks or initial execution + if (task.getLastExecutionDate() == null) { + if (task.getInitialDelay() > 0) { + // Check if initial delay has passed + long startTime = task.getCreationDate().getTime() + + task.getTimeUnit().toMillis(task.getInitialDelay()); + return System.currentTimeMillis() >= startTime; + } + return true; // Execute immediately if no initial delay + } + + // For periodic tasks, check next scheduled execution + if (!task.isOneShot() && task.getPeriod() > 0) { + Date nextExecution = task.getNextScheduledExecution(); + return nextExecution != null && + System.currentTimeMillis() >= nextExecution.getTime(); + } + + return false; } @Override - public ScheduledExecutorService getScheduleExecutorService() { - return scheduler; + public void scheduleTask(ScheduledTask task) { + if (areServicesReady(task.isPersistent())) { + scheduleTaskInternal(task); + } else { + queuePendingOperation(OperationType.SCHEDULE_TASK, + "Schedule task: " + task.getItemId(), task.isPersistent(), new Object[]{task}); + } } + /** + * Internal method to schedule a task - called when services are ready + * @param task The task to schedule + */ + private void scheduleTaskInternal(ScheduledTask task) { + if (!task.isEnabled()) { + return; + } + + Map existingTasks = new HashMap<>(); + if (task.getDependsOn() != null) { + for (String dependencyId : task.getDependsOn()) { + ScheduledTask dependency = getTask(dependencyId); + if (dependency != null) { + existingTasks.put(dependencyId, dependency); + } + } + } + + validationManager.validateTask(task, existingTasks); + + // Store task + if (!saveTask(task)) { + 
LOGGER.error("Failed to save task: {}", task.getItemId()); + return; + } + + // Get executor and schedule task + TaskExecutor executor = executorRegistry.getExecutor(task.getTaskType()); + if (executor != null && (task.isRunOnAllNodes() || executorNode)) { + scheduleTaskExecution(task, executor); + } + } + + @Override + public void cancelTask(String taskId) { + if (areServicesReady()) { + cancelTaskInternal(taskId); + } else { + queuePendingOperation(OperationType.CANCEL_TASK, + "Cancel task: " + taskId, taskId); + } + } + + /** + * Internal method to cancel a task - called when services are ready + * @param taskId The task ID to cancel + */ + private void cancelTaskInternal(String taskId) { + if (shutdownNow) { + return; + } + ScheduledTask task = getTask(taskId); + if (task != null) { + // Only cancel if in a cancellable state + if (task.getStatus() == TaskStatus.SCHEDULED || + task.getStatus() == TaskStatus.WAITING || + task.getStatus() == TaskStatus.RUNNING) { + + task.setEnabled(false); + stateManager.updateTaskState(task, TaskStatus.CANCELLED, null, nodeId); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_CANCELLED); + historyManager.recordCancellation(task); + + executionManager.cancelTask(taskId); + lockManager.releaseLock(task); + + if (!saveTask(task)) { + LOGGER.error("Failed to save cancelled task state: {}", taskId); + } + } + } + } + + @Override + public ScheduledTask createTask(String taskType, Map parameters, + long initialDelay, long period, TimeUnit timeUnit, + boolean fixedRate, boolean oneShot, boolean allowParallelExecution, + boolean persistent) { + ScheduledTask task = new ScheduledTask(); + task.setItemId(UUID.randomUUID().toString()); + task.setTaskType(taskType); + task.setParameters(parameters != null ? 
parameters : Collections.emptyMap()); + task.setInitialDelay(initialDelay); + task.setPeriod(period); + task.setTimeUnit(timeUnit); + task.setFixedRate(fixedRate); + task.setOneShot(oneShot); + task.setAllowParallelExecution(allowParallelExecution); + task.setEnabled(true); + task.setStatus(TaskStatus.SCHEDULED); + task.setPersistent(persistent); + task.setCreationDate(new Date()); + + Map details = new HashMap<>(); + details.put("executionHistory", new ArrayList<>()); + task.setStatusDetails(details); + + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_CREATED); + return task; + } @Override - public ScheduledExecutorService getSharedScheduleExecutorService() { - return sharedScheduler; + public List getAllTasks() { + List allTasks = new ArrayList<>(getPersistentTasks()); + allTasks.addAll(getMemoryTasks()); + return allTasks; + } + + @Override + public ScheduledTask getTask(String taskId) { + if (shutdownNow) { + return null; + } + + // First check in-memory tasks which is faster + ScheduledTask memoryTask = nonPersistentTasks.get(taskId); + if (memoryTask != null) { + return memoryTask; + } + + // Then check persistent tasks + if (persistenceProvider == null) { + return null; + } + + try { + return persistenceProvider.getTask(taskId); + } catch (Exception e) { + LOGGER.error("Error loading task {}: {}", taskId, e.getMessage()); + return null; + } + } + + @Override + public List getPersistentTasks() { + if (persistenceProvider == null || shutdownNow) { + return new ArrayList<>(); + } + + try { + return persistenceProvider.getAllTasks(); + } catch (Exception e) { + LOGGER.error("Error getting persistent tasks: {}", e.getMessage()); + return new ArrayList<>(); + } + } + + @Override + public void registerTaskExecutor(TaskExecutor executor) { + executorRegistry.registerExecutor(executor); + } + + @Override + public void unregisterTaskExecutor(TaskExecutor executor) { + executorRegistry.unregisterExecutor(executor); + } + + @Override + public List 
getMemoryTasks() { + return new ArrayList<>(nonPersistentTasks.values()); + } + + @Override + public boolean isExecutorNode() { + return executorNode; + } + + public void setNodeId(String nodeId) { + this.nodeId = nodeId; + } + + @Override + public String getNodeId() { + return nodeId; + } + + @Override + public PartialList getTasksByStatus(TaskStatus status, int offset, int size, String sortBy) { + if (shutdownNow) { + return new PartialList<>(new ArrayList<>(), offset, size, 0, PartialList.Relation.EQUAL); + } + + List allTasks = new ArrayList<>(); + + // Get persistent tasks by status + if (persistenceProvider != null) { + try { + PartialList persistentTasks = persistenceProvider.getTasksByStatus(status, 0, -1, sortBy); + if (persistentTasks != null && persistentTasks.getList() != null) { + allTasks.addAll(persistentTasks.getList()); + } + } catch (Exception e) { + LOGGER.error("Error getting persistent tasks by status: {}", e.getMessage()); + } + } + + // Get in-memory tasks by status + List memoryTasks = nonPersistentTasks.values().stream() + .filter(task -> task.getStatus() == status) + .collect(Collectors.toList()); + allTasks.addAll(memoryTasks); + + // Sort the combined list if sortBy is specified + if (sortBy != null && !sortBy.trim().isEmpty()) { + sortTasksByField(allTasks, sortBy); + } + + // Apply pagination + int totalSize = allTasks.size(); + int fromIndex = Math.min(offset, totalSize); + int toIndex; + + if (size == -1) { + // Return all tasks when size is -1 + toIndex = totalSize; + } else { + toIndex = Math.min(offset + size, totalSize); + } + + List pagedTasks = fromIndex < toIndex ? + allTasks.subList(fromIndex, toIndex) : new ArrayList<>(); + + return new PartialList<>(pagedTasks, offset, size, totalSize, + totalSize <= offset + (size == -1 ? totalSize : size) ? 
PartialList.Relation.EQUAL : PartialList.Relation.GREATER_THAN_OR_EQUAL_TO); + } + + @Override + public PartialList getTasksByType(String taskType, int offset, int size, String sortBy) { + if (shutdownNow) { + return new PartialList<>(new ArrayList<>(), offset, size, 0, PartialList.Relation.EQUAL); + } + + List allTasks = new ArrayList<>(); + + // Get persistent tasks by type + if (persistenceProvider != null) { + try { + PartialList persistentTasks = persistenceProvider.getTasksByType(taskType, 0, -1, sortBy); + if (persistentTasks != null && persistentTasks.getList() != null) { + allTasks.addAll(persistentTasks.getList()); + } + } catch (Exception e) { + LOGGER.error("Error getting persistent tasks by type: {}", e.getMessage()); + } + } + + // Get in-memory tasks by type + List memoryTasks = nonPersistentTasks.values().stream() + .filter(task -> taskType.equals(task.getTaskType())) + .collect(Collectors.toList()); + allTasks.addAll(memoryTasks); + + // Sort the combined list if sortBy is specified + if (sortBy != null && !sortBy.trim().isEmpty()) { + sortTasksByField(allTasks, sortBy); + } + + // Apply pagination + int totalSize = allTasks.size(); + int fromIndex = Math.min(offset, totalSize); + int toIndex; + + if (size == -1) { + // Return all tasks when size is -1 + toIndex = totalSize; + } else { + toIndex = Math.min(offset + size, totalSize); + } + + List pagedTasks = fromIndex < toIndex ? + allTasks.subList(fromIndex, toIndex) : new ArrayList<>(); + + return new PartialList<>(pagedTasks, offset, size, totalSize, + totalSize <= offset + (size == -1 ? totalSize : size) ? 
PartialList.Relation.EQUAL : PartialList.Relation.GREATER_THAN_OR_EQUAL_TO); + } + + public void setThreadPoolSize(int threadPoolSize) { + this.threadPoolSize = threadPoolSize; + } + + public void setExecutorNode(boolean executorNode) { + this.executorNode = executorNode; + } + + public void setLockTimeout(long lockTimeout) { + this.lockTimeout = lockTimeout; + } + + public void setCompletedTaskTtlDays(long completedTaskTtlDays) { + this.completedTaskTtlDays = completedTaskTtlDays; + } + + public void setPurgeTaskEnabled(boolean purgeTaskEnabled) { + this.purgeTaskEnabled = purgeTaskEnabled; } public static long getTimeDiffInSeconds(int hourInUtc, ZonedDateTime now) { @@ -67,7 +1364,663 @@ public static long getTimeDiffInSeconds(int hourInUtc, ZonedDateTime now) { if(now.compareTo(nextRun) > 0) { nextRun = nextRun.plusDays(1); } - return Duration.between(now, nextRun).getSeconds(); } + + @Override + public void recoverCrashedTasks() { + if (areServicesReady()) { + if (executorNode) { + recoveryManager.recoverCrashedTasks(); + } + } else { + queuePendingOperation(OperationType.RECOVER_CRASHED_TASKS, "Recover crashed tasks"); + } + } + + @Override + public void retryTask(String taskId, boolean resetFailureCount) { + if (areServicesReady()) { + retryTaskInternal(taskId, resetFailureCount); + } else { + queuePendingOperation(OperationType.RETRY_TASK, + "Retry task: " + taskId + " (reset: " + resetFailureCount + ")", taskId, resetFailureCount); + } + } + + /** + * Internal method to retry a task - called when services are ready + * @param taskId The task ID to retry + * @param resetFailureCount Whether to reset the failure count + */ + private void retryTaskInternal(String taskId, boolean resetFailureCount) { + ScheduledTask task = getTask(taskId); + if (task != null && task.getStatus() == TaskStatus.FAILED) { + if (resetFailureCount) { + task.setFailureCount(0); + } + task.setLastExecutionDate(null); // we have to do this to force the task to execute again + 
stateManager.updateTaskState(task, TaskStatus.SCHEDULED, null, nodeId); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_RETRIED); + scheduleTaskInternal(task); + } + } + + @Override + public void resumeTask(String taskId) { + if (areServicesReady()) { + resumeTaskInternal(taskId); + } else { + queuePendingOperation(OperationType.RESUME_TASK, + "Resume task: " + taskId, taskId); + } + } + + /** + * Internal method to resume a task - called when services are ready + * @param taskId The task ID to resume + */ + private void resumeTaskInternal(String taskId) { + ScheduledTask task = getTask(taskId); + if (task != null && task.getStatus() == TaskStatus.CRASHED) { + TaskExecutor executor = executorRegistry.getExecutor(task.getTaskType()); + if (executor != null && executor.canResume(task)) { + stateManager.updateTaskState(task, TaskStatus.SCHEDULED, null, nodeId); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_RESUMED); + scheduleTaskInternal(task); + } + } + } + + private void initializeTaskPurge() { + if (areServicesReady()) { + initializeTaskPurgeInternal(); + } else { + queuePendingOperation(OperationType.INITIALIZE_TASK_PURGE, "Initialize task purge"); + } + } + + /** + * Internal method to initialize task purge - called when services are ready + */ + private void initializeTaskPurgeInternal() { + if (!purgeTaskEnabled) { + LOGGER.info("Task purge is disabled, skipping initialization"); + return; + } + + // Check if persistence provider is available (required for task purge) + if (persistenceProvider == null) { + LOGGER.warn("Persistence provider not available, cannot initialize task purge. 
Will retry when persistence becomes available."); + return; + } + + LOGGER.info("Initializing task purge with TTL: {} days", completedTaskTtlDays); + + // Register the task executor for task purge + TaskExecutor taskPurgeExecutor = new TaskExecutor() { + @Override + public String getTaskType() { + return "task-purge"; + } + + @Override + public void execute(ScheduledTask task, TaskStatusCallback callback) { + LOGGER.info("Purge task executor called - starting purge of old tasks"); + try { + if (persistenceProvider != null) { + LOGGER.info("Calling persistenceProvider.purgeOldTasks() with TTL: {} days", completedTaskTtlDays); + persistenceProvider.purgeOldTasks(); + LOGGER.info("Purge task completed successfully"); + } else { + LOGGER.warn("Persistence provider is null, cannot purge tasks"); + } + callback.complete(); + } catch (Throwable t) { + LOGGER.error("Error while purging old tasks", t); + callback.fail(t.getMessage()); + } + } + }; + + registerTaskExecutor(taskPurgeExecutor); + LOGGER.info("Registered purge task executor"); + + // Check if a task purge task already exists + List existingTasks = getTasksByType("task-purge", 0, 1, null).getList(); + ScheduledTask taskPurgeTask = null; + + if (!existingTasks.isEmpty() && existingTasks.get(0).isSystemTask()) { + // Reuse the existing task if it's a system task + taskPurgeTask = existingTasks.get(0); + // Update task configuration if needed + taskPurgeTask.setPeriod(1); + taskPurgeTask.setTimeUnit(TimeUnit.DAYS); + taskPurgeTask.setFixedRate(true); + taskPurgeTask.setEnabled(true); + saveTask(taskPurgeTask); + LOGGER.info("Reusing existing system task purge task: {}", taskPurgeTask.getItemId()); + } else { + // Create a new task if none exists or existing one isn't a system task + taskPurgeTask = newTask("task-purge") + .withPeriod(1, TimeUnit.DAYS) + .withFixedRate() + .asSystemTask() + .schedule(); + LOGGER.info("Created new system task purge task: {}", taskPurgeTask.getItemId()); + } + } + + /** + * Builder 
class to simplify task creation with fluent API + */ + public TaskBuilder newTask(String taskType) { + return new TaskBuilder(this, taskType); + } + + private boolean updateTaskInPersistence(ScheduledTask task) { + return saveTask(task); + } + + /** + * Saves a task to the persistence service if it's persistent. + * @param task The task to save + * @return true if the task was successfully saved, false otherwise + */ + @Override + public boolean saveTask(ScheduledTask task) { + if (task == null || shutdownNow) { + return false; + } + + if (task.isPersistent()) { + if (persistenceProvider == null) { + LOGGER.warn("Cannot save task {} of type {}- persistence service unavailable", task.getItemId(), task.getTaskType()); + return false; + } + + try { + persistenceProvider.saveTask(task); + LOGGER.debug("Saved task {} to persistence", task.getItemId()); + return true; + } catch (Exception e) { + LOGGER.error("Error saving task {} to persistence", task.getItemId(), e); + return false; + } + } else { + LOGGER.debug("Saving task {} of type {} in memory", task.getItemId(), task.getTaskType()); + nonPersistentTasks.put(task.getItemId(), task); + return true; + } + } + + @Override + public ScheduledTask createRecurringTask(String taskType, long period, TimeUnit timeUnit, Runnable runnable, boolean persistent) { + return newTask(taskType) + .withPeriod(period, timeUnit) + .withFixedRate() + .withSimpleExecutor(runnable) + .nonPersistent() + .schedule(); + } + + @Override + public long getMetric(String metric) { + return metricsManager.getMetric(metric); + } + + @Override + public void resetMetrics() { + metricsManager.resetMetrics(); + } + + @Override + public Map getAllMetrics() { + Map metrics = metricsManager.getAllMetrics(); + // Add pending operations count to metrics + metrics.put("pendingOperations", (long) pendingOperations.size()); + return metrics; + } + + @Override + public List findTasksByStatus(TaskStatus taskStatus) { + if (shutdownNow) { + return new 
ArrayList<>(); + } + + List allTasks = new ArrayList<>(); + + // Get persistent tasks by status + if (persistenceProvider != null) { + try { + List persistentTasks = persistenceProvider.findTasksByStatus(taskStatus); + if (persistentTasks != null) { + allTasks.addAll(persistentTasks); + } + } catch (Exception e) { + LOGGER.error("Error finding persistent tasks by status: {}", e.getMessage()); + } + } + + // Get in-memory tasks by status + List memoryTasks = nonPersistentTasks.values().stream() + .filter(task -> task.getStatus() == taskStatus) + .collect(Collectors.toList()); + allTasks.addAll(memoryTasks); + + return allTasks; + } + + /** + * Sorts tasks by the specified field. + * Supports common task fields like creationDate, lastExecutionDate, nextScheduledExecution, etc. + * + * @param tasks The list of tasks to sort + * @param sortBy The field to sort by (with optional :asc or :desc suffix) + */ + private void sortTasksByField(List tasks, String sortBy) { + if (tasks == null || tasks.isEmpty() || sortBy == null || sortBy.trim().isEmpty()) { + return; + } + + String field = sortBy.trim(); + boolean ascending = true; + + // Check for sort direction suffix + if (field.endsWith(":desc")) { + field = field.substring(0, field.length() - 5); + ascending = false; + } else if (field.endsWith(":asc")) { + field = field.substring(0, field.length() - 4); + ascending = true; + } + + final String finalField = field; + final boolean finalAscending = ascending; + + tasks.sort((t1, t2) -> { + int comparison = 0; + + switch (finalField.toLowerCase()) { + case "creationdate": + comparison = compareDates(t1.getCreationDate(), t2.getCreationDate()); + break; + case "lastexecutiondate": + comparison = compareDates(t1.getLastExecutionDate(), t2.getLastExecutionDate()); + break; + case "nextscheduledexecution": + comparison = compareDates(t1.getNextScheduledExecution(), t2.getNextScheduledExecution()); + break; + case "tasktype": + comparison = compareStrings(t1.getTaskType(), 
t2.getTaskType()); + break; + case "status": + comparison = t1.getStatus().compareTo(t2.getStatus()); + break; + case "itemid": + comparison = compareStrings(t1.getItemId(), t2.getItemId()); + break; + case "failurecount": + comparison = Integer.compare(t1.getFailureCount(), t2.getFailureCount()); + break; + case "successcount": + comparison = Integer.compare(t1.getSuccessCount(), t2.getSuccessCount()); + break; + case "totalexecutioncount": + comparison = Integer.compare(t1.getSuccessCount() + t1.getFailureCount(), + t2.getSuccessCount() + t2.getFailureCount()); + break; + default: + // Default to creation date if field is not recognized + comparison = compareDates(t1.getCreationDate(), t2.getCreationDate()); + break; + } + + return finalAscending ? comparison : -comparison; + }); + } + + /** + * Compares two dates, handling null values. + * Null dates are considered less than non-null dates. + */ + private int compareDates(Date date1, Date date2) { + if (date1 == null && date2 == null) return 0; + if (date1 == null) return -1; + if (date2 == null) return 1; + return date1.compareTo(date2); + } + + /** + * Compares two strings, handling null values. + * Null strings are considered less than non-null strings. + */ + private int compareStrings(String str1, String str2) { + if (str1 == null && str2 == null) return 0; + if (str1 == null) return -1; + if (str2 == null) return 1; + return str1.compareTo(str2); + } + + /** + * Gets the number of pending operations waiting to be processed + * @return The number of pending operations + */ + public int getPendingOperationsCount() { + return pendingOperations.size(); + } + + /** + * Gets a list of pending operations for debugging purposes + * @return List of pending operation descriptions + */ + public List getPendingOperationsList() { + return pendingOperations.stream() + .map(PendingOperation::getDescription) + .collect(Collectors.toList()); + } + + /** + * Refreshes the task indices to ensure up-to-date view. 
+ * This is used by the distributed locking mechanism to ensure + * all nodes see the latest task state. + */ + public void refreshTasks() { + if (persistenceProvider != null) { + persistenceProvider.refreshTasks(); + } + } + + /** + * Saves a task with immediate refresh to ensure changes are visible. + * This is used by the distributed locking mechanism to ensure lock + * information is immediately visible to all nodes. + * + * @param task The task to save + * @return true if the operation was successful + */ + public boolean saveTaskWithRefresh(ScheduledTask task) { + if (task == null || shutdownNow) { + return false; + } + + if (task.isPersistent()) { + if (persistenceProvider == null) { + LOGGER.warn("Cannot save task with refresh - persistence service unavailable"); + return false; + } + + try { + // Save with optimistic concurrency control + // Refresh is now handled automatically by the refresh policy + return persistenceProvider.saveTask(task); + } catch (Exception e) { + LOGGER.error("Error saving task {}", task.getItemId(), e); + return false; + } + } else { + // For non-persistent tasks, just save normally + return saveTask(task); + } + } + + /** + * Returns the list of currently active cluster nodes. + * This is used for node affinity in the distributed locking mechanism. + * + * This method is designed to handle the case when ClusterService is not available (null), + * which can happen during startup when services are being initialized in a particular order, + * or in standalone mode. When ClusterService is null, this method will return just the current + * node, effectively making this a single-node operation. + * + * @return List of active node IDs + */ + public List getActiveNodes() { + if (persistenceProvider != null) { + return persistenceProvider.getActiveNodes(); + } + return new ArrayList<>(); + } + + /** + * Simulates a crash of the scheduler service by abruptly stopping all operations. + * This is used for testing crash recovery scenarios. 
+ */ + public void simulateCrash() { + shutdownNow = true; + running.set(false); + + // Release any locks owned by this node (check both persistent and non-persistent tasks) + List tasksToRelease = new ArrayList<>(); + + // Check persistent tasks + if (persistenceProvider != null) { + try { + List persistentTasks = persistenceProvider.findTasksByLockOwner(nodeId); + tasksToRelease.addAll(persistentTasks); + } catch (Exception e) { + LOGGER.warn("Error finding locked persistent tasks during crash simulation: {}", e.getMessage()); + } + } + + // Check non-persistent tasks + List nonPersistentLockedTasks = nonPersistentTasks.values().stream() + .filter(task -> nodeId.equals(task.getLockOwner())) + .collect(Collectors.toList()); + tasksToRelease.addAll(nonPersistentLockedTasks); + + // Release all locks + for (ScheduledTask task : tasksToRelease) { + try { + lockManager.releaseLock(task); + } catch (Exception e) { + LOGGER.debug("Error releasing lock for task {} during crash simulation: {}", task.getItemId(), e.getMessage()); + } + } + + // Stop execution manager + if (executionManager != null) { + try { + executionManager.shutdown(); + } catch (Exception e) { + LOGGER.debug("Error shutting down execution manager during crash simulation: {}", e.getMessage()); + } + } + } + + public TaskLockManager getLockManager() { + return lockManager; + } + + public static class TaskBuilder implements SchedulerService.TaskBuilder { + private final SchedulerServiceImpl schedulerService; + private final String taskType; + private Map parameters = Collections.emptyMap(); + private long initialDelay = 0; + private long period = 0; + private TimeUnit timeUnit = TimeUnit.MILLISECONDS; + private boolean fixedRate = true; + private boolean oneShot = false; + private boolean allowParallelExecution = true; + private TaskExecutor executor; + private boolean persistent = true; + private boolean runOnAllNodes = false; + private int maxRetries = 3; // Default value from ScheduledTask + private 
long retryDelay = 60000; // Default value from ScheduledTask (1 minute) + private Set dependsOn = new HashSet<>(); + private boolean systemTask = false; + + private TaskBuilder(SchedulerServiceImpl schedulerService, String taskType) { + this.schedulerService = schedulerService; + this.taskType = taskType; + } + + @Override + public TaskBuilder withParameters(Map parameters) { + this.parameters = parameters; + return this; + } + + @Override + public TaskBuilder withInitialDelay(long initialDelay, TimeUnit timeUnit) { + this.initialDelay = initialDelay; + this.timeUnit = timeUnit; + return this; + } + + @Override + public TaskBuilder withPeriod(long period, TimeUnit timeUnit) { + this.period = period; + this.timeUnit = timeUnit; + return this; + } + + @Override + public TaskBuilder withFixedDelay() { + this.fixedRate = false; + return this; + } + + @Override + public TaskBuilder withFixedRate() { + this.fixedRate = true; + return this; + } + + @Override + public TaskBuilder asOneShot() { + this.oneShot = true; + return this; + } + + @Override + public TaskBuilder disallowParallelExecution() { + this.allowParallelExecution = false; + return this; + } + + @Override + public TaskBuilder withExecutor(TaskExecutor executor) { + this.executor = executor; + return this; + } + + @Override + public TaskBuilder withSimpleExecutor(Runnable runnable) { + this.executor = new TaskExecutor() { + @Override + public String getTaskType() { + return taskType; + } + + @Override + public void execute(ScheduledTask task, TaskStatusCallback callback) { + try { + runnable.run(); + callback.complete(); + } catch (Exception e) { + callback.fail(e.getMessage()); + } + } + }; + return this; + } + + @Override + public TaskBuilder nonPersistent() { + this.persistent = false; + return this; + } + + @Override + public TaskBuilder runOnAllNodes() { + this.runOnAllNodes = true; + return this; + } + + @Override + public TaskBuilder asSystemTask() { + if (!persistent) { + throw new 
IllegalStateException("System tasks must be persistent. Cannot use asSystemTask() with nonPersistent()."); + } + this.systemTask = true; + return this; + } + + @Override + public TaskBuilder withMaxRetries(int maxRetries) { + if (maxRetries < 0) { + throw new IllegalArgumentException("Max retries cannot be negative"); + } + this.maxRetries = maxRetries; + return this; + } + + @Override + public TaskBuilder withRetryDelay(long delay, TimeUnit unit) { + if (delay < 0) { + throw new IllegalArgumentException("Retry delay cannot be negative"); + } + this.retryDelay = unit.toMillis(delay); + return this; + } + + @Override + public TaskBuilder withDependencies(String... taskIds) { + if (taskIds != null) { + for (String taskId : taskIds) { + if (taskId == null || taskId.trim().isEmpty()) { + throw new IllegalArgumentException("Task dependency ID cannot be null or empty"); + } + this.dependsOn.add(taskId); + } + } + return this; + } + + @Override + public ScheduledTask schedule() { + if (executor != null) { + schedulerService.registerTaskExecutor(executor); + } + + // Check for existing system tasks of the same type if this is a system task + if (systemTask) { + List existingTasks = schedulerService.getTasksByType(taskType, 0, 1, null).getList(); + if (!existingTasks.isEmpty() && existingTasks.get(0).isSystemTask()) { + // Reuse the existing system task + ScheduledTask existingTask = existingTasks.get(0); + LOGGER.info("Reusing existing system task: {}", existingTask.getItemId()); + + // Schedule the existing task + schedulerService.scheduleTask(existingTask); + return existingTask; + } + } + + ScheduledTask task = schedulerService.createTask( + taskType, + parameters, + initialDelay, + period, + timeUnit, + fixedRate, + oneShot, + allowParallelExecution, + persistent + ); + + task.setRunOnAllNodes(runOnAllNodes); + task.setMaxRetries(maxRetries); + task.setRetryDelay(retryDelay); + if (!dependsOn.isEmpty()) { + task.setDependsOn(dependsOn); + } + 
task.setSystemTask(systemTask); + schedulerService.scheduleTask(task); + return task; + } + } } + + diff --git a/services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskExecutionManager.java b/services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskExecutionManager.java new file mode 100644 index 0000000000..3202a2f55d --- /dev/null +++ b/services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskExecutionManager.java @@ -0,0 +1,504 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.unomi.services.impl.scheduler; + +import org.apache.unomi.api.tasks.ScheduledTask; +import org.apache.unomi.api.tasks.TaskExecutor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Date; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.*; +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * Manages task execution and scheduling, including task checking, execution tracking, and completion handling. 
+ */ +public class TaskExecutionManager { + private static final Logger LOGGER = LoggerFactory.getLogger(TaskExecutionManager.class); + private static final int MIN_THREAD_POOL_SIZE = 4; + private static final long TASK_CHECK_INTERVAL = 1000; // 1 second + + private String nodeId; + private ScheduledExecutorService scheduler; + private final Map> scheduledTasks; + private TaskStateManager stateManager; + private TaskLockManager lockManager; + private TaskMetricsManager metricsManager; + private TaskHistoryManager historyManager; + private final Map> executingTasksByType; + private final AtomicBoolean running = new AtomicBoolean(false); + private ScheduledFuture taskCheckerFuture; + private SchedulerServiceImpl schedulerService; + private TaskExecutorRegistry executorRegistry; + private int threadPoolSize = MIN_THREAD_POOL_SIZE; + + public TaskExecutionManager() { + this.scheduledTasks = new ConcurrentHashMap<>(); + this.executingTasksByType = new ConcurrentHashMap<>(); + } + + // Setter methods for Blueprint dependency injection + public void setNodeId(String nodeId) { + this.nodeId = nodeId; + } + + public void setThreadPoolSize(int threadPoolSize) { + this.threadPoolSize = Math.max(MIN_THREAD_POOL_SIZE, threadPoolSize); + } + + public void setStateManager(TaskStateManager stateManager) { + this.stateManager = stateManager; + } + + public void setLockManager(TaskLockManager lockManager) { + this.lockManager = lockManager; + } + + public void setMetricsManager(TaskMetricsManager metricsManager) { + this.metricsManager = metricsManager; + } + + public void setHistoryManager(TaskHistoryManager historyManager) { + this.historyManager = historyManager; + } + + public void setExecutorRegistry(TaskExecutorRegistry executorRegistry) { + this.executorRegistry = executorRegistry; + } + + public void setSchedulerService(SchedulerServiceImpl schedulerService) { + this.schedulerService = schedulerService; + } + + /** + * Initializes the scheduler after all dependencies are set 
+ */ + public void initialize() { + if (scheduler == null) { + this.scheduler = Executors.newScheduledThreadPool( + threadPoolSize, + r -> { + Thread t = new Thread(r); + t.setName("UnomiScheduler-" + t.getId()); + t.setDaemon(true); + return t; + } + ); + } + } + + /** + * Starts the task checking service if this is an executor node + */ + public void startTaskChecker(Runnable taskChecker) { + if (running.compareAndSet(false, true)) { + taskCheckerFuture = scheduler.scheduleAtFixedRate( + taskChecker, + 0, + TASK_CHECK_INTERVAL, + TimeUnit.MILLISECONDS + ); + LOGGER.info("Task checker started with interval {} ms", TASK_CHECK_INTERVAL); + } + } + + /** + * Stops the task checking service + */ + public void stopTaskChecker() { + if (running.compareAndSet(true, false) && taskCheckerFuture != null) { + taskCheckerFuture.cancel(false); + taskCheckerFuture = null; + LOGGER.info("Task checker stopped"); + } + } + + /** + * Schedules a task for execution based on its configuration + */ + public void scheduleTask(ScheduledTask task, Runnable taskRunner) { + // Calculate initial execution time if not set + if (task.getNextScheduledExecution() == null) { + if (task.getInitialDelay() > 0) { + // If initial delay is specified, calculate from now + long nextExecution = System.currentTimeMillis() + + task.getTimeUnit().toMillis(task.getInitialDelay()); + task.setNextScheduledExecution(new Date(nextExecution)); + } else { + // Start immediately + task.setNextScheduledExecution(new Date()); + } + } + + // Set task to SCHEDULED state + if (!ScheduledTask.TaskStatus.SCHEDULED.equals(task.getStatus())) { + stateManager.updateTaskState(task, ScheduledTask.TaskStatus.SCHEDULED, null, nodeId); + } + + // Save the task + schedulerService.saveTask(task); + } + + /** + * Executes a task immediately with the specified executor. + * This method should only be called when a task is ready to execute. 
+ */ + public void executeTask(ScheduledTask task, TaskExecutor executor) { + try { + if (!task.isEnabled()) { + LOGGER.debug("Node {} : Task {} is disabled, skipping execution", nodeId, task.getItemId()); + return; + } + + if (task.getStatus() == ScheduledTask.TaskStatus.RUNNING) { + LOGGER.debug("Node {} : Task {} is already running", nodeId, task.getItemId()); + return; + } + + String taskType = task.getTaskType(); + executingTasksByType.putIfAbsent(taskType, ConcurrentHashMap.newKeySet()); + + TaskExecutor.TaskStatusCallback statusCallback = createStatusCallback(task); + Runnable taskWrapper = createTaskWrapper(task, executor, statusCallback); + + // Execute task immediately using the scheduler + ScheduledFuture future = scheduler.schedule(taskWrapper, 0, TimeUnit.MILLISECONDS); + scheduledTasks.put(task.getItemId(), future); + executingTasksByType.get(taskType).add(task.getItemId()); + } catch (Exception e) { + LOGGER.error("Node "+nodeId+", Error executing task: " + task.getItemId(), e); + handleTaskError(task, e.getMessage(), System.currentTimeMillis()); + } + } + + /** + * Prepares a task for execution by validating state and acquiring lock if needed + */ + public boolean prepareForExecution(ScheduledTask task) { + if (!task.isEnabled()) { + LOGGER.debug("Task {} is disabled", task.getItemId()); + return false; + } + + // Only execute tasks that are in SCHEDULED state (or CRASHED for recovery) + if (task.getStatus() != ScheduledTask.TaskStatus.SCHEDULED && + task.getStatus() != ScheduledTask.TaskStatus.CRASHED) { + LOGGER.debug("Task {} not in executable state: {}", task.getItemId(), task.getStatus()); + return false; + } + + // For persistent tasks, acquire lock before execution + if (task.isPersistent() && !lockManager.acquireLock(task)) { + LOGGER.debug("Could not acquire lock for task: {}", task.getItemId()); + return false; + } + + stateManager.updateTaskState(task, ScheduledTask.TaskStatus.RUNNING, null, nodeId); + schedulerService.saveTask(task); + 
return true; + } + + /** + * Creates a status callback for task execution + */ + private TaskExecutor.TaskStatusCallback createStatusCallback(ScheduledTask task) { + return new TaskExecutor.TaskStatusCallback() { + @Override + public void updateStep(String step, Map details) { + task.setCurrentStep(step); + task.setStatusDetails(details); + schedulerService.saveTask(task); + } + + @Override + public void checkpoint(Map checkpointData) { + task.setCheckpointData(checkpointData); + schedulerService.saveTask(task); + } + + @Override + public void updateStatusDetails(Map details) { + task.setStatusDetails(details); + schedulerService.saveTask(task); + } + + @Override + public void complete() { + handleTaskCompletion(task, System.currentTimeMillis()); + } + + @Override + public void fail(String error) { + handleTaskError(task, error, System.currentTimeMillis()); + } + }; + } + + /** + * Creates a wrapper for task execution + */ + private Runnable createTaskWrapper(ScheduledTask task, TaskExecutor executor, + TaskExecutor.TaskStatusCallback statusCallback) { + return () -> { + if (task == null) { + LOGGER.error("Node {} : Cannot execute null task", nodeId); + return; + } + if (executor == null) { + LOGGER.error("Node {} : Cannot execute null executor for task type : {}", nodeId, task.getTaskType()); + return; + } + + String taskId = task.getItemId(); + String taskType = task.getTaskType(); + + if (taskType == null) { + LOGGER.error("Task type is null for task: {}", taskId); + return; + } + + // Prepare task for execution (both persistent and in-memory) + if (!prepareForExecution(task)) { + return; + } + + try { + // Get or create the executing tasks set + Set executingTasks = executingTasksByType.computeIfAbsent(taskType, + k -> ConcurrentHashMap.newKeySet()); + + // Only add to executing set if not already there + if (taskId != null) { + executingTasks.add(taskId); + } + + // Set the executing node ID + task.setExecutingNodeId(nodeId); + schedulerService.saveTask(task); 
+ + long startTime = System.currentTimeMillis(); + try { + if (task.getStatus() == ScheduledTask.TaskStatus.CRASHED && executor.canResume(task)) { + executor.resume(task, statusCallback); + } else { + executor.execute(task, statusCallback); + } + } catch (Exception e) { + if (e.getMessage() != null && !e.getMessage().equals("Simulated crash")) { + LOGGER.error("Error executing task: " + taskId, e); + statusCallback.fail(e.getMessage()); + } + } finally { + updateTaskMetrics(task, startTime); + } + } catch (Exception e) { + LOGGER.error("Unexpected error while executing task: " + taskId, e); + statusCallback.fail("Unexpected error: " + e.getMessage()); + } finally { + // Clear executing node ID + task.setExecutingNodeId(null); + schedulerService.saveTask(task); + + // Remove task from executing set + try { + Set executingTasks = executingTasksByType.get(taskType); + if (executingTasks != null && taskId != null) { + executingTasks.remove(taskId); + } + } catch (Exception e) { + LOGGER.error("Error cleaning up task execution state: " + taskId, e); + } + } + }; + } + + /** + * Handles task completion + */ + private void handleTaskCompletion(ScheduledTask task, long startTime) { + long executionTime = System.currentTimeMillis() - startTime; + + // Only transition to completed if still in RUNNING state + if (task.getStatus() == ScheduledTask.TaskStatus.RUNNING) { + stateManager.updateTaskState(task, ScheduledTask.TaskStatus.COMPLETED, null, nodeId); + task.setLastExecutionDate(new Date()); + task.setLastExecutedBy(nodeId); + task.setFailureCount(0); + task.setSuccessCount(task.getSuccessCount() + 1); + + historyManager.recordSuccess(task, executionTime); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_COMPLETED); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_EXECUTION_TIME, executionTime); + + // Handle task completion based on type + if (task.isOneShot()) { + task.setEnabled(false); + task.setNextScheduledExecution(null); // Clear next 
execution time + scheduledTasks.remove(task.getItemId()); + } else if (task.getPeriod() > 0) { + // For periodic tasks, calculate next execution time + stateManager.calculateNextExecutionTime(task); + // Only transition to SCHEDULED if next execution is set (task might be disabled) + if (task.getNextScheduledExecution() != null) { + stateManager.updateTaskState(task, ScheduledTask.TaskStatus.SCHEDULED, null, nodeId); + } + } + + // Release lock for persistent tasks + if (task.isPersistent()) { + lockManager.releaseLock(task); + } + + // Clean up executing tasks set + Set executingTasks = executingTasksByType.get(task.getTaskType()); + if (executingTasks != null) { + executingTasks.remove(task.getItemId()); + } + + schedulerService.saveTask(task); + } + } + + /** + * Handles task error + */ + private void handleTaskError(ScheduledTask task, String error, long startTime) { + long executionTime = System.currentTimeMillis() - startTime; + + // Only transition to failed if still in RUNNING state + if (task.getStatus() == ScheduledTask.TaskStatus.RUNNING) { + stateManager.updateTaskState(task, ScheduledTask.TaskStatus.FAILED, error, nodeId); + task.setFailureCount(task.getFailureCount() + 1); + + historyManager.recordFailure(task, error); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_FAILED); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_EXECUTION_TIME, executionTime); + + // Check if we should retry + if (task.getFailureCount() <= task.getMaxRetries()) { + // Calculate next retry time + stateManager.calculateNextExecutionTime(task, true); + stateManager.updateTaskState(task, ScheduledTask.TaskStatus.SCHEDULED, null, nodeId); + + // Only schedule retry if scheduler is not shutting down + if (!scheduler.isShutdown() && !scheduler.isTerminated()) { + // Schedule retry + try { + Runnable retryTask = () -> { + TaskExecutor executor = executorRegistry.getExecutor(task.getTaskType()); + if (executor != null) { + executeTask(task, executor); + } 
+ }; + long retryDelay = task.getNextScheduledExecution().getTime() - System.currentTimeMillis(); + scheduler.schedule(retryTask, retryDelay, TimeUnit.MILLISECONDS); + LOGGER.debug("Scheduled retry #{} for task {} in {} ms", + task.getFailureCount(), task.getItemId(), retryDelay); + } catch (RejectedExecutionException e) { + LOGGER.debug("Retry scheduling rejected for task {} as scheduler is shutting down", task.getItemId()); + } + } else { + LOGGER.debug("Not scheduling retry for task {} as scheduler is shutting down", task.getItemId()); + } + } else if (!task.isOneShot()) { + LOGGER.debug("Periodic task {} failed all retries but scheduling for next period in {} ms", task.getItemId(), task.getPeriod()); + schedulerService.saveTask(task); // persist failure state before going back to scheduled state + task.setLastExecutionDate(new Date()); + task.setLastExecutedBy(nodeId); + stateManager.calculateNextExecutionTime(task, false); + if (task.getNextScheduledExecution() != null) { + stateManager.updateTaskState(task, ScheduledTask.TaskStatus.SCHEDULED, null, nodeId); + } + } + + // Release lock for persistent tasks + if (task.isPersistent()) { + lockManager.releaseLock(task); + } + + schedulerService.saveTask(task); + scheduledTasks.remove(task.getItemId()); + } + } + + /** + * Updates task metrics + */ + private void updateTaskMetrics(ScheduledTask task, long startTime) { + if (task.getStatus() == ScheduledTask.TaskStatus.COMPLETED) { + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_COMPLETED); + long duration = System.currentTimeMillis() - startTime; + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_EXECUTION_TIME, duration); + } else if (task.getStatus() == ScheduledTask.TaskStatus.FAILED) { + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_FAILED); + } else if (task.getStatus() == ScheduledTask.TaskStatus.CRASHED) { + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_CRASHED); + } else if (task.getStatus() == 
ScheduledTask.TaskStatus.WAITING) { + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_WAITING); + } else if (task.getStatus() == ScheduledTask.TaskStatus.RUNNING) { + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_RUNNING); + } + } + + /** + * Cancels a running task + */ + public void cancelTask(String taskId) { + ScheduledFuture future = scheduledTasks.remove(taskId); + if (future != null) { + future.cancel(true); + } + + // Remove from all executing task sets + for (Set executingTasks : executingTasksByType.values()) { + executingTasks.remove(taskId); + } + } + + /** + * Shuts down the execution manager + */ + public void shutdown() { + stopTaskChecker(); + + // Cancel all scheduled and running tasks + for (ScheduledFuture future : scheduledTasks.values()) { + future.cancel(true); + } + scheduledTasks.clear(); + executingTasksByType.clear(); + + // Shutdown scheduler + scheduler.shutdown(); + try { + if (!scheduler.awaitTermination(5, TimeUnit.SECONDS)) { + scheduler.shutdownNow(); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + scheduler.shutdownNow(); + } + } + + public ScheduledExecutorService getScheduler() { + return scheduler; + } + +} diff --git a/services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskExecutorRegistry.java b/services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskExecutorRegistry.java new file mode 100644 index 0000000000..00d441abfc --- /dev/null +++ b/services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskExecutorRegistry.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.unomi.services.impl.scheduler; + +import org.apache.unomi.api.tasks.TaskExecutor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Registry for task executors shared between scheduler providers. + * + * This registry manages the task executors that are available to all providers. + * It provides thread-safe registration and lookup of executors by task type. + * + * The registry is shared between providers so that task executors registered + * with the scheduler service are available to both memory and persistence providers. + */ +public class TaskExecutorRegistry { + + private static final Logger LOGGER = LoggerFactory.getLogger(TaskExecutorRegistry.class); + + private final Map executors = new ConcurrentHashMap<>(); + + /** + * Registers a task executor for a specific task type. 
+ * + * @param executor the task executor to register + * @throws IllegalArgumentException if executor is null or task type is null/empty + */ + public void registerExecutor(TaskExecutor executor) { + if (executor == null) { + throw new IllegalArgumentException("TaskExecutor cannot be null"); + } + + String taskType = executor.getTaskType(); + if (taskType == null || taskType.trim().isEmpty()) { + throw new IllegalArgumentException("Task type cannot be null or empty"); + } + + TaskExecutor previous = executors.put(taskType, executor); + if (previous != null) { + LOGGER.warn("Replaced existing executor for task type: {}", taskType); + } + + LOGGER.debug("Registered executor for task type: {}", taskType); + } + + /** + * Unregisters a task executor. + * + * @param executor the task executor to unregister + */ + public void unregisterExecutor(TaskExecutor executor) { + if (executor == null) { + return; + } + + String taskType = executor.getTaskType(); + if (taskType == null) { + return; + } + + TaskExecutor removed = executors.remove(taskType); + if (removed != null) { + LOGGER.debug("Unregistered executor for task type: {}", taskType); + } + } + + /** + * Gets the task executor for a specific task type. + * + * @param taskType the task type + * @return the task executor, or null if not found + */ + public TaskExecutor getExecutor(String taskType) { + if (taskType == null) { + return null; + } + + return executors.get(taskType); + } + + /** + * Checks if an executor is registered for the given task type. + * + * @param taskType the task type + * @return true if an executor is registered + */ + public boolean hasExecutor(String taskType) { + return taskType != null && executors.containsKey(taskType); + } + + /** + * Gets all registered task types. + * + * @return set of all registered task types + */ + public Set getRegisteredTaskTypes() { + return Collections.unmodifiableSet(executors.keySet()); + } + + /** + * Gets the number of registered executors. 
+ * + * @return the number of registered executors + */ + public int getExecutorCount() { + return executors.size(); + } + + /** + * Clears all registered executors. + * This is typically used during shutdown. + */ + public void clear() { + int count = executors.size(); + executors.clear(); + LOGGER.debug("Cleared {} registered executors", count); + } + + /** + * Gets an unmodifiable view of all registered executors. + * + * @return map of task type to executor + */ + public Map getAllExecutors() { + return Collections.unmodifiableMap(executors); + } +} diff --git a/services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskHistoryManager.java b/services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskHistoryManager.java new file mode 100644 index 0000000000..7f23c15126 --- /dev/null +++ b/services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskHistoryManager.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.unomi.services.impl.scheduler; + +import org.apache.unomi.api.tasks.ScheduledTask; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; + +/** + * Manages task execution history, including success/failure records, + * execution times, and crash records. + */ +public class TaskHistoryManager { + private static final Logger LOGGER = LoggerFactory.getLogger(TaskHistoryManager.class); + private static final int MAX_HISTORY_SIZE = 10; + + private String nodeId; + private TaskMetricsManager metricsManager; + + public TaskHistoryManager() { + // Parameterless constructor for Blueprint dependency injection + } + + // Setter methods for Blueprint dependency injection + public void setNodeId(String nodeId) { + this.nodeId = nodeId; + } + + public void setMetricsManager(TaskMetricsManager metricsManager) { + this.metricsManager = metricsManager; + } + + /** + * Records a successful task execution + */ + public void recordSuccess(ScheduledTask task, long executionTime) { + Map entry = new HashMap<>(); + entry.put("timestamp", new Date()); + entry.put("status", "SUCCESS"); + entry.put("nodeId", nodeId); + entry.put("executionTime", executionTime); + + addToHistory(task, entry); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_COMPLETED); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_EXECUTION_TIME, executionTime); + } + + /** + * Records a failed task execution + */ + public void recordFailure(ScheduledTask task, String error) { + Map entry = new HashMap<>(); + entry.put("timestamp", new Date()); + entry.put("status", "FAILED"); + entry.put("nodeId", nodeId); + entry.put("error", error); + + addToHistory(task, entry); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_FAILED); + } + + /** + * Records a task crash + */ + public void recordCrash(ScheduledTask task) { + Map entry = new HashMap<>(); + entry.put("timestamp", new Date()); + entry.put("status", "CRASHED"); + 
entry.put("nodeId", nodeId); + + addToHistory(task, entry); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_CRASHED); + } + + /** + * Records task cancellation + */ + public void recordCancellation(ScheduledTask task) { + Map entry = new HashMap<>(); + entry.put("timestamp", new Date()); + entry.put("status", "CANCELLED"); + entry.put("nodeId", nodeId); + + addToHistory(task, entry); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_CANCELLED); + } + + public void recordResume(ScheduledTask task) { + Map entry = new HashMap<>(); + entry.put("timestamp", new Date()); + entry.put("status", "RESUMED"); + entry.put("nodeId", nodeId); + + addToHistory(task, entry); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_RESUMED); + } + + public void recordRetry(ScheduledTask task) { + Map entry = new HashMap<>(); + entry.put("timestamp", new Date()); + entry.put("status", "RETRIED"); + entry.put("nodeId", nodeId); + + addToHistory(task, entry); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_RETRIED); + } + + private void addToHistory(ScheduledTask task, Map entry) { + Map details = task.getStatusDetails(); + if (details == null) { + details = new HashMap<>(); + task.setStatusDetails(details); + } else if (!(details instanceof HashMap)) { + // If the details map is unmodifiable, create a new modifiable copy + details = new HashMap<>(details); + task.setStatusDetails(details); + } + + @SuppressWarnings("unchecked") + List> history = (List>) details.get("executionHistory"); + if (history == null) { + history = new ArrayList<>(); + details.put("executionHistory", history); + } else if (!(history instanceof ArrayList)) { + // If the history list is unmodifiable, create a new modifiable copy + history = new ArrayList<>(history); + details.put("executionHistory", history); + } + + // Maintain history size limit + while (history.size() >= MAX_HISTORY_SIZE) { + history.remove(0); + } + + history.add(entry); + } + + /** + * Gets 
execution history for a task + */ + public List> getExecutionHistory(ScheduledTask task) { + Map details = task.getStatusDetails(); + if (details == null) { + return Collections.emptyList(); + } + + @SuppressWarnings("unchecked") + List> history = (List>) details.get("executionHistory"); + return history != null ? history : Collections.emptyList(); + } +} diff --git a/services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskLockManager.java b/services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskLockManager.java new file mode 100644 index 0000000000..e07274690a --- /dev/null +++ b/services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskLockManager.java @@ -0,0 +1,353 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.unomi.services.impl.scheduler; + +import org.apache.unomi.api.tasks.ScheduledTask; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collections; +import java.util.Date; +import java.util.List; + +/** + * Manages task locks to coordinate execution in a cluster environment. + * This class ensures that tasks which don't allow parallel execution + * only run on a single node at a time. + * + *

Distributed Locking Strategy:

+ * + *

This implementation addresses the challenge of reliable distributed locking + * with Elasticsearch, which is an eventually consistent system. The primary goal + * is to ensure that only one node in the cluster acquires a lock at any time, + * even if multiple nodes attempt to acquire it simultaneously.

+ * + *

Key features of the locking implementation:

+ *
    + *
  • Node Affinity: Each task is assigned a primary node based on its ID hash, + * reducing contention by giving priority to specific nodes for specific tasks. + * Active nodes are detected using the ClusterService and fall back to task lock analysis + * if ClusterService is unavailable.
  • + *
  • Time Windows: Primary nodes get an exclusive time window to acquire locks, + * after which backup nodes attempt in sequence.
  • + *
  • Optimistic Concurrency Control: Uses Elasticsearch's sequence numbers and + * primary terms to ensure only one update succeeds when multiple nodes attempt + * simultaneous updates.
  • + *
  • Fencing Tokens: Monotonically increasing version numbers prevent split-brain + * scenarios where multiple nodes believe they own a lock.
  • + *
  • Lock Verification: Double-checking after acquiring a lock ensures it's + * still valid after changes have propagated through the cluster.
  • + *
  • Explicit Refreshes: Forces immediate index refreshes to make lock + * information visible more quickly to other nodes.
  • + *
+ * + *

Different strategies are used for different task types:

+ *
    + *
  • Tasks that allow parallel execution: Simple locking without exclusivity
  • + *
  • Non-persistent tasks: Simple in-memory locking (these exist only on one node)
  • + *
  • Persistent tasks: Robust distributed locking with all safeguards
  • + *
+ */ +public class TaskLockManager { + private static final Logger LOGGER = LoggerFactory.getLogger(TaskLockManager.class); + private static final String SEQ_NO = "seq_no"; + private static final String PRIMARY_TERM = "primary_term"; + private static final String LOCK_VERSION = "lockVersion"; + private static final long VERIFICATION_DELAY_MS = 100; + private static final long PRIMARY_NODE_WINDOW_MS = 3000; + private static final long BACKUP_NODE_WINDOW_MS = 500; + + private String nodeId; + private long lockTimeout; + private TaskMetricsManager metricsManager; + private SchedulerServiceImpl schedulerService; + + public TaskLockManager() { + // Parameterless constructor for Blueprint dependency injection + } + + // Setter methods for Blueprint dependency injection + public void setNodeId(String nodeId) { + this.nodeId = nodeId; + } + + public void setLockTimeout(long lockTimeout) { + this.lockTimeout = lockTimeout; + } + + public void setMetricsManager(TaskMetricsManager metricsManager) { + this.metricsManager = metricsManager; + } + + public void setSchedulerService(SchedulerServiceImpl schedulerService) { + this.schedulerService = schedulerService; + } + + /** + * Acquires a lock for the specified task. + * Uses optimistic concurrency control to ensure only one node successfully acquires a lock. + * + * Note: This implementation uses Elasticsearch/OpenSearch documents as distributed locks. + * The refresh policy for ScheduledTask documents is configured to use WAIT_UNTIL/WaitFor + * to ensure that lock changes are immediately visible to all nodes without requiring + * explicit refresh calls. 
+ * + * @param task The task to lock + * @return true if the lock was successfully acquired, false otherwise + */ + public boolean acquireLock(ScheduledTask task) { + if (task == null) { + return false; + } + + // Always allow tasks that permit parallel execution + if (task.isAllowParallelExecution()) { + // Just set lock info but don't enforce exclusivity + task.setLockOwner(nodeId); + task.setLockDate(new Date()); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_LOCK_ACQUIRED); + return true; + } + + // For non-persistent tasks, use simple in-memory locking + if (!task.isPersistent()) { + return acquireInMemoryLock(task); + } + + // For persistent tasks, use robust distributed locking + return acquireDistributedLock(task); + } + + /** + * Simple in-memory locking for non-persistent tasks. + * These tasks exist only on a single node, so we don't need + * complex distributed locking. + */ + private boolean acquireInMemoryLock(ScheduledTask task) { + if (task.getLockOwner() != null && !nodeId.equals(task.getLockOwner())) { + if (!isLockExpired(task)) { + return false; + } + } + + task.setLockOwner(nodeId); + task.setLockDate(new Date()); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_LOCK_ACQUIRED); + + // For non-persistent tasks, we just update the in-memory map + schedulerService.saveTask(task); + return true; + } + + /** + * Robust distributed locking for persistent tasks. + * This handles the case where multiple nodes might try to + * acquire the lock at the same time. 
+ */ + private boolean acquireDistributedLock(ScheduledTask task) { + // Step 1: Check if this node should handle this task based on affinity + if (!shouldHandleTask(task)) { + return false; + } + + // Step 2: Force a refresh to ensure we see the latest state + schedulerService.refreshTasks(); + + // Step 3: Get the latest version using GET by ID (not search) + ScheduledTask latestTask = schedulerService.getTask(task.getItemId()); + if (latestTask == null) { + LOGGER.warn("Task {} not found when attempting to lock", task.getItemId()); + return false; + } + + // Step 4: Check if already locked by another node + if (latestTask.getLockOwner() != null && + !nodeId.equals(latestTask.getLockOwner()) && + !isLockExpired(latestTask)) { + LOGGER.debug("Task {} already locked by {}", task.getItemId(), latestTask.getLockOwner()); + return false; + } + + // Step 5: Use optimistic concurrency control with sequence numbers + task.setSystemMetadata(SEQ_NO, latestTask.getSystemMetadata(SEQ_NO)); + task.setSystemMetadata(PRIMARY_TERM, latestTask.getSystemMetadata(PRIMARY_TERM)); + + // Step 6: Set lock information + task.setLockOwner(nodeId); + task.setLockDate(new Date()); + + // Step 7: Add a monotonically increasing fencing token + Long lockVersion = (Long) latestTask.getSystemMetadata(LOCK_VERSION); + long newLockVersion = (lockVersion == null) ? 
1L : lockVersion + 1L; + task.setSystemMetadata(LOCK_VERSION, newLockVersion); + + // Step 8: Save with WAIT_UNTIL refresh policy + boolean acquired = schedulerService.saveTaskWithRefresh(task); + + if (!acquired) { + LOGGER.debug("Failed to acquire lock for task {} due to version conflict", task.getItemId()); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_LOCK_CONFLICTS); + return false; + } + + // Step 9: Double-check our lock after a delay to ensure it's still valid + try { + // Wait for a short time to allow any concurrent operations to complete + Thread.sleep(VERIFICATION_DELAY_MS); + + // Force refresh again to ensure we see the latest state + schedulerService.refreshTasks(); + + // Get the task again to verify our lock + ScheduledTask verifiedTask = schedulerService.getTask(task.getItemId()); + if (verifiedTask == null) { + LOGGER.warn("Task {} disappeared after locking", task.getItemId()); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_LOCK_CONFLICTS); + return false; + } + + // Verify we're still the lock owner + if (!nodeId.equals(verifiedTask.getLockOwner())) { + LOGGER.warn("Lost lock ownership for task {} to {}", + task.getItemId(), verifiedTask.getLockOwner()); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_LOCK_CONFLICTS); + return false; + } + + // Verify our fencing token is still the highest + Long currentToken = (Long) verifiedTask.getSystemMetadata(LOCK_VERSION); + if (currentToken == null || currentToken != newLockVersion) { + LOGGER.warn("Lock version mismatch for task {}: expected {} but found {}", + task.getItemId(), newLockVersion, currentToken); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_LOCK_CONFLICTS); + return false; + } + + // Lock successfully verified + LOGGER.debug("Successfully acquired and verified lock for task {}", task.getItemId()); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_LOCK_ACQUIRED); + return true; + } catch (InterruptedException e) { + 
Thread.currentThread().interrupt(); + // Attempt to release the lock since we're being interrupted + releaseLock(task); + return false; + } + } + + /** + * Determines if this node should handle the given task based on node affinity. + * This reduces contention by giving priority to a specific node for each task. + */ + private boolean shouldHandleTask(ScheduledTask task) { + // Check if this is a scheduled task + Date scheduledTime = task.getNextScheduledExecution(); + if (scheduledTime == null) { + // Not a scheduled task, any node can handle it + return true; + } + + // Get list of active nodes (sorted for consistency) + List activeNodes = schedulerService.getActiveNodes(); + if (activeNodes.isEmpty() || activeNodes.size() == 1) { + // If we're the only node or can't determine active nodes, always handle the task + return true; + } + Collections.sort(activeNodes); + + // Calculate primary node based on task hash + int primaryIndex = Math.abs(task.getItemId().hashCode() % activeNodes.size()); + String primaryNode = activeNodes.get(primaryIndex); + + // If we're the primary node, always attempt + if (nodeId.equals(primaryNode)) { + return true; + } + + // Check if enough time has passed to allow backup nodes + long delayMs = System.currentTimeMillis() - scheduledTime.getTime(); + + // Primary node gets exclusive window + if (delayMs < PRIMARY_NODE_WINDOW_MS) { + return false; + } + + // Calculate our position as a backup node + int ourIndex = activeNodes.indexOf(nodeId); + if (ourIndex < 0) { + return false; // Not in active nodes list + } + + // Calculate backup order (relative position after primary) + int backupOrder = (ourIndex - primaryIndex + activeNodes.size()) % activeNodes.size(); + + // Each backup node gets a time window based on their order + long ourWindowStart = PRIMARY_NODE_WINDOW_MS + ((backupOrder - 1) * BACKUP_NODE_WINDOW_MS); + long ourWindowEnd = ourWindowStart + BACKUP_NODE_WINDOW_MS; + + return delayMs >= ourWindowStart && delayMs < 
ourWindowEnd; + } + + /** + * Releases a lock on the given task. + * + * @param task Task to unlock + * @return true if unlock was successful + */ + public boolean releaseLock(ScheduledTask task) { + if (task == null) { + return false; + } + + // Only allow the lock owner to release the lock + if (task.getLockOwner() != null && !nodeId.equals(task.getLockOwner())) { + LOGGER.warn("Node {} attempted to release a lock owned by {}", nodeId, task.getLockOwner()); + return false; + } + + try { + task.setLockOwner(null); + task.setLockDate(null); + + if (!schedulerService.saveTask(task)) { + LOGGER.error("Failed to release lock for task {}", task.getItemId()); + return false; + } + + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_LOCK_RELEASED); + return true; + } catch (Exception e) { + LOGGER.error("Error releasing lock for task {}: {}", task.getItemId(), e.getMessage()); + return false; + } + } + + /** + * Checks if a task's lock has expired based on timeout. + * + * @param task Task to check + * @return true if lock has expired or if task has no lock + */ + public boolean isLockExpired(ScheduledTask task) { + if (task == null || task.getLockDate() == null) { + return true; + } + + long lockAge = System.currentTimeMillis() - task.getLockDate().getTime(); + return lockAge > lockTimeout; + } +} diff --git a/services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskMetricsManager.java b/services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskMetricsManager.java new file mode 100644 index 0000000000..90a6329f7f --- /dev/null +++ b/services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskMetricsManager.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
/**
 * Thread-safe store of named counters describing task execution.
 *
 * <p>Counters are created lazily on first update and held as {@link AtomicLong}
 * values in a {@link ConcurrentHashMap}.</p>
 */
public class TaskMetricsManager {
    // Names of the counters maintained by the scheduler components
    public static final String METRIC_TASKS_COMPLETED = "tasks.completed";
    public static final String METRIC_TASKS_FAILED = "tasks.failed";
    public static final String METRIC_TASKS_CRASHED = "tasks.crashed";
    public static final String METRIC_TASKS_CREATED = "tasks.created";
    public static final String METRIC_TASKS_CANCELLED = "tasks.cancelled";
    public static final String METRIC_TASKS_RESUMED = "tasks.resumed";
    public static final String METRIC_TASKS_RETRIED = "tasks.retried";
    public static final String METRIC_TASKS_WAITING = "tasks.waiting";
    public static final String METRIC_TASKS_RUNNING = "tasks.running";
    public static final String METRIC_TASKS_LOCK_TIMEOUTS = "tasks.lock.timeouts";
    public static final String METRIC_TASKS_LOCK_CONFLICTS = "tasks.lock.conflicts";
    public static final String METRIC_TASKS_LOCK_ATTEMPTS = "tasks.lock.attempts";
    public static final String METRIC_TASKS_LOCK_ACQUIRED = "tasks.lock.acquired";
    public static final String METRIC_TASKS_LOCK_RELEASED = "tasks.lock.released";
    public static final String METRIC_TASKS_EXECUTION_TIME = "tasks.execution.time";
    public static final String METRIC_TASKS_RECOVERY_ATTEMPTS = "tasks.recovery.attempts";
    public static final String METRIC_TASKS_RECOVERY_SUCCESSES = "tasks.recovery.successes";

    private final Map<String, AtomicLong> taskMetrics = new ConcurrentHashMap<>();

    /**
     * Increments a counter by one, creating it if needed.
     * @param metric The metric name to update
     */
    public void updateMetric(String metric) {
        updateMetric(metric, 1L);
    }

    /**
     * Adds a value to a counter, creating it if needed.
     * @param metric The metric name to update
     * @param value The value to add
     */
    public void updateMetric(String metric, long value) {
        AtomicLong counter = taskMetrics.computeIfAbsent(metric, name -> new AtomicLong());
        counter.addAndGet(value);
    }

    /**
     * Reads the current value of a counter.
     * @param metric The metric name
     * @return The current value, or 0 if the metric has never been updated
     */
    public long getMetric(String metric) {
        AtomicLong counter = taskMetrics.get(metric);
        if (counter == null) {
            return 0;
        }
        return counter.get();
    }

    /**
     * Copies every counter into a new map.
     * @return A fresh map of metric names to their current values
     */
    public Map<String, Long> getAllMetrics() {
        Map<String, Long> snapshot = new HashMap<>();
        for (Map.Entry<String, AtomicLong> counter : taskMetrics.entrySet()) {
            snapshot.put(counter.getKey(), counter.getValue().get());
        }
        return snapshot;
    }

    /**
     * Removes every counter, so all metrics read as zero afterwards.
     */
    public void resetMetrics() {
        taskMetrics.clear();
    }
}
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.unomi.services.impl.scheduler; + +import org.apache.unomi.api.tasks.ScheduledTask; +import org.apache.unomi.api.tasks.TaskExecutor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; + +/** + * Manages task recovery after node crashes or failures. + * Handles task state recovery, lock recovery, and task resumption. 
+ */ +public class TaskRecoveryManager { + private static final Logger LOGGER = LoggerFactory.getLogger(TaskRecoveryManager.class); + private static final int MAX_CRASH_RECOVERY_AGE_MINUTES = 60; // 1 hour + + private String nodeId; + private TaskStateManager stateManager; + private TaskLockManager lockManager; + private TaskMetricsManager metricsManager; + private TaskExecutionManager executionManager; + private TaskExecutorRegistry executorRegistry; + private SchedulerServiceImpl schedulerService; + private volatile boolean shutdownNow = false; + + public TaskRecoveryManager() { + // Parameterless constructor for Blueprint dependency injection + } + + // Setter methods for Blueprint dependency injection + public void setNodeId(String nodeId) { + this.nodeId = nodeId; + } + + public void setStateManager(TaskStateManager stateManager) { + this.stateManager = stateManager; + } + + public void setLockManager(TaskLockManager lockManager) { + this.lockManager = lockManager; + } + + public void setMetricsManager(TaskMetricsManager metricsManager) { + this.metricsManager = metricsManager; + } + + public void setExecutionManager(TaskExecutionManager executionManager) { + this.executionManager = executionManager; + } + + public void setExecutorRegistry(TaskExecutorRegistry executorRegistry) { + this.executorRegistry = executorRegistry; + } + + public void setSchedulerService(SchedulerServiceImpl schedulerService) { + this.schedulerService = schedulerService; + } + + /** + * Set the shutdown flag to prevent operations during shutdown + */ + public void prepareForShutdown() { + this.shutdownNow = true; + LOGGER.debug("TaskRecoveryManager prepared for shutdown"); + } + + /** + * Recovers tasks that crashed due to node failure or unexpected termination + * Process: + * 1. Identify tasks with expired locks + * 2. Release locks and update states + * 3. Attempt to resume tasks with checkpoint data + * 4. 
Reschedule tasks that can't be resumed + */ + public void recoverCrashedTasks() { + if (shutdownNow) { + LOGGER.debug("Skipping crashed task recovery during shutdown"); + return; + } + + try { + recoverRunningTasks(); + recoverLockedTasks(); + } catch (Exception e) { + LOGGER.error("Node {} Error recovering crashed tasks", nodeId, e); + } + } + + /** + * Recovers tasks that are marked as running but have expired locks + */ + private void recoverRunningTasks() { + if (shutdownNow) return; + + List runningTasks = schedulerService.findTasksByStatus(ScheduledTask.TaskStatus.RUNNING); + + for (ScheduledTask task : runningTasks) { + if (shutdownNow) return; + + if (lockManager.isLockExpired(task)) { + LOGGER.info("Node {} Recovering crashed task {} : {}", nodeId, task.getTaskType(), task.getItemId()); + recoverCrashedTask(task); + } + } + } + + /** + * Recovers a single crashed task + */ + private void recoverCrashedTask(ScheduledTask task) { + // Skip cancelled tasks - they should not be recovered + if (task.getStatus() == ScheduledTask.TaskStatus.CANCELLED) { + LOGGER.debug("Node {} Skipping recovery of cancelled task {} : {}", nodeId, task.getTaskType(), task.getItemId()); + return; + } + + // First mark as crashed and release lock + String previousOwner = task.getLockOwner(); + if (task.getStatus() != ScheduledTask.TaskStatus.CRASHED) { + stateManager.updateTaskState(task, ScheduledTask.TaskStatus.CRASHED, + "Node failure detected: " + previousOwner, nodeId); + } + + // Record the crash in execution history + recordCrash(task, previousOwner); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_CRASHED); + + if (schedulerService.saveTask(task)) { + // If task has checkpoint data and can be resumed, try to resume it + TaskExecutor executor = executorRegistry.getExecutor(task.getTaskType()); + if (executor != null && executor.canResume(task)) { + attemptTaskResumption(task, executor); + } else { + // If task can't be resumed, try to restart it + if 
(shouldRestartTask(task)) { + attemptTaskRestart(task, executor); + } + } + } + } + + /** + * Records a task crash in its execution history + */ + private void recordCrash(ScheduledTask task, String previousOwner) { + Map crash = new HashMap<>(); + crash.put("timestamp", new Date()); + crash.put("type", "crash"); + crash.put("previousOwner", previousOwner); + crash.put("recoveryNode", nodeId); + + Map details = task.getStatusDetails(); + if (details == null) { + details = new HashMap<>(); + task.setStatusDetails(details); + } + + @SuppressWarnings("unchecked") + List> history = (List>) details.get("executionHistory"); + if (history == null) { + history = new ArrayList<>(); + details.put("executionHistory", history); + } + + if (history.size() >= 10) { + history.remove(0); + } + history.add(crash); + } + + /** + * Attempts to resume a crashed task + */ + private void attemptTaskResumption(ScheduledTask task, TaskExecutor executor) { + LOGGER.info("Node {} resuming crashed task {} : {}", nodeId, task.getTaskType(), task.getItemId()); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_RESUMED); + stateManager.resetTaskToScheduled(task); + if (lockManager.acquireLock(task)) { + executionManager.executeTask(task, executor); + } + } + + /** + * Attempts to restart a task that can't be resumed + */ + private void attemptTaskRestart(ScheduledTask task, TaskExecutor executor) { + LOGGER.info("Node {} restarting crashed task: {}", nodeId, task.getItemId()); + stateManager.resetTaskToScheduled(task); + if (lockManager.acquireLock(task)) { + executionManager.executeTask(task, executor); + } + } + + /** + * Recovers tasks with expired locks that are not marked as running + */ + private void recoverLockedTasks() { + List lockedTasks = schedulerService.findLockedTasks(); + + for (ScheduledTask task : lockedTasks) { + if (lockManager.isLockExpired(task)) { + LOGGER.info("Node {} releasing expired lock for task: {}", nodeId, task.getItemId()); + recoverLockedTask(task); 
+ } + } + } + + /** + * Recovers a single locked task + */ + private void recoverLockedTask(ScheduledTask task) { + lockManager.releaseLock(task); + + // Check if task can be rescheduled + if (task.getStatus() == ScheduledTask.TaskStatus.WAITING && + stateManager.canRescheduleTask(task, getTaskDependencies(task))) { + stateManager.resetTaskToScheduled(task); + } + + if (schedulerService.saveTask(task)) { + // If task is now scheduled, try to execute it + if (task.getStatus() == ScheduledTask.TaskStatus.SCHEDULED) { + TaskExecutor executor = executorRegistry.getExecutor(task.getTaskType()); + if (executor != null) { + executionManager.executeTask(task, executor); + } + } + } + } + + /** + * Determines if a crashed task should be restarted + */ + private boolean shouldRestartTask(ScheduledTask task) { + // Don't restart one-shot tasks that have already started + if (task.isOneShot() && task.getLastExecutionDate() != null) { + return false; + } + + // Check retry configuration + if (task.getMaxRetries() > 0 && task.getFailureCount() >= task.getMaxRetries()) { + return false; + } + + return task.isEnabled(); + } + + + /** + * Gets dependencies for a task + */ + private Map getTaskDependencies(ScheduledTask task) { + if (task.getDependsOn() == null || task.getDependsOn().isEmpty()) { + return Collections.emptyMap(); + } + + Map dependencies = new HashMap<>(); + for (String dependencyId : task.getDependsOn()) { + ScheduledTask dependency = schedulerService.getTask(dependencyId); + if (dependency != null) { + dependencies.put(dependencyId, dependency); + } + } + return dependencies; + } + + /** + * Update running task to crashed state + */ + private void markAsCrashed(ScheduledTask task) { + try { + if (task != null) { + // Mark the task as crashed so it can be recovered + task.setStatus(ScheduledTask.TaskStatus.CRASHED); + task.setCurrentStep("CRASHED"); + if (task.getStatusDetails() == null) { + task.setStatusDetails(new HashMap<>()); + } + 
task.getStatusDetails().put("crashTime", new Date()); + task.getStatusDetails().put("crashedNode", task.getLockOwner()); + + // Release the lock but preserve the lock owner for reference + String lockOwner = task.getLockOwner(); + lockManager.releaseLock(task); + task.getStatusDetails().put("crashedNode", lockOwner); + + if (schedulerService.saveTask(task)) { + LOGGER.info("Task {} marked as crashed (previous lock owner: {})", task.getItemId(), lockOwner); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_CRASHED); + } + } + } catch (Exception e) { + LOGGER.error("Failed to mark task as crashed: {}", task.getItemId(), e); + } + } + + /** + * Resets a task that has been in running state for too long + */ + private void resetStalledTask(ScheduledTask task) { + try { + if (task != null) { + // Mark the task as failed due to timeout + stateManager.updateTaskState(task, ScheduledTask.TaskStatus.FAILED, "Task execution timeout exceeded", nodeId); + metricsManager.updateMetric(TaskMetricsManager.METRIC_TASKS_FAILED); + + if (schedulerService.saveTask(task)) { + LOGGER.info("Stalled task {} reset to FAILED state", task.getItemId()); + } + } + } catch (Exception e) { + LOGGER.error("Failed to reset stalled task: {}", task.getItemId(), e); + } + } + +} diff --git a/services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskStateManager.java b/services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskStateManager.java new file mode 100644 index 0000000000..b7bddb0915 --- /dev/null +++ b/services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskStateManager.java @@ -0,0 +1,311 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.unomi.services.impl.scheduler; + +import org.apache.unomi.api.tasks.ScheduledTask; +import org.apache.unomi.api.tasks.ScheduledTask.TaskStatus; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; + +/** + * Manages task state transitions and validation. + * This class centralizes all state-related logic for scheduled tasks. + */ +public class TaskStateManager { + private static final Logger LOGGER = LoggerFactory.getLogger(TaskStateManager.class); + + /** + * Enum defining valid task state transitions. + * This ensures tasks move through states in a controlled manner. 
+ */ + public enum TaskTransition { + SCHEDULE(TaskStatus.SCHEDULED, EnumSet.of(TaskStatus.WAITING, TaskStatus.CRASHED, TaskStatus.FAILED, TaskStatus.COMPLETED)), + EXECUTE(TaskStatus.RUNNING, EnumSet.of(TaskStatus.SCHEDULED, TaskStatus.CRASHED, TaskStatus.WAITING)), + COMPLETE(TaskStatus.COMPLETED, EnumSet.of(TaskStatus.RUNNING)), + FAIL(TaskStatus.FAILED, EnumSet.of(TaskStatus.RUNNING)), + CANCEL(TaskStatus.CANCELLED, EnumSet.of(TaskStatus.RUNNING, TaskStatus.SCHEDULED, TaskStatus.WAITING)), + CRASH(TaskStatus.CRASHED, EnumSet.of(TaskStatus.RUNNING, TaskStatus.SCHEDULED)), + WAIT(TaskStatus.WAITING, EnumSet.of(TaskStatus.SCHEDULED, TaskStatus.RUNNING)); + + private final TaskStatus endState; + private final Set validStartStates; + + TaskTransition(TaskStatus endState, Set validStartStates) { + this.endState = endState; + this.validStartStates = validStartStates; + } + + public static boolean isValidTransition(TaskStatus from, TaskStatus to) { + // Allow same state transitions during recovery + if (from == to && from == TaskStatus.RUNNING) { + return true; + } + return Arrays.stream(values()) + .filter(t -> t.endState == to) + .anyMatch(t -> t.validStartStates.contains(from)); + } + } + + /** + * Updates task state with validation and state-specific updates + */ + public void updateTaskState(ScheduledTask task, TaskStatus newStatus, String error, String nodeId) { + TaskStatus currentStatus = task.getStatus(); + validateStateTransition(currentStatus, newStatus); + + task.setStatus(newStatus); + if (error != null) { + task.setLastError(error); + } + + updateStateSpecificFields(task, newStatus, nodeId); + + LOGGER.debug("Task {} state changed from {} to {}", task.getItemId(), currentStatus, newStatus); + } + + /** + * Validates a state transition + */ + private void validateStateTransition(TaskStatus currentStatus, TaskStatus newStatus) { + if (currentStatus == TaskStatus.CANCELLED && newStatus == TaskStatus.CRASHED) { + throw new IllegalStateException( + 
String.format("Cannot recover a cancelled task: Invalid state transition from %s to %s", + currentStatus, newStatus)); + } + + if (!TaskTransition.isValidTransition(currentStatus, newStatus)) { + throw new IllegalStateException( + String.format("Invalid state transition from %s to %s", + currentStatus, newStatus)); + } + } + + /** + * Updates state-specific fields based on the new status + */ + private void updateStateSpecificFields(ScheduledTask task, TaskStatus newStatus, String nodeId) { + switch (newStatus) { + case COMPLETED: + case FAILED: + clearTaskExecution(task); + task.setLastExecutionDate(new Date()); + break; + + case CRASHED: + preserveCrashState(task, nodeId); + break; + + case WAITING: + clearLockInfo(task); + break; + + case RUNNING: + updateRunningState(task, nodeId); + break; + } + } + + private void clearTaskExecution(ScheduledTask task) { + task.setLockOwner(null); + task.setLockDate(null); + task.setWaitingForTaskType(null); + task.setCurrentStep(null); + } + + private void preserveCrashState(ScheduledTask task, String nodeId) { + task.setCurrentStep("CRASHED"); + Map details = getOrCreateStatusDetails(task); + details.put("crashTime", new Date()); + details.put("crashedNode", task.getLockOwner()); + } + + private void clearLockInfo(ScheduledTask task) { + task.setLockOwner(null); + task.setLockDate(null); + } + + private void updateRunningState(ScheduledTask task, String nodeId) { + Map details = getOrCreateStatusDetails(task); + details.put("startTime", new Date()); + details.put("executingNode", nodeId); + } + + private Map getOrCreateStatusDetails(ScheduledTask task) { + Map details = task.getStatusDetails(); + if (details == null) { + details = new HashMap<>(); + task.setStatusDetails(details); + } + return details; + } + + /** + * Checks if a task can be rescheduled based on its dependencies + */ + public boolean canRescheduleTask(ScheduledTask task, Map dependencies) { + if (task.getWaitingOnTasks() == null || 
task.getWaitingOnTasks().isEmpty()) { + return true; + } + + for (String dependencyId : task.getWaitingOnTasks()) { + ScheduledTask dependency = dependencies.get(dependencyId); + if (dependency != null && dependency.getStatus() != TaskStatus.COMPLETED) { + return false; + } + } + return true; + } + + /** + * Resets a task's waiting state and marks it as scheduled + */ + public void resetTaskToScheduled(ScheduledTask task) { + task.setStatus(TaskStatus.SCHEDULED); + task.setWaitingOnTasks(null); + task.setWaitingForTaskType(null); + } + + /** + * Validates task configuration + */ + public void validateTask(ScheduledTask task, Map existingTasks) { + if (task.getTaskType() == null || task.getTaskType().trim().isEmpty()) { + throw new IllegalArgumentException("Task type cannot be null or empty"); + } + + if (task.getPeriod() < 0) { + throw new IllegalArgumentException("Period cannot be negative"); + } + + if (task.getTimeUnit() == null && (task.getPeriod() > 0 || task.getInitialDelay() > 0)) { + throw new IllegalArgumentException("TimeUnit cannot be null for periodic or delayed tasks"); + } + + if (task.getPeriod() > 0 && task.isOneShot()) { + throw new IllegalArgumentException("One-shot tasks cannot have a period"); + } + + validateDependencies(task, existingTasks); + + if (task.getMaxRetries() < 0) { + throw new IllegalArgumentException("Max retries cannot be negative"); + } + + if (task.getRetryDelay() < 0) { + throw new IllegalArgumentException("Retry delay cannot be negative"); + } + } + + private void validateDependencies(ScheduledTask task, Map existingTasks) { + if (task.getDependsOn() != null) { + for (String dependencyId : task.getDependsOn()) { + if (dependencyId == null || dependencyId.trim().isEmpty()) { + throw new IllegalArgumentException("Task dependency ID cannot be null or empty"); + } + if (!existingTasks.containsKey(dependencyId)) { + throw new IllegalArgumentException("Dependent task not found: " + dependencyId); + } + } + } + } + + /** + * 
Calculates the next execution time for a task + * @param task The task to calculate next execution for + * @param isRetry Whether this calculation is for a retry attempt + */ + public void calculateNextExecutionTime(ScheduledTask task, boolean isRetry) { + long now = System.currentTimeMillis(); + + // Handle retry case first + if (isRetry) { + long nextExecutionTime = now + task.getTimeUnit().toMillis(task.getRetryDelay()); + task.setNextScheduledExecution(new Date(nextExecutionTime)); + return; + } + + // Handle one-shot tasks + if (task.isOneShot()) { + if (task.getLastExecutionDate() == null) { + // For first execution + if (task.getInitialDelay() > 0) { + if (task.getCreationDate() == null) { + task.setCreationDate(new Date(now)); + } + long nextExecutionTime = task.getCreationDate().getTime() + + task.getTimeUnit().toMillis(task.getInitialDelay()); + task.setNextScheduledExecution(new Date(nextExecutionTime)); + } else { + // Execute immediately + task.setNextScheduledExecution(new Date(now)); + } + } else { + // One-shot task already executed, clear next execution + task.setNextScheduledExecution(null); + task.setEnabled(false); + } + return; + } + + // Handle periodic tasks + if (task.getPeriod() > 0) { + if (task.getLastExecutionDate() == null) { + // First execution of periodic task + if (task.getInitialDelay() > 0) { + if (task.getCreationDate() == null) { + task.setCreationDate(new Date(now)); + } + long nextExecutionTime = task.getCreationDate().getTime() + + task.getTimeUnit().toMillis(task.getInitialDelay()); + task.setNextScheduledExecution(new Date(nextExecutionTime)); + } else { + // Execute immediately + task.setNextScheduledExecution(new Date(now)); + } + } else { + // Subsequent executions + if (task.isFixedRate()) { + // For fixed-rate, calculate from last scheduled time + long lastScheduledTime = task.getNextScheduledExecution() != null ? 
+ task.getNextScheduledExecution().getTime() : + task.getLastExecutionDate().getTime(); + long nextExecutionTime = lastScheduledTime + task.getTimeUnit().toMillis(task.getPeriod()); + + // If we're behind schedule, move to the next interval + while (nextExecutionTime <= now) { + nextExecutionTime += task.getTimeUnit().toMillis(task.getPeriod()); + } + task.setNextScheduledExecution(new Date(nextExecutionTime)); + } else { + // For fixed-delay, calculate from completion time + long nextExecutionTime = now + task.getTimeUnit().toMillis(task.getPeriod()); + task.setNextScheduledExecution(new Date(nextExecutionTime)); + } + } + } + } + + /** + * Calculates the next execution time for a task (non-retry case) + * @param task The task to calculate next execution for + */ + public void calculateNextExecutionTime(ScheduledTask task) { + calculateNextExecutionTime(task, false); + } +} diff --git a/services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskValidationManager.java b/services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskValidationManager.java new file mode 100644 index 0000000000..c8c8f51574 --- /dev/null +++ b/services/src/main/java/org/apache/unomi/services/impl/scheduler/TaskValidationManager.java @@ -0,0 +1,200 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.unomi.services.impl.scheduler; + +import org.apache.unomi.api.tasks.ScheduledTask; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * Manages task validation, including configuration validation, + * dependency validation, and state transition validation. + */ +public class TaskValidationManager { + private static final Logger LOGGER = LoggerFactory.getLogger(TaskValidationManager.class); + + /** + * Validates task configuration and dependencies + */ + public void validateTask(ScheduledTask task, Map existingTasks) { + validateBasicConfiguration(task); + validateSchedulingConfiguration(task); + validateDependencies(task, existingTasks); + validateRetryConfiguration(task); + validateExecutionConfiguration(task); + } + + private void validateBasicConfiguration(ScheduledTask task) { + if (task.getTaskType() == null || task.getTaskType().trim().isEmpty()) { + throw new IllegalArgumentException("Task type cannot be null or empty"); + } + + if (task.getItemId() == null || task.getItemId().trim().isEmpty()) { + throw new IllegalArgumentException("Task ID cannot be null or empty"); + } + } + + private void validateSchedulingConfiguration(ScheduledTask task) { + if (task.getPeriod() < 0) { + throw new IllegalArgumentException("Period cannot be negative"); + } + + if (task.getInitialDelay() < 0) { + throw new IllegalArgumentException("Initial delay cannot be negative"); + } + + if (task.getTimeUnit() == null && (task.getPeriod() > 0 || task.getInitialDelay() > 0)) { + throw new IllegalArgumentException("TimeUnit cannot be null for periodic or delayed tasks"); + } + + if (task.getPeriod() > 0 && task.isOneShot()) { + throw new IllegalArgumentException("One-shot tasks cannot have a period"); + } + } + + private void validateDependencies(ScheduledTask task, 
Map existingTasks) { + if (task.getDependsOn() != null) { + for (String dependencyId : task.getDependsOn()) { + validateDependency(dependencyId, existingTasks); + } + validateDependencyCycles(task, existingTasks); + } + } + + private void validateDependency(String dependencyId, Map existingTasks) { + if (dependencyId == null || dependencyId.trim().isEmpty()) { + throw new IllegalArgumentException("Task dependency ID cannot be null or empty"); + } + if (!existingTasks.containsKey(dependencyId)) { + throw new IllegalArgumentException("Dependent task not found: " + dependencyId); + } + } + + private void validateDependencyCycles(ScheduledTask task, Map existingTasks) { + Set visited = new HashSet<>(); + Set recursionStack = new HashSet<>(); + detectCycle(task.getItemId(), existingTasks, visited, recursionStack); + } + + private void detectCycle(String taskId, Map existingTasks, + Set visited, Set recursionStack) { + if (recursionStack.contains(taskId)) { + throw new IllegalArgumentException("Circular dependency detected involving task: " + taskId); + } + + if (!visited.contains(taskId)) { + visited.add(taskId); + recursionStack.add(taskId); + + ScheduledTask task = existingTasks.get(taskId); + if (task != null && task.getDependsOn() != null) { + for (String dependencyId : task.getDependsOn()) { + detectCycle(dependencyId, existingTasks, visited, recursionStack); + } + } + + recursionStack.remove(taskId); + } + } + + void validateRetryConfiguration(ScheduledTask task) { + if (task.getMaxRetries() < 0) { + throw new IllegalArgumentException("Max retries cannot be negative"); + } + + if (task.getRetryDelay() < 0) { + throw new IllegalArgumentException("Retry delay cannot be negative"); + } + } + + private void validateExecutionConfiguration(ScheduledTask task) { + if (!task.isAllowParallelExecution() && task.isRunOnAllNodes()) { + throw new IllegalArgumentException( + "Task cannot be configured to run on all nodes while disallowing parallel execution: " + + 
task.getItemId()); + } + + if (task.isOneShot() && task.isRunOnAllNodes()) { + throw new IllegalArgumentException( + "One-shot tasks cannot be configured to run on all nodes: " + task.getItemId()); + } + } + + /** + * Validates a state transition + */ + public void validateStateTransition(ScheduledTask task, ScheduledTask.TaskStatus newStatus) { + ScheduledTask.TaskStatus currentStatus = task.getStatus(); + if (!isValidTransition(currentStatus, newStatus)) { + throw new IllegalStateException( + String.format("Invalid state transition from %s to %s for task %s", + currentStatus, newStatus, task.getItemId())); + } + } + + private boolean isValidTransition(ScheduledTask.TaskStatus from, ScheduledTask.TaskStatus to) { + switch (to) { + case SCHEDULED: + return from == ScheduledTask.TaskStatus.WAITING || + from == ScheduledTask.TaskStatus.CRASHED || + from == ScheduledTask.TaskStatus.FAILED; + case RUNNING: + return from == ScheduledTask.TaskStatus.SCHEDULED || + from == ScheduledTask.TaskStatus.CRASHED || + from == ScheduledTask.TaskStatus.WAITING; + case COMPLETED: + case FAILED: + case CANCELLED: + return from == ScheduledTask.TaskStatus.RUNNING; + case CRASHED: + return from == ScheduledTask.TaskStatus.RUNNING; + case WAITING: + return from == ScheduledTask.TaskStatus.SCHEDULED || + from == ScheduledTask.TaskStatus.RUNNING; + default: + return false; + } + } + + /** + * Validates task execution prerequisites + */ + public void validateExecutionPrerequisites(ScheduledTask task, String nodeId) { + if (task.getStatus() != ScheduledTask.TaskStatus.SCHEDULED && + task.getStatus() != ScheduledTask.TaskStatus.CRASHED) { + throw new IllegalStateException( + "Task must be in SCHEDULED or CRASHED state to execute, current state: " + + task.getStatus()); + } + + if (!task.isEnabled()) { + throw new IllegalStateException("Cannot execute disabled task: " + task.getItemId()); + } + + // Validate node-specific execution + if (!task.isRunOnAllNodes() && task.getLockOwner() != null 
&& + !task.getLockOwner().equals(nodeId)) { + throw new IllegalStateException( + String.format("Task %s can only be executed on its assigned node %s, current node: %s", + task.getItemId(), task.getLockOwner(), nodeId)); + } + } +} diff --git a/services/src/main/resources/OSGI-INF/blueprint/blueprint.xml b/services/src/main/resources/OSGI-INF/blueprint/blueprint.xml index bf64a77d72..4e23f5cef3 100644 --- a/services/src/main/resources/OSGI-INF/blueprint/blueprint.xml +++ b/services/src/main/resources/OSGI-INF/blueprint/blueprint.xml @@ -45,7 +45,12 @@ - + + + + + + @@ -75,18 +80,112 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + org.apache.unomi.api.services.SchedulerService + + diff --git a/services/src/main/resources/org.apache.unomi.services.cfg b/services/src/main/resources/org.apache.unomi.services.cfg index 818b9ca787..1b1996b706 100644 --- a/services/src/main/resources/org.apache.unomi.services.cfg +++ b/services/src/main/resources/org.apache.unomi.services.cfg @@ -85,3 +85,12 @@ rules.optimizationActivated=${org.apache.unomi.rules.optimizationActivated:-true # The number of threads to compose the pool size of the scheduler. scheduler.thread.poolSize=${org.apache.unomi.scheduler.thread.poolSize:-5} + +# The node id to use for the scheduler. +scheduler.nodeId=${org.apache.unomi.scheduler.nodeId:-test-scheduler-node} + +# The lock timeout to use for the scheduler. +scheduler.lockTimeout=${org.apache.unomi.scheduler.lockTimeout:-10000} + +# Whether to enable the purge task for the scheduler. 
+scheduler.purgeTaskEnabled=${org.apache.unomi.scheduler.purgeTaskEnabled:-true} From 957f0259e3c65668a94e34bea2592c49da660452 Mon Sep 17 00:00:00 2001 From: Serge Huber Date: Mon, 1 Sep 2025 14:44:00 +0200 Subject: [PATCH 2/3] UNOMI-878: Enhanced Cluster-Aware Task Scheduling Service with Improved Developer Experience and Persistence Integration --- .../apache/unomi/api/ExecutionContext.java | 98 +++++++ .../api/services/ExecutionContextManager.java | 78 +++++ .../services/GeonamesServiceImpl.java | 266 +++++++++++------- .../impl/GroovyActionsServiceImpl.java | 46 +-- .../unomi/schema/impl/SchemaServiceImpl.java | 44 ++- .../OSGI-INF/blueprint/blueprint.xml | 4 +- .../MergeProfilesOnPropertyAction.java | 72 +++-- .../impl/cluster/ClusterServiceImpl.java | 20 +- .../definitions/DefinitionsServiceImpl.java | 26 +- .../impl/profiles/ProfileServiceImpl.java | 38 ++- .../services/impl/rules/RulesServiceImpl.java | 32 ++- .../services/impl/scope/ScopeServiceImpl.java | 18 +- .../impl/segments/SegmentServiceImpl.java | 26 +- .../OSGI-INF/blueprint/blueprint.xml | 2 - .../commands/scheduler/CancelTaskCommand.java | 42 +++ .../commands/scheduler/ListTasksCommand.java | 135 +++++++++ .../commands/scheduler/PurgeTasksCommand.java | 93 ++++++ .../commands/scheduler/RetryTaskCommand.java | 47 ++++ .../scheduler/SetExecutorNodeCommand.java | 54 ++++ .../commands/scheduler/ShowTaskCommand.java | 99 +++++++ 20 files changed, 1020 insertions(+), 220 deletions(-) create mode 100644 api/src/main/java/org/apache/unomi/api/ExecutionContext.java create mode 100644 api/src/main/java/org/apache/unomi/api/services/ExecutionContextManager.java create mode 100644 tools/shell-dev-commands/src/main/java/org/apache/unomi/shell/commands/scheduler/CancelTaskCommand.java create mode 100644 tools/shell-dev-commands/src/main/java/org/apache/unomi/shell/commands/scheduler/ListTasksCommand.java create mode 100644 
/**
 * Represents the execution context for operations in Unomi, including security and tenant information.
 *
 * A context carries a current tenant id, a set of roles, a set of permissions and a flag
 * telling whether it is a system context. System contexts pass every role, permission and
 * access check. The current tenant can be switched temporarily with {@link #setTenant(String)}
 * and restored with {@link #restorePreviousTenant()}.
 */
public class ExecutionContext {
    public static final String SYSTEM_TENANT = "system";

    private String tenantId;
    private Set<String> roles = new HashSet<>();
    private Set<String> permissions = new HashSet<>();
    // Stack (not ArrayDeque) because a null tenant id may need to be remembered.
    private Stack<String> tenantStack = new Stack<>();
    private boolean isSystem = false;

    /**
     * Creates a context for a tenant.
     *
     * @param tenantId    the tenant id; {@link #SYSTEM_TENANT} makes this a system context
     * @param roles       roles to copy into the context, may be {@code null}
     * @param permissions permissions to copy into the context, may be {@code null}
     */
    public ExecutionContext(String tenantId, Set<String> roles, Set<String> permissions) {
        this.tenantId = tenantId;
        if (SYSTEM_TENANT.equals(tenantId)) {
            this.isSystem = true;
        }
        if (roles != null) {
            this.roles.addAll(roles);
        }
        if (permissions != null) {
            this.permissions.addAll(permissions);
        }
    }

    /**
     * Creates a system context: tenant {@link #SYSTEM_TENANT}, no roles, no permissions.
     *
     * @return a new system context
     */
    public static ExecutionContext systemContext() {
        ExecutionContext context = new ExecutionContext(SYSTEM_TENANT, null, null);
        context.isSystem = true;
        return context;
    }

    /** @return the current tenant id */
    public String getTenantId() {
        return tenantId;
    }

    /** @return a copy of the roles; changes to it do not affect this context */
    public Set<String> getRoles() {
        return new HashSet<>(roles);
    }

    /** @return a copy of the permissions; changes to it do not affect this context */
    public Set<String> getPermissions() {
        return new HashSet<>(permissions);
    }

    /** @return {@code true} if this is a system context */
    public boolean isSystem() {
        return isSystem;
    }

    /**
     * Switches the current tenant, remembering the previous one.
     * The system flag is not changed by this call.
     *
     * @param tenantId the tenant to switch to
     */
    public void setTenant(String tenantId) {
        tenantStack.push(this.tenantId);
        this.tenantId = tenantId;
    }

    /**
     * Restores the tenant that was current before the last {@link #setTenant(String)}.
     * Does nothing when there is no tenant to restore.
     */
    public void restorePreviousTenant() {
        if (!tenantStack.isEmpty()) {
            this.tenantId = tenantStack.pop();
        }
    }

    /**
     * Checks that this context may perform an operation.
     *
     * @param operation the permission name required for the operation
     * @throws SecurityException if this is not a system context and the permission is missing
     */
    public void validateAccess(String operation) {
        if (isSystem) {
            return;
        }

        if (!hasPermission(operation)) {
            throw new SecurityException("Access denied: Missing permission for operation " + operation + " for tenant " + tenantId + " and roles " + roles);
        }
    }

    /**
     * @param permission the permission name
     * @return {@code true} for system contexts or when the permission is present
     */
    public boolean hasPermission(String permission) {
        return isSystem || permissions.contains(permission);
    }

    /**
     * @param role the role name
     * @return {@code true} for system contexts or when the role is present
     */
    public boolean hasRole(String role) {
        return isSystem || roles.contains(role);
    }
}
--- /dev/null +++ b/api/src/main/java/org/apache/unomi/api/services/ExecutionContextManager.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.unomi.api.services; + +import org.apache.unomi.api.ExecutionContext; + +import java.util.function.Supplier; + +/** + * Service interface for managing execution contexts in Unomi. + */ +public interface ExecutionContextManager { + + /** + * Gets the current execution context. + * @return the current execution context + */ + ExecutionContext getCurrentContext(); + + /** + * Sets the current execution context. + * @param context the context to set as current + */ + void setCurrentContext(ExecutionContext context); + + /** + * Executes an operation as the system user. + * @param operation the operation to execute + * @param the return type of the operation + * @return the result of the operation + */ + T executeAsSystem(Supplier operation); + + /** + * Executes an operation as the system user without return value. + * @param operation the operation to execute + */ + void executeAsSystem(Runnable operation); + + /** + * Executes an operation as a specific tenant. 
+ * This method creates a tenant context, executes the operation, and ensures proper cleanup. + * @param tenantId the ID of the tenant to execute as + * @param operation the operation to execute + * @param the return type of the operation + * @return the result of the operation + */ + T executeAsTenant(String tenantId, Supplier operation); + + /** + * Executes an operation as a specific tenant without return value. + * This method creates a tenant context, executes the operation, and ensures proper cleanup. + * @param tenantId the ID of the tenant to execute as + * @param operation the operation to execute + */ + void executeAsTenant(String tenantId, Runnable operation); + + /** + * Creates a new execution context for the given tenant. + * @param tenantId the tenant ID + * @return the created execution context + */ + ExecutionContext createContext(String tenantId); +} diff --git a/extensions/geonames/services/src/main/java/org/apache/unomi/geonames/services/GeonamesServiceImpl.java b/extensions/geonames/services/src/main/java/org/apache/unomi/geonames/services/GeonamesServiceImpl.java index a197250359..f84c19415d 100644 --- a/extensions/geonames/services/src/main/java/org/apache/unomi/geonames/services/GeonamesServiceImpl.java +++ b/extensions/geonames/services/src/main/java/org/apache/unomi/geonames/services/GeonamesServiceImpl.java @@ -17,12 +17,15 @@ package org.apache.unomi.geonames.services; - import org.apache.commons.lang3.StringUtils; import org.apache.unomi.api.PartialList; import org.apache.unomi.api.conditions.Condition; import org.apache.unomi.api.services.DefinitionsService; +import org.apache.unomi.api.services.ExecutionContextManager; import org.apache.unomi.api.services.SchedulerService; +import org.apache.unomi.api.tasks.TaskExecutor; +import org.apache.unomi.api.tasks.TaskExecutor.TaskStatusCallback; +import org.apache.unomi.api.tasks.ScheduledTask; import org.apache.unomi.persistence.spi.PersistenceService; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; @@ -43,6 +46,7 @@ public class GeonamesServiceImpl implements GeonamesService { private DefinitionsService definitionsService; private PersistenceService persistenceService; private SchedulerService schedulerService; + private ExecutionContextManager contextManager; private String pathToGeonamesDatabase; private Boolean forceDbImport; @@ -64,6 +68,10 @@ public void setSchedulerService(SchedulerService schedulerService) { this.schedulerService = schedulerService; } + public void setContextManager(ExecutionContextManager contextManager) { + this.contextManager = contextManager; + } + public void setPathToGeonamesDatabase(String pathToGeonamesDatabase) { this.pathToGeonamesDatabase = pathToGeonamesDatabase; } @@ -79,47 +87,99 @@ public void start() { public void stop() { } - public void importDatabase() { - if (!persistenceService.createIndex(GeonameEntry.ITEM_TYPE)) { - if (forceDbImport) { - persistenceService.removeIndex(GeonameEntry.ITEM_TYPE); - persistenceService.createIndex(GeonameEntry.ITEM_TYPE); - LOGGER.info("Geonames index removed and recreated"); - } else if (persistenceService.getAllItemsCount(GeonameEntry.ITEM_TYPE) > 0) { - return; - } - } else { - LOGGER.info("Geonames index created"); + private static class GeonamesImportTaskExecutor implements TaskExecutor { + private final GeonamesServiceImpl service; + private final File databaseFile; + + public GeonamesImportTaskExecutor(GeonamesServiceImpl service, File databaseFile) { + this.service = service; + this.databaseFile = databaseFile; } - if (pathToGeonamesDatabase == null) { - LOGGER.info("No geonames DB provided"); - return; + @Override + public String getTaskType() { + return "geonames-import"; } - final File f = new File(pathToGeonamesDatabase); - if (f.exists()) { - schedulerService.getSharedScheduleExecutorService().schedule(new TimerTask() { - @Override - public void run() { - importGeoNameDatabase(f); + + @Override + public void execute(ScheduledTask task, 
TaskStatusCallback statusCallback) throws Exception { + service.contextManager.executeAsSystem(() -> { + try { + service.importGeoNameDatabase(databaseFile); + statusCallback.complete(); + } catch (Exception e) { + LOGGER.error("Error importing geoname database", e); + statusCallback.fail(e.getMessage()); } - }, refreshDbInterval, TimeUnit.MILLISECONDS); + return null; + }); + } + } + + private static class GeonamesImportRetryTaskExecutor implements TaskExecutor { + private final GeonamesServiceImpl service; + private final File databaseFile; + + public GeonamesImportRetryTaskExecutor(GeonamesServiceImpl service, File databaseFile) { + this.service = service; + this.databaseFile = databaseFile; + } + + @Override + public String getTaskType() { + return "geonames-import-retry"; } + + @Override + public void execute(ScheduledTask task, TaskStatusCallback statusCallback) throws Exception { + service.importGeoNameDatabase(databaseFile); + statusCallback.complete(); + } + } + + public void importDatabase() { + contextManager.executeAsSystem(() -> { + if (!persistenceService.createIndex(GeonameEntry.ITEM_TYPE)) { + if (forceDbImport) { + persistenceService.removeIndex(GeonameEntry.ITEM_TYPE); + persistenceService.createIndex(GeonameEntry.ITEM_TYPE); + LOGGER.info("Geonames index removed and recreated"); + } else if (persistenceService.getAllItemsCount(GeonameEntry.ITEM_TYPE) > 0) { + return; + } + } else { + LOGGER.info("Geonames index created"); + } + + if (pathToGeonamesDatabase == null) { + LOGGER.info("No geonames DB provided"); + return; + } + final File f = new File(pathToGeonamesDatabase); + if (f.exists()) { + schedulerService.newTask("geonames-import") + .withInitialDelay(refreshDbInterval, TimeUnit.MILLISECONDS) + .asOneShot() + .withExecutor(new GeonamesImportTaskExecutor(this, f)) + .nonPersistent() + .schedule(); + } + }); } private void importGeoNameDatabase(final File f) { Map> typeMappings = persistenceService.getPropertiesMapping(GeonameEntry.ITEM_TYPE); 
if (typeMappings == null || typeMappings.size() == 0) { LOGGER.warn("Type mappings for type {} are not yet installed, delaying import until they are ready!", GeonameEntry.ITEM_TYPE); - schedulerService.getSharedScheduleExecutorService().schedule(new TimerTask() { - @Override - public void run() { - importGeoNameDatabase(f); - } - }, refreshDbInterval, TimeUnit.MILLISECONDS); + schedulerService.newTask("geonames-import-retry") + .withInitialDelay(refreshDbInterval, TimeUnit.MILLISECONDS) + .asOneShot() + .withExecutor(new GeonamesImportRetryTaskExecutor(this, f)) + .nonPersistent() + .schedule(); return; } else { - // let's check that the mappings are correct + // @TODO: let's check that the mappings are correct } try { @@ -229,48 +289,50 @@ private PartialList buildHierarchy(Condition andCondition, Conditi } public List reverseGeoCode(String lat, String lon) { - List l = new ArrayList(); - Condition andCondition = new Condition(); - andCondition.setConditionType(definitionsService.getConditionType("booleanCondition")); - andCondition.setParameter("operator", "and"); - andCondition.setParameter("subConditions", l); - - - Condition geoLocation = new Condition(); - geoLocation.setConditionType(definitionsService.getConditionType("geoLocationByPointSessionCondition")); - geoLocation.setParameter("type", "circle"); - geoLocation.setParameter("circleLatitude", Double.parseDouble(lat)); - geoLocation.setParameter("circleLongitude", Double.parseDouble(lon)); - geoLocation.setParameter("distance", GEOCODING_MAX_DISTANCE); - l.add(geoLocation); - - l.add(getPropertyCondition("featureCode", "propertyValues", CITIES_FEATURE_CODES, "in")); - - PartialList list = persistenceService.query(andCondition, "geo:location:" + lat + ":" + lon, GeonameEntry.class, 0, 1); - if (!list.getList().isEmpty()) { - return getHierarchy(list.getList().get(0)); - } - return Collections.emptyList(); + return contextManager.executeAsSystem(() -> { + List l = new ArrayList(); + Condition andCondition 
= new Condition(); + andCondition.setConditionType(definitionsService.getConditionType("booleanCondition")); + andCondition.setParameter("operator", "and"); + andCondition.setParameter("subConditions", l); + + Condition geoLocation = new Condition(); + geoLocation.setConditionType(definitionsService.getConditionType("geoLocationByPointSessionCondition")); + geoLocation.setParameter("type", "circle"); + geoLocation.setParameter("circleLatitude", Double.parseDouble(lat)); + geoLocation.setParameter("circleLongitude", Double.parseDouble(lon)); + geoLocation.setParameter("distance", GEOCODING_MAX_DISTANCE); + l.add(geoLocation); + + l.add(getPropertyCondition("featureCode", "propertyValues", CITIES_FEATURE_CODES, "in")); + + PartialList list = persistenceService.query(andCondition, "geo:location:" + lat + ":" + lon, GeonameEntry.class, 0, 1); + if (!list.getList().isEmpty()) { + return getHierarchy(list.getList().get(0)); + } + return Collections.emptyList(); + }); } - public PartialList getChildrenEntries(List items, int offset, int size) { - Condition andCondition = getItemsInChildrenQuery(items, CITIES_FEATURE_CODES); - Condition featureCodeCondition = ((List) andCondition.getParameter("subConditions")).get(0); - int level = items.size(); - - featureCodeCondition.setParameter("propertyValues", ORDERED_FEATURES.get(level)); - PartialList r = persistenceService.query(andCondition, null, GeonameEntry.class, offset, size); - while (r.size() == 0 && level < ORDERED_FEATURES.size() - 1) { - level++; + return contextManager.executeAsSystem(() -> { + Condition andCondition = getItemsInChildrenQuery(items, CITIES_FEATURE_CODES); + Condition featureCodeCondition = ((List) andCondition.getParameter("subConditions")).get(0); + int level = items.size(); + featureCodeCondition.setParameter("propertyValues", ORDERED_FEATURES.get(level)); - r = persistenceService.query(andCondition, null, GeonameEntry.class, offset, size); - } - return r; + PartialList r = 
persistenceService.query(andCondition, null, GeonameEntry.class, offset, size); + while (r.size() == 0 && level < ORDERED_FEATURES.size() - 1) { + level++; + featureCodeCondition.setParameter("propertyValues", ORDERED_FEATURES.get(level)); + r = persistenceService.query(andCondition, null, GeonameEntry.class, offset, size); + } + return r; + }); } public PartialList getChildrenCities(List items, int offset, int size) { - return persistenceService.query(getItemsInChildrenQuery(items, CITIES_FEATURE_CODES), null, GeonameEntry.class, offset, size); + return contextManager.executeAsSystem(() -> persistenceService.query(getItemsInChildrenQuery(items, CITIES_FEATURE_CODES), null, GeonameEntry.class, offset, size)); } private Condition getItemsInChildrenQuery(List items, List featureCodes) { @@ -296,45 +358,47 @@ private Condition getItemsInChildrenQuery(List items, List featu } public List getCapitalEntries(String itemId) { - GeonameEntry entry = persistenceService.load(itemId, GeonameEntry.class); - List featureCodes; - - List l = new ArrayList(); - Condition andCondition = new Condition(); - andCondition.setConditionType(definitionsService.getConditionType("booleanCondition")); - andCondition.setParameter("operator", "and"); - andCondition.setParameter("subConditions", l); + return contextManager.executeAsSystem(() -> { + GeonameEntry entry = persistenceService.load(itemId, GeonameEntry.class); + List featureCodes; + + List l = new ArrayList(); + Condition andCondition = new Condition(); + andCondition.setConditionType(definitionsService.getConditionType("booleanCondition")); + andCondition.setParameter("operator", "and"); + andCondition.setParameter("subConditions", l); + + l.add(getPropertyCondition("countryCode", "propertyValue", entry.getCountryCode(), "equals")); + + if (COUNTRY_FEATURE_CODES.contains(entry.getFeatureCode())) { + featureCodes = Arrays.asList("PPLC"); + } else if (ADM1_FEATURE_CODES.contains(entry.getFeatureCode())) { + featureCodes = 
Arrays.asList("PPLA", "PPLC"); + l.add(getPropertyCondition("admin1Code", "propertyValue", entry.getAdmin1Code(), "equals")); + } else if (ADM2_FEATURE_CODES.contains(entry.getFeatureCode())) { + featureCodes = Arrays.asList("PPLA2", "PPLA", "PPLC"); + l.add(getPropertyCondition("admin1Code", "propertyValue", entry.getAdmin1Code(), "equals")); + l.add(getPropertyCondition("admin2Code", "propertyValue", entry.getAdmin2Code(), "equals")); + } else { + return Collections.emptyList(); + } - l.add(getPropertyCondition("countryCode", "propertyValue", entry.getCountryCode(), "equals")); - - if (COUNTRY_FEATURE_CODES.contains(entry.getFeatureCode())) { - featureCodes = Arrays.asList("PPLC"); - } else if (ADM1_FEATURE_CODES.contains(entry.getFeatureCode())) { - featureCodes = Arrays.asList("PPLA", "PPLC"); - l.add(getPropertyCondition("admin1Code", "propertyValue", entry.getAdmin1Code(), "equals")); - } else if (ADM2_FEATURE_CODES.contains(entry.getFeatureCode())) { - featureCodes = Arrays.asList("PPLA2", "PPLA", "PPLC"); - l.add(getPropertyCondition("admin1Code", "propertyValue", entry.getAdmin1Code(), "equals")); - l.add(getPropertyCondition("admin2Code", "propertyValue", entry.getAdmin2Code(), "equals")); - } else { + Condition featureCodeCondition = new Condition(); + featureCodeCondition.setConditionType(definitionsService.getConditionType("sessionPropertyCondition")); + featureCodeCondition.setParameter("propertyName", "featureCode"); + featureCodeCondition.setParameter("propertyValues", featureCodes); + featureCodeCondition.setParameter("comparisonOperator", "in"); + l.add(featureCodeCondition); + List entries = persistenceService.query(andCondition, null, GeonameEntry.class); + if (entries.size() == 0) { + featureCodeCondition.setParameter("propertyValues", CITIES_FEATURE_CODES); + entries = persistenceService.query(andCondition, "population:desc", GeonameEntry.class, 0, 1).getList(); + } + if (entries.size() > 0) { + return getHierarchy(entries.get(0)); + } return 
Collections.emptyList(); - } - - Condition featureCodeCondition = new Condition(); - featureCodeCondition.setConditionType(definitionsService.getConditionType("sessionPropertyCondition")); - featureCodeCondition.setParameter("propertyName", "featureCode"); - featureCodeCondition.setParameter("propertyValues", featureCodes); - featureCodeCondition.setParameter("comparisonOperator", "in"); - l.add(featureCodeCondition); - List entries = persistenceService.query(andCondition, null, GeonameEntry.class); - if (entries.size() == 0) { - featureCodeCondition.setParameter("propertyValues", CITIES_FEATURE_CODES); - entries = persistenceService.query(andCondition, "population:desc", GeonameEntry.class, 0, 1).getList(); - } - if (entries.size() > 0) { - return getHierarchy(entries.get(0)); - } - return Collections.emptyList(); + }); } private Condition getPropertyCondition(String name, String propertyValueField, Object value, String operator) { diff --git a/extensions/groovy-actions/services/src/main/java/org/apache/unomi/groovy/actions/services/impl/GroovyActionsServiceImpl.java b/extensions/groovy-actions/services/src/main/java/org/apache/unomi/groovy/actions/services/impl/GroovyActionsServiceImpl.java index 3ad70b69b5..fee5f3a895 100644 --- a/extensions/groovy-actions/services/src/main/java/org/apache/unomi/groovy/actions/services/impl/GroovyActionsServiceImpl.java +++ b/extensions/groovy-actions/services/src/main/java/org/apache/unomi/groovy/actions/services/impl/GroovyActionsServiceImpl.java @@ -36,7 +36,10 @@ import org.codehaus.groovy.control.customizers.ImportCustomizer; import org.osgi.framework.BundleContext; import org.osgi.framework.wiring.BundleWiring; -import org.osgi.service.component.annotations.*; +import org.osgi.service.component.annotations.Activate; +import org.osgi.service.component.annotations.Component; +import org.osgi.service.component.annotations.Deactivate; +import org.osgi.service.component.annotations.Reference; import 
org.osgi.service.metatype.annotations.Designate; import org.osgi.service.metatype.annotations.ObjectClassDefinition; import org.slf4j.Logger; @@ -46,12 +49,10 @@ import java.net.URL; import java.nio.charset.StandardCharsets; import java.util.HashSet; -import java.util.Set; - import java.util.Map; +import java.util.Set; import java.util.TimerTask; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -74,7 +75,6 @@ public class GroovyActionsServiceImpl implements GroovyActionsService { private BundleContext bundleContext; private GroovyScriptEngine groovyScriptEngine; private CompilerConfiguration compilerConfiguration; - private ScheduledFuture scheduledFuture; private final Object compilationLock = new Object(); private GroovyShell compilationShell; @@ -88,6 +88,7 @@ public class GroovyActionsServiceImpl implements GroovyActionsService { private DefinitionsService definitionsService; private PersistenceService persistenceService; private SchedulerService schedulerService; + private String refreshGroovyActionsTaskId; private GroovyActionsServiceConfig config; @Reference @@ -103,9 +104,12 @@ public void setPersistenceService(PersistenceService persistenceService) { @Reference public void setSchedulerService(SchedulerService schedulerService) { this.schedulerService = schedulerService; - } - + if (schedulerService != null && config != null) { + LOGGER.info("SchedulerService was set after GroovyActionsService initialization, initializing scheduled tasks now"); + initializeTimers(); + } + } @Activate public void start(GroovyActionsServiceConfig config, BundleContext bundleContext) { @@ -130,15 +134,19 @@ public void start(GroovyActionsServiceConfig config, BundleContext bundleContext // PRE-COMPILE ALL SCRIPTS AT STARTUP (no on-demand compilation) preloadAllScripts(); - initializeTimers(); + if (schedulerService != null) { +
initializeTimers(); + } else { + LOGGER.warn("SchedulerService not available during GroovyActionsService initialization. Scheduled tasks will not be registered. They will be registered when SchedulerService becomes available."); + } LOGGER.info("Groovy action service initialized with {} scripts", scriptMetadataCache.size()); } @Deactivate public void onDestroy() { LOGGER.debug("onDestroy Method called"); - if (scheduledFuture != null && !scheduledFuture.isCancelled()) { - scheduledFuture.cancel(true); + if (schedulerService != null && refreshGroovyActionsTaskId != null) { + schedulerService.cancelTask(refreshGroovyActionsTaskId); } } @@ -342,7 +350,7 @@ public void remove(String actionName) { ScriptMetadata removedMetadata = scriptMetadataCache.remove(actionName); persistenceService.remove(actionName, GroovyAction.class); - + // Clean up error tracking to prevent memory leak loggedRefreshErrors.remove(actionName); @@ -445,29 +453,29 @@ private void refreshGroovyActions() { } errorCount++; - + // Prevent log spam for repeated compilation errors during refresh String errorMessage = e.getMessage(); Set scriptErrors = loggedRefreshErrors.get(actionName); - + if (scriptErrors == null || !scriptErrors.contains(errorMessage)) { newErrorCount++; LOGGER.error("Failed to refresh script: {}", actionName, e); - + // Prevent memory leak by limiting tracked errors before adding new entries if (scriptErrors == null && loggedRefreshErrors.size() >= MAX_LOGGED_ERRORS) { // Remove one random entry to make space (simple eviction) String firstKey = loggedRefreshErrors.keySet().iterator().next(); loggedRefreshErrors.remove(firstKey); } - + // Now safely add the error if (scriptErrors == null) { scriptErrors = ConcurrentHashMap.newKeySet(); loggedRefreshErrors.put(actionName, scriptErrors); } scriptErrors.add(errorMessage); - + LOGGER.warn("Keeping existing version of script {} due to compilation error", actionName); } @@ -502,7 +510,9 @@ public void run() { refreshGroovyActions(); } }; 
- scheduledFuture = schedulerService.getScheduleExecutorService().scheduleWithFixedDelay(task, 0, config.services_groovy_actions_refresh_interval(), - TimeUnit.MILLISECONDS); + if (this.refreshGroovyActionsTaskId != null) { + schedulerService.cancelTask(this.refreshGroovyActionsTaskId); + } + this.refreshGroovyActionsTaskId = schedulerService.createRecurringTask("refreshGroovyActions", config.services_groovy_actions_refresh_interval(), TimeUnit.MILLISECONDS, task, false).getItemId(); } } diff --git a/extensions/json-schema/services/src/main/java/org/apache/unomi/schema/impl/SchemaServiceImpl.java b/extensions/json-schema/services/src/main/java/org/apache/unomi/schema/impl/SchemaServiceImpl.java index da7efeac28..f7e68b783d 100644 --- a/extensions/json-schema/services/src/main/java/org/apache/unomi/schema/impl/SchemaServiceImpl.java +++ b/extensions/json-schema/services/src/main/java/org/apache/unomi/schema/impl/SchemaServiceImpl.java @@ -27,7 +27,9 @@ import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.apache.unomi.api.Item; +import org.apache.unomi.api.services.SchedulerService; import org.apache.unomi.api.services.ScopeService; +import org.apache.unomi.api.tasks.ScheduledTask; import org.apache.unomi.persistence.spi.PersistenceService; import org.apache.unomi.schema.api.JsonSchemaWrapper; import org.apache.unomi.schema.api.SchemaService; @@ -47,10 +49,8 @@ public class SchemaServiceImpl implements SchemaService { private static final String URI = "https://json-schema.org/draft/2019-09/schema"; - private static final Logger LOGGER = LoggerFactory.getLogger(SchemaServiceImpl.class.getName()); private static final String TARGET_EVENTS = "events"; - private static final String GENERIC_ERROR_KEY = "error"; ObjectMapper objectMapper = new ObjectMapper(); @@ -67,18 +67,12 @@ public class SchemaServiceImpl implements SchemaService { * Available extensions indexed by key:schema URI to be extended, value: list of schema extension 
URIs */ private ConcurrentMap> extensions = new ConcurrentHashMap<>(); - private Integer jsonSchemaRefreshInterval = 1000; - private ScheduledFuture scheduledFuture; - private PersistenceService persistenceService; private ScopeService scopeService; - private JsonSchemaFactory jsonSchemaFactory; - - // TODO UNOMI-572: when fixing UNOMI-572 please remove the usage of the custom ScheduledExecutorService and re-introduce the Unomi Scheduler Service - private ScheduledExecutorService scheduler; - //private SchedulerService schedulerService; + private SchedulerService schedulerService; + private String refreshJSONSchemasTaskId; @Override public boolean isValid(String data, String schemaId) { @@ -378,14 +372,22 @@ private void initTimers() { TimerTask task = new TimerTask() { @Override public void run() { - try { - refreshJSONSchemas(); - } catch (Exception e) { - LOGGER.error("Unexpected error while refreshing JSON Schemas", e); - } + try { + refreshJSONSchemas(); + } catch (Exception e) { + LOGGER.error("Unexpected error while refreshing JSON Schemas", e); + } } }; - scheduledFuture = scheduler.scheduleWithFixedDelay(task, 0, jsonSchemaRefreshInterval, TimeUnit.MILLISECONDS); + this.resetTimers(); + this.refreshJSONSchemasTaskId = schedulerService.createRecurringTask("refreshJSONSchemas", jsonSchemaRefreshInterval, TimeUnit.MILLISECONDS, task, false).getItemId(); + } + + private void resetTimers() { + if (this.refreshJSONSchemasTaskId != null) { + schedulerService.cancelTask(this.refreshJSONSchemasTaskId); + this.refreshJSONSchemasTaskId = null; + } } private void initJsonSchemaFactory() { @@ -414,17 +416,13 @@ private void initJsonSchemaFactory() { } public void init() { - scheduler = Executors.newSingleThreadScheduledExecutor(); - initJsonSchemaFactory(); - initTimers(); + this.initJsonSchemaFactory(); + this.initTimers(); LOGGER.info("Schema service initialized."); } public void destroy() { - scheduledFuture.cancel(true); - if (scheduler != null) { - 
scheduler.shutdown(); - } + this.resetTimers(); LOGGER.info("Schema service shutdown."); } diff --git a/extensions/json-schema/services/src/main/resources/OSGI-INF/blueprint/blueprint.xml b/extensions/json-schema/services/src/main/resources/OSGI-INF/blueprint/blueprint.xml index 9998ad9308..399f81944e 100644 --- a/extensions/json-schema/services/src/main/resources/OSGI-INF/blueprint/blueprint.xml +++ b/extensions/json-schema/services/src/main/resources/OSGI-INF/blueprint/blueprint.xml @@ -29,13 +29,13 @@ - + - + diff --git a/plugins/baseplugin/src/main/java/org/apache/unomi/plugins/baseplugin/actions/MergeProfilesOnPropertyAction.java b/plugins/baseplugin/src/main/java/org/apache/unomi/plugins/baseplugin/actions/MergeProfilesOnPropertyAction.java index a333490abb..e89c2e8f5b 100644 --- a/plugins/baseplugin/src/main/java/org/apache/unomi/plugins/baseplugin/actions/MergeProfilesOnPropertyAction.java +++ b/plugins/baseplugin/src/main/java/org/apache/unomi/plugins/baseplugin/actions/MergeProfilesOnPropertyAction.java @@ -26,6 +26,8 @@ import org.apache.unomi.api.actions.ActionExecutor; import org.apache.unomi.api.conditions.Condition; import org.apache.unomi.api.services.*; +import org.apache.unomi.api.tasks.ScheduledTask; +import org.apache.unomi.api.tasks.TaskExecutor; import org.apache.unomi.persistence.spi.PersistenceService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -154,27 +156,63 @@ private List getProfilesToBeMerge(String mergeProfilePropertyName, Stri } private void reassignPersistedBrowsingDatasAsync(boolean anonymousBrowsing, List mergedProfileIds, String masterProfileId) { - schedulerService.getSharedScheduleExecutorService().schedule(new TimerTask() { + // Register task executor for data reassignment + String taskType = "merge-profiles-reassign-data"; + + // Create a reusable executor that can handle the parameters + TaskExecutor mergeProfilesReassignDataExecutor = new TaskExecutor() { @Override - public void run() { - if 
(!anonymousBrowsing) { - Condition profileIdsCondition = new Condition(definitionsService.getConditionType("eventPropertyCondition")); - profileIdsCondition.setParameter("propertyName","profileId"); - profileIdsCondition.setParameter("comparisonOperator","in"); - profileIdsCondition.setParameter("propertyValues", mergedProfileIds); - - String[] scripts = new String[]{"updateProfileId"}; - Map[] scriptParams = new Map[]{Collections.singletonMap("profileId", masterProfileId)}; - Condition[] conditions = new Condition[]{profileIdsCondition}; - - persistenceService.updateWithQueryAndStoredScript(new Class[]{Session.class, Event.class}, scripts, scriptParams, conditions, false); - } else { - for (String mergedProfileId : mergedProfileIds) { - privacyService.anonymizeBrowsingData(mergedProfileId); + public String getTaskType() { + return taskType; + } + + @Override + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { + try { + Map parameters = task.getParameters(); + boolean isAnonymousBrowsing = (boolean) parameters.get("anonymousBrowsing"); + @SuppressWarnings("unchecked") + List profilesIds = (List) parameters.get("mergedProfileIds"); + String masterProfile = (String) parameters.get("masterProfileId"); + + if (!isAnonymousBrowsing) { + Condition profileIdsCondition = new Condition(definitionsService.getConditionType("eventPropertyCondition")); + profileIdsCondition.setParameter("propertyName","profileId"); + profileIdsCondition.setParameter("comparisonOperator","in"); + profileIdsCondition.setParameter("propertyValues", profilesIds); + + String[] scripts = new String[]{"updateProfileId"}; + Map[] scriptParams = new Map[]{Collections.singletonMap("profileId", masterProfile)}; + Condition[] conditions = new Condition[]{profileIdsCondition}; + + persistenceService.updateWithQueryAndStoredScript(new Class[]{Session.class, Event.class}, scripts, scriptParams, conditions, false); + } else { + for (String mergedProfileId :
profilesIds) { + privacyService.anonymizeBrowsingData(mergedProfileId); + } } + + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while reassigning profile data", e); + callback.fail(e.getMessage()); } } - }, 1000, TimeUnit.MILLISECONDS); + }; + + // Register the executor + schedulerService.registerTaskExecutor(mergeProfilesReassignDataExecutor); + + // Create a one-shot task for async data reassignment + schedulerService.newTask(taskType) + .withParameters(Map.of( + "anonymousBrowsing", anonymousBrowsing, + "mergedProfileIds", mergedProfileIds, + "masterProfileId", masterProfileId + )) + .withInitialDelay(1000, TimeUnit.MILLISECONDS) + .asOneShot() + .schedule(); } private void reassignCurrentBrowsingData(Event event, List existingMergedProfiles, boolean forceEventProfileAsMaster, String mergePropName, String mergePropValue) { diff --git a/services/src/main/java/org/apache/unomi/services/impl/cluster/ClusterServiceImpl.java b/services/src/main/java/org/apache/unomi/services/impl/cluster/ClusterServiceImpl.java index f741d8d5ff..9802426e75 100644 --- a/services/src/main/java/org/apache/unomi/services/impl/cluster/ClusterServiceImpl.java +++ b/services/src/main/java/org/apache/unomi/services/impl/cluster/ClusterServiceImpl.java @@ -58,8 +58,8 @@ public class ClusterServiceImpl implements ClusterService { private volatile List cachedClusterNodes = Collections.emptyList(); private BundleWatcher bundleWatcher; - private String updateSystemStatsTaskId; - private String cleanupStaleNodesTaskId; + private String clusterNodeStatisticsUpdateTaskId; + private String clusterStaleNodesCleanupTaskId; /** * Max time to wait for persistence service (in milliseconds) @@ -206,12 +206,10 @@ public void init() { * This method can be called later if schedulerService wasn't available during init.
*/ public void initializeScheduledTasks() { - /* Wait for PR UNOMI-878 to reactivate that code if (schedulerService == null) { LOGGER.error("Cannot initialize scheduled tasks: SchedulerService is not set"); return; } - */ // Schedule regular updates of the node statistics TimerTask statisticsTask = new TimerTask() { @@ -224,7 +222,7 @@ public void run() { } } }; - updateSystemStatsTaskId = schedulerService.createRecurringTask("clusterNodeStatisticsUpdate", nodeStatisticsUpdateFrequency, TimeUnit.MILLISECONDS, statisticsTask, false).getItemId(); + this.clusterNodeStatisticsUpdateTaskId = schedulerService.createRecurringTask("clusterNodeStatisticsUpdate", nodeStatisticsUpdateFrequency, TimeUnit.MILLISECONDS, statisticsTask, false).getItemId(); // Schedule cleanup of stale nodes TimerTask cleanupTask = new TimerTask() { @@ -237,7 +235,7 @@ public void run() { } } }; - cleanupStaleNodesTaskId = schedulerService.createRecurringTask("clusterStaleNodesCleanup", 60000, TimeUnit.MILLISECONDS, cleanupTask, false).getItemId(); + this.clusterStaleNodesCleanupTaskId = schedulerService.createRecurringTask("clusterStaleNodesCleanup", 60000, TimeUnit.MILLISECONDS, cleanupTask, false).getItemId(); LOGGER.info("Cluster service scheduled tasks initialized"); } @@ -247,11 +245,13 @@ public void destroy() { shutdownNow = true; // Cancel scheduled tasks - if (updateSystemStatsTaskId != null) { - schedulerService.cancelTask(updateSystemStatsTaskId); + if (schedulerService != null && clusterNodeStatisticsUpdateTaskId != null) { + schedulerService.cancelTask(clusterNodeStatisticsUpdateTaskId); + clusterNodeStatisticsUpdateTaskId = null; } - if (cleanupStaleNodesTaskId != null) { - schedulerService.cancelTask(cleanupStaleNodesTaskId); + if (schedulerService != null && clusterStaleNodesCleanupTaskId != null) { + schedulerService.cancelTask(clusterStaleNodesCleanupTaskId); + clusterStaleNodesCleanupTaskId = null; } // Remove node from persistence service diff --git
a/services/src/main/java/org/apache/unomi/services/impl/definitions/DefinitionsServiceImpl.java b/services/src/main/java/org/apache/unomi/services/impl/definitions/DefinitionsServiceImpl.java index 8fa7e1e687..ff7babc2ca 100644 --- a/services/src/main/java/org/apache/unomi/services/impl/definitions/DefinitionsServiceImpl.java +++ b/services/src/main/java/org/apache/unomi/services/impl/definitions/DefinitionsServiceImpl.java @@ -26,9 +26,9 @@ import org.apache.unomi.api.services.DefinitionsService; import org.apache.unomi.api.services.SchedulerService; import org.apache.unomi.api.utils.ConditionBuilder; +import org.apache.unomi.api.utils.ParserHelper; import org.apache.unomi.persistence.spi.CustomObjectMapper; import org.apache.unomi.persistence.spi.PersistenceService; -import org.apache.unomi.api.utils.ParserHelper; import org.osgi.framework.Bundle; import org.osgi.framework.BundleContext; import org.osgi.framework.BundleEvent; @@ -38,17 +38,7 @@ import java.io.IOException; import java.net.URL; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Enumeration; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TimerTask; +import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeUnit; @@ -70,6 +60,8 @@ public class DefinitionsServiceImpl implements DefinitionsService, SynchronousBu private ConditionBuilder conditionBuilder; private BundleContext bundleContext; + private String reloadTypesTaskId; + public DefinitionsServiceImpl() { } @@ -114,10 +106,17 @@ public void run() { reloadTypes(false); } }; - schedulerService.getScheduleExecutorService().scheduleAtFixedRate(task, 10000, definitionsRefreshInterval, TimeUnit.MILLISECONDS); + this.resetTypeReloads(); + this.reloadTypesTaskId = schedulerService.createRecurringTask("reloadTypes", 
definitionsRefreshInterval, TimeUnit.MILLISECONDS, task, false).getItemId(); LOGGER.info("Scheduled task for condition type loading each 10s"); } + private void resetTypeReloads() { + if (this.reloadTypesTaskId != null) { + schedulerService.cancelTask(this.reloadTypesTaskId); + } + } + public void reloadTypes(boolean refresh) { try { if (refresh) { @@ -190,6 +189,7 @@ private void processBundleStop(BundleContext bundleContext) { } public void preDestroy() { + this.resetTypeReloads(); bundleContext.removeBundleListener(this); LOGGER.info("Definitions service shutdown."); } diff --git a/services/src/main/java/org/apache/unomi/services/impl/profiles/ProfileServiceImpl.java b/services/src/main/java/org/apache/unomi/services/impl/profiles/ProfileServiceImpl.java index 7dd5db65fb..78ca739595 100644 --- a/services/src/main/java/org/apache/unomi/services/impl/profiles/ProfileServiceImpl.java +++ b/services/src/main/java/org/apache/unomi/services/impl/profiles/ProfileServiceImpl.java @@ -183,14 +183,15 @@ private void updateListMap(Map> listMap, PropertyType private Integer purgeSessionExistTime = 0; private Integer purgeEventExistTime = 0; private Integer purgeProfileInterval = 0; - private TimerTask purgeTask = null; private long propertiesRefreshInterval = 10000; private PropertyTypes propertyTypes; - private TimerTask propertyTypeLoadTask = null; private boolean forceRefreshOnSave = false; + private String propertyTypeLoadTaskId; + private String purgeProfilesTaskId; + public ProfileServiceImpl() { LOGGER.info("Initializing profile service..."); } @@ -241,12 +242,8 @@ public void postConstruct() { } public void preDestroy() { - if (purgeTask != null) { - purgeTask.cancel(); - } - if (propertyTypeLoadTask != null) { - propertyTypeLoadTask.cancel(); - } + this.resetProfilesPurgeTask(); + this.resetPropertyTypeLoadTask(); bundleContext.removeBundleListener(this); LOGGER.info("Profile service shutdown."); } @@ -304,14 +301,21 @@ public void setPurgeEventExistTime(Integer 
purgeEventExistTime) { } private void schedulePropertyTypeLoad() { - propertyTypeLoadTask = new TimerTask() { + TimerTask task = new TimerTask() { @Override public void run() { reloadPropertyTypes(false); } }; - schedulerService.getScheduleExecutorService().scheduleAtFixedRate(propertyTypeLoadTask, 10000, propertiesRefreshInterval, TimeUnit.MILLISECONDS); - LOGGER.info("Scheduled task for property type loading each 10s"); + this.resetPropertyTypeLoadTask(); + this.propertyTypeLoadTaskId = schedulerService.createRecurringTask("propertyTypeLoad", propertiesRefreshInterval, TimeUnit.MILLISECONDS, task, false).getItemId(); + LOGGER.info("Scheduled task for property type loading each {}ms", propertiesRefreshInterval); + } + + private void resetPropertyTypeLoadTask() { + if (this.propertyTypeLoadTaskId != null) { + schedulerService.cancelTask(this.propertyTypeLoadTaskId); + } } public void reloadPropertyTypes(boolean refresh) { @@ -410,7 +414,7 @@ private void initializePurge() { LOGGER.info("Purge: Event items created since more than {} days, will be purged", purgeEventExistTime); } - purgeTask = new TimerTask() { + TimerTask task = new TimerTask() { @Override public void run() { try { @@ -429,8 +433,8 @@ public void run() { } } }; - - schedulerService.getScheduleExecutorService().scheduleAtFixedRate(purgeTask, 1, purgeProfileInterval, TimeUnit.DAYS); + this.resetProfilesPurgeTask(); + this.purgeProfilesTaskId = schedulerService.createRecurringTask("profilesPurge", purgeProfileInterval, TimeUnit.DAYS, task, false).getItemId(); LOGGER.info("Purge: purge scheduled with an interval of {} days", purgeProfileInterval); } else { @@ -438,6 +442,12 @@ public void run() { } } + private void resetProfilesPurgeTask() { + if (this.purgeProfilesTaskId != null) { + schedulerService.cancelTask(this.purgeProfilesTaskId); + this.purgeProfilesTaskId = null; + } + } public long getAllProfilesCount() { return persistenceService.getAllItemsCount(Profile.ITEM_TYPE); diff --git 
a/services/src/main/java/org/apache/unomi/services/impl/rules/RulesServiceImpl.java b/services/src/main/java/org/apache/unomi/services/impl/rules/RulesServiceImpl.java index fe3fe8fb02..71cc75eb57 100644 --- a/services/src/main/java/org/apache/unomi/services/impl/rules/RulesServiceImpl.java +++ b/services/src/main/java/org/apache/unomi/services/impl/rules/RulesServiceImpl.java @@ -71,6 +71,9 @@ public class RulesServiceImpl implements RulesService, EventListenerService, Syn private Map> rulesByEventType = new HashMap<>(); private Boolean optimizedRulesActivated = true; + private String refreshRulesTaskId; + private String syncRuleStatisticsTaskId; + public void setBundleContext(BundleContext bundleContext) { this.bundleContext = bundleContext; } @@ -133,11 +136,12 @@ public void postConstruct() { bundleContext.addBundleListener(this); - initializeTimers(); + this.initializeTimers(); LOGGER.info("Rule service initialized."); } public void preDestroy() { + this.resetTimers(); bundleContext.removeBundleListener(this); LOGGER.info("Rule service shutdown."); } @@ -488,25 +492,37 @@ public void removeRule(String ruleId) { } private void initializeTimers() { + this.resetTimers(); TimerTask task = new TimerTask() { @Override public void run() { refreshRules(); } }; - schedulerService.getScheduleExecutorService().scheduleWithFixedDelay(task, 0, rulesRefreshInterval, TimeUnit.MILLISECONDS); + this.refreshRulesTaskId = schedulerService.createRecurringTask("refreshRules", rulesRefreshInterval, TimeUnit.MILLISECONDS, task, false).getItemId(); TimerTask statisticsTask = new TimerTask() { @Override public void run() { - try { - syncRuleStatistics(); - } catch (Throwable t) { - LOGGER.error("Error synching rule statistics between memory and persistence back-end", t); - } + try { + syncRuleStatistics(); + } catch (Throwable t) { + LOGGER.error("Error synching rule statistics between memory and persistence back-end", t); + } } }; - 
schedulerService.getScheduleExecutorService().scheduleWithFixedDelay(statisticsTask, 0, rulesStatisticsRefreshInterval, TimeUnit.MILLISECONDS); + this.syncRuleStatisticsTaskId = schedulerService.createRecurringTask("syncRuleStatistics", rulesStatisticsRefreshInterval, TimeUnit.MILLISECONDS, statisticsTask, false).getItemId(); + } + + private void resetTimers() { + if (refreshRulesTaskId != null) { + schedulerService.cancelTask(refreshRulesTaskId); + refreshRulesTaskId = null; + } + if (syncRuleStatisticsTaskId != null) { + schedulerService.cancelTask(syncRuleStatisticsTaskId); + syncRuleStatisticsTaskId = null; + } } public void bundleChanged(BundleEvent event) { diff --git a/services/src/main/java/org/apache/unomi/services/impl/scope/ScopeServiceImpl.java b/services/src/main/java/org/apache/unomi/services/impl/scope/ScopeServiceImpl.java index 701109d9ff..179392a332 100644 --- a/services/src/main/java/org/apache/unomi/services/impl/scope/ScopeServiceImpl.java +++ b/services/src/main/java/org/apache/unomi/services/impl/scope/ScopeServiceImpl.java @@ -27,7 +27,6 @@ import java.util.TimerTask; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; @@ -41,7 +40,7 @@ public class ScopeServiceImpl implements ScopeService { private ConcurrentMap scopes = new ConcurrentHashMap<>(); - private ScheduledFuture scheduledFuture; + private String refreshScopesTaskId; public void setPersistenceService(PersistenceService persistenceService) { this.persistenceService = persistenceService; @@ -56,11 +55,11 @@ public void setScopesRefreshInterval(Integer scopesRefreshInterval) { } public void postConstruct() { - initializeTimers(); + this.initializeTimers(); } public void preDestroy() { - scheduledFuture.cancel(true); + this.resetTimers(); } @Override @@ -90,8 +89,15 @@ public void run() { refreshScopes(); } }; - scheduledFuture = 
schedulerService.getScheduleExecutorService() - .scheduleWithFixedDelay(task, 0, scopesRefreshInterval, TimeUnit.MILLISECONDS); + this.resetTimers(); + this.refreshScopesTaskId = schedulerService.createRecurringTask("refreshScopes", scopesRefreshInterval, TimeUnit.MILLISECONDS, task, false).getItemId(); + } + + private void resetTimers() { + if (refreshScopesTaskId != null) { + schedulerService.cancelTask(refreshScopesTaskId); + refreshScopesTaskId = null; + } } private void refreshScopes() { diff --git a/services/src/main/java/org/apache/unomi/services/impl/segments/SegmentServiceImpl.java b/services/src/main/java/org/apache/unomi/services/impl/segments/SegmentServiceImpl.java index 1bc8730f45..453203bcd4 100644 --- a/services/src/main/java/org/apache/unomi/services/impl/segments/SegmentServiceImpl.java +++ b/services/src/main/java/org/apache/unomi/services/impl/segments/SegmentServiceImpl.java @@ -24,6 +24,7 @@ import org.apache.unomi.api.actions.Action; import org.apache.unomi.api.conditions.Condition; import org.apache.unomi.api.conditions.ConditionType; +import org.apache.unomi.api.exceptions.BadSegmentConditionException; import org.apache.unomi.api.query.Query; import org.apache.unomi.api.rules.Rule; import org.apache.unomi.api.segments.*; @@ -32,12 +33,11 @@ import org.apache.unomi.api.services.SchedulerService; import org.apache.unomi.api.services.SegmentService; import org.apache.unomi.api.utils.ConditionBuilder; +import org.apache.unomi.api.utils.ParserHelper; import org.apache.unomi.persistence.spi.CustomObjectMapper; import org.apache.unomi.persistence.spi.aggregate.TermsAggregate; import org.apache.unomi.services.impl.AbstractServiceImpl; import org.apache.unomi.services.impl.scheduler.SchedulerServiceImpl; -import org.apache.unomi.api.utils.ParserHelper; -import org.apache.unomi.api.exceptions.BadSegmentConditionException; import org.osgi.framework.Bundle; import org.osgi.framework.BundleContext; import org.osgi.framework.BundleEvent; @@ -83,6 +83,8 
@@ public class SegmentServiceImpl extends AbstractServiceImpl implements SegmentSe private int maximumIdsQueryCount = 5000; private boolean pastEventsDisablePartitions = false; private int dailyDateExprEvaluationHourUtc = 5; + private String recalculatePastEventConditionsTaskId; + private String refreshSegmentAndScoringDefinitionsTaskId; public SegmentServiceImpl() { LOGGER.info("Initializing segment service..."); @@ -155,11 +157,12 @@ public void postConstruct() throws IOException { } } bundleContext.addBundleListener(this); - initializeTimer(); + this.initializeTimer(); LOGGER.info("Segment service initialized."); } public void preDestroy() { + this.resetTimers(); bundleContext.removeBundleListener(this); LOGGER.info("Segment service shutdown."); } @@ -1196,7 +1199,7 @@ public void bundleChanged(BundleEvent event) { } private void initializeTimer() { - + this.resetTimers(); TimerTask task = new TimerTask() { @Override public void run() { @@ -1214,7 +1217,7 @@ public void run() { long period = TimeUnit.DAYS.toSeconds(taskExecutionPeriod); LOGGER.info("daily recalculation job for segments and scoring that contains date relative conditions will run at fixed rate, " + "initialDelay={}, taskExecutionPeriod={} in seconds", initialDelay, period); - schedulerService.getScheduleExecutorService().scheduleAtFixedRate(task, initialDelay, period, TimeUnit.SECONDS); + this.recalculatePastEventConditionsTaskId = schedulerService.createRecurringTask("recalculatePastEventConditions", period, TimeUnit.SECONDS, task, false).getItemId(); task = new TimerTask() { @Override @@ -1227,7 +1230,18 @@ public void run() { } } }; - schedulerService.getScheduleExecutorService().scheduleAtFixedRate(task, 0, segmentRefreshInterval, TimeUnit.MILLISECONDS); + this.refreshSegmentAndScoringDefinitionsTaskId = schedulerService.createRecurringTask("refreshSegmentAndScoringDefinitions", segmentRefreshInterval, TimeUnit.MILLISECONDS, task, false).getItemId(); + } + + private void resetTimers() { + if 
(this.recalculatePastEventConditionsTaskId != null) { + schedulerService.cancelTask(this.recalculatePastEventConditionsTaskId); + this.recalculatePastEventConditionsTaskId = null; + } + if (this.refreshSegmentAndScoringDefinitionsTaskId != null) { + schedulerService.cancelTask(this.refreshSegmentAndScoringDefinitionsTaskId); + this.refreshSegmentAndScoringDefinitionsTaskId = null; + } } public void setTaskExecutionPeriod(long taskExecutionPeriod) { diff --git a/services/src/main/resources/OSGI-INF/blueprint/blueprint.xml b/services/src/main/resources/OSGI-INF/blueprint/blueprint.xml index 4e23f5cef3..7abef9ac42 100644 --- a/services/src/main/resources/OSGI-INF/blueprint/blueprint.xml +++ b/services/src/main/resources/OSGI-INF/blueprint/blueprint.xml @@ -367,9 +367,7 @@ - diff --git a/tools/shell-dev-commands/src/main/java/org/apache/unomi/shell/commands/scheduler/CancelTaskCommand.java b/tools/shell-dev-commands/src/main/java/org/apache/unomi/shell/commands/scheduler/CancelTaskCommand.java new file mode 100644 index 0000000000..2cff5ff5c7 --- /dev/null +++ b/tools/shell-dev-commands/src/main/java/org/apache/unomi/shell/commands/scheduler/CancelTaskCommand.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.unomi.shell.commands.scheduler;
+
+import org.apache.karaf.shell.api.action.Action;
+import org.apache.karaf.shell.api.action.Argument;
+import org.apache.karaf.shell.api.action.Command;
+import org.apache.karaf.shell.api.action.lifecycle.Reference;
+import org.apache.karaf.shell.api.action.lifecycle.Service;
+import org.apache.unomi.api.services.SchedulerService;
+
+/**
+ * Karaf shell command {@code unomi:task-cancel}: asks the scheduler service to cancel the task
+ * whose ID is given as the first argument.
+ */
+@Command(scope = "unomi", name = "task-cancel", description = "Cancels a scheduled task")
+@Service
+public class CancelTaskCommand implements Action {
+
+    @Reference
+    private SchedulerService schedulerService;
+
+    @Argument(index = 0, name = "taskId", description = "The ID of the task to cancel", required = true)
+    private String taskId;
+
+    /**
+     * Cancels the task and prints a confirmation line on standard output.
+     *
+     * @return always {@code null}
+     * @throws Exception propagated unchanged from the scheduler service call
+     */
+    @Override
+    public Object execute() throws Exception {
+        // Building the text first has no side effect; it is only printed once the cancel call returned.
+        final String confirmation = "Task " + taskId + " has been cancelled successfully.";
+        schedulerService.cancelTask(taskId);
+        System.out.println(confirmation);
+        return null;
+    }
+}
diff --git a/tools/shell-dev-commands/src/main/java/org/apache/unomi/shell/commands/scheduler/ListTasksCommand.java b/tools/shell-dev-commands/src/main/java/org/apache/unomi/shell/commands/scheduler/ListTasksCommand.java
new file mode 100644
index 0000000000..c6c71a6e19
--- /dev/null
+++ b/tools/shell-dev-commands/src/main/java/org/apache/unomi/shell/commands/scheduler/ListTasksCommand.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.unomi.shell.commands.scheduler;
+
+import org.apache.karaf.shell.api.action.Action;
+import org.apache.karaf.shell.api.action.Command;
+import org.apache.karaf.shell.api.action.Option;
+import org.apache.karaf.shell.api.action.lifecycle.Reference;
+import org.apache.karaf.shell.api.action.lifecycle.Service;
+import org.apache.karaf.shell.support.table.Col;
+import org.apache.karaf.shell.support.table.ShellTable;
+import org.apache.unomi.api.PartialList;
+import org.apache.unomi.api.services.SchedulerService;
+import org.apache.unomi.api.tasks.ScheduledTask;
+
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+
+/**
+ * Karaf shell command {@code unomi:task-list}: prints a table of scheduled tasks, optionally
+ * filtered by status and/or type, and never prints more than {@code --limit} rows.
+ */
+@Command(scope = "unomi", name = "task-list", description = "Lists scheduled tasks")
+@Service
+public class ListTasksCommand implements Action {
+
+    private static final String DATE_PATTERN = "yyyy-MM-dd HH:mm:ss";
+
+    @Reference
+    private SchedulerService schedulerService;
+
+    @Option(name = "-s", aliases = "--status", description = "Filter by task status (SCHEDULED, RUNNING, COMPLETED, FAILED, CANCELLED, CRASHED)", required = false)
+    private String status;
+
+    @Option(name = "-t", aliases = "--type", description = "Filter by task type", required = false)
+    private String type;
+
+    @Option(name = "--limit", description = "Maximum number of tasks to display (default: 50)", required = false)
+    private int limit = 50;
+
+    /**
+     * Validates the options, gathers the matching tasks and prints them followed by a summary line.
+     *
+     * @return always {@code null}
+     * @throws Exception propagated unchanged from the scheduler service calls
+     */
+    @Override
+    public Object execute() throws Exception {
+        if (limit < 1) {
+            System.err.println("Invalid limit: " + limit);
+            return null;
+        }
+
+        ScheduledTask.TaskStatus statusFilter = null;
+        if (status != null) {
+            try {
+                // Locale.ROOT: the default locale may upper-case differently (e.g. Turkish dotted "i"),
+                // which would make a valid status such as "failed" unresolvable.
+                statusFilter = ScheduledTask.TaskStatus.valueOf(status.toUpperCase(Locale.ROOT));
+            } catch (IllegalArgumentException e) {
+                System.err.println("Invalid status: " + status);
+                return null;
+            }
+        }
+
+        List<ScheduledTask> tasks = selectTasks(statusFilter);
+        printTable(tasks);
+        printSummary(tasks);
+        return null;
+    }
+
+    /**
+     * Gathers stored and in-memory tasks into a fresh list, keeps those matching every filter that
+     * was supplied, and stops once {@code limit} tasks were retained.
+     *
+     * @param statusFilter the parsed {@code --status} value, or {@code null} when no status was given
+     * @return a new list owned by the caller, never larger than {@code limit}
+     */
+    private List<ScheduledTask> selectTasks(ScheduledTask.TaskStatus statusFilter) {
+        // Always work on our own list: the lists handed out by the service are not ours to modify.
+        List<ScheduledTask> candidates = new ArrayList<>();
+        if (statusFilter != null) {
+            PartialList<ScheduledTask> stored = schedulerService.getTasksByStatus(statusFilter, 0, limit, null);
+            appendAll(candidates, stored != null ? stored.getList() : null);
+            appendAll(candidates, schedulerService.getMemoryTasks());
+        } else if (type != null) {
+            PartialList<ScheduledTask> stored = schedulerService.getTasksByType(type, 0, limit, null);
+            appendAll(candidates, stored != null ? stored.getList() : null);
+            appendAll(candidates, schedulerService.getMemoryTasks());
+        } else {
+            appendAll(candidates, schedulerService.getAllTasks());
+        }
+
+        List<ScheduledTask> matching = new ArrayList<>();
+        for (ScheduledTask task : candidates) {
+            if (matching.size() >= limit) {
+                break;
+            }
+            if (statusFilter != null && task.getStatus() != statusFilter) {
+                continue;
+            }
+            // Called on the option value so that a task without a type cannot cause a NullPointerException.
+            if (type != null && !type.equals(task.getTaskType())) {
+                continue;
+            }
+            matching.add(task);
+        }
+        return matching;
+    }
+
+    /** Adds every element of {@code source} to {@code target}; a {@code null} source is ignored. */
+    private static void appendAll(List<ScheduledTask> target, List<ScheduledTask> source) {
+        if (source != null) {
+            target.addAll(source);
+        }
+    }
+
+    /** Prints one table row per task on standard output. */
+    private void printTable(List<ScheduledTask> tasks) {
+        // SimpleDateFormat is not thread-safe, hence one instance per invocation.
+        SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_PATTERN);
+        ShellTable table = new ShellTable();
+
+        table.column(new Col("ID").maxSize(36));
+        table.column(new Col("Type").maxSize(30));
+        table.column(new Col("Status").maxSize(10));
+        table.column(new Col("Next Run").maxSize(19));
+        table.column(new Col("Last Run").maxSize(19));
+        table.column(new Col("Failures").alignRight());
+        table.column(new Col("Successes").alignRight());
+        table.column(new Col("Total Exec").alignRight());
+        table.column(new Col("Persistent").maxSize(10));
+
+        for (ScheduledTask task : tasks) {
+            int totalExecutions = task.getSuccessCount() + task.getFailureCount();
+
+            table.addRow().addContent(
+                    task.getItemId(),
+                    task.getTaskType(),
+                    task.getStatus(),
+                    task.getNextScheduledExecution() != null ? dateFormat.format(task.getNextScheduledExecution()) : "-",
+                    task.getLastExecutionDate() != null ? dateFormat.format(task.getLastExecutionDate()) : "-",
+                    task.getFailureCount(),
+                    task.getSuccessCount(),
+                    totalExecutions,
+                    task.isPersistent() ? "Storage" : "Memory"
+            );
+        }
+
+        table.print(System.out);
+    }
+
+    /** Prints either "No tasks found." or the number of rows shown, split by storage and memory. */
+    private void printSummary(List<ScheduledTask> tasks) {
+        if (tasks.isEmpty()) {
+            System.out.println("No tasks found.");
+            return;
+        }
+        int persistentCount = (int) tasks.stream().filter(ScheduledTask::isPersistent).count();
+        int memoryCount = tasks.size() - persistentCount;
+        System.out.println("\nShowing " + tasks.size() + " task(s) (" +
+                persistentCount + " in storage, " + memoryCount + " in memory)" +
+                (status != null ? " with status " + status : "") +
+                (type != null ? " of type " + type : ""));
+    }
+}
diff --git a/tools/shell-dev-commands/src/main/java/org/apache/unomi/shell/commands/scheduler/PurgeTasksCommand.java b/tools/shell-dev-commands/src/main/java/org/apache/unomi/shell/commands/scheduler/PurgeTasksCommand.java
new file mode 100644
index 0000000000..3cd357cc56
--- /dev/null
+++ b/tools/shell-dev-commands/src/main/java/org/apache/unomi/shell/commands/scheduler/PurgeTasksCommand.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.unomi.shell.commands.scheduler;
+
+import org.apache.karaf.shell.api.action.Action;
+import org.apache.karaf.shell.api.action.Command;
+import org.apache.karaf.shell.api.action.Option;
+import org.apache.karaf.shell.api.action.lifecycle.Reference;
+import org.apache.karaf.shell.api.action.lifecycle.Service;
+import org.apache.karaf.shell.api.console.Session;
+import org.apache.unomi.api.PartialList;
+import org.apache.unomi.api.services.SchedulerService;
+import org.apache.unomi.api.tasks.ScheduledTask;
+
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.List;
+
+/**
+ * Karaf shell command {@code unomi:task-purge}: cancels every COMPLETED task whose last execution
+ * is older than {@code --days} days, after an interactive confirmation unless {@code --force} is set.
+ *
+ * NOTE(review): the prompt announces a permanent deletion while the code calls
+ * {@code SchedulerService.cancelTask}; confirm that cancelling a completed task removes it from storage.
+ */
+@Command(scope = "unomi", name = "task-purge", description = "Purges old completed tasks")
+@Service
+public class PurgeTasksCommand implements Action {
+
+    /** Page size used when reading completed tasks from the scheduler service. */
+    private static final int BATCH_SIZE = 100;
+
+    @Reference
+    private SchedulerService schedulerService;
+
+    @Reference
+    private Session session;
+
+    @Option(name = "-d", aliases = "--days", description = "Number of days to keep completed tasks (default: 7)", required = false)
+    private int daysToKeep = 7;
+
+    @Option(name = "-f", aliases = "--force", description = "Skip confirmation prompt", required = false)
+    private boolean force = false;
+
+    /**
+     * Validates the options, asks for confirmation when required, then purges the expired tasks.
+     *
+     * @return always {@code null}
+     * @throws Exception propagated unchanged from the console session or the scheduler service
+     */
+    @Override
+    public Object execute() throws Exception {
+        if (daysToKeep < 0) {
+            // A negative value would move the cutoff into the future and select every completed task.
+            System.err.println("Invalid number of days: " + daysToKeep);
+            return null;
+        }
+
+        if (!force) {
+            String response = session.readLine(
+                    "This will permanently delete all completed tasks older than " + daysToKeep + " days. Continue? (y/n): ",
+                    null
+            );
+            if (!"y".equalsIgnoreCase(response != null ? response.trim() : "n")) {
+                System.out.println("Operation cancelled.");
+                return null;
+            }
+        }
+
+        // Calculate cutoff date
+        Calendar cal = Calendar.getInstance();
+        cal.add(Calendar.DAY_OF_MONTH, -daysToKeep);
+        Date cutoffDate = cal.getTime();
+
+        // Read first, modify afterwards: cancelling while paging by offset changes the result set that
+        // is being paged through, so the following page would skip tasks that were never looked at.
+        List<String> expiredTaskIds = findExpiredCompletedTaskIds(cutoffDate);
+        for (String expiredTaskId : expiredTaskIds) {
+            schedulerService.cancelTask(expiredTaskId);
+        }
+
+        System.out.println("Successfully purged " + expiredTaskIds.size() + " completed tasks older than " + daysToKeep + " days.");
+        return null;
+    }
+
+    /**
+     * Pages through all COMPLETED tasks and collects the IDs of those last executed before the cutoff.
+     * Nothing is modified while paging, so the offsets stay meaningful from one page to the next.
+     *
+     * @param cutoffDate tasks last executed strictly before this date are selected
+     * @return the IDs of the selected tasks, possibly empty
+     */
+    private List<String> findExpiredCompletedTaskIds(Date cutoffDate) {
+        List<String> expiredTaskIds = new ArrayList<>();
+        int offset = 0;
+        while (true) {
+            PartialList<ScheduledTask> page = schedulerService.getTasksByStatus(ScheduledTask.TaskStatus.COMPLETED, offset, BATCH_SIZE, null);
+            List<ScheduledTask> tasks = page != null ? page.getList() : null;
+            if (tasks == null || tasks.isEmpty()) {
+                break;
+            }
+
+            for (ScheduledTask task : tasks) {
+                Date lastExecution = task.getLastExecutionDate();
+                if (lastExecution != null && lastExecution.before(cutoffDate)) {
+                    expiredTaskIds.add(task.getItemId());
+                }
+            }
+
+            // A short page means the end of the result set was reached.
+            if (tasks.size() < BATCH_SIZE) {
+                break;
+            }
+            offset += BATCH_SIZE;
+        }
+        return expiredTaskIds;
+    }
+}
diff --git a/tools/shell-dev-commands/src/main/java/org/apache/unomi/shell/commands/scheduler/RetryTaskCommand.java b/tools/shell-dev-commands/src/main/java/org/apache/unomi/shell/commands/scheduler/RetryTaskCommand.java
new file mode 100644
index 0000000000..bcdf26e03e
--- /dev/null
+++ b/tools/shell-dev-commands/src/main/java/org/apache/unomi/shell/commands/scheduler/RetryTaskCommand.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.unomi.shell.commands.scheduler;
+
+import org.apache.karaf.shell.api.action.Action;
+import org.apache.karaf.shell.api.action.Argument;
+import org.apache.karaf.shell.api.action.Command;
+import org.apache.karaf.shell.api.action.Option;
+import org.apache.karaf.shell.api.action.lifecycle.Reference;
+import org.apache.karaf.shell.api.action.lifecycle.Service;
+import org.apache.unomi.api.services.SchedulerService;
+
+/**
+ * Karaf shell command {@code unomi:task-retry}: asks the scheduler service to retry the task whose
+ * ID is given as the first argument, optionally resetting its failure count first.
+ */
+@Command(scope = "unomi", name = "task-retry", description = "Retries a failed task")
+@Service
+public class RetryTaskCommand implements Action {
+
+    @Reference
+    private SchedulerService schedulerService;
+
+    @Argument(index = 0, name = "taskId", description = "The ID of the task to retry", required = true)
+    private String taskId;
+
+    @Option(name = "-r", aliases = "--reset", description = "Reset failure count before retrying")
+    private boolean resetFailureCount = false;
+
+    /**
+     * Requests the retry and prints a confirmation line on standard output.
+     *
+     * @return always {@code null}
+     * @throws Exception propagated unchanged from the scheduler service call
+     */
+    @Override
+    public Object execute() throws Exception {
+        schedulerService.retryTask(taskId, resetFailureCount);
+
+        final String ending;
+        if (resetFailureCount) {
+            ending = " with reset failure count.";
+        } else {
+            ending = ".";
+        }
+        System.out.println("Task " + taskId + " has been queued for retry" + ending);
+        return null;
+    }
+}
diff --git a/tools/shell-dev-commands/src/main/java/org/apache/unomi/shell/commands/scheduler/SetExecutorNodeCommand.java b/tools/shell-dev-commands/src/main/java/org/apache/unomi/shell/commands/scheduler/SetExecutorNodeCommand.java
new file mode 100644
index 0000000000..c52e708737
--- /dev/null
+++ b/tools/shell-dev-commands/src/main/java/org/apache/unomi/shell/commands/scheduler/SetExecutorNodeCommand.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.unomi.shell.commands.scheduler;
+
+import org.apache.karaf.shell.api.action.Action;
+import org.apache.karaf.shell.api.action.Argument;
+import org.apache.karaf.shell.api.action.Command;
+import org.apache.karaf.shell.api.action.lifecycle.Reference;
+import org.apache.karaf.shell.api.action.lifecycle.Service;
+import org.apache.unomi.api.services.SchedulerService;
+
+/**
+ * Karaf shell command {@code unomi:task-executor}: shows whether this node executes tasks.
+ *
+ * The optional {@code enable} argument is validated, but this command has no scheduler-service call
+ * that changes the executor status, so it never reports a change it did not make.
+ */
+@Command(scope = "unomi", name = "task-executor", description = "Shows or changes task executor status for this node")
+@Service
+public class SetExecutorNodeCommand implements Action {
+
+    @Reference
+    private SchedulerService schedulerService;
+
+    @Argument(index = 0, name = "enable", description = "Enable (true) or disable (false) task execution", required = false)
+    private String enable;
+
+    /**
+     * Prints the current status when no argument is given; otherwise validates the argument and
+     * reports truthfully whether the requested state is already in effect.
+     *
+     * @return always {@code null}
+     * @throws Exception propagated unchanged from the scheduler service calls
+     */
+    @Override
+    public Object execute() throws Exception {
+        if (enable == null) {
+            // Just show current status
+            System.out.println("Task executor status: " +
+                    (schedulerService.isExecutorNode() ? "ENABLED" : "DISABLED"));
+            System.out.println("Node ID: " + schedulerService.getNodeId());
+            return null;
+        }
+
+        // Boolean.parseBoolean maps every unknown word to false; a typo must not read as "disable".
+        if (!"true".equalsIgnoreCase(enable) && !"false".equalsIgnoreCase(enable)) {
+            System.err.println("Invalid value '" + enable + "': expected true or false.");
+            return null;
+        }
+
+        boolean shouldEnable = Boolean.parseBoolean(enable);
+        boolean currentlyEnabled = schedulerService.isExecutorNode();
+        if (shouldEnable == currentlyEnabled) {
+            System.out.println("Task executor is already " + (currentlyEnabled ? "ENABLED" : "DISABLED") +
+                    " for node " + schedulerService.getNodeId());
+            return null;
+        }
+
+        // TODO(review): wire this to the scheduler service once it exposes a way to toggle the executor
+        // status; until then, say that nothing changed instead of announcing a state switch.
+        System.err.println("Changing the task executor status from the shell is not supported; node " +
+                schedulerService.getNodeId() + " remains " + (currentlyEnabled ? "ENABLED" : "DISABLED") + ".");
+        return null;
+    }
+}
diff --git a/tools/shell-dev-commands/src/main/java/org/apache/unomi/shell/commands/scheduler/ShowTaskCommand.java b/tools/shell-dev-commands/src/main/java/org/apache/unomi/shell/commands/scheduler/ShowTaskCommand.java
new file mode 100644
index 0000000000..e59172162a
--- /dev/null
+++ b/tools/shell-dev-commands/src/main/java/org/apache/unomi/shell/commands/scheduler/ShowTaskCommand.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.unomi.shell.commands.scheduler;
+
+import org.apache.karaf.shell.api.action.Action;
+import org.apache.karaf.shell.api.action.Argument;
+import org.apache.karaf.shell.api.action.Command;
+import org.apache.karaf.shell.api.action.lifecycle.Reference;
+import org.apache.karaf.shell.api.action.lifecycle.Service;
+import org.apache.unomi.api.services.SchedulerService;
+import org.apache.unomi.api.tasks.ScheduledTask;
+
+import java.text.SimpleDateFormat;
+import java.util.Map;
+
+/**
+ * Karaf shell command {@code unomi:task-show}: prints the stored attributes, parameters and
+ * checkpoint data of the task whose ID is given as the first argument.
+ */
+@Command(scope = "unomi", name = "task-show", description = "Shows detailed information about a task")
+@Service
+public class ShowTaskCommand implements Action {
+
+    @Reference
+    private SchedulerService schedulerService;
+
+    @Argument(index = 0, name = "taskId", description = "The ID of the task to show", required = true)
+    private String taskId;
+
+    /**
+     * Looks the task up and prints its details on standard output, or an error on standard error
+     * when the scheduler service returns no task for the ID.
+     *
+     * @return always {@code null}
+     * @throws Exception propagated unchanged from the scheduler service call
+     */
+    @Override
+    public Object execute() throws Exception {
+        final ScheduledTask found = schedulerService.getTask(taskId);
+        if (found == null) {
+            System.err.println("Task not found: " + taskId);
+            return null;
+        }
+
+        final SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+
+        // General attributes
+        System.out.println("Task Details");
+        System.out.println("-----------");
+        System.out.println("ID: " + found.getItemId());
+        System.out.println("Type: " + found.getTaskType());
+        System.out.println("Status: " + found.getStatus());
+        System.out.println("Persistent: " + found.isPersistent());
+        System.out.println("Parallel Execution: " + found.isAllowParallelExecution());
+        System.out.println("Fixed Rate: " + found.isFixedRate());
+        System.out.println("One Shot: " + found.isOneShot());
+
+        // Scheduling: the two dates are printed only when they are set
+        if (found.getNextScheduledExecution() != null) {
+            System.out.println("Next Run: " + timestampFormat.format(found.getNextScheduledExecution()));
+        }
+        if (found.getLastExecutionDate() != null) {
+            System.out.println("Last Run: " + timestampFormat.format(found.getLastExecutionDate()));
+        }
+        System.out.println("Initial Delay: " + found.getInitialDelay() + " " + found.getTimeUnit());
+        System.out.println("Period: " + found.getPeriod() + " " + found.getTimeUnit());
+
+        // Execution outcome
+        System.out.println("Failure Count: " + found.getFailureCount());
+        if (found.getLastError() != null) {
+            System.out.println("Last Error: " + found.getLastError());
+        }
+
+        printSection("\nParameters", "----------", found.getParameters());
+        printSection("\nCheckpoint Data", "--------------", found.getCheckpointData());
+
+        return null;
+    }
+
+    /**
+     * Prints a heading, its underline and one {@code key: value} line per entry; prints nothing at
+     * all when the map is {@code null} or empty.
+     */
+    private static void printSection(String heading, String underline, Map<?, ?> entries) {
+        if (entries == null || entries.isEmpty()) {
+            return;
+        }
+        System.out.println(heading);
+        System.out.println(underline);
+        for (Map.Entry<?, ?> entry : entries.entrySet()) {
+            System.out.println(entry.getKey() + ": " + entry.getValue());
+        }
+    }
+}
From aaf4e33b0e51c6865db763028afd6ddfb5c5ecd5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Blanchard?=
Date: Tue, 2 Sep 2025 14:07:23 +0200
Subject: [PATCH 3/3] UNOMI-878: Replace TimerTasks with TaskExecutor (#727)

diff --git c/extensions/groovy-actions/services/src/main/java/org/apache/unomi/groovy/actions/services/impl/GroovyActionsServiceImpl.java i/extensions/groovy-actions/services/src/main/java/org/apache/unomi/groovy/actions/services/impl/GroovyActionsServiceImpl.java
index fee5f3a89..550b4cc68 100644
--- c/extensions/groovy-actions/services/src/main/java/org/apache/unomi/groovy/actions/services/impl/GroovyActionsServiceImpl.java
+++ i/extensions/groovy-actions/services/src/main/java/org/apache/unomi/groovy/actions/services/impl/GroovyActionsServiceImpl.java
@@ -26,6 +26,8 @@ import org.apache.unomi.api.Metadata;
 import org.apache.unomi.api.actions.ActionType;
 import org.apache.unomi.api.services.DefinitionsService;
import org.apache.unomi.api.services.SchedulerService; +import org.apache.unomi.api.tasks.ScheduledTask; +import org.apache.unomi.api.tasks.TaskExecutor; import org.apache.unomi.groovy.actions.GroovyAction; import org.apache.unomi.groovy.actions.GroovyBundleResourceConnector; import org.apache.unomi.groovy.actions.ScriptMetadata; @@ -51,7 +53,6 @@ import java.nio.charset.StandardCharsets; import java.util.HashSet; import java.util.Map; import java.util.Set; -import java.util.TimerTask; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; @@ -84,6 +85,7 @@ public class GroovyActionsServiceImpl implements GroovyActionsService { private static final Logger LOGGER = LoggerFactory.getLogger(GroovyActionsServiceImpl.class.getName()); private static final String BASE_SCRIPT_NAME = "BaseScript"; + private static final String REFRESH_ACTIONS_TASK_TYPE = "refresh-groovy-actions"; private DefinitionsService definitionsService; private PersistenceService persistenceService; @@ -504,15 +506,32 @@ public class GroovyActionsServiceImpl implements GroovyActionsService { * Initializes periodic script refresh timer. 
*/ private void initializeTimers() { - TimerTask task = new TimerTask() { + TaskExecutor refreshGroovyActionsTaskExecutor = new TaskExecutor() { @Override - public void run() { - refreshGroovyActions(); + public String getTaskType() { + return REFRESH_ACTIONS_TASK_TYPE; + } + + @Override + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { + try { + refreshGroovyActions(); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while refreshing groovy actions", e); + callback.fail(e.getMessage()); + } } }; + + schedulerService.registerTaskExecutor(refreshGroovyActionsTaskExecutor); + if (this.refreshGroovyActionsTaskId != null) { schedulerService.cancelTask(this.refreshGroovyActionsTaskId); } - this.refreshGroovyActionsTaskId = schedulerService.createRecurringTask("refreshGroovyActions", config.services_groovy_actions_refresh_interval(), TimeUnit.MILLISECONDS, task, false).getItemId(); + this.refreshGroovyActionsTaskId = schedulerService.newTask(REFRESH_ACTIONS_TASK_TYPE) + .withPeriod(config.services_groovy_actions_refresh_interval(), TimeUnit.MILLISECONDS) + .nonPersistent() + .schedule().getItemId(); } } diff --git c/extensions/json-schema/services/src/main/java/org/apache/unomi/schema/impl/SchemaServiceImpl.java i/extensions/json-schema/services/src/main/java/org/apache/unomi/schema/impl/SchemaServiceImpl.java index f7e68b783..1141f4fe3 100644 --- c/extensions/json-schema/services/src/main/java/org/apache/unomi/schema/impl/SchemaServiceImpl.java +++ i/extensions/json-schema/services/src/main/java/org/apache/unomi/schema/impl/SchemaServiceImpl.java @@ -30,6 +30,7 @@ import org.apache.unomi.api.Item; import org.apache.unomi.api.services.SchedulerService; import org.apache.unomi.api.services.ScopeService; import org.apache.unomi.api.tasks.ScheduledTask; +import org.apache.unomi.api.tasks.TaskExecutor; import org.apache.unomi.persistence.spi.PersistenceService; import org.apache.unomi.schema.api.JsonSchemaWrapper;
import org.apache.unomi.schema.api.SchemaService; @@ -43,7 +44,9 @@ import java.io.IOException; import java.io.InputStream; import java.net.URI; import java.util.*; -import java.util.concurrent.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; public class SchemaServiceImpl implements SchemaService { @@ -71,8 +74,10 @@ public class SchemaServiceImpl implements SchemaService { private PersistenceService persistenceService; private ScopeService scopeService; private JsonSchemaFactory jsonSchemaFactory; + private SchedulerService schedulerService; private String refreshJSONSchemasTaskId; + private static final String REFRESH_SCHEMAS_TASK_TYPE = "refresh-json-schemas"; @Override public boolean isValid(String data, String schemaId) { @@ -369,18 +374,31 @@ public class SchemaServiceImpl implements SchemaService { } private void initTimers() { - TimerTask task = new TimerTask() { + TaskExecutor refreshSchemasTaskExecutor = new TaskExecutor() { @Override - public void run() { - try { - refreshJSONSchemas(); - } catch (Exception e) { - LOGGER.error("Unexpected error while refreshing JSON Schemas", e); + public String getTaskType() { + return REFRESH_SCHEMAS_TASK_TYPE; } + + @Override + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { + try { + refreshJSONSchemas(); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while refreshing json schemas", e); + callback.fail(e.getMessage()); + } } }; + + schedulerService.registerTaskExecutor(refreshSchemasTaskExecutor); + this.resetTimers(); - this.refreshJSONSchemasTaskId = schedulerService.createRecurringTask("refreshJSONSchemas", jsonSchemaRefreshInterval, TimeUnit.MILLISECONDS, task, false).getItemId(); + this.refreshJSONSchemasTaskId = schedulerService.newTask(REFRESH_SCHEMAS_TASK_TYPE) + .withPeriod(jsonSchemaRefreshInterval, TimeUnit.MILLISECONDS) +
.nonPersistent() + .schedule().getItemId(); } private void resetTimers() { diff --git c/services/src/main/java/org/apache/unomi/services/impl/cluster/ClusterServiceImpl.java i/services/src/main/java/org/apache/unomi/services/impl/cluster/ClusterServiceImpl.java index 1333d4b0b..44c47486e 100644 --- c/services/src/main/java/org/apache/unomi/services/impl/cluster/ClusterServiceImpl.java +++ i/services/src/main/java/org/apache/unomi/services/impl/cluster/ClusterServiceImpl.java @@ -26,6 +26,8 @@ import org.apache.unomi.api.conditions.Condition; import org.apache.unomi.api.conditions.ConditionType; import org.apache.unomi.api.services.ClusterService; import org.apache.unomi.api.services.SchedulerService; +import org.apache.unomi.api.tasks.ScheduledTask; +import org.apache.unomi.api.tasks.TaskExecutor; import org.apache.unomi.lifecycle.BundleWatcher; import org.apache.unomi.persistence.spi.PersistenceService; import org.osgi.framework.BundleContext; @@ -61,10 +63,13 @@ public class ClusterServiceImpl implements ClusterService { private volatile boolean shutdownNow = false; private volatile List cachedClusterNodes = Collections.emptyList(); - private BundleWatcher bundleWatcher; + private static final String CLUSTER_NODE_STAT_UPDATE_TASK_TYPE = "cluster-node-statistics-update"; + private static final String CLUSTER_STALE_NODE_CLEANUP_TASK_TYPE = "cluster-stale-nodes-cleanup"; private String clusterNodeStatisticsUpdateTaskId; private String clusterStaleNodesCleanupTaskId; + private BundleWatcher bundleWatcher; + /** * Max time to wait for persistence service (in milliseconds) */ @@ -215,40 +220,59 @@ public class ClusterServiceImpl implements ClusterService { return; } - // Schedule regular updates of the node statistics - TimerTask statisticsTask = new TimerTask() { + TaskExecutor clusterNodeStatisticsUpdateTaskExecutor = new TaskExecutor() { @Override - public void run() { + public String getTaskType() { + return CLUSTER_NODE_STAT_UPDATE_TASK_TYPE; + } + + @Override + 
public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { try { updateSystemStats(); - } catch (Throwable t) { - LOGGER.error("Error updating system statistics", t); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while updating cluster node statistics", e); + callback.fail(e.getMessage()); } } }; - this.clusterNodeStatisticsUpdateTaskId = schedulerService.createRecurringTask("clusterNodeStatisticsUpdate", nodeStatisticsUpdateFrequency, TimeUnit.MILLISECONDS, statisticsTask, false).getItemId(); - // Schedule cleanup of stale nodes - TimerTask cleanupTask = new TimerTask() { + TaskExecutor clusterStaleNodesCleanupTaskExecutor = new TaskExecutor() { @Override - public void run() { + public String getTaskType() { + return CLUSTER_STALE_NODE_CLEANUP_TASK_TYPE; + } + + @Override + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { try { cleanupStaleNodes(); - } catch (Throwable t) { - LOGGER.error("Error cleaning up stale nodes", t); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while cleaning staled cluster nodes", e); + callback.fail(e.getMessage()); } } }; - this.clusterStaleNodesCleanupTaskId = schedulerService.createRecurringTask("clusterStaleNodesCleanup", 60000, TimeUnit.MILLISECONDS, cleanupTask, false).getItemId(); + + schedulerService.registerTaskExecutor(clusterNodeStatisticsUpdateTaskExecutor); + schedulerService.registerTaskExecutor(clusterStaleNodesCleanupTaskExecutor); + + this.resetTimers(); + this.clusterNodeStatisticsUpdateTaskId = schedulerService.newTask(CLUSTER_NODE_STAT_UPDATE_TASK_TYPE) + .withPeriod(nodeStatisticsUpdateFrequency, TimeUnit.MILLISECONDS) + .nonPersistent() + .schedule().getItemId(); + this.clusterStaleNodesCleanupTaskId = schedulerService.newTask(CLUSTER_STALE_NODE_CLEANUP_TASK_TYPE) + .withPeriod(60000, TimeUnit.MILLISECONDS) + .nonPersistent() + .schedule().getItemId(); LOGGER.info("Cluster service scheduled tasks 
initialized"); } - public void destroy() { - LOGGER.info("Cluster service shutting down..."); - shutdownNow = true; - - // Cancel scheduled tasks + private void resetTimers() { if (schedulerService != null && clusterNodeStatisticsUpdateTaskId != null) { schedulerService.cancelTask(clusterNodeStatisticsUpdateTaskId); clusterStaleNodesCleanupTaskId = null; @@ -257,6 +281,13 @@ public class ClusterServiceImpl implements ClusterService { schedulerService.cancelTask(clusterStaleNodesCleanupTaskId); clusterStaleNodesCleanupTaskId = null; } + } + + public void destroy() { + LOGGER.info("Cluster service shutting down..."); + shutdownNow = true; + + this.resetTimers(); // Remove node from persistence service if (persistenceService != null) { diff --git c/services/src/main/java/org/apache/unomi/services/impl/definitions/DefinitionsServiceImpl.java i/services/src/main/java/org/apache/unomi/services/impl/definitions/DefinitionsServiceImpl.java index ff7babc2c..4515ff748 100644 --- c/services/src/main/java/org/apache/unomi/services/impl/definitions/DefinitionsServiceImpl.java +++ i/services/src/main/java/org/apache/unomi/services/impl/definitions/DefinitionsServiceImpl.java @@ -25,6 +25,8 @@ import org.apache.unomi.api.conditions.Condition; import org.apache.unomi.api.conditions.ConditionType; import org.apache.unomi.api.services.DefinitionsService; import org.apache.unomi.api.services.SchedulerService; +import org.apache.unomi.api.tasks.ScheduledTask; +import org.apache.unomi.api.tasks.TaskExecutor; import org.apache.unomi.api.utils.ConditionBuilder; import org.apache.unomi.api.utils.ParserHelper; import org.apache.unomi.persistence.spi.CustomObjectMapper; @@ -60,6 +62,8 @@ public class DefinitionsServiceImpl implements DefinitionsService, SynchronousBu private ConditionBuilder conditionBuilder; private BundleContext bundleContext; + + private static final String RELOAD_TYPES_TASK_TYPE = "reload-types"; private String reloadTypesTaskId; public DefinitionsServiceImpl() { @@ 
-100,20 +104,39 @@ public class DefinitionsServiceImpl implements DefinitionsService, SynchronousBu } private void scheduleTypeReloads() { - TimerTask task = new TimerTask() { + TaskExecutor reloadTypesTaskExecutor = new TaskExecutor() { @Override - public void run() { - reloadTypes(false); + public String getTaskType() { + return RELOAD_TYPES_TASK_TYPE; + } + + @Override + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { + try { + reloadTypes(false); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while reloading types", e); + callback.fail(e.getMessage()); + } } }; + + schedulerService.registerTaskExecutor(reloadTypesTaskExecutor); + this.resetTypeReloads(); - this.reloadTypesTaskId = schedulerService.createRecurringTask("reloadTypes", definitionsRefreshInterval, TimeUnit.MILLISECONDS, task, false).getItemId(); + this.reloadTypesTaskId = schedulerService.newTask(RELOAD_TYPES_TASK_TYPE) + .withPeriod(definitionsRefreshInterval, TimeUnit.MILLISECONDS) + .nonPersistent() + .schedule().getItemId(); + LOGGER.info("Scheduled task for condition type loading each 10s"); } private void resetTypeReloads() { if (this.reloadTypesTaskId != null) { schedulerService.cancelTask(this.reloadTypesTaskId); + this.reloadTypesTaskId = null; } } diff --git c/services/src/main/java/org/apache/unomi/services/impl/profiles/ProfileServiceImpl.java i/services/src/main/java/org/apache/unomi/services/impl/profiles/ProfileServiceImpl.java index 78ca73959..9dfac14ba 100644 --- c/services/src/main/java/org/apache/unomi/services/impl/profiles/ProfileServiceImpl.java +++ i/services/src/main/java/org/apache/unomi/services/impl/profiles/ProfileServiceImpl.java @@ -29,6 +29,8 @@ import org.apache.unomi.api.services.DefinitionsService; import org.apache.unomi.api.services.ProfileService; import org.apache.unomi.api.services.SchedulerService; import org.apache.unomi.api.services.SegmentService; +import org.apache.unomi.api.tasks.ScheduledTask; 
+import org.apache.unomi.api.tasks.TaskExecutor; import org.apache.unomi.persistence.spi.CustomObjectMapper; import org.apache.unomi.persistence.spi.PersistenceService; import org.apache.unomi.persistence.spi.PropertyHelper; @@ -189,6 +191,8 @@ public class ProfileServiceImpl implements ProfileService, SynchronousBundleList private boolean forceRefreshOnSave = false; + private static final String PROPERTY_TYPE_LOAD_TASK_TYPE = "property-type-load"; + private static final String PROFILES_PURGE_TASK_TYPE = "profiles-purge"; private String propertyTypeLoadTaskId; private String purgeProfilesTaskId; @@ -301,14 +305,31 @@ public class ProfileServiceImpl implements ProfileService, SynchronousBundleList } private void schedulePropertyTypeLoad() { - TimerTask task = new TimerTask() { + TaskExecutor reloadPropertyTaskExecutor = new TaskExecutor() { @Override - public void run() { - reloadPropertyTypes(false); + public String getTaskType() { + return PROPERTY_TYPE_LOAD_TASK_TYPE; + } + + @Override + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { + try { + reloadPropertyTypes(false); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while reloading property type", e); + callback.fail(e.getMessage()); + } } }; + + schedulerService.registerTaskExecutor(reloadPropertyTaskExecutor); + this.resetPropertyTypeLoadTask(); - this.propertyTypeLoadTaskId = schedulerService.createRecurringTask("propertyTypeLoad", propertiesRefreshInterval, TimeUnit.MILLISECONDS, task, false).getItemId(); + this.propertyTypeLoadTaskId = schedulerService.newTask(PROPERTY_TYPE_LOAD_TASK_TYPE) + .withPeriod(propertiesRefreshInterval, TimeUnit.MILLISECONDS) + .nonPersistent() + .schedule().getItemId(); LOGGER.info("Scheduled task for property type loading each {}ms", propertiesRefreshInterval); } @@ -406,7 +427,6 @@ public class ProfileServiceImpl implements ProfileService, SynchronousBundleList if (purgeProfileExistTime > 0) { LOGGER.info("Purge: 
Profile created since more than {} days, will be purged", purgeProfileExistTime); } - if (purgeSessionExistTime > 0) { LOGGER.info("Purge: Session items created since more than {} days, will be purged", purgeSessionExistTime); } @@ -414,9 +434,14 @@ public class ProfileServiceImpl implements ProfileService, SynchronousBundleList LOGGER.info("Purge: Event items created since more than {} days, will be purged", purgeEventExistTime); } - TimerTask task = new TimerTask() { + TaskExecutor purgeProfilesTaskExecutor = new TaskExecutor() { @Override - public void run() { + public String getTaskType() { + return PROFILES_PURGE_TASK_TYPE; + } + + @Override + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { try { long purgeStartTime = System.currentTimeMillis(); LOGGER.info("Purge: triggered"); @@ -428,14 +453,21 @@ public class ProfileServiceImpl implements ProfileService, SynchronousBundleList purgeSessionItems(purgeSessionExistTime); purgeEventItems(purgeEventExistTime); LOGGER.info("Purge: executed in {} ms", System.currentTimeMillis() - purgeStartTime); - } catch (Throwable t) { - LOGGER.error("Error while purging", t); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while purging", e); + callback.fail(e.getMessage()); } } }; - this.resetProfilesPurgeTask(); - this.purgeProfilesTaskId = schedulerService.createRecurringTask("profilesPurge", purgeProfileInterval, TimeUnit.DAYS, task, false).getItemId(); + schedulerService.registerTaskExecutor(purgeProfilesTaskExecutor); + + this.resetProfilesPurgeTask(); + this.purgeProfilesTaskId = schedulerService.newTask(PROFILES_PURGE_TASK_TYPE) + .withPeriod(purgeProfileInterval, TimeUnit.DAYS) + .nonPersistent() + .schedule().getItemId(); LOGGER.info("Purge: purge scheduled with an interval of {} days", purgeProfileInterval); } else { LOGGER.info("Purge: No purge scheduled"); diff --git c/services/src/main/java/org/apache/unomi/services/impl/rules/RulesServiceImpl.java 
i/services/src/main/java/org/apache/unomi/services/impl/rules/RulesServiceImpl.java index 71cc75eb5..73898830b 100644 --- c/services/src/main/java/org/apache/unomi/services/impl/rules/RulesServiceImpl.java +++ i/services/src/main/java/org/apache/unomi/services/impl/rules/RulesServiceImpl.java @@ -28,6 +28,8 @@ import org.apache.unomi.api.query.Query; import org.apache.unomi.api.rules.Rule; import org.apache.unomi.api.rules.RuleStatistics; import org.apache.unomi.api.services.*; +import org.apache.unomi.api.tasks.ScheduledTask; +import org.apache.unomi.api.tasks.TaskExecutor; import org.apache.unomi.persistence.spi.CustomObjectMapper; import org.apache.unomi.persistence.spi.PersistenceService; import org.apache.unomi.persistence.spi.config.ConfigurationUpdateHelper; @@ -65,14 +67,16 @@ public class RulesServiceImpl implements RulesService, EventListenerService, Syn private Integer rulesRefreshInterval = 1000; private Integer rulesStatisticsRefreshInterval = 10000; + private static final String REFRESH_RULES_TASK_TYPE = "refresh-rules"; + private static final String REFRESH_RULE_STATS_TASK_TYPE = "refresh-rule-stats"; + private String refreshRulesTaskId; + private String syncRuleStatisticsTaskId; private final List ruleListeners = new CopyOnWriteArrayList(); private Map> rulesByEventType = new HashMap<>(); private Boolean optimizedRulesActivated = true; - private String refreshRulesTaskId; - private String syncRuleStatisticsTaskId; public void setBundleContext(BundleContext bundleContext) { this.bundleContext = bundleContext; @@ -492,26 +496,53 @@ public class RulesServiceImpl implements RulesService, EventListenerService, Syn } private void initializeTimers() { - this.resetTimers(); - TimerTask task = new TimerTask() { + TaskExecutor refreshRulesTaskExecutor = new TaskExecutor() { @Override - public void run() { - refreshRules(); + public String getTaskType() { + return REFRESH_RULES_TASK_TYPE; } - }; - this.refreshRulesTaskId = 
schedulerService.createRecurringTask("refreshRules", rulesRefreshInterval, TimeUnit.MILLISECONDS, task, false).getItemId(); - TimerTask statisticsTask = new TimerTask() { @Override - public void run() { - try { - syncRuleStatistics(); - } catch (Throwable t) { - LOGGER.error("Error synching rule statistics between memory and persistence back-end", t); - } + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { + try { + refreshRules(); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while refreshing rules", e); + callback.fail(e.getMessage()); + } } }; - this.syncRuleStatisticsTaskId = schedulerService.createRecurringTask("syncRuleStatistics", rulesStatisticsRefreshInterval, TimeUnit.MILLISECONDS, statisticsTask, false).getItemId(); + TaskExecutor refreshRuleStatsTaskExecutor = new TaskExecutor() { + @Override + public String getTaskType() { + return REFRESH_RULE_STATS_TASK_TYPE; + } + + @Override + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { + try { + syncRuleStatistics(); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while syncing rule statistics", e); + callback.fail(e.getMessage()); + } + } + }; + + schedulerService.registerTaskExecutor(refreshRulesTaskExecutor); + schedulerService.registerTaskExecutor(refreshRuleStatsTaskExecutor); + + this.resetTimers(); + this.refreshRulesTaskId = schedulerService.newTask(REFRESH_RULES_TASK_TYPE) + .withPeriod(rulesRefreshInterval, TimeUnit.MILLISECONDS) + .nonPersistent() + .schedule().getItemId(); + this.syncRuleStatisticsTaskId = schedulerService.newTask(REFRESH_RULE_STATS_TASK_TYPE) + .withPeriod(rulesStatisticsRefreshInterval, TimeUnit.MILLISECONDS) + .nonPersistent() + .schedule().getItemId(); } private void resetTimers() { diff --git c/services/src/main/java/org/apache/unomi/services/impl/scope/ScopeServiceImpl.java i/services/src/main/java/org/apache/unomi/services/impl/scope/ScopeServiceImpl.java index
179392a33..acd4e4b7b 100644 --- c/services/src/main/java/org/apache/unomi/services/impl/scope/ScopeServiceImpl.java +++ i/services/src/main/java/org/apache/unomi/services/impl/scope/ScopeServiceImpl.java @@ -20,11 +20,14 @@ import org.apache.unomi.api.Item; import org.apache.unomi.api.Scope; import org.apache.unomi.api.services.SchedulerService; import org.apache.unomi.api.services.ScopeService; +import org.apache.unomi.api.tasks.ScheduledTask; +import org.apache.unomi.api.tasks.TaskExecutor; import org.apache.unomi.persistence.spi.PersistenceService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.List; -import java.util.TimerTask; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.TimeUnit; @@ -32,14 +35,14 @@ import java.util.stream.Collectors; public class ScopeServiceImpl implements ScopeService { + private static final Logger LOGGER = LoggerFactory.getLogger(ScopeServiceImpl.class.getName()); + private PersistenceService persistenceService; - private SchedulerService schedulerService; - private Integer scopesRefreshInterval = 1000; - private ConcurrentMap scopes = new ConcurrentHashMap<>(); + private static final String REFRESH_SCOPES_TASK_TYPE = "refresh-scopes"; private String refreshScopesTaskId; public void setPersistenceService(PersistenceService persistenceService) { @@ -83,14 +86,31 @@ public class ScopeServiceImpl implements ScopeService { } private void initializeTimers() { - TimerTask task = new TimerTask() { + TaskExecutor refreshScopesTaskExecutor = new TaskExecutor() { @Override - public void run() { - refreshScopes(); + public String getTaskType() { + return REFRESH_SCOPES_TASK_TYPE; + } + + @Override + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { + try { + refreshScopes(); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while refreshing scopes", e); + 
callback.fail(e.getMessage()); + } } }; + + schedulerService.registerTaskExecutor(refreshScopesTaskExecutor); + this.resetTimers(); - this.refreshScopesTaskId = schedulerService.createRecurringTask("refreshScopes", scopesRefreshInterval, TimeUnit.MILLISECONDS, task, false).getItemId(); + this.refreshScopesTaskId = schedulerService.newTask(REFRESH_SCOPES_TASK_TYPE) + .withPeriod(scopesRefreshInterval, TimeUnit.MILLISECONDS) + .nonPersistent() + .schedule().getItemId(); } private void resetTimers() { diff --git c/services/src/main/java/org/apache/unomi/services/impl/segments/SegmentServiceImpl.java i/services/src/main/java/org/apache/unomi/services/impl/segments/SegmentServiceImpl.java index 453203bcd..c841248db 100644 --- c/services/src/main/java/org/apache/unomi/services/impl/segments/SegmentServiceImpl.java +++ i/services/src/main/java/org/apache/unomi/services/impl/segments/SegmentServiceImpl.java @@ -32,6 +32,8 @@ import org.apache.unomi.api.services.EventService; import org.apache.unomi.api.services.RulesService; import org.apache.unomi.api.services.SchedulerService; import org.apache.unomi.api.services.SegmentService; +import org.apache.unomi.api.tasks.ScheduledTask; +import org.apache.unomi.api.tasks.TaskExecutor; import org.apache.unomi.api.utils.ConditionBuilder; import org.apache.unomi.api.utils.ParserHelper; import org.apache.unomi.persistence.spi.CustomObjectMapper; @@ -83,6 +85,9 @@ public class SegmentServiceImpl extends AbstractServiceImpl implements SegmentSe private int maximumIdsQueryCount = 5000; private boolean pastEventsDisablePartitions = false; private int dailyDateExprEvaluationHourUtc = 5; + + private static final String RECALCULATE_PAST_EVENT_CONDITIONS_TASK_TYPE = "recalculate-past-event-conditions"; + private static final String REFRESH_SEGMENT_AND_SCORING_DEFINITIONS_TASK_TYPE = "refresh-segment-and-scoring-definitions"; private String recalculatePastEventConditionsTaskId; private String refreshSegmentAndScoringDefinitionsTaskId; @@ 
-1199,38 +1204,63 @@ public class SegmentServiceImpl extends AbstractServiceImpl implements SegmentSe } private void initializeTimer() { - this.resetTimers(); - TimerTask task = new TimerTask() { + TaskExecutor recalculatePastEventConditionsTaskExecutor = new TaskExecutor() { @Override - public void run() { + public String getTaskType() { + return RECALCULATE_PAST_EVENT_CONDITIONS_TASK_TYPE; + } + + @Override + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { try { long currentTimeMillis = System.currentTimeMillis(); LOGGER.info("running scheduled task to recalculate segments and scoring that contains date relative conditions"); recalculatePastEventConditions(); LOGGER.info("finished recalculate segments and scoring that contains date relative conditions in {}ms. ", System.currentTimeMillis() - currentTimeMillis); - } catch (Throwable t) { - LOGGER.error("Error while updating profiles for segments and scoring that contains date relative conditions", t); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while updating profiles for segments and scoring that contains date relative conditions", e); + callback.fail(e.getMessage()); } } }; + TaskExecutor refreshSegmentAndScoringDefinitionsTaskExecutor = new TaskExecutor() { + @Override + public String getTaskType() { + return REFRESH_SEGMENT_AND_SCORING_DEFINITIONS_TASK_TYPE; + } + + @Override + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { + try { + allSegments = getAllSegmentDefinitions(); + allScoring = getAllScoringDefinitions(); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while loading segments and scoring definitions from persistence back-end", e); + callback.fail(e.getMessage()); + } + } + }; + + schedulerService.registerTaskExecutor(recalculatePastEventConditionsTaskExecutor); + schedulerService.registerTaskExecutor(refreshSegmentAndScoringDefinitionsTaskExecutor); + + this.resetTimers(); + 
long initialDelay = SchedulerServiceImpl.getTimeDiffInSeconds(dailyDateExprEvaluationHourUtc, ZonedDateTime.now(ZoneOffset.UTC)); long period = TimeUnit.DAYS.toSeconds(taskExecutionPeriod); LOGGER.info("daily recalculation job for segments and scoring that contains date relative conditions will run at fixed rate, " + "initialDelay={}, taskExecutionPeriod={} in seconds", initialDelay, period); - this.recalculatePastEventConditionsTaskId = schedulerService.createRecurringTask("recalculatePastEventConditions", period, TimeUnit.SECONDS, task, false).getItemId(); - - task = new TimerTask() { - @Override - public void run() { - try { - allSegments = getAllSegmentDefinitions(); - allScoring = getAllScoringDefinitions(); - } catch (Throwable t) { - LOGGER.error("Error while loading segments and scoring definitions from persistence back-end", t); - } - } - }; - this.refreshSegmentAndScoringDefinitionsTaskId = schedulerService.createRecurringTask("refreshSegmentAndScoringDefinitions", segmentRefreshInterval, TimeUnit.MILLISECONDS, task, false).getItemId(); + this.recalculatePastEventConditionsTaskId = schedulerService.newTask(RECALCULATE_PAST_EVENT_CONDITIONS_TASK_TYPE) + .withInitialDelay(initialDelay, TimeUnit.SECONDS) + .withPeriod(period, TimeUnit.SECONDS) + .nonPersistent() + .schedule().getItemId(); + this.refreshSegmentAndScoringDefinitionsTaskId = schedulerService.newTask(REFRESH_SEGMENT_AND_SCORING_DEFINITIONS_TASK_TYPE) + .withPeriod(segmentRefreshInterval, TimeUnit.MILLISECONDS) + .nonPersistent() + .schedule().getItemId(); } private void resetTimers() { --- .../impl/GroovyActionsServiceImpl.java | 29 ++++++-- .../unomi/schema/impl/SchemaServiceImpl.java | 34 +++++++--- .../impl/cluster/ClusterServiceImpl.java | 67 ++++++++++++++----- .../definitions/DefinitionsServiceImpl.java | 31 +++++++-- .../impl/profiles/ProfileServiceImpl.java | 54 ++++++++++++--- .../services/impl/rules/RulesServiceImpl.java | 61 ++++++++++++----- 
.../services/impl/scope/ScopeServiceImpl.java | 36 +++++++--- .../impl/segments/SegmentServiceImpl.java | 60 ++++++++++++----- 8 files changed, 288 insertions(+), 84 deletions(-) diff --git a/extensions/groovy-actions/services/src/main/java/org/apache/unomi/groovy/actions/services/impl/GroovyActionsServiceImpl.java b/extensions/groovy-actions/services/src/main/java/org/apache/unomi/groovy/actions/services/impl/GroovyActionsServiceImpl.java index fee5f3a895..550b4cc68d 100644 --- a/extensions/groovy-actions/services/src/main/java/org/apache/unomi/groovy/actions/services/impl/GroovyActionsServiceImpl.java +++ b/extensions/groovy-actions/services/src/main/java/org/apache/unomi/groovy/actions/services/impl/GroovyActionsServiceImpl.java @@ -26,6 +26,8 @@ import org.apache.unomi.api.actions.ActionType; import org.apache.unomi.api.services.DefinitionsService; import org.apache.unomi.api.services.SchedulerService; +import org.apache.unomi.api.tasks.ScheduledTask; +import org.apache.unomi.api.tasks.TaskExecutor; import org.apache.unomi.groovy.actions.GroovyAction; import org.apache.unomi.groovy.actions.GroovyBundleResourceConnector; import org.apache.unomi.groovy.actions.ScriptMetadata; @@ -51,7 +53,6 @@ import java.util.HashSet; import java.util.Map; import java.util.Set; -import java.util.TimerTask; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; @@ -84,6 +85,7 @@ public class GroovyActionsServiceImpl implements GroovyActionsService { private static final Logger LOGGER = LoggerFactory.getLogger(GroovyActionsServiceImpl.class.getName()); private static final String BASE_SCRIPT_NAME = "BaseScript"; + private static final String REFRESH_ACTIONS_TASK_TYPE = "refresh-groovy-actions"; private DefinitionsService definitionsService; private PersistenceService persistenceService; @@ -504,15 +506,32 @@ private void refreshGroovyActions() { * Initializes periodic script refresh timer. 
*/ private void initializeTimers() { - TimerTask task = new TimerTask() { + TaskExecutor refreshGroovyActionsTaskExecutor = new TaskExecutor() { @Override - public void run() { - refreshGroovyActions(); + public String getTaskType() { + return REFRESH_ACTIONS_TASK_TYPE; + } + + @Override + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { + try { + refreshGroovyActions(); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while refreshing groovy actions", e); + callback.fail(e.getMessage()); + } } }; + + schedulerService.registerTaskExecutor(refreshGroovyActionsTaskExecutor); + if (this.refreshGroovyActionsTaskId != null) { schedulerService.cancelTask(this.refreshGroovyActionsTaskId); } - this.refreshGroovyActionsTaskId = schedulerService.createRecurringTask("refreshGroovyActions", config.services_groovy_actions_refresh_interval(), TimeUnit.MILLISECONDS, task, false).getItemId(); + this.refreshGroovyActionsTaskId = schedulerService.newTask(REFRESH_ACTIONS_TASK_TYPE) + .withPeriod(config.services_groovy_actions_refresh_interval(), TimeUnit.MILLISECONDS) + .nonPersistent() + .schedule().getItemId(); } } diff --git a/extensions/json-schema/services/src/main/java/org/apache/unomi/schema/impl/SchemaServiceImpl.java b/extensions/json-schema/services/src/main/java/org/apache/unomi/schema/impl/SchemaServiceImpl.java index f7e68b783d..1141f4fe38 100644 --- a/extensions/json-schema/services/src/main/java/org/apache/unomi/schema/impl/SchemaServiceImpl.java +++ b/extensions/json-schema/services/src/main/java/org/apache/unomi/schema/impl/SchemaServiceImpl.java @@ -30,6 +30,7 @@ import org.apache.unomi.api.services.SchedulerService; import org.apache.unomi.api.services.ScopeService; import org.apache.unomi.api.tasks.ScheduledTask; +import org.apache.unomi.api.tasks.TaskExecutor; import org.apache.unomi.persistence.spi.PersistenceService; import org.apache.unomi.schema.api.JsonSchemaWrapper; import
org.apache.unomi.schema.api.SchemaService; @@ -43,7 +44,9 @@ import java.io.InputStream; import java.net.URI; import java.util.*; -import java.util.concurrent.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; public class SchemaServiceImpl implements SchemaService { @@ -71,8 +74,10 @@ public class SchemaServiceImpl implements SchemaService { private PersistenceService persistenceService; private ScopeService scopeService; private JsonSchemaFactory jsonSchemaFactory; + private SchedulerService schedulerService; private String refreshJSONSchemasTaskId; + private static final String REFRESH_SCHEMAS_TASK_TYPE = "refresh-json-schemas"; @Override public boolean isValid(String data, String schemaId) { @@ -369,18 +374,31 @@ private String generateExtendedSchema(String id, String schema) throws JsonProce } private void initTimers() { - TimerTask task = new TimerTask() { + TaskExecutor refreshSchemasTaskExecutor = new TaskExecutor() { @Override - public void run() { - try { - refreshJSONSchemas(); - } catch (Exception e) { - LOGGER.error("Unexpected error while refreshing JSON Schemas", e); + public String getTaskType() { + return REFRESH_SCHEMAS_TASK_TYPE; } + + @Override + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { + try { + refreshJSONSchemas(); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while refreshing JSON schemas", e); + callback.fail(e.getMessage()); + } } }; + + schedulerService.registerTaskExecutor(refreshSchemasTaskExecutor); + this.resetTimers(); - this.refreshJSONSchemasTaskId = schedulerService.createRecurringTask("refreshJSONSchemas", jsonSchemaRefreshInterval, TimeUnit.MILLISECONDS, task, false).getItemId(); + this.refreshJSONSchemasTaskId = schedulerService.newTask(REFRESH_SCHEMAS_TASK_TYPE) + .withPeriod(jsonSchemaRefreshInterval, TimeUnit.MILLISECONDS) + .nonPersistent()
+ .schedule().getItemId(); } private void resetTimers() { diff --git a/services/src/main/java/org/apache/unomi/services/impl/cluster/ClusterServiceImpl.java b/services/src/main/java/org/apache/unomi/services/impl/cluster/ClusterServiceImpl.java index 9802426e75..f7d7a3c004 100644 --- a/services/src/main/java/org/apache/unomi/services/impl/cluster/ClusterServiceImpl.java +++ b/services/src/main/java/org/apache/unomi/services/impl/cluster/ClusterServiceImpl.java @@ -26,6 +26,8 @@ import org.apache.unomi.api.conditions.ConditionType; import org.apache.unomi.api.services.ClusterService; import org.apache.unomi.api.services.SchedulerService; +import org.apache.unomi.api.tasks.ScheduledTask; +import org.apache.unomi.api.tasks.TaskExecutor; import org.apache.unomi.lifecycle.BundleWatcher; import org.apache.unomi.persistence.spi.PersistenceService; import org.slf4j.Logger; @@ -57,10 +59,13 @@ public class ClusterServiceImpl implements ClusterService { private volatile boolean shutdownNow = false; private volatile List cachedClusterNodes = Collections.emptyList(); - private BundleWatcher bundleWatcher; + private static final String CLUSTER_NODE_STAT_UPDATE_TASK_TYPE = "cluster-node-statistics-update"; + private static final String CLUSTER_STALE_NODE_CLEANUP_TASK_TYPE = "cluster-stale-nodes-cleanup"; private String clusterNodeStatisticsUpdateTaskId; private String clusterStaleNodesCleanupTaskId; + private BundleWatcher bundleWatcher; + /** * Max time to wait for persistence service (in milliseconds) */ @@ -211,40 +216,59 @@ public void initializeScheduledTasks() { return; } - // Schedule regular updates of the node statistics - TimerTask statisticsTask = new TimerTask() { + TaskExecutor clusterNodeStatisticsUpdateTaskExecutor = new TaskExecutor() { + @Override + public String getTaskType() { + return CLUSTER_NODE_STAT_UPDATE_TASK_TYPE; + } + @Override - public void run() { + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { try { 
updateSystemStats(); - } catch (Throwable t) { - LOGGER.error("Error updating system statistics", t); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while updating cluster node statistics", e); + callback.fail(e.getMessage()); } } }; - this.clusterNodeStatisticsUpdateTaskId = schedulerService.createRecurringTask("clusterNodeStatisticsUpdate", nodeStatisticsUpdateFrequency, TimeUnit.MILLISECONDS, statisticsTask, false).getItemId(); - // Schedule cleanup of stale nodes - TimerTask cleanupTask = new TimerTask() { + TaskExecutor clusterStaleNodesCleanupTaskExecutor = new TaskExecutor() { @Override - public void run() { + public String getTaskType() { + return CLUSTER_STALE_NODE_CLEANUP_TASK_TYPE; + } + + @Override + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { try { cleanupStaleNodes(); - } catch (Throwable t) { - LOGGER.error("Error cleaning up stale nodes", t); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while cleaning up stale cluster nodes", e); + callback.fail(e.getMessage()); } } }; - this.clusterStaleNodesCleanupTaskId = schedulerService.createRecurringTask("clusterStaleNodesCleanup", 60000, TimeUnit.MILLISECONDS, cleanupTask, false).getItemId(); + + schedulerService.registerTaskExecutor(clusterNodeStatisticsUpdateTaskExecutor); + schedulerService.registerTaskExecutor(clusterStaleNodesCleanupTaskExecutor); + + this.resetTimers(); + this.clusterNodeStatisticsUpdateTaskId = schedulerService.newTask(CLUSTER_NODE_STAT_UPDATE_TASK_TYPE) + .withPeriod(nodeStatisticsUpdateFrequency, TimeUnit.MILLISECONDS) + .nonPersistent() + .schedule().getItemId(); + this.clusterStaleNodesCleanupTaskId = schedulerService.newTask(CLUSTER_STALE_NODE_CLEANUP_TASK_TYPE) + .withPeriod(60000, TimeUnit.MILLISECONDS) + .nonPersistent() + .schedule().getItemId(); LOGGER.info("Cluster service scheduled tasks initialized"); } - public void destroy() { - LOGGER.info("Cluster service shutting down..."); -
shutdownNow = true; - - // Cancel scheduled tasks + private void resetTimers() { if (schedulerService != null && clusterNodeStatisticsUpdateTaskId != null) { schedulerService.cancelTask(clusterNodeStatisticsUpdateTaskId); clusterStaleNodesCleanupTaskId = null; @@ -253,6 +277,13 @@ public void destroy() { schedulerService.cancelTask(clusterStaleNodesCleanupTaskId); clusterStaleNodesCleanupTaskId = null; } + } + + public void destroy() { + LOGGER.info("Cluster service shutting down..."); + shutdownNow = true; + + this.resetTimers(); // Remove node from persistence service if (persistenceService != null) { diff --git a/services/src/main/java/org/apache/unomi/services/impl/definitions/DefinitionsServiceImpl.java b/services/src/main/java/org/apache/unomi/services/impl/definitions/DefinitionsServiceImpl.java index ff7babc2ca..4515ff7480 100644 --- a/services/src/main/java/org/apache/unomi/services/impl/definitions/DefinitionsServiceImpl.java +++ b/services/src/main/java/org/apache/unomi/services/impl/definitions/DefinitionsServiceImpl.java @@ -25,6 +25,8 @@ import org.apache.unomi.api.conditions.ConditionType; import org.apache.unomi.api.services.DefinitionsService; import org.apache.unomi.api.services.SchedulerService; +import org.apache.unomi.api.tasks.ScheduledTask; +import org.apache.unomi.api.tasks.TaskExecutor; import org.apache.unomi.api.utils.ConditionBuilder; import org.apache.unomi.api.utils.ParserHelper; import org.apache.unomi.persistence.spi.CustomObjectMapper; @@ -60,6 +62,8 @@ public class DefinitionsServiceImpl implements DefinitionsService, SynchronousBu private ConditionBuilder conditionBuilder; private BundleContext bundleContext; + + private static final String RELOAD_TYPES_TASK_TYPE = "reload-types"; private String reloadTypesTaskId; public DefinitionsServiceImpl() { @@ -100,20 +104,39 @@ public void postConstruct() { } private void scheduleTypeReloads() { - TimerTask task = new TimerTask() { + TaskExecutor reloadTypesTaskExecutor = new 
TaskExecutor() { + @Override + public String getTaskType() { + return RELOAD_TYPES_TASK_TYPE; + } + @Override - public void run() { - reloadTypes(false); + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { + try { + reloadTypes(false); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while reloading types", e); + callback.fail(e.getMessage()); + } } }; + + schedulerService.registerTaskExecutor(reloadTypesTaskExecutor); + this.resetTypeReloads(); - this.reloadTypesTaskId = schedulerService.createRecurringTask("reloadTypes", definitionsRefreshInterval, TimeUnit.MILLISECONDS, task, false).getItemId(); + this.reloadTypesTaskId = schedulerService.newTask(RELOAD_TYPES_TASK_TYPE) + .withPeriod(definitionsRefreshInterval, TimeUnit.MILLISECONDS) + .nonPersistent() + .schedule().getItemId(); + LOGGER.info("Scheduled task for condition type loading each 10s"); } private void resetTypeReloads() { if (this.reloadTypesTaskId != null) { schedulerService.cancelTask(this.reloadTypesTaskId); + this.reloadTypesTaskId = null; } } diff --git a/services/src/main/java/org/apache/unomi/services/impl/profiles/ProfileServiceImpl.java b/services/src/main/java/org/apache/unomi/services/impl/profiles/ProfileServiceImpl.java index 78ca739595..9dfac14baa 100644 --- a/services/src/main/java/org/apache/unomi/services/impl/profiles/ProfileServiceImpl.java +++ b/services/src/main/java/org/apache/unomi/services/impl/profiles/ProfileServiceImpl.java @@ -29,6 +29,8 @@ import org.apache.unomi.api.services.ProfileService; import org.apache.unomi.api.services.SchedulerService; import org.apache.unomi.api.services.SegmentService; +import org.apache.unomi.api.tasks.ScheduledTask; +import org.apache.unomi.api.tasks.TaskExecutor; import org.apache.unomi.persistence.spi.CustomObjectMapper; import org.apache.unomi.persistence.spi.PersistenceService; import org.apache.unomi.persistence.spi.PropertyHelper; @@ -189,6 +191,8 @@ private void updateListMap(Map> 
listMap, PropertyType private boolean forceRefreshOnSave = false; + private static final String PROPERTY_TYPE_LOAD_TASK_TYPE = "property-type-load"; + private static final String PROFILES_PURGE_TASK_TYPE = "profiles-purge"; private String propertyTypeLoadTaskId; private String purgeProfilesTaskId; @@ -301,14 +305,31 @@ public void setPurgeEventExistTime(Integer purgeEventExistTime) { } private void schedulePropertyTypeLoad() { - TimerTask task = new TimerTask() { + TaskExecutor reloadPropertyTaskExecutor = new TaskExecutor() { @Override - public void run() { - reloadPropertyTypes(false); + public String getTaskType() { + return PROPERTY_TYPE_LOAD_TASK_TYPE; + } + + @Override + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { + try { + reloadPropertyTypes(false); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while reloading property type", e); + callback.fail(e.getMessage()); + } } }; + + schedulerService.registerTaskExecutor(reloadPropertyTaskExecutor); + this.resetPropertyTypeLoadTask(); - this.propertyTypeLoadTaskId = schedulerService.createRecurringTask("propertyTypeLoad", propertiesRefreshInterval, TimeUnit.MILLISECONDS, task, false).getItemId(); + this.propertyTypeLoadTaskId = schedulerService.newTask(PROPERTY_TYPE_LOAD_TASK_TYPE) + .withPeriod(propertiesRefreshInterval, TimeUnit.MILLISECONDS) + .nonPersistent() + .schedule().getItemId(); LOGGER.info("Scheduled task for property type loading each {}ms", propertiesRefreshInterval); } @@ -406,7 +427,6 @@ private void initializePurge() { if (purgeProfileExistTime > 0) { LOGGER.info("Purge: Profile created since more than {} days, will be purged", purgeProfileExistTime); } - if (purgeSessionExistTime > 0) { LOGGER.info("Purge: Session items created since more than {} days, will be purged", purgeSessionExistTime); } @@ -414,9 +434,14 @@ private void initializePurge() { LOGGER.info("Purge: Event items created since more than {} days, will be purged", 
purgeEventExistTime); } - TimerTask task = new TimerTask() { + TaskExecutor purgeProfilesTaskExecutor = new TaskExecutor() { @Override - public void run() { + public String getTaskType() { + return PROFILES_PURGE_TASK_TYPE; + } + + @Override + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { try { long purgeStartTime = System.currentTimeMillis(); LOGGER.info("Purge: triggered"); @@ -428,14 +453,21 @@ public void run() { purgeSessionItems(purgeSessionExistTime); purgeEventItems(purgeEventExistTime); LOGGER.info("Purge: executed in {} ms", System.currentTimeMillis() - purgeStartTime); - } catch (Throwable t) { - LOGGER.error("Error while purging", t); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while purging", e); + callback.fail(e.getMessage()); } } }; - this.resetProfilesPurgeTask(); - this.purgeProfilesTaskId = schedulerService.createRecurringTask("profilesPurge", purgeProfileInterval, TimeUnit.DAYS, task, false).getItemId(); + schedulerService.registerTaskExecutor(purgeProfilesTaskExecutor); + + this.resetProfilesPurgeTask(); + this.purgeProfilesTaskId = schedulerService.newTask(PROFILES_PURGE_TASK_TYPE) + .withPeriod(purgeProfileInterval, TimeUnit.DAYS) + .nonPersistent() + .schedule().getItemId(); LOGGER.info("Purge: purge scheduled with an interval of {} days", purgeProfileInterval); } else { LOGGER.info("Purge: No purge scheduled"); diff --git a/services/src/main/java/org/apache/unomi/services/impl/rules/RulesServiceImpl.java b/services/src/main/java/org/apache/unomi/services/impl/rules/RulesServiceImpl.java index 71cc75eb57..73898830b7 100644 --- a/services/src/main/java/org/apache/unomi/services/impl/rules/RulesServiceImpl.java +++ b/services/src/main/java/org/apache/unomi/services/impl/rules/RulesServiceImpl.java @@ -28,6 +28,8 @@ import org.apache.unomi.api.rules.Rule; import org.apache.unomi.api.rules.RuleStatistics; import org.apache.unomi.api.services.*; +import 
org.apache.unomi.api.tasks.ScheduledTask; +import org.apache.unomi.api.tasks.TaskExecutor; import org.apache.unomi.persistence.spi.CustomObjectMapper; import org.apache.unomi.persistence.spi.PersistenceService; import org.apache.unomi.persistence.spi.config.ConfigurationUpdateHelper; @@ -65,14 +67,16 @@ public class RulesServiceImpl implements RulesService, EventListenerService, Syn private Integer rulesRefreshInterval = 1000; private Integer rulesStatisticsRefreshInterval = 10000; + private static final String REFRESH_RULES_TASK_TYPE = "refresh-rules"; + private static final String REFRESH_RULE_STATS_TASK_TYPE = "refresh-rule-stats"; + private String refreshRulesTaskId; + private String syncRuleStatisticsTaskId; private final List ruleListeners = new CopyOnWriteArrayList(); private Map> rulesByEventType = new HashMap<>(); private Boolean optimizedRulesActivated = true; - private String refreshRulesTaskId; - private String syncRuleStatisticsTaskId; public void setBundleContext(BundleContext bundleContext) { this.bundleContext = bundleContext; @@ -492,26 +496,53 @@ public void removeRule(String ruleId) { } private void initializeTimers() { - this.resetTimers(); - TimerTask task = new TimerTask() { + TaskExecutor refreshRulesTaskExecutor = new TaskExecutor() { @Override - public void run() { - refreshRules(); + public String getTaskType() { + return REFRESH_RULES_TASK_TYPE; } - }; - this.refreshRulesTaskId = schedulerService.createRecurringTask("refreshRules", rulesRefreshInterval, TimeUnit.MILLISECONDS, task, false).getItemId(); - TimerTask statisticsTask = new TimerTask() { @Override - public void run() { - try { - syncRuleStatistics(); - } catch (Throwable t) { - LOGGER.error("Error synching rule statistics between memory and persistence back-end", t); + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { + try { + refreshRules(); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while refreshing rules", e); + 
callback.fail(e.getMessage()); + } } + }; + TaskExecutor refreshRuleStatsTaskExecutor = new TaskExecutor() { + @Override + public String getTaskType() { + return REFRESH_RULE_STATS_TASK_TYPE; + } + + @Override + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { + try { + syncRuleStatistics(); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while syncing rule statistics", e); + callback.fail(e.getMessage()); + } } }; - this.syncRuleStatisticsTaskId = schedulerService.createRecurringTask("syncRuleStatistics", rulesStatisticsRefreshInterval, TimeUnit.MILLISECONDS, statisticsTask, false).getItemId(); + + schedulerService.registerTaskExecutor(refreshRulesTaskExecutor); + schedulerService.registerTaskExecutor(refreshRuleStatsTaskExecutor); + + this.resetTimers(); + this.refreshRulesTaskId = schedulerService.newTask(REFRESH_RULES_TASK_TYPE) + .withPeriod(rulesRefreshInterval, TimeUnit.MILLISECONDS) + .nonPersistent() + .schedule().getItemId(); + this.syncRuleStatisticsTaskId = schedulerService.newTask(REFRESH_RULE_STATS_TASK_TYPE) + .withPeriod(rulesStatisticsRefreshInterval, TimeUnit.MILLISECONDS) + .nonPersistent() + .schedule().getItemId(); } private void resetTimers() { diff --git a/services/src/main/java/org/apache/unomi/services/impl/scope/ScopeServiceImpl.java b/services/src/main/java/org/apache/unomi/services/impl/scope/ScopeServiceImpl.java index 179392a332..acd4e4b7b1 100644 --- a/services/src/main/java/org/apache/unomi/services/impl/scope/ScopeServiceImpl.java +++ b/services/src/main/java/org/apache/unomi/services/impl/scope/ScopeServiceImpl.java @@ -20,11 +20,14 @@ import org.apache.unomi.api.Scope; import org.apache.unomi.api.services.SchedulerService; import org.apache.unomi.api.services.ScopeService; +import org.apache.unomi.api.tasks.ScheduledTask; +import org.apache.unomi.api.tasks.TaskExecutor; import org.apache.unomi.persistence.spi.PersistenceService; +import org.slf4j.Logger; +import
org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.List; -import java.util.TimerTask; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.TimeUnit; @@ -32,14 +35,14 @@ public class ScopeServiceImpl implements ScopeService { - private PersistenceService persistenceService; + private static final Logger LOGGER = LoggerFactory.getLogger(ScopeServiceImpl.class.getName()); + private PersistenceService persistenceService; private SchedulerService schedulerService; - private Integer scopesRefreshInterval = 1000; - private ConcurrentMap scopes = new ConcurrentHashMap<>(); + private static final String REFRESH_SCOPES_TASK_TYPE = "refresh-scopes"; private String refreshScopesTaskId; public void setPersistenceService(PersistenceService persistenceService) { @@ -83,14 +86,31 @@ public Scope getScope(String id) { } private void initializeTimers() { - TimerTask task = new TimerTask() { + TaskExecutor refreshScopesTaskExecutor = new TaskExecutor() { @Override - public void run() { - refreshScopes(); + public String getTaskType() { + return REFRESH_SCOPES_TASK_TYPE; + } + + @Override + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { + try { + refreshScopes(); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while refreshing scopes", e); + callback.fail(e.getMessage()); + } } }; + + schedulerService.registerTaskExecutor(refreshScopesTaskExecutor); + this.resetTimers(); - this.refreshScopesTaskId = schedulerService.createRecurringTask("refreshScopes", scopesRefreshInterval, TimeUnit.MILLISECONDS, task, false).getItemId(); + this.refreshScopesTaskId = schedulerService.newTask(REFRESH_SCOPES_TASK_TYPE) + .withPeriod(scopesRefreshInterval, TimeUnit.MILLISECONDS) + .nonPersistent() + .schedule().getItemId(); } private void resetTimers() { diff --git a/services/src/main/java/org/apache/unomi/services/impl/segments/SegmentServiceImpl.java 
b/services/src/main/java/org/apache/unomi/services/impl/segments/SegmentServiceImpl.java index 453203bcd4..c841248db8 100644 --- a/services/src/main/java/org/apache/unomi/services/impl/segments/SegmentServiceImpl.java +++ b/services/src/main/java/org/apache/unomi/services/impl/segments/SegmentServiceImpl.java @@ -32,6 +32,8 @@ import org.apache.unomi.api.services.RulesService; import org.apache.unomi.api.services.SchedulerService; import org.apache.unomi.api.services.SegmentService; +import org.apache.unomi.api.tasks.ScheduledTask; +import org.apache.unomi.api.tasks.TaskExecutor; import org.apache.unomi.api.utils.ConditionBuilder; import org.apache.unomi.api.utils.ParserHelper; import org.apache.unomi.persistence.spi.CustomObjectMapper; @@ -83,6 +85,9 @@ public class SegmentServiceImpl extends AbstractServiceImpl implements SegmentSe private int maximumIdsQueryCount = 5000; private boolean pastEventsDisablePartitions = false; private int dailyDateExprEvaluationHourUtc = 5; + + private static final String RECALCULATE_PAST_EVENT_CONDITIONS_TASK_TYPE = "recalculate-past-event-conditions"; + private static final String REFRESH_SEGMENT_AND_SCORING_DEFINITIONS_TASK_TYPE = "refresh-segment-and-scoring-definitions"; private String recalculatePastEventConditionsTaskId; private String refreshSegmentAndScoringDefinitionsTaskId; @@ -1199,38 +1204,63 @@ public void bundleChanged(BundleEvent event) { } private void initializeTimer() { - this.resetTimers(); - TimerTask task = new TimerTask() { + TaskExecutor recalculatePastEventConditionsTaskExecutor = new TaskExecutor() { + @Override + public String getTaskType() { + return RECALCULATE_PAST_EVENT_CONDITIONS_TASK_TYPE; + } + @Override - public void run() { + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { try { long currentTimeMillis = System.currentTimeMillis(); LOGGER.info("running scheduled task to recalculate segments and scoring that contains date relative conditions"); 
recalculatePastEventConditions(); LOGGER.info("finished recalculate segments and scoring that contains date relative conditions in {}ms. ", System.currentTimeMillis() - currentTimeMillis); - } catch (Throwable t) { - LOGGER.error("Error while updating profiles for segments and scoring that contains date relative conditions", t); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while updating profiles for segments and scoring that contains date relative conditions", e); + callback.fail(e.getMessage()); } } }; - long initialDelay = SchedulerServiceImpl.getTimeDiffInSeconds(dailyDateExprEvaluationHourUtc, ZonedDateTime.now(ZoneOffset.UTC)); - long period = TimeUnit.DAYS.toSeconds(taskExecutionPeriod); - LOGGER.info("daily recalculation job for segments and scoring that contains date relative conditions will run at fixed rate, " + - "initialDelay={}, taskExecutionPeriod={} in seconds", initialDelay, period); - this.recalculatePastEventConditionsTaskId = schedulerService.createRecurringTask("recalculatePastEventConditions", period, TimeUnit.SECONDS, task, false).getItemId(); + TaskExecutor refreshSegmentAndScoringDefinitionsTaskExecutor = new TaskExecutor() { + @Override + public String getTaskType() { + return REFRESH_SEGMENT_AND_SCORING_DEFINITIONS_TASK_TYPE; + } - task = new TimerTask() { @Override - public void run() { + public void execute(ScheduledTask task, TaskExecutor.TaskStatusCallback callback) { try { allSegments = getAllSegmentDefinitions(); allScoring = getAllScoringDefinitions(); - } catch (Throwable t) { - LOGGER.error("Error while loading segments and scoring definitions from persistence back-end", t); + callback.complete(); + } catch (Exception e) { + LOGGER.error("Error while loading segments and scoring definitions from persistence back-end", e); + callback.fail(e.getMessage()); } } }; - this.refreshSegmentAndScoringDefinitionsTaskId = schedulerService.createRecurringTask("refreshSegmentAndScoringDefinitions", 
segmentRefreshInterval, TimeUnit.MILLISECONDS, task, false).getItemId(); + + schedulerService.registerTaskExecutor(recalculatePastEventConditionsTaskExecutor); + schedulerService.registerTaskExecutor(refreshSegmentAndScoringDefinitionsTaskExecutor); + + this.resetTimers(); + + long initialDelay = SchedulerServiceImpl.getTimeDiffInSeconds(dailyDateExprEvaluationHourUtc, ZonedDateTime.now(ZoneOffset.UTC)); + long period = TimeUnit.DAYS.toSeconds(taskExecutionPeriod); + LOGGER.info("daily recalculation job for segments and scoring that contains date relative conditions will run at fixed rate, " + + "initialDelay={}, taskExecutionPeriod={} in seconds", initialDelay, period); + this.recalculatePastEventConditionsTaskId = schedulerService.newTask(RECALCULATE_PAST_EVENT_CONDITIONS_TASK_TYPE) + .withInitialDelay(initialDelay, TimeUnit.SECONDS) + .withPeriod(period, TimeUnit.SECONDS) + .nonPersistent() + .schedule().getItemId(); + this.refreshSegmentAndScoringDefinitionsTaskId = schedulerService.newTask(REFRESH_SEGMENT_AND_SCORING_DEFINITIONS_TASK_TYPE) + .withPeriod(segmentRefreshInterval, TimeUnit.MILLISECONDS) + .nonPersistent() + .schedule().getItemId(); } private void resetTimers() {