Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
fix(learning): harden maintenance scheduler lifecycle
  • Loading branch information
Xinhua Gu committed Mar 13, 2026
commit 09d47166c938e032ae4308ba3d5175cee02557ab
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ public final class LearningMaintenanceScheduler {
private final IntSupplier activeSessionCount;
private final LongSupplier largestMemoryFileBytes;
private final MaintenancePipeline pipeline;
private final Object lifecycleLock = new Object();

private final AtomicBoolean running = new AtomicBoolean(false);
private final AtomicBoolean maintenanceRunning = new AtomicBoolean(false);
Expand All @@ -57,17 +58,21 @@ public LearningMaintenanceScheduler(Config config,
}

public void start() {
if (!running.compareAndSet(false, true)) {
log.debug("LearningMaintenanceScheduler already running");
return;
synchronized (lifecycleLock) {
if (running.get()) {
log.debug("LearningMaintenanceScheduler already running");
return;
}
var nextScheduler = Executors.newSingleThreadScheduledExecutor(r -> {
var thread = new Thread(r, "learning-maintenance-scheduler");
thread.setDaemon(true);
return thread;
});
long tickMillis = Math.max(1, config.tickInterval().toMillis());
nextScheduler.scheduleAtFixedRate(this::tick, tickMillis, tickMillis, TimeUnit.MILLISECONDS);
scheduler = nextScheduler;
running.set(true);
}
scheduler = Executors.newSingleThreadScheduledExecutor(r -> {
var thread = new Thread(r, "learning-maintenance-scheduler");
thread.setDaemon(true);
return thread;
});
long tickMillis = Math.max(1, config.tickInterval().toMillis());
scheduler.scheduleAtFixedRate(this::tick, tickMillis, tickMillis, TimeUnit.MILLISECONDS);
log.info("LearningMaintenanceScheduler started: tick={}s timeTrigger={}h sessionTrigger={} sizeTrigger={}KB idleTrigger={}m",
config.tickInterval().toSeconds(),
config.timeTriggerInterval().toHours(),
Expand All @@ -77,18 +82,27 @@ public void start() {
}

public void stop() {
if (!running.compareAndSet(true, false)) {
return;
ScheduledExecutorService currentScheduler;
synchronized (lifecycleLock) {
if (!running.get()) {
return;
}
running.set(false);
currentScheduler = scheduler;
scheduler = null;
}
if (scheduler != null) {
scheduler.shutdown();
if (currentScheduler != null) {
currentScheduler.shutdown();
try {
if (!scheduler.awaitTermination(10, TimeUnit.SECONDS)) {
scheduler.shutdownNow();
if (!currentScheduler.awaitTermination(10, TimeUnit.SECONDS)) {
currentScheduler.shutdownNow();
}
} catch (InterruptedException e) {
scheduler.shutdownNow();
currentScheduler.shutdownNow();
Thread.currentThread().interrupt();
} finally {
maintenanceRunning.set(false);
sessionsSinceLastRun.set(0);
}
}
log.info("LearningMaintenanceScheduler stopped");
Expand All @@ -98,6 +112,10 @@ public boolean isRunning() {
return running.get();
}

boolean isMaintenanceRunningForTest() {
return maintenanceRunning.get();
}

public void onSessionClosed() {
if (!running.get()) {
return;
Expand Down Expand Up @@ -135,12 +153,13 @@ private void tryTrigger(Trigger trigger) {
if (!maintenanceRunning.compareAndSet(false, true)) {
return;
}
int sessionsAtStart = sessionsSinceLastRun.get();

Thread.ofVirtual().name("learning-maintenance-" + trigger.id()).start(() -> {
try {
pipeline.run(trigger.id());
lastRunAt = clock.instant();
sessionsSinceLastRun.set(0);
sessionsSinceLastRun.updateAndGet(current -> Math.max(0, current - sessionsAtStart));
if (trigger == Trigger.SIZE_THRESHOLD) {
Comment thread
coderabbitai[bot] marked this conversation as resolved.
sizeTriggerArmed = false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

Expand Down Expand Up @@ -121,6 +122,51 @@ void sessionCloseDoesNotTriggerWhileStopped() throws Exception {
assertThat(triggers).isEmpty();
}

@Test
void sessionsClosedDuringMaintenanceArePreserved() throws Exception {
var clock = new MutableClock(Instant.parse("2026-03-13T10:00:00Z"));
var activeSessions = new AtomicInteger(0);
var memoryBytes = new AtomicLong(0);
var triggers = Collections.synchronizedList(new ArrayList<String>());
var started = new CountDownLatch(1);
var release = new CountDownLatch(1);
var scheduler = new LearningMaintenanceScheduler(
new LearningMaintenanceScheduler.Config(
Duration.ofHours(6),
2,
50L * 1024L,
Duration.ofMinutes(5),
Duration.ofDays(1)),
clock,
activeSessions::get,
memoryBytes::get,
trigger -> {
triggers.add(trigger);
started.countDown();
release.await();
}
);
scheduler.start();
try {
scheduler.onSessionClosed();
scheduler.onSessionClosed();
assertThat(started.await(2, java.util.concurrent.TimeUnit.SECONDS)).isTrue();
assertThat(triggers).containsExactly("session-count");

scheduler.onSessionClosed();

release.countDown();
waitForMaintenanceToSettle(scheduler);

scheduler.onSessionClosed();
waitForTriggers(triggers, 2);

assertThat(triggers).containsExactly("session-count", "session-count");
} finally {
scheduler.stop();
}
}

private static LearningMaintenanceScheduler scheduler(MutableClock clock,
AtomicInteger activeSessions,
AtomicLong memoryBytes,
Expand Down Expand Up @@ -154,6 +200,17 @@ private static void waitForTriggers(List<String> triggers, int expected) throws
throw new AssertionError("Timed out waiting for trigger count " + expected + ", got " + triggers.size());
}

private static void waitForMaintenanceToSettle(LearningMaintenanceScheduler scheduler) throws InterruptedException {
long deadline = System.currentTimeMillis() + 2_000;
while (System.currentTimeMillis() < deadline) {
if (!scheduler.isMaintenanceRunningForTest()) {
return;
}
Thread.sleep(20);
}
throw new AssertionError("Timed out waiting for maintenance to settle");
}

private static final class MutableClock extends Clock {
private Instant instant;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,7 @@ private static long fileSize(Path file) {
try {
return Files.exists(file) ? Files.size(file) : 0L;
} catch (IOException e) {
log.debug("Failed to read memory file size for {}: {}", file, e.getMessage());
return 0L;
}
Comment on lines +347 to 353

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Avoid silently swallowing file-size read failures

At Line 350, returning 0L on IOException without logging hides permission/disk issues and can silently disable size-threshold maintenance behavior.

💡 Suggested fix (add low-noise logging)
     private static long fileSize(Path file) {
         try {
             return Files.exists(file) ? Files.size(file) : 0L;
         } catch (IOException e) {
+            log.warn("Failed to read backing file size for {}: {}", file, e.getMessage());
             return 0L;
         }
     }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
private static long fileSize(Path file) {
try {
return Files.exists(file) ? Files.size(file) : 0L;
} catch (IOException e) {
return 0L;
}
private static long fileSize(Path file) {
try {
return Files.exists(file) ? Files.size(file) : 0L;
} catch (IOException e) {
log.warn("Failed to read backing file size for {}: {}", file, e.getMessage());
return 0L;
}
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@aceclaw-memory/src/main/java/dev/aceclaw/memory/AutoMemoryStore.java` around
lines 347 - 352, The fileSize(Path file) method in AutoMemoryStore is silently
swallowing IOExceptions and returning 0L; change it to catch IOException, log a
low-noise message including the file path and the exception (e.g.,
LOGGER.debug/trace or LOGGER.warn with low frequency) so permission/disk errors
are visible, then return 0L as before; ensure you reference the existing logger
instance in AutoMemoryStore (or add one if missing) and include the exception
object in the log call for full stack/causal info.

}
Expand Down
Loading