Skip to content
Merged
Prev Previous commit
Next Next commit
fix(skill): track dynamic runtime generation lifecycle
  • Loading branch information
Xinhua Gu committed Mar 13, 2026
commit d8300a8c66ba6c0dd8869d502696b207973ac64d
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package dev.aceclaw.core.planner;

import dev.aceclaw.core.llm.Message;

import java.util.List;
import java.util.Objects;

Expand All @@ -8,13 +10,15 @@
*
* @param plan the final plan state (with updated step statuses)
* @param stepResults result of each step attempted (may be fewer than plan.steps() if aborted)
* @param messages all messages produced while executing the plan
* @param totalDurationMs wall-clock time for the entire plan execution
* @param success whether all attempted steps completed successfully
* @param totalTokensUsed total tokens consumed across all steps
*/
public record PlanExecutionResult(
TaskPlan plan,
List<StepResult> stepResults,
List<Message> messages,
long totalDurationMs,
boolean success,
int totalTokensUsed
Expand All @@ -23,5 +27,6 @@ public record PlanExecutionResult(
public PlanExecutionResult {
Objects.requireNonNull(plan, "plan");
stepResults = stepResults != null ? List.copyOf(stepResults) : List.of();
messages = messages != null ? List.copyOf(messages) : List.of();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ public PlanExecutionResult execute(
long planStart = System.currentTimeMillis();
var allMessages = new ArrayList<>(
conversationHistory != null ? conversationHistory : Collections.<Message>emptyList());
var generatedMessages = new ArrayList<Message>();
var stepResults = new ArrayList<StepResult>();
var mutablePlan = plan.withStatus(new PlanStatus.Executing(0, plan.steps().size()));
boolean allSuccess = true;
Expand Down Expand Up @@ -185,6 +186,7 @@ public PlanExecutionResult execute(
try {
var turn = agentLoop.runTurn(stepPrompt, allMessages, handler, cancellationToken);
allMessages.addAll(turn.newMessages());
generatedMessages.addAll(turn.newMessages());

var usage = turn.totalUsage();
var result = new StepResult(
Expand Down Expand Up @@ -241,6 +243,7 @@ public PlanExecutionResult execute(
var fallbackTurn = agentLoop.runTurn(
fallbackPrompt, allMessages, handler, cancellationToken);
allMessages.addAll(fallbackTurn.newMessages());
generatedMessages.addAll(fallbackTurn.newMessages());

var fbUsage = fallbackTurn.totalUsage();
var fallbackResult = new StepResult(
Expand Down Expand Up @@ -396,7 +399,13 @@ public PlanExecutionResult execute(
log.info("Plan execution finished: success={}, steps={}/{}, duration={}ms, tokens={}",
allSuccess, stepResults.size(), plan.steps().size(), totalDuration, totalTokens);

return new PlanExecutionResult(mutablePlan, stepResults, totalDuration, allSuccess, totalTokens);
return new PlanExecutionResult(
mutablePlan,
stepResults,
generatedMessages,
totalDuration,
allSuccess,
totalTokens);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,7 @@ private void wireAgentHandler(Path workingDir) {
}
if (runtimeSkillGeneratorForSessionEnd != null) {
try {
agentHandlerForCleanup.awaitSessionPostProcessing(session.id());
int persistedDrafts = runtimeSkillGeneratorForSessionEnd.persistDrafts(session.id(), sessionWorkingDir);
if (persistedDrafts > 0) {
log.info("Persisted {} runtime skill drafts for session {}", persistedDrafts, session.id());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,13 @@ public java.util.Optional<SkillConfig> maybeGenerate(String sessionId,
return java.util.Optional.empty();
}

var draft = generateDraft(sessionId, projectPath, allowedTools, sessionHistory);
final RuntimeSkillDraft draft;
try {
draft = generateDraft(sessionId, projectPath, allowedTools, sessionHistory);
} catch (IllegalArgumentException e) {
log.debug("Skipping runtime skill generation for session {}: {}", sessionId, e.getMessage());
return java.util.Optional.empty();
}
String skillName = resolveRuntimeName(sessionId, draft.name(), state);
var config = new SkillConfig(
skillName,
Expand Down Expand Up @@ -199,10 +205,10 @@ private RuntimeSkillDraft generateDraft(String sessionId,
List<String> allowedTools,
List<AgentSession.ConversationMessage> sessionHistory) {
try {
return proposeDraft(sessionId, projectPath, allowedTools, sessionHistory);
return sanitizeDraft(proposeDraft(sessionId, projectPath, allowedTools, sessionHistory));
} catch (Exception e) {
log.warn("Dynamic runtime skill generation fell back to template: {}", e.getMessage());
return fallbackDraft(allowedTools, sessionHistory);
return sanitizeDraft(fallbackDraft(allowedTools, sessionHistory));
}
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

Expand Down Expand Up @@ -271,19 +277,34 @@ private RuntimeSkillDraft parseDraft(String text) throws LlmException {
if (description == null || body == null) {
throw new LlmException("Runtime skill response missing description/body");
}
if (BASH_MENTION.matcher(body).find()) {
throw new LlmException("Runtime skill response contains disallowed bash content");
}
return new RuntimeSkillDraft(
return sanitizeDraft(new RuntimeSkillDraft(
name != null ? name : "runtime-workflow",
description,
argumentHint,
body);
body));
} catch (IOException e) {
throw new LlmException("Failed to parse runtime skill response: " + e.getMessage());
}
}

private static RuntimeSkillDraft sanitizeDraft(RuntimeSkillDraft draft) {
Objects.requireNonNull(draft, "draft");
var description = trimToNull(draft.description());
var body = trimToNull(draft.body());
if (description == null || body == null) {
throw new IllegalArgumentException("Runtime skill draft missing description/body");
}
if (BASH_MENTION.matcher(body).find()) {
throw new IllegalArgumentException("Runtime skill draft contains disallowed bash content");
}
var name = trimToNull(draft.name());
return new RuntimeSkillDraft(
name != null ? name : "runtime-workflow",
description,
trimToNull(draft.argumentHint()),
body);
}

private static String extractJson(String text) {
var matcher = CODE_FENCE.matcher(text);
return matcher.find() ? matcher.group(1) : text.trim();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@ public final class StreamingAgentHandler {
new ConcurrentHashMap<>();
private final ConcurrentHashMap<String, List<String>> sessionInjectedCandidateIds =
new ConcurrentHashMap<>();
private final ConcurrentHashMap<String, CompletableFuture<Void>> sessionPostProcessing =
new ConcurrentHashMap<>();
private final ConcurrentHashMap<Path, SkillOutcomeTracker> projectSkillTrackers =
new ConcurrentHashMap<>();
private final ConcurrentHashMap<String, ReentrantLock> skillOutcomeLocks =
Expand Down Expand Up @@ -271,7 +273,7 @@ private Object handlePrompt(JsonNode params, StreamContext context) throws Excep
if (planCheckpointStore != null) {
var resumeResult = tryResumeFromCheckpoint(
sessionId, session, cancelContext, eventHandler,
permissionAwareLoop, cancellationToken, metricsCollector, watchdog);
permissionAwareLoop, cancellationToken, metricsCollector, watchdog, requestToolNames);
if (resumeResult != null) {
sendBudgetExhaustedNotificationIfNeeded(watchdog, cancelContext, sessionId, cancellationToken);
return resumeResult;
Expand Down Expand Up @@ -499,6 +501,13 @@ public void onPlanEscalated(TaskPlan escalatedPlan, String reason) {
}

var plannedStopReason = planResult.success() ? StopReason.END_TURN : StopReason.ERROR;
schedulePostRequestLearning(
sessionId,
session.projectPath(),
syntheticTurn(planResult, plannedStopReason),
List.copyOf(session.messages()),
metricsCollector != null ? metricsCollector.allMetrics() : Map.of(),
requestToolNames);
recordInjectedCandidateOutcomes(
sessionId, planResult.success(), cancellationToken.isCancelled(), plannedStopReason);

Expand Down Expand Up @@ -557,33 +566,13 @@ private Object buildTurnResult(dev.aceclaw.core.agent.Turn turn, AgentSession se
logTurnToJournal(prompt, turn);
}

// Self-improvement
if (selfImprovementEngine != null) {
final var turnRef = turn;
final var historyRef = List.copyOf(session.messages());
final var sessionIdRef = sessionId;
final var projectPathRef = session.projectPath();
// Take an immutable snapshot of metrics before handing off to the virtual thread
final var metricsSnapshot = metricsCollector != null
? metricsCollector.allMetrics() : Map.<String, dev.aceclaw.core.agent.ToolMetrics>of();
var shortId = sessionId.length() > 8 ? sessionId.substring(0, 8) : sessionId;
Thread.ofVirtual().name("self-improve-" + shortId).start(() -> {
try {
var insights = selfImprovementEngine.analyze(turnRef, historyRef, metricsSnapshot);
if (!insights.isEmpty()) {
int persisted = selfImprovementEngine.persist(insights, sessionIdRef, projectPathRef);
log.debug("Self-improvement: {} insights analyzed, {} persisted (session={})",
insights.size(), persisted, sessionIdRef);
}
if (dynamicSkillGenerator != null) {
dynamicSkillGenerator.maybeGenerate(
sessionIdRef, projectPathRef, turnRef, historyRef, insights, requestToolNames);
}
} catch (Exception e) {
log.warn("Self-improvement analysis failed: {}", e.getMessage());
}
});
}
schedulePostRequestLearning(
sessionId,
session.projectPath(),
turn,
List.copyOf(session.messages()),
metricsCollector != null ? metricsCollector.allMetrics() : Map.of(),
requestToolNames);

recordInjectedCandidateOutcomes(
sessionId, turn.finalStopReason() != StopReason.ERROR, cancellationToken.isCancelled(),
Expand Down Expand Up @@ -633,6 +622,72 @@ private Object buildTurnResult(dev.aceclaw.core.agent.Turn turn, AgentSession se
return result;
}

private void schedulePostRequestLearning(String sessionId,
Path projectPath,
dev.aceclaw.core.agent.Turn turn,
List<AgentSession.ConversationMessage> sessionHistory,
Map<String, ToolMetrics> metricsSnapshot,
Set<String> requestToolNames) {
if ((selfImprovementEngine == null && dynamicSkillGenerator == null)
|| turn == null || turn.newMessages().isEmpty()) {
return;
}

var historyRef = List.copyOf(sessionHistory);
var metricsRef = metricsSnapshot == null ? Map.<String, ToolMetrics>of() : Map.copyOf(metricsSnapshot);
var toolNamesRef = requestToolNames == null ? Set.<String>of() : Set.copyOf(requestToolNames);

sessionPostProcessing.compute(sessionId, (ignored, previous) -> {
var base = previous == null
? CompletableFuture.<Void>completedFuture(null)
: previous.exceptionally(error -> null);
var next = base.thenRunAsync(
() -> runPostRequestLearning(sessionId, projectPath, turn, historyRef, metricsRef, toolNamesRef),
command -> Thread.ofVirtual().name("post-learn-" + shortenSessionId(sessionId)).start(command));
next.whenComplete((unused, error) -> sessionPostProcessing.remove(sessionId, next));
return next;
});
}

private void runPostRequestLearning(String sessionId,
Path projectPath,
dev.aceclaw.core.agent.Turn turn,
List<AgentSession.ConversationMessage> sessionHistory,
Map<String, ToolMetrics> metricsSnapshot,
Set<String> requestToolNames) {
var insights = List.<dev.aceclaw.memory.Insight>of();
if (selfImprovementEngine != null) {
try {
insights = selfImprovementEngine.analyze(turn, sessionHistory, metricsSnapshot);
if (!insights.isEmpty()) {
int persisted = selfImprovementEngine.persist(insights, sessionId, projectPath);
log.debug("Self-improvement: {} insights analyzed, {} persisted (session={})",
insights.size(), persisted, sessionId);
}
} catch (Exception e) {
log.warn("Self-improvement analysis failed: {}", e.getMessage());
}
}
if (dynamicSkillGenerator != null) {
try {
dynamicSkillGenerator.maybeGenerate(
sessionId, projectPath, turn, sessionHistory, insights, requestToolNames);
} catch (Exception e) {
log.warn("Dynamic runtime skill generation failed: {}", e.getMessage());
}
}
}

private dev.aceclaw.core.agent.Turn syntheticTurn(PlanExecutionResult planResult, StopReason stopReason) {
int totalInput = planResult.stepResults().stream().mapToInt(StepResult::inputTokens).sum();
int totalOutput = planResult.stepResults().stream().mapToInt(StepResult::outputTokens).sum();
return new dev.aceclaw.core.agent.Turn(planResult.messages(), stopReason, new dev.aceclaw.core.llm.Usage(totalInput, totalOutput));
}

private static String shortenSessionId(String sessionId) {
return sessionId.length() > 8 ? sessionId.substring(0, 8) : sessionId;
}

private void recordPromptSkillCorrections(String sessionId, Path projectPath, String prompt) {
var recent = sessionRecentSuccessfulSkills.remove(sessionId);
if (recent == null || recent.isEmpty() || !SessionEndExtractor.looksLikeCorrection(prompt)) {
Expand Down Expand Up @@ -1482,6 +1537,20 @@ public void clearSessionMetrics(String sessionId) {
sessionInjectedCandidateIds.remove(sessionId);
sessionDoomLoops.remove(sessionId);
sessionProgressDetectors.remove(sessionId);
sessionPostProcessing.remove(sessionId);
}

public void awaitSessionPostProcessing(String sessionId) {
Objects.requireNonNull(sessionId, "sessionId");
var future = sessionPostProcessing.get(sessionId);
if (future == null) {
return;
}
try {
future.get(30, TimeUnit.SECONDS);
} catch (Exception e) {
log.warn("Timed out waiting for post-request learning for {}: {}", sessionId, e.getMessage());
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}

/**
Expand Down Expand Up @@ -1902,7 +1971,8 @@ private Object tryResumeFromCheckpoint(
String sessionId, AgentSession session,
StreamContext cancelContext, StreamEventHandler eventHandler,
StreamingAgentLoop permissionAwareLoop, CancellationToken cancellationToken,
ToolMetricsCollector metricsCollector, WatchdogTimer watchdog) throws Exception {
ToolMetricsCollector metricsCollector, WatchdogTimer watchdog,
Set<String> requestToolNames) throws Exception {

var resumeRouter = new ResumeRouter(planCheckpointStore);
var routeDecision = resumeRouter.route(sessionId, session.projectPath());
Expand Down Expand Up @@ -1933,7 +2003,8 @@ private Object tryResumeFromCheckpoint(

if (userAccepted && cp.hasRemainingSteps()) {
return executeResumedPlan(cp, session, sessionId, cancelContext,
eventHandler, permissionAwareLoop, cancellationToken, metricsCollector, watchdog);
eventHandler, permissionAwareLoop, cancellationToken, metricsCollector, watchdog,
requestToolNames);
}

// User declined or no remaining steps
Expand Down Expand Up @@ -1994,7 +2065,8 @@ private Object executeResumedPlan(
PlanCheckpoint cp, AgentSession session, String sessionId,
StreamContext cancelContext, StreamEventHandler eventHandler,
StreamingAgentLoop permissionAwareLoop, CancellationToken cancellationToken,
ToolMetricsCollector metricsCollector, WatchdogTimer watchdog) throws Exception {
ToolMetricsCollector metricsCollector, WatchdogTimer watchdog,
Set<String> requestToolNames) throws Exception {

// 1. Mark old checkpoint as RESUMED
planCheckpointStore.markResumed(cp.planId());
Expand Down Expand Up @@ -2185,6 +2257,13 @@ public void onPlanEscalated(TaskPlan escalatedPlan, String reason) {
}

var plannedStopReason = planResult.success() ? StopReason.END_TURN : StopReason.ERROR;
schedulePostRequestLearning(
sessionId,
session.projectPath(),
syntheticTurn(planResult, plannedStopReason),
List.copyOf(session.messages()),
metricsCollector != null ? metricsCollector.allMetrics() : Map.of(),
requestToolNames);
recordInjectedCandidateOutcomes(
sessionId, planResult.success(), cancellationToken.isCancelled(), plannedStopReason);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,29 @@ void rejectsGeneratedDraftThatMentionsBashInBody() {
assertThat(generated.orElseThrow().body()).doesNotContain("bash");
}

@Test
void skipsGenerationWhenFallbackDraftWouldMentionBash() {
mockLlm.enqueueSendMessageResponse(MockLlmClient.sendMessageTextResponse("""
{
"name": "bad-workflow",
"description": "Contains bash",
"argument_hint": "",
"body": "Run bash to inspect the repo."
}
"""));

var generated = generator.maybeGenerate(
"session-1",
workDir,
repeatedSequenceTurn("read_file", "grep", "edit_file"),
sessionHistory("Please use bash if needed."),
repeatedSequenceInsight(),
Set.of("read_file", "grep", "edit_file"));

assertThat(generated).isEmpty();
assertThat(skillRegistry.runtimeSkills("session-1")).isEmpty();
}

@Test
void skipsGenerationWhenObservedToolIsNotAllowedInSession() {
var generated = generator.maybeGenerate(
Expand Down
Loading