xinhuagu · xinhuagu · May 1, 2026 · May 1, 2026 · qodo-code-review · May 1, 2026
diff --git a/aceclaw-core/src/main/java/dev/aceclaw/core/planner/ComplexityEstimator.java b/aceclaw-core/src/main/java/dev/aceclaw/core/planner/ComplexityEstimator.java
@@ -15,12 +15,15 @@ public final class ComplexityEstimator {
 
     /**
      * Default threshold for the no-arg constructor. Mirrors
-     * {@code AceClawConfig.DEFAULT_PLANNER_THRESHOLD} — keep the two
-     * in sync. Lowered from 5 to 3 because single-signal compound
-     * prompts ("refactor X", "extract Y", "do A and then B") are
-     * already plannable but were never triggering the planner.
+     * {@code AceClawConfig.DEFAULT_PLANNER_THRESHOLD} — keep the
+     * two in sync. Settled at 4: too low (3) made every single
+     * "refactor X" / "extract Y" trigger a planner LLM call even on
+     * trivial prompts; too high (5) required two explicit signals
+     * which most prompts didn't hit. At 4, a single +3 signal alone
+     * stays as plain ReAct, but adding ANY second signal flips on
+     * planning.
      */
-    private static final int DEFAULT_THRESHOLD = 3;
+    private static final int DEFAULT_THRESHOLD = 4;
 
     // -- Heuristic patterns ---------------------------------------------------
 

diff --git a/aceclaw-core/src/test/java/dev/aceclaw/core/planner/ComplexityEstimatorTest.java b/aceclaw-core/src/test/java/dev/aceclaw/core/planner/ComplexityEstimatorTest.java
@@ -40,15 +40,31 @@ void multipleFiles_detected() {
     }
 
     @Test
-    void refactoring_highScore() {
+    void refactoring_singleSignalDoesNotPlan_atDefaultThreshold() {
+        // The signal IS detected and contributes its 3 points, but at
+        // the default threshold (4) a single +3 signal alone is not
+        // enough to invoke the planner — the planner adds a real
+        // LLM-call cost, and "refactor X" can mean trivially small
+        // work (REFACTORING regex matches "extract" too, etc.).
+        // Users who want planning on a borderline single-signal prompt
+        // can use the /plan slash command (forcePlan) instead.
         var score = estimator.estimate("Refactor the authentication module");
         assertTrue(score.signals().contains("refactoring"));
-        assertTrue(score.score() >= 3);
-        // After the threshold drop (5 → 3), a single 'refactoring'
-        // signal is enough to plan. Pinning so a future revert can't
-        // silently restore the old "single signal never plans"
-        // behavior without updating this test.
-        assertTrue(score.shouldPlan());
+        assertEquals(3, score.score());
+        assertFalse(score.shouldPlan());
+    }
+
+    @Test
+    void refactoring_plusSecondSignal_plans() {
+        // Any second signal lifts a +3 prompt to at least the threshold (4).
+        // Pins the rule "single +3 signal → no plan; +3 with anything
+        // else → plan" so future threshold tweaks have to reckon with
+        // the assertion explicitly.
+        var withTesting = estimator.estimate("Refactor the auth module and add tests");
+        assertTrue(withTesting.signals().contains("refactoring"));
+        assertTrue(withTesting.signals().contains("testing"));
+        assertTrue(withTesting.score() >= 4);
+        assertTrue(withTesting.shouldPlan());
     }
 
     @Test

diff --git a/aceclaw-daemon/src/main/java/dev/aceclaw/daemon/AceClawConfig.java b/aceclaw-daemon/src/main/java/dev/aceclaw/daemon/AceClawConfig.java
@@ -75,17 +75,27 @@ public final class AceClawConfig {
     private static final boolean DEFAULT_HEARTBEAT_ENABLED = true;
     private static final boolean DEFAULT_PLANNER_ENABLED = true;
     /**
-     * Default complexity score for triggering the planner. Lowered
-     * from 5 → 3 so single-signal compound prompts ("refactor X",
-     * "rename across", "do A and then B") trigger a plan instead of
-     * being treated as plain ReAct turns. Empirically, threshold=5
-     * required two explicit signals which most everyday agentic
-     * prompts don't hit, so the planner essentially never fired for
-     * typical work. See {@link ComplexityEstimator} for the score
-     * table. Users can still override via config to restore older
-     * behavior.
-     */
-    private static final int DEFAULT_PLANNER_THRESHOLD = 3;
+     * Default complexity score for triggering the planner. Lowered
+     * from 5 → 4 (initially landed at 3, raised to 4 after review).
+     *
+     * <p>Threshold 5 required two explicit signals — most everyday
+     * agentic prompts hit at most one, so the planner essentially
+     * never fired. Threshold 3 went too far the other way: every
+     * single "refactor X" / "extract Y" (REFACTORING regex matches
+     * "extract" too) triggered a planner LLM call before any actual
+     * work, even on trivial prompts.
+     *
+     * <p>Threshold 4 is the middle ground: single-signal +3 prompts
+     * ("refactor X" alone) stay as plain ReAct, but adding ANY
+     * second signal (a long description, a second action, multiple
+     * files, testing, …) flips on planning. Users who explicitly
+     * want the planner on a borderline prompt can use
+     * {@code /plan <prompt>} as the escape hatch — that bypasses
+     * this heuristic entirely.
+     *
+     * <p>See {@link ComplexityEstimator} for the score table.
+     */
+    private static final int DEFAULT_PLANNER_THRESHOLD = 4;
     private static final boolean DEFAULT_ADAPTIVE_REPLAN_ENABLED = true;
     private static final boolean DEFAULT_CANDIDATE_INJECTION_ENABLED = true;
     private static final boolean DEFAULT_CANDIDATE_PROMOTION_ENABLED = true;