feat(plan): enforce strict read-only policy and halt execution on violation (google-gemini#16849)

jerop · web-flow · commit 524117482724 · 2026-01-16T17:56:48.000Z
diff --git a/packages/cli/src/config/policy-engine.integration.test.ts b/packages/cli/src/config/policy-engine.integration.test.ts
@@ -287,6 +287,43 @@ describe('Policy Engine Integration Tests', () => {
       ).toBe(PolicyDecision.ASK_USER);
     });
 
+    it('should handle Plan mode correctly', async () => {
+      const settings: Settings = {};
+
+      const config = await createPolicyEngineConfig(
+        settings,
+        ApprovalMode.PLAN,
+      );
+      const engine = new PolicyEngine(config);
+
+      // Read and search tools should be allowed
+      expect(
+        (await engine.check({ name: 'read_file' }, undefined)).decision,
+      ).toBe(PolicyDecision.ALLOW);
+      expect(
+        (await engine.check({ name: 'google_web_search' }, undefined)).decision,
+      ).toBe(PolicyDecision.ALLOW);
+      expect(
+        (await engine.check({ name: 'list_directory' }, undefined)).decision,
+      ).toBe(PolicyDecision.ALLOW);
+
+      // Edit, write, and shell tools should be denied via the catch-all
+      expect(
+        (await engine.check({ name: 'replace' }, undefined)).decision,
+      ).toBe(PolicyDecision.DENY);
+      expect(
+        (await engine.check({ name: 'write_file' }, undefined)).decision,
+      ).toBe(PolicyDecision.DENY);
+      expect(
+        (await engine.check({ name: 'run_shell_command' }, undefined)).decision,
+      ).toBe(PolicyDecision.DENY);
+
+      // Unknown tools should be denied via catch-all
+      expect(
+        (await engine.check({ name: 'unknown_tool' }, undefined)).decision,
+      ).toBe(PolicyDecision.DENY);
+    });
+
     it('should verify priority ordering works correctly in practice', async () => {
       const settings: Settings = {
         tools: {
diff --git a/packages/core/src/core/coreToolScheduler.test.ts b/packages/core/src/core/coreToolScheduler.test.ts
@@ -7,7 +7,10 @@
 import { describe, it, expect, vi } from 'vitest';
 import type { Mock } from 'vitest';
 import type { CallableTool } from '@google/genai';
-import { CoreToolScheduler } from './coreToolScheduler.js';
+import {
+  CoreToolScheduler,
+  PLAN_MODE_DENIAL_MESSAGE,
+} from './coreToolScheduler.js';
 import type {
   ToolCall,
   WaitingToolCall,
@@ -32,6 +35,7 @@ import {
   ApprovalMode,
   HookSystem,
   PolicyDecision,
+  ToolErrorType,
 } from '../index.js';
 import { createMockMessageBus } from '../test-utils/mock-message-bus.js';
 import {
@@ -2078,4 +2082,53 @@ describe('CoreToolScheduler Sequential Execution', () => {
 
     expect(onAllToolCallsComplete).toHaveBeenCalledTimes(1);
   });
+
+  describe('Policy Decisions in Plan Mode', () => {
+    it('should return STOP_EXECUTION error type and informative message when denied in Plan Mode', async () => {
+      const mockTool = new MockTool({
+        name: 'dangerous_tool',
+        displayName: 'Dangerous Tool',
+        description: 'Does risky stuff',
+      });
+      const mockToolRegistry = {
+        getTool: () => mockTool,
+        getAllToolNames: () => ['dangerous_tool'],
+      } as unknown as ToolRegistry;
+
+      const onAllToolCallsComplete = vi.fn();
+
+      const mockConfig = createMockConfig({
+        getToolRegistry: () => mockToolRegistry,
+        getApprovalMode: () => ApprovalMode.PLAN,
+        getPolicyEngine: () =>
+          ({
+            check: async () => ({ decision: PolicyDecision.DENY }),
+          }) as unknown as PolicyEngine,
+      });
+
+      const scheduler = new CoreToolScheduler({
+        config: mockConfig,
+        onAllToolCallsComplete,
+        getPreferredEditor: () => 'vscode',
+      });
+
+      const request = {
+        callId: 'call-1',
+        name: 'dangerous_tool',
+        args: {},
+        isClientInitiated: false,
+        prompt_id: 'prompt-1',
+      };
+
+      await scheduler.schedule(request, new AbortController().signal);
+
+      expect(onAllToolCallsComplete).toHaveBeenCalledTimes(1);
+      const reportedTools = onAllToolCallsComplete.mock.calls[0][0];
+      const result = reportedTools[0];
+
+      expect(result.status).toBe('error');
+      expect(result.response.errorType).toBe(ToolErrorType.STOP_EXECUTION);
+      expect(result.response.error.message).toBe(PLAN_MODE_DENIAL_MESSAGE);
+    });
+  });
 });
diff --git a/packages/core/src/core/coreToolScheduler.ts b/packages/core/src/core/coreToolScheduler.ts
@@ -14,7 +14,7 @@ import {
 } from '../tools/tools.js';
 import type { EditorType } from '../utils/editor.js';
 import type { Config } from '../config/config.js';
-import { PolicyDecision } from '../policy/types.js';
+import { PolicyDecision, ApprovalMode } from '../policy/types.js';
 import { logToolCall } from '../telemetry/loggers.js';
 import { ToolErrorType } from '../tools/tool-error.js';
 import { ToolCallEvent } from '../telemetry/types.js';
@@ -65,6 +65,9 @@ export type {
   ToolCallResponseInfo,
 };
 
+export const PLAN_MODE_DENIAL_MESSAGE =
+  'You are in Plan Mode - adjust your prompt to only use read and search tools.';
+
 const createErrorResponse = (
   request: ToolCallRequestInfo,
   error: Error,
@@ -603,16 +606,18 @@ export class CoreToolScheduler {
           .check(toolCallForPolicy, serverName);
 
         if (decision === PolicyDecision.DENY) {
-          const errorMessage = `Tool execution denied by policy.`;
+          let errorMessage = `Tool execution denied by policy.`;
+          let errorType = ToolErrorType.POLICY_VIOLATION;
+
+          if (this.config.getApprovalMode() === ApprovalMode.PLAN) {
+            errorMessage = PLAN_MODE_DENIAL_MESSAGE;
+            errorType = ToolErrorType.STOP_EXECUTION;
+          }
           this.setStatusInternal(
             reqInfo.callId,
             'error',
             signal,
-            createErrorResponse(
-              reqInfo,
-              new Error(errorMessage),
-              ToolErrorType.POLICY_VIOLATION,
-            ),
+            createErrorResponse(reqInfo, new Error(errorMessage), errorType),
           );
           await this.checkAndNotifyCompletion(signal);
           return;
diff --git a/packages/core/src/policy/policies/plan.toml b/packages/core/src/policy/policies/plan.toml
@@ -0,0 +1,77 @@
+# Priority system for policy rules:
+# - Higher priority numbers win over lower priority numbers
+# - When multiple rules match, the highest priority rule is applied
+# - Rules are evaluated in order of priority (highest first)
+#
+# Priority bands (tiers):
+# - Default policies (TOML): 1 + priority/1000 (e.g., priority 100 → 1.100)
+# - User policies (TOML): 2 + priority/1000 (e.g., priority 100 → 2.100)
+# - Admin policies (TOML): 3 + priority/1000 (e.g., priority 100 → 3.100)
+#
+# This ensures Admin > User > Default hierarchy is always preserved,
+# while allowing user-specified priorities to work within each tier.
+#
+# Settings-based and dynamic rules (all in user tier 2.x):
+#   2.95: Tools that the user has selected as "Always Allow" in the interactive UI
+#   2.9:  MCP servers excluded list (security: persistent server blocks)
+#   2.4:  Command line flag --exclude-tools (explicit temporary blocks)
+#   2.3:  Command line flag --allowed-tools (explicit temporary allows)
+#   2.2:  MCP servers with trust=true (persistent trusted servers)
+#   2.1:  MCP servers allowed list (persistent general server allows)
+#
+# TOML policy priorities (before transformation):
+#   10: Write tools default to ASK_USER (becomes 1.010 in default tier)
+#   20: Plan mode catch-all DENY override (becomes 1.020 in default tier)
+#   50: Read-only tools (becomes 1.050 in default tier)
+#   999: YOLO mode allow-all (becomes 1.999 in default tier)
+
+# Catch-All: Deny everything by default in Plan mode (the priority-50 allows below win over it).
+
+[[rule]]
+decision = "deny"
+priority = 20
+modes = ["plan"]
+
+# Explicitly Allow Read-Only Tools in Plan mode.
+
+[[rule]]
+toolName = "glob"
+decision = "allow"
+priority = 50
+modes = ["plan"]
+
+[[rule]]
+toolName = "search_file_content"
+decision = "allow"
+priority = 50
+modes = ["plan"]
+
+[[rule]]
+toolName = "list_directory"
+decision = "allow"
+priority = 50
+modes = ["plan"]
+
+[[rule]]
+toolName = "read_file"
+decision = "allow"
+priority = 50
+modes = ["plan"]
+
+[[rule]]
+toolName = "read_many_files"
+decision = "allow"
+priority = 50
+modes = ["plan"]
+
+[[rule]]
+toolName = "google_web_search"
+decision = "allow"
+priority = 50
+modes = ["plan"]
+
+[[rule]]
+toolName = "SubagentInvocation"
+decision = "allow"
+priority = 50
+modes = ["plan"]