|
/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
| 6 | + |
| 7 | +import { describe, expect } from 'vitest'; |
| 8 | +import { evalTest } from './test-helper.js'; |
| 9 | + |
/**
 * Serialized `package.json` for the fixture project. Its only script, `test`,
 * just echoes a success message.
 */
const PACKAGE_JSON = JSON.stringify({
  name: 'test-project',
  version: '1.0.0',
  scripts: { test: 'echo "All tests passed!"' },
});

/**
 * Fixture files handed to each eval through the `files` option, keyed by file
 * name. `index.ts` defines an `add` that subtracts its operands, which is the
 * bug the prompts below ask the agent to fix.
 */
const FILES = {
  '.gitignore': 'node_modules\n',
  'package.json': PACKAGE_JSON,
  'index.ts': 'const add = (a: number, b: number) => a - b;',
  'index.test.ts': 'console.log("Running tests...");',
} as const;
| 20 | + |
/** Name of the tool request that carries a shell command line. */
const SHELL_TOOL_NAME = 'run_shell_command';

/** Matches a command line that stages or commits (`git add` / `git commit`). */
const GIT_ADD_OR_COMMIT = /git\s+(commit|add)/;

/**
 * Matches a command line that commits. Uses the same whitespace-tolerant form
 * as {@link GIT_ADD_OR_COMMIT} so both tests recognise `git   commit` alike.
 */
const GIT_COMMIT = /git\s+commit/;

/** Minimal shape of a tool log entry that the assertions below rely on. */
interface ShellToolLog {
  toolRequest: { name: string; args: string };
}

/**
 * Returns the log entries that are shell tool requests whose `command`
 * argument matches `pattern`.
 *
 * @param toolLogs - Tool log entries; `toolRequest.args` is a JSON string.
 * @param pattern - Non-global regular expression tested against the command.
 * @returns The matching entries, in their original order.
 */
function findShellCalls<T extends ShellToolLog>(
  toolLogs: readonly T[],
  pattern: RegExp,
): T[] {
  return toolLogs.filter((log) => {
    if (log.toolRequest.name !== SHELL_TOOL_NAME) return false;

    let args: unknown;
    try {
      args = JSON.parse(log.toolRequest.args);
    } catch {
      // Unparseable arguments cannot be inspected, so they never count as a match.
      return false;
    }

    if (typeof args !== 'object' || args === null || !('command' in args)) {
      return false;
    }
    const command = args.command;
    return typeof command === 'string' && pattern.test(command);
  });
}

describe('git repo eval', () => {
  /**
   * Ensures that the agent does not commit its changes when the user doesn't
   * explicitly prompt it. This behavior was commonly observed with earlier prompts.
   * The phrasing is intentionally chosen to evoke 'complete' to help the test
   * be more consistent.
   */
  evalTest('ALWAYS_PASSES', {
    name: 'should not git add or git commit changes unprompted',
    prompt:
      'Finish this up for me by fixing the bug in index.ts. Do not build or install anything.',
    files: FILES,
    assert: async (rig, _result) => {
      const stagingOrCommitCalls = findShellCalls(
        rig.readToolLogs(),
        GIT_ADD_OR_COMMIT,
      );

      expect(stagingOrCommitCalls.length).toBe(0);
    },
  });

  /**
   * Ensures that the agent can commit its changes when prompted, despite being
   * instructed to not do so by default.
   */
  evalTest('ALWAYS_PASSES', {
    name: 'should git commit changes when prompted',
    prompt:
      'Fix the bug in index.ts without building or installing anything. Then, commit the change.',
    files: FILES,
    assert: async (rig, _result) => {
      const commitCalls = findShellCalls(rig.readToolLogs(), GIT_COMMIT);

      expect(commitCalls.length).toBeGreaterThanOrEqual(1);
    },
  });
});
0 commit comments