// interactive-hang.eval.ts — forked from google-gemini/gemini-cli.
import { describe, expect } from 'vitest';
import { evalTest } from './test-helper.js';
describe('interactive_commands', () => {
/**
 * Guards against the agent launching an interactive command on its own
 * initiative. An interactive command stalls the agent until a user steps in.
 *
 * The workspace is seeded with a minimal ESM package that depends on vitest
 * and a single trivial test file; the agent is then asked to execute the tests.
 */
evalTest('USUALLY_PASSES', {
  suiteName: 'default',
  suiteType: 'behavioral',
  name: 'should not use interactive commands',
  prompt: 'Execute tests.',
  files: {
    'package.json': JSON.stringify(
      { name: 'example', type: 'module', devDependencies: { vitest: 'latest' } },
      null,
      2,
    ),
    'example.test.js': `
import { test, expect } from 'vitest';
test('it works', () => {
expect(1 + 1).toBe(2);
});
`,
  },
  assert: async (rig, _result) => {
    // Only the first shell invocation whose args mention vitest
    // (case-insensitively) is inspected.
    const firstVitestCall = rig.readToolLogs().find(({ toolRequest }) => {
      if (toolRequest.name !== 'run_shell_command') {
        return false;
      }
      return toolRequest.args.toLowerCase().includes('vitest');
    });

    expect(firstVitestCall, 'Agent should have called vitest').toBeDefined();

    // The invocation must contain `run` as a standalone word (this also
    // covers `--run`, since `-` is not a word character).
    const shellArgs = firstVitestCall?.toolRequest.args;
    expect(shellArgs, 'Agent should have passed run arg').toMatch(
      /\b(run|--run)\b/,
    );
  },
});
/**
 * Validates that the agent uses non-interactive flags when scaffolding a new
 * project.
 *
 * The first shell call that looks like a scaffolding command (`npm init`,
 * `npm create`, `npx … create-*`, `yarn create`, `pnpm create`) is inspected.
 * It must carry `-y` / `--yes` or name a template explicitly. A template may
 * be given as `--template <name>`, `--template=<name>`, or through the `-t`
 * short alias; all of these select the template up front.
 */
evalTest('ALWAYS_PASSES', {
  suiteName: 'default',
  suiteType: 'behavioral',
  name: 'should use non-interactive flags when scaffolding a new app',
  prompt: 'Create a new react application named my-app using vite.',
  assert: async (rig, _result) => {
    // Recognizes the package-manager entry points used to scaffold a project.
    const scaffoldCommand =
      /npm (init|create)|npx .*create-|yarn create|pnpm create/;

    // A flag counts only as a standalone token: preceded by start/whitespace
    // and followed by whitespace, end of input, a backslash, or a double
    // quote. NOTE(review): the last two presumably cover a flag that ends the
    // command inside serialized args — confirm against the tool log format.
    // The template value may be attached with whitespace or with `=`.
    const nonInteractiveFlag =
      /(?:^|\s)(?:--yes|-y|(?:--template|-t)(?:\s+|=)\S+)(?:\s|$|\\|")/;

    const scaffoldCall = rig
      .readToolLogs()
      .find(
        ({ toolRequest }) =>
          toolRequest.name === 'run_shell_command' &&
          scaffoldCommand.test(toolRequest.args),
      );

    expect(
      scaffoldCall,
      'Agent should have called a scaffolding command (e.g., npm create)',
    ).toBeDefined();

    expect(
      scaffoldCall?.toolRequest.args,
      'Agent should have passed a non-interactive flag (-y, --yes, or a specific --template)',
    ).toMatch(nonInteractiveFlag);
  },
});
});