forked from google-gemini/gemini-cli
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathautomated-tool-use.eval.ts
More file actions
174 lines (166 loc) · 4.57 KB
/
Copy pathautomated-tool-use.eval.ts
File metadata and controls
174 lines (166 loc) · 4.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, expect } from 'vitest';
import { evalTest } from './test-helper.js';
/** Name under which shell executions are recorded in the tool log. */
const SHELL_TOOL_NAME = 'run_shell_command';

/**
 * Matches `--fix` only as a standalone argument (optionally followed by `=`,
 * a quote, or a shell separator). A plain substring test would also accept
 * `--fix-dry-run`, which does not write fixes back to disk.
 */
const ESLINT_FIX_FLAG = /(?:^|[\s"'])--fix(?=$|[\s=;&|)"'])/;

/**
 * Matches `--write`, or a single-dash short-option group containing `w`
 * (`-w`, `-lw`, ...), only as a standalone argument. A plain substring test
 * for `-w` would also accept unrelated long options such as
 * `--with-node-modules`.
 */
const PRETTIER_WRITE_FLAG =
  /(?:^|[\s"'])(?:--write|-[A-Za-z]*w[A-Za-z]*)(?=$|[\s=;&|)"'])/;

/** Minimal shape of a tool-log entry that the assertions below rely on. */
interface LoggedToolCall {
  toolRequest: { name: string; args: unknown };
}

/**
 * Collects the `command` string of every `run_shell_command` call in a tool log.
 *
 * `args` may be either an already-parsed object or a JSON-encoded string.
 * Entries whose args are not valid JSON, do not parse to an object, or carry
 * no non-empty string `command` are skipped rather than throwing, so a
 * malformed log entry cannot crash the assertion.
 *
 * @param toolCalls - Tool-log entries as returned by `rig.readToolLogs()`.
 * @returns The shell command lines, in log order.
 */
function extractShellCommands(toolCalls: readonly LoggedToolCall[]): string[] {
  const commands: string[] = [];
  for (const { toolRequest } of toolCalls) {
    if (toolRequest.name !== SHELL_TOOL_NAME) {
      continue;
    }
    let args: unknown = toolRequest.args;
    if (typeof args === 'string') {
      try {
        args = JSON.parse(args);
      } catch {
        // Unparseable args cannot contain a usable command.
        continue;
      }
    }
    if (typeof args !== 'object' || args === null) {
      continue;
    }
    const command = (args as Record<string, unknown>)['command'];
    if (typeof command === 'string' && command.length > 0) {
      commands.push(command);
    }
  }
  return commands;
}

describe('Automated tool use', () => {
  /**
   * Tests that the agent always utilizes --fix when calling eslint.
   * We provide a 'lint' script in the package.json, which helps elicit
   * a repro by guiding the agent into using the existing deficient script
   * (`eslint .` without `--fix`).
   */
  evalTest('USUALLY_PASSES', {
    suiteName: 'default',
    suiteType: 'behavioral',
    name: 'should use automated tools (eslint --fix) to fix code style issues',
    files: {
      'package.json': JSON.stringify(
        {
          name: 'typescript-project',
          version: '1.0.0',
          type: 'module',
          scripts: {
            lint: 'eslint .',
          },
          devDependencies: {
            eslint: '^9.0.0',
            globals: '^15.0.0',
            typescript: '^5.0.0',
            'typescript-eslint': '^8.0.0',
            '@eslint/js': '^9.0.0',
          },
        },
        null,
        2,
      ),
      'eslint.config.js': `
import globals from "globals";
import pluginJs from "@eslint/js";
import tseslint from "typescript-eslint";
export default [
{
files: ["**/*.{js,mjs,cjs,ts}"],
languageOptions: {
globals: globals.node
}
},
pluginJs.configs.recommended,
...tseslint.configs.recommended,
{
rules: {
"prefer-const": "error",
"@typescript-eslint/no-unused-vars": "off"
}
}
];
`,
      'src/app.ts': `
export function main() {
let count = 10;
console.log(count);
}
`,
    },
    prompt:
      'Fix the linter errors in this project. Make sure to avoid interactive commands.',
    assert: async (rig) => {
      // Pass when at least one shell command invokes eslint (directly or via
      // the npm `lint` script) with a standalone `--fix` argument.
      // NOTE(review): `npm run lint --fix` (without a `--` separator) is still
      // accepted here; confirm whether that leniency is intended.
      const commands = extractShellCommands(rig.readToolLogs());
      const usedEslintFix = commands.some(
        (command) =>
          (command.includes('eslint') || command.includes('npm run lint')) &&
          ESLINT_FIX_FLAG.test(command),
      );
      expect(
        usedEslintFix,
        'Expected agent to use eslint --fix via run_shell_command',
      ).toBe(true);
    },
  });

  /**
   * Tests that the agent uses prettier --write to fix formatting issues in files
   * instead of trying to edit the files itself.
   */
  evalTest('USUALLY_PASSES', {
    suiteName: 'default',
    suiteType: 'behavioral',
    name: 'should use automated tools (prettier --write) to fix formatting issues',
    files: {
      'package.json': JSON.stringify(
        {
          name: 'typescript-project',
          version: '1.0.0',
          type: 'module',
          scripts: {},
          devDependencies: {
            prettier: '^3.0.0',
            typescript: '^5.0.0',
          },
        },
        null,
        2,
      ),
      '.prettierrc': JSON.stringify(
        {
          semi: true,
          singleQuote: true,
        },
        null,
        2,
      ),
      'src/app.ts': `
export function main() {
const data={ name:'test',
val:123
}
console.log(data)
}
`,
    },
    prompt:
      'Fix the formatting errors in this project. Make sure to avoid interactive commands.',
    assert: async (rig) => {
      // Pass when at least one shell command invokes prettier with a
      // standalone write flag (`--write` / `-w`).
      const commands = extractShellCommands(rig.readToolLogs());
      const usedPrettierWrite = commands.some(
        (command) =>
          command.includes('prettier') && PRETTIER_WRITE_FLAG.test(command),
      );
      expect(
        usedPrettierWrite,
        'Expected agent to use prettier --write via run_shell_command',
      ).toBe(true);
    },
  });
});