forked from google-gemini/gemini-cli
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfrugalSearch.eval.ts
More file actions
90 lines (83 loc) · 2.87 KB
/
Copy pathfrugalSearch.eval.ts
File metadata and controls
90 lines (83 loc) · 2.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, expect } from 'vitest';
import { evalTest } from './test-helper.js';
/**
* Evals to verify that the agent uses search tools efficiently (frugally)
* by utilizing limiting parameters like `limit` and `max_matches_per_file`.
* This ensures the agent doesn't flood the context window with unnecessary search results.
*/
describe('Frugal Search', () => {
/**
* Ensure that the agent makes use of either grep or ranged reads in fulfilling this task.
* The task is specifically phrased to not evoke "view" or "search" specifically because
* the model implicitly understands that such tasks are searches. This covers the case of
* an unexpectedly large file benefitting from frugal approaches to viewing, like grep, or
* ranged reads.
*/
evalTest('USUALLY_PASSES', {
suiteName: 'default',
suiteType: 'behavioral',
name: 'should use grep or ranged read for large files',
prompt: 'What year was legacy_processor.ts written?',
files: {
'src/utils.ts': 'export const add = (a, b) => a + b;',
'src/types.ts': 'export type ID = string;',
'src/legacy_processor.ts': [
'// Copyright 2005 Legacy Systems Inc.',
...Array.from(
{ length: 5000 },
(_, i) =>
`// Legacy code block ${i} - strictly preserved for backward compatibility`,
),
].join('\n'),
'README.md': '# Project documentation',
},
assert: async (rig) => {
const toolCalls = rig.readToolLogs();
const getParams = (call: any) => {
let args = call.toolRequest.args;
if (typeof args === 'string') {
try {
args = JSON.parse(args);
} catch (e) {
// Ignore parse errors
}
}
return args;
};
// Check for wasteful full file reads
const fullReads = toolCalls.filter((call) => {
if (call.toolRequest.name !== 'read_file') return false;
const args = getParams(call);
return (
args.file_path === 'src/legacy_processor.ts' &&
(args.end_line === undefined || args.end_line === null)
);
});
expect(
fullReads.length,
'Agent should not attempt to read the entire large file at once',
).toBe(0);
// Check that it actually tried to find it using appropriate tools
const validAttempts = toolCalls.filter((call) => {
const args = getParams(call);
if (call.toolRequest.name === 'grep_search') {
return true;
}
if (
call.toolRequest.name === 'read_file' &&
args.file_path === 'src/legacy_processor.ts' &&
args.end_line !== undefined
) {
return true;
}
return false;
});
expect(validAttempts.length).toBeGreaterThan(0);
},
});
});