Skip to content

Commit f09d45d

Browse files
authored
fix(core): prevent isBinary false-positive on Windows PTY streams (google-gemini#26565)
1 parent 792654c commit f09d45d

3 files changed

Lines changed: 154 additions & 3 deletions

File tree

packages/core/src/services/shellExecutionService.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1133,7 +1133,7 @@ export class ShellExecutionService {
11331133
const sniffBuffer = Buffer.concat(sniffChunks);
11341134
sniffedBytes = sniffBuffer.length;
11351135

1136-
if (isBinary(sniffBuffer)) {
1136+
if (isBinary(sniffBuffer, 512, true)) {
11371137
isStreamingRawContent = false;
11381138
binaryBytesReceived = sniffBuffer.length;
11391139
const event: ShellOutputEvent = { type: 'binary_detected' };

packages/core/src/utils/textUtils.test.ts

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ import {
99
safeLiteralReplace,
1010
truncateString,
1111
safeTemplateReplace,
12+
isBinary,
13+
stripAnsiFromBuffer,
1214
} from './textUtils.js';
1315

1416
describe('safeLiteralReplace', () => {
@@ -198,3 +200,121 @@ describe('safeTemplateReplace', () => {
198200
expect(safeTemplateReplace(tmpl, replacements)).toBe('Value: $&');
199201
});
200202
});
203+
204+
describe('stripAnsiFromBuffer', () => {
205+
it('returns the buffer unchanged when no escape sequences are present', () => {
206+
const input = Buffer.from('hello world');
207+
expect(stripAnsiFromBuffer(input).toString()).toBe('hello world');
208+
});
209+
210+
it('strips CSI sequences (ESC [ ... final)', () => {
211+
// ESC[31m = red foreground, ESC[0m = reset
212+
const input = Buffer.from('\x1b[31mhello\x1b[0m');
213+
expect(stripAnsiFromBuffer(input).toString()).toBe('hello');
214+
});
215+
216+
it('strips OSC sequences terminated by BEL', () => {
217+
// OSC title set: ESC ] 0 ; title BEL
218+
const input = Buffer.from('\x1b]0;My Title\x07some text');
219+
expect(stripAnsiFromBuffer(input).toString()).toBe('some text');
220+
});
221+
222+
it('strips OSC sequences terminated by ST (ESC \\)', () => {
223+
const input = Buffer.from('\x1b]0;My Title\x1b\\some text');
224+
expect(stripAnsiFromBuffer(input).toString()).toBe('some text');
225+
});
226+
227+
it('strips simple two-byte escape sequences', () => {
228+
// ESC D = Index (scroll down)
229+
const input = Buffer.from('\x1bDhello');
230+
expect(stripAnsiFromBuffer(input).toString()).toBe('hello');
231+
});
232+
233+
it('handles multiple mixed escape sequences', () => {
234+
const input = Buffer.from(
235+
'\x1b[31m\x1b]0;title\x07hello\x1b[0m world\x1bD',
236+
);
237+
expect(stripAnsiFromBuffer(input).toString()).toBe('hello world');
238+
});
239+
240+
it('returns empty buffer when input is only escape sequences', () => {
241+
const input = Buffer.from('\x1b[31m\x1b[0m');
242+
expect(stripAnsiFromBuffer(input).length).toBe(0);
243+
});
244+
});
245+
246+
describe('isBinary', () => {
247+
describe('default mode (strict, for files/pipes)', () => {
248+
it('returns false for null/undefined/empty input', () => {
249+
expect(isBinary(null)).toBe(false);
250+
expect(isBinary(undefined)).toBe(false);
251+
expect(isBinary(Buffer.alloc(0))).toBe(false);
252+
});
253+
254+
it('returns false for plain ASCII text', () => {
255+
expect(isBinary(Buffer.from('hello world\n'))).toBe(false);
256+
});
257+
258+
it('returns true when a single null byte is present', () => {
259+
expect(isBinary(Buffer.from('hello\x00world'))).toBe(true);
260+
});
261+
262+
it('returns true for binary data', () => {
263+
const buf = Buffer.alloc(100, 0);
264+
expect(isBinary(buf)).toBe(true);
265+
});
266+
267+
it('only checks the first sampleSize bytes', () => {
268+
const buf = Buffer.alloc(600, 0x41); // 'A' x 600
269+
buf[550] = 0; // null byte outside default 512 sample
270+
expect(isBinary(buf)).toBe(false);
271+
});
272+
273+
it('detects null byte within custom sampleSize', () => {
274+
const buf = Buffer.alloc(600, 0x41);
275+
buf[550] = 0;
276+
expect(isBinary(buf, 600)).toBe(true);
277+
});
278+
});
279+
280+
describe('PTY mode (isPtyOutput = true)', () => {
281+
it('returns false for PTY output that is pure ANSI escape sequences', () => {
282+
const buf = Buffer.from('\x1b[31m\x1b[0m');
283+
expect(isBinary(buf, 512, true)).toBe(false);
284+
});
285+
286+
it('returns false for text with ANSI sequences containing embedded null bytes', () => {
287+
// Simulate Windows PTY: OSC title set with null bytes, followed by text
288+
const osc = Buffer.from('\x1b]0;title\x00\x07');
289+
const text = Buffer.from('hello world');
290+
const buf = Buffer.concat([osc, text]);
291+
expect(isBinary(buf, 512, true)).toBe(false);
292+
});
293+
294+
it('returns false for a single stray null byte among text', () => {
295+
const buf = Buffer.from('hello\x00world, this is a long text output');
296+
expect(isBinary(buf, 512, true)).toBe(false);
297+
});
298+
299+
it('returns true for actual binary data through PTY (>10% nulls after strip)', () => {
300+
// 90 null bytes + 10 text bytes → 90% nulls = binary
301+
const nulls = Buffer.alloc(90, 0);
302+
const text = Buffer.from('abcdefghij');
303+
const buf = Buffer.concat([nulls, text]);
304+
expect(isBinary(buf, 512, true)).toBe(true);
305+
});
306+
307+
it('returns false for realistic Windows PTY output with ANSI reset + text', () => {
308+
// Simulates: color set, OSC title with a stray null, reset, then real output
309+
const buf = Buffer.from(
310+
'\x1b[?25l\x1b]0;\x00Window Title\x07\x1b[0mPS C:\\Users> echo hello\r\nhello\r\n',
311+
);
312+
expect(isBinary(buf, 512, true)).toBe(false);
313+
});
314+
315+
it('returns false when the buffer is empty after stripping ANSI', () => {
316+
const buf = Buffer.from('\x1b[31m\x1b[42m\x1b[0m');
317+
expect(isBinary(buf, 512, true)).toBe(false);
318+
});
319+
});
320+
});

packages/core/src/utils/textUtils.ts

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
* SPDX-License-Identifier: Apache-2.0
55
*/
66

7+
import stripAnsi from 'strip-ansi';
8+
79
/**
810
* Safely replaces text with literal strings, avoiding ECMAScript GetSubstitution issues.
911
* Escapes $ characters to prevent template interpretation.
@@ -25,22 +27,51 @@ export function safeLiteralReplace(
2527
return str.replaceAll(oldString, escapedNewString);
2628
}
2729

30+
/**
31+
* Strips ANSI/VT escape sequences from a raw byte buffer.
32+
* Uses latin1 encoding to preserve every byte's value exactly (0-255)
33+
* while allowing string-based removal of escape sequences.
34+
*/
35+
export function stripAnsiFromBuffer(data: Buffer): Buffer {
36+
const stripped = stripAnsi(data.toString('latin1'));
37+
return Buffer.from(stripped, 'latin1');
38+
}
39+
2840
/**
2941
* Checks if a Buffer is likely binary by testing for the presence of a NULL byte.
3042
* The presence of a NULL byte is a strong indicator that the data is not plain text.
3143
* @param data The Buffer to check.
3244
* @param sampleSize The number of bytes from the start of the buffer to test.
33-
* @returns True if a NULL byte is found, false otherwise.
45+
* @param isPtyOutput When true, ANSI escape sequences are stripped before
46+
* checking and a null-byte ratio threshold is used instead of failing on
47+
* a single null byte. This prevents false positives caused by node-pty
48+
* on Windows emitting VT control sequences that contain null bytes.
49+
* @returns True if the data is likely binary, false otherwise.
3450
*/
3551
export function isBinary(
3652
data: Buffer | null | undefined,
3753
sampleSize = 512,
54+
isPtyOutput = false,
3855
): boolean {
3956
if (!data) {
4057
return false;
4158
}
4259

43-
const sample = data.length > sampleSize ? data.subarray(0, sampleSize) : data;
60+
let sample = data.length > sampleSize ? data.subarray(0, sampleSize) : data;
61+
62+
if (isPtyOutput) {
63+
sample = stripAnsiFromBuffer(sample);
64+
if (sample.length === 0) {
65+
return false;
66+
}
67+
let nullCount = 0;
68+
for (const byte of sample) {
69+
if (byte === 0) {
70+
nullCount++;
71+
}
72+
}
73+
return nullCount / sample.length > 0.1;
74+
}
4475

4576
for (const byte of sample) {
4677
// The presence of a NULL byte (0x00) is one of the most reliable

0 commit comments

Comments
 (0)