Skip to content

Commit 6589cdf

Browse files
authored
Proposal: deterministic encoding for child-process I/O (google-gemini#27247)
1 parent 9de4289 commit 6589cdf

7 files changed

Lines changed: 49 additions & 695 deletions

File tree

package-lock.json

Lines changed: 0 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/core/package.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@
5454
"@xterm/headless": "5.5.0",
5555
"ajv": "^8.17.1",
5656
"ajv-formats": "^3.0.0",
57-
"chardet": "^2.1.0",
5857
"chokidar": "^5.0.0",
5958
"command-exists": "^1.2.9",
6059
"diff": "^8.0.3",

packages/core/src/index.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,6 @@ export {
103103
export * from './utils/tool-utils.js';
104104
export * from './utils/tool-visibility.js';
105105
export * from './utils/terminalSerializer.js';
106-
export * from './utils/systemEncoding.js';
107106
export * from './utils/textUtils.js';
108107
export * from './utils/formatters.js';
109108
export * from './utils/generateContentResponseUtilities.js';

packages/core/src/services/shellExecutionService.test.ts

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,6 @@ vi.mock('../utils/terminalSerializer.js', () => ({
120120
convertColorToHex: () => '#000000',
121121
ColorMode: { DEFAULT: 0, PALETTE: 1, RGB: 2 },
122122
}));
123-
vi.mock('../utils/systemEncoding.js', () => ({
124-
getCachedEncodingForBuffer: vi.fn().mockReturnValue('utf-8'),
125-
}));
126-
127123
const mockProcessKill = vi
128124
.spyOn(process, 'kill')
129125
.mockImplementation(() => true);
@@ -1030,15 +1026,15 @@ describe('ShellExecutionService', () => {
10301026
});
10311027

10321028
describe('Platform-Specific Behavior', () => {
1033-
it('should use powershell.exe on Windows', async () => {
1029+
it('should use powershell.exe on Windows and prefix the command with chcp 65001 for the PTY session', async () => {
10341030
mockPlatform.mockReturnValue('win32');
10351031
await simulateExecution('dir "foo bar"', (pty) =>
10361032
pty.onExit.mock.calls[0][0]({ exitCode: 0, signal: null }),
10371033
);
10381034

10391035
expect(mockPtySpawn).toHaveBeenCalledWith(
10401036
'powershell.exe',
1041-
['-NoProfile', '-Command', 'dir "foo bar"'],
1037+
['-NoProfile', '-Command', 'chcp 65001 >$null;dir "foo bar"'],
10421038
expect.any(Object),
10431039
);
10441040
});

packages/core/src/services/shellExecutionService.ts

Lines changed: 47 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ import os from 'node:os';
1313
import fs, { mkdirSync } from 'node:fs';
1414
import path from 'node:path';
1515
import type { IPty } from '@lydell/node-pty';
16-
import { getCachedEncodingForBuffer } from '../utils/systemEncoding.js';
1716
import {
1817
getShellConfiguration,
1918
resolveExecutable,
@@ -81,6 +80,40 @@ function ensurePromptvarsDisabled(command: string, shell: ShellType): string {
8180
return `${BASH_SHOPT_GUARD} ${command}`;
8281
}
8382

83+
// On Windows, a new ConPTY session inherits its codepage from the system
84+
// OEMCP (microsoft/terminal `src/host/settings.cpp:41` defaults
85+
// `_uCodePage` to `Globals.uiOEMCP`, set from `GetOEMCP()` in
86+
// `srvinit.cpp:44`). On locales without "Beta: Use Unicode UTF-8 for
87+
// worldwide language support" the OEMCP is a legacy codepage (e.g. 850,
88+
// 866, 936, 932), and conhost converts every byte from the child via
89+
// `MultiByteToWideChar(gci.OutputCP, ...)` in `_stream.cpp:341-343`,
90+
// turning UTF-8 output from child processes (perl, python, node, ...)
91+
// into mojibake.
92+
//
93+
// `CreatePseudoConsole` does not accept a codepage argument
94+
// (microsoft/terminal#9174 — open as a feature request). The only way
95+
// to set the ConPTY codepage is from inside the new session via
96+
// `SetConsoleOutputCP` (intercepted by conhost in `getset.cpp:1144`).
97+
// Prefix the command with `chcp 65001` so the first thing the new
98+
// session does is switch its codepage to UTF-8.
99+
function injectUtf8CodepageForPty(
100+
command: string,
101+
shell: ShellType,
102+
isWindows: boolean,
103+
usingPty: boolean,
104+
): string {
105+
if (!isWindows || !usingPty) {
106+
return command;
107+
}
108+
if (shell === 'powershell') {
109+
return `chcp 65001 >$null;${command}`;
110+
}
111+
if (shell === 'cmd') {
112+
return `chcp 65001>nul&${command}`;
113+
}
114+
return command;
115+
}
116+
84117
/** A structured result from a shell command execution. */
85118
export type ShellExecutionResult = ExecutionResult;
86119

@@ -389,6 +422,7 @@ export class ShellExecutionService {
389422
cwd: string,
390423
shellExecutionConfig: ShellExecutionConfig,
391424
isInteractive: boolean,
425+
usingPty: boolean,
392426
): Promise<{
393427
program: string;
394428
args: string[];
@@ -417,7 +451,13 @@ export class ShellExecutionService {
417451
const resolvedExecutable = resolveExecutable(executable) ?? executable;
418452

419453
const guardedCommand = ensurePromptvarsDisabled(commandToExecute, shell);
420-
const spawnArgs = [...argsPrefix, guardedCommand];
454+
const finalCommand = injectUtf8CodepageForPty(
455+
guardedCommand,
456+
shell,
457+
isWindows,
458+
usingPty,
459+
);
460+
const spawnArgs = [...argsPrefix, finalCommand];
421461

422462
// 2. Prepare Environment
423463
const gitConfigKeys: string[] = [];
@@ -520,6 +560,7 @@ export class ShellExecutionService {
520560
cwd,
521561
shellExecutionConfig,
522562
isInteractive,
563+
false,
523564
);
524565
cmdCleanup = prepared.cleanup;
525566

@@ -620,14 +661,8 @@ export class ShellExecutionService {
620661

621662
const handleOutput = (data: Buffer, stream: 'stdout' | 'stderr') => {
622663
if (!stdoutDecoder || !stderrDecoder) {
623-
const encoding = getCachedEncodingForBuffer(data);
624-
try {
625-
stdoutDecoder = new TextDecoder(encoding);
626-
stderrDecoder = new TextDecoder(encoding);
627-
} catch {
628-
stdoutDecoder = new TextDecoder('utf-8');
629-
stderrDecoder = new TextDecoder('utf-8');
630-
}
664+
stdoutDecoder = new TextDecoder('utf-8');
665+
stderrDecoder = new TextDecoder('utf-8');
631666
}
632667

633668
if (isStreamingRawContent && sniffedBytes < MAX_SNIFF_SIZE) {
@@ -900,6 +935,7 @@ export class ShellExecutionService {
900935
cwd,
901936
shellExecutionConfig,
902937
true,
938+
true,
903939
);
904940
cmdCleanup = prepared.cleanup;
905941

@@ -1115,12 +1151,7 @@ export class ShellExecutionService {
11151151
() =>
11161152
new Promise<void>((resolveChunk) => {
11171153
if (!decoder) {
1118-
const encoding = getCachedEncodingForBuffer(data);
1119-
try {
1120-
decoder = new TextDecoder(encoding);
1121-
} catch {
1122-
decoder = new TextDecoder('utf-8');
1123-
}
1154+
decoder = new TextDecoder('utf-8');
11241155
}
11251156

11261157
if (isStreamingRawContent && sniffedBytes < MAX_SNIFF_SIZE) {

0 commit comments

Comments
 (0)