@@ -13,7 +13,6 @@ import os from 'node:os';
1313import fs , { mkdirSync } from 'node:fs' ;
1414import path from 'node:path' ;
1515import type { IPty } from '@lydell/node-pty' ;
16- import { getCachedEncodingForBuffer } from '../utils/systemEncoding.js' ;
1716import {
1817 getShellConfiguration ,
1918 resolveExecutable ,
@@ -81,6 +80,40 @@ function ensurePromptvarsDisabled(command: string, shell: ShellType): string {
8180 return `${ BASH_SHOPT_GUARD } ${ command } ` ;
8281}
8382
83+ // On Windows, a new ConPTY session inherits its codepage from the system
84+ // OEMCP (microsoft/terminal `src/host/settings.cpp:41` defaults
85+ // `_uCodePage` to `Globals.uiOEMCP`, set from `GetOEMCP()` in
86+ // `srvinit.cpp:44`). On locales without "Beta: Use Unicode UTF-8 for
87+ // worldwide language support" the OEMCP is a legacy codepage (e.g. 850,
88+ // 866, 936, 932), and conhost converts every byte from the child via
89+ // `MultiByteToWideChar(gci.OutputCP, ...)` in `_stream.cpp:341-343`,
90+ // turning UTF-8 output from child processes (perl, python, node, ...)
91+ // into mojibake.
92+ //
93+ // `CreatePseudoConsole` does not accept a codepage argument
94+ // (microsoft/terminal#9174 — open as a feature request). The only way
95+ // to set the ConPTY codepage is from inside the new session via
96+ // `SetConsoleOutputCP` (intercepted by conhost in `getset.cpp:1144`).
97+ // Prefix the command with `chcp 65001` so the first thing the new
98+ // session does is switch its codepage to UTF-8.
99+ function injectUtf8CodepageForPty (
100+ command : string ,
101+ shell : ShellType ,
102+ isWindows : boolean ,
103+ usingPty : boolean ,
104+ ) : string {
105+ if ( ! isWindows || ! usingPty ) {
106+ return command ;
107+ }
108+ if ( shell === 'powershell' ) {
109+ return `chcp 65001 >$null;${ command } ` ;
110+ }
111+ if ( shell === 'cmd' ) {
112+ return `chcp 65001>nul&${ command } ` ;
113+ }
114+ return command ;
115+ }
116+
84117/** A structured result from a shell command execution. */
85118export type ShellExecutionResult = ExecutionResult ;
86119
@@ -389,6 +422,7 @@ export class ShellExecutionService {
389422 cwd : string ,
390423 shellExecutionConfig : ShellExecutionConfig ,
391424 isInteractive : boolean ,
425+ usingPty : boolean ,
392426 ) : Promise < {
393427 program : string ;
394428 args : string [ ] ;
@@ -417,7 +451,13 @@ export class ShellExecutionService {
417451 const resolvedExecutable = resolveExecutable ( executable ) ?? executable ;
418452
419453 const guardedCommand = ensurePromptvarsDisabled ( commandToExecute , shell ) ;
420- const spawnArgs = [ ...argsPrefix , guardedCommand ] ;
454+ const finalCommand = injectUtf8CodepageForPty (
455+ guardedCommand ,
456+ shell ,
457+ isWindows ,
458+ usingPty ,
459+ ) ;
460+ const spawnArgs = [ ...argsPrefix , finalCommand ] ;
421461
422462 // 2. Prepare Environment
423463 const gitConfigKeys : string [ ] = [ ] ;
@@ -520,6 +560,7 @@ export class ShellExecutionService {
520560 cwd ,
521561 shellExecutionConfig ,
522562 isInteractive ,
563+ false ,
523564 ) ;
524565 cmdCleanup = prepared . cleanup ;
525566
@@ -620,14 +661,8 @@ export class ShellExecutionService {
620661
621662 const handleOutput = ( data : Buffer , stream : 'stdout' | 'stderr' ) => {
622663 if ( ! stdoutDecoder || ! stderrDecoder ) {
623- const encoding = getCachedEncodingForBuffer ( data ) ;
624- try {
625- stdoutDecoder = new TextDecoder ( encoding ) ;
626- stderrDecoder = new TextDecoder ( encoding ) ;
627- } catch {
628- stdoutDecoder = new TextDecoder ( 'utf-8' ) ;
629- stderrDecoder = new TextDecoder ( 'utf-8' ) ;
630- }
664+ stdoutDecoder = new TextDecoder ( 'utf-8' ) ;
665+ stderrDecoder = new TextDecoder ( 'utf-8' ) ;
631666 }
632667
633668 if ( isStreamingRawContent && sniffedBytes < MAX_SNIFF_SIZE ) {
@@ -900,6 +935,7 @@ export class ShellExecutionService {
900935 cwd ,
901936 shellExecutionConfig ,
902937 true ,
938+ true ,
903939 ) ;
904940 cmdCleanup = prepared . cleanup ;
905941
@@ -1115,12 +1151,7 @@ export class ShellExecutionService {
11151151 ( ) =>
11161152 new Promise < void > ( ( resolveChunk ) => {
11171153 if ( ! decoder ) {
1118- const encoding = getCachedEncodingForBuffer ( data ) ;
1119- try {
1120- decoder = new TextDecoder ( encoding ) ;
1121- } catch {
1122- decoder = new TextDecoder ( 'utf-8' ) ;
1123- }
1154+ decoder = new TextDecoder ( 'utf-8' ) ;
11241155 }
11251156
11261157 if ( isStreamingRawContent && sniffedBytes < MAX_SNIFF_SIZE ) {
0 commit comments