Skip to content
This repository was archived by the owner on Jul 9, 2025. It is now read-only.

Commit f97add6

Browse files
committed
Bug 1296503 - Switch config.status to unicode literals. r=ted
Ironically, the first thing we do with those unicode literals is convert them to byte strings because the build backends don't like them yet.
1 parent 31a17a0 commit f97add6

2 files changed

Lines changed: 48 additions & 42 deletions

File tree

configure.py

Lines changed: 31 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,19 @@
55
from __future__ import print_function, unicode_literals
66

77
import codecs
8-
import json
98
import os
109
import subprocess
1110
import sys
12-
13-
from collections import Iterable
11+
import textwrap
1412

1513

1614
base_dir = os.path.abspath(os.path.dirname(__file__))
1715
sys.path.insert(0, os.path.join(base_dir, 'python', 'mozbuild'))
1816
from mozbuild.configure import ConfigureSandbox
17+
from mozbuild.util import (
18+
indented_repr,
19+
encode,
20+
)
1921

2022

2123
def main(argv):
@@ -60,57 +62,44 @@ def sanitized_bools(v):
6062
print("Creating config.status", file=sys.stderr)
6163
encoding = 'mbcs' if sys.platform == 'win32' else 'utf-8'
6264
with codecs.open('config.status', 'w', encoding) as fh:
63-
fh.write('#!%s\n' % config['PYTHON'])
64-
fh.write('# coding=%s\n' % encoding)
65-
# Because we're serializing as JSON but reading as python, the values
66-
# for True, False and None are true, false and null, which don't exist.
67-
# Define them.
68-
fh.write('true, false, null = True, False, None\n')
65+
fh.write(textwrap.dedent('''\
66+
#!%(python)s
67+
# coding=%(encoding)s
68+
from __future__ import unicode_literals
69+
from mozbuild.util import encode
70+
encoding = '%(encoding)s'
71+
''') % {'python': config['PYTHON'], 'encoding': encoding})
72+
# A lot of the build backend code is currently expecting byte
73+
# strings and breaks in subtle ways with unicode strings. (bug 1296508)
6974
for k, v in sanitized_config.iteritems():
70-
fh.write('%s = ' % k)
71-
json.dump(v, fh, sort_keys=True, indent=4, ensure_ascii=False)
72-
fh.write('\n')
75+
fh.write('%s = encode(%s, encoding)\n' % (k, indented_repr(v)))
7376
fh.write("__all__ = ['topobjdir', 'topsrcdir', 'defines', "
7477
"'non_global_defines', 'substs', 'mozconfig']")
7578

7679
if config.get('MOZ_BUILD_APP') != 'js' or config.get('JS_STANDALONE'):
77-
fh.write('''
78-
if __name__ == '__main__':
79-
args = dict([(name, globals()[name]) for name in __all__])
80-
from mozbuild.config_status import config_status
81-
config_status(**args)
82-
''')
83-
84-
# Running config.status standalone uses byte literals for all the config,
85-
# instead of the unicode literals we have in sanitized_config right now.
86-
# Some values in sanitized_config also have more complex types, such as
87-
# EnumString, which using when calling config_status would currently break
88-
# the build, as well as making it inconsistent with re-running
89-
# config.status. Fortunately, EnumString derives from unicode, so it's
90-
# covered by converting unicode strings.
91-
# Moreover, a lot of the build backend code is currently expecting byte
92-
# strings and breaks in subtle ways with unicode strings.
93-
def encode(v):
94-
if isinstance(v, dict):
95-
return {
96-
encode(k): encode(val)
97-
for k, val in v.iteritems()
98-
}
99-
if isinstance(v, str):
100-
return v
101-
if isinstance(v, unicode):
102-
return v.encode(encoding)
103-
if isinstance(v, Iterable):
104-
return [encode(i) for i in v]
105-
return v
80+
fh.write(textwrap.dedent('''
81+
if __name__ == '__main__':
82+
from mozbuild.config_status import config_status
83+
args = dict([(name, globals()[name]) for name in __all__])
84+
config_status(**args)
85+
'''))
10686

10787
# Other things than us are going to run this file, so we need to give it
10888
# executable permissions.
10989
os.chmod('config.status', 0o755)
11090
if config.get('MOZ_BUILD_APP') != 'js' or config.get('JS_STANDALONE'):
11191
os.environ[b'WRITE_MOZINFO'] = b'1'
11292
from mozbuild.config_status import config_status
113-
return config_status(args=[], **encode(sanitized_config))
93+
94+
# Some values in sanitized_config also have more complex types, such as
95+
# EnumString, which, if used when calling config_status, would currently
96+
# break the build, as well as making it inconsistent with re-running
97+
# config.status. Fortunately, EnumString derives from unicode, so it's
98+
# covered by converting unicode strings.
99+
100+
# A lot of the build backend code is currently expecting byte strings
101+
# and breaks in subtle ways with unicode strings.
102+
return config_status(args=[], **encode(sanitized_config, encoding))
114103
return 0
115104

116105

python/mozbuild/mozbuild/util.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
from collections import (
2626
defaultdict,
27+
Iterable,
2728
OrderedDict,
2829
)
2930
from io import (
@@ -1244,3 +1245,19 @@ def recurse_indented_repr(o, level):
12441245
else:
12451246
yield repr(o)
12461247
return ''.join(recurse_indented_repr(o, 0))
1248+
1249+
1250+
def encode(obj, encoding='utf-8'):
    '''Recursively encode unicode strings with the given encoding.

    Walks ``obj`` and returns a copy in which every text (unicode) string
    has been converted to a byte string:

    - dicts yield a new dict whose keys and values are both encoded;
    - byte strings are returned unchanged;
    - text strings are encoded with ``encoding``;
    - any other iterable (list, tuple, set, generator, ...) yields a
      *list* of its encoded items;
    - everything else (numbers, booleans, None, ...) is returned as is.

    Subclasses of the text type are handled by the text-string branch, so
    they come out as plain byte strings.

    :param obj: the value to encode.
    :param encoding: name of the codec used to encode text strings.
    :return: the encoded counterpart of ``obj``.
    :raises UnicodeEncodeError: if a text string cannot be represented in
        ``encoding``.
    '''
    # Imported locally so the function works both on Python 2, where
    # Iterable lives in `collections`, and on Python 3, where it lives in
    # `collections.abc` (the `collections` alias was removed in 3.10).
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    # `unicode` on Python 2, `str` on Python 3.
    text_type = type(u'')

    if isinstance(obj, dict):
        # items() exists on both Python 2 and 3, unlike iteritems().
        return {
            encode(k, encoding): encode(v, encoding)
            for k, v in obj.items()
        }
    # The bytes check must come before the Iterable check: byte strings
    # are iterable and would otherwise be exploded into a list.
    if isinstance(obj, bytes):
        return obj
    if isinstance(obj, text_type):
        return obj.encode(encoding)
    if isinstance(obj, Iterable):
        return [encode(i, encoding) for i in obj]
    return obj

0 commit comments

Comments
 (0)