Skip to content
This repository was archived by the owner on Jul 9, 2025. It is now read-only.

Commit 4cc8484

Browse files
committed
Bug 1578339 - Use SIMD accelerated encoding conversions in SpiderMonkey. r=jwalden
Differential Revision: https://phabricator.services.mozilla.com/D44470 --HG-- extra : moz-landing-system : lando
1 parent bf6f8c6 commit 4cc8484

15 files changed

Lines changed: 203 additions & 329 deletions

File tree

js/public/CharacterEncoding.h

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#define js_CharacterEncoding_h
99

1010
#include "mozilla/Range.h"
11+
#include "mozilla/Span.h"
1112

1213
#include "js/TypeDecls.h"
1314
#include "js/Utility.h"
@@ -285,25 +286,23 @@ LossyUTF8CharsToNewTwoByteCharsZ(JSContext* cx, const ConstUTF8CharsZ& utf8,
285286
JS_PUBLIC_API size_t GetDeflatedUTF8StringLength(JSFlatString* s);
286287

287288
/*
288-
* Note: Unlike this function, JS_EncodeStringToUTF8BufferPartial in jsapi.h
289-
* does not require flattening the string first. Consider using that function
290-
* instead of this one.
289+
* Encode whole scalar values of |src| into |dst| as UTF-8 until |src| is
290+
* exhausted or too little space is available in |dst| to fit the scalar
291+
* value. Lone surrogates are converted to REPLACEMENT CHARACTER. Return
292+
* the number of bytes of |dst| that were filled.
291293
*
292-
* Encode |src| as UTF8. The caller must either ensure |dst| has enough space
293-
* to encode the entire string or pass the length of the buffer as |dstlenp|,
294-
* in which case the function will encode characters from the string until
295-
* the buffer is exhausted. Does not write the null terminator.
294+
* Use |JS_EncodeStringToUTF8BufferPartial| if your string isn't already
295+
* flat.
296296
*
297-
* If |dstlenp| is provided, it will be updated to hold the number of bytes
298-
* written to the buffer. If |numcharsp| is provided, it will be updated to hold
299-
* the number of Unicode characters written to the buffer (which can be less
300-
* than the length of the string, if the buffer is exhausted before the string
301-
* is fully encoded).
297+
* Given |JSString* str = JS_FORGET_STRING_FLATNESS(src)|,
298+
* if |JS_StringHasLatin1Chars(str)|, then |src| is always fully converted
299+
* if |dst.Length() >= JS_GetStringLength(str) * 2|. Otherwise |src| is
300+
* always fully converted if |dst.Length() >= JS_GetStringLength(str) * 3|.
301+
*
302+
* The exact space required is always |GetDeflatedUTF8StringLength(src)|.
302303
*/
303-
JS_PUBLIC_API void DeflateStringToUTF8Buffer(JSFlatString* src,
304-
mozilla::RangedPtr<char> dst,
305-
size_t* dstlenp = nullptr,
306-
size_t* numcharsp = nullptr);
304+
JS_PUBLIC_API size_t DeflateStringToUTF8Buffer(JSFlatString* src,
305+
mozilla::Span<char> dst);
307306

308307
/*
309308
* The smallest character encoding capable of fully representing a particular

js/src/ctypes/CTypes.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3350,9 +3350,8 @@ static bool ImplicitConvert(JSContext* cx, HandleValue val,
33503350
return false;
33513351
}
33523352

3353-
JS::DeflateStringToUTF8Buffer(
3354-
sourceFlat, mozilla::RangedPtr<char>(*charBuffer, nbytes),
3355-
&nbytes);
3353+
nbytes = JS::DeflateStringToUTF8Buffer(
3354+
sourceFlat, mozilla::MakeSpan(*charBuffer, nbytes));
33563355
(*charBuffer)[nbytes] = 0;
33573356
*freePointer = true;
33583357
break;
@@ -3479,9 +3478,8 @@ static bool ImplicitConvert(JSContext* cx, HandleValue val,
34793478
}
34803479

34813480
char* charBuffer = static_cast<char*>(buffer);
3482-
JS::DeflateStringToUTF8Buffer(
3483-
sourceFlat, mozilla::RangedPtr<char>(charBuffer, nbytes),
3484-
&nbytes);
3481+
nbytes = JS::DeflateStringToUTF8Buffer(
3482+
sourceFlat, mozilla::MakeSpan(charBuffer, nbytes));
34853483

34863484
if (targetLength > nbytes) {
34873485
charBuffer[nbytes] = 0;

js/src/ctypes/Library.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,8 +150,8 @@ JSObject* Library::Create(JSContext* cx, HandleValue path,
150150
return nullptr;
151151
}
152152

153-
JS::DeflateStringToUTF8Buffer(
154-
pathStr, mozilla::RangedPtr<char>(pathBytes.get(), nbytes), &nbytes);
153+
nbytes = JS::DeflateStringToUTF8Buffer(
154+
pathStr, mozilla::MakeSpan(pathBytes.get(), nbytes));
155155
pathBytes[nbytes] = 0;
156156
}
157157

js/src/jsapi-tests/testDeflateStringToUTF8Buffer.cpp

Lines changed: 14 additions & 147 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ BEGIN_TEST(test_DeflateStringToUTF8Buffer) {
1616
// initialized to 0x1.
1717

1818
char actual[100];
19-
mozilla::RangedPtr<char> range = mozilla::RangedPtr<char>(actual, 100);
19+
auto span = mozilla::MakeSpan(actual);
2020

2121
// Test with an ASCII string, which calls JSFlatString::latin1Chars
2222
// to retrieve the characters from the string and generates UTF-8 output
@@ -29,70 +29,25 @@ BEGIN_TEST(test_DeflateStringToUTF8Buffer) {
2929
{
3030
const char expected[] = {0x4F, 0x68, 0x61, 0x69, 0x1};
3131
memset(actual, 0x1, 100);
32-
JS::DeflateStringToUTF8Buffer(flatStr, range);
33-
CHECK_EQUAL(memcmp(actual, expected, sizeof(expected)), 0);
34-
}
35-
36-
{
37-
size_t dstlen = 4;
38-
const char expected[] = {0x4F, 0x68, 0x61, 0x69, 0x1};
39-
memset(actual, 0x1, 100);
40-
JS::DeflateStringToUTF8Buffer(flatStr, range, &dstlen);
41-
CHECK_EQUAL(memcmp(actual, expected, sizeof(expected)), 0);
42-
CHECK_EQUAL(dstlen, 4u);
43-
}
44-
45-
{
46-
size_t numchars = 0;
47-
const char expected[] = {0x4F, 0x68, 0x61, 0x69, 0x1};
48-
memset(actual, 0x1, 100);
49-
JS::DeflateStringToUTF8Buffer(flatStr, range, nullptr, &numchars);
50-
CHECK_EQUAL(memcmp(actual, expected, sizeof(expected)), 0);
51-
CHECK_EQUAL(numchars, 4u);
52-
}
53-
54-
{
55-
size_t dstlen = 4;
56-
size_t numchars = 0;
57-
const char expected[] = {0x4F, 0x68, 0x61, 0x69, 0x1};
58-
memset(actual, 0x1, 100);
59-
JS::DeflateStringToUTF8Buffer(flatStr, range, &dstlen, &numchars);
32+
size_t dstlen = JS::DeflateStringToUTF8Buffer(flatStr, span);
6033
CHECK_EQUAL(memcmp(actual, expected, sizeof(expected)), 0);
6134
CHECK_EQUAL(dstlen, 4u);
62-
CHECK_EQUAL(numchars, 4u);
6335
}
6436

6537
{
66-
size_t dstlen = 3;
67-
size_t numchars = 0;
6838
const char expected[] = {0x4F, 0x68, 0x61, 0x1};
6939
memset(actual, 0x1, 100);
70-
JS::DeflateStringToUTF8Buffer(flatStr, range, &dstlen, &numchars);
40+
size_t dstlen = JS::DeflateStringToUTF8Buffer(flatStr, span.To(3));
7141
CHECK_EQUAL(memcmp(actual, expected, sizeof(expected)), 0);
7242
CHECK_EQUAL(dstlen, 3u);
73-
CHECK_EQUAL(numchars, 3u);
7443
}
7544

7645
{
77-
size_t dstlen = 100;
78-
size_t numchars = 0;
79-
const char expected[] = {0x4F, 0x68, 0x61, 0x69, 0x1};
80-
memset(actual, 0x1, 100);
81-
JS::DeflateStringToUTF8Buffer(flatStr, range, &dstlen, &numchars);
82-
CHECK_EQUAL(memcmp(actual, expected, sizeof(expected)), 0);
83-
CHECK_EQUAL(dstlen, 4u);
84-
CHECK_EQUAL(numchars, 4u);
85-
}
86-
87-
{
88-
size_t dstlen = 0;
89-
size_t numchars = 0;
9046
const unsigned char expected[] = {0x1};
9147
memset(actual, 0x1, 100);
92-
JS::DeflateStringToUTF8Buffer(flatStr, range, &dstlen, &numchars);
48+
size_t dstlen = JS::DeflateStringToUTF8Buffer(flatStr, span.To(0));
9349
CHECK_EQUAL(memcmp(actual, expected, sizeof(expected)), 0);
9450
CHECK_EQUAL(dstlen, 0u);
95-
CHECK_EQUAL(numchars, 0u);
9651
}
9752

9853
// Test with a Latin-1 string, which calls JSFlatString::latin1Chars
@@ -107,90 +62,46 @@ BEGIN_TEST(test_DeflateStringToUTF8Buffer) {
10762
const unsigned char expected[] = {0xC3, 0x93, 0x68, 0xC3,
10863
0xA3, 0xC3, 0xAF, 0x1};
10964
memset(actual, 0x1, 100);
110-
JS::DeflateStringToUTF8Buffer(flatStr, range);
111-
CHECK_EQUAL(memcmp(actual, expected, sizeof(expected)), 0);
112-
}
113-
114-
{
115-
size_t dstlen = 7;
116-
const unsigned char expected[] = {0xC3, 0x93, 0x68, 0xC3,
117-
0xA3, 0xC3, 0xAF, 0x1};
118-
memset(actual, 0x1, 100);
119-
JS::DeflateStringToUTF8Buffer(flatStr, range, &dstlen);
120-
CHECK_EQUAL(memcmp(actual, expected, sizeof(expected)), 0);
121-
CHECK_EQUAL(dstlen, 7u);
122-
}
123-
124-
{
125-
size_t numchars = 0;
126-
const unsigned char expected[] = {0xC3, 0x93, 0x68, 0xC3,
127-
0xA3, 0xC3, 0xAF, 0x1};
128-
memset(actual, 0x1, 100);
129-
JS::DeflateStringToUTF8Buffer(flatStr, range, nullptr, &numchars);
65+
JS::DeflateStringToUTF8Buffer(flatStr, span);
13066
CHECK_EQUAL(memcmp(actual, expected, sizeof(expected)), 0);
131-
CHECK_EQUAL(numchars, 4u);
13267
}
13368

13469
{
135-
size_t dstlen = 7;
136-
size_t numchars = 0;
13770
const unsigned char expected[] = {0xC3, 0x93, 0x68, 0xC3,
13871
0xA3, 0xC3, 0xAF, 0x1};
13972
memset(actual, 0x1, 100);
140-
JS::DeflateStringToUTF8Buffer(flatStr, range, &dstlen, &numchars);
73+
size_t dstlen = JS::DeflateStringToUTF8Buffer(flatStr, span.To(7));
14174
CHECK_EQUAL(memcmp(actual, expected, sizeof(expected)), 0);
14275
CHECK_EQUAL(dstlen, 7u);
143-
CHECK_EQUAL(numchars, 4u);
14476
}
14577

14678
{
14779
// Specify a destination buffer length of 3. That's exactly enough
14880
// space to encode the first two characters, which takes three bytes.
149-
size_t dstlen = 3;
150-
size_t numchars = 0;
15181
const unsigned char expected[] = {0xC3, 0x93, 0x68, 0x1};
15282
memset(actual, 0x1, 100);
153-
JS::DeflateStringToUTF8Buffer(flatStr, range, &dstlen, &numchars);
83+
size_t dstlen = JS::DeflateStringToUTF8Buffer(flatStr, span.To(3));
15484
CHECK_EQUAL(memcmp(actual, expected, sizeof(expected)), 0);
15585
CHECK_EQUAL(dstlen, 3u);
156-
CHECK_EQUAL(numchars, 2u);
15786
}
15887

15988
{
16089
// Specify a destination buffer length of 4. That's only enough space
16190
// to encode the first two characters, which takes three bytes, because
16291
// the third character would take another two bytes.
163-
size_t dstlen = 4;
164-
size_t numchars = 0;
16592
const unsigned char expected[] = {0xC3, 0x93, 0x68, 0x1};
16693
memset(actual, 0x1, 100);
167-
JS::DeflateStringToUTF8Buffer(flatStr, range, &dstlen, &numchars);
94+
size_t dstlen = JS::DeflateStringToUTF8Buffer(flatStr, span.To(4));
16895
CHECK_EQUAL(memcmp(actual, expected, sizeof(expected)), 0);
16996
CHECK_EQUAL(dstlen, 3u);
170-
CHECK_EQUAL(numchars, 2u);
171-
}
172-
173-
{
174-
size_t dstlen = 100;
175-
size_t numchars = 0;
176-
const unsigned char expected[] = {0xC3, 0x93, 0x68, 0xC3,
177-
0xA3, 0xC3, 0xAF, 0x1};
178-
memset(actual, 0x1, 100);
179-
JS::DeflateStringToUTF8Buffer(flatStr, range, &dstlen, &numchars);
180-
CHECK_EQUAL(memcmp(actual, expected, sizeof(expected)), 0);
181-
CHECK_EQUAL(dstlen, 7u);
182-
CHECK_EQUAL(numchars, 4u);
18397
}
18498

18599
{
186-
size_t dstlen = 0;
187-
size_t numchars = 0;
188100
const unsigned char expected[] = {0x1};
189101
memset(actual, 0x1, 100);
190-
JS::DeflateStringToUTF8Buffer(flatStr, range, &dstlen, &numchars);
102+
size_t dstlen = JS::DeflateStringToUTF8Buffer(flatStr, span.To(0));
191103
CHECK_EQUAL(memcmp(actual, expected, sizeof(expected)), 0);
192104
CHECK_EQUAL(dstlen, 0u);
193-
CHECK_EQUAL(numchars, 0u);
194105
}
195106

196107
// Test with a UTF-16 string, which calls JSFlatString::twoByteChars
@@ -204,90 +115,46 @@ BEGIN_TEST(test_DeflateStringToUTF8Buffer) {
204115
const unsigned char expected[] = {0xCE, 0x8C, 0x68, 0xC8,
205116
0x83, 0xD1, 0x97, 0x1};
206117
memset(actual, 0x1, 100);
207-
JS::DeflateStringToUTF8Buffer(flatStr, range);
208-
CHECK_EQUAL(memcmp(actual, expected, sizeof(expected)), 0);
209-
}
210-
211-
{
212-
size_t dstlen = 7;
213-
const unsigned char expected[] = {0xCE, 0x8C, 0x68, 0xC8,
214-
0x83, 0xD1, 0x97, 0x1};
215-
memset(actual, 0x1, 100);
216-
JS::DeflateStringToUTF8Buffer(flatStr, range, &dstlen);
217-
CHECK_EQUAL(memcmp(actual, expected, sizeof(expected)), 0);
218-
CHECK_EQUAL(dstlen, 7u);
219-
}
220-
221-
{
222-
size_t numchars = 0;
223-
const unsigned char expected[] = {0xCE, 0x8C, 0x68, 0xC8,
224-
0x83, 0xD1, 0x97, 0x1};
225-
memset(actual, 0x1, 100);
226-
JS::DeflateStringToUTF8Buffer(flatStr, range, nullptr, &numchars);
118+
JS::DeflateStringToUTF8Buffer(flatStr, span);
227119
CHECK_EQUAL(memcmp(actual, expected, sizeof(expected)), 0);
228-
CHECK_EQUAL(numchars, 4u);
229120
}
230121

231122
{
232-
size_t dstlen = 7;
233-
size_t numchars = 0;
234123
const unsigned char expected[] = {0xCE, 0x8C, 0x68, 0xC8,
235124
0x83, 0xD1, 0x97, 0x1};
236125
memset(actual, 0x1, 100);
237-
JS::DeflateStringToUTF8Buffer(flatStr, range, &dstlen, &numchars);
126+
size_t dstlen = JS::DeflateStringToUTF8Buffer(flatStr, span.To(7));
238127
CHECK_EQUAL(memcmp(actual, expected, sizeof(expected)), 0);
239128
CHECK_EQUAL(dstlen, 7u);
240-
CHECK_EQUAL(numchars, 4u);
241129
}
242130

243131
{
244132
// Specify a destination buffer length of 3. That's exactly enough
245133
// space to encode the first two characters, which takes three bytes.
246-
size_t dstlen = 3;
247-
size_t numchars = 0;
248134
const unsigned char expected[] = {0xCE, 0x8C, 0x68, 0x1};
249135
memset(actual, 0x1, 100);
250-
JS::DeflateStringToUTF8Buffer(flatStr, range, &dstlen, &numchars);
136+
size_t dstlen = JS::DeflateStringToUTF8Buffer(flatStr, span.To(3));
251137
CHECK_EQUAL(memcmp(actual, expected, sizeof(expected)), 0);
252138
CHECK_EQUAL(dstlen, 3u);
253-
CHECK_EQUAL(numchars, 2u);
254139
}
255140

256141
{
257142
// Specify a destination buffer length of 4. That's only enough space
258143
// to encode the first two characters, which takes three bytes, because
259144
// the third character would take another two bytes.
260-
size_t dstlen = 4;
261-
size_t numchars = 0;
262145
const unsigned char expected[] = {0xCE, 0x8C, 0x68, 0x1};
263146
memset(actual, 0x1, 100);
264-
JS::DeflateStringToUTF8Buffer(flatStr, range, &dstlen, &numchars);
147+
size_t dstlen = JS::DeflateStringToUTF8Buffer(flatStr, span.To(4));
265148
CHECK_EQUAL(memcmp(actual, expected, sizeof(expected)), 0);
266149
CHECK_EQUAL(dstlen, 3u);
267-
CHECK_EQUAL(numchars, 2u);
268-
}
269-
270-
{
271-
size_t dstlen = 100;
272-
size_t numchars = 0;
273-
const unsigned char expected[] = {0xCE, 0x8C, 0x68, 0xC8,
274-
0x83, 0xD1, 0x97, 0x1};
275-
memset(actual, 0x1, 100);
276-
JS::DeflateStringToUTF8Buffer(flatStr, range, &dstlen, &numchars);
277-
CHECK_EQUAL(memcmp(actual, expected, sizeof(expected)), 0);
278-
CHECK_EQUAL(dstlen, 7u);
279-
CHECK_EQUAL(numchars, 4u);
280150
}
281151

282152
{
283-
size_t dstlen = 0;
284-
size_t numchars = 0;
285153
const unsigned char expected[] = {0x1};
286154
memset(actual, 0x1, 100);
287-
JS::DeflateStringToUTF8Buffer(flatStr, range, &dstlen, &numchars);
155+
size_t dstlen = JS::DeflateStringToUTF8Buffer(flatStr, span.To(0));
288156
CHECK_EQUAL(memcmp(actual, expected, sizeof(expected)), 0);
289157
CHECK_EQUAL(dstlen, 0u);
290-
CHECK_EQUAL(numchars, 0u);
291158
}
292159

293160
return true;

js/src/shell/js.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6519,9 +6519,8 @@ class ShellSourceHook : public SourceHook {
65196519
return false;
65206520
}
65216521

6522-
size_t dstLen = *length;
6523-
JS::DeflateStringToUTF8Buffer(
6524-
flat, mozilla::RangedPtr<char>(*utf8Source, *length), &dstLen);
6522+
mozilla::DebugOnly<size_t> dstLen = JS::DeflateStringToUTF8Buffer(
6523+
flat, mozilla::MakeSpan(*utf8Source, *length));
65256524
MOZ_ASSERT(dstLen == *length);
65266525
}
65276526

js/src/util/StringBuffer.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
#include "util/StringBuffer.h"
88

9+
#include "mozilla/Latin1.h"
910
#include "mozilla/Range.h"
1011
#include "mozilla/Unused.h"
1112

@@ -62,7 +63,9 @@ bool StringBuffer::inflateChars() {
6263
return false;
6364
}
6465

65-
twoByte.infallibleAppend(latin1Chars().begin(), latin1Chars().length());
66+
twoByte.infallibleGrowByUninitialized(latin1Chars().length());
67+
68+
mozilla::ConvertLatin1toUtf16(mozilla::AsChars(latin1Chars()), twoByte);
6669

6770
cb.destroy();
6871
cb.construct<TwoByteCharBuffer>(std::move(twoByte));

0 commit comments

Comments
 (0)