Skip to content
This repository was archived by the owner on Jul 9, 2025. It is now read-only.

Commit d170276

Browse files
committed
Bug 860180 - Prefer UTF-16BE/LE to UTF-16. r=hsivonen
1 parent 1c6de2e commit d170276

6 files changed

Lines changed: 14 additions & 40 deletions

File tree

content/base/public/nsContentUtils.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,7 @@ class nsContentUtils
510510
* @return boolean indicating whether a BOM was detected.
511511
*/
512512
static bool CheckForBOM(const unsigned char* aBuffer, uint32_t aLength,
513-
nsACString& aCharset, bool *bigEndian = nullptr);
513+
nsACString& aCharset);
514514

515515
static nsresult GuessCharset(const char *aData, uint32_t aDataLen,
516516
nsACString &aCharset);

content/base/src/nsContentUtils.cpp

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3713,7 +3713,7 @@ nsContentUtils::ConvertStringFromCharset(const nsACString& aCharset,
37133713
/* static */
37143714
bool
37153715
nsContentUtils::CheckForBOM(const unsigned char* aBuffer, uint32_t aLength,
3716-
nsACString& aCharset, bool *bigEndian)
3716+
nsACString& aCharset)
37173717
{
37183718
bool found = true;
37193719
aCharset.Truncate();
@@ -3725,15 +3725,11 @@ nsContentUtils::CheckForBOM(const unsigned char* aBuffer, uint32_t aLength,
37253725
}
37263726
else if (aLength >= 2 &&
37273727
aBuffer[0] == 0xFE && aBuffer[1] == 0xFF) {
3728-
aCharset = "UTF-16";
3729-
if (bigEndian)
3730-
*bigEndian = true;
3728+
aCharset = "UTF-16BE";
37313729
}
37323730
else if (aLength >= 2 &&
37333731
aBuffer[0] == 0xFF && aBuffer[1] == 0xFE) {
3734-
aCharset = "UTF-16";
3735-
if (bigEndian)
3736-
*bigEndian = false;
3732+
aCharset = "UTF-16LE";
37373733
} else {
37383734
found = false;
37393735
}
@@ -3791,16 +3787,7 @@ nsContentUtils::GuessCharset(const char *aData, uint32_t aDataLen,
37913787
(aDataLen >= sizeof(sniffBuf) ? sizeof(sniffBuf) : aDataLen);
37923788
memcpy(sniffBuf, aData, numRead);
37933789

3794-
bool bigEndian;
3795-
if (CheckForBOM(sniffBuf, numRead, aCharset, &bigEndian) &&
3796-
aCharset.EqualsLiteral("UTF-16")) {
3797-
if (bigEndian) {
3798-
aCharset.AppendLiteral("BE");
3799-
}
3800-
else {
3801-
aCharset.AppendLiteral("LE");
3802-
}
3803-
}
3790+
CheckForBOM(sniffBuf, numRead, aCharset);
38043791
}
38053792

38063793
if (aCharset.IsEmpty()) {

content/base/src/nsScriptLoader.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -981,14 +981,14 @@ DetectByteOrderMark(const unsigned char* aBytes, int32_t aLen, nsCString& oChars
981981
if (0xFF == aBytes[1]) {
982982
// FE FF
983983
// UTF-16, big-endian
984-
oCharset.Assign("UTF-16");
984+
oCharset.Assign("UTF-16BE");
985985
}
986986
break;
987987
case 0xFF:
988988
if (0xFE == aBytes[1]) {
989989
// FF FE
990990
// UTF-16, little-endian
991-
oCharset.Assign("UTF-16");
991+
oCharset.Assign("UTF-16LE");
992992
}
993993
break;
994994
}

dom/workers/FileReaderSync.cpp

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -343,21 +343,9 @@ FileReaderSync::GuessCharset(nsIInputStream *aStream, nsACString &aCharset)
343343
sizeof(sniffBuf), &numRead);
344344
NS_ENSURE_SUCCESS(rv, rv);
345345

346-
if (numRead >= 4 &&
347-
sniffBuf[0] == 0x00 &&
348-
sniffBuf[1] == 0x00 &&
349-
sniffBuf[2] == 0xfe &&
350-
sniffBuf[3] == 0xff) {
351-
mCharset = "UTF-32BE";
352-
} else if (numRead >= 4 &&
353-
sniffBuf[0] == 0xff &&
354-
sniffBuf[1] == 0xfe &&
355-
sniffBuf[2] == 0x00 &&
356-
sniffBuf[3] == 0x00) {
357-
mCharset = "UTF-32LE";
358-
} else if (numRead >= 2 &&
359-
sniffBuf[0] == 0xfe &&
360-
sniffBuf[1] == 0xff) {
346+
if (numRead >= 2 &&
347+
sniffBuf[0] == 0xfe &&
348+
sniffBuf[1] == 0xff) {
361349
mCharset = "UTF-16BE";
362350
} else if (numRead >= 2 &&
363351
sniffBuf[0] == 0xff &&

extensions/universalchardet/src/base/nsUniversalDetector.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,12 +88,12 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, uint32_t aLen)
8888
case '\xFE':
8989
if ('\xFF' == aBuf[1])
9090
// FE FF UTF-16, big endian BOM
91-
mDetectedCharset = "UTF-16";
91+
mDetectedCharset = "UTF-16BE";
9292
break;
9393
case '\xFF':
9494
if ('\xFE' == aBuf[1])
9595
// FF FE UTF-16, little endian BOM
96-
mDetectedCharset = "UTF-16";
96+
mDetectedCharset = "UTF-16LE";
9797
break;
9898
} // switch
9999

layout/style/Loader.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -663,9 +663,8 @@ SheetLoadData::OnDetermineCharset(nsIUnicharStreamLoader* aLoader,
663663
if (nsContentUtils::CheckForBOM((const unsigned char*)aSegment.BeginReading(),
664664
aSegment.Length(),
665665
aCharset)) {
666-
// aCharset is now either "UTF-16" or "UTF-8".
667-
// The UTF-16 decoder will re-sniff and swallow the BOM.
668-
// The UTF-8 decoder will swallow the BOM.
666+
// aCharset is now either "UTF-16BE", "UTF-16LE" or "UTF-8"
667+
// whose decoder will swallow the BOM.
669668
mCharset.Assign(aCharset);
670669
#ifdef PR_LOGGING
671670
LOG((" Setting from BOM to: %s", PromiseFlatCString(aCharset).get()));

0 commit comments

Comments
 (0)