Skip to content
This repository was archived by the owner on Jul 9, 2025. It is now read-only.

Commit 67d9eda

Browse files
committed
Bug 863025 - Fix nsUniversalDetector overlooking the UTF-16 BOM. r=smontagu
1 parent 72635c5 commit 67d9eda

1 file changed

Lines changed: 23 additions & 20 deletions

File tree

extensions/universalchardet/src/base/nsUniversalDetector.cpp

Lines changed: 23 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -77,31 +77,34 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, uint32_t aLen)
7777
if (mStart)
7878
{
7979
mStart = false;
80-
if (aLen > 2)
81-
switch (aBuf[0])
82-
{
83-
case '\xEF':
84-
if (('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2]))
85-
// EF BB BF UTF-8 encoded BOM
86-
mDetectedCharset = "UTF-8";
80+
if (aLen >= 2) {
81+
switch (aBuf[0]) {
82+
case '\xEF':
83+
if ((aLen > 2) && ('\xBB' == aBuf[1]) && ('\xBF' == aBuf[2])) {
84+
// EF BB BF UTF-8 encoded BOM
85+
mDetectedCharset = "UTF-8";
86+
}
8787
break;
88-
case '\xFE':
89-
if ('\xFF' == aBuf[1])
90-
// FE FF UTF-16, big endian BOM
91-
mDetectedCharset = "UTF-16BE";
88+
case '\xFE':
89+
if ('\xFF' == aBuf[1]) {
90+
// FE FF UTF-16, big endian BOM
91+
mDetectedCharset = "UTF-16BE";
92+
}
9293
break;
93-
case '\xFF':
94-
if ('\xFE' == aBuf[1])
95-
// FF FE UTF-16, little endian BOM
96-
mDetectedCharset = "UTF-16LE";
94+
case '\xFF':
95+
if ('\xFE' == aBuf[1]) {
96+
// FF FE UTF-16, little endian BOM
97+
mDetectedCharset = "UTF-16LE";
98+
}
9799
break;
98100
} // switch
101+
}
99102

100-
if (mDetectedCharset)
101-
{
102-
mDone = true;
103-
return NS_OK;
104-
}
103+
if (mDetectedCharset)
104+
{
105+
mDone = true;
106+
return NS_OK;
107+
}
105108
}
106109

107110
uint32_t i;

0 commit comments

Comments
 (0)