Browse Source

improve fix

Lee Thomason 1 year ago
parent
commit
ff48ea1bae
1 changed files with 57 additions and 72 deletions
  1. 57 72
      tinyxml2.cpp

+ 57 - 72
tinyxml2.cpp

@@ -467,104 +467,89 @@ void XMLUtil::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length
 }
 
 
-const char* XMLUtil::GetCharacterRef( const char* p, char* value, int* length )
+const char* XMLUtil::GetCharacterRef(const char* p, char* value, int* length)
 {
-    // Presume an entity, and pull it out.
+    // Assume an entity, and pull it out.
     *length = 0;
 
-	static const uint32_t MAX_CODE_POINT = 0x10FFFF;
+    static const uint32_t MAX_CODE_POINT = 0x10FFFF;
 
-    if ( *(p+1) == '#' && *(p+2) ) {
+    if (*(p + 1) == '#' && *(p + 2)) {
         uint32_t ucs = 0;
         ptrdiff_t delta = 0;
         uint32_t mult = 1;
         static const char SEMICOLON = ';';
 
-        if ( *(p+2) == 'x' ) {
+        bool hex = false;
+        uint32_t radix = 10;
+        const char* q = 0;
+        char terminator = '#';
+
+        if (*(p + 2) == 'x') {
             // Hexadecimal.
-            const char* q = p+3;
-            if ( !(*q) ) {
-                return 0;
-            }
+            hex = true;
+            radix = 16;
+            terminator = 'x';
 
-            q = strchr( q, SEMICOLON );
-            if ( !q ) {
-                return 0;
-            }
-            TIXMLASSERT( *q == SEMICOLON );
+            q = p + 3;
+        }
+        else {
+            // Decimal.
+            q = p + 2;
+        }
+        if (!(*q)) {
+            return 0;
+        }
 
-            delta = q-p;
-            --q;
+        q = strchr(q, SEMICOLON);
+        if (!q) {
+            return 0;
+        }
+        TIXMLASSERT(*q == SEMICOLON);
 
-            while ( *q != 'x' ) {
-                uint32_t digit = 0;
+        delta = q - p;
+        --q;
 
-                if ( *q >= '0' && *q <= '9' ) {
-                    digit = *q - '0';
-                }
-                else if ( *q >= 'a' && *q <= 'f' ) {
-                    digit = *q - 'a' + 10;
-                }
-                else if ( *q >= 'A' && *q <= 'F' ) {
-                    digit = *q - 'A' + 10;
-                }
-                else {
-                    return 0;
-                }
-                TIXMLASSERT( digit < 16 );
-                const unsigned int digitScaled = mult * digit;
-                ucs += digitScaled;
-				if (ucs > MAX_CODE_POINT) {
-					return 0;
-				}
+        while (*q != terminator) {
+            uint32_t digit = 0;
 
-                mult *= 16;
-                --q;
+            if (*q >= '0' && *q <= '9') {
+                digit = *q - '0';
             }
-        }
-        else {
-            // Decimal.
-            const char* q = p+2;
-            if ( !(*q) ) {
-                return 0;
+            else if (hex && (*q >= 'a' && *q <= 'f')) {
+                digit = *q - 'a' + 10;
             }
-
-            q = strchr( q, SEMICOLON );
-
-            if ( !q ) {
+            else if (hex && (*q >= 'A' && *q <= 'F')) {
+                digit = *q - 'A' + 10;
+            }
+            else {
                 return 0;
             }
-            TIXMLASSERT( *q == SEMICOLON );
-
-            delta = q-p;
-            --q;
-
-            while ( *q != '#' ) {
-                if ( *q >= '0' && *q <= '9' ) {
-                    const uint32_t digit = *q - '0';
-                    TIXMLASSERT( digit < 10 );
-                    const uint32_t digitScaled = mult * digit;
-                    ucs += digitScaled;
-                    if (ucs > MAX_CODE_POINT) {
-                        return 0;
-                    }
-                }
-                else {
-                    return 0;
-                }
-                mult *= 10;
-                --q;
+            TIXMLASSERT(digit < radix);
+
+            const unsigned int digitScaled = mult * digit;
+            ucs += digitScaled;
+            mult *= radix;       
+            
+            // Security check: could a value exist that is out of range?
+            // Easily; limit to the MAX_CODE_POINT, which also allows for a
+            // bunch of leading zeroes.
+            if (mult > MAX_CODE_POINT) {
+                mult = MAX_CODE_POINT;
             }
+            --q;
+        }
+        // Out of range:
+        if (ucs > MAX_CODE_POINT) {
+            return 0;
         }
         // convert the UCS to UTF-8
-		TIXMLASSERT(ucs <= MAX_CODE_POINT);
-        ConvertUTF32ToUTF8( ucs, value, length );
+        ConvertUTF32ToUTF8(ucs, value, length);
         return p + delta + 1;
     }
-    return p+1;
+    return p + 1;
 }
 
-
 void XMLUtil::ToStr( int v, char* buffer, int bufferSize )
 {
     TIXML_SNPRINTF( buffer, bufferSize, "%d", v );