ソースを参照

Merge pull request #510 from leethomason/kezenator-line-numbering

Kezenator line numbering
Lee Thomason 9 年 前
コミット
c3a19156ff
4 ファイル変更270 行追加79 行削除
  1. 11 3
      readme.md
  2. 79 54
      tinyxml2.cpp
  3. 41 21
      tinyxml2.h
  4. 139 1
      xmltest.cpp

+ 11 - 3
readme.md

@@ -88,9 +88,8 @@ Advantages of TinyXML-2
 
 Advantages of TinyXML-1
 
-1.  Can report the location of parsing errors.
-2.  Support for some C++ STL conventions: streams and strings
-3.  Very mature and well debugged code base.
+1.  Support for some C++ STL conventions: streams and strings
+2.  Very mature and well debugged code base.
 
 Features
 --------
@@ -157,6 +156,15 @@ However, you may also use COLLAPSE_WHITESPACE, which will:
 Note that (currently) there is a performance impact for using COLLAPSE_WHITESPACE.
 It essentially causes the XML to be parsed twice.
 
+#### Error Reporting
+
+TinyXML-2 reports the line number of any errors in an XML document that
+cannot be parsed correctly. In addition, all nodes (elements, declarations,
+text, comments etc.) and attributes have a line number recorded as they are parsed.
+This allows an application that performs additional validation of the parsed
+XML document (e.g. application-implemented DTD validation) to report
+line number information in its errors.
+
 ### Entities
 
 TinyXML-2 recognizes the pre-defined "character entities", meaning special

+ 79 - 54
tinyxml2.cpp

@@ -189,10 +189,11 @@ void StrPair::SetStr( const char* str, int flags )
 }
 
 
-char* StrPair::ParseText( char* p, const char* endTag, int strFlags )
+char* StrPair::ParseText( char* p, const char* endTag, int strFlags, int* curLineNumPtr )
 {
     TIXMLASSERT( p );
     TIXMLASSERT( endTag && *endTag );
+	TIXMLASSERT(curLineNumPtr);
 
     char* start = p;
     char  endChar = *endTag;
@@ -203,6 +204,8 @@ char* StrPair::ParseText( char* p, const char* endTag, int strFlags )
         if ( *p == endChar && strncmp( p, endTag, length ) == 0 ) {
             Set( start, p, strFlags );
             return p + length;
+        } else if (*p == '\n') {
+            ++(*curLineNumPtr);
         }
         ++p;
         TIXMLASSERT( p );
@@ -236,7 +239,7 @@ void StrPair::CollapseWhitespace()
     // Adjusting _start would cause undefined behavior on delete[]
     TIXMLASSERT( ( _flags & NEEDS_DELETE ) == 0 );
     // Trim leading space.
-    _start = XMLUtil::SkipWhiteSpace( _start );
+    _start = XMLUtil::SkipWhiteSpace( _start, 0 );
 
     if ( *_start ) {
         const char* p = _start;	// the read pointer
@@ -244,7 +247,7 @@ void StrPair::CollapseWhitespace()
 
         while( *p ) {
             if ( XMLUtil::IsWhiteSpace( *p )) {
-                p = XMLUtil::SkipWhiteSpace( p );
+                p = XMLUtil::SkipWhiteSpace( p, 0 );
                 if ( *p == 0 ) {
                     break;    // don't write to q; this trims the trailing space.
                 }
@@ -637,7 +640,8 @@ char* XMLDocument::Identify( char* p, XMLNode** node )
     TIXMLASSERT( node );
     TIXMLASSERT( p );
     char* const start = p;
-    p = XMLUtil::SkipWhiteSpace( p );
+    int const startLine = _parseCurLineNum;
+    p = XMLUtil::SkipWhiteSpace( p, &_parseCurLineNum );
     if( !*p ) {
         *node = 0;
         TIXMLASSERT( p );
@@ -663,12 +667,14 @@ char* XMLDocument::Identify( char* p, XMLNode** node )
     if ( XMLUtil::StringEqual( p, xmlHeader, xmlHeaderLen ) ) {
         TIXMLASSERT( sizeof( XMLDeclaration ) == _commentPool.ItemSize() );
         returnNode = new (_commentPool.Alloc()) XMLDeclaration( this );
+        returnNode->_parseLineNum = _parseCurLineNum;
         returnNode->_memPool = &_commentPool;
         p += xmlHeaderLen;
     }
     else if ( XMLUtil::StringEqual( p, commentHeader, commentHeaderLen ) ) {
         TIXMLASSERT( sizeof( XMLComment ) == _commentPool.ItemSize() );
         returnNode = new (_commentPool.Alloc()) XMLComment( this );
+        returnNode->_parseLineNum = _parseCurLineNum;
         returnNode->_memPool = &_commentPool;
         p += commentHeaderLen;
     }
@@ -676,6 +682,7 @@ char* XMLDocument::Identify( char* p, XMLNode** node )
         TIXMLASSERT( sizeof( XMLText ) == _textPool.ItemSize() );
         XMLText* text = new (_textPool.Alloc()) XMLText( this );
         returnNode = text;
+        returnNode->_parseLineNum = _parseCurLineNum;
         returnNode->_memPool = &_textPool;
         p += cdataHeaderLen;
         text->SetCData( true );
@@ -683,12 +690,14 @@ char* XMLDocument::Identify( char* p, XMLNode** node )
     else if ( XMLUtil::StringEqual( p, dtdHeader, dtdHeaderLen ) ) {
         TIXMLASSERT( sizeof( XMLUnknown ) == _commentPool.ItemSize() );
         returnNode = new (_commentPool.Alloc()) XMLUnknown( this );
+        returnNode->_parseLineNum = _parseCurLineNum;
         returnNode->_memPool = &_commentPool;
         p += dtdHeaderLen;
     }
     else if ( XMLUtil::StringEqual( p, elementHeader, elementHeaderLen ) ) {
         TIXMLASSERT( sizeof( XMLElement ) == _elementPool.ItemSize() );
         returnNode = new (_elementPool.Alloc()) XMLElement( this );
+        returnNode->_parseLineNum = _parseCurLineNum;
         returnNode->_memPool = &_elementPool;
         p += elementHeaderLen;
     }
@@ -696,7 +705,9 @@ char* XMLDocument::Identify( char* p, XMLNode** node )
         TIXMLASSERT( sizeof( XMLText ) == _textPool.ItemSize() );
         returnNode = new (_textPool.Alloc()) XMLText( this );
         returnNode->_memPool = &_textPool;
+        returnNode->_parseLineNum = _parseCurLineNum; // Report line of first non-whitespace character
         p = start;	// Back it up, all the text counts.
+        _parseCurLineNum = startLine;
     }
 
     TIXMLASSERT( returnNode );
@@ -725,6 +736,7 @@ bool XMLDocument::Accept( XMLVisitor* visitor ) const
 XMLNode::XMLNode( XMLDocument* doc ) :
     _document( doc ),
     _parent( 0 ),
+    _parseLineNum( 0 ),
     _firstChild( 0 ), _lastChild( 0 ),
     _prev( 0 ), _next( 0 ),
 	_userData( 0 ),
@@ -942,7 +954,7 @@ const XMLElement* XMLNode::PreviousSiblingElement( const char* name ) const
 }
 
 
-char* XMLNode::ParseDeep( char* p, StrPair* parentEnd )
+char* XMLNode::ParseDeep( char* p, StrPair* parentEnd, int* curLineNumPtr )
 {
     // This is a recursive method, but thinking about it "at the current level"
     // it is a pretty simple flat list:
@@ -970,12 +982,14 @@ char* XMLNode::ParseDeep( char* p, StrPair* parentEnd )
             break;
         }
 
+        int initialLineNum = node->_parseLineNum;
+
         StrPair endTag;
-        p = node->ParseDeep( p, &endTag );
+        p = node->ParseDeep( p, &endTag, curLineNumPtr );
         if ( !p ) {
             DeleteNode( node );
             if ( !_document->Error() ) {
-                _document->SetError( XML_ERROR_PARSING, 0, 0 );
+                _document->SetError( XML_ERROR_PARSING, 0, 0, initialLineNum);
             }
             break;
         }
@@ -995,7 +1009,7 @@ char* XMLNode::ParseDeep( char* p, StrPair* parentEnd )
                 }
             }
             if ( !wellLocated ) {
-                _document->SetError( XML_ERROR_PARSING_DECLARATION, decl->Value(), 0 );
+                _document->SetError( XML_ERROR_PARSING_DECLARATION, decl->Value(), 0, initialLineNum);
                 DeleteNode( node );
                 break;
             }
@@ -1030,7 +1044,7 @@ char* XMLNode::ParseDeep( char* p, StrPair* parentEnd )
                 }
             }
             if ( mismatch ) {
-                _document->SetError( XML_ERROR_MISMATCHED_ELEMENT, ele->Name(), 0 );
+                _document->SetError( XML_ERROR_MISMATCHED_ELEMENT, ele->Name(), 0, initialLineNum);
                 DeleteNode( node );
                 break;
             }
@@ -1077,13 +1091,13 @@ const XMLElement* XMLNode::ToElementWithName( const char* name ) const
 }
 
 // --------- XMLText ---------- //
-char* XMLText::ParseDeep( char* p, StrPair* )
+char* XMLText::ParseDeep( char* p, StrPair*, int* curLineNumPtr )
 {
     const char* start = p;
     if ( this->CData() ) {
-        p = _value.ParseText( p, "]]>", StrPair::NEEDS_NEWLINE_NORMALIZATION );
+        p = _value.ParseText( p, "]]>", StrPair::NEEDS_NEWLINE_NORMALIZATION, curLineNumPtr );
         if ( !p ) {
-            _document->SetError( XML_ERROR_PARSING_CDATA, start, 0 );
+            _document->SetError( XML_ERROR_PARSING_CDATA, start, 0, _parseLineNum );
         }
         return p;
     }
@@ -1093,12 +1107,12 @@ char* XMLText::ParseDeep( char* p, StrPair* )
             flags |= StrPair::NEEDS_WHITESPACE_COLLAPSING;
         }
 
-        p = _value.ParseText( p, "<", flags );
+        p = _value.ParseText( p, "<", flags, curLineNumPtr );
         if ( p && *p ) {
             return p-1;
         }
         if ( !p ) {
-            _document->SetError( XML_ERROR_PARSING_TEXT, start, 0 );
+            _document->SetError( XML_ERROR_PARSING_TEXT, start, 0, _parseLineNum );
         }
     }
     return 0;
@@ -1142,13 +1156,13 @@ XMLComment::~XMLComment()
 }
 
 
-char* XMLComment::ParseDeep( char* p, StrPair* )
+char* XMLComment::ParseDeep( char* p, StrPair*, int* curLineNumPtr )
 {
     // Comment parses as text.
     const char* start = p;
-    p = _value.ParseText( p, "-->", StrPair::COMMENT );
+    p = _value.ParseText( p, "-->", StrPair::COMMENT, curLineNumPtr );
     if ( p == 0 ) {
-        _document->SetError( XML_ERROR_PARSING_COMMENT, start, 0 );
+        _document->SetError( XML_ERROR_PARSING_COMMENT, start, 0, _parseLineNum );
     }
     return p;
 }
@@ -1192,13 +1206,13 @@ XMLDeclaration::~XMLDeclaration()
 }
 
 
-char* XMLDeclaration::ParseDeep( char* p, StrPair* )
+char* XMLDeclaration::ParseDeep( char* p, StrPair*, int* curLineNumPtr )
 {
     // Declaration parses as text.
     const char* start = p;
-    p = _value.ParseText( p, "?>", StrPair::NEEDS_NEWLINE_NORMALIZATION );
+    p = _value.ParseText( p, "?>", StrPair::NEEDS_NEWLINE_NORMALIZATION, curLineNumPtr );
     if ( p == 0 ) {
-        _document->SetError( XML_ERROR_PARSING_DECLARATION, start, 0 );
+        _document->SetError( XML_ERROR_PARSING_DECLARATION, start, 0, _parseLineNum );
     }
     return p;
 }
@@ -1241,14 +1255,14 @@ XMLUnknown::~XMLUnknown()
 }
 
 
-char* XMLUnknown::ParseDeep( char* p, StrPair* )
+char* XMLUnknown::ParseDeep( char* p, StrPair*, int* curLineNumPtr )
 {
     // Unknown parses as text.
     const char* start = p;
 
-    p = _value.ParseText( p, ">", StrPair::NEEDS_NEWLINE_NORMALIZATION );
+    p = _value.ParseText( p, ">", StrPair::NEEDS_NEWLINE_NORMALIZATION, curLineNumPtr );
     if ( !p ) {
-        _document->SetError( XML_ERROR_PARSING_UNKNOWN, start, 0 );
+        _document->SetError( XML_ERROR_PARSING_UNKNOWN, start, 0, _parseLineNum );
     }
     return p;
 }
@@ -1290,7 +1304,7 @@ const char* XMLAttribute::Value() const
     return _value.GetStr();
 }
 
-char* XMLAttribute::ParseDeep( char* p, bool processEntities )
+char* XMLAttribute::ParseDeep( char* p, bool processEntities, int* curLineNumPtr )
 {
     // Parse using the name rules: bug fix, was using ParseText before
     p = _name.ParseName( p );
@@ -1299,13 +1313,13 @@ char* XMLAttribute::ParseDeep( char* p, bool processEntities )
     }
 
     // Skip white space before =
-    p = XMLUtil::SkipWhiteSpace( p );
+    p = XMLUtil::SkipWhiteSpace( p, curLineNumPtr );
     if ( *p != '=' ) {
         return 0;
     }
 
     ++p;	// move up to opening quote
-    p = XMLUtil::SkipWhiteSpace( p );
+    p = XMLUtil::SkipWhiteSpace( p, curLineNumPtr );
     if ( *p != '\"' && *p != '\'' ) {
         return 0;
     }
@@ -1313,7 +1327,7 @@ char* XMLAttribute::ParseDeep( char* p, bool processEntities )
     char endTag[2] = { *p, 0 };
     ++p;	// move past opening quote
 
-    p = _value.ParseText( p, endTag, processEntities ? StrPair::ATTRIBUTE_VALUE : StrPair::ATTRIBUTE_VALUE_LEAVE_ENTITIES );
+    p = _value.ParseText( p, endTag, processEntities ? StrPair::ATTRIBUTE_VALUE : StrPair::ATTRIBUTE_VALUE_LEAVE_ENTITIES, curLineNumPtr );
     return p;
 }
 
@@ -1747,16 +1761,16 @@ void XMLElement::DeleteAttribute( const char* name )
 }
 
 
-char* XMLElement::ParseAttributes( char* p )
+char* XMLElement::ParseAttributes( char* p, int* curLineNumPtr )
 {
     const char* start = p;
     XMLAttribute* prevAttribute = 0;
 
     // Read the attributes.
     while( p ) {
-        p = XMLUtil::SkipWhiteSpace( p );
+        p = XMLUtil::SkipWhiteSpace( p, curLineNumPtr );
         if ( !(*p) ) {
-            _document->SetError( XML_ERROR_PARSING_ELEMENT, start, Name() );
+            _document->SetError( XML_ERROR_PARSING_ELEMENT, start, Name(), _parseLineNum );
             return 0;
         }
 
@@ -1764,11 +1778,14 @@ char* XMLElement::ParseAttributes( char* p )
         if (XMLUtil::IsNameStartChar( *p ) ) {
             XMLAttribute* attrib = CreateAttribute();
             TIXMLASSERT( attrib );
+            attrib->_parseLineNum = _document->_parseCurLineNum;
+
+            int attrLineNum = attrib->_parseLineNum;
 
-            p = attrib->ParseDeep( p, _document->ProcessEntities() );
+            p = attrib->ParseDeep( p, _document->ProcessEntities(), curLineNumPtr );
             if ( !p || Attribute( attrib->Name() ) ) {
                 DeleteAttribute( attrib );
-                _document->SetError( XML_ERROR_PARSING_ATTRIBUTE, start, p );
+                _document->SetError( XML_ERROR_PARSING_ATTRIBUTE, start, p, attrLineNum );
                 return 0;
             }
             // There is a minor bug here: if the attribute in the source xml
@@ -1795,7 +1812,7 @@ char* XMLElement::ParseAttributes( char* p )
             return p+2;	// done; sealed element.
         }
         else {
-            _document->SetError( XML_ERROR_PARSING_ELEMENT, start, p );
+            _document->SetError( XML_ERROR_PARSING_ELEMENT, start, p, _parseLineNum );
             return 0;
         }
     }
@@ -1825,10 +1842,10 @@ XMLAttribute* XMLElement::CreateAttribute()
 //	<ele></ele>
 //	<ele>foo<b>bar</b></ele>
 //
-char* XMLElement::ParseDeep( char* p, StrPair* strPair )
+char* XMLElement::ParseDeep( char* p, StrPair* strPair, int* curLineNumPtr )
 {
     // Read the element name.
-    p = XMLUtil::SkipWhiteSpace( p );
+    p = XMLUtil::SkipWhiteSpace( p, curLineNumPtr );
 
     // The closing element is the </element> form. It is
     // parsed just like a regular element then deleted from
@@ -1843,12 +1860,12 @@ char* XMLElement::ParseDeep( char* p, StrPair* strPair )
         return 0;
     }
 
-    p = ParseAttributes( p );
+    p = ParseAttributes( p, curLineNumPtr );
     if ( !p || !*p || _closingType ) {
         return p;
     }
 
-    p = XMLNode::ParseDeep( p, strPair );
+    p = XMLNode::ParseDeep( p, strPair, curLineNumPtr );
     return p;
 }
 
@@ -2071,7 +2088,7 @@ XMLError XMLDocument::LoadFile( const char* filename )
     Clear();
     FILE* fp = callfopen( filename, "rb" );
     if ( !fp ) {
-        SetError( XML_ERROR_FILE_NOT_FOUND, filename, 0 );
+        SetError( XML_ERROR_FILE_NOT_FOUND, filename, 0, 0 );
         return _errorID;
     }
     LoadFile( fp );
@@ -2108,7 +2125,7 @@ XMLError XMLDocument::LoadFile( FILE* fp )
 
     fseek( fp, 0, SEEK_SET );
     if ( fgetc( fp ) == EOF && ferror( fp ) != 0 ) {
-        SetError( XML_ERROR_FILE_READ_ERROR, 0, 0 );
+        SetError( XML_ERROR_FILE_READ_ERROR, 0, 0, 0 );
         return _errorID;
     }
 
@@ -2116,19 +2133,19 @@ XMLError XMLDocument::LoadFile( FILE* fp )
     const long filelength = ftell( fp );
     fseek( fp, 0, SEEK_SET );
     if ( filelength == -1L ) {
-        SetError( XML_ERROR_FILE_READ_ERROR, 0, 0 );
+        SetError( XML_ERROR_FILE_READ_ERROR, 0, 0, 0 );
         return _errorID;
     }
     TIXMLASSERT( filelength >= 0 );
 
     if ( !LongFitsIntoSizeTMinusOne<>::Fits( filelength ) ) {
         // Cannot handle files which won't fit in buffer together with null terminator
-        SetError( XML_ERROR_FILE_READ_ERROR, 0, 0 );
+        SetError( XML_ERROR_FILE_READ_ERROR, 0, 0, 0 );
         return _errorID;
     }
 
     if ( filelength == 0 ) {
-        SetError( XML_ERROR_EMPTY_DOCUMENT, 0, 0 );
+        SetError( XML_ERROR_EMPTY_DOCUMENT, 0, 0, 0 );
         return _errorID;
     }
 
@@ -2137,7 +2154,7 @@ XMLError XMLDocument::LoadFile( FILE* fp )
     _charBuffer = new char[size+1];
     size_t read = fread( _charBuffer, 1, size, fp );
     if ( read != size ) {
-        SetError( XML_ERROR_FILE_READ_ERROR, 0, 0 );
+        SetError( XML_ERROR_FILE_READ_ERROR, 0, 0, 0 );
         return _errorID;
     }
 
@@ -2152,7 +2169,7 @@ XMLError XMLDocument::SaveFile( const char* filename, bool compact )
 {
     FILE* fp = callfopen( filename, "w" );
     if ( !fp ) {
-        SetError( XML_ERROR_FILE_COULD_NOT_BE_OPENED, filename, 0 );
+        SetError( XML_ERROR_FILE_COULD_NOT_BE_OPENED, filename, 0, 0 );
         return _errorID;
     }
     SaveFile(fp, compact);
@@ -2177,7 +2194,7 @@ XMLError XMLDocument::Parse( const char* p, size_t len )
     Clear();
 
     if ( len == 0 || !p || !*p ) {
-        SetError( XML_ERROR_EMPTY_DOCUMENT, 0, 0 );
+        SetError( XML_ERROR_EMPTY_DOCUMENT, 0, 0, 0 );
         return _errorID;
     }
     if ( len == (size_t)(-1) ) {
@@ -2215,13 +2232,14 @@ void XMLDocument::Print( XMLPrinter* streamer ) const
 }
 
 
-void XMLDocument::SetError( XMLError error, const char* str1, const char* str2 )
+void XMLDocument::SetError( XMLError error, const char* str1, const char* str2, int lineNum )
 {
     TIXMLASSERT( error >= 0 && error < XML_ERROR_COUNT );
     _errorID = error;
 	
 	_errorStr1.Reset();
 	_errorStr2.Reset();
+    _errorLineNum = lineNum;
 
 	if (str1)
 		_errorStr1.SetStr(str1);
@@ -2229,14 +2247,19 @@ void XMLDocument::SetError( XMLError error, const char* str1, const char* str2 )
 		_errorStr2.SetStr(str2);
 }
 
-const char* XMLDocument::ErrorName() const
+/*static*/ const char* XMLDocument::ErrorIDToName(XMLError errorID)
 {
-	TIXMLASSERT( _errorID >= 0 && _errorID < XML_ERROR_COUNT );
-    const char* errorName = _errorNames[_errorID];
+	TIXMLASSERT( errorID >= 0 && errorID < XML_ERROR_COUNT );
+    const char* errorName = _errorNames[errorID];
     TIXMLASSERT( errorName && errorName[0] );
     return errorName;
 }
 
+const char* XMLDocument::ErrorName() const
+{
+    return ErrorIDToName(_errorID);
+}
+
 void XMLDocument::PrintError() const
 {
     if ( Error() ) {
@@ -2254,8 +2277,8 @@ void XMLDocument::PrintError() const
         // Should check INT_MIN <= _errorID && _errorId <= INT_MAX, but that
         // causes a clang "always true" -Wtautological-constant-out-of-range-compare warning
         TIXMLASSERT( 0 <= _errorID && XML_ERROR_COUNT - 1 <= INT_MAX );
-        printf( "XMLDocument error id=%d '%s' str1=%s str2=%s\n",
-                static_cast<int>( _errorID ), ErrorName(), buf1, buf2 );
+        printf( "XMLDocument error id=%d '%s' str1=%s str2=%s line=%d\n",
+                static_cast<int>( _errorID ), ErrorName(), buf1, buf2, _errorLineNum );
     }
 }
 
@@ -2263,14 +2286,16 @@ void XMLDocument::Parse()
 {
     TIXMLASSERT( NoChildren() ); // Clear() must have been called previously
     TIXMLASSERT( _charBuffer );
+    _parseCurLineNum = 1;
+    _parseLineNum = 1;
     char* p = _charBuffer;
-    p = XMLUtil::SkipWhiteSpace( p );
+    p = XMLUtil::SkipWhiteSpace( p, &_parseCurLineNum );
     p = const_cast<char*>( XMLUtil::ReadBOM( p, &_writeBOM ) );
     if ( !*p ) {
-        SetError( XML_ERROR_EMPTY_DOCUMENT, 0, 0 );
+        SetError( XML_ERROR_EMPTY_DOCUMENT, 0, 0, 0 );
         return;
     }
-    ParseDeep(p, 0 );
+    ParseDeep(p, 0, &_parseCurLineNum );
 }
 
 XMLPrinter::XMLPrinter( FILE* file, bool compact, int depth ) :

+ 41 - 21
tinyxml2.h

@@ -160,7 +160,7 @@ public:
 
     void SetStr( const char* str, int flags=0 );
 
-    char* ParseText( char* in, const char* endTag, int strFlags );
+    char* ParseText( char* in, const char* endTag, int strFlags, int* curLineNumPtr );
     char* ParseName( char* in );
 
     void TransferTo( StrPair* other );
@@ -530,16 +530,20 @@ enum XMLError {
 class XMLUtil
 {
 public:
-    static const char* SkipWhiteSpace( const char* p )	{
+    static const char* SkipWhiteSpace( const char* p, int* curLineNumPtr )	{
         TIXMLASSERT( p );
+
         while( IsWhiteSpace(*p) ) {
+            if (curLineNumPtr && *p == '\n') {
+                ++(*curLineNumPtr);
+            }
             ++p;
         }
         TIXMLASSERT( p );
         return p;
     }
-    static char* SkipWhiteSpace( char* p )				{
-        return const_cast<char*>( SkipWhiteSpace( const_cast<const char*>(p) ) );
+    static char* SkipWhiteSpace( char* p, int* curLineNumPtr )				{
+        return const_cast<char*>( SkipWhiteSpace( const_cast<const char*>(p), curLineNumPtr ) );
     }
 
     // Anything in the high order range of UTF-8 is assumed to not be whitespace. This isn't
@@ -706,6 +710,9 @@ public:
     */
     void SetValue( const char* val, bool staticMem=false );
 
+    /// Gets the line number the node is in, if the document was parsed from a file or an in-memory string.
+    int GetLineNum() const { return _parseLineNum; }
+
     /// Get the parent of this node on the DOM.
     const XMLNode*	Parent() const			{
         return _parent;
@@ -889,11 +896,12 @@ protected:
     XMLNode( XMLDocument* );
     virtual ~XMLNode();
 
-    virtual char* ParseDeep( char*, StrPair* );
+    virtual char* ParseDeep( char*, StrPair*, int* );
 
     XMLDocument*	_document;
     XMLNode*		_parent;
     mutable StrPair	_value;
+    int             _parseLineNum;
 
     XMLNode*		_firstChild;
     XMLNode*		_lastChild;
@@ -956,7 +964,7 @@ protected:
     XMLText( XMLDocument* doc )	: XMLNode( doc ), _isCData( false )	{}
     virtual ~XMLText()												{}
 
-    char* ParseDeep( char*, StrPair* endTag );
+    char* ParseDeep( char*, StrPair* endTag, int* curLineNumPtr );
 
 private:
     bool _isCData;
@@ -987,7 +995,7 @@ protected:
     XMLComment( XMLDocument* doc );
     virtual ~XMLComment();
 
-    char* ParseDeep( char*, StrPair* endTag );
+    char* ParseDeep( char*, StrPair* endTag, int* curLineNumPtr);
 
 private:
     XMLComment( const XMLComment& );	// not supported
@@ -1026,7 +1034,7 @@ protected:
     XMLDeclaration( XMLDocument* doc );
     virtual ~XMLDeclaration();
 
-    char* ParseDeep( char*, StrPair* endTag );
+    char* ParseDeep( char*, StrPair* endTag, int* curLineNumPtr );
 
 private:
     XMLDeclaration( const XMLDeclaration& );	// not supported
@@ -1061,7 +1069,7 @@ protected:
     XMLUnknown( XMLDocument* doc );
     virtual ~XMLUnknown();
 
-    char* ParseDeep( char*, StrPair* endTag );
+    char* ParseDeep( char*, StrPair* endTag, int* curLineNumPtr );
 
 private:
     XMLUnknown( const XMLUnknown& );	// not supported
@@ -1086,6 +1094,9 @@ public:
     /// The value of the attribute.
     const char* Value() const;
 
+    /// Gets the line number the attribute is in, if the document was parsed from a file or an in-memory string.
+    int GetLineNum() const { return _parseLineNum; }
+
     /// The next attribute in the list.
     const XMLAttribute* Next() const {
         return _next;
@@ -1173,10 +1184,11 @@ private:
     void operator=( const XMLAttribute& );	// not supported
     void SetName( const char* name );
 
-    char* ParseDeep( char* p, bool processEntities );
+    char* ParseDeep( char* p, bool processEntities, int* curLineNumPtr );
 
     mutable StrPair _name;
     mutable StrPair _value;
+    int             _parseLineNum;
     XMLAttribute*   _next;
     MemPool*        _memPool;
 };
@@ -1548,7 +1560,7 @@ public:
     virtual bool ShallowEqual( const XMLNode* compare ) const;
 
 protected:
-    char* ParseDeep( char* p, StrPair* endTag );
+    char* ParseDeep( char* p, StrPair* endTag, int* curLineNumPtr );
 
 private:
     XMLElement( XMLDocument* doc );
@@ -1561,7 +1573,7 @@ private:
     }
     XMLAttribute* FindOrCreateAttribute( const char* name );
     //void LinkAttribute( XMLAttribute* attrib );
-    char* ParseAttributes( char* p );
+    char* ParseAttributes( char* p, int* curLineNumPtr );
     static void DeleteAttribute( XMLAttribute* attribute );
     XMLAttribute* CreateAttribute();
 
@@ -1739,10 +1751,10 @@ public:
     */
     void DeleteNode( XMLNode* node );
 
-    void SetError( XMLError error, const char* str1, const char* str2 );
+    void SetError( XMLError error, const char* str1, const char* str2, int lineNum );
 
     void ClearError() {
-        SetError(XML_SUCCESS, 0, 0);
+        SetError(XML_SUCCESS, 0, 0, 0);
     }
 
     /// Return true if there was an error parsing the document.
@@ -1754,6 +1766,7 @@ public:
         return _errorID;
     }
 	const char* ErrorName() const;
+    static const char* ErrorIDToName(XMLError errorID);
 
     /// Return a possibly helpful diagnostic location or string.
     const char* GetErrorStr1() const {
@@ -1763,6 +1776,11 @@ public:
     const char* GetErrorStr2() const {
         return _errorStr2.GetStr();
     }
+    /// Return the line where the error occurred, or zero if unknown.
+    int GetErrorLineNum() const
+    {
+        return _errorLineNum;
+    }
     /// If there is an error, print it to stdout.
     void PrintError() const;
     
@@ -1783,13 +1801,15 @@ private:
     XMLDocument( const XMLDocument& );	// not supported
     void operator=( const XMLDocument& );	// not supported
 
-    bool        _writeBOM;
-    bool        _processEntities;
-    XMLError    _errorID;
-    Whitespace  _whitespace;
-    mutable StrPair		_errorStr1;
-    mutable StrPair		_errorStr2;
-    char*       _charBuffer;
+    bool			_writeBOM;
+    bool			_processEntities;
+    XMLError		_errorID;
+    Whitespace		_whitespace;
+    mutable StrPair	_errorStr1;
+    mutable StrPair	_errorStr2;
+    int             _errorLineNum;
+    char*			_charBuffer;
+    int				_parseCurLineNum;
 
     MemPoolT< sizeof(XMLElement) >	 _elementPool;
     MemPoolT< sizeof(XMLAttribute) > _attributePool;

+ 139 - 1
xmltest.cpp

@@ -63,6 +63,15 @@ bool XMLTest (const char* testString, const char* expected, const char* found, b
 	return pass;
 }
 
+bool XMLTest(const char* testString, XMLError expected, XMLError found, bool echo = true, bool extraNL = false)
+{
+    return XMLTest(testString, XMLDocument::ErrorIDToName(expected), XMLDocument::ErrorIDToName(found), echo, extraNL);
+}
+
+bool XMLTest(const char* testString, bool expected, bool found, bool echo = true, bool extraNL = false)
+{
+    return XMLTest(testString, expected ? "true" : "false", found ? "true" : "false", echo, extraNL);
+}
 
 template< class T > bool XMLTest( const char* testString, T expected, T found, bool echo=true )
 {
@@ -1631,11 +1640,140 @@ int main( int argc, const char ** argv )
 	{
 		XMLDocument doc;
 		for( int i = 0; i < XML_ERROR_COUNT; i++ ) {
-			doc.SetError( (XMLError)i, 0, 0 );
+			doc.SetError( (XMLError)i, 0, 0, 0 );
 			doc.ErrorName();
 		}
 	}
 
+    // ----------- Line Number Tracking --------------
+    {
+        struct TestUtil: XMLVisitor
+        {
+            void TestParseError(const char *testString, const char *docStr, XMLError expected_error, int expectedLine)
+            {
+                XMLDocument doc;
+                XMLError err = doc.Parse(docStr);
+
+                XMLTest(testString, true, doc.Error());
+                XMLTest(testString, expected_error, err);
+                XMLTest(testString, expectedLine, doc.GetErrorLineNum());
+            };
+
+            void TestStringLines(const char *testString, const char *docStr, const char *expectedLines)
+            {
+                XMLDocument doc;
+                doc.Parse(docStr);
+                XMLTest(testString, false, doc.Error());
+                TestDocLines(testString, doc, expectedLines);
+            }
+
+            void TestFileLines(const char *testString, const char *file_name, const char *expectedLines)
+            {
+                XMLDocument doc;
+                doc.LoadFile(file_name);
+                XMLTest(testString, false, doc.Error());
+                TestDocLines(testString, doc, expectedLines);
+            }
+
+        private:
+            DynArray<char, 10> str;
+
+            void Push(char type, int lineNum)
+            {
+                str.Push(type);
+                str.Push(char('0' + (lineNum / 10)));
+                str.Push(char('0' + (lineNum % 10)));
+            }
+
+            bool VisitEnter(const XMLDocument& doc)
+            {
+                Push('D', doc.GetLineNum());
+                return true;
+            }
+            bool VisitEnter(const XMLElement& element, const XMLAttribute* firstAttribute)
+            {
+                Push('E', element.GetLineNum());
+                for (const XMLAttribute *attr = firstAttribute; attr != 0; attr = attr->Next())
+                    Push('A', attr->GetLineNum());
+                return true;
+            }
+            bool Visit(const XMLDeclaration& declaration)
+            {
+                Push('L', declaration.GetLineNum());
+                return true;
+            }
+            bool Visit(const XMLText& text)
+            {
+                Push('T', text.GetLineNum());
+                return true;
+            }
+            bool Visit(const XMLComment& comment)
+            {
+                Push('C', comment.GetLineNum());
+                return true;
+            }
+            bool Visit(const XMLUnknown& unknown)
+            {
+                Push('U', unknown.GetLineNum());
+                return true;
+            }
+
+            void TestDocLines(const char *testString, XMLDocument &doc, const char *expectedLines)
+            {
+                str.Clear();
+                doc.Accept(this);
+                str.Push(0);
+                XMLTest(testString, expectedLines, str.Mem());
+            }
+        } tester;
+
+		tester.TestParseError("ErrorLine-Parsing", "\n<root>\n foo \n<unclosed/>", XML_ERROR_PARSING, 2);
+        tester.TestParseError("ErrorLine-Declaration", "<root>\n<?xml version=\"1.0\"?>", XML_ERROR_PARSING_DECLARATION, 2);
+        tester.TestParseError("ErrorLine-Mismatch", "\n<root>\n</mismatch>", XML_ERROR_MISMATCHED_ELEMENT, 2);
+        tester.TestParseError("ErrorLine-CData", "\n<root><![CDATA[ \n foo bar \n", XML_ERROR_PARSING_CDATA, 2);
+        tester.TestParseError("ErrorLine-Text", "\n<root>\n foo bar \n", XML_ERROR_PARSING_TEXT, 3);
+        tester.TestParseError("ErrorLine-Comment", "\n<root>\n<!-- >\n", XML_ERROR_PARSING_COMMENT, 3);
+        tester.TestParseError("ErrorLine-Declaration", "\n<root>\n<? >\n", XML_ERROR_PARSING_DECLARATION, 3);
+        tester.TestParseError("ErrorLine-Unknown", "\n<root>\n<! \n", XML_ERROR_PARSING_UNKNOWN, 3);
+        tester.TestParseError("ErrorLine-Element", "\n<root>\n<unclosed \n", XML_ERROR_PARSING_ELEMENT, 3);
+        tester.TestParseError("ErrorLine-Attribute", "\n<root>\n<unclosed \n att\n", XML_ERROR_PARSING_ATTRIBUTE, 4);
+        tester.TestParseError("ErrorLine-ElementClose", "\n<root>\n<unclosed \n/unexpected", XML_ERROR_PARSING_ELEMENT, 3);
+
+		tester.TestStringLines(
+            "LineNumbers-String",
+
+            "<?xml version=\"1.0\"?>\n"					// 1 Doc, DecL
+                "<root a='b' \n"						// 2 Element Attribute
+                "c='d'> d <blah/>  \n"					// 3 Attribute Text Element
+                "newline in text \n"					// 4 Text
+                "and second <zxcv/><![CDATA[\n"			// 5 Element Text
+                " cdata test ]]><!-- comment -->\n"		// 6 Comment
+                "<! unknown></root>",					// 7 Unknown
+
+            "D01L01E02A02A03T03E03T04E05T05C06U07");
+
+		tester.TestStringLines(
+            "LineNumbers-CRLF",
+
+            "\r\n"										// 1 Doc (arguably should be line 2)
+            "<?xml version=\"1.0\"?>\n"					// 2 DecL
+            "<root>\r\n"								// 3 Element
+            "\n"										// 4
+            "text contining new line \n"				// 5 Text
+            " and also containing crlf \r\n"			// 6
+            "<sub><![CDATA[\n"							// 7 Element Text
+            "cdata containing new line \n"				// 8
+            " and also containing cflr\r\n"				// 9
+            "]]></sub><sub2/></root>",					// 10 Element
+
+            "D01L02E03T05E07T07E10");
+
+		tester.TestFileLines(
+            "LineNumbers-File",
+            "resources/utf8test.xml",
+            "D01L01E02E03A03A03T03E04A04A04T04E05A05A05T05E06A06A06T06E07A07A07T07E08A08A08T08E09T09E10T10");
+    }
+
     // ----------- Performance tracking --------------
 	{
 #if defined( _MSC_VER )