Lee Thomason 2 jaren geleden
bovenliggende
commit
e0302bfd44
5 gewijzigde bestanden met toevoegingen van 206 en 13 verwijderingen
  1. 2 2
      .github/workflows/test.yml
  2. 12 4
      readme.md
  3. 17 5
      tinyxml2.cpp
  4. 3 2
      tinyxml2.h
  5. 172 0
      xmltest.cpp

+ 2 - 2
.github/workflows/test.yml

@@ -6,7 +6,7 @@ jobs:
     strategy:
     strategy:
       fail-fast: false
       fail-fast: false
       matrix:
       matrix:
-        os: [ windows-2019, macos-10.15, ubuntu-20.04 ]
+        os: [ windows-2019, macos-latest, ubuntu-20.04 ]
         cmake: [ 3.15, 3.x ]
         cmake: [ 3.15, 3.x ]
         include:
         include:
           - os: windows-2019
           - os: windows-2019
@@ -17,7 +17,7 @@ jobs:
           - os: ubuntu-20.04
           - os: ubuntu-20.04
             tree: tree
             tree: tree
 
 
-          - os: macos-10.15
+          - os: macos-latest
             tree: find
             tree: find
 
 
           - cmake: 3.15
           - cmake: 3.15

+ 12 - 4
readme.md

@@ -1,7 +1,7 @@
 TinyXML-2
 TinyXML-2
 =========
 =========
 
 
-![Build](https://github.com/leethomason/tinyxml2/actions/workflows/test.yml/badge.svg)
+[![Test](https://github.com/leethomason/tinyxml2/actions/workflows/test.yml/badge.svg)](https://github.com/leethomason/tinyxml2/actions/workflows/test.yml)
 
 
 TinyXML-2 is a simple, small, efficient, C++ XML parser that can be
 TinyXML-2 is a simple, small, efficient, C++ XML parser that can be
 easily integrated into other programs.
 easily integrated into other programs.
@@ -91,7 +91,7 @@ by the Document. When the Document is deleted, so are all the nodes it contains.
 
 
 ### White Space
 ### White Space
 
 
-#### Whitespace Preservation (default)
+#### Whitespace Preservation (default, PRESERVE_WHITESPACE)
 
 
 Microsoft has an excellent article on white space: http://msdn.microsoft.com/en-us/library/ms256097.aspx
 Microsoft has an excellent article on white space: http://msdn.microsoft.com/en-us/library/ms256097.aspx
 
 
@@ -123,7 +123,7 @@ valuable. TinyXML-2 sees these as the same XML:
 
 
 	<document><data>1</data><data>2</data><data>3</data></document>
 	<document><data>1</data><data>2</data><data>3</data></document>
 
 
-#### Whitespace Collapse
+#### Whitespace Collapse (COLLAPSE_WHITESPACE)
 
 
 For some applications, it is preferable to collapse whitespace. Collapsing
 For some applications, it is preferable to collapse whitespace. Collapsing
 whitespace gives you "HTML-like" behavior, which is sometimes more suitable
 whitespace gives you "HTML-like" behavior, which is sometimes more suitable
@@ -141,7 +141,15 @@ However, you may also use COLLAPSE_WHITESPACE, which will:
 Note that (currently) there is a performance impact for using COLLAPSE_WHITESPACE.
 Note that (currently) there is a performance impact for using COLLAPSE_WHITESPACE.
 It essentially causes the XML to be parsed twice.
 It essentially causes the XML to be parsed twice.
 
 
-#### Error Reporting
+#### Pedantic Whitespace (PEDANTIC_WHITESPACE)
+
+For applications that need to know about text nodes that are composed entirely of 
+whitespace, PEDANTIC_WHITESPACE is available. PEDANTIC_WHITESPACE maintains all the
+whitespace between elements.
+
+PEDANTIC_WHITESPACE is a new mode and is not as thoroughly tested as the other whitespace modes.
+
+### Error Reporting
 
 
 TinyXML-2 reports the line number of any errors in an XML document that
 TinyXML-2 reports the line number of any errors in an XML document that
 cannot be parsed correctly. In addition, all nodes (elements, declarations,
 cannot be parsed correctly. In addition, all nodes (elements, declarations,

+ 17 - 5
tinyxml2.cpp

@@ -715,7 +715,7 @@ bool XMLUtil::ToUnsigned64(const char* str, uint64_t* value) {
 }
 }
 
 
 
 
-char* XMLDocument::Identify( char* p, XMLNode** node )
+char* XMLDocument::Identify( char* p, XMLNode** node, bool first )
 {
 {
     TIXMLASSERT( node );
     TIXMLASSERT( node );
     TIXMLASSERT( p );
     TIXMLASSERT( p );
@@ -767,9 +767,19 @@ char* XMLDocument::Identify( char* p, XMLNode** node )
         p += dtdHeaderLen;
         p += dtdHeaderLen;
     }
     }
     else if ( XMLUtil::StringEqual( p, elementHeader, elementHeaderLen ) ) {
     else if ( XMLUtil::StringEqual( p, elementHeader, elementHeaderLen ) ) {
-        returnNode =  CreateUnlinkedNode<XMLElement>( _elementPool );
-        returnNode->_parseLineNum = _parseCurLineNum;
-        p += elementHeaderLen;
+
+        // Preserve whitespace pedantically before closing tag, when it's immediately after opening tag
+        if (WhitespaceMode() == PEDANTIC_WHITESPACE && first && p != start && *(p + elementHeaderLen) == '/') {
+            returnNode = CreateUnlinkedNode<XMLText>(_textPool);
+            returnNode->_parseLineNum = startLine;
+            p = start;	// Back it up, all the text counts.
+            _parseCurLineNum = startLine;
+        }
+        else {
+            returnNode = CreateUnlinkedNode<XMLElement>(_elementPool);
+            returnNode->_parseLineNum = _parseCurLineNum;
+            p += elementHeaderLen;
+        }
     }
     }
     else {
     else {
         returnNode = CreateUnlinkedNode<XMLText>( _textPool );
         returnNode = CreateUnlinkedNode<XMLText>( _textPool );
@@ -1098,14 +1108,16 @@ char* XMLNode::ParseDeep( char* p, StrPair* parentEndTag, int* curLineNumPtr )
 	if (_document->Error())
 	if (_document->Error())
 		return 0;
 		return 0;
 
 
+	bool first = true;
 	while( p && *p ) {
 	while( p && *p ) {
         XMLNode* node = 0;
         XMLNode* node = 0;
 
 
-        p = _document->Identify( p, &node );
+        p = _document->Identify( p, &node, first );
         TIXMLASSERT( p );
         TIXMLASSERT( p );
         if ( node == 0 ) {
         if ( node == 0 ) {
             break;
             break;
         }
         }
+        first = false;
 
 
        const int initialLineNum = node->_parseLineNum;
        const int initialLineNum = node->_parseLineNum;
 
 

+ 3 - 2
tinyxml2.h

@@ -1710,7 +1710,8 @@ private:
 
 
 enum Whitespace {
 enum Whitespace {
     PRESERVE_WHITESPACE,
     PRESERVE_WHITESPACE,
-    COLLAPSE_WHITESPACE
+    COLLAPSE_WHITESPACE,
+    PEDANTIC_WHITESPACE
 };
 };
 
 
 
 
@@ -1921,7 +1922,7 @@ public:
 	void DeepCopy(XMLDocument* target) const;
 	void DeepCopy(XMLDocument* target) const;
 
 
 	// internal
 	// internal
-    char* Identify( char* p, XMLNode** node );
+    char* Identify( char* p, XMLNode** node, bool first );
 
 
 	// internal
 	// internal
 	void MarkInUse(const XMLNode* const);
 	void MarkInUse(const XMLNode* const);

+ 172 - 0
xmltest.cpp

@@ -1869,6 +1869,178 @@ int main( int argc, const char ** argv )
 		XMLTest( "Whitespace  all space", true, 0 == doc.FirstChildElement()->FirstChild() );
 		XMLTest( "Whitespace  all space", true, 0 == doc.FirstChildElement()->FirstChild() );
 	}
 	}
 
 
+	// ----------- Preserve Whitespace ------------
+	{
+		const char* xml = "<element>This  is  &apos;  \n\n text &apos;</element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", "This  is  '  \n\n text '", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element> This \nis &apos;  text  &apos;  </element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", " This \nis '  text  '  ", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>  \n This is &apos; text &apos;  \n</element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", "  \n This is ' text '  \n", doc.FirstChildElement()->GetText());
+	}
+
+	// The following cases cover text that is entirely whitespace, which is intentionally not preserved
+	{
+		const char* xml = "<element> </element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>   </element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>\n\n</element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>  \n</element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element> \n \n </element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
+	}
+
+	// ----------- Pedantic Whitespace ------------
+	{
+		const char* xml = "<element>This  is  &apos;  \n\n text &apos;</element>";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", "This  is  '  \n\n text '", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element> This \nis &apos;  text  &apos;  </element>";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", " This \nis '  text  '  ", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>  \n This is &apos; text &apos;  \n</element>";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", "  \n This is ' text '  \n", doc.FirstChildElement()->GetText());
+	}
+
+	// Following cases are for text that is all whitespace which is preserved with pedantic mode
+	{
+		const char* xml = "<element> </element>";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", " ", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>   </element>";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", "   ", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>\n\n</element>\n";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", "\n\n", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>  \n</element> \n ";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", "  \n", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element> \n  \n </element>  ";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", " \n  \n ", doc.FirstChildElement()->GetText());
+	}
+
+	// Following cases are for checking nested elements are still parsed with pedantic whitespace
+	{
+		const char* xml = "<element>\n\t<a> This is nested text </a>\n</element>  ";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", " This is nested text ", doc.RootElement()->FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>  <b> </b>  </element>\n";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", " ", doc.RootElement()->FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>  <c attribute=\"test\"/>  </element>\n ";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", true, 0 == doc.RootElement()->FirstChildElement()->GetText());
+	}
+
+	// Check sample xml can be parsed with pedantic mode
+	{
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.LoadFile("resources/dream.xml");
+		XMLTest("Load dream.xml with pedantic whitespace mode", false, doc.Error());
+
+		XMLTest("Dream", "xml version=\"1.0\"",
+			doc.FirstChild()->ToDeclaration()->Value());
+		XMLTest("Dream", true, doc.FirstChild()->NextSibling()->ToUnknown() != 0);
+		XMLTest("Dream", "DOCTYPE PLAY SYSTEM \"play.dtd\"",
+			doc.FirstChild()->NextSibling()->ToUnknown()->Value());
+		XMLTest("Dream", "And Robin shall restore amends.",
+			doc.LastChild()->LastChild()->LastChild()->LastChild()->LastChildElement()->GetText());
+	}
+
 	{
 	{
 		// An assert should not fire.
 		// An assert should not fire.
 		const char* xml = "<element/>";
 		const char* xml = "<element/>";