Ver Fonte

Merge branch 'pedantic-whitespace' of https://github.com/kcsaul/tinyxml2 into kcsaul-pedantic-whitespace

Lee Thomason há 2 anos atrás
pai
commit
9d026527a4
3 ficheiros alterados com 192 adições e 7 exclusões
  1. 17 5
      tinyxml2.cpp
  2. 3 2
      tinyxml2.h
  3. 172 0
      xmltest.cpp

+ 17 - 5
tinyxml2.cpp

@@ -715,7 +715,7 @@ bool XMLUtil::ToUnsigned64(const char* str, uint64_t* value) {
 }
 
 
-char* XMLDocument::Identify( char* p, XMLNode** node )
+char* XMLDocument::Identify( char* p, XMLNode** node, bool first )
 {
     TIXMLASSERT( node );
     TIXMLASSERT( p );
@@ -767,9 +767,19 @@ char* XMLDocument::Identify( char* p, XMLNode** node )
         p += dtdHeaderLen;
     }
     else if ( XMLUtil::StringEqual( p, elementHeader, elementHeaderLen ) ) {
-        returnNode =  CreateUnlinkedNode<XMLElement>( _elementPool );
-        returnNode->_parseLineNum = _parseCurLineNum;
-        p += elementHeaderLen;
+
+        // Preserve whitespace pedantically before closing tag, when it's immediately after opening tag
+        if (WhitespaceMode() == PEDANTIC_WHITESPACE && first && p != start && *(p + elementHeaderLen) == '/') {
+            returnNode = CreateUnlinkedNode<XMLText>(_textPool);
+            returnNode->_parseLineNum = startLine;
+            p = start;	// Back it up, all the text counts.
+            _parseCurLineNum = startLine;
+        }
+        else {
+            returnNode = CreateUnlinkedNode<XMLElement>(_elementPool);
+            returnNode->_parseLineNum = _parseCurLineNum;
+            p += elementHeaderLen;
+        }
     }
     else {
         returnNode = CreateUnlinkedNode<XMLText>( _textPool );
@@ -1098,14 +1108,16 @@ char* XMLNode::ParseDeep( char* p, StrPair* parentEndTag, int* curLineNumPtr )
 	if (_document->Error())
 		return 0;
 
+	bool first = true;
 	while( p && *p ) {
         XMLNode* node = 0;
 
-        p = _document->Identify( p, &node );
+        p = _document->Identify( p, &node, first );
         TIXMLASSERT( p );
         if ( node == 0 ) {
             break;
         }
+        first = false;
 
        const int initialLineNum = node->_parseLineNum;
 

+ 3 - 2
tinyxml2.h

@@ -1710,7 +1710,8 @@ private:
 
 enum Whitespace {
     PRESERVE_WHITESPACE,
-    COLLAPSE_WHITESPACE
+    COLLAPSE_WHITESPACE,
+    PEDANTIC_WHITESPACE
 };
 
 
@@ -1921,7 +1922,7 @@ public:
 	void DeepCopy(XMLDocument* target) const;
 
 	// internal
-    char* Identify( char* p, XMLNode** node );
+    char* Identify( char* p, XMLNode** node, bool first );
 
 	// internal
 	void MarkInUse(const XMLNode* const);

+ 172 - 0
xmltest.cpp

@@ -1869,6 +1869,178 @@ int main( int argc, const char ** argv )
 		XMLTest( "Whitespace  all space", true, 0 == doc.FirstChildElement()->FirstChild() );
 	}
 
+	// ----------- Preserve Whitespace ------------
+	{
+		const char* xml = "<element>This  is  &apos;  \n\n text &apos;</element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", "This  is  '  \n\n text '", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element> This \nis &apos;  text  &apos;  </element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", " This \nis '  text  '  ", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>  \n This is &apos; text &apos;  \n</element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", "  \n This is ' text '  \n", doc.FirstChildElement()->GetText());
+	}
+
+	// The following cases cover text that is entirely whitespace, which is intentionally not preserved
+	{
+		const char* xml = "<element> </element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>   </element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>\n\n</element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>  \n</element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element> \n \n </element>";
+		XMLDocument doc(true, PRESERVE_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with whitespace preserved", false, doc.Error());
+		XMLTest("Whitespace preserved", true, 0 == doc.FirstChildElement()->GetText());
+	}
+
+	// ----------- Pedantic Whitespace ------------
+	{
+		const char* xml = "<element>This  is  &apos;  \n\n text &apos;</element>";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", "This  is  '  \n\n text '", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element> This \nis &apos;  text  &apos;  </element>";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", " This \nis '  text  '  ", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>  \n This is &apos; text &apos;  \n</element>";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", "  \n This is ' text '  \n", doc.FirstChildElement()->GetText());
+	}
+
+	// The following cases cover text that is entirely whitespace, which is preserved in pedantic mode
+	{
+		const char* xml = "<element> </element>";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", " ", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>   </element>";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", "   ", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>\n\n</element>\n";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", "\n\n", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>  \n</element> \n ";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", "  \n", doc.FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element> \n  \n </element>  ";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", " \n  \n ", doc.FirstChildElement()->GetText());
+	}
+
+	// The following cases check that nested elements are still parsed correctly with pedantic whitespace
+	{
+		const char* xml = "<element>\n\t<a> This is nested text </a>\n</element>  ";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", " This is nested text ", doc.RootElement()->FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>  <b> </b>  </element>\n";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", " ", doc.RootElement()->FirstChildElement()->GetText());
+	}
+
+	{
+		const char* xml = "<element>  <c attribute=\"test\"/>  </element>\n ";
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.Parse(xml);
+		XMLTest("Parse nested elements with pedantic whitespace", false, doc.Error());
+		XMLTest("Pedantic whitespace", true, 0 == doc.RootElement()->FirstChildElement()->GetText());
+	}
+
+	// Check sample xml can be parsed with pedantic mode
+	{
+		XMLDocument doc(true, PEDANTIC_WHITESPACE);
+		doc.LoadFile("resources/dream.xml");
+		XMLTest("Load dream.xml with pedantic whitespace mode", false, doc.Error());
+
+		XMLTest("Dream", "xml version=\"1.0\"",
+			doc.FirstChild()->ToDeclaration()->Value());
+		XMLTest("Dream", true, doc.FirstChild()->NextSibling()->ToUnknown() != 0);
+		XMLTest("Dream", "DOCTYPE PLAY SYSTEM \"play.dtd\"",
+			doc.FirstChild()->NextSibling()->ToUnknown()->Value());
+		XMLTest("Dream", "And Robin shall restore amends.",
+			doc.LastChild()->LastChild()->LastChild()->LastChild()->LastChildElement()->GetText());
+	}
+
 	{
 		// An assert should not fire.
 		const char* xml = "<element/>";