Просмотр исходного кода

added whitespace=collapse support. tests work. code needs review

Lee Thomason (grinliz) 13 лет назад
Родитель
Сommit
bc1bfb7f27
3 измененных файлов с 90 добавлено и 13 удалено
  1. 43 6
      tinyxml2.cpp
  2. 28 7
      tinyxml2.h
  3. 19 0
      xmltest.cpp

+ 43 - 6
tinyxml2.cpp

@@ -23,10 +23,12 @@ distribution.
 
 #include "tinyxml2.h"
 
-#include <cstdio>
-#include <cstdlib>
-#include <new>
-#include <cstddef>
+#include <new>		// yes, this one new style header, is in the Android SDK.
+#ifdef ANDROID_NDK
+	#include <stddef.h>
+#else
+	#include <cstddef>
+#endif
 
 using namespace tinyxml2;
 
@@ -156,6 +158,31 @@ char* StrPair::ParseName( char* p )
 }
 
 
+void StrPair::CollapseWhitespace()
+{
+	// Trim leading space.
+	start = XMLUtil::SkipWhiteSpace( start );
+
+	if ( start && *start ) {
+		char* p = start;	// the read pointer
+		char* q = start;	// the write pointer
+
+		while( *p ) {
+			if ( XMLUtil::IsWhiteSpace( *p )) {
+				p = XMLUtil::SkipWhiteSpace( p );
+				if ( *p == 0 ) 
+					break;	// don't write to q; this trims the trailing space.
+				*q = ' ';
+				++q;
+			}
+			*q = *p;
+			++q;
+			++p;
+		}
+		*q = 0;
+	}
+}
+
 
 const char* StrPair::GetStr()
 {
@@ -232,6 +259,11 @@ const char* StrPair::GetStr()
 			}
 			*q = 0;
 		}
+		// The loop below has plenty going on, and this
+		// is a less useful mode. Break it out.
+		if ( flags & COLLAPSE_WHITESPACE ) {
+			CollapseWhitespace();
+		}
 		flags = (flags & NEEDS_DELETE);
 	}
 	return start;
@@ -815,7 +847,11 @@ char* XMLText::ParseDeep( char* p, StrPair* )
 		return p;
 	}
 	else {
-		p = value.ParseText( p, "<", document->ProcessEntities() ? StrPair::TEXT_ELEMENT : StrPair::TEXT_ELEMENT_LEAVE_ENTITIES );
+		int flags = document->ProcessEntities() ? StrPair::TEXT_ELEMENT : StrPair::TEXT_ELEMENT_LEAVE_ENTITIES;
+		if ( document->WhitespaceMode() == COLLAPSE_WHITESPACE )
+			flags |= StrPair::COLLAPSE_WHITESPACE;
+
+		p = value.ParseText( p, "<", flags );
 		if ( !p ) {
 			document->SetError( XML_ERROR_PARSING_TEXT, start, 0 );
 		}
@@ -1416,11 +1452,12 @@ bool XMLElement::Accept( XMLVisitor* visitor ) const
 
 
 // --------- XMLDocument ----------- //
-XMLDocument::XMLDocument( bool _processEntities ) :
+XMLDocument::XMLDocument( bool _processEntities, Whitespace _whitespace ) :
 	XMLNode( 0 ),
 	writeBOM( false ),
 	processEntities( _processEntities ),
 	errorID( 0 ),
+	whitespace( _whitespace ),
 	errorStr1( 0 ),
 	errorStr2( 0 ),
 	charBuffer( 0 )

+ 28 - 7
tinyxml2.h

@@ -24,11 +24,21 @@ distribution.
 #ifndef TINYXML2_INCLUDED
 #define TINYXML2_INCLUDED
 
-#include <cctype>
-#include <climits>
-#include <cstdio>
-#include <cstring>
-#include <cstdarg>
+#ifdef ANDROID_NDK
+	#include <ctype.h>
+	#include <limits.h>
+	#include <stdio.h>
+	#include <stdlib.h>
+	#include <string.h>
+	#include <stdarg.h>
+#else
+	#include <cctype>
+	#include <climits>
+	#include <cstdio>
+	#include <cstdlib>
+	#include <cstring>
+	#include <cstdarg>
+#endif
 
 /* 
    TODO: intern strings instead of allocation.
@@ -112,6 +122,7 @@ public:
 	enum {
 		NEEDS_ENTITY_PROCESSING			= 0x01,
 		NEEDS_NEWLINE_NORMALIZATION		= 0x02,
+		COLLAPSE_WHITESPACE				= 0x04,
 
 		TEXT_ELEMENT		= NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION,
 		TEXT_ELEMENT_LEAVE_ENTITIES		= NEEDS_NEWLINE_NORMALIZATION,
@@ -140,6 +151,7 @@ public:
 
 private:
 	void Reset();
+	void CollapseWhitespace();
 
 	enum {
 		NEEDS_FLUSH = 0x100,
@@ -365,6 +377,7 @@ public:
 	// correct, but simple, and usually works.
 	static const char* SkipWhiteSpace( const char* p )	{ while( !IsUTF8Continuation(*p) && isspace( *reinterpret_cast<const unsigned char*>(p) ) ) { ++p; } return p; }
 	static char* SkipWhiteSpace( char* p )				{ while( !IsUTF8Continuation(*p) && isspace( *reinterpret_cast<unsigned char*>(p) ) )		{ ++p; } return p; }
+	static bool IsWhiteSpace( char p )					{ return !IsUTF8Continuation(p) && isspace( static_cast<unsigned char>(p) ); }
 
 	inline static bool StringEqual( const char* p, const char* q, int nChar=INT_MAX )  {
 		int n = 0;
@@ -1031,6 +1044,12 @@ private:
 };
 
 
+enum Whitespace {
+	PRESERVE_WHITESPACE,
+	COLLAPSE_WHITESPACE
+};	   
+
+	
 /** A Document binds together all the functionality. 
 	It can be saved, loaded, and printed to the screen.
 	All Nodes are connected and allocated to a Document.
@@ -1041,7 +1060,7 @@ class XMLDocument : public XMLNode
 	friend class XMLElement;
 public:
 	/// constructor
-	XMLDocument( bool processEntities = true ); 
+	XMLDocument( bool processEntities = true, Whitespace = PRESERVE_WHITESPACE ); 
 	~XMLDocument();
 
 	virtual XMLDocument* ToDocument()				{ return this; }
@@ -1086,7 +1105,8 @@ public:
 	*/
 	int SaveFile( FILE* );
 
-	bool ProcessEntities() const						{ return processEntities; }
+	bool ProcessEntities() const		{ return processEntities; }
+	Whitespace WhitespaceMode() const	{ return whitespace; }
 
 	/**
 		Returns true if this document has a leading Byte Order Mark of UTF8.
@@ -1189,6 +1209,7 @@ private:
 	bool writeBOM;
 	bool processEntities;
 	int errorID;
+	Whitespace whitespace;
 	const char* errorStr1;
 	const char* errorStr2;
 	char* charBuffer;

+ 19 - 0
xmltest.cpp

@@ -938,6 +938,25 @@ int main( int /*argc*/, const char ** /*argv*/ )
 		XMLTest( "QueryBoolText", boolValue, true,					false );
 	}
 
+	// ----------- Whitespace ------------
+	{
+		const char* xml = "<element>"
+							"<a> This \nis &apos;  text  &apos; </a>"
+							"<b>  This is &apos; text &apos;  \n</b>"
+							"<c>This  is  &apos;  \n\n text &apos;</c>"
+						  "</element>";
+		XMLDocument doc( true, COLLAPSE_WHITESPACE );
+		doc.Parse( xml );
+
+		const XMLElement* element = doc.FirstChildElement();
+		for( const XMLElement* parent = element->FirstChildElement();
+			 parent;
+			 parent = parent->NextSiblingElement() )
+		{
+			XMLTest( "Whitespace collapse", "This is ' text '", parent->GetText() );
+		}
+	}
+
 	
 	// ----------- Performance tracking --------------
 	{