Просмотр исходного кода

added whitespace=collapse support. tests work. code needs review

Lee Thomason (grinliz) 13 лет назад
Родитель
Коммит
bc1bfb7f27
3 изменённых файла: 90 добавлено и 13 удалено
  1. 43 6
      tinyxml2.cpp
  2. 28 7
      tinyxml2.h
  3. 19 0
      xmltest.cpp

+ 43 - 6
tinyxml2.cpp

@@ -23,10 +23,12 @@ distribution.
 
 
 #include "tinyxml2.h"
 #include "tinyxml2.h"
 
 
-#include <cstdio>
-#include <cstdlib>
-#include <new>
-#include <cstddef>
+#include <new>		// yes, this one new-style header is in the Android SDK.
+#ifdef ANDROID_NDK
+	#include <stddef.h>
+#else
+	#include <cstddef>
+#endif
 
 
 using namespace tinyxml2;
 using namespace tinyxml2;
 
 
@@ -156,6 +158,31 @@ char* StrPair::ParseName( char* p )
 }
 }
 
 
 
 
+void StrPair::CollapseWhitespace()	// Rewrites the null-terminated text at 'start' in place: leading and trailing whitespace is dropped and every inner run of whitespace becomes one ' '.
+{
+	// Trim leading space by advancing 'start' past it; no bytes are moved.
+	start = XMLUtil::SkipWhiteSpace( start );
+
+	if ( start && *start ) {	// NOTE(review): SkipWhiteSpace above already dereferenced 'start', so the null test here cannot protect that call -- confirm 'start' is never null on entry.
+		char* p = start;	// the read pointer: walks the original text
+		char* q = start;	// the write pointer: never gets ahead of p, so compacting in place is safe
+
+		while( *p ) {
+			if ( XMLUtil::IsWhiteSpace( *p )) {
+				p = XMLUtil::SkipWhiteSpace( p );	// jump over the whole run of whitespace
+				if ( *p == 0 ) 
+					break;	// the run reached the end of the text: don't write to q; this trims the trailing space.
+				*q = ' ';	// a run in the middle of the text is replaced by a single space
+				++q;
+			}
+			*q = *p;	// copy the non-whitespace character p now points at
+			++q;
+			++p;
+		}
+		*q = 0;	// terminate the (possibly shorter) result
+	}
+}
+
 
 
 const char* StrPair::GetStr()
 const char* StrPair::GetStr()
 {
 {
@@ -232,6 +259,11 @@ const char* StrPair::GetStr()
 			}
 			}
 			*q = 0;
 			*q = 0;
 		}
 		}
+		// The loop above has plenty going on, and whitespace
+		// collapsing is a less useful mode. Break it out.
+		if ( flags & COLLAPSE_WHITESPACE ) {
+			CollapseWhitespace();
+		}
 		flags = (flags & NEEDS_DELETE);
 		flags = (flags & NEEDS_DELETE);
 	}
 	}
 	return start;
 	return start;
@@ -815,7 +847,11 @@ char* XMLText::ParseDeep( char* p, StrPair* )
 		return p;
 		return p;
 	}
 	}
 	else {
 	else {
-		p = value.ParseText( p, "<", document->ProcessEntities() ? StrPair::TEXT_ELEMENT : StrPair::TEXT_ELEMENT_LEAVE_ENTITIES );
+		int flags = document->ProcessEntities() ? StrPair::TEXT_ELEMENT : StrPair::TEXT_ELEMENT_LEAVE_ENTITIES;
+		if ( document->WhitespaceMode() == COLLAPSE_WHITESPACE )
+			flags |= StrPair::COLLAPSE_WHITESPACE;
+
+		p = value.ParseText( p, "<", flags );
 		if ( !p ) {
 		if ( !p ) {
 			document->SetError( XML_ERROR_PARSING_TEXT, start, 0 );
 			document->SetError( XML_ERROR_PARSING_TEXT, start, 0 );
 		}
 		}
@@ -1416,11 +1452,12 @@ bool XMLElement::Accept( XMLVisitor* visitor ) const
 
 
 
 
 // --------- XMLDocument ----------- //
 // --------- XMLDocument ----------- //
-XMLDocument::XMLDocument( bool _processEntities ) :
+XMLDocument::XMLDocument( bool _processEntities, Whitespace _whitespace ) :
 	XMLNode( 0 ),
 	XMLNode( 0 ),
 	writeBOM( false ),
 	writeBOM( false ),
 	processEntities( _processEntities ),
 	processEntities( _processEntities ),
 	errorID( 0 ),
 	errorID( 0 ),
+	whitespace( _whitespace ),
 	errorStr1( 0 ),
 	errorStr1( 0 ),
 	errorStr2( 0 ),
 	errorStr2( 0 ),
 	charBuffer( 0 )
 	charBuffer( 0 )

+ 28 - 7
tinyxml2.h

@@ -24,11 +24,21 @@ distribution.
 #ifndef TINYXML2_INCLUDED
 #ifndef TINYXML2_INCLUDED
 #define TINYXML2_INCLUDED
 #define TINYXML2_INCLUDED
 
 
-#include <cctype>
-#include <climits>
-#include <cstdio>
-#include <cstring>
-#include <cstdarg>
+#ifdef ANDROID_NDK
+	#include <ctype.h>
+	#include <limits.h>
+	#include <stdio.h>
+	#include <stdlib.h>
+	#include <string.h>
+	#include <stdarg.h>
+#else
+	#include <cctype>
+	#include <climits>
+	#include <cstdio>
+	#include <cstdlib>
+	#include <cstring>
+	#include <cstdarg>
+#endif
 
 
 /* 
 /* 
    TODO: intern strings instead of allocation.
    TODO: intern strings instead of allocation.
@@ -112,6 +122,7 @@ public:
 	enum {
 	enum {
 		NEEDS_ENTITY_PROCESSING			= 0x01,
 		NEEDS_ENTITY_PROCESSING			= 0x01,
 		NEEDS_NEWLINE_NORMALIZATION		= 0x02,
 		NEEDS_NEWLINE_NORMALIZATION		= 0x02,
+		COLLAPSE_WHITESPACE				= 0x04,
 
 
 		TEXT_ELEMENT		= NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION,
 		TEXT_ELEMENT		= NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION,
 		TEXT_ELEMENT_LEAVE_ENTITIES		= NEEDS_NEWLINE_NORMALIZATION,
 		TEXT_ELEMENT_LEAVE_ENTITIES		= NEEDS_NEWLINE_NORMALIZATION,
@@ -140,6 +151,7 @@ public:
 
 
 private:
 private:
 	void Reset();
 	void Reset();
+	void CollapseWhitespace();
 
 
 	enum {
 	enum {
 		NEEDS_FLUSH = 0x100,
 		NEEDS_FLUSH = 0x100,
@@ -365,6 +377,7 @@ public:
 	// correct, but simple, and usually works.
 	// correct, but simple, and usually works.
 	static const char* SkipWhiteSpace( const char* p )	{ while( !IsUTF8Continuation(*p) && isspace( *reinterpret_cast<const unsigned char*>(p) ) ) { ++p; } return p; }
 	static const char* SkipWhiteSpace( const char* p )	{ while( !IsUTF8Continuation(*p) && isspace( *reinterpret_cast<const unsigned char*>(p) ) ) { ++p; } return p; }
 	static char* SkipWhiteSpace( char* p )				{ while( !IsUTF8Continuation(*p) && isspace( *reinterpret_cast<unsigned char*>(p) ) )		{ ++p; } return p; }
 	static char* SkipWhiteSpace( char* p )				{ while( !IsUTF8Continuation(*p) && isspace( *reinterpret_cast<unsigned char*>(p) ) )		{ ++p; } return p; }
+	static bool IsWhiteSpace( char p )					{ return !IsUTF8Continuation(p) && isspace( static_cast<unsigned char>(p) ); }	// Single-character form of the test SkipWhiteSpace() loops on: true when isspace() accepts p and p is not a UTF-8 continuation byte.
 
 
 	inline static bool StringEqual( const char* p, const char* q, int nChar=INT_MAX )  {
 	inline static bool StringEqual( const char* p, const char* q, int nChar=INT_MAX )  {
 		int n = 0;
 		int n = 0;
@@ -1031,6 +1044,12 @@ private:
 };
 };
 
 
 
 
+enum Whitespace {	// Whitespace handling mode for text, passed to the XMLDocument constructor and read back through XMLDocument::WhitespaceMode().
+	PRESERVE_WHITESPACE,	// the constructor default; no collapse flag is added when text is parsed
+	COLLAPSE_WHITESPACE	// XMLText::ParseDeep adds StrPair::COLLAPSE_WHITESPACE to the parse flags, so text is trimmed and inner whitespace runs become a single space
+};	   
+
+	
 /** A Document binds together all the functionality. 
 /** A Document binds together all the functionality. 
 	It can be saved, loaded, and printed to the screen.
 	It can be saved, loaded, and printed to the screen.
 	All Nodes are connected and allocated to a Document.
 	All Nodes are connected and allocated to a Document.
@@ -1041,7 +1060,7 @@ class XMLDocument : public XMLNode
 	friend class XMLElement;
 	friend class XMLElement;
 public:
 public:
 	/// constructor
 	/// constructor
-	XMLDocument( bool processEntities = true ); 
+	XMLDocument( bool processEntities = true, Whitespace = PRESERVE_WHITESPACE ); 
 	~XMLDocument();
 	~XMLDocument();
 
 
 	virtual XMLDocument* ToDocument()				{ return this; }
 	virtual XMLDocument* ToDocument()				{ return this; }
@@ -1086,7 +1105,8 @@ public:
 	*/
 	*/
 	int SaveFile( FILE* );
 	int SaveFile( FILE* );
 
 
-	bool ProcessEntities() const						{ return processEntities; }
+	bool ProcessEntities() const		{ return processEntities; }
+	Whitespace WhitespaceMode() const	{ return whitespace; }
 
 
 	/**
 	/**
 		Returns true if this document has a leading Byte Order Mark of UTF8.
 		Returns true if this document has a leading Byte Order Mark of UTF8.
@@ -1189,6 +1209,7 @@ private:
 	bool writeBOM;
 	bool writeBOM;
 	bool processEntities;
 	bool processEntities;
 	int errorID;
 	int errorID;
+	Whitespace whitespace;
 	const char* errorStr1;
 	const char* errorStr1;
 	const char* errorStr2;
 	const char* errorStr2;
 	char* charBuffer;
 	char* charBuffer;

+ 19 - 0
xmltest.cpp

@@ -938,6 +938,25 @@ int main( int /*argc*/, const char ** /*argv*/ )
 		XMLTest( "QueryBoolText", boolValue, true,					false );
 		XMLTest( "QueryBoolText", boolValue, true,					false );
 	}
 	}
 
 
+	// ----------- Whitespace ------------
+	{
+		const char* xml = "<element>"
+							"<a> This \nis &apos;  text  &apos; </a>"
+							"<b>  This is &apos; text &apos;  \n</b>"
+							"<c>This  is  &apos;  \n\n text &apos;</c>"
+						  "</element>";
+		XMLDocument doc( true, COLLAPSE_WHITESPACE );
+		doc.Parse( xml );
+
+		const XMLElement* element = doc.FirstChildElement();
+		for( const XMLElement* parent = element->FirstChildElement();
+			 parent;
+			 parent = parent->NextSiblingElement() )
+		{
+			XMLTest( "Whitespace collapse", "This is ' text '", parent->GetText() );
+		}
+	}
+
 	
 	
 	// ----------- Performance tracking --------------
 	// ----------- Performance tracking --------------
 	{
 	{