Просмотр исходного кода

Added performance test and option to leave entities

Lee Thomason 14 лет назад
Родитель
Коммит
6f381b7739
6 измененных файлов с 133 добавлено и 31 удалено
  1. 1 1
      readme.txt
  2. 32 26
      tinyxml2.cpp
  3. 13 4
      tinyxml2.h
  4. 3 0
      tinyxml2/tinyxml2.vcxproj
  5. 5 0
      tinyxml2/tinyxml2.vcxproj.filters
  6. 79 0
      xmltest.cpp

+ 1 - 1
readme.txt

@@ -55,7 +55,7 @@ complete XML needs, TinyXML-2 is not the parser for you.
 Which should you use? TinyXML-2 uses a similar API to TinyXML-1 and the same
 rich test cases. But the implementation of the parser is completely re-written
 to make it more appropriate for use in a game. It uses less memory, is faster,
-and user far few memory allocations.
+and uses far fewer memory allocations.
 
 TinyXML-2 has no requirement for STL, but has also dropped all STL support. All
 strings are queried and set as 'const char*'. This allows the use of internal 

+ 32 - 26
tinyxml2.cpp

@@ -740,7 +740,7 @@ char* XMLText::ParseDeep( char* p, StrPair* )
 		return p;
 	}
 	else {
-		p = value.ParseText( p, "<", StrPair::TEXT_ELEMENT );
+		p = value.ParseText( p, "<", document->ProcessEntities() ? StrPair::TEXT_ELEMENT : StrPair::TEXT_ELEMENT_LEAVE_ENTITIES );
 		if ( !p ) {
 			document->SetError( ERROR_PARSING_TEXT, start, 0 );
 		}
@@ -916,14 +916,14 @@ bool XMLUnknown::Accept( XMLVisitor* visitor ) const
 }
 
 // --------- XMLAttribute ---------- //
-char* XMLAttribute::ParseDeep( char* p )
+char* XMLAttribute::ParseDeep( char* p, bool processEntities )
 {
 	p = name.ParseText( p, "=", StrPair::ATTRIBUTE_NAME );
 	if ( !p || !*p ) return 0;
 
 	char endTag[2] = { *p, 0 };
 	++p;
-	p = value.ParseText( p, endTag, StrPair::ATTRIBUTE_VALUE );
+	p = value.ParseText( p, endTag, processEntities ? StrPair::ATTRIBUTE_VALUE : StrPair::ATTRIBUTE_VALUE_LEAVE_ENTITIES );
 	//if ( value.Empty() ) return 0;
 	return p;
 }
@@ -1141,7 +1141,7 @@ char* XMLElement::ParseAttributes( char* p )
 			XMLAttribute* attrib = new (document->attributePool.Alloc() ) XMLAttribute();
 			attrib->memPool = &document->attributePool;
 
-			p = attrib->ParseDeep( p );
+			p = attrib->ParseDeep( p, document->ProcessEntities() );
 			if ( !p || Attribute( attrib->Name() ) ) {
 				DELETE_ATTRIBUTE( attrib );
 				document->SetError( ERROR_PARSING_ATTRIBUTE, start, p );
@@ -1250,9 +1250,13 @@ bool XMLElement::Accept( XMLVisitor* visitor ) const
 
 
 // --------- XMLDocument ----------- //
-XMLDocument::XMLDocument() :
+XMLDocument::XMLDocument( bool _processEntities ) :
 	XMLNode( 0 ),
 	writeBOM( false ),
+	processEntities( _processEntities ),
+	errorID( 0 ),
+	errorStr1( 0 ),
+	errorStr2( 0 ),
 	charBuffer( 0 )
 {
 	document = this;	// avoid warning about 'this' in initializer list
@@ -1474,7 +1478,8 @@ XMLPrinter::XMLPrinter( FILE* file ) :
 	firstElement( true ),
 	fp( file ), 
 	depth( 0 ), 
-	textDepth( -1 )
+	textDepth( -1 ),
+	processEntities( true )
 {
 	for( int i=0; i<ENTITY_RANGE; ++i ) {
 		entityFlag[i] = false;
@@ -1540,31 +1545,33 @@ void XMLPrinter::PrintString( const char* p, bool restricted )
 	const char* q = p;
 	const bool* flag = restricted ? restrictedEntityFlag : entityFlag;
 
-	while ( *q ) {
-		// Remember, char is sometimes signed. (How many times has that bitten me?)
-		if ( *q > 0 && *q < ENTITY_RANGE ) {
-			// Check for entities. If one is found, flush
-			// the stream up until the entity, write the 
-			// entity, and keep looking.
-			if ( flag[*q] ) {
-				while ( p < q ) {
-					Print( "%c", *p );
-					++p;
-				}
-				for( int i=0; i<NUM_ENTITIES; ++i ) {
-					if ( entities[i].value == *q ) {
-						Print( "&%s;", entities[i].pattern );
-						break;
+	if ( processEntities ) {
+		while ( *q ) {
+			// Remember, char is sometimes signed. (How many times has that bitten me?)
+			if ( *q > 0 && *q < ENTITY_RANGE ) {
+				// Check for entities. If one is found, flush
+				// the stream up until the entity, write the 
+				// entity, and keep looking.
+				if ( flag[*q] ) {
+					while ( p < q ) {
+						Print( "%c", *p );
+						++p;
+					}
+					for( int i=0; i<NUM_ENTITIES; ++i ) {
+						if ( entities[i].value == *q ) {
+							Print( "&%s;", entities[i].pattern );
+							break;
+						}
 					}
+					++p;
 				}
-				++p;
 			}
+			++q;
 		}
-		++q;
 	}
 	// Flush the remaining string. This will be the entire
 	// string if an entity wasn't found.
-	if ( q-p > 0 ) {
+	if ( !processEntities || (q-p > 0) ) {
 		Print( "%s", p );
 	}
 }
@@ -1735,6 +1742,7 @@ void XMLPrinter::PushUnknown( const char* value )
 
 bool XMLPrinter::VisitEnter( const XMLDocument& doc )
 {
+	processEntities = doc.ProcessEntities();
 	if ( doc.HasBOM() ) {
 		PushHeader( true, false );
 	}
@@ -1785,5 +1793,3 @@ bool XMLPrinter::Visit( const XMLUnknown& unknown )
 	PushUnknown( unknown.Value() );
 	return true;
 }
-
-

+ 13 - 4
tinyxml2.h

@@ -115,8 +115,10 @@ public:
 		NEEDS_NEWLINE_NORMALIZATION		= 0x02,
 
 		TEXT_ELEMENT		= NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION,
+		TEXT_ELEMENT_LEAVE_ENTITIES		= NEEDS_NEWLINE_NORMALIZATION,
 		ATTRIBUTE_NAME		= 0,
 		ATTRIBUTE_VALUE		= NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION,
+		ATTRIBUTE_VALUE_LEAVE_ENTITIES		= NEEDS_NEWLINE_NORMALIZATION,
 		COMMENT				= NEEDS_NEWLINE_NORMALIZATION,
 	};
 
@@ -804,7 +806,7 @@ private:
 	void operator=( const XMLAttribute& );	// not supported
 	void SetName( const char* name );
 
-	char* ParseDeep( char* p );
+	char* ParseDeep( char* p, bool processEntities );
 
 	mutable StrPair name;
 	mutable StrPair value;
@@ -962,7 +964,7 @@ class XMLDocument : public XMLNode
 	friend class XMLElement;
 public:
 	/// constructor
-	XMLDocument(); 
+	XMLDocument( bool processEntities = true ); 
 	~XMLDocument();
 
 	virtual XMLDocument* ToDocument()				{ return this; }
@@ -993,6 +995,11 @@ public:
 	*/
 	void SaveFile( const char* filename );
 
+	bool ProcessEntities() const						{ return processEntities; }
+
+	/**
+		Returns true if this document has a leading Byte Order Mark of UTF8.
+	*/
 	bool HasBOM() const { return writeBOM; }
 
 	/** Return the root element of DOM. Equivalent to FirstChildElement().
@@ -1071,8 +1078,8 @@ public:
 	// internal
 	char* Identify( char* p, XMLNode** node );
 
-	virtual XMLNode* ShallowClone( XMLDocument* document ) const	{ return 0; }
-	virtual bool ShallowEqual( const XMLNode* compare ) const	{ return false; }
+	virtual XMLNode* ShallowClone( XMLDocument* /*document*/ ) const	{ return 0; }
+	virtual bool ShallowEqual( const XMLNode* /*compare*/ ) const	{ return false; }
 
 private:
 	XMLDocument( const XMLDocument& );	// not supported
@@ -1080,6 +1087,7 @@ private:
 	void InitDocument();
 
 	bool writeBOM;
+	bool processEntities;
 	int errorID;
 	const char* errorStr1;
 	const char* errorStr2;
@@ -1196,6 +1204,7 @@ private:
 	FILE* fp;
 	int depth;
 	int textDepth;
+	bool processEntities;
 
 	enum {
 		ENTITY_RANGE = 64,

+ 3 - 0
tinyxml2/tinyxml2.vcxproj

@@ -80,6 +80,9 @@
   <ItemGroup>
     <ClInclude Include="..\tinyxml2.h" />
   </ItemGroup>
+  <ItemGroup>
+    <None Include="..\readme.txt" />
+  </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>

+ 5 - 0
tinyxml2/tinyxml2.vcxproj.filters

@@ -19,4 +19,9 @@
       <Filter>Source Files</Filter>
     </ClInclude>
   </ItemGroup>
+  <ItemGroup>
+    <None Include="..\readme.txt">
+      <Filter>Source Files</Filter>
+    </None>
+  </ItemGroup>
 </Project>

+ 79 - 0
xmltest.cpp

@@ -3,9 +3,12 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <time.h>
 
 #if defined( _MSC_VER )
 	#include <crtdbg.h>
+	#define WIN32_LEAN_AND_MEAN
+	#include <windows.h>
 	_CrtMemState startMemState;
 	_CrtMemState endMemState;
 #endif
@@ -211,6 +214,8 @@ int main( int /*argc*/, const char* /*argv*/ )
 
 		//gNewTotal = gNew - newStart;
 	}
+
+
 	{
 		const char* error =	"<?xml version=\"1.0\" standalone=\"no\" ?>\n"
 							"<passages count=\"006\" formatversion=\"20020620\">\n"
@@ -458,6 +463,24 @@ int main( int /*argc*/, const char* /*argv*/ )
 		fclose( textfile );
 	}
 
+	{
+		// Suppress entities.
+		const char* passages =
+			"<?xml version=\"1.0\" standalone=\"no\" ?>"
+			"<passages count=\"006\" formatversion=\"20020620\">"
+				"<psg context=\"Line 5 has &quot;quotation marks&quot; and &apos;apostrophe marks&apos;.\">Crazy &ttk;</psg>"
+			"</passages>";
+		
+		XMLDocument doc( false );
+		doc.Parse( passages );
+
+		XMLTest( "No entity parsing.", doc.FirstChildElement()->FirstChildElement()->Attribute( "context" ), 
+				 "Line 5 has &quot;quotation marks&quot; and &apos;apostrophe marks&apos;." );
+		XMLTest( "No entity parsing.", doc.FirstChildElement()->FirstChildElement()->FirstChild()->Value(),
+				 "Crazy &ttk;" );
+		doc.Print();
+	}
+
 	{
         const char* test = "<?xml version='1.0'?><a.elem xmi.version='2.0'/>";
 
@@ -653,6 +676,62 @@ int main( int /*argc*/, const char* /*argv*/ )
 		XMLTest( "Clone and Equal", 4, count );
 	}
 
+	// ----------- Performance tracking --------------
+	{
+#if defined( _MSC_VER )
+		__int64 start, end, freq;
+		QueryPerformanceFrequency( (LARGE_INTEGER*) &freq );
+#endif
+
+#if defined(_MSC_VER)
+#pragma warning ( push )
+#pragma warning ( disable : 4996 )		// Fail to see a compelling reason why this should be deprecated.
+#endif
+		FILE* fp  = fopen( "dream.xml", "r" );
+#if defined(_MSC_VER)
+#pragma warning ( pop )
+#endif
+		fseek( fp, 0, SEEK_END );
+		long size = ftell( fp );
+		fseek( fp, 0, SEEK_SET );
+
+		char* mem = new char[size+1];
+		fread( mem, size, 1, fp );
+		fclose( fp );
+		mem[size] = 0;
+
+#if defined( _MSC_VER )
+		QueryPerformanceCounter( (LARGE_INTEGER*) &start );
+#else
+		clock_t cstart = clock();
+#endif
+		static const int COUNT = 10;
+		for( int i=0; i<COUNT; ++i ) {
+			XMLDocument doc;
+			doc.Parse( mem );
+		}
+#if defined( _MSC_VER )
+		QueryPerformanceCounter( (LARGE_INTEGER*) &end );
+#else
+		clock_t cend = clock();
+#endif
+
+		delete [] mem;
+
+		static const char* note = 
+#ifdef DEBUG
+			"DEBUG";
+#else
+			"Release";
+#endif
+
+#if defined( _MSC_VER )
+		printf( "\nParsing %s of dream.xml: %.3f milli-seconds\n", note, 1000.0 * (double)(end-start) / ( (double)freq * (double)COUNT) );
+#else
+		printf( "\nParsing %s of dream.xml: %.3f milli-seconds\n", note, (double)(cend - cstart)/(double)COUNT );
+#endif
+	}
+
 	#if defined( _MSC_VER )
 		_CrtMemCheckpoint( &endMemState );  
 		//_CrtMemDumpStatistics( &endMemState );