Parcourir la source

another rev of text parsing.

Lee Thomason il y a 14 ans
Parent
commit
fde6a756d1
4 fichiers modifiés avec 58 ajouts et 22 suppressions
  1. 50 15
      tinyxml2.cpp
  2. 4 4
      tinyxml2.h
  3. BIN
      tinyxml2.suo
  4. 4 3
      xmltest.cpp

+ 50 - 15
tinyxml2.cpp

@@ -7,6 +7,12 @@
 
 using namespace tinyxml2;
 
+static const char LINE_FEED				= (char)0x0a;				// all line endings are normalized to LF
+static const char LF = LINE_FEED;
+static const char CARRIAGE_RETURN		= (char)0x0d;			// CR gets filtered out
+static const char CR = CARRIAGE_RETURN;
+
+
 // --------- CharBuffer ----------- //
 /*static*/ CharBuffer* CharBuffer::Construct( const char* in )
 {
@@ -91,25 +97,54 @@ const char* XMLNode::ParseText( char* p, const char* endTag, char** next )
 {
 	TIXMLASSERT( endTag && *endTag );
 
-	char* start = SkipWhiteSpace( p );
-	if ( !start )
-		return 0;
-
-	char endChar = *endTag;
-	p = start;
-	int length = strlen( endTag );
+	char* start = p;
+	char* q = p;		// q (target) <= p (src) in same buffer.
+	char  endChar = *endTag;
+	int   length = strlen( endTag );	
+	char* nextTag = 0;
 
+	// Inner loop of text parsing.
 	while ( *p ) {
-		if ( *p == endChar ) {
-			if ( strncmp( p, endTag, length ) == 0 ) {
-				*p = 0;
-				*next = p + length;
-				return start;
+		if ( *p == endChar && strncmp( p, endTag, length ) == 0 ) {
+			*q = 0;
+			nextTag = p + length;
+			break;
+		}
+		else if ( *p == CR ) {
+			// CR-LF pair becomes LF
+			// CR alone becomes LF
+			// LF-CR becomes LF
+			if ( *(p+1) == LF ) {
+				p += 2;
+			}
+			else {
+				++p;
 			}
+			*q = LF;
 		}
-		++p;
+		else if ( *p == LF ) {
+			if ( *(p+1) == CR ) {
+				p += 2;
+			}
+			else {
+				++p;
+			}
+			*q = LF;
+		}
+		else {
+			*q = *p;
+			++p;
+		}
+		++q;
 	}	
-	return 0;
+
+	// Error? If we never found the end tag, something went wrong. (Although
+	// what nextTag points at may be the terminating null character.)
+	if ( nextTag == 0 ) {
+		return 0;
+	}
+	*next = nextTag;
+	return start;
 }
 
 
@@ -129,7 +164,7 @@ XMLComment::~XMLComment()
 void XMLComment::Print( FILE* fp, int depth )
 {
 	XMLNode::Print( fp, depth );
-	fprintf( fp, "<!-- %s -->\n", value );
+	fprintf( fp, "<!--%s-->\n", value );
 }
 
 

+ 4 - 4
tinyxml2.h

@@ -68,13 +68,13 @@ protected:
 		}
 		return false;
 	}
+	inline static int IsUTF8Continuation( char p ) { return p & 0x80; }
 
 	/* Parses text. (Not a text node.)
 	   - [X] EOL normalization (CR, CR-LF and LF-CR all become LF).
-	   - [x] Trim leading whitespace
-	   - [ ] Trim trailing whitespace.
-	   - [ ] Leaves inner whitespace
-	   - [ ] Inserts one space between lines.
+	   - [X] Do not trim leading whitespace
+	   - [X] Do not trim trailing whitespace.
+	   - [X] Leaves inner whitespace
 	*/
 	const char* ParseText( char* in, const char* endTag, char** next );
 

BIN
tinyxml2.suo


+ 4 - 3
xmltest.cpp

@@ -7,6 +7,7 @@ using namespace tinyxml2;
 
 int main( int argc, const char* argv )
 {
+#if 0
 	{
 		static const char* test = "<!--hello world-->";
 
@@ -14,14 +15,14 @@ int main( int argc, const char* argv )
 		doc.Parse( test );
 		doc.Print( stdout );
 	}
-	/*
+#endif
 	{
-		static const char* test = "<hello></hello>";
+		static const char* test = "<!--hello world\n"
+			                      "          line 2\r-->";
 
 		XMLDocument doc;
 		doc.Parse( test );
 		doc.Print( stdout );
 	}
-	*/
 	return 0;
 }