Jelajahi Sumber

Let several archives be case-insensitive.

(and several more probably _should_ be but I don't have the details on
them at the moment. But now it's just changing a 1 to a 0 to fix those!)
Ryan C. Gordon 3 tahun lalu
induk
melakukan
17b691b0ea

+ 57 - 7
src/physfs.c

@@ -1433,15 +1433,60 @@ char *__PHYSFS_strdup(const char *str)
 } /* __PHYSFS_strdup */
 
 
-PHYSFS_uint32 __PHYSFS_hashString(const char *str, size_t len)
+PHYSFS_uint32 __PHYSFS_hashString(const char *str)
 {
     PHYSFS_uint32 hash = 5381;
-    while (len--)
-        hash = ((hash << 5) + hash) ^ *(str++);
+    while (1)
+    {
+        const char ch = *(str++);
+        if (ch == 0)
+            break;
+        hash = ((hash << 5) + hash) ^ ch;
+    } /* while */
     return hash;
 } /* __PHYSFS_hashString */
 
 
+PHYSFS_uint32 __PHYSFS_hashStringCaseFold(const char *str)
+{
+    PHYSFS_uint32 hash = 5381;
+    while (1)
+    {
+        const PHYSFS_uint32 cp = __PHYSFS_utf8codepoint(&str);
+        if (cp == 0)
+            break;
+        else
+        {
+            PHYSFS_uint32 folded[3];
+            const int numbytes = (int) (PHYSFS_caseFold(cp, folded) * sizeof (PHYSFS_uint32));
+            const char *bytes = (const char *) folded;
+            int i;
+            for (i = 0; i < numbytes; i++)
+                hash = ((hash << 5) + hash) ^ *(bytes++);
+        } /* else */
+    } /* while */
+
+    return hash;
+} /* __PHYSFS_hashStringCaseFold */
+
+
+PHYSFS_uint32 __PHYSFS_hashStringCaseFoldUSAscii(const char *str)
+{
+    PHYSFS_uint32 hash = 5381;
+    while (1)
+    {
+        char ch = *(str++);
+        if (ch == 0)
+            break;
+        else if ((ch >= 'A') && (ch <= 'Z'))
+            ch -= ('A' - 'a');
+
+        hash = ((hash << 5) + hash) ^ ch;
+    } /* while */
+    return hash;
+} /* __PHYSFS_hashStringCaseFoldUSAscii */
+
+
 /* MAKE SURE you hold stateLock before calling this! */
 static int doRegisterArchiver(const PHYSFS_Archiver *_archiver)
 {
@@ -3229,7 +3274,7 @@ static void setDefaultAllocator(void)
 } /* setDefaultAllocator */
 
 
-int __PHYSFS_DirTreeInit(__PHYSFS_DirTree *dt, const size_t entrylen)
+int __PHYSFS_DirTreeInit(__PHYSFS_DirTree *dt, const size_t entrylen, const int case_sensitive, const int only_usascii)
 {
     static char rootpath[2] = { '/', '\0' };
     size_t alloclen;
@@ -3237,6 +3282,8 @@ int __PHYSFS_DirTreeInit(__PHYSFS_DirTree *dt, const size_t entrylen)
     assert(entrylen >= sizeof (__PHYSFS_DirTreeEntry));
 
     memset(dt, '\0', sizeof (*dt));
+    dt->case_sensitive = case_sensitive;
+    dt->only_usascii = only_usascii;
 
     dt->root = (__PHYSFS_DirTreeEntry *) allocator.Malloc(entrylen);
     BAIL_IF(!dt->root, PHYSFS_ERR_OUT_OF_MEMORY, 0);
@@ -3257,9 +3304,10 @@ int __PHYSFS_DirTreeInit(__PHYSFS_DirTree *dt, const size_t entrylen)
 } /* __PHYSFS_DirTreeInit */
 
 
-static inline PHYSFS_uint32 hashPathName(__PHYSFS_DirTree *dt, const char *name)
+static PHYSFS_uint32 hashPathName(__PHYSFS_DirTree *dt, const char *name)
 {
-    return __PHYSFS_hashString(name, strlen(name)) % dt->hashBuckets;
+    const PHYSFS_uint32 hashval = dt->case_sensitive ? __PHYSFS_hashString(name) : dt->only_usascii ? __PHYSFS_hashStringCaseFoldUSAscii(name) : __PHYSFS_hashStringCaseFold(name);
+    return hashval % dt->hashBuckets;
 } /* hashPathName */
 
 
@@ -3320,6 +3368,7 @@ void *__PHYSFS_DirTreeAdd(__PHYSFS_DirTree *dt, char *name, const int isdir)
 /* Find the __PHYSFS_DirTreeEntry for a path in platform-independent notation. */
 void *__PHYSFS_DirTreeFind(__PHYSFS_DirTree *dt, const char *path)
 {
+    const int cs = dt->case_sensitive;
     PHYSFS_uint32 hashval;
     __PHYSFS_DirTreeEntry *prev = NULL;
     __PHYSFS_DirTreeEntry *retval;
@@ -3330,7 +3379,8 @@ void *__PHYSFS_DirTreeFind(__PHYSFS_DirTree *dt, const char *path)
     hashval = hashPathName(dt, path);
     for (retval = dt->hash[hashval]; retval; retval = retval->hashnext)
     {
-        if (strcmp(retval->name, path) == 0)
+        const int cmp = cs ? strcmp(retval->name, path) : PHYSFS_utf8stricmp(retval->name, path);
+        if (cmp == 0)
         {
             if (prev != NULL)  /* move this to the front of the list */
             {

+ 1 - 1
src/physfs_archiver_7z.c

@@ -185,7 +185,7 @@ static int szipLoadEntries(SZIPinfo *info)
 {
     int retval = 0;
 
-    if (__PHYSFS_DirTreeInit(&info->tree, sizeof (SZIPentry)))
+    if (__PHYSFS_DirTreeInit(&info->tree, sizeof (SZIPentry), 1, 0))
     {
         const PHYSFS_uint32 count = info->db.NumFiles;
         PHYSFS_uint32 i;

+ 1 - 1
src/physfs_archiver_grp.c

@@ -76,7 +76,7 @@ static void *GRP_openArchive(PHYSFS_Io *io, const char *name,
     BAIL_IF_ERRPASS(!__PHYSFS_readAll(io, &count, sizeof(count)), NULL);
     count = PHYSFS_swapULE32(count);
 
-    unpkarc = UNPK_openArchive(io);
+    unpkarc = UNPK_openArchive(io, 0, 1);
     BAIL_IF_ERRPASS(!unpkarc, NULL);
 
     if (!grpLoadEntries(io, count, unpkarc))

+ 1 - 1
src/physfs_archiver_hog.c

@@ -130,7 +130,7 @@ static void *HOG_openArchive(PHYSFS_Io *io, const char *name,
 
     *claimed = 1;
 
-    unpkarc = UNPK_openArchive(io);
+    unpkarc = UNPK_openArchive(io, 0, 1);
     BAIL_IF_ERRPASS(!unpkarc, NULL);
 
     if (!(hog1 ? hog1LoadEntries(io, unpkarc) : hog2LoadEntries(io, unpkarc)))

+ 2 - 1
src/physfs_archiver_iso9660.c

@@ -346,7 +346,8 @@ static void *ISO9660_openArchive(PHYSFS_Io *io, const char *filename,
     if (!parseVolumeDescriptor(io, &rootpos, &len, &joliet, claimed))
         return NULL;
 
-    unpkarc = UNPK_openArchive(io);
+    /* !!! FIXME: check case_sensitive and only_usascii params for this archive. */
+    unpkarc = UNPK_openArchive(io, 1, 0);
     BAIL_IF_ERRPASS(!unpkarc, NULL);
 
     if (!iso9660LoadEntries(io, joliet, "", rootpos, rootpos + len, unpkarc))

+ 1 - 1
src/physfs_archiver_mvl.c

@@ -70,7 +70,7 @@ static void *MVL_openArchive(PHYSFS_Io *io, const char *name,
     BAIL_IF_ERRPASS(!__PHYSFS_readAll(io, &count, sizeof(count)), NULL);
     count = PHYSFS_swapULE32(count);
 
-    unpkarc = UNPK_openArchive(io);
+    unpkarc = UNPK_openArchive(io, 0, 1);
     BAIL_IF_ERRPASS(!unpkarc, NULL);
 
     if (!mvlLoadEntries(io, count, unpkarc))

+ 2 - 1
src/physfs_archiver_qpak.c

@@ -86,7 +86,8 @@ static void *QPAK_openArchive(PHYSFS_Io *io, const char *name,
 
     BAIL_IF_ERRPASS(!io->seek(io, pos), NULL);
 
-    unpkarc = UNPK_openArchive(io);
+    /* !!! FIXME: check case_sensitive and only_usascii params for this archive. */
+    unpkarc = UNPK_openArchive(io, 1, 0);
     BAIL_IF_ERRPASS(!unpkarc, NULL);
 
     if (!qpakLoadEntries(io, count, unpkarc))

+ 2 - 1
src/physfs_archiver_slb.c

@@ -94,7 +94,8 @@ static void *SLB_openArchive(PHYSFS_Io *io, const char *name,
     /* seek to the table of contents */
     BAIL_IF_ERRPASS(!io->seek(io, tocPos), NULL);
 
-    unpkarc = UNPK_openArchive(io);
+    /* !!! FIXME: check case_sensitive and only_usascii params for this archive. */
+    unpkarc = UNPK_openArchive(io, 1, 0);
     BAIL_IF_ERRPASS(!unpkarc, NULL);
 
     if (!slbLoadEntries(io, count, unpkarc))

+ 2 - 2
src/physfs_archiver_unpacked.c

@@ -285,12 +285,12 @@ void *UNPK_addEntry(void *opaque, char *name, const int isdir,
 } /* UNPK_addEntry */
 
 
-void *UNPK_openArchive(PHYSFS_Io *io)
+void *UNPK_openArchive(PHYSFS_Io *io, const int case_sensitive, const int only_usascii)
 {
     UNPKinfo *info = (UNPKinfo *) allocator.Malloc(sizeof (UNPKinfo));
     BAIL_IF(!info, PHYSFS_ERR_OUT_OF_MEMORY, NULL);
 
-    if (!__PHYSFS_DirTreeInit(&info->tree, sizeof (UNPKentry)))
+    if (!__PHYSFS_DirTreeInit(&info->tree, sizeof (UNPKentry), case_sensitive, only_usascii))
     {
         allocator.Free(info);
         return NULL;

+ 2 - 1
src/physfs_archiver_vdf.c

@@ -129,7 +129,8 @@ static void *VDF_openArchive(PHYSFS_Io *io, const char *name,
 
     BAIL_IF_ERRPASS(!io->seek(io, rootCatOffset), NULL);
 
-    unpkarc = UNPK_openArchive(io);
+    /* !!! FIXME: check case_sensitive and only_usascii params for this archive. */
+    unpkarc = UNPK_openArchive(io, 1, 0);
     BAIL_IF_ERRPASS(!unpkarc, NULL);
 
     if (!vdfLoadEntries(io, count, vdfDosTimeToEpoch(timestamp), unpkarc))

+ 1 - 1
src/physfs_archiver_wad.c

@@ -95,7 +95,7 @@ static void *WAD_openArchive(PHYSFS_Io *io, const char *name,
 
     BAIL_IF_ERRPASS(!io->seek(io, directoryOffset), 0);
 
-    unpkarc = UNPK_openArchive(io);
+    unpkarc = UNPK_openArchive(io, 0, 1);
     BAIL_IF_ERRPASS(!unpkarc, NULL);
 
     if (!wadLoadEntries(io, count, unpkarc))

+ 1 - 1
src/physfs_archiver_zip.c

@@ -1482,7 +1482,7 @@ static void *ZIP_openArchive(PHYSFS_Io *io, const char *name,
 
     if (!zip_parse_end_of_central_dir(info, &dstart, &cdir_ofs, &count))
         goto ZIP_openarchive_failed;
-    else if (!__PHYSFS_DirTreeInit(&info->tree, sizeof (ZIPentry)))
+    else if (!__PHYSFS_DirTreeInit(&info->tree, sizeof (ZIPentry), 1, 0))
         goto ZIP_openarchive_failed;
 
     root = (ZIPentry *) info->tree.root;

+ 23 - 3
src/physfs_internal.h

@@ -322,7 +322,18 @@ char *__PHYSFS_strdup(const char *str);
 /*
  * Give a hash value for a C string (uses djb's xor hashing algorithm).
  */
-PHYSFS_uint32 __PHYSFS_hashString(const char *str, size_t len);
+PHYSFS_uint32 __PHYSFS_hashString(const char *str);
+
+/*
+ * Give a hash value for a C string (uses djb's xor hashing algorithm), case folding as it goes.
+ */
+PHYSFS_uint32 __PHYSFS_hashStringCaseFold(const char *str);
+
+/*
+ * Give a hash value for a C string (uses djb's xor hashing algorithm), case folding as it goes,
+ *  assuming that this is only US-ASCII chars (one byte per char, only 'A' through 'Z' need folding).
+ */
+PHYSFS_uint32 __PHYSFS_hashStringCaseFoldUSAscii(const char *str);
 
 
 /*
@@ -358,9 +369,10 @@ int __PHYSFS_readAll(PHYSFS_Io *io, void *buf, const size_t len);
 
 /* These are shared between some archivers. */
 
+/* LOTS of legacy formats that only use US ASCII, not actually UTF-8, so let them optimize here. */
+void *UNPK_openArchive(PHYSFS_Io *io, const int case_sensitive, const int only_usascii);
 void UNPK_abandonArchive(void *opaque);
 void UNPK_closeArchive(void *opaque);
-void *UNPK_openArchive(PHYSFS_Io *io);
 void *UNPK_addEntry(void *opaque, char *name, const int isdir,
                     const PHYSFS_sint64 ctime, const PHYSFS_sint64 mtime,
                     const PHYSFS_uint64 pos, const PHYSFS_uint64 len);
@@ -392,10 +404,13 @@ typedef struct __PHYSFS_DirTree
     __PHYSFS_DirTreeEntry **hash;  /* all entries hashed for fast lookup. */
     size_t hashBuckets;            /* number of buckets in hash.          */
     size_t entrylen;    /* size in bytes of entries (including subclass). */
+    int case_sensitive;  /* non-zero to treat entries as case-sensitive in DirTreeFind */
+    int only_usascii;  /* non-zero to treat paths as US ASCII only (one byte per char, only 'A' through 'Z' are considered for case folding). */
 } __PHYSFS_DirTree;
 
 
-int __PHYSFS_DirTreeInit(__PHYSFS_DirTree *dt, const size_t entrylen);
+/* LOTS of legacy formats that only use US ASCII, not actually UTF-8, so let them optimize here. */
+int __PHYSFS_DirTreeInit(__PHYSFS_DirTree *dt, const size_t entrylen, const int case_sensitive, const int only_usascii);
 void *__PHYSFS_DirTreeAdd(__PHYSFS_DirTree *dt, char *name, const int isdir);
 void *__PHYSFS_DirTreeFind(__PHYSFS_DirTree *dt, const char *path);
 PHYSFS_EnumerateCallbackResult __PHYSFS_DirTreeEnumerate(void *opaque,
@@ -725,6 +740,11 @@ int __PHYSFS_platformGrabMutex(void *mutex);
  */
 void __PHYSFS_platformReleaseMutex(void *mutex);
 
+
+/* !!! FIXME: move to public API? */
+PHYSFS_uint32 __PHYSFS_utf8codepoint(const char **_str);
+
+
 #if PHYSFS_HAVE_PRAGMA_VISIBILITY
 #pragma GCC visibility pop
 #endif

+ 11 - 6
src/physfs_unicode.c

@@ -21,8 +21,8 @@
 /*
  * This may not be the best value, but it's one that isn't represented
  *  in Unicode (0x10FFFF is the largest codepoint value). We return this
- *  value from utf8codepoint() if there's bogus bits in the
- *  stream. utf8codepoint() will turn this value into something
+ *  value from __PHYSFS_utf8codepoint() if there's bogus bits in the
+ *  stream. __PHYSFS_utf8codepoint() will turn this value into something
  *  reasonable (like a question mark), for text that wants to try to recover,
  *  whereas utf8valid() will use the value to determine if a string has bad
  *  bits.
@@ -35,7 +35,7 @@
  */
 #define UNICODE_BOGUS_CHAR_CODEPOINT '?'
 
-static PHYSFS_uint32 utf8codepoint(const char **_str)
+PHYSFS_uint32 __PHYSFS_utf8codepoint(const char **_str)
 {
     const char *str = *_str;
     PHYSFS_uint32 retval = 0;
@@ -188,6 +188,11 @@ static PHYSFS_uint32 utf8codepoint(const char **_str)
     } /* else if */
 
     return UNICODE_BOGUS_CHAR_VALUE;
+} /* __PHYSFS_utf8codepoint */
+
+static inline PHYSFS_uint32 utf8codepoint(const char **_str)
+{
+    return __PHYSFS_utf8codepoint(_str);
 } /* utf8codepoint */
 
 static PHYSFS_uint32 utf16codepoint(const PHYSFS_uint16 **_str)
@@ -238,7 +243,7 @@ void PHYSFS_utf8ToUcs4(const char *src, PHYSFS_uint32 *dst, PHYSFS_uint64 len)
     len -= sizeof (PHYSFS_uint32);   /* save room for null char. */
     while (len >= sizeof (PHYSFS_uint32))
     {
-        PHYSFS_uint32 cp = utf8codepoint(&src);
+        PHYSFS_uint32 cp = __PHYSFS_utf8codepoint(&src);
         if (cp == 0)
             break;
         else if (cp == UNICODE_BOGUS_CHAR_VALUE)
@@ -256,7 +261,7 @@ void PHYSFS_utf8ToUcs2(const char *src, PHYSFS_uint16 *dst, PHYSFS_uint64 len)
     len -= sizeof (PHYSFS_uint16);   /* save room for null char. */
     while (len >= sizeof (PHYSFS_uint16))
     {
-        PHYSFS_uint32 cp = utf8codepoint(&src);
+        PHYSFS_uint32 cp = __PHYSFS_utf8codepoint(&src);
         if (cp == 0)
             break;
         else if (cp == UNICODE_BOGUS_CHAR_VALUE)
@@ -278,7 +283,7 @@ void PHYSFS_utf8ToUtf16(const char *src, PHYSFS_uint16 *dst, PHYSFS_uint64 len)
     len -= sizeof (PHYSFS_uint16);   /* save room for null char. */
     while (len >= sizeof (PHYSFS_uint16))
     {
-        PHYSFS_uint32 cp = utf8codepoint(&src);
+        PHYSFS_uint32 cp = __PHYSFS_utf8codepoint(&src);
         if (cp == 0)
             break;
         else if (cp == UNICODE_BOGUS_CHAR_VALUE)