SDL_qsort.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481
  1. /* qsort.c
  2. * (c) 1998 Gareth McCaughan
  3. *
  4. * This is a drop-in replacement for the C library's |qsort()| routine.
  5. *
  6. * Features:
  7. * - Median-of-three pivoting (and more)
  8. * - Truncation and final polishing by a single insertion sort
  9. * - Early truncation when no swaps needed in pivoting step
  10. * - Explicit recursion, guaranteed not to overflow
  11. * - A few little wrinkles stolen from the GNU |qsort()|.
  12. * - separate code for non-aligned / aligned / word-size objects
  13. *
  14. * This code may be reproduced freely provided
  15. * - this file is retained unaltered apart from minor
  16. * changes for portability and efficiency
  17. * - no changes are made to this comment
  18. * - any changes that *are* made are clearly flagged
  19. * - the _ID string below is altered by inserting, after
  20. * the date, the string " altered" followed at your option
  21. * by other material. (Exceptions: you may change the name
  22. * of the exported routine without changing the ID string.
  23. * You may change the values of the macros TRUNC_* and
  24. * PIVOT_THRESHOLD without changing the ID string, provided
  25. * they remain constants with TRUNC_nonaligned, TRUNC_aligned
  26. * and TRUNC_words/WORD_BYTES between 8 and 24, and
  27. * PIVOT_THRESHOLD between 32 and 200.)
  28. *
  29. * You may use it in anything you like; you may make money
  30. * out of it; you may distribute it in object form or as
  31. * part of an executable without including source code;
  32. * you don't have to credit me. (But it would be nice if
  33. * you did.)
  34. *
  35. * If you find problems with this code, or find ways of
  36. * making it significantly faster, please let me know!
  37. * My e-mail address, valid as of early 1998 and certainly
  38. * OK for at least the next 18 months, is
  39. * gjm11@dpmms.cam.ac.uk
  40. * Thanks!
  41. *
  42. * Gareth McCaughan Peterhouse Cambridge 1998
  43. */
  44. #if defined(__clang_analyzer__) && !defined(SDL_DISABLE_ANALYZE_MACROS)
  45. #define SDL_DISABLE_ANALYZE_MACROS 1
  46. #endif
  47. #include "../SDL_internal.h"
  48. /*
  49. #include <assert.h>
  50. #include <stdlib.h>
  51. #include <string.h>
  52. */
  53. #include "SDL_stdinc.h"
  54. #include "SDL_assert.h"
  55. #if defined(HAVE_QSORT)
  56. void
  57. SDL_qsort(void *base, size_t nmemb, size_t size, int (*compare) (const void *, const void *))
  58. {
  59. qsort(base, nmemb, size, compare);
  60. }
  61. #else
  62. #ifdef assert
  63. #undef assert
  64. #endif
  65. #define assert(X) SDL_assert(X)
  66. #ifdef malloc
  67. #undef malloc
  68. #endif
  69. #define malloc SDL_malloc
  70. #ifdef free
  71. #undef free
  72. #endif
  73. #define free SDL_free
  74. #ifdef memcpy
  75. #undef memcpy
  76. #endif
  77. #define memcpy SDL_memcpy
  78. #ifdef memmove
  79. #undef memmove
  80. #endif
  81. #define memmove SDL_memmove
  82. #ifdef qsort
  83. #undef qsort
  84. #endif
  85. #define qsort SDL_qsort
  86. static const char _ID[] = "<qsort.c gjm 1.12 1998-03-19>";
  87. /* How many bytes are there per word? (Must be a power of 2,
  88. * and must in fact equal sizeof(int).)
  89. */
  90. #define WORD_BYTES sizeof(int)
  91. /* How big does our stack need to be? Answer: one entry per
  92. * bit in a |size_t|.
  93. */
  94. #define STACK_SIZE (8*sizeof(size_t))
  95. /* Different situations have slightly different requirements,
  96. * and we make life epsilon easier by using different truncation
  97. * points for the three different cases.
  98. * So far, I have tuned TRUNC_words and guessed that the same
  99. * value might work well for the other two cases. Of course
  100. * what works well on my machine might work badly on yours.
  101. */
  102. #define TRUNC_nonaligned 12
  103. #define TRUNC_aligned 12
  104. #define TRUNC_words 12*WORD_BYTES /* nb different meaning */
  105. /* We use a simple pivoting algorithm for shortish sub-arrays
  106. * and a more complicated one for larger ones. The threshold
  107. * is PIVOT_THRESHOLD.
  108. */
  109. #define PIVOT_THRESHOLD 40
  110. typedef struct
  111. {
  112. char *first;
  113. char *last;
  114. } stack_entry;
  115. #define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;}
  116. #define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;}
  117. #define doLeft {first=ffirst;llast=last;continue;}
  118. #define doRight {ffirst=first;last=llast;continue;}
  119. #define pop {if (--stacktop<0) break;\
  120. first=ffirst=stack[stacktop].first;\
  121. last=llast=stack[stacktop].last;\
  122. continue;}
  123. /* Some comments on the implementation.
  124. * 1. When we finish partitioning the array into "low"
  125. * and "high", we forget entirely about short subarrays,
  126. * because they'll be done later by insertion sort.
  127. * Doing lots of little insertion sorts might be a win
  128. * on large datasets for locality-of-reference reasons,
  129. * but it makes the code much nastier and increases
  130. * bookkeeping overhead.
  131. * 2. We always save the shorter and get to work on the
  132. * longer. This guarantees that every time we push
  133. * an item onto the stack its size is <= 1/2 of that
  134. * of its parent; so the stack can't need more than
  135. * log_2(max-array-size) entries.
  136. * 3. We choose a pivot by looking at the first, last
  137. * and middle elements. We arrange them into order
  138. * because it's easy to do that in conjunction with
  139. * choosing the pivot, and it makes things a little
  140. * easier in the partitioning step. Anyway, the pivot
  141. * is the middle of these three. It's still possible
  142. * to construct datasets where the algorithm takes
  143. * time of order n^2, but it simply never happens in
  144. * practice.
  145. * 3' Newsflash: On further investigation I find that
  146. * it's easy to construct datasets where median-of-3
  147. * simply isn't good enough. So on large-ish subarrays
  148. * we do a more sophisticated pivoting: we take three
  149. * sets of 3 elements, find their medians, and then
  150. * take the median of those.
  151. * 4. We copy the pivot element to a separate place
  152. * because that way we can always do our comparisons
  153. * directly against a pointer to that separate place,
  154. * and don't have to wonder "did we move the pivot
  155. * element?". This makes the inner loop better.
  156. * 5. It's possible to make the pivoting even more
  157. * reliable by looking at more candidates when n
  158. * is larger. (Taking this to its logical conclusion
  159. * results in a variant of quicksort that doesn't
  160. * have that n^2 worst case.) However, the overhead
  161. * from the extra bookkeeping means that it's just
  162. * not worth while.
  163. * 6. This is pretty clean and portable code. Here are
  164. * all the potential portability pitfalls and problems
  165. * I know of:
  166. * - In one place (the insertion sort) I construct
  167. * a pointer that points just past the end of the
  168. * supplied array, and assume that (a) it won't
  169. * compare equal to any pointer within the array,
  170. * and (b) it will compare equal to a pointer
  171. * obtained by stepping off the end of the array.
  172. * These might fail on some segmented architectures.
  173. * - I assume that there are 8 bits in a |char| when
  174. * computing the size of stack needed. This would
  175. * fail on machines with 9-bit or 16-bit bytes.
  176. * - I assume that if |((int)base&(sizeof(int)-1))==0|
  177. * and |(size&(sizeof(int)-1))==0| then it's safe to
  178. * get at array elements via |int*|s, and that if
  179. * actually |size==sizeof(int)| as well then it's
  180. * safe to treat the elements as |int|s. This might
  181. * fail on systems that convert pointers to integers
  182. * in non-standard ways.
  183. * - I assume that |8*sizeof(size_t)<=INT_MAX|. This
  184. * would be false on a machine with 8-bit |char|s,
  185. * 16-bit |int|s and 4096-bit |size_t|s. :-)
  186. */
  187. /* The recursion logic is the same in each case: */
  188. #define Recurse(Trunc) \
  189. { size_t l=last-ffirst,r=llast-first; \
  190. if (l<Trunc) { \
  191. if (r>=Trunc) doRight \
  192. else pop \
  193. } \
  194. else if (l<=r) { pushLeft; doRight } \
  195. else if (r>=Trunc) { pushRight; doLeft }\
  196. else doLeft \
  197. }
  198. /* and so is the pivoting logic: */
  199. #define Pivot(swapper,sz) \
  200. if ((size_t)(last-first)>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare);\
  201. else { \
  202. if (compare(first,mid)<0) { \
  203. if (compare(mid,last)>0) { \
  204. swapper(mid,last); \
  205. if (compare(first,mid)>0) swapper(first,mid);\
  206. } \
  207. } \
  208. else { \
  209. if (compare(mid,last)>0) swapper(first,last)\
  210. else { \
  211. swapper(first,mid); \
  212. if (compare(mid,last)>0) swapper(mid,last);\
  213. } \
  214. } \
  215. first+=sz; last-=sz; \
  216. }
  217. #ifdef DEBUG_QSORT
  218. #include <stdio.h>
  219. #endif
  220. /* and so is the partitioning logic: */
  221. #define Partition(swapper,sz) { \
  222. int swapped=0; \
  223. do { \
  224. while (compare(first,pivot)<0) first+=sz; \
  225. while (compare(pivot,last)<0) last-=sz; \
  226. if (first<last) { \
  227. swapper(first,last); swapped=1; \
  228. first+=sz; last-=sz; } \
  229. else if (first==last) { first+=sz; last-=sz; break; }\
  230. } while (first<=last); \
  231. if (!swapped) pop \
  232. }
  233. /* and so is the pre-insertion-sort operation of putting
  234. * the smallest element into place as a sentinel.
  235. * Doing this makes the inner loop nicer. I got this
  236. * idea from the GNU implementation of qsort().
  237. */
  238. #define PreInsertion(swapper,limit,sz) \
  239. first=base; \
  240. last=first + (nmemb>limit ? limit : nmemb-1)*sz;\
  241. while (last!=base) { \
  242. if (compare(first,last)>0) first=last; \
  243. last-=sz; } \
  244. if (first!=base) swapper(first,(char*)base);
  245. /* and so is the insertion sort, in the first two cases: */
  246. #define Insertion(swapper) \
  247. last=((char*)base)+nmemb*size; \
  248. for (first=((char*)base)+size;first!=last;first+=size) { \
  249. char *test; \
  250. /* Find the right place for |first|. \
  251. * My apologies for var reuse. */ \
  252. for (test=first-size;compare(test,first)>0;test-=size) ; \
  253. test+=size; \
  254. if (test!=first) { \
  255. /* Shift everything in [test,first) \
  256. * up by one, and place |first| \
  257. * where |test| is. */ \
  258. memcpy(pivot,first,size); \
  259. memmove(test+size,test,first-test); \
  260. memcpy(test,pivot,size); \
  261. } \
  262. }
  263. #define SWAP_nonaligned(a,b) { \
  264. register char *aa=(a),*bb=(b); \
  265. register size_t sz=size; \
  266. do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); }
  267. #define SWAP_aligned(a,b) { \
  268. register int *aa=(int*)(a),*bb=(int*)(b); \
  269. register size_t sz=size; \
  270. do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); }
  271. #define SWAP_words(a,b) { \
  272. register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; }
  273. /* ---------------------------------------------------------------------- */
  274. static char *
  275. pivot_big(char *first, char *mid, char *last, size_t size,
  276. int compare(const void *, const void *))
  277. {
  278. size_t d = (((last - first) / size) >> 3) * size;
  279. char *m1, *m2, *m3;
  280. {
  281. char *a = first, *b = first + d, *c = first + 2 * d;
  282. #ifdef DEBUG_QSORT
  283. fprintf(stderr, "< %d %d %d\n", *(int *) a, *(int *) b, *(int *) c);
  284. #endif
  285. m1 = compare(a, b) < 0 ?
  286. (compare(b, c) < 0 ? b : (compare(a, c) < 0 ? c : a))
  287. : (compare(a, c) < 0 ? a : (compare(b, c) < 0 ? c : b));
  288. }
  289. {
  290. char *a = mid - d, *b = mid, *c = mid + d;
  291. #ifdef DEBUG_QSORT
  292. fprintf(stderr, ". %d %d %d\n", *(int *) a, *(int *) b, *(int *) c);
  293. #endif
  294. m2 = compare(a, b) < 0 ?
  295. (compare(b, c) < 0 ? b : (compare(a, c) < 0 ? c : a))
  296. : (compare(a, c) < 0 ? a : (compare(b, c) < 0 ? c : b));
  297. }
  298. {
  299. char *a = last - 2 * d, *b = last - d, *c = last;
  300. #ifdef DEBUG_QSORT
  301. fprintf(stderr, "> %d %d %d\n", *(int *) a, *(int *) b, *(int *) c);
  302. #endif
  303. m3 = compare(a, b) < 0 ?
  304. (compare(b, c) < 0 ? b : (compare(a, c) < 0 ? c : a))
  305. : (compare(a, c) < 0 ? a : (compare(b, c) < 0 ? c : b));
  306. }
  307. #ifdef DEBUG_QSORT
  308. fprintf(stderr, "-> %d %d %d\n", *(int *) m1, *(int *) m2, *(int *) m3);
  309. #endif
  310. return compare(m1, m2) < 0 ?
  311. (compare(m2, m3) < 0 ? m2 : (compare(m1, m3) < 0 ? m3 : m1))
  312. : (compare(m1, m3) < 0 ? m1 : (compare(m2, m3) < 0 ? m3 : m2));
  313. }
  314. /* ---------------------------------------------------------------------- */
  315. static void
  316. qsort_nonaligned(void *base, size_t nmemb, size_t size,
  317. int (*compare) (const void *, const void *))
  318. {
  319. stack_entry stack[STACK_SIZE];
  320. int stacktop = 0;
  321. char *first, *last;
  322. char *pivot = malloc(size);
  323. size_t trunc = TRUNC_nonaligned * size;
  324. assert(pivot != 0);
  325. first = (char *) base;
  326. last = first + (nmemb - 1) * size;
  327. if ((size_t) (last - first) > trunc) {
  328. char *ffirst = first, *llast = last;
  329. while (1) {
  330. /* Select pivot */
  331. {
  332. char *mid = first + size * ((last - first) / size >> 1);
  333. Pivot(SWAP_nonaligned, size);
  334. memcpy(pivot, mid, size);
  335. }
  336. /* Partition. */
  337. Partition(SWAP_nonaligned, size);
  338. /* Prepare to recurse/iterate. */
  339. Recurse(trunc)}
  340. }
  341. PreInsertion(SWAP_nonaligned, TRUNC_nonaligned, size);
  342. Insertion(SWAP_nonaligned);
  343. free(pivot);
  344. }
  345. static void
  346. qsort_aligned(void *base, size_t nmemb, size_t size,
  347. int (*compare) (const void *, const void *))
  348. {
  349. stack_entry stack[STACK_SIZE];
  350. int stacktop = 0;
  351. char *first, *last;
  352. char *pivot = malloc(size);
  353. size_t trunc = TRUNC_aligned * size;
  354. assert(pivot != 0);
  355. first = (char *) base;
  356. last = first + (nmemb - 1) * size;
  357. if ((size_t) (last - first) > trunc) {
  358. char *ffirst = first, *llast = last;
  359. while (1) {
  360. /* Select pivot */
  361. {
  362. char *mid = first + size * ((last - first) / size >> 1);
  363. Pivot(SWAP_aligned, size);
  364. memcpy(pivot, mid, size);
  365. }
  366. /* Partition. */
  367. Partition(SWAP_aligned, size);
  368. /* Prepare to recurse/iterate. */
  369. Recurse(trunc)}
  370. }
  371. PreInsertion(SWAP_aligned, TRUNC_aligned, size);
  372. Insertion(SWAP_aligned);
  373. free(pivot);
  374. }
  375. static void
  376. qsort_words(void *base, size_t nmemb,
  377. int (*compare) (const void *, const void *))
  378. {
  379. stack_entry stack[STACK_SIZE];
  380. int stacktop = 0;
  381. char *first, *last;
  382. char *pivot = malloc(WORD_BYTES);
  383. assert(pivot != 0);
  384. first = (char *) base;
  385. last = first + (nmemb - 1) * WORD_BYTES;
  386. if (last - first > TRUNC_words) {
  387. char *ffirst = first, *llast = last;
  388. while (1) {
  389. #ifdef DEBUG_QSORT
  390. fprintf(stderr, "Doing %d:%d: ",
  391. (first - (char *) base) / WORD_BYTES,
  392. (last - (char *) base) / WORD_BYTES);
  393. #endif
  394. /* Select pivot */
  395. {
  396. char *mid =
  397. first + WORD_BYTES * ((last - first) / (2 * WORD_BYTES));
  398. Pivot(SWAP_words, WORD_BYTES);
  399. *(int *) pivot = *(int *) mid;
  400. }
  401. #ifdef DEBUG_QSORT
  402. fprintf(stderr, "pivot=%d\n", *(int *) pivot);
  403. #endif
  404. /* Partition. */
  405. Partition(SWAP_words, WORD_BYTES);
  406. /* Prepare to recurse/iterate. */
  407. Recurse(TRUNC_words)}
  408. }
  409. PreInsertion(SWAP_words, (TRUNC_words / WORD_BYTES), WORD_BYTES);
  410. /* Now do insertion sort. */
  411. last = ((char *) base) + nmemb * WORD_BYTES;
  412. for (first = ((char *) base) + WORD_BYTES; first != last;
  413. first += WORD_BYTES) {
  414. /* Find the right place for |first|. My apologies for var reuse */
  415. int *pl = (int *) (first - WORD_BYTES), *pr = (int *) first;
  416. *(int *) pivot = *(int *) first;
  417. for (; compare(pl, pivot) > 0; pr = pl, --pl) {
  418. *pr = *pl;
  419. }
  420. if (pr != (int *) first)
  421. *pr = *(int *) pivot;
  422. }
  423. free(pivot);
  424. }
  425. /* ---------------------------------------------------------------------- */
  426. void
  427. qsort(void *base, size_t nmemb, size_t size,
  428. int (*compare) (const void *, const void *))
  429. {
  430. if (nmemb <= 1)
  431. return;
  432. if (((uintptr_t) base | size) & (WORD_BYTES - 1))
  433. qsort_nonaligned(base, nmemb, size, compare);
  434. else if (size != WORD_BYTES)
  435. qsort_aligned(base, nmemb, size, compare);
  436. else
  437. qsort_words(base, nmemb, compare);
  438. }
  439. #endif /* !SDL_qsort */
  440. /* vi: set ts=4 sw=4 expandtab: */