SDL_blit_N.c 122 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173317431753176317731783179318031813182318331843185318631873188318931903191319231933194319531963197319831993200320132023203
  1. /*
  2. Simple DirectMedia Layer
  3. Copyright (C) 1997-2026 Sam Lantinga <slouken@libsdl.org>
  4. This software is provided 'as-is', without any express or implied
  5. warranty. In no event will the authors be held liable for any damages
  6. arising from the use of this software.
  7. Permission is granted to anyone to use this software for any purpose,
  8. including commercial applications, and to alter it and redistribute it
  9. freely, subject to the following restrictions:
  10. 1. The origin of this software must not be misrepresented; you must not
  11. claim that you wrote the original software. If you use this software
  12. in a product, an acknowledgment in the product documentation would be
  13. appreciated but is not required.
  14. 2. Altered source versions must be plainly marked as such, and must not be
  15. misrepresented as being the original software.
  16. 3. This notice may not be removed or altered from any source distribution.
  17. */
  18. #include "SDL_internal.h"
  19. #ifdef SDL_HAVE_BLIT_N
  20. #include "SDL_pixels_c.h"
  21. #include "SDL_surface_c.h"
  22. #include "SDL_blit_copy.h"
  23. // General optimized routines that write char by char
  24. #define HAVE_FAST_WRITE_INT8 1
  25. // On some CPU, it's slower than combining and write a word
  26. #ifdef __MIPS__
  27. #undef HAVE_FAST_WRITE_INT8
  28. #define HAVE_FAST_WRITE_INT8 0
  29. #endif
  30. // Functions to blit from N-bit surfaces to other surfaces
  31. #define BLIT_FEATURE_NONE 0x00
  32. #define BLIT_FEATURE_HAS_SSE41 0x01
  33. #define BLIT_FEATURE_HAS_ALTIVEC 0x02
  34. #define BLIT_FEATURE_ALTIVEC_DONT_USE_PREFETCH 0x04
  35. #ifdef SDL_ALTIVEC_BLITTERS
  36. #ifdef SDL_PLATFORM_MACOS
  37. #include <sys/sysctl.h>
  38. static size_t GetL3CacheSize(void)
  39. {
  40. const char key[] = "hw.l3cachesize";
  41. u_int64_t result = 0;
  42. size_t typeSize = sizeof(result);
  43. int err = sysctlbyname(key, &result, &typeSize, NULL, 0);
  44. if (0 != err) {
  45. return 0;
  46. }
  47. return result;
  48. }
  49. #else
  50. static size_t GetL3CacheSize(void)
  51. {
  52. // XXX: Just guess G4
  53. return 2097152;
  54. }
  55. #endif // SDL_PLATFORM_MACOS
  56. #if (defined(SDL_PLATFORM_MACOS) && (__GNUC__ < 4))
  57. #define VECUINT8_LITERAL(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \
  58. (vector unsigned char)(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p)
  59. #define VECUINT16_LITERAL(a, b, c, d, e, f, g, h) \
  60. (vector unsigned short)(a, b, c, d, e, f, g, h)
  61. #else
  62. #define VECUINT8_LITERAL(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \
  63. (vector unsigned char) \
  64. { \
  65. a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p \
  66. }
  67. #define VECUINT16_LITERAL(a, b, c, d, e, f, g, h) \
  68. (vector unsigned short) \
  69. { \
  70. a, b, c, d, e, f, g, h \
  71. }
  72. #endif
  73. #define UNALIGNED_PTR(x) (((size_t)x) & 0x0000000F)
  74. #define VSWIZZLE32(a, b, c, d) (vector unsigned char)(0x00 + a, 0x00 + b, 0x00 + c, 0x00 + d, \
  75. 0x04 + a, 0x04 + b, 0x04 + c, 0x04 + d, \
  76. 0x08 + a, 0x08 + b, 0x08 + c, 0x08 + d, \
  77. 0x0C + a, 0x0C + b, 0x0C + c, 0x0C + d)
  78. #define MAKE8888(dstfmt, r, g, b, a) \
  79. (((r << dstfmt->Rshift) & dstfmt->Rmask) | \
  80. ((g << dstfmt->Gshift) & dstfmt->Gmask) | \
  81. ((b << dstfmt->Bshift) & dstfmt->Bmask) | \
  82. ((a << dstfmt->Ashift) & dstfmt->Amask))
  83. /*
  84. * Data Stream Touch...Altivec cache prefetching.
  85. *
  86. * Don't use this on a G5...however, the speed boost is very significant
  87. * on a G4.
  88. */
  89. #define DST_CHAN_SRC 1
  90. #define DST_CHAN_DEST 2
  91. // macro to set DST control word value...
  92. #define DST_CTRL(size, count, stride) \
  93. (((size) << 24) | ((count) << 16) | (stride))
  94. #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
  95. ? vec_lvsl(0, src) \
  96. : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
  97. // Calculate the permute vector used for 32->32 swizzling
// Build the vec_perm control vector that swizzles 32-bit pixels from
// srcfmt channel order to dstfmt channel order, four pixels at a time.
// A NULL srcfmt or dstfmt means "ARGB8888".  Permute indices >= 0x10
// select from vec_perm's second operand, which callers pass as the
// alpha-fill vector.
static vector unsigned char calc_swizzle32(const SDL_PixelFormatDetails *srcfmt, const SDL_PixelFormatDetails *dstfmt)
{
    /*
     * We have to assume that the bits that aren't used by other
     * colors is alpha, and it's one complete byte, since some formats
     * leave alpha with a zero mask, but we should still swizzle the bits.
     */
    // ARGB — substituted for whichever of srcfmt/dstfmt is NULL.
    static const SDL_PixelFormatDetails default_pixel_format = {
        SDL_PIXELFORMAT_ARGB8888, 0, 0, { 0, 0 }, 0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000, 8, 8, 8, 8, 16, 8, 0, 24
    };
    // Byte offset of each of the four pixels within the 16-byte vector;
    // the per-channel byte indices computed below are added on top.
    const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
                                                       0x04, 0x04, 0x04, 0x04,
                                                       0x08, 0x08, 0x08, 0x08,
                                                       0x0C, 0x0C, 0x0C,
                                                       0x0C);
    vector unsigned char vswiz;
    vector unsigned int srcvec;
    Uint32 rmask, gmask, bmask, amask;
    if (!srcfmt) {
        srcfmt = &default_pixel_format;
    }
    if (!dstfmt) {
        dstfmt = &default_pixel_format;
    }
    // Map a channel's bit shift (0/8/16/24) to its byte index (3..0)
    // within a big-endian 32-bit pixel.
#define RESHIFT(X) (3 - ((X) >> 3))
    // Each channel contributes a source-byte index, positioned at the
    // destination channel's byte via the destination shift.
    rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
    gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
    bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
    // Use zero for alpha if either surface doesn't have alpha
    if (dstfmt->Amask) {
        // 0x10 selects from vec_perm's second operand (the alpha fill)
        // when the source has no alpha channel of its own.
        amask =
            ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift);
    } else {
        // No destination alpha: point every non-RGB byte at the second
        // operand so it is filled from the alpha vector (zero).
        amask =
            0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
                          0xFFFFFFFF);
    }
#undef RESHIFT
    // Store the packed per-channel indices in lane 0, then splat them
    // across all four pixel positions and add the per-pixel offsets.
    ((unsigned int *)(char *)&srcvec)[0] = (rmask | gmask | bmask | amask);
    vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0));
    return (vswiz);
}
  141. #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  142. // reorder bytes for PowerPC little endian
  143. static vector unsigned char reorder_ppc64le_vec(vector unsigned char vpermute)
  144. {
  145. /* The result vector of calc_swizzle32 reorder bytes using vec_perm.
  146. The LE transformation for vec_perm has an implicit assumption
  147. that the permutation is being used to reorder vector elements,
  148. not to reorder bytes within those elements.
  149. Unfortunately the result order is not the expected one for powerpc
  150. little endian when the two first vector parameters of vec_perm are
  151. not of type 'vector char'. This is because the numbering from the
  152. left for BE, and numbering from the right for LE, produces a
  153. different interpretation of what the odd and even lanes are.
  154. Refer to fedora bug 1392465
  155. */
  156. const vector unsigned char ppc64le_reorder = VECUINT8_LITERAL(
  157. 0x01, 0x00, 0x03, 0x02,
  158. 0x05, 0x04, 0x07, 0x06,
  159. 0x09, 0x08, 0x0B, 0x0A,
  160. 0x0D, 0x0C, 0x0F, 0x0E);
  161. vector unsigned char vswiz_ppc64le;
  162. vswiz_ppc64le = vec_perm(vpermute, vpermute, ppc64le_reorder);
  163. return (vswiz_ppc64le);
  164. }
  165. #endif
  166. static void Blit_XRGB8888_RGB565(SDL_BlitInfo *info);
// AltiVec blit: 32-bit XRGB8888-class source -> RGB565 destination.
// Runs a scalar loop until dst is 16-byte aligned, converts 8 pixels
// per vector iteration, then finishes any trailing pixels in scalar.
static void Blit_XRGB8888_RGB565Altivec(SDL_BlitInfo *info)
{
    int height = info->dst_h;
    Uint8 *src = (Uint8 *)info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = (Uint8 *)info->dst;
    int dstskip = info->dst_skip;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    vector unsigned char valpha = vec_splat_u8(0);
    // NULL dstfmt: swizzle the source to ARGB order before packing.
    vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
    // Gathers the green byte of each of 8 pixels (from two 4-pixel
    // vectors; indices >= 0x10 pick from the second vector) into the
    // low byte of each 16-bit lane.
    vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
                                                    0x00, 0x0a, 0x00, 0x0e,
                                                    0x00, 0x12, 0x00, 0x16,
                                                    0x00, 0x1a, 0x00, 0x1e);
    vector unsigned short v1 = vec_splat_u16(1);
    vector unsigned short v3 = vec_splat_u16(3);
    vector unsigned short v3f =
        VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
                          0x003f, 0x003f, 0x003f, 0x003f);
    vector unsigned short vfc =
        VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
                          0x00fc, 0x00fc, 0x00fc, 0x00fc);
    /* Synthesize the red mask from a byte splat plus shift —
       vec_splat_* immediates are limited to 5-bit values, so the
       constant can't be loaded directly. */
    vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7);
    vf800 = vec_sl(vf800, vec_splat_u16(8));
    while (height--) {
        vector unsigned char valigner;
        vector unsigned char voverflow;
        vector unsigned char vsrc;
        int width = info->dst_w;
        int extrawidth;
        // do scalar until we can align...
#define ONE_PIXEL_BLEND(condition, widthvar)                   \
    while (condition) {                                        \
        Uint32 Pixel;                                          \
        unsigned sR, sG, sB, sA;                               \
        DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel,          \
                      sR, sG, sB, sA);                         \
        *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) |         \
                            ((sG << 3) & 0x000007E0) |         \
                            ((sB >> 3) & 0x0000001F));         \
        dst += 2;                                              \
        src += 4;                                              \
        widthvar--;                                            \
    }
        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
        // After all that work, here's the vector part!
        extrawidth = (width % 8); // trailing unaligned stores
        width -= extrawidth;
        vsrc = vec_ld(0, src);
        valigner = VEC_ALIGNER(src);
        while (width) {
            vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
            vector unsigned int vsrc1, vsrc2;
            vector unsigned char vdst;
            // Load the next 16 bytes and merge with the previous load
            // to handle a possibly-unaligned src (classic vec_ld +
            // vec_perm alignment idiom).
            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);
            vsrc1 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
            src += 16;
            vsrc = voverflow;
            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);
            vsrc2 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
            // 1555
            vpixel = (vector unsigned short)vec_packpx(vsrc1, vsrc2);
            // Rebuild the 6-bit green from the full 8-bit source green,
            // since vec_packpx only kept 5 bits per channel.
            vgpixel = (vector unsigned short)vec_perm(vsrc1, vsrc2, vgmerge);
            vgpixel = vec_and(vgpixel, vfc);
            vgpixel = vec_sl(vgpixel, v3);
            vrpixel = vec_sl(vpixel, v1);
            vrpixel = vec_and(vrpixel, vf800);
            vbpixel = vec_and(vpixel, v3f);
            vdst =
                vec_or((vector unsigned char)vrpixel,
                       (vector unsigned char)vgpixel);
            // 565
            vdst = vec_or(vdst, (vector unsigned char)vbpixel);
            vec_st(vdst, 0, dst);
            width -= 8;
            src += 16;
            dst += 16;
            vsrc = voverflow;
        }
        SDL_assert(width == 0);
        // Finish the remaining (< 8) trailing pixels in scalar.
        ONE_PIXEL_BLEND((extrawidth), extrawidth);
#undef ONE_PIXEL_BLEND
        src += srcskip; // move to next row, accounting for pitch.
        dst += dstskip;
    }
}
  256. #ifdef BROKEN_ALTIVEC_BLITTERS // This doesn't properly expand to the lower destination bits
// AltiVec blit: RGB565 source -> 32-bit destination, 8 pixels per
// vector iteration.  NOTE(review): only compiled when
// BROKEN_ALTIVEC_BLITTERS is defined — the guard comment says it does
// not properly expand to the lower destination bits.
static void Blit_RGB565_32Altivec(SDL_BlitInfo *info)
{
    int height = info->dst_h;
    Uint8 *src = (Uint8 *)info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = (Uint8 *)info->dst;
    int dstskip = info->dst_skip;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    unsigned alpha;
    vector unsigned char valpha;
    vector unsigned char vpermute;
    vector unsigned short vf800;
    vector unsigned int v8 = vec_splat_u32(8);
    vector unsigned int v16 = vec_add(v8, v8);
    vector unsigned short v2 = vec_splat_u16(2);
    vector unsigned short v3 = vec_splat_u16(3);
    /*
       0x10 - 0x1f is the alpha
       0x00 - 0x0e evens are the red
       0x01 - 0x0f odds are zero
    */
    vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
                                                       0x10, 0x02, 0x01, 0x01,
                                                       0x10, 0x04, 0x01, 0x01,
                                                       0x10, 0x06, 0x01,
                                                       0x01);
    // Same selection for the second group of four pixels: adding
    // 8 << 16 to each 32-bit lane bumps the red-byte index by 8.
    vector unsigned char vredalpha2 =
        (vector unsigned char)(vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16)));
    /*
       0x00 - 0x0f is ARxx ARxx ARxx ARxx
       0x11 - 0x0f odds are blue
    */
    vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
                                                   0x04, 0x05, 0x06, 0x13,
                                                   0x08, 0x09, 0x0a, 0x15,
                                                   0x0c, 0x0d, 0x0e, 0x17);
    vector unsigned char vblue2 =
        (vector unsigned char)(vec_add((vector unsigned int)vblue1, v8));
    /*
       0x00 - 0x0f is ARxB ARxB ARxB ARxB
       0x10 - 0x0e evens are green
    */
    vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
                                                    0x04, 0x05, 0x12, 0x07,
                                                    0x08, 0x09, 0x14, 0x0b,
                                                    0x0c, 0x0d, 0x16, 0x0f);
    vector unsigned char vgreen2 =
        (vector unsigned char)(vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8)));
    SDL_assert(srcfmt->bytes_per_pixel == 2);
    SDL_assert(dstfmt->bytes_per_pixel == 4);
    // Red mask synthesized by splat+shift (vec_splat_* immediates are
    // limited to 5-bit values).
    vf800 = (vector unsigned short)vec_splat_u8(-7);
    vf800 = vec_sl(vf800, vec_splat_u16(8));
    if (dstfmt->Amask && info->a) {
        // Splat the per-surface alpha into every byte lane.
        ((unsigned char *)&valpha)[0] = alpha = info->a;
        valpha = vec_splat(valpha, 0);
    } else {
        alpha = 0;
        valpha = vec_splat_u8(0);
    }
    // NULL srcfmt: the gathered channels are in ARGB order; swizzle to
    // the destination's channel order.
    vpermute = calc_swizzle32(NULL, dstfmt);
    while (height--) {
        vector unsigned char valigner;
        vector unsigned char voverflow;
        vector unsigned char vsrc;
        int width = info->dst_w;
        int extrawidth;
        // do scalar until we can align...
#define ONE_PIXEL_BLEND(condition, widthvar)              \
    while (condition) {                                   \
        unsigned sR, sG, sB;                              \
        unsigned short Pixel = *((unsigned short *)src);  \
        RGB_FROM_RGB565(Pixel, sR, sG, sB);               \
        ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
        src += 2;                                         \
        dst += 4;                                         \
        widthvar--;                                       \
    }
        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
        // After all that work, here's the vector part!
        extrawidth = (width % 8); // trailing unaligned stores
        width -= extrawidth;
        vsrc = vec_ld(0, src);
        valigner = VEC_ALIGNER(src);
        while (width) {
            vector unsigned short vR, vG, vB;
            vector unsigned char vdst1, vdst2;
            // vec_ld + vec_perm alignment idiom for unaligned src.
            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);
            // Shift each channel into the high byte of its 16-bit lane,
            // then gather the channel bytes with the permutes above.
            vR = vec_and((vector unsigned short)vsrc, vf800);
            vB = vec_sl((vector unsigned short)vsrc, v3);
            vG = vec_sl(vB, v2);
            vdst1 =
                (vector unsigned char)vec_perm((vector unsigned char)vR,
                                               valpha, vredalpha1);
            vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
            vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
            vdst1 = vec_perm(vdst1, valpha, vpermute);
            vec_st(vdst1, 0, dst);
            vdst2 =
                (vector unsigned char)vec_perm((vector unsigned char)vR,
                                               valpha, vredalpha2);
            vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
            vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
            vdst2 = vec_perm(vdst2, valpha, vpermute);
            vec_st(vdst2, 16, dst);
            width -= 8;
            dst += 32;
            src += 16;
            vsrc = voverflow;
        }
        SDL_assert(width == 0);
        // Finish the remaining (< 8) trailing pixels in scalar.
        ONE_PIXEL_BLEND((extrawidth), extrawidth);
#undef ONE_PIXEL_BLEND
        src += srcskip; // move to next row, accounting for pitch.
        dst += dstskip;
    }
}
// AltiVec blit: RGB555 source -> 32-bit destination, 8 pixels per
// vector iteration.  Differs from the RGB565 variant only in the
// channel shifts (5-bit green).  NOTE(review): only compiled when
// BROKEN_ALTIVEC_BLITTERS is defined — the guard comment says it does
// not properly expand to the lower destination bits.
static void Blit_RGB555_32Altivec(SDL_BlitInfo *info)
{
    int height = info->dst_h;
    Uint8 *src = (Uint8 *)info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = (Uint8 *)info->dst;
    int dstskip = info->dst_skip;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    unsigned alpha;
    vector unsigned char valpha;
    vector unsigned char vpermute;
    vector unsigned short vf800;
    vector unsigned int v8 = vec_splat_u32(8);
    vector unsigned int v16 = vec_add(v8, v8);
    vector unsigned short v1 = vec_splat_u16(1);
    vector unsigned short v3 = vec_splat_u16(3);
    /*
       0x10 - 0x1f is the alpha
       0x00 - 0x0e evens are the red
       0x01 - 0x0f odds are zero
    */
    vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
                                                       0x10, 0x02, 0x01, 0x01,
                                                       0x10, 0x04, 0x01, 0x01,
                                                       0x10, 0x06, 0x01,
                                                       0x01);
    // Same selection for the second group of four pixels: adding
    // 8 << 16 to each 32-bit lane bumps the red-byte index by 8.
    vector unsigned char vredalpha2 =
        (vector unsigned char)(vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16)));
    /*
       0x00 - 0x0f is ARxx ARxx ARxx ARxx
       0x11 - 0x0f odds are blue
    */
    vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
                                                   0x04, 0x05, 0x06, 0x13,
                                                   0x08, 0x09, 0x0a, 0x15,
                                                   0x0c, 0x0d, 0x0e, 0x17);
    vector unsigned char vblue2 =
        (vector unsigned char)(vec_add((vector unsigned int)vblue1, v8));
    /*
       0x00 - 0x0f is ARxB ARxB ARxB ARxB
       0x10 - 0x0e evens are green
    */
    vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
                                                    0x04, 0x05, 0x12, 0x07,
                                                    0x08, 0x09, 0x14, 0x0b,
                                                    0x0c, 0x0d, 0x16, 0x0f);
    vector unsigned char vgreen2 =
        (vector unsigned char)(vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8)));
    SDL_assert(srcfmt->bytes_per_pixel == 2);
    SDL_assert(dstfmt->bytes_per_pixel == 4);
    // Red mask synthesized by splat+shift (vec_splat_* immediates are
    // limited to 5-bit values).
    vf800 = (vector unsigned short)vec_splat_u8(-7);
    vf800 = vec_sl(vf800, vec_splat_u16(8));
    if (dstfmt->Amask && info->a) {
        // Splat the per-surface alpha into every byte lane.
        ((unsigned char *)&valpha)[0] = alpha = info->a;
        valpha = vec_splat(valpha, 0);
    } else {
        alpha = 0;
        valpha = vec_splat_u8(0);
    }
    // NULL srcfmt: the gathered channels are in ARGB order; swizzle to
    // the destination's channel order.
    vpermute = calc_swizzle32(NULL, dstfmt);
    while (height--) {
        vector unsigned char valigner;
        vector unsigned char voverflow;
        vector unsigned char vsrc;
        int width = info->dst_w;
        int extrawidth;
        // do scalar until we can align...
#define ONE_PIXEL_BLEND(condition, widthvar)              \
    while (condition) {                                   \
        unsigned sR, sG, sB;                              \
        unsigned short Pixel = *((unsigned short *)src);  \
        RGB_FROM_RGB555(Pixel, sR, sG, sB);               \
        ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
        src += 2;                                         \
        dst += 4;                                         \
    widthvar--;                                           \
    }
        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
        // After all that work, here's the vector part!
        extrawidth = (width % 8); // trailing unaligned stores
        width -= extrawidth;
        vsrc = vec_ld(0, src);
        valigner = VEC_ALIGNER(src);
        while (width) {
            vector unsigned short vR, vG, vB;
            vector unsigned char vdst1, vdst2;
            // vec_ld + vec_perm alignment idiom for unaligned src.
            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);
            // 555 layout: red needs one extra left shift relative to
            // 565, and green is 5 bits, hence v1/v3 instead of v2.
            vR = vec_and(vec_sl((vector unsigned short)vsrc, v1), vf800);
            vB = vec_sl((vector unsigned short)vsrc, v3);
            vG = vec_sl(vB, v3);
            vdst1 =
                (vector unsigned char)vec_perm((vector unsigned char)vR,
                                               valpha, vredalpha1);
            vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
            vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
            vdst1 = vec_perm(vdst1, valpha, vpermute);
            vec_st(vdst1, 0, dst);
            vdst2 =
                (vector unsigned char)vec_perm((vector unsigned char)vR,
                                               valpha, vredalpha2);
            vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
            vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
            vdst2 = vec_perm(vdst2, valpha, vpermute);
            vec_st(vdst2, 16, dst);
            width -= 8;
            dst += 32;
            src += 16;
            vsrc = voverflow;
        }
        SDL_assert(width == 0);
        // Finish the remaining (< 8) trailing pixels in scalar.
        ONE_PIXEL_BLEND((extrawidth), extrawidth);
#undef ONE_PIXEL_BLEND
        src += srcskip; // move to next row, accounting for pitch.
        dst += dstskip;
    }
}
  495. #endif // BROKEN_ALTIVEC_BLITTERS
  496. static void BlitNtoNKey(SDL_BlitInfo *info);
  497. static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info);
  498. static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
  499. {
  500. int height = info->dst_h;
  501. Uint32 *srcp = (Uint32 *)info->src;
  502. int srcskip = info->src_skip / 4;
  503. Uint32 *dstp = (Uint32 *)info->dst;
  504. int dstskip = info->dst_skip / 4;
  505. const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
  506. int srcbpp = srcfmt->bytes_per_pixel;
  507. const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
  508. int dstbpp = dstfmt->bytes_per_pixel;
  509. int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
  510. unsigned alpha = dstfmt->Amask ? info->a : 0;
  511. Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
  512. Uint32 ckey = info->colorkey;
  513. vector unsigned int valpha;
  514. vector unsigned char vpermute;
  515. vector unsigned char vzero;
  516. vector unsigned int vckey;
  517. vector unsigned int vrgbmask;
  518. vpermute = calc_swizzle32(srcfmt, dstfmt);
  519. if (info->dst_w < 16) {
  520. if (copy_alpha) {
  521. BlitNtoNKeyCopyAlpha(info);
  522. } else {
  523. BlitNtoNKey(info);
  524. }
  525. return;
  526. }
  527. vzero = vec_splat_u8(0);
  528. if (alpha) {
  529. ((unsigned char *)&valpha)[0] = (unsigned char)alpha;
  530. valpha =
  531. (vector unsigned int)vec_splat((vector unsigned char)valpha, 0);
  532. } else {
  533. valpha = (vector unsigned int)vzero;
  534. }
  535. ckey &= rgbmask;
  536. ((unsigned int *)(char *)&vckey)[0] = ckey;
  537. vckey = vec_splat(vckey, 0);
  538. ((unsigned int *)(char *)&vrgbmask)[0] = rgbmask;
  539. vrgbmask = vec_splat(vrgbmask, 0);
  540. #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  541. // reorder bytes for PowerPC little endian
  542. vpermute = reorder_ppc64le_vec(vpermute);
  543. #endif
  544. while (height--) {
  545. #define ONE_PIXEL_BLEND(condition, widthvar) \
  546. if (copy_alpha) { \
  547. while (condition) { \
  548. Uint32 Pixel; \
  549. unsigned sR, sG, sB, sA; \
  550. DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
  551. sR, sG, sB, sA); \
  552. if ((Pixel & rgbmask) != ckey) { \
  553. ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
  554. sR, sG, sB, sA); \
  555. } \
  556. dstp = (Uint32 *)(((Uint8 *)dstp) + dstbpp); \
  557. srcp = (Uint32 *)(((Uint8 *)srcp) + srcbpp); \
  558. widthvar--; \
  559. } \
  560. } else { \
  561. while (condition) { \
  562. Uint32 Pixel; \
  563. unsigned sR, sG, sB; \
  564. RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
  565. if (Pixel != ckey) { \
  566. RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
  567. ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
  568. sR, sG, sB, alpha); \
  569. } \
  570. dstp = (Uint32 *)(((Uint8 *)dstp) + dstbpp); \
  571. srcp = (Uint32 *)(((Uint8 *)srcp) + srcbpp); \
  572. widthvar--; \
  573. } \
  574. }
  575. int width = info->dst_w;
  576. ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
  577. SDL_assert(width > 0);
  578. if (width > 0) {
  579. int extrawidth = (width % 4);
  580. vector unsigned char valigner = VEC_ALIGNER(srcp);
  581. vector unsigned int vs = vec_ld(0, srcp);
  582. width -= extrawidth;
  583. SDL_assert(width >= 4);
  584. while (width) {
  585. vector unsigned char vsel;
  586. vector unsigned int vd;
  587. vector unsigned int voverflow = vec_ld(15, srcp);
  588. // load the source vec
  589. vs = vec_perm(vs, voverflow, valigner);
  590. // vsel is set for items that match the key
  591. vsel = (vector unsigned char)vec_and(vs, vrgbmask);
  592. vsel = (vector unsigned char)vec_cmpeq(vs, vckey);
  593. // permute the src vec to the dest format
  594. vs = vec_perm(vs, valpha, vpermute);
  595. // load the destination vec
  596. vd = vec_ld(0, dstp);
  597. // select the source and dest into vs
  598. vd = (vector unsigned int)vec_sel((vector unsigned char)vs,
  599. (vector unsigned char)vd,
  600. vsel);
  601. vec_st(vd, 0, dstp);
  602. srcp += 4;
  603. width -= 4;
  604. dstp += 4;
  605. vs = voverflow;
  606. }
  607. ONE_PIXEL_BLEND((extrawidth), extrawidth);
  608. #undef ONE_PIXEL_BLEND
  609. srcp += srcskip;
  610. dstp += dstskip;
  611. }
  612. }
  613. }
  614. // Altivec code to swizzle one 32-bit surface to a different 32-bit format.
  615. // Use this on a G5
  616. static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info)
  617. {
  618. int height = info->dst_h;
  619. Uint32 *src = (Uint32 *)info->src;
  620. int srcskip = info->src_skip / 4;
  621. Uint32 *dst = (Uint32 *)info->dst;
  622. int dstskip = info->dst_skip / 4;
  623. const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
  624. const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
  625. vector unsigned int vzero = vec_splat_u32(0);
  626. vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
  627. if (dstfmt->Amask && !srcfmt->Amask) {
  628. if (info->a) {
  629. vector unsigned char valpha;
  630. ((unsigned char *)&valpha)[0] = info->a;
  631. vzero = (vector unsigned int)vec_splat(valpha, 0);
  632. }
  633. }
  634. SDL_assert(srcfmt->bytes_per_pixel == 4);
  635. SDL_assert(dstfmt->bytes_per_pixel == 4);
  636. #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  637. // reorder bytes for PowerPC little endian
  638. vpermute = reorder_ppc64le_vec(vpermute);
  639. #endif
  640. while (height--) {
  641. vector unsigned char valigner;
  642. vector unsigned int vbits;
  643. vector unsigned int voverflow;
  644. Uint32 bits;
  645. Uint8 r, g, b, a;
  646. int width = info->dst_w;
  647. int extrawidth;
  648. // do scalar until we can align...
  649. while ((UNALIGNED_PTR(dst)) && (width)) {
  650. bits = *(src++);
  651. RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
  652. if (!srcfmt->Amask)
  653. a = info->a;
  654. *(dst++) = MAKE8888(dstfmt, r, g, b, a);
  655. width--;
  656. }
  657. // After all that work, here's the vector part!
  658. extrawidth = (width % 4);
  659. width -= extrawidth;
  660. valigner = VEC_ALIGNER(src);
  661. vbits = vec_ld(0, src);
  662. while (width) {
  663. voverflow = vec_ld(15, src);
  664. src += 4;
  665. width -= 4;
  666. vbits = vec_perm(vbits, voverflow, valigner); // src is ready.
  667. vbits = vec_perm(vbits, vzero, vpermute); // swizzle it.
  668. vec_st(vbits, 0, dst); // store it back out.
  669. dst += 4;
  670. vbits = voverflow;
  671. }
  672. SDL_assert(width == 0);
  673. // cover pixels at the end of the row that didn't fit in 16 bytes.
  674. while (extrawidth) {
  675. bits = *(src++); // max 7 pixels, don't bother with prefetch.
  676. RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
  677. if (!srcfmt->Amask)
  678. a = info->a;
  679. *(dst++) = MAKE8888(dstfmt, r, g, b, a);
  680. extrawidth--;
  681. }
  682. src += srcskip;
  683. dst += dstskip;
  684. }
  685. }
// Altivec code to swizzle one 32-bit surface to a different 32-bit format.
// Use this on a G4
static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info)
{
    // Prefetch lead distances, in Uint32 elements (src/dst are Uint32 *).
    const int scalar_dst_lead = sizeof(Uint32) * 4;
    const int vector_dst_lead = sizeof(Uint32) * 16;
    int height = info->dst_h;
    Uint32 *src = (Uint32 *)info->src;
    int srcskip = info->src_skip / 4;  // skips are in whole pixels
    Uint32 *dst = (Uint32 *)info->dst;
    int dstskip = info->dst_skip / 4;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    vector unsigned int vzero = vec_splat_u32(0);
    vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
    // If the destination has an alpha channel the source lacks, splat
    // info->a into every byte of vzero; the permute vector presumably
    // selects those lanes for the alpha output (see calc_swizzle32).
    if (dstfmt->Amask && !srcfmt->Amask) {
        if (info->a) {
            vector unsigned char valpha;
            ((unsigned char *)&valpha)[0] = info->a;
            vzero = (vector unsigned int)vec_splat(valpha, 0);
        }
    }
    SDL_assert(srcfmt->bytes_per_pixel == 4);
    SDL_assert(dstfmt->bytes_per_pixel == 4);
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
    // reorder bytes for PowerPC little endian
    vpermute = reorder_ppc64le_vec(vpermute);
#endif
    while (height--) {
        vector unsigned char valigner;
        vector unsigned int vbits;
        vector unsigned int voverflow;
        Uint32 bits;
        Uint8 r, g, b, a;
        int width = info->dst_w;
        int extrawidth;
        // do scalar until we can align...
        while ((UNALIGNED_PTR(dst)) && (width)) {
            // keep the cache streams warm while working pixel-by-pixel
            vec_dstt(src + scalar_dst_lead, DST_CTRL(2, 32, 1024),
                     DST_CHAN_SRC);
            vec_dstst(dst + scalar_dst_lead, DST_CTRL(2, 32, 1024),
                      DST_CHAN_DEST);
            bits = *(src++);
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
            if (!srcfmt->Amask)
                a = info->a;
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
            width--;
        }
        // After all that work, here's the vector part!
        extrawidth = (width % 4); // pixels that won't fill a whole vector
        width -= extrawidth;
        valigner = VEC_ALIGNER(src);
        vbits = vec_ld(0, src);
        while (width) {
            vec_dstt(src + vector_dst_lead, DST_CTRL(2, 32, 1024),
                     DST_CHAN_SRC);
            vec_dstst(dst + vector_dst_lead, DST_CTRL(2, 32, 1024),
                      DST_CHAN_DEST);
            voverflow = vec_ld(15, src);
            src += 4;
            width -= 4;
            vbits = vec_perm(vbits, voverflow, valigner); // src is ready.
            vbits = vec_perm(vbits, vzero, vpermute);     // swizzle it.
            vec_st(vbits, 0, dst);                        // store it back out.
            dst += 4;
            vbits = voverflow;
        }
        SDL_assert(width == 0);
        // cover pixels at the end of the row that didn't fit in 16 bytes.
        while (extrawidth) {
            bits = *(src++); // max 7 pixels, don't bother with prefetch.
            RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
            if (!srcfmt->Amask)
                a = info->a;
            *(dst++) = MAKE8888(dstfmt, r, g, b, a);
            extrawidth--;
        }
        src += srcskip;
        dst += dstskip;
    }
    // shut down the data streams opened by vec_dstt/vec_dstst above
    vec_dss(DST_CHAN_SRC);
    vec_dss(DST_CHAN_DEST);
}
  770. // !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4.
  771. #define GetBlitFeatures() \
  772. ((SDL_HasAltiVec() ? BLIT_FEATURE_HAS_ALTIVEC : 0) | \
  773. ((GetL3CacheSize() == 0) ? BLIT_FEATURE_ALTIVEC_DONT_USE_PREFETCH : 0))
  774. #ifdef __MWERKS__
  775. #pragma altivec_model off
  776. #endif
  777. #else
  778. #define GetBlitFeatures() \
  779. (SDL_HasSSE41() ? BLIT_FEATURE_HAS_SSE41 : 0)
  780. #endif
  781. // This is now endian dependent
  782. #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  783. #define HI 1
  784. #define LO 0
  785. #else // SDL_BYTEORDER == SDL_BIG_ENDIAN
  786. #define HI 0
  787. #define LO 1
  788. #endif
// Special optimized blit for RGB 8-8-8 --> RGB 5-5-5
// Packs the top 5 bits of each 8-bit channel of the XRGB8888 pixel *src
// into one 16-bit store: red -> bits 10-14, green -> bits 5-9,
// blue -> bits 0-4.
#define RGB888_RGB555(dst, src)                                    \
    {                                                              \
        *(Uint16 *)(dst) = (Uint16)((((*src) & 0x00F80000) >> 9) | \
                                    (((*src) & 0x0000F800) >> 6) | \
                                    (((*src) & 0x000000F8) >> 3)); \
    }
#ifndef USE_DUFFS_LOOP
// Converts two XRGB8888 pixels to RGB555 and writes them with a single
// 32-bit store: src[HI] fills the high 16 bits, src[LO] the low 16 bits
// (HI/LO are defined above to absorb the platform byte order).
#define RGB888_RGB555_TWO(dst, src)                            \
    {                                                          \
        *(Uint32 *)(dst) = (((((src[HI]) & 0x00F80000) >> 9) | \
                             (((src[HI]) & 0x0000F800) >> 6) | \
                             (((src[HI]) & 0x000000F8) >> 3))  \
                            << 16) |                           \
                           (((src[LO]) & 0x00F80000) >> 9) |   \
                           (((src[LO]) & 0x0000F800) >> 6) |   \
                           (((src[LO]) & 0x000000F8) >> 3);    \
    }
#endif
  808. static void Blit_XRGB8888_RGB555(SDL_BlitInfo *info)
  809. {
  810. #ifndef USE_DUFFS_LOOP
  811. int c;
  812. #endif
  813. int width, height;
  814. Uint32 *src;
  815. Uint16 *dst;
  816. int srcskip, dstskip;
  817. // Set up some basic variables
  818. width = info->dst_w;
  819. height = info->dst_h;
  820. src = (Uint32 *)info->src;
  821. srcskip = info->src_skip / 4;
  822. dst = (Uint16 *)info->dst;
  823. dstskip = info->dst_skip / 2;
  824. #ifdef USE_DUFFS_LOOP
  825. while (height--) {
  826. /* *INDENT-OFF* */ // clang-format off
  827. DUFFS_LOOP(
  828. RGB888_RGB555(dst, src);
  829. ++src;
  830. ++dst;
  831. , width);
  832. /* *INDENT-ON* */ // clang-format on
  833. src += srcskip;
  834. dst += dstskip;
  835. }
  836. #else
  837. // Memory align at 4-byte boundary, if necessary
  838. if ((long)dst & 0x03) {
  839. // Don't do anything if width is 0
  840. if (width == 0) {
  841. return;
  842. }
  843. --width;
  844. while (height--) {
  845. // Perform copy alignment
  846. RGB888_RGB555(dst, src);
  847. ++src;
  848. ++dst;
  849. // Copy in 4 pixel chunks
  850. for (c = width / 4; c; --c) {
  851. RGB888_RGB555_TWO(dst, src);
  852. src += 2;
  853. dst += 2;
  854. RGB888_RGB555_TWO(dst, src);
  855. src += 2;
  856. dst += 2;
  857. }
  858. // Get any leftovers
  859. switch (width & 3) {
  860. case 3:
  861. RGB888_RGB555(dst, src);
  862. ++src;
  863. ++dst;
  864. SDL_FALLTHROUGH;
  865. case 2:
  866. RGB888_RGB555_TWO(dst, src);
  867. src += 2;
  868. dst += 2;
  869. break;
  870. case 1:
  871. RGB888_RGB555(dst, src);
  872. ++src;
  873. ++dst;
  874. break;
  875. }
  876. src += srcskip;
  877. dst += dstskip;
  878. }
  879. } else {
  880. while (height--) {
  881. // Copy in 4 pixel chunks
  882. for (c = width / 4; c; --c) {
  883. RGB888_RGB555_TWO(dst, src);
  884. src += 2;
  885. dst += 2;
  886. RGB888_RGB555_TWO(dst, src);
  887. src += 2;
  888. dst += 2;
  889. }
  890. // Get any leftovers
  891. switch (width & 3) {
  892. case 3:
  893. RGB888_RGB555(dst, src);
  894. ++src;
  895. ++dst;
  896. SDL_FALLTHROUGH;
  897. case 2:
  898. RGB888_RGB555_TWO(dst, src);
  899. src += 2;
  900. dst += 2;
  901. break;
  902. case 1:
  903. RGB888_RGB555(dst, src);
  904. ++src;
  905. ++dst;
  906. break;
  907. }
  908. src += srcskip;
  909. dst += dstskip;
  910. }
  911. }
  912. #endif // USE_DUFFS_LOOP
  913. }
// Special optimized blit for RGB 8-8-8 --> RGB 5-6-5
// Packs the top bits of each 8-bit channel of the XRGB8888 pixel *src:
// 5 bits of red -> bits 11-15, 6 bits of green -> bits 5-10,
// 5 bits of blue -> bits 0-4.
#define RGB888_RGB565(dst, src)                                    \
    {                                                              \
        *(Uint16 *)(dst) = (Uint16)((((*src) & 0x00F80000) >> 8) | \
                                    (((*src) & 0x0000FC00) >> 5) | \
                                    (((*src) & 0x000000F8) >> 3)); \
    }
#ifndef USE_DUFFS_LOOP
// Converts two XRGB8888 pixels to RGB565 and writes them with a single
// 32-bit store: src[HI] fills the high 16 bits, src[LO] the low 16 bits
// (HI/LO are defined above to absorb the platform byte order).
#define RGB888_RGB565_TWO(dst, src)                            \
    {                                                          \
        *(Uint32 *)(dst) = (((((src[HI]) & 0x00F80000) >> 8) | \
                             (((src[HI]) & 0x0000FC00) >> 5) | \
                             (((src[HI]) & 0x000000F8) >> 3))  \
                            << 16) |                           \
                           (((src[LO]) & 0x00F80000) >> 8) |   \
                           (((src[LO]) & 0x0000FC00) >> 5) |   \
                           (((src[LO]) & 0x000000F8) >> 3);    \
    }
#endif
  933. static void Blit_XRGB8888_RGB565(SDL_BlitInfo *info)
  934. {
  935. #ifndef USE_DUFFS_LOOP
  936. int c;
  937. #endif
  938. int width, height;
  939. Uint32 *src;
  940. Uint16 *dst;
  941. int srcskip, dstskip;
  942. // Set up some basic variables
  943. width = info->dst_w;
  944. height = info->dst_h;
  945. src = (Uint32 *)info->src;
  946. srcskip = info->src_skip / 4;
  947. dst = (Uint16 *)info->dst;
  948. dstskip = info->dst_skip / 2;
  949. #ifdef USE_DUFFS_LOOP
  950. while (height--) {
  951. /* *INDENT-OFF* */ // clang-format off
  952. DUFFS_LOOP(
  953. RGB888_RGB565(dst, src);
  954. ++src;
  955. ++dst;
  956. , width);
  957. /* *INDENT-ON* */ // clang-format on
  958. src += srcskip;
  959. dst += dstskip;
  960. }
  961. #else
  962. // Memory align at 4-byte boundary, if necessary
  963. if ((long)dst & 0x03) {
  964. // Don't do anything if width is 0
  965. if (width == 0) {
  966. return;
  967. }
  968. --width;
  969. while (height--) {
  970. // Perform copy alignment
  971. RGB888_RGB565(dst, src);
  972. ++src;
  973. ++dst;
  974. // Copy in 4 pixel chunks
  975. for (c = width / 4; c; --c) {
  976. RGB888_RGB565_TWO(dst, src);
  977. src += 2;
  978. dst += 2;
  979. RGB888_RGB565_TWO(dst, src);
  980. src += 2;
  981. dst += 2;
  982. }
  983. // Get any leftovers
  984. switch (width & 3) {
  985. case 3:
  986. RGB888_RGB565(dst, src);
  987. ++src;
  988. ++dst;
  989. SDL_FALLTHROUGH;
  990. case 2:
  991. RGB888_RGB565_TWO(dst, src);
  992. src += 2;
  993. dst += 2;
  994. break;
  995. case 1:
  996. RGB888_RGB565(dst, src);
  997. ++src;
  998. ++dst;
  999. break;
  1000. }
  1001. src += srcskip;
  1002. dst += dstskip;
  1003. }
  1004. } else {
  1005. while (height--) {
  1006. // Copy in 4 pixel chunks
  1007. for (c = width / 4; c; --c) {
  1008. RGB888_RGB565_TWO(dst, src);
  1009. src += 2;
  1010. dst += 2;
  1011. RGB888_RGB565_TWO(dst, src);
  1012. src += 2;
  1013. dst += 2;
  1014. }
  1015. // Get any leftovers
  1016. switch (width & 3) {
  1017. case 3:
  1018. RGB888_RGB565(dst, src);
  1019. ++src;
  1020. ++dst;
  1021. SDL_FALLTHROUGH;
  1022. case 2:
  1023. RGB888_RGB565_TWO(dst, src);
  1024. src += 2;
  1025. dst += 2;
  1026. break;
  1027. case 1:
  1028. RGB888_RGB565(dst, src);
  1029. ++src;
  1030. ++dst;
  1031. break;
  1032. }
  1033. src += srcskip;
  1034. dst += dstskip;
  1035. }
  1036. }
  1037. #endif // USE_DUFFS_LOOP
  1038. }
  1039. #ifdef SDL_SSE4_1_INTRINSICS
// Convert RGB565 pixels to a 32-bit 8888 destination format, 8 pixels per
// vector iteration, with a scalar fallthrough switch for the remainder.
// The function is compiled for sse4.1, though the widest instruction used
// (_mm_shuffle_epi8) is SSSE3, as noted below.
static void SDL_TARGETING("sse4.1") Blit_RGB565_32_SSE41(SDL_BlitInfo *info)
{
    int c;
    int width, height;
    const Uint16 *src;
    Uint32 *dst;
    int srcskip, dstskip;
    Uint8 r, g, b;

    // Set up some basic variables
    width = info->dst_w;
    height = info->dst_h;
    src = (const Uint16 *)info->src;
    srcskip = info->src_skip / 2;  // skips are in pixels, not bytes
    dst = (Uint32 *)info->dst;
    dstskip = info->dst_skip / 4;

    // Red and blue channel multiplier to repeat 5 bits
    // (with the 5-bit value v in the top bits: (x * 0x0108) >> 16
    //  yields (v << 3) | (v >> 2), the 5-to-8-bit expansion)
    __m128i rb_mult = _mm_shuffle_epi32(_mm_cvtsi32_si128(0x01080108), 0);
    // Green channel multiplier to shift by 5 and then repeat 6 bits
    __m128i g_mult = _mm_shuffle_epi32(_mm_cvtsi32_si128(0x20802080), 0);
    // Red channel mask
    __m128i r_mask = _mm_shuffle_epi32(_mm_cvtsi32_si128(0xf800f800), 0);
    // Green channel mask
    __m128i g_mask = _mm_shuffle_epi32(_mm_cvtsi32_si128(0x07e007e0), 0);
    // Alpha channel mask
    __m128i a_mask = _mm_shuffle_epi32(_mm_cvtsi32_si128(0xff00ff00), 0);

    // Get the masks for converting from ARGB
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    const Uint32 Rshift = dstfmt->Rshift;
    const Uint32 Gshift = dstfmt->Gshift;
    const Uint32 Bshift = dstfmt->Bshift;
    Uint32 Amask, Ashift;
    SDL_Get8888AlphaMaskAndShift(dstfmt, &Amask, &Ashift);

    // The byte offsets for the start of each pixel
    const __m128i mask_offsets = _mm_set_epi8(12, 12, 12, 12, 8, 8, 8, 8, 4, 4, 4, 4, 0, 0, 0, 0);
    // Shuffle control: within each unpacked pixel the channel bytes sit at
    // offsets blue=0, green=1, red=2, alpha=3 (the 0/8/16/24 >> 3 values);
    // shifting each offset by the dst channel shift routes every channel
    // byte to its destination lane, and mask_offsets adds the pixel base.
    const __m128i convert_mask = _mm_add_epi32(
        _mm_set1_epi32(
            ((16 >> 3) << Rshift) |
            (( 8 >> 3) << Gshift) |
            (( 0 >> 3) << Bshift) |
            ((24 >> 3) << Ashift)),
        mask_offsets);

    while (height--) {
        // Copy in 8 pixel chunks
        for (c = width / 8; c; --c) {
            __m128i pixel = _mm_loadu_si128((__m128i *)src);
            __m128i red = pixel;
            __m128i green = pixel;
            __m128i blue = pixel;

            // Get red in the upper 5 bits and then multiply
            red = _mm_and_si128(red, r_mask);
            red = _mm_mulhi_epu16(red, rb_mult);

            // Get blue in the upper 5 bits and then multiply
            blue = _mm_slli_epi16(blue, 11);
            blue = _mm_mulhi_epu16(blue, rb_mult);

            // Combine the red and blue channels
            __m128i red_blue = _mm_or_si128(_mm_slli_epi16(red, 8), blue);

            // Get the green channel and then multiply into place
            green = _mm_and_si128(green, g_mask);
            green = _mm_mulhi_epu16(green, g_mult);

            // Combine the green and alpha channels
            __m128i green_alpha = _mm_or_si128(green, a_mask);

            // Unpack them into output ARGB pixels
            __m128i out1 = _mm_unpacklo_epi8(red_blue, green_alpha);
            __m128i out2 = _mm_unpackhi_epi8(red_blue, green_alpha);

            // Convert to dst format and save!
            // This is an SSSE3 instruction
            out1 = _mm_shuffle_epi8(out1, convert_mask);
            out2 = _mm_shuffle_epi8(out2, convert_mask);

            _mm_storeu_si128((__m128i*)dst, out1);
            _mm_storeu_si128((__m128i*)(dst + 4), out2);

            src += 8;
            dst += 8;
        }
        // Get any leftovers (up to 7 pixels), one at a time via fallthrough
        switch (width & 7) {
        case 7:
            RGB_FROM_RGB565(*src, r, g, b);
            *dst++ = (r << Rshift) | (g << Gshift) | (b << Bshift) | Amask;
            ++src;
            SDL_FALLTHROUGH;
        case 6:
            RGB_FROM_RGB565(*src, r, g, b);
            *dst++ = (r << Rshift) | (g << Gshift) | (b << Bshift) | Amask;
            ++src;
            SDL_FALLTHROUGH;
        case 5:
            RGB_FROM_RGB565(*src, r, g, b);
            *dst++ = (r << Rshift) | (g << Gshift) | (b << Bshift) | Amask;
            ++src;
            SDL_FALLTHROUGH;
        case 4:
            RGB_FROM_RGB565(*src, r, g, b);
            *dst++ = (r << Rshift) | (g << Gshift) | (b << Bshift) | Amask;
            ++src;
            SDL_FALLTHROUGH;
        case 3:
            RGB_FROM_RGB565(*src, r, g, b);
            *dst++ = (r << Rshift) | (g << Gshift) | (b << Bshift) | Amask;
            ++src;
            SDL_FALLTHROUGH;
        case 2:
            RGB_FROM_RGB565(*src, r, g, b);
            *dst++ = (r << Rshift) | (g << Gshift) | (b << Bshift) | Amask;
            ++src;
            SDL_FALLTHROUGH;
        case 1:
            RGB_FROM_RGB565(*src, r, g, b);
            *dst++ = (r << Rshift) | (g << Gshift) | (b << Bshift) | Amask;
            ++src;
            break;
        }

        src += srcskip;
        dst += dstskip;
    }
}
  1155. #endif // SDL_SSE4_1_INTRINSICS
  1156. #ifdef SDL_HAVE_BLIT_N_RGB565
// Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces
// `map` is a 512-entry lookup table (see the LUTs below): map[b * 2] holds
// the converted contribution of low source byte b and map[b * 2 + 1] that
// of high source byte b; one output pixel is the OR of the two. The
// macro's dst argument is unused, so `*dst++ = RGB565_32(dst, src, map)`
// below has no unsequenced-modification hazard.
#define RGB565_32(dst, src, map) (map[src[LO] * 2] | map[src[HI] * 2 + 1])
static void Blit_RGB565_32(SDL_BlitInfo *info, const Uint32 *map)
{
#ifndef USE_DUFFS_LOOP
    int c;
#endif
    int width, height;
    Uint8 *src;
    Uint32 *dst;
    int srcskip, dstskip;

    // Set up some basic variables
    width = info->dst_w;
    height = info->dst_h;
    src = info->src;
    srcskip = info->src_skip;     // src is walked byte-wise, skip in bytes
    dst = (Uint32 *)info->dst;
    dstskip = info->dst_skip / 4; // dst skip in pixels

#ifdef USE_DUFFS_LOOP
    while (height--) {
        /* *INDENT-OFF* */ // clang-format off
        DUFFS_LOOP(
        {
            *dst++ = RGB565_32(dst, src, map);
            src += 2;
        },
        width);
        /* *INDENT-ON* */ // clang-format on
        src += srcskip;
        dst += dstskip;
    }
#else
    while (height--) {
        // Copy in 4 pixel chunks
        for (c = width / 4; c; --c) {
            *dst++ = RGB565_32(dst, src, map);
            src += 2;
            *dst++ = RGB565_32(dst, src, map);
            src += 2;
            *dst++ = RGB565_32(dst, src, map);
            src += 2;
            *dst++ = RGB565_32(dst, src, map);
            src += 2;
        }
        // Get any leftovers
        switch (width & 3) {
        case 3:
            *dst++ = RGB565_32(dst, src, map);
            src += 2;
            SDL_FALLTHROUGH;
        case 2:
            *dst++ = RGB565_32(dst, src, map);
            src += 2;
            SDL_FALLTHROUGH;
        case 1:
            *dst++ = RGB565_32(dst, src, map);
            src += 2;
            break;
        }
        src += srcskip;
        dst += dstskip;
    }
#endif // USE_DUFFS_LOOP
}
  1221. // This is the code used to generate the lookup tables below:
  1222. #if 0
  1223. #include <SDL3/SDL.h>
  1224. #include <stdio.h>
  1225. #define GENERATE_SHIFTS
  1226. static Uint32 Calculate(int v, int bits, int vmax, int shift)
  1227. {
  1228. #if defined(GENERATE_FLOOR)
  1229. return (Uint32)SDL_floor(v * 255.0f / vmax) << shift;
  1230. #elif defined(GENERATE_ROUND)
  1231. return (Uint32)SDL_roundf(v * 255.0f / vmax) << shift;
  1232. #elif defined(GENERATE_SHIFTS)
  1233. switch (bits) {
  1234. case 1:
  1235. v = (v << 7) | (v << 6) | (v << 5) | (v << 4) | (v << 3) | (v << 2) | (v << 1) | v;
  1236. break;
  1237. case 2:
  1238. v = (v << 6) | (v << 4) | (v << 2) | v;
  1239. break;
  1240. case 3:
  1241. v = (v << 5) | (v << 2) | (v >> 1);
  1242. break;
  1243. case 4:
  1244. v = (v << 4) | v;
  1245. break;
  1246. case 5:
  1247. v = (v << 3) | (v >> 2);
  1248. break;
  1249. case 6:
  1250. v = (v << 2) | (v >> 4);
  1251. break;
  1252. case 7:
  1253. v = (v << 1) | (v >> 6);
  1254. break;
  1255. case 8:
  1256. break;
  1257. }
  1258. return (Uint32)v << shift;
  1259. #endif
  1260. }
  1261. static Uint32 CalculateARGB(int v, const SDL_PixelFormatDetails *sfmt, const SDL_PixelFormatDetails *dfmt)
  1262. {
  1263. Uint8 r = (v & sfmt->Rmask) >> sfmt->Rshift;
  1264. Uint8 g = (v & sfmt->Gmask) >> sfmt->Gshift;
  1265. Uint8 b = (v & sfmt->Bmask) >> sfmt->Bshift;
  1266. return dfmt->Amask |
  1267. Calculate(r, sfmt->Rbits, (1 << sfmt->Rbits) - 1, dfmt->Rshift) |
  1268. Calculate(g, sfmt->Gbits, (1 << sfmt->Gbits) - 1, dfmt->Gshift) |
  1269. Calculate(b, sfmt->Bbits, (1 << sfmt->Bbits) - 1, dfmt->Bshift);
  1270. }
  1271. static void GenerateLUT(SDL_PixelFormat src, SDL_PixelFormat dst)
  1272. {
  1273. static Uint32 lut[512];
  1274. const char *src_name = SDL_GetPixelFormatName(src) + 16;
  1275. const char *dst_name = SDL_GetPixelFormatName(dst) + 16;
  1276. const SDL_PixelFormatDetails *sfmt = SDL_GetPixelFormatDetails(src);
  1277. const SDL_PixelFormatDetails *dfmt = SDL_GetPixelFormatDetails(dst);
  1278. int i;
  1279. for (i = 0; i < 256; ++i) {
  1280. lut[i * 2] = CalculateARGB(i, sfmt, dfmt);
  1281. lut[i * 2 + 1] = CalculateARGB(i << 8, sfmt, dfmt);
  1282. }
  1283. printf("// Special optimized blit for %s -> %s\n\n", src_name, dst_name);
  1284. printf("static const Uint32 %s_%s_LUT[%d] = {", src_name, dst_name, (int)SDL_arraysize(lut));
  1285. for (i = 0; i < SDL_arraysize(lut); ++i) {
  1286. if ((i % 8) == 0) {
  1287. printf("\n ");
  1288. }
  1289. printf("0x%.8x", lut[i]);
  1290. if (i < (SDL_arraysize(lut) - 1)) {
  1291. printf(",");
  1292. if (((i + 1) % 8) != 0) {
  1293. printf(" ");
  1294. }
  1295. }
  1296. }
  1297. printf("\n};\n\n");
  1298. }
  1299. int main(int argc, char *argv[])
  1300. {
  1301. GenerateLUT(SDL_PIXELFORMAT_RGB565, SDL_PIXELFORMAT_ARGB8888);
  1302. GenerateLUT(SDL_PIXELFORMAT_RGB565, SDL_PIXELFORMAT_ABGR8888);
  1303. GenerateLUT(SDL_PIXELFORMAT_RGB565, SDL_PIXELFORMAT_RGBA8888);
  1304. GenerateLUT(SDL_PIXELFORMAT_RGB565, SDL_PIXELFORMAT_BGRA8888);
  1305. }
  1306. #endif // 0
  1307. /* *INDENT-OFF* */ // clang-format off
  1308. // Special optimized blit for RGB565 -> ARGB8888
  1309. static const Uint32 RGB565_ARGB8888_LUT[512] = {
  1310. 0xff000000, 0xff000000, 0xff000008, 0xff002000, 0xff000010, 0xff004100, 0xff000018, 0xff006100,
  1311. 0xff000021, 0xff008200, 0xff000029, 0xff00a200, 0xff000031, 0xff00c300, 0xff000039, 0xff00e300,
  1312. 0xff000042, 0xff080000, 0xff00004a, 0xff082000, 0xff000052, 0xff084100, 0xff00005a, 0xff086100,
  1313. 0xff000063, 0xff088200, 0xff00006b, 0xff08a200, 0xff000073, 0xff08c300, 0xff00007b, 0xff08e300,
  1314. 0xff000084, 0xff100000, 0xff00008c, 0xff102000, 0xff000094, 0xff104100, 0xff00009c, 0xff106100,
  1315. 0xff0000a5, 0xff108200, 0xff0000ad, 0xff10a200, 0xff0000b5, 0xff10c300, 0xff0000bd, 0xff10e300,
  1316. 0xff0000c6, 0xff180000, 0xff0000ce, 0xff182000, 0xff0000d6, 0xff184100, 0xff0000de, 0xff186100,
  1317. 0xff0000e7, 0xff188200, 0xff0000ef, 0xff18a200, 0xff0000f7, 0xff18c300, 0xff0000ff, 0xff18e300,
  1318. 0xff000400, 0xff210000, 0xff000408, 0xff212000, 0xff000410, 0xff214100, 0xff000418, 0xff216100,
  1319. 0xff000421, 0xff218200, 0xff000429, 0xff21a200, 0xff000431, 0xff21c300, 0xff000439, 0xff21e300,
  1320. 0xff000442, 0xff290000, 0xff00044a, 0xff292000, 0xff000452, 0xff294100, 0xff00045a, 0xff296100,
  1321. 0xff000463, 0xff298200, 0xff00046b, 0xff29a200, 0xff000473, 0xff29c300, 0xff00047b, 0xff29e300,
  1322. 0xff000484, 0xff310000, 0xff00048c, 0xff312000, 0xff000494, 0xff314100, 0xff00049c, 0xff316100,
  1323. 0xff0004a5, 0xff318200, 0xff0004ad, 0xff31a200, 0xff0004b5, 0xff31c300, 0xff0004bd, 0xff31e300,
  1324. 0xff0004c6, 0xff390000, 0xff0004ce, 0xff392000, 0xff0004d6, 0xff394100, 0xff0004de, 0xff396100,
  1325. 0xff0004e7, 0xff398200, 0xff0004ef, 0xff39a200, 0xff0004f7, 0xff39c300, 0xff0004ff, 0xff39e300,
  1326. 0xff000800, 0xff420000, 0xff000808, 0xff422000, 0xff000810, 0xff424100, 0xff000818, 0xff426100,
  1327. 0xff000821, 0xff428200, 0xff000829, 0xff42a200, 0xff000831, 0xff42c300, 0xff000839, 0xff42e300,
  1328. 0xff000842, 0xff4a0000, 0xff00084a, 0xff4a2000, 0xff000852, 0xff4a4100, 0xff00085a, 0xff4a6100,
  1329. 0xff000863, 0xff4a8200, 0xff00086b, 0xff4aa200, 0xff000873, 0xff4ac300, 0xff00087b, 0xff4ae300,
  1330. 0xff000884, 0xff520000, 0xff00088c, 0xff522000, 0xff000894, 0xff524100, 0xff00089c, 0xff526100,
  1331. 0xff0008a5, 0xff528200, 0xff0008ad, 0xff52a200, 0xff0008b5, 0xff52c300, 0xff0008bd, 0xff52e300,
  1332. 0xff0008c6, 0xff5a0000, 0xff0008ce, 0xff5a2000, 0xff0008d6, 0xff5a4100, 0xff0008de, 0xff5a6100,
  1333. 0xff0008e7, 0xff5a8200, 0xff0008ef, 0xff5aa200, 0xff0008f7, 0xff5ac300, 0xff0008ff, 0xff5ae300,
  1334. 0xff000c00, 0xff630000, 0xff000c08, 0xff632000, 0xff000c10, 0xff634100, 0xff000c18, 0xff636100,
  1335. 0xff000c21, 0xff638200, 0xff000c29, 0xff63a200, 0xff000c31, 0xff63c300, 0xff000c39, 0xff63e300,
  1336. 0xff000c42, 0xff6b0000, 0xff000c4a, 0xff6b2000, 0xff000c52, 0xff6b4100, 0xff000c5a, 0xff6b6100,
  1337. 0xff000c63, 0xff6b8200, 0xff000c6b, 0xff6ba200, 0xff000c73, 0xff6bc300, 0xff000c7b, 0xff6be300,
  1338. 0xff000c84, 0xff730000, 0xff000c8c, 0xff732000, 0xff000c94, 0xff734100, 0xff000c9c, 0xff736100,
  1339. 0xff000ca5, 0xff738200, 0xff000cad, 0xff73a200, 0xff000cb5, 0xff73c300, 0xff000cbd, 0xff73e300,
  1340. 0xff000cc6, 0xff7b0000, 0xff000cce, 0xff7b2000, 0xff000cd6, 0xff7b4100, 0xff000cde, 0xff7b6100,
  1341. 0xff000ce7, 0xff7b8200, 0xff000cef, 0xff7ba200, 0xff000cf7, 0xff7bc300, 0xff000cff, 0xff7be300,
  1342. 0xff001000, 0xff840000, 0xff001008, 0xff842000, 0xff001010, 0xff844100, 0xff001018, 0xff846100,
  1343. 0xff001021, 0xff848200, 0xff001029, 0xff84a200, 0xff001031, 0xff84c300, 0xff001039, 0xff84e300,
  1344. 0xff001042, 0xff8c0000, 0xff00104a, 0xff8c2000, 0xff001052, 0xff8c4100, 0xff00105a, 0xff8c6100,
  1345. 0xff001063, 0xff8c8200, 0xff00106b, 0xff8ca200, 0xff001073, 0xff8cc300, 0xff00107b, 0xff8ce300,
  1346. 0xff001084, 0xff940000, 0xff00108c, 0xff942000, 0xff001094, 0xff944100, 0xff00109c, 0xff946100,
  1347. 0xff0010a5, 0xff948200, 0xff0010ad, 0xff94a200, 0xff0010b5, 0xff94c300, 0xff0010bd, 0xff94e300,
  1348. 0xff0010c6, 0xff9c0000, 0xff0010ce, 0xff9c2000, 0xff0010d6, 0xff9c4100, 0xff0010de, 0xff9c6100,
  1349. 0xff0010e7, 0xff9c8200, 0xff0010ef, 0xff9ca200, 0xff0010f7, 0xff9cc300, 0xff0010ff, 0xff9ce300,
  1350. 0xff001400, 0xffa50000, 0xff001408, 0xffa52000, 0xff001410, 0xffa54100, 0xff001418, 0xffa56100,
  1351. 0xff001421, 0xffa58200, 0xff001429, 0xffa5a200, 0xff001431, 0xffa5c300, 0xff001439, 0xffa5e300,
  1352. 0xff001442, 0xffad0000, 0xff00144a, 0xffad2000, 0xff001452, 0xffad4100, 0xff00145a, 0xffad6100,
  1353. 0xff001463, 0xffad8200, 0xff00146b, 0xffada200, 0xff001473, 0xffadc300, 0xff00147b, 0xffade300,
  1354. 0xff001484, 0xffb50000, 0xff00148c, 0xffb52000, 0xff001494, 0xffb54100, 0xff00149c, 0xffb56100,
  1355. 0xff0014a5, 0xffb58200, 0xff0014ad, 0xffb5a200, 0xff0014b5, 0xffb5c300, 0xff0014bd, 0xffb5e300,
  1356. 0xff0014c6, 0xffbd0000, 0xff0014ce, 0xffbd2000, 0xff0014d6, 0xffbd4100, 0xff0014de, 0xffbd6100,
  1357. 0xff0014e7, 0xffbd8200, 0xff0014ef, 0xffbda200, 0xff0014f7, 0xffbdc300, 0xff0014ff, 0xffbde300,
  1358. 0xff001800, 0xffc60000, 0xff001808, 0xffc62000, 0xff001810, 0xffc64100, 0xff001818, 0xffc66100,
  1359. 0xff001821, 0xffc68200, 0xff001829, 0xffc6a200, 0xff001831, 0xffc6c300, 0xff001839, 0xffc6e300,
  1360. 0xff001842, 0xffce0000, 0xff00184a, 0xffce2000, 0xff001852, 0xffce4100, 0xff00185a, 0xffce6100,
  1361. 0xff001863, 0xffce8200, 0xff00186b, 0xffcea200, 0xff001873, 0xffcec300, 0xff00187b, 0xffcee300,
  1362. 0xff001884, 0xffd60000, 0xff00188c, 0xffd62000, 0xff001894, 0xffd64100, 0xff00189c, 0xffd66100,
  1363. 0xff0018a5, 0xffd68200, 0xff0018ad, 0xffd6a200, 0xff0018b5, 0xffd6c300, 0xff0018bd, 0xffd6e300,
  1364. 0xff0018c6, 0xffde0000, 0xff0018ce, 0xffde2000, 0xff0018d6, 0xffde4100, 0xff0018de, 0xffde6100,
  1365. 0xff0018e7, 0xffde8200, 0xff0018ef, 0xffdea200, 0xff0018f7, 0xffdec300, 0xff0018ff, 0xffdee300,
  1366. 0xff001c00, 0xffe70000, 0xff001c08, 0xffe72000, 0xff001c10, 0xffe74100, 0xff001c18, 0xffe76100,
  1367. 0xff001c21, 0xffe78200, 0xff001c29, 0xffe7a200, 0xff001c31, 0xffe7c300, 0xff001c39, 0xffe7e300,
  1368. 0xff001c42, 0xffef0000, 0xff001c4a, 0xffef2000, 0xff001c52, 0xffef4100, 0xff001c5a, 0xffef6100,
  1369. 0xff001c63, 0xffef8200, 0xff001c6b, 0xffefa200, 0xff001c73, 0xffefc300, 0xff001c7b, 0xffefe300,
  1370. 0xff001c84, 0xfff70000, 0xff001c8c, 0xfff72000, 0xff001c94, 0xfff74100, 0xff001c9c, 0xfff76100,
  1371. 0xff001ca5, 0xfff78200, 0xff001cad, 0xfff7a200, 0xff001cb5, 0xfff7c300, 0xff001cbd, 0xfff7e300,
  1372. 0xff001cc6, 0xffff0000, 0xff001cce, 0xffff2000, 0xff001cd6, 0xffff4100, 0xff001cde, 0xffff6100,
  1373. 0xff001ce7, 0xffff8200, 0xff001cef, 0xffffa200, 0xff001cf7, 0xffffc300, 0xff001cff, 0xffffe300
  1374. };
// Special optimized blit for RGB565 -> ARGB8888: table-driven conversion
// using the precomputed RGB565_ARGB8888_LUT above.
static void Blit_RGB565_ARGB8888(SDL_BlitInfo * info)
{
    Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
}
// Special optimized blit for RGB565 -> ABGR8888
// NOTE(review): 512-entry table consumed by Blit_RGB565_32; it appears to be
// indexed by the two bytes of an RGB565 pixel (low/high interleaved), with
// the two looked-up words combined per pixel and alpha forced to 0xff —
// confirm against the Blit_RGB565_32 implementation earlier in this file.
static const Uint32 RGB565_ABGR8888_LUT[512] = {
    0xff000000, 0xff000000, 0xff080000, 0xff002000, 0xff100000, 0xff004100, 0xff180000, 0xff006100,
    0xff210000, 0xff008200, 0xff290000, 0xff00a200, 0xff310000, 0xff00c300, 0xff390000, 0xff00e300,
    0xff420000, 0xff000008, 0xff4a0000, 0xff002008, 0xff520000, 0xff004108, 0xff5a0000, 0xff006108,
    0xff630000, 0xff008208, 0xff6b0000, 0xff00a208, 0xff730000, 0xff00c308, 0xff7b0000, 0xff00e308,
    0xff840000, 0xff000010, 0xff8c0000, 0xff002010, 0xff940000, 0xff004110, 0xff9c0000, 0xff006110,
    0xffa50000, 0xff008210, 0xffad0000, 0xff00a210, 0xffb50000, 0xff00c310, 0xffbd0000, 0xff00e310,
    0xffc60000, 0xff000018, 0xffce0000, 0xff002018, 0xffd60000, 0xff004118, 0xffde0000, 0xff006118,
    0xffe70000, 0xff008218, 0xffef0000, 0xff00a218, 0xfff70000, 0xff00c318, 0xffff0000, 0xff00e318,
    0xff000400, 0xff000021, 0xff080400, 0xff002021, 0xff100400, 0xff004121, 0xff180400, 0xff006121,
    0xff210400, 0xff008221, 0xff290400, 0xff00a221, 0xff310400, 0xff00c321, 0xff390400, 0xff00e321,
    0xff420400, 0xff000029, 0xff4a0400, 0xff002029, 0xff520400, 0xff004129, 0xff5a0400, 0xff006129,
    0xff630400, 0xff008229, 0xff6b0400, 0xff00a229, 0xff730400, 0xff00c329, 0xff7b0400, 0xff00e329,
    0xff840400, 0xff000031, 0xff8c0400, 0xff002031, 0xff940400, 0xff004131, 0xff9c0400, 0xff006131,
    0xffa50400, 0xff008231, 0xffad0400, 0xff00a231, 0xffb50400, 0xff00c331, 0xffbd0400, 0xff00e331,
    0xffc60400, 0xff000039, 0xffce0400, 0xff002039, 0xffd60400, 0xff004139, 0xffde0400, 0xff006139,
    0xffe70400, 0xff008239, 0xffef0400, 0xff00a239, 0xfff70400, 0xff00c339, 0xffff0400, 0xff00e339,
    0xff000800, 0xff000042, 0xff080800, 0xff002042, 0xff100800, 0xff004142, 0xff180800, 0xff006142,
    0xff210800, 0xff008242, 0xff290800, 0xff00a242, 0xff310800, 0xff00c342, 0xff390800, 0xff00e342,
    0xff420800, 0xff00004a, 0xff4a0800, 0xff00204a, 0xff520800, 0xff00414a, 0xff5a0800, 0xff00614a,
    0xff630800, 0xff00824a, 0xff6b0800, 0xff00a24a, 0xff730800, 0xff00c34a, 0xff7b0800, 0xff00e34a,
    0xff840800, 0xff000052, 0xff8c0800, 0xff002052, 0xff940800, 0xff004152, 0xff9c0800, 0xff006152,
    0xffa50800, 0xff008252, 0xffad0800, 0xff00a252, 0xffb50800, 0xff00c352, 0xffbd0800, 0xff00e352,
    0xffc60800, 0xff00005a, 0xffce0800, 0xff00205a, 0xffd60800, 0xff00415a, 0xffde0800, 0xff00615a,
    0xffe70800, 0xff00825a, 0xffef0800, 0xff00a25a, 0xfff70800, 0xff00c35a, 0xffff0800, 0xff00e35a,
    0xff000c00, 0xff000063, 0xff080c00, 0xff002063, 0xff100c00, 0xff004163, 0xff180c00, 0xff006163,
    0xff210c00, 0xff008263, 0xff290c00, 0xff00a263, 0xff310c00, 0xff00c363, 0xff390c00, 0xff00e363,
    0xff420c00, 0xff00006b, 0xff4a0c00, 0xff00206b, 0xff520c00, 0xff00416b, 0xff5a0c00, 0xff00616b,
    0xff630c00, 0xff00826b, 0xff6b0c00, 0xff00a26b, 0xff730c00, 0xff00c36b, 0xff7b0c00, 0xff00e36b,
    0xff840c00, 0xff000073, 0xff8c0c00, 0xff002073, 0xff940c00, 0xff004173, 0xff9c0c00, 0xff006173,
    0xffa50c00, 0xff008273, 0xffad0c00, 0xff00a273, 0xffb50c00, 0xff00c373, 0xffbd0c00, 0xff00e373,
    0xffc60c00, 0xff00007b, 0xffce0c00, 0xff00207b, 0xffd60c00, 0xff00417b, 0xffde0c00, 0xff00617b,
    0xffe70c00, 0xff00827b, 0xffef0c00, 0xff00a27b, 0xfff70c00, 0xff00c37b, 0xffff0c00, 0xff00e37b,
    0xff001000, 0xff000084, 0xff081000, 0xff002084, 0xff101000, 0xff004184, 0xff181000, 0xff006184,
    0xff211000, 0xff008284, 0xff291000, 0xff00a284, 0xff311000, 0xff00c384, 0xff391000, 0xff00e384,
    0xff421000, 0xff00008c, 0xff4a1000, 0xff00208c, 0xff521000, 0xff00418c, 0xff5a1000, 0xff00618c,
    0xff631000, 0xff00828c, 0xff6b1000, 0xff00a28c, 0xff731000, 0xff00c38c, 0xff7b1000, 0xff00e38c,
    0xff841000, 0xff000094, 0xff8c1000, 0xff002094, 0xff941000, 0xff004194, 0xff9c1000, 0xff006194,
    0xffa51000, 0xff008294, 0xffad1000, 0xff00a294, 0xffb51000, 0xff00c394, 0xffbd1000, 0xff00e394,
    0xffc61000, 0xff00009c, 0xffce1000, 0xff00209c, 0xffd61000, 0xff00419c, 0xffde1000, 0xff00619c,
    0xffe71000, 0xff00829c, 0xffef1000, 0xff00a29c, 0xfff71000, 0xff00c39c, 0xffff1000, 0xff00e39c,
    0xff001400, 0xff0000a5, 0xff081400, 0xff0020a5, 0xff101400, 0xff0041a5, 0xff181400, 0xff0061a5,
    0xff211400, 0xff0082a5, 0xff291400, 0xff00a2a5, 0xff311400, 0xff00c3a5, 0xff391400, 0xff00e3a5,
    0xff421400, 0xff0000ad, 0xff4a1400, 0xff0020ad, 0xff521400, 0xff0041ad, 0xff5a1400, 0xff0061ad,
    0xff631400, 0xff0082ad, 0xff6b1400, 0xff00a2ad, 0xff731400, 0xff00c3ad, 0xff7b1400, 0xff00e3ad,
    0xff841400, 0xff0000b5, 0xff8c1400, 0xff0020b5, 0xff941400, 0xff0041b5, 0xff9c1400, 0xff0061b5,
    0xffa51400, 0xff0082b5, 0xffad1400, 0xff00a2b5, 0xffb51400, 0xff00c3b5, 0xffbd1400, 0xff00e3b5,
    0xffc61400, 0xff0000bd, 0xffce1400, 0xff0020bd, 0xffd61400, 0xff0041bd, 0xffde1400, 0xff0061bd,
    0xffe71400, 0xff0082bd, 0xffef1400, 0xff00a2bd, 0xfff71400, 0xff00c3bd, 0xffff1400, 0xff00e3bd,
    0xff001800, 0xff0000c6, 0xff081800, 0xff0020c6, 0xff101800, 0xff0041c6, 0xff181800, 0xff0061c6,
    0xff211800, 0xff0082c6, 0xff291800, 0xff00a2c6, 0xff311800, 0xff00c3c6, 0xff391800, 0xff00e3c6,
    0xff421800, 0xff0000ce, 0xff4a1800, 0xff0020ce, 0xff521800, 0xff0041ce, 0xff5a1800, 0xff0061ce,
    0xff631800, 0xff0082ce, 0xff6b1800, 0xff00a2ce, 0xff731800, 0xff00c3ce, 0xff7b1800, 0xff00e3ce,
    0xff841800, 0xff0000d6, 0xff8c1800, 0xff0020d6, 0xff941800, 0xff0041d6, 0xff9c1800, 0xff0061d6,
    0xffa51800, 0xff0082d6, 0xffad1800, 0xff00a2d6, 0xffb51800, 0xff00c3d6, 0xffbd1800, 0xff00e3d6,
    0xffc61800, 0xff0000de, 0xffce1800, 0xff0020de, 0xffd61800, 0xff0041de, 0xffde1800, 0xff0061de,
    0xffe71800, 0xff0082de, 0xffef1800, 0xff00a2de, 0xfff71800, 0xff00c3de, 0xffff1800, 0xff00e3de,
    0xff001c00, 0xff0000e7, 0xff081c00, 0xff0020e7, 0xff101c00, 0xff0041e7, 0xff181c00, 0xff0061e7,
    0xff211c00, 0xff0082e7, 0xff291c00, 0xff00a2e7, 0xff311c00, 0xff00c3e7, 0xff391c00, 0xff00e3e7,
    0xff421c00, 0xff0000ef, 0xff4a1c00, 0xff0020ef, 0xff521c00, 0xff0041ef, 0xff5a1c00, 0xff0061ef,
    0xff631c00, 0xff0082ef, 0xff6b1c00, 0xff00a2ef, 0xff731c00, 0xff00c3ef, 0xff7b1c00, 0xff00e3ef,
    0xff841c00, 0xff0000f7, 0xff8c1c00, 0xff0020f7, 0xff941c00, 0xff0041f7, 0xff9c1c00, 0xff0061f7,
    0xffa51c00, 0xff0082f7, 0xffad1c00, 0xff00a2f7, 0xffb51c00, 0xff00c3f7, 0xffbd1c00, 0xff00e3f7,
    0xffc61c00, 0xff0000ff, 0xffce1c00, 0xff0020ff, 0xffd61c00, 0xff0041ff, 0xffde1c00, 0xff0061ff,
    0xffe71c00, 0xff0082ff, 0xffef1c00, 0xff00a2ff, 0xfff71c00, 0xff00c3ff, 0xffff1c00, 0xff00e3ff
};
// Special optimized blit for RGB565 -> ABGR8888: table-driven conversion
// using the precomputed RGB565_ABGR8888_LUT above.
static void Blit_RGB565_ABGR8888(SDL_BlitInfo * info)
{
    Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
}
// Special optimized blit for RGB565 -> RGBA8888
// NOTE(review): 512-entry table consumed by Blit_RGB565_32; it appears to be
// indexed by the two bytes of an RGB565 pixel (low/high interleaved), with
// the two looked-up words combined per pixel and alpha forced to 0xff in the
// low byte — confirm against the Blit_RGB565_32 implementation in this file.
static const Uint32 RGB565_RGBA8888_LUT[512] = {
    0x000000ff, 0x000000ff, 0x000008ff, 0x002000ff, 0x000010ff, 0x004100ff, 0x000018ff, 0x006100ff,
    0x000021ff, 0x008200ff, 0x000029ff, 0x00a200ff, 0x000031ff, 0x00c300ff, 0x000039ff, 0x00e300ff,
    0x000042ff, 0x080000ff, 0x00004aff, 0x082000ff, 0x000052ff, 0x084100ff, 0x00005aff, 0x086100ff,
    0x000063ff, 0x088200ff, 0x00006bff, 0x08a200ff, 0x000073ff, 0x08c300ff, 0x00007bff, 0x08e300ff,
    0x000084ff, 0x100000ff, 0x00008cff, 0x102000ff, 0x000094ff, 0x104100ff, 0x00009cff, 0x106100ff,
    0x0000a5ff, 0x108200ff, 0x0000adff, 0x10a200ff, 0x0000b5ff, 0x10c300ff, 0x0000bdff, 0x10e300ff,
    0x0000c6ff, 0x180000ff, 0x0000ceff, 0x182000ff, 0x0000d6ff, 0x184100ff, 0x0000deff, 0x186100ff,
    0x0000e7ff, 0x188200ff, 0x0000efff, 0x18a200ff, 0x0000f7ff, 0x18c300ff, 0x0000ffff, 0x18e300ff,
    0x000400ff, 0x210000ff, 0x000408ff, 0x212000ff, 0x000410ff, 0x214100ff, 0x000418ff, 0x216100ff,
    0x000421ff, 0x218200ff, 0x000429ff, 0x21a200ff, 0x000431ff, 0x21c300ff, 0x000439ff, 0x21e300ff,
    0x000442ff, 0x290000ff, 0x00044aff, 0x292000ff, 0x000452ff, 0x294100ff, 0x00045aff, 0x296100ff,
    0x000463ff, 0x298200ff, 0x00046bff, 0x29a200ff, 0x000473ff, 0x29c300ff, 0x00047bff, 0x29e300ff,
    0x000484ff, 0x310000ff, 0x00048cff, 0x312000ff, 0x000494ff, 0x314100ff, 0x00049cff, 0x316100ff,
    0x0004a5ff, 0x318200ff, 0x0004adff, 0x31a200ff, 0x0004b5ff, 0x31c300ff, 0x0004bdff, 0x31e300ff,
    0x0004c6ff, 0x390000ff, 0x0004ceff, 0x392000ff, 0x0004d6ff, 0x394100ff, 0x0004deff, 0x396100ff,
    0x0004e7ff, 0x398200ff, 0x0004efff, 0x39a200ff, 0x0004f7ff, 0x39c300ff, 0x0004ffff, 0x39e300ff,
    0x000800ff, 0x420000ff, 0x000808ff, 0x422000ff, 0x000810ff, 0x424100ff, 0x000818ff, 0x426100ff,
    0x000821ff, 0x428200ff, 0x000829ff, 0x42a200ff, 0x000831ff, 0x42c300ff, 0x000839ff, 0x42e300ff,
    0x000842ff, 0x4a0000ff, 0x00084aff, 0x4a2000ff, 0x000852ff, 0x4a4100ff, 0x00085aff, 0x4a6100ff,
    0x000863ff, 0x4a8200ff, 0x00086bff, 0x4aa200ff, 0x000873ff, 0x4ac300ff, 0x00087bff, 0x4ae300ff,
    0x000884ff, 0x520000ff, 0x00088cff, 0x522000ff, 0x000894ff, 0x524100ff, 0x00089cff, 0x526100ff,
    0x0008a5ff, 0x528200ff, 0x0008adff, 0x52a200ff, 0x0008b5ff, 0x52c300ff, 0x0008bdff, 0x52e300ff,
    0x0008c6ff, 0x5a0000ff, 0x0008ceff, 0x5a2000ff, 0x0008d6ff, 0x5a4100ff, 0x0008deff, 0x5a6100ff,
    0x0008e7ff, 0x5a8200ff, 0x0008efff, 0x5aa200ff, 0x0008f7ff, 0x5ac300ff, 0x0008ffff, 0x5ae300ff,
    0x000c00ff, 0x630000ff, 0x000c08ff, 0x632000ff, 0x000c10ff, 0x634100ff, 0x000c18ff, 0x636100ff,
    0x000c21ff, 0x638200ff, 0x000c29ff, 0x63a200ff, 0x000c31ff, 0x63c300ff, 0x000c39ff, 0x63e300ff,
    0x000c42ff, 0x6b0000ff, 0x000c4aff, 0x6b2000ff, 0x000c52ff, 0x6b4100ff, 0x000c5aff, 0x6b6100ff,
    0x000c63ff, 0x6b8200ff, 0x000c6bff, 0x6ba200ff, 0x000c73ff, 0x6bc300ff, 0x000c7bff, 0x6be300ff,
    0x000c84ff, 0x730000ff, 0x000c8cff, 0x732000ff, 0x000c94ff, 0x734100ff, 0x000c9cff, 0x736100ff,
    0x000ca5ff, 0x738200ff, 0x000cadff, 0x73a200ff, 0x000cb5ff, 0x73c300ff, 0x000cbdff, 0x73e300ff,
    0x000cc6ff, 0x7b0000ff, 0x000cceff, 0x7b2000ff, 0x000cd6ff, 0x7b4100ff, 0x000cdeff, 0x7b6100ff,
    0x000ce7ff, 0x7b8200ff, 0x000cefff, 0x7ba200ff, 0x000cf7ff, 0x7bc300ff, 0x000cffff, 0x7be300ff,
    0x001000ff, 0x840000ff, 0x001008ff, 0x842000ff, 0x001010ff, 0x844100ff, 0x001018ff, 0x846100ff,
    0x001021ff, 0x848200ff, 0x001029ff, 0x84a200ff, 0x001031ff, 0x84c300ff, 0x001039ff, 0x84e300ff,
    0x001042ff, 0x8c0000ff, 0x00104aff, 0x8c2000ff, 0x001052ff, 0x8c4100ff, 0x00105aff, 0x8c6100ff,
    0x001063ff, 0x8c8200ff, 0x00106bff, 0x8ca200ff, 0x001073ff, 0x8cc300ff, 0x00107bff, 0x8ce300ff,
    0x001084ff, 0x940000ff, 0x00108cff, 0x942000ff, 0x001094ff, 0x944100ff, 0x00109cff, 0x946100ff,
    0x0010a5ff, 0x948200ff, 0x0010adff, 0x94a200ff, 0x0010b5ff, 0x94c300ff, 0x0010bdff, 0x94e300ff,
    0x0010c6ff, 0x9c0000ff, 0x0010ceff, 0x9c2000ff, 0x0010d6ff, 0x9c4100ff, 0x0010deff, 0x9c6100ff,
    0x0010e7ff, 0x9c8200ff, 0x0010efff, 0x9ca200ff, 0x0010f7ff, 0x9cc300ff, 0x0010ffff, 0x9ce300ff,
    0x001400ff, 0xa50000ff, 0x001408ff, 0xa52000ff, 0x001410ff, 0xa54100ff, 0x001418ff, 0xa56100ff,
    0x001421ff, 0xa58200ff, 0x001429ff, 0xa5a200ff, 0x001431ff, 0xa5c300ff, 0x001439ff, 0xa5e300ff,
    0x001442ff, 0xad0000ff, 0x00144aff, 0xad2000ff, 0x001452ff, 0xad4100ff, 0x00145aff, 0xad6100ff,
    0x001463ff, 0xad8200ff, 0x00146bff, 0xada200ff, 0x001473ff, 0xadc300ff, 0x00147bff, 0xade300ff,
    0x001484ff, 0xb50000ff, 0x00148cff, 0xb52000ff, 0x001494ff, 0xb54100ff, 0x00149cff, 0xb56100ff,
    0x0014a5ff, 0xb58200ff, 0x0014adff, 0xb5a200ff, 0x0014b5ff, 0xb5c300ff, 0x0014bdff, 0xb5e300ff,
    0x0014c6ff, 0xbd0000ff, 0x0014ceff, 0xbd2000ff, 0x0014d6ff, 0xbd4100ff, 0x0014deff, 0xbd6100ff,
    0x0014e7ff, 0xbd8200ff, 0x0014efff, 0xbda200ff, 0x0014f7ff, 0xbdc300ff, 0x0014ffff, 0xbde300ff,
    0x001800ff, 0xc60000ff, 0x001808ff, 0xc62000ff, 0x001810ff, 0xc64100ff, 0x001818ff, 0xc66100ff,
    0x001821ff, 0xc68200ff, 0x001829ff, 0xc6a200ff, 0x001831ff, 0xc6c300ff, 0x001839ff, 0xc6e300ff,
    0x001842ff, 0xce0000ff, 0x00184aff, 0xce2000ff, 0x001852ff, 0xce4100ff, 0x00185aff, 0xce6100ff,
    0x001863ff, 0xce8200ff, 0x00186bff, 0xcea200ff, 0x001873ff, 0xcec300ff, 0x00187bff, 0xcee300ff,
    0x001884ff, 0xd60000ff, 0x00188cff, 0xd62000ff, 0x001894ff, 0xd64100ff, 0x00189cff, 0xd66100ff,
    0x0018a5ff, 0xd68200ff, 0x0018adff, 0xd6a200ff, 0x0018b5ff, 0xd6c300ff, 0x0018bdff, 0xd6e300ff,
    0x0018c6ff, 0xde0000ff, 0x0018ceff, 0xde2000ff, 0x0018d6ff, 0xde4100ff, 0x0018deff, 0xde6100ff,
    0x0018e7ff, 0xde8200ff, 0x0018efff, 0xdea200ff, 0x0018f7ff, 0xdec300ff, 0x0018ffff, 0xdee300ff,
    0x001c00ff, 0xe70000ff, 0x001c08ff, 0xe72000ff, 0x001c10ff, 0xe74100ff, 0x001c18ff, 0xe76100ff,
    0x001c21ff, 0xe78200ff, 0x001c29ff, 0xe7a200ff, 0x001c31ff, 0xe7c300ff, 0x001c39ff, 0xe7e300ff,
    0x001c42ff, 0xef0000ff, 0x001c4aff, 0xef2000ff, 0x001c52ff, 0xef4100ff, 0x001c5aff, 0xef6100ff,
    0x001c63ff, 0xef8200ff, 0x001c6bff, 0xefa200ff, 0x001c73ff, 0xefc300ff, 0x001c7bff, 0xefe300ff,
    0x001c84ff, 0xf70000ff, 0x001c8cff, 0xf72000ff, 0x001c94ff, 0xf74100ff, 0x001c9cff, 0xf76100ff,
    0x001ca5ff, 0xf78200ff, 0x001cadff, 0xf7a200ff, 0x001cb5ff, 0xf7c300ff, 0x001cbdff, 0xf7e300ff,
    0x001cc6ff, 0xff0000ff, 0x001cceff, 0xff2000ff, 0x001cd6ff, 0xff4100ff, 0x001cdeff, 0xff6100ff,
    0x001ce7ff, 0xff8200ff, 0x001cefff, 0xffa200ff, 0x001cf7ff, 0xffc300ff, 0x001cffff, 0xffe300ff
};
// Special optimized blit for RGB565 -> RGBA8888: table-driven conversion
// using the precomputed RGB565_RGBA8888_LUT above.
static void Blit_RGB565_RGBA8888(SDL_BlitInfo * info)
{
    Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
}
// Special optimized blit for RGB565 -> BGRA8888
// NOTE(review): 512-entry table consumed by Blit_RGB565_32; it appears to be
// indexed by the two bytes of an RGB565 pixel (low/high interleaved), with
// the two looked-up words combined per pixel and alpha forced to 0xff in the
// low byte — confirm against the Blit_RGB565_32 implementation in this file.
static const Uint32 RGB565_BGRA8888_LUT[512] = {
    0x000000ff, 0x000000ff, 0x080000ff, 0x002000ff, 0x100000ff, 0x004100ff, 0x180000ff, 0x006100ff,
    0x210000ff, 0x008200ff, 0x290000ff, 0x00a200ff, 0x310000ff, 0x00c300ff, 0x390000ff, 0x00e300ff,
    0x420000ff, 0x000008ff, 0x4a0000ff, 0x002008ff, 0x520000ff, 0x004108ff, 0x5a0000ff, 0x006108ff,
    0x630000ff, 0x008208ff, 0x6b0000ff, 0x00a208ff, 0x730000ff, 0x00c308ff, 0x7b0000ff, 0x00e308ff,
    0x840000ff, 0x000010ff, 0x8c0000ff, 0x002010ff, 0x940000ff, 0x004110ff, 0x9c0000ff, 0x006110ff,
    0xa50000ff, 0x008210ff, 0xad0000ff, 0x00a210ff, 0xb50000ff, 0x00c310ff, 0xbd0000ff, 0x00e310ff,
    0xc60000ff, 0x000018ff, 0xce0000ff, 0x002018ff, 0xd60000ff, 0x004118ff, 0xde0000ff, 0x006118ff,
    0xe70000ff, 0x008218ff, 0xef0000ff, 0x00a218ff, 0xf70000ff, 0x00c318ff, 0xff0000ff, 0x00e318ff,
    0x000400ff, 0x000021ff, 0x080400ff, 0x002021ff, 0x100400ff, 0x004121ff, 0x180400ff, 0x006121ff,
    0x210400ff, 0x008221ff, 0x290400ff, 0x00a221ff, 0x310400ff, 0x00c321ff, 0x390400ff, 0x00e321ff,
    0x420400ff, 0x000029ff, 0x4a0400ff, 0x002029ff, 0x520400ff, 0x004129ff, 0x5a0400ff, 0x006129ff,
    0x630400ff, 0x008229ff, 0x6b0400ff, 0x00a229ff, 0x730400ff, 0x00c329ff, 0x7b0400ff, 0x00e329ff,
    0x840400ff, 0x000031ff, 0x8c0400ff, 0x002031ff, 0x940400ff, 0x004131ff, 0x9c0400ff, 0x006131ff,
    0xa50400ff, 0x008231ff, 0xad0400ff, 0x00a231ff, 0xb50400ff, 0x00c331ff, 0xbd0400ff, 0x00e331ff,
    0xc60400ff, 0x000039ff, 0xce0400ff, 0x002039ff, 0xd60400ff, 0x004139ff, 0xde0400ff, 0x006139ff,
    0xe70400ff, 0x008239ff, 0xef0400ff, 0x00a239ff, 0xf70400ff, 0x00c339ff, 0xff0400ff, 0x00e339ff,
    0x000800ff, 0x000042ff, 0x080800ff, 0x002042ff, 0x100800ff, 0x004142ff, 0x180800ff, 0x006142ff,
    0x210800ff, 0x008242ff, 0x290800ff, 0x00a242ff, 0x310800ff, 0x00c342ff, 0x390800ff, 0x00e342ff,
    0x420800ff, 0x00004aff, 0x4a0800ff, 0x00204aff, 0x520800ff, 0x00414aff, 0x5a0800ff, 0x00614aff,
    0x630800ff, 0x00824aff, 0x6b0800ff, 0x00a24aff, 0x730800ff, 0x00c34aff, 0x7b0800ff, 0x00e34aff,
    0x840800ff, 0x000052ff, 0x8c0800ff, 0x002052ff, 0x940800ff, 0x004152ff, 0x9c0800ff, 0x006152ff,
    0xa50800ff, 0x008252ff, 0xad0800ff, 0x00a252ff, 0xb50800ff, 0x00c352ff, 0xbd0800ff, 0x00e352ff,
    0xc60800ff, 0x00005aff, 0xce0800ff, 0x00205aff, 0xd60800ff, 0x00415aff, 0xde0800ff, 0x00615aff,
    0xe70800ff, 0x00825aff, 0xef0800ff, 0x00a25aff, 0xf70800ff, 0x00c35aff, 0xff0800ff, 0x00e35aff,
    0x000c00ff, 0x000063ff, 0x080c00ff, 0x002063ff, 0x100c00ff, 0x004163ff, 0x180c00ff, 0x006163ff,
    0x210c00ff, 0x008263ff, 0x290c00ff, 0x00a263ff, 0x310c00ff, 0x00c363ff, 0x390c00ff, 0x00e363ff,
    0x420c00ff, 0x00006bff, 0x4a0c00ff, 0x00206bff, 0x520c00ff, 0x00416bff, 0x5a0c00ff, 0x00616bff,
    0x630c00ff, 0x00826bff, 0x6b0c00ff, 0x00a26bff, 0x730c00ff, 0x00c36bff, 0x7b0c00ff, 0x00e36bff,
    0x840c00ff, 0x000073ff, 0x8c0c00ff, 0x002073ff, 0x940c00ff, 0x004173ff, 0x9c0c00ff, 0x006173ff,
    0xa50c00ff, 0x008273ff, 0xad0c00ff, 0x00a273ff, 0xb50c00ff, 0x00c373ff, 0xbd0c00ff, 0x00e373ff,
    0xc60c00ff, 0x00007bff, 0xce0c00ff, 0x00207bff, 0xd60c00ff, 0x00417bff, 0xde0c00ff, 0x00617bff,
    0xe70c00ff, 0x00827bff, 0xef0c00ff, 0x00a27bff, 0xf70c00ff, 0x00c37bff, 0xff0c00ff, 0x00e37bff,
    0x001000ff, 0x000084ff, 0x081000ff, 0x002084ff, 0x101000ff, 0x004184ff, 0x181000ff, 0x006184ff,
    0x211000ff, 0x008284ff, 0x291000ff, 0x00a284ff, 0x311000ff, 0x00c384ff, 0x391000ff, 0x00e384ff,
    0x421000ff, 0x00008cff, 0x4a1000ff, 0x00208cff, 0x521000ff, 0x00418cff, 0x5a1000ff, 0x00618cff,
    0x631000ff, 0x00828cff, 0x6b1000ff, 0x00a28cff, 0x731000ff, 0x00c38cff, 0x7b1000ff, 0x00e38cff,
    0x841000ff, 0x000094ff, 0x8c1000ff, 0x002094ff, 0x941000ff, 0x004194ff, 0x9c1000ff, 0x006194ff,
    0xa51000ff, 0x008294ff, 0xad1000ff, 0x00a294ff, 0xb51000ff, 0x00c394ff, 0xbd1000ff, 0x00e394ff,
    0xc61000ff, 0x00009cff, 0xce1000ff, 0x00209cff, 0xd61000ff, 0x00419cff, 0xde1000ff, 0x00619cff,
    0xe71000ff, 0x00829cff, 0xef1000ff, 0x00a29cff, 0xf71000ff, 0x00c39cff, 0xff1000ff, 0x00e39cff,
    0x001400ff, 0x0000a5ff, 0x081400ff, 0x0020a5ff, 0x101400ff, 0x0041a5ff, 0x181400ff, 0x0061a5ff,
    0x211400ff, 0x0082a5ff, 0x291400ff, 0x00a2a5ff, 0x311400ff, 0x00c3a5ff, 0x391400ff, 0x00e3a5ff,
    0x421400ff, 0x0000adff, 0x4a1400ff, 0x0020adff, 0x521400ff, 0x0041adff, 0x5a1400ff, 0x0061adff,
    0x631400ff, 0x0082adff, 0x6b1400ff, 0x00a2adff, 0x731400ff, 0x00c3adff, 0x7b1400ff, 0x00e3adff,
    0x841400ff, 0x0000b5ff, 0x8c1400ff, 0x0020b5ff, 0x941400ff, 0x0041b5ff, 0x9c1400ff, 0x0061b5ff,
    0xa51400ff, 0x0082b5ff, 0xad1400ff, 0x00a2b5ff, 0xb51400ff, 0x00c3b5ff, 0xbd1400ff, 0x00e3b5ff,
    0xc61400ff, 0x0000bdff, 0xce1400ff, 0x0020bdff, 0xd61400ff, 0x0041bdff, 0xde1400ff, 0x0061bdff,
    0xe71400ff, 0x0082bdff, 0xef1400ff, 0x00a2bdff, 0xf71400ff, 0x00c3bdff, 0xff1400ff, 0x00e3bdff,
    0x001800ff, 0x0000c6ff, 0x081800ff, 0x0020c6ff, 0x101800ff, 0x0041c6ff, 0x181800ff, 0x0061c6ff,
    0x211800ff, 0x0082c6ff, 0x291800ff, 0x00a2c6ff, 0x311800ff, 0x00c3c6ff, 0x391800ff, 0x00e3c6ff,
    0x421800ff, 0x0000ceff, 0x4a1800ff, 0x0020ceff, 0x521800ff, 0x0041ceff, 0x5a1800ff, 0x0061ceff,
    0x631800ff, 0x0082ceff, 0x6b1800ff, 0x00a2ceff, 0x731800ff, 0x00c3ceff, 0x7b1800ff, 0x00e3ceff,
    0x841800ff, 0x0000d6ff, 0x8c1800ff, 0x0020d6ff, 0x941800ff, 0x0041d6ff, 0x9c1800ff, 0x0061d6ff,
    0xa51800ff, 0x0082d6ff, 0xad1800ff, 0x00a2d6ff, 0xb51800ff, 0x00c3d6ff, 0xbd1800ff, 0x00e3d6ff,
    0xc61800ff, 0x0000deff, 0xce1800ff, 0x0020deff, 0xd61800ff, 0x0041deff, 0xde1800ff, 0x0061deff,
    0xe71800ff, 0x0082deff, 0xef1800ff, 0x00a2deff, 0xf71800ff, 0x00c3deff, 0xff1800ff, 0x00e3deff,
    0x001c00ff, 0x0000e7ff, 0x081c00ff, 0x0020e7ff, 0x101c00ff, 0x0041e7ff, 0x181c00ff, 0x0061e7ff,
    0x211c00ff, 0x0082e7ff, 0x291c00ff, 0x00a2e7ff, 0x311c00ff, 0x00c3e7ff, 0x391c00ff, 0x00e3e7ff,
    0x421c00ff, 0x0000efff, 0x4a1c00ff, 0x0020efff, 0x521c00ff, 0x0041efff, 0x5a1c00ff, 0x0061efff,
    0x631c00ff, 0x0082efff, 0x6b1c00ff, 0x00a2efff, 0x731c00ff, 0x00c3efff, 0x7b1c00ff, 0x00e3efff,
    0x841c00ff, 0x0000f7ff, 0x8c1c00ff, 0x0020f7ff, 0x941c00ff, 0x0041f7ff, 0x9c1c00ff, 0x0061f7ff,
    0xa51c00ff, 0x0082f7ff, 0xad1c00ff, 0x00a2f7ff, 0xb51c00ff, 0x00c3f7ff, 0xbd1c00ff, 0x00e3f7ff,
    0xc61c00ff, 0x0000ffff, 0xce1c00ff, 0x0020ffff, 0xd61c00ff, 0x0041ffff, 0xde1c00ff, 0x0061ffff,
    0xe71c00ff, 0x0082ffff, 0xef1c00ff, 0x00a2ffff, 0xf71c00ff, 0x00c3ffff, 0xff1c00ff, 0x00e3ffff
};
// Special optimized blit for RGB565 -> BGRA8888: table-driven conversion
// using the precomputed RGB565_BGRA8888_LUT above.
static void Blit_RGB565_BGRA8888(SDL_BlitInfo * info)
{
    Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
}
  1592. /* *INDENT-ON* */ // clang-format on
  1593. #endif // SDL_HAVE_BLIT_N_RGB565
  1594. // blits 16 bit RGB<->RGBA with both surfaces having the same R,G,B fields
  1595. static void Blit2to2MaskAlpha(SDL_BlitInfo *info)
  1596. {
  1597. int width = info->dst_w;
  1598. int height = info->dst_h;
  1599. Uint16 *src = (Uint16 *)info->src;
  1600. int srcskip = info->src_skip;
  1601. Uint16 *dst = (Uint16 *)info->dst;
  1602. int dstskip = info->dst_skip;
  1603. const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
  1604. const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
  1605. if (dstfmt->Amask) {
  1606. // RGB->RGBA, SET_ALPHA
  1607. Uint16 mask = ((Uint32)info->a >> (8 - dstfmt->Abits)) << dstfmt->Ashift;
  1608. while (height--) {
  1609. /* *INDENT-OFF* */ // clang-format off
  1610. DUFFS_LOOP_TRIVIAL(
  1611. {
  1612. *dst = *src | mask;
  1613. ++dst;
  1614. ++src;
  1615. },
  1616. width);
  1617. /* *INDENT-ON* */ // clang-format on
  1618. src = (Uint16 *)((Uint8 *)src + srcskip);
  1619. dst = (Uint16 *)((Uint8 *)dst + dstskip);
  1620. }
  1621. } else {
  1622. // RGBA->RGB, NO_ALPHA
  1623. Uint16 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
  1624. while (height--) {
  1625. /* *INDENT-OFF* */ // clang-format off
  1626. DUFFS_LOOP_TRIVIAL(
  1627. {
  1628. *dst = *src & mask;
  1629. ++dst;
  1630. ++src;
  1631. },
  1632. width);
  1633. /* *INDENT-ON* */ // clang-format on
  1634. src = (Uint16 *)((Uint8 *)src + srcskip);
  1635. dst = (Uint16 *)((Uint8 *)dst + dstskip);
  1636. }
  1637. }
  1638. }
  1639. // blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields
  1640. static void Blit4to4MaskAlpha(SDL_BlitInfo *info)
  1641. {
  1642. int width = info->dst_w;
  1643. int height = info->dst_h;
  1644. Uint32 *src = (Uint32 *)info->src;
  1645. int srcskip = info->src_skip;
  1646. Uint32 *dst = (Uint32 *)info->dst;
  1647. int dstskip = info->dst_skip;
  1648. const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
  1649. const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
  1650. if (dstfmt->Amask) {
  1651. // RGB->RGBA, SET_ALPHA
  1652. Uint32 mask = ((Uint32)info->a >> (8 - dstfmt->Abits)) << dstfmt->Ashift;
  1653. while (height--) {
  1654. /* *INDENT-OFF* */ // clang-format off
  1655. DUFFS_LOOP_TRIVIAL(
  1656. {
  1657. *dst = *src | mask;
  1658. ++dst;
  1659. ++src;
  1660. },
  1661. width);
  1662. /* *INDENT-ON* */ // clang-format on
  1663. src = (Uint32 *)((Uint8 *)src + srcskip);
  1664. dst = (Uint32 *)((Uint8 *)dst + dstskip);
  1665. }
  1666. } else {
  1667. // RGBA->RGB, NO_ALPHA
  1668. Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
  1669. while (height--) {
  1670. /* *INDENT-OFF* */ // clang-format off
  1671. DUFFS_LOOP_TRIVIAL(
  1672. {
  1673. *dst = *src & mask;
  1674. ++dst;
  1675. ++src;
  1676. },
  1677. width);
  1678. /* *INDENT-ON* */ // clang-format on
  1679. src = (Uint32 *)((Uint8 *)src + srcskip);
  1680. dst = (Uint32 *)((Uint8 *)dst + dstskip);
  1681. }
  1682. }
  1683. }
// permutation for mapping srcfmt to dstfmt, overloading or not the alpha channel
//
// Technique: push a marker pixel whose bytes are 1,2,3,4 through the source
// format's "unpack" macro and the destination format's "pack" macro.  After
// the round trip, each byte of Pixel records which 1-based source byte feeds
// that destination byte; a zero byte means no source byte maps there, i.e.
// that position is the destination's alpha channel.  Outputs *_p0..*_p3 are
// 0-based source byte offsets; *_alpha_channel (if non-NULL) is the index of
// the destination byte that must be filled with alpha by the caller.
static void get_permutation(const SDL_PixelFormatDetails *srcfmt, const SDL_PixelFormatDetails *dstfmt,
                            int *_p0, int *_p1, int *_p2, int *_p3, int *_alpha_channel)
{
    int alpha_channel = 0, p0, p1, p2, p3;
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
    int Pixel = 0x04030201; // identity permutation
#else
    int Pixel = 0x01020304; // identity permutation
    int srcbpp = srcfmt->bytes_per_pixel;
    int dstbpp = dstfmt->bytes_per_pixel;
#endif

    // Unpack the marker pixel through the source format...
    if (srcfmt->Amask) {
        RGBA_FROM_PIXEL(Pixel, srcfmt, p0, p1, p2, p3);
    } else {
        RGB_FROM_PIXEL(Pixel, srcfmt, p0, p1, p2);
        p3 = 0; // no source alpha byte exists
    }

    // ...and repack it through the destination format.
    if (dstfmt->Amask) {
        if (srcfmt->Amask) {
            PIXEL_FROM_RGBA(Pixel, dstfmt, p0, p1, p2, p3);
        } else {
            PIXEL_FROM_RGBA(Pixel, dstfmt, p0, p1, p2, 0);
        }
    } else {
        PIXEL_FROM_RGB(Pixel, dstfmt, p0, p1, p2);
    }

    // Split the permuted marker back into per-byte values, in memory order.
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
    p0 = Pixel & 0xFF;
    p1 = (Pixel >> 8) & 0xFF;
    p2 = (Pixel >> 16) & 0xFF;
    p3 = (Pixel >> 24) & 0xFF;
#else
    p3 = Pixel & 0xFF;
    p2 = (Pixel >> 8) & 0xFF;
    p1 = (Pixel >> 16) & 0xFF;
    p0 = (Pixel >> 24) & 0xFF;
#endif

    // A zero byte received no source data: that position is the alpha
    // channel.  Point it at source byte 1 as a harmless placeholder (the
    // caller overwrites this byte with alpha anyway) and remember its index.
    if (p0 == 0) {
        p0 = 1;
        alpha_channel = 0;
    } else if (p1 == 0) {
        p1 = 1;
        alpha_channel = 1;
    } else if (p2 == 0) {
        p2 = 1;
        alpha_channel = 2;
    } else if (p3 == 0) {
        p3 = 1;
        alpha_channel = 3;
    }

#if SDL_BYTEORDER == SDL_LIL_ENDIAN
#else
    // Big-endian only: adjust byte indices for the one-byte size mismatch
    // when converting between 3- and 4-byte-per-pixel formats.
    if (srcbpp == 3 && dstbpp == 4) {
        if (p0 != 1) {
            p0--;
        }
        if (p1 != 1) {
            p1--;
        }
        if (p2 != 1) {
            p2--;
        }
        if (p3 != 1) {
            p3--;
        }
    } else if (srcbpp == 4 && dstbpp == 3) {
        p0 = p1;
        p1 = p2;
        p2 = p3;
    }
#endif

    // Convert the 1-based marker values to 0-based source byte offsets.
    *_p0 = p0 - 1;
    *_p1 = p1 - 1;
    *_p2 = p2 - 1;
    *_p3 = p3 - 1;
    if (_alpha_channel) {
        *_alpha_channel = alpha_channel;
    }
}
// Generic N-byte -> N-byte RGB blit.  The destination alpha (if any) is
// filled with the constant modulation alpha from info->a, not copied from
// the source.  Uses byte-permutation fast paths where possible and falls
// back to per-pixel unpack/repack macros otherwise.
static void BlitNtoN(SDL_BlitInfo *info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint8 *src = info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = info->dst;
    int dstskip = info->dst_skip;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    int srcbpp = srcfmt->bytes_per_pixel;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    int dstbpp = dstfmt->bytes_per_pixel;
    unsigned alpha = dstfmt->Amask ? info->a : 0;

#if HAVE_FAST_WRITE_INT8
    // Blit with permutation: 4->4
    // (10-bit-per-channel formats cannot be converted by byte shuffling)
    if (srcbpp == 4 && dstbpp == 4 &&
        !SDL_ISPIXELFORMAT_10BIT(srcfmt->format) &&
        !SDL_ISPIXELFORMAT_10BIT(dstfmt->format)) {

        // Find the appropriate permutation
        int alpha_channel, p0, p1, p2, p3;
        get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);

        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                dst[0] = src[p0];
                dst[1] = src[p1];
                dst[2] = src[p2];
                dst[3] = src[p3];
                dst[alpha_channel] = (Uint8)alpha; // overwrite dst alpha byte
                src += 4;
                dst += 4;
            }, width);
            /* *INDENT-ON* */ // clang-format on
            src += srcskip;
            dst += dstskip;
        }
        return;
    }
#endif

    // Blit with permutation: 4->3 (alpha byte of the source is dropped)
    if (srcbpp == 4 && dstbpp == 3 &&
        !SDL_ISPIXELFORMAT_10BIT(srcfmt->format)) {

        // Find the appropriate permutation
        int p0, p1, p2, p3;
        get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);

        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                dst[0] = src[p0];
                dst[1] = src[p1];
                dst[2] = src[p2];
                src += 4;
                dst += 3;
            }, width);
            /* *INDENT-ON* */ // clang-format on
            src += srcskip;
            dst += dstskip;
        }
        return;
    }

#if HAVE_FAST_WRITE_INT8
    // Blit with permutation: 3->4 (destination alpha byte is set explicitly)
    if (srcbpp == 3 && dstbpp == 4 &&
        !SDL_ISPIXELFORMAT_10BIT(dstfmt->format)) {

        // Find the appropriate permutation
        int alpha_channel, p0, p1, p2, p3;
        get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);

        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                dst[0] = src[p0];
                dst[1] = src[p1];
                dst[2] = src[p2];
                dst[3] = src[p3];
                dst[alpha_channel] = (Uint8)alpha;
                src += 3;
                dst += 4;
            }, width);
            /* *INDENT-ON* */ // clang-format on
            src += srcskip;
            dst += dstskip;
        }
        return;
    }
#endif

    // Generic fallback: unpack each source pixel to R,G,B and repack it
    // into the destination format, attaching the constant alpha.
    while (height--) {
        /* *INDENT-OFF* */ // clang-format off
        DUFFS_LOOP(
        {
            Uint32 Pixel;
            unsigned sR;
            unsigned sG;
            unsigned sB;
            DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
            ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
            dst += dstbpp;
            src += srcbpp;
        },
        width);
        /* *INDENT-ON* */ // clang-format on
        src += srcskip;
        dst += dstskip;
    }
}
// Generic N-byte -> N-byte blit that COPIES the source alpha channel into
// the destination (contrast with BlitNtoN, which substitutes a constant
// alpha).  Uses a 4->4 byte-permutation fast path where possible.
static void BlitNtoNCopyAlpha(SDL_BlitInfo *info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint8 *src = info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = info->dst;
    int dstskip = info->dst_skip;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    int srcbpp = srcfmt->bytes_per_pixel;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    int dstbpp = dstfmt->bytes_per_pixel;
    int c;

#if HAVE_FAST_WRITE_INT8
    // Blit with permutation: 4->4
    // (10-bit-per-channel formats cannot be converted by byte shuffling)
    if (srcbpp == 4 && dstbpp == 4 &&
        !SDL_ISPIXELFORMAT_10BIT(srcfmt->format) &&
        !SDL_ISPIXELFORMAT_10BIT(dstfmt->format)) {

        // Find the appropriate permutation (alpha is just another byte here,
        // so no alpha-channel index is needed)
        int p0, p1, p2, p3;
        get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);

        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                dst[0] = src[p0];
                dst[1] = src[p1];
                dst[2] = src[p2];
                dst[3] = src[p3];
                src += 4;
                dst += 4;
            }, width);
            /* *INDENT-ON* */ // clang-format on
            src += srcskip;
            dst += dstskip;
        }
        return;
    }
#endif

    // Generic fallback: unpack each pixel to R,G,B,A and repack it.
    while (height--) {
        for (c = width; c; --c) {
            Uint32 Pixel;
            unsigned sR, sG, sB, sA;
            DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
            ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
            dst += dstbpp;
            src += srcbpp;
        }
        src += srcskip;
        dst += dstskip;
    }
}
  1923. static void Blit2to2Key(SDL_BlitInfo *info)
  1924. {
  1925. int width = info->dst_w;
  1926. int height = info->dst_h;
  1927. Uint16 *srcp = (Uint16 *)info->src;
  1928. int srcskip = info->src_skip;
  1929. Uint16 *dstp = (Uint16 *)info->dst;
  1930. int dstskip = info->dst_skip;
  1931. Uint32 ckey = info->colorkey;
  1932. Uint32 rgbmask = ~info->src_fmt->Amask;
  1933. // Set up some basic variables
  1934. srcskip /= 2;
  1935. dstskip /= 2;
  1936. ckey &= rgbmask;
  1937. while (height--) {
  1938. /* *INDENT-OFF* */ // clang-format off
  1939. DUFFS_LOOP_TRIVIAL(
  1940. {
  1941. if ( (*srcp & rgbmask) != ckey ) {
  1942. *dstp = *srcp;
  1943. }
  1944. dstp++;
  1945. srcp++;
  1946. },
  1947. width);
  1948. /* *INDENT-ON* */ // clang-format on
  1949. srcp += srcskip;
  1950. dstp += dstskip;
  1951. }
  1952. }
// Colorkeyed blit between two arbitrary RGB pixel formats.
// Source pixels whose RGB bits match the colorkey are skipped; when the
// destination has an alpha channel it is SET to info->a (not copied from
// the source). Several byte-shuffle fast paths precede the generic loop.
static void BlitNtoNKey(SDL_BlitInfo *info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint8 *src = info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = info->dst;
    int dstskip = info->dst_skip;
    Uint32 ckey = info->colorkey;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    int srcbpp = srcfmt->bytes_per_pixel;
    int dstbpp = dstfmt->bytes_per_pixel;
    unsigned alpha = dstfmt->Amask ? info->a : 0;
    Uint32 rgbmask = ~srcfmt->Amask;
    int sfmt = srcfmt->format;
    int dfmt = dstfmt->format;

    // Set up some basic variables
    ckey &= rgbmask; // compare RGB bits only; source alpha is irrelevant

    // BPP 4, same rgb: 32-bit copy with the alpha bits patched in or out
    if (srcbpp == 4 && dstbpp == 4 && srcfmt->Rmask == dstfmt->Rmask && srcfmt->Gmask == dstfmt->Gmask && srcfmt->Bmask == dstfmt->Bmask) {
        Uint32 *src32 = (Uint32 *)src;
        Uint32 *dst32 = (Uint32 *)dst;

        if (dstfmt->Amask) {
            // RGB->RGBA, SET_ALPHA
            Uint32 mask = ((Uint32)info->a) << dstfmt->Ashift;

            while (height--) {
                /* *INDENT-OFF* */ // clang-format off
                DUFFS_LOOP_TRIVIAL(
                {
                    if ((*src32 & rgbmask) != ckey) {
                        *dst32 = *src32 | mask;
                    }
                    ++dst32;
                    ++src32;
                }, width);
                /* *INDENT-ON* */ // clang-format on
                src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
                dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
            }
            return;
        } else {
            // RGBA->RGB, NO_ALPHA: strip the source alpha bits
            Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;

            while (height--) {
                /* *INDENT-OFF* */ // clang-format off
                DUFFS_LOOP_TRIVIAL(
                {
                    if ((*src32 & rgbmask) != ckey) {
                        *dst32 = *src32 & mask;
                    }
                    ++dst32;
                    ++src32;
                }, width);
                /* *INDENT-ON* */ // clang-format on
                src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
                dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
            }
            return;
        }
    }

#if HAVE_FAST_WRITE_INT8
    // Blit with permutation: 4->4 (byte shuffle; alpha byte force-set)
    if (srcbpp == 4 && dstbpp == 4 &&
        !SDL_ISPIXELFORMAT_10BIT(srcfmt->format) &&
        !SDL_ISPIXELFORMAT_10BIT(dstfmt->format)) {

        // Find the appropriate permutation
        int alpha_channel, p0, p1, p2, p3;
        get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);

        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                Uint32 *src32 = (Uint32 *)src;

                if ((*src32 & rgbmask) != ckey) {
                    dst[0] = src[p0];
                    dst[1] = src[p1];
                    dst[2] = src[p2];
                    dst[3] = src[p3];
                    dst[alpha_channel] = (Uint8)alpha;
                }
                src += 4;
                dst += 4;
            }, width);
            /* *INDENT-ON* */ // clang-format on
            src += srcskip;
            dst += dstskip;
        }
        return;
    }
#endif

    // BPP 3, same rgb triplet: straight 3-byte copy with per-byte key compare
    if ((sfmt == SDL_PIXELFORMAT_RGB24 && dfmt == SDL_PIXELFORMAT_RGB24) ||
        (sfmt == SDL_PIXELFORMAT_BGR24 && dfmt == SDL_PIXELFORMAT_BGR24)) {
        // Split the key into the three bytes as they appear in memory.
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
        Uint8 k0 = ckey & 0xFF;
        Uint8 k1 = (ckey >> 8) & 0xFF;
        Uint8 k2 = (ckey >> 16) & 0xFF;
#else
        Uint8 k0 = (ckey >> 16) & 0xFF;
        Uint8 k1 = (ckey >> 8) & 0xFF;
        Uint8 k2 = ckey & 0xFF;
#endif
        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                Uint8 s0 = src[0];
                Uint8 s1 = src[1];
                Uint8 s2 = src[2];

                if (k0 != s0 || k1 != s1 || k2 != s2) {
                    dst[0] = s0;
                    dst[1] = s1;
                    dst[2] = s2;
                }
                src += 3;
                dst += 3;
            },
            width);
            /* *INDENT-ON* */ // clang-format on
            src += srcskip;
            dst += dstskip;
        }
        return;
    }

    // BPP 3, inversed rgb triplet (RGB24 <-> BGR24)
    if ((sfmt == SDL_PIXELFORMAT_RGB24 && dfmt == SDL_PIXELFORMAT_BGR24) ||
        (sfmt == SDL_PIXELFORMAT_BGR24 && dfmt == SDL_PIXELFORMAT_RGB24)) {
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
        Uint8 k0 = ckey & 0xFF;
        Uint8 k1 = (ckey >> 8) & 0xFF;
        Uint8 k2 = (ckey >> 16) & 0xFF;
#else
        Uint8 k0 = (ckey >> 16) & 0xFF;
        Uint8 k1 = (ckey >> 8) & 0xFF;
        Uint8 k2 = ckey & 0xFF;
#endif
        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                Uint8 s0 = src[0];
                Uint8 s1 = src[1];
                Uint8 s2 = src[2];

                if (k0 != s0 || k1 != s1 || k2 != s2) {
                    // Inversed RGB
                    dst[0] = s2;
                    dst[1] = s1;
                    dst[2] = s0;
                }
                src += 3;
                dst += 3;
            },
            width);
            /* *INDENT-ON* */ // clang-format on
            src += srcskip;
            dst += dstskip;
        }
        return;
    }

    // Blit with permutation: 4->3 (drop the source alpha byte)
    if (srcbpp == 4 && dstbpp == 3 &&
        !SDL_ISPIXELFORMAT_10BIT(srcfmt->format)) {

        // Find the appropriate permutation
        int p0, p1, p2, p3;
        get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);

        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                Uint32 *src32 = (Uint32 *)src;

                if ((*src32 & rgbmask) != ckey) {
                    dst[0] = src[p0];
                    dst[1] = src[p1];
                    dst[2] = src[p2];
                }
                src += 4;
                dst += 3;
            }, width);
            /* *INDENT-ON* */ // clang-format on
            src += srcskip;
            dst += dstskip;
        }
        return;
    }

#if HAVE_FAST_WRITE_INT8
    // Blit with permutation: 3->4 (alpha byte force-set to info->a)
    if (srcbpp == 3 && dstbpp == 4 &&
        !SDL_ISPIXELFORMAT_10BIT(dstfmt->format)) {
        // Per-byte key compare, since the 3-byte source can't be read as Uint32.
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
        Uint8 k0 = ckey & 0xFF;
        Uint8 k1 = (ckey >> 8) & 0xFF;
        Uint8 k2 = (ckey >> 16) & 0xFF;
#else
        Uint8 k0 = (ckey >> 16) & 0xFF;
        Uint8 k1 = (ckey >> 8) & 0xFF;
        Uint8 k2 = ckey & 0xFF;
#endif
        // Find the appropriate permutation
        int alpha_channel, p0, p1, p2, p3;
        get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, &alpha_channel);

        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                Uint8 s0 = src[0];
                Uint8 s1 = src[1];
                Uint8 s2 = src[2];

                if (k0 != s0 || k1 != s1 || k2 != s2) {
                    dst[0] = src[p0];
                    dst[1] = src[p1];
                    dst[2] = src[p2];
                    dst[3] = src[p3];
                    dst[alpha_channel] = (Uint8)alpha;
                }
                src += 3;
                dst += 4;
            }, width);
            /* *INDENT-ON* */ // clang-format on
            src += srcskip;
            dst += dstskip;
        }
        return;
    }
#endif

    // Generic fallback: unpack, key-test, repack with the SET_ALPHA value.
    while (height--) {
        /* *INDENT-OFF* */ // clang-format off
        DUFFS_LOOP(
        {
            Uint32 Pixel;
            unsigned sR;
            unsigned sG;
            unsigned sB;
            RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
            if ( (Pixel & rgbmask) != ckey ) {
                RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
                ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
            }
            dst += dstbpp;
            src += srcbpp;
        },
        width);
        /* *INDENT-ON* */ // clang-format on
        src += srcskip;
        dst += dstskip;
    }
}
  2200. static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info)
  2201. {
  2202. int width = info->dst_w;
  2203. int height = info->dst_h;
  2204. Uint8 *src = info->src;
  2205. int srcskip = info->src_skip;
  2206. Uint8 *dst = info->dst;
  2207. int dstskip = info->dst_skip;
  2208. Uint32 ckey = info->colorkey;
  2209. const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
  2210. const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
  2211. Uint32 rgbmask = ~srcfmt->Amask;
  2212. Uint8 srcbpp;
  2213. Uint8 dstbpp;
  2214. Uint32 Pixel;
  2215. unsigned sR, sG, sB, sA;
  2216. // Set up some basic variables
  2217. srcbpp = srcfmt->bytes_per_pixel;
  2218. dstbpp = dstfmt->bytes_per_pixel;
  2219. ckey &= rgbmask;
  2220. // Fastpath: same source/destination format, with Amask, bpp 32, loop is vectorized. ~10x faster
  2221. if (srcfmt->format == dstfmt->format) {
  2222. if (srcfmt->format == SDL_PIXELFORMAT_ARGB8888 ||
  2223. srcfmt->format == SDL_PIXELFORMAT_ABGR8888 ||
  2224. srcfmt->format == SDL_PIXELFORMAT_BGRA8888 ||
  2225. srcfmt->format == SDL_PIXELFORMAT_RGBA8888) {
  2226. Uint32 *src32 = (Uint32 *)src;
  2227. Uint32 *dst32 = (Uint32 *)dst;
  2228. while (height--) {
  2229. /* *INDENT-OFF* */ // clang-format off
  2230. DUFFS_LOOP_TRIVIAL(
  2231. {
  2232. if ((*src32 & rgbmask) != ckey) {
  2233. *dst32 = *src32;
  2234. }
  2235. ++src32;
  2236. ++dst32;
  2237. },
  2238. width);
  2239. /* *INDENT-ON* */ // clang-format on
  2240. src32 = (Uint32 *)((Uint8 *)src32 + srcskip);
  2241. dst32 = (Uint32 *)((Uint8 *)dst32 + dstskip);
  2242. }
  2243. }
  2244. return;
  2245. }
  2246. #if HAVE_FAST_WRITE_INT8
  2247. // Blit with permutation: 4->4
  2248. if (srcbpp == 4 && dstbpp == 4 &&
  2249. !SDL_ISPIXELFORMAT_10BIT(srcfmt->format) &&
  2250. !SDL_ISPIXELFORMAT_10BIT(dstfmt->format)) {
  2251. // Find the appropriate permutation
  2252. int p0, p1, p2, p3;
  2253. get_permutation(srcfmt, dstfmt, &p0, &p1, &p2, &p3, NULL);
  2254. while (height--) {
  2255. /* *INDENT-OFF* */ // clang-format off
  2256. DUFFS_LOOP(
  2257. {
  2258. Uint32 *src32 = (Uint32 *)src;
  2259. if ((*src32 & rgbmask) != ckey) {
  2260. dst[0] = src[p0];
  2261. dst[1] = src[p1];
  2262. dst[2] = src[p2];
  2263. dst[3] = src[p3];
  2264. }
  2265. src += 4;
  2266. dst += 4;
  2267. }, width);
  2268. /* *INDENT-ON* */ // clang-format on
  2269. src += srcskip;
  2270. dst += dstskip;
  2271. }
  2272. return;
  2273. }
  2274. #endif
  2275. while (height--) {
  2276. /* *INDENT-OFF* */ // clang-format off
  2277. DUFFS_LOOP(
  2278. {
  2279. DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
  2280. if ( (Pixel & rgbmask) != ckey ) {
  2281. ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, sA);
  2282. }
  2283. dst += dstbpp;
  2284. src += srcbpp;
  2285. },
  2286. width);
  2287. /* *INDENT-ON* */ // clang-format on
  2288. src += srcskip;
  2289. dst += dstskip;
  2290. }
  2291. }
// Convert between two 8888 pixels with differing formats.
// Each 8-bit channel of 'src' is extracted at the source format's shift and
// re-inserted at the destination format's shift.
// SRC_ALPHA variant: the source's alpha byte is carried over to dst.
#define SWIZZLE_8888_SRC_ALPHA(src, dst, srcfmt, dstfmt)              \
    do {                                                              \
        dst = (((src >> srcfmt->Rshift) & 0xFF) << dstfmt->Rshift) |  \
              (((src >> srcfmt->Gshift) & 0xFF) << dstfmt->Gshift) |  \
              (((src >> srcfmt->Bshift) & 0xFF) << dstfmt->Bshift) |  \
              (((src >> srcfmt->Ashift) & 0xFF) << dstfmt->Ashift);   \
    } while (0)

// DST_ALPHA variant: the source alpha is ignored and the destination alpha
// bits are force-set to 'dstAmask' (i.e. fully opaque).
#define SWIZZLE_8888_DST_ALPHA(src, dst, srcfmt, dstfmt, dstAmask)    \
    do {                                                              \
        dst = (((src >> srcfmt->Rshift) & 0xFF) << dstfmt->Rshift) |  \
              (((src >> srcfmt->Gshift) & 0xFF) << dstfmt->Gshift) |  \
              (((src >> srcfmt->Bshift) & 0xFF) << dstfmt->Bshift) |  \
              dstAmask;                                               \
    } while (0)
#ifdef SDL_SSE4_1_INTRINSICS
// SSE4.1 8888 -> 8888 channel-swizzle blit: 4 pixels per iteration via
// _mm_shuffle_epi8, with a scalar tail loop for the last width%4 pixels.
static void SDL_TARGETING("sse4.1") Blit8888to8888PixelSwizzleSSE41(SDL_BlitInfo *info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint8 *src = info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = info->dst;
    int dstskip = info->dst_skip;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    // If either side lacks a real alpha channel, force dst alpha to opaque.
    bool fill_alpha = (!srcfmt->Amask || !dstfmt->Amask);
    Uint32 srcAmask, srcAshift;
    Uint32 dstAmask, dstAshift;

    SDL_Get8888AlphaMaskAndShift(srcfmt, &srcAmask, &srcAshift);
    SDL_Get8888AlphaMaskAndShift(dstfmt, &dstAmask, &dstAshift);

    // The byte offsets for the start of each pixel
    const __m128i mask_offsets = _mm_set_epi8(
        12, 12, 12, 12, 8, 8, 8, 8, 4, 4, 4, 4, 0, 0, 0, 0);

    // Per-byte shuffle control: pixel base offset + source byte index of
    // each destination channel (shift/8 converts a bit shift to a byte index).
    const __m128i convert_mask = _mm_add_epi32(
        _mm_set1_epi32(
            ((srcfmt->Rshift >> 3) << dstfmt->Rshift) |
            ((srcfmt->Gshift >> 3) << dstfmt->Gshift) |
            ((srcfmt->Bshift >> 3) << dstfmt->Bshift) |
            ((srcAshift >> 3) << dstAshift)),
        mask_offsets);

    const __m128i alpha_fill_mask = _mm_set1_epi32((int)dstAmask);

    while (height--) {
        int i = 0;

        for (; i + 4 <= width; i += 4) {
            // Load 4 src pixels
            __m128i src128 = _mm_loadu_si128((__m128i *)src);

            // Convert to dst format
            // This is an SSSE3 instruction
            src128 = _mm_shuffle_epi8(src128, convert_mask);

            if (fill_alpha) {
                // Set the alpha channels of src to 255
                src128 = _mm_or_si128(src128, alpha_fill_mask);
            }

            // Save the result
            _mm_storeu_si128((__m128i *)dst, src128);

            src += 16;
            dst += 16;
        }

        // Scalar tail: same conversion, one pixel at a time.
        for (; i < width; ++i) {
            Uint32 src32 = *(Uint32 *)src;
            Uint32 dst32;
            if (fill_alpha) {
                SWIZZLE_8888_DST_ALPHA(src32, dst32, srcfmt, dstfmt, dstAmask);
            } else {
                SWIZZLE_8888_SRC_ALPHA(src32, dst32, srcfmt, dstfmt);
            }
            *(Uint32 *)dst = dst32;
            src += 4;
            dst += 4;
        }

        src += srcskip;
        dst += dstskip;
    }
}
#endif
#ifdef SDL_AVX2_INTRINSICS
// AVX2 8888 -> 8888 channel-swizzle blit: 8 pixels per iteration via
// _mm256_shuffle_epi8, with a scalar tail loop for the last width%8 pixels.
static void SDL_TARGETING("avx2") Blit8888to8888PixelSwizzleAVX2(SDL_BlitInfo *info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint8 *src = info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = info->dst;
    int dstskip = info->dst_skip;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    // If either side lacks a real alpha channel, force dst alpha to opaque.
    bool fill_alpha = (!srcfmt->Amask || !dstfmt->Amask);
    Uint32 srcAmask, srcAshift;
    Uint32 dstAmask, dstAshift;

    SDL_Get8888AlphaMaskAndShift(srcfmt, &srcAmask, &srcAshift);
    SDL_Get8888AlphaMaskAndShift(dstfmt, &dstAmask, &dstAshift);

    // The byte offsets for the start of each pixel
    const __m256i mask_offsets = _mm256_set_epi8(
        28, 28, 28, 28, 24, 24, 24, 24, 20, 20, 20, 20, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 8, 8, 4, 4, 4, 4, 0, 0, 0, 0);

    // Per-byte shuffle control: pixel base offset + source byte index of
    // each destination channel (shift/8 converts a bit shift to a byte index).
    const __m256i convert_mask = _mm256_add_epi32(
        _mm256_set1_epi32(
            ((srcfmt->Rshift >> 3) << dstfmt->Rshift) |
            ((srcfmt->Gshift >> 3) << dstfmt->Gshift) |
            ((srcfmt->Bshift >> 3) << dstfmt->Bshift) |
            ((srcAshift >> 3) << dstAshift)),
        mask_offsets);

    const __m256i alpha_fill_mask = _mm256_set1_epi32((int)dstAmask);

    while (height--) {
        int i = 0;

        for (; i + 8 <= width; i += 8) {
            // Load 8 src pixels
            __m256i src256 = _mm256_loadu_si256((__m256i *)src);

            // Convert to dst format
            src256 = _mm256_shuffle_epi8(src256, convert_mask);

            if (fill_alpha) {
                // Set the alpha channels of src to 255
                src256 = _mm256_or_si256(src256, alpha_fill_mask);
            }

            // Save the result
            _mm256_storeu_si256((__m256i *)dst, src256);

            src += 32;
            dst += 32;
        }

        // Scalar tail: same conversion, one pixel at a time.
        for (; i < width; ++i) {
            Uint32 src32 = *(Uint32 *)src;
            Uint32 dst32;
            if (fill_alpha) {
                SWIZZLE_8888_DST_ALPHA(src32, dst32, srcfmt, dstfmt, dstAmask);
            } else {
                SWIZZLE_8888_SRC_ALPHA(src32, dst32, srcfmt, dstfmt);
            }
            *(Uint32 *)dst = dst32;
            src += 4;
            dst += 4;
        }

        src += srcskip;
        dst += dstskip;
    }
}
#endif
#if defined(SDL_NEON_INTRINSICS) && (__ARM_ARCH >= 8) && (defined(__aarch64__) || defined(_M_ARM64))
// AArch64 NEON 8888 -> 8888 channel-swizzle blit: 4 pixels per iteration via
// vqtbl1q_u8, with a per-pixel tail loop using the low half of the same masks.
static void Blit8888to8888PixelSwizzleNEON(SDL_BlitInfo *info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint8 *src = info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = info->dst;
    int dstskip = info->dst_skip;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    // If either side lacks a real alpha channel, force dst alpha to opaque.
    bool fill_alpha = (!srcfmt->Amask || !dstfmt->Amask);
    Uint32 srcAmask, srcAshift;
    Uint32 dstAmask, dstAshift;

    SDL_Get8888AlphaMaskAndShift(srcfmt, &srcAmask, &srcAshift);
    SDL_Get8888AlphaMaskAndShift(dstfmt, &dstAmask, &dstAshift);

    // The byte offsets for the start of each pixel
    const uint8x16_t mask_offsets = vreinterpretq_u8_u64(vcombine_u64(
        vcreate_u64(0x0404040400000000), vcreate_u64(0x0c0c0c0c08080808)));

    // Per-byte table-lookup control: pixel base offset + source byte index of
    // each destination channel (shift/8 converts a bit shift to a byte index).
    const uint8x16_t convert_mask = vreinterpretq_u8_u32(vaddq_u32(
        vreinterpretq_u32_u8(mask_offsets),
        vdupq_n_u32(
            ((srcfmt->Rshift >> 3) << dstfmt->Rshift) |
            ((srcfmt->Gshift >> 3) << dstfmt->Gshift) |
            ((srcfmt->Bshift >> 3) << dstfmt->Bshift) |
            ((srcAshift >> 3) << dstAshift))));

    const uint8x16_t alpha_fill_mask = vreinterpretq_u8_u32(vdupq_n_u32(dstAmask));

    while (height--) {
        int i = 0;

        for (; i + 4 <= width; i += 4) {
            // Load 4 src pixels
            uint8x16_t src128 = vld1q_u8(src);

            // Convert to dst format
            src128 = vqtbl1q_u8(src128, convert_mask);

            if (fill_alpha) {
                // Set the alpha channels of src to 255
                src128 = vorrq_u8(src128, alpha_fill_mask);
            }

            // Save the result
            vst1q_u8(dst, src128);

            src += 16;
            dst += 16;
        }

        // Process 1 pixel per iteration, max 3 iterations, same calculations as above
        for (; i < width; ++i) {
            // Top 32-bits will be not used in src32
            uint8x8_t src32 = vreinterpret_u8_u32(vld1_dup_u32((Uint32 *)src));

            // Convert to dst format
            src32 = vtbl1_u8(src32, vget_low_u8(convert_mask));

            if (fill_alpha) {
                // Set the alpha channels of src to 255
                src32 = vorr_u8(src32, vget_low_u8(alpha_fill_mask));
            }

            // Save the result, only low 32-bits
            vst1_lane_u32((Uint32 *)dst, vreinterpret_u32_u8(src32), 0);

            src += 4;
            dst += 4;
        }

        src += srcskip;
        dst += dstskip;
    }
}
#endif
// Blit_3or4_to_3or4__same_rgb: 3 or 4 bpp, same RGB triplet
// Copies the three RGB bytes per pixel; when dst has an alpha channel it is
// SET from info->a. The i0/i1/i2 (and j0/j1/j2) indices absorb the
// endianness difference in byte layout.
static void Blit_3or4_to_3or4__same_rgb(SDL_BlitInfo *info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint8 *src = info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = info->dst;
    int dstskip = info->dst_skip;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    int srcbpp = srcfmt->bytes_per_pixel;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    int dstbpp = dstfmt->bytes_per_pixel;

    if (dstfmt->Amask) {
        // SET_ALPHA: dst is 4 bpp; write whole pixels as Uint32 with the
        // alpha bits pre-shifted into 'mask'.
        Uint32 mask = ((Uint32)info->a) << dstfmt->Ashift;
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
        int i0 = 0, i1 = 1, i2 = 2;
#else
        int i0 = srcbpp - 1 - 0;
        int i1 = srcbpp - 1 - 1;
        int i2 = srcbpp - 1 - 2;
#endif
        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                Uint32 *dst32 = (Uint32 *)dst;
                Uint8 s0 = src[i0];
                Uint8 s1 = src[i1];
                Uint8 s2 = src[i2];
                *dst32 = (s0) | (s1 << 8) | (s2 << 16) | mask;
                dst += 4;
                src += srcbpp;
            }, width);
            /* *INDENT-ON* */ // clang-format on
            src += srcskip;
            dst += dstskip;
        }
    } else {
        // NO_ALPHA: plain byte-wise copy of the RGB triplet.
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
        int i0 = 0, i1 = 1, i2 = 2;
        int j0 = 0, j1 = 1, j2 = 2;
#else
        int i0 = srcbpp - 1 - 0;
        int i1 = srcbpp - 1 - 1;
        int i2 = srcbpp - 1 - 2;
        int j0 = dstbpp - 1 - 0;
        int j1 = dstbpp - 1 - 1;
        int j2 = dstbpp - 1 - 2;
#endif
        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                Uint8 s0 = src[i0];
                Uint8 s1 = src[i1];
                Uint8 s2 = src[i2];
                dst[j0] = s0;
                dst[j1] = s1;
                dst[j2] = s2;
                dst += dstbpp;
                src += srcbpp;
            }, width);
            /* *INDENT-ON* */ // clang-format on
            src += srcskip;
            dst += dstskip;
        }
    }
}
// Blit_3or4_to_3or4__inversed_rgb: 3 or 4 bpp, inversed RGB triplet
// Same as Blit_3or4_to_3or4__same_rgb but the R and B channels swap places
// (RGB <-> BGR). Handles COPY_ALPHA (4->4 only), SET_ALPHA and NO_ALPHA.
static void Blit_3or4_to_3or4__inversed_rgb(SDL_BlitInfo *info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint8 *src = info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = info->dst;
    int dstskip = info->dst_skip;
    const SDL_PixelFormatDetails *srcfmt = info->src_fmt;
    int srcbpp = srcfmt->bytes_per_pixel;
    const SDL_PixelFormatDetails *dstfmt = info->dst_fmt;
    int dstbpp = dstfmt->bytes_per_pixel;

    if (dstfmt->Amask) {
        if (srcfmt->Amask) {
            // COPY_ALPHA
            // Only to switch ABGR8888 <-> ARGB8888
            while (height--) {
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
                int i0 = 0, i1 = 1, i2 = 2, i3 = 3;
#else
                int i0 = 3, i1 = 2, i2 = 1, i3 = 0;
#endif
                /* *INDENT-OFF* */ // clang-format off
                DUFFS_LOOP(
                {
                    Uint32 *dst32 = (Uint32 *)dst;
                    Uint8 s0 = src[i0];
                    Uint8 s1 = src[i1];
                    Uint8 s2 = src[i2];
                    Uint32 alphashift = ((Uint32)src[i3]) << dstfmt->Ashift;
                    // inversed, compared to Blit_3or4_to_3or4__same_rgb
                    *dst32 = (s0 << 16) | (s1 << 8) | (s2) | alphashift;
                    dst += 4;
                    src += 4;
                }, width);
                /* *INDENT-ON* */ // clang-format on
                src += srcskip;
                dst += dstskip;
            }
        } else {
            // SET_ALPHA: dst alpha comes pre-shifted from info->a.
            Uint32 mask = ((Uint32)info->a) << dstfmt->Ashift;
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
            int i0 = 0, i1 = 1, i2 = 2;
#else
            int i0 = srcbpp - 1 - 0;
            int i1 = srcbpp - 1 - 1;
            int i2 = srcbpp - 1 - 2;
#endif
            while (height--) {
                /* *INDENT-OFF* */ // clang-format off
                DUFFS_LOOP(
                {
                    Uint32 *dst32 = (Uint32 *)dst;
                    Uint8 s0 = src[i0];
                    Uint8 s1 = src[i1];
                    Uint8 s2 = src[i2];
                    // inversed, compared to Blit_3or4_to_3or4__same_rgb
                    *dst32 = (s0 << 16) | (s1 << 8) | (s2) | mask;
                    dst += 4;
                    src += srcbpp;
                }, width);
                /* *INDENT-ON* */ // clang-format on
                src += srcskip;
                dst += dstskip;
            }
        }
    } else {
        // NO_ALPHA: byte-wise copy with R/B destination indices swapped.
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
        int i0 = 0, i1 = 1, i2 = 2;
        int j0 = 2, j1 = 1, j2 = 0;
#else
        int i0 = srcbpp - 1 - 0;
        int i1 = srcbpp - 1 - 1;
        int i2 = srcbpp - 1 - 2;
        int j0 = dstbpp - 1 - 2;
        int j1 = dstbpp - 1 - 1;
        int j2 = dstbpp - 1 - 0;
#endif
        while (height--) {
            /* *INDENT-OFF* */ // clang-format off
            DUFFS_LOOP(
            {
                Uint8 s0 = src[i0];
                Uint8 s1 = src[i1];
                Uint8 s2 = src[i2];
                // inversed, compared to Blit_3or4_to_3or4__same_rgb
                dst[j0] = s0;
                dst[j1] = s1;
                dst[j2] = s2;
                dst += dstbpp;
                src += srcbpp;
            }, width);
            /* *INDENT-ON* */ // clang-format on
            src += srcskip;
            dst += dstskip;
        }
    }
}
// Normal N to N optimized blitters

// Alpha-handling capability flags for blit_table entries (bitwise-OR'able).
#define NO_ALPHA   1 // destination has no alpha channel
#define SET_ALPHA  2 // destination alpha is set from info->a
#define COPY_ALPHA 4 // per-pixel alpha is copied from the source

// One candidate blitter: matched against the source/destination channel
// masks, destination bpp, required CPU features, and alpha handling.
struct blit_table
{
    Uint32 srcR, srcG, srcB; // source channel masks to match (0 = match any)
    int dstbpp;              // required destination bytes/pixel (0 terminates a table)
    Uint32 dstR, dstG, dstB; // destination channel masks to match (0 = match any)
    Uint32 blit_features;    // required BLIT_FEATURE_* bits
    SDL_BlitFunc blitfunc;   // the blitter to use when this entry matches
    Uint32 alpha;            // bitwise NO_ALPHA, SET_ALPHA, COPY_ALPHA
};
// Candidate blitters for 1-byte/pixel sources.
static const struct blit_table normal_blit_1[] = {
    // Default for 8-bit RGB source, never optimized
    { 0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0 }
};
// Candidate blitters for 2-byte/pixel (565/555) sources.
static const struct blit_table normal_blit_2[] = {
#ifdef SDL_ALTIVEC_BLITTERS
#ifdef BROKEN_ALTIVEC_BLITTERS
    // has-altivec
    { 0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000,
      BLIT_FEATURE_HAS_ALTIVEC, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
    { 0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000,
      BLIT_FEATURE_HAS_ALTIVEC, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
#endif // BROKEN_ALTIVEC_BLITTERS
#endif
#ifdef SDL_SSE4_1_INTRINSICS
    // RGB565 -> each of the four 8888 byte orders, vectorized
    { 0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
      BLIT_FEATURE_HAS_SSE41, Blit_RGB565_32_SSE41, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
    { 0x0000F800, 0x000007E0, 0x0000001F, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
      BLIT_FEATURE_HAS_SSE41, Blit_RGB565_32_SSE41, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
    { 0x0000F800, 0x000007E0, 0x0000001F, 4, 0xFF000000, 0x00FF0000, 0x0000FF00,
      BLIT_FEATURE_HAS_SSE41, Blit_RGB565_32_SSE41, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
    { 0x0000F800, 0x000007E0, 0x0000001F, 4, 0x0000FF00, 0x00FF0000, 0xFF000000,
      BLIT_FEATURE_HAS_SSE41, Blit_RGB565_32_SSE41, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
#endif
#ifdef SDL_HAVE_BLIT_N_RGB565
    // RGB565 -> each of the four 8888 byte orders, scalar versions
    { 0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
      0, Blit_RGB565_ARGB8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
    { 0x0000F800, 0x000007E0, 0x0000001F, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
      0, Blit_RGB565_ABGR8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
    { 0x0000F800, 0x000007E0, 0x0000001F, 4, 0xFF000000, 0x00FF0000, 0x0000FF00,
      0, Blit_RGB565_RGBA8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
    { 0x0000F800, 0x000007E0, 0x0000001F, 4, 0x0000FF00, 0x00FF0000, 0xFF000000,
      0, Blit_RGB565_BGRA8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
#endif
    // Default for 16-bit RGB source, used if no other blitter matches
    { 0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0 }
};
// Candidate blitters for 3-byte/pixel (24-bit) sources.
static const struct blit_table normal_blit_3[] = {
    // 3->4 with same rgb triplet
    { 0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
      0, Blit_3or4_to_3or4__same_rgb,
#if HAVE_FAST_WRITE_INT8
      NO_ALPHA |
#endif
          SET_ALPHA },
    { 0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
      0, Blit_3or4_to_3or4__same_rgb,
#if HAVE_FAST_WRITE_INT8
      NO_ALPHA |
#endif
          SET_ALPHA },
    // 3->4 with inversed rgb triplet
    { 0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
      0, Blit_3or4_to_3or4__inversed_rgb,
#if HAVE_FAST_WRITE_INT8
      NO_ALPHA |
#endif
          SET_ALPHA },
    { 0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
      0, Blit_3or4_to_3or4__inversed_rgb,
#if HAVE_FAST_WRITE_INT8
      NO_ALPHA |
#endif
          SET_ALPHA },
    // 3->3 to switch RGB 24 <-> BGR 24
    { 0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
      0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA },
    { 0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
      0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA },
    // Default for 24-bit RGB source, never optimized
    { 0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0 }
};
// Candidate blitters for 4-byte/pixel (32-bit) sources.
static const struct blit_table normal_blit_4[] = {
#ifdef SDL_ALTIVEC_BLITTERS
    // has-altivec | dont-use-prefetch
    { 0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000, 0x00000000,
      BLIT_FEATURE_HAS_ALTIVEC | BLIT_FEATURE_ALTIVEC_DONT_USE_PREFETCH, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
    // has-altivec
    { 0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000, 0x00000000,
      BLIT_FEATURE_HAS_ALTIVEC, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
    // has-altivec
    { 0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0, 0x0000001F,
      BLIT_FEATURE_HAS_ALTIVEC, Blit_XRGB8888_RGB565Altivec, NO_ALPHA },
#endif
    // 4->3 with same rgb triplet
    { 0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
      0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA },
    { 0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
      0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA },
    // 4->3 with inversed rgb triplet
    { 0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
      0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA },
    { 0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
      0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA },
    // 4->4 with inversed rgb triplet, and COPY_ALPHA to switch ABGR8888 <-> ARGB8888
    { 0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
      0, Blit_3or4_to_3or4__inversed_rgb,
#if HAVE_FAST_WRITE_INT8
      NO_ALPHA |
#endif
          SET_ALPHA | COPY_ALPHA },
    { 0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
      0, Blit_3or4_to_3or4__inversed_rgb,
#if HAVE_FAST_WRITE_INT8
      NO_ALPHA |
#endif
          SET_ALPHA | COPY_ALPHA },
    // RGB 888 and RGB 565
    { 0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0, 0x0000001F,
      0, Blit_XRGB8888_RGB565, NO_ALPHA },
    { 0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0, 0x0000001F,
      0, Blit_XRGB8888_RGB555, NO_ALPHA },
    // Default for 32-bit RGB source, used if no other blitter matches
    { 0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0 }
};
// Per-source-bytes-per-pixel lookup: normal_blit[bytes_per_pixel - 1].
static const struct blit_table *const normal_blit[] = {
    normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
};

// Mask matches table, or table entry is zero
#define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
/**
 * Select the best "normal" (unmodulated) blit function for this surface's
 * source/destination pixel-format pair.
 *
 * Returns the chosen SDL_BlitFunc, or NULL when no blitter applies
 * (destination narrower than 8 bits, or an unsupported flag combination).
 */
SDL_BlitFunc SDL_CalculateBlitN(SDL_Surface *surface)
{
    const SDL_PixelFormatDetails *srcfmt;
    const SDL_PixelFormatDetails *dstfmt;
    const struct blit_table *table;
    int which;
    SDL_BlitFunc blitfun;

    // Set up data for choosing the blit
    srcfmt = surface->fmt;
    dstfmt = surface->map.info.dst_fmt;

    // We don't support destinations less than 8-bits
    if (dstfmt->bits_per_pixel < 8) {
        return NULL;
    }

    // RLE bits are handled elsewhere; dispatch on the remaining copy flags.
    switch (surface->map.info.flags & ~SDL_COPY_RLE_MASK) {
    case 0:
        // Plain opaque copy: no colorkey, no modulation.
        if (SDL_PIXELLAYOUT(srcfmt->format) == SDL_PACKEDLAYOUT_8888 &&
            SDL_PIXELLAYOUT(dstfmt->format) == SDL_PACKEDLAYOUT_8888) {
            // Any 8888 <-> 8888 conversion is a per-pixel channel shuffle;
            // prefer the widest SIMD implementation the CPU supports.
#ifdef SDL_AVX2_INTRINSICS
            if (SDL_HasAVX2()) {
                return Blit8888to8888PixelSwizzleAVX2;
            }
#endif
#ifdef SDL_SSE4_1_INTRINSICS
            if (SDL_HasSSE41()) {
                return Blit8888to8888PixelSwizzleSSE41;
            }
#endif
#if defined(SDL_NEON_INTRINSICS) && (__ARM_ARCH >= 8) && (defined(__aarch64__) || defined(_M_ARM64))
            return Blit8888to8888PixelSwizzleNEON;
#endif
        }

        blitfun = NULL;
        if (dstfmt->bits_per_pixel > 8) {
            // Decide the required alpha handling for the pair: no alpha in
            // the destination, copy source alpha through, or set opaque.
            Uint32 a_need = NO_ALPHA;
            if (dstfmt->Amask) {
                a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
            }
            // Scan the per-source-depth table for the first entry whose
            // channel masks (zero = wildcard), destination depth, alpha
            // capability and CPU feature requirements all match. Tables are
            // terminated by a sentinel entry with dstbpp == 0, so the loop
            // always stops; for 32-bit sources that sentinel carries the
            // BlitNtoN catch-all.
            if (srcfmt->bytes_per_pixel > 0 &&
                srcfmt->bytes_per_pixel <= SDL_arraysize(normal_blit)) {
                table = normal_blit[srcfmt->bytes_per_pixel - 1];
                for (which = 0; table[which].dstbpp; ++which) {
                    if (MASKOK(srcfmt->Rmask, table[which].srcR) &&
                        MASKOK(srcfmt->Gmask, table[which].srcG) &&
                        MASKOK(srcfmt->Bmask, table[which].srcB) &&
                        MASKOK(dstfmt->Rmask, table[which].dstR) &&
                        MASKOK(dstfmt->Gmask, table[which].dstG) &&
                        MASKOK(dstfmt->Bmask, table[which].dstB) &&
                        dstfmt->bytes_per_pixel == table[which].dstbpp &&
                        (a_need & table[which].alpha) == a_need &&
                        ((table[which].blit_features & GetBlitFeatures()) ==
                         table[which].blit_features)) {
                        break;
                    }
                }
                blitfun = table[which].blitfunc;
            }

            if (blitfun == BlitNtoN) { // default C fallback catch-all. Slow!
                if (srcfmt->bytes_per_pixel == dstfmt->bytes_per_pixel &&
                    srcfmt->Rmask == dstfmt->Rmask &&
                    srcfmt->Gmask == dstfmt->Gmask &&
                    srcfmt->Bmask == dstfmt->Bmask) {
                    // Same depth and identical RGB masks: cheaper paths exist.
                    if (a_need == COPY_ALPHA) {
                        if (srcfmt->Amask == dstfmt->Amask) {
                            // Fastpath C fallback: RGBA<->RGBA blit with matching RGBA
                            blitfun = SDL_BlitCopy;
                        } else {
                            blitfun = BlitNtoNCopyAlpha;
                        }
                    } else {
                        if (srcfmt->bytes_per_pixel == 4) {
                            // Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB
                            blitfun = Blit4to4MaskAlpha;
                        } else if (srcfmt->bytes_per_pixel == 2) {
                            // Fastpath C fallback: 16bit RGB<->RGBA blit with matching RGB
                            blitfun = Blit2to2MaskAlpha;
                        }
                    }
                } else if (a_need == COPY_ALPHA) {
                    blitfun = BlitNtoNCopyAlpha;
                }
            }
        }
        return blitfun;

    case SDL_COPY_COLORKEY:
        /* colorkey blit: Here we don't have too many options, mostly
           because RLE is the preferred fast way to deal with this.
           If a particular case turns out to be useful we'll add it. */
        // NOTE(review): map.identity presumably means "no palette remap
        // needed", gating the 16-bit fast path — confirm against SDL_BlitMap.
        if (srcfmt->bytes_per_pixel == 2 && surface->map.identity != 0) {
            return Blit2to2Key;
        } else {
#ifdef SDL_ALTIVEC_BLITTERS
            if ((srcfmt->bytes_per_pixel == 4) && (dstfmt->bytes_per_pixel == 4) && SDL_HasAltiVec()) {
                return Blit32to32KeyAltivec;
            } else
#endif
            if (srcfmt->Amask && dstfmt->Amask) {
                return BlitNtoNKeyCopyAlpha;
            } else {
                return BlitNtoNKey;
            }
        }
    }

    // Any other flag combination (blending, modulation) is not handled here.
    return NULL;
}
  2901. #endif // SDL_HAVE_BLIT_N