/* SDL_fillrect.c */
/*
  Simple DirectMedia Layer
  Copyright (C) 1997-2022 Sam Lantinga <slouken@libsdl.org>

  This software is provided 'as-is', without any express or implied
  warranty.  In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
*/
  18. #include "SDL_internal.h"
  19. #include "SDL_blit.h"
  20. #include "../SDL_intrin.h"
  21. #ifdef __SSE__
  22. /* *INDENT-OFF* */ /* clang-format off */
  23. #if defined(_MSC_VER) && !defined(__clang__)
  24. #define SSE_BEGIN \
  25. __m128 c128; \
  26. c128.m128_u32[0] = color; \
  27. c128.m128_u32[1] = color; \
  28. c128.m128_u32[2] = color; \
  29. c128.m128_u32[3] = color;
  30. #else
  31. #define SSE_BEGIN \
  32. __m128 c128; \
  33. DECLARE_ALIGNED(Uint32, cccc[4], 16); \
  34. cccc[0] = color; \
  35. cccc[1] = color; \
  36. cccc[2] = color; \
  37. cccc[3] = color; \
  38. c128 = *(__m128 *)cccc;
  39. #endif
  40. #define SSE_WORK \
  41. for (i = n / 64; i--;) { \
  42. _mm_stream_ps((float *)(p+0), c128); \
  43. _mm_stream_ps((float *)(p+16), c128); \
  44. _mm_stream_ps((float *)(p+32), c128); \
  45. _mm_stream_ps((float *)(p+48), c128); \
  46. p += 64; \
  47. }
  48. #define SSE_END
  49. #define DEFINE_SSE_FILLRECT(bpp, type) \
  50. static void SDL_FillSurfaceRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
  51. { \
  52. int i, n; \
  53. Uint8 *p = NULL; \
  54. \
  55. SSE_BEGIN; \
  56. \
  57. while (h--) { \
  58. n = (w) * (bpp); \
  59. p = pixels; \
  60. \
  61. if (n > 63) { \
  62. int adjust = 16 - ((uintptr_t)p & 15); \
  63. if (adjust < 16) { \
  64. n -= adjust; \
  65. adjust /= (bpp); \
  66. while (adjust--) { \
  67. *((type *)p) = (type)color; \
  68. p += (bpp); \
  69. } \
  70. } \
  71. SSE_WORK; \
  72. } \
  73. if (n & 63) { \
  74. int remainder = (n & 63); \
  75. remainder /= (bpp); \
  76. while (remainder--) { \
  77. *((type *)p) = (type)color; \
  78. p += (bpp); \
  79. } \
  80. } \
  81. pixels += pitch; \
  82. } \
  83. \
  84. SSE_END; \
  85. }
  86. static void SDL_FillSurfaceRect1SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
  87. {
  88. int i, n;
  89. SSE_BEGIN;
  90. while (h--) {
  91. Uint8 *p = pixels;
  92. n = w;
  93. if (n > 63) {
  94. int adjust = 16 - ((uintptr_t)p & 15);
  95. if (adjust) {
  96. n -= adjust;
  97. SDL_memset(p, color, adjust);
  98. p += adjust;
  99. }
  100. SSE_WORK;
  101. }
  102. if (n & 63) {
  103. int remainder = (n & 63);
  104. SDL_memset(p, color, remainder);
  105. }
  106. pixels += pitch;
  107. }
  108. SSE_END;
  109. }
  110. /* DEFINE_SSE_FILLRECT(1, Uint8) */
  111. DEFINE_SSE_FILLRECT(2, Uint16)
  112. DEFINE_SSE_FILLRECT(4, Uint32)
  113. /* *INDENT-ON* */ /* clang-format on */
  114. #endif /* __SSE__ */
  115. static void SDL_FillSurfaceRect1(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
  116. {
  117. int n;
  118. Uint8 *p = NULL;
  119. while (h--) {
  120. n = w;
  121. p = pixels;
  122. if (n > 3) {
  123. switch ((uintptr_t)p & 3) {
  124. case 1:
  125. *p++ = (Uint8)color;
  126. --n;
  127. SDL_FALLTHROUGH;
  128. case 2:
  129. *p++ = (Uint8)color;
  130. --n;
  131. SDL_FALLTHROUGH;
  132. case 3:
  133. *p++ = (Uint8)color;
  134. --n;
  135. }
  136. SDL_memset4(p, color, (n >> 2));
  137. }
  138. if (n & 3) {
  139. p += (n & ~3);
  140. switch (n & 3) {
  141. case 3:
  142. *p++ = (Uint8)color;
  143. SDL_FALLTHROUGH;
  144. case 2:
  145. *p++ = (Uint8)color;
  146. SDL_FALLTHROUGH;
  147. case 1:
  148. *p++ = (Uint8)color;
  149. }
  150. }
  151. pixels += pitch;
  152. }
  153. }
  154. static void SDL_FillSurfaceRect2(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
  155. {
  156. int n;
  157. Uint16 *p = NULL;
  158. while (h--) {
  159. n = w;
  160. p = (Uint16 *)pixels;
  161. if (n > 1) {
  162. if ((uintptr_t)p & 2) {
  163. *p++ = (Uint16)color;
  164. --n;
  165. }
  166. SDL_memset4(p, color, (n >> 1));
  167. }
  168. if (n & 1) {
  169. p[n - 1] = (Uint16)color;
  170. }
  171. pixels += pitch;
  172. }
  173. }
  174. static void SDL_FillSurfaceRect3(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
  175. {
  176. #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  177. Uint8 b1 = (Uint8)(color & 0xFF);
  178. Uint8 b2 = (Uint8)((color >> 8) & 0xFF);
  179. Uint8 b3 = (Uint8)((color >> 16) & 0xFF);
  180. #elif SDL_BYTEORDER == SDL_BIG_ENDIAN
  181. Uint8 b1 = (Uint8)((color >> 16) & 0xFF);
  182. Uint8 b2 = (Uint8)((color >> 8) & 0xFF);
  183. Uint8 b3 = (Uint8)(color & 0xFF);
  184. #endif
  185. int n;
  186. Uint8 *p = NULL;
  187. while (h--) {
  188. n = w;
  189. p = pixels;
  190. while (n--) {
  191. *p++ = b1;
  192. *p++ = b2;
  193. *p++ = b3;
  194. }
  195. pixels += pitch;
  196. }
  197. }
  198. static void SDL_FillSurfaceRect4(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
  199. {
  200. while (h--) {
  201. SDL_memset4(pixels, color, w);
  202. pixels += pitch;
  203. }
  204. }
  205. /*
  206. * This function performs a fast fill of the given rectangle with 'color'
  207. */
  208. int SDL_FillSurfaceRect(SDL_Surface *dst, const SDL_Rect *rect, Uint32 color)
  209. {
  210. if (dst == NULL) {
  211. return SDL_InvalidParamError("SDL_FillSurfaceRect(): dst");
  212. }
  213. /* If 'rect' == NULL, then fill the whole surface */
  214. if (rect == NULL) {
  215. rect = &dst->clip_rect;
  216. /* Don't attempt to fill if the surface's clip_rect is empty */
  217. if (SDL_RectEmpty(rect)) {
  218. return 0;
  219. }
  220. }
  221. return SDL_FillSurfaceRects(dst, rect, 1, color);
  222. }
  223. #if SDL_ARM_NEON_BLITTERS
  224. void FillSurfaceRect8ARMNEONAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
  225. void FillSurfaceRect16ARMNEONAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);
  226. void FillSurfaceRect32ARMNEONAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);
  227. static void fill_8_neon(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
  228. {
  229. FillSurfaceRect8ARMNEONAsm(w, h, (uint8_t *)pixels, pitch >> 0, color);
  230. return;
  231. }
  232. static void fill_16_neon(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
  233. {
  234. FillSurfaceRect16ARMNEONAsm(w, h, (uint16_t *)pixels, pitch >> 1, color);
  235. return;
  236. }
  237. static void fill_32_neon(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
  238. {
  239. FillSurfaceRect32ARMNEONAsm(w, h, (uint32_t *)pixels, pitch >> 2, color);
  240. return;
  241. }
  242. #endif
  243. #if SDL_ARM_SIMD_BLITTERS
  244. void FillSurfaceRect8ARMSIMDAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
  245. void FillSurfaceRect16ARMSIMDAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);
  246. void FillSurfaceRect32ARMSIMDAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);
  247. static void fill_8_simd(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
  248. {
  249. FillSurfaceRect8ARMSIMDAsm(w, h, (uint8_t *)pixels, pitch >> 0, color);
  250. return;
  251. }
  252. static void fill_16_simd(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
  253. {
  254. FillSurfaceRect16ARMSIMDAsm(w, h, (uint16_t *)pixels, pitch >> 1, color);
  255. return;
  256. }
  257. static void fill_32_simd(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
  258. {
  259. FillSurfaceRect32ARMSIMDAsm(w, h, (uint32_t *)pixels, pitch >> 2, color);
  260. return;
  261. }
  262. #endif
  263. int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count,
  264. Uint32 color)
  265. {
  266. SDL_Rect clipped;
  267. Uint8 *pixels;
  268. const SDL_Rect *rect;
  269. void (*fill_function)(Uint8 * pixels, int pitch, Uint32 color, int w, int h) = NULL;
  270. int i;
  271. if (dst == NULL) {
  272. return SDL_InvalidParamError("SDL_FillSurfaceRects(): dst");
  273. }
  274. /* Nothing to do */
  275. if (dst->w == 0 || dst->h == 0) {
  276. return 0;
  277. }
  278. /* Perform software fill */
  279. if (!dst->pixels) {
  280. return SDL_SetError("SDL_FillSurfaceRects(): You must lock the surface");
  281. }
  282. if (rects == NULL) {
  283. return SDL_InvalidParamError("SDL_FillSurfaceRects(): rects");
  284. }
  285. /* This function doesn't usually work on surfaces < 8 bpp
  286. * Except: support for 4bits, when filling full size.
  287. */
  288. if (dst->format->BitsPerPixel < 8) {
  289. if (count == 1) {
  290. const SDL_Rect *r = &rects[0];
  291. if (r->x == 0 && r->y == 0 && r->w == dst->w && r->h == dst->h) {
  292. if (dst->format->BitsPerPixel == 4) {
  293. Uint8 b = (((Uint8)color << 4) | (Uint8)color);
  294. SDL_memset(dst->pixels, b, (size_t)dst->h * dst->pitch);
  295. return 1;
  296. }
  297. }
  298. }
  299. return SDL_SetError("SDL_FillSurfaceRects(): Unsupported surface format");
  300. }
  301. #if SDL_ARM_NEON_BLITTERS
  302. if (SDL_HasNEON() && dst->format->BytesPerPixel != 3 && fill_function == NULL) {
  303. switch (dst->format->BytesPerPixel) {
  304. case 1:
  305. fill_function = fill_8_neon;
  306. break;
  307. case 2:
  308. fill_function = fill_16_neon;
  309. break;
  310. case 4:
  311. fill_function = fill_32_neon;
  312. break;
  313. }
  314. }
  315. #endif
  316. #if SDL_ARM_SIMD_BLITTERS
  317. if (SDL_HasARMSIMD() && dst->format->BytesPerPixel != 3 && fill_function == NULL) {
  318. switch (dst->format->BytesPerPixel) {
  319. case 1:
  320. fill_function = fill_8_simd;
  321. break;
  322. case 2:
  323. fill_function = fill_16_simd;
  324. break;
  325. case 4:
  326. fill_function = fill_32_simd;
  327. break;
  328. }
  329. }
  330. #endif
  331. if (fill_function == NULL) {
  332. switch (dst->format->BytesPerPixel) {
  333. case 1:
  334. {
  335. color |= (color << 8);
  336. color |= (color << 16);
  337. #ifdef __SSE__
  338. if (SDL_HasSSE()) {
  339. fill_function = SDL_FillSurfaceRect1SSE;
  340. break;
  341. }
  342. #endif
  343. fill_function = SDL_FillSurfaceRect1;
  344. break;
  345. }
  346. case 2:
  347. {
  348. color |= (color << 16);
  349. #ifdef __SSE__
  350. if (SDL_HasSSE()) {
  351. fill_function = SDL_FillSurfaceRect2SSE;
  352. break;
  353. }
  354. #endif
  355. fill_function = SDL_FillSurfaceRect2;
  356. break;
  357. }
  358. case 3:
  359. /* 24-bit RGB is a slow path, at least for now. */
  360. {
  361. fill_function = SDL_FillSurfaceRect3;
  362. break;
  363. }
  364. case 4:
  365. {
  366. #ifdef __SSE__
  367. if (SDL_HasSSE()) {
  368. fill_function = SDL_FillSurfaceRect4SSE;
  369. break;
  370. }
  371. #endif
  372. fill_function = SDL_FillSurfaceRect4;
  373. break;
  374. }
  375. default:
  376. return SDL_SetError("Unsupported pixel format");
  377. }
  378. }
  379. for (i = 0; i < count; ++i) {
  380. rect = &rects[i];
  381. /* Perform clipping */
  382. if (!SDL_GetRectIntersection(rect, &dst->clip_rect, &clipped)) {
  383. continue;
  384. }
  385. rect = &clipped;
  386. pixels = (Uint8 *)dst->pixels + rect->y * dst->pitch +
  387. rect->x * dst->format->BytesPerPixel;
  388. fill_function(pixels, dst->pitch, color, rect->w, rect->h);
  389. }
  390. /* We're done! */
  391. return 0;
  392. }