/*
  Simple DirectMedia Layer
  Copyright (C) 1997-2026 Sam Lantinga <slouken@libsdl.org>

  This software is provided 'as-is', without any express or implied
  warranty.  In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
*/
  18. #include "SDL_internal.h"
  19. #include "SDL_surface_c.h"
  20. #ifdef SDL_SSE_INTRINSICS
  21. /* *INDENT-OFF* */ // clang-format off
  22. #if defined(_MSC_VER) && !defined(__clang__)
  23. #define SSE_BEGIN \
  24. __m128 c128; \
  25. c128.m128_u32[0] = color; \
  26. c128.m128_u32[1] = color; \
  27. c128.m128_u32[2] = color; \
  28. c128.m128_u32[3] = color
  29. #else
  30. #define SSE_BEGIN \
  31. __m128 c128; \
  32. DECLARE_ALIGNED(Uint32, cccc[4], 16); \
  33. cccc[0] = color; \
  34. cccc[1] = color; \
  35. cccc[2] = color; \
  36. cccc[3] = color; \
  37. c128 = *(__m128 *)cccc
  38. #endif
  39. #define SSE_WORK \
  40. for (i = n / 64; i--;) { \
  41. _mm_stream_ps((float *)(p+0), c128); \
  42. _mm_stream_ps((float *)(p+16), c128); \
  43. _mm_stream_ps((float *)(p+32), c128); \
  44. _mm_stream_ps((float *)(p+48), c128); \
  45. p += 64; \
  46. }
  47. #define SSE_END
  48. #define DEFINE_SSE_FILLRECT(bpp, type) \
  49. static void SDL_TARGETING("sse") SDL_FillSurfaceRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
  50. { \
  51. int i, n; \
  52. Uint8 *p = NULL; \
  53. \
  54. /* If the number of bytes per row is equal to the pitch, treat */ \
  55. /* all rows as one long continuous row (for better performance) */ \
  56. if ((w) * (bpp) == pitch) { \
  57. w = w * h; \
  58. h = 1; \
  59. } \
  60. \
  61. SSE_BEGIN; \
  62. \
  63. while (h--) { \
  64. n = (w) * (bpp); \
  65. p = pixels; \
  66. \
  67. if (n > 63) { \
  68. int adjust = 16 - ((uintptr_t)p & 15); \
  69. if (adjust < 16) { \
  70. n -= adjust; \
  71. adjust /= (bpp); \
  72. while (adjust--) { \
  73. *((type *)p) = (type)color; \
  74. p += (bpp); \
  75. } \
  76. } \
  77. SSE_WORK; \
  78. } \
  79. if (n & 63) { \
  80. int remainder = (n & 63); \
  81. remainder /= (bpp); \
  82. while (remainder--) { \
  83. *((type *)p) = (type)color; \
  84. p += (bpp); \
  85. } \
  86. } \
  87. pixels += pitch; \
  88. } \
  89. \
  90. SSE_END; \
  91. }
  92. static void SDL_TARGETING("sse") SDL_FillSurfaceRect1SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
  93. {
  94. int i, n;
  95. SSE_BEGIN;
  96. while (h--) {
  97. Uint8 *p = pixels;
  98. n = w;
  99. if (n > 63) {
  100. int adjust = 16 - ((uintptr_t)p & 15);
  101. if (adjust) {
  102. n -= adjust;
  103. SDL_memset(p, color, adjust);
  104. p += adjust;
  105. }
  106. SSE_WORK;
  107. }
  108. if (n & 63) {
  109. int remainder = (n & 63);
  110. SDL_memset(p, color, remainder);
  111. }
  112. pixels += pitch;
  113. }
  114. SSE_END;
  115. }
  116. // DEFINE_SSE_FILLRECT(1, Uint8)
  117. DEFINE_SSE_FILLRECT(2, Uint16)
  118. DEFINE_SSE_FILLRECT(4, Uint32)
  119. /* *INDENT-ON* */ // clang-format on
  120. #endif // __SSE__
  121. #ifdef SDL_LSX_INTRINSICS
  122. /* *INDENT-OFF* */ // clang-format off
  123. #define LSX_BEGIN __m128i c128 = __lsx_vreplgr2vr_w(color);
  124. #define LSX_WORK \
  125. for (i = n / 64; i--;) { \
  126. __lsx_vst(c128, p, 0); \
  127. __lsx_vst(c128, p, 16); \
  128. __lsx_vst(c128, p, 32); \
  129. __lsx_vst(c128, p, 48); \
  130. p += 64; \
  131. }
  132. #define DEFINE_LSX_FILLRECT(bpp, type) \
  133. static void SDL_TARGETING("lsx") SDL_FillSurfaceRect##bpp##LSX(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
  134. { \
  135. int i, n; \
  136. Uint8 *p = NULL; \
  137. \
  138. /* If the number of bytes per row is equal to the pitch, treat */ \
  139. /* all rows as one long continuous row (for better performance) */ \
  140. if ((w) * (bpp) == pitch) { \
  141. w = w * h; \
  142. h = 1; \
  143. } \
  144. \
  145. LSX_BEGIN; \
  146. \
  147. while (h--) { \
  148. n = (w) * (bpp); \
  149. p = pixels; \
  150. \
  151. if (n > 63) { \
  152. int adjust = 16 - ((uintptr_t)p & 15); \
  153. if (adjust < 16) { \
  154. n -= adjust; \
  155. adjust /= (bpp); \
  156. while (adjust--) { \
  157. *((type *)p) = (type)color; \
  158. p += (bpp); \
  159. } \
  160. } \
  161. LSX_WORK; \
  162. } \
  163. if (n & 63) { \
  164. int remainder = (n & 63); \
  165. remainder /= (bpp); \
  166. while (remainder--) { \
  167. *((type *)p) = (type)color; \
  168. p += (bpp); \
  169. } \
  170. } \
  171. pixels += pitch; \
  172. } \
  173. \
  174. }
  175. DEFINE_LSX_FILLRECT(4, Uint32)
  176. /* *INDENT-ON* */ // clang-format on
  177. #endif /* __LSX__ */
  178. static void SDL_FillSurfaceRect1(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
  179. {
  180. int n;
  181. Uint8 *p = NULL;
  182. while (h--) {
  183. n = w;
  184. p = pixels;
  185. if (n > 3) {
  186. switch ((uintptr_t)p & 3) {
  187. case 1:
  188. *p++ = (Uint8)color;
  189. --n;
  190. SDL_FALLTHROUGH;
  191. case 2:
  192. *p++ = (Uint8)color;
  193. --n;
  194. SDL_FALLTHROUGH;
  195. case 3:
  196. *p++ = (Uint8)color;
  197. --n;
  198. }
  199. SDL_memset4(p, color, (n >> 2));
  200. }
  201. if (n & 3) {
  202. p += (n & ~3);
  203. switch (n & 3) {
  204. case 3:
  205. *p++ = (Uint8)color;
  206. SDL_FALLTHROUGH;
  207. case 2:
  208. *p++ = (Uint8)color;
  209. SDL_FALLTHROUGH;
  210. case 1:
  211. *p++ = (Uint8)color;
  212. }
  213. }
  214. pixels += pitch;
  215. }
  216. }
  217. static void SDL_FillSurfaceRect2(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
  218. {
  219. int n;
  220. Uint16 *p = NULL;
  221. while (h--) {
  222. n = w;
  223. p = (Uint16 *)pixels;
  224. if (n > 1) {
  225. if ((uintptr_t)p & 2) {
  226. *p++ = (Uint16)color;
  227. --n;
  228. }
  229. SDL_memset4(p, color, (n >> 1));
  230. }
  231. if (n & 1) {
  232. p[n - 1] = (Uint16)color;
  233. }
  234. pixels += pitch;
  235. }
  236. }
  237. static void SDL_FillSurfaceRect3(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
  238. {
  239. #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  240. Uint8 b1 = (Uint8)(color & 0xFF);
  241. Uint8 b2 = (Uint8)((color >> 8) & 0xFF);
  242. Uint8 b3 = (Uint8)((color >> 16) & 0xFF);
  243. #elif SDL_BYTEORDER == SDL_BIG_ENDIAN
  244. Uint8 b1 = (Uint8)((color >> 16) & 0xFF);
  245. Uint8 b2 = (Uint8)((color >> 8) & 0xFF);
  246. Uint8 b3 = (Uint8)(color & 0xFF);
  247. #endif
  248. int n;
  249. Uint8 *p = NULL;
  250. while (h--) {
  251. n = w;
  252. p = pixels;
  253. while (n--) {
  254. *p++ = b1;
  255. *p++ = b2;
  256. *p++ = b3;
  257. }
  258. pixels += pitch;
  259. }
  260. }
  261. static void SDL_FillSurfaceRect4(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
  262. {
  263. while (h--) {
  264. SDL_memset4(pixels, color, w);
  265. pixels += pitch;
  266. }
  267. }
  268. /*
  269. * This function performs a fast fill of the given rectangle with 'color'
  270. */
  271. bool SDL_FillSurfaceRect(SDL_Surface *dst, const SDL_Rect *rect, Uint32 color)
  272. {
  273. CHECK_PARAM(!SDL_SurfaceValid(dst)) {
  274. return SDL_InvalidParamError("SDL_FillSurfaceRect(): dst");
  275. }
  276. // If 'rect' == NULL, then fill the whole surface
  277. if (!rect) {
  278. rect = &dst->clip_rect;
  279. // Don't attempt to fill if the surface's clip_rect is empty
  280. if (SDL_RectEmpty(rect)) {
  281. return true;
  282. }
  283. }
  284. return SDL_FillSurfaceRects(dst, rect, 1, color);
  285. }
  286. bool SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count, Uint32 color)
  287. {
  288. SDL_Rect clipped;
  289. Uint8 *pixels;
  290. const SDL_Rect *rect;
  291. void (*fill_function)(Uint8 * pixels, int pitch, Uint32 color, int w, int h) = NULL;
  292. int i;
  293. CHECK_PARAM(!SDL_SurfaceValid(dst)) {
  294. return SDL_InvalidParamError("SDL_FillSurfaceRects(): dst");
  295. }
  296. CHECK_PARAM(!rects) {
  297. return SDL_InvalidParamError("SDL_FillSurfaceRects(): rects");
  298. }
  299. if (!dst->pixels && SDL_MUSTLOCK(dst)) {
  300. return SDL_SetError("SDL_FillSurfaceRects(): You must lock the surface");
  301. }
  302. // Nothing to do
  303. if (dst->w == 0 || dst->h == 0 || !dst->pixels) {
  304. return true;
  305. }
  306. /* This function doesn't usually work on surfaces < 8 bpp
  307. * Except: support for 4bits, when filling full size.
  308. */
  309. if (SDL_BITSPERPIXEL(dst->format) < 8) {
  310. if (count == 1) {
  311. const SDL_Rect *r = &rects[0];
  312. if (r->x == 0 && r->y == 0 && r->w == dst->w && r->h == dst->h) {
  313. if (SDL_BITSPERPIXEL(dst->format) == 4) {
  314. Uint8 b = (((Uint8)color << 4) | (Uint8)color);
  315. SDL_memset(dst->pixels, b, (size_t)dst->h * dst->pitch);
  316. return true;
  317. }
  318. }
  319. }
  320. return SDL_SetError("SDL_FillSurfaceRects(): Unsupported surface format");
  321. }
  322. if (fill_function == NULL) {
  323. switch (SDL_BYTESPERPIXEL(dst->format)) {
  324. case 1:
  325. {
  326. color |= (color << 8);
  327. color |= (color << 16);
  328. #ifdef SDL_SSE_INTRINSICS
  329. if (SDL_HasSSE()) {
  330. fill_function = SDL_FillSurfaceRect1SSE;
  331. break;
  332. }
  333. #endif
  334. fill_function = SDL_FillSurfaceRect1;
  335. break;
  336. }
  337. case 2:
  338. {
  339. color |= (color << 16);
  340. #ifdef SDL_SSE_INTRINSICS
  341. if (SDL_HasSSE()) {
  342. fill_function = SDL_FillSurfaceRect2SSE;
  343. break;
  344. }
  345. #endif
  346. fill_function = SDL_FillSurfaceRect2;
  347. break;
  348. }
  349. case 3:
  350. // 24-bit RGB is a slow path, at least for now.
  351. {
  352. fill_function = SDL_FillSurfaceRect3;
  353. break;
  354. }
  355. case 4:
  356. {
  357. #ifdef SDL_SSE_INTRINSICS
  358. if (SDL_HasSSE()) {
  359. fill_function = SDL_FillSurfaceRect4SSE;
  360. break;
  361. }
  362. #endif
  363. #ifdef SDL_LSX_INTRINSICS
  364. if (SDL_HasLSX()) {
  365. fill_function = SDL_FillSurfaceRect4LSX;
  366. break;
  367. }
  368. #endif
  369. fill_function = SDL_FillSurfaceRect4;
  370. break;
  371. }
  372. default:
  373. return SDL_SetError("Unsupported pixel format");
  374. }
  375. }
  376. for (i = 0; i < count; ++i) {
  377. rect = &rects[i];
  378. // Perform clipping
  379. if (!SDL_GetRectIntersection(rect, &dst->clip_rect, &clipped)) {
  380. continue;
  381. }
  382. rect = &clipped;
  383. pixels = (Uint8 *)dst->pixels + rect->y * dst->pitch +
  384. rect->x * SDL_BYTESPERPIXEL(dst->format);
  385. fill_function(pixels, dst->pitch, color, rect->w, rect->h);
  386. }
  387. // We're done!
  388. return true;
  389. }