SDL_fillrect.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428
  1. /*
  2. Simple DirectMedia Layer
  3. Copyright (C) 1997-2020 Sam Lantinga <slouken@libsdl.org>
  4. This software is provided 'as-is', without any express or implied
  5. warranty. In no event will the authors be held liable for any damages
  6. arising from the use of this software.
  7. Permission is granted to anyone to use this software for any purpose,
  8. including commercial applications, and to alter it and redistribute it
  9. freely, subject to the following restrictions:
  10. 1. The origin of this software must not be misrepresented; you must not
  11. claim that you wrote the original software. If you use this software
  12. in a product, an acknowledgment in the product documentation would be
  13. appreciated but is not required.
  14. 2. Altered source versions must be plainly marked as such, and must not be
  15. misrepresented as being the original software.
  16. 3. This notice may not be removed or altered from any source distribution.
  17. */
  18. #include "../SDL_internal.h"
  19. #include "SDL_video.h"
  20. #include "SDL_blit.h"
  21. #include "SDL_cpuinfo.h"
  22. #ifdef __SSE__
  23. /* *INDENT-OFF* */
  /*
   * Helper macros shared by the SSE fill routines in this file.  They are
   * plain text substitutions and use names from the expansion site:
   * 'color' (SSE_BEGIN) and 'i', 'n', 'p', 'c128' (SSE_WORK).
   *
   * SSE_BEGIN declares the __m128 variable c128 and sets each of its four
   * 32-bit lanes to 'color'.  Because it starts with declarations it has to
   * be expanded where a declaration is still allowed.
   */
  24. #ifdef _MSC_VER
  /* MSVC branch: write the four 32-bit lanes through the m128_u32 member. */
  25. #define SSE_BEGIN \
  26. __m128 c128; \
  27. c128.m128_u32[0] = color; \
  28. c128.m128_u32[1] = color; \
  29. c128.m128_u32[2] = color; \
  30. c128.m128_u32[3] = color;
  31. #else
  /*
   * Other compilers: fill a four-element Uint32 array and load it through a
   * __m128 pointer.  DECLARE_ALIGNED is defined outside this file; presumably
   * it declares cccc with 16-byte alignment -- TODO confirm.
   */
  32. #define SSE_BEGIN \
  33. __m128 c128; \
  34. DECLARE_ALIGNED(Uint32, cccc[4], 16); \
  35. cccc[0] = color; \
  36. cccc[1] = color; \
  37. cccc[2] = color; \
  38. cccc[3] = color; \
  39. c128 = *(__m128 *)cccc;
  40. #endif
  /*
   * SSE_WORK stores c128 to memory in 64-byte chunks: it runs n / 64
   * iterations, each issuing four _mm_stream_ps() stores at p+0, p+16, p+32
   * and p+48 and then advancing p by 64.  'n' itself is not modified, so the
   * caller handles the remaining (n & 63) bytes.  Both users in this file
   * advance p to a 16-byte boundary before expanding this macro.
   */
  41. #define SSE_WORK \
  42. for (i = n / 64; i--;) { \
  43. _mm_stream_ps((float *)(p+0), c128); \
  44. _mm_stream_ps((float *)(p+16), c128); \
  45. _mm_stream_ps((float *)(p+32), c128); \
  46. _mm_stream_ps((float *)(p+48), c128); \
  47. p += 64; \
  48. }
  /* SSE_END expands to nothing; it only marks the end of an SSE section. */
  49. #define SSE_END
  /*
   * DEFINE_SSE_FILLRECT(bpp, type) generates
   *
   *     static void SDL_FillRect<bpp>SSE(Uint8 *pixels, int pitch,
   *                                      Uint32 color, int w, int h)
   *
   * where 'bpp' is the pixel size in bytes and 'type' is the integer type
   * used to store a single pixel.
   *
   * For each of the h rows (rows are 'pitch' bytes apart) the generated
   * function works on n = w * bpp bytes:
   *   - if n > 63, single pixels are written until p reaches a 16-byte
   *     boundary, then SSE_WORK stores as many whole 64-byte chunks as fit;
   *   - the remaining (n & 63) bytes are written one pixel at a time.
   *
   * The locals i, n and p are named as they are because SSE_WORK refers to
   * them, and SSE_BEGIN must follow the declarations directly.
   *
   * NOTE(review): the lead-in and tail pixel counts are computed with an
   * integer division by bpp, so this appears to assume every row start is a
   * multiple of bpp -- confirm against callers.
   */
  50. #define DEFINE_SSE_FILLRECT(bpp, type) \
  51. static void \
  52. SDL_FillRect##bpp##SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h) \
  53. { \
  54. int i, n; \
  55. Uint8 *p = NULL; \
  56. \
  57. SSE_BEGIN; \
  58. \
  59. while (h--) { \
  60. n = w * bpp; \
  61. p = pixels; \
  62. \
  63. if (n > 63) { \
  64. int adjust = 16 - ((uintptr_t)p & 15); \
  65. if (adjust < 16) { \
  66. n -= adjust; \
  67. adjust /= bpp; \
  68. while (adjust--) { \
  69. *((type *)p) = (type)color; \
  70. p += bpp; \
  71. } \
  72. } \
  73. SSE_WORK; \
  74. } \
  75. if (n & 63) { \
  76. int remainder = (n & 63); \
  77. remainder /= bpp; \
  78. while (remainder--) { \
  79. *((type *)p) = (type)color; \
  80. p += bpp; \
  81. } \
  82. } \
  83. pixels += pitch; \
  84. } \
  85. \
  86. SSE_END; \
  87. }
  88. static void
  89. SDL_FillRect1SSE(Uint8 *pixels, int pitch, Uint32 color, int w, int h)
  90. {
  91. int i, n;
  92. SSE_BEGIN;
  93. while (h--) {
  94. Uint8 *p = pixels;
  95. n = w;
  96. if (n > 63) {
  97. int adjust = 16 - ((uintptr_t)p & 15);
  98. if (adjust) {
  99. n -= adjust;
  100. SDL_memset(p, color, adjust);
  101. p += adjust;
  102. }
  103. SSE_WORK;
  104. }
  105. if (n & 63) {
  106. int remainder = (n & 63);
  107. SDL_memset(p, color, remainder);
  108. }
  109. pixels += pitch;
  110. }
  111. SSE_END;
  112. }
  113. /* DEFINE_SSE_FILLRECT(1, Uint8) is not instantiated; the 1-byte variant is the hand-written SDL_FillRect1SSE() above. */
  /* Generate SDL_FillRect2SSE() for 2-byte pixels. */
  114. DEFINE_SSE_FILLRECT(2, Uint16)
  /* Generate SDL_FillRect4SSE() for 4-byte pixels. */
  115. DEFINE_SSE_FILLRECT(4, Uint32)
  116. /* *INDENT-ON* */
  117. #endif /* __SSE__ */
  118. static void
  119. SDL_FillRect1(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
  120. {
  121. int n;
  122. Uint8 *p = NULL;
  123. while (h--) {
  124. n = w;
  125. p = pixels;
  126. if (n > 3) {
  127. switch ((uintptr_t) p & 3) {
  128. case 1:
  129. *p++ = (Uint8) color;
  130. --n; /* fallthrough */
  131. case 2:
  132. *p++ = (Uint8) color;
  133. --n; /* fallthrough */
  134. case 3:
  135. *p++ = (Uint8) color;
  136. --n; /* fallthrough */
  137. }
  138. SDL_memset4(p, color, (n >> 2));
  139. }
  140. if (n & 3) {
  141. p += (n & ~3);
  142. switch (n & 3) {
  143. case 3:
  144. *p++ = (Uint8) color; /* fallthrough */
  145. case 2:
  146. *p++ = (Uint8) color; /* fallthrough */
  147. case 1:
  148. *p++ = (Uint8) color; /* fallthrough */
  149. }
  150. }
  151. pixels += pitch;
  152. }
  153. }
  154. static void
  155. SDL_FillRect2(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
  156. {
  157. int n;
  158. Uint16 *p = NULL;
  159. while (h--) {
  160. n = w;
  161. p = (Uint16 *) pixels;
  162. if (n > 1) {
  163. if ((uintptr_t) p & 2) {
  164. *p++ = (Uint16) color;
  165. --n;
  166. }
  167. SDL_memset4(p, color, (n >> 1));
  168. }
  169. if (n & 1) {
  170. p[n - 1] = (Uint16) color;
  171. }
  172. pixels += pitch;
  173. }
  174. }
  175. static void
  176. SDL_FillRect3(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
  177. {
  178. #if SDL_BYTEORDER == SDL_LIL_ENDIAN
  179. Uint8 b1 = (Uint8) (color & 0xFF);
  180. Uint8 b2 = (Uint8) ((color >> 8) & 0xFF);
  181. Uint8 b3 = (Uint8) ((color >> 16) & 0xFF);
  182. #elif SDL_BYTEORDER == SDL_BIG_ENDIAN
  183. Uint8 b1 = (Uint8) ((color >> 16) & 0xFF);
  184. Uint8 b2 = (Uint8) ((color >> 8) & 0xFF);
  185. Uint8 b3 = (Uint8) (color & 0xFF);
  186. #endif
  187. int n;
  188. Uint8 *p = NULL;
  189. while (h--) {
  190. n = w;
  191. p = pixels;
  192. while (n--) {
  193. *p++ = b1;
  194. *p++ = b2;
  195. *p++ = b3;
  196. }
  197. pixels += pitch;
  198. }
  199. }
  200. static void
  201. SDL_FillRect4(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
  202. {
  203. while (h--) {
  204. SDL_memset4(pixels, color, w);
  205. pixels += pitch;
  206. }
  207. }
  208. /*
  209. * This function performs a fast fill of the given rectangle with 'color'
  210. */
  211. int
  212. SDL_FillRect(SDL_Surface * dst, const SDL_Rect * rect, Uint32 color)
  213. {
  214. if (!dst) {
  215. return SDL_SetError("Passed NULL destination surface");
  216. }
  217. /* If 'rect' == NULL, then fill the whole surface */
  218. if (!rect) {
  219. rect = &dst->clip_rect;
  220. /* Don't attempt to fill if the surface's clip_rect is empty */
  221. if (SDL_RectEmpty(rect)) {
  222. return 0;
  223. }
  224. }
  225. return SDL_FillRects(dst, rect, 1, color);
  226. }
  #if SDL_ARM_NEON_BLITTERS
  /* NEON fill routines; only declared here, defined outside this file. */
  void FillRect8ARMNEONAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
  void FillRect16ARMNEONAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);
  void FillRect32ARMNEONAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);

  /*
   * Adapters giving the NEON routines the (pixels, pitch, color, w, h)
   * signature used by SDL_FillRects().  The byte pitch is shifted right by
   * 0, 1 or 2 for 1-, 2- and 4-byte pixels before being passed as the
   * stride, and the colour is narrowed to the pixel type.
   */
  static void
  fill_8_neon(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
  {
      FillRect8ARMNEONAsm(w, h, (uint8_t *) pixels, pitch, (uint8_t) color);
  }

  static void
  fill_16_neon(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
  {
      const int stride = pitch >> 1;

      FillRect16ARMNEONAsm(w, h, (uint16_t *) pixels, stride, (uint16_t) color);
  }

  static void
  fill_32_neon(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
  {
      const int stride = pitch >> 2;

      FillRect32ARMNEONAsm(w, h, (uint32_t *) pixels, stride, (uint32_t) color);
  }
  #endif
  #if SDL_ARM_SIMD_BLITTERS
  /* ARM SIMD fill routines; only declared here, defined outside this file. */
  void FillRect8ARMSIMDAsm(int32_t w, int32_t h, uint8_t *dst, int32_t dst_stride, uint8_t src);
  void FillRect16ARMSIMDAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t src);
  void FillRect32ARMSIMDAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t src);

  /*
   * Adapters giving the ARM SIMD routines the (pixels, pitch, color, w, h)
   * signature used by SDL_FillRects().  The byte pitch is shifted right by
   * 0, 1 or 2 for 1-, 2- and 4-byte pixels before being passed as the
   * stride, and the colour is narrowed to the pixel type.
   */
  static void
  fill_8_simd(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
  {
      FillRect8ARMSIMDAsm(w, h, (uint8_t *) pixels, pitch, (uint8_t) color);
  }

  static void
  fill_16_simd(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
  {
      const int stride = pitch >> 1;

      FillRect16ARMSIMDAsm(w, h, (uint16_t *) pixels, stride, (uint16_t) color);
  }

  static void
  fill_32_simd(Uint8 * pixels, int pitch, Uint32 color, int w, int h)
  {
      const int stride = pitch >> 2;

      FillRect32ARMSIMDAsm(w, h, (uint32_t *) pixels, stride, (uint32_t) color);
  }
  #endif
  261. int
  262. SDL_FillRects(SDL_Surface * dst, const SDL_Rect * rects, int count,
  263. Uint32 color)
  264. {
  265. SDL_Rect clipped;
  266. Uint8 *pixels;
  267. const SDL_Rect* rect;
  268. void (*fill_function)(Uint8 * pixels, int pitch, Uint32 color, int w, int h) = NULL;
  269. int i;
  270. if (!dst) {
  271. return SDL_SetError("Passed NULL destination surface");
  272. }
  273. /* This function doesn't work on surfaces < 8 bpp */
  274. if (dst->format->BitsPerPixel < 8) {
  275. return SDL_SetError("SDL_FillRect(): Unsupported surface format");
  276. }
  277. /* Perform software fill */
  278. if (!dst->pixels) {
  279. return SDL_SetError("SDL_FillRect(): You must lock the surface");
  280. }
  281. if (!rects) {
  282. return SDL_SetError("SDL_FillRects() passed NULL rects");
  283. }
  284. #if SDL_ARM_NEON_BLITTERS
  285. if (SDL_HasNEON() && dst->format->BytesPerPixel != 3 && fill_function == NULL) {
  286. switch (dst->format->BytesPerPixel) {
  287. case 1:
  288. fill_function = fill_8_neon;
  289. break;
  290. case 2:
  291. fill_function = fill_16_neon;
  292. break;
  293. case 4:
  294. fill_function = fill_32_neon;
  295. break;
  296. }
  297. }
  298. #endif
  299. #if SDL_ARM_SIMD_BLITTERS
  300. if (SDL_HasARMSIMD() && dst->format->BytesPerPixel != 3 && fill_function == NULL) {
  301. switch (dst->format->BytesPerPixel) {
  302. case 1:
  303. fill_function = fill_8_simd;
  304. break;
  305. case 2:
  306. fill_function = fill_16_simd;
  307. break;
  308. case 4:
  309. fill_function = fill_32_simd;
  310. break;
  311. }
  312. }
  313. #endif
  314. if (fill_function == NULL) {
  315. switch (dst->format->BytesPerPixel) {
  316. case 1:
  317. {
  318. color |= (color << 8);
  319. color |= (color << 16);
  320. #ifdef __SSE__
  321. if (SDL_HasSSE()) {
  322. fill_function = SDL_FillRect1SSE;
  323. break;
  324. }
  325. #endif
  326. fill_function = SDL_FillRect1;
  327. break;
  328. }
  329. case 2:
  330. {
  331. color |= (color << 16);
  332. #ifdef __SSE__
  333. if (SDL_HasSSE()) {
  334. fill_function = SDL_FillRect2SSE;
  335. break;
  336. }
  337. #endif
  338. fill_function = SDL_FillRect2;
  339. break;
  340. }
  341. case 3:
  342. /* 24-bit RGB is a slow path, at least for now. */
  343. {
  344. fill_function = SDL_FillRect3;
  345. break;
  346. }
  347. case 4:
  348. {
  349. #ifdef __SSE__
  350. if (SDL_HasSSE()) {
  351. fill_function = SDL_FillRect4SSE;
  352. break;
  353. }
  354. #endif
  355. fill_function = SDL_FillRect4;
  356. break;
  357. }
  358. default:
  359. return SDL_SetError("Unsupported pixel format");
  360. }
  361. }
  362. for (i = 0; i < count; ++i) {
  363. rect = &rects[i];
  364. /* Perform clipping */
  365. if (!SDL_IntersectRect(rect, &dst->clip_rect, &clipped)) {
  366. continue;
  367. }
  368. rect = &clipped;
  369. pixels = (Uint8 *) dst->pixels + rect->y * dst->pitch +
  370. rect->x * dst->format->BytesPerPixel;
  371. fill_function(pixels, dst->pitch, color, rect->w, rect->h);
  372. }
  373. /* We're done! */
  374. return 0;
  375. }
  376. /* vi: set ts=4 sw=4 expandtab: */