SDL_cpuinfo.c 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205
  1. /*
  2. Simple DirectMedia Layer
  3. Copyright (C) 1997-2022 Sam Lantinga <slouken@libsdl.org>
  4. This software is provided 'as-is', without any express or implied
  5. warranty. In no event will the authors be held liable for any damages
  6. arising from the use of this software.
  7. Permission is granted to anyone to use this software for any purpose,
  8. including commercial applications, and to alter it and redistribute it
  9. freely, subject to the following restrictions:
  10. 1. The origin of this software must not be misrepresented; you must not
  11. claim that you wrote the original software. If you use this software
  12. in a product, an acknowledgment in the product documentation would be
  13. appreciated but is not required.
  14. 2. Altered source versions must be plainly marked as such, and must not be
  15. misrepresented as being the original software.
  16. 3. This notice may not be removed or altered from any source distribution.
  17. */
  18. #ifdef TEST_MAIN
  19. #include "SDL_config.h"
  20. #else
  21. #include "../SDL_internal.h"
  22. #endif
  23. #if defined(__WIN32__) || defined(__WINRT__) || defined(__GDK__)
  24. #include "../core/windows/SDL_windows.h"
  25. #endif
  26. /* CPU feature detection for SDL */
  27. #include "SDL_cpuinfo.h"
  28. #include "SDL_assert.h"
  29. #ifdef HAVE_SYSCONF
  30. #include <unistd.h>
  31. #endif
  32. #ifdef HAVE_SYSCTLBYNAME
  33. #include <sys/types.h>
  34. #include <sys/sysctl.h>
  35. #endif
  36. #if defined(__MACOS__) && (defined(__ppc__) || defined(__ppc64__))
  37. #include <sys/sysctl.h> /* For AltiVec check */
  38. #elif defined(__OpenBSD__) && defined(__powerpc__)
  39. #include <sys/types.h>
  40. #include <sys/sysctl.h> /* For AltiVec check */
  41. #include <machine/cpu.h>
  42. #elif defined(__FreeBSD__) && defined(__powerpc__)
  43. #include <machine/cpu.h>
  44. #include <sys/auxv.h>
  45. #elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
  46. #include <signal.h>
  47. #include <setjmp.h>
  48. #endif
  49. #if (defined(__LINUX__) || defined(__ANDROID__)) && defined(__arm__)
  50. #include <unistd.h>
  51. #include <sys/types.h>
  52. #include <sys/stat.h>
  53. #include <fcntl.h>
  54. #include <elf.h>
  55. /*#include <asm/hwcap.h>*/
  56. #ifndef AT_HWCAP
  57. #define AT_HWCAP 16
  58. #endif
  59. #ifndef AT_PLATFORM
  60. #define AT_PLATFORM 15
  61. #endif
  62. #ifndef HWCAP_NEON
  63. #define HWCAP_NEON (1 << 12)
  64. #endif
  65. #endif
  66. #if defined(__ANDROID__) && defined(__arm__) && !defined(HAVE_GETAUXVAL)
  67. #include <cpu-features.h>
  68. #endif
  69. #if defined(HAVE_GETAUXVAL) || defined(HAVE_ELF_AUX_INFO)
  70. #include <sys/auxv.h>
  71. #endif
  72. #ifdef __RISCOS__
  73. #include <kernel.h>
  74. #include <swis.h>
  75. #endif
  76. #ifdef __PS2__
  77. #include <kernel.h>
  78. #endif
/* Bit flags OR-ed into the cached SDL_CPUFeatures mask, one bit per
   instruction-set feature that the detection code in this file reports. */
#define CPU_HAS_RDTSC (1 << 0)
#define CPU_HAS_ALTIVEC (1 << 1)
#define CPU_HAS_MMX (1 << 2)
#define CPU_HAS_3DNOW (1 << 3)
#define CPU_HAS_SSE (1 << 4)
#define CPU_HAS_SSE2 (1 << 5)
#define CPU_HAS_SSE3 (1 << 6)
#define CPU_HAS_SSE41 (1 << 7)
#define CPU_HAS_SSE42 (1 << 8)
#define CPU_HAS_AVX (1 << 9)
#define CPU_HAS_AVX2 (1 << 10)
#define CPU_HAS_NEON (1 << 11)
#define CPU_HAS_AVX512F (1 << 12)
#define CPU_HAS_ARM_SIMD (1 << 13)
#define CPU_HAS_LSX (1 << 14)
#define CPU_HAS_LASX (1 << 15)
/* CPU_CFG2 is the operand handed to the LoongArch cpucfg instruction by
   CPU_readCPUCFG(); CPU_CFG2_LSX and CPU_CFG2_LASX are the bits of the
   returned word that are tested for LSX and LASX. */
#define CPU_CFG2 0x2
#define CPU_CFG2_LSX (1 << 6)
#define CPU_CFG2_LASX (1 << 7)
  98. #if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOS__ && !__OpenBSD__ && !__FreeBSD__
  99. /* This is the brute force way of detecting instruction sets...
  100. the idea is borrowed from the libmpeg2 library - thanks!
  101. */
  102. static jmp_buf jmpbuf;
  103. static void
  104. illegal_instruction(int sig)
  105. {
  106. longjmp(jmpbuf, 1);
  107. }
  108. #endif /* HAVE_SETJMP */
/*
 * Report whether the CPUID instruction can be used.
 *
 * Each assembly variant copies EFLAGS, flips bit 0x200000 (the ID bit),
 * writes the result back, reads EFLAGS again and sets has_CPUID to 1 only
 * when the bit actually changed.  MSVC x64 builds simply report 1.
 *
 * Returns 0 when SDL_CPUINFO_DISABLED is defined or when none of the
 * compiler/architecture branches below is compiled in.
 */
static int
CPU_haveCPUID(void)
{
    int has_CPUID = 0;
/* *INDENT-OFF* */ /* clang-format off */
#ifndef SDL_CPUINFO_DISABLED
#if (defined(__GNUC__) || defined(__llvm__)) && defined(__i386__)
    __asm__ (
    " pushfl # Get original EFLAGS \n"
    " popl %%eax \n"
    " movl %%eax,%%ecx \n"
    " xorl $0x200000,%%eax # Flip ID bit in EFLAGS \n"
    " pushl %%eax # Save new EFLAGS value on stack \n"
    " popfl # Replace current EFLAGS value \n"
    " pushfl # Get new EFLAGS \n"
    " popl %%eax # Store new EFLAGS in EAX \n"
    " xorl %%ecx,%%eax # Can not toggle ID bit, \n"
    " jz 1f # Processor=80486 \n"
    " movl $1,%0 # We have CPUID support \n"
    "1: \n"
    : "=m" (has_CPUID)
    :
    : "%eax", "%ecx"
    );
#elif (defined(__GNUC__) || defined(__llvm__)) && defined(__x86_64__)
/* Technically, if this is being compiled under __x86_64__ then it has
CPUid by definition. But it's nice to be able to prove it. :) */
    __asm__ (
    " pushfq # Get original EFLAGS \n"
    " popq %%rax \n"
    " movq %%rax,%%rcx \n"
    " xorl $0x200000,%%eax # Flip ID bit in EFLAGS \n"
    " pushq %%rax # Save new EFLAGS value on stack \n"
    " popfq # Replace current EFLAGS value \n"
    " pushfq # Get new EFLAGS \n"
    " popq %%rax # Store new EFLAGS in EAX \n"
    " xorl %%ecx,%%eax # Can not toggle ID bit, \n"
    " jz 1f # Processor=80486 \n"
    " movl $1,%0 # We have CPUID support \n"
    "1: \n"
    : "=m" (has_CPUID)
    :
    : "%rax", "%rcx"
    );
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
    __asm {
    pushfd ; Get original EFLAGS
    pop eax
    mov ecx, eax
    xor eax, 200000h ; Flip ID bit in EFLAGS
    push eax ; Save new EFLAGS value on stack
    popfd ; Replace current EFLAGS value
    pushfd ; Get new EFLAGS
    pop eax ; Store new EFLAGS in EAX
    xor eax, ecx ; Can not toggle ID bit,
    jz done ; Processor=80486
    mov has_CPUID,1 ; We have CPUID support
    done:
    }
#elif defined(_MSC_VER) && defined(_M_X64)
    /* No probe here: this configuration always reports CPUID as present. */
    has_CPUID = 1;
#elif defined(__sun) && defined(__i386)
    /* NOTE(review): the result is stored straight to -8(%ebp), presumably
       the stack slot of has_CPUID - confirm against the compiler's frame
       layout. */
    __asm (
    " pushfl \n"
    " popl %eax \n"
    " movl %eax,%ecx \n"
    " xorl $0x200000,%eax \n"
    " pushl %eax \n"
    " popfl \n"
    " pushfl \n"
    " popl %eax \n"
    " xorl %ecx,%eax \n"
    " jz 1f \n"
    " movl $1,-8(%ebp) \n"
    "1: \n"
    );
#elif defined(__sun) && defined(__amd64)
    /* NOTE(review): same hard-coded slot assumption as above, -8(%rbp). */
    __asm (
    " pushfq \n"
    " popq %rax \n"
    " movq %rax,%rcx \n"
    " xorl $0x200000,%eax \n"
    " pushq %rax \n"
    " popfq \n"
    " pushfq \n"
    " popq %rax \n"
    " xorl %ecx,%eax \n"
    " jz 1f \n"
    " movl $1,-8(%rbp) \n"
    "1: \n"
    );
#endif
#endif
/* *INDENT-ON* */ /* clang-format on */
    return has_CPUID;
}
/*
 * cpuid(func, a, b, c, d): run the CPUID instruction for leaf `func` and
 * store the resulting EAX, EBX, ECX and EDX into a, b, c and d.
 *
 * The inline-assembly variants clear ECX before executing CPUID.  The GCC
 * and LLVM variants save and restore EBX/RBX around the instruction and
 * hand its value out through ESI/RSI instead.  The MSVC x64 variant goes
 * through the __cpuid intrinsic.  When no variant applies, the fallback
 * simply sets all four outputs to 0.
 */
#if (defined(__GNUC__) || defined(__llvm__)) && defined(__i386__)
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__ ( \
    " pushl %%ebx \n" \
    " xorl %%ecx,%%ecx \n" \
    " cpuid \n" \
    " movl %%ebx, %%esi \n" \
    " popl %%ebx \n" : \
    "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
#elif (defined(__GNUC__) || defined(__llvm__)) && defined(__x86_64__)
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__ ( \
    " pushq %%rbx \n" \
    " xorq %%rcx,%%rcx \n" \
    " cpuid \n" \
    " movq %%rbx, %%rsi \n" \
    " popq %%rbx \n" : \
    "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
#define cpuid(func, a, b, c, d) \
    __asm { \
    __asm mov eax, func \
    __asm xor ecx, ecx \
    __asm cpuid \
    __asm mov a, eax \
    __asm mov b, ebx \
    __asm mov c, ecx \
    __asm mov d, edx \
    }
#elif defined(_MSC_VER) && defined(_M_X64)
/* Expands to a braced block, not a do/while(0) statement. */
#define cpuid(func, a, b, c, d) \
    { \
    int CPUInfo[4]; \
    __cpuid(CPUInfo, func); \
    a = CPUInfo[0]; \
    b = CPUInfo[1]; \
    c = CPUInfo[2]; \
    d = CPUInfo[3]; \
    }
#else
/* No CPUID on this compiler/architecture: report all-zero registers. */
#define cpuid(func, a, b, c, d) \
    do { a = b = c = d = 0; (void) a; (void) b; (void) c; (void) d; } while (0)
#endif
/* Results of CPUID leaf 1 as stored by CPU_calcCPUIDFeatures():
   [0] = EAX, [1] = EBX, [2] = ECX, [3] = EDX. */
static int CPU_CPUIDFeatures[4];
/* EAX returned by CPUID leaf 0; stays 0 when CPUID is not available. */
static int CPU_CPUIDMaxFunction = 0;
/* SDL_TRUE when the xgetbv result has both bits of mask 0x6 set. */
static SDL_bool CPU_OSSavesYMM = SDL_FALSE;
/* SDL_TRUE when CPU_OSSavesYMM holds and all bits of mask 0xe0 are set too. */
static SDL_bool CPU_OSSavesZMM = SDL_FALSE;
  252. static void
  253. CPU_calcCPUIDFeatures(void)
  254. {
  255. static SDL_bool checked = SDL_FALSE;
  256. if (!checked) {
  257. checked = SDL_TRUE;
  258. if (CPU_haveCPUID()) {
  259. int a, b, c, d;
  260. cpuid(0, a, b, c, d);
  261. CPU_CPUIDMaxFunction = a;
  262. if (CPU_CPUIDMaxFunction >= 1) {
  263. cpuid(1, a, b, c, d);
  264. CPU_CPUIDFeatures[0] = a;
  265. CPU_CPUIDFeatures[1] = b;
  266. CPU_CPUIDFeatures[2] = c;
  267. CPU_CPUIDFeatures[3] = d;
  268. /* Check to make sure we can call xgetbv */
  269. if (c & 0x08000000) {
  270. /* Call xgetbv to see if YMM (etc) register state is saved */
  271. #if (defined(__GNUC__) || defined(__llvm__)) && (defined(__i386__) || defined(__x86_64__))
  272. __asm__(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : "%edx");
  273. #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */
  274. a = (int)_xgetbv(0);
  275. #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
  276. __asm
  277. {
  278. xor ecx, ecx
  279. _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
  280. mov a, eax
  281. }
  282. #endif
  283. CPU_OSSavesYMM = ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE;
  284. CPU_OSSavesZMM = (CPU_OSSavesYMM && ((a & 0xe0) == 0xe0)) ? SDL_TRUE : SDL_FALSE;
  285. }
  286. }
  287. }
  288. }
  289. }
  290. static int
  291. CPU_haveAltiVec(void)
  292. {
  293. volatile int altivec = 0;
  294. #ifndef SDL_CPUINFO_DISABLED
  295. #if (defined(__MACOS__) && (defined(__ppc__) || defined(__ppc64__))) || (defined(__OpenBSD__) && defined(__powerpc__))
  296. #ifdef __OpenBSD__
  297. int selectors[2] = { CTL_MACHDEP, CPU_ALTIVEC };
  298. #else
  299. int selectors[2] = { CTL_HW, HW_VECTORUNIT };
  300. #endif
  301. int hasVectorUnit = 0;
  302. size_t length = sizeof(hasVectorUnit);
  303. int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0);
  304. if (0 == error)
  305. altivec = (hasVectorUnit != 0);
  306. #elif defined(__FreeBSD__) && defined(__powerpc__)
  307. unsigned long cpufeatures = 0;
  308. elf_aux_info(AT_HWCAP, &cpufeatures, sizeof(cpufeatures));
  309. altivec = cpufeatures & PPC_FEATURE_HAS_ALTIVEC;
  310. return altivec;
  311. #elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
  312. void (*handler) (int sig);
  313. handler = signal(SIGILL, illegal_instruction);
  314. if (setjmp(jmpbuf) == 0) {
  315. asm volatile ("mtspr 256, %0\n\t" "vand %%v0, %%v0, %%v0"::"r" (-1));
  316. altivec = 1;
  317. }
  318. signal(SIGILL, handler);
  319. #endif
  320. #endif
  321. return altivec;
  322. }
  323. #if (defined(__ARM_ARCH) && (__ARM_ARCH >= 6)) || defined(__aarch64__)
  324. static int
  325. CPU_haveARMSIMD(void)
  326. {
  327. return 1;
  328. }
  329. #elif !defined(__arm__)
  330. static int
  331. CPU_haveARMSIMD(void)
  332. {
  333. return 0;
  334. }
  335. #elif defined(__LINUX__)
  336. static int
  337. CPU_haveARMSIMD(void)
  338. {
  339. int arm_simd = 0;
  340. int fd;
  341. fd = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC);
  342. if (fd >= 0)
  343. {
  344. Elf32_auxv_t aux;
  345. while (read(fd, &aux, sizeof aux) == sizeof aux)
  346. {
  347. if (aux.a_type == AT_PLATFORM)
  348. {
  349. const char *plat = (const char *) aux.a_un.a_val;
  350. if (plat) {
  351. arm_simd = SDL_strncmp(plat, "v6l", 3) == 0 ||
  352. SDL_strncmp(plat, "v7l", 3) == 0;
  353. }
  354. }
  355. }
  356. close(fd);
  357. }
  358. return arm_simd;
  359. }
  360. #elif defined(__RISCOS__)
  361. static int
  362. CPU_haveARMSIMD(void)
  363. {
  364. _kernel_swi_regs regs;
  365. regs.r[0] = 0;
  366. if (_kernel_swi(OS_PlatformFeatures, &regs, &regs) != NULL)
  367. return 0;
  368. if (!(regs.r[0] & (1<<31)))
  369. return 0;
  370. regs.r[0] = 34;
  371. regs.r[1] = 29;
  372. if (_kernel_swi(OS_PlatformFeatures, &regs, &regs) != NULL)
  373. return 0;
  374. return regs.r[0];
  375. }
  376. #else
  377. static int
  378. CPU_haveARMSIMD(void)
  379. {
  380. #warning SDL_HasARMSIMD is not implemented for this ARM platform. Write me.
  381. return 0;
  382. }
  383. #endif
  384. #if defined(__LINUX__) && defined(__arm__) && !defined(HAVE_GETAUXVAL)
  385. static int
  386. readProcAuxvForNeon(void)
  387. {
  388. int neon = 0;
  389. int fd;
  390. fd = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC);
  391. if (fd >= 0)
  392. {
  393. Elf32_auxv_t aux;
  394. while (read(fd, &aux, sizeof (aux)) == sizeof (aux)) {
  395. if (aux.a_type == AT_HWCAP) {
  396. neon = (aux.a_un.a_val & HWCAP_NEON) == HWCAP_NEON;
  397. break;
  398. }
  399. }
  400. close(fd);
  401. }
  402. return neon;
  403. }
  404. #endif
/*
 * Report whether NEON is available (non-zero) or not (0).
 *
 * Exactly one branch of the #if chain below is compiled, and the order of
 * the branches is significant: earlier platform tests win over later ones.
 */
static int
CPU_haveNEON(void)
{
/* The way you detect NEON is a privileged instruction on ARM, so you have
to query the OS kernel in a platform-specific way. :/ */
#if defined(SDL_CPUINFO_DISABLED)
    return 0; /* disabled */
#elif (defined(__WINDOWS__) || defined(__WINRT__) || defined(__GDK__)) && (defined(_M_ARM) || defined(_M_ARM64))
/* Visual Studio, for ARM, doesn't define __ARM_ARCH. Handle this first. */
/* Seems to have been removed */
# if !defined(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)
# define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19
# endif
/* All WinRT ARM devices are required to support NEON, but just in case. */
    return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != 0;
#elif (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || defined(__aarch64__)
    return 1; /* ARMv8 always has non-optional NEON support. */
#elif __VITA__
    return 1;
#elif __3DS__
    return 0;
#elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7)
/* (note that sysctlbyname("hw.optional.neon") doesn't work!) */
    return 1; /* all Apple ARMv7 chips and later have NEON. */
#elif defined(__APPLE__)
    return 0; /* assume anything else from Apple doesn't have NEON. */
#elif !defined(__arm__)
    return 0; /* not an ARM CPU at all. */
#elif defined(__OpenBSD__)
    return 1; /* OpenBSD only supports ARMv7 CPUs that have NEON. */
#elif defined(HAVE_ELF_AUX_INFO)
    /* Read AT_HWCAP through elf_aux_info(); a failed call counts as "no NEON". */
    unsigned long hasneon = 0;
    if (elf_aux_info(AT_HWCAP, (void *)&hasneon, (int)sizeof(hasneon)) != 0)
        return 0;
    return ((hasneon & HWCAP_NEON) == HWCAP_NEON);
#elif (defined(__LINUX__) || defined(__ANDROID__)) && defined(HAVE_GETAUXVAL)
    return ((getauxval(AT_HWCAP) & HWCAP_NEON) == HWCAP_NEON);
#elif defined(__LINUX__)
    /* No getauxval(): parse /proc/self/auxv by hand. */
    return readProcAuxvForNeon();
#elif defined(__ANDROID__)
/* Use NDK cpufeatures to read either /proc/self/auxv or /proc/cpuinfo */
    {
        AndroidCpuFamily cpu_family = android_getCpuFamily();
        if (cpu_family == ANDROID_CPU_FAMILY_ARM) {
            uint64_t cpu_features = android_getCpuFeatures();
            if ((cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) != 0) {
                return 1;
            }
        }
        return 0;
    }
#elif defined(__RISCOS__)
/* Use the VFPSupport_Features SWI to access the MVFR registers */
    {
        _kernel_swi_regs regs;
        regs.r[0] = 0;
        if (_kernel_swi(VFPSupport_Features, &regs, &regs) == NULL) {
            /* NEON is reported only when bits 12-23 of r2 read 0x111. */
            if ((regs.r[2] & 0xFFF000) == 0x111000) {
                return 1;
            }
        }
        return 0;
    }
#else
#warning SDL_HasNEON is not implemented for this ARM platform. Write me.
    return 0;
#endif
}
  473. static int
  474. CPU_readCPUCFG(void)
  475. {
  476. uint32_t cfg2 = 0;
  477. #if defined __loongarch__
  478. __asm__ volatile(
  479. "cpucfg %0, %1 \n\t"
  480. : "+&r"(cfg2)
  481. : "r"(CPU_CFG2)
  482. );
  483. #endif
  484. return cfg2;
  485. }
/* LoongArch SIMD tests: each call re-reads the configuration word through
   CPU_readCPUCFG() and masks one bit, so the result is zero or that bit's
   mask value (not necessarily 1). */
#define CPU_haveLSX() (CPU_readCPUCFG() & CPU_CFG2_LSX)
#define CPU_haveLASX() (CPU_readCPUCFG() & CPU_CFG2_LASX)
  488. #if defined(__e2k__)
  489. inline int
  490. CPU_have3DNow(void)
  491. {
  492. #if defined(__3dNOW__)
  493. return 1;
  494. #else
  495. return 0;
  496. #endif
  497. }
  498. #else
  499. static int
  500. CPU_have3DNow(void)
  501. {
  502. if (CPU_CPUIDMaxFunction > 0) { /* that is, do we have CPUID at all? */
  503. int a, b, c, d;
  504. cpuid(0x80000000, a, b, c, d);
  505. if (a >= 0x80000001) {
  506. cpuid(0x80000001, a, b, c, d);
  507. return (d & 0x80000000);
  508. }
  509. }
  510. return 0;
  511. }
  512. #endif
/*
 * Per-feature test macros for RDTSC, MMX, SSE, SSE2, SSE3, SSE4.1, SSE4.2
 * and AVX.
 *
 * e2k builds: the answers are compile-time constants derived from the
 * compiler's predefined feature macros (RDTSC is always reported absent).
 */
#if defined(__e2k__)
#define CPU_haveRDTSC() (0)
#if defined(__MMX__)
#define CPU_haveMMX() (1)
#else
#define CPU_haveMMX() (0)
#endif
#if defined(__SSE__)
#define CPU_haveSSE() (1)
#else
#define CPU_haveSSE() (0)
#endif
#if defined(__SSE2__)
#define CPU_haveSSE2() (1)
#else
#define CPU_haveSSE2() (0)
#endif
#if defined(__SSE3__)
#define CPU_haveSSE3() (1)
#else
#define CPU_haveSSE3() (0)
#endif
#if defined(__SSE4_1__)
#define CPU_haveSSE41() (1)
#else
#define CPU_haveSSE41() (0)
#endif
#if defined(__SSE4_2__)
#define CPU_haveSSE42() (1)
#else
#define CPU_haveSSE42() (0)
#endif
#if defined(__AVX__)
#define CPU_haveAVX() (1)
#else
#define CPU_haveAVX() (0)
#endif
#else
/* Other builds: test single bits of the cached CPUID leaf-1 registers
   (CPU_CPUIDFeatures[3] holds EDX, [2] holds ECX), so the array must have
   been filled by CPU_calcCPUIDFeatures() first.  The bit tests evaluate to
   zero or the mask value, not necessarily 1.  AVX additionally requires
   CPU_OSSavesYMM. */
#define CPU_haveRDTSC() (CPU_CPUIDFeatures[3] & 0x00000010)
#define CPU_haveMMX() (CPU_CPUIDFeatures[3] & 0x00800000)
#define CPU_haveSSE() (CPU_CPUIDFeatures[3] & 0x02000000)
#define CPU_haveSSE2() (CPU_CPUIDFeatures[3] & 0x04000000)
#define CPU_haveSSE3() (CPU_CPUIDFeatures[2] & 0x00000001)
#define CPU_haveSSE41() (CPU_CPUIDFeatures[2] & 0x00080000)
#define CPU_haveSSE42() (CPU_CPUIDFeatures[2] & 0x00100000)
#define CPU_haveAVX() (CPU_OSSavesYMM && (CPU_CPUIDFeatures[2] & 0x10000000))
#endif
  560. #if defined(__e2k__)
  561. inline int
  562. CPU_haveAVX2(void)
  563. {
  564. #if defined(__AVX2__)
  565. return 1;
  566. #else
  567. return 0;
  568. #endif
  569. }
  570. #else
  571. static int
  572. CPU_haveAVX2(void)
  573. {
  574. if (CPU_OSSavesYMM && (CPU_CPUIDMaxFunction >= 7)) {
  575. int a, b, c, d;
  576. (void) a; (void) b; (void) c; (void) d; /* compiler warnings... */
  577. cpuid(7, a, b, c, d);
  578. return (b & 0x00000020);
  579. }
  580. return 0;
  581. }
  582. #endif
  583. #if defined(__e2k__)
  584. inline int
  585. CPU_haveAVX512F(void)
  586. {
  587. return 0;
  588. }
  589. #else
  590. static int
  591. CPU_haveAVX512F(void)
  592. {
  593. if (CPU_OSSavesZMM && (CPU_CPUIDMaxFunction >= 7)) {
  594. int a, b, c, d;
  595. (void) a; (void) b; (void) c; (void) d; /* compiler warnings... */
  596. cpuid(7, a, b, c, d);
  597. return (b & 0x00010000);
  598. }
  599. return 0;
  600. }
  601. #endif
  602. static int SDL_CPUCount = 0;
  603. int
  604. SDL_GetCPUCount(void)
  605. {
  606. if (!SDL_CPUCount) {
  607. #ifndef SDL_CPUINFO_DISABLED
  608. #if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
  609. if (SDL_CPUCount <= 0) {
  610. SDL_CPUCount = (int)sysconf(_SC_NPROCESSORS_ONLN);
  611. }
  612. #endif
  613. #ifdef HAVE_SYSCTLBYNAME
  614. if (SDL_CPUCount <= 0) {
  615. size_t size = sizeof(SDL_CPUCount);
  616. sysctlbyname("hw.ncpu", &SDL_CPUCount, &size, NULL, 0);
  617. }
  618. #endif
  619. #if defined(__WIN32__) || defined(__GDK__)
  620. if (SDL_CPUCount <= 0) {
  621. SYSTEM_INFO info;
  622. GetSystemInfo(&info);
  623. SDL_CPUCount = info.dwNumberOfProcessors;
  624. }
  625. #endif
  626. #endif
  627. /* There has to be at least 1, right? :) */
  628. if (SDL_CPUCount <= 0) {
  629. SDL_CPUCount = 1;
  630. }
  631. }
  632. return SDL_CPUCount;
  633. }
  634. #if defined(__e2k__)
  635. inline const char *
  636. SDL_GetCPUType(void)
  637. {
  638. static char SDL_CPUType[13];
  639. SDL_strlcpy(SDL_CPUType, "E2K MACHINE", sizeof(SDL_CPUType));
  640. return SDL_CPUType;
  641. }
  642. #else
  643. /* Oh, such a sweet sweet trick, just not very useful. :) */
  644. static const char *
  645. SDL_GetCPUType(void)
  646. {
  647. static char SDL_CPUType[13];
  648. if (!SDL_CPUType[0]) {
  649. int i = 0;
  650. CPU_calcCPUIDFeatures();
  651. if (CPU_CPUIDMaxFunction > 0) { /* do we have CPUID at all? */
  652. int a, b, c, d;
  653. cpuid(0x00000000, a, b, c, d);
  654. (void) a;
  655. SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
  656. SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
  657. SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
  658. SDL_CPUType[i++] = (char)(b & 0xff);
  659. SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
  660. SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
  661. SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
  662. SDL_CPUType[i++] = (char)(d & 0xff);
  663. SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
  664. SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
  665. SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
  666. SDL_CPUType[i++] = (char)(c & 0xff);
  667. }
  668. if (!SDL_CPUType[0]) {
  669. SDL_strlcpy(SDL_CPUType, "Unknown", sizeof(SDL_CPUType));
  670. }
  671. }
  672. return SDL_CPUType;
  673. }
  674. #endif
  675. #ifdef TEST_MAIN /* !!! FIXME: only used for test at the moment. */
  676. #if defined(__e2k__)
  677. inline const char *
  678. SDL_GetCPUName(void)
  679. {
  680. static char SDL_CPUName[48];
  681. SDL_strlcpy(SDL_CPUName, __builtin_cpu_name(), sizeof(SDL_CPUName));
  682. return SDL_CPUName;
  683. }
  684. #else
  685. static const char *
  686. SDL_GetCPUName(void)
  687. {
  688. static char SDL_CPUName[48];
  689. if (!SDL_CPUName[0]) {
  690. int i = 0;
  691. int a, b, c, d;
  692. CPU_calcCPUIDFeatures();
  693. if (CPU_CPUIDMaxFunction > 0) { /* do we have CPUID at all? */
  694. cpuid(0x80000000, a, b, c, d);
  695. if (a >= 0x80000004) {
  696. cpuid(0x80000002, a, b, c, d);
  697. SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
  698. SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
  699. SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
  700. SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
  701. SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
  702. SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
  703. SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
  704. SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
  705. SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
  706. SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
  707. SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
  708. SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
  709. SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
  710. SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
  711. SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
  712. SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
  713. cpuid(0x80000003, a, b, c, d);
  714. SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
  715. SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
  716. SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
  717. SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
  718. SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
  719. SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
  720. SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
  721. SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
  722. SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
  723. SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
  724. SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
  725. SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
  726. SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
  727. SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
  728. SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
  729. SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
  730. cpuid(0x80000004, a, b, c, d);
  731. SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
  732. SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
  733. SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
  734. SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
  735. SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
  736. SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
  737. SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
  738. SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
  739. SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
  740. SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
  741. SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
  742. SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
  743. SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
  744. SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
  745. SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
  746. SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
  747. }
  748. }
  749. if (!SDL_CPUName[0]) {
  750. SDL_strlcpy(SDL_CPUName, "Unknown", sizeof(SDL_CPUName));
  751. }
  752. }
  753. return SDL_CPUName;
  754. }
  755. #endif
  756. #endif
  757. int
  758. SDL_GetCPUCacheLineSize(void)
  759. {
  760. const char *cpuType = SDL_GetCPUType();
  761. int a, b, c, d;
  762. (void) a; (void) b; (void) c; (void) d;
  763. if (SDL_strcmp(cpuType, "GenuineIntel") == 0 || SDL_strcmp(cpuType, "CentaurHauls") == 0 || SDL_strcmp(cpuType, " Shanghai ") == 0) {
  764. cpuid(0x00000001, a, b, c, d);
  765. return (((b >> 8) & 0xff) * 8);
  766. } else if (SDL_strcmp(cpuType, "AuthenticAMD") == 0 || SDL_strcmp(cpuType, "HygonGenuine") == 0) {
  767. cpuid(0x80000005, a, b, c, d);
  768. return (c & 0xff);
  769. } else {
  770. /* Just make a guess here... */
  771. return SDL_CACHELINE_SIZE;
  772. }
  773. }
  774. static Uint32 SDL_CPUFeatures = 0xFFFFFFFF;
  775. static Uint32 SDL_SIMDAlignment = 0xFFFFFFFF;
  776. static Uint32
  777. SDL_GetCPUFeatures(void)
  778. {
  779. if (SDL_CPUFeatures == 0xFFFFFFFF) {
  780. CPU_calcCPUIDFeatures();
  781. SDL_CPUFeatures = 0;
  782. SDL_SIMDAlignment = sizeof(void *); /* a good safe base value */
  783. if (CPU_haveRDTSC()) {
  784. SDL_CPUFeatures |= CPU_HAS_RDTSC;
  785. }
  786. if (CPU_haveAltiVec()) {
  787. SDL_CPUFeatures |= CPU_HAS_ALTIVEC;
  788. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  789. }
  790. if (CPU_haveMMX()) {
  791. SDL_CPUFeatures |= CPU_HAS_MMX;
  792. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
  793. }
  794. if (CPU_have3DNow()) {
  795. SDL_CPUFeatures |= CPU_HAS_3DNOW;
  796. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
  797. }
  798. if (CPU_haveSSE()) {
  799. SDL_CPUFeatures |= CPU_HAS_SSE;
  800. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  801. }
  802. if (CPU_haveSSE2()) {
  803. SDL_CPUFeatures |= CPU_HAS_SSE2;
  804. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  805. }
  806. if (CPU_haveSSE3()) {
  807. SDL_CPUFeatures |= CPU_HAS_SSE3;
  808. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  809. }
  810. if (CPU_haveSSE41()) {
  811. SDL_CPUFeatures |= CPU_HAS_SSE41;
  812. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  813. }
  814. if (CPU_haveSSE42()) {
  815. SDL_CPUFeatures |= CPU_HAS_SSE42;
  816. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  817. }
  818. if (CPU_haveAVX()) {
  819. SDL_CPUFeatures |= CPU_HAS_AVX;
  820. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
  821. }
  822. if (CPU_haveAVX2()) {
  823. SDL_CPUFeatures |= CPU_HAS_AVX2;
  824. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
  825. }
  826. if (CPU_haveAVX512F()) {
  827. SDL_CPUFeatures |= CPU_HAS_AVX512F;
  828. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 64);
  829. }
  830. if (CPU_haveARMSIMD()) {
  831. SDL_CPUFeatures |= CPU_HAS_ARM_SIMD;
  832. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  833. }
  834. if (CPU_haveNEON()) {
  835. SDL_CPUFeatures |= CPU_HAS_NEON;
  836. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  837. }
  838. if (CPU_haveLSX()) {
  839. SDL_CPUFeatures |= CPU_HAS_LSX;
  840. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  841. }
  842. if (CPU_haveLASX()) {
  843. SDL_CPUFeatures |= CPU_HAS_LASX;
  844. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
  845. }
  846. }
  847. return SDL_CPUFeatures;
  848. }
  849. #define CPU_FEATURE_AVAILABLE(f) ((SDL_GetCPUFeatures() & f) ? SDL_TRUE : SDL_FALSE)
  850. SDL_bool SDL_HasRDTSC(void)
  851. {
  852. return CPU_FEATURE_AVAILABLE(CPU_HAS_RDTSC);
  853. }
  854. SDL_bool
  855. SDL_HasAltiVec(void)
  856. {
  857. return CPU_FEATURE_AVAILABLE(CPU_HAS_ALTIVEC);
  858. }
  859. SDL_bool
  860. SDL_HasMMX(void)
  861. {
  862. return CPU_FEATURE_AVAILABLE(CPU_HAS_MMX);
  863. }
  864. SDL_bool
  865. SDL_Has3DNow(void)
  866. {
  867. return CPU_FEATURE_AVAILABLE(CPU_HAS_3DNOW);
  868. }
  869. SDL_bool
  870. SDL_HasSSE(void)
  871. {
  872. return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE);
  873. }
  874. SDL_bool
  875. SDL_HasSSE2(void)
  876. {
  877. return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE2);
  878. }
  879. SDL_bool
  880. SDL_HasSSE3(void)
  881. {
  882. return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE3);
  883. }
  884. SDL_bool
  885. SDL_HasSSE41(void)
  886. {
  887. return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE41);
  888. }
  889. SDL_bool
  890. SDL_HasSSE42(void)
  891. {
  892. return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE42);
  893. }
  894. SDL_bool
  895. SDL_HasAVX(void)
  896. {
  897. return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX);
  898. }
  899. SDL_bool
  900. SDL_HasAVX2(void)
  901. {
  902. return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX2);
  903. }
  904. SDL_bool
  905. SDL_HasAVX512F(void)
  906. {
  907. return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX512F);
  908. }
  909. SDL_bool
  910. SDL_HasARMSIMD(void)
  911. {
  912. return CPU_FEATURE_AVAILABLE(CPU_HAS_ARM_SIMD);
  913. }
  914. SDL_bool
  915. SDL_HasNEON(void)
  916. {
  917. return CPU_FEATURE_AVAILABLE(CPU_HAS_NEON);
  918. }
  919. SDL_bool
  920. SDL_HasLSX(void)
  921. {
  922. return CPU_FEATURE_AVAILABLE(CPU_HAS_LSX);
  923. }
  924. SDL_bool
  925. SDL_HasLASX(void)
  926. {
  927. return CPU_FEATURE_AVAILABLE(CPU_HAS_LASX);
  928. }
  929. static int SDL_SystemRAM = 0;
  930. int
  931. SDL_GetSystemRAM(void)
  932. {
  933. if (!SDL_SystemRAM) {
  934. #ifndef SDL_CPUINFO_DISABLED
  935. #if defined(HAVE_SYSCONF) && defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
  936. if (SDL_SystemRAM <= 0) {
  937. SDL_SystemRAM = (int)((Sint64)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE) / (1024*1024));
  938. }
  939. #endif
  940. #ifdef HAVE_SYSCTLBYNAME
  941. if (SDL_SystemRAM <= 0) {
  942. #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__DragonFly__)
  943. #ifdef HW_REALMEM
  944. int mib[2] = {CTL_HW, HW_REALMEM};
  945. #else
  946. /* might only report up to 2 GiB */
  947. int mib[2] = {CTL_HW, HW_PHYSMEM};
  948. #endif /* HW_REALMEM */
  949. #else
  950. int mib[2] = {CTL_HW, HW_MEMSIZE};
  951. #endif /* __FreeBSD__ || __FreeBSD_kernel__ */
  952. Uint64 memsize = 0;
  953. size_t len = sizeof(memsize);
  954. if (sysctl(mib, 2, &memsize, &len, NULL, 0) == 0) {
  955. SDL_SystemRAM = (int)(memsize / (1024*1024));
  956. }
  957. }
  958. #endif
  959. #if defined(__WIN32__) || defined(__GDK__)
  960. if (SDL_SystemRAM <= 0) {
  961. MEMORYSTATUSEX stat;
  962. stat.dwLength = sizeof(stat);
  963. if (GlobalMemoryStatusEx(&stat)) {
  964. SDL_SystemRAM = (int)(stat.ullTotalPhys / (1024 * 1024));
  965. }
  966. }
  967. #endif
  968. #ifdef __RISCOS__
  969. if (SDL_SystemRAM <= 0) {
  970. _kernel_swi_regs regs;
  971. regs.r[0] = 0x108;
  972. if (_kernel_swi(OS_Memory, &regs, &regs) == NULL) {
  973. SDL_SystemRAM = (int)(regs.r[1] * regs.r[2] / (1024 * 1024));
  974. }
  975. }
  976. #endif
  977. #ifdef __VITA__
  978. if (SDL_SystemRAM <= 0) {
  979. /* Vita has 512MiB on SoC, that's split into 256MiB(+109MiB in extended memory mode) for app
  980. +26MiB of physically continuous memory, +112MiB of CDRAM(VRAM) + system reserved memory. */
  981. SDL_SystemRAM = 536870912;
  982. }
  983. #endif
  984. #ifdef __PS2__
  985. if (SDL_SystemRAM <= 0) {
  986. /* PlayStation 2 has 32MiB however there are some special models with 64 and 128 */
  987. SDL_SystemRAM = GetMemorySize();
  988. }
  989. #endif
  990. #endif
  991. }
  992. return SDL_SystemRAM;
  993. }
  994. size_t
  995. SDL_SIMDGetAlignment(void)
  996. {
  997. if (SDL_SIMDAlignment == 0xFFFFFFFF) {
  998. SDL_GetCPUFeatures(); /* make sure this has been calculated */
  999. }
  1000. SDL_assert(SDL_SIMDAlignment != 0);
  1001. return SDL_SIMDAlignment;
  1002. }
  1003. void *
  1004. SDL_SIMDAlloc(const size_t len)
  1005. {
  1006. const size_t alignment = SDL_SIMDGetAlignment();
  1007. const size_t padding = (alignment - (len % alignment)) % alignment;
  1008. Uint8 *retval = NULL;
  1009. Uint8 *ptr;
  1010. size_t to_allocate;
  1011. /* alignment + padding + sizeof (void *) is bounded (a few hundred
  1012. * bytes max), so no need to check for overflow within that argument */
  1013. if (SDL_size_add_overflow(len, alignment + padding + sizeof (void *), &to_allocate)) {
  1014. return NULL;
  1015. }
  1016. ptr = (Uint8 *) SDL_malloc(to_allocate);
  1017. if (ptr) {
  1018. /* store the actual allocated pointer right before our aligned pointer. */
  1019. retval = ptr + sizeof (void *);
  1020. retval += alignment - (((size_t) retval) % alignment);
  1021. *(((void **) retval) - 1) = ptr;
  1022. }
  1023. return retval;
  1024. }
  1025. void *
  1026. SDL_SIMDRealloc(void *mem, const size_t len)
  1027. {
  1028. const size_t alignment = SDL_SIMDGetAlignment();
  1029. const size_t padding = (alignment - (len % alignment)) % alignment;
  1030. Uint8 *retval = (Uint8*) mem;
  1031. void *oldmem = mem;
  1032. size_t memdiff = 0, ptrdiff;
  1033. Uint8 *ptr;
  1034. size_t to_allocate;
  1035. /* alignment + padding + sizeof (void *) is bounded (a few hundred
  1036. * bytes max), so no need to check for overflow within that argument */
  1037. if (SDL_size_add_overflow(len, alignment + padding + sizeof (void *), &to_allocate)) {
  1038. return NULL;
  1039. }
  1040. if (mem) {
  1041. mem = *(((void **) mem) - 1);
  1042. /* Check the delta between the real pointer and user pointer */
  1043. memdiff = ((size_t) oldmem) - ((size_t) mem);
  1044. }
  1045. ptr = (Uint8 *) SDL_realloc(mem, to_allocate);
  1046. if (ptr == NULL) {
  1047. return NULL; /* Out of memory, bail! */
  1048. }
  1049. /* Store the actual allocated pointer right before our aligned pointer. */
  1050. retval = ptr + sizeof (void *);
  1051. retval += alignment - (((size_t) retval) % alignment);
  1052. /* Make sure the delta is the same! */
  1053. if (mem) {
  1054. ptrdiff = ((size_t) retval) - ((size_t) ptr);
  1055. if (memdiff != ptrdiff) { /* Delta has changed, copy to new offset! */
  1056. oldmem = (void*) (((uintptr_t) ptr) + memdiff);
  1057. /* Even though the data past the old `len` is undefined, this is the
  1058. * only length value we have, and it guarantees that we copy all the
  1059. * previous memory anyhow.
  1060. */
  1061. SDL_memmove(retval, oldmem, len);
  1062. }
  1063. }
  1064. /* Actually store the allocated pointer, finally. */
  1065. *(((void **) retval) - 1) = ptr;
  1066. return retval;
  1067. }
  1068. void
  1069. SDL_SIMDFree(void *ptr)
  1070. {
  1071. if (ptr) {
  1072. SDL_free(*(((void **) ptr) - 1));
  1073. }
  1074. }
  1075. #ifdef TEST_MAIN
  1076. #include <stdio.h>
  1077. int
  1078. main()
  1079. {
  1080. printf("CPU count: %d\n", SDL_GetCPUCount());
  1081. printf("CPU type: %s\n", SDL_GetCPUType());
  1082. printf("CPU name: %s\n", SDL_GetCPUName());
  1083. printf("CacheLine size: %d\n", SDL_GetCPUCacheLineSize());
  1084. printf("RDTSC: %d\n", SDL_HasRDTSC());
  1085. printf("Altivec: %d\n", SDL_HasAltiVec());
  1086. printf("MMX: %d\n", SDL_HasMMX());
  1087. printf("3DNow: %d\n", SDL_Has3DNow());
  1088. printf("SSE: %d\n", SDL_HasSSE());
  1089. printf("SSE2: %d\n", SDL_HasSSE2());
  1090. printf("SSE3: %d\n", SDL_HasSSE3());
  1091. printf("SSE4.1: %d\n", SDL_HasSSE41());
  1092. printf("SSE4.2: %d\n", SDL_HasSSE42());
  1093. printf("AVX: %d\n", SDL_HasAVX());
  1094. printf("AVX2: %d\n", SDL_HasAVX2());
  1095. printf("AVX-512F: %d\n", SDL_HasAVX512F());
  1096. printf("ARM SIMD: %d\n", SDL_HasARMSIMD());
  1097. printf("NEON: %d\n", SDL_HasNEON());
  1098. printf("LSX: %d\n", SDL_HasLSX());
  1099. printf("LASX: %d\n", SDL_HasLASX());
  1100. printf("RAM: %d MB\n", SDL_GetSystemRAM());
  1101. return 0;
  1102. }
  1103. #endif /* TEST_MAIN */
  1104. /* vi: set ts=4 sw=4 expandtab: */