1
0

SDL_cpuinfo.c 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110
  1. /*
  2. Simple DirectMedia Layer
  3. Copyright (C) 1997-2023 Sam Lantinga <slouken@libsdl.org>
  4. This software is provided 'as-is', without any express or implied
  5. warranty. In no event will the authors be held liable for any damages
  6. arising from the use of this software.
  7. Permission is granted to anyone to use this software for any purpose,
  8. including commercial applications, and to alter it and redistribute it
  9. freely, subject to the following restrictions:
  10. 1. The origin of this software must not be misrepresented; you must not
  11. claim that you wrote the original software. If you use this software
  12. in a product, an acknowledgment in the product documentation would be
  13. appreciated but is not required.
  14. 2. Altered source versions must be plainly marked as such, and must not be
  15. misrepresented as being the original software.
  16. 3. This notice may not be removed or altered from any source distribution.
  17. */
  18. #include "SDL_internal.h"
  19. #if defined(__WIN32__) || defined(__WINRT__) || defined(__GDK__)
  20. #include "../core/windows/SDL_windows.h"
  21. #endif
  22. /* CPU feature detection for SDL */
  23. #ifdef HAVE_SYSCONF
  24. #include <unistd.h>
  25. #endif
  26. #ifdef HAVE_SYSCTLBYNAME
  27. #include <sys/types.h>
  28. #include <sys/sysctl.h>
  29. #endif
  30. #if defined(__MACOS__) && (defined(__ppc__) || defined(__ppc64__))
  31. #include <sys/sysctl.h> /* For AltiVec check */
  32. #elif defined(__OpenBSD__) && defined(__powerpc__)
  33. #include <sys/types.h>
  34. #include <sys/sysctl.h> /* For AltiVec check */
  35. #include <machine/cpu.h>
  36. #elif defined(__FreeBSD__) && defined(__powerpc__)
  37. #include <machine/cpu.h>
  38. #include <sys/auxv.h>
  39. #elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
  40. #include <signal.h>
  41. #include <setjmp.h>
  42. #endif
  43. #if (defined(__LINUX__) || defined(__ANDROID__)) && defined(__arm__)
  44. #include <unistd.h>
  45. #include <sys/types.h>
  46. #include <sys/stat.h>
  47. #include <fcntl.h>
  48. #include <elf.h>
  49. /*#include <asm/hwcap.h>*/
  50. #ifndef AT_HWCAP
  51. #define AT_HWCAP 16
  52. #endif
  53. #ifndef AT_PLATFORM
  54. #define AT_PLATFORM 15
  55. #endif
  56. #ifndef HWCAP_NEON
  57. #define HWCAP_NEON (1 << 12)
  58. #endif
  59. #endif
  60. #if defined(__ANDROID__) && defined(__arm__) && !defined(HAVE_GETAUXVAL)
  61. #include <cpu-features.h>
  62. #endif
  63. #if defined(HAVE_GETAUXVAL) || defined(HAVE_ELF_AUX_INFO)
  64. #include <sys/auxv.h>
  65. #endif
  66. #ifdef __RISCOS__
  67. #include <kernel.h>
  68. #include <swis.h>
  69. #endif
  70. #ifdef __PS2__
  71. #include <kernel.h>
  72. #endif
  73. #ifdef __HAIKU__
  74. #include <kernel/OS.h>
  75. #endif
  /* Bit flags OR-ed into the cached SDL_CPUFeatures mask by SDL_GetCPUFeatures();
     each one records that the matching CPU feature was detected. */
  76. #define CPU_HAS_RDTSC (1 << 0)
  77. #define CPU_HAS_ALTIVEC (1 << 1)
  78. #define CPU_HAS_MMX (1 << 2)
  79. #define CPU_HAS_SSE (1 << 3)
  80. #define CPU_HAS_SSE2 (1 << 4)
  81. #define CPU_HAS_SSE3 (1 << 5)
  82. #define CPU_HAS_SSE41 (1 << 6)
  83. #define CPU_HAS_SSE42 (1 << 7)
  84. #define CPU_HAS_AVX (1 << 8)
  85. #define CPU_HAS_AVX2 (1 << 9)
  86. #define CPU_HAS_NEON (1 << 10)
  87. #define CPU_HAS_AVX512F (1 << 11)
  88. #define CPU_HAS_ARM_SIMD (1 << 12)
  89. #define CPU_HAS_LSX (1 << 13)
  90. #define CPU_HAS_LASX (1 << 14)
  /* CPU_CFG2 is the operand CPU_readCPUCFG() hands to the LoongArch `cpucfg`
     instruction; the two masks below are the bits of the returned word that
     CPU_haveLSX() / CPU_haveLASX() test. */
  91. #define CPU_CFG2 0x2
  92. #define CPU_CFG2_LSX (1 << 6)
  93. #define CPU_CFG2_LASX (1 << 7)
  94. #if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOS__ && !__OpenBSD__ && !__FreeBSD__
  95. /* This is the brute force way of detecting instruction sets...
  96. the idea is borrowed from the libmpeg2 library - thanks!
  97. */
  /* Jump buffer armed with setjmp() by CPU_haveAltiVec() before it executes a
     trial AltiVec instruction. */
  98. static jmp_buf jmpbuf;
  /* SIGILL handler installed by CPU_haveAltiVec(): unwinds back to the
     setjmp() point with value 1. The signal number is not used. */
  99. static void illegal_instruction(int sig)
  100. {
  101. longjmp(jmpbuf, 1);
  102. }
  103. #endif /* HAVE_SETJMP */
  /*
   * Report whether the CPUID instruction can be used.
   *
   * Returns 1 when CPUID is available and 0 otherwise. The result stays 0 when
   * SDL_CPUINFO_DISABLED is defined or when none of the compiler/target
   * branches below is selected.
   *
   * The assembly variants all use the same probe: read EFLAGS, flip the ID bit
   * (0x200000), write it back, re-read it, and check whether the bit changed.
   */
  104. static int CPU_haveCPUID(void)
  105. {
  106. int has_CPUID = 0;
  107. /* *INDENT-OFF* */ /* clang-format off */
  108. #ifndef SDL_CPUINFO_DISABLED
  /* GCC / LLVM, 32-bit x86. */
  109. #if (defined(__GNUC__) || defined(__llvm__)) && defined(__i386__)
  110. __asm__ (
  111. " pushfl # Get original EFLAGS \n"
  112. " popl %%eax \n"
  113. " movl %%eax,%%ecx \n"
  114. " xorl $0x200000,%%eax # Flip ID bit in EFLAGS \n"
  115. " pushl %%eax # Save new EFLAGS value on stack \n"
  116. " popfl # Replace current EFLAGS value \n"
  117. " pushfl # Get new EFLAGS \n"
  118. " popl %%eax # Store new EFLAGS in EAX \n"
  119. " xorl %%ecx,%%eax # Can not toggle ID bit, \n"
  120. " jz 1f # Processor=80486 \n"
  121. " movl $1,%0 # We have CPUID support \n"
  122. "1: \n"
  123. : "=m" (has_CPUID)
  124. :
  125. : "%eax", "%ecx"
  126. );
  /* GCC / LLVM, x86-64: same probe using the 64-bit push/pop forms. */
  127. #elif (defined(__GNUC__) || defined(__llvm__)) && defined(__x86_64__)
  128. /* Technically, if this is being compiled under __x86_64__ then it has
  129. CPUid by definition. But it's nice to be able to prove it. :) */
  130. __asm__ (
  131. " pushfq # Get original EFLAGS \n"
  132. " popq %%rax \n"
  133. " movq %%rax,%%rcx \n"
  134. " xorl $0x200000,%%eax # Flip ID bit in EFLAGS \n"
  135. " pushq %%rax # Save new EFLAGS value on stack \n"
  136. " popfq # Replace current EFLAGS value \n"
  137. " pushfq # Get new EFLAGS \n"
  138. " popq %%rax # Store new EFLAGS in EAX \n"
  139. " xorl %%ecx,%%eax # Can not toggle ID bit, \n"
  140. " jz 1f # Processor=80486 \n"
  141. " movl $1,%0 # We have CPUID support \n"
  142. "1: \n"
  143. : "=m" (has_CPUID)
  144. :
  145. : "%rax", "%rcx"
  146. );
  /* MSVC 32-bit x86 or Watcom: same probe in Intel-syntax inline assembly. */
  147. #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
  148. __asm {
  149. pushfd ; Get original EFLAGS
  150. pop eax
  151. mov ecx, eax
  152. xor eax, 200000h ; Flip ID bit in EFLAGS
  153. push eax ; Save new EFLAGS value on stack
  154. popfd ; Replace current EFLAGS value
  155. pushfd ; Get new EFLAGS
  156. pop eax ; Store new EFLAGS in EAX
  157. xor eax, ecx ; Can not toggle ID bit,
  158. jz done ; Processor=80486
  159. mov has_CPUID,1 ; We have CPUID support
  160. done:
  161. }
  /* MSVC x64: no probe is performed, CPUID is simply reported as present. */
  162. #elif defined(_MSC_VER) && defined(_M_X64)
  163. has_CPUID = 1;
  /* Sun compiler, 32-bit x86. The result is stored straight to -8(%ebp).
     NOTE(review): presumably that is the stack slot of has_CPUID - confirm
     against the compiler's frame layout. */
  164. #elif defined(__sun) && defined(__i386)
  165. __asm (
  166. " pushfl \n"
  167. " popl %eax \n"
  168. " movl %eax,%ecx \n"
  169. " xorl $0x200000,%eax \n"
  170. " pushl %eax \n"
  171. " popfl \n"
  172. " pushfl \n"
  173. " popl %eax \n"
  174. " xorl %ecx,%eax \n"
  175. " jz 1f \n"
  176. " movl $1,-8(%ebp) \n"
  177. "1: \n"
  178. );
  /* Sun compiler, amd64. Same caveat as above for the -8(%rbp) store. */
  179. #elif defined(__sun) && defined(__amd64)
  180. __asm (
  181. " pushfq \n"
  182. " popq %rax \n"
  183. " movq %rax,%rcx \n"
  184. " xorl $0x200000,%eax \n"
  185. " pushq %rax \n"
  186. " popfq \n"
  187. " pushfq \n"
  188. " popq %rax \n"
  189. " xorl %ecx,%eax \n"
  190. " jz 1f \n"
  191. " movl $1,-8(%rbp) \n"
  192. "1: \n"
  193. );
  194. #endif
  195. #endif
  196. /* *INDENT-ON* */ /* clang-format on */
  197. return has_CPUID;
  198. }
  /*
   * cpuid(func, a, b, c, d): execute CPUID for leaf `func` and store the
   * resulting EAX, EBX, ECX and EDX values into the lvalues a, b, c and d.
   *
   * One definition is selected per compiler/target. The inline-assembly
   * variants clear ECX before issuing CPUID. When no variant applies, the
   * fallback at the end simply sets all four outputs to 0.
   */
  /* GCC / LLVM, 32-bit x86: EBX is saved and restored around CPUID and its
     value is handed back through ESI. */
  199. #if (defined(__GNUC__) || defined(__llvm__)) && defined(__i386__)
  200. #define cpuid(func, a, b, c, d) \
  201. __asm__ __volatile__( \
  202. " pushl %%ebx \n" \
  203. " xorl %%ecx,%%ecx \n" \
  204. " cpuid \n" \
  205. " movl %%ebx, %%esi \n" \
  206. " popl %%ebx \n" \
  207. : "=a"(a), "=S"(b), "=c"(c), "=d"(d) \
  208. : "a"(func))
  /* GCC / LLVM, x86-64: same scheme with RBX saved/restored and returned via RSI. */
  209. #elif (defined(__GNUC__) || defined(__llvm__)) && defined(__x86_64__)
  210. #define cpuid(func, a, b, c, d) \
  211. __asm__ __volatile__( \
  212. " pushq %%rbx \n" \
  213. " xorq %%rcx,%%rcx \n" \
  214. " cpuid \n" \
  215. " movq %%rbx, %%rsi \n" \
  216. " popq %%rbx \n" \
  217. : "=a"(a), "=S"(b), "=c"(c), "=d"(d) \
  218. : "a"(func))
  /* MSVC 32-bit x86 or Watcom: Intel-syntax inline assembly. */
  219. #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
  220. #define cpuid(func, a, b, c, d) \
  221. __asm { \
  222. __asm mov eax, func \
  223. __asm xor ecx, ecx \
  224. __asm cpuid \
  225. __asm mov a, eax \
  226. __asm mov b, ebx \
  227. __asm mov c, ecx \
  228. __asm mov d, edx \
  229. }
  /* MSVC x64: uses the __cpuid intrinsic. This variant expands to a bare
     { ... } block rather than a single statement.
     NOTE(review): no explicit ECX clear is visible here - confirm the
     intrinsic zeroes the sub-leaf for callers that query leaf 7. */
  230. #elif defined(_MSC_VER) && defined(_M_X64)
  231. #define cpuid(func, a, b, c, d) \
  232. { \
  233. int CPUInfo[4]; \
  234. __cpuid(CPUInfo, func); \
  235. a = CPUInfo[0]; \
  236. b = CPUInfo[1]; \
  237. c = CPUInfo[2]; \
  238. d = CPUInfo[3]; \
  239. }
  /* Fallback: report all-zero registers; the (void) casts reference each
     output after the assignment. */
  240. #else
  241. #define cpuid(func, a, b, c, d) \
  242. do { \
  243. a = b = c = d = 0; \
  244. (void)a; \
  245. (void)b; \
  246. (void)c; \
  247. (void)d; \
  248. } while (0)
  249. #endif
  250. static int CPU_CPUIDFeatures[4];
  251. static int CPU_CPUIDMaxFunction = 0;
  252. static SDL_bool CPU_OSSavesYMM = SDL_FALSE;
  253. static SDL_bool CPU_OSSavesZMM = SDL_FALSE;
  254. static void CPU_calcCPUIDFeatures(void)
  255. {
  256. static SDL_bool checked = SDL_FALSE;
  257. if (!checked) {
  258. checked = SDL_TRUE;
  259. if (CPU_haveCPUID()) {
  260. int a, b, c, d;
  261. cpuid(0, a, b, c, d);
  262. CPU_CPUIDMaxFunction = a;
  263. if (CPU_CPUIDMaxFunction >= 1) {
  264. cpuid(1, a, b, c, d);
  265. CPU_CPUIDFeatures[0] = a;
  266. CPU_CPUIDFeatures[1] = b;
  267. CPU_CPUIDFeatures[2] = c;
  268. CPU_CPUIDFeatures[3] = d;
  269. /* Check to make sure we can call xgetbv */
  270. if (c & 0x08000000) {
  271. /* Call xgetbv to see if YMM (etc) register state is saved */
  272. #if (defined(__GNUC__) || defined(__llvm__)) && (defined(__i386__) || defined(__x86_64__))
  273. __asm__(".byte 0x0f, 0x01, 0xd0"
  274. : "=a"(a)
  275. : "c"(0)
  276. : "%edx");
  277. #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */
  278. a = (int)_xgetbv(0);
  279. #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
  280. __asm
  281. {
  282. xor ecx, ecx
  283. _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
  284. mov a, eax
  285. }
  286. #endif
  287. CPU_OSSavesYMM = ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE;
  288. CPU_OSSavesZMM = (CPU_OSSavesYMM && ((a & 0xe0) == 0xe0)) ? SDL_TRUE : SDL_FALSE;
  289. }
  290. }
  291. }
  292. }
  293. }
  /*
   * Detect AltiVec support at run time.
   *
   * Returns non-zero when AltiVec is available and 0 otherwise. Always 0 when
   * SDL_CPUINFO_DISABLED is defined or when no platform branch applies.
   */
  static int CPU_haveAltiVec(void)
  {
      volatile int altivec = 0;
  #ifndef SDL_CPUINFO_DISABLED
  #if (defined(__MACOS__) && (defined(__ppc__) || defined(__ppc64__))) || (defined(__OpenBSD__) && defined(__powerpc__))
      /* Ask the kernel through sysctl; a failed query leaves the answer at 0. */
  #ifdef __OpenBSD__
      int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC };
  #else
      int mib[2] = { CTL_HW, HW_VECTORUNIT };
  #endif
      int vector_unit = 0;
      size_t len = sizeof(vector_unit);
      if (sysctl(mib, 2, &vector_unit, &len, NULL, 0) == 0) {
          altivec = (vector_unit != 0);
      }
  #elif defined(__FreeBSD__) && defined(__powerpc__)
      /* Read the hardware capability word from the ELF auxiliary vector. */
      unsigned long hwcap = 0;
      elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
      altivec = hwcap & PPC_FEATURE_HAS_ALTIVEC;
  #elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
      /* Brute force: run an AltiVec instruction with a SIGILL handler that
         jumps back here if it traps, then restore the previous handler. */
      void (*previous)(int sig) = signal(SIGILL, illegal_instruction);
      if (setjmp(jmpbuf) == 0) {
          asm volatile("mtspr 256, %0\n\t"
                       "vand %%v0, %%v0, %%v0" ::"r"(-1));
          altivec = 1;
      }
      signal(SIGILL, previous);
  #endif
  #endif
      return altivec;
  }
  /*
   * CPU_haveARMSIMD(): report whether ARM SIMD support is available. One
   * definition is chosen per target.
   */
  #if (defined(__ARM_ARCH) && (__ARM_ARCH >= 6)) || defined(__aarch64__)
  /* Built for ARMv6 or later (or AArch64): reported as present unconditionally. */
  static int CPU_haveARMSIMD(void)
  {
      return 1;
  }
  #elif !defined(__arm__)
  /* Not an ARM build. */
  static int CPU_haveARMSIMD(void)
  {
      return 0;
  }
  #elif defined(__LINUX__)
  /* Scan /proc/self/auxv for the AT_PLATFORM string and accept "v6l..." or
     "v7l...". Returns 0 if the file cannot be opened. */
  static int CPU_haveARMSIMD(void)
  {
      Elf32_auxv_t entry;
      int found = 0;
      const int auxv = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC);

      if (auxv < 0) {
          return 0;
      }
      while (read(auxv, &entry, sizeof(entry)) == sizeof(entry)) {
          const char *platform;

          if (entry.a_type != AT_PLATFORM) {
              continue;
          }
          platform = (const char *)entry.a_un.a_val;
          if (platform == NULL) {
              continue;
          }
          found = (SDL_strncmp(platform, "v6l", 3) == 0) ||
                  (SDL_strncmp(platform, "v7l", 3) == 0);
      }
      close(auxv);
      return found;
  }
  #elif defined(__RISCOS__)
  /* Two OS_PlatformFeatures calls: the first (reason 0) must succeed and
     return bit 31 set in r0; the second passes r0 = 34, r1 = 29 and its r0
     result is returned. Any failing call yields 0. */
  static int CPU_haveARMSIMD(void)
  {
      _kernel_swi_regs regs;

      regs.r[0] = 0;
      if (_kernel_swi(OS_PlatformFeatures, &regs, &regs) == NULL && (regs.r[0] & (1 << 31))) {
          regs.r[0] = 34;
          regs.r[1] = 29;
          if (_kernel_swi(OS_PlatformFeatures, &regs, &regs) == NULL) {
              return regs.r[0];
          }
      }
      return 0;
  }
  #else
  static int CPU_haveARMSIMD(void)
  {
  #warning SDL_HasARMSIMD is not implemented for this ARM platform. Write me.
      return 0;
  }
  #endif
  #if defined(__LINUX__) && defined(__arm__) && !defined(HAVE_GETAUXVAL)
  /*
   * Read /proc/self/auxv and test the HWCAP_NEON bit of the first AT_HWCAP
   * entry. Returns 1 when the bit is set, and 0 when it is clear, when no
   * AT_HWCAP entry is found, or when the file cannot be opened.
   */
  static int readProcAuxvForNeon(void)
  {
      Elf32_auxv_t entry;
      int has_neon = 0;
      const int auxv = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC);

      if (auxv < 0) {
          return 0;
      }
      while (read(auxv, &entry, sizeof(entry)) == sizeof(entry)) {
          if (entry.a_type != AT_HWCAP) {
              continue;
          }
          has_neon = (entry.a_un.a_val & HWCAP_NEON) == HWCAP_NEON;
          break;
      }
      close(auxv);
      return has_neon;
  }
  #endif
  /*
   * Report whether NEON is available: non-zero if so, 0 otherwise.
   *
   * Detecting NEON directly requires a privileged instruction on ARM, so each
   * platform either answers from what is known at build time or asks the
   * operating system.
   */
  static int CPU_haveNEON(void)
  {
  #if defined(SDL_CPUINFO_DISABLED)
      return 0; /* detection disabled */
  #elif (defined(__WINDOWS__) || defined(__WINRT__) || defined(__GDK__)) && (defined(_M_ARM) || defined(_M_ARM64))
      /* Visual Studio, for ARM, doesn't define __ARM_ARCH, so this case comes first. */
      /* The constant seems to have been removed from the headers; supply it. */
  #if !defined(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)
  #define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19
  #endif
      /* All WinRT ARM devices are required to support NEON, but ask anyway. */
      return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != 0;
  #elif (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || defined(__aarch64__)
      return 1; /* NEON is mandatory on ARMv8. */
  #elif __VITA__
      return 1;
  #elif __3DS__
      return 0;
  #elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7)
      /* (sysctlbyname("hw.optional.neon") does not work here.) */
      return 1; /* every Apple ARMv7-or-later chip has NEON. */
  #elif defined(__APPLE__)
      return 0; /* assume no other Apple target has NEON. */
  #elif !defined(__arm__)
      return 0; /* not an ARM CPU. */
  #elif defined(__OpenBSD__)
      return 1; /* OpenBSD only supports ARMv7 CPUs that have NEON. */
  #elif defined(HAVE_ELF_AUX_INFO)
      unsigned long hwcap = 0;
      return (elf_aux_info(AT_HWCAP, (void *)&hwcap, (int)sizeof(hwcap)) == 0) &&
             ((hwcap & HWCAP_NEON) == HWCAP_NEON);
  #elif (defined(__LINUX__) || defined(__ANDROID__)) && defined(HAVE_GETAUXVAL)
      return (getauxval(AT_HWCAP) & HWCAP_NEON) == HWCAP_NEON;
  #elif defined(__LINUX__)
      return readProcAuxvForNeon();
  #elif defined(__ANDROID__)
      /* NDK cpufeatures reads either /proc/self/auxv or /proc/cpuinfo for us. */
      return (android_getCpuFamily() == ANDROID_CPU_FAMILY_ARM) &&
             ((android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) != 0);
  #elif defined(__RISCOS__)
      /* Use the VFPSupport_Features SWI to access the MVFR registers. */
      {
          _kernel_swi_regs regs;
          regs.r[0] = 0;
          return (_kernel_swi(VFPSupport_Features, &regs, &regs) == NULL) &&
                 ((regs.r[2] & 0xFFF000) == 0x111000);
      }
  #else
  #warning SDL_HasNEON is not implemented for this ARM platform. Write me.
      return 0;
  #endif
  }
  /*
   * On LoongArch builds, execute `cpucfg` with CPU_CFG2 as its operand and
   * return the resulting word. On every other target the result is 0.
   */
  static int CPU_readCPUCFG(void)
  {
  #if defined(__loongarch__)
      uint32_t word = 0;
      __asm__ volatile(
          "cpucfg %0, %1 \n\t"
          : "+&r"(word)
          : "r"(CPU_CFG2));
      return word;
  #else
      return 0;
  #endif
  }
  /* Non-zero when the LSX / LASX bit is set in the word read above. */
  #define CPU_haveLSX() (CPU_readCPUCFG() & CPU_CFG2_LSX)
  #define CPU_haveLASX() (CPU_readCPUCFG() & CPU_CFG2_LASX)
  /*
   * Per-feature predicates for RDTSC, MMX and the SSE/AVX family.
   *
   * e2k builds: each predicate is a compile-time constant taken from the
   * compiler's predefined feature macros (RDTSC is always reported absent).
   *
   * All other builds: each predicate tests one bit of the leaf-1 CPUID words
   * cached in CPU_CPUIDFeatures by CPU_calcCPUIDFeatures() - index 3 is EDX,
   * index 2 is ECX - so that function must have run first.
   */
  484. #if defined(__e2k__)
  485. #define CPU_haveRDTSC() (0)
  486. #if defined(__MMX__)
  487. #define CPU_haveMMX() (1)
  488. #else
  489. #define CPU_haveMMX() (0)
  490. #endif
  491. #if defined(__SSE__)
  492. #define CPU_haveSSE() (1)
  493. #else
  494. #define CPU_haveSSE() (0)
  495. #endif
  496. #if defined(__SSE2__)
  497. #define CPU_haveSSE2() (1)
  498. #else
  499. #define CPU_haveSSE2() (0)
  500. #endif
  501. #if defined(__SSE3__)
  502. #define CPU_haveSSE3() (1)
  503. #else
  504. #define CPU_haveSSE3() (0)
  505. #endif
  506. #if defined(__SSE4_1__)
  507. #define CPU_haveSSE41() (1)
  508. #else
  509. #define CPU_haveSSE41() (0)
  510. #endif
  511. #if defined(__SSE4_2__)
  512. #define CPU_haveSSE42() (1)
  513. #else
  514. #define CPU_haveSSE42() (0)
  515. #endif
  516. #if defined(__AVX__)
  517. #define CPU_haveAVX() (1)
  518. #else
  519. #define CPU_haveAVX() (0)
  520. #endif
  521. #else
  /* These expand to the masked word, so the result is zero / non-zero rather
     than strictly 0 / 1. */
  522. #define CPU_haveRDTSC() (CPU_CPUIDFeatures[3] & 0x00000010)
  523. #define CPU_haveMMX() (CPU_CPUIDFeatures[3] & 0x00800000)
  524. #define CPU_haveSSE() (CPU_CPUIDFeatures[3] & 0x02000000)
  525. #define CPU_haveSSE2() (CPU_CPUIDFeatures[3] & 0x04000000)
  526. #define CPU_haveSSE3() (CPU_CPUIDFeatures[2] & 0x00000001)
  527. #define CPU_haveSSE41() (CPU_CPUIDFeatures[2] & 0x00080000)
  528. #define CPU_haveSSE42() (CPU_CPUIDFeatures[2] & 0x00100000)
  /* AVX additionally requires CPU_OSSavesYMM to be set. */
  529. #define CPU_haveAVX() (CPU_OSSavesYMM && (CPU_CPUIDFeatures[2] & 0x10000000))
  530. #endif
  531. #if defined(__e2k__)
  532. inline int
  533. CPU_haveAVX2(void)
  534. {
  535. #if defined(__AVX2__)
  536. return 1;
  537. #else
  538. return 0;
  539. #endif
  540. }
  541. #else
  542. static int CPU_haveAVX2(void)
  543. {
  544. if (CPU_OSSavesYMM && (CPU_CPUIDMaxFunction >= 7)) {
  545. int a, b, c, d;
  546. (void)a;
  547. (void)b;
  548. (void)c;
  549. (void)d; /* compiler warnings... */
  550. cpuid(7, a, b, c, d);
  551. return b & 0x00000020;
  552. }
  553. return 0;
  554. }
  555. #endif
  556. #if defined(__e2k__)
  557. inline int
  558. CPU_haveAVX512F(void)
  559. {
  560. return 0;
  561. }
  562. #else
  563. static int CPU_haveAVX512F(void)
  564. {
  565. if (CPU_OSSavesZMM && (CPU_CPUIDMaxFunction >= 7)) {
  566. int a, b, c, d;
  567. (void)a;
  568. (void)b;
  569. (void)c;
  570. (void)d; /* compiler warnings... */
  571. cpuid(7, a, b, c, d);
  572. return b & 0x00010000;
  573. }
  574. return 0;
  575. }
  576. #endif
  /* Cached CPU count; 0 means it has not been determined yet. */
  static int SDL_CPUCount = 0;

  /*
   * Return the number of CPUs, computing it on the first call and serving
   * the cached value afterwards.
   *
   * Each compiled-in source (sysconf, sysctlbyname, GetSystemInfo) is tried
   * in turn until one yields a positive count. If none does, or detection is
   * disabled, the result is 1.
   */
  int SDL_GetCPUCount(void)
  {
      if (SDL_CPUCount != 0) {
          return SDL_CPUCount;
      }

  #ifndef SDL_CPUINFO_DISABLED
  #if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
      if (SDL_CPUCount <= 0) {
          SDL_CPUCount = (int)sysconf(_SC_NPROCESSORS_ONLN);
      }
  #endif
  #ifdef HAVE_SYSCTLBYNAME
      if (SDL_CPUCount <= 0) {
          size_t size = sizeof(SDL_CPUCount);
          sysctlbyname("hw.ncpu", &SDL_CPUCount, &size, NULL, 0);
      }
  #endif
  #if defined(__WIN32__) || defined(__GDK__)
      if (SDL_CPUCount <= 0) {
          SYSTEM_INFO info;
          GetSystemInfo(&info);
          SDL_CPUCount = info.dwNumberOfProcessors;
      }
  #endif
  #endif

      /* There has to be at least 1, right? :) */
      if (SDL_CPUCount <= 0) {
          SDL_CPUCount = 1;
      }
      return SDL_CPUCount;
  }
  608. #if defined(__e2k__)
  609. inline const char *
  610. SDL_GetCPUType(void)
  611. {
  612. static char SDL_CPUType[13];
  613. SDL_strlcpy(SDL_CPUType, "E2K MACHINE", sizeof(SDL_CPUType));
  614. return SDL_CPUType;
  615. }
  616. #else
  617. /* Oh, such a sweet sweet trick, just not very useful. :) */
  618. static const char *SDL_GetCPUType(void)
  619. {
  620. static char SDL_CPUType[13];
  621. if (!SDL_CPUType[0]) {
  622. int i = 0;
  623. CPU_calcCPUIDFeatures();
  624. if (CPU_CPUIDMaxFunction > 0) { /* do we have CPUID at all? */
  625. int a, b, c, d;
  626. cpuid(0x00000000, a, b, c, d);
  627. (void)a;
  628. SDL_CPUType[i++] = (char)(b & 0xff);
  629. b >>= 8;
  630. SDL_CPUType[i++] = (char)(b & 0xff);
  631. b >>= 8;
  632. SDL_CPUType[i++] = (char)(b & 0xff);
  633. b >>= 8;
  634. SDL_CPUType[i++] = (char)(b & 0xff);
  635. SDL_CPUType[i++] = (char)(d & 0xff);
  636. d >>= 8;
  637. SDL_CPUType[i++] = (char)(d & 0xff);
  638. d >>= 8;
  639. SDL_CPUType[i++] = (char)(d & 0xff);
  640. d >>= 8;
  641. SDL_CPUType[i++] = (char)(d & 0xff);
  642. SDL_CPUType[i++] = (char)(c & 0xff);
  643. c >>= 8;
  644. SDL_CPUType[i++] = (char)(c & 0xff);
  645. c >>= 8;
  646. SDL_CPUType[i++] = (char)(c & 0xff);
  647. c >>= 8;
  648. SDL_CPUType[i++] = (char)(c & 0xff);
  649. }
  650. if (!SDL_CPUType[0]) {
  651. SDL_strlcpy(SDL_CPUType, "Unknown", sizeof(SDL_CPUType));
  652. }
  653. }
  654. return SDL_CPUType;
  655. }
  656. #endif
  #if 0 /* !!! FIXME: Not used at the moment. */
  /*
   * Return the processor brand string.
   *
   * The returned pointer refers to a static buffer owned by this function.
   * Both variants are `static`; a plain `inline` definition does not emit an
   * external definition under C99 rules.
   */
  #if defined(__e2k__)
  /* e2k: copy the compiler-provided CPU name. */
  static const char *SDL_GetCPUName(void)
  {
      static char SDL_CPUName[48];
      SDL_strlcpy(SDL_CPUName, __builtin_cpu_name(), sizeof(SDL_CPUName));
      return SDL_CPUName;
  }
  #else
  /* Builds the brand string on first use from extended CPUID leaves
     0x80000002..0x80000004 (16 bytes each: EAX, EBX, ECX, EDX, lowest byte
     first). Yields "Unknown" when CPUID is unavailable, the leaves are not
     supported, or the string comes back empty. */
  static const char *SDL_GetCPUName(void)
  {
      /* 48 brand bytes plus one byte that always stays 0, so the result is
         terminated even if the CPU returns 48 non-NUL characters. */
      static char SDL_CPUName[49];

      if (!SDL_CPUName[0]) {
          int i = 0;
          int a, b, c, d;

          CPU_calcCPUIDFeatures();
          if (CPU_CPUIDMaxFunction > 0) { /* do we have CPUID at all? */
              cpuid(0x80000000, a, b, c, d);
              if ((unsigned int)a >= 0x80000004u) {
                  unsigned int leaf;

                  for (leaf = 0x80000002u; leaf <= 0x80000004u; ++leaf) {
                      unsigned int regs[4];
                      int r, k;

                      cpuid(leaf, a, b, c, d);
                      regs[0] = (unsigned int)a;
                      regs[1] = (unsigned int)b;
                      regs[2] = (unsigned int)c;
                      regs[3] = (unsigned int)d;
                      for (r = 0; r < 4; ++r) {
                          for (k = 0; k < 4; ++k) {
                              SDL_CPUName[i++] = (char)((regs[r] >> (8 * k)) & 0xff);
                          }
                      }
                  }
              }
          }
          if (!SDL_CPUName[0]) {
              SDL_strlcpy(SDL_CPUName, "Unknown", sizeof(SDL_CPUName));
          }
      }
      return SDL_CPUName;
  }
  #endif
  #endif
  787. int SDL_GetCPUCacheLineSize(void)
  788. {
  789. const char *cpuType = SDL_GetCPUType();
  790. int a, b, c, d;
  791. (void)a;
  792. (void)b;
  793. (void)c;
  794. (void)d;
  795. if (SDL_strcmp(cpuType, "GenuineIntel") == 0 || SDL_strcmp(cpuType, "CentaurHauls") == 0 || SDL_strcmp(cpuType, " Shanghai ") == 0) {
  796. cpuid(0x00000001, a, b, c, d);
  797. return ((b >> 8) & 0xff) * 8;
  798. } else if (SDL_strcmp(cpuType, "AuthenticAMD") == 0 || SDL_strcmp(cpuType, "HygonGenuine") == 0) {
  799. cpuid(0x80000005, a, b, c, d);
  800. return c & 0xff;
  801. } else {
  802. /* Just make a guess here... */
  803. return SDL_CACHELINE_SIZE;
  804. }
  805. }
  806. static Uint32 SDL_CPUFeatures = 0xFFFFFFFF;
  807. static Uint32 SDL_SIMDAlignment = 0xFFFFFFFF;
  808. static Uint32 SDL_GetCPUFeatures(void)
  809. {
  810. if (SDL_CPUFeatures == 0xFFFFFFFF) {
  811. CPU_calcCPUIDFeatures();
  812. SDL_CPUFeatures = 0;
  813. SDL_SIMDAlignment = sizeof(void *); /* a good safe base value */
  814. if (CPU_haveRDTSC()) {
  815. SDL_CPUFeatures |= CPU_HAS_RDTSC;
  816. }
  817. if (CPU_haveAltiVec()) {
  818. SDL_CPUFeatures |= CPU_HAS_ALTIVEC;
  819. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  820. }
  821. if (CPU_haveMMX()) {
  822. SDL_CPUFeatures |= CPU_HAS_MMX;
  823. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
  824. }
  825. if (CPU_haveSSE()) {
  826. SDL_CPUFeatures |= CPU_HAS_SSE;
  827. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  828. }
  829. if (CPU_haveSSE2()) {
  830. SDL_CPUFeatures |= CPU_HAS_SSE2;
  831. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  832. }
  833. if (CPU_haveSSE3()) {
  834. SDL_CPUFeatures |= CPU_HAS_SSE3;
  835. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  836. }
  837. if (CPU_haveSSE41()) {
  838. SDL_CPUFeatures |= CPU_HAS_SSE41;
  839. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  840. }
  841. if (CPU_haveSSE42()) {
  842. SDL_CPUFeatures |= CPU_HAS_SSE42;
  843. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  844. }
  845. if (CPU_haveAVX()) {
  846. SDL_CPUFeatures |= CPU_HAS_AVX;
  847. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
  848. }
  849. if (CPU_haveAVX2()) {
  850. SDL_CPUFeatures |= CPU_HAS_AVX2;
  851. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
  852. }
  853. if (CPU_haveAVX512F()) {
  854. SDL_CPUFeatures |= CPU_HAS_AVX512F;
  855. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 64);
  856. }
  857. if (CPU_haveARMSIMD()) {
  858. SDL_CPUFeatures |= CPU_HAS_ARM_SIMD;
  859. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  860. }
  861. if (CPU_haveNEON()) {
  862. SDL_CPUFeatures |= CPU_HAS_NEON;
  863. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  864. }
  865. if (CPU_haveLSX()) {
  866. SDL_CPUFeatures |= CPU_HAS_LSX;
  867. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  868. }
  869. if (CPU_haveLASX()) {
  870. SDL_CPUFeatures |= CPU_HAS_LASX;
  871. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
  872. }
  873. }
  874. return SDL_CPUFeatures;
  875. }
  /* Evaluates to SDL_TRUE when any bit of `f` is set in the mask returned by
     SDL_GetCPUFeatures(), SDL_FALSE otherwise. */
  876. #define CPU_FEATURE_AVAILABLE(f) ((SDL_GetCPUFeatures() & (f)) ? SDL_TRUE : SDL_FALSE)
  /* The functions below each test a single CPU_HAS_* flag and return
     SDL_TRUE / SDL_FALSE. */
  /* SDL_TRUE if the RDTSC flag was detected. */
  877. SDL_bool SDL_HasRDTSC(void)
  878. {
  879. return CPU_FEATURE_AVAILABLE(CPU_HAS_RDTSC);
  880. }
  /* SDL_TRUE if the AltiVec flag was detected. */
  881. SDL_bool
  882. SDL_HasAltiVec(void)
  883. {
  884. return CPU_FEATURE_AVAILABLE(CPU_HAS_ALTIVEC);
  885. }
  /* SDL_TRUE if the MMX flag was detected. */
  886. SDL_bool
  887. SDL_HasMMX(void)
  888. {
  889. return CPU_FEATURE_AVAILABLE(CPU_HAS_MMX);
  890. }
  /* SDL_TRUE if the SSE flag was detected. */
  891. SDL_bool
  892. SDL_HasSSE(void)
  893. {
  894. return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE);
  895. }
  /* SDL_TRUE if the SSE2 flag was detected. */
  896. SDL_bool
  897. SDL_HasSSE2(void)
  898. {
  899. return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE2);
  900. }
  /* SDL_TRUE if the SSE3 flag was detected. */
  901. SDL_bool
  902. SDL_HasSSE3(void)
  903. {
  904. return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE3);
  905. }
  /* SDL_TRUE if the SSE4.1 flag was detected. */
  906. SDL_bool
  907. SDL_HasSSE41(void)
  908. {
  909. return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE41);
  910. }
  /* SDL_TRUE if the SSE4.2 flag was detected. */
  911. SDL_bool
  912. SDL_HasSSE42(void)
  913. {
  914. return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE42);
  915. }
  /* SDL_TRUE if the AVX flag was detected. */
  916. SDL_bool
  917. SDL_HasAVX(void)
  918. {
  919. return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX);
  920. }
  /* SDL_TRUE if the AVX2 flag was detected. */
  921. SDL_bool
  922. SDL_HasAVX2(void)
  923. {
  924. return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX2);
  925. }
  /* SDL_TRUE if the AVX-512F flag was detected. */
  926. SDL_bool
  927. SDL_HasAVX512F(void)
  928. {
  929. return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX512F);
  930. }
  /* SDL_TRUE if the ARM SIMD flag was detected. */
  931. SDL_bool
  932. SDL_HasARMSIMD(void)
  933. {
  934. return CPU_FEATURE_AVAILABLE(CPU_HAS_ARM_SIMD);
  935. }
  /* SDL_TRUE if the NEON flag was detected. */
  936. SDL_bool
  937. SDL_HasNEON(void)
  938. {
  939. return CPU_FEATURE_AVAILABLE(CPU_HAS_NEON);
  940. }
  /* SDL_TRUE if the LoongArch LSX flag was detected. */
  941. SDL_bool
  942. SDL_HasLSX(void)
  943. {
  944. return CPU_FEATURE_AVAILABLE(CPU_HAS_LSX);
  945. }
  /* SDL_TRUE if the LoongArch LASX flag was detected. */
  946. SDL_bool
  947. SDL_HasLASX(void)
  948. {
  949. return CPU_FEATURE_AVAILABLE(CPU_HAS_LASX);
  950. }
  951. static int SDL_SystemRAM = 0;
  952. int SDL_GetSystemRAM(void)
  953. {
  954. if (!SDL_SystemRAM) {
  955. #ifndef SDL_CPUINFO_DISABLED
  956. #if defined(HAVE_SYSCONF) && defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
  957. if (SDL_SystemRAM <= 0) {
  958. SDL_SystemRAM = (int)((Sint64)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE) / (1024 * 1024));
  959. }
  960. #endif
  961. #ifdef HAVE_SYSCTLBYNAME
  962. if (SDL_SystemRAM <= 0) {
  963. #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__DragonFly__)
  964. #ifdef HW_REALMEM
  965. int mib[2] = { CTL_HW, HW_REALMEM };
  966. #else
  967. /* might only report up to 2 GiB */
  968. int mib[2] = { CTL_HW, HW_PHYSMEM };
  969. #endif /* HW_REALMEM */
  970. #else
  971. int mib[2] = { CTL_HW, HW_MEMSIZE };
  972. #endif /* __FreeBSD__ || __FreeBSD_kernel__ */
  973. Uint64 memsize = 0;
  974. size_t len = sizeof(memsize);
  975. if (sysctl(mib, 2, &memsize, &len, NULL, 0) == 0) {
  976. SDL_SystemRAM = (int)(memsize / (1024 * 1024));
  977. }
  978. }
  979. #endif
  980. #if defined(__WIN32__) || defined(__GDK__)
  981. if (SDL_SystemRAM <= 0) {
  982. MEMORYSTATUSEX stat;
  983. stat.dwLength = sizeof(stat);
  984. if (GlobalMemoryStatusEx(&stat)) {
  985. SDL_SystemRAM = (int)(stat.ullTotalPhys / (1024 * 1024));
  986. }
  987. }
  988. #endif
  989. #ifdef __RISCOS__
  990. if (SDL_SystemRAM <= 0) {
  991. _kernel_swi_regs regs;
  992. regs.r[0] = 0x108;
  993. if (_kernel_swi(OS_Memory, &regs, &regs) == NULL) {
  994. SDL_SystemRAM = (int)(regs.r[1] * regs.r[2] / (1024 * 1024));
  995. }
  996. }
  997. #endif
  998. #ifdef __VITA__
  999. if (SDL_SystemRAM <= 0) {
  1000. /* Vita has 512MiB on SoC, that's split into 256MiB(+109MiB in extended memory mode) for app
  1001. +26MiB of physically continuous memory, +112MiB of CDRAM(VRAM) + system reserved memory. */
  1002. SDL_SystemRAM = 536870912;
  1003. }
  1004. #endif
  1005. #ifdef __PS2__
  1006. if (SDL_SystemRAM <= 0) {
  1007. /* PlayStation 2 has 32MiB however there are some special models with 64 and 128 */
  1008. SDL_SystemRAM = GetMemorySize();
  1009. }
  1010. #endif
  1011. #ifdef __HAIKU__
  1012. if (SDL_SystemRAM <= 0) {
  1013. system_info info;
  1014. if (get_system_info(&info) == B_OK) {
  1015. /* To have an accurate amount, we also take in account the inaccessible pages (aka ignored)
  1016. which is a bit handier compared to the legacy system's api (i.e. used_pages).*/
  1017. SDL_SystemRAM = (int)round((info.max_pages + info.ignored_pages > 0 ? info.ignored_pages : 0) * B_PAGE_SIZE / 1048576.0);
  1018. }
  1019. }
  1020. #endif
  1021. #endif
  1022. }
  1023. return SDL_SystemRAM;
  1024. }
  1025. size_t
  1026. SDL_SIMDGetAlignment(void)
  1027. {
  1028. if (SDL_SIMDAlignment == 0xFFFFFFFF) {
  1029. SDL_GetCPUFeatures(); /* make sure this has been calculated */
  1030. }
  1031. SDL_assert(SDL_SIMDAlignment != 0);
  1032. return SDL_SIMDAlignment;
  1033. }