SDL_cpuinfo.c 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996
  1. /*
  2. Simple DirectMedia Layer
  3. Copyright (C) 1997-2020 Sam Lantinga <slouken@libsdl.org>
  4. This software is provided 'as-is', without any express or implied
  5. warranty. In no event will the authors be held liable for any damages
  6. arising from the use of this software.
  7. Permission is granted to anyone to use this software for any purpose,
  8. including commercial applications, and to alter it and redistribute it
  9. freely, subject to the following restrictions:
  10. 1. The origin of this software must not be misrepresented; you must not
  11. claim that you wrote the original software. If you use this software
  12. in a product, an acknowledgment in the product documentation would be
  13. appreciated but is not required.
  14. 2. Altered source versions must be plainly marked as such, and must not be
  15. misrepresented as being the original software.
  16. 3. This notice may not be removed or altered from any source distribution.
  17. */
  18. #ifdef TEST_MAIN
  19. #include "SDL_config.h"
  20. #else
  21. #include "../SDL_internal.h"
  22. #endif
  23. #if defined(__WIN32__) || defined(__WINRT__)
  24. #include "../core/windows/SDL_windows.h"
  25. #endif
  26. #if defined(__OS2__)
  27. #define INCL_DOS
  28. #include <os2.h>
  29. #ifndef QSV_NUMPROCESSORS
  30. #define QSV_NUMPROCESSORS 26
  31. #endif
  32. #endif
  33. /* CPU feature detection for SDL */
  34. #include "SDL_cpuinfo.h"
  35. #include "SDL_assert.h"
  36. #ifdef HAVE_SYSCONF
  37. #include <unistd.h>
  38. #endif
  39. #ifdef HAVE_SYSCTLBYNAME
  40. #include <sys/types.h>
  41. #include <sys/sysctl.h>
  42. #endif
  43. #if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))
  44. #include <sys/sysctl.h> /* For AltiVec check */
  45. #elif defined(__OpenBSD__) && defined(__powerpc__)
  46. #include <sys/param.h>
  47. #include <sys/sysctl.h> /* For AltiVec check */
  48. #include <machine/cpu.h>
  49. #elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
  50. #include <signal.h>
  51. #include <setjmp.h>
  52. #endif
  53. #if defined(__QNXNTO__)
  54. #include <sys/syspage.h>
  55. #endif
  56. #if (defined(__LINUX__) || defined(__ANDROID__)) && defined(__ARM_ARCH)
  57. /*#include <asm/hwcap.h>*/
  58. #ifndef AT_HWCAP
  59. #define AT_HWCAP 16
  60. #endif
  61. #ifndef AT_PLATFORM
  62. #define AT_PLATFORM 15
  63. #endif
  64. /* Prevent compilation error when including elf.h would also try to define AT_* as an enum */
  65. #ifndef AT_NULL
  66. #define AT_NULL 0
  67. #endif
  68. #ifndef HWCAP_NEON
  69. #define HWCAP_NEON (1 << 12)
  70. #endif
  71. #if defined HAVE_GETAUXVAL
  72. #include <sys/auxv.h>
  73. #else
  74. #include <fcntl.h>
  75. #endif
  76. #endif
  77. #if defined(__ANDROID__) && defined(__ARM_ARCH) && !defined(HAVE_GETAUXVAL)
  78. #if __ARM_ARCH < 8
  79. #include <cpu-features.h>
  80. #endif
  81. #endif
  82. #ifdef __RISCOS__
  83. #include <kernel.h>
  84. #include <swis.h>
  85. #endif
  /* Bit flags making up the SDL_CPUFeatures mask: one bit per CPU capability.
     SDL_GetCPUFeatures() ORs these in as each capability is detected and the
     public SDL_Has*() queries test them. */
  86. #define CPU_HAS_RDTSC (1 << 0)
  87. #define CPU_HAS_ALTIVEC (1 << 1)
  88. #define CPU_HAS_MMX (1 << 2)
  89. #define CPU_HAS_3DNOW (1 << 3)
  90. #define CPU_HAS_SSE (1 << 4)
  91. #define CPU_HAS_SSE2 (1 << 5)
  92. #define CPU_HAS_SSE3 (1 << 6)
  93. #define CPU_HAS_SSE41 (1 << 7)
  94. #define CPU_HAS_SSE42 (1 << 8)
  95. #define CPU_HAS_AVX (1 << 9)
  96. #define CPU_HAS_AVX2 (1 << 10)
  97. #define CPU_HAS_NEON (1 << 11)
  98. #define CPU_HAS_AVX512F (1 << 12)
  99. #define CPU_HAS_ARM_SIMD (1 << 13)
  #if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__
  /* Brute-force instruction-set probing (idea borrowed from the libmpeg2
     library - thanks!): try the instruction and recover through longjmp()
     if the CPU rejects it. */

  /* Checkpoint that illegal_instruction() jumps back to. */
  static jmp_buf jmpbuf;

  /* Signal handler: unwind to the setjmp() checkpoint, making setjmp()
     return 1 there. The signal number itself is not needed. */
  static void
  illegal_instruction(int sig)
  {
      (void) sig;
      longjmp(jmpbuf, 1);
  }
  #endif /* HAVE_SETJMP */
  /* Detect whether the CPUID instruction is available.
     Returns 1 when CPUID is supported, 0 otherwise (also 0 when
     SDL_CPUINFO_DISABLED is defined or no compiler/architecture branch
     below applies). */
  111. static int
  112. CPU_haveCPUID(void)
  113. {
  114. int has_CPUID = 0;
  115. /* *INDENT-OFF* */
  116. #ifndef SDL_CPUINFO_DISABLED
  /* The asm branches all use the same probe: read the flags register, flip
     bit 0x200000 (the ID bit), write it back, read it again, and set
     has_CPUID only if the flipped bit stuck. */
  117. #if defined(__GNUC__) && defined(i386)
  118. __asm__ (
  119. " pushfl # Get original EFLAGS \n"
  120. " popl %%eax \n"
  121. " movl %%eax,%%ecx \n"
  122. " xorl $0x200000,%%eax # Flip ID bit in EFLAGS \n"
  123. " pushl %%eax # Save new EFLAGS value on stack \n"
  124. " popfl # Replace current EFLAGS value \n"
  125. " pushfl # Get new EFLAGS \n"
  126. " popl %%eax # Store new EFLAGS in EAX \n"
  127. " xorl %%ecx,%%eax # Can not toggle ID bit, \n"
  128. " jz 1f # Processor=80486 \n"
  129. " movl $1,%0 # We have CPUID support \n"
  130. "1: \n"
  131. : "=m" (has_CPUID)
  132. :
  133. : "%eax", "%ecx"
  134. );
  135. #elif defined(__GNUC__) && defined(__x86_64__)
  136. /* Technically, if this is being compiled under __x86_64__ then it has
  137. CPUid by definition. But it's nice to be able to prove it. :) */
  138. __asm__ (
  139. " pushfq # Get original EFLAGS \n"
  140. " popq %%rax \n"
  141. " movq %%rax,%%rcx \n"
  142. " xorl $0x200000,%%eax # Flip ID bit in EFLAGS \n"
  143. " pushq %%rax # Save new EFLAGS value on stack \n"
  144. " popfq # Replace current EFLAGS value \n"
  145. " pushfq # Get new EFLAGS \n"
  146. " popq %%rax # Store new EFLAGS in EAX \n"
  147. " xorl %%ecx,%%eax # Can not toggle ID bit, \n"
  148. " jz 1f # Processor=80486 \n"
  149. " movl $1,%0 # We have CPUID support \n"
  150. "1: \n"
  151. : "=m" (has_CPUID)
  152. :
  153. : "%rax", "%rcx"
  154. );
  155. #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
  156. __asm {
  157. pushfd ; Get original EFLAGS
  158. pop eax
  159. mov ecx, eax
  160. xor eax, 200000h ; Flip ID bit in EFLAGS
  161. push eax ; Save new EFLAGS value on stack
  162. popfd ; Replace current EFLAGS value
  163. pushfd ; Get new EFLAGS
  164. pop eax ; Store new EFLAGS in EAX
  165. xor eax, ecx ; Can not toggle ID bit,
  166. jz done ; Processor=80486
  167. mov has_CPUID,1 ; We have CPUID support
  168. done:
  169. }
  /* 64-bit MSVC: no probe is run, CPUID is simply reported as present. */
  170. #elif defined(_MSC_VER) && defined(_M_X64)
  171. has_CPUID = 1;
  /* NOTE(review): both Sun branches store the result through a fixed offset
     of -8 from the frame pointer, presumably the stack slot of has_CPUID --
     confirm against the compiler's actual frame layout. */
  172. #elif defined(__sun) && defined(__i386)
  173. __asm (
  174. " pushfl \n"
  175. " popl %eax \n"
  176. " movl %eax,%ecx \n"
  177. " xorl $0x200000,%eax \n"
  178. " pushl %eax \n"
  179. " popfl \n"
  180. " pushfl \n"
  181. " popl %eax \n"
  182. " xorl %ecx,%eax \n"
  183. " jz 1f \n"
  184. " movl $1,-8(%ebp) \n"
  185. "1: \n"
  186. );
  187. #elif defined(__sun) && defined(__amd64)
  188. __asm (
  189. " pushfq \n"
  190. " popq %rax \n"
  191. " movq %rax,%rcx \n"
  192. " xorl $0x200000,%eax \n"
  193. " pushq %rax \n"
  194. " popfq \n"
  195. " pushfq \n"
  196. " popq %rax \n"
  197. " xorl %ecx,%eax \n"
  198. " jz 1f \n"
  199. " movl $1,-8(%rbp) \n"
  200. "1: \n"
  201. );
  202. #endif
  203. #endif
  204. /* *INDENT-ON* */
  205. return has_CPUID;
  206. }
  /* cpuid(func, a, b, c, d): run the CPUID instruction for function `func`
     and store the resulting EAX/EBX/ECX/EDX into the lvalues a/b/c/d.
     The GNU and 32-bit MSVC/Watcom variants zero ECX before issuing CPUID.
     The GNU variants save and restore (E/R)BX around the instruction and
     hand EBX back through (E/R)SI. */
  207. #if defined(__GNUC__) && defined(i386)
  208. #define cpuid(func, a, b, c, d) \
  209. __asm__ __volatile__ ( \
  210. " pushl %%ebx \n" \
  211. " xorl %%ecx,%%ecx \n" \
  212. " cpuid \n" \
  213. " movl %%ebx, %%esi \n" \
  214. " popl %%ebx \n" : \
  215. "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
  216. #elif defined(__GNUC__) && defined(__x86_64__)
  217. #define cpuid(func, a, b, c, d) \
  218. __asm__ __volatile__ ( \
  219. " pushq %%rbx \n" \
  220. " xorq %%rcx,%%rcx \n" \
  221. " cpuid \n" \
  222. " movq %%rbx, %%rsi \n" \
  223. " popq %%rbx \n" : \
  224. "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
  225. #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
  226. #define cpuid(func, a, b, c, d) \
  227. __asm { \
  228. __asm mov eax, func \
  229. __asm xor ecx, ecx \
  230. __asm cpuid \
  231. __asm mov a, eax \
  232. __asm mov b, ebx \
  233. __asm mov c, ecx \
  234. __asm mov d, edx \
  235. }
  236. #elif defined(_MSC_VER) && defined(_M_X64)
  237. #define cpuid(func, a, b, c, d) \
  238. { \
  239. int CPUInfo[4]; \
  240. __cpuid(CPUInfo, func); \
  241. a = CPUInfo[0]; \
  242. b = CPUInfo[1]; \
  243. c = CPUInfo[2]; \
  244. d = CPUInfo[3]; \
  245. }
  /* Fallback for every other compiler/CPU: all four outputs become zero
     (the void casts only silence unused-variable warnings). */
  246. #else
  247. #define cpuid(func, a, b, c, d) \
  248. do { a = b = c = d = 0; (void) a; (void) b; (void) c; (void) d; } while (0)
  249. #endif
  250. static int CPU_CPUIDFeatures[4];
  251. static int CPU_CPUIDMaxFunction = 0;
  252. static SDL_bool CPU_OSSavesYMM = SDL_FALSE;
  253. static SDL_bool CPU_OSSavesZMM = SDL_FALSE;
  254. static void
  255. CPU_calcCPUIDFeatures(void)
  256. {
  257. static SDL_bool checked = SDL_FALSE;
  258. if (!checked) {
  259. checked = SDL_TRUE;
  260. if (CPU_haveCPUID()) {
  261. int a, b, c, d;
  262. cpuid(0, a, b, c, d);
  263. CPU_CPUIDMaxFunction = a;
  264. if (CPU_CPUIDMaxFunction >= 1) {
  265. cpuid(1, a, b, c, d);
  266. CPU_CPUIDFeatures[0] = a;
  267. CPU_CPUIDFeatures[1] = b;
  268. CPU_CPUIDFeatures[2] = c;
  269. CPU_CPUIDFeatures[3] = d;
  270. /* Check to make sure we can call xgetbv */
  271. if (c & 0x08000000) {
  272. /* Call xgetbv to see if YMM (etc) register state is saved */
  273. #if defined(__GNUC__) && (defined(i386) || defined(__x86_64__))
  274. __asm__(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : "%edx");
  275. #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */
  276. a = (int)_xgetbv(0);
  277. #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
  278. __asm
  279. {
  280. xor ecx, ecx
  281. _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
  282. mov a, eax
  283. }
  284. #endif
  285. CPU_OSSavesYMM = ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE;
  286. CPU_OSSavesZMM = (CPU_OSSavesYMM && ((a & 0xe0) == 0xe0)) ? SDL_TRUE : SDL_FALSE;
  287. }
  288. }
  289. }
  290. }
  291. }
  /* Detect AltiVec support. Returns non-zero when available, 0 otherwise
     (always 0 when SDL_CPUINFO_DISABLED is defined or no branch applies). */
  static int
  CPU_haveAltiVec(void)
  {
      volatile int altivec = 0;
  #ifndef SDL_CPUINFO_DISABLED
  #if (defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))) || (defined(__OpenBSD__) && defined(__powerpc__))
      /* Ask the kernel through sysctl(). */
  #ifdef __OpenBSD__
      int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC };
  #else
      int mib[2] = { CTL_HW, HW_VECTORUNIT };
  #endif
      int vector_unit = 0;
      size_t len = sizeof(vector_unit);

      if (sysctl(mib, 2, &vector_unit, &len, NULL, 0) == 0) {
          altivec = (vector_unit != 0);
      }
  #elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
      /* Execute an AltiVec instruction with a temporary SIGILL handler
         installed; the previous handler is put back afterwards. */
      void (*previous) (int sig) = signal(SIGILL, illegal_instruction);

      if (setjmp(jmpbuf) == 0) {
          asm volatile ("mtspr 256, %0\n\t" "vand %%v0, %%v0, %%v0"::"r" (-1));
          altivec = 1;
      }
      signal(SIGILL, previous);
  #endif
  #endif
      return altivec;
  }
  /* CPU_haveARMSIMD(): one implementation is selected per platform.
     Returns non-zero when the "ARM SIMD" feature should be reported. */
  #if !defined(__ARM_ARCH)
  /* Not an ARM build: never available. */
  static int
  CPU_haveARMSIMD(void)
  {
      return 0;
  }

  #elif defined(__LINUX__)
  #include <unistd.h>
  #include <sys/types.h>
  #include <sys/stat.h>
  #include <fcntl.h>
  #include <elf.h>

  /* Linux: scan /proc/self/auxv for the AT_PLATFORM entry and report support
     when the platform string starts with "v6l" or "v7l".
     NOTE(review): entries are read as Elf32_auxv_t; on a 64-bit process the
     auxv records are presumably wider -- confirm before relying on this
     for AArch64. */
  static int
  CPU_haveARMSIMD(void)
  {
      int arm_simd = 0;
      int fd;

      fd = open("/proc/self/auxv", O_RDONLY);
      if (fd >= 0) {
          Elf32_auxv_t aux;
          while (read(fd, &aux, sizeof aux) == sizeof aux) {
              if (aux.a_type == AT_PLATFORM) {
                  const char *plat = (const char *) aux.a_un.a_val;
                  if (plat) {
                      /* SDL_strncmp keeps this file independent of <string.h>,
                         like the SDL_strcmp/SDL_strlcpy calls elsewhere. */
                      arm_simd = SDL_strncmp(plat, "v6l", 3) == 0 ||
                                 SDL_strncmp(plat, "v7l", 3) == 0;
                  }
              }
          }
          close(fd);
      }
      return arm_simd;
  }

  #elif defined(__RISCOS__)
  /* RISC OS: query the OS_PlatformFeatures SWI. Any SWI error, or a first
     reply without bit 31 set, counts as "not available". */
  static int
  CPU_haveARMSIMD(void)
  {
      _kernel_swi_regs regs;

      regs.r[0] = 0;
      if (_kernel_swi(OS_PlatformFeatures, &regs, &regs) != NULL) {
          return 0;
      }
      if (!(regs.r[0] & (1<<31))) {
          return 0;
      }

      regs.r[0] = 34;
      regs.r[1] = 29;
      if (_kernel_swi(OS_PlatformFeatures, &regs, &regs) != NULL) {
          return 0;
      }
      return regs.r[0];
  }

  #else
  /* Any other ARM platform: no detection implemented, report support. */
  static int
  CPU_haveARMSIMD(void)
  {
  #if !defined(__ANDROID__) && !defined(__IPHONEOS__) && !defined(__TVOS__)
  #warning SDL_HasARMSIMD is not implemented for this ARM platform, defaulting to TRUE
  #endif
      return 1;
  }
  #endif
  #if defined(__LINUX__) && defined(__ARM_ARCH) && !defined(HAVE_GETAUXVAL)
  /* Read /proc/self/auxv as pairs of ints and look for the AT_HWCAP entry.
     Returns 1 when that entry has the HWCAP_NEON bit set; returns 0 when the
     bit is clear, the entry is missing, or the file cannot be opened. */
  static int
  readProcAuxvForNeon(void)
  {
      int entry[2];   /* entry[0] = type, entry[1] = value */
      int has_neon = 0;
      const int fd = open("/proc/self/auxv", O_RDONLY);

      if (fd == -1) {
          return has_neon;
      }

      while (read(fd, entry, sizeof (entry)) == sizeof (entry)) {
          if (entry[0] != AT_HWCAP) {
              continue;
          }
          has_neon = ((entry[1] & HWCAP_NEON) == HWCAP_NEON);
          break;  /* only the first AT_HWCAP entry is considered */
      }
      close(fd);

      return has_neon;
  }
  #endif
  /* Detect ARM NEON support. Exactly one of the preprocessor branches below
     is compiled, chosen by platform; each returns non-zero when NEON is
     reported and 0 otherwise. */
  401. static int
  402. CPU_haveNEON(void)
  403. {
  404. /* The way you detect NEON is a privileged instruction on ARM, so you have to
  405. query the OS kernel in a platform-specific way. :/ */
  406. #if defined(SDL_CPUINFO_DISABLED)
  407. return 0; /* disabled */
  408. #elif (defined(__WINDOWS__) || defined(__WINRT__)) && (defined(_M_ARM) || defined(_M_ARM64))
  409. /* Visual Studio, for ARM, doesn't define __ARM_ARCH. Handle this first. */
  410. /* Seems to have been removed */
  411. # if !defined(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)
  412. # define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19
  413. # endif
  414. /* All WinRT ARM devices are required to support NEON, but just in case. */
  415. return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != 0;
  416. #elif !defined(__ARM_ARCH)
  417. return 0; /* not an ARM CPU at all. */
  418. #elif __ARM_ARCH >= 8
  419. return 1; /* ARMv8 always has non-optional NEON support. */
  420. #elif defined(__APPLE__) && (__ARM_ARCH >= 7)
  421. /* (note that sysctlbyname("hw.optional.neon") doesn't work!) */
  422. return 1; /* all Apple ARMv7 chips and later have NEON. */
  423. #elif defined(__APPLE__)
  424. return 0; /* assume anything else from Apple doesn't have NEON. */
  425. #elif defined(__QNXNTO__)
  426. return SYSPAGE_ENTRY(cpuinfo)->flags & ARM_CPU_FLAG_NEON;
  /* Linux/Android with getauxval(): test the HWCAP_NEON bit of AT_HWCAP. */
  427. #elif (defined(__LINUX__) || defined(__ANDROID__)) && defined(HAVE_GETAUXVAL)
  428. return ((getauxval(AT_HWCAP) & HWCAP_NEON) == HWCAP_NEON);
  /* Linux without getauxval(): parse /proc/self/auxv by hand. */
  429. #elif defined(__LINUX__)
  430. return readProcAuxvForNeon();
  431. #elif defined(__ANDROID__)
  432. /* Use NDK cpufeatures to read either /proc/self/auxv or /proc/cpuinfo */
  433. {
  434. AndroidCpuFamily cpu_family = android_getCpuFamily();
  435. if (cpu_family == ANDROID_CPU_FAMILY_ARM) {
  436. uint64_t cpu_features = android_getCpuFeatures();
  437. if ((cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) != 0) {
  438. return 1;
  439. }
  440. }
  441. return 0;
  442. }
  443. #elif defined(__RISCOS__)
  444. /* Use the VFPSupport_Features SWI to access the MVFR registers */
  445. {
  446. _kernel_swi_regs regs;
  447. regs.r[0] = 0;
  448. if (_kernel_swi(VFPSupport_Features, &regs, &regs) == NULL) {
  449. if ((regs.r[2] & 0xFFF000) == 0x111000) {
  450. return 1;
  451. }
  452. }
  453. return 0;
  454. }
  455. #else
  456. #warning SDL_HasNEON is not implemented for this ARM platform. Write me.
  457. return 0;
  458. #endif
  459. }
  460. static int
  461. CPU_have3DNow(void)
  462. {
  463. if (CPU_CPUIDMaxFunction > 0) { /* that is, do we have CPUID at all? */
  464. int a, b, c, d;
  465. cpuid(0x80000000, a, b, c, d);
  466. if (a >= 0x80000001) {
  467. cpuid(0x80000001, a, b, c, d);
  468. return (d & 0x80000000);
  469. }
  470. }
  471. return 0;
  472. }
  /* Feature tests against the cached CPUID function 1 results filled in by
     CPU_calcCPUIDFeatures(): index [3] holds EDX and index [2] holds ECX.
     Each macro yields non-zero when its bit is set. */
  473. #define CPU_haveRDTSC() (CPU_CPUIDFeatures[3] & 0x00000010)
  474. #define CPU_haveMMX() (CPU_CPUIDFeatures[3] & 0x00800000)
  475. #define CPU_haveSSE() (CPU_CPUIDFeatures[3] & 0x02000000)
  476. #define CPU_haveSSE2() (CPU_CPUIDFeatures[3] & 0x04000000)
  477. #define CPU_haveSSE3() (CPU_CPUIDFeatures[2] & 0x00000001)
  478. #define CPU_haveSSE41() (CPU_CPUIDFeatures[2] & 0x00080000)
  479. #define CPU_haveSSE42() (CPU_CPUIDFeatures[2] & 0x00100000)
  /* AVX additionally requires that the OS saves the YMM register state. */
  480. #define CPU_haveAVX() (CPU_OSSavesYMM && (CPU_CPUIDFeatures[2] & 0x10000000))
  481. static int
  482. CPU_haveAVX2(void)
  483. {
  484. if (CPU_OSSavesYMM && (CPU_CPUIDMaxFunction >= 7)) {
  485. int a, b, c, d;
  486. (void) a; (void) b; (void) c; (void) d; /* compiler warnings... */
  487. cpuid(7, a, b, c, d);
  488. return (b & 0x00000020);
  489. }
  490. return 0;
  491. }
  492. static int
  493. CPU_haveAVX512F(void)
  494. {
  495. if (CPU_OSSavesZMM && (CPU_CPUIDMaxFunction >= 7)) {
  496. int a, b, c, d;
  497. (void) a; (void) b; (void) c; (void) d; /* compiler warnings... */
  498. cpuid(7, a, b, c, d);
  499. return (b & 0x00010000);
  500. }
  501. return 0;
  502. }
  /* Cached CPU count; 0 means "not determined yet". */
  static int SDL_CPUCount = 0;

  /* Return the number of CPUs, probing the platform on the first call and
     caching the answer. The result is always at least 1. */
  int
  SDL_GetCPUCount(void)
  {
      if (SDL_CPUCount) {
          return SDL_CPUCount;
      }

      /* Try each available mechanism in turn until one yields a positive count. */
  #ifndef SDL_CPUINFO_DISABLED
  #if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
      if (SDL_CPUCount <= 0) {
          SDL_CPUCount = (int)sysconf(_SC_NPROCESSORS_ONLN);
      }
  #endif
  #ifdef HAVE_SYSCTLBYNAME
      if (SDL_CPUCount <= 0) {
          size_t len = sizeof(SDL_CPUCount);
          sysctlbyname("hw.ncpu", &SDL_CPUCount, &len, NULL, 0);
      }
  #endif
  #ifdef __WIN32__
      if (SDL_CPUCount <= 0) {
          SYSTEM_INFO sysinfo;
          GetSystemInfo(&sysinfo);
          SDL_CPUCount = sysinfo.dwNumberOfProcessors;
      }
  #endif
  #ifdef __OS2__
      if (SDL_CPUCount <= 0) {
          DosQuerySysInfo(QSV_NUMPROCESSORS, QSV_NUMPROCESSORS,
                          &SDL_CPUCount, sizeof(SDL_CPUCount) );
      }
  #endif
  #endif

      /* There has to be at least 1, right? :) */
      if (SDL_CPUCount <= 0) {
          SDL_CPUCount = 1;
      }
      return SDL_CPUCount;
  }
  541. /* Oh, such a sweet sweet trick, just not very useful. :) */
  542. static const char *
  543. SDL_GetCPUType(void)
  544. {
  545. static char SDL_CPUType[13];
  546. if (!SDL_CPUType[0]) {
  547. int i = 0;
  548. CPU_calcCPUIDFeatures();
  549. if (CPU_CPUIDMaxFunction > 0) { /* do we have CPUID at all? */
  550. int a, b, c, d;
  551. cpuid(0x00000000, a, b, c, d);
  552. (void) a;
  553. SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
  554. SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
  555. SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
  556. SDL_CPUType[i++] = (char)(b & 0xff);
  557. SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
  558. SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
  559. SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
  560. SDL_CPUType[i++] = (char)(d & 0xff);
  561. SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
  562. SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
  563. SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
  564. SDL_CPUType[i++] = (char)(c & 0xff);
  565. }
  566. if (!SDL_CPUType[0]) {
  567. SDL_strlcpy(SDL_CPUType, "Unknown", sizeof(SDL_CPUType));
  568. }
  569. }
  570. return SDL_CPUType;
  571. }
  #ifdef TEST_MAIN  /* !!! FIXME: only used for test at the moment. */
  /* Append the 16 bytes held in four CPUID result registers to dst, in the
     order a, b, c, d and low byte first within each register. */
  static void
  CPU_appendCPUIDBytes(char *dst, int a, int b, int c, int d)
  {
      int regs[4];
      int reg, byte;

      regs[0] = a;
      regs[1] = b;
      regs[2] = c;
      regs[3] = d;
      for (reg = 0; reg < 4; reg++) {
          for (byte = 0; byte < 4; byte++) {
              *dst++ = (char)(regs[reg] & 0xff);
              regs[reg] >>= 8;
          }
      }
  }

  /* Return the CPU name string assembled from CPUID functions 0x80000002
     through 0x80000004 (16 bytes each), or "Unknown" when those functions
     are unavailable or the string comes back empty. The string lives in a
     static buffer that is filled once. */
  static const char *
  SDL_GetCPUName(void)
  {
      /* 3 functions * 16 bytes, plus one byte so the result is always
         NUL-terminated even if the CPU fills all 48 bytes. */
      static char SDL_CPUName[49];

      if (!SDL_CPUName[0]) {
          int a, b, c, d;

          CPU_calcCPUIDFeatures();
          if (CPU_CPUIDMaxFunction > 0) {  /* do we have CPUID at all? */
              /* Function 0x80000000 reports the highest extended function. */
              cpuid(0x80000000, a, b, c, d);
              if (a >= 0x80000004) {
                  cpuid(0x80000002, a, b, c, d);
                  CPU_appendCPUIDBytes(SDL_CPUName, a, b, c, d);
                  cpuid(0x80000003, a, b, c, d);
                  CPU_appendCPUIDBytes(SDL_CPUName + 16, a, b, c, d);
                  cpuid(0x80000004, a, b, c, d);
                  CPU_appendCPUIDBytes(SDL_CPUName + 32, a, b, c, d);
                  SDL_CPUName[48] = '\0';
              }
          }
          if (!SDL_CPUName[0]) {
              SDL_strlcpy(SDL_CPUName, "Unknown", sizeof(SDL_CPUName));
          }
      }
      return SDL_CPUName;
  }
  #endif
  644. int
  645. SDL_GetCPUCacheLineSize(void)
  646. {
  647. const char *cpuType = SDL_GetCPUType();
  648. int a, b, c, d;
  649. (void) a; (void) b; (void) c; (void) d;
  650. if (SDL_strcmp(cpuType, "GenuineIntel") == 0) {
  651. cpuid(0x00000001, a, b, c, d);
  652. return (((b >> 8) & 0xff) * 8);
  653. } else if (SDL_strcmp(cpuType, "AuthenticAMD") == 0 || SDL_strcmp(cpuType, "HygonGenuine") == 0) {
  654. cpuid(0x80000005, a, b, c, d);
  655. return (c & 0xff);
  656. } else {
  657. /* Just make a guess here... */
  658. return SDL_CACHELINE_SIZE;
  659. }
  660. }
  661. static Uint32 SDL_CPUFeatures = 0xFFFFFFFF;
  662. static Uint32 SDL_SIMDAlignment = 0xFFFFFFFF;
  663. static Uint32
  664. SDL_GetCPUFeatures(void)
  665. {
  666. if (SDL_CPUFeatures == 0xFFFFFFFF) {
  667. CPU_calcCPUIDFeatures();
  668. SDL_CPUFeatures = 0;
  669. SDL_SIMDAlignment = sizeof(void *); /* a good safe base value */
  670. if (CPU_haveRDTSC()) {
  671. SDL_CPUFeatures |= CPU_HAS_RDTSC;
  672. }
  673. if (CPU_haveAltiVec()) {
  674. SDL_CPUFeatures |= CPU_HAS_ALTIVEC;
  675. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  676. }
  677. if (CPU_haveMMX()) {
  678. SDL_CPUFeatures |= CPU_HAS_MMX;
  679. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
  680. }
  681. if (CPU_have3DNow()) {
  682. SDL_CPUFeatures |= CPU_HAS_3DNOW;
  683. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
  684. }
  685. if (CPU_haveSSE()) {
  686. SDL_CPUFeatures |= CPU_HAS_SSE;
  687. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  688. }
  689. if (CPU_haveSSE2()) {
  690. SDL_CPUFeatures |= CPU_HAS_SSE2;
  691. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  692. }
  693. if (CPU_haveSSE3()) {
  694. SDL_CPUFeatures |= CPU_HAS_SSE3;
  695. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  696. }
  697. if (CPU_haveSSE41()) {
  698. SDL_CPUFeatures |= CPU_HAS_SSE41;
  699. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  700. }
  701. if (CPU_haveSSE42()) {
  702. SDL_CPUFeatures |= CPU_HAS_SSE42;
  703. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  704. }
  705. if (CPU_haveAVX()) {
  706. SDL_CPUFeatures |= CPU_HAS_AVX;
  707. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
  708. }
  709. if (CPU_haveAVX2()) {
  710. SDL_CPUFeatures |= CPU_HAS_AVX2;
  711. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
  712. }
  713. if (CPU_haveAVX512F()) {
  714. SDL_CPUFeatures |= CPU_HAS_AVX512F;
  715. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 64);
  716. }
  717. if (CPU_haveARMSIMD()) {
  718. SDL_CPUFeatures |= CPU_HAS_ARM_SIMD;
  719. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  720. }
  721. if (CPU_haveNEON()) {
  722. SDL_CPUFeatures |= CPU_HAS_NEON;
  723. SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
  724. }
  725. }
  726. return SDL_CPUFeatures;
  727. }
  /* SDL_TRUE when any bit of `f` is set in the CPU feature mask, else
     SDL_FALSE. `f` is parenthesized so a compound argument such as (A | B)
     is tested as a whole. */
  #define CPU_FEATURE_AVAILABLE(f) ((SDL_GetCPUFeatures() & (f)) ? SDL_TRUE : SDL_FALSE)
  729. SDL_bool SDL_HasRDTSC(void)
  730. {
  731. return CPU_FEATURE_AVAILABLE(CPU_HAS_RDTSC);
  732. }
  733. SDL_bool
  734. SDL_HasAltiVec(void)
  735. {
  736. return CPU_FEATURE_AVAILABLE(CPU_HAS_ALTIVEC);
  737. }
  738. SDL_bool
  739. SDL_HasMMX(void)
  740. {
  741. return CPU_FEATURE_AVAILABLE(CPU_HAS_MMX);
  742. }
  743. SDL_bool
  744. SDL_Has3DNow(void)
  745. {
  746. return CPU_FEATURE_AVAILABLE(CPU_HAS_3DNOW);
  747. }
  748. SDL_bool
  749. SDL_HasSSE(void)
  750. {
  751. return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE);
  752. }
  753. SDL_bool
  754. SDL_HasSSE2(void)
  755. {
  756. return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE2);
  757. }
  758. SDL_bool
  759. SDL_HasSSE3(void)
  760. {
  761. return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE3);
  762. }
  763. SDL_bool
  764. SDL_HasSSE41(void)
  765. {
  766. return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE41);
  767. }
  768. SDL_bool
  769. SDL_HasSSE42(void)
  770. {
  771. return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE42);
  772. }
  773. SDL_bool
  774. SDL_HasAVX(void)
  775. {
  776. return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX);
  777. }
  778. SDL_bool
  779. SDL_HasAVX2(void)
  780. {
  781. return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX2);
  782. }
  783. SDL_bool
  784. SDL_HasAVX512F(void)
  785. {
  786. return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX512F);
  787. }
  788. SDL_bool
  789. SDL_HasARMSIMD(void)
  790. {
  791. return CPU_FEATURE_AVAILABLE(CPU_HAS_ARM_SIMD);
  792. }
  793. SDL_bool
  794. SDL_HasNEON(void)
  795. {
  796. return CPU_FEATURE_AVAILABLE(CPU_HAS_NEON);
  797. }
  /* Cached amount of system RAM in MiB; 0 means "not determined yet". */
  static int SDL_SystemRAM = 0;

  /* Return the amount of system RAM in MiB, probing the platform while the
     cached value is still 0. Each mechanism is only tried when the previous
     ones left the value at or below zero. Returns 0 when nothing could be
     determined. */
  int
  SDL_GetSystemRAM(void)
  {
      if (!SDL_SystemRAM) {
  #ifndef SDL_CPUINFO_DISABLED
  #if defined(HAVE_SYSCONF) && defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
          if (SDL_SystemRAM <= 0) {
              /* page count * page size, widened to 64 bits before multiplying */
              SDL_SystemRAM = (int)((Sint64)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE) / (1024*1024));
          }
  #endif
  #ifdef HAVE_SYSCTLBYNAME
          if (SDL_SystemRAM <= 0) {
  #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__)
  #ifdef HW_REALMEM
              int mib[2] = {CTL_HW, HW_REALMEM};
  #else
              /* might only report up to 2 GiB */
              int mib[2] = {CTL_HW, HW_PHYSMEM};
  #endif /* HW_REALMEM */
  #else
              int mib[2] = {CTL_HW, HW_MEMSIZE};
  #endif /* __FreeBSD__ || __FreeBSD_kernel__ */
              Uint64 memsize = 0;
              size_t len = sizeof(memsize);

              if (sysctl(mib, 2, &memsize, &len, NULL, 0) == 0) {
                  SDL_SystemRAM = (int)(memsize / (1024*1024));
              }
          }
  #endif
  #ifdef __WIN32__
          if (SDL_SystemRAM <= 0) {
              MEMORYSTATUSEX stat;
              stat.dwLength = sizeof(stat);
              if (GlobalMemoryStatusEx(&stat)) {
                  SDL_SystemRAM = (int)(stat.ullTotalPhys / (1024 * 1024));
              }
          }
  #endif
  #ifdef __OS2__
          if (SDL_SystemRAM <= 0) {
              Uint32 sysram = 0;
              DosQuerySysInfo(QSV_TOTPHYSMEM, QSV_TOTPHYSMEM, &sysram, 4);
              SDL_SystemRAM = (int) (sysram / 0x100000U);
          }
  #endif
  #ifdef __RISCOS__
          if (SDL_SystemRAM <= 0) {
              _kernel_swi_regs regs;
              regs.r[0] = 0x108;
              if (_kernel_swi(OS_Memory, &regs, &regs) == NULL) {
                  /* Multiply in 64 bits: the product of the two int registers
                     overflows int once it reaches 2 GiB. */
                  SDL_SystemRAM = (int)(((Sint64) regs.r[1] * regs.r[2]) / (1024 * 1024));
              }
          }
  #endif
  #endif
      }
      return SDL_SystemRAM;
  }
  857. size_t
  858. SDL_SIMDGetAlignment(void)
  859. {
  860. if (SDL_SIMDAlignment == 0xFFFFFFFF) {
  861. SDL_GetCPUFeatures(); /* make sure this has been calculated */
  862. }
  863. SDL_assert(SDL_SIMDAlignment != 0);
  864. return SDL_SIMDAlignment;
  865. }
  866. void *
  867. SDL_SIMDAlloc(const size_t len)
  868. {
  869. const size_t alignment = SDL_SIMDGetAlignment();
  870. const size_t padding = alignment - (len % alignment);
  871. const size_t padded = (padding != alignment) ? (len + padding) : len;
  872. Uint8 *retval = NULL;
  873. Uint8 *ptr = (Uint8 *) SDL_malloc(padded + alignment + sizeof (void *));
  874. if (ptr) {
  875. /* store the actual malloc pointer right before our aligned pointer. */
  876. retval = ptr + sizeof (void *);
  877. retval += alignment - (((size_t) retval) % alignment);
  878. *(((void **) retval) - 1) = ptr;
  879. }
  880. return retval;
  881. }
  /* Release a block obtained from SDL_SIMDAlloc(). The pointer handed to
     SDL_free() is the one stored in the slot just before `ptr`. A NULL
     `ptr` is ignored. */
  void
  SDL_SIMDFree(void *ptr)
  {
      void **slot;

      if (!ptr) {
          return;
      }
      slot = ((void **) ptr) - 1;
      SDL_free(*slot);
  }
  #ifdef TEST_MAIN

  #include <stdio.h>

  /* Stand-alone test driver: print every value this file can report. */
  int
  main(void)
  {
      /* General CPU information */
      printf("CPU count: %d\n", SDL_GetCPUCount());
      printf("CPU type: %s\n", SDL_GetCPUType());
      printf("CPU name: %s\n", SDL_GetCPUName());
      printf("CacheLine size: %d\n", SDL_GetCPUCacheLineSize());

      /* Individual feature flags */
      printf("RDTSC: %d\n", SDL_HasRDTSC());
      printf("Altivec: %d\n", SDL_HasAltiVec());
      printf("MMX: %d\n", SDL_HasMMX());
      printf("3DNow: %d\n", SDL_Has3DNow());
      printf("SSE: %d\n", SDL_HasSSE());
      printf("SSE2: %d\n", SDL_HasSSE2());
      printf("SSE3: %d\n", SDL_HasSSE3());
      printf("SSE4.1: %d\n", SDL_HasSSE41());
      printf("SSE4.2: %d\n", SDL_HasSSE42());
      printf("AVX: %d\n", SDL_HasAVX());
      printf("AVX2: %d\n", SDL_HasAVX2());
      printf("AVX-512F: %d\n", SDL_HasAVX512F());
      printf("ARM SIMD: %d\n", SDL_HasARMSIMD());
      printf("NEON: %d\n", SDL_HasNEON());

      /* Memory */
      printf("RAM: %d MB\n", SDL_GetSystemRAM());
      return 0;
  }

  #endif /* TEST_MAIN */
  918. /* vi: set ts=4 sw=4 expandtab: */