py_dict.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547
  1. #include "pocketpy/pocketpy.h"
  2. #include "pocketpy/common/utils.h"
  3. #include "pocketpy/common/sstream.h"
  4. #include "pocketpy/objects/object.h"
  5. #include "pocketpy/interpreter/vm.h"
  6. #define PK_DICT_MAX_COLLISION 3
  7. typedef struct {
  8. py_i64 hash;
  9. py_TValue key;
  10. py_TValue val;
  11. } DictEntry;
  12. typedef struct {
  13. int _[PK_DICT_MAX_COLLISION];
  14. } DictIndex;
  15. typedef struct {
  16. int length;
  17. int capacity;
  18. DictIndex* indices;
  19. c11_vector /*T=DictEntry*/ entries;
  20. } Dict;
  21. typedef struct {
  22. DictEntry* curr;
  23. DictEntry* end;
  24. } DictIterator;
  25. static void Dict__ctor(Dict* self, int capacity) {
  26. self->length = 0;
  27. self->capacity = capacity;
  28. self->indices = malloc(self->capacity * sizeof(DictIndex));
  29. memset(self->indices, -1, self->capacity * sizeof(DictIndex));
  30. c11_vector__ctor(&self->entries, sizeof(DictEntry));
  31. }
  32. static void Dict__dtor(Dict* self) {
  33. self->length = 0;
  34. self->capacity = 0;
  35. free(self->indices);
  36. c11_vector__dtor(&self->entries);
  37. }
  38. static bool Dict__try_get(Dict* self, py_TValue* key, DictEntry** out) {
  39. py_i64 hash;
  40. if(!py_hash(key, &hash)) return false;
  41. int idx = hash & (self->capacity - 1);
  42. for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
  43. int idx2 = self->indices[idx]._[i];
  44. if(idx2 == -1) continue;
  45. DictEntry* entry = c11__at(DictEntry, &self->entries, idx2);
  46. int res = py_equal(&entry->key, key);
  47. if(res == 1) {
  48. *out = entry;
  49. return true;
  50. }
  51. if(res == -1) return false; // error
  52. }
  53. *out = NULL;
  54. return true;
  55. }
  56. static void Dict__clear(Dict* self) {
  57. memset(self->indices, -1, self->capacity * sizeof(DictIndex));
  58. c11_vector__clear(&self->entries);
  59. self->length = 0;
  60. }
  61. static void Dict__rehash_2x(Dict* self) {
  62. Dict old_dict = *self;
  63. int new_capacity = self->capacity * 2;
  64. assert(new_capacity <= 1073741824);
  65. do {
  66. Dict__ctor(self, new_capacity);
  67. for(int i = 0; i < old_dict.entries.count; i++) {
  68. DictEntry* entry = c11__at(DictEntry, &old_dict.entries, i);
  69. if(py_isnil(&entry->key)) continue;
  70. int idx = entry->hash & (new_capacity - 1);
  71. bool success = false;
  72. for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
  73. int idx2 = self->indices[idx]._[i];
  74. if(idx2 == -1) {
  75. // insert new entry (empty slot)
  76. c11_vector__push(DictEntry, &self->entries, *entry);
  77. self->indices[idx]._[i] = self->entries.count - 1;
  78. self->length++;
  79. success = true;
  80. break;
  81. }
  82. }
  83. if(!success) {
  84. Dict__dtor(self);
  85. new_capacity *= 2;
  86. continue;
  87. }
  88. }
  89. // resize complete
  90. Dict__dtor(&old_dict);
  91. return;
  92. } while(1);
  93. }
  94. static void Dict__compact_entries(Dict* self) {
  95. int* mappings = malloc(self->entries.count * sizeof(int));
  96. int n = 0;
  97. for(int i = 0; i < self->entries.count; i++) {
  98. DictEntry* entry = c11__at(DictEntry, &self->entries, i);
  99. if(py_isnil(&entry->key)) continue;
  100. mappings[i] = n;
  101. if(i != n) {
  102. DictEntry* new_entry = c11__at(DictEntry, &self->entries, n);
  103. *new_entry = *entry;
  104. }
  105. n++;
  106. }
  107. self->entries.count = n;
  108. // update indices
  109. for(int i = 0; i < self->capacity; i++) {
  110. for(int j = 0; j < PK_DICT_MAX_COLLISION; j++) {
  111. int idx = self->indices[i]._[j];
  112. if(idx == -1) continue;
  113. self->indices[i]._[j] = mappings[idx];
  114. }
  115. }
  116. free(mappings);
  117. }
  118. static bool Dict__set(Dict* self, py_TValue* key, py_TValue* val) {
  119. py_i64 hash;
  120. if(!py_hash(key, &hash)) return false;
  121. int idx = hash & (self->capacity - 1);
  122. for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
  123. int idx2 = self->indices[idx]._[i];
  124. if(idx2 == -1) {
  125. // insert new entry
  126. DictEntry* new_entry = c11_vector__emplace(&self->entries);
  127. new_entry->hash = hash;
  128. new_entry->key = *key;
  129. new_entry->val = *val;
  130. self->indices[idx]._[i] = self->entries.count - 1;
  131. self->length++;
  132. return true;
  133. }
  134. // update existing entry
  135. DictEntry* entry = c11__at(DictEntry, &self->entries, idx2);
  136. int res = py_equal(&entry->key, key);
  137. if(res == 1) {
  138. entry->val = *val;
  139. return true;
  140. }
  141. if(res == -1) return false; // error
  142. }
  143. // no empty slot found
  144. Dict__rehash_2x(self);
  145. return Dict__set(self, key, val);
  146. }
  147. /// Delete an entry from the dict.
  148. /// If the key is found, `py_retval()` is set to the value.
  149. /// If the key is not found, `py_retval()` is set to `nil`.
  150. /// Returns false on error.
  151. static bool Dict__pop(Dict* self, py_Ref key) {
  152. py_i64 hash;
  153. if(!py_hash(key, &hash)) return false;
  154. int idx = hash & (self->capacity - 1);
  155. for(int i = 0; i < PK_DICT_MAX_COLLISION; i++) {
  156. int idx2 = self->indices[idx]._[i];
  157. if(idx2 == -1) continue;
  158. DictEntry* entry = c11__at(DictEntry, &self->entries, idx2);
  159. int res = py_equal(&entry->key, key);
  160. if(res == 1) {
  161. *py_retval() = entry->val;
  162. py_newnil(&entry->key);
  163. self->indices[idx]._[i] = -1;
  164. self->length--;
  165. if(self->length < self->entries.count / 2) Dict__compact_entries(self);
  166. return true;
  167. }
  168. if(res == -1) return false; // error
  169. }
  170. py_newnil(py_retval());
  171. return true;
  172. }
  173. static void DictIterator__ctor(DictIterator* self, Dict* dict) {
  174. self->curr = dict->entries.data;
  175. self->end = self->curr + dict->entries.count;
  176. }
  177. static DictEntry* DictIterator__next(DictIterator* self) {
  178. DictEntry* retval;
  179. do {
  180. if(self->curr == self->end) return NULL;
  181. retval = self->curr++;
  182. } while(py_isnil(&retval->key));
  183. return retval;
  184. }
  185. ///////////////////////////////
  186. static bool _py_dict__new__(int argc, py_Ref argv) {
  187. py_newdict(py_retval());
  188. return true;
  189. }
  190. static bool _py_dict__init__(int argc, py_Ref argv) {
  191. if(argc > 2) return TypeError("dict.__init__() takes at most 2 arguments (%d given)", argc);
  192. if(argc == 1) return true;
  193. assert(argc == 2);
  194. PY_CHECK_ARG_TYPE(1, tp_list);
  195. Dict* self = py_touserdata(argv);
  196. py_Ref list = py_arg(1);
  197. for(int i = 0; i < py_list__len(list); i++) {
  198. py_Ref tuple = py_list__getitem(list, i);
  199. if(!py_istuple(tuple) || py_tuple__len(tuple) != 2) {
  200. return TypeError("dict.__init__() argument must be a list of tuple-2");
  201. }
  202. py_Ref key = py_tuple__getitem(tuple, 0);
  203. py_Ref val = py_tuple__getitem(tuple, 1);
  204. if(!Dict__set(self, key, val)) return false;
  205. }
  206. return true;
  207. }
  208. static bool _py_dict__getitem__(int argc, py_Ref argv) {
  209. PY_CHECK_ARGC(2);
  210. Dict* self = py_touserdata(argv);
  211. DictEntry* entry;
  212. if(!Dict__try_get(self, py_arg(1), &entry)) return false;
  213. if(entry) {
  214. *py_retval() = entry->val;
  215. return true;
  216. }
  217. return KeyError(py_arg(1));
  218. }
  219. static bool _py_dict__setitem__(int argc, py_Ref argv) {
  220. PY_CHECK_ARGC(3);
  221. Dict* self = py_touserdata(argv);
  222. return Dict__set(self, py_arg(1), py_arg(2));
  223. }
  224. static bool _py_dict__delitem__(int argc, py_Ref argv) {
  225. PY_CHECK_ARGC(2);
  226. Dict* self = py_touserdata(argv);
  227. if(!Dict__pop(self, py_arg(1))) return false;
  228. return true;
  229. }
  230. static bool _py_dict__contains__(int argc, py_Ref argv) {
  231. PY_CHECK_ARGC(2);
  232. Dict* self = py_touserdata(argv);
  233. DictEntry* entry;
  234. if(!Dict__try_get(self, py_arg(1), &entry)) return false;
  235. py_newbool(py_retval(), entry != NULL);
  236. return true;
  237. }
  238. static bool _py_dict__len__(int argc, py_Ref argv) {
  239. PY_CHECK_ARGC(1);
  240. Dict* self = py_touserdata(argv);
  241. py_newint(py_retval(), self->length);
  242. return true;
  243. }
  244. static bool _py_dict__repr__(int argc, py_Ref argv) {
  245. PY_CHECK_ARGC(1);
  246. Dict* self = py_touserdata(argv);
  247. c11_sbuf buf;
  248. c11_sbuf__ctor(&buf);
  249. c11_sbuf__write_char(&buf, '{');
  250. bool is_first = true;
  251. for(int i = 0; i < self->entries.count; i++) {
  252. DictEntry* entry = c11__at(DictEntry, &self->entries, i);
  253. if(py_isnil(&entry->key)) continue;
  254. if(!is_first) c11_sbuf__write_cstr(&buf, ", ");
  255. if(!py_repr(&entry->key)) return false;
  256. c11_sbuf__write_sv(&buf, py_tosv(py_retval()));
  257. c11_sbuf__write_cstr(&buf, ": ");
  258. if(!py_repr(&entry->val)) return false;
  259. c11_sbuf__write_sv(&buf, py_tosv(py_retval()));
  260. is_first = false;
  261. }
  262. c11_sbuf__write_char(&buf, '}');
  263. c11_string* res = c11_sbuf__submit(&buf);
  264. py_newstrn(py_retval(), res->data, res->size);
  265. c11_string__delete(res);
  266. return true;
  267. }
  268. static bool _py_dict__eq__(int argc, py_Ref argv) {
  269. PY_CHECK_ARGC(2);
  270. Dict* self = py_touserdata(py_arg(0));
  271. if(!py_isdict(py_arg(1))) {
  272. py_newnotimplemented(py_retval());
  273. return true;
  274. }
  275. Dict* other = py_touserdata(py_arg(1));
  276. if(self->length != other->length) {
  277. py_newbool(py_retval(), false);
  278. return true;
  279. }
  280. DictIterator iter;
  281. DictIterator__ctor(&iter, self);
  282. // for each self key
  283. while(1) {
  284. DictEntry* entry = DictIterator__next(&iter);
  285. if(!entry) break;
  286. DictEntry* other_entry;
  287. if(!Dict__try_get(other, &entry->key, &other_entry)) return false;
  288. if(!other_entry) {
  289. py_newbool(py_retval(), false);
  290. return true;
  291. }
  292. int res = py_equal(&entry->val, &other_entry->val);
  293. if(res == -1) return false;
  294. if(!res) {
  295. py_newbool(py_retval(), false);
  296. return true;
  297. }
  298. }
  299. return true;
  300. }
  301. static bool _py_dict__ne__(int argc, py_Ref argv) {
  302. if(!_py_dict__eq__(argc, argv)) return false;
  303. if(py_isbool(py_retval())) {
  304. bool res = py_tobool(py_retval());
  305. py_newbool(py_retval(), !res);
  306. }
  307. return true;
  308. }
  309. static bool _py_dict__clear(int argc, py_Ref argv) {
  310. PY_CHECK_ARGC(1);
  311. Dict* self = py_touserdata(argv);
  312. Dict__clear(self);
  313. return true;
  314. }
  315. static bool _py_dict__copy(int argc, py_Ref argv) {
  316. PY_CHECK_ARGC(1);
  317. Dict* self = py_touserdata(argv);
  318. Dict* new_dict = py_newobject(py_retval(), tp_dict, 0, sizeof(Dict));
  319. new_dict->capacity = self->capacity;
  320. new_dict->length = self->length;
  321. new_dict->entries = c11_vector__copy(&self->entries);
  322. // copy indices
  323. new_dict->indices = malloc(new_dict->capacity * sizeof(DictIndex));
  324. memcpy(new_dict->indices, self->indices, new_dict->capacity * sizeof(DictIndex));
  325. return true;
  326. }
  327. static bool _py_dict__update(int argc, py_Ref argv) {
  328. PY_CHECK_ARGC(2);
  329. PY_CHECK_ARG_TYPE(1, tp_dict);
  330. Dict* self = py_touserdata(argv);
  331. Dict* other = py_touserdata(py_arg(1));
  332. for(int i = 0; i < other->entries.count; i++) {
  333. DictEntry* entry = c11__at(DictEntry, &other->entries, i);
  334. if(py_isnil(&entry->key)) continue;
  335. if(!Dict__set(self, &entry->key, &entry->val)) return false;
  336. }
  337. return true;
  338. }
  339. static bool _py_dict__get(int argc, py_Ref argv) {
  340. Dict* self = py_touserdata(argv);
  341. if(argc > 3) return TypeError("get() takes at most 3 arguments (%d given)", argc);
  342. py_Ref default_val = argc == 3 ? py_arg(2) : py_None;
  343. DictEntry* entry;
  344. if(!Dict__try_get(self, py_arg(1), &entry)) return false;
  345. *py_retval() = entry ? entry->val : *default_val;
  346. return true;
  347. }
  348. static bool _py_dict__pop(int argc, py_Ref argv) {
  349. Dict* self = py_touserdata(argv);
  350. if(argc < 2 || argc > 3) return TypeError("pop() takes 2 or 3 arguments (%d given)", argc);
  351. py_Ref default_val = argc == 3 ? py_arg(2) : py_None;
  352. if(!Dict__pop(self, py_arg(1))) return false;
  353. if(py_isnil(py_retval())) *py_retval() = *default_val;
  354. return true;
  355. }
  356. static bool _py_dict__items(int argc, py_Ref argv) {
  357. PY_CHECK_ARGC(1);
  358. Dict* self = py_touserdata(argv);
  359. DictIterator* ud = py_newobject(py_retval(), tp_dict_items, 1, sizeof(DictIterator));
  360. DictIterator__ctor(ud, self);
  361. py_setslot(py_retval(), 0, argv); // keep a reference to the dict
  362. return true;
  363. }
  364. static bool _py_dict__keys(int argc, py_Ref argv) {
  365. PY_CHECK_ARGC(1);
  366. Dict* self = py_touserdata(argv);
  367. py_newtuple(py_retval(), self->length);
  368. DictIterator iter;
  369. DictIterator__ctor(&iter, self);
  370. int i = 0;
  371. while(1) {
  372. DictEntry* entry = DictIterator__next(&iter);
  373. if(!entry) break;
  374. py_tuple__setitem(py_retval(), i++, &entry->key);
  375. }
  376. assert(i == self->length);
  377. return true;
  378. }
  379. static bool _py_dict__values(int argc, py_Ref argv) {
  380. PY_CHECK_ARGC(1);
  381. Dict* self = py_touserdata(argv);
  382. py_newtuple(py_retval(), self->length);
  383. DictIterator iter;
  384. DictIterator__ctor(&iter, self);
  385. int i = 0;
  386. while(1) {
  387. DictEntry* entry = DictIterator__next(&iter);
  388. if(!entry) break;
  389. py_tuple__setitem(py_retval(), i++, &entry->val);
  390. }
  391. assert(i == self->length);
  392. return true;
  393. }
  394. py_Type pk_dict__register() {
  395. py_Type type = pk_newtype("dict", tp_object, NULL, (void (*)(void*))Dict__dtor, false, false);
  396. py_bindmagic(type, __new__, _py_dict__new__);
  397. py_bindmagic(type, __init__, _py_dict__init__);
  398. py_bindmagic(type, __getitem__, _py_dict__getitem__);
  399. py_bindmagic(type, __setitem__, _py_dict__setitem__);
  400. py_bindmagic(type, __delitem__, _py_dict__delitem__);
  401. py_bindmagic(type, __contains__, _py_dict__contains__);
  402. py_bindmagic(type, __len__, _py_dict__len__);
  403. py_bindmagic(type, __repr__, _py_dict__repr__);
  404. py_bindmagic(type, __eq__, _py_dict__eq__);
  405. py_bindmagic(type, __ne__, _py_dict__ne__);
  406. py_bindmethod(type, "clear", _py_dict__clear);
  407. py_bindmethod(type, "copy", _py_dict__copy);
  408. py_bindmethod(type, "update", _py_dict__update);
  409. py_bindmethod(type, "get", _py_dict__get);
  410. py_bindmethod(type, "pop", _py_dict__pop);
  411. py_bindmethod(type, "items", _py_dict__items);
  412. py_bindmethod(type, "keys", _py_dict__keys);
  413. py_bindmethod(type, "values", _py_dict__values);
  414. py_setdict(py_tpobject(type), __hash__, py_None);
  415. return type;
  416. }
  417. //////////////////////////
  418. static bool _py_dict_items__iter__(int argc, py_Ref argv) {
  419. PY_CHECK_ARGC(1);
  420. *py_retval() = *argv;
  421. return true;
  422. }
  423. static bool _py_dict_items__next__(int argc, py_Ref argv) {
  424. PY_CHECK_ARGC(1);
  425. DictIterator* iter = py_touserdata(py_arg(0));
  426. DictEntry* entry = (DictIterator__next(iter));
  427. if(entry) {
  428. py_newtuple(py_retval(), 2);
  429. py_tuple__setitem(py_retval(), 0, &entry->key);
  430. py_tuple__setitem(py_retval(), 1, &entry->val);
  431. return true;
  432. }
  433. return StopIteration();
  434. }
  435. py_Type pk_dict_items__register() {
  436. py_Type type = pk_newtype("dict_items", tp_object, NULL, NULL, false, true);
  437. py_bindmagic(type, __iter__, _py_dict_items__iter__);
  438. py_bindmagic(type, __next__, _py_dict_items__next__);
  439. return type;
  440. }
  441. //////////////////////////
  442. void py_newdict(py_Ref out) {
  443. Dict* ud = py_newobject(out, tp_dict, 0, sizeof(Dict));
  444. Dict__ctor(ud, 8);
  445. }
  446. py_Ref py_dict__getitem(py_Ref self, py_Ref key) {
  447. assert(py_isdict(self));
  448. Dict* ud = py_touserdata(self);
  449. DictEntry* entry;
  450. if(!Dict__try_get(ud, key, &entry)) return NULL;
  451. if(entry) return &entry->val;
  452. return NULL;
  453. }
  454. void py_dict__delitem(py_Ref self, py_Ref key) {
  455. assert(py_isdict(self));
  456. Dict* ud = py_touserdata(self);
  457. Dict__pop(ud, key);
  458. }
  459. void py_dict__setitem(py_Ref self, py_Ref key, py_Ref val) {
  460. assert(py_isdict(self));
  461. Dict* ud = py_touserdata(self);
  462. Dict__set(ud, key, val);
  463. }
  464. bool py_dict__contains(py_Ref self, py_Ref key) {
  465. assert(py_isdict(self));
  466. Dict* ud = py_touserdata(self);
  467. DictEntry* entry;
  468. bool ok = Dict__try_get(ud, key, &entry);
  469. return ok && entry != NULL;
  470. }
  471. int py_dict__len(py_Ref self) {
  472. assert(py_isdict(self));
  473. Dict* ud = py_touserdata(self);
  474. return ud->length;
  475. }
  476. bool py_dict__apply(py_Ref self, bool (*f)(py_Ref, py_Ref, void *), void *ctx){
  477. Dict* ud = py_touserdata(self);
  478. for(int i = 0; i < ud->entries.count; i++) {
  479. DictEntry* entry = c11__at(DictEntry, &ud->entries, i);
  480. if(py_isnil(&entry->key)) continue;
  481. if(!f(&entry->key, &entry->val, ctx)) return false;
  482. }
  483. return true;
  484. }
  485. void pk_dict__mark(void* ud, void (*marker)(py_TValue*)) {
  486. Dict* self = ud;
  487. for(int i = 0; i < self->entries.count; i++) {
  488. DictEntry* entry = c11__at(DictEntry, &self->entries, i);
  489. if(py_isnil(&entry->key)) continue;
  490. marker(&entry->key);
  491. marker(&entry->val);
  492. }
  493. }