vm.cpp 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112
  1. #include "pocketpy/vm.h"
  2. namespace pkpy{
  3. VM::VM(bool enable_os) : heap(this), enable_os(enable_os) {
  4. this->vm = this;
  5. this->_c.error = nullptr;
  6. _stdout = [](VM* vm, const char* buf, int size) {
  7. PK_UNUSED(vm);
  8. std::cout.write(buf, size);
  9. };
  10. _stderr = [](VM* vm, const char* buf, int size) {
  11. PK_UNUSED(vm);
  12. std::cerr.write(buf, size);
  13. };
  14. callstack.reserve(8);
  15. _main = nullptr;
  16. _last_exception = nullptr;
  17. _import_handler = [](const Str& name) {
  18. PK_UNUSED(name);
  19. return Bytes();
  20. };
  21. init_builtin_types();
  22. }
  23. PyObject* VM::py_str(PyObject* obj){
  24. const PyTypeInfo* ti = _inst_type_info(obj);
  25. if(ti->m__str__) return ti->m__str__(this, obj);
  26. PyObject* self;
  27. PyObject* f = get_unbound_method(obj, __str__, &self, false);
  28. if(self != PY_NULL) return call_method(self, f);
  29. return py_repr(obj);
  30. }
  31. PyObject* VM::py_repr(PyObject* obj){
  32. const PyTypeInfo* ti = _inst_type_info(obj);
  33. if(ti->m__repr__) return ti->m__repr__(this, obj);
  34. return call_method(obj, __repr__);
  35. }
  36. PyObject* VM::py_json(PyObject* obj){
  37. const PyTypeInfo* ti = _inst_type_info(obj);
  38. if(ti->m__json__) return ti->m__json__(this, obj);
  39. return call_method(obj, __json__);
  40. }
  41. PyObject* VM::py_iter(PyObject* obj){
  42. const PyTypeInfo* ti = _inst_type_info(obj);
  43. if(ti->m__iter__) return ti->m__iter__(this, obj);
  44. PyObject* self;
  45. PyObject* iter_f = get_unbound_method(obj, __iter__, &self, false);
  46. if(self != PY_NULL) return call_method(self, iter_f);
  47. TypeError(OBJ_NAME(_t(obj)).escape() + " object is not iterable");
  48. return nullptr;
  49. }
  50. FrameId VM::top_frame(){
  51. #if PK_DEBUG_EXTRA_CHECK
  52. if(callstack.empty()) FATAL_ERROR();
  53. #endif
  54. return FrameId(&callstack.data(), callstack.size()-1);
  55. }
  56. void VM::_pop_frame(){
  57. Frame* frame = &callstack.top();
  58. s_data.reset(frame->_sp_base);
  59. callstack.pop();
  60. }
  61. PyObject* VM::find_name_in_mro(PyObject* cls, StrName name){
  62. PyObject* val;
  63. do{
  64. val = cls->attr().try_get(name);
  65. if(val != nullptr) return val;
  66. Type base = _all_types[PK_OBJ_GET(Type, cls)].base;
  67. if(base.index == -1) break;
  68. cls = _all_types[base].obj;
  69. }while(true);
  70. return nullptr;
  71. }
  72. bool VM::isinstance(PyObject* obj, Type cls_t){
  73. Type obj_t = PK_OBJ_GET(Type, _t(obj));
  74. do{
  75. if(obj_t == cls_t) return true;
  76. Type base = _all_types[obj_t].base;
  77. if(base.index == -1) break;
  78. obj_t = base;
  79. }while(true);
  80. return false;
  81. }
  82. PyObject* VM::exec(Str source, Str filename, CompileMode mode, PyObject* _module){
  83. if(_module == nullptr) _module = _main;
  84. try {
  85. CodeObject_ code = compile(source, filename, mode);
  86. #if PK_DEBUG_DIS_EXEC
  87. if(_module == _main) std::cout << disassemble(code) << '\n';
  88. #endif
  89. return _exec(code, _module);
  90. }catch (const Exception& e){
  91. Str sum = e.summary() + "\n";
  92. _stderr(this, sum.data, sum.size);
  93. }
  94. #if !PK_DEBUG_FULL_EXCEPTION
  95. catch (const std::exception& e) {
  96. Str msg = "An std::exception occurred! It could be a bug.\n";
  97. msg = msg + e.what() + "\n";
  98. _stderr(this, msg.data, msg.size);
  99. }
  100. #endif
  101. callstack.clear();
  102. s_data.clear();
  103. return nullptr;
  104. }
  105. void VM::exec(Str source){
  106. exec(source, "main.py", EXEC_MODE);
  107. }
  108. PyObject* VM::eval(Str source){
  109. return exec(source, "<eval>", EVAL_MODE);
  110. }
  111. PyObject* VM::new_type_object(PyObject* mod, StrName name, Type base, bool subclass_enabled){
  112. PyObject* obj = heap._new<Type>(tp_type, _all_types.size());
  113. const PyTypeInfo& base_info = _all_types[base];
  114. if(!base_info.subclass_enabled){
  115. TypeError(fmt("type ", base_info.name.escape(), " is not `subclass_enabled`"));
  116. }
  117. PyTypeInfo info{
  118. obj,
  119. base,
  120. (mod!=nullptr && mod!=builtins) ? Str(OBJ_NAME(mod)+"."+name.sv()): name.sv(),
  121. subclass_enabled,
  122. };
  123. if(mod != nullptr) mod->attr().set(name, obj);
  124. _all_types.push_back(info);
  125. return obj;
  126. }
  127. Type VM::_new_type_object(StrName name, Type base) {
  128. PyObject* obj = new_type_object(nullptr, name, base, false);
  129. return PK_OBJ_GET(Type, obj);
  130. }
  131. PyObject* VM::_find_type_object(const Str& type){
  132. PyObject* obj = builtins->attr().try_get(type);
  133. if(obj == nullptr){
  134. for(auto& t: _all_types) if(t.name == type) return t.obj;
  135. throw std::runtime_error(fmt("type not found: ", type));
  136. }
  137. check_non_tagged_type(obj, tp_type);
  138. return obj;
  139. }
  140. Type VM::_type(const Str& type){
  141. PyObject* obj = _find_type_object(type);
  142. return PK_OBJ_GET(Type, obj);
  143. }
  144. PyTypeInfo* VM::_type_info(const Str& type){
  145. PyObject* obj = builtins->attr().try_get(type);
  146. if(obj == nullptr){
  147. for(auto& t: _all_types) if(t.name == type) return &t;
  148. FATAL_ERROR();
  149. }
  150. return &_all_types[PK_OBJ_GET(Type, obj)];
  151. }
  152. PyTypeInfo* VM::_type_info(Type type){
  153. return &_all_types[type];
  154. }
  155. const PyTypeInfo* VM::_inst_type_info(PyObject* obj){
  156. if(is_int(obj)) return &_all_types[tp_int];
  157. if(is_float(obj)) return &_all_types[tp_float];
  158. return &_all_types[obj->type];
  159. }
  160. bool VM::py_equals(PyObject* lhs, PyObject* rhs){
  161. if(lhs == rhs) return true;
  162. const PyTypeInfo* ti = _inst_type_info(lhs);
  163. PyObject* res;
  164. if(ti->m__eq__){
  165. res = ti->m__eq__(this, lhs, rhs);
  166. if(res != vm->NotImplemented) return res == vm->True;
  167. }
  168. res = call_method(lhs, __eq__, rhs);
  169. if(res != vm->NotImplemented) return res == vm->True;
  170. ti = _inst_type_info(rhs);
  171. if(ti->m__eq__){
  172. res = ti->m__eq__(this, rhs, lhs);
  173. if(res != vm->NotImplemented) return res == vm->True;
  174. }
  175. res = call_method(rhs, __eq__, lhs);
  176. if(res != vm->NotImplemented) return res == vm->True;
  177. return false;
  178. }
  179. int VM::normalized_index(int index, int size){
  180. if(index < 0) index += size;
  181. if(index < 0 || index >= size){
  182. IndexError(std::to_string(index) + " not in [0, " + std::to_string(size) + ")");
  183. }
  184. return index;
  185. }
  186. PyObject* VM::py_next(PyObject* obj){
  187. const PyTypeInfo* ti = _inst_type_info(obj);
  188. if(ti->m__next__) return ti->m__next__(this, obj);
  189. return call_method(obj, __next__);
  190. }
  191. PyObject* VM::py_import(StrName name, bool relative){
  192. Str filename;
  193. int type;
  194. if(relative){
  195. ImportContext* ctx = &_import_context;
  196. type = 2;
  197. for(auto it=ctx->pending.rbegin(); it!=ctx->pending.rend(); ++it){
  198. if(it->second == 2) continue;
  199. if(it->second == 1){
  200. filename = fmt(it->first, kPlatformSep, name, ".py");
  201. name = fmt(it->first, '.', name).c_str();
  202. break;
  203. }
  204. }
  205. if(filename.length() == 0) _error("ImportError", "relative import outside of package");
  206. }else{
  207. type = 0;
  208. filename = fmt(name, ".py");
  209. }
  210. for(auto& [k, v]: _import_context.pending){
  211. if(k == name){
  212. vm->_error("ImportError", fmt("circular import ", name.escape()));
  213. }
  214. }
  215. PyObject* ext_mod = _modules.try_get(name);
  216. if(ext_mod == nullptr){
  217. Str source;
  218. auto it = _lazy_modules.find(name);
  219. if(it == _lazy_modules.end()){
  220. Bytes b = _import_handler(filename);
  221. if(!relative && !b){
  222. filename = fmt(name, kPlatformSep, "__init__.py");
  223. b = _import_handler(filename);
  224. if(b) type = 1;
  225. }
  226. if(!b) _error("ImportError", fmt("module ", name.escape(), " not found"));
  227. source = Str(b.str());
  228. }else{
  229. source = it->second;
  230. _lazy_modules.erase(it);
  231. }
  232. auto _ = _import_context.temp(this, name, type);
  233. CodeObject_ code = compile(source, filename, EXEC_MODE);
  234. PyObject* new_mod = new_module(name);
  235. _exec(code, new_mod);
  236. new_mod->attr()._try_perfect_rehash();
  237. return new_mod;
  238. }else{
  239. return ext_mod;
  240. }
  241. }
  242. VM::~VM() {
  243. callstack.clear();
  244. s_data.clear();
  245. _all_types.clear();
  246. _modules.clear();
  247. _lazy_modules.clear();
  248. }
  249. PyObject* VM::py_negate(PyObject* obj){
  250. const PyTypeInfo* ti = _inst_type_info(obj);
  251. if(ti->m__neg__) return ti->m__neg__(this, obj);
  252. return call_method(obj, __neg__);
  253. }
  254. void VM::check_int_or_float(PyObject *obj){
  255. if(!is_tagged(obj)){
  256. TypeError("expected 'int' or 'float', got " + OBJ_NAME(_t(obj)).escape());
  257. }
  258. }
  259. bool VM::py_bool(PyObject* obj){
  260. if(is_non_tagged_type(obj, tp_bool)) return obj == True;
  261. if(obj == None) return false;
  262. if(is_int(obj)) return _CAST(i64, obj) != 0;
  263. if(is_float(obj)) return _CAST(f64, obj) != 0.0;
  264. PyObject* self;
  265. PyObject* len_f = get_unbound_method(obj, __len__, &self, false);
  266. if(self != PY_NULL){
  267. PyObject* ret = call_method(self, len_f);
  268. return CAST(i64, ret) > 0;
  269. }
  270. return true;
  271. }
  272. PyObject* VM::py_list(PyObject* it){
  273. auto _lock = heap.gc_scope_lock();
  274. it = py_iter(it);
  275. List list;
  276. PyObject* obj = py_next(it);
  277. while(obj != StopIteration){
  278. list.push_back(obj);
  279. obj = py_next(it);
  280. }
  281. return VAR(std::move(list));
  282. }
  283. void VM::parse_int_slice(const Slice& s, int length, int& start, int& stop, int& step){
  284. auto clip = [](int value, int min, int max){
  285. if(value < min) return min;
  286. if(value > max) return max;
  287. return value;
  288. };
  289. if(s.step == None) step = 1;
  290. else step = CAST(int, s.step);
  291. if(step == 0) ValueError("slice step cannot be zero");
  292. if(step > 0){
  293. if(s.start == None){
  294. start = 0;
  295. }else{
  296. start = CAST(int, s.start);
  297. if(start < 0) start += length;
  298. start = clip(start, 0, length);
  299. }
  300. if(s.stop == None){
  301. stop = length;
  302. }else{
  303. stop = CAST(int, s.stop);
  304. if(stop < 0) stop += length;
  305. stop = clip(stop, 0, length);
  306. }
  307. }else{
  308. if(s.start == None){
  309. start = length - 1;
  310. }else{
  311. start = CAST(int, s.start);
  312. if(start < 0) start += length;
  313. start = clip(start, -1, length - 1);
  314. }
  315. if(s.stop == None){
  316. stop = -1;
  317. }else{
  318. stop = CAST(int, s.stop);
  319. if(stop < 0) stop += length;
  320. stop = clip(stop, -1, length - 1);
  321. }
  322. }
  323. }
  324. i64 VM::py_hash(PyObject* obj){
  325. // https://docs.python.org/3.10/reference/datamodel.html#object.__hash__
  326. const PyTypeInfo* ti = _inst_type_info(obj);
  327. if(ti->m__hash__) return ti->m__hash__(this, obj);
  328. PyObject* self;
  329. PyObject* f = get_unbound_method(obj, __hash__, &self, false);
  330. if(f != nullptr){
  331. PyObject* ret = call_method(self, f);
  332. return CAST(i64, ret);
  333. }
  334. // if it is trivial `object`, return PK_BITS
  335. if(ti == &_all_types[tp_object]) return PK_BITS(obj);
  336. // otherwise, we check if it has a custom __eq__ other than object.__eq__
  337. bool has_custom_eq = false;
  338. if(ti->m__eq__) has_custom_eq = true;
  339. else{
  340. f = get_unbound_method(obj, __eq__, &self, false);
  341. has_custom_eq = f != _t(tp_object)->attr(__eq__);
  342. }
  343. if(has_custom_eq){
  344. TypeError(fmt("unhashable type: ", ti->name.escape()));
  345. return 0;
  346. }else{
  347. return PK_BITS(obj);
  348. }
  349. }
  350. PyObject* VM::format(Str spec, PyObject* obj){
  351. if(spec.empty()) return py_str(obj);
  352. char type;
  353. switch(spec.end()[-1]){
  354. case 'f': case 'd': case 's':
  355. type = spec.end()[-1];
  356. spec = spec.substr(0, spec.length() - 1);
  357. break;
  358. default: type = ' '; break;
  359. }
  360. char pad_c = ' ';
  361. for(char c: std::string_view("0-=*#@!~")){
  362. if(spec[0] == c){
  363. pad_c = c;
  364. spec = spec.substr(1);
  365. break;
  366. }
  367. }
  368. char align;
  369. if(spec[0] == '>'){
  370. align = '>';
  371. spec = spec.substr(1);
  372. }else if(spec[0] == '<'){
  373. align = '<';
  374. spec = spec.substr(1);
  375. }else{
  376. if(is_int(obj) || is_float(obj)) align = '>';
  377. else align = '<';
  378. }
  379. int dot = spec.index(".");
  380. int width, precision;
  381. try{
  382. if(dot >= 0){
  383. if(dot == 0){
  384. width = -1;
  385. }else{
  386. width = Number::stoi(spec.substr(0, dot).str());
  387. }
  388. precision = Number::stoi(spec.substr(dot+1).str());
  389. }else{
  390. width = Number::stoi(spec.str());
  391. precision = -1;
  392. }
  393. }catch(...){
  394. ValueError("invalid format specifer");
  395. UNREACHABLE();
  396. }
  397. if(type != 'f' && dot >= 0) ValueError("precision not allowed in the format specifier");
  398. Str ret;
  399. if(type == 'f'){
  400. f64 val = CAST(f64, obj);
  401. if(precision < 0) precision = 6;
  402. std::stringstream ss;
  403. ss << std::fixed << std::setprecision(precision) << val;
  404. ret = ss.str();
  405. }else if(type == 'd'){
  406. ret = std::to_string(CAST(i64, obj));
  407. }else if(type == 's'){
  408. ret = CAST(Str&, obj);
  409. }else{
  410. ret = CAST(Str&, py_str(obj));
  411. }
  412. if(width != -1 && width > ret.length()){
  413. int pad = width - ret.length();
  414. std::string padding(pad, pad_c);
  415. if(align == '>') ret = padding.c_str() + ret;
  416. else ret = ret + padding.c_str();
  417. }
  418. return VAR(ret);
  419. }
  420. PyObject* VM::new_module(StrName name) {
  421. PyObject* obj = heap._new<DummyModule>(tp_module);
  422. obj->attr().set("__name__", VAR(name.sv()));
  423. // we do not allow override in order to avoid memory leak
  424. // it is because Module objects are not garbage collected
  425. if(_modules.contains(name)) throw std::runtime_error("module already exists");
  426. _modules.set(name, obj);
  427. return obj;
  428. }
  429. static std::string _opcode_argstr(VM* vm, Bytecode byte, const CodeObject* co){
  430. std::string argStr = byte.arg == -1 ? "" : std::to_string(byte.arg);
  431. switch(byte.op){
  432. case OP_LOAD_CONST: case OP_FORMAT_STRING:
  433. if(vm != nullptr){
  434. argStr += fmt(" (", CAST(Str, vm->py_repr(co->consts[byte.arg])), ")");
  435. }
  436. break;
  437. case OP_LOAD_NAME: case OP_LOAD_GLOBAL: case OP_LOAD_NONLOCAL: case OP_STORE_GLOBAL:
  438. case OP_LOAD_ATTR: case OP_LOAD_METHOD: case OP_STORE_ATTR: case OP_DELETE_ATTR:
  439. case OP_IMPORT_NAME: case OP_BEGIN_CLASS: case OP_RAISE:
  440. case OP_DELETE_GLOBAL: case OP_INC_GLOBAL: case OP_DEC_GLOBAL: case OP_STORE_CLASS_ATTR:
  441. argStr += fmt(" (", StrName(byte.arg).sv(), ")");
  442. break;
  443. case OP_LOAD_FAST: case OP_STORE_FAST: case OP_DELETE_FAST: case OP_INC_FAST: case OP_DEC_FAST:
  444. argStr += fmt(" (", co->varnames[byte.arg].sv(), ")");
  445. break;
  446. case OP_LOAD_FUNCTION:
  447. argStr += fmt(" (", co->func_decls[byte.arg]->code->name, ")");
  448. break;
  449. }
  450. return argStr;
  451. }
  452. Str VM::disassemble(CodeObject_ co){
  453. auto pad = [](const Str& s, const int n){
  454. if(s.length() >= n) return s.substr(0, n);
  455. return s + std::string(n - s.length(), ' ');
  456. };
  457. std::vector<int> jumpTargets;
  458. for(auto byte : co->codes){
  459. if(byte.op == OP_JUMP_ABSOLUTE || byte.op == OP_POP_JUMP_IF_FALSE || byte.op == OP_SHORTCUT_IF_FALSE_OR_POP){
  460. jumpTargets.push_back(byte.arg);
  461. }
  462. }
  463. std::stringstream ss;
  464. int prev_line = -1;
  465. for(int i=0; i<co->codes.size(); i++){
  466. const Bytecode& byte = co->codes[i];
  467. Str line = std::to_string(co->lines[i]);
  468. if(co->lines[i] == prev_line) line = "";
  469. else{
  470. if(prev_line != -1) ss << "\n";
  471. prev_line = co->lines[i];
  472. }
  473. std::string pointer;
  474. if(std::find(jumpTargets.begin(), jumpTargets.end(), i) != jumpTargets.end()){
  475. pointer = "-> ";
  476. }else{
  477. pointer = " ";
  478. }
  479. ss << pad(line, 8) << pointer << pad(std::to_string(i), 3);
  480. ss << " " << pad(OP_NAMES[byte.op], 25) << " ";
  481. // ss << pad(byte.arg == -1 ? "" : std::to_string(byte.arg), 5);
  482. std::string argStr = _opcode_argstr(this, byte, co.get());
  483. ss << argStr;
  484. // ss << pad(argStr, 40); // may overflow
  485. // ss << co->blocks[byte.block].type;
  486. if(i != co->codes.size() - 1) ss << '\n';
  487. }
  488. for(auto& decl: co->func_decls){
  489. ss << "\n\n" << "Disassembly of " << decl->code->name << ":\n";
  490. ss << disassemble(decl->code);
  491. }
  492. ss << "\n";
  493. return Str(ss.str());
  494. }
  #if PK_DEBUG_CEVAL_STEP
  // Debug aid (compiled only with PK_DEBUG_CEVAL_STEP): prints one line to
  // std::cout describing the value stack and the instruction the top frame is
  // about to run. Does nothing before `_main` exists or with an empty call
  // stack. Each '|' printed in front of a slot marks a frame whose `_sp_base`
  // is that slot.
  void VM::_log_s_data(const char* title) {
      if(_main == nullptr) return;
      if(callstack.empty()) return;

      std::stringstream out;
      if(title) out << title << " | ";

      // Count how many frames start at each stack slot.
      std::map<PyObject**, int> frame_starts;
      for(Frame& fr: callstack.data()){
          if(fr._sp_base == nullptr) FATAL_ERROR();
          frame_starts[fr._sp_base] += 1;
      }

      FrameId frame = top_frame();
      int line = frame->co->lines[frame->_ip];
      out << frame->co->name << ":" << line << " [";

      for(PyObject** slot=s_data.begin(); slot!=s_data.end(); slot++){
          out << std::string(frame_starts[slot], '|');
          if(frame_starts[slot] > 0) out << " ";

          PyObject* value = *slot;
          if(value == nullptr){
              out << "(nil)";
          }else if(value == PY_NULL){
              out << "NULL";
          }else if(is_int(value)){
              out << CAST(i64, value);
          }else if(is_float(value)){
              out << CAST(f64, value);
          }else if(is_type(value, tp_str)){
              out << CAST(Str, value).escape();
          }else if(value == None){
              out << "None";
          }else if(value == True){
              out << "True";
          }else if(value == False){
              out << "False";
          }else if(is_type(value, tp_function)){
              auto& fn = CAST(Function&, value);
              out << fn.decl->code->name << "(...)";
          }else if(is_type(value, tp_type)){
              Type t = PK_OBJ_GET(Type, value);
              out << "<class " + _all_types[t].name.escape() + ">";
          }else if(is_type(value, tp_list)){
              auto& seq = CAST(List&, value);
              out << "list(size=" << seq.size() << ")";
          }else if(is_type(value, tp_tuple)){
              auto& seq = CAST(Tuple&, value);
              out << "tuple(size=" << seq.size() << ")";
          }else{
              out << "(" << obj_type_name(this, value->type) << ")";
          }
          out << ", ";
      }

      std::string rendered = out.str();
      if(!s_data.empty()) {
          // strip the trailing ", " left by the last slot
          rendered.pop_back();
          rendered.pop_back();
      }
      rendered.push_back(']');

      Bytecode next = frame->co->codes[frame->_ip];
      std::cout << rendered << " " << OP_NAMES[next.op] << " " << _opcode_argstr(nullptr, next, frame->co) << std::endl;
  }
  #endif
  545. void VM::init_builtin_types(){
  546. _all_types.push_back({heap._new<Type>(Type(1), Type(0)), -1, "object", true});
  547. _all_types.push_back({heap._new<Type>(Type(1), Type(1)), 0, "type", false});
  548. tp_object = 0; tp_type = 1;
  549. tp_int = _new_type_object("int");
  550. tp_float = _new_type_object("float");
  551. if(tp_int.index != kTpIntIndex || tp_float.index != kTpFloatIndex) FATAL_ERROR();
  552. tp_bool = _new_type_object("bool");
  553. tp_str = _new_type_object("str");
  554. tp_list = _new_type_object("list");
  555. tp_tuple = _new_type_object("tuple");
  556. tp_slice = _new_type_object("slice");
  557. tp_range = _new_type_object("range");
  558. tp_module = _new_type_object("module");
  559. tp_function = _new_type_object("function");
  560. tp_native_func = _new_type_object("native_func");
  561. tp_bound_method = _new_type_object("bound_method");
  562. tp_super = _new_type_object("super");
  563. tp_exception = _new_type_object("Exception");
  564. tp_bytes = _new_type_object("bytes");
  565. tp_mappingproxy = _new_type_object("mappingproxy");
  566. tp_dict = _new_type_object("dict");
  567. tp_property = _new_type_object("property");
  568. tp_star_wrapper = _new_type_object("_star_wrapper");
  569. this->None = heap._new<Dummy>(_new_type_object("NoneType"));
  570. this->NotImplemented = heap._new<Dummy>(_new_type_object("NotImplementedType"));
  571. this->Ellipsis = heap._new<Dummy>(_new_type_object("ellipsis"));
  572. this->True = heap._new<Dummy>(tp_bool);
  573. this->False = heap._new<Dummy>(tp_bool);
  574. this->StopIteration = heap._new<Dummy>(_new_type_object("StopIterationType"));
  575. this->builtins = new_module("builtins");
  576. // setup public types
  577. builtins->attr().set("type", _t(tp_type));
  578. builtins->attr().set("object", _t(tp_object));
  579. builtins->attr().set("bool", _t(tp_bool));
  580. builtins->attr().set("int", _t(tp_int));
  581. builtins->attr().set("float", _t(tp_float));
  582. builtins->attr().set("str", _t(tp_str));
  583. builtins->attr().set("list", _t(tp_list));
  584. builtins->attr().set("tuple", _t(tp_tuple));
  585. builtins->attr().set("range", _t(tp_range));
  586. builtins->attr().set("bytes", _t(tp_bytes));
  587. builtins->attr().set("dict", _t(tp_dict));
  588. builtins->attr().set("property", _t(tp_property));
  589. builtins->attr().set("StopIteration", StopIteration);
  590. builtins->attr().set("NotImplemented", NotImplemented);
  591. builtins->attr().set("slice", _t(tp_slice));
  592. post_init();
  593. for(int i=0; i<_all_types.size(); i++){
  594. _all_types[i].obj->attr()._try_perfect_rehash();
  595. }
  596. for(auto [k, v]: _modules.items()) v->attr()._try_perfect_rehash();
  597. this->_main = new_module("__main__");
  598. }
  599. // `heap.gc_scope_lock();` needed before calling this function
  600. void VM::_unpack_as_list(ArgsView args, List& list){
  601. for(PyObject* obj: args){
  602. if(is_non_tagged_type(obj, tp_star_wrapper)){
  603. const StarWrapper& w = _CAST(StarWrapper&, obj);
  604. // maybe this check should be done in the compile time
  605. if(w.level != 1) TypeError("expected level 1 star wrapper");
  606. PyObject* _0 = py_iter(w.obj);
  607. PyObject* _1 = py_next(_0);
  608. while(_1 != StopIteration){
  609. list.push_back(_1);
  610. _1 = py_next(_0);
  611. }
  612. }else{
  613. list.push_back(obj);
  614. }
  615. }
  616. }
  617. // `heap.gc_scope_lock();` needed before calling this function
  618. void VM::_unpack_as_dict(ArgsView args, Dict& dict){
  619. for(PyObject* obj: args){
  620. if(is_non_tagged_type(obj, tp_star_wrapper)){
  621. const StarWrapper& w = _CAST(StarWrapper&, obj);
  622. // maybe this check should be done in the compile time
  623. if(w.level != 2) TypeError("expected level 2 star wrapper");
  624. const Dict& other = CAST(Dict&, w.obj);
  625. dict.update(other);
  626. }else{
  627. const Tuple& t = CAST(Tuple&, obj);
  628. if(t.size() != 2) TypeError("expected tuple of length 2");
  629. dict.set(t[0], t[1]);
  630. }
  631. }
  632. }
  633. void VM::_prepare_py_call(PyObject** buffer, ArgsView args, ArgsView kwargs, const FuncDecl_& decl){
  634. const CodeObject* co = decl->code.get();
  635. int co_nlocals = co->varnames.size();
  636. int decl_argc = decl->args.size();
  637. if(args.size() < decl_argc){
  638. vm->TypeError(fmt(
  639. "expected ", decl_argc, " positional arguments, got ", args.size(),
  640. " (", co->name, ')'
  641. ));
  642. }
  643. int i = 0;
  644. // prepare args
  645. for(int index: decl->args) buffer[index] = args[i++];
  646. // set extra varnames to nullptr
  647. for(int j=i; j<co_nlocals; j++) buffer[j] = PY_NULL;
  648. // prepare kwdefaults
  649. for(auto& kv: decl->kwargs) buffer[kv.key] = kv.value;
  650. // handle *args
  651. if(decl->starred_arg != -1){
  652. ArgsView vargs(args.begin() + i, args.end());
  653. buffer[decl->starred_arg] = VAR(vargs.to_tuple());
  654. i += vargs.size();
  655. }else{
  656. // kwdefaults override
  657. for(auto& kv: decl->kwargs){
  658. if(i >= args.size()) break;
  659. buffer[kv.key] = args[i++];
  660. }
  661. if(i < args.size()) TypeError(fmt("too many arguments", " (", decl->code->name, ')'));
  662. }
  663. PyObject* vkwargs;
  664. if(decl->starred_kwarg != -1){
  665. vkwargs = VAR(Dict(this));
  666. buffer[decl->starred_kwarg] = vkwargs;
  667. }else{
  668. vkwargs = nullptr;
  669. }
  670. for(int j=0; j<kwargs.size(); j+=2){
  671. StrName key(CAST(int, kwargs[j]));
  672. int index = co->varnames_inv.try_get(key);
  673. if(index < 0){
  674. if(vkwargs == nullptr){
  675. TypeError(fmt(key.escape(), " is an invalid keyword argument for ", co->name, "()"));
  676. }else{
  677. Dict& dict = _CAST(Dict&, vkwargs);
  678. dict.set(VAR(key.sv()), kwargs[j+1]);
  679. }
  680. }else{
  681. buffer[index] = kwargs[j+1];
  682. }
  683. }
  684. }
  685. PyObject* VM::vectorcall(int ARGC, int KWARGC, bool op_call){
  686. PyObject** p1 = s_data._sp - KWARGC*2;
  687. PyObject** p0 = p1 - ARGC - 2;
  688. // [callable, <self>, args..., kwargs...]
  689. // ^p0 ^p1 ^_sp
  690. PyObject* callable = p1[-(ARGC + 2)];
  691. bool method_call = p1[-(ARGC + 1)] != PY_NULL;
  692. // handle boundmethod, do a patch
  693. if(is_non_tagged_type(callable, tp_bound_method)){
  694. if(method_call) FATAL_ERROR();
  695. auto& bm = CAST(BoundMethod&, callable);
  696. callable = bm.func; // get unbound method
  697. p1[-(ARGC + 2)] = bm.func;
  698. p1[-(ARGC + 1)] = bm.self;
  699. method_call = true;
  700. // [unbound, self, args..., kwargs...]
  701. }
  702. ArgsView args(p1 - ARGC - int(method_call), p1);
  703. ArgsView kwargs(p1, s_data._sp);
  704. PyObject* buffer[PK_MAX_CO_VARNAMES];
  705. if(is_non_tagged_type(callable, tp_native_func)){
  706. const auto& f = PK_OBJ_GET(NativeFunc, callable);
  707. PyObject* ret;
  708. if(f.decl != nullptr){
  709. int co_nlocals = f.decl->code->varnames.size();
  710. _prepare_py_call(buffer, args, kwargs, f.decl);
  711. // copy buffer back to stack
  712. s_data.reset(args.begin());
  713. for(int j=0; j<co_nlocals; j++) PUSH(buffer[j]);
  714. ret = f.call(vm, ArgsView(s_data._sp - co_nlocals, s_data._sp));
  715. }else{
  716. if(KWARGC != 0) TypeError("old-style native_func does not accept keyword arguments");
  717. f.check_size(this, args);
  718. ret = f.call(this, args);
  719. }
  720. s_data.reset(p0);
  721. return ret;
  722. }
  723. if(is_non_tagged_type(callable, tp_function)){
  724. /*****************_py_call*****************/
  725. // callable must be a `function` object
  726. if(s_data.is_overflow()) StackOverflowError();
  727. const Function& fn = PK_OBJ_GET(Function, callable);
  728. const FuncDecl_& decl = fn.decl;
  729. const CodeObject* co = decl->code.get();
  730. int co_nlocals = co->varnames.size();
  731. _prepare_py_call(buffer, args, kwargs, decl);
  732. if(co->is_generator){
  733. s_data.reset(p0);
  734. return _py_generator(
  735. Frame(&s_data, nullptr, co, fn._module, callable),
  736. ArgsView(buffer, buffer + co_nlocals)
  737. );
  738. }
  739. // copy buffer back to stack
  740. s_data.reset(args.begin());
  741. for(int j=0; j<co_nlocals; j++) PUSH(buffer[j]);
  742. callstack.emplace(&s_data, p0, co, fn._module, callable, FastLocals(co, args.begin()));
  743. if(op_call) return PY_OP_CALL;
  744. return _run_top_frame();
  745. /*****************_py_call*****************/
  746. }
  747. if(is_non_tagged_type(callable, tp_type)){
  748. if(method_call) FATAL_ERROR();
  749. // [type, NULL, args..., kwargs...]
  750. PyObject* new_f = find_name_in_mro(callable, __new__);
  751. PyObject* obj;
  752. #if PK_DEBUG_EXTRA_CHECK
  753. PK_ASSERT(new_f != nullptr);
  754. #endif
  755. if(new_f == cached_object__new__) {
  756. // fast path for object.__new__
  757. Type t = PK_OBJ_GET(Type, callable);
  758. obj= vm->heap.gcnew<DummyInstance>(t);
  759. }else{
  760. PUSH(new_f);
  761. PUSH(PY_NULL);
  762. PUSH(callable); // cls
  763. for(PyObject* o: args) PUSH(o);
  764. for(PyObject* o: kwargs) PUSH(o);
  765. // if obj is not an instance of callable, the behavior is undefined
  766. obj = vectorcall(ARGC+1, KWARGC);
  767. }
  768. // __init__
  769. PyObject* self;
  770. callable = get_unbound_method(obj, __init__, &self, false);
  771. if (self != PY_NULL) {
  772. // replace `NULL` with `self`
  773. p1[-(ARGC + 2)] = callable;
  774. p1[-(ARGC + 1)] = self;
  775. // [init_f, self, args..., kwargs...]
  776. vectorcall(ARGC, KWARGC);
  777. // We just discard the return value of `__init__`
  778. // in cpython it raises a TypeError if the return value is not None
  779. }else{
  780. // manually reset the stack
  781. s_data.reset(p0);
  782. }
  783. return obj;
  784. }
  785. // handle `__call__` overload
  786. PyObject* self;
  787. PyObject* call_f = get_unbound_method(callable, __call__, &self, false);
  788. if(self != PY_NULL){
  789. p1[-(ARGC + 2)] = call_f;
  790. p1[-(ARGC + 1)] = self;
  791. // [call_f, self, args..., kwargs...]
  792. return vectorcall(ARGC, KWARGC, false);
  793. }
  794. TypeError(OBJ_NAME(_t(callable)).escape() + " object is not callable");
  795. return nullptr;
  796. }
  797. // https://docs.python.org/3/howto/descriptor.html#invocation-from-an-instance
  798. PyObject* VM::getattr(PyObject* obj, StrName name, bool throw_err){
  799. PyObject* objtype;
  800. // handle super() proxy
  801. if(is_non_tagged_type(obj, tp_super)){
  802. const Super& super = PK_OBJ_GET(Super, obj);
  803. obj = super.first;
  804. objtype = _t(super.second);
  805. }else{
  806. objtype = _t(obj);
  807. }
  808. PyObject* cls_var = find_name_in_mro(objtype, name);
  809. if(cls_var != nullptr){
  810. // handle descriptor
  811. if(is_non_tagged_type(cls_var, tp_property)){
  812. const Property& prop = _CAST(Property&, cls_var);
  813. return call(prop.getter, obj);
  814. }
  815. }
  816. // handle instance __dict__
  817. if(!is_tagged(obj) && obj->is_attr_valid()){
  818. PyObject* val = obj->attr().try_get(name);
  819. if(val != nullptr) return val;
  820. }
  821. if(cls_var != nullptr){
  822. // bound method is non-data descriptor
  823. if(is_non_tagged_type(cls_var, tp_function) || is_non_tagged_type(cls_var, tp_native_func)){
  824. return VAR(BoundMethod(obj, cls_var));
  825. }
  826. return cls_var;
  827. }
  828. if(throw_err) AttributeError(obj, name);
  829. return nullptr;
  830. }
  831. // used by OP_LOAD_METHOD
  832. // try to load a unbound method (fallback to `getattr` if not found)
  833. PyObject* VM::get_unbound_method(PyObject* obj, StrName name, PyObject** self, bool throw_err, bool fallback){
  834. *self = PY_NULL;
  835. PyObject* objtype;
  836. // handle super() proxy
  837. if(is_non_tagged_type(obj, tp_super)){
  838. const Super& super = PK_OBJ_GET(Super, obj);
  839. obj = super.first;
  840. objtype = _t(super.second);
  841. }else{
  842. objtype = _t(obj);
  843. }
  844. PyObject* cls_var = find_name_in_mro(objtype, name);
  845. if(fallback){
  846. if(cls_var != nullptr){
  847. // handle descriptor
  848. if(is_non_tagged_type(cls_var, tp_property)){
  849. const Property& prop = _CAST(Property&, cls_var);
  850. return call(prop.getter, obj);
  851. }
  852. }
  853. // handle instance __dict__
  854. if(!is_tagged(obj) && obj->is_attr_valid()){
  855. PyObject* val = obj->attr().try_get(name);
  856. if(val != nullptr) return val;
  857. }
  858. }
  859. if(cls_var != nullptr){
  860. if(is_non_tagged_type(cls_var, tp_function) || is_non_tagged_type(cls_var, tp_native_func)){
  861. *self = obj;
  862. }
  863. return cls_var;
  864. }
  865. if(throw_err) AttributeError(obj, name);
  866. return nullptr;
  867. }
  868. void VM::setattr(PyObject* obj, StrName name, PyObject* value){
  869. PyObject* objtype;
  870. // handle super() proxy
  871. if(is_non_tagged_type(obj, tp_super)){
  872. Super& super = PK_OBJ_GET(Super, obj);
  873. obj = super.first;
  874. objtype = _t(super.second);
  875. }else{
  876. objtype = _t(obj);
  877. }
  878. PyObject* cls_var = find_name_in_mro(objtype, name);
  879. if(cls_var != nullptr){
  880. // handle descriptor
  881. if(is_non_tagged_type(cls_var, tp_property)){
  882. const Property& prop = _CAST(Property&, cls_var);
  883. if(prop.setter != vm->None){
  884. call(prop.setter, obj, value);
  885. }else{
  886. TypeError(fmt("readonly attribute: ", name.escape()));
  887. }
  888. return;
  889. }
  890. }
  891. // handle instance __dict__
  892. if(is_tagged(obj) || !obj->is_attr_valid()) TypeError("cannot set attribute");
  893. obj->attr().set(name, value);
  894. }
  895. PyObject* VM::bind(PyObject* obj, const char* sig, NativeFuncC fn, UserData userdata){
  896. return bind(obj, sig, nullptr, fn, userdata);
  897. }
  898. PyObject* VM::bind(PyObject* obj, const char* sig, const char* docstring, NativeFuncC fn, UserData userdata){
  899. CodeObject_ co;
  900. try{
  901. // fn(a, b, *c, d=1) -> None
  902. co = compile("def " + Str(sig) + " : pass", "<bind>", EXEC_MODE);
  903. }catch(Exception&){
  904. throw std::runtime_error("invalid signature: " + std::string(sig));
  905. }
  906. if(co->func_decls.size() != 1){
  907. throw std::runtime_error("expected 1 function declaration");
  908. }
  909. FuncDecl_ decl = co->func_decls[0];
  910. decl->signature = Str(sig);
  911. if(docstring != nullptr){
  912. decl->docstring = Str(docstring).strip();
  913. }
  914. PyObject* f_obj = VAR(NativeFunc(fn, decl));
  915. PK_OBJ_GET(NativeFunc, f_obj).set_userdata(userdata);
  916. if(obj != nullptr) obj->attr().set(decl->code->name, f_obj);
  917. return f_obj;
  918. }
  919. PyObject* VM::bind_property(PyObject* obj, Str name, NativeFuncC fget, NativeFuncC fset){
  920. PyObject* _0 = heap.gcnew<NativeFunc>(tp_native_func, fget, 1, false);
  921. PyObject* _1 = vm->None;
  922. if(fset != nullptr) _1 = heap.gcnew<NativeFunc>(tp_native_func, fset, 2, false);
  923. Str signature = name;
  924. int pos = name.index(":");
  925. if(pos > 0) name = name.substr(0, pos).strip();
  926. PyObject* prop = VAR(Property(_0, _1, signature));
  927. obj->attr().set(name, prop);
  928. return prop;
  929. }
  930. void VM::_error(Exception e){
  931. if(callstack.empty()){
  932. e.is_re = false;
  933. throw e;
  934. }
  935. PUSH(VAR(e));
  936. _raise();
  937. }
  938. void ManagedHeap::mark() {
  939. for(PyObject* obj: _no_gc) PK_OBJ_MARK(obj);
  940. for(auto& frame : vm->callstack.data()) frame._gc_mark();
  941. for(PyObject* obj: vm->s_data) PK_OBJ_MARK(obj);
  942. if(_gc_marker_ex) _gc_marker_ex(vm);
  943. if(vm->_last_exception) PK_OBJ_MARK(vm->_last_exception);
  944. if(vm->_c.error != nullptr) PK_OBJ_MARK(vm->_c.error);
  945. }
  946. Str obj_type_name(VM *vm, Type type){
  947. return vm->_all_types[type].name;
  948. }
  949. void VM::bind__hash__(Type type, i64 (*f)(VM*, PyObject*)){
  950. PyObject* obj = _t(type);
  951. _all_types[type].m__hash__ = f;
  952. PyObject* nf = bind_method<0>(obj, "__hash__", [](VM* vm, ArgsView args){
  953. i64 ret = lambda_get_userdata<i64(*)(VM*, PyObject*)>(args.begin())(vm, args[0]);
  954. return VAR(ret);
  955. });
  956. PK_OBJ_GET(NativeFunc, nf).set_userdata(f);
  957. }
  958. void VM::bind__len__(Type type, i64 (*f)(VM*, PyObject*)){
  959. PyObject* obj = _t(type);
  960. _all_types[type].m__len__ = f;
  961. PyObject* nf = bind_method<0>(obj, "__len__", [](VM* vm, ArgsView args){
  962. i64 ret = lambda_get_userdata<i64(*)(VM*, PyObject*)>(args.begin())(vm, args[0]);
  963. return VAR(ret);
  964. });
  965. PK_OBJ_GET(NativeFunc, nf).set_userdata(f);
  966. }
  967. void Dict::_probe_0(PyObject *key, bool &ok, int &i) const{
  968. ok = false;
  969. i64 hash = vm->py_hash(key);
  970. i = hash & _mask;
  971. // std::cout << CAST(Str, vm->py_repr(key)) << " " << hash << " " << i << std::endl;
  972. for(int j=0; j<_capacity; j++) {
  973. if(_items[i].first != nullptr){
  974. if(vm->py_equals(_items[i].first, key)) { ok = true; break; }
  975. }else{
  976. if(_items[i].second == nullptr) break;
  977. }
  978. // https://github.com/python/cpython/blob/3.8/Objects/dictobject.c#L166
  979. i = ((5*i) + 1) & _mask;
  980. // std::cout << CAST(Str, vm->py_repr(key)) << " next: " << i << std::endl;
  981. }
  982. }
  983. void Dict::_probe_1(PyObject *key, bool &ok, int &i) const{
  984. ok = false;
  985. i = vm->py_hash(key) & _mask;
  986. while(_items[i].first != nullptr) {
  987. if(vm->py_equals(_items[i].first, key)) { ok = true; break; }
  988. // https://github.com/python/cpython/blob/3.8/Objects/dictobject.c#L166
  989. i = ((5*i) + 1) & _mask;
  990. }
  991. }
  992. void CodeObjectSerializer::write_object(VM *vm, PyObject *obj){
  993. if(is_int(obj)) write_int(_CAST(i64, obj));
  994. else if(is_float(obj)) write_float(_CAST(f64, obj));
  995. else if(is_type(obj, vm->tp_str)) write_str(_CAST(Str&, obj));
  996. else if(is_type(obj, vm->tp_bool)) write_bool(_CAST(bool, obj));
  997. else if(obj == vm->None) write_none();
  998. else if(obj == vm->Ellipsis) write_ellipsis();
  999. else{
  1000. throw std::runtime_error(fmt(OBJ_NAME(vm->_t(obj)).escape(), " is not serializable"));
  1001. }
  1002. }
  1003. void NativeFunc::check_size(VM* vm, ArgsView args) const{
  1004. if(args.size() != argc && argc != -1) {
  1005. vm->TypeError(fmt("expected ", argc, " arguments, got ", args.size()));
  1006. }
  1007. }
  1008. PyObject* NativeFunc::call(VM *vm, ArgsView args) const {
  1009. return f(vm, args);
  1010. }
  1011. } // namespace pkpy