// vm.cpp — pocketpy virtual machine implementation.
  1. #include "pocketpy/vm.h"
  2. namespace pkpy{
  3. VM::VM(bool enable_os) : heap(this), enable_os(enable_os) {
  4. this->vm = this;
  5. this->_c.error = nullptr;
  6. _stdout = [](VM* vm, const Str& s) {
  7. PK_UNUSED(vm);
  8. std::cout << s;
  9. };
  10. _stderr = [](VM* vm, const Str& s) {
  11. PK_UNUSED(vm);
  12. std::cerr << s;
  13. };
  14. callstack.reserve(8);
  15. _main = nullptr;
  16. _last_exception = nullptr;
  17. _import_handler = [](const Str& name) {
  18. PK_UNUSED(name);
  19. return Bytes();
  20. };
  21. init_builtin_types();
  22. }
  23. PyObject* VM::py_str(PyObject* obj){
  24. const PyTypeInfo* ti = _inst_type_info(obj);
  25. if(ti->m__str__) return ti->m__str__(this, obj);
  26. PyObject* self;
  27. PyObject* f = get_unbound_method(obj, __str__, &self, false);
  28. if(self != PY_NULL) return call_method(self, f);
  29. return py_repr(obj);
  30. }
  31. PyObject* VM::py_repr(PyObject* obj){
  32. const PyTypeInfo* ti = _inst_type_info(obj);
  33. if(ti->m__repr__) return ti->m__repr__(this, obj);
  34. return call_method(obj, __repr__);
  35. }
  36. PyObject* VM::py_json(PyObject* obj){
  37. const PyTypeInfo* ti = _inst_type_info(obj);
  38. if(ti->m__json__) return ti->m__json__(this, obj);
  39. return call_method(obj, __json__);
  40. }
  41. PyObject* VM::py_iter(PyObject* obj){
  42. const PyTypeInfo* ti = _inst_type_info(obj);
  43. if(ti->m__iter__) return ti->m__iter__(this, obj);
  44. PyObject* self;
  45. PyObject* iter_f = get_unbound_method(obj, __iter__, &self, false);
  46. if(self != PY_NULL) return call_method(self, iter_f);
  47. TypeError(OBJ_NAME(_t(obj)).escape() + " object is not iterable");
  48. return nullptr;
  49. }
  50. FrameId VM::top_frame(){
  51. #if PK_DEBUG_EXTRA_CHECK
  52. if(callstack.empty()) FATAL_ERROR();
  53. #endif
  54. return FrameId(&callstack.data(), callstack.size()-1);
  55. }
  56. void VM::_pop_frame(){
  57. Frame* frame = &callstack.top();
  58. s_data.reset(frame->_sp_base);
  59. callstack.pop();
  60. }
  61. PyObject* VM::find_name_in_mro(PyObject* cls, StrName name){
  62. PyObject* val;
  63. do{
  64. val = cls->attr().try_get(name);
  65. if(val != nullptr) return val;
  66. Type base = _all_types[PK_OBJ_GET(Type, cls)].base;
  67. if(base.index == -1) break;
  68. cls = _all_types[base].obj;
  69. }while(true);
  70. return nullptr;
  71. }
  72. bool VM::isinstance(PyObject* obj, Type cls_t){
  73. Type obj_t = PK_OBJ_GET(Type, _t(obj));
  74. do{
  75. if(obj_t == cls_t) return true;
  76. Type base = _all_types[obj_t].base;
  77. if(base.index == -1) break;
  78. obj_t = base;
  79. }while(true);
  80. return false;
  81. }
  82. PyObject* VM::exec(Str source, Str filename, CompileMode mode, PyObject* _module){
  83. if(_module == nullptr) _module = _main;
  84. try {
  85. CodeObject_ code = compile(source, filename, mode);
  86. #if PK_DEBUG_DIS_EXEC
  87. if(_module == _main) std::cout << disassemble(code) << '\n';
  88. #endif
  89. return _exec(code, _module);
  90. }catch (const Exception& e){
  91. _stderr(this, e.summary() + "\n");
  92. }
  93. #if !PK_DEBUG_FULL_EXCEPTION
  94. catch (const std::exception& e) {
  95. Str msg = "An std::exception occurred! It could be a bug.\n";
  96. msg = msg + e.what();
  97. _stderr(this, msg + "\n");
  98. }
  99. #endif
  100. callstack.clear();
  101. s_data.clear();
  102. return nullptr;
  103. }
  104. PyObject* VM::new_type_object(PyObject* mod, StrName name, Type base, bool subclass_enabled){
  105. PyObject* obj = heap._new<Type>(tp_type, _all_types.size());
  106. const PyTypeInfo& base_info = _all_types[base];
  107. if(!base_info.subclass_enabled){
  108. TypeError(fmt("type ", base_info.name.escape(), " is not `subclass_enabled`"));
  109. }
  110. PyTypeInfo info{
  111. obj,
  112. base,
  113. (mod!=nullptr && mod!=builtins) ? Str(OBJ_NAME(mod)+"."+name.sv()): name.sv(),
  114. subclass_enabled,
  115. };
  116. if(mod != nullptr) mod->attr().set(name, obj);
  117. _all_types.push_back(info);
  118. return obj;
  119. }
  120. Type VM::_new_type_object(StrName name, Type base) {
  121. PyObject* obj = new_type_object(nullptr, name, base, false);
  122. return PK_OBJ_GET(Type, obj);
  123. }
  124. PyObject* VM::_find_type_object(const Str& type){
  125. PyObject* obj = builtins->attr().try_get(type);
  126. if(obj == nullptr){
  127. for(auto& t: _all_types) if(t.name == type) return t.obj;
  128. throw std::runtime_error(fmt("type not found: ", type));
  129. }
  130. check_non_tagged_type(obj, tp_type);
  131. return obj;
  132. }
  133. Type VM::_type(const Str& type){
  134. PyObject* obj = _find_type_object(type);
  135. return PK_OBJ_GET(Type, obj);
  136. }
  137. PyTypeInfo* VM::_type_info(const Str& type){
  138. PyObject* obj = builtins->attr().try_get(type);
  139. if(obj == nullptr){
  140. for(auto& t: _all_types) if(t.name == type) return &t;
  141. FATAL_ERROR();
  142. }
  143. return &_all_types[PK_OBJ_GET(Type, obj)];
  144. }
  145. PyTypeInfo* VM::_type_info(Type type){
  146. return &_all_types[type];
  147. }
  148. const PyTypeInfo* VM::_inst_type_info(PyObject* obj){
  149. if(is_int(obj)) return &_all_types[tp_int];
  150. if(is_float(obj)) return &_all_types[tp_float];
  151. return &_all_types[obj->type];
  152. }
  153. bool VM::py_equals(PyObject* lhs, PyObject* rhs){
  154. if(lhs == rhs) return true;
  155. const PyTypeInfo* ti = _inst_type_info(lhs);
  156. PyObject* res;
  157. if(ti->m__eq__){
  158. res = ti->m__eq__(this, lhs, rhs);
  159. if(res != vm->NotImplemented) return res == vm->True;
  160. }
  161. res = call_method(lhs, __eq__, rhs);
  162. if(res != vm->NotImplemented) return res == vm->True;
  163. ti = _inst_type_info(rhs);
  164. if(ti->m__eq__){
  165. res = ti->m__eq__(this, rhs, lhs);
  166. if(res != vm->NotImplemented) return res == vm->True;
  167. }
  168. res = call_method(rhs, __eq__, lhs);
  169. if(res != vm->NotImplemented) return res == vm->True;
  170. return false;
  171. }
  172. int VM::normalized_index(int index, int size){
  173. if(index < 0) index += size;
  174. if(index < 0 || index >= size){
  175. IndexError(std::to_string(index) + " not in [0, " + std::to_string(size) + ")");
  176. }
  177. return index;
  178. }
  179. PyObject* VM::py_next(PyObject* obj){
  180. const PyTypeInfo* ti = _inst_type_info(obj);
  181. if(ti->m__next__) return ti->m__next__(this, obj);
  182. return call_method(obj, __next__);
  183. }
  184. PyObject* VM::py_import(StrName name, bool relative){
  185. Str filename;
  186. int type;
  187. if(relative){
  188. ImportContext* ctx = &_import_context;
  189. type = 2;
  190. for(auto it=ctx->pending.rbegin(); it!=ctx->pending.rend(); ++it){
  191. if(it->second == 2) continue;
  192. if(it->second == 1){
  193. filename = fmt(it->first, kPlatformSep, name, ".py");
  194. name = fmt(it->first, '.', name).c_str();
  195. break;
  196. }
  197. }
  198. if(filename.length() == 0) _error("ImportError", "relative import outside of package");
  199. }else{
  200. type = 0;
  201. filename = fmt(name, ".py");
  202. }
  203. for(auto& [k, v]: _import_context.pending){
  204. if(k == name){
  205. vm->_error("ImportError", fmt("circular import ", name.escape()));
  206. }
  207. }
  208. PyObject* ext_mod = _modules.try_get(name);
  209. if(ext_mod == nullptr){
  210. Str source;
  211. auto it = _lazy_modules.find(name);
  212. if(it == _lazy_modules.end()){
  213. Bytes b = _import_handler(filename);
  214. if(!relative && !b){
  215. filename = fmt(name, kPlatformSep, "__init__.py");
  216. b = _import_handler(filename);
  217. if(b) type = 1;
  218. }
  219. if(!b) _error("ImportError", fmt("module ", name.escape(), " not found"));
  220. source = Str(b.str());
  221. }else{
  222. source = it->second;
  223. _lazy_modules.erase(it);
  224. }
  225. auto _ = _import_context.temp(this, name, type);
  226. CodeObject_ code = compile(source, filename, EXEC_MODE);
  227. PyObject* new_mod = new_module(name);
  228. _exec(code, new_mod);
  229. new_mod->attr()._try_perfect_rehash();
  230. return new_mod;
  231. }else{
  232. return ext_mod;
  233. }
  234. }
  235. VM::~VM() {
  236. callstack.clear();
  237. s_data.clear();
  238. _all_types.clear();
  239. _modules.clear();
  240. _lazy_modules.clear();
  241. }
  242. PyObject* VM::py_negate(PyObject* obj){
  243. const PyTypeInfo* ti = _inst_type_info(obj);
  244. if(ti->m__neg__) return ti->m__neg__(this, obj);
  245. return call_method(obj, __neg__);
  246. }
  247. void VM::check_int_or_float(PyObject *obj){
  248. if(!is_tagged(obj)){
  249. TypeError("expected 'int' or 'float', got " + OBJ_NAME(_t(obj)).escape());
  250. }
  251. }
  252. bool VM::py_bool(PyObject* obj){
  253. if(is_non_tagged_type(obj, tp_bool)) return obj == True;
  254. if(obj == None) return false;
  255. if(is_int(obj)) return _CAST(i64, obj) != 0;
  256. if(is_float(obj)) return _CAST(f64, obj) != 0.0;
  257. PyObject* self;
  258. PyObject* len_f = get_unbound_method(obj, __len__, &self, false);
  259. if(self != PY_NULL){
  260. PyObject* ret = call_method(self, len_f);
  261. return CAST(i64, ret) > 0;
  262. }
  263. return true;
  264. }
  265. PyObject* VM::py_list(PyObject* it){
  266. auto _lock = heap.gc_scope_lock();
  267. it = py_iter(it);
  268. List list;
  269. PyObject* obj = py_next(it);
  270. while(obj != StopIteration){
  271. list.push_back(obj);
  272. obj = py_next(it);
  273. }
  274. return VAR(std::move(list));
  275. }
  276. void VM::parse_int_slice(const Slice& s, int length, int& start, int& stop, int& step){
  277. auto clip = [](int value, int min, int max){
  278. if(value < min) return min;
  279. if(value > max) return max;
  280. return value;
  281. };
  282. if(s.step == None) step = 1;
  283. else step = CAST(int, s.step);
  284. if(step == 0) ValueError("slice step cannot be zero");
  285. if(step > 0){
  286. if(s.start == None){
  287. start = 0;
  288. }else{
  289. start = CAST(int, s.start);
  290. if(start < 0) start += length;
  291. start = clip(start, 0, length);
  292. }
  293. if(s.stop == None){
  294. stop = length;
  295. }else{
  296. stop = CAST(int, s.stop);
  297. if(stop < 0) stop += length;
  298. stop = clip(stop, 0, length);
  299. }
  300. }else{
  301. if(s.start == None){
  302. start = length - 1;
  303. }else{
  304. start = CAST(int, s.start);
  305. if(start < 0) start += length;
  306. start = clip(start, -1, length - 1);
  307. }
  308. if(s.stop == None){
  309. stop = -1;
  310. }else{
  311. stop = CAST(int, s.stop);
  312. if(stop < 0) stop += length;
  313. stop = clip(stop, -1, length - 1);
  314. }
  315. }
  316. }
  317. i64 VM::py_hash(PyObject* obj){
  318. const PyTypeInfo* ti = _inst_type_info(obj);
  319. if(ti->m__hash__) return ti->m__hash__(this, obj);
  320. PyObject* ret = call_method(obj, __hash__);
  321. return CAST(i64, ret);
  322. }
  323. PyObject* VM::format(Str spec, PyObject* obj){
  324. if(spec.empty()) return py_str(obj);
  325. char type;
  326. switch(spec.end()[-1]){
  327. case 'f': case 'd': case 's':
  328. type = spec.end()[-1];
  329. spec = spec.substr(0, spec.length() - 1);
  330. break;
  331. default: type = ' '; break;
  332. }
  333. char pad_c = ' ';
  334. if(spec[0] == '0'){
  335. pad_c = '0';
  336. spec = spec.substr(1);
  337. }
  338. char align;
  339. if(spec[0] == '>'){
  340. align = '>';
  341. spec = spec.substr(1);
  342. }else if(spec[0] == '<'){
  343. align = '<';
  344. spec = spec.substr(1);
  345. }else{
  346. if(is_int(obj) || is_float(obj)) align = '>';
  347. else align = '<';
  348. }
  349. int dot = spec.index(".");
  350. int width, precision;
  351. try{
  352. if(dot >= 0){
  353. if(dot == 0){
  354. width = -1;
  355. }else{
  356. width = Number::stoi(spec.substr(0, dot).str());
  357. }
  358. precision = Number::stoi(spec.substr(dot+1).str());
  359. }else{
  360. width = Number::stoi(spec.str());
  361. precision = -1;
  362. }
  363. }catch(...){
  364. ValueError("invalid format specifer");
  365. UNREACHABLE();
  366. }
  367. if(type != 'f' && dot >= 0) ValueError("precision not allowed in the format specifier");
  368. Str ret;
  369. if(type == 'f'){
  370. f64 val = CAST(f64, obj);
  371. if(precision < 0) precision = 6;
  372. std::stringstream ss;
  373. ss << std::fixed << std::setprecision(precision) << val;
  374. ret = ss.str();
  375. }else if(type == 'd'){
  376. ret = std::to_string(CAST(i64, obj));
  377. }else if(type == 's'){
  378. ret = CAST(Str&, obj);
  379. }else{
  380. ret = CAST(Str&, py_str(obj));
  381. }
  382. if(width != -1 && width > ret.length()){
  383. int pad = width - ret.length();
  384. std::string padding(pad, pad_c);
  385. if(align == '>') ret = padding.c_str() + ret;
  386. else ret = ret + padding.c_str();
  387. }
  388. return VAR(ret);
  389. }
  390. PyObject* VM::new_module(StrName name) {
  391. PyObject* obj = heap._new<DummyModule>(tp_module);
  392. obj->attr().set("__name__", VAR(name.sv()));
  393. // we do not allow override in order to avoid memory leak
  394. // it is because Module objects are not garbage collected
  395. if(_modules.contains(name)) throw std::runtime_error("module already exists");
  396. _modules.set(name, obj);
  397. return obj;
  398. }
  399. static std::string _opcode_argstr(VM* vm, Bytecode byte, const CodeObject* co){
  400. std::string argStr = byte.arg == -1 ? "" : std::to_string(byte.arg);
  401. switch(byte.op){
  402. case OP_LOAD_CONST: case OP_FORMAT_STRING:
  403. if(vm != nullptr){
  404. argStr += fmt(" (", CAST(Str, vm->py_repr(co->consts[byte.arg])), ")");
  405. }
  406. break;
  407. case OP_LOAD_NAME: case OP_LOAD_GLOBAL: case OP_LOAD_NONLOCAL: case OP_STORE_GLOBAL:
  408. case OP_LOAD_ATTR: case OP_LOAD_METHOD: case OP_STORE_ATTR: case OP_DELETE_ATTR:
  409. case OP_IMPORT_NAME: case OP_BEGIN_CLASS: case OP_RAISE:
  410. case OP_DELETE_GLOBAL: case OP_INC_GLOBAL: case OP_DEC_GLOBAL: case OP_STORE_CLASS_ATTR:
  411. argStr += fmt(" (", StrName(byte.arg).sv(), ")");
  412. break;
  413. case OP_LOAD_FAST: case OP_STORE_FAST: case OP_DELETE_FAST: case OP_INC_FAST: case OP_DEC_FAST:
  414. argStr += fmt(" (", co->varnames[byte.arg].sv(), ")");
  415. break;
  416. case OP_LOAD_FUNCTION:
  417. argStr += fmt(" (", co->func_decls[byte.arg]->code->name, ")");
  418. break;
  419. }
  420. return argStr;
  421. }
  422. Str VM::disassemble(CodeObject_ co){
  423. auto pad = [](const Str& s, const int n){
  424. if(s.length() >= n) return s.substr(0, n);
  425. return s + std::string(n - s.length(), ' ');
  426. };
  427. std::vector<int> jumpTargets;
  428. for(auto byte : co->codes){
  429. if(byte.op == OP_JUMP_ABSOLUTE || byte.op == OP_POP_JUMP_IF_FALSE || byte.op == OP_SHORTCUT_IF_FALSE_OR_POP){
  430. jumpTargets.push_back(byte.arg);
  431. }
  432. }
  433. std::stringstream ss;
  434. int prev_line = -1;
  435. for(int i=0; i<co->codes.size(); i++){
  436. const Bytecode& byte = co->codes[i];
  437. Str line = std::to_string(co->lines[i]);
  438. if(co->lines[i] == prev_line) line = "";
  439. else{
  440. if(prev_line != -1) ss << "\n";
  441. prev_line = co->lines[i];
  442. }
  443. std::string pointer;
  444. if(std::find(jumpTargets.begin(), jumpTargets.end(), i) != jumpTargets.end()){
  445. pointer = "-> ";
  446. }else{
  447. pointer = " ";
  448. }
  449. ss << pad(line, 8) << pointer << pad(std::to_string(i), 3);
  450. ss << " " << pad(OP_NAMES[byte.op], 25) << " ";
  451. // ss << pad(byte.arg == -1 ? "" : std::to_string(byte.arg), 5);
  452. std::string argStr = _opcode_argstr(this, byte, co.get());
  453. ss << argStr;
  454. // ss << pad(argStr, 40); // may overflow
  455. // ss << co->blocks[byte.block].type;
  456. if(i != co->codes.size() - 1) ss << '\n';
  457. }
  458. for(auto& decl: co->func_decls){
  459. ss << "\n\n" << "Disassembly of " << decl->code->name << ":\n";
  460. ss << disassemble(decl->code);
  461. }
  462. ss << "\n";
  463. return Str(ss.str());
  464. }
  465. #if PK_DEBUG_CEVAL_STEP
  466. void VM::_log_s_data(const char* title) {
  467. if(_main == nullptr) return;
  468. if(callstack.empty()) return;
  469. std::stringstream ss;
  470. if(title) ss << title << " | ";
  471. std::map<PyObject**, int> sp_bases;
  472. for(Frame& f: callstack.data()){
  473. if(f._sp_base == nullptr) FATAL_ERROR();
  474. sp_bases[f._sp_base] += 1;
  475. }
  476. FrameId frame = top_frame();
  477. int line = frame->co->lines[frame->_ip];
  478. ss << frame->co->name << ":" << line << " [";
  479. for(PyObject** p=s_data.begin(); p!=s_data.end(); p++){
  480. ss << std::string(sp_bases[p], '|');
  481. if(sp_bases[p] > 0) ss << " ";
  482. PyObject* obj = *p;
  483. if(obj == nullptr) ss << "(nil)";
  484. else if(obj == PY_NULL) ss << "NULL";
  485. else if(is_int(obj)) ss << CAST(i64, obj);
  486. else if(is_float(obj)) ss << CAST(f64, obj);
  487. else if(is_type(obj, tp_str)) ss << CAST(Str, obj).escape();
  488. else if(obj == None) ss << "None";
  489. else if(obj == True) ss << "True";
  490. else if(obj == False) ss << "False";
  491. else if(is_type(obj, tp_function)){
  492. auto& f = CAST(Function&, obj);
  493. ss << f.decl->code->name << "(...)";
  494. } else if(is_type(obj, tp_type)){
  495. Type t = PK_OBJ_GET(Type, obj);
  496. ss << "<class " + _all_types[t].name.escape() + ">";
  497. } else if(is_type(obj, tp_list)){
  498. auto& t = CAST(List&, obj);
  499. ss << "list(size=" << t.size() << ")";
  500. } else if(is_type(obj, tp_tuple)){
  501. auto& t = CAST(Tuple&, obj);
  502. ss << "tuple(size=" << t.size() << ")";
  503. } else ss << "(" << obj_type_name(this, obj->type) << ")";
  504. ss << ", ";
  505. }
  506. std::string output = ss.str();
  507. if(!s_data.empty()) {
  508. output.pop_back(); output.pop_back();
  509. }
  510. output.push_back(']');
  511. Bytecode byte = frame->co->codes[frame->_ip];
  512. std::cout << output << " " << OP_NAMES[byte.op] << " " << _opcode_argstr(nullptr, byte, frame->co) << std::endl;
  513. }
  514. #endif
  515. void VM::init_builtin_types(){
  516. _all_types.push_back({heap._new<Type>(Type(1), Type(0)), -1, "object", true});
  517. _all_types.push_back({heap._new<Type>(Type(1), Type(1)), 0, "type", false});
  518. tp_object = 0; tp_type = 1;
  519. tp_int = _new_type_object("int");
  520. tp_float = _new_type_object("float");
  521. if(tp_int.index != kTpIntIndex || tp_float.index != kTpFloatIndex) FATAL_ERROR();
  522. tp_bool = _new_type_object("bool");
  523. tp_str = _new_type_object("str");
  524. tp_list = _new_type_object("list");
  525. tp_tuple = _new_type_object("tuple");
  526. tp_slice = _new_type_object("slice");
  527. tp_range = _new_type_object("range");
  528. tp_module = _new_type_object("module");
  529. tp_function = _new_type_object("function");
  530. tp_native_func = _new_type_object("native_func");
  531. tp_bound_method = _new_type_object("bound_method");
  532. tp_super = _new_type_object("super");
  533. tp_exception = _new_type_object("Exception");
  534. tp_bytes = _new_type_object("bytes");
  535. tp_mappingproxy = _new_type_object("mappingproxy");
  536. tp_dict = _new_type_object("dict");
  537. tp_property = _new_type_object("property");
  538. tp_star_wrapper = _new_type_object("_star_wrapper");
  539. this->None = heap._new<Dummy>(_new_type_object("NoneType"));
  540. this->NotImplemented = heap._new<Dummy>(_new_type_object("NotImplementedType"));
  541. this->Ellipsis = heap._new<Dummy>(_new_type_object("ellipsis"));
  542. this->True = heap._new<Dummy>(tp_bool);
  543. this->False = heap._new<Dummy>(tp_bool);
  544. this->StopIteration = heap._new<Dummy>(_new_type_object("StopIterationType"));
  545. this->builtins = new_module("builtins");
  546. // setup public types
  547. builtins->attr().set("type", _t(tp_type));
  548. builtins->attr().set("object", _t(tp_object));
  549. builtins->attr().set("bool", _t(tp_bool));
  550. builtins->attr().set("int", _t(tp_int));
  551. builtins->attr().set("float", _t(tp_float));
  552. builtins->attr().set("str", _t(tp_str));
  553. builtins->attr().set("list", _t(tp_list));
  554. builtins->attr().set("tuple", _t(tp_tuple));
  555. builtins->attr().set("range", _t(tp_range));
  556. builtins->attr().set("bytes", _t(tp_bytes));
  557. builtins->attr().set("dict", _t(tp_dict));
  558. builtins->attr().set("property", _t(tp_property));
  559. builtins->attr().set("StopIteration", StopIteration);
  560. builtins->attr().set("NotImplemented", NotImplemented);
  561. builtins->attr().set("slice", _t(tp_slice));
  562. post_init();
  563. for(int i=0; i<_all_types.size(); i++){
  564. _all_types[i].obj->attr()._try_perfect_rehash();
  565. }
  566. for(auto [k, v]: _modules.items()) v->attr()._try_perfect_rehash();
  567. this->_main = new_module("__main__");
  568. }
  569. // `heap.gc_scope_lock();` needed before calling this function
  570. void VM::_unpack_as_list(ArgsView args, List& list){
  571. for(PyObject* obj: args){
  572. if(is_non_tagged_type(obj, tp_star_wrapper)){
  573. const StarWrapper& w = _CAST(StarWrapper&, obj);
  574. // maybe this check should be done in the compile time
  575. if(w.level != 1) TypeError("expected level 1 star wrapper");
  576. PyObject* _0 = py_iter(w.obj);
  577. PyObject* _1 = py_next(_0);
  578. while(_1 != StopIteration){
  579. list.push_back(_1);
  580. _1 = py_next(_0);
  581. }
  582. }else{
  583. list.push_back(obj);
  584. }
  585. }
  586. }
  587. // `heap.gc_scope_lock();` needed before calling this function
  588. void VM::_unpack_as_dict(ArgsView args, Dict& dict){
  589. for(PyObject* obj: args){
  590. if(is_non_tagged_type(obj, tp_star_wrapper)){
  591. const StarWrapper& w = _CAST(StarWrapper&, obj);
  592. // maybe this check should be done in the compile time
  593. if(w.level != 2) TypeError("expected level 2 star wrapper");
  594. const Dict& other = CAST(Dict&, w.obj);
  595. dict.update(other);
  596. }else{
  597. const Tuple& t = CAST(Tuple&, obj);
  598. if(t.size() != 2) TypeError("expected tuple of length 2");
  599. dict.set(t[0], t[1]);
  600. }
  601. }
  602. }
  603. void VM::_prepare_py_call(PyObject** buffer, ArgsView args, ArgsView kwargs, const FuncDecl_& decl){
  604. const CodeObject* co = decl->code.get();
  605. int co_nlocals = co->varnames.size();
  606. int decl_argc = decl->args.size();
  607. if(args.size() < decl_argc){
  608. vm->TypeError(fmt(
  609. "expected ", decl_argc, " positional arguments, got ", args.size(),
  610. " (", co->name, ')'
  611. ));
  612. }
  613. int i = 0;
  614. // prepare args
  615. for(int index: decl->args) buffer[index] = args[i++];
  616. // set extra varnames to nullptr
  617. for(int j=i; j<co_nlocals; j++) buffer[j] = PY_NULL;
  618. // prepare kwdefaults
  619. for(auto& kv: decl->kwargs) buffer[kv.key] = kv.value;
  620. // handle *args
  621. if(decl->starred_arg != -1){
  622. ArgsView vargs(args.begin() + i, args.end());
  623. buffer[decl->starred_arg] = VAR(vargs.to_tuple());
  624. i += vargs.size();
  625. }else{
  626. // kwdefaults override
  627. for(auto& kv: decl->kwargs){
  628. if(i >= args.size()) break;
  629. buffer[kv.key] = args[i++];
  630. }
  631. if(i < args.size()) TypeError(fmt("too many arguments", " (", decl->code->name, ')'));
  632. }
  633. PyObject* vkwargs;
  634. if(decl->starred_kwarg != -1){
  635. vkwargs = VAR(Dict(this));
  636. buffer[decl->starred_kwarg] = vkwargs;
  637. }else{
  638. vkwargs = nullptr;
  639. }
  640. for(int j=0; j<kwargs.size(); j+=2){
  641. StrName key(CAST(int, kwargs[j]));
  642. int index = co->varnames_inv.try_get(key);
  643. if(index < 0){
  644. if(vkwargs == nullptr){
  645. TypeError(fmt(key.escape(), " is an invalid keyword argument for ", co->name, "()"));
  646. }else{
  647. Dict& dict = _CAST(Dict&, vkwargs);
  648. dict.set(VAR(key.sv()), kwargs[j+1]);
  649. }
  650. }else{
  651. buffer[index] = kwargs[j+1];
  652. }
  653. }
  654. }
  655. PyObject* VM::vectorcall(int ARGC, int KWARGC, bool op_call){
  656. PyObject** p1 = s_data._sp - KWARGC*2;
  657. PyObject** p0 = p1 - ARGC - 2;
  658. // [callable, <self>, args..., kwargs...]
  659. // ^p0 ^p1 ^_sp
  660. PyObject* callable = p1[-(ARGC + 2)];
  661. bool method_call = p1[-(ARGC + 1)] != PY_NULL;
  662. // handle boundmethod, do a patch
  663. if(is_non_tagged_type(callable, tp_bound_method)){
  664. if(method_call) FATAL_ERROR();
  665. auto& bm = CAST(BoundMethod&, callable);
  666. callable = bm.func; // get unbound method
  667. p1[-(ARGC + 2)] = bm.func;
  668. p1[-(ARGC + 1)] = bm.self;
  669. method_call = true;
  670. // [unbound, self, args..., kwargs...]
  671. }
  672. ArgsView args(p1 - ARGC - int(method_call), p1);
  673. ArgsView kwargs(p1, s_data._sp);
  674. PyObject* buffer[PK_MAX_CO_VARNAMES];
  675. if(is_non_tagged_type(callable, tp_native_func)){
  676. const auto& f = PK_OBJ_GET(NativeFunc, callable);
  677. PyObject* ret;
  678. if(f.decl != nullptr){
  679. int co_nlocals = f.decl->code->varnames.size();
  680. _prepare_py_call(buffer, args, kwargs, f.decl);
  681. // copy buffer back to stack
  682. s_data.reset(args.begin());
  683. for(int j=0; j<co_nlocals; j++) PUSH(buffer[j]);
  684. ret = f.call(vm, ArgsView(s_data._sp - co_nlocals, s_data._sp));
  685. }else{
  686. if(KWARGC != 0) TypeError("old-style native_func does not accept keyword arguments");
  687. f.check_size(this, args);
  688. ret = f.call(this, args);
  689. }
  690. s_data.reset(p0);
  691. return ret;
  692. }
  693. if(is_non_tagged_type(callable, tp_function)){
  694. /*****************_py_call*****************/
  695. // callable must be a `function` object
  696. if(s_data.is_overflow()) StackOverflowError();
  697. const Function& fn = PK_OBJ_GET(Function, callable);
  698. const FuncDecl_& decl = fn.decl;
  699. const CodeObject* co = decl->code.get();
  700. int co_nlocals = co->varnames.size();
  701. _prepare_py_call(buffer, args, kwargs, decl);
  702. if(co->is_generator){
  703. s_data.reset(p0);
  704. return _py_generator(
  705. Frame(&s_data, nullptr, co, fn._module, callable),
  706. ArgsView(buffer, buffer + co_nlocals)
  707. );
  708. }
  709. // copy buffer back to stack
  710. s_data.reset(args.begin());
  711. for(int j=0; j<co_nlocals; j++) PUSH(buffer[j]);
  712. callstack.emplace(&s_data, p0, co, fn._module, callable, FastLocals(co, args.begin()));
  713. if(op_call) return PY_OP_CALL;
  714. return _run_top_frame();
  715. /*****************_py_call*****************/
  716. }
  717. if(is_non_tagged_type(callable, tp_type)){
  718. if(method_call) FATAL_ERROR();
  719. // [type, NULL, args..., kwargs...]
  720. PyObject* new_f = find_name_in_mro(callable, __new__);
  721. PyObject* obj;
  722. #if PK_DEBUG_EXTRA_CHECK
  723. PK_ASSERT(new_f != nullptr);
  724. #endif
  725. if(new_f == cached_object__new__) {
  726. // fast path for object.__new__
  727. Type t = PK_OBJ_GET(Type, callable);
  728. obj= vm->heap.gcnew<DummyInstance>(t);
  729. }else{
  730. PUSH(new_f);
  731. PUSH(PY_NULL);
  732. PUSH(callable); // cls
  733. for(PyObject* o: args) PUSH(o);
  734. for(PyObject* o: kwargs) PUSH(o);
  735. // if obj is not an instance of callable, the behavior is undefined
  736. obj = vectorcall(ARGC+1, KWARGC);
  737. }
  738. // __init__
  739. PyObject* self;
  740. callable = get_unbound_method(obj, __init__, &self, false);
  741. if (self != PY_NULL) {
  742. // replace `NULL` with `self`
  743. p1[-(ARGC + 2)] = callable;
  744. p1[-(ARGC + 1)] = self;
  745. // [init_f, self, args..., kwargs...]
  746. vectorcall(ARGC, KWARGC);
  747. // We just discard the return value of `__init__`
  748. // in cpython it raises a TypeError if the return value is not None
  749. }else{
  750. // manually reset the stack
  751. s_data.reset(p0);
  752. }
  753. return obj;
  754. }
  755. // handle `__call__` overload
  756. PyObject* self;
  757. PyObject* call_f = get_unbound_method(callable, __call__, &self, false);
  758. if(self != PY_NULL){
  759. p1[-(ARGC + 2)] = call_f;
  760. p1[-(ARGC + 1)] = self;
  761. // [call_f, self, args..., kwargs...]
  762. return vectorcall(ARGC, KWARGC, false);
  763. }
  764. TypeError(OBJ_NAME(_t(callable)).escape() + " object is not callable");
  765. return nullptr;
  766. }
// https://docs.python.org/3/howto/descriptor.html#invocation-from-an-instance
// Attribute lookup implementing a simplified descriptor protocol:
//   1. a tp_property found on the type acts as a data descriptor and wins;
//   2. otherwise the instance's own __dict__ is consulted;
//   3. otherwise the class variable itself is returned (functions and native
//      functions are wrapped into a BoundMethod, i.e. non-data descriptors).
// Returns nullptr when the name is not found and `throw_err` is false;
// raises AttributeError when `throw_err` is true.
PyObject* VM::getattr(PyObject* obj, StrName name, bool throw_err){
    PyObject* objtype;
    // handle super() proxy: resolve the name against the proxied type,
    // but bind any result to the wrapped instance
    if(is_non_tagged_type(obj, tp_super)){
        const Super& super = PK_OBJ_GET(Super, obj);
        obj = super.first;
        objtype = _t(super.second);
    }else{
        objtype = _t(obj);
    }
    PyObject* cls_var = find_name_in_mro(objtype, name);
    if(cls_var != nullptr){
        // handle descriptor: a property's getter runs BEFORE the instance
        // __dict__ is checked (data descriptor priority)
        if(is_non_tagged_type(cls_var, tp_property)){
            const Property& prop = _CAST(Property&, cls_var);
            return call(prop.getter, obj);
        }
    }
    // handle instance __dict__ (tagged values carry no attribute table)
    if(!is_tagged(obj) && obj->is_attr_valid()){
        PyObject* val = obj->attr().try_get(name);
        if(val != nullptr) return val;
    }
    if(cls_var != nullptr){
        // bound method is non-data descriptor: checked after instance __dict__
        if(is_non_tagged_type(cls_var, tp_function) || is_non_tagged_type(cls_var, tp_native_func)){
            return VAR(BoundMethod(obj, cls_var));
        }
        return cls_var;
    }
    if(throw_err) AttributeError(obj, name);
    return nullptr;
}
// used by OP_LOAD_METHOD
// try to load a unbound method (fallback to `getattr` if not found)
// Fast-path variant of getattr for method calls: when the name resolves to a
// plain (native) function on the type, the raw function is returned and
// *self receives the receiver — avoiding a BoundMethod allocation. In every
// other case *self stays PY_NULL and the resolved value itself is returned.
// When `fallback` is false, the instance __dict__ and property descriptors
// are skipped and only the class variable is considered.
PyObject* VM::get_unbound_method(PyObject* obj, StrName name, PyObject** self, bool throw_err, bool fallback){
    *self = PY_NULL;
    PyObject* objtype;
    // handle super() proxy: resolve against the proxied type, bind to the instance
    if(is_non_tagged_type(obj, tp_super)){
        const Super& super = PK_OBJ_GET(Super, obj);
        obj = super.first;
        objtype = _t(super.second);
    }else{
        objtype = _t(obj);
    }
    PyObject* cls_var = find_name_in_mro(objtype, name);
    if(fallback){
        if(cls_var != nullptr){
            // handle descriptor: property getter takes data-descriptor priority
            if(is_non_tagged_type(cls_var, tp_property)){
                const Property& prop = _CAST(Property&, cls_var);
                return call(prop.getter, obj);
            }
        }
        // handle instance __dict__: shadows non-data class variables
        if(!is_tagged(obj) && obj->is_attr_valid()){
            PyObject* val = obj->attr().try_get(name);
            if(val != nullptr) return val;
        }
    }
    if(cls_var != nullptr){
        // only (native) functions produce an unbound-method pair; any other
        // class variable is returned as-is with *self left as PY_NULL
        if(is_non_tagged_type(cls_var, tp_function) || is_non_tagged_type(cls_var, tp_native_func)){
            *self = obj;
        }
        return cls_var;
    }
    if(throw_err) AttributeError(obj, name);
    return nullptr;
}
// Set attribute `name` on `obj`. A property found on the type is a data
// descriptor: its setter runs instead of touching the instance __dict__,
// and a property whose setter is None raises "readonly attribute".
// Tagged values and objects without an attribute table cannot be assigned to.
void VM::setattr(PyObject* obj, StrName name, PyObject* value){
    PyObject* objtype;
    // handle super() proxy: resolve against the proxied type, act on the instance
    if(is_non_tagged_type(obj, tp_super)){
        Super& super = PK_OBJ_GET(Super, obj);
        obj = super.first;
        objtype = _t(super.second);
    }else{
        objtype = _t(obj);
    }
    PyObject* cls_var = find_name_in_mro(objtype, name);
    if(cls_var != nullptr){
        // handle descriptor: property setter takes priority over instance __dict__
        if(is_non_tagged_type(cls_var, tp_property)){
            const Property& prop = _CAST(Property&, cls_var);
            if(prop.setter != vm->None){
                call(prop.setter, obj, value);
            }else{
                // property defined without a setter — reject the write
                TypeError(fmt("readonly attribute: ", name.escape()));
            }
            return;
        }
    }
    // handle instance __dict__ (TypeError does not return — see _error)
    if(is_tagged(obj) || !obj->is_attr_valid()) TypeError("cannot set attribute");
    obj->attr().set(name, value);
}
  865. PyObject* VM::bind(PyObject* obj, const char* sig, NativeFuncC fn, UserData userdata){
  866. return bind(obj, sig, nullptr, fn, userdata);
  867. }
  868. PyObject* VM::bind(PyObject* obj, const char* sig, const char* docstring, NativeFuncC fn, UserData userdata){
  869. CodeObject_ co;
  870. try{
  871. // fn(a, b, *c, d=1) -> None
  872. co = compile("def " + Str(sig) + " : pass", "<bind>", EXEC_MODE);
  873. }catch(Exception&){
  874. throw std::runtime_error("invalid signature: " + std::string(sig));
  875. }
  876. if(co->func_decls.size() != 1){
  877. throw std::runtime_error("expected 1 function declaration");
  878. }
  879. FuncDecl_ decl = co->func_decls[0];
  880. decl->signature = Str(sig);
  881. if(docstring != nullptr){
  882. decl->docstring = Str(docstring).strip();
  883. }
  884. PyObject* f_obj = VAR(NativeFunc(fn, decl));
  885. PK_OBJ_GET(NativeFunc, f_obj).set_userdata(userdata);
  886. if(obj != nullptr) obj->attr().set(decl->code->name, f_obj);
  887. return f_obj;
  888. }
  889. PyObject* VM::bind_property(PyObject* obj, Str name, NativeFuncC fget, NativeFuncC fset){
  890. PyObject* _0 = heap.gcnew<NativeFunc>(tp_native_func, fget, 1, false);
  891. PyObject* _1 = vm->None;
  892. if(fset != nullptr) _1 = heap.gcnew<NativeFunc>(tp_native_func, fset, 2, false);
  893. Str type_hint;
  894. int pos = name.index(":");
  895. if(pos > 0){
  896. type_hint = name.substr(pos + 1).strip();
  897. name = name.substr(0, pos).strip();
  898. }
  899. PyObject* prop = VAR(Property(_0, _1, type_hint));
  900. obj->attr().set(name, prop);
  901. return prop;
  902. }
// Raise `e` inside the VM. With an empty call stack there is no Python frame
// to unwind, so the exception escapes to the C++ caller directly; otherwise
// the exception object is pushed and the regular raise machinery takes over.
void VM::_error(Exception e){
    if(callstack.empty()){
        e.is_re = false;
        throw e;
    }
    PUSH(VAR(e));
    _raise();
}
  911. void ManagedHeap::mark() {
  912. for(PyObject* obj: _no_gc) PK_OBJ_MARK(obj);
  913. for(auto& frame : vm->callstack.data()) frame._gc_mark();
  914. for(PyObject* obj: vm->s_data) PK_OBJ_MARK(obj);
  915. if(_gc_marker_ex) _gc_marker_ex(vm);
  916. if(vm->_last_exception) PK_OBJ_MARK(vm->_last_exception);
  917. if(vm->_c.error != nullptr) PK_OBJ_MARK(vm->_c.error);
  918. }
// Human-readable name of `type`, taken from the VM's type table.
Str obj_type_name(VM *vm, Type type){
    return vm->_all_types[type].name;
}
// Register a native __hash__ for `type`: store the raw function pointer in
// the type's slot (fast path used by the VM) and also expose a Python-visible
// __hash__ method that forwards to the same pointer via NativeFunc userdata.
void VM::bind__hash__(Type type, i64 (*f)(VM*, PyObject*)){
    PyObject* obj = _t(type);
    _all_types[type].m__hash__ = f;
    PyObject* nf = bind_method<0>(obj, "__hash__", [](VM* vm, ArgsView args){
        // recover the function pointer stashed as userdata on this NativeFunc
        i64 ret = lambda_get_userdata<i64(*)(VM*, PyObject*)>(args.begin())(vm, args[0]);
        return VAR(ret);
    });
    PK_OBJ_GET(NativeFunc, nf).set_userdata(f);
}
// Register a native __len__ for `type`; mirrors bind__hash__: the pointer is
// cached in the type slot for the VM's fast path and also wrapped as a
// Python-visible __len__ method that retrieves it from NativeFunc userdata.
void VM::bind__len__(Type type, i64 (*f)(VM*, PyObject*)){
    PyObject* obj = _t(type);
    _all_types[type].m__len__ = f;
    PyObject* nf = bind_method<0>(obj, "__len__", [](VM* vm, ArgsView args){
        // recover the function pointer stashed as userdata on this NativeFunc
        i64 ret = lambda_get_userdata<i64(*)(VM*, PyObject*)>(args.begin())(vm, args[0]);
        return VAR(ret);
    });
    PK_OBJ_GET(NativeFunc, nf).set_userdata(f);
}
  940. void Dict::_probe(PyObject *key, bool &ok, int &i) const{
  941. ok = false;
  942. i = vm->py_hash(key) & _mask;
  943. while(_items[i].first != nullptr) {
  944. if(vm->py_equals(_items[i].first, key)) { ok = true; break; }
  945. // https://github.com/python/cpython/blob/3.8/Objects/dictobject.c#L166
  946. i = ((5*i) + 1) & _mask;
  947. }
  948. }
// Serialize a constant object into the code stream. Only immutable literal
// types are supported (int, float, str, bool, None, Ellipsis); anything else
// cannot round-trip and raises std::runtime_error.
void CodeObjectSerializer::write_object(VM *vm, PyObject *obj){
    if(is_int(obj)) write_int(_CAST(i64, obj));
    else if(is_float(obj)) write_float(_CAST(f64, obj));
    else if(is_type(obj, vm->tp_str)) write_str(_CAST(Str&, obj));
    else if(is_type(obj, vm->tp_bool)) write_bool(_CAST(bool, obj));
    else if(obj == vm->None) write_none();
    else if(obj == vm->Ellipsis) write_ellipsis();
    else{
        throw std::runtime_error(fmt(OBJ_NAME(vm->_t(obj)).escape(), " is not serializable"));
    }
}
  960. void NativeFunc::check_size(VM* vm, ArgsView args) const{
  961. if(args.size() != argc && argc != -1) {
  962. vm->TypeError(fmt("expected ", argc, " arguments, got ", args.size()));
  963. }
  964. }
// Forward the call to the wrapped C function pointer.
PyObject* NativeFunc::call(VM *vm, ArgsView args) const {
    return f(vm, args);
}
  968. } // namespace pkpy