vm.h 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057
  1. #pragma once
  2. #include "codeobject.h"
  3. #include "iter.h"
  4. #include "error.h"
  // Generates `Py<type>_AS_C(obj)`: calls check_type(obj, ptype) and then
  // returns a mutable reference to the ctype payload via UNION_GET(ctype, obj).
  5. #define __DEF_PY_AS_C(type, ctype, ptype) \
  6. inline ctype& Py##type##_AS_C(const PyVar& obj) { \
  7. check_type(obj, ptype); \
  8. return UNION_GET(ctype, obj); \
  9. }
  // Generates `Py<type>(value)`: wraps a ctype value in a new object of type ptype
  // by forwarding to new_object(ptype, value).
  10. #define __DEF_PY(type, ctype, ptype) \
  11. inline PyVar Py##type(ctype value) { \
  12. return new_object(ptype, value); \
  13. }
  // Emits both helpers above (constructor + accessor) for one native type.
  14. #define DEF_NATIVE(type, ctype, ptype) \
  15. __DEF_PY(type, ctype, ptype) \
  16. __DEF_PY_AS_C(type, ctype, ptype)
  17. class VM {
  // Pre-built int objects for the values -5..256; filled by the constructor
  // and handed out by PyInt() instead of allocating a new object.
  18. std::vector<PyVar> _small_integers; // [-5, 256]
  19. protected:
  // Active frames, innermost last; frames are appended by __push_new_frame().
  20. std::deque< std::unique_ptr<Frame> > callstack;
  // Sentinel object returned by call(..., opCall=true) after it pushed a frame for a
  // Python function; run_frame() returns it unchanged and _exec() switches to the new frame.
  21. PyVar __py2py_call_signal;
  22. PyVar run_frame(Frame* frame){
  23. while(frame->has_next_bytecode()){
  24. const Bytecode& byte = frame->next_bytecode();
  25. //printf("[%d] %s (%d)\n", frame->stack_size(), OP_NAMES[byte.op], byte.arg);
  26. //printf("%s\n", frame->code->src->getLine(byte.line).c_str());
  27. switch (byte.op)
  28. {
  29. case OP_NO_OP: break; // do nothing
  30. case OP_LOAD_CONST: frame->push(frame->code->co_consts[byte.arg]); break;
  31. case OP_LOAD_LAMBDA: {
  32. PyVar obj = frame->code->co_consts[byte.arg];
  33. setattr(obj, __module__, frame->_module);
  34. frame->push(obj);
  35. } break;
  36. case OP_LOAD_NAME_REF: {
  37. frame->push(PyRef(NameRef(frame->code->co_names[byte.arg])));
  38. } break;
  39. case OP_LOAD_NAME: {
  40. frame->push(NameRef(frame->code->co_names[byte.arg]).get(this, frame));
  41. } break;
  42. case OP_STORE_NAME: {
  43. const auto& p = frame->code->co_names[byte.arg];
  44. NameRef(p).set(this, frame, frame->pop_value(this));
  45. } break;
  46. case OP_BUILD_ATTR_REF: {
  47. const auto& attr = frame->code->co_names[byte.arg];
  48. PyVar obj = frame->pop_value(this);
  49. frame->push(PyRef(AttrRef(obj, NameRef(attr))));
  50. } break;
  51. case OP_BUILD_INDEX_REF: {
  52. PyVar index = frame->pop_value(this);
  53. PyVarRef obj = frame->pop_value(this);
  54. frame->push(PyRef(IndexRef(obj, index)));
  55. } break;
  56. case OP_STORE_REF: {
  57. PyVar obj = frame->pop_value(this);
  58. PyVarRef r = frame->pop();
  59. PyRef_AS_C(r)->set(this, frame, std::move(obj));
  60. } break;
  61. case OP_DELETE_REF: {
  62. PyVarRef r = frame->pop();
  63. PyRef_AS_C(r)->del(this, frame);
  64. } break;
  65. case OP_BUILD_SMART_TUPLE:
  66. {
  67. pkpy::ArgList items = frame->pop_n_reversed(byte.arg);
  68. bool done = false;
  69. for(int i=0; i<items.size(); i++){
  70. if(!items[i]->is_type(_tp_ref)) {
  71. done = true;
  72. PyVarList values = items.toList();
  73. for(int j=i; j<values.size(); j++) frame->try_deref(this, values[j]);
  74. frame->push(PyTuple(values));
  75. break;
  76. }
  77. }
  78. if(done) break;
  79. frame->push(PyRef(TupleRef(items.toList())));
  80. } break;
  81. case OP_BUILD_STRING:
  82. {
  83. pkpy::ArgList items = frame->pop_n_values_reversed(this, byte.arg);
  84. _StrStream ss;
  85. for(int i=0; i<items.size(); i++) ss << PyStr_AS_C(asStr(items[i]));
  86. frame->push(PyStr(ss.str()));
  87. } break;
  88. case OP_LOAD_EVAL_FN: {
  89. frame->push(builtins->attribs[m_eval]);
  90. } break;
  91. case OP_LIST_APPEND: {
  92. pkpy::ArgList args(2);
  93. args[1] = frame->pop_value(this); // obj
  94. args[0] = frame->top_value_offset(this, -2); // list
  95. fast_call(m_append, std::move(args));
  96. } break;
  97. case OP_STORE_FUNCTION:
  98. {
  99. PyVar obj = frame->pop_value(this);
  100. const _Func& fn = PyFunction_AS_C(obj);
  101. setattr(obj, __module__, frame->_module);
  102. frame->f_globals()[fn->name] = obj;
  103. } break;
  104. case OP_BUILD_CLASS:
  105. {
  106. const _Str& clsName = frame->code->co_names[byte.arg].first;
  107. PyVar clsBase = frame->pop_value(this);
  108. if(clsBase == None) clsBase = _tp_object;
  109. check_type(clsBase, _tp_type);
  110. PyVar cls = new_user_type_object(frame->_module, clsName, clsBase);
  111. while(true){
  112. PyVar fn = frame->pop_value(this);
  113. if(fn == None) break;
  114. const _Func& f = PyFunction_AS_C(fn);
  115. setattr(fn, __module__, frame->_module);
  116. setattr(cls, f->name, fn);
  117. }
  118. } break;
  119. case OP_RETURN_VALUE: return frame->pop_value(this);
  120. case OP_PRINT_EXPR:
  121. {
  122. const PyVar expr = frame->top_value(this);
  123. if(expr == None) break;
  124. *_stdout << PyStr_AS_C(asRepr(expr)) << '\n';
  125. } break;
  126. case OP_POP_TOP: frame->__pop(); break;
  127. case OP_BINARY_OP:
  128. {
  129. pkpy::ArgList args(2);
  130. args[1] = frame->pop_value(this);
  131. args[0] = frame->top_value(this);
  132. frame->top() = fast_call(BINARY_SPECIAL_METHODS[byte.arg], std::move(args));
  133. } break;
  134. case OP_BITWISE_OP:
  135. {
  136. frame->push(
  137. fast_call(BITWISE_SPECIAL_METHODS[byte.arg],
  138. frame->pop_n_values_reversed(this, 2))
  139. );
  140. } break;
  141. case OP_COMPARE_OP:
  142. {
  143. pkpy::ArgList args(2);
  144. args[1] = frame->pop_value(this);
  145. args[0] = frame->top_value(this);
  146. frame->top() = fast_call(CMP_SPECIAL_METHODS[byte.arg], std::move(args));
  147. } break;
  148. case OP_IS_OP:
  149. {
  150. PyVar rhs = frame->pop_value(this);
  151. bool ret_c = rhs == frame->top_value(this);
  152. if(byte.arg == 1) ret_c = !ret_c;
  153. frame->top() = PyBool(ret_c);
  154. } break;
  155. case OP_CONTAINS_OP:
  156. {
  157. PyVar rhs = frame->pop_value(this);
  158. bool ret_c = PyBool_AS_C(call(rhs, __contains__, pkpy::oneArg(frame->pop_value(this))));
  159. if(byte.arg == 1) ret_c = !ret_c;
  160. frame->push(PyBool(ret_c));
  161. } break;
  162. case OP_UNARY_NEGATIVE:
  163. frame->top() = num_negated(frame->top_value(this));
  164. break;
  165. case OP_UNARY_NOT:
  166. {
  167. PyVar obj = frame->pop_value(this);
  168. const PyVar& obj_bool = asBool(obj);
  169. frame->push(PyBool(!PyBool_AS_C(obj_bool)));
  170. } break;
  171. case OP_POP_JUMP_IF_FALSE:
  172. if(!PyBool_AS_C(asBool(frame->pop_value(this)))) frame->jump_abs(byte.arg);
  173. break;
  174. case OP_LOAD_NONE: frame->push(None); break;
  175. case OP_LOAD_TRUE: frame->push(True); break;
  176. case OP_LOAD_FALSE: frame->push(False); break;
  177. case OP_LOAD_ELLIPSIS: frame->push(Ellipsis); break;
  178. case OP_ASSERT:
  179. {
  180. PyVar expr = frame->pop_value(this);
  181. if(asBool(expr) != True) _error("AssertionError", "");
  182. } break;
  183. case OP_RAISE_ERROR:
  184. {
  185. _Str msg = PyStr_AS_C(asRepr(frame->pop_value(this)));
  186. _Str type = PyStr_AS_C(frame->pop_value(this));
  187. _error(type, msg);
  188. } break;
  189. case OP_BUILD_LIST:
  190. {
  191. frame->push(PyList(
  192. frame->pop_n_values_reversed(this, byte.arg).toList()
  193. ));
  194. } break;
  195. case OP_BUILD_MAP:
  196. {
  197. pkpy::ArgList items = frame->pop_n_values_reversed(this, byte.arg*2);
  198. PyVar obj = call(builtins->attribs["dict"]);
  199. for(int i=0; i<items.size(); i+=2){
  200. call(obj, __setitem__, pkpy::twoArgs(items[i], items[i+1]));
  201. }
  202. frame->push(obj);
  203. } break;
  204. case OP_BUILD_SET:
  205. {
  206. PyVar list = PyList(
  207. frame->pop_n_values_reversed(this, byte.arg).toList()
  208. );
  209. PyVar obj = call(builtins->attribs["set"], pkpy::oneArg(list));
  210. frame->push(obj);
  211. } break;
  212. case OP_DUP_TOP: frame->push(frame->top_value(this)); break;
  213. case OP_CALL:
  214. {
  215. int ARGC = byte.arg & 0xFFFF;
  216. int KWARGC = (byte.arg >> 16) & 0xFFFF;
  217. pkpy::ArgList kwargs(0);
  218. if(KWARGC > 0) kwargs = frame->pop_n_values_reversed(this, KWARGC*2);
  219. pkpy::ArgList args = frame->pop_n_values_reversed(this, ARGC);
  220. PyVar callable = frame->pop_value(this);
  221. PyVar ret = call(callable, std::move(args), kwargs, true);
  222. if(ret == __py2py_call_signal) return ret;
  223. frame->push(std::move(ret));
  224. } break;
  225. case OP_JUMP_ABSOLUTE: frame->jump_abs(byte.arg); break;
  226. case OP_SAFE_JUMP_ABSOLUTE: frame->jump_abs_safe(byte.arg); break;
  227. case OP_GOTO: {
  228. PyVar obj = frame->pop_value(this);
  229. const _Str& label = PyStr_AS_C(obj);
  230. int* target = frame->code->co_labels.try_get(label);
  231. if(target == nullptr){
  232. _error("KeyError", "label '" + label + "' not found");
  233. }
  234. frame->jump_abs_safe(*target);
  235. } break;
  236. case OP_GET_ITER:
  237. {
  238. PyVar obj = frame->pop_value(this);
  239. PyVarOrNull iter_fn = getattr(obj, __iter__, false);
  240. if(iter_fn != nullptr){
  241. PyVar tmp = call(iter_fn);
  242. PyVarRef var = frame->pop();
  243. check_type(var, _tp_ref);
  244. PyIter_AS_C(tmp)->var = var;
  245. frame->push(std::move(tmp));
  246. }else{
  247. typeError("'" + UNION_TP_NAME(obj) + "' object is not iterable");
  248. }
  249. } break;
  250. case OP_FOR_ITER:
  251. {
  252. // top() must be PyIter, so no need to try_deref()
  253. auto& it = PyIter_AS_C(frame->top());
  254. if(it->hasNext()){
  255. PyRef_AS_C(it->var)->set(this, frame, it->next());
  256. }else{
  257. int blockEnd = frame->code->co_blocks[byte.block].end;
  258. frame->jump_abs_safe(blockEnd);
  259. }
  260. } break;
  261. case OP_LOOP_CONTINUE:
  262. {
  263. int blockStart = frame->code->co_blocks[byte.block].start;
  264. frame->jump_abs(blockStart);
  265. } break;
  266. case OP_LOOP_BREAK:
  267. {
  268. int blockEnd = frame->code->co_blocks[byte.block].end;
  269. frame->jump_abs_safe(blockEnd);
  270. } break;
  271. case OP_JUMP_IF_FALSE_OR_POP:
  272. {
  273. const PyVar expr = frame->top_value(this);
  274. if(asBool(expr)==False) frame->jump_abs(byte.arg);
  275. else frame->pop_value(this);
  276. } break;
  277. case OP_JUMP_IF_TRUE_OR_POP:
  278. {
  279. const PyVar expr = frame->top_value(this);
  280. if(asBool(expr)==True) frame->jump_abs(byte.arg);
  281. else frame->pop_value(this);
  282. } break;
  283. case OP_BUILD_SLICE:
  284. {
  285. PyVar stop = frame->pop_value(this);
  286. PyVar start = frame->pop_value(this);
  287. _Slice s;
  288. if(start != None) {check_type(start, _tp_int); s.start = (int)PyInt_AS_C(start);}
  289. if(stop != None) {check_type(stop, _tp_int); s.stop = (int)PyInt_AS_C(stop);}
  290. frame->push(PySlice(s));
  291. } break;
  292. case OP_IMPORT_NAME:
  293. {
  294. const _Str& name = frame->code->co_names[byte.arg].first;
  295. auto it = _modules.find(name);
  296. if(it == _modules.end()){
  297. auto it2 = _lazy_modules.find(name);
  298. if(it2 == _lazy_modules.end()){
  299. _error("ImportError", "module '" + name + "' not found");
  300. }else{
  301. const _Str& source = it2->second;
  302. _Code code = compile(source, name, EXEC_MODE);
  303. PyVar _m = new_module(name);
  304. _exec(code, _m, pkpy::make_shared<PyVarDict>());
  305. frame->push(_m);
  306. _lazy_modules.erase(it2);
  307. }
  308. }else{
  309. frame->push(it->second);
  310. }
  311. } break;
  312. // TODO: using "goto" inside with block may cause __exit__ not called
  313. case OP_WITH_ENTER: call(frame->pop_value(this), __enter__); break;
  314. case OP_WITH_EXIT: call(frame->pop_value(this), __exit__); break;
  315. default:
  316. throw std::runtime_error(_Str("opcode ") + OP_NAMES[byte.op] + " is not implemented");
  317. break;
  318. }
  319. }
  320. if(frame->code->src->mode == EVAL_MODE || frame->code->src->mode == JSON_MODE){
  321. if(frame->stack_size() != 1) throw std::runtime_error("stack size is not 1 in EVAL_MODE/JSON_MODE");
  322. return frame->pop_value(this);
  323. }
  324. if(frame->stack_size() != 0) throw std::runtime_error("stack not empty in EXEC_MODE");
  325. return None;
  326. }
  327. public:
  // Type objects registered by name through new_type_object().
  328. PyVarDict _types;
  329. PyVarDict _modules; // loaded modules
  // Module name -> source text; compiled and executed on first import (see OP_IMPORT_NAME).
  330. emhash8::HashMap<_Str, _Str> _lazy_modules; // lazy loaded modules
  // Singleton objects created in initializeBuiltinClasses().
  331. PyVar None, True, False, Ellipsis;
  // Copy of the constructor argument: true selects std::cout/std::cerr for output,
  // false selects heap-allocated _StrStream buffers.
  332. bool use_stdio;
  333. std::ostream* _stdout;
  334. std::ostream* _stderr;
  335. PyVar builtins; // builtins module
  336. PyVar _main; // __main__ module
  // Upper bound on callstack.size() checked by __push_new_frame().
  337. int maxRecursionDepth = 1000;
  338. VM(bool use_stdio){
  339. this->use_stdio = use_stdio;
  340. if(use_stdio){
  341. std::cout.setf(std::ios::unitbuf);
  342. std::cerr.setf(std::ios::unitbuf);
  343. this->_stdout = &std::cout;
  344. this->_stderr = &std::cerr;
  345. }else{
  346. this->_stdout = new _StrStream();
  347. this->_stderr = new _StrStream();
  348. }
  349. initializeBuiltinClasses();
  350. _small_integers.reserve(270);
  351. for(i64 i=-5; i<=256; i++) _small_integers.push_back(new_object(_tp_int, i));
  352. }
  353. PyVar asStr(const PyVar& obj){
  354. PyVarOrNull str_fn = getattr(obj, __str__, false);
  355. if(str_fn != nullptr) return call(str_fn);
  356. return asRepr(obj);
  357. }
  358. inline Frame* top_frame() const {
  359. if(callstack.empty()) UNREACHABLE();
  360. return callstack.back().get();
  361. }
  362. PyVar asRepr(const PyVar& obj){
  363. if(obj->is_type(_tp_type)) return PyStr("<class '" + UNION_GET(_Str, obj->attribs[__name__]) + "'>");
  364. return call(obj, __repr__);
  365. }
  366. PyVar asJson(const PyVar& obj){
  367. return call(obj, __json__);
  368. }
  369. const PyVar& asBool(const PyVar& obj){
  370. if(obj->is_type(_tp_bool)) return obj;
  371. if(obj == None) return False;
  372. if(obj->is_type(_tp_int)) return PyBool(PyInt_AS_C(obj) != 0);
  373. if(obj->is_type(_tp_float)) return PyBool(PyFloat_AS_C(obj) != 0.0);
  374. PyVarOrNull len_fn = getattr(obj, __len__, false);
  375. if(len_fn != nullptr){
  376. PyVar ret = call(len_fn);
  377. return PyBool(PyInt_AS_C(ret) > 0);
  378. }
  379. return True;
  380. }
  381. PyVar fast_call(const _Str& name, pkpy::ArgList&& args){
  382. PyObject* cls = args[0]->_type.get();
  383. while(cls != None.get()) {
  384. PyVar* val = cls->attribs.try_get(name);
  385. if(val != nullptr) return call(*val, std::move(args));
  386. cls = cls->attribs[__base__].get();
  387. }
  388. attributeError(args[0], name);
  389. return nullptr;
  390. }
  391. inline PyVar call(const PyVar& _callable){
  392. return call(_callable, pkpy::noArg(), pkpy::noArg(), false);
  393. }
  394. template<typename ArgT>
  395. inline std::enable_if_t<std::is_same_v<std::remove_const_t<std::remove_reference_t<ArgT>>, pkpy::ArgList>, PyVar>
  396. call(const PyVar& _callable, ArgT&& args){
  397. return call(_callable, std::forward<ArgT>(args), pkpy::noArg(), false);
  398. }
  399. template<typename ArgT>
  400. inline std::enable_if_t<std::is_same_v<std::remove_const_t<std::remove_reference_t<ArgT>>, pkpy::ArgList>, PyVar>
  401. call(const PyVar& obj, const _Str& func, ArgT&& args){
  402. return call(getattr(obj, func), std::forward<ArgT>(args), pkpy::noArg(), false);
  403. }
  404. inline PyVar call(const PyVar& obj, const _Str& func){
  405. return call(getattr(obj, func), pkpy::noArg(), pkpy::noArg(), false);
  406. }
  407. PyVar call(const PyVar& _callable, pkpy::ArgList args, const pkpy::ArgList& kwargs, bool opCall){
  408. if(_callable->is_type(_tp_type)){
  409. auto it = _callable->attribs.find(__new__);
  410. PyVar obj;
  411. if(it != _callable->attribs.end()){
  412. obj = call(it->second, args, kwargs, false);
  413. }else{
  414. obj = new_object(_callable, DUMMY_VAL);
  415. PyVarOrNull init_fn = getattr(obj, __init__, false);
  416. if (init_fn != nullptr) call(init_fn, args, kwargs, false);
  417. }
  418. return obj;
  419. }
  420. const PyVar* callable = &_callable;
  421. if((*callable)->is_type(_tp_bounded_method)){
  422. auto& bm = PyBoundedMethod_AS_C((*callable));
  423. callable = &bm.method; // get unbound method
  424. args.extend_self(bm.obj);
  425. }
  426. if((*callable)->is_type(_tp_native_function)){
  427. const auto& f = UNION_GET(_CppFunc, *callable);
  428. // _CppFunc do not support kwargs
  429. return f(this, args);
  430. } else if((*callable)->is_type(_tp_function)){
  431. const _Func& fn = PyFunction_AS_C((*callable));
  432. pkpy::shared_ptr<PyVarDict> _locals = pkpy::make_shared<PyVarDict>();
  433. PyVarDict& locals = *_locals;
  434. int i = 0;
  435. for(const auto& name : fn->args){
  436. if(i < args.size()){
  437. locals.emplace(name, args[i++]);
  438. continue;
  439. }
  440. typeError("missing positional argument '" + name + "'");
  441. }
  442. locals.insert(fn->kwArgs.begin(), fn->kwArgs.end());
  443. std::vector<_Str> positional_overrided_keys;
  444. if(!fn->starredArg.empty()){
  445. // handle *args
  446. PyVarList vargs;
  447. while(i < args.size()) vargs.push_back(args[i++]);
  448. locals.emplace(fn->starredArg, PyTuple(std::move(vargs)));
  449. }else{
  450. for(const auto& key : fn->kwArgsOrder){
  451. if(i < args.size()){
  452. locals[key] = args[i++];
  453. positional_overrided_keys.push_back(key);
  454. }else{
  455. break;
  456. }
  457. }
  458. if(i < args.size()) typeError("too many arguments");
  459. }
  460. for(int i=0; i<kwargs.size(); i+=2){
  461. const _Str& key = PyStr_AS_C(kwargs[i]);
  462. if(!fn->kwArgs.contains(key)){
  463. typeError(key.__escape(true) + " is an invalid keyword argument for " + fn->name + "()");
  464. }
  465. const PyVar& val = kwargs[i+1];
  466. if(!positional_overrided_keys.empty()){
  467. auto it = std::find(positional_overrided_keys.begin(), positional_overrided_keys.end(), key);
  468. if(it != positional_overrided_keys.end()){
  469. typeError("multiple values for argument '" + key + "'");
  470. }
  471. }
  472. locals[key] = val;
  473. }
  474. PyVar* it_m = (*callable)->attribs.try_get(__module__);
  475. PyVar _module = it_m != nullptr ? *it_m : top_frame()->_module;
  476. if(opCall){
  477. __push_new_frame(fn->code, _module, _locals);
  478. return __py2py_call_signal;
  479. }
  480. return _exec(fn->code, _module, _locals);
  481. }
  482. typeError("'" + UNION_TP_NAME(*callable) + "' object is not callable");
  483. return None;
  484. }
  485. // repl mode is only for setting `frame->id` to 0
  486. PyVarOrNull exec(_Str source, _Str filename, CompileMode mode, PyVar _module=nullptr){
  487. if(_module == nullptr) _module = _main;
  488. try {
  489. _Code code = compile(source, filename, mode);
  490. //if(filename != "<builtins>") std::cout << disassemble(code) << std::endl;
  491. return _exec(code, _module, pkpy::make_shared<PyVarDict>());
  492. }catch (const _Error& e){
  493. *_stderr << e.what() << '\n';
  494. }
  495. catch (const std::exception& e) {
  496. auto re = RuntimeError("UnexpectedError", e.what(), _cleanErrorAndGetSnapshots());
  497. *_stderr << re.what() << '\n';
  498. }
  499. return nullptr;
  500. }
  501. template<typename ...Args>
  502. Frame* __push_new_frame(Args&&... args){
  503. if(callstack.size() > maxRecursionDepth){
  504. throw RuntimeError("RecursionError", "maximum recursion depth exceeded", _cleanErrorAndGetSnapshots());
  505. }
  506. callstack.emplace_back(std::make_unique<Frame>(std::forward<Args>(args)...));
  507. return callstack.back().get();
  508. }
  509. template<typename ...Args>
  510. PyVar _exec(Args&&... args){
  511. Frame* frame = __push_new_frame(std::forward<Args>(args)...);
  512. Frame* frameBase = frame;
  513. PyVar ret = nullptr;
  514. while(true){
  515. ret = run_frame(frame);
  516. if(ret != __py2py_call_signal){
  517. if(frame == frameBase){ // [ frameBase<- ]
  518. break;
  519. }else{
  520. callstack.pop_back();
  521. frame = callstack.back().get();
  522. frame->push(ret);
  523. }
  524. }else{
  525. frame = callstack.back().get(); // [ frameBase, newFrame<- ]
  526. }
  527. }
  528. callstack.pop_back();
  529. return ret;
  530. }
  531. PyVar new_user_type_object(PyVar mod, _Str name, PyVar base){
  532. PyVar obj = pkpy::make_shared<PyObject, Py_<i64>>((i64)1, _tp_type);
  533. setattr(obj, __base__, base);
  534. _Str fullName = name;
  535. if(mod != builtins) fullName = UNION_NAME(mod) + "." + name;
  536. setattr(obj, __name__, PyStr(fullName));
  537. setattr(mod, name, obj);
  538. return obj;
  539. }
  540. PyVar new_type_object(_Str name, PyVar base=nullptr) {
  541. if(base == nullptr) base = _tp_object;
  542. PyVar obj = pkpy::make_shared<PyObject, Py_<i64>>((i64)0, _tp_type);
  543. setattr(obj, __base__, base);
  544. _types[name] = obj;
  545. return obj;
  546. }
  547. template<typename T>
  548. inline PyVar new_object(PyVar type, T _value) {
  549. if(!type->is_type(_tp_type)) UNREACHABLE();
  550. return pkpy::make_shared<PyObject, Py_<T>>(_value, type);
  551. }
  552. template<typename T, typename... Args>
  553. inline PyVar new_object_c(Args&&... args) {
  554. return new_object(T::_tp(this), T(std::forward<Args>(args)...));
  555. }
  556. PyVar new_module(_Str name) {
  557. PyVar obj = new_object(_tp_module, DUMMY_VAL);
  558. setattr(obj, __name__, PyStr(name));
  559. _modules[name] = obj;
  560. return obj;
  561. }
  562. PyVar new_module_if_not_existed(_Str name) {
  563. PyVar* it = _modules.try_get(name);
  564. if(it != nullptr) return *it;
  565. return new_module(name);
  566. }
  567. PyVarOrNull getattr(const PyVar& obj, const _Str& name, bool throw_err=true) {
  568. PyVarDict::iterator it;
  569. PyObject* cls;
  570. if(obj->is_type(_tp_super)){
  571. const PyVar* root = &obj;
  572. int depth = 1;
  573. while(true){
  574. root = &UNION_GET(PyVar, *root);
  575. if(!(*root)->is_type(_tp_super)) break;
  576. depth++;
  577. }
  578. cls = (*root)->_type.get();
  579. for(int i=0; i<depth; i++) cls = cls->attribs[__base__].get();
  580. it = (*root)->attribs.find(name);
  581. if(it != (*root)->attribs.end()) return it->second;
  582. }else{
  583. it = obj->attribs.find(name);
  584. if(it != obj->attribs.end()) return it->second;
  585. cls = obj->_type.get();
  586. }
  587. while(cls != None.get()) {
  588. it = cls->attribs.find(name);
  589. if(it != cls->attribs.end()){
  590. PyVar valueFromCls = it->second;
  591. if(valueFromCls->is_type(_tp_function) || valueFromCls->is_type(_tp_native_function)){
  592. return PyBoundedMethod({obj, std::move(valueFromCls)});
  593. }else{
  594. return valueFromCls;
  595. }
  596. }
  597. cls = cls->attribs[__base__].get();
  598. }
  599. if(throw_err) attributeError(obj, name);
  600. return nullptr;
  601. }
  602. template<typename T>
  603. inline void setattr(PyVar& obj, const _Str& name, T&& value) {
  604. PyObject* p = obj.get();
  605. while(p->is_type(_tp_super)) p = ((Py_<PyVar>*)p)->_valueT.get();
  606. p->attribs[name] = std::forward<T>(value);
  607. }
  608. template<int ARGC>
  609. void bindMethod(PyVar obj, _Str funcName, _CppFuncRaw fn) {
  610. check_type(obj, _tp_type);
  611. setattr(obj, funcName, PyNativeFunction(_CppFunc(fn, ARGC, true)));
  612. }
  613. template<int ARGC>
  614. void bindFunc(PyVar obj, _Str funcName, _CppFuncRaw fn) {
  615. setattr(obj, funcName, PyNativeFunction(_CppFunc(fn, ARGC, false)));
  616. }
  617. template<int ARGC>
  618. void bindMethod(_Str typeName, _Str funcName, _CppFuncRaw fn) {
  619. bindMethod<ARGC>(_types[typeName], funcName, fn);
  620. }
  621. template<int ARGC>
  622. void bindStaticMethod(_Str typeName, _Str funcName, _CppFuncRaw fn) {
  623. bindFunc<ARGC>(_types[typeName], funcName, fn);
  624. }
  625. template<int ARGC>
  626. void bindMethodMulti(std::vector<_Str> typeNames, _Str funcName, _CppFuncRaw fn) {
  627. for(auto& typeName : typeNames) bindMethod<ARGC>(typeName, funcName, fn);
  628. }
  629. template<int ARGC>
  630. void bindBuiltinFunc(_Str funcName, _CppFuncRaw fn) {
  631. bindFunc<ARGC>(builtins, funcName, fn);
  632. }
  633. inline f64 num_to_float(const PyVar& obj){
  634. if (obj->is_type(_tp_int)){
  635. return (f64)PyInt_AS_C(obj);
  636. }else if(obj->is_type(_tp_float)){
  637. return PyFloat_AS_C(obj);
  638. }
  639. typeError("expected int or float, got " + UNION_TP_NAME(obj));
  640. return 0;
  641. }
  642. PyVar num_negated(const PyVar& obj){
  643. if (obj->is_type(_tp_int)){
  644. return PyInt(-PyInt_AS_C(obj));
  645. }else if(obj->is_type(_tp_float)){
  646. return PyFloat(-PyFloat_AS_C(obj));
  647. }
  648. typeError("unsupported operand type(s) for -");
  649. return nullptr;
  650. }
  651. int normalized_index(int index, int size){
  652. if(index < 0) index += size;
  653. if(index < 0 || index >= size){
  654. indexError("index out of range, " + std::to_string(index) + " not in [0, " + std::to_string(size) + ")");
  655. }
  656. return index;
  657. }
  658. _Str disassemble(_Code code){
  659. std::vector<int> jumpTargets;
  660. for(auto byte : code->co_code){
  661. if(byte.op == OP_JUMP_ABSOLUTE || byte.op == OP_SAFE_JUMP_ABSOLUTE || byte.op == OP_POP_JUMP_IF_FALSE){
  662. jumpTargets.push_back(byte.arg);
  663. }
  664. }
  665. _StrStream ss;
  666. ss << std::string(54, '-') << '\n';
  667. ss << code->name << ":\n";
  668. int prev_line = -1;
  669. for(int i=0; i<code->co_code.size(); i++){
  670. const Bytecode& byte = code->co_code[i];
  671. _Str line = std::to_string(byte.line);
  672. if(byte.line == prev_line) line = "";
  673. else{
  674. if(prev_line != -1) ss << "\n";
  675. prev_line = byte.line;
  676. }
  677. std::string pointer;
  678. if(std::find(jumpTargets.begin(), jumpTargets.end(), i) != jumpTargets.end()){
  679. pointer = "-> ";
  680. }else{
  681. pointer = " ";
  682. }
  683. ss << pad(line, 8) << pointer << pad(std::to_string(i), 3);
  684. ss << " " << pad(OP_NAMES[byte.op], 20) << " ";
  685. // ss << pad(byte.arg == -1 ? "" : std::to_string(byte.arg), 5);
  686. std::string argStr = byte.arg == -1 ? "" : std::to_string(byte.arg);
  687. if(byte.op == OP_LOAD_CONST){
  688. argStr += " (" + PyStr_AS_C(asRepr(code->co_consts[byte.arg])) + ")";
  689. }
  690. if(byte.op == OP_LOAD_NAME_REF || byte.op == OP_LOAD_NAME){
  691. argStr += " (" + code->co_names[byte.arg].first.__escape(true) + ")";
  692. }
  693. ss << pad(argStr, 20); // may overflow
  694. ss << code->co_blocks[byte.block].to_string();
  695. if(i != code->co_code.size() - 1) ss << '\n';
  696. }
  697. _StrStream consts;
  698. consts << "co_consts: ";
  699. consts << PyStr_AS_C(asRepr(PyList(code->co_consts)));
  700. _StrStream names;
  701. names << "co_names: ";
  702. PyVarList list;
  703. for(int i=0; i<code->co_names.size(); i++){
  704. list.push_back(PyStr(code->co_names[i].first));
  705. }
  706. names << PyStr_AS_C(asRepr(PyList(list)));
  707. ss << '\n' << consts.str() << '\n' << names.str() << '\n';
  708. for(int i=0; i<code->co_consts.size(); i++){
  709. PyVar obj = code->co_consts[i];
  710. if(obj->is_type(_tp_function)){
  711. const auto& f = PyFunction_AS_C(obj);
  712. ss << disassemble(f->code);
  713. }
  714. }
  715. return _Str(ss.str());
  716. }
  // Type objects of the builtin types, cached as members so type checks do not
  // need a lookup in `_types`. All of them are assigned in initializeBuiltinClasses().
  717. // for quick access
  718. PyVar _tp_object, _tp_type, _tp_int, _tp_float, _tp_bool, _tp_str;
  719. PyVar _tp_list, _tp_tuple;
  720. PyVar _tp_function, _tp_native_function, _tp_native_iterator, _tp_bounded_method;
  721. PyVar _tp_slice, _tp_range, _tp_module, _tp_ref;
  722. PyVar _tp_super;
  723. template<typename P>
  724. inline PyVarRef PyRef(P&& value) {
  725. static_assert(std::is_base_of<BaseRef, P>::value, "P should derive from BaseRef");
  726. return new_object(_tp_ref, std::forward<P>(value));
  727. }
  728. inline const BaseRef* PyRef_AS_C(const PyVar& obj)
  729. {
  730. if(!obj->is_type(_tp_ref)) typeError("expected an l-value");
  731. return (const BaseRef*)(obj->value());
  732. }
  733. __DEF_PY_AS_C(Int, i64, _tp_int)
  734. inline PyVar PyInt(i64 value) {
  735. if(value >= -5 && value <= 256) return _small_integers[value + 5];
  736. return new_object(_tp_int, value);
  737. }
  // Each DEF_NATIVE(X, CType, tp) line below expands to two members:
  //   PyVar  PyX(CType value)          - wraps a native value in an object of type tp
  //   CType& PyX_AS_C(const PyVar& o)  - type-checked access to the native payload
  738. DEF_NATIVE(Float, f64, _tp_float)
  739. DEF_NATIVE(Str, _Str, _tp_str)
  740. DEF_NATIVE(List, PyVarList, _tp_list)
  741. DEF_NATIVE(Tuple, PyVarList, _tp_tuple)
  742. DEF_NATIVE(Function, _Func, _tp_function)
  743. DEF_NATIVE(NativeFunction, _CppFunc, _tp_native_function)
  744. DEF_NATIVE(Iter, _Iterator, _tp_native_iterator)
  745. DEF_NATIVE(BoundedMethod, _BoundedMethod, _tp_bounded_method)
  746. DEF_NATIVE(Range, _Range, _tp_range)
  747. DEF_NATIVE(Slice, _Slice, _tp_slice)
  748. // there is only one True/False, so no need to copy them!
  749. inline bool PyBool_AS_C(const PyVar& obj){return obj == True;}
  750. inline const PyVar& PyBool(bool value){return value ? True : False;}
  // Creates the built-in type objects, the None/Ellipsis/True/False objects,
  // the `builtins` and `__main__` modules, and publishes a subset of the types
  // as attributes of `builtins`.
  751. void initializeBuiltinClasses(){
  // `object` and `type` are built directly with make_shared and a null second
  // argument instead of going through new_type_object(); their `_type` fields
  // are filled in further down, once both objects exist.
  752. _tp_object = pkpy::make_shared<PyObject, Py_<i64>>((i64)0, nullptr);
  753. _tp_type = pkpy::make_shared<PyObject, Py_<i64>>((i64)0, nullptr);
  // These two are registered in `_types` by hand.
  754. _types["object"] = _tp_object;
  755. _types["type"] = _tp_type;
  // Every other built-in type is created through new_type_object().
  756. _tp_bool = new_type_object("bool");
  757. _tp_int = new_type_object("int");
  758. _tp_float = new_type_object("float");
  759. _tp_str = new_type_object("str");
  760. _tp_list = new_type_object("list");
  761. _tp_tuple = new_type_object("tuple");
  762. _tp_slice = new_type_object("slice");
  763. _tp_range = new_type_object("range");
  764. _tp_module = new_type_object("module");
  765. _tp_ref = new_type_object("_ref");
  766. _tp_function = new_type_object("function");
  767. _tp_native_function = new_type_object("_native_function");
  768. _tp_native_iterator = new_type_object("_native_iterator");
  769. _tp_bounded_method = new_type_object("_bounded_method");
  770. _tp_super = new_type_object("super");
  // None and Ellipsis are instances of types created on the spot; those type
  // handles are not stored in a `_tp_*` member. Both carry DUMMY_VAL as payload.
  771. this->None = new_object(new_type_object("NoneType"), DUMMY_VAL);
  772. this->Ellipsis = new_object(new_type_object("ellipsis"), DUMMY_VAL);
  // The only two bool objects; PyBool()/PyBool_AS_C() work with these handles.
  773. this->True = new_object(_tp_bool, true);
  774. this->False = new_object(_tp_bool, false);
  775. this->builtins = new_module("builtins");
  776. this->_main = new_module("__main__");
  // Close the bootstrap loop: `type` derives from `object` and is its own type;
  // `object` has None as its base and `type` as its type.
  777. setattr(_tp_type, __base__, _tp_object);
  778. _tp_type->_type = _tp_type;
  779. setattr(_tp_object, __base__, None);
  780. _tp_object->_type = _tp_type;
  // Give every entry of `_types` a __name__ attribute holding its registry key.
  781. for (auto& [name, type] : _types) {
  782. setattr(type, __name__, PyStr(name));
  783. }
  // A plain `object` instance with a dummy payload; how it is used is not
  // visible in this function (presumably a sentinel compared by identity; confirm).
  784. this->__py2py_call_signal = new_object(_tp_object, DUMMY_VAL);
  // Only these types become attributes of the `builtins` module.
  785. std::vector<_Str> publicTypes = {"type", "object", "bool", "int", "float", "str", "list", "tuple", "range"};
  786. for (auto& name : publicTypes) {
  787. setattr(builtins, name, _types[name]);
  788. }
  789. }
  790. i64 hash(const PyVar& obj){
  791. if (obj->is_type(_tp_int)) return PyInt_AS_C(obj);
  792. if (obj->is_type(_tp_bool)) return PyBool_AS_C(obj) ? 1 : 0;
  793. if (obj->is_type(_tp_float)){
  794. f64 val = PyFloat_AS_C(obj);
  795. return (i64)std::hash<f64>()(val);
  796. }
  797. if (obj->is_type(_tp_str)) return PyStr_AS_C(obj).hash();
  798. if (obj->is_type(_tp_type)) return (i64)obj.get();
  799. if (obj->is_type(_tp_tuple)) {
  800. i64 x = 1000003;
  801. for (const auto& item : PyTuple_AS_C(obj)) {
  802. i64 y = hash(item);
  803. x = x ^ (y + 0x9e3779b9 + (x << 6) + (x >> 2)); // recommended by Github Copilot
  804. }
  805. return x;
  806. }
  807. typeError("unhashable type: " + UNION_TP_NAME(obj));
  808. return 0;
  809. }
  810. /***** Error Reporter *****/
  811. private:
  812. void _error(const _Str& name, const _Str& msg){
  813. throw RuntimeError(name, msg, _cleanErrorAndGetSnapshots());
  814. }
  815. std::stack<_Str> _cleanErrorAndGetSnapshots(){
  816. std::stack<_Str> snapshots;
  817. while (!callstack.empty()){
  818. if(snapshots.size() < 8){
  819. snapshots.push(callstack.back()->curr_snapshot());
  820. }
  821. callstack.pop_back();
  822. }
  823. return snapshots;
  824. }
  825. public:
  826. void typeError(const _Str& msg){ _error("TypeError", msg); }
  827. void zeroDivisionError(){ _error("ZeroDivisionError", "division by zero"); }
  828. void indexError(const _Str& msg){ _error("IndexError", msg); }
  829. void valueError(const _Str& msg){ _error("ValueError", msg); }
  830. void nameError(const _Str& name){ _error("NameError", "name '" + name + "' is not defined"); }
  831. void attributeError(PyVar obj, const _Str& name){
  832. _error("AttributeError", "type '" + UNION_TP_NAME(obj) + "' has no attribute '" + name + "'");
  833. }
  834. inline void check_type(const PyVar& obj, const PyVar& type){
  835. if(!obj->is_type(type)) typeError("expected '" + UNION_NAME(type) + "', but got '" + UNION_TP_NAME(obj) + "'");
  836. }
  837. ~VM() {
  838. if(!use_stdio){
  839. delete _stdout;
  840. delete _stderr;
  841. }
  842. }
  // Declaration only; the body is defined outside this class body.
  // Presumably turns `source` into a _Code object, with `filename` used to
  // label it and `mode` selecting the compile mode (confirm against the definition).
  843. _Code compile(_Str source, _Str filename, CompileMode mode);
  844. };
  845. /***** Pointers' Impl *****/
  846. PyVar NameRef::get(VM* vm, Frame* frame) const{
  847. PyVar* val;
  848. val = frame->f_locals().try_get(pair->first);
  849. if(val) return *val;
  850. val = frame->f_globals().try_get(pair->first);
  851. if(val) return *val;
  852. val = vm->builtins->attribs.try_get(pair->first);
  853. if(val) return *val;
  854. vm->nameError(pair->first);
  855. return nullptr;
  856. }
  857. void NameRef::set(VM* vm, Frame* frame, PyVar val) const{
  858. switch(pair->second) {
  859. case NAME_LOCAL: frame->f_locals()[pair->first] = std::move(val); break;
  860. case NAME_GLOBAL:
  861. {
  862. PyVar* existing = frame->f_locals().try_get(pair->first);
  863. if(existing != nullptr){
  864. *existing = std::move(val);
  865. }else{
  866. frame->f_globals()[pair->first] = std::move(val);
  867. }
  868. } break;
  869. default: UNREACHABLE();
  870. }
  871. }
  872. void NameRef::del(VM* vm, Frame* frame) const{
  873. switch(pair->second) {
  874. case NAME_LOCAL: {
  875. if(frame->f_locals().contains(pair->first)){
  876. frame->f_locals().erase(pair->first);
  877. }else{
  878. vm->nameError(pair->first);
  879. }
  880. } break;
  881. case NAME_GLOBAL:
  882. {
  883. if(frame->f_locals().contains(pair->first)){
  884. frame->f_locals().erase(pair->first);
  885. }else{
  886. if(frame->f_globals().contains(pair->first)){
  887. frame->f_globals().erase(pair->first);
  888. }else{
  889. vm->nameError(pair->first);
  890. }
  891. }
  892. } break;
  893. default: UNREACHABLE();
  894. }
  895. }
  896. PyVar AttrRef::get(VM* vm, Frame* frame) const{
  897. return vm->getattr(obj, attr.pair->first);
  898. }
  899. void AttrRef::set(VM* vm, Frame* frame, PyVar val) const{
  900. vm->setattr(obj, attr.pair->first, val);
  901. }
  902. void AttrRef::del(VM* vm, Frame* frame) const{
  903. vm->typeError("cannot delete attribute");
  904. }
  905. PyVar IndexRef::get(VM* vm, Frame* frame) const{
  906. return vm->call(obj, __getitem__, pkpy::oneArg(index));
  907. }
  908. void IndexRef::set(VM* vm, Frame* frame, PyVar val) const{
  909. vm->call(obj, __setitem__, pkpy::twoArgs(index, val));
  910. }
  911. void IndexRef::del(VM* vm, Frame* frame) const{
  912. vm->call(obj, __delitem__, pkpy::oneArg(index));
  913. }
  914. PyVar TupleRef::get(VM* vm, Frame* frame) const{
  915. PyVarList args(varRefs.size());
  916. for (int i = 0; i < varRefs.size(); i++) {
  917. args[i] = vm->PyRef_AS_C(varRefs[i])->get(vm, frame);
  918. }
  919. return vm->PyTuple(args);
  920. }
  921. void TupleRef::set(VM* vm, Frame* frame, PyVar val) const{
  922. if(!val->is_type(vm->_tp_tuple) && !val->is_type(vm->_tp_list)){
  923. vm->typeError("only tuple or list can be unpacked");
  924. }
  925. const PyVarList& args = UNION_GET(PyVarList, val);
  926. if(args.size() > varRefs.size()) vm->valueError("too many values to unpack");
  927. if(args.size() < varRefs.size()) vm->valueError("not enough values to unpack");
  928. for (int i = 0; i < varRefs.size(); i++) {
  929. vm->PyRef_AS_C(varRefs[i])->set(vm, frame, args[i]);
  930. }
  931. }
  932. void TupleRef::del(VM* vm, Frame* frame) const{
  933. for (auto& r : varRefs) vm->PyRef_AS_C(r)->del(vm, frame);
  934. }
  935. /***** Frame's Impl *****/
  936. inline void Frame::try_deref(VM* vm, PyVar& v){
  937. if(v->is_type(vm->_tp_ref)) v = vm->PyRef_AS_C(v)->get(vm, this);
  938. }
  939. /***** Iterators' Impl *****/
  940. PyVar RangeIterator::next(){
  941. PyVar val = vm->PyInt(current);
  942. current += r.step;
  943. return val;
  944. }
  945. PyVar StringIterator::next(){
  946. return vm->PyStr(str.u8_getitem(index++));
  947. }
  948. PyVar _CppFunc::operator()(VM* vm, const pkpy::ArgList& args) const{
  949. int args_size = args.size() - (int)method; // remove self
  950. if(argc != -1 && args_size != argc) {
  951. vm->typeError("expected " + std::to_string(argc) + " arguments, but got " + std::to_string(args_size));
  952. }
  953. return f(vm, args);
  954. }