// vm.h
  1. #pragma once
  2. #include "codeobject.h"
  3. #include "common.h"
  4. #include "frame.h"
  5. #include "error.h"
  6. #include "gc.h"
  7. #include "memory.h"
  8. #include "obj.h"
  9. #include "str.h"
  10. #include "tuplelist.h"
  11. #include <tuple>
  12. namespace pkpy{
/* Stack manipulation macros */
// https://github.com/python/cpython/blob/3.9/Python/ceval.c#L1123
// These operate on the VM's value stack `s_data`; they are only valid in a
// scope where an `s_data` object is visible (i.e. inside VM member functions).
#define TOP() (s_data.top())
#define SECOND() (s_data.second())
#define THIRD() (s_data.third())
#define PEEK(n) (s_data.peek(n))
#define STACK_SHRINK(n) (s_data.shrink(n))
#define PUSH(v) (s_data.push(v))
#define POP() (s_data.pop())
#define POPX() (s_data.popx())
#define STACK_VIEW(n) (s_data.view(n))
// Reads a file (presumably relative to the current working directory —
// defined elsewhere); `*ok` reports success/failure to the caller.
Str _read_file_cwd(const Str& name, bool* ok);
// Generates the cast/boxing glue between a native C++ type `ctype` and the
// python type whose Type id is stored in `vm->ptype`:
//  - py_cast<ctype>:  checked cast; raises TypeError via check_type on mismatch
//  - _py_cast<ctype>: unchecked cast; caller must guarantee the object's type
//  - py_var:          boxes a C++ value into a new GC-managed PyObject
#define DEF_NATIVE_2(ctype, ptype) \
template<> inline ctype py_cast<ctype>(VM* vm, PyObject* obj) { \
vm->check_type(obj, vm->ptype); \
return OBJ_GET(ctype, obj); \
} \
template<> inline ctype _py_cast<ctype>(VM* vm, PyObject* obj) { \
return OBJ_GET(ctype, obj); \
} \
template<> inline ctype& py_cast<ctype&>(VM* vm, PyObject* obj) { \
vm->check_type(obj, vm->ptype); \
return OBJ_GET(ctype, obj); \
} \
template<> inline ctype& _py_cast<ctype&>(VM* vm, PyObject* obj) { \
return OBJ_GET(ctype, obj); \
} \
inline PyObject* py_var(VM* vm, const ctype& value) { return vm->heap.gcnew(vm->ptype, value);} \
inline PyObject* py_var(VM* vm, ctype&& value) { return vm->heap.gcnew(vm->ptype, std::move(value));}
// A suspended python call: `frame` is the paused execution frame and
// `s_backup` snapshots the stack slice (`buffer`) it needs to resume.
class Generator final: public BaseIter {
    Frame frame;
    int state; // 0,1,2 — lifecycle flag; exact meaning defined by next() (elsewhere). TODO confirm
    List s_backup;
public:
    Generator(VM* vm, Frame&& frame, ArgsView buffer): BaseIter(vm), frame(std::move(frame)), state(0) {
        for(PyObject* obj: buffer) s_backup.push_back(obj);
    }
    // Resumes the frame and yields the next value (implemented elsewhere).
    PyObject* next() override;
    // GC root marking for the retained frame/stack objects.
    void _gc_mark() const override;
};
// One entry of VM::_all_types: the type object itself, the Type id of its
// single base class, and its display name (qualified with the module name
// for non-builtin types; see VM::new_type_object).
struct PyTypeInfo{
    PyObject* obj;
    Type base;
    Str name;
};
  58. struct FrameId{
  59. std::vector<pkpy::Frame>* data;
  60. int index;
  61. FrameId(std::vector<pkpy::Frame>* data, int index) : data(data), index(index) {}
  62. Frame* operator->() const { return &data->operator[](index); }
  63. };
// The interpreter core. Owns the GC heap, the shared value stack, the call
// stack, the type registry, and all loaded modules.
class VM {
    VM* vm; // self reference for simplify code (project macros like VAR/CAST expect a `vm` in scope)
public:
    ManagedHeap heap;                       // GC-managed object heap
    ValueStack s_data;                      // operand stack shared by all frames
    stack< Frame > callstack;               // python call stack
    std::vector<PyTypeInfo> _all_types;     // type registry, indexed by Type
    NameDict _modules; // loaded modules
    std::map<StrName, Str> _lazy_modules; // lazy loaded modules
    // Internal sentinel objects; never exposed to python code.
    PyObject* _py_null;
    PyObject* _py_begin_call;
    PyObject* _py_op_call;
    PyObject* _py_op_yield;
    // Singletons.
    PyObject* None;
    PyObject* True;
    PyObject* False;
    PyObject* Ellipsis;
    PyObject* builtins; // builtins module
    PyObject* _main; // __main__ module
    // Capture buffers used when stdio is not routed to the real console.
    std::stringstream _stdout_buffer;
    std::stringstream _stderr_buffer;
    std::ostream* _stdout;
    std::ostream* _stderr;
    bool _initialized;      // false while init_builtin_types() is running
    // for quick access
    Type tp_object, tp_type, tp_int, tp_float, tp_bool, tp_str;
    Type tp_list, tp_tuple;
    Type tp_function, tp_native_func, tp_iterator, tp_bound_method;
    Type tp_slice, tp_range, tp_module;
    Type tp_super, tp_exception;
    // `use_stdio`: route output to std::cout/std::cerr instead of the
    // internal capture buffers. Builds the whole builtin type system.
    VM(bool use_stdio) : heap(this){
        this->vm = this;
        this->_stdout = use_stdio ? &std::cout : &_stdout_buffer;
        this->_stderr = use_stdio ? &std::cerr : &_stderr_buffer;
        callstack.reserve(8);
        _initialized = false;
        init_builtin_types();
        _initialized = true;
    }
    bool is_stdio_used() const { return _stdout == &std::cout; }
    // Handle to the innermost frame (reallocation-safe, see FrameId).
    FrameId top_frame() {
#if DEBUG_EXTRA_CHECK
        if(callstack.empty()) FATAL_ERROR();
#endif
        return FrameId(&callstack.data(), callstack.size()-1);
    }
    // str(obj): calls __str__ if present, otherwise falls back to repr().
    PyObject* asStr(PyObject* obj){
        PyObject* self;
        PyObject* f = get_unbound_method(obj, __str__, &self, false);
        if(self != _py_null) return call_method(self, f);
        return asRepr(obj);
    }
    // iter(obj): returns obj itself if already an iterator, else calls
    // __iter__; raises TypeError if the object is not iterable.
    PyObject* asIter(PyObject* obj){
        if(is_type(obj, tp_iterator)) return obj;
        PyObject* self;
        PyObject* iter_f = get_unbound_method(obj, __iter__, &self, false);
        if(self != _py_null) return call_method(self, iter_f);
        TypeError(OBJ_NAME(_t(obj)).escape() + " object is not iterable");
        return nullptr;
    }
    // list(it): no-op for lists, otherwise invokes the list constructor.
    PyObject* asList(PyObject* it){
        if(is_non_tagged_type(it, tp_list)) return it;
        return call(_t(tp_list), it);
    }
    // Walks the single-inheritance chain from `cls` to object, returning the
    // first attribute named `name`, or nullptr if absent everywhere.
    PyObject* find_name_in_mro(PyObject* cls, StrName name){
        PyObject* val;
        do{
            val = cls->attr().try_get(name);
            if(val != nullptr) return val;
            Type cls_t = OBJ_GET(Type, cls);
            Type base = _all_types[cls_t].base;
            if(base.index == -1) break;     // reached the root of the hierarchy
            cls = _all_types[base].obj;
        }while(true);
        return nullptr;
    }
    // True if obj's type is cls_t or any type derived from it.
    bool isinstance(PyObject* obj, Type cls_t){
        Type obj_t = OBJ_GET(Type, _t(obj));
        do{
            if(obj_t == cls_t) return true;
            Type base = _all_types[obj_t].base;
            if(base.index == -1) break;
            obj_t = base;
        }while(true);
        return false;
    }
    // Compiles and runs `source` inside `_module` (defaults to __main__).
    // On a python-level Exception the summary is printed to _stderr and both
    // stacks are cleared; returns nullptr in that case.
    PyObject* exec(Str source, Str filename, CompileMode mode, PyObject* _module=nullptr){
        if(_module == nullptr) _module = _main;
        try {
            CodeObject_ code = compile(source, filename, mode);
#if DEBUG_DIS_EXEC
            if(_module == _main) std::cout << disassemble(code) << '\n';
#endif
            return _exec(code, _module);
        }catch (const Exception& e){
            *_stderr << e.summary() << '\n';
        }
#if !DEBUG_FULL_EXCEPTION
        catch (const std::exception& e) {
            *_stderr << "An std::exception occurred! It could be a bug.\n";
            *_stderr << e.what() << '\n';
        }
#endif
        callstack.clear();
        s_data.clear();
        return nullptr;
    }
    // Pushes a new frame (constructed from `args...`) and runs it to completion.
    template<typename ...Args>
    PyObject* _exec(Args&&... args){
        callstack.emplace(&s_data, s_data._sp, std::forward<Args>(args)...);
        return _run_top_frame();
    }
    // Pops the top frame and rewinds the value stack to its base pointer.
    void _pop_frame(){
        Frame* frame = &callstack.top();
        s_data.reset(frame->_sp_base);
        callstack.pop();
    }
    // Fixed-arity helpers used by call()/call_method() to push 0..4 arguments.
    void _push_varargs(){ }
    void _push_varargs(PyObject* _0){ PUSH(_0); }
    void _push_varargs(PyObject* _0, PyObject* _1){ PUSH(_0); PUSH(_1); }
    void _push_varargs(PyObject* _0, PyObject* _1, PyObject* _2){ PUSH(_0); PUSH(_1); PUSH(_2); }
    void _push_varargs(PyObject* _0, PyObject* _1, PyObject* _2, PyObject* _3){ PUSH(_0); PUSH(_1); PUSH(_2); PUSH(_3); }
    // callable(args...): pushes [callable, NULL, args...] then vectorcalls.
    template<typename... Args>
    PyObject* call(PyObject* callable, Args&&... args){
        PUSH(callable);
        PUSH(_py_null);     // no `self` slot for a plain call
        _push_varargs(args...);
        return vectorcall(sizeof...(args));
    }
    // Bound call with an explicit self: pushes [callable, self, args...].
    template<typename... Args>
    PyObject* call_method(PyObject* self, PyObject* callable, Args&&... args){
        PUSH(callable);
        PUSH(self);
        _push_varargs(args...);
        return vectorcall(sizeof...(args));
    }
    // Looks up `name` on self (raising AttributeError if missing) and calls it.
    template<typename... Args>
    PyObject* call_method(PyObject* self, StrName name, Args&&... args){
        PyObject* callable = get_unbound_method(self, name, &self);
        return call_method(self, callable, args...);
    }
    // Wraps a native getter into a python `property` object.
    PyObject* property(NativeFuncRaw fget){
        PyObject* p = builtins->attr("property");
        PyObject* method = heap.gcnew(tp_native_func, NativeFunc(fget, 1, false));
        return call(p, method);
    }
    // Registers a new python type deriving from `base`. If `mod` is given the
    // type is installed as an attribute of that module; non-builtin types get
    // a "module.name" qualified display name.
    PyObject* new_type_object(PyObject* mod, StrName name, Type base){
        PyObject* obj = heap._new<Type>(tp_type, _all_types.size());
        PyTypeInfo info{
            obj,
            base,
            (mod!=nullptr && mod!=builtins) ? Str(OBJ_NAME(mod)+"."+name.sv()): name.sv()
        };
        if(mod != nullptr) mod->attr().set(name, obj);
        _all_types.push_back(info);
        return obj;
    }
    // Same as above, without attaching to a module; returns the Type id.
    Type _new_type_object(StrName name, Type base=0) {
        PyObject* obj = new_type_object(nullptr, name, base);
        return OBJ_GET(Type, obj);
    }
    // Resolves a type object by name: first in builtins, then by scanning the
    // full registry; throws std::runtime_error if not found anywhere.
    PyObject* _find_type(const Str& type){
        PyObject* obj = builtins->attr().try_get(type);
        if(obj == nullptr){
            for(auto& t: _all_types) if(t.name == type) return t.obj;
            throw std::runtime_error(fmt("type not found: ", type));
        }
        return obj;
    }
    // Name-based convenience overloads of the pointer-based binders below.
    template<int ARGC>
    void bind_func(Str type, Str name, NativeFuncRaw fn) {
        bind_func<ARGC>(_find_type(type), name, fn);
    }
    template<int ARGC>
    void bind_method(Str type, Str name, NativeFuncRaw fn) {
        bind_method<ARGC>(_find_type(type), name, fn);
    }
    // A static method is bound exactly like a free function (no self slot).
    template<int ARGC, typename... Args>
    void bind_static_method(Args&&... args) {
        bind_func<ARGC>(std::forward<Args>(args)...);
    }
    // Binds the same native method on several types at once.
    template<int ARGC>
    void _bind_methods(std::vector<Str> types, Str name, NativeFuncRaw fn) {
        for(auto& type: types) bind_method<ARGC>(type, name, fn);
    }
    template<int ARGC>
    void bind_builtin_func(Str name, NativeFuncRaw fn) {
        bind_func<ARGC>(builtins, name, fn);
    }
    // Maps a possibly-negative python index into [0, size); raises IndexError
    // when out of range.
    int normalized_index(int index, int size){
        if(index < 0) index += size;
        if(index < 0 || index >= size){
            IndexError(std::to_string(index) + " not in [0, " + std::to_string(size) + ")");
        }
        return index;
    }
    // Boxes a BaseIter-derived value as a python iterator object.
    template<typename P>
    PyObject* PyIter(P&& value) {
        static_assert(std::is_base_of_v<BaseIter, std::decay_t<P>>);
        return heap.gcnew<P>(tp_iterator, std::forward<P>(value));
    }
    // Checked / unchecked access to the native iterator behind an object.
    BaseIter* PyIter_AS_C(PyObject* obj)
    {
        check_type(obj, tp_iterator);
        return static_cast<BaseIter*>(obj->value());
    }
    BaseIter* _PyIter_AS_C(PyObject* obj)
    {
        return static_cast<BaseIter*>(obj->value());
    }
    /***** Error Reporter *****/
    // All python-level errors funnel through _error(Exception) (defined
    // elsewhere), which unwinds via _raise().
    void _error(StrName name, const Str& msg){
        _error(Exception(name, msg));
    }
    // Transfers control to the innermost exception handler if one exists,
    // otherwise propagates as an UnhandledException.
    void _raise(){
        bool ok = top_frame()->jump_to_exception_handler();
        if(ok) throw HandledException();
        else throw UnhandledException();
    }
    void StackOverflowError() { _error("StackOverflowError", ""); }
    void IOError(const Str& msg) { _error("IOError", msg); }
    void NotImplementedError(){ _error("NotImplementedError", ""); }
    void TypeError(const Str& msg){ _error("TypeError", msg); }
    void ZeroDivisionError(){ _error("ZeroDivisionError", "division by zero"); }
    void IndexError(const Str& msg){ _error("IndexError", msg); }
    void ValueError(const Str& msg){ _error("ValueError", msg); }
    void NameError(StrName name){ _error("NameError", fmt("name ", name.escape() + " is not defined")); }
    void AttributeError(PyObject* obj, StrName name){
        // OBJ_NAME calls getattr, which may lead to a infinite recursion
        _error("AttributeError", fmt("type ", OBJ_NAME(_t(obj)).escape(), " has no attribute ", name.escape()));
    }
    void AttributeError(Str msg){ _error("AttributeError", msg); }
    // Raises TypeError unless obj is exactly of `type` (per is_type).
    void check_type(PyObject* obj, Type type){
        if(is_type(obj, type)) return;
        TypeError("expected " + OBJ_NAME(_t(type)).escape() + ", but got " + OBJ_NAME(_t(obj)).escape());
    }
    // Type id -> type object.
    PyObject* _t(Type t){
        return _all_types[t.index].obj;
    }
    // Object -> its type object; tagged ints/floats have no header so their
    // types are resolved directly.
    PyObject* _t(PyObject* obj){
        if(is_int(obj)) return _t(tp_int);
        if(is_float(obj)) return _t(tp_float);
        return _all_types[OBJ_GET(Type, _t(obj->type)).index].obj;
    }
    ~VM() {
        callstack.clear();
        s_data.clear();
        _all_types.clear();
        _modules.clear();
        _lazy_modules.clear();
    }
    // --- implemented out-of-line / elsewhere ---
    void _log_s_data(const char* title = nullptr);
    PyObject* vectorcall(int ARGC, int KWARGC=0, bool op_call=false);
    CodeObject_ compile(Str source, Str filename, CompileMode mode, bool unknown_global_scope=false);
    PyObject* num_negated(PyObject* obj);
    f64 num_to_float(PyObject* obj);
    bool asBool(PyObject* obj);
    i64 hash(PyObject* obj);
    PyObject* asRepr(PyObject* obj);
    PyObject* new_module(StrName name);
    Str disassemble(CodeObject_ co);
    void init_builtin_types();
    PyObject* _py_call(PyObject** sp_base, PyObject* callable, ArgsView args, ArgsView kwargs);
    PyObject* getattr(PyObject* obj, StrName name, bool throw_err=true);
    PyObject* get_unbound_method(PyObject* obj, StrName name, PyObject** self, bool throw_err=true, bool fallback=false);
    void setattr(PyObject* obj, StrName name, PyObject* value);
    template<int ARGC>
    void bind_method(PyObject*, Str, NativeFuncRaw);
    template<int ARGC>
    void bind_func(PyObject*, Str, NativeFuncRaw);
    void _error(Exception);
    PyObject* _run_top_frame();
    void post_init();
};
// Invokes the wrapped native function after an arity check.
// argc == -1 means variadic (no check). For methods, `self` is present in
// `args` but does not count toward `argc`.
inline PyObject* NativeFunc::operator()(VM* vm, ArgsView args) const{
    int args_size = args.size() - (int)method; // remove self
    if(argc != -1 && args_size != argc) {
        vm->TypeError(fmt("expected ", argc, " arguments, but got ", args_size));
    }
    return f(vm, args);
}
// Hook for post-compile optimizations (perfect-hash sizing of local name
// dicts). Currently disabled — intentionally a no-op.
inline void CodeObject::optimize(VM* vm){
    // uint32_t base_n = (uint32_t)(names.size() / kLocalsLoadFactor + 0.5);
    // perfect_locals_capacity = std::max(find_next_capacity(base_n), NameDict::__Capacity);
    // perfect_hash_seed = find_perfect_hash_seed(perfect_locals_capacity, names);
}
// Instantiate the cast/boxing glue for every heap-allocated builtin type.
// (int/float/bool are pointer-tagged and handled separately below.)
DEF_NATIVE_2(Str, tp_str)
DEF_NATIVE_2(List, tp_list)
DEF_NATIVE_2(Tuple, tp_tuple)
DEF_NATIVE_2(Function, tp_function)
DEF_NATIVE_2(NativeFunc, tp_native_func)
DEF_NATIVE_2(BoundMethod, tp_bound_method)
DEF_NATIVE_2(Range, tp_range)
DEF_NATIVE_2(Slice, tp_slice)
DEF_NATIVE_2(Exception, tp_exception)
// Small integers are pointer-tagged: the payload lives in the upper bits and
// the low 2 bits hold the tag (0b01, see PY_VAR_INT). Casting back is just an
// arithmetic right shift that drops the tag.
#define PY_CAST_INT(T) \
template<> inline T py_cast<T>(VM* vm, PyObject* obj){ \
vm->check_type(obj, vm->tp_int); \
return (T)(BITS(obj) >> 2); \
} \
template<> inline T _py_cast<T>(VM* vm, PyObject* obj){ \
return (T)(BITS(obj) >> 2); \
}
PY_CAST_INT(char)
PY_CAST_INT(short)
PY_CAST_INT(int)
PY_CAST_INT(long)
PY_CAST_INT(long long)
PY_CAST_INT(unsigned char)
PY_CAST_INT(unsigned short)
PY_CAST_INT(unsigned int)
PY_CAST_INT(unsigned long)
PY_CAST_INT(unsigned long long)
// Floats are also pointer-tagged: the low 2 bits of the f64 bit pattern are
// sacrificed for the tag. `(bits >> 2) << 2` clears those tag bits before
// reinterpreting the pattern as a double via BitsCvt.
template<> inline float py_cast<float>(VM* vm, PyObject* obj){
    vm->check_type(obj, vm->tp_float);
    i64 bits = BITS(obj);
    bits = (bits >> 2) << 2;    // clear the 2-bit tag
    return BitsCvt(bits)._float;
}
template<> inline float _py_cast<float>(VM* vm, PyObject* obj){
    i64 bits = BITS(obj);
    bits = (bits >> 2) << 2;
    return BitsCvt(bits)._float;
}
template<> inline double py_cast<double>(VM* vm, PyObject* obj){
    vm->check_type(obj, vm->tp_float);
    i64 bits = BITS(obj);
    bits = (bits >> 2) << 2;
    return BitsCvt(bits)._float;
}
template<> inline double _py_cast<double>(VM* vm, PyObject* obj){
    i64 bits = BITS(obj);
    bits = (bits >> 2) << 2;
    return BitsCvt(bits)._float;
}
// Boxes an integral value as a tagged pointer: payload << 2 | 0b01.
// The round-trip `(val << 2) >> 2 != val` detects values whose top 2 bits
// would be lost and raises OverflowError.
// NOTE(review): `val << 2` on a near-limit i64 is signed-overflow UB
// (pre-C++20); works on common compilers but worth confirming upstream.
#define PY_VAR_INT(T) \
inline PyObject* py_var(VM* vm, T _val){ \
i64 val = static_cast<i64>(_val); \
if(((val << 2) >> 2) != val){ \
vm->_error("OverflowError", std::to_string(val) + " is out of range"); \
} \
val = (val << 2) | 0b01; \
return reinterpret_cast<PyObject*>(val); \
}
PY_VAR_INT(char)
PY_VAR_INT(short)
PY_VAR_INT(int)
PY_VAR_INT(long)
PY_VAR_INT(long long)
PY_VAR_INT(unsigned char)
PY_VAR_INT(unsigned short)
PY_VAR_INT(unsigned int)
PY_VAR_INT(unsigned long)
PY_VAR_INT(unsigned long long)
// Boxes a float as a tagged pointer with tag 0b10. The low 2 bits of the
// f64 bit pattern are deliberately zeroed to make room for the tag — a tiny,
// accepted precision loss in the mantissa.
#define PY_VAR_FLOAT(T) \
inline PyObject* py_var(VM* vm, T _val){ \
f64 val = static_cast<f64>(_val); \
i64 bits = BitsCvt(val)._int; \
bits = (bits >> 2) << 2; \
bits |= 0b10; \
return reinterpret_cast<PyObject*>(bits); \
}
PY_VAR_FLOAT(float)
PY_VAR_FLOAT(double)
  428. inline PyObject* py_var(VM* vm, bool val){
  429. return val ? vm->True : vm->False;
  430. }
  431. template<> inline bool py_cast<bool>(VM* vm, PyObject* obj){
  432. vm->check_type(obj, vm->tp_bool);
  433. return obj == vm->True;
  434. }
  435. template<> inline bool _py_cast<bool>(VM* vm, PyObject* obj){
  436. return obj == vm->True;
  437. }
// String boxing conveniences: each overload builds a Str and defers to the
// DEF_NATIVE_2-generated py_var via the VAR macro.
inline PyObject* py_var(VM* vm, const char val[]){
    return VAR(Str(val));
}
inline PyObject* py_var(VM* vm, std::string val){
    return VAR(Str(std::move(val)));
}
inline PyObject* py_var(VM* vm, std::string_view val){
    return VAR(Str(val));
}
// Raises TypeError unless obj is an instance of the user-defined class T
// (T must expose a static _type(vm)).
template<typename T>
void _check_py_class(VM* vm, PyObject* obj){
    vm->check_type(obj, T::_type(vm));
}
  451. inline PyObject* VM::num_negated(PyObject* obj){
  452. if (is_int(obj)){
  453. return VAR(-CAST(i64, obj));
  454. }else if(is_float(obj)){
  455. return VAR(-CAST(f64, obj));
  456. }
  457. TypeError("expected 'int' or 'float', got " + OBJ_NAME(_t(obj)).escape());
  458. return nullptr;
  459. }
  460. inline f64 VM::num_to_float(PyObject* obj){
  461. if(is_float(obj)){
  462. return CAST(f64, obj);
  463. } else if (is_int(obj)){
  464. return (f64)CAST(i64, obj);
  465. }
  466. TypeError("expected 'int' or 'float', got " + OBJ_NAME(_t(obj)).escape());
  467. return 0;
  468. }
// Python truthiness. Fast paths for bool/None/int/float; other objects are
// truthy unless they define __len__, in which case len() > 0 decides.
// NOTE: unlike CPython, __bool__ is not consulted here.
inline bool VM::asBool(PyObject* obj){
    if(is_non_tagged_type(obj, tp_bool)) return obj == True;
    if(obj == None) return false;
    if(is_int(obj)) return CAST(i64, obj) != 0;
    if(is_float(obj)) return CAST(f64, obj) != 0.0;
    PyObject* self;
    PyObject* len_f = get_unbound_method(obj, __len__, &self, false);
    if(self != _py_null){
        PyObject* ret = call_method(self, len_f);
        return CAST(i64, ret) > 0;
    }
    return true;    // objects without __len__ are always truthy
}
// hash(obj) for the hashable builtin types; raises TypeError otherwise.
// Ints hash to themselves, strings use their cached hash, tuples combine
// element hashes, types hash by identity (their tagged bits).
inline i64 VM::hash(PyObject* obj){
    if (is_non_tagged_type(obj, tp_str)) return CAST(Str&, obj).hash();
    if (is_int(obj)) return CAST(i64, obj);
    if (is_non_tagged_type(obj, tp_tuple)) {
        i64 x = 1000003;
        const Tuple& items = CAST(Tuple&, obj);
        for (int i=0; i<items.size(); i++) {
            i64 y = hash(items[i]);     // recurse per element
            // boost-style hash_combine mix (recommended by Github Copilot)
            x = x ^ (y + 0x9e3779b9 + (x << 6) + (x >> 2));
        }
        return x;
    }
    if (is_non_tagged_type(obj, tp_type)) return BITS(obj);     // identity hash
    if (is_non_tagged_type(obj, tp_bool)) return _CAST(bool, obj) ? 1 : 0;
    if (is_float(obj)){
        f64 val = CAST(f64, obj);
        return (i64)std::hash<f64>()(val);
    }
    TypeError("unhashable type: " + OBJ_NAME(_t(obj)).escape());
    return 0;
}
// repr(obj): always dispatches to the object's __repr__ method.
inline PyObject* VM::asRepr(PyObject* obj){
    return call_method(obj, __repr__);
}
// Creates and registers a new module object under `name`.
// Registering the same name twice is a fatal error (see comment below).
inline PyObject* VM::new_module(StrName name) {
    PyObject* obj = heap._new<DummyModule>(tp_module, DummyModule());
    obj->attr().set(__name__, VAR(name.sv()));
    // we do not allow override in order to avoid memory leak
    // it is because Module objects are not garbage collected
    if(_modules.contains(name)) FATAL_ERROR();
    _modules.set(name, obj);
    return obj;
}
// Renders a bytecode instruction's argument for disassembly: the raw arg
// value plus a human-readable annotation depending on the opcode family.
// `vm` may be nullptr, in which case constants are not repr()'d.
inline std::string _opcode_argstr(VM* vm, Bytecode byte, const CodeObject* co){
    std::string argStr = byte.arg == -1 ? "" : std::to_string(byte.arg);
    switch(byte.op){
        case OP_LOAD_CONST:
            // arg indexes the constant pool; append its repr when a VM is available
            if(vm != nullptr){
                argStr += fmt(" (", CAST(Str, vm->asRepr(co->consts[byte.arg])), ")");
            }
            break;
        // name-based opcodes: arg is a StrName index
        case OP_LOAD_NAME: case OP_LOAD_GLOBAL: case OP_LOAD_NONLOCAL: case OP_STORE_GLOBAL:
        case OP_LOAD_ATTR: case OP_LOAD_METHOD: case OP_STORE_ATTR: case OP_DELETE_ATTR:
        case OP_IMPORT_NAME: case OP_BEGIN_CLASS:
        case OP_DELETE_GLOBAL:
            argStr += fmt(" (", StrName(byte.arg).sv(), ")");
            break;
        // local-slot opcodes: arg indexes co->varnames
        case OP_LOAD_FAST: case OP_STORE_FAST: case OP_DELETE_FAST:
            argStr += fmt(" (", co->varnames[byte.arg].sv(), ")");
            break;
        case OP_BINARY_OP:
            argStr += fmt(" (", BINARY_SPECIAL_METHODS[byte.arg], ")");
            break;
        case OP_LOAD_FUNCTION:
            argStr += fmt(" (", co->func_decls[byte.arg]->code->name, ")");
            break;
    }
    return argStr;
}
// Produces a CPython-dis-style listing of a code object: source line, a
// "->" marker on jump targets, instruction index, opcode name, annotated
// argument and enclosing block type. Nested function declarations are
// disassembled recursively at the end.
inline Str VM::disassemble(CodeObject_ co){
    // Pads (or truncates) a string to exactly n columns.
    auto pad = [](const Str& s, const int n){
        if(s.length() >= n) return s.substr(0, n);
        return s + std::string(n - s.length(), ' ');
    };
    // Collect jump targets so they can be flagged with "->".
    std::vector<int> jumpTargets;
    for(auto byte : co->codes){
        if(byte.op == OP_JUMP_ABSOLUTE || byte.op == OP_POP_JUMP_IF_FALSE){
            jumpTargets.push_back(byte.arg);
        }
    }
    std::stringstream ss;
    int prev_line = -1;
    for(int i=0; i<co->codes.size(); i++){
        const Bytecode& byte = co->codes[i];
        Str line = std::to_string(co->lines[i]);
        // Only print the source line number when it changes; separate groups
        // with a blank line.
        if(co->lines[i] == prev_line) line = "";
        else{
            if(prev_line != -1) ss << "\n";
            prev_line = co->lines[i];
        }
        std::string pointer;
        if(std::find(jumpTargets.begin(), jumpTargets.end(), i) != jumpTargets.end()){
            pointer = "-> ";
        }else{
            pointer = "   ";
        }
        ss << pad(line, 8) << pointer << pad(std::to_string(i), 3);
        ss << " " << pad(OP_NAMES[byte.op], 20) << " ";
        // ss << pad(byte.arg == -1 ? "" : std::to_string(byte.arg), 5);
        std::string argStr = _opcode_argstr(this, byte, co.get());
        ss << pad(argStr, 40);      // may overflow
        ss << co->blocks[byte.block].type;
        if(i != co->codes.size() - 1) ss << '\n';
    }
    // Recurse into nested function declarations.
    for(auto& decl: co->func_decls){
        ss << "\n\n" << "Disassembly of " << decl->code->name << ":\n";
        ss << disassemble(decl->code);
    }
    ss << "\n";
    return Str(ss.str());
}
// Debug helper: prints one line showing the current frame, the whole value
// stack (with '|' marking each frame's stack base) and the instruction about
// to execute. Only active once the VM is fully initialized and a frame exists.
inline void VM::_log_s_data(const char* title) {
    if(!_initialized) return;
    if(callstack.empty()) return;
    std::stringstream ss;
    if(title) ss << title << " | ";
    // Count how many frames have their stack base at each slot.
    std::map<PyObject**, int> sp_bases;
    for(Frame& f: callstack.data()){
        if(f._sp_base == nullptr) FATAL_ERROR();
        sp_bases[f._sp_base] += 1;
    }
    FrameId frame = top_frame();
    int line = frame->co->lines[frame->_ip];
    ss << frame->co->name << ":" << line << " [";
    for(PyObject** p=s_data.begin(); p!=s_data.end(); p++){
        ss << std::string(sp_bases[p], '|');    // frame-base markers
        if(sp_bases[p] > 0) ss << " ";
        PyObject* obj = *p;
        // Render each slot as compactly as possible, most specific first.
        if(obj == nullptr) ss << "(nil)";
        else if(obj == _py_begin_call) ss << "BEGIN_CALL";
        else if(obj == _py_null) ss << "NULL";
        else if(is_int(obj)) ss << CAST(i64, obj);
        else if(is_float(obj)) ss << CAST(f64, obj);
        else if(is_type(obj, tp_str)) ss << CAST(Str, obj).escape();
        else if(obj == None) ss << "None";
        else if(obj == True) ss << "True";
        else if(obj == False) ss << "False";
        else if(is_type(obj, tp_function)){
            auto& f = CAST(Function&, obj);
            ss << f.decl->code->name << "(...)";
        } else if(is_type(obj, tp_type)){
            Type t = OBJ_GET(Type, obj);
            ss << "<class " + _all_types[t].name.escape() + ">";
        } else if(is_type(obj, tp_list)){
            auto& t = CAST(List&, obj);
            ss << "list(size=" << t.size() << ")";
        } else if(is_type(obj, tp_tuple)){
            auto& t = CAST(Tuple&, obj);
            ss << "tuple(size=" << t.size() << ")";
        } else ss << "(" << obj_type_name(this, obj->type) << ")";
        ss << ", ";
    }
    std::string output = ss.str();
    if(!s_data.empty()) {
        // drop the trailing ", "
        output.pop_back(); output.pop_back();
    }
    output.push_back(']');
    Bytecode byte = frame->co->codes[frame->_ip];
    std::cout << output << " " << OP_NAMES[byte.op] << " " << _opcode_argstr(nullptr, byte, frame->co) << std::endl;
}
// Bootstraps the type system. `object` and `type` are created by hand (they
// reference each other), then every builtin type is registered in a fixed
// order — int/float MUST land at kTpIntIndex/kTpFloatIndex because the tagged
// cast fast paths depend on those indices. Finally singletons, sentinels and
// the builtins/__main__ modules are created and public types are exported.
inline void VM::init_builtin_types(){
    // Index 0: object (base = -1, the root); index 1: type.
    _all_types.push_back({heap._new<Type>(Type(1), Type(0)), -1, "object"});
    _all_types.push_back({heap._new<Type>(Type(1), Type(1)), 0, "type"});
    tp_object = 0; tp_type = 1;
    tp_int = _new_type_object("int");
    tp_float = _new_type_object("float");
    // The tagged-pointer code assumes these exact indices.
    if(tp_int.index != kTpIntIndex || tp_float.index != kTpFloatIndex) FATAL_ERROR();
    tp_bool = _new_type_object("bool");
    tp_str = _new_type_object("str");
    tp_list = _new_type_object("list");
    tp_tuple = _new_type_object("tuple");
    tp_slice = _new_type_object("slice");
    tp_range = _new_type_object("range");
    tp_module = _new_type_object("module");
    tp_function = _new_type_object("function");
    tp_native_func = _new_type_object("native_func");
    tp_iterator = _new_type_object("iterator");
    tp_bound_method = _new_type_object("bound_method");
    tp_super = _new_type_object("super");
    tp_exception = _new_type_object("Exception");
    // Singletons.
    this->None = heap._new<Dummy>(_new_type_object("NoneType"), {});
    this->Ellipsis = heap._new<Dummy>(_new_type_object("ellipsis"), {});
    this->True = heap._new<Dummy>(tp_bool, {});
    this->False = heap._new<Dummy>(tp_bool, {});
    // Internal sentinels share one hidden "_internal" type.
    Type _internal_type = _new_type_object("_internal");
    this->_py_null = heap._new<Dummy>(_internal_type, {});
    this->_py_begin_call = heap._new<Dummy>(_internal_type, {});
    this->_py_op_call = heap._new<Dummy>(_internal_type, {});
    this->_py_op_yield = heap._new<Dummy>(_internal_type, {});
    this->builtins = new_module("builtins");
    this->_main = new_module("__main__");
    // setup public types
    builtins->attr().set("type", _t(tp_type));
    builtins->attr().set("object", _t(tp_object));
    builtins->attr().set("bool", _t(tp_bool));
    builtins->attr().set("int", _t(tp_int));
    builtins->attr().set("float", _t(tp_float));
    builtins->attr().set("str", _t(tp_str));
    builtins->attr().set("list", _t(tp_list));
    builtins->attr().set("tuple", _t(tp_tuple));
    builtins->attr().set("range", _t(tp_range));
    post_init();
    // Attribute dicts are stable now; switch them to perfect hashing.
    for(int i=0; i<_all_types.size(); i++){
        _all_types[i].obj->attr()._try_perfect_rehash();
    }
    for(auto [k, v]: _modules.items()) v->attr()._try_perfect_rehash();
}
// Generic call entry point. Invokes the callable whose frame is already laid
// out on the value stack as: [callable, <self>, args..., kwargs...], where
// <self> is `_py_null` for a plain (non-method) call and each keyword argument
// occupies two slots (int key, value) — hence `KWARGC*2`.
//
// ARGC == 0xFFFF is a sentinel meaning "varargs call": the real positional
// count is recovered by scanning down the stack for the `_py_begin_call`
// marker. If `op_call` is true and the callee is a bytecode function, the
// sentinel `_py_op_call` is returned instead of running the new frame (the
// caller's dispatch loop runs it inline).
inline PyObject* VM::vectorcall(int ARGC, int KWARGC, bool op_call){
    bool is_varargs = ARGC == 0xFFFF;
    PyObject** p0;                              // base of the call region (reset point)
    PyObject** p1 = s_data._sp - KWARGC*2;      // end of positional args / start of kwargs
    if(is_varargs){
        p0 = p1 - 1;
        while(*p0 != _py_begin_call) p0--;      // scan down to the BEGIN_CALL marker
        // [BEGIN_CALL, callable, <self>, args..., kwargs...]
        //      ^p0                               ^p1        ^_sp
        ARGC = p1 - (p0 + 3);                   // recover the true positional count
    }else{
        // NOTE(review): `is_varargs` is false on this branch, so the
        // `(int)is_varargs` term is always 0 — kept byte-identical here.
        p0 = p1 - ARGC - 2 - (int)is_varargs;
        // [callable, <self>, args..., kwargs...]
        //  ^p0                        ^p1      ^_sp
    }
    PyObject* callable = p1[-(ARGC + 2)];
    bool method_call = p1[-(ARGC + 1)] != _py_null;
    // handle boundmethod, do a patch: unwrap it in place so the stack holds
    // [unbound, self, args..., kwargs...] and fall through to the normal paths
    if(is_non_tagged_type(callable, tp_bound_method)){
        if(method_call) FATAL_ERROR();          // a bound method cannot itself be a method call
        auto& bm = CAST(BoundMethod&, callable);
        callable = bm.method;                   // get unbound method
        p1[-(ARGC + 2)] = bm.method;
        p1[-(ARGC + 1)] = bm.obj;
        method_call = true;
        // [unbound, self, args..., kwargs...]
    }
    // positional view; includes <self> as args[0] when this is a method call
    ArgsView args(p1 - ARGC - int(method_call), p1);
    if(is_non_tagged_type(callable, tp_native_func)){
        const auto& f = OBJ_GET(NativeFunc, callable);
        if(KWARGC != 0) TypeError("native_func does not accept keyword arguments");
        PyObject* ret = f(this, args);
        s_data.reset(p0);                       // pop the whole call region
        return ret;
    }
    ArgsView kwargs(p1, s_data._sp);            // flat (key, value) pairs
    if(is_non_tagged_type(callable, tp_function)){
        // ret is nullptr or a generator
        PyObject* ret = _py_call(p0, callable, args, kwargs);
        // stack resetting is handled by _py_call
        if(ret != nullptr) return ret;
        if(op_call) return _py_op_call;         // let the caller's loop run the frame
        return _run_top_frame();
    }
    if(is_non_tagged_type(callable, tp_type)){
        if(method_call) FATAL_ERROR();
        // [type, NULL, args..., kwargs...] — instantiate: __new__ (if any) then __init__
        // TODO: derived __new__ ?
        PyObject* new_f = callable->attr().try_get(__new__);
        PyObject* obj;
        if(new_f != nullptr){
            // re-push the whole argument list and call __new__ recursively
            PUSH(new_f);
            PUSH(_py_null);
            for(PyObject* obj: args) PUSH(obj);
            for(PyObject* obj: kwargs) PUSH(obj);
            obj = vectorcall(ARGC, KWARGC);
            // if __new__ returned an instance of a different type, skip __init__
            if(!isinstance(obj, OBJ_GET(Type, callable))) return obj;
        }else{
            obj = heap.gcnew<DummyInstance>(OBJ_GET(Type, callable), {});
        }
        PyObject* self;
        callable = get_unbound_method(obj, __init__, &self, false);
        if (self != _py_null) {
            // replace `NULL` with `self` and reuse the original argument region
            p1[-(ARGC + 2)] = callable;
            p1[-(ARGC + 1)] = self;
            // [init_f, self, args..., kwargs...]
            vectorcall(ARGC, KWARGC);
            // We just discard the return value of `__init__`
            // in cpython it raises a TypeError if the return value is not None
        }else{
            // no __init__ found: manually reset the stack
            s_data.reset(p0);
        }
        return obj;
    }
    // handle `__call__` overload: patch the stack and retry as a method call
    PyObject* self;
    PyObject* call_f = get_unbound_method(callable, __call__, &self, false);
    if(self != _py_null){
        p1[-(ARGC + 2)] = call_f;
        p1[-(ARGC + 1)] = self;
        // [call_f, self, args..., kwargs...]
        return vectorcall(ARGC, KWARGC, false);
    }
    TypeError(OBJ_NAME(_t(callable)).escape() + " object is not callable");
    return nullptr;
}
// Sets up a frame for a bytecode `function` object (never runs it).
//   p0       — base of the call region on the value stack; everything above it
//              is consumed and replaced by the frame's locals
//   callable — must be a `function` object (callers check the type)
//   args     — positional arguments (includes self for method calls)
//   kwargs   — flat (int key, value) pairs
// Returns nullptr after pushing a frame onto `callstack`, or a generator
// object when the code object is a generator (no frame is pushed then).
inline PyObject* VM::_py_call(PyObject** p0, PyObject* callable, ArgsView args, ArgsView kwargs){
    // callable must be a `function` object
    if(s_data.is_overflow()) StackOverflowError();

    const Function& fn = CAST(Function&, callable);
    const CodeObject* co = fn.decl->code.get();
    // functions without a module inherit the caller's module
    PyObject* _module = fn._module != nullptr ? fn._module : callstack.top()._module;

    if(args.size() < fn.decl->args.size()){
        vm->TypeError(fmt(
            "expected ",
            fn.decl->args.size(),
            " positional arguments, but got ",
            args.size(),
            " (", fn.decl->code->name, ')'
        ));
    }

    // if this function is simple, a.k.a, no kwargs and no *args and not a generator
    // we can use a fast path to avoid using buffer copy
    if(fn.is_simple){
#if DEBUG_EXTRA_CHECK
        for(PyObject** p=p0; p<args.begin(); p++) *p = nullptr;
#endif
        // pad the stack so every varname has a slot, then point locals at args
        int spaces = co->varnames.size() - fn.decl->args.size();
        for(int j=0; j<spaces; j++) PUSH(nullptr);
        callstack.emplace(&s_data, p0, co, _module, callable, FastLocals(co, args.begin()));
        return nullptr;
    }

    // slow path: stage all locals in a thread-local scratch buffer first,
    // because the source values live on the same stack region we will reset
    int i = 0;
    static THREAD_LOCAL PyObject* buffer[PK_MAX_CO_VARNAMES];

    // prepare args
    for(int index: fn.decl->args) buffer[index] = args[i++];
    // set extra varnames to nullptr
    for(int j=i; j<co->varnames.size(); j++) buffer[j] = nullptr;
    // prepare kwdefaults
    for(auto& kv: fn.decl->kwargs) buffer[kv.key] = kv.value;

    // handle *args
    if(fn.decl->starred_arg != -1){
        List vargs;     // handle *args
        while(i < args.size()) vargs.push_back(args[i++]);
        buffer[fn.decl->starred_arg] = VAR(Tuple(std::move(vargs)));
    }else{
        // kwdefaults override: surplus positionals fill keyword-arg slots in order
        for(auto& kv: fn.decl->kwargs){
            if(i < args.size()){
                buffer[kv.key] = args[i++];
            }else{
                break;
            }
        }
        if(i < args.size()) TypeError(fmt("too many arguments", " (", fn.decl->code->name, ')'));
    }

    // explicit keyword arguments overwrite whatever is staged so far
    for(int i=0; i<kwargs.size(); i+=2){
        StrName key = CAST(int, kwargs[i]);     // keys are stored as interned int indices
        int index = co->varnames_inv.try_get(key);
        if(index<0) TypeError(fmt(key.escape(), " is an invalid keyword argument for ", co->name, "()"));
        buffer[index] = kwargs[i+1];
    }

    // drop the call region; locals now live only in `buffer`
    s_data.reset(p0);

    if(co->is_generator){
        // generators keep their locals in their own storage, not on the stack
        PyObject* ret = PyIter(Generator(
            this,
            Frame(&s_data, nullptr, co, _module, callable),
            ArgsView(buffer, buffer + co->varnames.size())
        ));
        return ret;
    }

    // copy buffer to stack
    for(int i=0; i<co->varnames.size(); i++) PUSH(buffer[i]);
    callstack.emplace(&s_data, p0, co, _module, callable);
    return nullptr;
}
  838. // https://docs.python.org/3/howto/descriptor.html#invocation-from-an-instance
  839. inline PyObject* VM::getattr(PyObject* obj, StrName name, bool throw_err){
  840. PyObject* objtype = _t(obj);
  841. // handle super() proxy
  842. if(is_non_tagged_type(obj, tp_super)){
  843. const Super& super = OBJ_GET(Super, obj);
  844. obj = super.first;
  845. objtype = _t(super.second);
  846. }
  847. PyObject* cls_var = find_name_in_mro(objtype, name);
  848. if(cls_var != nullptr){
  849. // handle descriptor
  850. PyObject* descr_get = _t(cls_var)->attr().try_get(__get__);
  851. if(descr_get != nullptr) return call_method(cls_var, descr_get, obj);
  852. }
  853. // handle instance __dict__
  854. if(!is_tagged(obj) && obj->is_attr_valid()){
  855. PyObject* val = obj->attr().try_get(name);
  856. if(val != nullptr) return val;
  857. }
  858. if(cls_var != nullptr){
  859. // bound method is non-data descriptor
  860. if(is_non_tagged_type(cls_var, tp_function) || is_non_tagged_type(cls_var, tp_native_func)){
  861. return VAR(BoundMethod(obj, cls_var));
  862. }
  863. return cls_var;
  864. }
  865. if(throw_err) AttributeError(obj, name);
  866. return nullptr;
  867. }
  868. // used by OP_LOAD_METHOD
  869. // try to load a unbound method (fallback to `getattr` if not found)
  870. inline PyObject* VM::get_unbound_method(PyObject* obj, StrName name, PyObject** self, bool throw_err, bool fallback){
  871. *self = _py_null;
  872. PyObject* objtype = _t(obj);
  873. // handle super() proxy
  874. if(is_non_tagged_type(obj, tp_super)){
  875. const Super& super = OBJ_GET(Super, obj);
  876. obj = super.first;
  877. objtype = _t(super.second);
  878. }
  879. PyObject* cls_var = find_name_in_mro(objtype, name);
  880. if(fallback){
  881. if(cls_var != nullptr){
  882. // handle descriptor
  883. PyObject* descr_get = _t(cls_var)->attr().try_get(__get__);
  884. if(descr_get != nullptr) return call_method(cls_var, descr_get, obj);
  885. }
  886. // handle instance __dict__
  887. if(!is_tagged(obj) && obj->is_attr_valid()){
  888. PyObject* val = obj->attr().try_get(name);
  889. if(val != nullptr) return val;
  890. }
  891. }
  892. if(cls_var != nullptr){
  893. if(is_non_tagged_type(cls_var, tp_function) || is_non_tagged_type(cls_var, tp_native_func)){
  894. *self = obj;
  895. }
  896. return cls_var;
  897. }
  898. if(throw_err) AttributeError(obj, name);
  899. return nullptr;
  900. }
  901. inline void VM::setattr(PyObject* obj, StrName name, PyObject* value){
  902. PyObject* objtype = _t(obj);
  903. // handle super() proxy
  904. if(is_non_tagged_type(obj, tp_super)){
  905. Super& super = OBJ_GET(Super, obj);
  906. obj = super.first;
  907. objtype = _t(super.second);
  908. }
  909. PyObject* cls_var = find_name_in_mro(objtype, name);
  910. if(cls_var != nullptr){
  911. // handle descriptor
  912. PyObject* cls_var_t = _t(cls_var);
  913. if(cls_var_t->attr().contains(__get__)){
  914. PyObject* descr_set = cls_var_t->attr().try_get(__set__);
  915. if(descr_set != nullptr){
  916. call_method(cls_var, descr_set, obj, value);
  917. }else{
  918. TypeError(fmt("readonly attribute: ", name.escape()));
  919. }
  920. return;
  921. }
  922. }
  923. // handle instance __dict__
  924. if(is_tagged(obj) || !obj->is_attr_valid()) TypeError("cannot set attribute");
  925. obj->attr().set(name, value);
  926. }
  927. template<int ARGC>
  928. void VM::bind_method(PyObject* obj, Str name, NativeFuncRaw fn) {
  929. check_type(obj, tp_type);
  930. obj->attr().set(name, VAR(NativeFunc(fn, ARGC, true)));
  931. }
  932. template<int ARGC>
  933. void VM::bind_func(PyObject* obj, Str name, NativeFuncRaw fn) {
  934. obj->attr().set(name, VAR(NativeFunc(fn, ARGC, false)));
  935. }
  936. inline void VM::_error(Exception e){
  937. if(callstack.empty()){
  938. e.is_re = false;
  939. throw e;
  940. }
  941. PUSH(VAR(e));
  942. _raise();
  943. }
  944. inline void ManagedHeap::mark() {
  945. for(PyObject* obj: _no_gc) OBJ_MARK(obj);
  946. for(auto& frame : vm->callstack.data()) frame._gc_mark();
  947. // TODO: avoid use nullptr?
  948. for(PyObject* obj: vm->s_data) if(obj != nullptr) OBJ_MARK(obj);
  949. }
  950. inline Str obj_type_name(VM *vm, Type type){
  951. return vm->_all_types[type].name;
  952. }
  953. #undef PY_VAR_INT
  954. #undef PY_VAR_FLOAT
  955. } // namespace pkpy