codeobject.h 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. #pragma once
#include <cstdio>
#include <limits>

#include "obj.h"
#include "error.h"
  4. namespace pkpy{
  5. enum NameScope { NAME_LOCAL, NAME_GLOBAL, NAME_GLOBAL_UNKNOWN };
  6. enum Opcode {
  7. #define OPCODE(name) OP_##name,
  8. #include "opcodes.h"
  9. #undef OPCODE
  10. };
  11. inline const char* OP_NAMES[] = {
  12. #define OPCODE(name) #name,
  13. #include "opcodes.h"
  14. #undef OPCODE
  15. };
  16. struct Bytecode{
  17. uint16_t op;
  18. uint16_t block;
  19. int arg;
  20. };
  21. enum CodeBlockType {
  22. NO_BLOCK,
  23. FOR_LOOP,
  24. WHILE_LOOP,
  25. CONTEXT_MANAGER,
  26. TRY_EXCEPT,
  27. };
  28. #define BC_NOARG -1
  29. #define BC_KEEPLINE -1
  30. struct CodeBlock {
  31. CodeBlockType type;
  32. int parent; // parent index in blocks
  33. int for_loop_depth; // this is used for exception handling
  34. int start; // start index of this block in codes, inclusive
  35. int end; // end index of this block in codes, exclusive
  36. CodeBlock(CodeBlockType type, int parent, int for_loop_depth, int start):
  37. type(type), parent(parent), for_loop_depth(for_loop_depth), start(start), end(-1) {}
  38. };
  39. struct CodeObjectSerializer{
  40. std::string buffer;
  41. int depth = 0;
  42. std::set<StrName> names;
  43. static const char END = '\n';
  44. CodeObjectSerializer(){
  45. write_str(PK_VERSION);
  46. }
  47. void write_int(i64 v){
  48. buffer += 'i';
  49. buffer += std::to_string(v);
  50. buffer += END;
  51. }
  52. void write_float(f64 v){
  53. buffer += 'f';
  54. buffer += std::to_string(v);
  55. buffer += END;
  56. }
  57. void write_str(const Str& v){
  58. buffer += 's';
  59. buffer += v.escape(false).str();
  60. buffer += END;
  61. }
  62. void write_none(){
  63. buffer += 'N';
  64. buffer += END;
  65. }
  66. void write_ellipsis(){
  67. buffer += 'E';
  68. buffer += END;
  69. }
  70. void write_bool(bool v){
  71. buffer += 'b';
  72. buffer += v ? '1' : '0';
  73. buffer += END;
  74. }
  75. void write_begin_mark(){
  76. buffer += '[';
  77. buffer += END;
  78. depth++;
  79. }
  80. void write_name(StrName name){
  81. PK_ASSERT(StrName::is_valid(name.index));
  82. buffer += 'n';
  83. buffer += std::to_string(name.index);
  84. buffer += END;
  85. names.insert(name);
  86. }
  87. void write_end_mark(){
  88. buffer += ']';
  89. buffer += END;
  90. depth--;
  91. PK_ASSERT(depth >= 0);
  92. }
  93. template<typename T>
  94. void write_bytes(T v){
  95. static_assert(std::is_trivially_copyable<T>::value);
  96. buffer += 'x';
  97. char* p = (char*)&v;
  98. for(int i=0; i<sizeof(T); i++){
  99. char c = p[i];
  100. buffer += "0123456789abcdef"[(c >> 4) & 0xf];
  101. buffer += "0123456789abcdef"[c & 0xf];
  102. }
  103. buffer += END;
  104. }
  105. void write_object(VM* vm, PyObject* obj);
  106. void write_code(VM* vm, const CodeObject* co);
  107. std::string str(){
  108. PK_ASSERT(depth == 0);
  109. for(auto name: names){
  110. PK_ASSERT(StrName::is_valid(name.index));
  111. write_name(name);
  112. write_str(name.sv());
  113. }
  114. return std::move(buffer);
  115. }
  116. };
  117. struct CodeObject {
  118. shared_ptr<SourceData> src;
  119. Str name;
  120. bool is_generator = false;
  121. CodeObject(shared_ptr<SourceData> src, const Str& name):
  122. src(src), name(name) {}
  123. std::vector<Bytecode> codes;
  124. std::vector<int> lines; // line number for each bytecode
  125. List consts;
  126. std::vector<StrName> varnames; // local variables
  127. NameDictInt varnames_inv;
  128. std::vector<CodeBlock> blocks = { CodeBlock(NO_BLOCK, -1, 0, 0) };
  129. NameDictInt labels;
  130. std::vector<FuncDecl_> func_decls;
  131. void _gc_mark() const {
  132. for(PyObject* v : consts) PK_OBJ_MARK(v);
  133. for(auto& decl: func_decls) decl->_gc_mark();
  134. }
  135. void write(VM* vm, CodeObjectSerializer& ss) const{
  136. ss.write_begin_mark(); // [
  137. ss.write_str(src->filename); // src->filename
  138. ss.write_int(src->mode); // src->mode
  139. ss.write_end_mark(); // ]
  140. ss.write_str(name); // name
  141. ss.write_bool(is_generator); // is_generator
  142. ss.write_begin_mark(); // [
  143. for(Bytecode bc: codes){
  144. if(StrName::is_valid(bc.arg)) ss.names.insert(StrName(bc.arg));
  145. ss.write_bytes(bc);
  146. }
  147. ss.write_end_mark(); // ]
  148. ss.write_begin_mark(); // [
  149. for(int line: lines){
  150. ss.write_int(line); // line
  151. }
  152. ss.write_end_mark(); // ]
  153. ss.write_begin_mark(); // [
  154. for(PyObject* o: consts){
  155. ss.write_object(vm, o);
  156. }
  157. ss.write_end_mark(); // ]
  158. ss.write_begin_mark(); // [
  159. for(StrName vn: varnames){
  160. ss.write_name(vn); // name
  161. }
  162. ss.write_end_mark(); // ]
  163. ss.write_begin_mark(); // [
  164. for(CodeBlock block: blocks){
  165. ss.write_bytes(block); // block
  166. }
  167. ss.write_end_mark(); // ]
  168. ss.write_begin_mark(); // [
  169. for(auto& label: labels.items()){
  170. ss.write_name(label.first); // label.first
  171. ss.write_int(label.second); // label.second
  172. }
  173. ss.write_end_mark(); // ]
  174. ss.write_begin_mark(); // [
  175. for(auto& decl: func_decls){
  176. ss.write_code(vm, decl->code.get()); // decl->code
  177. ss.write_begin_mark(); // [
  178. for(int arg: decl->args) ss.write_int(arg);
  179. ss.write_end_mark(); // ]
  180. ss.write_begin_mark(); // [
  181. for(auto kw: decl->kwargs){
  182. ss.write_int(kw.key); // kw.key
  183. ss.write_object(vm, kw.value); // kw.value
  184. }
  185. ss.write_end_mark(); // ]
  186. ss.write_int(decl->starred_arg);
  187. ss.write_int(decl->starred_kwarg);
  188. ss.write_bool(decl->nested);
  189. }
  190. ss.write_end_mark(); // ]
  191. }
  192. Str serialize(VM* vm) const{
  193. CodeObjectSerializer ss;
  194. ss.write_code(vm, this);
  195. return ss.str();
  196. }
  197. };
  198. inline void CodeObjectSerializer::write_code(VM* vm, const CodeObject* co){
  199. buffer += '(';
  200. buffer += END;
  201. co->write(vm, *this);
  202. buffer += ')';
  203. buffer += END;
  204. }
  205. } // namespace pkpy