compiler.h 41 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093
  1. #pragma once
  2. #include "parser.h"
  3. #include "error.h"
  4. #include "vm.h"
  5. class Compiler;
  6. typedef void (Compiler::*GrammarFn)();
  7. typedef void (Compiler::*CompilerAction)();
  8. struct GrammarRule{
  9. GrammarFn prefix;
  10. GrammarFn infix;
  11. Precedence precedence;
  12. };
  13. enum StringType { NORMAL_STRING, RAW_STRING, F_STRING };
  14. class Compiler {
  15. std::unique_ptr<Parser> parser;
  16. std::stack<CodeObject_> codes;
  17. bool is_compiling_class = false;
  18. int lexing_count = 0;
  19. bool used = false;
  20. VM* vm;
  21. emhash8::HashMap<TokenIndex, GrammarRule> rules;
  22. CodeObject_ co() const{ return codes.top(); }
  23. CompileMode mode() const{ return parser->src->mode; }
  24. public:
  25. Compiler(VM* vm, const char* source, Str filename, CompileMode mode){
  26. this->vm = vm;
  27. this->parser = std::make_unique<Parser>(
  28. pkpy::make_shared<SourceData>(source, filename, mode)
  29. );
  30. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  31. #define METHOD(name) &Compiler::name
  32. #define NO_INFIX nullptr, PREC_NONE
  33. for(TokenIndex i=0; i<kTokenCount; i++) rules[i] = { nullptr, NO_INFIX };
  34. rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB };
  35. rules[TK("(")] = { METHOD(exprGrouping), METHOD(exprCall), PREC_CALL };
  36. rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscript), PREC_SUBSCRIPT };
  37. rules[TK("{")] = { METHOD(exprMap), NO_INFIX };
  38. rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  39. rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM };
  40. rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM };
  41. rules[TK("*")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_FACTOR };
  42. rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  43. rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  44. rules[TK("**")] = { nullptr, METHOD(exprBinaryOp), PREC_EXPONENT };
  45. rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  46. rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  47. rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  48. rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  49. rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  50. rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  51. rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  52. rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  53. rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  54. rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  55. rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND };
  56. rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR };
  57. rules[TK("not")] = { METHOD(exprUnaryOp), nullptr, PREC_UNARY };
  58. rules[TK("True")] = { METHOD(exprValue), NO_INFIX };
  59. rules[TK("False")] = { METHOD(exprValue), NO_INFIX };
  60. rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX };
  61. rules[TK("None")] = { METHOD(exprValue), NO_INFIX };
  62. rules[TK("...")] = { METHOD(exprValue), NO_INFIX };
  63. rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
  64. rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
  65. rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
  66. rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
  67. rules[TK("?")] = { nullptr, METHOD(exprTernary), PREC_TERNARY };
  68. rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  69. rules[TK("+=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  70. rules[TK("-=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  71. rules[TK("*=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  72. rules[TK("/=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  73. rules[TK("//=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  74. rules[TK("%=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  75. rules[TK("&=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  76. rules[TK("|=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  77. rules[TK("^=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  78. rules[TK(",")] = { nullptr, METHOD(exprComma), PREC_COMMA };
  79. rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  80. rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  81. rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND };
  82. rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR };
  83. rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  84. #undef METHOD
  85. #undef NO_INFIX
  86. #define EXPR() parse_expression(PREC_TERNARY) // no '=' and ',' just a simple expression
  87. #define EXPR_TUPLE() parse_expression(PREC_COMMA) // no '=', but ',' is allowed
  88. #define EXPR_ANY() parse_expression(PREC_ASSIGNMENT)
  89. }
  90. private:
  91. Str eat_string_until(char quote, bool raw) {
  92. bool quote3 = parser->match_n_chars(2, quote);
  93. std::vector<char> buff;
  94. while (true) {
  95. char c = parser->eatchar_include_newline();
  96. if (c == quote){
  97. if(quote3 && !parser->match_n_chars(2, quote)){
  98. buff.push_back(c);
  99. continue;
  100. }
  101. break;
  102. }
  103. if (c == '\0'){
  104. if(quote3 && parser->src->mode == REPL_MODE){
  105. throw NeedMoreLines(false);
  106. }
  107. SyntaxError("EOL while scanning string literal");
  108. }
  109. if (c == '\n'){
  110. if(!quote3) SyntaxError("EOL while scanning string literal");
  111. else{
  112. buff.push_back(c);
  113. continue;
  114. }
  115. }
  116. if (!raw && c == '\\') {
  117. switch (parser->eatchar_include_newline()) {
  118. case '"': buff.push_back('"'); break;
  119. case '\'': buff.push_back('\''); break;
  120. case '\\': buff.push_back('\\'); break;
  121. case 'n': buff.push_back('\n'); break;
  122. case 'r': buff.push_back('\r'); break;
  123. case 't': buff.push_back('\t'); break;
  124. default: SyntaxError("invalid escape character");
  125. }
  126. } else {
  127. buff.push_back(c);
  128. }
  129. }
  130. return Str(buff.data(), buff.size());
  131. }
  132. void eat_string(char quote, StringType type) {
  133. Str s = eat_string_until(quote, type == RAW_STRING);
  134. if(type == F_STRING){
  135. parser->set_next_token(TK("@fstr"), vm->PyStr(s));
  136. }else{
  137. parser->set_next_token(TK("@str"), vm->PyStr(s));
  138. }
  139. }
  140. void eat_number() {
  141. static const std::regex pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?");
  142. std::smatch m;
  143. const char* i = parser->token_start;
  144. while(*i != '\n' && *i != '\0') i++;
  145. std::string s = std::string(parser->token_start, i);
  146. try{
  147. if (std::regex_search(s, m, pattern)) {
  148. // here is m.length()-1, since the first char was eaten by lex_token()
  149. for(int j=0; j<m.length()-1; j++) parser->eatchar();
  150. int base = 10;
  151. size_t size;
  152. if (m[1].matched) base = 16;
  153. if (m[2].matched) {
  154. if(base == 16) SyntaxError("hex literal should not contain a dot");
  155. parser->set_next_token(TK("@num"), vm->PyFloat(std::stod(m[0], &size)));
  156. } else {
  157. parser->set_next_token(TK("@num"), vm->PyInt(std::stoll(m[0], &size, base)));
  158. }
  159. if (size != m.length()) UNREACHABLE();
  160. }
  161. }catch(std::exception& _){
  162. SyntaxError("invalid number literal");
  163. }
  164. }
  165. void lex_token(){
  166. lexing_count++;
  167. _lex_token();
  168. lexing_count--;
  169. }
  170. // Lex the next token and set it as the next token.
  171. void _lex_token() {
  172. parser->prev = parser->curr;
  173. parser->curr = parser->next_token();
  174. //Str _info = parser->curr.info(); std::cout << _info << '[' << parser->current_line << ']' << std::endl;
  175. while (parser->peekchar() != '\0') {
  176. parser->token_start = parser->curr_char;
  177. char c = parser->eatchar_include_newline();
  178. switch (c) {
  179. case '\'': case '"': eat_string(c, NORMAL_STRING); return;
  180. case '#': parser->skip_line_comment(); break;
  181. case '{': parser->set_next_token(TK("{")); return;
  182. case '}': parser->set_next_token(TK("}")); return;
  183. case ',': parser->set_next_token(TK(",")); return;
  184. case ':': parser->set_next_token(TK(":")); return;
  185. case ';': parser->set_next_token(TK(";")); return;
  186. case '(': parser->set_next_token(TK("(")); return;
  187. case ')': parser->set_next_token(TK(")")); return;
  188. case '[': parser->set_next_token(TK("[")); return;
  189. case ']': parser->set_next_token(TK("]")); return;
  190. case '%': parser->set_next_token_2('=', TK("%"), TK("%=")); return;
  191. case '&': parser->set_next_token_2('=', TK("&"), TK("&=")); return;
  192. case '|': parser->set_next_token_2('=', TK("|"), TK("|=")); return;
  193. case '^': parser->set_next_token_2('=', TK("^"), TK("^=")); return;
  194. case '?': parser->set_next_token(TK("?")); return;
  195. case '.': {
  196. if(parser->matchchar('.')) {
  197. if(parser->matchchar('.')) {
  198. parser->set_next_token(TK("..."));
  199. } else {
  200. SyntaxError("invalid token '..'");
  201. }
  202. } else {
  203. parser->set_next_token(TK("."));
  204. }
  205. return;
  206. }
  207. case '=': parser->set_next_token_2('=', TK("="), TK("==")); return;
  208. case '+': parser->set_next_token_2('=', TK("+"), TK("+=")); return;
  209. case '>': {
  210. if(parser->matchchar('=')) parser->set_next_token(TK(">="));
  211. else if(parser->matchchar('>')) parser->set_next_token(TK(">>"));
  212. else parser->set_next_token(TK(">"));
  213. return;
  214. }
  215. case '<': {
  216. if(parser->matchchar('=')) parser->set_next_token(TK("<="));
  217. else if(parser->matchchar('<')) parser->set_next_token(TK("<<"));
  218. else parser->set_next_token(TK("<"));
  219. return;
  220. }
  221. case '-': {
  222. if(parser->matchchar('=')) parser->set_next_token(TK("-="));
  223. else if(parser->matchchar('>')) parser->set_next_token(TK("->"));
  224. else parser->set_next_token(TK("-"));
  225. return;
  226. }
  227. case '!':
  228. if(parser->matchchar('=')) parser->set_next_token(TK("!="));
  229. else SyntaxError("expected '=' after '!'");
  230. break;
  231. case '*':
  232. if (parser->matchchar('*')) {
  233. parser->set_next_token(TK("**")); // '**'
  234. } else {
  235. parser->set_next_token_2('=', TK("*"), TK("*="));
  236. }
  237. return;
  238. case '/':
  239. if(parser->matchchar('/')) {
  240. parser->set_next_token_2('=', TK("//"), TK("//="));
  241. } else {
  242. parser->set_next_token_2('=', TK("/"), TK("/="));
  243. }
  244. return;
  245. case '\r': break; // just ignore '\r'
  246. case ' ': case '\t': parser->eat_spaces(); break;
  247. case '\n': {
  248. parser->set_next_token(TK("@eol"));
  249. if(!parser->eat_indentation()) IndentationError("unindent does not match any outer indentation level");
  250. return;
  251. }
  252. default: {
  253. if(c == 'f'){
  254. if(parser->matchchar('\'')) {eat_string('\'', F_STRING); return;}
  255. if(parser->matchchar('"')) {eat_string('"', F_STRING); return;}
  256. }else if(c == 'r'){
  257. if(parser->matchchar('\'')) {eat_string('\'', RAW_STRING); return;}
  258. if(parser->matchchar('"')) {eat_string('"', RAW_STRING); return;}
  259. }
  260. if (c >= '0' && c <= '9') {
  261. eat_number();
  262. return;
  263. }
  264. switch (parser->eat_name())
  265. {
  266. case 0: break;
  267. case 1: SyntaxError("invalid char: " + std::string(1, c));
  268. case 2: SyntaxError("invalid utf8 sequence: " + std::string(1, c));
  269. case 3: SyntaxError("@id contains invalid char"); break;
  270. case 4: SyntaxError("invalid JSON token"); break;
  271. default: UNREACHABLE();
  272. }
  273. return;
  274. }
  275. }
  276. }
  277. parser->token_start = parser->curr_char;
  278. parser->set_next_token(TK("@eof"));
  279. }
  280. inline TokenIndex peek() {
  281. return parser->curr.type;
  282. }
  283. // not sure this will work
  284. TokenIndex peek_next() {
  285. if(parser->nexts.empty()) return TK("@eof");
  286. return parser->nexts.front().type;
  287. }
  288. bool match(TokenIndex expected) {
  289. if (peek() != expected) return false;
  290. lex_token();
  291. return true;
  292. }
  293. void consume(TokenIndex expected) {
  294. if (!match(expected)){
  295. _StrStream ss;
  296. ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(peek()) << "'";
  297. SyntaxError(ss.str());
  298. }
  299. }
  300. bool match_newlines(bool repl_throw=false) {
  301. bool consumed = false;
  302. if (peek() == TK("@eol")) {
  303. while (peek() == TK("@eol")) lex_token();
  304. consumed = true;
  305. }
  306. if (repl_throw && peek() == TK("@eof")){
  307. throw NeedMoreLines(is_compiling_class);
  308. }
  309. return consumed;
  310. }
  311. bool match_end_stmt() {
  312. if (match(TK(";"))) { match_newlines(); return true; }
  313. if (match_newlines() || peek()==TK("@eof")) return true;
  314. if (peek() == TK("@dedent")) return true;
  315. return false;
  316. }
  317. void consume_end_stmt() {
  318. if (!match_end_stmt()) SyntaxError("expected statement end");
  319. }
  320. void exprLiteral() {
  321. PyVar value = parser->prev.value;
  322. int index = co()->add_const(value);
  323. emit(OP_LOAD_CONST, index);
  324. }
  325. void exprFString() {
  326. static const std::regex pattern(R"(\{(.*?)\})");
  327. PyVar value = parser->prev.value;
  328. Str s = vm->PyStr_AS_C(value);
  329. std::sregex_iterator begin(s.begin(), s.end(), pattern);
  330. std::sregex_iterator end;
  331. int size = 0;
  332. int i = 0;
  333. for(auto it = begin; it != end; it++) {
  334. std::smatch m = *it;
  335. if (i < m.position()) {
  336. std::string literal = s.substr(i, m.position() - i);
  337. emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(literal)));
  338. size++;
  339. }
  340. emit(OP_LOAD_EVAL_FN);
  341. emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(m[1].str())));
  342. emit(OP_CALL, 1);
  343. size++;
  344. i = (int)(m.position() + m.length());
  345. }
  346. if (i < s.size()) {
  347. std::string literal = s.substr(i, s.size() - i);
  348. emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(literal)));
  349. size++;
  350. }
  351. emit(OP_BUILD_STRING, size);
  352. }
  353. void exprLambda() {
  354. pkpy::Function_ func = pkpy::make_shared<pkpy::Function>();
  355. func->name = "<lambda>";
  356. if(!match(TK(":"))){
  357. _compile_f_args(func, false);
  358. consume(TK(":"));
  359. }
  360. func->code = pkpy::make_shared<CodeObject>(parser->src, func->name);
  361. this->codes.push(func->code);
  362. EXPR_TUPLE();
  363. emit(OP_RETURN_VALUE);
  364. func->code->optimize();
  365. this->codes.pop();
  366. emit(OP_LOAD_LAMBDA, co()->add_const(vm->PyFunction(func)));
  367. }
  368. void exprAssign() {
  369. co()->_rvalue = true;
  370. TokenIndex op = parser->prev.type;
  371. if(op == TK("=")) { // a = (expr)
  372. EXPR_TUPLE();
  373. emit(OP_STORE_REF);
  374. }else{ // a += (expr) -> a = a + (expr)
  375. emit(OP_DUP_TOP);
  376. EXPR();
  377. switch (op) {
  378. case TK("+="): emit(OP_BINARY_OP, 0); break;
  379. case TK("-="): emit(OP_BINARY_OP, 1); break;
  380. case TK("*="): emit(OP_BINARY_OP, 2); break;
  381. case TK("/="): emit(OP_BINARY_OP, 3); break;
  382. case TK("//="): emit(OP_BINARY_OP, 4); break;
  383. case TK("%="): emit(OP_BINARY_OP, 5); break;
  384. case TK("&="): emit(OP_BITWISE_OP, 2); break;
  385. case TK("|="): emit(OP_BITWISE_OP, 3); break;
  386. case TK("^="): emit(OP_BITWISE_OP, 4); break;
  387. default: UNREACHABLE();
  388. }
  389. emit(OP_STORE_REF);
  390. }
  391. co()->_rvalue = false;
  392. }
  393. void exprComma() {
  394. int size = 1; // an expr is in the stack now
  395. do {
  396. EXPR(); // NOTE: "1," will fail, "1,2" will be ok
  397. size++;
  398. } while(match(TK(",")));
  399. emit(OP_BUILD_SMART_TUPLE, size);
  400. }
  401. void exprOr() {
  402. int patch = emit(OP_JUMP_IF_TRUE_OR_POP);
  403. parse_expression(PREC_LOGICAL_OR);
  404. patch_jump(patch);
  405. }
  406. void exprAnd() {
  407. int patch = emit(OP_JUMP_IF_FALSE_OR_POP);
  408. parse_expression(PREC_LOGICAL_AND);
  409. patch_jump(patch);
  410. }
  411. void exprTernary() {
  412. int patch = emit(OP_POP_JUMP_IF_FALSE);
  413. EXPR(); // if true
  414. int patch2 = emit(OP_JUMP_ABSOLUTE);
  415. consume(TK(":"));
  416. patch_jump(patch);
  417. EXPR(); // if false
  418. patch_jump(patch2);
  419. }
  420. void exprBinaryOp() {
  421. TokenIndex op = parser->prev.type;
  422. parse_expression((Precedence)(rules[op].precedence + 1));
  423. switch (op) {
  424. case TK("+"): emit(OP_BINARY_OP, 0); break;
  425. case TK("-"): emit(OP_BINARY_OP, 1); break;
  426. case TK("*"): emit(OP_BINARY_OP, 2); break;
  427. case TK("/"): emit(OP_BINARY_OP, 3); break;
  428. case TK("//"): emit(OP_BINARY_OP, 4); break;
  429. case TK("%"): emit(OP_BINARY_OP, 5); break;
  430. case TK("**"): emit(OP_BINARY_OP, 6); break;
  431. case TK("<"): emit(OP_COMPARE_OP, 0); break;
  432. case TK("<="): emit(OP_COMPARE_OP, 1); break;
  433. case TK("=="): emit(OP_COMPARE_OP, 2); break;
  434. case TK("!="): emit(OP_COMPARE_OP, 3); break;
  435. case TK(">"): emit(OP_COMPARE_OP, 4); break;
  436. case TK(">="): emit(OP_COMPARE_OP, 5); break;
  437. case TK("in"): emit(OP_CONTAINS_OP, 0); break;
  438. case TK("not in"): emit(OP_CONTAINS_OP, 1); break;
  439. case TK("is"): emit(OP_IS_OP, 0); break;
  440. case TK("is not"): emit(OP_IS_OP, 1); break;
  441. case TK("<<"): emit(OP_BITWISE_OP, 0); break;
  442. case TK(">>"): emit(OP_BITWISE_OP, 1); break;
  443. case TK("&"): emit(OP_BITWISE_OP, 2); break;
  444. case TK("|"): emit(OP_BITWISE_OP, 3); break;
  445. case TK("^"): emit(OP_BITWISE_OP, 4); break;
  446. default: UNREACHABLE();
  447. }
  448. }
  449. void exprUnaryOp() {
  450. TokenIndex op = parser->prev.type;
  451. parse_expression((Precedence)(PREC_UNARY + 1));
  452. switch (op) {
  453. case TK("-"): emit(OP_UNARY_NEGATIVE); break;
  454. case TK("not"): emit(OP_UNARY_NOT); break;
  455. case TK("*"): SyntaxError("cannot use '*' as unary operator"); break;
  456. default: UNREACHABLE();
  457. }
  458. }
  459. void exprGrouping() {
  460. match_newlines(mode()==REPL_MODE);
  461. EXPR_TUPLE();
  462. match_newlines(mode()==REPL_MODE);
  463. consume(TK(")"));
  464. }
  465. void exprList() {
  466. int _patch = emit(OP_NO_OP);
  467. int _body_start = co()->codes.size();
  468. int ARGC = 0;
  469. do {
  470. match_newlines(mode()==REPL_MODE);
  471. if (peek() == TK("]")) break;
  472. EXPR(); ARGC++;
  473. match_newlines(mode()==REPL_MODE);
  474. if(ARGC == 1 && match(TK("for"))) goto __LISTCOMP;
  475. } while (match(TK(",")));
  476. match_newlines(mode()==REPL_MODE);
  477. consume(TK("]"));
  478. emit(OP_BUILD_LIST, ARGC);
  479. return;
  480. __LISTCOMP:
  481. int _body_end_return = emit(OP_JUMP_ABSOLUTE, -1);
  482. int _body_end = co()->codes.size();
  483. co()->codes[_patch].op = OP_JUMP_ABSOLUTE;
  484. co()->codes[_patch].arg = _body_end;
  485. emit(OP_BUILD_LIST, 0);
  486. EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE();
  487. match_newlines(mode()==REPL_MODE);
  488. int _skipPatch = emit(OP_JUMP_ABSOLUTE);
  489. int _cond_start = co()->codes.size();
  490. int _cond_end_return = -1;
  491. if(match(TK("if"))) {
  492. EXPR_TUPLE();
  493. _cond_end_return = emit(OP_JUMP_ABSOLUTE, -1);
  494. }
  495. patch_jump(_skipPatch);
  496. emit(OP_GET_ITER);
  497. co()->_enter_block(FOR_LOOP);
  498. emit(OP_FOR_ITER);
  499. if(_cond_end_return != -1) { // there is an if condition
  500. emit(OP_JUMP_ABSOLUTE, _cond_start);
  501. patch_jump(_cond_end_return);
  502. int ifpatch = emit(OP_POP_JUMP_IF_FALSE);
  503. emit(OP_JUMP_ABSOLUTE, _body_start);
  504. patch_jump(_body_end_return);
  505. emit(OP_LIST_APPEND);
  506. patch_jump(ifpatch);
  507. }else{
  508. emit(OP_JUMP_ABSOLUTE, _body_start);
  509. patch_jump(_body_end_return);
  510. emit(OP_LIST_APPEND);
  511. }
  512. emit(OP_LOOP_CONTINUE, -1, true);
  513. co()->_exit_block();
  514. match_newlines(mode()==REPL_MODE);
  515. consume(TK("]"));
  516. }
  517. void exprMap() {
  518. bool parsing_dict = false;
  519. int size = 0;
  520. do {
  521. match_newlines(mode()==REPL_MODE);
  522. if (peek() == TK("}")) break;
  523. EXPR();
  524. if(peek() == TK(":")) parsing_dict = true;
  525. if(parsing_dict){
  526. consume(TK(":"));
  527. EXPR();
  528. }
  529. size++;
  530. match_newlines(mode()==REPL_MODE);
  531. } while (match(TK(",")));
  532. consume(TK("}"));
  533. if(size == 0 || parsing_dict) emit(OP_BUILD_MAP, size);
  534. else emit(OP_BUILD_SET, size);
  535. }
  536. void exprCall() {
  537. int ARGC = 0;
  538. int KWARGC = 0;
  539. do {
  540. match_newlines(mode()==REPL_MODE);
  541. if (peek() == TK(")")) break;
  542. if(peek() == TK("@id") && peek_next() == TK("=")) {
  543. consume(TK("@id"));
  544. const Str& key = parser->prev.str();
  545. emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(key)));
  546. consume(TK("="));
  547. co()->_rvalue=true; EXPR(); co()->_rvalue=false;
  548. KWARGC++;
  549. } else{
  550. if(KWARGC > 0) SyntaxError("positional argument follows keyword argument");
  551. co()->_rvalue=true; EXPR(); co()->_rvalue=false;
  552. ARGC++;
  553. }
  554. match_newlines(mode()==REPL_MODE);
  555. } while (match(TK(",")));
  556. consume(TK(")"));
  557. emit(OP_CALL, (KWARGC << 16) | ARGC);
  558. }
  559. void exprName(){ _exprName(false); }
  560. void _exprName(bool force_lvalue) {
  561. Token tkname = parser->prev;
  562. int index = co()->add_name(
  563. tkname.str(),
  564. codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL
  565. );
  566. bool fast_load = !force_lvalue && co()->_rvalue;
  567. emit(fast_load ? OP_LOAD_NAME : OP_LOAD_NAME_REF, index);
  568. }
  569. void exprAttrib() {
  570. consume(TK("@id"));
  571. const Str& name = parser->prev.str();
  572. int index = co()->add_name(name, NAME_ATTR);
  573. emit(OP_BUILD_ATTR_REF, index);
  574. }
  575. // [:], [:b]
  576. // [a], [a:], [a:b]
  577. void exprSubscript() {
  578. if(match(TK(":"))){
  579. emit(OP_LOAD_NONE);
  580. if(match(TK("]"))){
  581. emit(OP_LOAD_NONE);
  582. }else{
  583. EXPR_TUPLE();
  584. consume(TK("]"));
  585. }
  586. emit(OP_BUILD_SLICE);
  587. }else{
  588. EXPR_TUPLE();
  589. if(match(TK(":"))){
  590. if(match(TK("]"))){
  591. emit(OP_LOAD_NONE);
  592. }else{
  593. EXPR_TUPLE();
  594. consume(TK("]"));
  595. }
  596. emit(OP_BUILD_SLICE);
  597. }else{
  598. consume(TK("]"));
  599. }
  600. }
  601. emit(OP_BUILD_INDEX_REF);
  602. }
  603. void exprValue() {
  604. TokenIndex op = parser->prev.type;
  605. switch (op) {
  606. case TK("None"): emit(OP_LOAD_NONE); break;
  607. case TK("True"): emit(OP_LOAD_TRUE); break;
  608. case TK("False"): emit(OP_LOAD_FALSE); break;
  609. case TK("..."): emit(OP_LOAD_ELLIPSIS); break;
  610. default: UNREACHABLE();
  611. }
  612. }
  613. int emit(Opcode opcode, int arg=-1, bool keepline=false) {
  614. int line = parser->prev.line;
  615. co()->codes.push_back(
  616. Bytecode{(uint8_t)opcode, arg, line, (uint16_t)co()->_curr_block_i}
  617. );
  618. int i = co()->codes.size() - 1;
  619. if(keepline && i>=1) co()->codes[i].line = co()->codes[i-1].line;
  620. return i;
  621. }
  622. inline void patch_jump(int addr_index) {
  623. int target = co()->codes.size();
  624. co()->codes[addr_index].arg = target;
  625. }
  626. void compile_block_body(CompilerAction action=nullptr) {
  627. if(action == nullptr) action = &Compiler::compile_stmt;
  628. consume(TK(":"));
  629. if(!match_newlines(mode()==REPL_MODE)){
  630. SyntaxError("expected a new line after ':'");
  631. }
  632. consume(TK("@indent"));
  633. while (peek() != TK("@dedent")) {
  634. match_newlines();
  635. (this->*action)();
  636. match_newlines();
  637. }
  638. consume(TK("@dedent"));
  639. }
  640. Token _compile_import() {
  641. consume(TK("@id"));
  642. Token tkmodule = parser->prev;
  643. int index = co()->add_name(tkmodule.str(), NAME_SPECIAL);
  644. emit(OP_IMPORT_NAME, index);
  645. return tkmodule;
  646. }
  647. // import a as b
  648. void compile_normal_import() {
  649. do {
  650. Token tkmodule = _compile_import();
  651. if (match(TK("as"))) {
  652. consume(TK("@id"));
  653. tkmodule = parser->prev;
  654. }
  655. int index = co()->add_name(tkmodule.str(), NAME_GLOBAL);
  656. emit(OP_STORE_NAME, index);
  657. } while (match(TK(",")));
  658. consume_end_stmt();
  659. }
  660. // from a import b as c, d as e
  661. void compile_from_import() {
  662. Token tkmodule = _compile_import();
  663. consume(TK("import"));
  664. do {
  665. emit(OP_DUP_TOP);
  666. consume(TK("@id"));
  667. Token tkname = parser->prev;
  668. int index = co()->add_name(tkname.str(), NAME_ATTR);
  669. emit(OP_BUILD_ATTR_REF, index);
  670. if (match(TK("as"))) {
  671. consume(TK("@id"));
  672. tkname = parser->prev;
  673. }
  674. index = co()->add_name(tkname.str(), NAME_GLOBAL);
  675. emit(OP_STORE_NAME, index);
  676. } while (match(TK(",")));
  677. emit(OP_POP_TOP);
  678. consume_end_stmt();
  679. }
  680. void parse_expression(Precedence precedence) {
  681. lex_token();
  682. GrammarFn prefix = rules[parser->prev.type].prefix;
  683. if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(parser->prev.type));
  684. (this->*prefix)();
  685. while (rules[peek()].precedence >= precedence) {
  686. lex_token();
  687. TokenIndex op = parser->prev.type;
  688. GrammarFn infix = rules[op].infix;
  689. if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true");
  690. (this->*infix)();
  691. }
  692. }
  693. void compile_if_stmt() {
  694. match_newlines();
  695. co()->_rvalue = true;
  696. EXPR_TUPLE(); // condition
  697. co()->_rvalue = false;
  698. int ifpatch = emit(OP_POP_JUMP_IF_FALSE);
  699. compile_block_body();
  700. if (match(TK("elif"))) {
  701. int exit_jump = emit(OP_JUMP_ABSOLUTE);
  702. patch_jump(ifpatch);
  703. compile_if_stmt();
  704. patch_jump(exit_jump);
  705. } else if (match(TK("else"))) {
  706. int exit_jump = emit(OP_JUMP_ABSOLUTE);
  707. patch_jump(ifpatch);
  708. compile_block_body();
  709. patch_jump(exit_jump);
  710. } else {
  711. patch_jump(ifpatch);
  712. }
  713. }
  714. void compile_while_loop() {
  715. co()->_enter_block(WHILE_LOOP);
  716. co()->_rvalue = true;
  717. EXPR_TUPLE(); // condition
  718. co()->_rvalue = false;
  719. int patch = emit(OP_POP_JUMP_IF_FALSE);
  720. compile_block_body();
  721. emit(OP_LOOP_CONTINUE, -1, true);
  722. patch_jump(patch);
  723. co()->_exit_block();
  724. }
  725. void EXPR_FOR_VARS(){
  726. int size = 0;
  727. do {
  728. consume(TK("@id"));
  729. _exprName(true); size++;
  730. } while (match(TK(",")));
  731. if(size > 1) emit(OP_BUILD_SMART_TUPLE, size);
  732. }
  733. void compile_for_loop() {
  734. EXPR_FOR_VARS();consume(TK("in")); EXPR_TUPLE();
  735. emit(OP_GET_ITER);
  736. co()->_enter_block(FOR_LOOP);
  737. emit(OP_FOR_ITER);
  738. compile_block_body();
  739. emit(OP_LOOP_CONTINUE, -1, true);
  740. co()->_exit_block();
  741. }
  742. void compile_try_except() {
  743. co()->_enter_block(TRY_EXCEPT);
  744. emit(OP_TRY_BLOCK_ENTER);
  745. compile_block_body();
  746. emit(OP_TRY_BLOCK_EXIT);
  747. std::vector<int> patches = { emit(OP_JUMP_ABSOLUTE) };
  748. co()->_exit_block();
  749. do {
  750. consume(TK("except"));
  751. if(match(TK("@id"))){
  752. int name_idx = co()->add_name(parser->prev.str(), NAME_SPECIAL);
  753. emit(OP_EXCEPTION_MATCH, name_idx);
  754. }else{
  755. emit(OP_LOAD_TRUE);
  756. }
  757. int patch = emit(OP_POP_JUMP_IF_FALSE);
  758. emit(OP_POP_TOP); // pop the exception on match
  759. compile_block_body();
  760. patches.push_back(emit(OP_JUMP_ABSOLUTE));
  761. patch_jump(patch);
  762. }while(peek() == TK("except"));
  763. emit(OP_RE_RAISE); // no match, re-raise
  764. for (int patch : patches) patch_jump(patch);
  765. }
  766. void compile_stmt() {
  767. if (match(TK("break"))) {
  768. if (!co()->_is_curr_block_loop()) SyntaxError("'break' outside loop");
  769. consume_end_stmt();
  770. emit(OP_LOOP_BREAK);
  771. } else if (match(TK("continue"))) {
  772. if (!co()->_is_curr_block_loop()) SyntaxError("'continue' not properly in loop");
  773. consume_end_stmt();
  774. emit(OP_LOOP_CONTINUE);
  775. } else if (match(TK("return"))) {
  776. if (codes.size() == 1)
  777. SyntaxError("'return' outside function");
  778. if(match_end_stmt()){
  779. emit(OP_LOAD_NONE);
  780. }else{
  781. co()->_rvalue = true;
  782. EXPR_TUPLE(); // return value
  783. co()->_rvalue = false;
  784. consume_end_stmt();
  785. }
  786. emit(OP_RETURN_VALUE, -1, true);
  787. } else if (match(TK("if"))) {
  788. compile_if_stmt();
  789. } else if (match(TK("while"))) {
  790. compile_while_loop();
  791. } else if (match(TK("for"))) {
  792. compile_for_loop();
  793. } else if (match(TK("try"))) {
  794. compile_try_except();
  795. }else if(match(TK("assert"))){
  796. EXPR();
  797. emit(OP_ASSERT);
  798. consume_end_stmt();
  799. } else if(match(TK("with"))){
  800. EXPR();
  801. consume(TK("as"));
  802. consume(TK("@id"));
  803. Token tkname = parser->prev;
  804. int index = co()->add_name(
  805. tkname.str(),
  806. codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL
  807. );
  808. emit(OP_STORE_NAME, index);
  809. emit(OP_LOAD_NAME_REF, index);
  810. emit(OP_WITH_ENTER);
  811. compile_block_body();
  812. emit(OP_LOAD_NAME_REF, index);
  813. emit(OP_WITH_EXIT);
  814. } else if(match(TK("label"))){
  815. if(mode() != EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE");
  816. consume(TK(".")); consume(TK("@id"));
  817. Str label = parser->prev.str();
  818. bool ok = co()->add_label(label);
  819. if(!ok) SyntaxError("label '" + label + "' already exists");
  820. consume_end_stmt();
  821. } else if(match(TK("goto"))){ // https://entrian.com/goto/
  822. if(mode() != EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE");
  823. consume(TK(".")); consume(TK("@id"));
  824. emit(OP_GOTO, co()->add_name(parser->prev.str(), NAME_SPECIAL));
  825. consume_end_stmt();
  826. } else if(match(TK("raise"))){
  827. consume(TK("@id"));
  828. int dummy_t = co()->add_name(parser->prev.str(), NAME_SPECIAL);
  829. if(match(TK("("))){
  830. EXPR(); consume(TK(")"));
  831. }else{
  832. emit(OP_LOAD_NONE);
  833. }
  834. emit(OP_RAISE, dummy_t);
  835. consume_end_stmt();
  836. } else if(match(TK("del"))){
  837. EXPR();
  838. emit(OP_DELETE_REF);
  839. consume_end_stmt();
  840. } else if(match(TK("global"))){
  841. do {
  842. consume(TK("@id"));
  843. co()->global_names[parser->prev.str()] = 1;
  844. } while (match(TK(",")));
  845. consume_end_stmt();
  846. } else if(match(TK("pass"))){
  847. consume_end_stmt();
  848. } else {
  849. EXPR_ANY();
  850. consume_end_stmt();
  851. // If last op is not an assignment, pop the result.
  852. uint8_t last_op = co()->codes.back().op;
  853. if( last_op!=OP_STORE_NAME && last_op!=OP_STORE_REF){
  854. if(mode()==REPL_MODE && parser->indents.top()==0) emit(OP_PRINT_EXPR, -1, true);
  855. emit(OP_POP_TOP, -1, true);
  856. }
  857. }
  858. }
  859. void compile_class(){
  860. consume(TK("@id"));
  861. int cls_name_idx = co()->add_name(parser->prev.str(), NAME_GLOBAL);
  862. int super_cls_name_idx = -1;
  863. if(match(TK("(")) && match(TK("@id"))){
  864. super_cls_name_idx = co()->add_name(parser->prev.str(), NAME_GLOBAL);
  865. consume(TK(")"));
  866. }
  867. emit(OP_LOAD_NONE);
  868. is_compiling_class = true;
  869. compile_block_body(&Compiler::compile_function);
  870. is_compiling_class = false;
  871. if(super_cls_name_idx == -1) emit(OP_LOAD_NONE);
  872. else emit(OP_LOAD_NAME_REF, super_cls_name_idx);
  873. emit(OP_BUILD_CLASS, cls_name_idx);
  874. }
  875. void _compile_f_args(pkpy::Function_ func, bool enable_type_hints){
  876. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  877. do {
  878. if(state == 3) SyntaxError("**kwargs should be the last argument");
  879. match_newlines();
  880. if(match(TK("*"))){
  881. if(state < 1) state = 1;
  882. else SyntaxError("*args should be placed before **kwargs");
  883. }
  884. else if(match(TK("**"))){
  885. state = 3;
  886. }
  887. consume(TK("@id"));
  888. const Str& name = parser->prev.str();
  889. if(func->hasName(name)) SyntaxError("duplicate argument name");
  890. // eat type hints
  891. if(enable_type_hints && match(TK(":"))) consume(TK("@id"));
  892. if(state == 0 && peek() == TK("=")) state = 2;
  893. switch (state)
  894. {
  895. case 0: func->args.push_back(name); break;
  896. case 1: func->starredArg = name; state+=1; break;
  897. case 2: {
  898. consume(TK("="));
  899. PyVarOrNull value = read_literal();
  900. if(value == nullptr){
  901. SyntaxError(Str("expect a literal, not ") + TK_STR(parser->curr.type));
  902. }
  903. func->kwArgs[name] = value;
  904. func->kwArgsOrder.push_back(name);
  905. } break;
  906. case 3: SyntaxError("**kwargs is not supported yet"); break;
  907. }
  908. } while (match(TK(",")));
  909. }
  910. void compile_function(){
  911. if(is_compiling_class){
  912. if(match(TK("pass"))) return;
  913. consume(TK("def"));
  914. }
  915. pkpy::Function_ func = pkpy::make_shared<pkpy::Function>();
  916. consume(TK("@id"));
  917. func->name = parser->prev.str();
  918. if (match(TK("(")) && !match(TK(")"))) {
  919. _compile_f_args(func, true);
  920. consume(TK(")"));
  921. }
  922. // eat type hints
  923. if(match(TK("->"))) consume(TK("@id"));
  924. func->code = pkpy::make_shared<CodeObject>(parser->src, func->name);
  925. this->codes.push(func->code);
  926. compile_block_body();
  927. func->code->optimize();
  928. this->codes.pop();
  929. emit(OP_LOAD_CONST, co()->add_const(vm->PyFunction(func)));
  930. if(!is_compiling_class) emit(OP_STORE_FUNCTION);
  931. }
  932. PyVarOrNull read_literal(){
  933. if(match(TK("-"))){
  934. consume(TK("@num"));
  935. PyVar val = parser->prev.value;
  936. return vm->num_negated(val);
  937. }
  938. if(match(TK("@num"))) return parser->prev.value;
  939. if(match(TK("@str"))) return parser->prev.value;
  940. if(match(TK("True"))) return vm->PyBool(true);
  941. if(match(TK("False"))) return vm->PyBool(false);
  942. if(match(TK("None"))) return vm->None;
  943. if(match(TK("..."))) return vm->Ellipsis;
  944. return nullptr;
  945. }
  946. /***** Error Reporter *****/
  947. void throw_err(Str type, Str msg){
  948. int lineno = parser->curr.line;
  949. const char* cursor = parser->curr.start;
  950. // if error occurs in lexing, lineno should be `parser->current_line`
  951. if(lexing_count > 0){
  952. lineno = parser->current_line;
  953. cursor = parser->curr_char;
  954. }
  955. if(parser->peekchar() == '\n') lineno--;
  956. auto e = pkpy::Exception("SyntaxError", msg);
  957. e.st_push(parser->src->snapshot(lineno, cursor));
  958. throw e;
  959. }
  960. void SyntaxError(Str msg){ throw_err("SyntaxError", msg); }
  961. void IndentationError(Str msg){ throw_err("IndentationError", msg); }
  962. public:
  963. CodeObject_ compile(){
  964. // can only be called once
  965. if(used) UNREACHABLE();
  966. used = true;
  967. CodeObject_ code = pkpy::make_shared<CodeObject>(parser->src, Str("<module>"));
  968. codes.push(code);
  969. lex_token(); lex_token();
  970. match_newlines();
  971. if(mode()==EVAL_MODE) {
  972. EXPR_TUPLE();
  973. consume(TK("@eof"));
  974. code->optimize();
  975. return code;
  976. }else if(mode()==JSON_MODE){
  977. PyVarOrNull value = read_literal();
  978. if(value != nullptr) emit(OP_LOAD_CONST, code->add_const(value));
  979. else if(match(TK("{"))) exprMap();
  980. else if(match(TK("["))) exprList();
  981. else SyntaxError("expect a JSON object or array");
  982. consume(TK("@eof"));
  983. return code; // no need to optimize for JSON decoding
  984. }
  985. while (!match(TK("@eof"))) {
  986. // compile top-level statement
  987. if (match(TK("class"))) {
  988. compile_class();
  989. } else if (match(TK("def"))) {
  990. compile_function();
  991. } else if (match(TK("import"))) {
  992. compile_normal_import();
  993. } else if (match(TK("from"))) {
  994. compile_from_import();
  995. } else {
  996. compile_stmt();
  997. }
  998. match_newlines();
  999. }
  1000. code->optimize();
  1001. return code;
  1002. }
  1003. };