compiler.h 42 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111
  1. #pragma once
  2. #include "parser.h"
  3. #include "error.h"
  4. #include "vm.h"
  5. class Compiler;
  6. typedef void (Compiler::*GrammarFn)();
  7. typedef void (Compiler::*CompilerAction)();
  8. struct GrammarRule{
  9. GrammarFn prefix;
  10. GrammarFn infix;
  11. Precedence precedence;
  12. };
  13. enum StringType { NORMAL_STRING, RAW_STRING, F_STRING };
  14. class Compiler {
  15. std::unique_ptr<Parser> parser;
  16. std::stack<CodeObject_> codes;
  17. bool is_compiling_class = false;
  18. int lexing_count = 0;
  19. bool used = false;
  20. VM* vm;
  21. emhash8::HashMap<TokenIndex, GrammarRule> rules;
  22. CodeObject_ co() const{ return codes.top(); }
  23. CompileMode mode() const{ return parser->src->mode; }
  24. public:
  25. Compiler(VM* vm, const char* source, Str filename, CompileMode mode){
  26. this->vm = vm;
  27. this->parser = std::make_unique<Parser>(
  28. pkpy::make_shared<SourceData>(source, filename, mode)
  29. );
  30. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  31. #define METHOD(name) &Compiler::name
  32. #define NO_INFIX nullptr, PREC_NONE
  33. for(TokenIndex i=0; i<kTokenCount; i++) rules[i] = { nullptr, NO_INFIX };
  34. rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB };
  35. rules[TK("(")] = { METHOD(exprGrouping), METHOD(exprCall), PREC_CALL };
  36. rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscript), PREC_SUBSCRIPT };
  37. rules[TK("{")] = { METHOD(exprMap), NO_INFIX };
  38. rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  39. rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM };
  40. rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM };
  41. rules[TK("*")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_FACTOR };
  42. rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  43. rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  44. rules[TK("**")] = { nullptr, METHOD(exprBinaryOp), PREC_EXPONENT };
  45. rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  46. rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  47. rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  48. rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  49. rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  50. rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  51. rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  52. rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  53. rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  54. rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  55. rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND };
  56. rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR };
  57. rules[TK("not")] = { METHOD(exprUnaryOp), nullptr, PREC_UNARY };
  58. rules[TK("True")] = { METHOD(exprValue), NO_INFIX };
  59. rules[TK("False")] = { METHOD(exprValue), NO_INFIX };
  60. rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX };
  61. rules[TK("None")] = { METHOD(exprValue), NO_INFIX };
  62. rules[TK("...")] = { METHOD(exprValue), NO_INFIX };
  63. rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
  64. rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
  65. rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
  66. rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
  67. rules[TK("?")] = { nullptr, METHOD(exprTernary), PREC_TERNARY };
  68. rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  69. rules[TK("+=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  70. rules[TK("-=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  71. rules[TK("*=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  72. rules[TK("/=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  73. rules[TK("//=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  74. rules[TK("%=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  75. rules[TK("&=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  76. rules[TK("|=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  77. rules[TK("^=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  78. rules[TK(",")] = { nullptr, METHOD(exprComma), PREC_COMMA };
  79. rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  80. rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  81. rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND };
  82. rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR };
  83. rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  84. #undef METHOD
  85. #undef NO_INFIX
  86. #define EXPR() parse_expression(PREC_TERNARY) // no '=' and ',' just a simple expression
  87. #define EXPR_TUPLE() parse_expression(PREC_COMMA) // no '=', but ',' is allowed
  88. #define EXPR_ANY() parse_expression(PREC_ASSIGNMENT)
  89. }
  90. private:
  91. Str eat_string_until(char quote, bool raw) {
  92. bool quote3 = parser->match_n_chars(2, quote);
  93. std::vector<char> buff;
  94. while (true) {
  95. char c = parser->eatchar_include_newline();
  96. if (c == quote){
  97. if(quote3 && !parser->match_n_chars(2, quote)){
  98. buff.push_back(c);
  99. continue;
  100. }
  101. break;
  102. }
  103. if (c == '\0'){
  104. if(quote3 && parser->src->mode == REPL_MODE){
  105. throw NeedMoreLines(false);
  106. }
  107. SyntaxError("EOL while scanning string literal");
  108. }
  109. if (c == '\n'){
  110. if(!quote3) SyntaxError("EOL while scanning string literal");
  111. else{
  112. buff.push_back(c);
  113. continue;
  114. }
  115. }
  116. if (!raw && c == '\\') {
  117. switch (parser->eatchar_include_newline()) {
  118. case '"': buff.push_back('"'); break;
  119. case '\'': buff.push_back('\''); break;
  120. case '\\': buff.push_back('\\'); break;
  121. case 'n': buff.push_back('\n'); break;
  122. case 'r': buff.push_back('\r'); break;
  123. case 't': buff.push_back('\t'); break;
  124. default: SyntaxError("invalid escape character");
  125. }
  126. } else {
  127. buff.push_back(c);
  128. }
  129. }
  130. return Str(buff.data(), buff.size());
  131. }
  132. void eat_string(char quote, StringType type) {
  133. Str s = eat_string_until(quote, type == RAW_STRING);
  134. if(type == F_STRING){
  135. parser->set_next_token(TK("@fstr"), vm->PyStr(s));
  136. }else{
  137. parser->set_next_token(TK("@str"), vm->PyStr(s));
  138. }
  139. }
  140. void eat_number() {
  141. static const std::regex pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?");
  142. std::smatch m;
  143. const char* i = parser->token_start;
  144. while(*i != '\n' && *i != '\0') i++;
  145. std::string s = std::string(parser->token_start, i);
  146. try{
  147. if (std::regex_search(s, m, pattern)) {
  148. // here is m.length()-1, since the first char was eaten by lex_token()
  149. for(int j=0; j<m.length()-1; j++) parser->eatchar();
  150. int base = 10;
  151. size_t size;
  152. if (m[1].matched) base = 16;
  153. if (m[2].matched) {
  154. if(base == 16) SyntaxError("hex literal should not contain a dot");
  155. parser->set_next_token(TK("@num"), vm->PyFloat(std::stod(m[0], &size)));
  156. } else {
  157. parser->set_next_token(TK("@num"), vm->PyInt(std::stoll(m[0], &size, base)));
  158. }
  159. if (size != m.length()) UNREACHABLE();
  160. }
  161. }catch(std::exception& _){
  162. SyntaxError("invalid number literal");
  163. }
  164. }
  165. void lex_token(){
  166. lexing_count++;
  167. _lex_token();
  168. lexing_count--;
  169. }
  170. // Lex the next token and set it as the next token.
  171. void _lex_token() {
  172. parser->prev = parser->curr;
  173. parser->curr = parser->next_token();
  174. //Str _info = parser->curr.info(); std::cout << _info << '[' << parser->current_line << ']' << std::endl;
  175. while (parser->peekchar() != '\0') {
  176. parser->token_start = parser->curr_char;
  177. char c = parser->eatchar_include_newline();
  178. switch (c) {
  179. case '\'': case '"': eat_string(c, NORMAL_STRING); return;
  180. case '#': parser->skip_line_comment(); break;
  181. case '{': parser->set_next_token(TK("{")); return;
  182. case '}': parser->set_next_token(TK("}")); return;
  183. case ',': parser->set_next_token(TK(",")); return;
  184. case ':': parser->set_next_token(TK(":")); return;
  185. case ';': parser->set_next_token(TK(";")); return;
  186. case '(': parser->set_next_token(TK("(")); return;
  187. case ')': parser->set_next_token(TK(")")); return;
  188. case '[': parser->set_next_token(TK("[")); return;
  189. case ']': parser->set_next_token(TK("]")); return;
  190. case '%': parser->set_next_token_2('=', TK("%"), TK("%=")); return;
  191. case '&': parser->set_next_token_2('=', TK("&"), TK("&=")); return;
  192. case '|': parser->set_next_token_2('=', TK("|"), TK("|=")); return;
  193. case '^': parser->set_next_token_2('=', TK("^"), TK("^=")); return;
  194. case '?': parser->set_next_token(TK("?")); return;
  195. case '.': {
  196. if(parser->matchchar('.')) {
  197. if(parser->matchchar('.')) {
  198. parser->set_next_token(TK("..."));
  199. } else {
  200. SyntaxError("invalid token '..'");
  201. }
  202. } else {
  203. parser->set_next_token(TK("."));
  204. }
  205. return;
  206. }
  207. case '=': parser->set_next_token_2('=', TK("="), TK("==")); return;
  208. case '+': parser->set_next_token_2('=', TK("+"), TK("+=")); return;
  209. case '>': {
  210. if(parser->matchchar('=')) parser->set_next_token(TK(">="));
  211. else if(parser->matchchar('>')) parser->set_next_token(TK(">>"));
  212. else parser->set_next_token(TK(">"));
  213. return;
  214. }
  215. case '<': {
  216. if(parser->matchchar('=')) parser->set_next_token(TK("<="));
  217. else if(parser->matchchar('<')) parser->set_next_token(TK("<<"));
  218. else parser->set_next_token(TK("<"));
  219. return;
  220. }
  221. case '-': {
  222. if(parser->matchchar('=')) parser->set_next_token(TK("-="));
  223. else if(parser->matchchar('>')) parser->set_next_token(TK("->"));
  224. else if(parser->matchchar('-')) parser->set_next_token(TK("--"));
  225. else parser->set_next_token(TK("-"));
  226. return;
  227. }
  228. case '!':
  229. if(parser->matchchar('=')) parser->set_next_token(TK("!="));
  230. else SyntaxError("expected '=' after '!'");
  231. break;
  232. case '*':
  233. if (parser->matchchar('*')) {
  234. parser->set_next_token(TK("**")); // '**'
  235. } else {
  236. parser->set_next_token_2('=', TK("*"), TK("*="));
  237. }
  238. return;
  239. case '/':
  240. if(parser->matchchar('/')) {
  241. parser->set_next_token_2('=', TK("//"), TK("//="));
  242. } else {
  243. parser->set_next_token_2('=', TK("/"), TK("/="));
  244. }
  245. return;
  246. case '\r': break; // just ignore '\r'
  247. case ' ': case '\t': parser->eat_spaces(); break;
  248. case '\n': {
  249. parser->set_next_token(TK("@eol"));
  250. if(!parser->eat_indentation()) IndentationError("unindent does not match any outer indentation level");
  251. return;
  252. }
  253. default: {
  254. if(c == 'f'){
  255. if(parser->matchchar('\'')) {eat_string('\'', F_STRING); return;}
  256. if(parser->matchchar('"')) {eat_string('"', F_STRING); return;}
  257. }else if(c == 'r'){
  258. if(parser->matchchar('\'')) {eat_string('\'', RAW_STRING); return;}
  259. if(parser->matchchar('"')) {eat_string('"', RAW_STRING); return;}
  260. }
  261. if (c >= '0' && c <= '9') {
  262. eat_number();
  263. return;
  264. }
  265. switch (parser->eat_name())
  266. {
  267. case 0: break;
  268. case 1: SyntaxError("invalid char: " + std::string(1, c));
  269. case 2: SyntaxError("invalid utf8 sequence: " + std::string(1, c));
  270. case 3: SyntaxError("@id contains invalid char"); break;
  271. case 4: SyntaxError("invalid JSON token"); break;
  272. default: UNREACHABLE();
  273. }
  274. return;
  275. }
  276. }
  277. }
  278. parser->token_start = parser->curr_char;
  279. parser->set_next_token(TK("@eof"));
  280. }
  281. inline TokenIndex peek() {
  282. return parser->curr.type;
  283. }
  284. // not sure this will work
  285. TokenIndex peek_next() {
  286. if(parser->nexts.empty()) return TK("@eof");
  287. return parser->nexts.front().type;
  288. }
  289. bool match(TokenIndex expected) {
  290. if (peek() != expected) return false;
  291. lex_token();
  292. return true;
  293. }
  294. void consume(TokenIndex expected) {
  295. if (!match(expected)){
  296. StrStream ss;
  297. ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(peek()) << "'";
  298. SyntaxError(ss.str());
  299. }
  300. }
  301. bool match_newlines(bool repl_throw=false) {
  302. bool consumed = false;
  303. if (peek() == TK("@eol")) {
  304. while (peek() == TK("@eol")) lex_token();
  305. consumed = true;
  306. }
  307. if (repl_throw && peek() == TK("@eof")){
  308. throw NeedMoreLines(is_compiling_class);
  309. }
  310. return consumed;
  311. }
  312. bool match_end_stmt() {
  313. if (match(TK(";"))) { match_newlines(); return true; }
  314. if (match_newlines() || peek()==TK("@eof")) return true;
  315. if (peek() == TK("@dedent")) return true;
  316. return false;
  317. }
  318. void consume_end_stmt() {
  319. if (!match_end_stmt()) SyntaxError("expected statement end");
  320. }
  321. void exprLiteral() {
  322. PyVar value = parser->prev.value;
  323. int index = co()->add_const(value);
  324. emit(OP_LOAD_CONST, index);
  325. }
  326. void exprFString() {
  327. static const std::regex pattern(R"(\{(.*?)\})");
  328. PyVar value = parser->prev.value;
  329. Str s = vm->PyStr_AS_C(value);
  330. std::sregex_iterator begin(s.begin(), s.end(), pattern);
  331. std::sregex_iterator end;
  332. int size = 0;
  333. int i = 0;
  334. for(auto it = begin; it != end; it++) {
  335. std::smatch m = *it;
  336. if (i < m.position()) {
  337. std::string literal = s.substr(i, m.position() - i);
  338. emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(literal)));
  339. size++;
  340. }
  341. emit(OP_LOAD_EVAL_FN);
  342. emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(m[1].str())));
  343. emit(OP_CALL, 1);
  344. size++;
  345. i = (int)(m.position() + m.length());
  346. }
  347. if (i < s.size()) {
  348. std::string literal = s.substr(i, s.size() - i);
  349. emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(literal)));
  350. size++;
  351. }
  352. emit(OP_BUILD_STRING, size);
  353. }
  354. void exprLambda() {
  355. pkpy::Function_ func = pkpy::make_shared<pkpy::Function>();
  356. func->name = "<lambda>";
  357. if(!match(TK(":"))){
  358. _compile_f_args(func, false);
  359. consume(TK(":"));
  360. }
  361. func->code = pkpy::make_shared<CodeObject>(parser->src, func->name);
  362. this->codes.push(func->code);
  363. EXPR_TUPLE();
  364. emit(OP_RETURN_VALUE);
  365. func->code->optimize(vm);
  366. this->codes.pop();
  367. emit(OP_LOAD_LAMBDA, co()->add_const(vm->PyFunction(func)));
  368. }
  369. void exprAssign() {
  370. co()->_rvalue = true;
  371. TokenIndex op = parser->prev.type;
  372. if(op == TK("=")) { // a = (expr)
  373. EXPR_TUPLE();
  374. emit(OP_STORE_REF);
  375. }else{ // a += (expr) -> a = a + (expr)
  376. emit(OP_DUP_TOP);
  377. EXPR();
  378. switch (op) {
  379. case TK("+="): emit(OP_BINARY_OP, 0); break;
  380. case TK("-="): emit(OP_BINARY_OP, 1); break;
  381. case TK("*="): emit(OP_BINARY_OP, 2); break;
  382. case TK("/="): emit(OP_BINARY_OP, 3); break;
  383. case TK("//="): emit(OP_BINARY_OP, 4); break;
  384. case TK("%="): emit(OP_BINARY_OP, 5); break;
  385. case TK("&="): emit(OP_BITWISE_OP, 2); break;
  386. case TK("|="): emit(OP_BITWISE_OP, 3); break;
  387. case TK("^="): emit(OP_BITWISE_OP, 4); break;
  388. default: UNREACHABLE();
  389. }
  390. emit(OP_STORE_REF);
  391. }
  392. co()->_rvalue = false;
  393. }
  394. void exprComma() {
  395. int size = 1; // an expr is in the stack now
  396. do {
  397. EXPR(); // NOTE: "1," will fail, "1,2" will be ok
  398. size++;
  399. } while(match(TK(",")));
  400. emit(OP_BUILD_SMART_TUPLE, size);
  401. }
  402. void exprOr() {
  403. int patch = emit(OP_JUMP_IF_TRUE_OR_POP);
  404. parse_expression(PREC_LOGICAL_OR);
  405. patch_jump(patch);
  406. }
  407. void exprAnd() {
  408. int patch = emit(OP_JUMP_IF_FALSE_OR_POP);
  409. parse_expression(PREC_LOGICAL_AND);
  410. patch_jump(patch);
  411. }
  412. void exprTernary() {
  413. int patch = emit(OP_POP_JUMP_IF_FALSE);
  414. EXPR(); // if true
  415. int patch2 = emit(OP_JUMP_ABSOLUTE);
  416. consume(TK(":"));
  417. patch_jump(patch);
  418. EXPR(); // if false
  419. patch_jump(patch2);
  420. }
  421. void exprBinaryOp() {
  422. TokenIndex op = parser->prev.type;
  423. parse_expression((Precedence)(rules[op].precedence + 1));
  424. switch (op) {
  425. case TK("+"): emit(OP_BINARY_OP, 0); break;
  426. case TK("-"): emit(OP_BINARY_OP, 1); break;
  427. case TK("*"): emit(OP_BINARY_OP, 2); break;
  428. case TK("/"): emit(OP_BINARY_OP, 3); break;
  429. case TK("//"): emit(OP_BINARY_OP, 4); break;
  430. case TK("%"): emit(OP_BINARY_OP, 5); break;
  431. case TK("**"): emit(OP_BINARY_OP, 6); break;
  432. case TK("<"): emit(OP_COMPARE_OP, 0); break;
  433. case TK("<="): emit(OP_COMPARE_OP, 1); break;
  434. case TK("=="): emit(OP_COMPARE_OP, 2); break;
  435. case TK("!="): emit(OP_COMPARE_OP, 3); break;
  436. case TK(">"): emit(OP_COMPARE_OP, 4); break;
  437. case TK(">="): emit(OP_COMPARE_OP, 5); break;
  438. case TK("in"): emit(OP_CONTAINS_OP, 0); break;
  439. case TK("not in"): emit(OP_CONTAINS_OP, 1); break;
  440. case TK("is"): emit(OP_IS_OP, 0); break;
  441. case TK("is not"): emit(OP_IS_OP, 1); break;
  442. case TK("<<"): emit(OP_BITWISE_OP, 0); break;
  443. case TK(">>"): emit(OP_BITWISE_OP, 1); break;
  444. case TK("&"): emit(OP_BITWISE_OP, 2); break;
  445. case TK("|"): emit(OP_BITWISE_OP, 3); break;
  446. case TK("^"): emit(OP_BITWISE_OP, 4); break;
  447. default: UNREACHABLE();
  448. }
  449. }
  450. void exprUnaryOp() {
  451. TokenIndex op = parser->prev.type;
  452. parse_expression((Precedence)(PREC_UNARY + 1));
  453. switch (op) {
  454. case TK("-"): emit(OP_UNARY_NEGATIVE); break;
  455. case TK("not"): emit(OP_UNARY_NOT); break;
  456. case TK("*"): SyntaxError("cannot use '*' as unary operator"); break;
  457. default: UNREACHABLE();
  458. }
  459. }
  460. void exprGrouping() {
  461. match_newlines(mode()==REPL_MODE);
  462. EXPR_TUPLE();
  463. match_newlines(mode()==REPL_MODE);
  464. consume(TK(")"));
  465. }
  466. void exprList() {
  467. int _patch = emit(OP_NO_OP);
  468. int _body_start = co()->codes.size();
  469. int ARGC = 0;
  470. do {
  471. match_newlines(mode()==REPL_MODE);
  472. if (peek() == TK("]")) break;
  473. EXPR(); ARGC++;
  474. match_newlines(mode()==REPL_MODE);
  475. if(ARGC == 1 && match(TK("for"))) goto __LISTCOMP;
  476. } while (match(TK(",")));
  477. match_newlines(mode()==REPL_MODE);
  478. consume(TK("]"));
  479. emit(OP_BUILD_LIST, ARGC);
  480. return;
  481. __LISTCOMP:
  482. int _body_end_return = emit(OP_JUMP_ABSOLUTE, -1);
  483. int _body_end = co()->codes.size();
  484. co()->codes[_patch].op = OP_JUMP_ABSOLUTE;
  485. co()->codes[_patch].arg = _body_end;
  486. emit(OP_BUILD_LIST, 0);
  487. EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE();
  488. match_newlines(mode()==REPL_MODE);
  489. int _skipPatch = emit(OP_JUMP_ABSOLUTE);
  490. int _cond_start = co()->codes.size();
  491. int _cond_end_return = -1;
  492. if(match(TK("if"))) {
  493. EXPR_TUPLE();
  494. _cond_end_return = emit(OP_JUMP_ABSOLUTE, -1);
  495. }
  496. patch_jump(_skipPatch);
  497. emit(OP_GET_ITER);
  498. co()->_enter_block(FOR_LOOP);
  499. emit(OP_FOR_ITER);
  500. if(_cond_end_return != -1) { // there is an if condition
  501. emit(OP_JUMP_ABSOLUTE, _cond_start);
  502. patch_jump(_cond_end_return);
  503. int ifpatch = emit(OP_POP_JUMP_IF_FALSE);
  504. emit(OP_JUMP_ABSOLUTE, _body_start);
  505. patch_jump(_body_end_return);
  506. emit(OP_LIST_APPEND);
  507. patch_jump(ifpatch);
  508. }else{
  509. emit(OP_JUMP_ABSOLUTE, _body_start);
  510. patch_jump(_body_end_return);
  511. emit(OP_LIST_APPEND);
  512. }
  513. emit(OP_LOOP_CONTINUE, -1, true);
  514. co()->_exit_block();
  515. match_newlines(mode()==REPL_MODE);
  516. consume(TK("]"));
  517. }
  518. void exprMap() {
  519. bool parsing_dict = false;
  520. int size = 0;
  521. do {
  522. match_newlines(mode()==REPL_MODE);
  523. if (peek() == TK("}")) break;
  524. EXPR();
  525. if(peek() == TK(":")) parsing_dict = true;
  526. if(parsing_dict){
  527. consume(TK(":"));
  528. EXPR();
  529. }
  530. size++;
  531. match_newlines(mode()==REPL_MODE);
  532. } while (match(TK(",")));
  533. consume(TK("}"));
  534. if(size == 0 || parsing_dict) emit(OP_BUILD_MAP, size);
  535. else emit(OP_BUILD_SET, size);
  536. }
  537. void exprCall() {
  538. int ARGC = 0;
  539. int KWARGC = 0;
  540. do {
  541. match_newlines(mode()==REPL_MODE);
  542. if (peek() == TK(")")) break;
  543. if(peek() == TK("@id") && peek_next() == TK("=")) {
  544. consume(TK("@id"));
  545. const Str& key = parser->prev.str();
  546. emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(key)));
  547. consume(TK("="));
  548. co()->_rvalue=true; EXPR(); co()->_rvalue=false;
  549. KWARGC++;
  550. } else{
  551. if(KWARGC > 0) SyntaxError("positional argument follows keyword argument");
  552. co()->_rvalue=true; EXPR(); co()->_rvalue=false;
  553. ARGC++;
  554. }
  555. match_newlines(mode()==REPL_MODE);
  556. } while (match(TK(",")));
  557. consume(TK(")"));
  558. emit(OP_CALL, (KWARGC << 16) | ARGC);
  559. }
  560. void exprName(){ _exprName(false); }
  561. void _exprName(bool force_lvalue) {
  562. Token tkname = parser->prev;
  563. int index = co()->add_name(
  564. tkname.str(),
  565. codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL
  566. );
  567. bool fast_load = !force_lvalue && co()->_rvalue;
  568. emit(fast_load ? OP_LOAD_NAME : OP_LOAD_NAME_REF, index);
  569. }
  570. void exprAttrib() {
  571. consume(TK("@id"));
  572. const Str& name = parser->prev.str();
  573. int index = co()->add_name(name, NAME_ATTR);
  574. index = (index<<1) + (int)co()->_rvalue;
  575. emit(OP_BUILD_ATTR, index);
  576. }
  577. // [:], [:b]
  578. // [a], [a:], [a:b]
  579. void exprSubscript() {
  580. if(match(TK(":"))){
  581. emit(OP_LOAD_NONE);
  582. if(match(TK("]"))){
  583. emit(OP_LOAD_NONE);
  584. }else{
  585. EXPR_TUPLE();
  586. consume(TK("]"));
  587. }
  588. emit(OP_BUILD_SLICE);
  589. }else{
  590. EXPR_TUPLE();
  591. if(match(TK(":"))){
  592. if(match(TK("]"))){
  593. emit(OP_LOAD_NONE);
  594. }else{
  595. EXPR_TUPLE();
  596. consume(TK("]"));
  597. }
  598. emit(OP_BUILD_SLICE);
  599. }else{
  600. consume(TK("]"));
  601. }
  602. }
  603. emit(OP_BUILD_INDEX, (int)co()->_rvalue);
  604. }
  605. void exprValue() {
  606. TokenIndex op = parser->prev.type;
  607. switch (op) {
  608. case TK("None"): emit(OP_LOAD_NONE); break;
  609. case TK("True"): emit(OP_LOAD_TRUE); break;
  610. case TK("False"): emit(OP_LOAD_FALSE); break;
  611. case TK("..."): emit(OP_LOAD_ELLIPSIS); break;
  612. default: UNREACHABLE();
  613. }
  614. }
  615. int emit(Opcode opcode, int arg=-1, bool keepline=false) {
  616. int line = parser->prev.line;
  617. co()->codes.push_back(
  618. Bytecode{(uint8_t)opcode, arg, line, (uint16_t)co()->_curr_block_i}
  619. );
  620. int i = co()->codes.size() - 1;
  621. if(keepline && i>=1) co()->codes[i].line = co()->codes[i-1].line;
  622. return i;
  623. }
  624. inline void patch_jump(int addr_index) {
  625. int target = co()->codes.size();
  626. co()->codes[addr_index].arg = target;
  627. }
  628. void compile_block_body(CompilerAction action=nullptr) {
  629. if(action == nullptr) action = &Compiler::compile_stmt;
  630. consume(TK(":"));
  631. if(peek()!=TK("@eol") && peek()!=TK("@eof")){
  632. (this->*action)(); // inline block
  633. return;
  634. }
  635. if(!match_newlines(mode()==REPL_MODE)){
  636. SyntaxError("expected a new line after ':'");
  637. }
  638. consume(TK("@indent"));
  639. while (peek() != TK("@dedent")) {
  640. match_newlines();
  641. (this->*action)();
  642. match_newlines();
  643. }
  644. consume(TK("@dedent"));
  645. }
  646. Token _compile_import() {
  647. consume(TK("@id"));
  648. Token tkmodule = parser->prev;
  649. int index = co()->add_name(tkmodule.str(), NAME_SPECIAL);
  650. emit(OP_IMPORT_NAME, index);
  651. return tkmodule;
  652. }
  653. // import a as b
  654. void compile_normal_import() {
  655. do {
  656. Token tkmodule = _compile_import();
  657. if (match(TK("as"))) {
  658. consume(TK("@id"));
  659. tkmodule = parser->prev;
  660. }
  661. int index = co()->add_name(tkmodule.str(), NAME_GLOBAL);
  662. emit(OP_STORE_NAME, index);
  663. } while (match(TK(",")));
  664. consume_end_stmt();
  665. }
  666. // from a import b as c, d as e
  667. void compile_from_import() {
  668. Token tkmodule = _compile_import();
  669. consume(TK("import"));
  670. do {
  671. emit(OP_DUP_TOP);
  672. consume(TK("@id"));
  673. Token tkname = parser->prev;
  674. int index = co()->add_name(tkname.str(), NAME_ATTR);
  675. emit(OP_BUILD_ATTR, (index<<1)+1);
  676. if (match(TK("as"))) {
  677. consume(TK("@id"));
  678. tkname = parser->prev;
  679. }
  680. index = co()->add_name(tkname.str(), NAME_GLOBAL);
  681. emit(OP_STORE_NAME, index);
  682. } while (match(TK(",")));
  683. emit(OP_POP_TOP);
  684. consume_end_stmt();
  685. }
  686. void parse_expression(Precedence precedence) {
  687. lex_token();
  688. GrammarFn prefix = rules[parser->prev.type].prefix;
  689. if (prefix == nullptr) SyntaxError(Str("expected an expression, but got ") + TK_STR(parser->prev.type));
  690. (this->*prefix)();
  691. while (rules[peek()].precedence >= precedence) {
  692. lex_token();
  693. TokenIndex op = parser->prev.type;
  694. GrammarFn infix = rules[op].infix;
  695. if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true");
  696. (this->*infix)();
  697. }
  698. }
  699. void compile_if_stmt() {
  700. match_newlines();
  701. co()->_rvalue = true;
  702. EXPR_TUPLE(); // condition
  703. co()->_rvalue = false;
  704. int ifpatch = emit(OP_POP_JUMP_IF_FALSE);
  705. compile_block_body();
  706. if (match(TK("elif"))) {
  707. int exit_jump = emit(OP_JUMP_ABSOLUTE);
  708. patch_jump(ifpatch);
  709. compile_if_stmt();
  710. patch_jump(exit_jump);
  711. } else if (match(TK("else"))) {
  712. int exit_jump = emit(OP_JUMP_ABSOLUTE);
  713. patch_jump(ifpatch);
  714. compile_block_body();
  715. patch_jump(exit_jump);
  716. } else {
  717. patch_jump(ifpatch);
  718. }
  719. }
  720. void compile_while_loop() {
  721. co()->_enter_block(WHILE_LOOP);
  722. co()->_rvalue = true;
  723. EXPR_TUPLE(); // condition
  724. co()->_rvalue = false;
  725. int patch = emit(OP_POP_JUMP_IF_FALSE);
  726. compile_block_body();
  727. emit(OP_LOOP_CONTINUE, -1, true);
  728. patch_jump(patch);
  729. co()->_exit_block();
  730. }
  731. void EXPR_FOR_VARS(){
  732. int size = 0;
  733. do {
  734. consume(TK("@id"));
  735. _exprName(true); size++;
  736. } while (match(TK(",")));
  737. if(size > 1) emit(OP_BUILD_SMART_TUPLE, size);
  738. }
  739. void compile_for_loop() {
  740. EXPR_FOR_VARS();consume(TK("in"));
  741. co()->_rvalue = true; EXPR_TUPLE(); co()->_rvalue = false;
  742. emit(OP_GET_ITER);
  743. co()->_enter_block(FOR_LOOP);
  744. emit(OP_FOR_ITER);
  745. compile_block_body();
  746. emit(OP_LOOP_CONTINUE, -1, true);
  747. co()->_exit_block();
  748. }
  749. void compile_try_except() {
  750. co()->_enter_block(TRY_EXCEPT);
  751. emit(OP_TRY_BLOCK_ENTER);
  752. compile_block_body();
  753. emit(OP_TRY_BLOCK_EXIT);
  754. std::vector<int> patches = { emit(OP_JUMP_ABSOLUTE) };
  755. co()->_exit_block();
  756. do {
  757. consume(TK("except"));
  758. if(match(TK("@id"))){
  759. int name_idx = co()->add_name(parser->prev.str(), NAME_SPECIAL);
  760. emit(OP_EXCEPTION_MATCH, name_idx);
  761. }else{
  762. emit(OP_LOAD_TRUE);
  763. }
  764. int patch = emit(OP_POP_JUMP_IF_FALSE);
  765. emit(OP_POP_TOP); // pop the exception on match
  766. compile_block_body();
  767. patches.push_back(emit(OP_JUMP_ABSOLUTE));
  768. patch_jump(patch);
  769. }while(peek() == TK("except"));
  770. emit(OP_RE_RAISE); // no match, re-raise
  771. for (int patch : patches) patch_jump(patch);
  772. }
  773. void compile_stmt() {
  774. if (match(TK("break"))) {
  775. if (!co()->_is_curr_block_loop()) SyntaxError("'break' outside loop");
  776. consume_end_stmt();
  777. emit(OP_LOOP_BREAK);
  778. } else if (match(TK("continue"))) {
  779. if (!co()->_is_curr_block_loop()) SyntaxError("'continue' not properly in loop");
  780. consume_end_stmt();
  781. emit(OP_LOOP_CONTINUE);
  782. } else if (match(TK("yield"))) {
  783. if (codes.size() == 1) SyntaxError("'yield' outside function");
  784. co()->_rvalue = true;
  785. EXPR_TUPLE();
  786. co()->_rvalue = false;
  787. consume_end_stmt();
  788. co()->is_generator = true;
  789. emit(OP_YIELD_VALUE, -1, true);
  790. } else if (match(TK("return"))) {
  791. if (codes.size() == 1) SyntaxError("'return' outside function");
  792. if(match_end_stmt()){
  793. emit(OP_LOAD_NONE);
  794. }else{
  795. co()->_rvalue = true;
  796. EXPR_TUPLE(); // return value
  797. co()->_rvalue = false;
  798. consume_end_stmt();
  799. }
  800. emit(OP_RETURN_VALUE, -1, true);
  801. } else if (match(TK("if"))) {
  802. compile_if_stmt();
  803. } else if (match(TK("while"))) {
  804. compile_while_loop();
  805. } else if (match(TK("for"))) {
  806. compile_for_loop();
  807. } else if (match(TK("try"))) {
  808. compile_try_except();
  809. }else if(match(TK("assert"))){
  810. EXPR();
  811. if (match(TK(","))) EXPR();
  812. else emit(OP_LOAD_CONST, co()->add_const(vm->PyStr("")));
  813. emit(OP_ASSERT);
  814. consume_end_stmt();
  815. } else if(match(TK("with"))){
  816. EXPR();
  817. consume(TK("as"));
  818. consume(TK("@id"));
  819. Token tkname = parser->prev;
  820. int index = co()->add_name(
  821. tkname.str(),
  822. codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL
  823. );
  824. emit(OP_STORE_NAME, index);
  825. emit(OP_LOAD_NAME_REF, index);
  826. emit(OP_WITH_ENTER);
  827. compile_block_body();
  828. emit(OP_LOAD_NAME_REF, index);
  829. emit(OP_WITH_EXIT);
  830. } else if(match(TK("label"))){
  831. if(mode() != EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE");
  832. consume(TK(".")); consume(TK("@id"));
  833. Str label = parser->prev.str();
  834. bool ok = co()->add_label(label);
  835. if(!ok) SyntaxError("label '" + label + "' already exists");
  836. consume_end_stmt();
  837. } else if(match(TK("goto"))){ // https://entrian.com/goto/
  838. if(mode() != EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE");
  839. consume(TK(".")); consume(TK("@id"));
  840. emit(OP_GOTO, co()->add_name(parser->prev.str(), NAME_SPECIAL));
  841. consume_end_stmt();
  842. } else if(match(TK("raise"))){
  843. consume(TK("@id"));
  844. int dummy_t = co()->add_name(parser->prev.str(), NAME_SPECIAL);
  845. if(match(TK("(")) && !match(TK(")"))){
  846. EXPR(); consume(TK(")"));
  847. }else{
  848. emit(OP_LOAD_NONE);
  849. }
  850. emit(OP_RAISE, dummy_t);
  851. consume_end_stmt();
  852. } else if(match(TK("del"))){
  853. EXPR();
  854. emit(OP_DELETE_REF);
  855. consume_end_stmt();
  856. } else if(match(TK("global"))){
  857. do {
  858. consume(TK("@id"));
  859. co()->global_names[parser->prev.str()] = 1;
  860. } while (match(TK(",")));
  861. consume_end_stmt();
  862. } else if(match(TK("pass"))){
  863. consume_end_stmt();
  864. } else {
  865. EXPR_ANY();
  866. consume_end_stmt();
  867. // If last op is not an assignment, pop the result.
  868. uint8_t last_op = co()->codes.back().op;
  869. if( last_op!=OP_STORE_NAME && last_op!=OP_STORE_REF){
  870. if(mode()==REPL_MODE && parser->indents.top()==0) emit(OP_PRINT_EXPR, -1, true);
  871. emit(OP_POP_TOP, -1, true);
  872. }
  873. }
  874. }
  875. void compile_class(){
  876. consume(TK("@id"));
  877. int cls_name_idx = co()->add_name(parser->prev.str(), NAME_GLOBAL);
  878. int super_cls_name_idx = -1;
  879. if(match(TK("(")) && match(TK("@id"))){
  880. super_cls_name_idx = co()->add_name(parser->prev.str(), NAME_GLOBAL);
  881. consume(TK(")"));
  882. }
  883. emit(OP_LOAD_NONE);
  884. is_compiling_class = true;
  885. compile_block_body(&Compiler::compile_function);
  886. is_compiling_class = false;
  887. if(super_cls_name_idx == -1) emit(OP_LOAD_NONE);
  888. else emit(OP_LOAD_NAME_REF, super_cls_name_idx);
  889. emit(OP_BUILD_CLASS, cls_name_idx);
  890. }
  891. void _compile_f_args(pkpy::Function_ func, bool enable_type_hints){
  892. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  893. do {
  894. if(state == 3) SyntaxError("**kwargs should be the last argument");
  895. match_newlines();
  896. if(match(TK("*"))){
  897. if(state < 1) state = 1;
  898. else SyntaxError("*args should be placed before **kwargs");
  899. }
  900. else if(match(TK("**"))){
  901. state = 3;
  902. }
  903. consume(TK("@id"));
  904. const Str& name = parser->prev.str();
  905. if(func->hasName(name)) SyntaxError("duplicate argument name");
  906. // eat type hints
  907. if(enable_type_hints && match(TK(":"))) consume(TK("@id"));
  908. if(state == 0 && peek() == TK("=")) state = 2;
  909. switch (state)
  910. {
  911. case 0: func->args.push_back(name); break;
  912. case 1: func->starredArg = name; state+=1; break;
  913. case 2: {
  914. consume(TK("="));
  915. PyVarOrNull value = read_literal();
  916. if(value == nullptr){
  917. SyntaxError(Str("expect a literal, not ") + TK_STR(parser->curr.type));
  918. }
  919. func->kwArgs[name] = value;
  920. func->kwArgsOrder.push_back(name);
  921. } break;
  922. case 3: SyntaxError("**kwargs is not supported yet"); break;
  923. }
  924. } while (match(TK(",")));
  925. }
  926. void compile_function(){
  927. if(is_compiling_class){
  928. if(match(TK("pass"))) return;
  929. consume(TK("def"));
  930. }
  931. pkpy::Function_ func = pkpy::make_shared<pkpy::Function>();
  932. consume(TK("@id"));
  933. func->name = parser->prev.str();
  934. consume(TK("("));
  935. if (!match(TK(")"))) {
  936. _compile_f_args(func, true);
  937. consume(TK(")"));
  938. }
  939. // eat type hints
  940. if(match(TK("->"))) consume(TK("@id"));
  941. func->code = pkpy::make_shared<CodeObject>(parser->src, func->name);
  942. this->codes.push(func->code);
  943. compile_block_body();
  944. func->code->optimize(vm);
  945. this->codes.pop();
  946. emit(OP_LOAD_CONST, co()->add_const(vm->PyFunction(func)));
  947. if(!is_compiling_class) emit(OP_STORE_FUNCTION);
  948. }
  949. PyVarOrNull read_literal(){
  950. if(match(TK("-"))){
  951. consume(TK("@num"));
  952. PyVar val = parser->prev.value;
  953. return vm->num_negated(val);
  954. }
  955. if(match(TK("@num"))) return parser->prev.value;
  956. if(match(TK("@str"))) return parser->prev.value;
  957. if(match(TK("True"))) return vm->PyBool(true);
  958. if(match(TK("False"))) return vm->PyBool(false);
  959. if(match(TK("None"))) return vm->None;
  960. if(match(TK("..."))) return vm->Ellipsis;
  961. return nullptr;
  962. }
  963. /***** Error Reporter *****/
  964. void throw_err(Str type, Str msg){
  965. int lineno = parser->curr.line;
  966. const char* cursor = parser->curr.start;
  967. // if error occurs in lexing, lineno should be `parser->current_line`
  968. if(lexing_count > 0){
  969. lineno = parser->current_line;
  970. cursor = parser->curr_char;
  971. }
  972. if(parser->peekchar() == '\n') lineno--;
  973. auto e = pkpy::Exception("SyntaxError", msg);
  974. e.st_push(parser->src->snapshot(lineno, cursor));
  975. throw e;
  976. }
  977. void SyntaxError(Str msg){ throw_err("SyntaxError", msg); }
  978. void IndentationError(Str msg){ throw_err("IndentationError", msg); }
  979. public:
  980. CodeObject_ compile(){
  981. // can only be called once
  982. if(used) UNREACHABLE();
  983. used = true;
  984. CodeObject_ code = pkpy::make_shared<CodeObject>(parser->src, Str("<module>"));
  985. codes.push(code);
  986. lex_token(); lex_token();
  987. match_newlines();
  988. if(mode()==EVAL_MODE) {
  989. EXPR_TUPLE();
  990. consume(TK("@eof"));
  991. code->optimize(vm);
  992. return code;
  993. }else if(mode()==JSON_MODE){
  994. PyVarOrNull value = read_literal();
  995. if(value != nullptr) emit(OP_LOAD_CONST, code->add_const(value));
  996. else if(match(TK("{"))) exprMap();
  997. else if(match(TK("["))) exprList();
  998. else SyntaxError("expect a JSON object or array");
  999. consume(TK("@eof"));
  1000. return code; // no need to optimize for JSON decoding
  1001. }
  1002. while (!match(TK("@eof"))) {
  1003. // compile top-level statement
  1004. if (match(TK("class"))) {
  1005. compile_class();
  1006. } else if (match(TK("def"))) {
  1007. compile_function();
  1008. } else if (match(TK("import"))) {
  1009. compile_normal_import();
  1010. } else if (match(TK("from"))) {
  1011. compile_from_import();
  1012. } else {
  1013. compile_stmt();
  1014. }
  1015. match_newlines();
  1016. }
  1017. code->optimize(vm);
  1018. return code;
  1019. }
  1020. };