compiler.h 41 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092
  1. #pragma once
  2. #include "parser.h"
  3. #include "error.h"
  4. #include "vm.h"
  5. struct Compiler;
  6. typedef void (Compiler::*GrammarFn)();
  7. typedef void (Compiler::*CompilerAction)();
  8. struct GrammarRule{
  9. GrammarFn prefix;
  10. GrammarFn infix;
  11. Precedence precedence;
  12. };
  13. enum StringType { NORMAL_STRING, RAW_STRING, F_STRING };
  14. struct Compiler {
  15. std::unique_ptr<Parser> parser;
  16. std::stack<_Code> codes;
  17. bool isCompilingClass = false;
  18. int lexingCnt = 0;
  19. VM* vm;
  20. emhash8::HashMap<_TokenType, GrammarRule> rules;
  21. _Code co() const{ return codes.top(); }
  22. CompileMode mode() const{ return parser->src->mode;}
  23. Compiler(VM* vm, const char* source, _Str filename, CompileMode mode){
  24. this->vm = vm;
  25. this->parser = std::make_unique<Parser>(
  26. pkpy::make_shared<SourceData>(source, filename, mode)
  27. );
  28. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  29. #define METHOD(name) &Compiler::name
  30. #define NO_INFIX nullptr, PREC_NONE
  31. for(_TokenType i=0; i<__TOKENS_LEN; i++) rules[i] = { nullptr, NO_INFIX };
  32. rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB };
  33. rules[TK("(")] = { METHOD(exprGrouping), METHOD(exprCall), PREC_CALL };
  34. rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscript), PREC_SUBSCRIPT };
  35. rules[TK("{")] = { METHOD(exprMap), NO_INFIX };
  36. rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  37. rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM };
  38. rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM };
  39. rules[TK("*")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_FACTOR };
  40. rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  41. rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  42. rules[TK("**")] = { nullptr, METHOD(exprBinaryOp), PREC_EXPONENT };
  43. rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  44. rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  45. rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  46. rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  47. rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  48. rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  49. rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  50. rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  51. rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  52. rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  53. rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND };
  54. rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR };
  55. rules[TK("not")] = { METHOD(exprUnaryOp), nullptr, PREC_UNARY };
  56. rules[TK("True")] = { METHOD(exprValue), NO_INFIX };
  57. rules[TK("False")] = { METHOD(exprValue), NO_INFIX };
  58. rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX };
  59. rules[TK("None")] = { METHOD(exprValue), NO_INFIX };
  60. rules[TK("...")] = { METHOD(exprValue), NO_INFIX };
  61. rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
  62. rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
  63. rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
  64. rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
  65. rules[TK("?")] = { nullptr, METHOD(exprTernary), PREC_TERNARY };
  66. rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  67. rules[TK("+=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  68. rules[TK("-=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  69. rules[TK("*=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  70. rules[TK("/=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  71. rules[TK("//=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  72. rules[TK("%=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  73. rules[TK("&=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  74. rules[TK("|=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  75. rules[TK("^=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  76. rules[TK(",")] = { nullptr, METHOD(exprComma), PREC_COMMA };
  77. rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  78. rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  79. rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND };
  80. rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR };
  81. rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  82. #undef METHOD
  83. #undef NO_INFIX
  84. #define EXPR() parsePrecedence(PREC_TERNARY) // no '=' and ',' just a simple expression
  85. #define EXPR_TUPLE() parsePrecedence(PREC_COMMA) // no '=', but ',' is allowed
  86. #define EXPR_ANY() parsePrecedence(PREC_ASSIGNMENT)
  87. }
  88. _Str eatStringUntil(char quote, bool raw) {
  89. bool quote3 = parser->match_n_chars(2, quote);
  90. std::vector<char> buff;
  91. while (true) {
  92. char c = parser->eatchar_include_newline();
  93. if (c == quote){
  94. if(quote3 && !parser->match_n_chars(2, quote)){
  95. buff.push_back(c);
  96. continue;
  97. }
  98. break;
  99. }
  100. if (c == '\0'){
  101. if(quote3 && parser->src->mode == SINGLE_MODE){
  102. throw NeedMoreLines(false);
  103. }
  104. syntaxError("EOL while scanning string literal");
  105. }
  106. if (c == '\n'){
  107. if(!quote3) syntaxError("EOL while scanning string literal");
  108. else{
  109. buff.push_back(c);
  110. continue;
  111. }
  112. }
  113. if (!raw && c == '\\') {
  114. switch (parser->eatchar_include_newline()) {
  115. case '"': buff.push_back('"'); break;
  116. case '\'': buff.push_back('\''); break;
  117. case '\\': buff.push_back('\\'); break;
  118. case 'n': buff.push_back('\n'); break;
  119. case 'r': buff.push_back('\r'); break;
  120. case 't': buff.push_back('\t'); break;
  121. default: syntaxError("invalid escape character");
  122. }
  123. } else {
  124. buff.push_back(c);
  125. }
  126. }
  127. return _Str(buff.data(), buff.size());
  128. }
  129. void eatString(char quote, StringType type) {
  130. _Str s = eatStringUntil(quote, type == RAW_STRING);
  131. if(type == F_STRING){
  132. parser->set_next_token(TK("@fstr"), vm->PyStr(s));
  133. }else{
  134. parser->set_next_token(TK("@str"), vm->PyStr(s));
  135. }
  136. }
  137. void eatNumber() {
  138. static const std::regex pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?");
  139. std::smatch m;
  140. const char* i = parser->token_start;
  141. while(*i != '\n' && *i != '\0') i++;
  142. std::string s = std::string(parser->token_start, i);
  143. try{
  144. if (std::regex_search(s, m, pattern)) {
  145. // here is m.length()-1, since the first char was eaten by lexToken()
  146. for(int j=0; j<m.length()-1; j++) parser->eatchar();
  147. int base = 10;
  148. size_t size;
  149. if (m[1].matched) base = 16;
  150. if (m[2].matched) {
  151. if(base == 16) syntaxError("hex literal should not contain a dot");
  152. parser->set_next_token(TK("@num"), vm->PyFloat(std::stod(m[0], &size)));
  153. } else {
  154. parser->set_next_token(TK("@num"), vm->PyInt(std::stoll(m[0], &size, base)));
  155. }
  156. if (size != m.length()) UNREACHABLE();
  157. }
  158. }catch(std::exception& _){
  159. syntaxError("invalid number literal");
  160. }
  161. }
  162. void lexToken(){
  163. lexingCnt++;
  164. _lexToken();
  165. lexingCnt--;
  166. }
  167. // Lex the next token and set it as the next token.
  168. void _lexToken() {
  169. parser->prev = parser->curr;
  170. parser->curr = parser->next_token();
  171. //_Str _info = parser->curr.info(); std::cout << _info << '[' << parser->current_line << ']' << std::endl;
  172. while (parser->peekchar() != '\0') {
  173. parser->token_start = parser->curr_char;
  174. char c = parser->eatchar_include_newline();
  175. switch (c) {
  176. case '\'': case '"': eatString(c, NORMAL_STRING); return;
  177. case '#': parser->skip_line_comment(); break;
  178. case '{': parser->set_next_token(TK("{")); return;
  179. case '}': parser->set_next_token(TK("}")); return;
  180. case ',': parser->set_next_token(TK(",")); return;
  181. case ':': parser->set_next_token(TK(":")); return;
  182. case ';': parser->set_next_token(TK(";")); return;
  183. case '(': parser->set_next_token(TK("(")); return;
  184. case ')': parser->set_next_token(TK(")")); return;
  185. case '[': parser->set_next_token(TK("[")); return;
  186. case ']': parser->set_next_token(TK("]")); return;
  187. case '%': parser->set_next_token_2('=', TK("%"), TK("%=")); return;
  188. case '&': parser->set_next_token_2('=', TK("&"), TK("&=")); return;
  189. case '|': parser->set_next_token_2('=', TK("|"), TK("|=")); return;
  190. case '^': parser->set_next_token_2('=', TK("^"), TK("^=")); return;
  191. case '?': parser->set_next_token(TK("?")); return;
  192. case '.': {
  193. if(parser->matchchar('.')) {
  194. if(parser->matchchar('.')) {
  195. parser->set_next_token(TK("..."));
  196. } else {
  197. syntaxError("invalid token '..'");
  198. }
  199. } else {
  200. parser->set_next_token(TK("."));
  201. }
  202. return;
  203. }
  204. case '=': parser->set_next_token_2('=', TK("="), TK("==")); return;
  205. case '+': parser->set_next_token_2('=', TK("+"), TK("+=")); return;
  206. case '>': {
  207. if(parser->matchchar('=')) parser->set_next_token(TK(">="));
  208. else if(parser->matchchar('>')) parser->set_next_token(TK(">>"));
  209. else parser->set_next_token(TK(">"));
  210. return;
  211. }
  212. case '<': {
  213. if(parser->matchchar('=')) parser->set_next_token(TK("<="));
  214. else if(parser->matchchar('<')) parser->set_next_token(TK("<<"));
  215. else parser->set_next_token(TK("<"));
  216. return;
  217. }
  218. case '-': {
  219. if(parser->matchchar('=')) parser->set_next_token(TK("-="));
  220. else if(parser->matchchar('>')) parser->set_next_token(TK("->"));
  221. else parser->set_next_token(TK("-"));
  222. return;
  223. }
  224. case '!':
  225. if(parser->matchchar('=')) parser->set_next_token(TK("!="));
  226. else syntaxError("expected '=' after '!'");
  227. break;
  228. case '*':
  229. if (parser->matchchar('*')) {
  230. parser->set_next_token(TK("**")); // '**'
  231. } else {
  232. parser->set_next_token_2('=', TK("*"), TK("*="));
  233. }
  234. return;
  235. case '/':
  236. if(parser->matchchar('/')) {
  237. parser->set_next_token_2('=', TK("//"), TK("//="));
  238. } else {
  239. parser->set_next_token_2('=', TK("/"), TK("/="));
  240. }
  241. return;
  242. case '\r': break; // just ignore '\r'
  243. case ' ': case '\t': parser->eat_spaces(); break;
  244. case '\n': {
  245. parser->set_next_token(TK("@eol"));
  246. if(!parser->eat_indentation()) indentationError("unindent does not match any outer indentation level");
  247. return;
  248. }
  249. default: {
  250. if(c == 'f'){
  251. if(parser->matchchar('\'')) {eatString('\'', F_STRING); return;}
  252. if(parser->matchchar('"')) {eatString('"', F_STRING); return;}
  253. }else if(c == 'r'){
  254. if(parser->matchchar('\'')) {eatString('\'', RAW_STRING); return;}
  255. if(parser->matchchar('"')) {eatString('"', RAW_STRING); return;}
  256. }
  257. if (c >= '0' && c <= '9') {
  258. eatNumber();
  259. return;
  260. }
  261. switch (parser->eat_name())
  262. {
  263. case 0: break;
  264. case 1: syntaxError("invalid char: " + std::string(1, c));
  265. case 2: syntaxError("invalid utf8 sequence: " + std::string(1, c));
  266. case 3: syntaxError("@id contains invalid char"); break;
  267. case 4: syntaxError("invalid JSON token"); break;
  268. default: UNREACHABLE();
  269. }
  270. return;
  271. }
  272. }
  273. }
  274. parser->token_start = parser->curr_char;
  275. parser->set_next_token(TK("@eof"));
  276. }
  277. inline _TokenType peek() {
  278. return parser->curr.type;
  279. }
  280. // not sure this will work
  281. _TokenType peek_next() {
  282. if(parser->nexts.empty()) return TK("@eof");
  283. return parser->nexts.front().type;
  284. }
  285. bool match(_TokenType expected) {
  286. if (peek() != expected) return false;
  287. lexToken();
  288. return true;
  289. }
  290. void consume(_TokenType expected) {
  291. if (!match(expected)){
  292. _StrStream ss;
  293. ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(peek()) << "'";
  294. syntaxError(ss.str());
  295. }
  296. }
  297. bool match_newlines(bool repl_throw=false) {
  298. bool consumed = false;
  299. if (peek() == TK("@eol")) {
  300. while (peek() == TK("@eol")) lexToken();
  301. consumed = true;
  302. }
  303. if (repl_throw && peek() == TK("@eof")){
  304. throw NeedMoreLines(isCompilingClass);
  305. }
  306. return consumed;
  307. }
  308. bool matchEndStatement() {
  309. if (match(TK(";"))) { match_newlines(); return true; }
  310. if (match_newlines() || peek()==TK("@eof")) return true;
  311. if (peek() == TK("@dedent")) return true;
  312. return false;
  313. }
  314. void consumeEndStatement() {
  315. if (!matchEndStatement()) syntaxError("expected statement end");
  316. }
  317. void exprLiteral() {
  318. PyVar value = parser->prev.value;
  319. int index = co()->add_const(value);
  320. emit(OP_LOAD_CONST, index);
  321. }
  322. void exprFString() {
  323. static const std::regex pattern(R"(\{(.*?)\})");
  324. PyVar value = parser->prev.value;
  325. _Str s = vm->PyStr_AS_C(value);
  326. std::sregex_iterator begin(s.begin(), s.end(), pattern);
  327. std::sregex_iterator end;
  328. int size = 0;
  329. int i = 0;
  330. for(auto it = begin; it != end; it++) {
  331. std::smatch m = *it;
  332. if (i < m.position()) {
  333. std::string literal = s.substr(i, m.position() - i);
  334. emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(literal)));
  335. size++;
  336. }
  337. emit(OP_LOAD_EVAL_FN);
  338. emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(m[1].str())));
  339. emit(OP_CALL, 1);
  340. size++;
  341. i = (int)(m.position() + m.length());
  342. }
  343. if (i < s.size()) {
  344. std::string literal = s.substr(i, s.size() - i);
  345. emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(literal)));
  346. size++;
  347. }
  348. emit(OP_BUILD_STRING, size);
  349. }
  350. void exprLambda() {
  351. _Func func = pkpy::make_shared<Function>();
  352. func->name = "<lambda>";
  353. if(!match(TK(":"))){
  354. __compileFunctionArgs(func, false);
  355. consume(TK(":"));
  356. }
  357. func->code = pkpy::make_shared<CodeObject>(parser->src, func->name);
  358. this->codes.push(func->code);
  359. EXPR_TUPLE();
  360. emit(OP_RETURN_VALUE);
  361. func->code->optimize();
  362. this->codes.pop();
  363. emit(OP_LOAD_LAMBDA, co()->add_const(vm->PyFunction(func)));
  364. }
  365. void exprAssign() {
  366. _TokenType op = parser->prev.type;
  367. if(op == TK("=")) { // a = (expr)
  368. EXPR_TUPLE();
  369. emit(OP_STORE_REF);
  370. }else{ // a += (expr) -> a = a + (expr)
  371. emit(OP_DUP_TOP);
  372. EXPR();
  373. switch (op) {
  374. case TK("+="): emit(OP_BINARY_OP, 0); break;
  375. case TK("-="): emit(OP_BINARY_OP, 1); break;
  376. case TK("*="): emit(OP_BINARY_OP, 2); break;
  377. case TK("/="): emit(OP_BINARY_OP, 3); break;
  378. case TK("//="): emit(OP_BINARY_OP, 4); break;
  379. case TK("%="): emit(OP_BINARY_OP, 5); break;
  380. case TK("&="): emit(OP_BITWISE_OP, 2); break;
  381. case TK("|="): emit(OP_BITWISE_OP, 3); break;
  382. case TK("^="): emit(OP_BITWISE_OP, 4); break;
  383. default: UNREACHABLE();
  384. }
  385. emit(OP_STORE_REF);
  386. }
  387. }
  388. void exprComma() {
  389. int size = 1; // an expr is in the stack now
  390. do {
  391. EXPR(); // NOTE: "1," will fail, "1,2" will be ok
  392. size++;
  393. } while(match(TK(",")));
  394. emit(OP_BUILD_SMART_TUPLE, size);
  395. }
  396. void exprOr() {
  397. int patch = emit(OP_JUMP_IF_TRUE_OR_POP);
  398. parsePrecedence(PREC_LOGICAL_OR);
  399. patch_jump(patch);
  400. }
  401. void exprAnd() {
  402. int patch = emit(OP_JUMP_IF_FALSE_OR_POP);
  403. parsePrecedence(PREC_LOGICAL_AND);
  404. patch_jump(patch);
  405. }
  406. void exprTernary() {
  407. int patch = emit(OP_POP_JUMP_IF_FALSE);
  408. EXPR(); // if true
  409. int patch2 = emit(OP_JUMP_ABSOLUTE);
  410. consume(TK(":"));
  411. patch_jump(patch);
  412. EXPR(); // if false
  413. patch_jump(patch2);
  414. }
  415. void exprBinaryOp() {
  416. _TokenType op = parser->prev.type;
  417. parsePrecedence((Precedence)(rules[op].precedence + 1));
  418. switch (op) {
  419. case TK("+"): emit(OP_BINARY_OP, 0); break;
  420. case TK("-"): emit(OP_BINARY_OP, 1); break;
  421. case TK("*"): emit(OP_BINARY_OP, 2); break;
  422. case TK("/"): emit(OP_BINARY_OP, 3); break;
  423. case TK("//"): emit(OP_BINARY_OP, 4); break;
  424. case TK("%"): emit(OP_BINARY_OP, 5); break;
  425. case TK("**"): emit(OP_BINARY_OP, 6); break;
  426. case TK("<"): emit(OP_COMPARE_OP, 0); break;
  427. case TK("<="): emit(OP_COMPARE_OP, 1); break;
  428. case TK("=="): emit(OP_COMPARE_OP, 2); break;
  429. case TK("!="): emit(OP_COMPARE_OP, 3); break;
  430. case TK(">"): emit(OP_COMPARE_OP, 4); break;
  431. case TK(">="): emit(OP_COMPARE_OP, 5); break;
  432. case TK("in"): emit(OP_CONTAINS_OP, 0); break;
  433. case TK("not in"): emit(OP_CONTAINS_OP, 1); break;
  434. case TK("is"): emit(OP_IS_OP, 0); break;
  435. case TK("is not"): emit(OP_IS_OP, 1); break;
  436. case TK("<<"): emit(OP_BITWISE_OP, 0); break;
  437. case TK(">>"): emit(OP_BITWISE_OP, 1); break;
  438. case TK("&"): emit(OP_BITWISE_OP, 2); break;
  439. case TK("|"): emit(OP_BITWISE_OP, 3); break;
  440. case TK("^"): emit(OP_BITWISE_OP, 4); break;
  441. default: UNREACHABLE();
  442. }
  443. }
  444. void exprUnaryOp() {
  445. _TokenType op = parser->prev.type;
  446. parsePrecedence((Precedence)(PREC_UNARY + 1));
  447. switch (op) {
  448. case TK("-"): emit(OP_UNARY_NEGATIVE); break;
  449. case TK("not"): emit(OP_UNARY_NOT); break;
  450. case TK("*"): syntaxError("cannot use '*' as unary operator"); break;
  451. default: UNREACHABLE();
  452. }
  453. }
  454. void exprGrouping() {
  455. match_newlines(mode()==SINGLE_MODE);
  456. EXPR_TUPLE();
  457. match_newlines(mode()==SINGLE_MODE);
  458. consume(TK(")"));
  459. }
  460. void exprList() {
  461. int _patch = emit(OP_NO_OP);
  462. int _body_start = co()->co_code.size();
  463. int ARGC = 0;
  464. do {
  465. match_newlines(mode()==SINGLE_MODE);
  466. if (peek() == TK("]")) break;
  467. EXPR(); ARGC++;
  468. match_newlines(mode()==SINGLE_MODE);
  469. if(ARGC == 1 && match(TK("for"))) goto __LISTCOMP;
  470. } while (match(TK(",")));
  471. match_newlines(mode()==SINGLE_MODE);
  472. consume(TK("]"));
  473. emit(OP_BUILD_LIST, ARGC);
  474. return;
  475. __LISTCOMP:
  476. int _body_end_return = emit(OP_JUMP_ABSOLUTE, -1);
  477. int _body_end = co()->co_code.size();
  478. co()->co_code[_patch].op = OP_JUMP_ABSOLUTE;
  479. co()->co_code[_patch].arg = _body_end;
  480. emit(OP_BUILD_LIST, 0);
  481. EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE();
  482. match_newlines(mode()==SINGLE_MODE);
  483. int _skipPatch = emit(OP_JUMP_ABSOLUTE);
  484. int _cond_start = co()->co_code.size();
  485. int _cond_end_return = -1;
  486. if(match(TK("if"))) {
  487. EXPR_TUPLE();
  488. _cond_end_return = emit(OP_JUMP_ABSOLUTE, -1);
  489. }
  490. patch_jump(_skipPatch);
  491. emit(OP_GET_ITER);
  492. co()->__enter_block(FOR_LOOP);
  493. emit(OP_FOR_ITER);
  494. if(_cond_end_return != -1) { // there is an if condition
  495. emit(OP_JUMP_ABSOLUTE, _cond_start);
  496. patch_jump(_cond_end_return);
  497. int ifpatch = emit(OP_POP_JUMP_IF_FALSE);
  498. emit(OP_JUMP_ABSOLUTE, _body_start);
  499. patch_jump(_body_end_return);
  500. emit(OP_LIST_APPEND);
  501. patch_jump(ifpatch);
  502. }else{
  503. emit(OP_JUMP_ABSOLUTE, _body_start);
  504. patch_jump(_body_end_return);
  505. emit(OP_LIST_APPEND);
  506. }
  507. emit(OP_LOOP_CONTINUE, -1, true);
  508. co()->__exit_block();
  509. match_newlines(mode()==SINGLE_MODE);
  510. consume(TK("]"));
  511. }
  512. void exprMap() {
  513. bool parsing_dict = false;
  514. int size = 0;
  515. do {
  516. match_newlines(mode()==SINGLE_MODE);
  517. if (peek() == TK("}")) break;
  518. EXPR();
  519. if(peek() == TK(":")) parsing_dict = true;
  520. if(parsing_dict){
  521. consume(TK(":"));
  522. EXPR();
  523. }
  524. size++;
  525. match_newlines(mode()==SINGLE_MODE);
  526. } while (match(TK(",")));
  527. consume(TK("}"));
  528. if(size == 0 || parsing_dict) emit(OP_BUILD_MAP, size);
  529. else emit(OP_BUILD_SET, size);
  530. }
  531. void exprCall() {
  532. int ARGC = 0;
  533. int KWARGC = 0;
  534. do {
  535. match_newlines(mode()==SINGLE_MODE);
  536. if (peek() == TK(")")) break;
  537. if(peek() == TK("@id") && peek_next() == TK("=")) {
  538. consume(TK("@id"));
  539. const _Str& key = parser->prev.str();
  540. emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(key)));
  541. consume(TK("="));
  542. EXPR();
  543. KWARGC++;
  544. } else{
  545. if(KWARGC > 0) syntaxError("positional argument follows keyword argument");
  546. EXPR();
  547. ARGC++;
  548. }
  549. match_newlines(mode()==SINGLE_MODE);
  550. } while (match(TK(",")));
  551. consume(TK(")"));
  552. emit(OP_CALL, (KWARGC << 16) | ARGC);
  553. }
  554. void exprName() {
  555. Token tkname = parser->prev;
  556. int index = co()->add_name(
  557. tkname.str(),
  558. codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL
  559. );
  560. emit(OP_LOAD_NAME_REF, index);
  561. }
  562. void exprAttrib() {
  563. consume(TK("@id"));
  564. const _Str& name = parser->prev.str();
  565. int index = co()->add_name(name, NAME_ATTR);
  566. emit(OP_BUILD_ATTR_REF, index);
  567. }
  568. // [:], [:b]
  569. // [a], [a:], [a:b]
  570. void exprSubscript() {
  571. if(match(TK(":"))){
  572. emit(OP_LOAD_NONE);
  573. if(match(TK("]"))){
  574. emit(OP_LOAD_NONE);
  575. }else{
  576. EXPR_TUPLE();
  577. consume(TK("]"));
  578. }
  579. emit(OP_BUILD_SLICE);
  580. }else{
  581. EXPR_TUPLE();
  582. if(match(TK(":"))){
  583. if(match(TK("]"))){
  584. emit(OP_LOAD_NONE);
  585. }else{
  586. EXPR_TUPLE();
  587. consume(TK("]"));
  588. }
  589. emit(OP_BUILD_SLICE);
  590. }else{
  591. consume(TK("]"));
  592. }
  593. }
  594. emit(OP_BUILD_INDEX_REF);
  595. }
  596. void exprValue() {
  597. _TokenType op = parser->prev.type;
  598. switch (op) {
  599. case TK("None"): emit(OP_LOAD_NONE); break;
  600. case TK("True"): emit(OP_LOAD_TRUE); break;
  601. case TK("False"): emit(OP_LOAD_FALSE); break;
  602. case TK("..."): emit(OP_LOAD_ELLIPSIS); break;
  603. default: UNREACHABLE();
  604. }
  605. }
  606. int emit(Opcode opcode, int arg=-1, bool keepline=false) {
  607. int line = parser->prev.line;
  608. co()->co_code.push_back(
  609. Bytecode{(uint8_t)opcode, arg, line, (uint16_t)co()->_curr_block_i}
  610. );
  611. int i = co()->co_code.size() - 1;
  612. if(keepline && i>=1) co()->co_code[i].line = co()->co_code[i-1].line;
  613. return i;
  614. }
  615. inline void patch_jump(int addr_index) {
  616. int target = co()->co_code.size();
  617. co()->co_code[addr_index].arg = target;
  618. }
  619. void compileBlockBody(){
  620. __compileBlockBody(&Compiler::compileStatement);
  621. }
  622. void __compileBlockBody(CompilerAction action) {
  623. consume(TK(":"));
  624. if(!match_newlines(mode()==SINGLE_MODE)){
  625. syntaxError("expected a new line after ':'");
  626. }
  627. consume(TK("@indent"));
  628. while (peek() != TK("@dedent")) {
  629. match_newlines();
  630. (this->*action)();
  631. match_newlines();
  632. }
  633. consume(TK("@dedent"));
  634. }
  635. Token compileImportPath() {
  636. consume(TK("@id"));
  637. Token tkmodule = parser->prev;
  638. int index = co()->add_name(tkmodule.str(), NAME_SPECIAL);
  639. emit(OP_IMPORT_NAME, index);
  640. return tkmodule;
  641. }
  642. // import a as b
  643. void compileRegularImport() {
  644. do {
  645. Token tkmodule = compileImportPath();
  646. if (match(TK("as"))) {
  647. consume(TK("@id"));
  648. tkmodule = parser->prev;
  649. }
  650. int index = co()->add_name(tkmodule.str(), NAME_GLOBAL);
  651. emit(OP_STORE_NAME, index);
  652. } while (match(TK(",")));
  653. consumeEndStatement();
  654. }
  655. // from a import b as c, d as e
  656. void compileFromImport() {
  657. Token tkmodule = compileImportPath();
  658. consume(TK("import"));
  659. do {
  660. emit(OP_DUP_TOP);
  661. consume(TK("@id"));
  662. Token tkname = parser->prev;
  663. int index = co()->add_name(tkname.str(), NAME_ATTR);
  664. emit(OP_BUILD_ATTR_REF, index);
  665. if (match(TK("as"))) {
  666. consume(TK("@id"));
  667. tkname = parser->prev;
  668. }
  669. index = co()->add_name(tkname.str(), NAME_GLOBAL);
  670. emit(OP_STORE_NAME, index);
  671. } while (match(TK(",")));
  672. emit(OP_POP_TOP);
  673. consumeEndStatement();
  674. }
  675. void parsePrecedence(Precedence precedence) {
  676. lexToken();
  677. GrammarFn prefix = rules[parser->prev.type].prefix;
  678. if (prefix == nullptr) syntaxError(_Str("expected an expression, but got ") + TK_STR(parser->prev.type));
  679. (this->*prefix)();
  680. while (rules[peek()].precedence >= precedence) {
  681. lexToken();
  682. _TokenType op = parser->prev.type;
  683. GrammarFn infix = rules[op].infix;
  684. if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true");
  685. (this->*infix)();
  686. }
  687. }
  688. void compileIfStatement() {
  689. match_newlines();
  690. EXPR_TUPLE();
  691. int ifpatch = emit(OP_POP_JUMP_IF_FALSE);
  692. compileBlockBody();
  693. if (match(TK("elif"))) {
  694. int exit_jump = emit(OP_JUMP_ABSOLUTE);
  695. patch_jump(ifpatch);
  696. compileIfStatement();
  697. patch_jump(exit_jump);
  698. } else if (match(TK("else"))) {
  699. int exit_jump = emit(OP_JUMP_ABSOLUTE);
  700. patch_jump(ifpatch);
  701. compileBlockBody();
  702. patch_jump(exit_jump);
  703. } else {
  704. patch_jump(ifpatch);
  705. }
  706. }
  707. void compileWhileLoop() {
  708. co()->__enter_block(WHILE_LOOP);
  709. EXPR_TUPLE();
  710. int patch = emit(OP_POP_JUMP_IF_FALSE);
  711. compileBlockBody();
  712. emit(OP_LOOP_CONTINUE, -1, true);
  713. patch_jump(patch);
  714. co()->__exit_block();
  715. }
  716. void EXPR_FOR_VARS(){
  717. int size = 0;
  718. do {
  719. consume(TK("@id"));
  720. exprName(); size++;
  721. } while (match(TK(",")));
  722. if(size > 1) emit(OP_BUILD_SMART_TUPLE, size);
  723. }
  724. void compileForLoop() {
  725. EXPR_FOR_VARS();consume(TK("in")); EXPR_TUPLE();
  726. emit(OP_GET_ITER);
  727. co()->__enter_block(FOR_LOOP);
  728. emit(OP_FOR_ITER);
  729. compileBlockBody();
  730. emit(OP_LOOP_CONTINUE, -1, true);
  731. co()->__exit_block();
  732. }
  733. void compileTryExcept() {
  734. co()->__enter_block(TRY_EXCEPT);
  735. emit(OP_TRY_BLOCK_ENTER);
  736. compileBlockBody();
  737. emit(OP_TRY_BLOCK_EXIT);
  738. std::vector<int> patches = { emit(OP_JUMP_ABSOLUTE) };
  739. co()->__exit_block();
  740. do {
  741. consume(TK("except"));
  742. if(match(TK("@id"))){
  743. int name_idx = co()->add_name(parser->prev.str(), NAME_SPECIAL);
  744. emit(OP_EXCEPTION_MATCH, name_idx);
  745. }else{
  746. emit(OP_LOAD_TRUE);
  747. }
  748. int patch = emit(OP_POP_JUMP_IF_FALSE);
  749. emit(OP_POP_TOP); // pop the exception on match
  750. compileBlockBody();
  751. patches.push_back(emit(OP_JUMP_ABSOLUTE));
  752. patch_jump(patch);
  753. }while(peek() == TK("except"));
  754. emit(OP_RE_RAISE); // no match, re-raise
  755. for (int patch : patches) patch_jump(patch);
  756. }
  757. void compileStatement() {
  758. if (match(TK("break"))) {
  759. if (!co()->__is_curr_block_loop()) syntaxError("'break' outside loop");
  760. consumeEndStatement();
  761. emit(OP_LOOP_BREAK);
  762. } else if (match(TK("continue"))) {
  763. if (!co()->__is_curr_block_loop()) syntaxError("'continue' not properly in loop");
  764. consumeEndStatement();
  765. emit(OP_LOOP_CONTINUE);
  766. } else if (match(TK("return"))) {
  767. if (codes.size() == 1)
  768. syntaxError("'return' outside function");
  769. if(matchEndStatement()){
  770. emit(OP_LOAD_NONE);
  771. }else{
  772. EXPR_TUPLE();
  773. consumeEndStatement();
  774. }
  775. emit(OP_RETURN_VALUE, -1, true);
  776. } else if (match(TK("if"))) {
  777. compileIfStatement();
  778. } else if (match(TK("while"))) {
  779. compileWhileLoop();
  780. } else if (match(TK("for"))) {
  781. compileForLoop();
  782. } else if (match(TK("try"))) {
  783. compileTryExcept();
  784. }else if(match(TK("assert"))){
  785. EXPR();
  786. emit(OP_ASSERT);
  787. consumeEndStatement();
  788. } else if(match(TK("with"))){
  789. EXPR();
  790. consume(TK("as"));
  791. consume(TK("@id"));
  792. Token tkname = parser->prev;
  793. int index = co()->add_name(
  794. tkname.str(),
  795. codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL
  796. );
  797. emit(OP_STORE_NAME, index);
  798. emit(OP_LOAD_NAME_REF, index);
  799. emit(OP_WITH_ENTER);
  800. compileBlockBody();
  801. emit(OP_LOAD_NAME_REF, index);
  802. emit(OP_WITH_EXIT);
  803. } else if(match(TK("label"))){
  804. if(mode() != EXEC_MODE) syntaxError("'label' is only available in EXEC_MODE");
  805. consume(TK(".")); consume(TK("@id"));
  806. _Str label = parser->prev.str();
  807. bool ok = co()->add_label(label);
  808. if(!ok) syntaxError("label '" + label + "' already exists");
  809. consumeEndStatement();
  810. } else if(match(TK("goto"))){ // https://entrian.com/goto/
  811. if(mode() != EXEC_MODE) syntaxError("'goto' is only available in EXEC_MODE");
  812. consume(TK(".")); consume(TK("@id"));
  813. emit(OP_GOTO, co()->add_name(parser->prev.str(), NAME_SPECIAL));
  814. consumeEndStatement();
  815. } else if(match(TK("raise"))){
  816. consume(TK("@id"));
  817. int dummy_t = co()->add_name(parser->prev.str(), NAME_SPECIAL);
  818. if(match(TK("("))){
  819. EXPR(); consume(TK(")"));
  820. }else{
  821. emit(OP_LOAD_NONE);
  822. }
  823. emit(OP_RAISE, dummy_t);
  824. consumeEndStatement();
  825. } else if(match(TK("del"))){
  826. EXPR();
  827. emit(OP_DELETE_REF);
  828. consumeEndStatement();
  829. } else if(match(TK("global"))){
  830. do {
  831. consume(TK("@id"));
  832. co()->global_names[parser->prev.str()] = 1;
  833. } while (match(TK(",")));
  834. consumeEndStatement();
  835. } else if(match(TK("pass"))){
  836. consumeEndStatement();
  837. } else {
  838. EXPR_ANY();
  839. consumeEndStatement();
  840. // If last op is not an assignment, pop the result.
  841. uint8_t lastOp = co()->co_code.back().op;
  842. if( lastOp!=OP_STORE_NAME && lastOp!=OP_STORE_REF){
  843. if(mode()==SINGLE_MODE && parser->indents.top()==0) emit(OP_PRINT_EXPR, -1, true);
  844. emit(OP_POP_TOP, -1, true);
  845. }
  846. }
  847. }
  848. void compileClass(){
  849. consume(TK("@id"));
  850. int clsNameIdx = co()->add_name(parser->prev.str(), NAME_GLOBAL);
  851. int superClsNameIdx = -1;
  852. if(match(TK("(")) && match(TK("@id"))){
  853. superClsNameIdx = co()->add_name(parser->prev.str(), NAME_GLOBAL);
  854. consume(TK(")"));
  855. }
  856. emit(OP_LOAD_NONE);
  857. isCompilingClass = true;
  858. __compileBlockBody(&Compiler::compileFunction);
  859. isCompilingClass = false;
  860. if(superClsNameIdx == -1) emit(OP_LOAD_NONE);
  861. else emit(OP_LOAD_NAME_REF, superClsNameIdx);
  862. emit(OP_BUILD_CLASS, clsNameIdx);
  863. }
  864. void __compileFunctionArgs(_Func func, bool enableTypeHints){
  865. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  866. do {
  867. if(state == 3) syntaxError("**kwargs should be the last argument");
  868. match_newlines();
  869. if(match(TK("*"))){
  870. if(state < 1) state = 1;
  871. else syntaxError("*args should be placed before **kwargs");
  872. }
  873. else if(match(TK("**"))){
  874. state = 3;
  875. }
  876. consume(TK("@id"));
  877. const _Str& name = parser->prev.str();
  878. if(func->hasName(name)) syntaxError("duplicate argument name");
  879. // eat type hints
  880. if(enableTypeHints && match(TK(":"))) consume(TK("@id"));
  881. if(state == 0 && peek() == TK("=")) state = 2;
  882. switch (state)
  883. {
  884. case 0: func->args.push_back(name); break;
  885. case 1: func->starredArg = name; state+=1; break;
  886. case 2: {
  887. consume(TK("="));
  888. PyVarOrNull value = readLiteral();
  889. if(value == nullptr){
  890. syntaxError(_Str("expect a literal, not ") + TK_STR(parser->curr.type));
  891. }
  892. func->kwArgs[name] = value;
  893. func->kwArgsOrder.push_back(name);
  894. } break;
  895. case 3: syntaxError("**kwargs is not supported yet"); break;
  896. }
  897. } while (match(TK(",")));
  898. }
  899. void compileFunction(){
  900. if(isCompilingClass){
  901. if(match(TK("pass"))) return;
  902. consume(TK("def"));
  903. }
  904. _Func func = pkpy::make_shared<Function>();
  905. consume(TK("@id"));
  906. func->name = parser->prev.str();
  907. if (match(TK("(")) && !match(TK(")"))) {
  908. __compileFunctionArgs(func, true);
  909. consume(TK(")"));
  910. }
  911. // eat type hints
  912. if(match(TK("->"))) consume(TK("@id"));
  913. func->code = pkpy::make_shared<CodeObject>(parser->src, func->name);
  914. this->codes.push(func->code);
  915. compileBlockBody();
  916. func->code->optimize();
  917. this->codes.pop();
  918. emit(OP_LOAD_CONST, co()->add_const(vm->PyFunction(func)));
  919. if(!isCompilingClass) emit(OP_STORE_FUNCTION);
  920. }
  921. PyVarOrNull readLiteral(){
  922. if(match(TK("-"))){
  923. consume(TK("@num"));
  924. PyVar val = parser->prev.value;
  925. return vm->num_negated(val);
  926. }
  927. if(match(TK("@num"))) return parser->prev.value;
  928. if(match(TK("@str"))) return parser->prev.value;
  929. if(match(TK("True"))) return vm->PyBool(true);
  930. if(match(TK("False"))) return vm->PyBool(false);
  931. if(match(TK("None"))) return vm->None;
  932. if(match(TK("..."))) return vm->Ellipsis;
  933. return nullptr;
  934. }
  935. void compileTopLevelStatement() {
  936. if (match(TK("class"))) {
  937. compileClass();
  938. } else if (match(TK("def"))) {
  939. compileFunction();
  940. } else if (match(TK("import"))) {
  941. compileRegularImport();
  942. } else if (match(TK("from"))) {
  943. compileFromImport();
  944. } else {
  945. compileStatement();
  946. }
  947. }
  948. bool _used = false;
  949. _Code __fillCode(){
  950. // can only be called once
  951. if(_used) UNREACHABLE();
  952. _used = true;
  953. _Code code = pkpy::make_shared<CodeObject>(parser->src, _Str("<module>"));
  954. codes.push(code);
  955. // Lex initial tokens. current <-- next.
  956. lexToken();
  957. lexToken();
  958. match_newlines();
  959. if(mode()==EVAL_MODE) {
  960. EXPR_TUPLE();
  961. consume(TK("@eof"));
  962. code->optimize();
  963. return code;
  964. }else if(mode()==JSON_MODE){
  965. PyVarOrNull value = readLiteral();
  966. if(value != nullptr) emit(OP_LOAD_CONST, code->add_const(value));
  967. else if(match(TK("{"))) exprMap();
  968. else if(match(TK("["))) exprList();
  969. else syntaxError("expect a JSON object or array");
  970. consume(TK("@eof"));
  971. return code; // no need to optimize for JSON decoding
  972. }
  973. while (!match(TK("@eof"))) {
  974. compileTopLevelStatement();
  975. match_newlines();
  976. }
  977. code->optimize();
  978. return code;
  979. }
  980. /***** Error Reporter *****/
  981. _Str getLineSnapshot(){
  982. int lineno = parser->curr.line;
  983. const char* cursor = parser->curr.start;
  984. // if error occurs in lexing, lineno should be `parser->current_line`
  985. if(lexingCnt > 0){
  986. lineno = parser->current_line;
  987. cursor = parser->curr_char;
  988. }
  989. if(parser->peekchar() == '\n') lineno--;
  990. return parser->src->snapshot(lineno, cursor);
  991. }
  992. void __throw_e(_Str type, _Str msg){
  993. auto e = _Exception("SyntaxError", msg);
  994. e.st_push(getLineSnapshot());
  995. throw e;
  996. }
  997. void syntaxError(_Str msg){ __throw_e("SyntaxError", msg); }
  998. void indentationError(_Str msg){ __throw_e("IndentationError", msg); }
  999. };