compiler.h 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954
  1. #pragma once
  2. #include "parser.h"
  3. #include "error.h"
  4. #include "vm.h"
  5. class Compiler;
  6. typedef void (Compiler::*GrammarFn)();
  7. typedef void (Compiler::*CompilerAction)();
  8. struct GrammarRule{
  9. GrammarFn prefix;
  10. GrammarFn infix;
  11. Precedence precedence;
  12. };
  13. struct Loop {
  14. bool forLoop;
  15. int start;
  16. std::vector<int> breaks;
  17. Loop(bool forLoop, int start) : forLoop(forLoop), start(start) {}
  18. };
  19. class Compiler {
  20. public:
  21. std::unique_ptr<Parser> parser;
  22. std::stack<_Code> codes;
  23. std::stack<Loop> loops;
  24. bool isCompilingClass = false;
  25. VM* vm;
  26. std::unordered_map<_TokenType, GrammarRule> rules;
  27. _Code getCode() {
  28. return codes.top();
  29. }
  30. CompileMode mode() {
  31. return parser->src->mode;
  32. }
  33. Loop& getLoop() {
  34. return loops.top();
  35. }
  36. Compiler(VM* vm, const char* source, _Str filename, CompileMode mode){
  37. this->vm = vm;
  38. this->parser = std::make_unique<Parser>(
  39. std::make_shared<SourceMetadata>(source, filename, mode)
  40. );
  41. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  42. #define METHOD(name) &Compiler::name
  43. #define NO_INFIX nullptr, PREC_NONE
  44. for(_TokenType i=0; i<__TOKENS_LEN; i++) rules[i] = { nullptr, NO_INFIX };
  45. rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB };
  46. rules[TK("(")] = { METHOD(exprGrouping), METHOD(exprCall), PREC_CALL };
  47. rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscript), PREC_SUBSCRIPT };
  48. rules[TK("{")] = { METHOD(exprMap), NO_INFIX };
  49. rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  50. rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM };
  51. rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM };
  52. rules[TK("*")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  53. rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  54. rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  55. rules[TK("**")] = { nullptr, METHOD(exprBinaryOp), PREC_EXPONENT };
  56. rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  57. rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  58. rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  59. rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  60. rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  61. rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  62. rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  63. rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  64. rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  65. rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  66. rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND };
  67. rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR };
  68. rules[TK("not")] = { METHOD(exprUnaryOp), nullptr, PREC_UNARY };
  69. rules[TK("True")] = { METHOD(exprValue), NO_INFIX };
  70. rules[TK("False")] = { METHOD(exprValue), NO_INFIX };
  71. rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX };
  72. rules[TK("None")] = { METHOD(exprValue), NO_INFIX };
  73. rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
  74. rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
  75. rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
  76. rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
  77. rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  78. rules[TK("+=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  79. rules[TK("-=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  80. rules[TK("*=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  81. rules[TK("/=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  82. rules[TK("//=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  83. rules[TK(",")] = { nullptr, METHOD(exprComma), PREC_COMMA };
  84. rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  85. rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  86. rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND };
  87. rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR };
  88. rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  89. #undef METHOD
  90. #undef NO_INFIX
  91. #define EXPR() parsePrecedence(PREC_LOGICAL_OR) // no '=' and ',' just a simple expression
  92. #define EXPR_TUPLE() parsePrecedence(PREC_COMMA) // no '=', but ',' is allowed
  93. #define EXPR_ANY() parsePrecedence(PREC_ASSIGNMENT)
  94. }
  95. _Str eatStringUntil(char quote) {
  96. std::vector<char> buff;
  97. while (true) {
  98. char c = parser->eatCharIncludeNewLine();
  99. if (c == quote) break;
  100. if (c == '\0' || c == '\n') syntaxError("EOL while scanning string literal");
  101. if (c == '\\') {
  102. switch (parser->eatCharIncludeNewLine()) {
  103. case '"': buff.push_back('"'); break;
  104. case '\'': buff.push_back('\''); break;
  105. case '\\': buff.push_back('\\'); break;
  106. case 'n': buff.push_back('\n'); break;
  107. case 'r': buff.push_back('\r'); break;
  108. case 't': buff.push_back('\t'); break;
  109. case '\n': case '\r': break;
  110. default: syntaxError("invalid escape character");
  111. }
  112. } else {
  113. buff.push_back(c);
  114. }
  115. }
  116. return _Str(buff.data(), buff.size());
  117. }
  118. void eatString(char quote, bool fstr) {
  119. _Str s = eatStringUntil(quote);
  120. if(fstr){
  121. parser->setNextToken(TK("@fstr"), vm->PyStr(s));
  122. }else{
  123. parser->setNextToken(TK("@str"), vm->PyStr(s));
  124. }
  125. }
  126. void eatNumber() {
  127. static const std::regex pattern("^(0x)?[0-9a-f]+(\\.[0-9]+)?");
  128. std::smatch m;
  129. const char* i = parser->token_start;
  130. while(*i != '\n' && *i != '\0') i++;
  131. std::string s = std::string(parser->token_start, i);
  132. size_t* size = new size_t;
  133. try{
  134. if (std::regex_search(s, m, pattern)) {
  135. // here is m.length()-1, since the first char is eaten by lexToken()
  136. for(int j=0; j<m.length()-1; j++) parser->eatChar();
  137. int base = 10;
  138. if (m[1].matched) base = 16;
  139. if (m[2].matched) {
  140. if(base == 16) syntaxError("hex literal should not contain a dot");
  141. parser->setNextToken(TK("@num"), vm->PyFloat(std::stod(m[0], size)));
  142. } else {
  143. parser->setNextToken(TK("@num"), vm->PyInt(std::stoll(m[0], size, base)));
  144. }
  145. if (*size != m.length()) throw std::runtime_error("length mismatch");
  146. delete size;
  147. }
  148. }catch(std::exception& e){
  149. delete size;
  150. syntaxError("invalid number literal");
  151. }
  152. }
  153. // Lex the next token and set it as the next token.
  154. void lexToken() {
  155. parser->previous = parser->current;
  156. parser->current = parser->nextToken();
  157. //_Str _info = parser->current.info(); printf("%s\n", (const char*)_info);
  158. while (parser->peekChar() != '\0') {
  159. parser->token_start = parser->current_char;
  160. char c = parser->eatCharIncludeNewLine();
  161. switch (c) {
  162. case '\'': case '"': eatString(c, false); return;
  163. case '#': parser->skipLineComment(); break;
  164. case '{': parser->setNextToken(TK("{")); return;
  165. case '}': parser->setNextToken(TK("}")); return;
  166. case ',': parser->setNextToken(TK(",")); return;
  167. case ':': parser->setNextToken(TK(":")); return;
  168. case ';': parser->setNextToken(TK(";")); return;
  169. case '(': parser->setNextToken(TK("(")); return;
  170. case ')': parser->setNextToken(TK(")")); return;
  171. case '[': parser->setNextToken(TK("[")); return;
  172. case ']': parser->setNextToken(TK("]")); return;
  173. case '%': parser->setNextToken(TK("%")); return;
  174. case '.': parser->setNextToken(TK(".")); return;
  175. case '&': parser->setNextToken(TK("&")); return;
  176. case '|': parser->setNextToken(TK("|")); return;
  177. case '^': parser->setNextToken(TK("^")); return;
  178. case '=': parser->setNextTwoCharToken('=', TK("="), TK("==")); return;
  179. case '+': parser->setNextTwoCharToken('=', TK("+"), TK("+=")); return;
  180. case '>': {
  181. if(parser->matchChar('=')) parser->setNextToken(TK(">="));
  182. else if(parser->matchChar('>')) parser->setNextToken(TK(">>"));
  183. else parser->setNextToken(TK(">"));
  184. return;
  185. }
  186. case '<': {
  187. if(parser->matchChar('=')) parser->setNextToken(TK("<="));
  188. else if(parser->matchChar('<')) parser->setNextToken(TK("<<"));
  189. else parser->setNextToken(TK("<"));
  190. return;
  191. }
  192. case '-': {
  193. parser->setNextTwoCharToken('=', TK("-"), TK("-="));
  194. return;
  195. }
  196. case '!':
  197. if(parser->matchChar('=')) parser->setNextToken(TK("!="));
  198. else syntaxError("expected '=' after '!'");
  199. break;
  200. case '*':
  201. if (parser->matchChar('*')) {
  202. parser->setNextToken(TK("**")); // '**'
  203. } else {
  204. parser->setNextTwoCharToken('=', TK("*"), TK("*="));
  205. }
  206. return;
  207. case '/':
  208. if(parser->matchChar('/')) {
  209. parser->setNextTwoCharToken('=', TK("//"), TK("//="));
  210. } else {
  211. parser->setNextTwoCharToken('=', TK("/"), TK("/="));
  212. }
  213. return;
  214. case '\r': break; // just ignore '\r'
  215. case ' ': case '\t': parser->eatSpaces(); break;
  216. case '\n': {
  217. parser->setNextToken(TK("@eol")); while(parser->matchChar('\n'));
  218. if(!parser->eatIndentation()) indentationError("unindent does not match any outer indentation level");
  219. return;
  220. }
  221. default: {
  222. if (isdigit(c)) {
  223. eatNumber();
  224. } else if (isalpha(c) || c=='_') {
  225. if(c == 'f'){
  226. if(parser->matchChar('\'')) {eatString('\'', true); return;}
  227. if(parser->matchChar('"')) {eatString('"', true); return;}
  228. }
  229. parser->eatName();
  230. } else {
  231. syntaxError("unknown character: " + _Str(1, c));
  232. }
  233. return;
  234. }
  235. }
  236. }
  237. parser->token_start = parser->current_char;
  238. parser->setNextToken(TK("@eof"));
  239. }
  240. _TokenType peek() {
  241. return parser->current.type;
  242. }
  243. bool match(_TokenType expected) {
  244. if (peek() != expected) return false;
  245. lexToken();
  246. return true;
  247. }
  248. void consume(_TokenType expected) {
  249. lexToken();
  250. Token prev = parser->previous;
  251. if (prev.type != expected){
  252. _StrStream ss;
  253. ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(prev.type) << "'";
  254. syntaxError(ss.str());
  255. }
  256. }
  257. bool matchNewLines(bool repl_throw=false) {
  258. bool consumed = false;
  259. if (peek() == TK("@eol")) {
  260. while (peek() == TK("@eol")) lexToken();
  261. consumed = true;
  262. }
  263. if (repl_throw && peek() == TK("@eof")){
  264. throw NeedMoreLines(isCompilingClass);
  265. }
  266. return consumed;
  267. }
  268. bool matchEndStatement() {
  269. if (match(TK(";"))) {
  270. matchNewLines();
  271. return true;
  272. }
  273. if (matchNewLines() || peek() == TK("@eof"))
  274. return true;
  275. if (peek() == TK("@dedent")) return true;
  276. return false;
  277. }
  278. void consumeEndStatement() {
  279. if (!matchEndStatement()) syntaxError("expected statement end");
  280. }
  281. void exprLiteral() {
  282. PyVar value = parser->previous.value;
  283. int index = getCode()->addConst(value);
  284. emitCode(OP_LOAD_CONST, index);
  285. }
  286. void exprFString() {
  287. static const std::regex pattern(R"(\{(.*?)\})");
  288. PyVar value = parser->previous.value;
  289. std::string s = vm->PyStr_AS_C(value).str();
  290. std::sregex_iterator begin(s.begin(), s.end(), pattern);
  291. std::sregex_iterator end;
  292. int size = 0;
  293. int i = 0;
  294. for(auto it = begin; it != end; it++) {
  295. std::smatch m = *it;
  296. if (i < m.position()) {
  297. std::string literal = s.substr(i, m.position() - i);
  298. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(literal)));
  299. size++;
  300. }
  301. emitCode(OP_LOAD_EVAL_FN);
  302. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(m[1].str())));
  303. emitCode(OP_CALL, 1);
  304. size++;
  305. i = m.position() + m.length();
  306. }
  307. if (i < s.size()) {
  308. std::string literal = s.substr(i, s.size() - i);
  309. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(literal)));
  310. size++;
  311. }
  312. emitCode(OP_BUILD_STRING, size);
  313. }
  314. void exprLambda() {
  315. _Func func = std::make_shared<Function>();
  316. func->name = "<lambda>";
  317. if(!match(TK(":"))){
  318. __compileFunctionArgs(func);
  319. consume(TK(":"));
  320. }
  321. func->code = std::make_shared<CodeObject>(parser->src, func->name);
  322. this->codes.push(func->code);
  323. EXPR_TUPLE();
  324. emitCode(OP_RETURN_VALUE);
  325. this->codes.pop();
  326. emitCode(OP_LOAD_LAMBDA, getCode()->addConst(vm->PyFunction(func)));
  327. }
  328. void exprAssign() {
  329. _TokenType op = parser->previous.type;
  330. if(op == TK("=")) { // a = (expr)
  331. EXPR_TUPLE();
  332. emitCode(OP_STORE_PTR);
  333. }else{ // a += (expr) -> a = a + (expr)
  334. // TODO: optimization is needed for inplace operators
  335. emitCode(OP_DUP_TOP);
  336. EXPR();
  337. switch (op) {
  338. case TK("+="): emitCode(OP_BINARY_OP, 0); break;
  339. case TK("-="): emitCode(OP_BINARY_OP, 1); break;
  340. case TK("*="): emitCode(OP_BINARY_OP, 2); break;
  341. case TK("/="): emitCode(OP_BINARY_OP, 3); break;
  342. case TK("//="): emitCode(OP_BINARY_OP, 4); break;
  343. default: UNREACHABLE();
  344. }
  345. emitCode(OP_STORE_PTR);
  346. }
  347. }
  348. void exprComma() {
  349. int size = 1; // an expr is in the stack now
  350. do {
  351. EXPR(); // NOTE: "1," will fail, "1,2" will be ok
  352. size++;
  353. } while(match(TK(",")));
  354. emitCode(OP_BUILD_SMART_TUPLE, size);
  355. }
  356. void exprOr() {
  357. int patch = emitCode(OP_JUMP_IF_TRUE_OR_POP);
  358. parsePrecedence(PREC_LOGICAL_OR);
  359. patchJump(patch);
  360. }
  361. void exprAnd() {
  362. int patch = emitCode(OP_JUMP_IF_FALSE_OR_POP);
  363. parsePrecedence(PREC_LOGICAL_AND);
  364. patchJump(patch);
  365. }
  366. void exprBinaryOp() {
  367. _TokenType op = parser->previous.type;
  368. parsePrecedence((Precedence)(rules[op].precedence + 1));
  369. switch (op) {
  370. case TK("+"): emitCode(OP_BINARY_OP, 0); break;
  371. case TK("-"): emitCode(OP_BINARY_OP, 1); break;
  372. case TK("*"): emitCode(OP_BINARY_OP, 2); break;
  373. case TK("/"): emitCode(OP_BINARY_OP, 3); break;
  374. case TK("//"): emitCode(OP_BINARY_OP, 4); break;
  375. case TK("%"): emitCode(OP_BINARY_OP, 5); break;
  376. case TK("**"): emitCode(OP_BINARY_OP, 6); break;
  377. case TK("<"): emitCode(OP_COMPARE_OP, 0); break;
  378. case TK("<="): emitCode(OP_COMPARE_OP, 1); break;
  379. case TK("=="): emitCode(OP_COMPARE_OP, 2); break;
  380. case TK("!="): emitCode(OP_COMPARE_OP, 3); break;
  381. case TK(">"): emitCode(OP_COMPARE_OP, 4); break;
  382. case TK(">="): emitCode(OP_COMPARE_OP, 5); break;
  383. case TK("in"): emitCode(OP_CONTAINS_OP, 0); break;
  384. case TK("not in"): emitCode(OP_CONTAINS_OP, 1); break;
  385. case TK("is"): emitCode(OP_IS_OP, 0); break;
  386. case TK("is not"): emitCode(OP_IS_OP, 1); break;
  387. case TK("<<"): emitCode(OP_BITWISE_OP, 0); break;
  388. case TK(">>"): emitCode(OP_BITWISE_OP, 1); break;
  389. case TK("&"): emitCode(OP_BITWISE_OP, 2); break;
  390. case TK("|"): emitCode(OP_BITWISE_OP, 3); break;
  391. case TK("^"): emitCode(OP_BITWISE_OP, 4); break;
  392. default: UNREACHABLE();
  393. }
  394. }
  395. void exprUnaryOp() {
  396. _TokenType op = parser->previous.type;
  397. matchNewLines();
  398. parsePrecedence((Precedence)(PREC_UNARY + 1));
  399. switch (op) {
  400. case TK("-"): emitCode(OP_UNARY_NEGATIVE); break;
  401. case TK("not"): emitCode(OP_UNARY_NOT); break;
  402. default: UNREACHABLE();
  403. }
  404. }
  405. void exprGrouping() {
  406. matchNewLines();
  407. EXPR_TUPLE();
  408. matchNewLines();
  409. consume(TK(")"));
  410. }
  411. void exprList() {
  412. int _patch = emitCode(OP_NO_OP);
  413. int _body_start = getCode()->co_code.size();
  414. int ARGC = 0;
  415. do {
  416. matchNewLines();
  417. if (peek() == TK("]")) break;
  418. EXPR(); ARGC++;
  419. matchNewLines();
  420. if(ARGC == 1 && match(TK("for"))) goto __LISTCOMP;
  421. } while (match(TK(",")));
  422. matchNewLines();
  423. consume(TK("]"));
  424. emitCode(OP_BUILD_LIST, ARGC);
  425. return;
  426. __LISTCOMP:
  427. int _body_end = getCode()->co_code.size();
  428. getCode()->co_code[_patch].op = OP_JUMP_ABSOLUTE;
  429. getCode()->co_code[_patch].arg = _body_end;
  430. emitCode(OP_BUILD_LIST, 0);
  431. EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE();
  432. matchNewLines();
  433. int _skipPatch = emitCode(OP_JUMP_ABSOLUTE);
  434. int _cond_start = getCode()->co_code.size();
  435. if(match(TK("if"))) EXPR_TUPLE();
  436. int _cond_end = getCode()->co_code.size();
  437. patchJump(_skipPatch);
  438. emitCode(OP_GET_ITER);
  439. Loop& loop = enterLoop(true);
  440. int patch = emitCode(OP_FOR_ITER);
  441. if(_cond_end != _cond_start) { // there is an if condition
  442. getCode()->__moveToEnd(_cond_start, _cond_end);
  443. int ifpatch = emitCode(OP_POP_JUMP_IF_FALSE);
  444. getCode()->__moveToEnd(_body_start, _body_end);
  445. emitCode(OP_LIST_APPEND);
  446. patchJump(ifpatch);
  447. }else{
  448. getCode()->__moveToEnd(_body_start, _body_end);
  449. emitCode(OP_LIST_APPEND);
  450. }
  451. emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine();
  452. patchJump(patch);
  453. exitLoop();
  454. consume(TK("]"));
  455. }
  456. void exprMap() {
  457. int size = 0;
  458. do {
  459. matchNewLines();
  460. if (peek() == TK("}")) break;
  461. EXPR();consume(TK(":"));EXPR();
  462. size++;
  463. matchNewLines();
  464. } while (match(TK(",")));
  465. matchNewLines();
  466. consume(TK("}"));
  467. emitCode(OP_BUILD_MAP, size);
  468. }
  469. void exprCall() {
  470. int ARGC = 0;
  471. do {
  472. matchNewLines();
  473. if (peek() == TK(")")) break;
  474. EXPR();
  475. ARGC++;
  476. matchNewLines();
  477. } while (match(TK(",")));
  478. matchNewLines();
  479. consume(TK(")"));
  480. emitCode(OP_CALL, ARGC);
  481. }
  482. void exprName() {
  483. Token tkname = parser->previous;
  484. int index = getCode()->addName(
  485. tkname.str(),
  486. codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL
  487. );
  488. emitCode(OP_LOAD_NAME_PTR, index);
  489. }
  490. void exprAttrib() {
  491. consume(TK("@id"));
  492. const _Str& name = parser->previous.str();
  493. int index = getCode()->addName(name, NAME_ATTR);
  494. emitCode(OP_BUILD_ATTR_PTR, index);
  495. }
  496. // [:], [:b]
  497. // [a], [a:], [a:b]
  498. void exprSubscript() {
  499. if(match(TK(":"))){
  500. emitCode(OP_LOAD_NONE);
  501. if(match(TK("]"))){
  502. emitCode(OP_LOAD_NONE);
  503. }else{
  504. EXPR();
  505. consume(TK("]"));
  506. }
  507. emitCode(OP_BUILD_SLICE);
  508. }else{
  509. EXPR();
  510. if(match(TK(":"))){
  511. if(match(TK("]"))){
  512. emitCode(OP_LOAD_NONE);
  513. }else{
  514. EXPR();
  515. consume(TK("]"));
  516. }
  517. emitCode(OP_BUILD_SLICE);
  518. }else{
  519. consume(TK("]"));
  520. }
  521. }
  522. emitCode(OP_BUILD_INDEX_PTR);
  523. }
  524. void exprValue() {
  525. _TokenType op = parser->previous.type;
  526. switch (op) {
  527. case TK("None"): emitCode(OP_LOAD_NONE); break;
  528. case TK("True"): emitCode(OP_LOAD_TRUE); break;
  529. case TK("False"): emitCode(OP_LOAD_FALSE); break;
  530. default: UNREACHABLE();
  531. }
  532. }
  533. void keepOpcodeLine(){
  534. int i = getCode()->co_code.size() - 1;
  535. getCode()->co_code[i].line = getCode()->co_code[i-1].line;
  536. }
  537. int emitCode(Opcode opcode, int arg=-1) {
  538. int line = parser->previous.line;
  539. getCode()->co_code.push_back(
  540. ByteCode{(uint8_t)opcode, arg, (uint16_t)line}
  541. );
  542. return getCode()->co_code.size() - 1;
  543. }
  544. inline void patchJump(int addr_index) {
  545. int target = getCode()->co_code.size();
  546. getCode()->co_code[addr_index].arg = target;
  547. }
  548. void compileBlockBody(){
  549. __compileBlockBody(&Compiler::compileStatement);
  550. }
  551. void __compileBlockBody(CompilerAction action) {
  552. consume(TK(":"));
  553. if(!matchNewLines(mode()==SINGLE_MODE)){
  554. syntaxError("expected a new line after ':'");
  555. }
  556. consume(TK("@indent"));
  557. while (peek() != TK("@dedent")) {
  558. matchNewLines();
  559. (this->*action)();
  560. matchNewLines();
  561. }
  562. consume(TK("@dedent"));
  563. }
  564. Token compileImportPath() {
  565. consume(TK("@id"));
  566. Token tkmodule = parser->previous;
  567. int index = getCode()->addName(tkmodule.str(), NAME_GLOBAL);
  568. emitCode(OP_IMPORT_NAME, index);
  569. return tkmodule;
  570. }
  571. // import module1 [as alias1 [, module2 [as alias2 ...]]
  572. void compileRegularImport() {
  573. do {
  574. Token tkmodule = compileImportPath();
  575. if (match(TK("as"))) {
  576. consume(TK("@id"));
  577. tkmodule = parser->previous;
  578. }
  579. int index = getCode()->addName(tkmodule.str(), NAME_GLOBAL);
  580. emitCode(OP_STORE_NAME_PTR, index);
  581. } while (match(TK(",")));
  582. consumeEndStatement();
  583. }
  584. void parsePrecedence(Precedence precedence) {
  585. lexToken();
  586. GrammarFn prefix = rules[parser->previous.type].prefix;
  587. if (prefix == nullptr) syntaxError(_Str("expected an expression, but got ") + TK_STR(parser->previous.type));
  588. (this->*prefix)();
  589. while (rules[peek()].precedence >= precedence) {
  590. lexToken();
  591. _TokenType op = parser->previous.type;
  592. GrammarFn infix = rules[op].infix;
  593. if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true");
  594. (this->*infix)();
  595. }
  596. }
  597. void compileIfStatement() {
  598. matchNewLines();
  599. EXPR_TUPLE();
  600. int ifpatch = emitCode(OP_POP_JUMP_IF_FALSE);
  601. compileBlockBody();
  602. if (match(TK("elif"))) {
  603. int exit_jump = emitCode(OP_JUMP_ABSOLUTE);
  604. patchJump(ifpatch);
  605. compileIfStatement();
  606. patchJump(exit_jump);
  607. } else if (match(TK("else"))) {
  608. int exit_jump = emitCode(OP_JUMP_ABSOLUTE);
  609. patchJump(ifpatch);
  610. compileBlockBody();
  611. patchJump(exit_jump);
  612. } else {
  613. patchJump(ifpatch);
  614. }
  615. }
  616. Loop& enterLoop(bool forLoop){
  617. Loop lp(forLoop, (int)getCode()->co_code.size());
  618. loops.push(lp);
  619. return loops.top();
  620. }
  621. void exitLoop(){
  622. Loop& lp = loops.top();
  623. for(int addr : lp.breaks) patchJump(addr);
  624. loops.pop();
  625. }
  626. void compileWhileLoop() {
  627. Loop& loop = enterLoop(false);
  628. EXPR_TUPLE();
  629. int patch = emitCode(OP_POP_JUMP_IF_FALSE);
  630. compileBlockBody();
  631. emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine();
  632. patchJump(patch);
  633. exitLoop();
  634. }
  635. void EXPR_FOR_VARS(){
  636. int size = 0;
  637. do {
  638. consume(TK("@id"));
  639. exprName(); size++;
  640. } while (match(TK(",")));
  641. if(size > 1) emitCode(OP_BUILD_SMART_TUPLE, size);
  642. }
  643. void compileForLoop() {
  644. EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE();
  645. emitCode(OP_GET_ITER);
  646. Loop& loop = enterLoop(true);
  647. int patch = emitCode(OP_FOR_ITER);
  648. compileBlockBody();
  649. emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine();
  650. patchJump(patch);
  651. exitLoop();
  652. }
  653. void compileStatement() {
  654. if (match(TK("break"))) {
  655. if (loops.empty()) syntaxError("'break' outside loop");
  656. consumeEndStatement();
  657. if(getLoop().forLoop) emitCode(OP_POP_TOP); // pop the iterator of for loop.
  658. int patch = emitCode(OP_JUMP_ABSOLUTE);
  659. getLoop().breaks.push_back(patch);
  660. } else if (match(TK("continue"))) {
  661. if (loops.empty()) syntaxError("'continue' not properly in loop");
  662. consumeEndStatement();
  663. emitCode(OP_JUMP_ABSOLUTE, getLoop().start);
  664. } else if (match(TK("return"))) {
  665. if (codes.size() == 1)
  666. syntaxError("'return' outside function");
  667. if(matchEndStatement()){
  668. emitCode(OP_LOAD_NONE);
  669. }else{
  670. EXPR_TUPLE();
  671. consumeEndStatement();
  672. }
  673. emitCode(OP_RETURN_VALUE);
  674. } else if (match(TK("if"))) {
  675. compileIfStatement();
  676. } else if (match(TK("while"))) {
  677. compileWhileLoop();
  678. } else if (match(TK("for"))) {
  679. compileForLoop();
  680. } else if(match(TK("assert"))){
  681. EXPR();
  682. emitCode(OP_ASSERT);
  683. consumeEndStatement();
  684. } else if(match(TK("label"))){
  685. if(mode() != EXEC_MODE) syntaxError("'label' is only available in EXEC_MODE");
  686. consume(TK(".")); consume(TK("@id"));
  687. getCode()->addLabel(parser->previous.str());
  688. consumeEndStatement();
  689. } else if(match(TK("goto"))){
  690. // https://entrian.com/goto/
  691. if(mode() != EXEC_MODE) syntaxError("'goto' is only available in EXEC_MODE");
  692. consume(TK(".")); consume(TK("@id"));
  693. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(parser->previous.str())));
  694. emitCode(OP_GOTO);
  695. consumeEndStatement();
  696. } else if(match(TK("raise"))){
  697. consume(TK("@id")); // dummy exception type
  698. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(parser->previous.str())));
  699. consume(TK("("));EXPR();consume(TK(")"));
  700. emitCode(OP_RAISE_ERROR);
  701. consumeEndStatement();
  702. } else if(match(TK("del"))){
  703. EXPR();
  704. emitCode(OP_DELETE_PTR);
  705. consumeEndStatement();
  706. } else if(match(TK("global"))){
  707. consume(TK("@id"));
  708. int index = getCode()->addName(parser->previous.str(), NAME_GLOBAL);
  709. consumeEndStatement();
  710. } else if(match(TK("pass"))){
  711. consumeEndStatement();
  712. } else {
  713. EXPR_ANY();
  714. consumeEndStatement();
  715. // If last op is not an assignment, pop the result.
  716. uint8_t lastOp = getCode()->co_code.back().op;
  717. if( lastOp != OP_STORE_NAME_PTR && lastOp != OP_STORE_PTR){
  718. if(mode()==SINGLE_MODE && parser->indents.top() == 0){
  719. emitCode(OP_PRINT_EXPR);
  720. }
  721. emitCode(OP_POP_TOP);
  722. }
  723. }
  724. }
  725. void compileClass(){
  726. consume(TK("@id"));
  727. int clsNameIdx = getCode()->addName(parser->previous.str(), NAME_GLOBAL);
  728. int superClsNameIdx = -1;
  729. if(match(TK("("))){
  730. consume(TK("@id"));
  731. superClsNameIdx = getCode()->addName(parser->previous.str(), NAME_GLOBAL);
  732. consume(TK(")"));
  733. }
  734. emitCode(OP_LOAD_NONE);
  735. isCompilingClass = true;
  736. __compileBlockBody(&Compiler::compileFunction);
  737. isCompilingClass = false;
  738. if(superClsNameIdx == -1) emitCode(OP_LOAD_NONE);
  739. else emitCode(OP_LOAD_NAME_PTR, superClsNameIdx);
  740. emitCode(OP_BUILD_CLASS, clsNameIdx);
  741. }
  742. void __compileFunctionArgs(_Func func){
  743. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  744. do {
  745. if(state == 3) syntaxError("**kwargs should be the last argument");
  746. matchNewLines();
  747. if(match(TK("*"))){
  748. if(state < 1) state = 1;
  749. else syntaxError("*args should be placed before **kwargs");
  750. }
  751. else if(match(TK("**"))){
  752. state = 3;
  753. }
  754. consume(TK("@id"));
  755. const _Str& name = parser->previous.str();
  756. if(func->hasName(name)) syntaxError("duplicate argument name");
  757. if(state == 0 && peek() == TK("=")) state = 2;
  758. switch (state)
  759. {
  760. case 0: func->args.push_back(name); break;
  761. case 1: func->starredArg = name; state+=1; break;
  762. case 2: consume(TK("=")); func->kwArgs[name] = consumeLiteral(); break;
  763. case 3: syntaxError("**kwargs is not supported yet"); break;
  764. }
  765. } while (match(TK(",")));
  766. }
  767. void compileFunction(){
  768. if(isCompilingClass){
  769. if(match(TK("pass"))) return;
  770. consume(TK("def"));
  771. }
  772. _Func func = std::make_shared<Function>();
  773. consume(TK("@id"));
  774. func->name = parser->previous.str();
  775. if (match(TK("(")) && !match(TK(")"))) {
  776. __compileFunctionArgs(func);
  777. consume(TK(")"));
  778. }
  779. func->code = std::make_shared<CodeObject>(parser->src, func->name);
  780. this->codes.push(func->code);
  781. compileBlockBody();
  782. this->codes.pop();
  783. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyFunction(func)));
  784. if(!isCompilingClass) emitCode(OP_STORE_FUNCTION);
  785. }
  786. PyVar consumeLiteral(){
  787. if(match(TK("-"))){
  788. consume(TK("@num"));
  789. PyVar val = parser->previous.value;
  790. return vm->numNegated(val);
  791. }
  792. if(match(TK("@num"))) return parser->previous.value;
  793. if(match(TK("@str"))) return parser->previous.value;
  794. if(match(TK("True"))) return vm->PyBool(true);
  795. if(match(TK("False"))) return vm->PyBool(false);
  796. if(match(TK("None"))) return vm->None;
  797. syntaxError(_Str("expect a literal, not ") + TK_STR(parser->current.type));
  798. return nullptr;
  799. }
  800. void compileTopLevelStatement() {
  801. if (match(TK("class"))) {
  802. compileClass();
  803. } else if (match(TK("def"))) {
  804. compileFunction();
  805. } else if (match(TK("import"))) {
  806. compileRegularImport();
  807. } else {
  808. compileStatement();
  809. }
  810. }
  811. _Code __fillCode(){
  812. _Code code = std::make_shared<CodeObject>(parser->src, _Str("<module>"), mode());
  813. codes.push(code);
  814. // Lex initial tokens. current <-- next.
  815. lexToken();
  816. lexToken();
  817. matchNewLines();
  818. if(mode()==EVAL_MODE) {
  819. EXPR_TUPLE();
  820. consume(TK("@eof"));
  821. return code;
  822. }
  823. while (!match(TK("@eof"))) {
  824. compileTopLevelStatement();
  825. matchNewLines();
  826. }
  827. return code;
  828. }
  829. /***** Error Reporter *****/
  830. _Str getLineSnapshot(){
  831. int lineno = parser->current_line;
  832. if(parser->peekChar() == '\n') lineno--;
  833. return parser->src->snapshot(lineno);
  834. }
  835. void syntaxError(_Str msg){
  836. throw CompileError("SyntaxError", msg, getLineSnapshot());
  837. }
  838. void indentationError(_Str msg){
  839. throw CompileError("IndentationError", msg, getLineSnapshot());
  840. }
  841. void unexpectedError(_Str msg){
  842. throw CompileError("UnexpectedError", msg, getLineSnapshot());
  843. }
  844. };
  845. _Code compile(VM* vm, const char* source, _Str filename, CompileMode mode=EXEC_MODE) {
  846. Compiler compiler(vm, source, filename, mode);
  847. try{
  848. return compiler.__fillCode();
  849. }catch(std::exception& e){
  850. if(const _Error* _ = dynamic_cast<const _Error*>(&e)){
  851. vm->_stderr(vm, e.what());
  852. }else{
  853. auto ce = CompileError("UnexpectedError", e.what(), compiler.getLineSnapshot());
  854. vm->_stderr(vm, ce.what());
  855. }
  856. vm->_stderr(vm, "\n");
  857. return nullptr;
  858. }
  859. }