compiler.h 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895
  1. #pragma once
  2. #include <vector>
  3. #include <string>
  4. #include <cstring>
  5. #include "parser.h"
  6. #include "error.h"
  7. #include "vm.h"
  8. class Compiler;
  9. typedef void (Compiler::*GrammarFn)();
  10. typedef void (Compiler::*CompilerAction)();
  11. struct GrammarRule{
  12. GrammarFn prefix;
  13. GrammarFn infix;
  14. Precedence precedence;
  15. };
  16. struct Loop {
  17. bool forLoop;
  18. int start;
  19. std::vector<int> breaks;
  20. Loop(bool forLoop, int start) : forLoop(forLoop), start(start) {}
  21. };
  22. #define ExprCommaSplitArgs(end) \
  23. int ARGC = 0; \
  24. do { \
  25. matchNewLines(); \
  26. if (peek() == TK(end)) break; \
  27. EXPR(); \
  28. ARGC++; \
  29. matchNewLines(); \
  30. } while (match(TK(","))); \
  31. matchNewLines(); \
  32. consume(TK(end));
  33. class Compiler {
  34. public:
  35. std::unique_ptr<Parser> parser;
  36. std::stack<_Code> codes;
  37. std::stack<Loop> loops;
  38. CompileMode mode;
  39. bool isCompilingClass = false;
  40. _Str path = "<?>";
  41. VM* vm;
  42. std::unordered_map<_TokenType, GrammarRule> rules;
  43. _Code getCode() {
  44. return codes.top();
  45. }
  46. Loop& getLoop() {
  47. return loops.top();
  48. }
  49. Compiler(VM* vm, const char* source, _Code code){
  50. this->vm = vm;
  51. this->codes.push(code);
  52. this->mode = code->mode;
  53. if (!code->co_filename.empty()) path = code->co_filename;
  54. this->parser = std::make_unique<Parser>(source);
  55. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  56. #define METHOD(name) &Compiler::name
  57. #define NO_INFIX nullptr, PREC_NONE
  58. for(_TokenType i=0; i<__TOKENS_LEN; i++) rules[i] = { nullptr, NO_INFIX };
  59. rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB };
  60. rules[TK("(")] = { METHOD(exprGrouping), METHOD(exprCall), PREC_CALL };
  61. rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscript), PREC_SUBSCRIPT };
  62. rules[TK("{")] = { METHOD(exprMap), NO_INFIX };
  63. rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  64. rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM };
  65. rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM };
  66. rules[TK("*")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  67. rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  68. rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  69. rules[TK("**")] = { nullptr, METHOD(exprBinaryOp), PREC_EXPONENT };
  70. rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  71. rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  72. rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  73. rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  74. rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  75. rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  76. rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  77. rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  78. rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  79. rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  80. rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND };
  81. rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR };
  82. rules[TK("not")] = { METHOD(exprUnaryOp), nullptr, PREC_UNARY };
  83. rules[TK("True")] = { METHOD(exprValue), NO_INFIX };
  84. rules[TK("False")] = { METHOD(exprValue), NO_INFIX };
  85. rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX };
  86. rules[TK("None")] = { METHOD(exprValue), NO_INFIX };
  87. rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
  88. rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
  89. rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
  90. rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
  91. rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  92. rules[TK("+=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  93. rules[TK("-=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  94. rules[TK("*=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  95. rules[TK("/=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  96. rules[TK("//=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  97. rules[TK(",")] = { nullptr, METHOD(exprComma), PREC_COMMA };
  98. #undef METHOD
  99. #undef NO_INFIX
  100. #define EXPR() parsePrecedence(PREC_LOGICAL_OR) // no '=' and ',' just a simple expression
  101. #define EXPR_TUPLE() parsePrecedence(PREC_COMMA) // no '=', but ',' is allowed
  102. #define EXPR_ANY() parsePrecedence(PREC_ASSIGNMENT)
  103. }
  104. _Str eatStringUntil(char quote) {
  105. std::vector<char> buff;
  106. while (true) {
  107. char c = parser->eatChar();
  108. if (c == quote) break;
  109. if (c == '\0')
  110. throw SyntaxError(path, parser->makeErrToken(), "EOL while scanning string literal");
  111. if (c == '\\') {
  112. switch (parser->eatCharIncludeNewLine()) {
  113. case '"': buff.push_back('"'); break;
  114. case '\'': buff.push_back('\''); break;
  115. case '\\': buff.push_back('\\'); break;
  116. case 'n': buff.push_back('\n'); break;
  117. case 'r': buff.push_back('\r'); break;
  118. case 't': buff.push_back('\t'); break;
  119. case '\n': case '\r': break;
  120. default: throw SyntaxError(path, parser->makeErrToken(), "invalid escape character");
  121. }
  122. } else {
  123. buff.push_back(c);
  124. }
  125. }
  126. return _Str(buff.data(), buff.size());
  127. }
  128. void eatString(char quote, bool fstr) {
  129. _Str s = eatStringUntil(quote);
  130. if(fstr){
  131. parser->setNextToken(TK("@fstr"), vm->PyStr(s));
  132. }else{
  133. parser->setNextToken(TK("@str"), vm->PyStr(s));
  134. }
  135. }
  136. void eatNumber() {
  137. static const std::regex pattern("^[+-]?([0-9]+)(\\.[0-9]+)?");
  138. std::smatch m;
  139. const char* i = parser->token_start;
  140. while(*i != '\n' && *i != '\0') i++;
  141. std::string s = std::string(parser->token_start, i);
  142. try{
  143. if (std::regex_search(s, m, pattern)) {
  144. // here is m.length()-1, since the first char is eaten by lexToken()
  145. for(int j=0; j<m.length()-1; j++) parser->eatChar();
  146. if (m[2].matched) {
  147. parser->setNextToken(TK("@num"), vm->PyFloat(std::stof(m[0])));
  148. } else {
  149. parser->setNextToken(TK("@num"), vm->PyInt(std::stoi(m[0])));
  150. }
  151. }
  152. }catch(std::exception& e){
  153. throw SyntaxError(path, parser->makeErrToken(), "invalid number (%s)", e.what());
  154. }
  155. }
  156. // Lex the next token and set it as the next token.
  157. void lexToken() {
  158. parser->previous = parser->current;
  159. parser->current = parser->nextToken();
  160. //printf("<%s> ", TK_STR(peek()));
  161. while (parser->peekChar() != '\0') {
  162. parser->token_start = parser->current_char;
  163. char c = parser->eatCharIncludeNewLine();
  164. switch (c) {
  165. case '\'': case '"': eatString(c, false); return;
  166. case '#': parser->skipLineComment(); break;
  167. case '{': parser->setNextToken(TK("{")); return;
  168. case '}': parser->setNextToken(TK("}")); return;
  169. case ',': parser->setNextToken(TK(",")); return;
  170. case ':': parser->setNextToken(TK(":")); return;
  171. case ';': parser->setNextToken(TK(";")); return;
  172. case '(': parser->setNextToken(TK("(")); return;
  173. case ')': parser->setNextToken(TK(")")); return;
  174. case '[': parser->setNextToken(TK("[")); return;
  175. case ']': parser->setNextToken(TK("]")); return;
  176. case '%': parser->setNextToken(TK("%")); return;
  177. case '.': parser->setNextToken(TK(".")); return;
  178. case '=': parser->setNextTwoCharToken('=', TK("="), TK("==")); return;
  179. case '>': parser->setNextTwoCharToken('=', TK(">"), TK(">=")); return;
  180. case '<': parser->setNextTwoCharToken('=', TK("<"), TK("<=")); return;
  181. case '+': parser->setNextTwoCharToken('=', TK("+"), TK("+=")); return;
  182. case '-': {
  183. if(isdigit(parser->peekChar())) eatNumber();
  184. else parser->setNextTwoCharToken('=', TK("-"), TK("-="));
  185. return;
  186. }
  187. case '!':
  188. if(parser->matchChar('=')) parser->setNextToken(TK("!="));
  189. else SyntaxError(path, parser->makeErrToken(), "expected '=' after '!'");
  190. break;
  191. case '*':
  192. if (parser->matchChar('*')) {
  193. parser->setNextToken(TK("**")); // '**'
  194. } else {
  195. parser->setNextTwoCharToken('=', TK("*"), TK("*="));
  196. }
  197. return;
  198. case '/':
  199. if(parser->matchChar('/')) {
  200. parser->setNextTwoCharToken('=', TK("//"), TK("//="));
  201. } else {
  202. parser->setNextTwoCharToken('=', TK("/"), TK("/="));
  203. }
  204. return;
  205. case '\r': break; // just ignore '\r'
  206. case ' ': case '\t': parser->eatSpaces(); break;
  207. case '\n': {
  208. parser->setNextToken(TK("@eol"));
  209. while(parser->matchChar('\n'));
  210. if(!parser->eatIndentation())
  211. throw SyntaxError(path, parser->makeErrToken(), "unindent does not match any outer indentation level");
  212. return;
  213. }
  214. default: {
  215. if (isdigit(c)) {
  216. eatNumber();
  217. } else if (isalpha(c) || c=='_') {
  218. if(c == 'f'){
  219. if(parser->matchChar('\'')) {eatString('\'', true); return;}
  220. if(parser->matchChar('"')) {eatString('"', true); return;}
  221. }
  222. parser->eatName();
  223. } else {
  224. throw SyntaxError(path, parser->makeErrToken(), "unknown character: %c", c);
  225. }
  226. return;
  227. }
  228. }
  229. }
  230. parser->token_start = parser->current_char;
  231. parser->setNextToken(TK("@eof"));
  232. }
  233. _TokenType peek() {
  234. return parser->current.type;
  235. }
  236. bool match(_TokenType expected) {
  237. if (peek() != expected) return false;
  238. lexToken();
  239. return true;
  240. }
  241. void consume(_TokenType expected) {
  242. lexToken();
  243. Token prev = parser->previous;
  244. if (prev.type != expected){
  245. throw SyntaxError(path, prev, "expected '%s', but got '%s'", TK_STR(expected), TK_STR(prev.type));
  246. }
  247. }
  248. bool matchNewLines(bool repl_throw=false) {
  249. bool consumed = false;
  250. if (peek() == TK("@eol")) {
  251. while (peek() == TK("@eol")) lexToken();
  252. consumed = true;
  253. }
  254. if (repl_throw && peek() == TK("@eof")){
  255. throw NeedMoreLines(isCompilingClass);
  256. }
  257. return consumed;
  258. }
  259. bool matchEndStatement() {
  260. if (match(TK(";"))) {
  261. matchNewLines();
  262. return true;
  263. }
  264. if (matchNewLines() || peek() == TK("@eof"))
  265. return true;
  266. if (peek() == TK("@dedent")) return true;
  267. return false;
  268. }
  269. void consumeEndStatement() {
  270. if (!matchEndStatement())
  271. throw SyntaxError(path, parser->current, "expected statement end");
  272. }
  273. void exprLiteral() {
  274. PyVar value = parser->previous.value;
  275. int index = getCode()->addConst(value);
  276. emitCode(OP_LOAD_CONST, index);
  277. }
  278. void exprFString() {
  279. static const std::regex pattern(R"(\{(.*?)\})");
  280. PyVar value = parser->previous.value;
  281. std::string s = vm->PyStr_AS_C(value).str();
  282. std::sregex_iterator begin(s.begin(), s.end(), pattern);
  283. std::sregex_iterator end;
  284. int size = 0;
  285. int i = 0;
  286. for(auto it = begin; it != end; it++) {
  287. std::smatch m = *it;
  288. if (i < m.position()) {
  289. std::string literal = s.substr(i, m.position() - i);
  290. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(literal)));
  291. size++;
  292. }
  293. emitCode(OP_LOAD_EVAL_FN);
  294. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(m[1].str())));
  295. emitCode(OP_CALL, 1);
  296. size++;
  297. i = m.position() + m.length();
  298. }
  299. if (i < s.size()) {
  300. std::string literal = s.substr(i, s.size() - i);
  301. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(literal)));
  302. size++;
  303. }
  304. emitCode(OP_BUILD_STRING, size);
  305. }
  306. void exprLambda() {
  307. _Func func;
  308. func.name = "<lambda>";
  309. __compileFunctionArgs(func);
  310. consume(TK(":"));
  311. func.code = std::make_shared<CodeObject>();
  312. func.code->co_name = func.name;
  313. func.code->co_filename = path;
  314. this->codes.push(func.code);
  315. EXPR_TUPLE();
  316. emitCode(OP_RETURN_VALUE);
  317. this->codes.pop();
  318. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyFunction(func)));
  319. }
  320. void exprAssign() {
  321. _TokenType op = parser->previous.type;
  322. if(op == TK("=")) { // a = (expr)
  323. EXPR_TUPLE();
  324. emitCode(OP_STORE_PTR);
  325. }else{ // a += (expr) -> a = a + (expr)
  326. // TODO: optimization is needed for inplace operators
  327. emitCode(OP_DUP_TOP);
  328. EXPR();
  329. switch (op) {
  330. case TK("+="): emitCode(OP_BINARY_OP, 0); break;
  331. case TK("-="): emitCode(OP_BINARY_OP, 1); break;
  332. case TK("*="): emitCode(OP_BINARY_OP, 2); break;
  333. case TK("/="): emitCode(OP_BINARY_OP, 3); break;
  334. case TK("//="): emitCode(OP_BINARY_OP, 4); break;
  335. default: UNREACHABLE();
  336. }
  337. emitCode(OP_STORE_PTR);
  338. }
  339. }
  340. void exprComma() {
  341. int size = 1; // an expr is in the stack now
  342. do {
  343. EXPR(); // NOTE: "1," will fail, "1,2" will be ok
  344. size++;
  345. } while(match(TK(",")));
  346. emitCode(OP_BUILD_SMART_TUPLE, size);
  347. }
  348. void exprOr() {
  349. int patch = emitCode(OP_JUMP_IF_TRUE_OR_POP);
  350. parsePrecedence(PREC_LOGICAL_OR);
  351. patchJump(patch);
  352. }
  353. void exprAnd() {
  354. int patch = emitCode(OP_JUMP_IF_FALSE_OR_POP);
  355. parsePrecedence(PREC_LOGICAL_AND);
  356. patchJump(patch);
  357. }
  358. void exprBinaryOp() {
  359. _TokenType op = parser->previous.type;
  360. parsePrecedence((Precedence)(rules[op].precedence + 1));
  361. switch (op) {
  362. case TK("+"): emitCode(OP_BINARY_OP, 0); break;
  363. case TK("-"): emitCode(OP_BINARY_OP, 1); break;
  364. case TK("*"): emitCode(OP_BINARY_OP, 2); break;
  365. case TK("/"): emitCode(OP_BINARY_OP, 3); break;
  366. case TK("//"): emitCode(OP_BINARY_OP, 4); break;
  367. case TK("%"): emitCode(OP_BINARY_OP, 5); break;
  368. case TK("**"): emitCode(OP_BINARY_OP, 6); break;
  369. case TK("<"): emitCode(OP_COMPARE_OP, 0); break;
  370. case TK("<="): emitCode(OP_COMPARE_OP, 1); break;
  371. case TK("=="): emitCode(OP_COMPARE_OP, 2); break;
  372. case TK("!="): emitCode(OP_COMPARE_OP, 3); break;
  373. case TK(">"): emitCode(OP_COMPARE_OP, 4); break;
  374. case TK(">="): emitCode(OP_COMPARE_OP, 5); break;
  375. case TK("in"): emitCode(OP_CONTAINS_OP, 0); break;
  376. case TK("not in"): emitCode(OP_CONTAINS_OP, 1); break;
  377. case TK("is"): emitCode(OP_IS_OP, 0); break;
  378. case TK("is not"): emitCode(OP_IS_OP, 1); break;
  379. default: UNREACHABLE();
  380. }
  381. }
  382. void exprUnaryOp() {
  383. _TokenType op = parser->previous.type;
  384. matchNewLines();
  385. parsePrecedence((Precedence)(PREC_UNARY + 1));
  386. switch (op) {
  387. case TK("-"): emitCode(OP_UNARY_NEGATIVE); break;
  388. case TK("not"): emitCode(OP_UNARY_NOT); break;
  389. default: UNREACHABLE();
  390. }
  391. }
  392. void exprGrouping() {
  393. matchNewLines();
  394. EXPR_TUPLE();
  395. matchNewLines();
  396. consume(TK(")"));
  397. }
  398. void exprList() {
  399. int _patch = emitCode(OP_NO_OP);
  400. int _body_start = getCode()->co_code.size();
  401. int ARGC = 0;
  402. do {
  403. matchNewLines();
  404. if (peek() == TK("]")) break;
  405. EXPR(); ARGC++;
  406. matchNewLines();
  407. if(ARGC == 1 && match(TK("for"))) goto __LISTCOMP;
  408. } while (match(TK(",")));
  409. matchNewLines();
  410. consume(TK("]"));
  411. emitCode(OP_BUILD_LIST, ARGC);
  412. return;
  413. __LISTCOMP:
  414. int _body_end = getCode()->co_code.size();
  415. getCode()->co_code[_patch].op = OP_JUMP_ABSOLUTE;
  416. getCode()->co_code[_patch].arg = _body_end;
  417. emitCode(OP_BUILD_LIST, 0);
  418. EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE();
  419. matchNewLines();
  420. int _skipPatch = emitCode(OP_JUMP_ABSOLUTE);
  421. int _cond_start = getCode()->co_code.size();
  422. if(match(TK("if"))) EXPR_TUPLE();
  423. int _cond_end = getCode()->co_code.size();
  424. patchJump(_skipPatch);
  425. emitCode(OP_GET_ITER);
  426. Loop& loop = enterLoop(true);
  427. int patch = emitCode(OP_FOR_ITER);
  428. if(_cond_end != _cond_start) { // there is an if condition
  429. getCode()->__moveToEnd(_cond_start, _cond_end);
  430. int ifpatch = emitCode(OP_POP_JUMP_IF_FALSE);
  431. getCode()->__moveToEnd(_body_start, _body_end);
  432. emitCode(OP_LIST_APPEND);
  433. patchJump(ifpatch);
  434. }else{
  435. getCode()->__moveToEnd(_body_start, _body_end);
  436. emitCode(OP_LIST_APPEND);
  437. }
  438. emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine();
  439. patchJump(patch);
  440. exitLoop();
  441. consume(TK("]"));
  442. }
  443. void exprMap() {
  444. int size = 0;
  445. do {
  446. matchNewLines();
  447. if (peek() == TK("}")) break;
  448. EXPR();consume(TK(":"));EXPR();
  449. size++;
  450. matchNewLines();
  451. } while (match(TK(",")));
  452. matchNewLines();
  453. consume(TK("}"));
  454. emitCode(OP_BUILD_MAP, size);
  455. }
  456. void exprCall() {
  457. ExprCommaSplitArgs(")");
  458. emitCode(OP_CALL, ARGC);
  459. }
  460. void exprName() {
  461. Token tkname = parser->previous;
  462. int index = getCode()->addName(
  463. tkname.str(),
  464. codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL
  465. );
  466. emitCode(OP_LOAD_NAME_PTR, index);
  467. }
  468. void exprAttrib() {
  469. consume(TK("@id"));
  470. const _Str& name = parser->previous.str();
  471. int index = getCode()->addName(name, NAME_ATTR);
  472. emitCode(OP_BUILD_ATTR_PTR, index);
  473. }
  474. // [:], [:b]
  475. // [a], [a:], [a:b]
  476. void exprSubscript() {
  477. if(match(TK(":"))){
  478. emitCode(OP_LOAD_NONE);
  479. if(match(TK("]"))){
  480. emitCode(OP_LOAD_NONE);
  481. }else{
  482. EXPR();
  483. consume(TK("]"));
  484. }
  485. emitCode(OP_BUILD_SLICE);
  486. }else{
  487. EXPR();
  488. if(match(TK(":"))){
  489. if(match(TK("]"))){
  490. emitCode(OP_LOAD_NONE);
  491. }else{
  492. EXPR();
  493. consume(TK("]"));
  494. }
  495. emitCode(OP_BUILD_SLICE);
  496. }else{
  497. consume(TK("]"));
  498. }
  499. }
  500. emitCode(OP_BUILD_INDEX_PTR);
  501. }
  502. void exprValue() {
  503. _TokenType op = parser->previous.type;
  504. switch (op) {
  505. case TK("None"): emitCode(OP_LOAD_NONE); break;
  506. case TK("True"): emitCode(OP_LOAD_TRUE); break;
  507. case TK("False"): emitCode(OP_LOAD_FALSE); break;
  508. default: UNREACHABLE();
  509. }
  510. }
  511. void keepOpcodeLine(){
  512. int i = getCode()->co_code.size() - 1;
  513. getCode()->co_code[i].line = getCode()->co_code[i-1].line;
  514. }
  515. int emitCode(Opcode opcode, int arg=-1) {
  516. int line = parser->previous.line;
  517. getCode()->co_code.push_back(
  518. ByteCode{(uint8_t)opcode, arg, (uint16_t)line}
  519. );
  520. return getCode()->co_code.size() - 1;
  521. }
  522. inline void patchJump(int addr_index) {
  523. int target = getCode()->co_code.size();
  524. getCode()->co_code[addr_index].arg = target;
  525. }
  526. void compileBlockBody(){
  527. __compileBlockBody(&Compiler::compileStatement);
  528. }
  529. void __compileBlockBody(CompilerAction action) {
  530. consume(TK(":"));
  531. if(!matchNewLines(mode==SINGLE_MODE)){
  532. throw SyntaxError(path, parser->previous, "expected a new line after ':'");
  533. }
  534. consume(TK("@indent"));
  535. while (peek() != TK("@dedent")) {
  536. (this->*action)();
  537. matchNewLines();
  538. }
  539. consume(TK("@dedent"));
  540. }
  541. Token compileImportPath() {
  542. consume(TK("@id"));
  543. Token tkmodule = parser->previous;
  544. int index = getCode()->addName(tkmodule.str(), NAME_GLOBAL);
  545. emitCode(OP_IMPORT_NAME, index);
  546. return tkmodule;
  547. }
  548. // import module1 [as alias1 [, module2 [as alias2 ...]]
  549. void compileRegularImport() {
  550. do {
  551. Token tkmodule = compileImportPath();
  552. if (match(TK("as"))) {
  553. consume(TK("@id"));
  554. tkmodule = parser->previous;
  555. }
  556. int index = getCode()->addName(tkmodule.str(), NAME_GLOBAL);
  557. emitCode(OP_STORE_NAME_PTR, index);
  558. } while (match(TK(",")));
  559. consumeEndStatement();
  560. }
  561. void parsePrecedence(Precedence precedence) {
  562. lexToken();
  563. GrammarFn prefix = rules[parser->previous.type].prefix;
  564. if (prefix == nullptr) {
  565. throw SyntaxError(path, parser->previous, "expected an expression");
  566. }
  567. (this->*prefix)();
  568. while (rules[peek()].precedence >= precedence) {
  569. lexToken();
  570. _TokenType op = parser->previous.type;
  571. GrammarFn infix = rules[op].infix;
  572. (this->*infix)();
  573. }
  574. }
  575. void compileIfStatement() {
  576. matchNewLines();
  577. EXPR_TUPLE();
  578. int ifpatch = emitCode(OP_POP_JUMP_IF_FALSE);
  579. compileBlockBody();
  580. if (match(TK("elif"))) {
  581. int exit_jump = emitCode(OP_JUMP_ABSOLUTE);
  582. patchJump(ifpatch);
  583. compileIfStatement();
  584. patchJump(exit_jump);
  585. } else if (match(TK("else"))) {
  586. int exit_jump = emitCode(OP_JUMP_ABSOLUTE);
  587. patchJump(ifpatch);
  588. compileBlockBody();
  589. patchJump(exit_jump);
  590. } else {
  591. patchJump(ifpatch);
  592. }
  593. }
  594. Loop& enterLoop(bool forLoop){
  595. Loop lp(forLoop, (int)getCode()->co_code.size());
  596. loops.push(lp);
  597. return loops.top();
  598. }
  599. void exitLoop(){
  600. Loop& lp = loops.top();
  601. for(int addr : lp.breaks) patchJump(addr);
  602. loops.pop();
  603. }
  604. void compileWhileLoop() {
  605. Loop& loop = enterLoop(false);
  606. EXPR_TUPLE();
  607. int patch = emitCode(OP_POP_JUMP_IF_FALSE);
  608. compileBlockBody();
  609. emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine();
  610. patchJump(patch);
  611. exitLoop();
  612. }
  613. void EXPR_FOR_VARS(){
  614. int size = 0;
  615. do {
  616. consume(TK("@id"));
  617. exprName(); size++;
  618. } while (match(TK(",")));
  619. if(size > 1) emitCode(OP_BUILD_SMART_TUPLE, size);
  620. }
  621. void compileForLoop() {
  622. EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE();
  623. emitCode(OP_GET_ITER);
  624. Loop& loop = enterLoop(true);
  625. int patch = emitCode(OP_FOR_ITER);
  626. compileBlockBody();
  627. emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine();
  628. patchJump(patch);
  629. exitLoop();
  630. }
  631. void compileStatement() {
  632. if (match(TK("break"))) {
  633. if (loops.empty()) throw SyntaxError(path, parser->previous, "'break' outside loop");
  634. consumeEndStatement();
  635. if(getLoop().forLoop) emitCode(OP_POP_TOP); // pop the iterator of for loop.
  636. int patch = emitCode(OP_JUMP_ABSOLUTE);
  637. getLoop().breaks.push_back(patch);
  638. } else if (match(TK("continue"))) {
  639. if (loops.empty()) {
  640. throw SyntaxError(path, parser->previous, "'continue' not properly in loop");
  641. }
  642. consumeEndStatement();
  643. emitCode(OP_JUMP_ABSOLUTE, getLoop().start);
  644. } else if (match(TK("return"))) {
  645. if (codes.size() == 1)
  646. throw SyntaxError(path, parser->previous, "'return' outside function");
  647. if(matchEndStatement()){
  648. emitCode(OP_LOAD_NONE);
  649. }else{
  650. EXPR_TUPLE();
  651. consumeEndStatement();
  652. }
  653. emitCode(OP_RETURN_VALUE);
  654. } else if (match(TK("if"))) {
  655. compileIfStatement();
  656. } else if (match(TK("while"))) {
  657. compileWhileLoop();
  658. } else if (match(TK("for"))) {
  659. compileForLoop();
  660. } else if(match(TK("assert"))){
  661. EXPR();
  662. emitCode(OP_ASSERT);
  663. consumeEndStatement();
  664. } else if(match(TK("raise"))){
  665. consume(TK("@id")); // dummy exception type
  666. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(parser->previous.str())));
  667. consume(TK("("));EXPR();consume(TK(")"));
  668. emitCode(OP_RAISE_ERROR);
  669. consumeEndStatement();
  670. } else if(match(TK("del"))){
  671. EXPR();
  672. emitCode(OP_DELETE_PTR);
  673. consumeEndStatement();
  674. } else if(match(TK("pass"))){
  675. consumeEndStatement();
  676. } else {
  677. EXPR_ANY();
  678. consumeEndStatement();
  679. // If last op is not an assignment, pop the result.
  680. uint8_t lastOp = getCode()->co_code.back().op;
  681. if( lastOp != OP_STORE_NAME_PTR && lastOp != OP_STORE_PTR){
  682. if(mode==SINGLE_MODE && parser->indents.top() == 0){
  683. emitCode(OP_PRINT_EXPR);
  684. }
  685. emitCode(OP_POP_TOP);
  686. }
  687. }
  688. }
  689. void compileClass(){
  690. consume(TK("@id"));
  691. int clsNameIdx = getCode()->addName(parser->previous.str(), NAME_GLOBAL);
  692. int superClsNameIdx = -1;
  693. if(match(TK("("))){
  694. consume(TK("@id"));
  695. superClsNameIdx = getCode()->addName(parser->previous.str(), NAME_GLOBAL);
  696. consume(TK(")"));
  697. }
  698. emitCode(OP_LOAD_NONE);
  699. isCompilingClass = true;
  700. __compileBlockBody(&Compiler::compileFunction);
  701. isCompilingClass = false;
  702. if(superClsNameIdx == -1) emitCode(OP_LOAD_NONE);
  703. else emitCode(OP_LOAD_NAME_PTR, superClsNameIdx);
  704. emitCode(OP_BUILD_CLASS, clsNameIdx);
  705. }
  706. void __compileFunctionArgs(_Func& func){
  707. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  708. do {
  709. if(state == 3){
  710. throw SyntaxError(path, parser->previous, "**kwargs should be the last argument");
  711. }
  712. matchNewLines();
  713. if(match(TK("*"))){
  714. if(state < 1) state = 1;
  715. else throw SyntaxError(path, parser->previous, "*args should be placed before **kwargs");
  716. }
  717. else if(match(TK("**"))){
  718. state = 3;
  719. }
  720. consume(TK("@id"));
  721. const _Str& name = parser->previous.str();
  722. if(func.hasName(name)) throw SyntaxError(path, parser->previous, "duplicate argument name");
  723. if(state == 0 && peek() == TK("=")) state = 2;
  724. switch (state)
  725. {
  726. case 0: func.args.push_back(name); break;
  727. case 1: func.starredArg = name; state+=1; break;
  728. case 2: consume(TK("=")); func.kwArgs[name] = consumeLiteral(); break;
  729. case 3: func.doubleStarredArg = name; break;
  730. }
  731. } while (match(TK(",")));
  732. }
  733. void compileFunction(){
  734. if(isCompilingClass){
  735. if(match(TK("pass"))) return;
  736. consume(TK("def"));
  737. }
  738. _Func func;
  739. consume(TK("@id"));
  740. func.name = parser->previous.str();
  741. if (match(TK("(")) && !match(TK(")"))) {
  742. __compileFunctionArgs(func);
  743. consume(TK(")"));
  744. }
  745. func.code = std::make_shared<CodeObject>();
  746. func.code->co_name = func.name;
  747. func.code->co_filename = path;
  748. this->codes.push(func.code);
  749. compileBlockBody();
  750. this->codes.pop();
  751. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyFunction(func)));
  752. if(!isCompilingClass) emitCode(OP_STORE_FUNCTION);
  753. }
  754. PyVar consumeLiteral(){
  755. if(match(TK("@num"))) goto __LITERAL_EXIT;
  756. if(match(TK("@str"))) goto __LITERAL_EXIT;
  757. if(match(TK("True"))) goto __LITERAL_EXIT;
  758. if(match(TK("False"))) goto __LITERAL_EXIT;
  759. if(match(TK("None"))) goto __LITERAL_EXIT;
  760. throw SyntaxError(path, parser->previous, "expect a literal, not %s", TK_STR(parser->current.type));
  761. __LITERAL_EXIT:
  762. return parser->previous.value;
  763. }
  764. void compileTopLevelStatement() {
  765. if (match(TK("class"))) {
  766. compileClass();
  767. } else if (match(TK("def"))) {
  768. compileFunction();
  769. } else if (match(TK("import"))) {
  770. compileRegularImport();
  771. } else {
  772. compileStatement();
  773. }
  774. }
  775. void __fillCode(){
  776. // Lex initial tokens. current <-- next.
  777. lexToken();
  778. lexToken();
  779. matchNewLines();
  780. if(mode == EVAL_MODE) {
  781. EXPR_TUPLE();
  782. consume(TK("@eof"));
  783. return;
  784. }
  785. while (!match(TK("@eof"))) {
  786. compileTopLevelStatement();
  787. matchNewLines();
  788. }
  789. }
  790. };
  791. _Code compile(VM* vm, const char* source, _Str filename, CompileMode mode=EXEC_MODE) {
  792. // Skip utf8 BOM if there is any.
  793. if (strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3;
  794. _Code code = std::make_shared<CodeObject>();
  795. code->co_filename = filename;
  796. code->mode = mode;
  797. Compiler compiler(vm, source, code);
  798. compiler.__fillCode();
  799. return code;
  800. }