compiler.h 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755
  #pragma once

  #include <cerrno>
  #include <climits>
  #include <cstdlib>
  #include <cstring>
  #include <string>
  #include <vector>

  #include "parser.h"
  #include "error.h"
  #include "vm.h"
  // Forward declaration so the member-function-pointer aliases below can name the class.
  class Compiler;

  // Parameterless Compiler member used as a prefix/infix handler in the grammar table.
  using GrammarFn = void (Compiler::*)();

  // Parameterless Compiler member that a block body invokes repeatedly until it sees a dedent.
  using CompilerAction = void (Compiler::*)();
  // One entry of the Pratt-parser table kept in Compiler::rules (one per token type).
  struct GrammarRule{
  // Handler run when the token starts an expression; nullptr means the token cannot start one.
  GrammarFn prefix;
  // Handler run when the token follows an already-parsed left operand; nullptr if it has none.
  GrammarFn infix;
  // Binding power compared against the caller's minimum in Compiler::parsePrecedence.
  Precedence precedence;
  };
  // Bookkeeping for one loop that is currently being compiled.
  struct Loop {
    bool forLoop;             // true for a `for` loop, false for a `while` loop
    int start;                // bytecode index that `continue` and the loop's back-jump target
    std::vector<int> breaks;  // indices of `break` jumps that still need their target patched

    // A freshly entered loop has no pending `break` jumps.
    Loop(bool forLoop, int start) : forLoop{forLoop}, start{start}, breaks{} {}
  };
  // Parses a comma-separated list of expressions up to and including the closing
  // token `end` (given as the token's string, e.g. "]" or ")"), compiling each
  // element with EXPR().
  //  - Declares `int ARGC` in the *enclosing* scope and leaves the element count in
  //    it, so the macro can be expanded at most once per scope.
  //  - Newline tokens are skipped before and after every element and before the closer.
  //  - An empty list and a trailing comma are both accepted: the `peek() == TK(end)`
  //    check leaves the loop before another element is parsed.
  // NOTE: keep comments out of the definition itself; a `//` comment on a continued
  // line would absorb the following lines of the macro.
  #define ExprCommaSplitArgs(end) \
  int ARGC = 0; \
  do { \
  matchNewLines(); \
  if (peek() == TK(end)) break; \
  EXPR(); \
  ARGC++; \
  matchNewLines(); \
  } while (match(TK(","))); \
  matchNewLines(); \
  consume(TK(end));
  33. class Compiler {
  34. public:
  // Tokenizer state (previous/current token, cursor into the source); created in the constructor.
  std::unique_ptr<Parser> parser;
  // When true, block headers may throw NeedMoreLines at end of input and
  // unindented expression statements emit OP_PRINT_EXPR.
  bool repl_mode;
  // Code objects under construction; the top one receives emitted bytecode.
  // compileFunction pushes a fresh one while a function body is compiled.
  std::stack<_Code> codes;
  // Loops currently being compiled, innermost on top (used by break/continue).
  std::stack<Loop> loops;
  // Set by compileClass while the class body is being compiled.
  bool isCompilingClass = false;
  // File name reported in SyntaxError; replaced by the code object's co_filename when that is non-empty.
  _Str path = "<?>";
  // VM used to create constant objects (PyStr, PyInt, PyFloat, PyFunction) and to obtain None.
  VM* vm;
  // Grammar table: token type -> prefix/infix handlers and precedence.
  std::unordered_map<_TokenType, GrammarRule> rules;
  43. _Code getCode() {
  44. return codes.top();
  45. }
  46. Loop& getLoop() {
  47. return loops.top();
  48. }
  49. Compiler(VM* vm, const char* source, _Code code, bool repl_mode){
  50. this->vm = vm;
  51. this->codes.push(code);
  52. this->repl_mode = repl_mode;
  53. if (!code->co_filename.empty()) path = code->co_filename;
  54. this->parser = std::make_unique<Parser>(source);
  55. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  56. #define METHOD(name) &Compiler::name
  57. #define NO_INFIX nullptr, PREC_NONE
  58. for(_TokenType i=0; i<__TOKENS_LEN; i++) rules[i] = { nullptr, NO_INFIX };
  59. rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB };
  60. rules[TK("(")] = { METHOD(exprGrouping), METHOD(exprCall), PREC_CALL };
  61. rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscript), PREC_SUBSCRIPT };
  62. rules[TK("{")] = { METHOD(exprMap), NO_INFIX };
  63. rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  64. rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM };
  65. rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM };
  66. rules[TK("*")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  67. rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  68. rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  69. rules[TK("**")] = { nullptr, METHOD(exprBinaryOp), PREC_EXPONENT };
  70. rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  71. rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  72. rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  73. rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  74. rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  75. rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  76. rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  77. rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  78. rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  79. rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  80. rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND };
  81. rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR };
  82. rules[TK("not")] = { METHOD(exprUnaryOp), nullptr, PREC_UNARY };
  83. rules[TK("True")] = { METHOD(exprValue), NO_INFIX };
  84. rules[TK("False")] = { METHOD(exprValue), NO_INFIX };
  85. rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX };
  86. rules[TK("None")] = { METHOD(exprValue), NO_INFIX };
  87. rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
  88. rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
  89. rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
  90. rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  91. rules[TK("+=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  92. rules[TK("-=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  93. rules[TK("*=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  94. rules[TK("/=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  95. rules[TK("//=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  96. rules[TK(",")] = { nullptr, METHOD(exprComma), PREC_COMMA };
  97. #undef METHOD
  98. #undef NO_INFIX
  99. #define EXPR() parsePrecedence(PREC_COMMA) // no '=' and ',' just a simple expression
  100. #define EXPR_TUPLE() parsePrecedence(PREC_ASSIGNMENT) // no '=', but ',' is allowed
  101. #define EXPR_ANY() parsePrecedence(PREC_NONE)
  102. }
  103. void eatString(bool single_quote) {
  104. std::vector<char> buff;
  105. char quote = (single_quote) ? '\'' : '"';
  106. while (true) {
  107. char c = parser->eatChar();
  108. if (c == quote) break;
  109. if (c == '\0')
  110. throw SyntaxError(path, parser->makeErrToken(), "EOL while scanning string literal");
  111. if (c == '\\') {
  112. switch (parser->eatCharIncludeNewLine()) {
  113. case '"': buff.push_back('"'); break;
  114. case '\'': buff.push_back('\''); break;
  115. case '\\': buff.push_back('\\'); break;
  116. case 'n': buff.push_back('\n'); break;
  117. case 'r': buff.push_back('\r'); break;
  118. case 't': buff.push_back('\t'); break;
  119. case '\n': break; // Just ignore the next line.
  120. case '\r': if (parser->matchChar('\n')) break;
  121. default: throw SyntaxError(path, parser->makeErrToken(), "invalid syntax");
  122. }
  123. } else {
  124. buff.push_back(c);
  125. }
  126. }
  127. parser->setNextToken(TK("@str"), vm->PyStr(_Str(buff.data(), buff.size())));
  128. }
  129. void eatNumber() {
  130. char c = *(parser->token_start);
  131. bool is_float = false;
  132. while (isdigit(parser->peekChar())) parser->eatChar();
  133. if (parser->peekChar() == '.' && isdigit(parser->peekNextChar())) {
  134. parser->matchChar('.');
  135. is_float = true;
  136. while (isdigit(parser->peekChar())) parser->eatChar();
  137. }
  138. errno = 0;
  139. PyVar value = vm->None;
  140. if(is_float){
  141. value = vm->PyFloat(atof(parser->token_start));
  142. } else {
  143. value = vm->PyInt(atoi(parser->token_start));
  144. }
  145. if (errno == ERANGE) {
  146. const char* start = parser->token_start;
  147. int len = (int)(parser->current_char - start);
  148. throw SyntaxError(path, parser->makeErrToken(), "number literal too large: %.*s", len, start);
  149. }
  150. parser->setNextToken(TK("@num"), value);
  151. }
  152. // Lex the next token and set it as the next token.
  153. void lexToken() {
  154. parser->previous = parser->current;
  155. parser->current = parser->nextToken();
  156. //printf("<%s> ", TK_STR(peek()));
  157. while (parser->peekChar() != '\0') {
  158. parser->token_start = parser->current_char;
  159. char c = parser->eatCharIncludeNewLine();
  160. switch (c) {
  161. case '"': eatString(false); return;
  162. case '\'': eatString(true); return;
  163. case '#': parser->skipLineComment(); break;
  164. case '{': parser->setNextToken(TK("{")); return;
  165. case '}': parser->setNextToken(TK("}")); return;
  166. case ',': parser->setNextToken(TK(",")); return;
  167. case ':': parser->setNextToken(TK(":")); return;
  168. case ';': parser->setNextToken(TK(";")); return;
  169. case '(': parser->setNextToken(TK("(")); return;
  170. case ')': parser->setNextToken(TK(")")); return;
  171. case '[': parser->setNextToken(TK("[")); return;
  172. case ']': parser->setNextToken(TK("]")); return;
  173. case '%': parser->setNextToken(TK("%")); return;
  174. case '.': parser->setNextToken(TK(".")); return;
  175. case '=': parser->setNextTwoCharToken('=', TK("="), TK("==")); return;
  176. case '>': parser->setNextTwoCharToken('=', TK(">"), TK(">=")); return;
  177. case '<': parser->setNextTwoCharToken('=', TK("<"), TK("<=")); return;
  178. case '+': parser->setNextTwoCharToken('=', TK("+"), TK("+=")); return;
  179. case '-': parser->setNextTwoCharToken('=', TK("-"), TK("-=")); return;
  180. case '!':
  181. if(parser->matchChar('=')) parser->setNextToken(TK("!="));
  182. else SyntaxError(path, parser->makeErrToken(), "expected '=' after '!'");
  183. break;
  184. case '*':
  185. if (parser->matchChar('*')) {
  186. parser->setNextToken(TK("**")); // '**'
  187. } else {
  188. parser->setNextTwoCharToken('=', TK("*"), TK("*="));
  189. }
  190. return;
  191. case '/':
  192. if(parser->matchChar('/')) {
  193. parser->setNextTwoCharToken('=', TK("//"), TK("//="));
  194. } else {
  195. parser->setNextTwoCharToken('=', TK("/"), TK("/="));
  196. }
  197. return;
  198. case '\r': break; // just ignore '\r'
  199. case ' ': case '\t': parser->eatSpaces(); break;
  200. case '\n': {
  201. parser->setNextToken(TK("@eol"));
  202. while(parser->matchChar('\n'));
  203. if(!parser->eatIndentation())
  204. throw SyntaxError(path, parser->makeErrToken(), "unindent does not match any outer indentation level");
  205. return;
  206. }
  207. default: {
  208. if (isdigit(c)) {
  209. eatNumber();
  210. } else if (isalpha(c) || c=='_') {
  211. parser->eatName();
  212. } else {
  213. throw SyntaxError(path, parser->makeErrToken(), "unknown character: %c", c);
  214. }
  215. return;
  216. }
  217. }
  218. }
  219. parser->token_start = parser->current_char;
  220. parser->setNextToken(TK("@eof"));
  221. }
  222. _TokenType peek() {
  223. return parser->current.type;
  224. }
  225. bool match(_TokenType expected) {
  226. if (peek() != expected) return false;
  227. lexToken();
  228. return true;
  229. }
  230. void consume(_TokenType expected) {
  231. lexToken();
  232. Token prev = parser->previous;
  233. if (prev.type != expected){
  234. throw SyntaxError(path, prev, "expected '%s', but got '%s'", TK_STR(expected), TK_STR(prev.type));
  235. }
  236. }
  237. bool matchNewLines(bool repl_throw=false) {
  238. bool consumed = false;
  239. if (peek() == TK("@eol")) {
  240. while (peek() == TK("@eol")) lexToken();
  241. consumed = true;
  242. }
  243. if (repl_throw && peek() == TK("@eof")){
  244. throw NeedMoreLines();
  245. }
  246. return consumed;
  247. }
  248. bool matchEndStatement() {
  249. if (match(TK(";"))) {
  250. matchNewLines();
  251. return true;
  252. }
  253. if (matchNewLines() || peek() == TK("@eof"))
  254. return true;
  255. if (peek() == TK("@dedent")) return true;
  256. return false;
  257. }
  258. void consumeEndStatement() {
  259. if (!matchEndStatement())
  260. throw SyntaxError(path, parser->current, "expected statement end");
  261. }
  262. void exprLiteral() {
  263. PyVar value = parser->previous.value;
  264. int index = getCode()->addConst(value);
  265. emitCode(OP_LOAD_CONST, index);
  266. }
  267. void exprLambda() {
  268. }
  269. void exprAssign() {
  270. _TokenType op = parser->previous.type;
  271. if(op == TK("=")) { // a = (expr)
  272. EXPR_TUPLE();
  273. emitCode(OP_STORE_PTR);
  274. }else{ // a += (expr) -> a = a + (expr)
  275. // TODO: optimization is needed for inplace operators
  276. emitCode(OP_DUP_TOP);
  277. EXPR();
  278. switch (op) {
  279. case TK("+="): emitCode(OP_BINARY_OP, 0); break;
  280. case TK("-="): emitCode(OP_BINARY_OP, 1); break;
  281. case TK("*="): emitCode(OP_BINARY_OP, 2); break;
  282. case TK("/="): emitCode(OP_BINARY_OP, 3); break;
  283. case TK("//="): emitCode(OP_BINARY_OP, 4); break;
  284. default: UNREACHABLE();
  285. }
  286. emitCode(OP_STORE_PTR);
  287. }
  288. }
  289. void exprComma() {
  290. int size = 1; // an expr is in the stack now
  291. do {
  292. EXPR(); // NOTE: "1," will fail, "1,2" will be ok
  293. size++;
  294. } while(match(TK(",")));
  295. emitCode(OP_BUILD_SMART_TUPLE, size);
  296. }
  297. void exprOr() {
  298. int patch = emitCode(OP_JUMP_IF_TRUE_OR_POP);
  299. parsePrecedence(PREC_LOGICAL_OR);
  300. patchJump(patch);
  301. }
  302. void exprAnd() {
  303. int patch = emitCode(OP_JUMP_IF_FALSE_OR_POP);
  304. parsePrecedence(PREC_LOGICAL_AND);
  305. patchJump(patch);
  306. }
  307. void exprBinaryOp() {
  308. _TokenType op = parser->previous.type;
  309. parsePrecedence((Precedence)(rules[op].precedence + 1));
  310. switch (op) {
  311. case TK("+"): emitCode(OP_BINARY_OP, 0); break;
  312. case TK("-"): emitCode(OP_BINARY_OP, 1); break;
  313. case TK("*"): emitCode(OP_BINARY_OP, 2); break;
  314. case TK("/"): emitCode(OP_BINARY_OP, 3); break;
  315. case TK("//"): emitCode(OP_BINARY_OP, 4); break;
  316. case TK("%"): emitCode(OP_BINARY_OP, 5); break;
  317. case TK("**"): emitCode(OP_BINARY_OP, 6); break;
  318. case TK("<"): emitCode(OP_COMPARE_OP, 0); break;
  319. case TK("<="): emitCode(OP_COMPARE_OP, 1); break;
  320. case TK("=="): emitCode(OP_COMPARE_OP, 2); break;
  321. case TK("!="): emitCode(OP_COMPARE_OP, 3); break;
  322. case TK(">"): emitCode(OP_COMPARE_OP, 4); break;
  323. case TK(">="): emitCode(OP_COMPARE_OP, 5); break;
  324. case TK("in"): emitCode(OP_CONTAINS_OP, 0); break;
  325. case TK("not in"): emitCode(OP_CONTAINS_OP, 1); break;
  326. case TK("is"): emitCode(OP_IS_OP, 0); break;
  327. case TK("is not"): emitCode(OP_IS_OP, 1); break;
  328. default: UNREACHABLE();
  329. }
  330. }
  331. void exprUnaryOp() {
  332. _TokenType op = parser->previous.type;
  333. matchNewLines();
  334. parsePrecedence((Precedence)(PREC_UNARY + 1));
  335. switch (op) {
  336. case TK("-"): emitCode(OP_UNARY_NEGATIVE); break;
  337. case TK("not"): emitCode(OP_UNARY_NOT); break;
  338. default: UNREACHABLE();
  339. }
  340. }
  341. void exprGrouping() {
  342. matchNewLines();
  343. EXPR_TUPLE();
  344. matchNewLines();
  345. consume(TK(")"));
  346. }
  347. void exprList() {
  348. ExprCommaSplitArgs("]");
  349. emitCode(OP_BUILD_LIST, ARGC);
  350. }
  351. void exprMap() {
  352. int size = 0;
  353. do {
  354. matchNewLines();
  355. if (peek() == TK("}")) break;
  356. EXPR();consume(TK(":"));EXPR();
  357. emitCode(OP_BUILD_SMART_TUPLE, 2);
  358. size++;
  359. matchNewLines();
  360. } while (match(TK(",")));
  361. matchNewLines();
  362. consume(TK("}"));
  363. emitCode(OP_BUILD_MAP, size);
  364. }
  365. void exprCall() {
  366. ExprCommaSplitArgs(")");
  367. emitCode(OP_CALL, ARGC);
  368. }
  369. void exprName() {
  370. Token tkname = parser->previous;
  371. int index = getCode()->addName(
  372. tkname.str(),
  373. codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL
  374. );
  375. emitCode(OP_LOAD_NAME_PTR, index);
  376. }
  377. void exprAttrib() {
  378. consume(TK("@id"));
  379. const _Str& name = parser->previous.str();
  380. int index = getCode()->addName(name, NAME_ATTR);
  381. emitCode(OP_BUILD_ATTR_PTR, index);
  382. }
  383. // [:], [:b]
  384. // [a], [a:], [a:b]
  385. void exprSubscript() {
  386. if(match(TK(":"))){
  387. emitCode(OP_LOAD_NONE);
  388. if(match(TK("]"))){
  389. emitCode(OP_LOAD_NONE);
  390. }else{
  391. EXPR();
  392. consume(TK("]"));
  393. }
  394. emitCode(OP_BUILD_SLICE);
  395. }else{
  396. EXPR();
  397. if(match(TK(":"))){
  398. if(match(TK("]"))){
  399. emitCode(OP_LOAD_NONE);
  400. }else{
  401. EXPR();
  402. consume(TK("]"));
  403. }
  404. emitCode(OP_BUILD_SLICE);
  405. }else{
  406. consume(TK("]"));
  407. }
  408. }
  409. emitCode(OP_BUILD_INDEX_PTR);
  410. }
  411. void exprValue() {
  412. _TokenType op = parser->previous.type;
  413. switch (op) {
  414. case TK("None"): emitCode(OP_LOAD_NONE); break;
  415. case TK("True"): emitCode(OP_LOAD_TRUE); break;
  416. case TK("False"): emitCode(OP_LOAD_FALSE); break;
  417. default: UNREACHABLE();
  418. }
  419. }
  420. void keepOpcodeLine(){
  421. int i = getCode()->co_code.size() - 1;
  422. getCode()->co_code[i].line = getCode()->co_code[i-1].line;
  423. }
  424. int emitCode(Opcode opcode, int arg=-1) {
  425. int line = parser->previous.line;
  426. getCode()->co_code.push_back(
  427. ByteCode{(uint8_t)opcode, arg, (uint16_t)line}
  428. );
  429. return getCode()->co_code.size() - 1;
  430. }
  431. void patchJump(int addr_index) {
  432. int target = getCode()->co_code.size();
  433. getCode()->co_code[addr_index].arg = target;
  434. }
  435. void compileBlockBody(){
  436. __compileBlockBody(&Compiler::compileStatement);
  437. }
  438. void __compileBlockBody(CompilerAction action) {
  439. consume(TK(":"));
  440. if(!matchNewLines(repl_mode)){
  441. throw SyntaxError(path, parser->previous, "expected a new line after ':'");
  442. }
  443. consume(TK("@indent"));
  444. while (peek() != TK("@dedent")) {
  445. (this->*action)();
  446. matchNewLines();
  447. }
  448. consume(TK("@dedent"));
  449. }
  450. Token compileImportPath() {
  451. consume(TK("@id"));
  452. Token tkmodule = parser->previous;
  453. int index = getCode()->addName(tkmodule.str(), NAME_GLOBAL);
  454. emitCode(OP_IMPORT_NAME, index);
  455. return tkmodule;
  456. }
  457. // import module1 [as alias1 [, module2 [as alias2 ...]]
  458. void compileRegularImport() {
  459. do {
  460. Token tkmodule = compileImportPath();
  461. if (match(TK("as"))) {
  462. consume(TK("@id"));
  463. tkmodule = parser->previous;
  464. }
  465. int index = getCode()->addName(tkmodule.str(), NAME_GLOBAL);
  466. emitCode(OP_STORE_NAME_PTR, index);
  467. } while (match(TK(",")));
  468. consumeEndStatement();
  469. }
  470. void parsePrecedence(Precedence precedence) {
  471. lexToken();
  472. GrammarFn prefix = rules[parser->previous.type].prefix;
  473. if (prefix == nullptr) {
  474. throw SyntaxError(path, parser->previous, "expected an expression");
  475. }
  476. (this->*prefix)();
  477. while (rules[peek()].precedence > precedence) {
  478. lexToken();
  479. _TokenType op = parser->previous.type;
  480. GrammarFn infix = rules[op].infix;
  481. (this->*infix)();
  482. }
  483. }
  484. void compileIfStatement() {
  485. matchNewLines();
  486. EXPR_TUPLE();
  487. int ifpatch = emitCode(OP_POP_JUMP_IF_FALSE);
  488. compileBlockBody();
  489. if (match(TK("elif"))) {
  490. int exit_jump = emitCode(OP_JUMP_ABSOLUTE);
  491. patchJump(ifpatch);
  492. compileIfStatement();
  493. patchJump(exit_jump);
  494. } else if (match(TK("else"))) {
  495. int exit_jump = emitCode(OP_JUMP_ABSOLUTE);
  496. patchJump(ifpatch);
  497. compileBlockBody();
  498. patchJump(exit_jump);
  499. } else {
  500. patchJump(ifpatch);
  501. }
  502. }
  503. Loop& enterLoop(bool forLoop){
  504. Loop lp(forLoop, (int)getCode()->co_code.size());
  505. loops.push(lp);
  506. return loops.top();
  507. }
  508. void exitLoop(){
  509. Loop& lp = loops.top();
  510. for(int addr : lp.breaks) patchJump(addr);
  511. loops.pop();
  512. }
  513. void compileWhileStatement() {
  514. Loop& loop = enterLoop(false);
  515. EXPR_TUPLE();
  516. int patch = emitCode(OP_POP_JUMP_IF_FALSE);
  517. compileBlockBody();
  518. emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine();
  519. patchJump(patch);
  520. exitLoop();
  521. }
  522. void compileForStatement() {
  523. consume(TK("@id"));
  524. int iterIndex = getCode()->addName(
  525. parser->previous.str(),
  526. codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL
  527. );
  528. consume(TK("in"));
  529. EXPR_TUPLE();
  530. emitCode(OP_GET_ITER);
  531. Loop& loop = enterLoop(true);
  532. int patch = emitCode(OP_FOR_ITER);
  533. emitCode(OP_STORE_NAME_PTR, iterIndex);
  534. compileBlockBody();
  535. emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine();
  536. patchJump(patch);
  537. exitLoop();
  538. }
  539. void compileStatement() {
  540. if (match(TK("break"))) {
  541. if (loops.empty()) throw SyntaxError(path, parser->previous, "'break' outside loop");
  542. consumeEndStatement();
  543. if(getLoop().forLoop) emitCode(OP_POP_TOP); // pop the iterator of for loop.
  544. int patch = emitCode(OP_JUMP_ABSOLUTE);
  545. getLoop().breaks.push_back(patch);
  546. } else if (match(TK("continue"))) {
  547. if (loops.empty()) {
  548. throw SyntaxError(path, parser->previous, "'continue' not properly in loop");
  549. }
  550. consumeEndStatement();
  551. emitCode(OP_JUMP_ABSOLUTE, getLoop().start);
  552. } else if (match(TK("return"))) {
  553. if (codes.size() == 1)
  554. throw SyntaxError(path, parser->previous, "'return' outside function");
  555. if(matchEndStatement()){
  556. emitCode(OP_LOAD_NONE);
  557. }else{
  558. EXPR_TUPLE();
  559. consumeEndStatement();
  560. }
  561. emitCode(OP_RETURN_VALUE);
  562. } else if (match(TK("if"))) {
  563. compileIfStatement();
  564. } else if (match(TK("while"))) {
  565. compileWhileStatement();
  566. } else if (match(TK("for"))) {
  567. compileForStatement();
  568. } else if(match(TK("assert"))){
  569. EXPR();
  570. emitCode(OP_ASSERT);
  571. consumeEndStatement();
  572. } else if(match(TK("raise"))){
  573. consume(TK("@id")); // dummy exception type
  574. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(parser->previous.str())));
  575. consume(TK("("));EXPR();consume(TK(")"));
  576. emitCode(OP_RAISE_ERROR);
  577. consumeEndStatement();
  578. } else if(match(TK("del"))){
  579. EXPR();
  580. emitCode(OP_DELETE_PTR);
  581. consumeEndStatement();
  582. } else if(match(TK("pass"))){
  583. consumeEndStatement();
  584. } else {
  585. EXPR_ANY();
  586. consumeEndStatement();
  587. // If last op is not an assignment, pop the result.
  588. uint8_t lastOp = getCode()->co_code.back().op;
  589. if( lastOp != OP_STORE_NAME_PTR && lastOp != OP_STORE_PTR){
  590. if(repl_mode && parser->indents.top() == 0){
  591. emitCode(OP_PRINT_EXPR);
  592. }
  593. emitCode(OP_POP_TOP);
  594. }
  595. }
  596. }
  597. void compileClass(){
  598. consume(TK("@id"));
  599. int clsNameIdx = getCode()->addName(parser->previous.str(), NAME_GLOBAL);
  600. int superClsNameIdx = -1;
  601. if(match(TK("("))){
  602. consume(TK("@id"));
  603. superClsNameIdx = getCode()->addName(parser->previous.str(), NAME_GLOBAL);
  604. consume(TK(")"));
  605. }
  606. emitCode(OP_LOAD_NONE);
  607. isCompilingClass = true;
  608. __compileBlockBody(&Compiler::compileFunction);
  609. isCompilingClass = false;
  610. if(superClsNameIdx == -1) emitCode(OP_LOAD_NONE);
  611. else emitCode(OP_LOAD_NAME_PTR, superClsNameIdx);
  612. emitCode(OP_BUILD_CLASS, clsNameIdx);
  613. }
  614. void compileFunction(){
  615. if(isCompilingClass){
  616. if(match(TK("pass"))) return;
  617. consume(TK("def"));
  618. }
  619. consume(TK("@id"));
  620. const _Str& name = parser->previous.str();
  621. std::vector<_Str> argNames;
  622. if (match(TK("(")) && !match(TK(")"))) {
  623. do {
  624. matchNewLines();
  625. consume(TK("@id"));
  626. const _Str& argName = parser->previous.str();
  627. if (std::find(argNames.begin(), argNames.end(), argName) != argNames.end()) {
  628. throw SyntaxError(path, parser->previous, "duplicate argument in function definition");
  629. }
  630. argNames.push_back(argName);
  631. } while (match(TK(",")));
  632. consume(TK(")"));
  633. }
  634. _Code fnCode = std::make_shared<CodeObject>();
  635. fnCode->co_name = name;
  636. fnCode->co_filename = path;
  637. this->codes.push(fnCode);
  638. compileBlockBody();
  639. this->codes.pop();
  640. PyVar fn = vm->PyFunction(_Func{name, fnCode, argNames});
  641. emitCode(OP_LOAD_CONST, getCode()->addConst(fn));
  642. if(!isCompilingClass) emitCode(OP_STORE_FUNCTION);
  643. }
  644. void compileTopLevelStatement() {
  645. if (match(TK("class"))) {
  646. compileClass();
  647. } else if (match(TK("def"))) {
  648. compileFunction();
  649. } else if (match(TK("import"))) {
  650. compileRegularImport();
  651. } else {
  652. compileStatement();
  653. }
  654. }
  655. };
  656. _Code compile(VM* vm, const char* source, _Str filename, bool repl_mode=false) {
  657. // Skip utf8 BOM if there is any.
  658. if (strncmp(source, "\xEF\xBB\xBF", 3) == 0) source += 3;
  659. _Code code = std::make_shared<CodeObject>();
  660. code->co_filename = filename;
  661. Compiler compiler(vm, source, code, repl_mode);
  662. // Lex initial tokens. current <-- next.
  663. compiler.lexToken();
  664. compiler.lexToken();
  665. compiler.matchNewLines();
  666. while (!compiler.match(TK("@eof"))) {
  667. compiler.compileTopLevelStatement();
  668. compiler.matchNewLines();
  669. }
  670. return code;
  671. }