compiler.h 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950
  1. #pragma once
  2. #include "parser.h"
  3. #include "error.h"
  4. #include "vm.h"
  5. class Compiler;
  6. typedef void (Compiler::*GrammarFn)();
  7. typedef void (Compiler::*CompilerAction)();
  8. struct GrammarRule{
  9. GrammarFn prefix;
  10. GrammarFn infix;
  11. Precedence precedence;
  12. };
  13. struct Loop {
  14. int start;
  15. std::vector<int> breaks;
  16. Loop(int start) : start(start) {}
  17. };
  18. class Compiler {
  19. public:
  20. std::unique_ptr<Parser> parser;
  21. std::stack<_Code> codes;
  22. std::stack<Loop> loops;
  23. bool isCompilingClass = false;
  24. VM* vm;
  25. std::unordered_map<_TokenType, GrammarRule> rules;
  26. _Code getCode() {
  27. return codes.top();
  28. }
  29. CompileMode mode() {
  30. return parser->src->mode;
  31. }
  32. Loop& getLoop() {
  33. return loops.top();
  34. }
  35. Compiler(VM* vm, const char* source, _Str filename, CompileMode mode){
  36. this->vm = vm;
  37. this->parser = std::make_unique<Parser>(
  38. std::make_shared<SourceMetadata>(source, filename, mode)
  39. );
  40. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  41. #define METHOD(name) &Compiler::name
  42. #define NO_INFIX nullptr, PREC_NONE
  43. for(_TokenType i=0; i<__TOKENS_LEN; i++) rules[i] = { nullptr, NO_INFIX };
  44. rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB };
  45. rules[TK("(")] = { METHOD(exprGrouping), METHOD(exprCall), PREC_CALL };
  46. rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscript), PREC_SUBSCRIPT };
  47. rules[TK("{")] = { METHOD(exprMap), NO_INFIX };
  48. rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  49. rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM };
  50. rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM };
  51. rules[TK("*")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  52. rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  53. rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  54. rules[TK("**")] = { nullptr, METHOD(exprBinaryOp), PREC_EXPONENT };
  55. rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  56. rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  57. rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  58. rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  59. rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  60. rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  61. rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  62. rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  63. rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  64. rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  65. rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND };
  66. rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR };
  67. rules[TK("not")] = { METHOD(exprUnaryOp), nullptr, PREC_UNARY };
  68. rules[TK("True")] = { METHOD(exprValue), NO_INFIX };
  69. rules[TK("False")] = { METHOD(exprValue), NO_INFIX };
  70. rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX };
  71. rules[TK("None")] = { METHOD(exprValue), NO_INFIX };
  72. rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
  73. rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
  74. rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
  75. rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
  76. rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  77. rules[TK("+=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  78. rules[TK("-=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  79. rules[TK("*=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  80. rules[TK("/=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  81. rules[TK("//=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  82. rules[TK(",")] = { nullptr, METHOD(exprComma), PREC_COMMA };
  83. rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  84. rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  85. rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND };
  86. rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR };
  87. rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  88. #undef METHOD
  89. #undef NO_INFIX
  90. #define EXPR() parsePrecedence(PREC_LOGICAL_OR) // no '=' and ',' just a simple expression
  91. #define EXPR_TUPLE() parsePrecedence(PREC_COMMA) // no '=', but ',' is allowed
  92. #define EXPR_ANY() parsePrecedence(PREC_ASSIGNMENT)
  93. }
  94. _Str eatStringUntil(char quote) {
  95. std::vector<char> buff;
  96. while (true) {
  97. char c = parser->eatCharIncludeNewLine();
  98. if (c == quote) break;
  99. if (c == '\0' || c == '\n') syntaxError("EOL while scanning string literal");
  100. if (c == '\\') {
  101. switch (parser->eatCharIncludeNewLine()) {
  102. case '"': buff.push_back('"'); break;
  103. case '\'': buff.push_back('\''); break;
  104. case '\\': buff.push_back('\\'); break;
  105. case 'n': buff.push_back('\n'); break;
  106. case 'r': buff.push_back('\r'); break;
  107. case 't': buff.push_back('\t'); break;
  108. case '\n': case '\r': break;
  109. default: syntaxError("invalid escape character");
  110. }
  111. } else {
  112. buff.push_back(c);
  113. }
  114. }
  115. return _Str(buff.data(), buff.size());
  116. }
  117. void eatString(char quote, bool fstr) {
  118. _Str s = eatStringUntil(quote);
  119. if(fstr){
  120. parser->setNextToken(TK("@fstr"), vm->PyStr(s));
  121. }else{
  122. parser->setNextToken(TK("@str"), vm->PyStr(s));
  123. }
  124. }
  125. void eatNumber() {
  126. static const std::regex pattern("^(0x)?[0-9a-f]+(\\.[0-9]+)?");
  127. std::smatch m;
  128. const char* i = parser->token_start;
  129. while(*i != '\n' && *i != '\0') i++;
  130. std::string s = std::string(parser->token_start, i);
  131. try{
  132. if (std::regex_search(s, m, pattern)) {
  133. // here is m.length()-1, since the first char is eaten by lexToken()
  134. for(int j=0; j<m.length()-1; j++) parser->eatChar();
  135. int base = 10;
  136. size_t size;
  137. if (m[1].matched) base = 16;
  138. if (m[2].matched) {
  139. if(base == 16) syntaxError("hex literal should not contain a dot");
  140. parser->setNextToken(TK("@num"), vm->PyFloat(std::stod(m[0], &size)));
  141. } else {
  142. parser->setNextToken(TK("@num"), vm->PyInt(std::stoll(m[0], &size, base)));
  143. }
  144. if (size != m.length()) throw std::runtime_error("length mismatch");
  145. }
  146. }catch(std::exception& e){
  147. syntaxError("invalid number literal");
  148. }
  149. }
  150. // Lex the next token and set it as the next token.
  151. void lexToken() {
  152. parser->previous = parser->current;
  153. parser->current = parser->nextToken();
  154. //_Str _info = parser->current.info(); printf("%s\n", (const char*)_info);
  155. while (parser->peekChar() != '\0') {
  156. parser->token_start = parser->current_char;
  157. char c = parser->eatCharIncludeNewLine();
  158. switch (c) {
  159. case '\'': case '"': eatString(c, false); return;
  160. case '#': parser->skipLineComment(); break;
  161. case '{': parser->setNextToken(TK("{")); return;
  162. case '}': parser->setNextToken(TK("}")); return;
  163. case ',': parser->setNextToken(TK(",")); return;
  164. case ':': parser->setNextToken(TK(":")); return;
  165. case ';': parser->setNextToken(TK(";")); return;
  166. case '(': parser->setNextToken(TK("(")); return;
  167. case ')': parser->setNextToken(TK(")")); return;
  168. case '[': parser->setNextToken(TK("[")); return;
  169. case ']': parser->setNextToken(TK("]")); return;
  170. case '%': parser->setNextToken(TK("%")); return;
  171. case '.': parser->setNextToken(TK(".")); return;
  172. case '&': parser->setNextToken(TK("&")); return;
  173. case '|': parser->setNextToken(TK("|")); return;
  174. case '^': parser->setNextToken(TK("^")); return;
  175. case '=': parser->setNextTwoCharToken('=', TK("="), TK("==")); return;
  176. case '+': parser->setNextTwoCharToken('=', TK("+"), TK("+=")); return;
  177. case '>': {
  178. if(parser->matchChar('=')) parser->setNextToken(TK(">="));
  179. else if(parser->matchChar('>')) parser->setNextToken(TK(">>"));
  180. else parser->setNextToken(TK(">"));
  181. return;
  182. }
  183. case '<': {
  184. if(parser->matchChar('=')) parser->setNextToken(TK("<="));
  185. else if(parser->matchChar('<')) parser->setNextToken(TK("<<"));
  186. else parser->setNextToken(TK("<"));
  187. return;
  188. }
  189. case '-': {
  190. parser->setNextTwoCharToken('=', TK("-"), TK("-="));
  191. return;
  192. }
  193. case '!':
  194. if(parser->matchChar('=')) parser->setNextToken(TK("!="));
  195. else syntaxError("expected '=' after '!'");
  196. break;
  197. case '*':
  198. if (parser->matchChar('*')) {
  199. parser->setNextToken(TK("**")); // '**'
  200. } else {
  201. parser->setNextTwoCharToken('=', TK("*"), TK("*="));
  202. }
  203. return;
  204. case '/':
  205. if(parser->matchChar('/')) {
  206. parser->setNextTwoCharToken('=', TK("//"), TK("//="));
  207. } else {
  208. parser->setNextTwoCharToken('=', TK("/"), TK("/="));
  209. }
  210. return;
  211. case '\r': break; // just ignore '\r'
  212. case ' ': case '\t': parser->eatSpaces(); break;
  213. case '\n': {
  214. parser->setNextToken(TK("@eol")); while(parser->matchChar('\n'));
  215. if(!parser->eatIndentation()) indentationError("unindent does not match any outer indentation level");
  216. return;
  217. }
  218. default: {
  219. if(c == 'f'){
  220. if(parser->matchChar('\'')) {eatString('\'', true); return;}
  221. if(parser->matchChar('"')) {eatString('"', true); return;}
  222. }
  223. if (isdigit(c)) {
  224. eatNumber();
  225. } else if (parser->isNameStart(c)) {
  226. int ret = parser->eatName();
  227. if(ret!=0) syntaxError("identifier is illegal, err " + std::to_string(ret));
  228. } else {
  229. syntaxError("unknown character: " + _Str(1, c));
  230. }
  231. return;
  232. }
  233. }
  234. }
  235. parser->token_start = parser->current_char;
  236. parser->setNextToken(TK("@eof"));
  237. }
  238. _TokenType peek() {
  239. return parser->current.type;
  240. }
  241. bool match(_TokenType expected) {
  242. if (peek() != expected) return false;
  243. lexToken();
  244. return true;
  245. }
  246. void consume(_TokenType expected) {
  247. if (!match(expected)){
  248. _StrStream ss;
  249. ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(peek()) << "'";
  250. syntaxError(ss.str());
  251. }
  252. }
  253. bool matchNewLines(bool repl_throw=false) {
  254. bool consumed = false;
  255. if (peek() == TK("@eol")) {
  256. while (peek() == TK("@eol")) lexToken();
  257. consumed = true;
  258. }
  259. if (repl_throw && peek() == TK("@eof")){
  260. throw NeedMoreLines(isCompilingClass);
  261. }
  262. return consumed;
  263. }
  264. bool matchEndStatement() {
  265. if (match(TK(";"))) {
  266. matchNewLines();
  267. return true;
  268. }
  269. if (matchNewLines() || peek() == TK("@eof"))
  270. return true;
  271. if (peek() == TK("@dedent")) return true;
  272. return false;
  273. }
  274. void consumeEndStatement() {
  275. if (!matchEndStatement()) syntaxError("expected statement end");
  276. }
  277. void exprLiteral() {
  278. PyVar value = parser->previous.value;
  279. int index = getCode()->addConst(value);
  280. emitCode(OP_LOAD_CONST, index);
  281. }
  282. void exprFString() {
  283. static const std::regex pattern(R"(\{(.*?)\})");
  284. PyVar value = parser->previous.value;
  285. std::string s = vm->PyStr_AS_C(value).str();
  286. std::sregex_iterator begin(s.begin(), s.end(), pattern);
  287. std::sregex_iterator end;
  288. int size = 0;
  289. int i = 0;
  290. for(auto it = begin; it != end; it++) {
  291. std::smatch m = *it;
  292. if (i < m.position()) {
  293. std::string literal = s.substr(i, m.position() - i);
  294. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(literal)));
  295. size++;
  296. }
  297. emitCode(OP_LOAD_EVAL_FN);
  298. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(m[1].str())));
  299. emitCode(OP_CALL, 1);
  300. size++;
  301. i = m.position() + m.length();
  302. }
  303. if (i < s.size()) {
  304. std::string literal = s.substr(i, s.size() - i);
  305. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(literal)));
  306. size++;
  307. }
  308. emitCode(OP_BUILD_STRING, size);
  309. }
  310. void exprLambda() {
  311. _Func func = std::make_shared<Function>();
  312. func->name = "<lambda>";
  313. if(!match(TK(":"))){
  314. __compileFunctionArgs(func);
  315. consume(TK(":"));
  316. }
  317. func->code = std::make_shared<CodeObject>(parser->src, func->name);
  318. this->codes.push(func->code);
  319. EXPR_TUPLE();
  320. emitCode(OP_RETURN_VALUE);
  321. this->codes.pop();
  322. emitCode(OP_LOAD_LAMBDA, getCode()->addConst(vm->PyFunction(func)));
  323. }
  324. void exprAssign() {
  325. _TokenType op = parser->previous.type;
  326. if(op == TK("=")) { // a = (expr)
  327. EXPR_TUPLE();
  328. emitCode(OP_STORE_PTR);
  329. }else{ // a += (expr) -> a = a + (expr)
  330. // TODO: optimization is needed for inplace operators
  331. emitCode(OP_DUP_TOP);
  332. EXPR();
  333. switch (op) {
  334. case TK("+="): emitCode(OP_BINARY_OP, 0); break;
  335. case TK("-="): emitCode(OP_BINARY_OP, 1); break;
  336. case TK("*="): emitCode(OP_BINARY_OP, 2); break;
  337. case TK("/="): emitCode(OP_BINARY_OP, 3); break;
  338. case TK("//="): emitCode(OP_BINARY_OP, 4); break;
  339. default: UNREACHABLE();
  340. }
  341. emitCode(OP_STORE_PTR);
  342. }
  343. }
  344. void exprComma() {
  345. int size = 1; // an expr is in the stack now
  346. do {
  347. EXPR(); // NOTE: "1," will fail, "1,2" will be ok
  348. size++;
  349. } while(match(TK(",")));
  350. emitCode(OP_BUILD_SMART_TUPLE, size);
  351. }
  352. void exprOr() {
  353. int patch = emitCode(OP_JUMP_IF_TRUE_OR_POP);
  354. parsePrecedence(PREC_LOGICAL_OR);
  355. patchJump(patch);
  356. }
  357. void exprAnd() {
  358. int patch = emitCode(OP_JUMP_IF_FALSE_OR_POP);
  359. parsePrecedence(PREC_LOGICAL_AND);
  360. patchJump(patch);
  361. }
  362. void exprBinaryOp() {
  363. _TokenType op = parser->previous.type;
  364. parsePrecedence((Precedence)(rules[op].precedence + 1));
  365. switch (op) {
  366. case TK("+"): emitCode(OP_BINARY_OP, 0); break;
  367. case TK("-"): emitCode(OP_BINARY_OP, 1); break;
  368. case TK("*"): emitCode(OP_BINARY_OP, 2); break;
  369. case TK("/"): emitCode(OP_BINARY_OP, 3); break;
  370. case TK("//"): emitCode(OP_BINARY_OP, 4); break;
  371. case TK("%"): emitCode(OP_BINARY_OP, 5); break;
  372. case TK("**"): emitCode(OP_BINARY_OP, 6); break;
  373. case TK("<"): emitCode(OP_COMPARE_OP, 0); break;
  374. case TK("<="): emitCode(OP_COMPARE_OP, 1); break;
  375. case TK("=="): emitCode(OP_COMPARE_OP, 2); break;
  376. case TK("!="): emitCode(OP_COMPARE_OP, 3); break;
  377. case TK(">"): emitCode(OP_COMPARE_OP, 4); break;
  378. case TK(">="): emitCode(OP_COMPARE_OP, 5); break;
  379. case TK("in"): emitCode(OP_CONTAINS_OP, 0); break;
  380. case TK("not in"): emitCode(OP_CONTAINS_OP, 1); break;
  381. case TK("is"): emitCode(OP_IS_OP, 0); break;
  382. case TK("is not"): emitCode(OP_IS_OP, 1); break;
  383. case TK("<<"): emitCode(OP_BITWISE_OP, 0); break;
  384. case TK(">>"): emitCode(OP_BITWISE_OP, 1); break;
  385. case TK("&"): emitCode(OP_BITWISE_OP, 2); break;
  386. case TK("|"): emitCode(OP_BITWISE_OP, 3); break;
  387. case TK("^"): emitCode(OP_BITWISE_OP, 4); break;
  388. default: UNREACHABLE();
  389. }
  390. }
  391. void exprUnaryOp() {
  392. _TokenType op = parser->previous.type;
  393. matchNewLines();
  394. parsePrecedence((Precedence)(PREC_UNARY + 1));
  395. switch (op) {
  396. case TK("-"): emitCode(OP_UNARY_NEGATIVE); break;
  397. case TK("not"): emitCode(OP_UNARY_NOT); break;
  398. default: UNREACHABLE();
  399. }
  400. }
  401. void exprGrouping() {
  402. matchNewLines();
  403. EXPR_TUPLE();
  404. matchNewLines();
  405. consume(TK(")"));
  406. }
  407. void exprList() {
  408. int _patch = emitCode(OP_NO_OP);
  409. int _body_start = getCode()->co_code.size();
  410. int ARGC = 0;
  411. do {
  412. matchNewLines();
  413. if (peek() == TK("]")) break;
  414. EXPR(); ARGC++;
  415. matchNewLines();
  416. if(ARGC == 1 && match(TK("for"))) goto __LISTCOMP;
  417. } while (match(TK(",")));
  418. matchNewLines();
  419. consume(TK("]"));
  420. emitCode(OP_BUILD_LIST, ARGC);
  421. return;
  422. __LISTCOMP:
  423. int _body_end = getCode()->co_code.size();
  424. getCode()->co_code[_patch].op = OP_JUMP_ABSOLUTE;
  425. getCode()->co_code[_patch].arg = _body_end;
  426. emitCode(OP_BUILD_LIST, 0);
  427. EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE();
  428. matchNewLines();
  429. int _skipPatch = emitCode(OP_JUMP_ABSOLUTE);
  430. int _cond_start = getCode()->co_code.size();
  431. if(match(TK("if"))) EXPR_TUPLE();
  432. int _cond_end = getCode()->co_code.size();
  433. patchJump(_skipPatch);
  434. emitCode(OP_GET_ITER);
  435. Loop& loop = enterLoop();
  436. int patch = emitCode(OP_FOR_ITER);
  437. if(_cond_end != _cond_start) { // there is an if condition
  438. getCode()->__moveToEnd(_cond_start, _cond_end);
  439. int ifpatch = emitCode(OP_POP_JUMP_IF_FALSE);
  440. getCode()->__moveToEnd(_body_start, _body_end);
  441. emitCode(OP_LIST_APPEND);
  442. patchJump(ifpatch);
  443. }else{
  444. getCode()->__moveToEnd(_body_start, _body_end);
  445. emitCode(OP_LIST_APPEND);
  446. }
  447. emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine();
  448. patchJump(patch);
  449. exitLoop();
  450. consume(TK("]"));
  451. }
  452. void exprMap() {
  453. int size = 0;
  454. do {
  455. matchNewLines();
  456. if (peek() == TK("}")) break;
  457. EXPR();consume(TK(":"));EXPR();
  458. size++;
  459. matchNewLines();
  460. } while (match(TK(",")));
  461. matchNewLines();
  462. consume(TK("}"));
  463. emitCode(OP_BUILD_MAP, size);
  464. }
  465. void exprCall() {
  466. int ARGC = 0;
  467. do {
  468. matchNewLines(mode()==SINGLE_MODE);
  469. if (peek() == TK(")")) break;
  470. EXPR();
  471. ARGC++;
  472. matchNewLines(mode()==SINGLE_MODE);
  473. } while (match(TK(",")));
  474. consume(TK(")"));
  475. emitCode(OP_CALL, ARGC);
  476. }
  477. void exprName() {
  478. Token tkname = parser->previous;
  479. int index = getCode()->addName(
  480. tkname.str(),
  481. codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL
  482. );
  483. emitCode(OP_LOAD_NAME_PTR, index);
  484. }
  485. void exprAttrib() {
  486. consume(TK("@id"));
  487. const _Str& name = parser->previous.str();
  488. int index = getCode()->addName(name, NAME_ATTR);
  489. emitCode(OP_BUILD_ATTR_PTR, index);
  490. }
  491. // [:], [:b]
  492. // [a], [a:], [a:b]
  493. void exprSubscript() {
  494. if(match(TK(":"))){
  495. emitCode(OP_LOAD_NONE);
  496. if(match(TK("]"))){
  497. emitCode(OP_LOAD_NONE);
  498. }else{
  499. EXPR();
  500. consume(TK("]"));
  501. }
  502. emitCode(OP_BUILD_SLICE);
  503. }else{
  504. EXPR();
  505. if(match(TK(":"))){
  506. if(match(TK("]"))){
  507. emitCode(OP_LOAD_NONE);
  508. }else{
  509. EXPR();
  510. consume(TK("]"));
  511. }
  512. emitCode(OP_BUILD_SLICE);
  513. }else{
  514. consume(TK("]"));
  515. }
  516. }
  517. emitCode(OP_BUILD_INDEX_PTR);
  518. }
  519. void exprValue() {
  520. _TokenType op = parser->previous.type;
  521. switch (op) {
  522. case TK("None"): emitCode(OP_LOAD_NONE); break;
  523. case TK("True"): emitCode(OP_LOAD_TRUE); break;
  524. case TK("False"): emitCode(OP_LOAD_FALSE); break;
  525. default: UNREACHABLE();
  526. }
  527. }
  528. void keepOpcodeLine(){
  529. int i = getCode()->co_code.size() - 1;
  530. getCode()->co_code[i].line = getCode()->co_code[i-1].line;
  531. }
  532. int emitCode(Opcode opcode, int arg=-1) {
  533. int line = parser->previous.line;
  534. getCode()->co_code.push_back(
  535. ByteCode{(uint8_t)opcode, arg, (uint16_t)line}
  536. );
  537. return getCode()->co_code.size() - 1;
  538. }
  539. inline void patchJump(int addr_index) {
  540. int target = getCode()->co_code.size();
  541. getCode()->co_code[addr_index].arg = target;
  542. }
  543. void compileBlockBody(){
  544. __compileBlockBody(&Compiler::compileStatement);
  545. }
  546. void __compileBlockBody(CompilerAction action) {
  547. consume(TK(":"));
  548. if(!matchNewLines(mode()==SINGLE_MODE)){
  549. syntaxError("expected a new line after ':'");
  550. }
  551. consume(TK("@indent"));
  552. while (peek() != TK("@dedent")) {
  553. matchNewLines();
  554. (this->*action)();
  555. matchNewLines();
  556. }
  557. consume(TK("@dedent"));
  558. }
  559. Token compileImportPath() {
  560. consume(TK("@id"));
  561. Token tkmodule = parser->previous;
  562. int index = getCode()->addName(tkmodule.str(), NAME_GLOBAL);
  563. emitCode(OP_IMPORT_NAME, index);
  564. return tkmodule;
  565. }
  566. // import module1 [as alias1 [, module2 [as alias2 ...]]
  567. void compileRegularImport() {
  568. do {
  569. Token tkmodule = compileImportPath();
  570. if (match(TK("as"))) {
  571. consume(TK("@id"));
  572. tkmodule = parser->previous;
  573. }
  574. int index = getCode()->addName(tkmodule.str(), NAME_GLOBAL);
  575. emitCode(OP_STORE_NAME_PTR, index);
  576. } while (match(TK(",")));
  577. consumeEndStatement();
  578. }
  579. void parsePrecedence(Precedence precedence) {
  580. lexToken();
  581. GrammarFn prefix = rules[parser->previous.type].prefix;
  582. if (prefix == nullptr) syntaxError(_Str("expected an expression, but got ") + TK_STR(parser->previous.type));
  583. (this->*prefix)();
  584. while (rules[peek()].precedence >= precedence) {
  585. lexToken();
  586. _TokenType op = parser->previous.type;
  587. GrammarFn infix = rules[op].infix;
  588. if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true");
  589. (this->*infix)();
  590. }
  591. }
  592. void compileIfStatement() {
  593. matchNewLines();
  594. EXPR_TUPLE();
  595. int ifpatch = emitCode(OP_POP_JUMP_IF_FALSE);
  596. compileBlockBody();
  597. if (match(TK("elif"))) {
  598. int exit_jump = emitCode(OP_JUMP_ABSOLUTE);
  599. patchJump(ifpatch);
  600. compileIfStatement();
  601. patchJump(exit_jump);
  602. } else if (match(TK("else"))) {
  603. int exit_jump = emitCode(OP_JUMP_ABSOLUTE);
  604. patchJump(ifpatch);
  605. compileBlockBody();
  606. patchJump(exit_jump);
  607. } else {
  608. patchJump(ifpatch);
  609. }
  610. }
  611. Loop& enterLoop(){
  612. Loop lp((int)getCode()->co_code.size());
  613. loops.push(lp);
  614. return loops.top();
  615. }
  616. void exitLoop(){
  617. Loop& lp = loops.top();
  618. for(int addr : lp.breaks) patchJump(addr);
  619. loops.pop();
  620. }
  621. void compileWhileLoop() {
  622. Loop& loop = enterLoop();
  623. EXPR_TUPLE();
  624. int patch = emitCode(OP_POP_JUMP_IF_FALSE);
  625. compileBlockBody();
  626. emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine();
  627. patchJump(patch);
  628. exitLoop();
  629. }
  630. void EXPR_FOR_VARS(){
  631. int size = 0;
  632. do {
  633. consume(TK("@id"));
  634. exprName(); size++;
  635. } while (match(TK(",")));
  636. if(size > 1) emitCode(OP_BUILD_SMART_TUPLE, size);
  637. }
  638. void compileForLoop() {
  639. EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE();
  640. emitCode(OP_GET_ITER);
  641. Loop& loop = enterLoop();
  642. int patch = emitCode(OP_FOR_ITER);
  643. compileBlockBody();
  644. emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine();
  645. patchJump(patch);
  646. exitLoop();
  647. }
  648. void compileStatement() {
  649. if (match(TK("break"))) {
  650. if (loops.empty()) syntaxError("'break' outside loop");
  651. consumeEndStatement();
  652. int patch = emitCode(OP_SAFE_JUMP_ABSOLUTE);
  653. getLoop().breaks.push_back(patch);
  654. } else if (match(TK("continue"))) {
  655. if (loops.empty()) syntaxError("'continue' not properly in loop");
  656. consumeEndStatement();
  657. emitCode(OP_JUMP_ABSOLUTE, getLoop().start);
  658. } else if (match(TK("return"))) {
  659. if (codes.size() == 1)
  660. syntaxError("'return' outside function");
  661. if(matchEndStatement()){
  662. emitCode(OP_LOAD_NONE);
  663. }else{
  664. EXPR_TUPLE();
  665. consumeEndStatement();
  666. }
  667. emitCode(OP_RETURN_VALUE);
  668. } else if (match(TK("if"))) {
  669. compileIfStatement();
  670. } else if (match(TK("while"))) {
  671. compileWhileLoop();
  672. } else if (match(TK("for"))) {
  673. compileForLoop();
  674. } else if(match(TK("assert"))){
  675. EXPR();
  676. emitCode(OP_ASSERT);
  677. consumeEndStatement();
  678. } else if(match(TK("label"))){
  679. if(mode() != EXEC_MODE) syntaxError("'label' is only available in EXEC_MODE");
  680. consume(TK(".")); consume(TK("@id"));
  681. getCode()->addLabel(parser->previous.str());
  682. consumeEndStatement();
  683. } else if(match(TK("goto"))){
  684. // https://entrian.com/goto/
  685. if(mode() != EXEC_MODE) syntaxError("'goto' is only available in EXEC_MODE");
  686. consume(TK(".")); consume(TK("@id"));
  687. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(parser->previous.str())));
  688. emitCode(OP_GOTO);
  689. consumeEndStatement();
  690. } else if(match(TK("raise"))){
  691. consume(TK("@id")); // dummy exception type
  692. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(parser->previous.str())));
  693. consume(TK("("));EXPR();consume(TK(")"));
  694. emitCode(OP_RAISE_ERROR);
  695. consumeEndStatement();
  696. } else if(match(TK("del"))){
  697. EXPR();
  698. emitCode(OP_DELETE_PTR);
  699. consumeEndStatement();
  700. } else if(match(TK("global"))){
  701. consume(TK("@id"));
  702. getCode()->co_global_names.push_back(parser->previous.str());
  703. consumeEndStatement();
  704. } else if(match(TK("pass"))){
  705. consumeEndStatement();
  706. } else {
  707. EXPR_ANY();
  708. consumeEndStatement();
  709. // If last op is not an assignment, pop the result.
  710. uint8_t lastOp = getCode()->co_code.back().op;
  711. if( lastOp != OP_STORE_NAME_PTR && lastOp != OP_STORE_PTR){
  712. if(mode()==SINGLE_MODE && parser->indents.top() == 0){
  713. emitCode(OP_PRINT_EXPR);
  714. }
  715. emitCode(OP_POP_TOP);
  716. }
  717. }
  718. }
  719. void compileClass(){
  720. consume(TK("@id"));
  721. int clsNameIdx = getCode()->addName(parser->previous.str(), NAME_GLOBAL);
  722. int superClsNameIdx = -1;
  723. if(match(TK("("))){
  724. consume(TK("@id"));
  725. superClsNameIdx = getCode()->addName(parser->previous.str(), NAME_GLOBAL);
  726. consume(TK(")"));
  727. }
  728. emitCode(OP_LOAD_NONE);
  729. isCompilingClass = true;
  730. __compileBlockBody(&Compiler::compileFunction);
  731. isCompilingClass = false;
  732. if(superClsNameIdx == -1) emitCode(OP_LOAD_NONE);
  733. else emitCode(OP_LOAD_NAME_PTR, superClsNameIdx);
  734. emitCode(OP_BUILD_CLASS, clsNameIdx);
  735. }
  736. void __compileFunctionArgs(_Func func){
  737. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  738. do {
  739. if(state == 3) syntaxError("**kwargs should be the last argument");
  740. matchNewLines();
  741. if(match(TK("*"))){
  742. if(state < 1) state = 1;
  743. else syntaxError("*args should be placed before **kwargs");
  744. }
  745. else if(match(TK("**"))){
  746. state = 3;
  747. }
  748. consume(TK("@id"));
  749. const _Str& name = parser->previous.str();
  750. if(func->hasName(name)) syntaxError("duplicate argument name");
  751. if(state == 0 && peek() == TK("=")) state = 2;
  752. switch (state)
  753. {
  754. case 0: func->args.push_back(name); break;
  755. case 1: func->starredArg = name; state+=1; break;
  756. case 2:
  757. consume(TK("="));
  758. func->kwArgs[name] = consumeLiteral();
  759. func->kwArgsOrder.push_back(name);
  760. break;
  761. case 3: syntaxError("**kwargs is not supported yet"); break;
  762. }
  763. } while (match(TK(",")));
  764. }
  765. void compileFunction(){
  766. if(isCompilingClass){
  767. if(match(TK("pass"))) return;
  768. consume(TK("def"));
  769. }
  770. _Func func = std::make_shared<Function>();
  771. consume(TK("@id"));
  772. func->name = parser->previous.str();
  773. if (match(TK("(")) && !match(TK(")"))) {
  774. __compileFunctionArgs(func);
  775. consume(TK(")"));
  776. }
  777. func->code = std::make_shared<CodeObject>(parser->src, func->name);
  778. this->codes.push(func->code);
  779. compileBlockBody();
  780. this->codes.pop();
  781. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyFunction(func)));
  782. if(!isCompilingClass) emitCode(OP_STORE_FUNCTION);
  783. }
  784. PyVar consumeLiteral(){
  785. if(match(TK("-"))){
  786. consume(TK("@num"));
  787. PyVar val = parser->previous.value;
  788. return vm->numNegated(val);
  789. }
  790. if(match(TK("@num"))) return parser->previous.value;
  791. if(match(TK("@str"))) return parser->previous.value;
  792. if(match(TK("True"))) return vm->PyBool(true);
  793. if(match(TK("False"))) return vm->PyBool(false);
  794. if(match(TK("None"))) return vm->None;
  795. syntaxError(_Str("expect a literal, not ") + TK_STR(parser->current.type));
  796. return nullptr;
  797. }
  798. void compileTopLevelStatement() {
  799. if (match(TK("class"))) {
  800. compileClass();
  801. } else if (match(TK("def"))) {
  802. compileFunction();
  803. } else if (match(TK("import"))) {
  804. compileRegularImport();
  805. } else {
  806. compileStatement();
  807. }
  808. }
  809. _Code __fillCode(){
  810. _Code code = std::make_shared<CodeObject>(parser->src, _Str("<module>"), mode());
  811. codes.push(code);
  812. // Lex initial tokens. current <-- next.
  813. lexToken();
  814. lexToken();
  815. matchNewLines();
  816. if(mode()==EVAL_MODE) {
  817. EXPR_TUPLE();
  818. consume(TK("@eof"));
  819. return code;
  820. }
  821. while (!match(TK("@eof"))) {
  822. compileTopLevelStatement();
  823. matchNewLines();
  824. }
  825. return code;
  826. }
  827. /***** Error Reporter *****/
  828. _Str getLineSnapshot(){
  829. int lineno = parser->current_line;
  830. if(parser->peekChar() == '\n') lineno--;
  831. return parser->src->snapshot(lineno);
  832. }
  833. void syntaxError(_Str msg){
  834. throw CompileError("SyntaxError", msg, getLineSnapshot());
  835. }
  836. void indentationError(_Str msg){
  837. throw CompileError("IndentationError", msg, getLineSnapshot());
  838. }
  839. void unexpectedError(_Str msg){
  840. throw CompileError("UnexpectedError", msg, getLineSnapshot());
  841. }
  842. };
  843. _Code compile(VM* vm, const char* source, _Str filename, CompileMode mode=EXEC_MODE) {
  844. Compiler compiler(vm, source, filename, mode);
  845. try{
  846. return compiler.__fillCode();
  847. }catch(std::exception& e){
  848. if(const _Error* _ = dynamic_cast<const _Error*>(&e)){
  849. (*vm->_stderr) << e.what() << '\n';
  850. }else{
  851. auto ce = CompileError("UnexpectedError", e.what(), compiler.getLineSnapshot());
  852. (*vm->_stderr) << ce.what() << '\n';
  853. }
  854. return nullptr;
  855. }
  856. }