compiler.h 35 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966
  1. #pragma once
  2. #include "parser.h"
  3. #include "error.h"
  4. #include "vm.h"
  5. class Compiler;
  6. typedef void (Compiler::*GrammarFn)();
  7. typedef void (Compiler::*CompilerAction)();
  8. struct GrammarRule{
  9. GrammarFn prefix;
  10. GrammarFn infix;
  11. Precedence precedence;
  12. };
  13. struct Loop {
  14. int start;
  15. std::vector<int> breaks;
  16. Loop(int start) : start(start) {}
  17. };
  18. class Compiler {
  19. public:
  20. std::unique_ptr<Parser> parser;
  21. std::stack<_Code> codes;
  22. std::stack<Loop> loops;
  23. bool isCompilingClass = false;
  24. VM* vm;
  25. std::unordered_map<_TokenType, GrammarRule> rules;
  26. _Code getCode() {
  27. return codes.top();
  28. }
  29. CompileMode mode() {
  30. return parser->src->mode;
  31. }
  32. Loop& getLoop() {
  33. return loops.top();
  34. }
  35. Compiler(VM* vm, const char* source, _Str filename, CompileMode mode){
  36. this->vm = vm;
  37. this->parser = std::make_unique<Parser>(
  38. std::make_shared<SourceMetadata>(source, filename, mode)
  39. );
  40. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  41. #define METHOD(name) &Compiler::name
  42. #define NO_INFIX nullptr, PREC_NONE
  43. for(_TokenType i=0; i<__TOKENS_LEN; i++) rules[i] = { nullptr, NO_INFIX };
  44. rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB };
  45. rules[TK("(")] = { METHOD(exprGrouping), METHOD(exprCall), PREC_CALL };
  46. rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscript), PREC_SUBSCRIPT };
  47. rules[TK("{")] = { METHOD(exprMap), NO_INFIX };
  48. rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  49. rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM };
  50. rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM };
  51. rules[TK("*")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_FACTOR };
  52. rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  53. rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  54. rules[TK("**")] = { nullptr, METHOD(exprBinaryOp), PREC_EXPONENT };
  55. rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  56. rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  57. rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  58. rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  59. rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  60. rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  61. rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  62. rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  63. rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  64. rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  65. rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND };
  66. rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR };
  67. rules[TK("not")] = { METHOD(exprUnaryOp), nullptr, PREC_UNARY };
  68. rules[TK("True")] = { METHOD(exprValue), NO_INFIX };
  69. rules[TK("False")] = { METHOD(exprValue), NO_INFIX };
  70. rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX };
  71. rules[TK("None")] = { METHOD(exprValue), NO_INFIX };
  72. rules[TK("...")] = { METHOD(exprValue), NO_INFIX };
  73. rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
  74. rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
  75. rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
  76. rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
  77. rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  78. rules[TK("+=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  79. rules[TK("-=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  80. rules[TK("*=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  81. rules[TK("/=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  82. rules[TK("//=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  83. rules[TK(",")] = { nullptr, METHOD(exprComma), PREC_COMMA };
  84. rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  85. rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  86. rules[TK("&")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_BITWISE_AND };
  87. rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR };
  88. rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  89. #undef METHOD
  90. #undef NO_INFIX
  91. #define EXPR() parsePrecedence(PREC_LOGICAL_OR) // no '=' and ',' just a simple expression
  92. #define EXPR_TUPLE() parsePrecedence(PREC_COMMA) // no '=', but ',' is allowed
  93. #define EXPR_ANY() parsePrecedence(PREC_ASSIGNMENT)
  94. }
  95. _Str eatStringUntil(char quote) {
  96. std::vector<char> buff;
  97. while (true) {
  98. char c = parser->eatCharIncludeNewLine();
  99. if (c == quote) break;
  100. if (c == '\0' || c == '\n') syntaxError("EOL while scanning string literal");
  101. if (c == '\\') {
  102. switch (parser->eatCharIncludeNewLine()) {
  103. case '"': buff.push_back('"'); break;
  104. case '\'': buff.push_back('\''); break;
  105. case '\\': buff.push_back('\\'); break;
  106. case 'n': buff.push_back('\n'); break;
  107. case 'r': buff.push_back('\r'); break;
  108. case 't': buff.push_back('\t'); break;
  109. case '\n': case '\r': break;
  110. default: syntaxError("invalid escape character");
  111. }
  112. } else {
  113. buff.push_back(c);
  114. }
  115. }
  116. return _Str(buff.data(), buff.size());
  117. }
  118. void eatString(char quote, bool fstr) {
  119. _Str s = eatStringUntil(quote);
  120. if(fstr){
  121. parser->setNextToken(TK("@fstr"), vm->PyStr(s));
  122. }else{
  123. parser->setNextToken(TK("@str"), vm->PyStr(s));
  124. }
  125. }
  126. void eatNumber() {
  127. static const std::regex pattern("^(0x)?[0-9a-f]+(\\.[0-9]+)?");
  128. std::smatch m;
  129. const char* i = parser->token_start;
  130. while(*i != '\n' && *i != '\0') i++;
  131. std::string s = std::string(parser->token_start, i);
  132. try{
  133. if (std::regex_search(s, m, pattern)) {
  134. // here is m.length()-1, since the first char is eaten by lexToken()
  135. for(int j=0; j<m.length()-1; j++) parser->eatChar();
  136. int base = 10;
  137. size_t size;
  138. if (m[1].matched) base = 16;
  139. if (m[2].matched) {
  140. if(base == 16) syntaxError("hex literal should not contain a dot");
  141. parser->setNextToken(TK("@num"), vm->PyFloat(std::stod(m[0], &size)));
  142. } else {
  143. parser->setNextToken(TK("@num"), vm->PyInt(std::stoll(m[0], &size, base)));
  144. }
  145. if (size != m.length()) throw std::runtime_error("length mismatch");
  146. }
  147. }catch(std::exception& e){
  148. syntaxError("invalid number literal");
  149. }
  150. }
  151. // Lex the next token and set it as the next token.
  152. void lexToken() {
  153. parser->previous = parser->current;
  154. parser->current = parser->nextToken();
  155. //_Str _info = parser->current.info(); printf("%s\n", (const char*)_info);
  156. while (parser->peekChar() != '\0') {
  157. parser->token_start = parser->current_char;
  158. char c = parser->eatCharIncludeNewLine();
  159. switch (c) {
  160. case '\'': case '"': eatString(c, false); return;
  161. case '#': parser->skipLineComment(); break;
  162. case '{': parser->setNextToken(TK("{")); return;
  163. case '}': parser->setNextToken(TK("}")); return;
  164. case ',': parser->setNextToken(TK(",")); return;
  165. case ':': parser->setNextToken(TK(":")); return;
  166. case ';': parser->setNextToken(TK(";")); return;
  167. case '(': parser->setNextToken(TK("(")); return;
  168. case ')': parser->setNextToken(TK(")")); return;
  169. case '[': parser->setNextToken(TK("[")); return;
  170. case ']': parser->setNextToken(TK("]")); return;
  171. case '%': parser->setNextToken(TK("%")); return;
  172. case '&': parser->setNextToken(TK("&")); return;
  173. case '|': parser->setNextToken(TK("|")); return;
  174. case '^': parser->setNextToken(TK("^")); return;
  175. case '.': {
  176. if(parser->matchChar('.')) {
  177. if(parser->matchChar('.')) {
  178. parser->setNextToken(TK("..."));
  179. } else {
  180. syntaxError("invalid token '..'");
  181. }
  182. } else {
  183. parser->setNextToken(TK("."));
  184. }
  185. return;
  186. }
  187. case '=': parser->setNextTwoCharToken('=', TK("="), TK("==")); return;
  188. case '+': parser->setNextTwoCharToken('=', TK("+"), TK("+=")); return;
  189. case '>': {
  190. if(parser->matchChar('=')) parser->setNextToken(TK(">="));
  191. else if(parser->matchChar('>')) parser->setNextToken(TK(">>"));
  192. else parser->setNextToken(TK(">"));
  193. return;
  194. }
  195. case '<': {
  196. if(parser->matchChar('=')) parser->setNextToken(TK("<="));
  197. else if(parser->matchChar('<')) parser->setNextToken(TK("<<"));
  198. else parser->setNextToken(TK("<"));
  199. return;
  200. }
  201. case '-': {
  202. parser->setNextTwoCharToken('=', TK("-"), TK("-="));
  203. return;
  204. }
  205. case '!':
  206. if(parser->matchChar('=')) parser->setNextToken(TK("!="));
  207. else syntaxError("expected '=' after '!'");
  208. break;
  209. case '*':
  210. if (parser->matchChar('*')) {
  211. parser->setNextToken(TK("**")); // '**'
  212. } else {
  213. parser->setNextTwoCharToken('=', TK("*"), TK("*="));
  214. }
  215. return;
  216. case '/':
  217. if(parser->matchChar('/')) {
  218. parser->setNextTwoCharToken('=', TK("//"), TK("//="));
  219. } else {
  220. parser->setNextTwoCharToken('=', TK("/"), TK("/="));
  221. }
  222. return;
  223. case '\r': break; // just ignore '\r'
  224. case ' ': case '\t': parser->eatSpaces(); break;
  225. case '\n': {
  226. parser->setNextToken(TK("@eol")); while(parser->matchChar('\n'));
  227. if(!parser->eatIndentation()) indentationError("unindent does not match any outer indentation level");
  228. return;
  229. }
  230. default: {
  231. if(c == 'f'){
  232. if(parser->matchChar('\'')) {eatString('\'', true); return;}
  233. if(parser->matchChar('"')) {eatString('"', true); return;}
  234. }
  235. if (isdigit(c)) {
  236. eatNumber();
  237. } else if (parser->isNameStart(c)) {
  238. int ret = parser->eatName();
  239. if(ret!=0) syntaxError("identifier is illegal, err " + std::to_string(ret));
  240. } else {
  241. syntaxError("unknown character: " + std::string(1, c));
  242. }
  243. return;
  244. }
  245. }
  246. }
  247. parser->token_start = parser->current_char;
  248. parser->setNextToken(TK("@eof"));
  249. }
  250. _TokenType peek() {
  251. return parser->current.type;
  252. }
  253. bool match(_TokenType expected) {
  254. if (peek() != expected) return false;
  255. lexToken();
  256. return true;
  257. }
  258. void consume(_TokenType expected) {
  259. if (!match(expected)){
  260. _StrStream ss;
  261. ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(peek()) << "'";
  262. syntaxError(ss.str());
  263. }
  264. }
  265. bool matchNewLines(bool repl_throw=false) {
  266. bool consumed = false;
  267. if (peek() == TK("@eol")) {
  268. while (peek() == TK("@eol")) lexToken();
  269. consumed = true;
  270. }
  271. if (repl_throw && peek() == TK("@eof")){
  272. throw NeedMoreLines(isCompilingClass);
  273. }
  274. return consumed;
  275. }
  276. bool matchEndStatement() {
  277. if (match(TK(";"))) {
  278. matchNewLines();
  279. return true;
  280. }
  281. if (matchNewLines() || peek() == TK("@eof"))
  282. return true;
  283. if (peek() == TK("@dedent")) return true;
  284. return false;
  285. }
  286. void consumeEndStatement() {
  287. if (!matchEndStatement()) syntaxError("expected statement end");
  288. }
  289. void exprLiteral() {
  290. PyVar value = parser->previous.value;
  291. int index = getCode()->addConst(value);
  292. emitCode(OP_LOAD_CONST, index);
  293. }
  294. void exprFString() {
  295. static const std::regex pattern(R"(\{(.*?)\})");
  296. PyVar value = parser->previous.value;
  297. std::string s = vm->PyStr_AS_C(value).str();
  298. std::sregex_iterator begin(s.begin(), s.end(), pattern);
  299. std::sregex_iterator end;
  300. int size = 0;
  301. int i = 0;
  302. for(auto it = begin; it != end; it++) {
  303. std::smatch m = *it;
  304. if (i < m.position()) {
  305. std::string literal = s.substr(i, m.position() - i);
  306. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(literal)));
  307. size++;
  308. }
  309. emitCode(OP_LOAD_EVAL_FN);
  310. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(m[1].str())));
  311. emitCode(OP_CALL, 1);
  312. size++;
  313. i = m.position() + m.length();
  314. }
  315. if (i < s.size()) {
  316. std::string literal = s.substr(i, s.size() - i);
  317. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(literal)));
  318. size++;
  319. }
  320. emitCode(OP_BUILD_STRING, size);
  321. }
  322. void exprLambda() {
  323. _Func func = std::make_shared<Function>();
  324. func->name = "<lambda>";
  325. if(!match(TK(":"))){
  326. __compileFunctionArgs(func);
  327. consume(TK(":"));
  328. }
  329. func->code = std::make_shared<CodeObject>(parser->src, func->name);
  330. this->codes.push(func->code);
  331. EXPR_TUPLE();
  332. emitCode(OP_RETURN_VALUE);
  333. this->codes.pop();
  334. emitCode(OP_LOAD_LAMBDA, getCode()->addConst(vm->PyFunction(func)));
  335. }
  336. void exprAssign() {
  337. _TokenType op = parser->previous.type;
  338. if(op == TK("=")) { // a = (expr)
  339. EXPR_TUPLE();
  340. emitCode(OP_STORE_PTR);
  341. }else{ // a += (expr) -> a = a + (expr)
  342. // TODO: optimization is needed for inplace operators
  343. emitCode(OP_DUP_TOP);
  344. EXPR();
  345. switch (op) {
  346. case TK("+="): emitCode(OP_BINARY_OP, 0); break;
  347. case TK("-="): emitCode(OP_BINARY_OP, 1); break;
  348. case TK("*="): emitCode(OP_BINARY_OP, 2); break;
  349. case TK("/="): emitCode(OP_BINARY_OP, 3); break;
  350. case TK("//="): emitCode(OP_BINARY_OP, 4); break;
  351. default: UNREACHABLE();
  352. }
  353. emitCode(OP_STORE_PTR);
  354. }
  355. }
  356. void exprComma() {
  357. int size = 1; // an expr is in the stack now
  358. do {
  359. EXPR(); // NOTE: "1," will fail, "1,2" will be ok
  360. size++;
  361. } while(match(TK(",")));
  362. emitCode(OP_BUILD_SMART_TUPLE, size);
  363. }
  364. void exprOr() {
  365. int patch = emitCode(OP_JUMP_IF_TRUE_OR_POP);
  366. parsePrecedence(PREC_LOGICAL_OR);
  367. patchJump(patch);
  368. }
  369. void exprAnd() {
  370. int patch = emitCode(OP_JUMP_IF_FALSE_OR_POP);
  371. parsePrecedence(PREC_LOGICAL_AND);
  372. patchJump(patch);
  373. }
  374. void exprBinaryOp() {
  375. _TokenType op = parser->previous.type;
  376. parsePrecedence((Precedence)(rules[op].precedence + 1));
  377. switch (op) {
  378. case TK("+"): emitCode(OP_BINARY_OP, 0); break;
  379. case TK("-"): emitCode(OP_BINARY_OP, 1); break;
  380. case TK("*"): emitCode(OP_BINARY_OP, 2); break;
  381. case TK("/"): emitCode(OP_BINARY_OP, 3); break;
  382. case TK("//"): emitCode(OP_BINARY_OP, 4); break;
  383. case TK("%"): emitCode(OP_BINARY_OP, 5); break;
  384. case TK("**"): emitCode(OP_BINARY_OP, 6); break;
  385. case TK("<"): emitCode(OP_COMPARE_OP, 0); break;
  386. case TK("<="): emitCode(OP_COMPARE_OP, 1); break;
  387. case TK("=="): emitCode(OP_COMPARE_OP, 2); break;
  388. case TK("!="): emitCode(OP_COMPARE_OP, 3); break;
  389. case TK(">"): emitCode(OP_COMPARE_OP, 4); break;
  390. case TK(">="): emitCode(OP_COMPARE_OP, 5); break;
  391. case TK("in"): emitCode(OP_CONTAINS_OP, 0); break;
  392. case TK("not in"): emitCode(OP_CONTAINS_OP, 1); break;
  393. case TK("is"): emitCode(OP_IS_OP, 0); break;
  394. case TK("is not"): emitCode(OP_IS_OP, 1); break;
  395. case TK("<<"): emitCode(OP_BITWISE_OP, 0); break;
  396. case TK(">>"): emitCode(OP_BITWISE_OP, 1); break;
  397. case TK("&"): emitCode(OP_BITWISE_OP, 2); break;
  398. case TK("|"): emitCode(OP_BITWISE_OP, 3); break;
  399. case TK("^"): emitCode(OP_BITWISE_OP, 4); break;
  400. default: UNREACHABLE();
  401. }
  402. }
  403. void exprUnaryOp() {
  404. _TokenType op = parser->previous.type;
  405. matchNewLines();
  406. parsePrecedence((Precedence)(PREC_UNARY + 1));
  407. switch (op) {
  408. case TK("-"): emitCode(OP_UNARY_NEGATIVE); break;
  409. case TK("not"): emitCode(OP_UNARY_NOT); break;
  410. case TK("&"): emitCode(OP_UNARY_REF); break;
  411. case TK("*"): emitCode(OP_UNARY_DEREF); break;
  412. default: UNREACHABLE();
  413. }
  414. }
  415. void exprGrouping() {
  416. matchNewLines();
  417. EXPR_TUPLE();
  418. matchNewLines();
  419. consume(TK(")"));
  420. }
  421. void exprList() {
  422. int _patch = emitCode(OP_NO_OP);
  423. int _body_start = getCode()->co_code.size();
  424. int ARGC = 0;
  425. do {
  426. matchNewLines();
  427. if (peek() == TK("]")) break;
  428. EXPR(); ARGC++;
  429. matchNewLines();
  430. if(ARGC == 1 && match(TK("for"))) goto __LISTCOMP;
  431. } while (match(TK(",")));
  432. matchNewLines();
  433. consume(TK("]"));
  434. emitCode(OP_BUILD_LIST, ARGC);
  435. return;
  436. __LISTCOMP:
  437. int _body_end = getCode()->co_code.size();
  438. getCode()->co_code[_patch].op = OP_JUMP_ABSOLUTE;
  439. getCode()->co_code[_patch].arg = _body_end;
  440. emitCode(OP_BUILD_LIST, 0);
  441. EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE();
  442. matchNewLines();
  443. int _skipPatch = emitCode(OP_JUMP_ABSOLUTE);
  444. int _cond_start = getCode()->co_code.size();
  445. if(match(TK("if"))) EXPR_TUPLE();
  446. int _cond_end = getCode()->co_code.size();
  447. patchJump(_skipPatch);
  448. emitCode(OP_GET_ITER);
  449. Loop& loop = enterLoop();
  450. int patch = emitCode(OP_FOR_ITER);
  451. if(_cond_end != _cond_start) { // there is an if condition
  452. getCode()->__moveToEnd(_cond_start, _cond_end);
  453. int ifpatch = emitCode(OP_POP_JUMP_IF_FALSE);
  454. getCode()->__moveToEnd(_body_start, _body_end);
  455. emitCode(OP_LIST_APPEND);
  456. patchJump(ifpatch);
  457. }else{
  458. getCode()->__moveToEnd(_body_start, _body_end);
  459. emitCode(OP_LIST_APPEND);
  460. }
  461. emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine();
  462. patchJump(patch);
  463. exitLoop();
  464. consume(TK("]"));
  465. }
  466. void exprMap() {
  467. int size = 0;
  468. do {
  469. matchNewLines();
  470. if (peek() == TK("}")) break;
  471. EXPR();consume(TK(":"));EXPR();
  472. size++;
  473. matchNewLines();
  474. } while (match(TK(",")));
  475. matchNewLines();
  476. consume(TK("}"));
  477. emitCode(OP_BUILD_MAP, size);
  478. }
  479. void exprCall() {
  480. int ARGC = 0;
  481. do {
  482. matchNewLines(mode()==SINGLE_MODE);
  483. if (peek() == TK(")")) break;
  484. EXPR();
  485. ARGC++;
  486. matchNewLines(mode()==SINGLE_MODE);
  487. } while (match(TK(",")));
  488. consume(TK(")"));
  489. emitCode(OP_CALL, ARGC);
  490. }
  491. void exprName() {
  492. Token tkname = parser->previous;
  493. int index = getCode()->addName(
  494. tkname.str(),
  495. codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL
  496. );
  497. emitCode(OP_LOAD_NAME_PTR, index);
  498. }
  499. void exprAttrib() {
  500. consume(TK("@id"));
  501. const _Str& name = parser->previous.str();
  502. int index = getCode()->addName(name, NAME_ATTR);
  503. emitCode(OP_BUILD_ATTR_PTR, index);
  504. }
  505. // [:], [:b]
  506. // [a], [a:], [a:b]
  507. void exprSubscript() {
  508. if(match(TK(":"))){
  509. emitCode(OP_LOAD_NONE);
  510. if(match(TK("]"))){
  511. emitCode(OP_LOAD_NONE);
  512. }else{
  513. EXPR();
  514. consume(TK("]"));
  515. }
  516. emitCode(OP_BUILD_SLICE);
  517. }else{
  518. EXPR();
  519. if(match(TK(":"))){
  520. if(match(TK("]"))){
  521. emitCode(OP_LOAD_NONE);
  522. }else{
  523. EXPR();
  524. consume(TK("]"));
  525. }
  526. emitCode(OP_BUILD_SLICE);
  527. }else{
  528. consume(TK("]"));
  529. }
  530. }
  531. emitCode(OP_BUILD_INDEX_PTR);
  532. }
  533. void exprValue() {
  534. _TokenType op = parser->previous.type;
  535. switch (op) {
  536. case TK("None"): emitCode(OP_LOAD_NONE); break;
  537. case TK("True"): emitCode(OP_LOAD_TRUE); break;
  538. case TK("False"): emitCode(OP_LOAD_FALSE); break;
  539. case TK("..."): emitCode(OP_LOAD_ELLIPSIS); break;
  540. default: UNREACHABLE();
  541. }
  542. }
  543. void keepOpcodeLine(){
  544. int i = getCode()->co_code.size() - 1;
  545. getCode()->co_code[i].line = getCode()->co_code[i-1].line;
  546. }
  547. int emitCode(Opcode opcode, int arg=-1) {
  548. int line = parser->previous.line;
  549. getCode()->co_code.push_back(
  550. ByteCode{(uint8_t)opcode, arg, (uint16_t)line}
  551. );
  552. return getCode()->co_code.size() - 1;
  553. }
  554. inline void patchJump(int addr_index) {
  555. int target = getCode()->co_code.size();
  556. getCode()->co_code[addr_index].arg = target;
  557. }
  558. void compileBlockBody(){
  559. __compileBlockBody(&Compiler::compileStatement);
  560. }
  561. void __compileBlockBody(CompilerAction action) {
  562. consume(TK(":"));
  563. if(!matchNewLines(mode()==SINGLE_MODE)){
  564. syntaxError("expected a new line after ':'");
  565. }
  566. consume(TK("@indent"));
  567. while (peek() != TK("@dedent")) {
  568. matchNewLines();
  569. (this->*action)();
  570. matchNewLines();
  571. }
  572. consume(TK("@dedent"));
  573. }
  574. Token compileImportPath() {
  575. consume(TK("@id"));
  576. Token tkmodule = parser->previous;
  577. int index = getCode()->addName(tkmodule.str(), NAME_GLOBAL);
  578. emitCode(OP_IMPORT_NAME, index);
  579. return tkmodule;
  580. }
  581. // import module1 [as alias1 [, module2 [as alias2 ...]]
  582. void compileRegularImport() {
  583. do {
  584. Token tkmodule = compileImportPath();
  585. if (match(TK("as"))) {
  586. consume(TK("@id"));
  587. tkmodule = parser->previous;
  588. }
  589. int index = getCode()->addName(tkmodule.str(), NAME_GLOBAL);
  590. emitCode(OP_STORE_NAME_PTR, index);
  591. } while (match(TK(",")));
  592. consumeEndStatement();
  593. }
  594. void parsePrecedence(Precedence precedence) {
  595. lexToken();
  596. GrammarFn prefix = rules[parser->previous.type].prefix;
  597. if (prefix == nullptr) syntaxError(_Str("expected an expression, but got ") + TK_STR(parser->previous.type));
  598. (this->*prefix)();
  599. while (rules[peek()].precedence >= precedence) {
  600. lexToken();
  601. _TokenType op = parser->previous.type;
  602. GrammarFn infix = rules[op].infix;
  603. if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true");
  604. (this->*infix)();
  605. }
  606. }
  607. void compileIfStatement() {
  608. matchNewLines();
  609. EXPR_TUPLE();
  610. int ifpatch = emitCode(OP_POP_JUMP_IF_FALSE);
  611. compileBlockBody();
  612. if (match(TK("elif"))) {
  613. int exit_jump = emitCode(OP_JUMP_ABSOLUTE);
  614. patchJump(ifpatch);
  615. compileIfStatement();
  616. patchJump(exit_jump);
  617. } else if (match(TK("else"))) {
  618. int exit_jump = emitCode(OP_JUMP_ABSOLUTE);
  619. patchJump(ifpatch);
  620. compileBlockBody();
  621. patchJump(exit_jump);
  622. } else {
  623. patchJump(ifpatch);
  624. }
  625. }
  626. Loop& enterLoop(){
  627. Loop lp((int)getCode()->co_code.size());
  628. loops.push(lp);
  629. return loops.top();
  630. }
  631. void exitLoop(){
  632. Loop& lp = loops.top();
  633. for(int addr : lp.breaks) patchJump(addr);
  634. loops.pop();
  635. }
  636. void compileWhileLoop() {
  637. Loop& loop = enterLoop();
  638. EXPR_TUPLE();
  639. int patch = emitCode(OP_POP_JUMP_IF_FALSE);
  640. compileBlockBody();
  641. emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine();
  642. patchJump(patch);
  643. exitLoop();
  644. }
  645. void EXPR_FOR_VARS(){
  646. int size = 0;
  647. do {
  648. consume(TK("@id"));
  649. exprName(); size++;
  650. } while (match(TK(",")));
  651. if(size > 1) emitCode(OP_BUILD_SMART_TUPLE, size);
  652. }
  653. void compileForLoop() {
  654. EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE();
  655. emitCode(OP_GET_ITER);
  656. Loop& loop = enterLoop();
  657. int patch = emitCode(OP_FOR_ITER);
  658. compileBlockBody();
  659. emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine();
  660. patchJump(patch);
  661. exitLoop();
  662. }
  663. void compileStatement() {
  664. if (match(TK("break"))) {
  665. if (loops.empty()) syntaxError("'break' outside loop");
  666. consumeEndStatement();
  667. int patch = emitCode(OP_SAFE_JUMP_ABSOLUTE);
  668. getLoop().breaks.push_back(patch);
  669. } else if (match(TK("continue"))) {
  670. if (loops.empty()) syntaxError("'continue' not properly in loop");
  671. consumeEndStatement();
  672. emitCode(OP_JUMP_ABSOLUTE, getLoop().start);
  673. } else if (match(TK("return"))) {
  674. if (codes.size() == 1)
  675. syntaxError("'return' outside function");
  676. if(matchEndStatement()){
  677. emitCode(OP_LOAD_NONE);
  678. }else{
  679. EXPR_TUPLE();
  680. consumeEndStatement();
  681. }
  682. emitCode(OP_RETURN_VALUE);
  683. } else if (match(TK("if"))) {
  684. compileIfStatement();
  685. } else if (match(TK("while"))) {
  686. compileWhileLoop();
  687. } else if (match(TK("for"))) {
  688. compileForLoop();
  689. } else if(match(TK("assert"))){
  690. EXPR();
  691. emitCode(OP_ASSERT);
  692. consumeEndStatement();
  693. } else if(match(TK("label"))){
  694. if(mode() != EXEC_MODE) syntaxError("'label' is only available in EXEC_MODE");
  695. consume(TK(".")); consume(TK("@id"));
  696. getCode()->addLabel(parser->previous.str());
  697. consumeEndStatement();
  698. } else if(match(TK("goto"))){
  699. // https://entrian.com/goto/
  700. if(mode() != EXEC_MODE) syntaxError("'goto' is only available in EXEC_MODE");
  701. consume(TK(".")); consume(TK("@id"));
  702. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(parser->previous.str())));
  703. emitCode(OP_GOTO);
  704. consumeEndStatement();
  705. } else if(match(TK("raise"))){
  706. consume(TK("@id")); // dummy exception type
  707. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(parser->previous.str())));
  708. consume(TK("("));EXPR();consume(TK(")"));
  709. emitCode(OP_RAISE_ERROR);
  710. consumeEndStatement();
  711. } else if(match(TK("del"))){
  712. EXPR();
  713. emitCode(OP_DELETE_PTR);
  714. consumeEndStatement();
  715. } else if(match(TK("global"))){
  716. consume(TK("@id"));
  717. getCode()->co_global_names.push_back(parser->previous.str());
  718. consumeEndStatement();
  719. } else if(match(TK("pass"))){
  720. consumeEndStatement();
  721. } else {
  722. EXPR_ANY();
  723. consumeEndStatement();
  724. // If last op is not an assignment, pop the result.
  725. uint8_t lastOp = getCode()->co_code.back().op;
  726. if( lastOp != OP_STORE_NAME_PTR && lastOp != OP_STORE_PTR){
  727. if(mode()==SINGLE_MODE && parser->indents.top() == 0){
  728. emitCode(OP_PRINT_EXPR);
  729. }
  730. emitCode(OP_POP_TOP);
  731. }
  732. }
  733. }
  734. void compileClass(){
  735. consume(TK("@id"));
  736. int clsNameIdx = getCode()->addName(parser->previous.str(), NAME_GLOBAL);
  737. int superClsNameIdx = -1;
  738. if(match(TK("("))){
  739. consume(TK("@id"));
  740. superClsNameIdx = getCode()->addName(parser->previous.str(), NAME_GLOBAL);
  741. consume(TK(")"));
  742. }
  743. emitCode(OP_LOAD_NONE);
  744. isCompilingClass = true;
  745. __compileBlockBody(&Compiler::compileFunction);
  746. isCompilingClass = false;
  747. if(superClsNameIdx == -1) emitCode(OP_LOAD_NONE);
  748. else emitCode(OP_LOAD_NAME_PTR, superClsNameIdx);
  749. emitCode(OP_BUILD_CLASS, clsNameIdx);
  750. }
  751. void __compileFunctionArgs(_Func func){
  752. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  753. do {
  754. if(state == 3) syntaxError("**kwargs should be the last argument");
  755. matchNewLines();
  756. if(match(TK("*"))){
  757. if(state < 1) state = 1;
  758. else syntaxError("*args should be placed before **kwargs");
  759. }
  760. else if(match(TK("**"))){
  761. state = 3;
  762. }
  763. consume(TK("@id"));
  764. const _Str& name = parser->previous.str();
  765. if(func->hasName(name)) syntaxError("duplicate argument name");
  766. if(state == 0 && peek() == TK("=")) state = 2;
  767. switch (state)
  768. {
  769. case 0: func->args.push_back(name); break;
  770. case 1: func->starredArg = name; state+=1; break;
  771. case 2:
  772. consume(TK("="));
  773. func->kwArgs[name] = consumeLiteral();
  774. func->kwArgsOrder.push_back(name);
  775. break;
  776. case 3: syntaxError("**kwargs is not supported yet"); break;
  777. }
  778. } while (match(TK(",")));
  779. }
  780. void compileFunction(){
  781. if(isCompilingClass){
  782. if(match(TK("pass"))) return;
  783. consume(TK("def"));
  784. }
  785. _Func func = std::make_shared<Function>();
  786. consume(TK("@id"));
  787. func->name = parser->previous.str();
  788. if (match(TK("(")) && !match(TK(")"))) {
  789. __compileFunctionArgs(func);
  790. consume(TK(")"));
  791. }
  792. func->code = std::make_shared<CodeObject>(parser->src, func->name);
  793. this->codes.push(func->code);
  794. compileBlockBody();
  795. this->codes.pop();
  796. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyFunction(func)));
  797. if(!isCompilingClass) emitCode(OP_STORE_FUNCTION);
  798. }
  799. PyVar consumeLiteral(){
  800. if(match(TK("-"))){
  801. consume(TK("@num"));
  802. PyVar val = parser->previous.value;
  803. return vm->numNegated(val);
  804. }
  805. if(match(TK("@num"))) return parser->previous.value;
  806. if(match(TK("@str"))) return parser->previous.value;
  807. if(match(TK("True"))) return vm->PyBool(true);
  808. if(match(TK("False"))) return vm->PyBool(false);
  809. if(match(TK("None"))) return vm->None;
  810. if(match(TK("..."))) return vm->Ellipsis;
  811. syntaxError(_Str("expect a literal, not ") + TK_STR(parser->current.type));
  812. return nullptr;
  813. }
  814. void compileTopLevelStatement() {
  815. if (match(TK("class"))) {
  816. compileClass();
  817. } else if (match(TK("def"))) {
  818. compileFunction();
  819. } else if (match(TK("import"))) {
  820. compileRegularImport();
  821. } else {
  822. compileStatement();
  823. }
  824. }
  825. _Code __fillCode(){
  826. _Code code = std::make_shared<CodeObject>(parser->src, _Str("<module>"), mode());
  827. codes.push(code);
  828. // Lex initial tokens. current <-- next.
  829. lexToken();
  830. lexToken();
  831. matchNewLines();
  832. if(mode()==EVAL_MODE) {
  833. EXPR_TUPLE();
  834. consume(TK("@eof"));
  835. return code;
  836. }
  837. while (!match(TK("@eof"))) {
  838. compileTopLevelStatement();
  839. matchNewLines();
  840. }
  841. return code;
  842. }
  843. /***** Error Reporter *****/
  844. _Str getLineSnapshot(){
  845. int lineno = parser->current_line;
  846. if(parser->peekChar() == '\n') lineno--;
  847. return parser->src->snapshot(lineno);
  848. }
  849. void syntaxError(_Str msg){
  850. throw CompileError("SyntaxError", msg, getLineSnapshot());
  851. }
  852. void indentationError(_Str msg){
  853. throw CompileError("IndentationError", msg, getLineSnapshot());
  854. }
  855. void unexpectedError(_Str msg){
  856. throw CompileError("UnexpectedError", msg, getLineSnapshot());
  857. }
  858. };
  859. _Code compile(VM* vm, const char* source, _Str filename, CompileMode mode=EXEC_MODE) {
  860. Compiler compiler(vm, source, filename, mode);
  861. try{
  862. return compiler.__fillCode();
  863. }catch(std::exception& e){
  864. if(const _Error* _ = dynamic_cast<const _Error*>(&e)){
  865. (*vm->_stderr) << e.what() << '\n';
  866. }else{
  867. auto ce = CompileError("UnexpectedError", e.what(), compiler.getLineSnapshot());
  868. (*vm->_stderr) << ce.what() << '\n';
  869. }
  870. return nullptr;
  871. }
  872. }