compiler.h 39 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071
  1. #pragma once
  2. #include "parser.h"
  3. #include "error.h"
  4. #include "vm.h"
  5. class Compiler;
  6. typedef void (Compiler::*GrammarFn)();
  7. typedef void (Compiler::*CompilerAction)();
  8. struct GrammarRule{
  9. GrammarFn prefix;
  10. GrammarFn infix;
  11. Precedence precedence;
  12. };
  13. struct Loop {
  14. int start;
  15. std::vector<int> breaks;
  16. Loop(int start) : start(start) {}
  17. };
  18. class Compiler {
  19. public:
  20. pkpy::unique_ptr<Parser> parser;
  21. std::stack<_Code> codes;
  22. std::stack<Loop> loops;
  23. bool isCompilingClass = false;
  24. int lexingCnt = 0;
  25. VM* vm;
  26. emhash8::HashMap<_TokenType, GrammarRule> rules;
  27. _Code getCode() {
  28. return codes.top();
  29. }
  30. CompileMode mode() {
  31. return parser->src->mode;
  32. }
  33. Loop& getLoop() {
  34. return loops.top();
  35. }
  36. Compiler(VM* vm, const char* source, _Str filename, CompileMode mode){
  37. this->vm = vm;
  38. this->parser = pkpy::make_unique<Parser>(
  39. pkpy::make_shared<SourceMetadata>(source, filename, mode)
  40. );
  41. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  42. #define METHOD(name) &Compiler::name
  43. #define NO_INFIX nullptr, PREC_NONE
  44. for(_TokenType i=0; i<__TOKENS_LEN; i++) rules[i] = { nullptr, NO_INFIX };
  45. rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB };
  46. rules[TK("->")] = { nullptr, METHOD(exprAttribPtr), PREC_ATTRIB };
  47. rules[TK("(")] = { METHOD(exprGrouping), METHOD(exprCall), PREC_CALL };
  48. rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscript), PREC_SUBSCRIPT };
  49. rules[TK("{")] = { METHOD(exprMap), NO_INFIX };
  50. rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  51. rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM };
  52. rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM };
  53. rules[TK("*")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_FACTOR };
  54. rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  55. rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  56. rules[TK("**")] = { nullptr, METHOD(exprBinaryOp), PREC_EXPONENT };
  57. rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  58. rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  59. rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  60. rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  61. rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  62. rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  63. rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  64. rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  65. rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  66. rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  67. rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND };
  68. rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR };
  69. rules[TK("not")] = { METHOD(exprUnaryOp), nullptr, PREC_UNARY };
  70. rules[TK("True")] = { METHOD(exprValue), NO_INFIX };
  71. rules[TK("False")] = { METHOD(exprValue), NO_INFIX };
  72. rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX };
  73. rules[TK("None")] = { METHOD(exprValue), NO_INFIX };
  74. rules[TK("...")] = { METHOD(exprValue), NO_INFIX };
  75. rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
  76. rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
  77. rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
  78. rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
  79. rules[TK("?")] = { nullptr, METHOD(exprTernary), PREC_TERNARY };
  80. rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  81. rules[TK("+=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  82. rules[TK("-=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  83. rules[TK("*=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  84. rules[TK("/=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  85. rules[TK("//=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  86. rules[TK(",")] = { nullptr, METHOD(exprComma), PREC_COMMA };
  87. rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  88. rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  89. rules[TK("&")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_BITWISE_AND };
  90. rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR };
  91. rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  92. #undef METHOD
  93. #undef NO_INFIX
  94. #define EXPR() parsePrecedence(PREC_TERNARY) // no '=' and ',' just a simple expression
  95. #define EXPR_TUPLE() parsePrecedence(PREC_COMMA) // no '=', but ',' is allowed
  96. #define EXPR_ANY() parsePrecedence(PREC_ASSIGNMENT)
  97. }
  98. _Str eatStringUntil(char quote) {
  99. std::vector<char> buff;
  100. while (true) {
  101. char c = parser->eatCharIncludeNewLine();
  102. if (c == quote) break;
  103. if (c == '\0' || c == '\n') syntaxError("EOL while scanning string literal");
  104. if (c == '\\') {
  105. switch (parser->eatCharIncludeNewLine()) {
  106. case '"': buff.push_back('"'); break;
  107. case '\'': buff.push_back('\''); break;
  108. case '\\': buff.push_back('\\'); break;
  109. case 'n': buff.push_back('\n'); break;
  110. case 'r': buff.push_back('\r'); break;
  111. case 't': buff.push_back('\t'); break;
  112. case '\n': case '\r': break;
  113. default: syntaxError("invalid escape character");
  114. }
  115. } else {
  116. buff.push_back(c);
  117. }
  118. }
  119. return _Str(buff.data(), buff.size());
  120. }
  121. void eatString(char quote, bool fstr) {
  122. _Str s = eatStringUntil(quote);
  123. if(fstr){
  124. parser->setNextToken(TK("@fstr"), vm->PyStr(s));
  125. }else{
  126. parser->setNextToken(TK("@str"), vm->PyStr(s));
  127. }
  128. }
  129. void eatNumber() {
  130. static const std::regex pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?");
  131. std::smatch m;
  132. const char* i = parser->token_start;
  133. while(*i != '\n' && *i != '\0') i++;
  134. std::string s = std::string(parser->token_start, i);
  135. try{
  136. if (std::regex_search(s, m, pattern)) {
  137. // here is m.length()-1, since the first char is eaten by lexToken()
  138. for(int j=0; j<m.length()-1; j++) parser->eatChar();
  139. int base = 10;
  140. size_t size;
  141. if (m[1].matched) base = 16;
  142. if (m[2].matched) {
  143. if(base == 16) syntaxError("hex literal should not contain a dot");
  144. parser->setNextToken(TK("@num"), vm->PyFloat(std::stod(m[0], &size)));
  145. } else {
  146. parser->setNextToken(TK("@num"), vm->PyInt(std::stoll(m[0], &size, base)));
  147. }
  148. if (size != m.length()) throw std::runtime_error("length mismatch");
  149. }
  150. }catch(std::exception& _){
  151. syntaxError("invalid number literal");
  152. }
  153. }
  154. void lexToken(){
  155. lexingCnt++;
  156. _lexToken();
  157. lexingCnt--;
  158. }
  159. // Lex the next token and set it as the next token.
  160. void _lexToken() {
  161. parser->previous = parser->current;
  162. parser->current = parser->nextToken();
  163. //_Str _info = parser->current.info(); std::cout << _info << '[' << parser->current_line << ']' << std::endl;
  164. while (parser->peekChar() != '\0') {
  165. parser->token_start = parser->current_char;
  166. char c = parser->eatCharIncludeNewLine();
  167. switch (c) {
  168. case '\'': case '"': eatString(c, false); return;
  169. case '#': parser->skipLineComment(); break;
  170. case '{': parser->setNextToken(TK("{")); return;
  171. case '}': parser->setNextToken(TK("}")); return;
  172. case ',': parser->setNextToken(TK(",")); return;
  173. case ':': parser->setNextToken(TK(":")); return;
  174. case ';': parser->setNextToken(TK(";")); return;
  175. case '(': parser->setNextToken(TK("(")); return;
  176. case ')': parser->setNextToken(TK(")")); return;
  177. case '[': parser->setNextToken(TK("[")); return;
  178. case ']': parser->setNextToken(TK("]")); return;
  179. case '%': parser->setNextToken(TK("%")); return;
  180. case '&': parser->setNextToken(TK("&")); return;
  181. case '|': parser->setNextToken(TK("|")); return;
  182. case '^': parser->setNextToken(TK("^")); return;
  183. case '?': parser->setNextToken(TK("?")); return;
  184. case '.': {
  185. if(parser->matchChar('.')) {
  186. if(parser->matchChar('.')) {
  187. parser->setNextToken(TK("..."));
  188. } else {
  189. syntaxError("invalid token '..'");
  190. }
  191. } else {
  192. parser->setNextToken(TK("."));
  193. }
  194. return;
  195. }
  196. case '=': parser->setNextTwoCharToken('=', TK("="), TK("==")); return;
  197. case '+': parser->setNextTwoCharToken('=', TK("+"), TK("+=")); return;
  198. case '>': {
  199. if(parser->matchChar('=')) parser->setNextToken(TK(">="));
  200. else if(parser->matchChar('>')) parser->setNextToken(TK(">>"));
  201. else parser->setNextToken(TK(">"));
  202. return;
  203. }
  204. case '<': {
  205. if(parser->matchChar('=')) parser->setNextToken(TK("<="));
  206. else if(parser->matchChar('<')) parser->setNextToken(TK("<<"));
  207. else parser->setNextToken(TK("<"));
  208. return;
  209. }
  210. case '-': {
  211. if(parser->matchChar('=')) parser->setNextToken(TK("-="));
  212. else if(parser->matchChar('>')) parser->setNextToken(TK("->"));
  213. else parser->setNextToken(TK("-"));
  214. return;
  215. }
  216. case '!':
  217. if(parser->matchChar('=')) parser->setNextToken(TK("!="));
  218. else syntaxError("expected '=' after '!'");
  219. break;
  220. case '*':
  221. if (parser->matchChar('*')) {
  222. parser->setNextToken(TK("**")); // '**'
  223. } else {
  224. parser->setNextTwoCharToken('=', TK("*"), TK("*="));
  225. }
  226. return;
  227. case '/':
  228. if(parser->matchChar('/')) {
  229. parser->setNextTwoCharToken('=', TK("//"), TK("//="));
  230. } else {
  231. parser->setNextTwoCharToken('=', TK("/"), TK("/="));
  232. }
  233. return;
  234. case '\r': break; // just ignore '\r'
  235. case ' ': case '\t': parser->eatSpaces(); break;
  236. case '\n': {
  237. parser->setNextToken(TK("@eol"));
  238. if(!parser->eatIndentation()) indentationError("unindent does not match any outer indentation level");
  239. return;
  240. }
  241. default: {
  242. if(c == 'f'){
  243. if(parser->matchChar('\'')) {eatString('\'', true); return;}
  244. if(parser->matchChar('"')) {eatString('"', true); return;}
  245. }
  246. if (isdigit(c)) {
  247. eatNumber();
  248. return;
  249. }
  250. switch (parser->eatName())
  251. {
  252. case 0: break;
  253. case 1: syntaxError("invalid char: " + std::string(1, c));
  254. case 2: syntaxError("invalid utf8 sequence: " + std::string(1, c));
  255. case 3: syntaxError("@id contains invalid char"); break;
  256. case 4: syntaxError("invalid JSON token"); break;
  257. default: UNREACHABLE();
  258. }
  259. return;
  260. }
  261. }
  262. }
  263. parser->token_start = parser->current_char;
  264. parser->setNextToken(TK("@eof"));
  265. }
  266. inline _TokenType peek() {
  267. return parser->current.type;
  268. }
  269. // not sure this will work
  270. _TokenType peekNext() {
  271. if(parser->nexts.empty()) return TK("@eof");
  272. return parser->nexts.front().type;
  273. }
  274. bool match(_TokenType expected) {
  275. if (peek() != expected) return false;
  276. lexToken();
  277. return true;
  278. }
  279. void consume(_TokenType expected) {
  280. if (!match(expected)){
  281. _StrStream ss;
  282. ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(peek()) << "'";
  283. syntaxError(ss.str());
  284. }
  285. }
  286. bool matchNewLines(bool repl_throw=false) {
  287. bool consumed = false;
  288. if (peek() == TK("@eol")) {
  289. while (peek() == TK("@eol")) lexToken();
  290. consumed = true;
  291. }
  292. if (repl_throw && peek() == TK("@eof")){
  293. throw NeedMoreLines(isCompilingClass);
  294. }
  295. return consumed;
  296. }
  297. bool matchEndStatement() {
  298. if (match(TK(";"))) {
  299. matchNewLines();
  300. return true;
  301. }
  302. if (matchNewLines() || peek() == TK("@eof"))
  303. return true;
  304. if (peek() == TK("@dedent")) return true;
  305. return false;
  306. }
  307. void consumeEndStatement() {
  308. if (!matchEndStatement()) syntaxError("expected statement end");
  309. }
  310. void exprLiteral() {
  311. PyVar value = parser->previous.value;
  312. int index = getCode()->addConst(value);
  313. emitCode(OP_LOAD_CONST, index);
  314. }
  315. void exprFString() {
  316. static const std::regex pattern(R"(\{(.*?)\})");
  317. PyVar value = parser->previous.value;
  318. std::string s = vm->PyStr_AS_C(value).str();
  319. std::sregex_iterator begin(s.begin(), s.end(), pattern);
  320. std::sregex_iterator end;
  321. int size = 0;
  322. int i = 0;
  323. for(auto it = begin; it != end; it++) {
  324. std::smatch m = *it;
  325. if (i < m.position()) {
  326. std::string literal = s.substr(i, m.position() - i);
  327. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(literal)));
  328. size++;
  329. }
  330. emitCode(OP_LOAD_EVAL_FN);
  331. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(m[1].str())));
  332. emitCode(OP_CALL, 1);
  333. size++;
  334. i = (int)(m.position() + m.length());
  335. }
  336. if (i < s.size()) {
  337. std::string literal = s.substr(i, s.size() - i);
  338. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(literal)));
  339. size++;
  340. }
  341. emitCode(OP_BUILD_STRING, size);
  342. }
  343. void exprLambda() {
  344. _Func func = pkpy::make_shared<Function>();
  345. func->name = "<lambda>";
  346. if(!match(TK(":"))){
  347. __compileFunctionArgs(func, false);
  348. consume(TK(":"));
  349. }
  350. func->code = pkpy::make_shared<CodeObject>(parser->src, func->name);
  351. this->codes.push(func->code);
  352. EXPR_TUPLE();
  353. emitCode(OP_RETURN_VALUE);
  354. this->codes.pop();
  355. emitCode(OP_LOAD_LAMBDA, getCode()->addConst(vm->PyFunction(func)));
  356. }
  357. void exprAssign() {
  358. _TokenType op = parser->previous.type;
  359. if(op == TK("=")) { // a = (expr)
  360. EXPR_TUPLE();
  361. emitCode(OP_STORE_PTR);
  362. }else{ // a += (expr) -> a = a + (expr)
  363. // TODO: optimization is needed for inplace operators
  364. emitCode(OP_DUP_TOP);
  365. EXPR();
  366. switch (op) {
  367. case TK("+="): emitCode(OP_BINARY_OP, 0); break;
  368. case TK("-="): emitCode(OP_BINARY_OP, 1); break;
  369. case TK("*="): emitCode(OP_BINARY_OP, 2); break;
  370. case TK("/="): emitCode(OP_BINARY_OP, 3); break;
  371. case TK("//="): emitCode(OP_BINARY_OP, 4); break;
  372. default: UNREACHABLE();
  373. }
  374. emitCode(OP_STORE_PTR);
  375. }
  376. }
  377. void exprComma() {
  378. int size = 1; // an expr is in the stack now
  379. do {
  380. EXPR(); // NOTE: "1," will fail, "1,2" will be ok
  381. size++;
  382. } while(match(TK(",")));
  383. emitCode(OP_BUILD_SMART_TUPLE, size);
  384. }
  385. void exprOr() {
  386. int patch = emitCode(OP_JUMP_IF_TRUE_OR_POP);
  387. parsePrecedence(PREC_LOGICAL_OR);
  388. patchJump(patch);
  389. }
  390. void exprAnd() {
  391. int patch = emitCode(OP_JUMP_IF_FALSE_OR_POP);
  392. parsePrecedence(PREC_LOGICAL_AND);
  393. patchJump(patch);
  394. }
  395. void exprTernary() {
  396. int patch = emitCode(OP_POP_JUMP_IF_FALSE);
  397. EXPR(); // if true
  398. int patch2 = emitCode(OP_JUMP_ABSOLUTE);
  399. consume(TK(":"));
  400. patchJump(patch);
  401. EXPR(); // if false
  402. patchJump(patch2);
  403. }
  404. void exprBinaryOp() {
  405. _TokenType op = parser->previous.type;
  406. parsePrecedence((Precedence)(rules[op].precedence + 1));
  407. switch (op) {
  408. case TK("+"): emitCode(OP_BINARY_OP, 0); break;
  409. case TK("-"): emitCode(OP_BINARY_OP, 1); break;
  410. case TK("*"): emitCode(OP_BINARY_OP, 2); break;
  411. case TK("/"): emitCode(OP_BINARY_OP, 3); break;
  412. case TK("//"): emitCode(OP_BINARY_OP, 4); break;
  413. case TK("%"): emitCode(OP_BINARY_OP, 5); break;
  414. case TK("**"): emitCode(OP_BINARY_OP, 6); break;
  415. case TK("<"): emitCode(OP_COMPARE_OP, 0); break;
  416. case TK("<="): emitCode(OP_COMPARE_OP, 1); break;
  417. case TK("=="): emitCode(OP_COMPARE_OP, 2); break;
  418. case TK("!="): emitCode(OP_COMPARE_OP, 3); break;
  419. case TK(">"): emitCode(OP_COMPARE_OP, 4); break;
  420. case TK(">="): emitCode(OP_COMPARE_OP, 5); break;
  421. case TK("in"): emitCode(OP_CONTAINS_OP, 0); break;
  422. case TK("not in"): emitCode(OP_CONTAINS_OP, 1); break;
  423. case TK("is"): emitCode(OP_IS_OP, 0); break;
  424. case TK("is not"): emitCode(OP_IS_OP, 1); break;
  425. case TK("<<"): emitCode(OP_BITWISE_OP, 0); break;
  426. case TK(">>"): emitCode(OP_BITWISE_OP, 1); break;
  427. case TK("&"): emitCode(OP_BITWISE_OP, 2); break;
  428. case TK("|"): emitCode(OP_BITWISE_OP, 3); break;
  429. case TK("^"): emitCode(OP_BITWISE_OP, 4); break;
  430. default: UNREACHABLE();
  431. }
  432. }
  433. void exprUnaryOp() {
  434. _TokenType op = parser->previous.type;
  435. matchNewLines();
  436. parsePrecedence((Precedence)(PREC_UNARY + 1));
  437. switch (op) {
  438. case TK("-"): emitCode(OP_UNARY_NEGATIVE); break;
  439. case TK("not"): emitCode(OP_UNARY_NOT); break;
  440. case TK("&"): emitCode(OP_UNARY_REF); break;
  441. case TK("*"): emitCode(OP_UNARY_DEREF); break;
  442. default: UNREACHABLE();
  443. }
  444. }
  445. void exprGrouping() {
  446. matchNewLines(mode()==SINGLE_MODE);
  447. EXPR_TUPLE();
  448. matchNewLines(mode()==SINGLE_MODE);
  449. consume(TK(")"));
  450. }
  451. void exprList() {
  452. int _patch = emitCode(OP_NO_OP);
  453. int _body_start = getCode()->co_code.size();
  454. int ARGC = 0;
  455. do {
  456. matchNewLines(mode()==SINGLE_MODE);
  457. if (peek() == TK("]")) break;
  458. EXPR(); ARGC++;
  459. matchNewLines(mode()==SINGLE_MODE);
  460. if(ARGC == 1 && match(TK("for"))) goto __LISTCOMP;
  461. } while (match(TK(",")));
  462. matchNewLines(mode()==SINGLE_MODE);
  463. consume(TK("]"));
  464. emitCode(OP_BUILD_LIST, ARGC);
  465. return;
  466. __LISTCOMP:
  467. int _body_end = getCode()->co_code.size();
  468. getCode()->co_code[_patch].op = OP_JUMP_ABSOLUTE;
  469. getCode()->co_code[_patch].arg = _body_end;
  470. emitCode(OP_BUILD_LIST, 0);
  471. EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE();
  472. matchNewLines(mode()==SINGLE_MODE);
  473. int _skipPatch = emitCode(OP_JUMP_ABSOLUTE);
  474. int _cond_start = getCode()->co_code.size();
  475. if(match(TK("if"))) EXPR_TUPLE();
  476. int _cond_end = getCode()->co_code.size();
  477. patchJump(_skipPatch);
  478. emitCode(OP_GET_ITER);
  479. Loop& loop = enterLoop();
  480. int patch = emitCode(OP_FOR_ITER);
  481. if(_cond_end != _cond_start) { // there is an if condition
  482. getCode()->__moveToEnd(_cond_start, _cond_end);
  483. int ifpatch = emitCode(OP_POP_JUMP_IF_FALSE);
  484. getCode()->__moveToEnd(_body_start, _body_end);
  485. emitCode(OP_LIST_APPEND);
  486. patchJump(ifpatch);
  487. }else{
  488. getCode()->__moveToEnd(_body_start, _body_end);
  489. emitCode(OP_LIST_APPEND);
  490. }
  491. emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine();
  492. patchJump(patch);
  493. exitLoop();
  494. matchNewLines(mode()==SINGLE_MODE);
  495. consume(TK("]"));
  496. }
  497. void exprMap() {
  498. int size = 0;
  499. do {
  500. matchNewLines(mode()==SINGLE_MODE);
  501. if (peek() == TK("}")) break;
  502. EXPR();consume(TK(":"));EXPR();
  503. size++;
  504. matchNewLines(mode()==SINGLE_MODE);
  505. } while (match(TK(",")));
  506. matchNewLines();
  507. consume(TK("}"));
  508. emitCode(OP_BUILD_MAP, size);
  509. }
  510. void exprCall() {
  511. int ARGC = 0;
  512. int KWARGC = 0;
  513. do {
  514. matchNewLines(mode()==SINGLE_MODE);
  515. if (peek() == TK(")")) break;
  516. if(peek() == TK("@id") && peekNext() == TK("=")) {
  517. consume(TK("@id"));
  518. const _Str& key = parser->previous.str();
  519. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(key)));
  520. consume(TK("="));
  521. EXPR();
  522. KWARGC++;
  523. } else{
  524. if(KWARGC > 0) syntaxError("positional argument follows keyword argument");
  525. EXPR();
  526. ARGC++;
  527. }
  528. matchNewLines(mode()==SINGLE_MODE);
  529. } while (match(TK(",")));
  530. consume(TK(")"));
  531. emitCode(OP_CALL, (KWARGC << 16) | ARGC);
  532. }
  533. void exprName() {
  534. Token tkname = parser->previous;
  535. int index = getCode()->addName(
  536. tkname.str(),
  537. codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL
  538. );
  539. emitCode(OP_LOAD_NAME_PTR, index);
  540. }
  541. void exprAttrib() {
  542. consume(TK("@id"));
  543. const _Str& name = parser->previous.str();
  544. int index = getCode()->addName(name, NAME_ATTR);
  545. emitCode(OP_BUILD_ATTR_PTR, index);
  546. }
  547. void exprAttribPtr(){
  548. consume(TK("@id"));
  549. const _Str& name = parser->previous.str();
  550. int index = getCode()->addName(name, NAME_ATTR);
  551. emitCode(OP_BUILD_ATTR_PTR_PTR, index);
  552. }
  553. // [:], [:b]
  554. // [a], [a:], [a:b]
  555. void exprSubscript() {
  556. if(match(TK(":"))){
  557. emitCode(OP_LOAD_NONE);
  558. if(match(TK("]"))){
  559. emitCode(OP_LOAD_NONE);
  560. }else{
  561. EXPR();
  562. consume(TK("]"));
  563. }
  564. emitCode(OP_BUILD_SLICE);
  565. }else{
  566. EXPR();
  567. if(match(TK(":"))){
  568. if(match(TK("]"))){
  569. emitCode(OP_LOAD_NONE);
  570. }else{
  571. EXPR();
  572. consume(TK("]"));
  573. }
  574. emitCode(OP_BUILD_SLICE);
  575. }else{
  576. consume(TK("]"));
  577. }
  578. }
  579. emitCode(OP_BUILD_INDEX_PTR);
  580. }
  581. void exprValue() {
  582. _TokenType op = parser->previous.type;
  583. switch (op) {
  584. case TK("None"): emitCode(OP_LOAD_NONE); break;
  585. case TK("True"): emitCode(OP_LOAD_TRUE); break;
  586. case TK("False"): emitCode(OP_LOAD_FALSE); break;
  587. case TK("..."): emitCode(OP_LOAD_ELLIPSIS); break;
  588. default: UNREACHABLE();
  589. }
  590. }
  591. void keepOpcodeLine(){
  592. int i = getCode()->co_code.size() - 1;
  593. getCode()->co_code[i].line = getCode()->co_code[i-1].line;
  594. }
  595. int emitCode(Opcode opcode, int arg=-1) {
  596. int line = parser->previous.line;
  597. getCode()->co_code.push_back(
  598. ByteCode{(uint8_t)opcode, arg, (uint16_t)line}
  599. );
  600. return getCode()->co_code.size() - 1;
  601. }
  602. inline void patchJump(int addr_index) {
  603. int target = getCode()->co_code.size();
  604. getCode()->co_code[addr_index].arg = target;
  605. }
  606. void compileBlockBody(){
  607. __compileBlockBody(&Compiler::compileStatement);
  608. }
  609. void __compileBlockBody(CompilerAction action) {
  610. consume(TK(":"));
  611. if(!matchNewLines(mode()==SINGLE_MODE)){
  612. syntaxError("expected a new line after ':'");
  613. }
  614. consume(TK("@indent"));
  615. while (peek() != TK("@dedent")) {
  616. matchNewLines();
  617. (this->*action)();
  618. matchNewLines();
  619. }
  620. consume(TK("@dedent"));
  621. }
  622. Token compileImportPath() {
  623. consume(TK("@id"));
  624. Token tkmodule = parser->previous;
  625. int index = getCode()->addName(tkmodule.str(), NAME_GLOBAL);
  626. emitCode(OP_IMPORT_NAME, index);
  627. return tkmodule;
  628. }
  629. // import a as b
  630. void compileRegularImport() {
  631. do {
  632. Token tkmodule = compileImportPath();
  633. if (match(TK("as"))) {
  634. consume(TK("@id"));
  635. tkmodule = parser->previous;
  636. }
  637. int index = getCode()->addName(tkmodule.str(), NAME_GLOBAL);
  638. emitCode(OP_STORE_NAME_PTR, index);
  639. } while (match(TK(",")));
  640. consumeEndStatement();
  641. }
  642. // from a import b as c, d as e
  643. void compileFromImport() {
  644. Token tkmodule = compileImportPath();
  645. consume(TK("import"));
  646. do {
  647. emitCode(OP_DUP_TOP);
  648. consume(TK("@id"));
  649. Token tkname = parser->previous;
  650. int index = getCode()->addName(tkname.str(), NAME_GLOBAL);
  651. emitCode(OP_BUILD_ATTR_PTR, index);
  652. if (match(TK("as"))) {
  653. consume(TK("@id"));
  654. tkname = parser->previous;
  655. }
  656. index = getCode()->addName(tkname.str(), NAME_GLOBAL);
  657. emitCode(OP_STORE_NAME_PTR, index);
  658. } while (match(TK(",")));
  659. emitCode(OP_POP_TOP);
  660. consumeEndStatement();
  661. }
  662. void parsePrecedence(Precedence precedence) {
  663. lexToken();
  664. GrammarFn prefix = rules[parser->previous.type].prefix;
  665. if (prefix == nullptr) syntaxError(_Str("expected an expression, but got ") + TK_STR(parser->previous.type));
  666. (this->*prefix)();
  667. while (rules[peek()].precedence >= precedence) {
  668. lexToken();
  669. _TokenType op = parser->previous.type;
  670. GrammarFn infix = rules[op].infix;
  671. if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true");
  672. (this->*infix)();
  673. }
  674. }
  675. void compileIfStatement() {
  676. matchNewLines();
  677. EXPR_TUPLE();
  678. int ifpatch = emitCode(OP_POP_JUMP_IF_FALSE);
  679. compileBlockBody();
  680. if (match(TK("elif"))) {
  681. int exit_jump = emitCode(OP_JUMP_ABSOLUTE);
  682. patchJump(ifpatch);
  683. compileIfStatement();
  684. patchJump(exit_jump);
  685. } else if (match(TK("else"))) {
  686. int exit_jump = emitCode(OP_JUMP_ABSOLUTE);
  687. patchJump(ifpatch);
  688. compileBlockBody();
  689. patchJump(exit_jump);
  690. } else {
  691. patchJump(ifpatch);
  692. }
  693. }
  694. Loop& enterLoop(){
  695. Loop lp((int)getCode()->co_code.size());
  696. loops.push(lp);
  697. return loops.top();
  698. }
  699. void exitLoop(){
  700. Loop& lp = loops.top();
  701. for(int addr : lp.breaks) patchJump(addr);
  702. loops.pop();
  703. }
  704. void compileWhileLoop() {
  705. Loop& loop = enterLoop();
  706. EXPR_TUPLE();
  707. int patch = emitCode(OP_POP_JUMP_IF_FALSE);
  708. compileBlockBody();
  709. emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine();
  710. patchJump(patch);
  711. exitLoop();
  712. }
  713. void EXPR_FOR_VARS(){
  714. int size = 0;
  715. do {
  716. consume(TK("@id"));
  717. exprName(); size++;
  718. } while (match(TK(",")));
  719. if(size > 1) emitCode(OP_BUILD_SMART_TUPLE, size);
  720. }
  721. void compileForLoop() {
  722. EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE();
  723. emitCode(OP_GET_ITER);
  724. Loop& loop = enterLoop();
  725. int patch = emitCode(OP_FOR_ITER);
  726. compileBlockBody();
  727. emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine();
  728. patchJump(patch);
  729. exitLoop();
  730. }
  731. void compileStatement() {
  732. if (match(TK("break"))) {
  733. if (loops.empty()) syntaxError("'break' outside loop");
  734. consumeEndStatement();
  735. int patch = emitCode(OP_SAFE_JUMP_ABSOLUTE);
  736. getLoop().breaks.push_back(patch);
  737. } else if (match(TK("continue"))) {
  738. if (loops.empty()) syntaxError("'continue' not properly in loop");
  739. consumeEndStatement();
  740. emitCode(OP_JUMP_ABSOLUTE, getLoop().start);
  741. } else if (match(TK("return"))) {
  742. if (codes.size() == 1)
  743. syntaxError("'return' outside function");
  744. if(matchEndStatement()){
  745. emitCode(OP_LOAD_NONE);
  746. }else{
  747. EXPR_TUPLE();
  748. consumeEndStatement();
  749. }
  750. emitCode(OP_RETURN_VALUE);
  751. } else if (match(TK("if"))) {
  752. compileIfStatement();
  753. } else if (match(TK("while"))) {
  754. compileWhileLoop();
  755. } else if (match(TK("for"))) {
  756. compileForLoop();
  757. } else if(match(TK("assert"))){
  758. EXPR();
  759. emitCode(OP_ASSERT);
  760. consumeEndStatement();
  761. } else if(match(TK("with"))){
  762. EXPR();
  763. consume(TK("as"));
  764. consume(TK("@id"));
  765. Token tkname = parser->previous;
  766. int index = getCode()->addName(
  767. tkname.str(),
  768. codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL
  769. );
  770. emitCode(OP_STORE_NAME_PTR, index);
  771. emitCode(OP_LOAD_NAME_PTR, index);
  772. emitCode(OP_WITH_ENTER);
  773. compileBlockBody();
  774. emitCode(OP_LOAD_NAME_PTR, index);
  775. emitCode(OP_WITH_EXIT);
  776. } else if(match(TK("label"))){
  777. if(mode() != EXEC_MODE) syntaxError("'label' is only available in EXEC_MODE");
  778. consume(TK(".")); consume(TK("@id"));
  779. getCode()->addLabel(parser->previous.str());
  780. consumeEndStatement();
  781. } else if(match(TK("goto"))){
  782. // https://entrian.com/goto/
  783. if(mode() != EXEC_MODE) syntaxError("'goto' is only available in EXEC_MODE");
  784. consume(TK(".")); consume(TK("@id"));
  785. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(parser->previous.str())));
  786. emitCode(OP_GOTO);
  787. consumeEndStatement();
  788. } else if(match(TK("raise"))){
  789. consume(TK("@id")); // dummy exception type
  790. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(parser->previous.str())));
  791. consume(TK("("));EXPR();consume(TK(")"));
  792. emitCode(OP_RAISE_ERROR);
  793. consumeEndStatement();
  794. } else if(match(TK("del"))){
  795. EXPR();
  796. emitCode(OP_DELETE_PTR);
  797. consumeEndStatement();
  798. } else if(match(TK("global"))){
  799. consume(TK("@id"));
  800. getCode()->co_global_names.push_back(parser->previous.str());
  801. consumeEndStatement();
  802. } else if(match(TK("pass"))){
  803. consumeEndStatement();
  804. } else {
  805. EXPR_ANY();
  806. consumeEndStatement();
  807. // If last op is not an assignment, pop the result.
  808. uint8_t lastOp = getCode()->co_code.back().op;
  809. if( lastOp != OP_STORE_NAME_PTR && lastOp != OP_STORE_PTR){
  810. if(mode()==SINGLE_MODE && parser->indents.top() == 0){
  811. emitCode(OP_PRINT_EXPR);
  812. }
  813. emitCode(OP_POP_TOP);
  814. }
  815. }
  816. }
  817. void compileClass(){
  818. consume(TK("@id"));
  819. int clsNameIdx = getCode()->addName(parser->previous.str(), NAME_GLOBAL);
  820. int superClsNameIdx = -1;
  821. if(match(TK("("))){
  822. consume(TK("@id"));
  823. superClsNameIdx = getCode()->addName(parser->previous.str(), NAME_GLOBAL);
  824. consume(TK(")"));
  825. }
  826. emitCode(OP_LOAD_NONE);
  827. isCompilingClass = true;
  828. __compileBlockBody(&Compiler::compileFunction);
  829. isCompilingClass = false;
  830. if(superClsNameIdx == -1) emitCode(OP_LOAD_NONE);
  831. else emitCode(OP_LOAD_NAME_PTR, superClsNameIdx);
  832. emitCode(OP_BUILD_CLASS, clsNameIdx);
  833. }
  834. void __compileFunctionArgs(_Func func, bool enableTypeHints){
  835. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  836. do {
  837. if(state == 3) syntaxError("**kwargs should be the last argument");
  838. matchNewLines();
  839. if(match(TK("*"))){
  840. if(state < 1) state = 1;
  841. else syntaxError("*args should be placed before **kwargs");
  842. }
  843. else if(match(TK("**"))){
  844. state = 3;
  845. }
  846. consume(TK("@id"));
  847. const _Str& name = parser->previous.str();
  848. if(func->hasName(name)) syntaxError("duplicate argument name");
  849. // eat type hints
  850. if(enableTypeHints && match(TK(":"))) consume(TK("@id"));
  851. if(state == 0 && peek() == TK("=")) state = 2;
  852. switch (state)
  853. {
  854. case 0: func->args.push_back(name); break;
  855. case 1: func->starredArg = name; state+=1; break;
  856. case 2: {
  857. consume(TK("="));
  858. PyVarOrNull value = readLiteral();
  859. if(value == nullptr){
  860. syntaxError(_Str("expect a literal, not ") + TK_STR(parser->current.type));
  861. }
  862. func->kwArgs[name] = value;
  863. func->kwArgsOrder.push_back(name);
  864. } break;
  865. case 3: syntaxError("**kwargs is not supported yet"); break;
  866. }
  867. } while (match(TK(",")));
  868. }
  869. void compileFunction(){
  870. if(isCompilingClass){
  871. if(match(TK("pass"))) return;
  872. consume(TK("def"));
  873. }
  874. _Func func = pkpy::make_shared<Function>();
  875. consume(TK("@id"));
  876. func->name = parser->previous.str();
  877. if (match(TK("(")) && !match(TK(")"))) {
  878. __compileFunctionArgs(func, true);
  879. consume(TK(")"));
  880. }
  881. // eat type hints
  882. if(match(TK("->"))) consume(TK("@id"));
  883. func->code = pkpy::make_shared<CodeObject>(parser->src, func->name);
  884. this->codes.push(func->code);
  885. compileBlockBody();
  886. this->codes.pop();
  887. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyFunction(func)));
  888. if(!isCompilingClass) emitCode(OP_STORE_FUNCTION);
  889. }
  890. PyVarOrNull readLiteral(){
  891. if(match(TK("-"))){
  892. consume(TK("@num"));
  893. PyVar val = parser->previous.value;
  894. return vm->numNegated(val);
  895. }
  896. if(match(TK("@num"))) return parser->previous.value;
  897. if(match(TK("@str"))) return parser->previous.value;
  898. if(match(TK("True"))) return vm->PyBool(true);
  899. if(match(TK("False"))) return vm->PyBool(false);
  900. if(match(TK("None"))) return vm->None;
  901. if(match(TK("..."))) return vm->Ellipsis;
  902. return nullptr;
  903. }
  904. void compileTopLevelStatement() {
  905. if (match(TK("class"))) {
  906. compileClass();
  907. } else if (match(TK("def"))) {
  908. compileFunction();
  909. } else if (match(TK("import"))) {
  910. compileRegularImport();
  911. } else if (match(TK("from"))) {
  912. compileFromImport();
  913. } else {
  914. compileStatement();
  915. }
  916. }
  917. _Code __fillCode(){
  918. _Code code = pkpy::make_shared<CodeObject>(parser->src, _Str("<module>"));
  919. codes.push(code);
  920. // Lex initial tokens. current <-- next.
  921. lexToken();
  922. lexToken();
  923. matchNewLines();
  924. if(mode()==EVAL_MODE) {
  925. EXPR_TUPLE();
  926. consume(TK("@eof"));
  927. return code;
  928. }else if(mode()==JSON_MODE){
  929. PyVarOrNull value = readLiteral();
  930. if(value != nullptr) emitCode(OP_LOAD_CONST, code->addConst(value));
  931. else if(match(TK("{"))) exprMap();
  932. else if(match(TK("["))) exprList();
  933. else syntaxError("expect a JSON object or array");
  934. consume(TK("@eof"));
  935. return code;
  936. }
  937. while (!match(TK("@eof"))) {
  938. compileTopLevelStatement();
  939. matchNewLines();
  940. }
  941. return code;
  942. }
  943. /***** Error Reporter *****/
  944. _Str getLineSnapshot(){
  945. int lineno = parser->current.line;
  946. const char* cursor = parser->current.start;
  947. // if error occurs in lexing, lineno should be `parser->current_line`
  948. if(lexingCnt > 0){
  949. lineno = parser->current_line;
  950. cursor = parser->current_char;
  951. }
  952. if(parser->peekChar() == '\n') lineno--;
  953. return parser->src->snapshot(lineno, cursor);
  954. }
  955. void syntaxError(_Str msg){ throw CompileError("SyntaxError", msg, getLineSnapshot()); }
  956. void indentationError(_Str msg){ throw CompileError("IndentationError", msg, getLineSnapshot()); }
  957. void unexpectedError(_Str msg){ throw CompileError("UnexpectedError", msg, getLineSnapshot()); }
  958. };
  959. _Code compile(VM* vm, const char* source, _Str filename, CompileMode mode=EXEC_MODE, bool noThrow=true) {
  960. Compiler compiler(vm, source, filename, mode);
  961. if(!noThrow) return compiler.__fillCode();
  962. try{
  963. return compiler.__fillCode();
  964. }catch(_Error& e){
  965. (*vm->_stderr) << e.what() << '\n';
  966. }catch(std::exception& e){
  967. auto ce = CompileError("UnexpectedError", e.what(), compiler.getLineSnapshot());
  968. (*vm->_stderr) << ce.what() << '\n';
  969. }
  970. return nullptr;
  971. }