compiler.h 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025
  1. #pragma once
  2. #include "parser.h"
  3. #include "error.h"
  4. #include "vm.h"
  5. class Compiler;
  6. typedef void (Compiler::*GrammarFn)();
  7. typedef void (Compiler::*CompilerAction)();
  8. struct GrammarRule{
  9. GrammarFn prefix;
  10. GrammarFn infix;
  11. Precedence precedence;
  12. };
  13. struct Loop {
  14. int start;
  15. std::vector<int> breaks;
  16. Loop(int start) : start(start) {}
  17. };
  18. class Compiler {
  19. public:
  20. std::unique_ptr<Parser> parser;
  21. std::stack<_Code> codes;
  22. std::stack<Loop> loops;
  23. bool isCompilingClass = false;
  24. VM* vm;
  25. std::unordered_map<_TokenType, GrammarRule> rules;
  26. _Code getCode() {
  27. return codes.top();
  28. }
  29. CompileMode mode() {
  30. return parser->src->mode;
  31. }
  32. Loop& getLoop() {
  33. return loops.top();
  34. }
  35. Compiler(VM* vm, const char* source, _Str filename, CompileMode mode){
  36. this->vm = vm;
  37. this->parser = std::make_unique<Parser>(
  38. std::make_shared<SourceMetadata>(source, filename, mode)
  39. );
  40. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  41. #define METHOD(name) &Compiler::name
  42. #define NO_INFIX nullptr, PREC_NONE
  43. for(_TokenType i=0; i<__TOKENS_LEN; i++) rules[i] = { nullptr, NO_INFIX };
  44. rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB };
  45. rules[TK("->")] = { nullptr, METHOD(exprAttribPtr), PREC_ATTRIB };
  46. rules[TK("(")] = { METHOD(exprGrouping), METHOD(exprCall), PREC_CALL };
  47. rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscript), PREC_SUBSCRIPT };
  48. rules[TK("{")] = { METHOD(exprMap), NO_INFIX };
  49. rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  50. rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM };
  51. rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM };
  52. rules[TK("*")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_FACTOR };
  53. rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  54. rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  55. rules[TK("**")] = { nullptr, METHOD(exprBinaryOp), PREC_EXPONENT };
  56. rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  57. rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  58. rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  59. rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  60. rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  61. rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  62. rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  63. rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  64. rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  65. rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  66. rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND };
  67. rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR };
  68. rules[TK("not")] = { METHOD(exprUnaryOp), nullptr, PREC_UNARY };
  69. rules[TK("True")] = { METHOD(exprValue), NO_INFIX };
  70. rules[TK("False")] = { METHOD(exprValue), NO_INFIX };
  71. rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX };
  72. rules[TK("None")] = { METHOD(exprValue), NO_INFIX };
  73. rules[TK("...")] = { METHOD(exprValue), NO_INFIX };
  74. rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
  75. rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
  76. rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
  77. rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
  78. rules[TK("?")] = { nullptr, METHOD(exprTernary), PREC_TERNARY };
  79. rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  80. rules[TK("+=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  81. rules[TK("-=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  82. rules[TK("*=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  83. rules[TK("/=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  84. rules[TK("//=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  85. rules[TK(",")] = { nullptr, METHOD(exprComma), PREC_COMMA };
  86. rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  87. rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  88. rules[TK("&")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_BITWISE_AND };
  89. rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR };
  90. rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  91. #undef METHOD
  92. #undef NO_INFIX
  93. #define EXPR() parsePrecedence(PREC_TERNARY) // no '=' and ',' just a simple expression
  94. #define EXPR_TUPLE() parsePrecedence(PREC_COMMA) // no '=', but ',' is allowed
  95. #define EXPR_ANY() parsePrecedence(PREC_ASSIGNMENT)
  96. }
  97. _Str eatStringUntil(char quote) {
  98. std::vector<char> buff;
  99. while (true) {
  100. char c = parser->eatCharIncludeNewLine();
  101. if (c == quote) break;
  102. if (c == '\0' || c == '\n') syntaxError("EOL while scanning string literal");
  103. if (c == '\\') {
  104. switch (parser->eatCharIncludeNewLine()) {
  105. case '"': buff.push_back('"'); break;
  106. case '\'': buff.push_back('\''); break;
  107. case '\\': buff.push_back('\\'); break;
  108. case 'n': buff.push_back('\n'); break;
  109. case 'r': buff.push_back('\r'); break;
  110. case 't': buff.push_back('\t'); break;
  111. case '\n': case '\r': break;
  112. default: syntaxError("invalid escape character");
  113. }
  114. } else {
  115. buff.push_back(c);
  116. }
  117. }
  118. return _Str(buff.data(), buff.size());
  119. }
  120. void eatString(char quote, bool fstr) {
  121. _Str s = eatStringUntil(quote);
  122. if(fstr){
  123. parser->setNextToken(TK("@fstr"), vm->PyStr(s));
  124. }else{
  125. parser->setNextToken(TK("@str"), vm->PyStr(s));
  126. }
  127. }
  128. void eatNumber() {
  129. static const std::regex pattern("^(0x)?[0-9a-f]+(\\.[0-9]+)?");
  130. std::smatch m;
  131. const char* i = parser->token_start;
  132. while(*i != '\n' && *i != '\0') i++;
  133. std::string s = std::string(parser->token_start, i);
  134. try{
  135. if (std::regex_search(s, m, pattern)) {
  136. // here is m.length()-1, since the first char is eaten by lexToken()
  137. for(int j=0; j<m.length()-1; j++) parser->eatChar();
  138. int base = 10;
  139. size_t size;
  140. if (m[1].matched) base = 16;
  141. if (m[2].matched) {
  142. if(base == 16) syntaxError("hex literal should not contain a dot");
  143. parser->setNextToken(TK("@num"), vm->PyFloat(std::stod(m[0], &size)));
  144. } else {
  145. parser->setNextToken(TK("@num"), vm->PyInt(std::stoll(m[0], &size, base)));
  146. }
  147. if (size != m.length()) throw std::runtime_error("length mismatch");
  148. }
  149. }catch(std::exception& e){
  150. syntaxError("invalid number literal");
  151. }
  152. }
  153. // Lex the next token and set it as the next token.
  154. void lexToken() {
  155. parser->previous = parser->current;
  156. parser->current = parser->nextToken();
  157. //_Str _info = parser->current.info(); printf("%s\n", (const char*)_info);
  158. while (parser->peekChar() != '\0') {
  159. parser->token_start = parser->current_char;
  160. char c = parser->eatCharIncludeNewLine();
  161. switch (c) {
  162. case '\'': case '"': eatString(c, false); return;
  163. case '#': parser->skipLineComment(); break;
  164. case '{': parser->setNextToken(TK("{")); return;
  165. case '}': parser->setNextToken(TK("}")); return;
  166. case ',': parser->setNextToken(TK(",")); return;
  167. case ':': parser->setNextToken(TK(":")); return;
  168. case ';': parser->setNextToken(TK(";")); return;
  169. case '(': parser->setNextToken(TK("(")); return;
  170. case ')': parser->setNextToken(TK(")")); return;
  171. case '[': parser->setNextToken(TK("[")); return;
  172. case ']': parser->setNextToken(TK("]")); return;
  173. case '%': parser->setNextToken(TK("%")); return;
  174. case '&': parser->setNextToken(TK("&")); return;
  175. case '|': parser->setNextToken(TK("|")); return;
  176. case '^': parser->setNextToken(TK("^")); return;
  177. case '?': parser->setNextToken(TK("?")); return;
  178. case '.': {
  179. if(parser->matchChar('.')) {
  180. if(parser->matchChar('.')) {
  181. parser->setNextToken(TK("..."));
  182. } else {
  183. syntaxError("invalid token '..'");
  184. }
  185. } else {
  186. parser->setNextToken(TK("."));
  187. }
  188. return;
  189. }
  190. case '=': parser->setNextTwoCharToken('=', TK("="), TK("==")); return;
  191. case '+': parser->setNextTwoCharToken('=', TK("+"), TK("+=")); return;
  192. case '>': {
  193. if(parser->matchChar('=')) parser->setNextToken(TK(">="));
  194. else if(parser->matchChar('>')) parser->setNextToken(TK(">>"));
  195. else parser->setNextToken(TK(">"));
  196. return;
  197. }
  198. case '<': {
  199. if(parser->matchChar('=')) parser->setNextToken(TK("<="));
  200. else if(parser->matchChar('<')) parser->setNextToken(TK("<<"));
  201. else parser->setNextToken(TK("<"));
  202. return;
  203. }
  204. case '-': {
  205. if(parser->matchChar('=')) parser->setNextToken(TK("-="));
  206. else if(parser->matchChar('>')) parser->setNextToken(TK("->"));
  207. else parser->setNextToken(TK("-"));
  208. return;
  209. }
  210. case '!':
  211. if(parser->matchChar('=')) parser->setNextToken(TK("!="));
  212. else syntaxError("expected '=' after '!'");
  213. break;
  214. case '*':
  215. if (parser->matchChar('*')) {
  216. parser->setNextToken(TK("**")); // '**'
  217. } else {
  218. parser->setNextTwoCharToken('=', TK("*"), TK("*="));
  219. }
  220. return;
  221. case '/':
  222. if(parser->matchChar('/')) {
  223. parser->setNextTwoCharToken('=', TK("//"), TK("//="));
  224. } else {
  225. parser->setNextTwoCharToken('=', TK("/"), TK("/="));
  226. }
  227. return;
  228. case '\r': break; // just ignore '\r'
  229. case ' ': case '\t': parser->eatSpaces(); break;
  230. case '\n': {
  231. parser->setNextToken(TK("@eol")); while(parser->matchChar('\n'));
  232. if(!parser->eatIndentation()) indentationError("unindent does not match any outer indentation level");
  233. return;
  234. }
  235. default: {
  236. if(c == 'f'){
  237. if(parser->matchChar('\'')) {eatString('\'', true); return;}
  238. if(parser->matchChar('"')) {eatString('"', true); return;}
  239. }
  240. if (isdigit(c)) {
  241. eatNumber();
  242. } else if (parser->isNameStart(c)) {
  243. int ret = parser->eatName();
  244. if(ret!=0) syntaxError("identifier is illegal, err " + std::to_string(ret));
  245. } else {
  246. syntaxError("unknown character: " + std::string(1, c));
  247. }
  248. return;
  249. }
  250. }
  251. }
  252. parser->token_start = parser->current_char;
  253. parser->setNextToken(TK("@eof"));
  254. }
  255. _TokenType peek() {
  256. return parser->current.type;
  257. }
  258. bool match(_TokenType expected) {
  259. if (peek() != expected) return false;
  260. lexToken();
  261. return true;
  262. }
  263. void consume(_TokenType expected) {
  264. if (!match(expected)){
  265. _StrStream ss;
  266. ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(peek()) << "'";
  267. syntaxError(ss.str());
  268. }
  269. }
  270. bool matchNewLines(bool repl_throw=false) {
  271. bool consumed = false;
  272. if (peek() == TK("@eol")) {
  273. while (peek() == TK("@eol")) lexToken();
  274. consumed = true;
  275. }
  276. if (repl_throw && peek() == TK("@eof")){
  277. throw NeedMoreLines(isCompilingClass);
  278. }
  279. return consumed;
  280. }
  281. bool matchEndStatement() {
  282. if (match(TK(";"))) {
  283. matchNewLines();
  284. return true;
  285. }
  286. if (matchNewLines() || peek() == TK("@eof"))
  287. return true;
  288. if (peek() == TK("@dedent")) return true;
  289. return false;
  290. }
  291. void consumeEndStatement() {
  292. if (!matchEndStatement()) syntaxError("expected statement end");
  293. }
  294. void exprLiteral() {
  295. PyVar value = parser->previous.value;
  296. int index = getCode()->addConst(value);
  297. emitCode(OP_LOAD_CONST, index);
  298. }
  299. void exprFString() {
  300. static const std::regex pattern(R"(\{(.*?)\})");
  301. PyVar value = parser->previous.value;
  302. std::string s = vm->PyStr_AS_C(value).str();
  303. std::sregex_iterator begin(s.begin(), s.end(), pattern);
  304. std::sregex_iterator end;
  305. int size = 0;
  306. int i = 0;
  307. for(auto it = begin; it != end; it++) {
  308. std::smatch m = *it;
  309. if (i < m.position()) {
  310. std::string literal = s.substr(i, m.position() - i);
  311. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(literal)));
  312. size++;
  313. }
  314. emitCode(OP_LOAD_EVAL_FN);
  315. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(m[1].str())));
  316. emitCode(OP_CALL, 1);
  317. size++;
  318. i = m.position() + m.length();
  319. }
  320. if (i < s.size()) {
  321. std::string literal = s.substr(i, s.size() - i);
  322. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(literal)));
  323. size++;
  324. }
  325. emitCode(OP_BUILD_STRING, size);
  326. }
  327. void exprLambda() {
  328. _Func func = std::make_shared<Function>();
  329. func->name = "<lambda>";
  330. if(!match(TK(":"))){
  331. __compileFunctionArgs(func);
  332. consume(TK(":"));
  333. }
  334. func->code = std::make_shared<CodeObject>(parser->src, func->name);
  335. this->codes.push(func->code);
  336. EXPR_TUPLE();
  337. emitCode(OP_RETURN_VALUE);
  338. this->codes.pop();
  339. emitCode(OP_LOAD_LAMBDA, getCode()->addConst(vm->PyFunction(func)));
  340. }
  341. void exprAssign() {
  342. _TokenType op = parser->previous.type;
  343. if(op == TK("=")) { // a = (expr)
  344. EXPR_TUPLE();
  345. emitCode(OP_STORE_PTR);
  346. }else{ // a += (expr) -> a = a + (expr)
  347. // TODO: optimization is needed for inplace operators
  348. emitCode(OP_DUP_TOP);
  349. EXPR();
  350. switch (op) {
  351. case TK("+="): emitCode(OP_BINARY_OP, 0); break;
  352. case TK("-="): emitCode(OP_BINARY_OP, 1); break;
  353. case TK("*="): emitCode(OP_BINARY_OP, 2); break;
  354. case TK("/="): emitCode(OP_BINARY_OP, 3); break;
  355. case TK("//="): emitCode(OP_BINARY_OP, 4); break;
  356. default: UNREACHABLE();
  357. }
  358. emitCode(OP_STORE_PTR);
  359. }
  360. }
  361. void exprComma() {
  362. int size = 1; // an expr is in the stack now
  363. do {
  364. EXPR(); // NOTE: "1," will fail, "1,2" will be ok
  365. size++;
  366. } while(match(TK(",")));
  367. emitCode(OP_BUILD_SMART_TUPLE, size);
  368. }
  369. void exprOr() {
  370. int patch = emitCode(OP_JUMP_IF_TRUE_OR_POP);
  371. parsePrecedence(PREC_LOGICAL_OR);
  372. patchJump(patch);
  373. }
  374. void exprAnd() {
  375. int patch = emitCode(OP_JUMP_IF_FALSE_OR_POP);
  376. parsePrecedence(PREC_LOGICAL_AND);
  377. patchJump(patch);
  378. }
  379. void exprTernary() {
  380. int patch = emitCode(OP_POP_JUMP_IF_FALSE);
  381. EXPR(); // if true
  382. int patch2 = emitCode(OP_JUMP_ABSOLUTE);
  383. consume(TK(":"));
  384. patchJump(patch);
  385. EXPR(); // if false
  386. patchJump(patch2);
  387. }
  388. void exprBinaryOp() {
  389. _TokenType op = parser->previous.type;
  390. parsePrecedence((Precedence)(rules[op].precedence + 1));
  391. switch (op) {
  392. case TK("+"): emitCode(OP_BINARY_OP, 0); break;
  393. case TK("-"): emitCode(OP_BINARY_OP, 1); break;
  394. case TK("*"): emitCode(OP_BINARY_OP, 2); break;
  395. case TK("/"): emitCode(OP_BINARY_OP, 3); break;
  396. case TK("//"): emitCode(OP_BINARY_OP, 4); break;
  397. case TK("%"): emitCode(OP_BINARY_OP, 5); break;
  398. case TK("**"): emitCode(OP_BINARY_OP, 6); break;
  399. case TK("<"): emitCode(OP_COMPARE_OP, 0); break;
  400. case TK("<="): emitCode(OP_COMPARE_OP, 1); break;
  401. case TK("=="): emitCode(OP_COMPARE_OP, 2); break;
  402. case TK("!="): emitCode(OP_COMPARE_OP, 3); break;
  403. case TK(">"): emitCode(OP_COMPARE_OP, 4); break;
  404. case TK(">="): emitCode(OP_COMPARE_OP, 5); break;
  405. case TK("in"): emitCode(OP_CONTAINS_OP, 0); break;
  406. case TK("not in"): emitCode(OP_CONTAINS_OP, 1); break;
  407. case TK("is"): emitCode(OP_IS_OP, 0); break;
  408. case TK("is not"): emitCode(OP_IS_OP, 1); break;
  409. case TK("<<"): emitCode(OP_BITWISE_OP, 0); break;
  410. case TK(">>"): emitCode(OP_BITWISE_OP, 1); break;
  411. case TK("&"): emitCode(OP_BITWISE_OP, 2); break;
  412. case TK("|"): emitCode(OP_BITWISE_OP, 3); break;
  413. case TK("^"): emitCode(OP_BITWISE_OP, 4); break;
  414. default: UNREACHABLE();
  415. }
  416. }
  417. void exprUnaryOp() {
  418. _TokenType op = parser->previous.type;
  419. matchNewLines();
  420. parsePrecedence((Precedence)(PREC_UNARY + 1));
  421. switch (op) {
  422. case TK("-"): emitCode(OP_UNARY_NEGATIVE); break;
  423. case TK("not"): emitCode(OP_UNARY_NOT); break;
  424. case TK("&"): emitCode(OP_UNARY_REF); break;
  425. case TK("*"): emitCode(OP_UNARY_DEREF); break;
  426. default: UNREACHABLE();
  427. }
  428. }
  429. void exprGrouping() {
  430. matchNewLines(mode()==SINGLE_MODE);
  431. EXPR_TUPLE();
  432. matchNewLines(mode()==SINGLE_MODE);
  433. consume(TK(")"));
  434. }
  435. void exprList() {
  436. int _patch = emitCode(OP_NO_OP);
  437. int _body_start = getCode()->co_code.size();
  438. int ARGC = 0;
  439. do {
  440. matchNewLines(mode()==SINGLE_MODE);
  441. if (peek() == TK("]")) break;
  442. EXPR(); ARGC++;
  443. matchNewLines(mode()==SINGLE_MODE);
  444. if(ARGC == 1 && match(TK("for"))) goto __LISTCOMP;
  445. } while (match(TK(",")));
  446. matchNewLines(mode()==SINGLE_MODE);
  447. consume(TK("]"));
  448. emitCode(OP_BUILD_LIST, ARGC);
  449. return;
  450. __LISTCOMP:
  451. int _body_end = getCode()->co_code.size();
  452. getCode()->co_code[_patch].op = OP_JUMP_ABSOLUTE;
  453. getCode()->co_code[_patch].arg = _body_end;
  454. emitCode(OP_BUILD_LIST, 0);
  455. EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE();
  456. matchNewLines(mode()==SINGLE_MODE);
  457. int _skipPatch = emitCode(OP_JUMP_ABSOLUTE);
  458. int _cond_start = getCode()->co_code.size();
  459. if(match(TK("if"))) EXPR_TUPLE();
  460. int _cond_end = getCode()->co_code.size();
  461. patchJump(_skipPatch);
  462. emitCode(OP_GET_ITER);
  463. Loop& loop = enterLoop();
  464. int patch = emitCode(OP_FOR_ITER);
  465. if(_cond_end != _cond_start) { // there is an if condition
  466. getCode()->__moveToEnd(_cond_start, _cond_end);
  467. int ifpatch = emitCode(OP_POP_JUMP_IF_FALSE);
  468. getCode()->__moveToEnd(_body_start, _body_end);
  469. emitCode(OP_LIST_APPEND);
  470. patchJump(ifpatch);
  471. }else{
  472. getCode()->__moveToEnd(_body_start, _body_end);
  473. emitCode(OP_LIST_APPEND);
  474. }
  475. emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine();
  476. patchJump(patch);
  477. exitLoop();
  478. matchNewLines(mode()==SINGLE_MODE);
  479. consume(TK("]"));
  480. }
  481. void exprMap() {
  482. int size = 0;
  483. do {
  484. matchNewLines(mode()==SINGLE_MODE);
  485. if (peek() == TK("}")) break;
  486. EXPR();consume(TK(":"));EXPR();
  487. size++;
  488. matchNewLines(mode()==SINGLE_MODE);
  489. } while (match(TK(",")));
  490. matchNewLines();
  491. consume(TK("}"));
  492. emitCode(OP_BUILD_MAP, size);
  493. }
  494. void exprCall() {
  495. int ARGC = 0;
  496. do {
  497. matchNewLines(mode()==SINGLE_MODE);
  498. if (peek() == TK(")")) break;
  499. EXPR();
  500. ARGC++;
  501. matchNewLines(mode()==SINGLE_MODE);
  502. } while (match(TK(",")));
  503. consume(TK(")"));
  504. emitCode(OP_CALL, ARGC);
  505. }
  506. void exprName() {
  507. Token tkname = parser->previous;
  508. int index = getCode()->addName(
  509. tkname.str(),
  510. codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL
  511. );
  512. emitCode(OP_LOAD_NAME_PTR, index);
  513. }
  514. void exprAttrib() {
  515. consume(TK("@id"));
  516. const _Str& name = parser->previous.str();
  517. int index = getCode()->addName(name, NAME_ATTR);
  518. emitCode(OP_BUILD_ATTR_PTR, index);
  519. }
  520. void exprAttribPtr(){
  521. consume(TK("@id"));
  522. const _Str& name = parser->previous.str();
  523. int index = getCode()->addName(name, NAME_ATTR);
  524. emitCode(OP_BUILD_ATTR_PTR_PTR, index);
  525. }
  526. // [:], [:b]
  527. // [a], [a:], [a:b]
  528. void exprSubscript() {
  529. if(match(TK(":"))){
  530. emitCode(OP_LOAD_NONE);
  531. if(match(TK("]"))){
  532. emitCode(OP_LOAD_NONE);
  533. }else{
  534. EXPR();
  535. consume(TK("]"));
  536. }
  537. emitCode(OP_BUILD_SLICE);
  538. }else{
  539. EXPR();
  540. if(match(TK(":"))){
  541. if(match(TK("]"))){
  542. emitCode(OP_LOAD_NONE);
  543. }else{
  544. EXPR();
  545. consume(TK("]"));
  546. }
  547. emitCode(OP_BUILD_SLICE);
  548. }else{
  549. consume(TK("]"));
  550. }
  551. }
  552. emitCode(OP_BUILD_INDEX_PTR);
  553. }
  554. void exprValue() {
  555. _TokenType op = parser->previous.type;
  556. switch (op) {
  557. case TK("None"): emitCode(OP_LOAD_NONE); break;
  558. case TK("True"): emitCode(OP_LOAD_TRUE); break;
  559. case TK("False"): emitCode(OP_LOAD_FALSE); break;
  560. case TK("..."): emitCode(OP_LOAD_ELLIPSIS); break;
  561. default: UNREACHABLE();
  562. }
  563. }
  564. void keepOpcodeLine(){
  565. int i = getCode()->co_code.size() - 1;
  566. getCode()->co_code[i].line = getCode()->co_code[i-1].line;
  567. }
  568. int emitCode(Opcode opcode, int arg=-1) {
  569. int line = parser->previous.line;
  570. getCode()->co_code.push_back(
  571. ByteCode{(uint8_t)opcode, arg, (uint16_t)line}
  572. );
  573. return getCode()->co_code.size() - 1;
  574. }
  575. inline void patchJump(int addr_index) {
  576. int target = getCode()->co_code.size();
  577. getCode()->co_code[addr_index].arg = target;
  578. }
  579. void compileBlockBody(){
  580. __compileBlockBody(&Compiler::compileStatement);
  581. }
  582. void __compileBlockBody(CompilerAction action) {
  583. consume(TK(":"));
  584. if(!matchNewLines(mode()==SINGLE_MODE)){
  585. syntaxError("expected a new line after ':'");
  586. }
  587. consume(TK("@indent"));
  588. while (peek() != TK("@dedent")) {
  589. matchNewLines();
  590. (this->*action)();
  591. matchNewLines();
  592. }
  593. consume(TK("@dedent"));
  594. }
  595. Token compileImportPath() {
  596. consume(TK("@id"));
  597. Token tkmodule = parser->previous;
  598. int index = getCode()->addName(tkmodule.str(), NAME_GLOBAL);
  599. emitCode(OP_IMPORT_NAME, index);
  600. return tkmodule;
  601. }
  602. // import a as b
  603. void compileRegularImport() {
  604. do {
  605. Token tkmodule = compileImportPath();
  606. if (match(TK("as"))) {
  607. consume(TK("@id"));
  608. tkmodule = parser->previous;
  609. }
  610. int index = getCode()->addName(tkmodule.str(), NAME_GLOBAL);
  611. emitCode(OP_STORE_NAME_PTR, index);
  612. } while (match(TK(",")));
  613. consumeEndStatement();
  614. }
  615. // from a import b as c, d as e
  616. void compileFromImport() {
  617. Token tkmodule = compileImportPath();
  618. consume(TK("import"));
  619. do {
  620. consume(TK("@id"));
  621. Token tkname = parser->previous;
  622. int index = getCode()->addName(tkname.str(), NAME_GLOBAL);
  623. emitCode(OP_BUILD_ATTR_PTR, index);
  624. if (match(TK("as"))) {
  625. consume(TK("@id"));
  626. tkname = parser->previous;
  627. }
  628. index = getCode()->addName(tkname.str(), NAME_GLOBAL);
  629. emitCode(OP_STORE_NAME_PTR, index);
  630. } while (match(TK(",")));
  631. consumeEndStatement();
  632. }
  633. void parsePrecedence(Precedence precedence) {
  634. lexToken();
  635. GrammarFn prefix = rules[parser->previous.type].prefix;
  636. if (prefix == nullptr) syntaxError(_Str("expected an expression, but got ") + TK_STR(parser->previous.type));
  637. (this->*prefix)();
  638. while (rules[peek()].precedence >= precedence) {
  639. lexToken();
  640. _TokenType op = parser->previous.type;
  641. GrammarFn infix = rules[op].infix;
  642. if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true");
  643. (this->*infix)();
  644. }
  645. }
  646. void compileIfStatement() {
  647. matchNewLines();
  648. EXPR_TUPLE();
  649. int ifpatch = emitCode(OP_POP_JUMP_IF_FALSE);
  650. compileBlockBody();
  651. if (match(TK("elif"))) {
  652. int exit_jump = emitCode(OP_JUMP_ABSOLUTE);
  653. patchJump(ifpatch);
  654. compileIfStatement();
  655. patchJump(exit_jump);
  656. } else if (match(TK("else"))) {
  657. int exit_jump = emitCode(OP_JUMP_ABSOLUTE);
  658. patchJump(ifpatch);
  659. compileBlockBody();
  660. patchJump(exit_jump);
  661. } else {
  662. patchJump(ifpatch);
  663. }
  664. }
  665. Loop& enterLoop(){
  666. Loop lp((int)getCode()->co_code.size());
  667. loops.push(lp);
  668. return loops.top();
  669. }
  670. void exitLoop(){
  671. Loop& lp = loops.top();
  672. for(int addr : lp.breaks) patchJump(addr);
  673. loops.pop();
  674. }
  675. void compileWhileLoop() {
  676. Loop& loop = enterLoop();
  677. EXPR_TUPLE();
  678. int patch = emitCode(OP_POP_JUMP_IF_FALSE);
  679. compileBlockBody();
  680. emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine();
  681. patchJump(patch);
  682. exitLoop();
  683. }
  684. void EXPR_FOR_VARS(){
  685. int size = 0;
  686. do {
  687. consume(TK("@id"));
  688. exprName(); size++;
  689. } while (match(TK(",")));
  690. if(size > 1) emitCode(OP_BUILD_SMART_TUPLE, size);
  691. }
  692. void compileForLoop() {
  693. EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE();
  694. emitCode(OP_GET_ITER);
  695. Loop& loop = enterLoop();
  696. int patch = emitCode(OP_FOR_ITER);
  697. compileBlockBody();
  698. emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine();
  699. patchJump(patch);
  700. exitLoop();
  701. }
  702. void compileStatement() {
  703. if (match(TK("break"))) {
  704. if (loops.empty()) syntaxError("'break' outside loop");
  705. consumeEndStatement();
  706. int patch = emitCode(OP_SAFE_JUMP_ABSOLUTE);
  707. getLoop().breaks.push_back(patch);
  708. } else if (match(TK("continue"))) {
  709. if (loops.empty()) syntaxError("'continue' not properly in loop");
  710. consumeEndStatement();
  711. emitCode(OP_JUMP_ABSOLUTE, getLoop().start);
  712. } else if (match(TK("return"))) {
  713. if (codes.size() == 1)
  714. syntaxError("'return' outside function");
  715. if(matchEndStatement()){
  716. emitCode(OP_LOAD_NONE);
  717. }else{
  718. EXPR_TUPLE();
  719. consumeEndStatement();
  720. }
  721. emitCode(OP_RETURN_VALUE);
  722. } else if (match(TK("if"))) {
  723. compileIfStatement();
  724. } else if (match(TK("while"))) {
  725. compileWhileLoop();
  726. } else if (match(TK("for"))) {
  727. compileForLoop();
  728. } else if(match(TK("assert"))){
  729. EXPR();
  730. emitCode(OP_ASSERT);
  731. consumeEndStatement();
  732. } else if(match(TK("with"))){
  733. EXPR();
  734. consume(TK("as"));
  735. consume(TK("@id"));
  736. Token tkname = parser->previous;
  737. int index = getCode()->addName(
  738. tkname.str(),
  739. codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL
  740. );
  741. emitCode(OP_STORE_NAME_PTR, index);
  742. emitCode(OP_LOAD_NAME_PTR, index);
  743. emitCode(OP_WITH_ENTER);
  744. compileBlockBody();
  745. emitCode(OP_LOAD_NAME_PTR, index);
  746. emitCode(OP_WITH_EXIT);
  747. } else if(match(TK("label"))){
  748. if(mode() != EXEC_MODE) syntaxError("'label' is only available in EXEC_MODE");
  749. consume(TK(".")); consume(TK("@id"));
  750. getCode()->addLabel(parser->previous.str());
  751. consumeEndStatement();
  752. } else if(match(TK("goto"))){
  753. // https://entrian.com/goto/
  754. if(mode() != EXEC_MODE) syntaxError("'goto' is only available in EXEC_MODE");
  755. consume(TK(".")); consume(TK("@id"));
  756. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(parser->previous.str())));
  757. emitCode(OP_GOTO);
  758. consumeEndStatement();
  759. } else if(match(TK("raise"))){
  760. consume(TK("@id")); // dummy exception type
  761. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(parser->previous.str())));
  762. consume(TK("("));EXPR();consume(TK(")"));
  763. emitCode(OP_RAISE_ERROR);
  764. consumeEndStatement();
  765. } else if(match(TK("del"))){
  766. EXPR();
  767. emitCode(OP_DELETE_PTR);
  768. consumeEndStatement();
  769. } else if(match(TK("global"))){
  770. consume(TK("@id"));
  771. getCode()->co_global_names.push_back(parser->previous.str());
  772. consumeEndStatement();
  773. } else if(match(TK("pass"))){
  774. consumeEndStatement();
  775. } else {
  776. EXPR_ANY();
  777. consumeEndStatement();
  778. // If last op is not an assignment, pop the result.
  779. uint8_t lastOp = getCode()->co_code.back().op;
  780. if( lastOp != OP_STORE_NAME_PTR && lastOp != OP_STORE_PTR){
  781. if(mode()==SINGLE_MODE && parser->indents.top() == 0){
  782. emitCode(OP_PRINT_EXPR);
  783. }
  784. emitCode(OP_POP_TOP);
  785. }
  786. }
  787. }
  788. void compileClass(){
  789. consume(TK("@id"));
  790. int clsNameIdx = getCode()->addName(parser->previous.str(), NAME_GLOBAL);
  791. int superClsNameIdx = -1;
  792. if(match(TK("("))){
  793. consume(TK("@id"));
  794. superClsNameIdx = getCode()->addName(parser->previous.str(), NAME_GLOBAL);
  795. consume(TK(")"));
  796. }
  797. emitCode(OP_LOAD_NONE);
  798. isCompilingClass = true;
  799. __compileBlockBody(&Compiler::compileFunction);
  800. isCompilingClass = false;
  801. if(superClsNameIdx == -1) emitCode(OP_LOAD_NONE);
  802. else emitCode(OP_LOAD_NAME_PTR, superClsNameIdx);
  803. emitCode(OP_BUILD_CLASS, clsNameIdx);
  804. }
  805. void __compileFunctionArgs(_Func func){
  806. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  807. do {
  808. if(state == 3) syntaxError("**kwargs should be the last argument");
  809. matchNewLines();
  810. if(match(TK("*"))){
  811. if(state < 1) state = 1;
  812. else syntaxError("*args should be placed before **kwargs");
  813. }
  814. else if(match(TK("**"))){
  815. state = 3;
  816. }
  817. consume(TK("@id"));
  818. const _Str& name = parser->previous.str();
  819. if(func->hasName(name)) syntaxError("duplicate argument name");
  820. if(state == 0 && peek() == TK("=")) state = 2;
  821. switch (state)
  822. {
  823. case 0: func->args.push_back(name); break;
  824. case 1: func->starredArg = name; state+=1; break;
  825. case 2:
  826. consume(TK("="));
  827. func->kwArgs[name] = consumeLiteral();
  828. func->kwArgsOrder.push_back(name);
  829. break;
  830. case 3: syntaxError("**kwargs is not supported yet"); break;
  831. }
  832. } while (match(TK(",")));
  833. }
  834. void compileFunction(){
  835. if(isCompilingClass){
  836. if(match(TK("pass"))) return;
  837. consume(TK("def"));
  838. }
  839. _Func func = std::make_shared<Function>();
  840. consume(TK("@id"));
  841. func->name = parser->previous.str();
  842. if (match(TK("(")) && !match(TK(")"))) {
  843. __compileFunctionArgs(func);
  844. consume(TK(")"));
  845. }
  846. func->code = std::make_shared<CodeObject>(parser->src, func->name);
  847. this->codes.push(func->code);
  848. compileBlockBody();
  849. this->codes.pop();
  850. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyFunction(func)));
  851. if(!isCompilingClass) emitCode(OP_STORE_FUNCTION);
  852. }
  853. PyVar consumeLiteral(){
  854. if(match(TK("-"))){
  855. consume(TK("@num"));
  856. PyVar val = parser->previous.value;
  857. return vm->numNegated(val);
  858. }
  859. if(match(TK("@num"))) return parser->previous.value;
  860. if(match(TK("@str"))) return parser->previous.value;
  861. if(match(TK("True"))) return vm->PyBool(true);
  862. if(match(TK("False"))) return vm->PyBool(false);
  863. if(match(TK("None"))) return vm->None;
  864. if(match(TK("..."))) return vm->Ellipsis;
  865. syntaxError(_Str("expect a literal, not ") + TK_STR(parser->current.type));
  866. return nullptr;
  867. }
  868. void compileTopLevelStatement() {
  869. if (match(TK("class"))) {
  870. compileClass();
  871. } else if (match(TK("def"))) {
  872. compileFunction();
  873. } else if (match(TK("import"))) {
  874. compileRegularImport();
  875. } else if (match(TK("from"))) {
  876. compileFromImport();
  877. } else {
  878. compileStatement();
  879. }
  880. }
  881. _Code __fillCode(){
  882. _Code code = std::make_shared<CodeObject>(parser->src, _Str("<module>"));
  883. codes.push(code);
  884. // Lex initial tokens. current <-- next.
  885. lexToken();
  886. lexToken();
  887. matchNewLines();
  888. if(mode()==EVAL_MODE) {
  889. EXPR_TUPLE();
  890. consume(TK("@eof"));
  891. return code;
  892. }
  893. while (!match(TK("@eof"))) {
  894. compileTopLevelStatement();
  895. matchNewLines();
  896. }
  897. return code;
  898. }
  899. /***** Error Reporter *****/
  900. _Str getLineSnapshot(){
  901. int lineno = parser->current_line;
  902. if(parser->peekChar() == '\n') lineno--;
  903. return parser->src->snapshot(lineno);
  904. }
  905. void syntaxError(_Str msg){
  906. throw CompileError("SyntaxError", msg, getLineSnapshot());
  907. }
  908. void indentationError(_Str msg){
  909. throw CompileError("IndentationError", msg, getLineSnapshot());
  910. }
  911. void unexpectedError(_Str msg){
  912. throw CompileError("UnexpectedError", msg, getLineSnapshot());
  913. }
  914. };
  915. _Code compile(VM* vm, const char* source, _Str filename, CompileMode mode=EXEC_MODE) {
  916. Compiler compiler(vm, source, filename, mode);
  917. try{
  918. return compiler.__fillCode();
  919. }catch(std::exception& e){
  920. if(const _Error* _ = dynamic_cast<const _Error*>(&e)){
  921. (*vm->_stderr) << e.what() << '\n';
  922. }else{
  923. auto ce = CompileError("UnexpectedError", e.what(), compiler.getLineSnapshot());
  924. (*vm->_stderr) << ce.what() << '\n';
  925. }
  926. return nullptr;
  927. }
  928. }