compiler.h 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067
  1. #pragma once
  2. #include "parser.h"
  3. #include "error.h"
  4. #include "vm.h"
  5. class Compiler;
  6. typedef void (Compiler::*GrammarFn)();
  7. typedef void (Compiler::*CompilerAction)();
  8. struct GrammarRule{
  9. GrammarFn prefix;
  10. GrammarFn infix;
  11. Precedence precedence;
  12. };
  13. struct Loop {
  14. int start;
  15. std::vector<int> breaks;
  16. Loop(int start) : start(start) {}
  17. };
  18. class Compiler {
  19. public:
  20. pkpy::unique_ptr<Parser> parser;
  21. std::stack<_Code> codes;
  22. std::stack<Loop> loops;
  23. bool isCompilingClass = false;
  24. int lexingCnt = 0;
  25. VM* vm;
  26. emhash8::HashMap<_TokenType, GrammarRule> rules;
  27. _Code getCode() {
  28. return codes.top();
  29. }
  30. CompileMode mode() {
  31. return parser->src->mode;
  32. }
  33. Loop& getLoop() {
  34. return loops.top();
  35. }
  36. Compiler(VM* vm, const char* source, _Str filename, CompileMode mode){
  37. this->vm = vm;
  38. this->parser = pkpy::make_unique<Parser>(
  39. pkpy::make_shared<SourceMetadata>(source, filename, mode)
  40. );
  41. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  42. #define METHOD(name) &Compiler::name
  43. #define NO_INFIX nullptr, PREC_NONE
  44. for(_TokenType i=0; i<__TOKENS_LEN; i++) rules[i] = { nullptr, NO_INFIX };
  45. rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB };
  46. rules[TK("(")] = { METHOD(exprGrouping), METHOD(exprCall), PREC_CALL };
  47. rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscript), PREC_SUBSCRIPT };
  48. rules[TK("{")] = { METHOD(exprMap), NO_INFIX };
  49. rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  50. rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM };
  51. rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM };
  52. rules[TK("*")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_FACTOR };
  53. rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  54. rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  55. rules[TK("**")] = { nullptr, METHOD(exprBinaryOp), PREC_EXPONENT };
  56. rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  57. rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  58. rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  59. rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  60. rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  61. rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  62. rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  63. rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  64. rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  65. rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  66. rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND };
  67. rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR };
  68. rules[TK("not")] = { METHOD(exprUnaryOp), nullptr, PREC_UNARY };
  69. rules[TK("True")] = { METHOD(exprValue), NO_INFIX };
  70. rules[TK("False")] = { METHOD(exprValue), NO_INFIX };
  71. rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX };
  72. rules[TK("None")] = { METHOD(exprValue), NO_INFIX };
  73. rules[TK("...")] = { METHOD(exprValue), NO_INFIX };
  74. rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
  75. rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
  76. rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
  77. rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
  78. rules[TK("?")] = { nullptr, METHOD(exprTernary), PREC_TERNARY };
  79. rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  80. rules[TK("+=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  81. rules[TK("-=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  82. rules[TK("*=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  83. rules[TK("/=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  84. rules[TK("//=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  85. rules[TK("%=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  86. rules[TK("&=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  87. rules[TK("|=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  88. rules[TK("^=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  89. rules[TK(",")] = { nullptr, METHOD(exprComma), PREC_COMMA };
  90. rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  91. rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  92. rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND };
  93. rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR };
  94. rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  95. #undef METHOD
  96. #undef NO_INFIX
  97. #define EXPR() parsePrecedence(PREC_TERNARY) // no '=' and ',' just a simple expression
  98. #define EXPR_TUPLE() parsePrecedence(PREC_COMMA) // no '=', but ',' is allowed
  99. #define EXPR_ANY() parsePrecedence(PREC_ASSIGNMENT)
  100. }
  101. _Str eatStringUntil(char quote) {
  102. std::vector<char> buff;
  103. while (true) {
  104. char c = parser->eatCharIncludeNewLine();
  105. if (c == quote) break;
  106. if (c == '\0' || c == '\n') syntaxError("EOL while scanning string literal");
  107. if (c == '\\') {
  108. switch (parser->eatCharIncludeNewLine()) {
  109. case '"': buff.push_back('"'); break;
  110. case '\'': buff.push_back('\''); break;
  111. case '\\': buff.push_back('\\'); break;
  112. case 'n': buff.push_back('\n'); break;
  113. case 'r': buff.push_back('\r'); break;
  114. case 't': buff.push_back('\t'); break;
  115. case '\n': case '\r': break;
  116. default: syntaxError("invalid escape character");
  117. }
  118. } else {
  119. buff.push_back(c);
  120. }
  121. }
  122. return _Str(buff.data(), buff.size());
  123. }
  124. void eatString(char quote, bool fstr) {
  125. _Str s = eatStringUntil(quote);
  126. if(fstr){
  127. parser->setNextToken(TK("@fstr"), vm->PyStr(s));
  128. }else{
  129. parser->setNextToken(TK("@str"), vm->PyStr(s));
  130. }
  131. }
  132. void eatNumber() {
  133. static const std::regex pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?");
  134. std::smatch m;
  135. const char* i = parser->token_start;
  136. while(*i != '\n' && *i != '\0') i++;
  137. std::string s = std::string(parser->token_start, i);
  138. try{
  139. if (std::regex_search(s, m, pattern)) {
  140. // here is m.length()-1, since the first char is eaten by lexToken()
  141. for(int j=0; j<m.length()-1; j++) parser->eatChar();
  142. int base = 10;
  143. size_t size;
  144. if (m[1].matched) base = 16;
  145. if (m[2].matched) {
  146. if(base == 16) syntaxError("hex literal should not contain a dot");
  147. parser->setNextToken(TK("@num"), vm->PyFloat(std::stod(m[0], &size)));
  148. } else {
  149. parser->setNextToken(TK("@num"), vm->PyInt(std::stoll(m[0], &size, base)));
  150. }
  151. if (size != m.length()) throw std::runtime_error("length mismatch");
  152. }
  153. }catch(std::exception& _){
  154. syntaxError("invalid number literal");
  155. }
  156. }
  157. void lexToken(){
  158. lexingCnt++;
  159. _lexToken();
  160. lexingCnt--;
  161. }
  162. // Lex the next token and set it as the next token.
  163. void _lexToken() {
  164. parser->previous = parser->current;
  165. parser->current = parser->nextToken();
  166. //_Str _info = parser->current.info(); std::cout << _info << '[' << parser->current_line << ']' << std::endl;
  167. while (parser->peekChar() != '\0') {
  168. parser->token_start = parser->current_char;
  169. char c = parser->eatCharIncludeNewLine();
  170. switch (c) {
  171. case '\'': case '"': eatString(c, false); return;
  172. case '#': parser->skipLineComment(); break;
  173. case '{': parser->setNextToken(TK("{")); return;
  174. case '}': parser->setNextToken(TK("}")); return;
  175. case ',': parser->setNextToken(TK(",")); return;
  176. case ':': parser->setNextToken(TK(":")); return;
  177. case ';': parser->setNextToken(TK(";")); return;
  178. case '(': parser->setNextToken(TK("(")); return;
  179. case ')': parser->setNextToken(TK(")")); return;
  180. case '[': parser->setNextToken(TK("[")); return;
  181. case ']': parser->setNextToken(TK("]")); return;
  182. case '%': parser->setNextTwoCharToken('=', TK("%"), TK("%=")); return;
  183. case '&': parser->setNextTwoCharToken('=', TK("&"), TK("&=")); return;
  184. case '|': parser->setNextTwoCharToken('=', TK("|"), TK("|=")); return;
  185. case '^': parser->setNextTwoCharToken('=', TK("^"), TK("^=")); return;
  186. case '?': parser->setNextToken(TK("?")); return;
  187. case '.': {
  188. if(parser->matchChar('.')) {
  189. if(parser->matchChar('.')) {
  190. parser->setNextToken(TK("..."));
  191. } else {
  192. syntaxError("invalid token '..'");
  193. }
  194. } else {
  195. parser->setNextToken(TK("."));
  196. }
  197. return;
  198. }
  199. case '=': parser->setNextTwoCharToken('=', TK("="), TK("==")); return;
  200. case '+': parser->setNextTwoCharToken('=', TK("+"), TK("+=")); return;
  201. case '>': {
  202. if(parser->matchChar('=')) parser->setNextToken(TK(">="));
  203. else if(parser->matchChar('>')) parser->setNextToken(TK(">>"));
  204. else parser->setNextToken(TK(">"));
  205. return;
  206. }
  207. case '<': {
  208. if(parser->matchChar('=')) parser->setNextToken(TK("<="));
  209. else if(parser->matchChar('<')) parser->setNextToken(TK("<<"));
  210. else parser->setNextToken(TK("<"));
  211. return;
  212. }
  213. case '-': {
  214. if(parser->matchChar('=')) parser->setNextToken(TK("-="));
  215. else if(parser->matchChar('>')) parser->setNextToken(TK("->"));
  216. else parser->setNextToken(TK("-"));
  217. return;
  218. }
  219. case '!':
  220. if(parser->matchChar('=')) parser->setNextToken(TK("!="));
  221. else syntaxError("expected '=' after '!'");
  222. break;
  223. case '*':
  224. if (parser->matchChar('*')) {
  225. parser->setNextToken(TK("**")); // '**'
  226. } else {
  227. parser->setNextTwoCharToken('=', TK("*"), TK("*="));
  228. }
  229. return;
  230. case '/':
  231. if(parser->matchChar('/')) {
  232. parser->setNextTwoCharToken('=', TK("//"), TK("//="));
  233. } else {
  234. parser->setNextTwoCharToken('=', TK("/"), TK("/="));
  235. }
  236. return;
  237. case '\r': break; // just ignore '\r'
  238. case ' ': case '\t': parser->eatSpaces(); break;
  239. case '\n': {
  240. parser->setNextToken(TK("@eol"));
  241. if(!parser->eatIndentation()) indentationError("unindent does not match any outer indentation level");
  242. return;
  243. }
  244. default: {
  245. if(c == 'f'){
  246. if(parser->matchChar('\'')) {eatString('\'', true); return;}
  247. if(parser->matchChar('"')) {eatString('"', true); return;}
  248. }
  249. if (c >= '0' && c <= '9') {
  250. eatNumber();
  251. return;
  252. }
  253. switch (parser->eatName())
  254. {
  255. case 0: break;
  256. case 1: syntaxError("invalid char: " + std::string(1, c));
  257. case 2: syntaxError("invalid utf8 sequence: " + std::string(1, c));
  258. case 3: syntaxError("@id contains invalid char"); break;
  259. case 4: syntaxError("invalid JSON token"); break;
  260. default: UNREACHABLE();
  261. }
  262. return;
  263. }
  264. }
  265. }
  266. parser->token_start = parser->current_char;
  267. parser->setNextToken(TK("@eof"));
  268. }
  269. inline _TokenType peek() {
  270. return parser->current.type;
  271. }
  272. // not sure this will work
  273. _TokenType peekNext() {
  274. if(parser->nexts.empty()) return TK("@eof");
  275. return parser->nexts.front().type;
  276. }
  277. bool match(_TokenType expected) {
  278. if (peek() != expected) return false;
  279. lexToken();
  280. return true;
  281. }
  282. void consume(_TokenType expected) {
  283. if (!match(expected)){
  284. _StrStream ss;
  285. ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(peek()) << "'";
  286. syntaxError(ss.str());
  287. }
  288. }
  289. bool matchNewLines(bool repl_throw=false) {
  290. bool consumed = false;
  291. if (peek() == TK("@eol")) {
  292. while (peek() == TK("@eol")) lexToken();
  293. consumed = true;
  294. }
  295. if (repl_throw && peek() == TK("@eof")){
  296. throw NeedMoreLines(isCompilingClass);
  297. }
  298. return consumed;
  299. }
  300. bool matchEndStatement() {
  301. if (match(TK(";"))) {
  302. matchNewLines();
  303. return true;
  304. }
  305. if (matchNewLines() || peek() == TK("@eof"))
  306. return true;
  307. if (peek() == TK("@dedent")) return true;
  308. return false;
  309. }
  310. void consumeEndStatement() {
  311. if (!matchEndStatement()) syntaxError("expected statement end");
  312. }
  313. void exprLiteral() {
  314. PyVar value = parser->previous.value;
  315. int index = getCode()->addConst(value);
  316. emitCode(OP_LOAD_CONST, index);
  317. }
  318. void exprFString() {
  319. static const std::regex pattern(R"(\{(.*?)\})");
  320. PyVar value = parser->previous.value;
  321. _Str s = vm->PyStr_AS_C(value);
  322. std::sregex_iterator begin(s.begin(), s.end(), pattern);
  323. std::sregex_iterator end;
  324. int size = 0;
  325. int i = 0;
  326. for(auto it = begin; it != end; it++) {
  327. std::smatch m = *it;
  328. if (i < m.position()) {
  329. std::string literal = s.substr(i, m.position() - i);
  330. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(literal)));
  331. size++;
  332. }
  333. emitCode(OP_LOAD_EVAL_FN);
  334. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(m[1].str())));
  335. emitCode(OP_CALL, 1);
  336. size++;
  337. i = (int)(m.position() + m.length());
  338. }
  339. if (i < s.size()) {
  340. std::string literal = s.substr(i, s.size() - i);
  341. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(literal)));
  342. size++;
  343. }
  344. emitCode(OP_BUILD_STRING, size);
  345. }
  346. void exprLambda() {
  347. _Func func = pkpy::make_shared<Function>();
  348. func->name = "<lambda>";
  349. if(!match(TK(":"))){
  350. __compileFunctionArgs(func, false);
  351. consume(TK(":"));
  352. }
  353. func->code = pkpy::make_shared<CodeObject>(parser->src, func->name);
  354. this->codes.push(func->code);
  355. EXPR_TUPLE();
  356. emitCode(OP_RETURN_VALUE);
  357. this->codes.pop();
  358. emitCode(OP_LOAD_LAMBDA, getCode()->addConst(vm->PyFunction(func)));
  359. }
  360. void exprAssign() {
  361. _TokenType op = parser->previous.type;
  362. if(op == TK("=")) { // a = (expr)
  363. EXPR_TUPLE();
  364. emitCode(OP_STORE_REF);
  365. }else{ // a += (expr) -> a = a + (expr)
  366. // TODO: optimization is needed for inplace operators
  367. emitCode(OP_DUP_TOP);
  368. EXPR();
  369. switch (op) {
  370. case TK("+="): emitCode(OP_BINARY_OP, 0); break;
  371. case TK("-="): emitCode(OP_BINARY_OP, 1); break;
  372. case TK("*="): emitCode(OP_BINARY_OP, 2); break;
  373. case TK("/="): emitCode(OP_BINARY_OP, 3); break;
  374. case TK("//="): emitCode(OP_BINARY_OP, 4); break;
  375. case TK("%="): emitCode(OP_BINARY_OP, 5); break;
  376. case TK("&="): emitCode(OP_BITWISE_OP, 2); break;
  377. case TK("|="): emitCode(OP_BITWISE_OP, 3); break;
  378. case TK("^="): emitCode(OP_BITWISE_OP, 4); break;
  379. default: UNREACHABLE();
  380. }
  381. emitCode(OP_STORE_REF);
  382. }
  383. }
  384. void exprComma() {
  385. int size = 1; // an expr is in the stack now
  386. do {
  387. EXPR(); // NOTE: "1," will fail, "1,2" will be ok
  388. size++;
  389. } while(match(TK(",")));
  390. emitCode(OP_BUILD_SMART_TUPLE, size);
  391. }
  392. void exprOr() {
  393. int patch = emitCode(OP_JUMP_IF_TRUE_OR_POP);
  394. parsePrecedence(PREC_LOGICAL_OR);
  395. patchJump(patch);
  396. }
  397. void exprAnd() {
  398. int patch = emitCode(OP_JUMP_IF_FALSE_OR_POP);
  399. parsePrecedence(PREC_LOGICAL_AND);
  400. patchJump(patch);
  401. }
  402. void exprTernary() {
  403. int patch = emitCode(OP_POP_JUMP_IF_FALSE);
  404. EXPR(); // if true
  405. int patch2 = emitCode(OP_JUMP_ABSOLUTE);
  406. consume(TK(":"));
  407. patchJump(patch);
  408. EXPR(); // if false
  409. patchJump(patch2);
  410. }
  411. void exprBinaryOp() {
  412. _TokenType op = parser->previous.type;
  413. parsePrecedence((Precedence)(rules[op].precedence + 1));
  414. switch (op) {
  415. case TK("+"): emitCode(OP_BINARY_OP, 0); break;
  416. case TK("-"): emitCode(OP_BINARY_OP, 1); break;
  417. case TK("*"): emitCode(OP_BINARY_OP, 2); break;
  418. case TK("/"): emitCode(OP_BINARY_OP, 3); break;
  419. case TK("//"): emitCode(OP_BINARY_OP, 4); break;
  420. case TK("%"): emitCode(OP_BINARY_OP, 5); break;
  421. case TK("**"): emitCode(OP_BINARY_OP, 6); break;
  422. case TK("<"): emitCode(OP_COMPARE_OP, 0); break;
  423. case TK("<="): emitCode(OP_COMPARE_OP, 1); break;
  424. case TK("=="): emitCode(OP_COMPARE_OP, 2); break;
  425. case TK("!="): emitCode(OP_COMPARE_OP, 3); break;
  426. case TK(">"): emitCode(OP_COMPARE_OP, 4); break;
  427. case TK(">="): emitCode(OP_COMPARE_OP, 5); break;
  428. case TK("in"): emitCode(OP_CONTAINS_OP, 0); break;
  429. case TK("not in"): emitCode(OP_CONTAINS_OP, 1); break;
  430. case TK("is"): emitCode(OP_IS_OP, 0); break;
  431. case TK("is not"): emitCode(OP_IS_OP, 1); break;
  432. case TK("<<"): emitCode(OP_BITWISE_OP, 0); break;
  433. case TK(">>"): emitCode(OP_BITWISE_OP, 1); break;
  434. case TK("&"): emitCode(OP_BITWISE_OP, 2); break;
  435. case TK("|"): emitCode(OP_BITWISE_OP, 3); break;
  436. case TK("^"): emitCode(OP_BITWISE_OP, 4); break;
  437. default: UNREACHABLE();
  438. }
  439. }
  440. void exprUnaryOp() {
  441. _TokenType op = parser->previous.type;
  442. matchNewLines();
  443. parsePrecedence((Precedence)(PREC_UNARY + 1));
  444. switch (op) {
  445. case TK("-"): emitCode(OP_UNARY_NEGATIVE); break;
  446. case TK("not"): emitCode(OP_UNARY_NOT); break;
  447. case TK("*"): syntaxError("cannot use '*' as unary operator"); break;
  448. default: UNREACHABLE();
  449. }
  450. }
  451. void exprGrouping() {
  452. matchNewLines(mode()==SINGLE_MODE);
  453. EXPR_TUPLE();
  454. matchNewLines(mode()==SINGLE_MODE);
  455. consume(TK(")"));
  456. }
  457. void exprList() {
  458. int _patch = emitCode(OP_NO_OP);
  459. int _body_start = getCode()->co_code.size();
  460. int ARGC = 0;
  461. do {
  462. matchNewLines(mode()==SINGLE_MODE);
  463. if (peek() == TK("]")) break;
  464. EXPR(); ARGC++;
  465. matchNewLines(mode()==SINGLE_MODE);
  466. if(ARGC == 1 && match(TK("for"))) goto __LISTCOMP;
  467. } while (match(TK(",")));
  468. matchNewLines(mode()==SINGLE_MODE);
  469. consume(TK("]"));
  470. emitCode(OP_BUILD_LIST, ARGC);
  471. return;
  472. __LISTCOMP:
  473. int _body_end = getCode()->co_code.size();
  474. getCode()->co_code[_patch].op = OP_JUMP_ABSOLUTE;
  475. getCode()->co_code[_patch].arg = _body_end;
  476. emitCode(OP_BUILD_LIST, 0);
  477. EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE();
  478. matchNewLines(mode()==SINGLE_MODE);
  479. int _skipPatch = emitCode(OP_JUMP_ABSOLUTE);
  480. int _cond_start = getCode()->co_code.size();
  481. if(match(TK("if"))) EXPR_TUPLE();
  482. int _cond_end = getCode()->co_code.size();
  483. patchJump(_skipPatch);
  484. emitCode(OP_GET_ITER);
  485. Loop& loop = enterLoop();
  486. int patch = emitCode(OP_FOR_ITER);
  487. if(_cond_end != _cond_start) { // there is an if condition
  488. getCode()->__moveToEnd(_cond_start, _cond_end);
  489. int ifpatch = emitCode(OP_POP_JUMP_IF_FALSE);
  490. getCode()->__moveToEnd(_body_start, _body_end);
  491. emitCode(OP_LIST_APPEND);
  492. patchJump(ifpatch);
  493. }else{
  494. getCode()->__moveToEnd(_body_start, _body_end);
  495. emitCode(OP_LIST_APPEND);
  496. }
  497. emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine();
  498. patchJump(patch);
  499. exitLoop();
  500. matchNewLines(mode()==SINGLE_MODE);
  501. consume(TK("]"));
  502. }
  503. void exprMap() {
  504. bool parsing_dict = false;
  505. int size = 0;
  506. do {
  507. matchNewLines(mode()==SINGLE_MODE);
  508. if (peek() == TK("}")) break;
  509. EXPR();
  510. if(peek() == TK(":")) parsing_dict = true;
  511. if(parsing_dict){
  512. consume(TK(":"));
  513. EXPR();
  514. }
  515. size++;
  516. matchNewLines(mode()==SINGLE_MODE);
  517. } while (match(TK(",")));
  518. matchNewLines();
  519. consume(TK("}"));
  520. if(size == 0 || parsing_dict) emitCode(OP_BUILD_MAP, size);
  521. else emitCode(OP_BUILD_SET, size);
  522. }
  523. void exprCall() {
  524. int ARGC = 0;
  525. int KWARGC = 0;
  526. do {
  527. matchNewLines(mode()==SINGLE_MODE);
  528. if (peek() == TK(")")) break;
  529. if(peek() == TK("@id") && peekNext() == TK("=")) {
  530. consume(TK("@id"));
  531. const _Str& key = parser->previous.str();
  532. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(key)));
  533. consume(TK("="));
  534. EXPR();
  535. KWARGC++;
  536. } else{
  537. if(KWARGC > 0) syntaxError("positional argument follows keyword argument");
  538. EXPR();
  539. ARGC++;
  540. }
  541. matchNewLines(mode()==SINGLE_MODE);
  542. } while (match(TK(",")));
  543. consume(TK(")"));
  544. emitCode(OP_CALL, (KWARGC << 16) | ARGC);
  545. }
  546. void exprName() {
  547. Token tkname = parser->previous;
  548. int index = getCode()->addName(
  549. tkname.str(),
  550. codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL
  551. );
  552. emitCode(OP_LOAD_NAME_REF, index);
  553. }
  554. void exprAttrib() {
  555. consume(TK("@id"));
  556. const _Str& name = parser->previous.str();
  557. int index = getCode()->addName(name, NAME_ATTR);
  558. emitCode(OP_BUILD_ATTR_REF, index);
  559. }
  560. // [:], [:b]
  561. // [a], [a:], [a:b]
  562. void exprSubscript() {
  563. if(match(TK(":"))){
  564. emitCode(OP_LOAD_NONE);
  565. if(match(TK("]"))){
  566. emitCode(OP_LOAD_NONE);
  567. }else{
  568. EXPR_TUPLE();
  569. consume(TK("]"));
  570. }
  571. emitCode(OP_BUILD_SLICE);
  572. }else{
  573. EXPR_TUPLE();
  574. if(match(TK(":"))){
  575. if(match(TK("]"))){
  576. emitCode(OP_LOAD_NONE);
  577. }else{
  578. EXPR_TUPLE();
  579. consume(TK("]"));
  580. }
  581. emitCode(OP_BUILD_SLICE);
  582. }else{
  583. consume(TK("]"));
  584. }
  585. }
  586. emitCode(OP_BUILD_INDEX_REF);
  587. }
  588. void exprValue() {
  589. _TokenType op = parser->previous.type;
  590. switch (op) {
  591. case TK("None"): emitCode(OP_LOAD_NONE); break;
  592. case TK("True"): emitCode(OP_LOAD_TRUE); break;
  593. case TK("False"): emitCode(OP_LOAD_FALSE); break;
  594. case TK("..."): emitCode(OP_LOAD_ELLIPSIS); break;
  595. default: UNREACHABLE();
  596. }
  597. }
  598. void keepOpcodeLine(){
  599. int i = getCode()->co_code.size() - 1;
  600. getCode()->co_code[i].line = getCode()->co_code[i-1].line;
  601. }
  602. int emitCode(Opcode opcode, int arg=-1) {
  603. int line = parser->previous.line;
  604. getCode()->co_code.push_back(
  605. ByteCode{(uint8_t)opcode, arg, (uint16_t)line}
  606. );
  607. return getCode()->co_code.size() - 1;
  608. }
  609. inline void patchJump(int addr_index) {
  610. int target = getCode()->co_code.size();
  611. getCode()->co_code[addr_index].arg = target;
  612. }
  613. void compileBlockBody(){
  614. __compileBlockBody(&Compiler::compileStatement);
  615. }
  616. void __compileBlockBody(CompilerAction action) {
  617. consume(TK(":"));
  618. if(!matchNewLines(mode()==SINGLE_MODE)){
  619. syntaxError("expected a new line after ':'");
  620. }
  621. consume(TK("@indent"));
  622. while (peek() != TK("@dedent")) {
  623. matchNewLines();
  624. (this->*action)();
  625. matchNewLines();
  626. }
  627. consume(TK("@dedent"));
  628. }
  629. Token compileImportPath() {
  630. consume(TK("@id"));
  631. Token tkmodule = parser->previous;
  632. int index = getCode()->addName(tkmodule.str(), NAME_GLOBAL);
  633. emitCode(OP_IMPORT_NAME, index);
  634. return tkmodule;
  635. }
  636. // import a as b
  637. void compileRegularImport() {
  638. do {
  639. Token tkmodule = compileImportPath();
  640. if (match(TK("as"))) {
  641. consume(TK("@id"));
  642. tkmodule = parser->previous;
  643. }
  644. int index = getCode()->addName(tkmodule.str(), NAME_GLOBAL);
  645. emitCode(OP_STORE_NAME_REF, index);
  646. } while (match(TK(",")));
  647. consumeEndStatement();
  648. }
  649. // from a import b as c, d as e
  650. void compileFromImport() {
  651. Token tkmodule = compileImportPath();
  652. consume(TK("import"));
  653. do {
  654. emitCode(OP_DUP_TOP);
  655. consume(TK("@id"));
  656. Token tkname = parser->previous;
  657. int index = getCode()->addName(tkname.str(), NAME_GLOBAL);
  658. emitCode(OP_BUILD_ATTR_REF, index);
  659. if (match(TK("as"))) {
  660. consume(TK("@id"));
  661. tkname = parser->previous;
  662. }
  663. index = getCode()->addName(tkname.str(), NAME_GLOBAL);
  664. emitCode(OP_STORE_NAME_REF, index);
  665. } while (match(TK(",")));
  666. emitCode(OP_POP_TOP);
  667. consumeEndStatement();
  668. }
  669. void parsePrecedence(Precedence precedence) {
  670. lexToken();
  671. GrammarFn prefix = rules[parser->previous.type].prefix;
  672. if (prefix == nullptr) syntaxError(_Str("expected an expression, but got ") + TK_STR(parser->previous.type));
  673. (this->*prefix)();
  674. while (rules[peek()].precedence >= precedence) {
  675. lexToken();
  676. _TokenType op = parser->previous.type;
  677. GrammarFn infix = rules[op].infix;
  678. if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true");
  679. (this->*infix)();
  680. }
  681. }
  682. void compileIfStatement() {
  683. matchNewLines();
  684. EXPR_TUPLE();
  685. int ifpatch = emitCode(OP_POP_JUMP_IF_FALSE);
  686. compileBlockBody();
  687. if (match(TK("elif"))) {
  688. int exit_jump = emitCode(OP_JUMP_ABSOLUTE);
  689. patchJump(ifpatch);
  690. compileIfStatement();
  691. patchJump(exit_jump);
  692. } else if (match(TK("else"))) {
  693. int exit_jump = emitCode(OP_JUMP_ABSOLUTE);
  694. patchJump(ifpatch);
  695. compileBlockBody();
  696. patchJump(exit_jump);
  697. } else {
  698. patchJump(ifpatch);
  699. }
  700. }
  701. Loop& enterLoop(){
  702. Loop lp((int)getCode()->co_code.size());
  703. loops.push(lp);
  704. return loops.top();
  705. }
  706. void exitLoop(){
  707. Loop& lp = loops.top();
  708. for(int addr : lp.breaks) patchJump(addr);
  709. loops.pop();
  710. }
  711. void compileWhileLoop() {
  712. Loop& loop = enterLoop();
  713. EXPR_TUPLE();
  714. int patch = emitCode(OP_POP_JUMP_IF_FALSE);
  715. compileBlockBody();
  716. emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine();
  717. patchJump(patch);
  718. exitLoop();
  719. }
  720. void EXPR_FOR_VARS(){
  721. int size = 0;
  722. do {
  723. consume(TK("@id"));
  724. exprName(); size++;
  725. } while (match(TK(",")));
  726. if(size > 1) emitCode(OP_BUILD_SMART_TUPLE, size);
  727. }
  728. void compileForLoop() {
  729. EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE();
  730. emitCode(OP_GET_ITER);
  731. Loop& loop = enterLoop();
  732. int patch = emitCode(OP_FOR_ITER);
  733. compileBlockBody();
  734. emitCode(OP_JUMP_ABSOLUTE, loop.start); keepOpcodeLine();
  735. patchJump(patch);
  736. exitLoop();
  737. }
  738. void compileStatement() {
  739. if (match(TK("break"))) {
  740. if (loops.empty()) syntaxError("'break' outside loop");
  741. consumeEndStatement();
  742. int patch = emitCode(OP_SAFE_JUMP_ABSOLUTE);
  743. getLoop().breaks.push_back(patch);
  744. } else if (match(TK("continue"))) {
  745. if (loops.empty()) syntaxError("'continue' not properly in loop");
  746. consumeEndStatement();
  747. emitCode(OP_JUMP_ABSOLUTE, getLoop().start);
  748. } else if (match(TK("return"))) {
  749. if (codes.size() == 1)
  750. syntaxError("'return' outside function");
  751. if(matchEndStatement()){
  752. emitCode(OP_LOAD_NONE);
  753. }else{
  754. EXPR_TUPLE();
  755. consumeEndStatement();
  756. }
  757. emitCode(OP_RETURN_VALUE);
  758. } else if (match(TK("if"))) {
  759. compileIfStatement();
  760. } else if (match(TK("while"))) {
  761. compileWhileLoop();
  762. } else if (match(TK("for"))) {
  763. compileForLoop();
  764. } else if(match(TK("assert"))){
  765. EXPR();
  766. emitCode(OP_ASSERT);
  767. consumeEndStatement();
  768. } else if(match(TK("with"))){
  769. EXPR();
  770. consume(TK("as"));
  771. consume(TK("@id"));
  772. Token tkname = parser->previous;
  773. int index = getCode()->addName(
  774. tkname.str(),
  775. codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL
  776. );
  777. emitCode(OP_STORE_NAME_REF, index);
  778. emitCode(OP_LOAD_NAME_REF, index);
  779. emitCode(OP_WITH_ENTER);
  780. compileBlockBody();
  781. emitCode(OP_LOAD_NAME_REF, index);
  782. emitCode(OP_WITH_EXIT);
  783. } else if(match(TK("label"))){
  784. if(mode() != EXEC_MODE) syntaxError("'label' is only available in EXEC_MODE");
  785. consume(TK(".")); consume(TK("@id"));
  786. getCode()->addLabel(parser->previous.str());
  787. consumeEndStatement();
  788. } else if(match(TK("goto"))){
  789. // https://entrian.com/goto/
  790. if(mode() != EXEC_MODE) syntaxError("'goto' is only available in EXEC_MODE");
  791. consume(TK(".")); consume(TK("@id"));
  792. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(parser->previous.str())));
  793. emitCode(OP_GOTO);
  794. consumeEndStatement();
  795. } else if(match(TK("raise"))){
  796. consume(TK("@id")); // dummy exception type
  797. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyStr(parser->previous.str())));
  798. consume(TK("("));EXPR();consume(TK(")"));
  799. emitCode(OP_RAISE_ERROR);
  800. consumeEndStatement();
  801. } else if(match(TK("del"))){
  802. EXPR();
  803. emitCode(OP_DELETE_REF);
  804. consumeEndStatement();
  805. } else if(match(TK("global"))){
  806. do {
  807. consume(TK("@id"));
  808. getCode()->co_global_names.push_back(parser->previous.str());
  809. } while (match(TK(",")));
  810. consumeEndStatement();
  811. } else if(match(TK("pass"))){
  812. consumeEndStatement();
  813. } else {
  814. EXPR_ANY();
  815. consumeEndStatement();
  816. // If last op is not an assignment, pop the result.
  817. uint8_t lastOp = getCode()->co_code.back().op;
  818. if( lastOp != OP_STORE_NAME_REF && lastOp != OP_STORE_REF){
  819. if(mode()==SINGLE_MODE && parser->indents.top() == 0){
  820. emitCode(OP_PRINT_EXPR);
  821. }
  822. emitCode(OP_POP_TOP);
  823. }
  824. }
  825. }
  826. void compileClass(){
  827. consume(TK("@id"));
  828. int clsNameIdx = getCode()->addName(parser->previous.str(), NAME_GLOBAL);
  829. int superClsNameIdx = -1;
  830. if(match(TK("("))){
  831. consume(TK("@id"));
  832. superClsNameIdx = getCode()->addName(parser->previous.str(), NAME_GLOBAL);
  833. consume(TK(")"));
  834. }
  835. emitCode(OP_LOAD_NONE);
  836. isCompilingClass = true;
  837. __compileBlockBody(&Compiler::compileFunction);
  838. isCompilingClass = false;
  839. if(superClsNameIdx == -1) emitCode(OP_LOAD_NONE);
  840. else emitCode(OP_LOAD_NAME_REF, superClsNameIdx);
  841. emitCode(OP_BUILD_CLASS, clsNameIdx);
  842. }
  843. void __compileFunctionArgs(_Func func, bool enableTypeHints){
  844. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  845. do {
  846. if(state == 3) syntaxError("**kwargs should be the last argument");
  847. matchNewLines();
  848. if(match(TK("*"))){
  849. if(state < 1) state = 1;
  850. else syntaxError("*args should be placed before **kwargs");
  851. }
  852. else if(match(TK("**"))){
  853. state = 3;
  854. }
  855. consume(TK("@id"));
  856. const _Str& name = parser->previous.str();
  857. if(func->hasName(name)) syntaxError("duplicate argument name");
  858. // eat type hints
  859. if(enableTypeHints && match(TK(":"))) consume(TK("@id"));
  860. if(state == 0 && peek() == TK("=")) state = 2;
  861. switch (state)
  862. {
  863. case 0: func->args.push_back(name); break;
  864. case 1: func->starredArg = name; state+=1; break;
  865. case 2: {
  866. consume(TK("="));
  867. PyVarOrNull value = readLiteral();
  868. if(value == nullptr){
  869. syntaxError(_Str("expect a literal, not ") + TK_STR(parser->current.type));
  870. }
  871. func->kwArgs[name] = value;
  872. func->kwArgsOrder.push_back(name);
  873. } break;
  874. case 3: syntaxError("**kwargs is not supported yet"); break;
  875. }
  876. } while (match(TK(",")));
  877. }
  878. void compileFunction(){
  879. if(isCompilingClass){
  880. if(match(TK("pass"))) return;
  881. consume(TK("def"));
  882. }
  883. _Func func = pkpy::make_shared<Function>();
  884. consume(TK("@id"));
  885. func->name = parser->previous.str();
  886. if (match(TK("(")) && !match(TK(")"))) {
  887. __compileFunctionArgs(func, true);
  888. consume(TK(")"));
  889. }
  890. // eat type hints
  891. if(match(TK("->"))) consume(TK("@id"));
  892. func->code = pkpy::make_shared<CodeObject>(parser->src, func->name);
  893. this->codes.push(func->code);
  894. compileBlockBody();
  895. this->codes.pop();
  896. emitCode(OP_LOAD_CONST, getCode()->addConst(vm->PyFunction(func)));
  897. if(!isCompilingClass) emitCode(OP_STORE_FUNCTION);
  898. }
  899. PyVarOrNull readLiteral(){
  900. if(match(TK("-"))){
  901. consume(TK("@num"));
  902. PyVar val = parser->previous.value;
  903. return vm->numNegated(val);
  904. }
  905. if(match(TK("@num"))) return parser->previous.value;
  906. if(match(TK("@str"))) return parser->previous.value;
  907. if(match(TK("True"))) return vm->PyBool(true);
  908. if(match(TK("False"))) return vm->PyBool(false);
  909. if(match(TK("None"))) return vm->None;
  910. if(match(TK("..."))) return vm->Ellipsis;
  911. return nullptr;
  912. }
  913. void compileTopLevelStatement() {
  914. if (match(TK("class"))) {
  915. compileClass();
  916. } else if (match(TK("def"))) {
  917. compileFunction();
  918. } else if (match(TK("import"))) {
  919. compileRegularImport();
  920. } else if (match(TK("from"))) {
  921. compileFromImport();
  922. } else {
  923. compileStatement();
  924. }
  925. }
  926. _Code __fillCode(){
  927. _Code code = pkpy::make_shared<CodeObject>(parser->src, _Str("<module>"));
  928. codes.push(code);
  929. // Lex initial tokens. current <-- next.
  930. lexToken();
  931. lexToken();
  932. matchNewLines();
  933. if(mode()==EVAL_MODE) {
  934. EXPR_TUPLE();
  935. consume(TK("@eof"));
  936. return code;
  937. }else if(mode()==JSON_MODE){
  938. PyVarOrNull value = readLiteral();
  939. if(value != nullptr) emitCode(OP_LOAD_CONST, code->addConst(value));
  940. else if(match(TK("{"))) exprMap();
  941. else if(match(TK("["))) exprList();
  942. else syntaxError("expect a JSON object or array");
  943. consume(TK("@eof"));
  944. return code;
  945. }
  946. while (!match(TK("@eof"))) {
  947. compileTopLevelStatement();
  948. matchNewLines();
  949. }
  950. return code;
  951. }
  952. /***** Error Reporter *****/
  953. _Str getLineSnapshot(){
  954. int lineno = parser->current.line;
  955. const char* cursor = parser->current.start;
  956. // if error occurs in lexing, lineno should be `parser->current_line`
  957. if(lexingCnt > 0){
  958. lineno = parser->current_line;
  959. cursor = parser->current_char;
  960. }
  961. if(parser->peekChar() == '\n') lineno--;
  962. return parser->src->snapshot(lineno, cursor);
  963. }
  964. void syntaxError(_Str msg){ throw CompileError("SyntaxError", msg, getLineSnapshot()); }
  965. void indentationError(_Str msg){ throw CompileError("IndentationError", msg, getLineSnapshot()); }
  966. void unexpectedError(_Str msg){ throw CompileError("UnexpectedError", msg, getLineSnapshot()); }
  967. };