compiler.h 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082
  1. #pragma once
  2. #include "parser.h"
  3. #include "error.h"
  4. #include "vm.h"
  5. class Compiler;
  6. typedef void (Compiler::*GrammarFn)();
  7. typedef void (Compiler::*CompilerAction)();
  8. struct GrammarRule{
  9. GrammarFn prefix;
  10. GrammarFn infix;
  11. Precedence precedence;
  12. };
  13. enum StringType { NORMAL_STRING, RAW_STRING, F_STRING };
  14. class Compiler {
  15. public:
  16. std::unique_ptr<Parser> parser;
  17. std::stack<_Code> codes;
  18. bool isCompilingClass = false;
  19. int lexingCnt = 0;
  20. VM* vm;
  21. emhash8::HashMap<_TokenType, GrammarRule> rules;
  22. _Code co() const{ return codes.top(); }
  23. CompileMode mode() const{ return parser->src->mode;}
  24. Compiler(VM* vm, const char* source, _Str filename, CompileMode mode){
  25. this->vm = vm;
  26. this->parser = std::make_unique<Parser>(
  27. pkpy::make_shared<SourceMetadata>(source, filename, mode)
  28. );
  29. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  30. #define METHOD(name) &Compiler::name
  31. #define NO_INFIX nullptr, PREC_NONE
  32. for(_TokenType i=0; i<__TOKENS_LEN; i++) rules[i] = { nullptr, NO_INFIX };
  33. rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB };
  34. rules[TK("(")] = { METHOD(exprGrouping), METHOD(exprCall), PREC_CALL };
  35. rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscript), PREC_SUBSCRIPT };
  36. rules[TK("{")] = { METHOD(exprMap), NO_INFIX };
  37. rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  38. rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM };
  39. rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM };
  40. rules[TK("*")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_FACTOR };
  41. rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  42. rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  43. rules[TK("**")] = { nullptr, METHOD(exprBinaryOp), PREC_EXPONENT };
  44. rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  45. rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  46. rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  47. rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  48. rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  49. rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  50. rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  51. rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  52. rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  53. rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  54. rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND };
  55. rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR };
  56. rules[TK("not")] = { METHOD(exprUnaryOp), nullptr, PREC_UNARY };
  57. rules[TK("True")] = { METHOD(exprValue), NO_INFIX };
  58. rules[TK("False")] = { METHOD(exprValue), NO_INFIX };
  59. rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX };
  60. rules[TK("None")] = { METHOD(exprValue), NO_INFIX };
  61. rules[TK("...")] = { METHOD(exprValue), NO_INFIX };
  62. rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
  63. rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
  64. rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
  65. rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
  66. rules[TK("?")] = { nullptr, METHOD(exprTernary), PREC_TERNARY };
  67. rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  68. rules[TK("+=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  69. rules[TK("-=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  70. rules[TK("*=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  71. rules[TK("/=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  72. rules[TK("//=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  73. rules[TK("%=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  74. rules[TK("&=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  75. rules[TK("|=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  76. rules[TK("^=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  77. rules[TK(",")] = { nullptr, METHOD(exprComma), PREC_COMMA };
  78. rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  79. rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  80. rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND };
  81. rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR };
  82. rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  83. #undef METHOD
  84. #undef NO_INFIX
  85. #define EXPR() parsePrecedence(PREC_TERNARY) // no '=' and ',' just a simple expression
  86. #define EXPR_TUPLE() parsePrecedence(PREC_COMMA) // no '=', but ',' is allowed
  87. #define EXPR_ANY() parsePrecedence(PREC_ASSIGNMENT)
  88. }
  89. _Str eatStringUntil(char quote, bool raw) {
  90. bool quote3 = parser->match_n_chars(2, quote);
  91. std::vector<char> buff;
  92. while (true) {
  93. char c = parser->eatchar_include_newline();
  94. if (c == quote){
  95. if(quote3 && !parser->match_n_chars(2, quote)){
  96. buff.push_back(c);
  97. continue;
  98. }
  99. break;
  100. }
  101. if (c == '\0'){
  102. if(quote3 && parser->src->mode == SINGLE_MODE){
  103. throw NeedMoreLines(false);
  104. }
  105. syntaxError("EOL while scanning string literal");
  106. }
  107. if (c == '\n'){
  108. if(!quote3) syntaxError("EOL while scanning string literal");
  109. else{
  110. buff.push_back(c);
  111. continue;
  112. }
  113. }
  114. if (!raw && c == '\\') {
  115. switch (parser->eatchar_include_newline()) {
  116. case '"': buff.push_back('"'); break;
  117. case '\'': buff.push_back('\''); break;
  118. case '\\': buff.push_back('\\'); break;
  119. case 'n': buff.push_back('\n'); break;
  120. case 'r': buff.push_back('\r'); break;
  121. case 't': buff.push_back('\t'); break;
  122. default: syntaxError("invalid escape character");
  123. }
  124. } else {
  125. buff.push_back(c);
  126. }
  127. }
  128. return _Str(buff.data(), buff.size());
  129. }
  130. void eatString(char quote, StringType type) {
  131. _Str s = eatStringUntil(quote, type == RAW_STRING);
  132. if(type == F_STRING){
  133. parser->set_next_token(TK("@fstr"), vm->PyStr(s));
  134. }else{
  135. parser->set_next_token(TK("@str"), vm->PyStr(s));
  136. }
  137. }
  138. void eatNumber() {
  139. static const std::regex pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?");
  140. std::smatch m;
  141. const char* i = parser->token_start;
  142. while(*i != '\n' && *i != '\0') i++;
  143. std::string s = std::string(parser->token_start, i);
  144. try{
  145. if (std::regex_search(s, m, pattern)) {
  146. // here is m.length()-1, since the first char was eaten by lexToken()
  147. for(int j=0; j<m.length()-1; j++) parser->eatchar();
  148. int base = 10;
  149. size_t size;
  150. if (m[1].matched) base = 16;
  151. if (m[2].matched) {
  152. if(base == 16) syntaxError("hex literal should not contain a dot");
  153. parser->set_next_token(TK("@num"), vm->PyFloat(std::stod(m[0], &size)));
  154. } else {
  155. parser->set_next_token(TK("@num"), vm->PyInt(std::stoll(m[0], &size, base)));
  156. }
  157. if (size != m.length()) throw std::runtime_error("length mismatch");
  158. }
  159. }catch(std::exception& _){
  160. syntaxError("invalid number literal");
  161. }
  162. }
  163. void lexToken(){
  164. lexingCnt++;
  165. _lexToken();
  166. lexingCnt--;
  167. }
  168. // Lex the next token and set it as the next token.
  169. void _lexToken() {
  170. parser->prev = parser->curr;
  171. parser->curr = parser->next_token();
  172. //_Str _info = parser->curr.info(); std::cout << _info << '[' << parser->current_line << ']' << std::endl;
  173. while (parser->peekchar() != '\0') {
  174. parser->token_start = parser->curr_char;
  175. char c = parser->eatchar_include_newline();
  176. switch (c) {
  177. case '\'': case '"': eatString(c, NORMAL_STRING); return;
  178. case '#': parser->skip_line_comment(); break;
  179. case '{': parser->set_next_token(TK("{")); return;
  180. case '}': parser->set_next_token(TK("}")); return;
  181. case ',': parser->set_next_token(TK(",")); return;
  182. case ':': parser->set_next_token(TK(":")); return;
  183. case ';': parser->set_next_token(TK(";")); return;
  184. case '(': parser->set_next_token(TK("(")); return;
  185. case ')': parser->set_next_token(TK(")")); return;
  186. case '[': parser->set_next_token(TK("[")); return;
  187. case ']': parser->set_next_token(TK("]")); return;
  188. case '%': parser->set_next_token_2('=', TK("%"), TK("%=")); return;
  189. case '&': parser->set_next_token_2('=', TK("&"), TK("&=")); return;
  190. case '|': parser->set_next_token_2('=', TK("|"), TK("|=")); return;
  191. case '^': parser->set_next_token_2('=', TK("^"), TK("^=")); return;
  192. case '?': parser->set_next_token(TK("?")); return;
  193. case '.': {
  194. if(parser->matchchar('.')) {
  195. if(parser->matchchar('.')) {
  196. parser->set_next_token(TK("..."));
  197. } else {
  198. syntaxError("invalid token '..'");
  199. }
  200. } else {
  201. parser->set_next_token(TK("."));
  202. }
  203. return;
  204. }
  205. case '=': parser->set_next_token_2('=', TK("="), TK("==")); return;
  206. case '+': parser->set_next_token_2('=', TK("+"), TK("+=")); return;
  207. case '>': {
  208. if(parser->matchchar('=')) parser->set_next_token(TK(">="));
  209. else if(parser->matchchar('>')) parser->set_next_token(TK(">>"));
  210. else parser->set_next_token(TK(">"));
  211. return;
  212. }
  213. case '<': {
  214. if(parser->matchchar('=')) parser->set_next_token(TK("<="));
  215. else if(parser->matchchar('<')) parser->set_next_token(TK("<<"));
  216. else parser->set_next_token(TK("<"));
  217. return;
  218. }
  219. case '-': {
  220. if(parser->matchchar('=')) parser->set_next_token(TK("-="));
  221. else if(parser->matchchar('>')) parser->set_next_token(TK("->"));
  222. else parser->set_next_token(TK("-"));
  223. return;
  224. }
  225. case '!':
  226. if(parser->matchchar('=')) parser->set_next_token(TK("!="));
  227. else syntaxError("expected '=' after '!'");
  228. break;
  229. case '*':
  230. if (parser->matchchar('*')) {
  231. parser->set_next_token(TK("**")); // '**'
  232. } else {
  233. parser->set_next_token_2('=', TK("*"), TK("*="));
  234. }
  235. return;
  236. case '/':
  237. if(parser->matchchar('/')) {
  238. parser->set_next_token_2('=', TK("//"), TK("//="));
  239. } else {
  240. parser->set_next_token_2('=', TK("/"), TK("/="));
  241. }
  242. return;
  243. case '\r': break; // just ignore '\r'
  244. case ' ': case '\t': parser->eat_spaces(); break;
  245. case '\n': {
  246. parser->set_next_token(TK("@eol"));
  247. if(!parser->eat_indentation()) indentationError("unindent does not match any outer indentation level");
  248. return;
  249. }
  250. default: {
  251. if(c == 'f'){
  252. if(parser->matchchar('\'')) {eatString('\'', F_STRING); return;}
  253. if(parser->matchchar('"')) {eatString('"', F_STRING); return;}
  254. }else if(c == 'r'){
  255. if(parser->matchchar('\'')) {eatString('\'', RAW_STRING); return;}
  256. if(parser->matchchar('"')) {eatString('"', RAW_STRING); return;}
  257. }
  258. if (c >= '0' && c <= '9') {
  259. eatNumber();
  260. return;
  261. }
  262. switch (parser->eat_name())
  263. {
  264. case 0: break;
  265. case 1: syntaxError("invalid char: " + std::string(1, c));
  266. case 2: syntaxError("invalid utf8 sequence: " + std::string(1, c));
  267. case 3: syntaxError("@id contains invalid char"); break;
  268. case 4: syntaxError("invalid JSON token"); break;
  269. default: UNREACHABLE();
  270. }
  271. return;
  272. }
  273. }
  274. }
  275. parser->token_start = parser->curr_char;
  276. parser->set_next_token(TK("@eof"));
  277. }
  278. inline _TokenType peek() {
  279. return parser->curr.type;
  280. }
  281. // not sure this will work
  282. _TokenType peek_next() {
  283. if(parser->nexts.empty()) return TK("@eof");
  284. return parser->nexts.front().type;
  285. }
  286. bool match(_TokenType expected) {
  287. if (peek() != expected) return false;
  288. lexToken();
  289. return true;
  290. }
  291. void consume(_TokenType expected) {
  292. if (!match(expected)){
  293. _StrStream ss;
  294. ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(peek()) << "'";
  295. syntaxError(ss.str());
  296. }
  297. }
  298. bool matchNewLines(bool repl_throw=false) {
  299. bool consumed = false;
  300. if (peek() == TK("@eol")) {
  301. while (peek() == TK("@eol")) lexToken();
  302. consumed = true;
  303. }
  304. if (repl_throw && peek() == TK("@eof")){
  305. throw NeedMoreLines(isCompilingClass);
  306. }
  307. return consumed;
  308. }
  309. bool matchEndStatement() {
  310. if (match(TK(";"))) { matchNewLines(); return true; }
  311. if (matchNewLines() || peek()==TK("@eof")) return true;
  312. if (peek() == TK("@dedent")) return true;
  313. return false;
  314. }
  315. void consumeEndStatement() {
  316. if (!matchEndStatement()) syntaxError("expected statement end");
  317. }
  318. void exprLiteral() {
  319. PyVar value = parser->prev.value;
  320. int index = co()->add_const(value);
  321. emit(OP_LOAD_CONST, index);
  322. }
  323. void exprFString() {
  324. static const std::regex pattern(R"(\{(.*?)\})");
  325. PyVar value = parser->prev.value;
  326. _Str s = vm->PyStr_AS_C(value);
  327. std::sregex_iterator begin(s.begin(), s.end(), pattern);
  328. std::sregex_iterator end;
  329. int size = 0;
  330. int i = 0;
  331. for(auto it = begin; it != end; it++) {
  332. std::smatch m = *it;
  333. if (i < m.position()) {
  334. std::string literal = s.substr(i, m.position() - i);
  335. emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(literal)));
  336. size++;
  337. }
  338. emit(OP_LOAD_EVAL_FN);
  339. emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(m[1].str())));
  340. emit(OP_CALL, 1);
  341. size++;
  342. i = (int)(m.position() + m.length());
  343. }
  344. if (i < s.size()) {
  345. std::string literal = s.substr(i, s.size() - i);
  346. emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(literal)));
  347. size++;
  348. }
  349. emit(OP_BUILD_STRING, size);
  350. }
  351. void exprLambda() {
  352. _Func func = pkpy::make_shared<Function>();
  353. func->name = "<lambda>";
  354. if(!match(TK(":"))){
  355. __compileFunctionArgs(func, false);
  356. consume(TK(":"));
  357. }
  358. func->code = pkpy::make_shared<CodeObject>(parser->src, func->name);
  359. this->codes.push(func->code);
  360. EXPR_TUPLE();
  361. emit(OP_RETURN_VALUE);
  362. func->code->optimize();
  363. this->codes.pop();
  364. emit(OP_LOAD_LAMBDA, co()->add_const(vm->PyFunction(func)));
  365. }
  366. void exprAssign() {
  367. _TokenType op = parser->prev.type;
  368. if(op == TK("=")) { // a = (expr)
  369. EXPR_TUPLE();
  370. emit(OP_STORE_REF);
  371. }else{ // a += (expr) -> a = a + (expr)
  372. emit(OP_DUP_TOP);
  373. EXPR();
  374. switch (op) {
  375. case TK("+="): emit(OP_BINARY_OP, 0); break;
  376. case TK("-="): emit(OP_BINARY_OP, 1); break;
  377. case TK("*="): emit(OP_BINARY_OP, 2); break;
  378. case TK("/="): emit(OP_BINARY_OP, 3); break;
  379. case TK("//="): emit(OP_BINARY_OP, 4); break;
  380. case TK("%="): emit(OP_BINARY_OP, 5); break;
  381. case TK("&="): emit(OP_BITWISE_OP, 2); break;
  382. case TK("|="): emit(OP_BITWISE_OP, 3); break;
  383. case TK("^="): emit(OP_BITWISE_OP, 4); break;
  384. default: UNREACHABLE();
  385. }
  386. emit(OP_STORE_REF);
  387. }
  388. }
  389. void exprComma() {
  390. int size = 1; // an expr is in the stack now
  391. do {
  392. EXPR(); // NOTE: "1," will fail, "1,2" will be ok
  393. size++;
  394. } while(match(TK(",")));
  395. emit(OP_BUILD_SMART_TUPLE, size);
  396. }
  397. void exprOr() {
  398. int patch = emit(OP_JUMP_IF_TRUE_OR_POP);
  399. parsePrecedence(PREC_LOGICAL_OR);
  400. patch_jump(patch);
  401. }
  402. void exprAnd() {
  403. int patch = emit(OP_JUMP_IF_FALSE_OR_POP);
  404. parsePrecedence(PREC_LOGICAL_AND);
  405. patch_jump(patch);
  406. }
  407. void exprTernary() {
  408. int patch = emit(OP_POP_JUMP_IF_FALSE);
  409. EXPR(); // if true
  410. int patch2 = emit(OP_JUMP_ABSOLUTE);
  411. consume(TK(":"));
  412. patch_jump(patch);
  413. EXPR(); // if false
  414. patch_jump(patch2);
  415. }
  416. void exprBinaryOp() {
  417. _TokenType op = parser->prev.type;
  418. parsePrecedence((Precedence)(rules[op].precedence + 1));
  419. switch (op) {
  420. case TK("+"): emit(OP_BINARY_OP, 0); break;
  421. case TK("-"): emit(OP_BINARY_OP, 1); break;
  422. case TK("*"): emit(OP_BINARY_OP, 2); break;
  423. case TK("/"): emit(OP_BINARY_OP, 3); break;
  424. case TK("//"): emit(OP_BINARY_OP, 4); break;
  425. case TK("%"): emit(OP_BINARY_OP, 5); break;
  426. case TK("**"): emit(OP_BINARY_OP, 6); break;
  427. case TK("<"): emit(OP_COMPARE_OP, 0); break;
  428. case TK("<="): emit(OP_COMPARE_OP, 1); break;
  429. case TK("=="): emit(OP_COMPARE_OP, 2); break;
  430. case TK("!="): emit(OP_COMPARE_OP, 3); break;
  431. case TK(">"): emit(OP_COMPARE_OP, 4); break;
  432. case TK(">="): emit(OP_COMPARE_OP, 5); break;
  433. case TK("in"): emit(OP_CONTAINS_OP, 0); break;
  434. case TK("not in"): emit(OP_CONTAINS_OP, 1); break;
  435. case TK("is"): emit(OP_IS_OP, 0); break;
  436. case TK("is not"): emit(OP_IS_OP, 1); break;
  437. case TK("<<"): emit(OP_BITWISE_OP, 0); break;
  438. case TK(">>"): emit(OP_BITWISE_OP, 1); break;
  439. case TK("&"): emit(OP_BITWISE_OP, 2); break;
  440. case TK("|"): emit(OP_BITWISE_OP, 3); break;
  441. case TK("^"): emit(OP_BITWISE_OP, 4); break;
  442. default: UNREACHABLE();
  443. }
  444. }
  445. void exprUnaryOp() {
  446. _TokenType op = parser->prev.type;
  447. parsePrecedence((Precedence)(PREC_UNARY + 1));
  448. switch (op) {
  449. case TK("-"): emit(OP_UNARY_NEGATIVE); break;
  450. case TK("not"): emit(OP_UNARY_NOT); break;
  451. case TK("*"): syntaxError("cannot use '*' as unary operator"); break;
  452. default: UNREACHABLE();
  453. }
  454. }
  455. void exprGrouping() {
  456. matchNewLines(mode()==SINGLE_MODE);
  457. EXPR_TUPLE();
  458. matchNewLines(mode()==SINGLE_MODE);
  459. consume(TK(")"));
  460. }
  461. void exprList() {
  462. int _patch = emit(OP_NO_OP);
  463. int _body_start = co()->co_code.size();
  464. int ARGC = 0;
  465. do {
  466. matchNewLines(mode()==SINGLE_MODE);
  467. if (peek() == TK("]")) break;
  468. EXPR(); ARGC++;
  469. matchNewLines(mode()==SINGLE_MODE);
  470. if(ARGC == 1 && match(TK("for"))) goto __LISTCOMP;
  471. } while (match(TK(",")));
  472. matchNewLines(mode()==SINGLE_MODE);
  473. consume(TK("]"));
  474. emit(OP_BUILD_LIST, ARGC);
  475. return;
  476. __LISTCOMP:
  477. int _body_end_return = emit(OP_JUMP_ABSOLUTE, -1);
  478. int _body_end = co()->co_code.size();
  479. co()->co_code[_patch].op = OP_JUMP_ABSOLUTE;
  480. co()->co_code[_patch].arg = _body_end;
  481. emit(OP_BUILD_LIST, 0);
  482. EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE();
  483. matchNewLines(mode()==SINGLE_MODE);
  484. int _skipPatch = emit(OP_JUMP_ABSOLUTE);
  485. int _cond_start = co()->co_code.size();
  486. int _cond_end_return = -1;
  487. if(match(TK("if"))) {
  488. EXPR_TUPLE();
  489. _cond_end_return = emit(OP_JUMP_ABSOLUTE, -1);
  490. }
  491. patch_jump(_skipPatch);
  492. emit(OP_GET_ITER);
  493. co()->__enterBlock(FOR_LOOP);
  494. emit(OP_FOR_ITER);
  495. if(_cond_end_return != -1) { // there is an if condition
  496. emit(OP_JUMP_ABSOLUTE, _cond_start);
  497. patch_jump(_cond_end_return);
  498. int ifpatch = emit(OP_POP_JUMP_IF_FALSE);
  499. emit(OP_JUMP_ABSOLUTE, _body_start);
  500. patch_jump(_body_end_return);
  501. emit(OP_LIST_APPEND);
  502. patch_jump(ifpatch);
  503. }else{
  504. emit(OP_JUMP_ABSOLUTE, _body_start);
  505. patch_jump(_body_end_return);
  506. emit(OP_LIST_APPEND);
  507. }
  508. emit(OP_LOOP_CONTINUE, -1, true);
  509. co()->__exitBlock();
  510. matchNewLines(mode()==SINGLE_MODE);
  511. consume(TK("]"));
  512. }
  513. void exprMap() {
  514. bool parsing_dict = false;
  515. int size = 0;
  516. do {
  517. matchNewLines(mode()==SINGLE_MODE);
  518. if (peek() == TK("}")) break;
  519. EXPR();
  520. if(peek() == TK(":")) parsing_dict = true;
  521. if(parsing_dict){
  522. consume(TK(":"));
  523. EXPR();
  524. }
  525. size++;
  526. matchNewLines(mode()==SINGLE_MODE);
  527. } while (match(TK(",")));
  528. consume(TK("}"));
  529. if(size == 0 || parsing_dict) emit(OP_BUILD_MAP, size);
  530. else emit(OP_BUILD_SET, size);
  531. }
  532. void exprCall() {
  533. int ARGC = 0;
  534. int KWARGC = 0;
  535. do {
  536. matchNewLines(mode()==SINGLE_MODE);
  537. if (peek() == TK(")")) break;
  538. if(peek() == TK("@id") && peek_next() == TK("=")) {
  539. consume(TK("@id"));
  540. const _Str& key = parser->prev.str();
  541. emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(key)));
  542. consume(TK("="));
  543. EXPR();
  544. KWARGC++;
  545. } else{
  546. if(KWARGC > 0) syntaxError("positional argument follows keyword argument");
  547. EXPR();
  548. ARGC++;
  549. }
  550. matchNewLines(mode()==SINGLE_MODE);
  551. } while (match(TK(",")));
  552. consume(TK(")"));
  553. emit(OP_CALL, (KWARGC << 16) | ARGC);
  554. }
  555. void exprName() {
  556. Token tkname = parser->prev;
  557. int index = co()->add_name(
  558. tkname.str(),
  559. codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL
  560. );
  561. emit(OP_LOAD_NAME_REF, index);
  562. }
  563. void exprAttrib() {
  564. consume(TK("@id"));
  565. const _Str& name = parser->prev.str();
  566. int index = co()->add_name(name, NAME_ATTR);
  567. emit(OP_BUILD_ATTR_REF, index);
  568. }
  569. // [:], [:b]
  570. // [a], [a:], [a:b]
  571. void exprSubscript() {
  572. if(match(TK(":"))){
  573. emit(OP_LOAD_NONE);
  574. if(match(TK("]"))){
  575. emit(OP_LOAD_NONE);
  576. }else{
  577. EXPR_TUPLE();
  578. consume(TK("]"));
  579. }
  580. emit(OP_BUILD_SLICE);
  581. }else{
  582. EXPR_TUPLE();
  583. if(match(TK(":"))){
  584. if(match(TK("]"))){
  585. emit(OP_LOAD_NONE);
  586. }else{
  587. EXPR_TUPLE();
  588. consume(TK("]"));
  589. }
  590. emit(OP_BUILD_SLICE);
  591. }else{
  592. consume(TK("]"));
  593. }
  594. }
  595. emit(OP_BUILD_INDEX_REF);
  596. }
  597. void exprValue() {
  598. _TokenType op = parser->prev.type;
  599. switch (op) {
  600. case TK("None"): emit(OP_LOAD_NONE); break;
  601. case TK("True"): emit(OP_LOAD_TRUE); break;
  602. case TK("False"): emit(OP_LOAD_FALSE); break;
  603. case TK("..."): emit(OP_LOAD_ELLIPSIS); break;
  604. default: UNREACHABLE();
  605. }
  606. }
  607. int emit(Opcode opcode, int arg=-1, bool keepline=false) {
  608. int line = parser->prev.line;
  609. co()->co_code.push_back(
  610. Bytecode{(uint8_t)opcode, arg, line, (uint16_t)co()->_currBlockIndex}
  611. );
  612. int i = co()->co_code.size() - 1;
  613. if(keepline && i>=1) co()->co_code[i].line = co()->co_code[i-1].line;
  614. return i;
  615. }
  616. inline void patch_jump(int addr_index) {
  617. int target = co()->co_code.size();
  618. co()->co_code[addr_index].arg = target;
  619. }
  620. void compileBlockBody(){
  621. __compileBlockBody(&Compiler::compileStatement);
  622. }
  623. void __compileBlockBody(CompilerAction action) {
  624. consume(TK(":"));
  625. if(!matchNewLines(mode()==SINGLE_MODE)){
  626. syntaxError("expected a new line after ':'");
  627. }
  628. consume(TK("@indent"));
  629. while (peek() != TK("@dedent")) {
  630. matchNewLines();
  631. (this->*action)();
  632. matchNewLines();
  633. }
  634. consume(TK("@dedent"));
  635. }
  636. Token compileImportPath() {
  637. consume(TK("@id"));
  638. Token tkmodule = parser->prev;
  639. int index = co()->add_name(tkmodule.str(), NAME_GLOBAL);
  640. emit(OP_IMPORT_NAME, index);
  641. return tkmodule;
  642. }
  643. // import a as b
  644. void compileRegularImport() {
  645. do {
  646. Token tkmodule = compileImportPath();
  647. if (match(TK("as"))) {
  648. consume(TK("@id"));
  649. tkmodule = parser->prev;
  650. }
  651. int index = co()->add_name(tkmodule.str(), NAME_GLOBAL);
  652. emit(OP_STORE_NAME, index);
  653. } while (match(TK(",")));
  654. consumeEndStatement();
  655. }
  656. // from a import b as c, d as e
  657. void compileFromImport() {
  658. Token tkmodule = compileImportPath();
  659. consume(TK("import"));
  660. do {
  661. emit(OP_DUP_TOP);
  662. consume(TK("@id"));
  663. Token tkname = parser->prev;
  664. int index = co()->add_name(tkname.str(), NAME_GLOBAL);
  665. emit(OP_BUILD_ATTR_REF, index);
  666. if (match(TK("as"))) {
  667. consume(TK("@id"));
  668. tkname = parser->prev;
  669. }
  670. index = co()->add_name(tkname.str(), NAME_GLOBAL);
  671. emit(OP_STORE_NAME, index);
  672. } while (match(TK(",")));
  673. emit(OP_POP_TOP);
  674. consumeEndStatement();
  675. }
  676. void parsePrecedence(Precedence precedence) {
  677. lexToken();
  678. GrammarFn prefix = rules[parser->prev.type].prefix;
  679. if (prefix == nullptr) syntaxError(_Str("expected an expression, but got ") + TK_STR(parser->prev.type));
  680. (this->*prefix)();
  681. while (rules[peek()].precedence >= precedence) {
  682. lexToken();
  683. _TokenType op = parser->prev.type;
  684. GrammarFn infix = rules[op].infix;
  685. if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true");
  686. (this->*infix)();
  687. }
  688. }
  689. void compileIfStatement() {
  690. matchNewLines();
  691. EXPR_TUPLE();
  692. int ifpatch = emit(OP_POP_JUMP_IF_FALSE);
  693. compileBlockBody();
  694. if (match(TK("elif"))) {
  695. int exit_jump = emit(OP_JUMP_ABSOLUTE);
  696. patch_jump(ifpatch);
  697. compileIfStatement();
  698. patch_jump(exit_jump);
  699. } else if (match(TK("else"))) {
  700. int exit_jump = emit(OP_JUMP_ABSOLUTE);
  701. patch_jump(ifpatch);
  702. compileBlockBody();
  703. patch_jump(exit_jump);
  704. } else {
  705. patch_jump(ifpatch);
  706. }
  707. }
  708. void compileWhileLoop() {
  709. co()->__enterBlock(WHILE_LOOP);
  710. EXPR_TUPLE();
  711. int patch = emit(OP_POP_JUMP_IF_FALSE);
  712. compileBlockBody();
  713. emit(OP_LOOP_CONTINUE, -1, true);
  714. patch_jump(patch);
  715. co()->__exitBlock();
  716. }
  717. void EXPR_FOR_VARS(){
  718. int size = 0;
  719. do {
  720. consume(TK("@id"));
  721. exprName(); size++;
  722. } while (match(TK(",")));
  723. if(size > 1) emit(OP_BUILD_SMART_TUPLE, size);
  724. }
  725. void compileForLoop() {
  726. EXPR_FOR_VARS();consume(TK("in")); EXPR_TUPLE();
  727. emit(OP_GET_ITER);
  728. co()->__enterBlock(FOR_LOOP);
  729. emit(OP_FOR_ITER);
  730. compileBlockBody();
  731. emit(OP_LOOP_CONTINUE, -1, true);
  732. co()->__exitBlock();
  733. }
  734. void compileTryExcept() {
  735. co()->__enterBlock(TRY_EXCEPT);
  736. compileBlockBody();
  737. int patch = emit(OP_JUMP_ABSOLUTE);
  738. co()->__exitBlock();
  739. consume(TK("except"));
  740. if(match(TK("@id"))){ // exception name
  741. compileBlockBody();
  742. }
  743. if(match(TK("finally"))){
  744. consume(TK(":"));
  745. syntaxError("finally is not supported yet");
  746. }
  747. patch_jump(patch);
  748. }
  749. void compileStatement() {
  750. if (match(TK("break"))) {
  751. if (!co()->__isCurrBlockLoop()) syntaxError("'break' outside loop");
  752. consumeEndStatement();
  753. emit(OP_LOOP_BREAK);
  754. } else if (match(TK("continue"))) {
  755. if (!co()->__isCurrBlockLoop()) syntaxError("'continue' not properly in loop");
  756. consumeEndStatement();
  757. emit(OP_LOOP_CONTINUE);
  758. } else if (match(TK("return"))) {
  759. if (codes.size() == 1)
  760. syntaxError("'return' outside function");
  761. if(matchEndStatement()){
  762. emit(OP_LOAD_NONE);
  763. }else{
  764. EXPR_TUPLE();
  765. consumeEndStatement();
  766. }
  767. emit(OP_RETURN_VALUE, -1, true);
  768. } else if (match(TK("if"))) {
  769. compileIfStatement();
  770. } else if (match(TK("while"))) {
  771. compileWhileLoop();
  772. } else if (match(TK("for"))) {
  773. compileForLoop();
  774. } else if (match(TK("try"))) {
  775. compileTryExcept();
  776. }else if(match(TK("assert"))){
  777. EXPR();
  778. emit(OP_ASSERT);
  779. consumeEndStatement();
  780. } else if(match(TK("with"))){
  781. EXPR();
  782. consume(TK("as"));
  783. consume(TK("@id"));
  784. Token tkname = parser->prev;
  785. int index = co()->add_name(
  786. tkname.str(),
  787. codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL
  788. );
  789. emit(OP_STORE_NAME, index);
  790. emit(OP_LOAD_NAME_REF, index);
  791. emit(OP_WITH_ENTER);
  792. compileBlockBody();
  793. emit(OP_LOAD_NAME_REF, index);
  794. emit(OP_WITH_EXIT);
  795. } else if(match(TK("label"))){
  796. if(mode() != EXEC_MODE) syntaxError("'label' is only available in EXEC_MODE");
  797. consume(TK(".")); consume(TK("@id"));
  798. co()->add_label(parser->prev.str());
  799. consumeEndStatement();
  800. } else if(match(TK("goto"))){
  801. // https://entrian.com/goto/
  802. if(mode() != EXEC_MODE) syntaxError("'goto' is only available in EXEC_MODE");
  803. consume(TK(".")); consume(TK("@id"));
  804. emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(parser->prev.str())));
  805. emit(OP_GOTO);
  806. consumeEndStatement();
  807. } else if(match(TK("raise"))){
  808. consume(TK("@id")); // dummy exception type
  809. emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(parser->prev.str())));
  810. if(match(TK("("))){
  811. EXPR();
  812. consume(TK(")"));
  813. }else{
  814. emit(OP_LOAD_NONE); // ...?
  815. }
  816. emit(OP_RAISE_ERROR);
  817. consumeEndStatement();
  818. } else if(match(TK("del"))){
  819. EXPR();
  820. emit(OP_DELETE_REF);
  821. consumeEndStatement();
  822. } else if(match(TK("global"))){
  823. do {
  824. consume(TK("@id"));
  825. co()->co_global_names.push_back(parser->prev.str());
  826. } while (match(TK(",")));
  827. consumeEndStatement();
  828. } else if(match(TK("pass"))){
  829. consumeEndStatement();
  830. } else {
  831. EXPR_ANY();
  832. consumeEndStatement();
  833. // If last op is not an assignment, pop the result.
  834. uint8_t lastOp = co()->co_code.back().op;
  835. if( lastOp!=OP_STORE_NAME && lastOp!=OP_STORE_REF){
  836. if(mode()==SINGLE_MODE && parser->indents.top()==0) emit(OP_PRINT_EXPR);
  837. emit(OP_POP_TOP);
  838. }
  839. }
  840. }
  841. void compileClass(){
  842. consume(TK("@id"));
  843. int clsNameIdx = co()->add_name(parser->prev.str(), NAME_GLOBAL);
  844. int superClsNameIdx = -1;
  845. if(match(TK("("))){
  846. consume(TK("@id"));
  847. superClsNameIdx = co()->add_name(parser->prev.str(), NAME_GLOBAL);
  848. consume(TK(")"));
  849. }
  850. emit(OP_LOAD_NONE);
  851. isCompilingClass = true;
  852. __compileBlockBody(&Compiler::compileFunction);
  853. isCompilingClass = false;
  854. if(superClsNameIdx == -1) emit(OP_LOAD_NONE);
  855. else emit(OP_LOAD_NAME_REF, superClsNameIdx);
  856. emit(OP_BUILD_CLASS, clsNameIdx);
  857. }
  858. void __compileFunctionArgs(_Func func, bool enableTypeHints){
  859. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  860. do {
  861. if(state == 3) syntaxError("**kwargs should be the last argument");
  862. matchNewLines();
  863. if(match(TK("*"))){
  864. if(state < 1) state = 1;
  865. else syntaxError("*args should be placed before **kwargs");
  866. }
  867. else if(match(TK("**"))){
  868. state = 3;
  869. }
  870. consume(TK("@id"));
  871. const _Str& name = parser->prev.str();
  872. if(func->hasName(name)) syntaxError("duplicate argument name");
  873. // eat type hints
  874. if(enableTypeHints && match(TK(":"))) consume(TK("@id"));
  875. if(state == 0 && peek() == TK("=")) state = 2;
  876. switch (state)
  877. {
  878. case 0: func->args.push_back(name); break;
  879. case 1: func->starredArg = name; state+=1; break;
  880. case 2: {
  881. consume(TK("="));
  882. PyVarOrNull value = readLiteral();
  883. if(value == nullptr){
  884. syntaxError(_Str("expect a literal, not ") + TK_STR(parser->curr.type));
  885. }
  886. func->kwArgs[name] = value;
  887. func->kwArgsOrder.push_back(name);
  888. } break;
  889. case 3: syntaxError("**kwargs is not supported yet"); break;
  890. }
  891. } while (match(TK(",")));
  892. }
  893. void compileFunction(){
  894. if(isCompilingClass){
  895. if(match(TK("pass"))) return;
  896. consume(TK("def"));
  897. }
  898. _Func func = pkpy::make_shared<Function>();
  899. consume(TK("@id"));
  900. func->name = parser->prev.str();
  901. if (match(TK("(")) && !match(TK(")"))) {
  902. __compileFunctionArgs(func, true);
  903. consume(TK(")"));
  904. }
  905. // eat type hints
  906. if(match(TK("->"))) consume(TK("@id"));
  907. func->code = pkpy::make_shared<CodeObject>(parser->src, func->name);
  908. this->codes.push(func->code);
  909. compileBlockBody();
  910. func->code->optimize();
  911. this->codes.pop();
  912. emit(OP_LOAD_CONST, co()->add_const(vm->PyFunction(func)));
  913. if(!isCompilingClass) emit(OP_STORE_FUNCTION);
  914. }
  915. PyVarOrNull readLiteral(){
  916. if(match(TK("-"))){
  917. consume(TK("@num"));
  918. PyVar val = parser->prev.value;
  919. return vm->num_negated(val);
  920. }
  921. if(match(TK("@num"))) return parser->prev.value;
  922. if(match(TK("@str"))) return parser->prev.value;
  923. if(match(TK("True"))) return vm->PyBool(true);
  924. if(match(TK("False"))) return vm->PyBool(false);
  925. if(match(TK("None"))) return vm->None;
  926. if(match(TK("..."))) return vm->Ellipsis;
  927. return nullptr;
  928. }
  929. void compileTopLevelStatement() {
  930. if (match(TK("class"))) {
  931. compileClass();
  932. } else if (match(TK("def"))) {
  933. compileFunction();
  934. } else if (match(TK("import"))) {
  935. compileRegularImport();
  936. } else if (match(TK("from"))) {
  937. compileFromImport();
  938. } else {
  939. compileStatement();
  940. }
  941. }
  942. bool _used = false;
  943. _Code __fillCode(){
  944. // can only be called once
  945. if(_used) UNREACHABLE();
  946. _used = true;
  947. _Code code = pkpy::make_shared<CodeObject>(parser->src, _Str("<module>"));
  948. codes.push(code);
  949. // Lex initial tokens. current <-- next.
  950. lexToken();
  951. lexToken();
  952. matchNewLines();
  953. if(mode()==EVAL_MODE) {
  954. EXPR_TUPLE();
  955. consume(TK("@eof"));
  956. code->optimize();
  957. return code;
  958. }else if(mode()==JSON_MODE){
  959. PyVarOrNull value = readLiteral();
  960. if(value != nullptr) emit(OP_LOAD_CONST, code->add_const(value));
  961. else if(match(TK("{"))) exprMap();
  962. else if(match(TK("["))) exprList();
  963. else syntaxError("expect a JSON object or array");
  964. consume(TK("@eof"));
  965. return code; // no need to optimize for JSON decoding
  966. }
  967. while (!match(TK("@eof"))) {
  968. compileTopLevelStatement();
  969. matchNewLines();
  970. }
  971. code->optimize();
  972. return code;
  973. }
  974. /***** Error Reporter *****/
  975. _Str getLineSnapshot(){
  976. int lineno = parser->curr.line;
  977. const char* cursor = parser->curr.start;
  978. // if error occurs in lexing, lineno should be `parser->current_line`
  979. if(lexingCnt > 0){
  980. lineno = parser->current_line;
  981. cursor = parser->curr_char;
  982. }
  983. if(parser->peekchar() == '\n') lineno--;
  984. return parser->src->snapshot(lineno, cursor);
  985. }
  986. void syntaxError(_Str msg){ throw CompileError("SyntaxError", msg, getLineSnapshot()); }
  987. void indentationError(_Str msg){ throw CompileError("IndentationError", msg, getLineSnapshot()); }
  988. void unexpectedError(_Str msg){ throw CompileError("UnexpectedError", msg, getLineSnapshot()); }
  989. };