compiler.h 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088
  1. #pragma once
  2. #include "parser.h"
  3. #include "error.h"
  4. #include "vm.h"
  5. class Compiler;
  6. typedef void (Compiler::*GrammarFn)();
  7. typedef void (Compiler::*CompilerAction)();
  8. struct GrammarRule{
  9. GrammarFn prefix;
  10. GrammarFn infix;
  11. Precedence precedence;
  12. };
  13. enum StringType { NORMAL_STRING, RAW_STRING, F_STRING };
  14. class Compiler {
  15. std::unique_ptr<Parser> parser;
  16. std::stack<_Code> codes;
  17. bool is_compiling_class = false;
  18. int lexing_count = 0;
  19. bool used = false;
  20. VM* vm;
  21. emhash8::HashMap<TokenIndex, GrammarRule> rules;
  22. _Code co() const{ return codes.top(); }
  23. CompileMode mode() const{ return parser->src->mode; }
  24. public:
  25. Compiler(VM* vm, const char* source, _Str filename, CompileMode mode){
  26. this->vm = vm;
  27. this->parser = std::make_unique<Parser>(
  28. pkpy::make_shared<SourceData>(source, filename, mode)
  29. );
  30. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  31. #define METHOD(name) &Compiler::name
  32. #define NO_INFIX nullptr, PREC_NONE
  33. for(TokenIndex i=0; i<kTokenCount; i++) rules[i] = { nullptr, NO_INFIX };
  34. rules[TK(".")] = { nullptr, METHOD(exprAttrib), PREC_ATTRIB };
  35. rules[TK("(")] = { METHOD(exprGrouping), METHOD(exprCall), PREC_CALL };
  36. rules[TK("[")] = { METHOD(exprList), METHOD(exprSubscript), PREC_SUBSCRIPT };
  37. rules[TK("{")] = { METHOD(exprMap), NO_INFIX };
  38. rules[TK("%")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  39. rules[TK("+")] = { nullptr, METHOD(exprBinaryOp), PREC_TERM };
  40. rules[TK("-")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_TERM };
  41. rules[TK("*")] = { METHOD(exprUnaryOp), METHOD(exprBinaryOp), PREC_FACTOR };
  42. rules[TK("/")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  43. rules[TK("//")] = { nullptr, METHOD(exprBinaryOp), PREC_FACTOR };
  44. rules[TK("**")] = { nullptr, METHOD(exprBinaryOp), PREC_EXPONENT };
  45. rules[TK(">")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  46. rules[TK("<")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  47. rules[TK("==")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  48. rules[TK("!=")] = { nullptr, METHOD(exprBinaryOp), PREC_EQUALITY };
  49. rules[TK(">=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  50. rules[TK("<=")] = { nullptr, METHOD(exprBinaryOp), PREC_COMPARISION };
  51. rules[TK("in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  52. rules[TK("is")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  53. rules[TK("not in")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  54. rules[TK("is not")] = { nullptr, METHOD(exprBinaryOp), PREC_TEST };
  55. rules[TK("and") ] = { nullptr, METHOD(exprAnd), PREC_LOGICAL_AND };
  56. rules[TK("or")] = { nullptr, METHOD(exprOr), PREC_LOGICAL_OR };
  57. rules[TK("not")] = { METHOD(exprUnaryOp), nullptr, PREC_UNARY };
  58. rules[TK("True")] = { METHOD(exprValue), NO_INFIX };
  59. rules[TK("False")] = { METHOD(exprValue), NO_INFIX };
  60. rules[TK("lambda")] = { METHOD(exprLambda), NO_INFIX };
  61. rules[TK("None")] = { METHOD(exprValue), NO_INFIX };
  62. rules[TK("...")] = { METHOD(exprValue), NO_INFIX };
  63. rules[TK("@id")] = { METHOD(exprName), NO_INFIX };
  64. rules[TK("@num")] = { METHOD(exprLiteral), NO_INFIX };
  65. rules[TK("@str")] = { METHOD(exprLiteral), NO_INFIX };
  66. rules[TK("@fstr")] = { METHOD(exprFString), NO_INFIX };
  67. rules[TK("?")] = { nullptr, METHOD(exprTernary), PREC_TERNARY };
  68. rules[TK("=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  69. rules[TK("+=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  70. rules[TK("-=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  71. rules[TK("*=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  72. rules[TK("/=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  73. rules[TK("//=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  74. rules[TK("%=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  75. rules[TK("&=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  76. rules[TK("|=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  77. rules[TK("^=")] = { nullptr, METHOD(exprAssign), PREC_ASSIGNMENT };
  78. rules[TK(",")] = { nullptr, METHOD(exprComma), PREC_COMMA };
  79. rules[TK("<<")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  80. rules[TK(">>")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  81. rules[TK("&")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_AND };
  82. rules[TK("|")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_OR };
  83. rules[TK("^")] = { nullptr, METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  84. #undef METHOD
  85. #undef NO_INFIX
  86. #define EXPR() parse_expression(PREC_TERNARY) // no '=' and ',' just a simple expression
  87. #define EXPR_TUPLE() parse_expression(PREC_COMMA) // no '=', but ',' is allowed
  88. #define EXPR_ANY() parse_expression(PREC_ASSIGNMENT)
  89. }
  90. private:
  91. _Str eat_string_until(char quote, bool raw) {
  92. bool quote3 = parser->match_n_chars(2, quote);
  93. std::vector<char> buff;
  94. while (true) {
  95. char c = parser->eatchar_include_newline();
  96. if (c == quote){
  97. if(quote3 && !parser->match_n_chars(2, quote)){
  98. buff.push_back(c);
  99. continue;
  100. }
  101. break;
  102. }
  103. if (c == '\0'){
  104. if(quote3 && parser->src->mode == REPL_MODE){
  105. throw NeedMoreLines(false);
  106. }
  107. SyntaxError("EOL while scanning string literal");
  108. }
  109. if (c == '\n'){
  110. if(!quote3) SyntaxError("EOL while scanning string literal");
  111. else{
  112. buff.push_back(c);
  113. continue;
  114. }
  115. }
  116. if (!raw && c == '\\') {
  117. switch (parser->eatchar_include_newline()) {
  118. case '"': buff.push_back('"'); break;
  119. case '\'': buff.push_back('\''); break;
  120. case '\\': buff.push_back('\\'); break;
  121. case 'n': buff.push_back('\n'); break;
  122. case 'r': buff.push_back('\r'); break;
  123. case 't': buff.push_back('\t'); break;
  124. default: SyntaxError("invalid escape character");
  125. }
  126. } else {
  127. buff.push_back(c);
  128. }
  129. }
  130. return _Str(buff.data(), buff.size());
  131. }
  132. void eat_string(char quote, StringType type) {
  133. _Str s = eat_string_until(quote, type == RAW_STRING);
  134. if(type == F_STRING){
  135. parser->set_next_token(TK("@fstr"), vm->PyStr(s));
  136. }else{
  137. parser->set_next_token(TK("@str"), vm->PyStr(s));
  138. }
  139. }
  140. void eat_number() {
  141. static const std::regex pattern("^(0x)?[0-9a-fA-F]+(\\.[0-9]+)?");
  142. std::smatch m;
  143. const char* i = parser->token_start;
  144. while(*i != '\n' && *i != '\0') i++;
  145. std::string s = std::string(parser->token_start, i);
  146. try{
  147. if (std::regex_search(s, m, pattern)) {
  148. // here is m.length()-1, since the first char was eaten by lex_token()
  149. for(int j=0; j<m.length()-1; j++) parser->eatchar();
  150. int base = 10;
  151. size_t size;
  152. if (m[1].matched) base = 16;
  153. if (m[2].matched) {
  154. if(base == 16) SyntaxError("hex literal should not contain a dot");
  155. parser->set_next_token(TK("@num"), vm->PyFloat(std::stod(m[0], &size)));
  156. } else {
  157. parser->set_next_token(TK("@num"), vm->PyInt(std::stoll(m[0], &size, base)));
  158. }
  159. if (size != m.length()) UNREACHABLE();
  160. }
  161. }catch(std::exception& _){
  162. SyntaxError("invalid number literal");
  163. }
  164. }
  165. void lex_token(){
  166. lexing_count++;
  167. _lex_token();
  168. lexing_count--;
  169. }
  170. // Lex the next token and set it as the next token.
  171. void _lex_token() {
  172. parser->prev = parser->curr;
  173. parser->curr = parser->next_token();
  174. //_Str _info = parser->curr.info(); std::cout << _info << '[' << parser->current_line << ']' << std::endl;
  175. while (parser->peekchar() != '\0') {
  176. parser->token_start = parser->curr_char;
  177. char c = parser->eatchar_include_newline();
  178. switch (c) {
  179. case '\'': case '"': eat_string(c, NORMAL_STRING); return;
  180. case '#': parser->skip_line_comment(); break;
  181. case '{': parser->set_next_token(TK("{")); return;
  182. case '}': parser->set_next_token(TK("}")); return;
  183. case ',': parser->set_next_token(TK(",")); return;
  184. case ':': parser->set_next_token(TK(":")); return;
  185. case ';': parser->set_next_token(TK(";")); return;
  186. case '(': parser->set_next_token(TK("(")); return;
  187. case ')': parser->set_next_token(TK(")")); return;
  188. case '[': parser->set_next_token(TK("[")); return;
  189. case ']': parser->set_next_token(TK("]")); return;
  190. case '%': parser->set_next_token_2('=', TK("%"), TK("%=")); return;
  191. case '&': parser->set_next_token_2('=', TK("&"), TK("&=")); return;
  192. case '|': parser->set_next_token_2('=', TK("|"), TK("|=")); return;
  193. case '^': parser->set_next_token_2('=', TK("^"), TK("^=")); return;
  194. case '?': parser->set_next_token(TK("?")); return;
  195. case '.': {
  196. if(parser->matchchar('.')) {
  197. if(parser->matchchar('.')) {
  198. parser->set_next_token(TK("..."));
  199. } else {
  200. SyntaxError("invalid token '..'");
  201. }
  202. } else {
  203. parser->set_next_token(TK("."));
  204. }
  205. return;
  206. }
  207. case '=': parser->set_next_token_2('=', TK("="), TK("==")); return;
  208. case '+': parser->set_next_token_2('=', TK("+"), TK("+=")); return;
  209. case '>': {
  210. if(parser->matchchar('=')) parser->set_next_token(TK(">="));
  211. else if(parser->matchchar('>')) parser->set_next_token(TK(">>"));
  212. else parser->set_next_token(TK(">"));
  213. return;
  214. }
  215. case '<': {
  216. if(parser->matchchar('=')) parser->set_next_token(TK("<="));
  217. else if(parser->matchchar('<')) parser->set_next_token(TK("<<"));
  218. else parser->set_next_token(TK("<"));
  219. return;
  220. }
  221. case '-': {
  222. if(parser->matchchar('=')) parser->set_next_token(TK("-="));
  223. else if(parser->matchchar('>')) parser->set_next_token(TK("->"));
  224. else parser->set_next_token(TK("-"));
  225. return;
  226. }
  227. case '!':
  228. if(parser->matchchar('=')) parser->set_next_token(TK("!="));
  229. else SyntaxError("expected '=' after '!'");
  230. break;
  231. case '*':
  232. if (parser->matchchar('*')) {
  233. parser->set_next_token(TK("**")); // '**'
  234. } else {
  235. parser->set_next_token_2('=', TK("*"), TK("*="));
  236. }
  237. return;
  238. case '/':
  239. if(parser->matchchar('/')) {
  240. parser->set_next_token_2('=', TK("//"), TK("//="));
  241. } else {
  242. parser->set_next_token_2('=', TK("/"), TK("/="));
  243. }
  244. return;
  245. case '\r': break; // just ignore '\r'
  246. case ' ': case '\t': parser->eat_spaces(); break;
  247. case '\n': {
  248. parser->set_next_token(TK("@eol"));
  249. if(!parser->eat_indentation()) IndentationError("unindent does not match any outer indentation level");
  250. return;
  251. }
  252. default: {
  253. if(c == 'f'){
  254. if(parser->matchchar('\'')) {eat_string('\'', F_STRING); return;}
  255. if(parser->matchchar('"')) {eat_string('"', F_STRING); return;}
  256. }else if(c == 'r'){
  257. if(parser->matchchar('\'')) {eat_string('\'', RAW_STRING); return;}
  258. if(parser->matchchar('"')) {eat_string('"', RAW_STRING); return;}
  259. }
  260. if (c >= '0' && c <= '9') {
  261. eat_number();
  262. return;
  263. }
  264. switch (parser->eat_name())
  265. {
  266. case 0: break;
  267. case 1: SyntaxError("invalid char: " + std::string(1, c));
  268. case 2: SyntaxError("invalid utf8 sequence: " + std::string(1, c));
  269. case 3: SyntaxError("@id contains invalid char"); break;
  270. case 4: SyntaxError("invalid JSON token"); break;
  271. default: UNREACHABLE();
  272. }
  273. return;
  274. }
  275. }
  276. }
  277. parser->token_start = parser->curr_char;
  278. parser->set_next_token(TK("@eof"));
  279. }
  280. inline TokenIndex peek() {
  281. return parser->curr.type;
  282. }
  283. // not sure this will work
  284. TokenIndex peek_next() {
  285. if(parser->nexts.empty()) return TK("@eof");
  286. return parser->nexts.front().type;
  287. }
  288. bool match(TokenIndex expected) {
  289. if (peek() != expected) return false;
  290. lex_token();
  291. return true;
  292. }
  293. void consume(TokenIndex expected) {
  294. if (!match(expected)){
  295. _StrStream ss;
  296. ss << "expected '" << TK_STR(expected) << "', but got '" << TK_STR(peek()) << "'";
  297. SyntaxError(ss.str());
  298. }
  299. }
  300. bool match_newlines(bool repl_throw=false) {
  301. bool consumed = false;
  302. if (peek() == TK("@eol")) {
  303. while (peek() == TK("@eol")) lex_token();
  304. consumed = true;
  305. }
  306. if (repl_throw && peek() == TK("@eof")){
  307. throw NeedMoreLines(is_compiling_class);
  308. }
  309. return consumed;
  310. }
  311. bool match_end_stmt() {
  312. if (match(TK(";"))) { match_newlines(); return true; }
  313. if (match_newlines() || peek()==TK("@eof")) return true;
  314. if (peek() == TK("@dedent")) return true;
  315. return false;
  316. }
  317. void consume_end_stmt() {
  318. if (!match_end_stmt()) SyntaxError("expected statement end");
  319. }
  320. void exprLiteral() {
  321. PyVar value = parser->prev.value;
  322. int index = co()->add_const(value);
  323. emit(OP_LOAD_CONST, index);
  324. }
  325. void exprFString() {
  326. static const std::regex pattern(R"(\{(.*?)\})");
  327. PyVar value = parser->prev.value;
  328. _Str s = vm->PyStr_AS_C(value);
  329. std::sregex_iterator begin(s.begin(), s.end(), pattern);
  330. std::sregex_iterator end;
  331. int size = 0;
  332. int i = 0;
  333. for(auto it = begin; it != end; it++) {
  334. std::smatch m = *it;
  335. if (i < m.position()) {
  336. std::string literal = s.substr(i, m.position() - i);
  337. emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(literal)));
  338. size++;
  339. }
  340. emit(OP_LOAD_EVAL_FN);
  341. emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(m[1].str())));
  342. emit(OP_CALL, 1);
  343. size++;
  344. i = (int)(m.position() + m.length());
  345. }
  346. if (i < s.size()) {
  347. std::string literal = s.substr(i, s.size() - i);
  348. emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(literal)));
  349. size++;
  350. }
  351. emit(OP_BUILD_STRING, size);
  352. }
  353. void exprLambda() {
  354. _Func func = pkpy::make_shared<Function>();
  355. func->name = "<lambda>";
  356. if(!match(TK(":"))){
  357. _compile_f_args(func, false);
  358. consume(TK(":"));
  359. }
  360. func->code = pkpy::make_shared<CodeObject>(parser->src, func->name);
  361. this->codes.push(func->code);
  362. EXPR_TUPLE();
  363. emit(OP_RETURN_VALUE);
  364. func->code->optimize();
  365. this->codes.pop();
  366. emit(OP_LOAD_LAMBDA, co()->add_const(vm->PyFunction(func)));
  367. }
  368. void exprAssign() {
  369. co()->_rvalue = true;
  370. TokenIndex op = parser->prev.type;
  371. if(op == TK("=")) { // a = (expr)
  372. EXPR_TUPLE();
  373. emit(OP_STORE_REF);
  374. }else{ // a += (expr) -> a = a + (expr)
  375. emit(OP_DUP_TOP);
  376. EXPR();
  377. switch (op) {
  378. case TK("+="): emit(OP_BINARY_OP, 0); break;
  379. case TK("-="): emit(OP_BINARY_OP, 1); break;
  380. case TK("*="): emit(OP_BINARY_OP, 2); break;
  381. case TK("/="): emit(OP_BINARY_OP, 3); break;
  382. case TK("//="): emit(OP_BINARY_OP, 4); break;
  383. case TK("%="): emit(OP_BINARY_OP, 5); break;
  384. case TK("&="): emit(OP_BITWISE_OP, 2); break;
  385. case TK("|="): emit(OP_BITWISE_OP, 3); break;
  386. case TK("^="): emit(OP_BITWISE_OP, 4); break;
  387. default: UNREACHABLE();
  388. }
  389. emit(OP_STORE_REF);
  390. }
  391. co()->_rvalue = false;
  392. }
  393. void exprComma() {
  394. int size = 1; // an expr is in the stack now
  395. do {
  396. EXPR(); // NOTE: "1," will fail, "1,2" will be ok
  397. size++;
  398. } while(match(TK(",")));
  399. emit(OP_BUILD_SMART_TUPLE, size);
  400. }
  401. void exprOr() {
  402. int patch = emit(OP_JUMP_IF_TRUE_OR_POP);
  403. parse_expression(PREC_LOGICAL_OR);
  404. patch_jump(patch);
  405. }
  406. void exprAnd() {
  407. int patch = emit(OP_JUMP_IF_FALSE_OR_POP);
  408. parse_expression(PREC_LOGICAL_AND);
  409. patch_jump(patch);
  410. }
  411. void exprTernary() {
  412. int patch = emit(OP_POP_JUMP_IF_FALSE);
  413. EXPR(); // if true
  414. int patch2 = emit(OP_JUMP_ABSOLUTE);
  415. consume(TK(":"));
  416. patch_jump(patch);
  417. EXPR(); // if false
  418. patch_jump(patch2);
  419. }
  420. void exprBinaryOp() {
  421. TokenIndex op = parser->prev.type;
  422. parse_expression((Precedence)(rules[op].precedence + 1));
  423. switch (op) {
  424. case TK("+"): emit(OP_BINARY_OP, 0); break;
  425. case TK("-"): emit(OP_BINARY_OP, 1); break;
  426. case TK("*"): emit(OP_BINARY_OP, 2); break;
  427. case TK("/"): emit(OP_BINARY_OP, 3); break;
  428. case TK("//"): emit(OP_BINARY_OP, 4); break;
  429. case TK("%"): emit(OP_BINARY_OP, 5); break;
  430. case TK("**"): emit(OP_BINARY_OP, 6); break;
  431. case TK("<"): emit(OP_COMPARE_OP, 0); break;
  432. case TK("<="): emit(OP_COMPARE_OP, 1); break;
  433. case TK("=="): emit(OP_COMPARE_OP, 2); break;
  434. case TK("!="): emit(OP_COMPARE_OP, 3); break;
  435. case TK(">"): emit(OP_COMPARE_OP, 4); break;
  436. case TK(">="): emit(OP_COMPARE_OP, 5); break;
  437. case TK("in"): emit(OP_CONTAINS_OP, 0); break;
  438. case TK("not in"): emit(OP_CONTAINS_OP, 1); break;
  439. case TK("is"): emit(OP_IS_OP, 0); break;
  440. case TK("is not"): emit(OP_IS_OP, 1); break;
  441. case TK("<<"): emit(OP_BITWISE_OP, 0); break;
  442. case TK(">>"): emit(OP_BITWISE_OP, 1); break;
  443. case TK("&"): emit(OP_BITWISE_OP, 2); break;
  444. case TK("|"): emit(OP_BITWISE_OP, 3); break;
  445. case TK("^"): emit(OP_BITWISE_OP, 4); break;
  446. default: UNREACHABLE();
  447. }
  448. }
  449. void exprUnaryOp() {
  450. TokenIndex op = parser->prev.type;
  451. parse_expression((Precedence)(PREC_UNARY + 1));
  452. switch (op) {
  453. case TK("-"): emit(OP_UNARY_NEGATIVE); break;
  454. case TK("not"): emit(OP_UNARY_NOT); break;
  455. case TK("*"): SyntaxError("cannot use '*' as unary operator"); break;
  456. default: UNREACHABLE();
  457. }
  458. }
  459. void exprGrouping() {
  460. match_newlines(mode()==REPL_MODE);
  461. EXPR_TUPLE();
  462. match_newlines(mode()==REPL_MODE);
  463. consume(TK(")"));
  464. }
  465. void exprList() {
  466. int _patch = emit(OP_NO_OP);
  467. int _body_start = co()->codes.size();
  468. int ARGC = 0;
  469. do {
  470. match_newlines(mode()==REPL_MODE);
  471. if (peek() == TK("]")) break;
  472. EXPR(); ARGC++;
  473. match_newlines(mode()==REPL_MODE);
  474. if(ARGC == 1 && match(TK("for"))) goto __LISTCOMP;
  475. } while (match(TK(",")));
  476. match_newlines(mode()==REPL_MODE);
  477. consume(TK("]"));
  478. emit(OP_BUILD_LIST, ARGC);
  479. return;
  480. __LISTCOMP:
  481. int _body_end_return = emit(OP_JUMP_ABSOLUTE, -1);
  482. int _body_end = co()->codes.size();
  483. co()->codes[_patch].op = OP_JUMP_ABSOLUTE;
  484. co()->codes[_patch].arg = _body_end;
  485. emit(OP_BUILD_LIST, 0);
  486. EXPR_FOR_VARS();consume(TK("in"));EXPR_TUPLE();
  487. match_newlines(mode()==REPL_MODE);
  488. int _skipPatch = emit(OP_JUMP_ABSOLUTE);
  489. int _cond_start = co()->codes.size();
  490. int _cond_end_return = -1;
  491. if(match(TK("if"))) {
  492. EXPR_TUPLE();
  493. _cond_end_return = emit(OP_JUMP_ABSOLUTE, -1);
  494. }
  495. patch_jump(_skipPatch);
  496. emit(OP_GET_ITER);
  497. co()->_enter_block(FOR_LOOP);
  498. emit(OP_FOR_ITER);
  499. if(_cond_end_return != -1) { // there is an if condition
  500. emit(OP_JUMP_ABSOLUTE, _cond_start);
  501. patch_jump(_cond_end_return);
  502. int ifpatch = emit(OP_POP_JUMP_IF_FALSE);
  503. emit(OP_JUMP_ABSOLUTE, _body_start);
  504. patch_jump(_body_end_return);
  505. emit(OP_LIST_APPEND);
  506. patch_jump(ifpatch);
  507. }else{
  508. emit(OP_JUMP_ABSOLUTE, _body_start);
  509. patch_jump(_body_end_return);
  510. emit(OP_LIST_APPEND);
  511. }
  512. emit(OP_LOOP_CONTINUE, -1, true);
  513. co()->_exit_block();
  514. match_newlines(mode()==REPL_MODE);
  515. consume(TK("]"));
  516. }
  517. void exprMap() {
  518. bool parsing_dict = false;
  519. int size = 0;
  520. do {
  521. match_newlines(mode()==REPL_MODE);
  522. if (peek() == TK("}")) break;
  523. EXPR();
  524. if(peek() == TK(":")) parsing_dict = true;
  525. if(parsing_dict){
  526. consume(TK(":"));
  527. EXPR();
  528. }
  529. size++;
  530. match_newlines(mode()==REPL_MODE);
  531. } while (match(TK(",")));
  532. consume(TK("}"));
  533. if(size == 0 || parsing_dict) emit(OP_BUILD_MAP, size);
  534. else emit(OP_BUILD_SET, size);
  535. }
  536. void exprCall() {
  537. int ARGC = 0;
  538. int KWARGC = 0;
  539. do {
  540. match_newlines(mode()==REPL_MODE);
  541. if (peek() == TK(")")) break;
  542. if(peek() == TK("@id") && peek_next() == TK("=")) {
  543. consume(TK("@id"));
  544. const _Str& key = parser->prev.str();
  545. emit(OP_LOAD_CONST, co()->add_const(vm->PyStr(key)));
  546. consume(TK("="));
  547. EXPR();
  548. KWARGC++;
  549. } else{
  550. if(KWARGC > 0) SyntaxError("positional argument follows keyword argument");
  551. EXPR();
  552. ARGC++;
  553. }
  554. match_newlines(mode()==REPL_MODE);
  555. } while (match(TK(",")));
  556. consume(TK(")"));
  557. emit(OP_CALL, (KWARGC << 16) | ARGC);
  558. }
  559. void exprName(){ _exprName(false); }
  560. void _exprName(bool force_lvalue) {
  561. Token tkname = parser->prev;
  562. int index = co()->add_name(
  563. tkname.str(),
  564. codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL
  565. );
  566. bool fast_load = !force_lvalue && co()->_rvalue;
  567. emit(fast_load ? OP_LOAD_NAME : OP_LOAD_NAME_REF, index);
  568. }
  569. void exprAttrib() {
  570. consume(TK("@id"));
  571. const _Str& name = parser->prev.str();
  572. int index = co()->add_name(name, NAME_ATTR);
  573. emit(OP_BUILD_ATTR_REF, index);
  574. }
  575. // [:], [:b]
  576. // [a], [a:], [a:b]
  577. void exprSubscript() {
  578. if(match(TK(":"))){
  579. emit(OP_LOAD_NONE);
  580. if(match(TK("]"))){
  581. emit(OP_LOAD_NONE);
  582. }else{
  583. EXPR_TUPLE();
  584. consume(TK("]"));
  585. }
  586. emit(OP_BUILD_SLICE);
  587. }else{
  588. EXPR_TUPLE();
  589. if(match(TK(":"))){
  590. if(match(TK("]"))){
  591. emit(OP_LOAD_NONE);
  592. }else{
  593. EXPR_TUPLE();
  594. consume(TK("]"));
  595. }
  596. emit(OP_BUILD_SLICE);
  597. }else{
  598. consume(TK("]"));
  599. }
  600. }
  601. emit(OP_BUILD_INDEX_REF);
  602. }
  603. void exprValue() {
  604. TokenIndex op = parser->prev.type;
  605. switch (op) {
  606. case TK("None"): emit(OP_LOAD_NONE); break;
  607. case TK("True"): emit(OP_LOAD_TRUE); break;
  608. case TK("False"): emit(OP_LOAD_FALSE); break;
  609. case TK("..."): emit(OP_LOAD_ELLIPSIS); break;
  610. default: UNREACHABLE();
  611. }
  612. }
  613. int emit(Opcode opcode, int arg=-1, bool keepline=false) {
  614. int line = parser->prev.line;
  615. co()->codes.push_back(
  616. Bytecode{(uint8_t)opcode, arg, line, (uint16_t)co()->_curr_block_i}
  617. );
  618. int i = co()->codes.size() - 1;
  619. if(keepline && i>=1) co()->codes[i].line = co()->codes[i-1].line;
  620. return i;
  621. }
  622. inline void patch_jump(int addr_index) {
  623. int target = co()->codes.size();
  624. co()->codes[addr_index].arg = target;
  625. }
  626. void compile_block_body(CompilerAction action=nullptr) {
  627. if(action == nullptr) action = &Compiler::compile_stmt;
  628. consume(TK(":"));
  629. if(!match_newlines(mode()==REPL_MODE)){
  630. SyntaxError("expected a new line after ':'");
  631. }
  632. consume(TK("@indent"));
  633. while (peek() != TK("@dedent")) {
  634. match_newlines();
  635. (this->*action)();
  636. match_newlines();
  637. }
  638. consume(TK("@dedent"));
  639. }
  640. Token _compile_import() {
  641. consume(TK("@id"));
  642. Token tkmodule = parser->prev;
  643. int index = co()->add_name(tkmodule.str(), NAME_SPECIAL);
  644. emit(OP_IMPORT_NAME, index);
  645. return tkmodule;
  646. }
  647. // import a as b
  648. void compile_normal_import() {
  649. do {
  650. Token tkmodule = _compile_import();
  651. if (match(TK("as"))) {
  652. consume(TK("@id"));
  653. tkmodule = parser->prev;
  654. }
  655. int index = co()->add_name(tkmodule.str(), NAME_GLOBAL);
  656. emit(OP_STORE_NAME, index);
  657. } while (match(TK(",")));
  658. consume_end_stmt();
  659. }
  660. // from a import b as c, d as e
  661. void compile_from_import() {
  662. Token tkmodule = _compile_import();
  663. consume(TK("import"));
  664. do {
  665. emit(OP_DUP_TOP);
  666. consume(TK("@id"));
  667. Token tkname = parser->prev;
  668. int index = co()->add_name(tkname.str(), NAME_ATTR);
  669. emit(OP_BUILD_ATTR_REF, index);
  670. if (match(TK("as"))) {
  671. consume(TK("@id"));
  672. tkname = parser->prev;
  673. }
  674. index = co()->add_name(tkname.str(), NAME_GLOBAL);
  675. emit(OP_STORE_NAME, index);
  676. } while (match(TK(",")));
  677. emit(OP_POP_TOP);
  678. consume_end_stmt();
  679. }
  680. void parse_expression(Precedence precedence) {
  681. lex_token();
  682. GrammarFn prefix = rules[parser->prev.type].prefix;
  683. if (prefix == nullptr) SyntaxError(_Str("expected an expression, but got ") + TK_STR(parser->prev.type));
  684. (this->*prefix)();
  685. while (rules[peek()].precedence >= precedence) {
  686. lex_token();
  687. TokenIndex op = parser->prev.type;
  688. GrammarFn infix = rules[op].infix;
  689. if(infix == nullptr) throw std::runtime_error("(infix == nullptr) is true");
  690. (this->*infix)();
  691. }
  692. }
  693. void compile_if_stmt() {
  694. match_newlines();
  695. EXPR_TUPLE();
  696. int ifpatch = emit(OP_POP_JUMP_IF_FALSE);
  697. compile_block_body();
  698. if (match(TK("elif"))) {
  699. int exit_jump = emit(OP_JUMP_ABSOLUTE);
  700. patch_jump(ifpatch);
  701. compile_if_stmt();
  702. patch_jump(exit_jump);
  703. } else if (match(TK("else"))) {
  704. int exit_jump = emit(OP_JUMP_ABSOLUTE);
  705. patch_jump(ifpatch);
  706. compile_block_body();
  707. patch_jump(exit_jump);
  708. } else {
  709. patch_jump(ifpatch);
  710. }
  711. }
  712. void compile_while_loop() {
  713. co()->_enter_block(WHILE_LOOP);
  714. EXPR_TUPLE();
  715. int patch = emit(OP_POP_JUMP_IF_FALSE);
  716. compile_block_body();
  717. emit(OP_LOOP_CONTINUE, -1, true);
  718. patch_jump(patch);
  719. co()->_exit_block();
  720. }
  721. void EXPR_FOR_VARS(){
  722. int size = 0;
  723. do {
  724. consume(TK("@id"));
  725. _exprName(true); size++;
  726. } while (match(TK(",")));
  727. if(size > 1) emit(OP_BUILD_SMART_TUPLE, size);
  728. }
  729. void compile_for_loop() {
  730. EXPR_FOR_VARS();consume(TK("in")); EXPR_TUPLE();
  731. emit(OP_GET_ITER);
  732. co()->_enter_block(FOR_LOOP);
  733. emit(OP_FOR_ITER);
  734. compile_block_body();
  735. emit(OP_LOOP_CONTINUE, -1, true);
  736. co()->_exit_block();
  737. }
  738. void compile_try_except() {
  739. co()->_enter_block(TRY_EXCEPT);
  740. emit(OP_TRY_BLOCK_ENTER);
  741. compile_block_body();
  742. emit(OP_TRY_BLOCK_EXIT);
  743. std::vector<int> patches = { emit(OP_JUMP_ABSOLUTE) };
  744. co()->_exit_block();
  745. do {
  746. consume(TK("except"));
  747. if(match(TK("@id"))){
  748. int name_idx = co()->add_name(parser->prev.str(), NAME_SPECIAL);
  749. emit(OP_EXCEPTION_MATCH, name_idx);
  750. }else{
  751. emit(OP_LOAD_TRUE);
  752. }
  753. int patch = emit(OP_POP_JUMP_IF_FALSE);
  754. emit(OP_POP_TOP); // pop the exception on match
  755. compile_block_body();
  756. patches.push_back(emit(OP_JUMP_ABSOLUTE));
  757. patch_jump(patch);
  758. }while(peek() == TK("except"));
  759. emit(OP_RE_RAISE); // no match, re-raise
  760. for (int patch : patches) patch_jump(patch);
  761. }
  762. void compile_stmt() {
  763. if (match(TK("break"))) {
  764. if (!co()->_is_curr_block_loop()) SyntaxError("'break' outside loop");
  765. consume_end_stmt();
  766. emit(OP_LOOP_BREAK);
  767. } else if (match(TK("continue"))) {
  768. if (!co()->_is_curr_block_loop()) SyntaxError("'continue' not properly in loop");
  769. consume_end_stmt();
  770. emit(OP_LOOP_CONTINUE);
  771. } else if (match(TK("return"))) {
  772. if (codes.size() == 1)
  773. SyntaxError("'return' outside function");
  774. if(match_end_stmt()){
  775. emit(OP_LOAD_NONE);
  776. }else{
  777. EXPR_TUPLE();
  778. consume_end_stmt();
  779. }
  780. emit(OP_RETURN_VALUE, -1, true);
  781. } else if (match(TK("if"))) {
  782. compile_if_stmt();
  783. } else if (match(TK("while"))) {
  784. compile_while_loop();
  785. } else if (match(TK("for"))) {
  786. compile_for_loop();
  787. } else if (match(TK("try"))) {
  788. compile_try_except();
  789. }else if(match(TK("assert"))){
  790. EXPR();
  791. emit(OP_ASSERT);
  792. consume_end_stmt();
  793. } else if(match(TK("with"))){
  794. EXPR();
  795. consume(TK("as"));
  796. consume(TK("@id"));
  797. Token tkname = parser->prev;
  798. int index = co()->add_name(
  799. tkname.str(),
  800. codes.size()>1 ? NAME_LOCAL : NAME_GLOBAL
  801. );
  802. emit(OP_STORE_NAME, index);
  803. emit(OP_LOAD_NAME_REF, index);
  804. emit(OP_WITH_ENTER);
  805. compile_block_body();
  806. emit(OP_LOAD_NAME_REF, index);
  807. emit(OP_WITH_EXIT);
  808. } else if(match(TK("label"))){
  809. if(mode() != EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE");
  810. consume(TK(".")); consume(TK("@id"));
  811. _Str label = parser->prev.str();
  812. bool ok = co()->add_label(label);
  813. if(!ok) SyntaxError("label '" + label + "' already exists");
  814. consume_end_stmt();
  815. } else if(match(TK("goto"))){ // https://entrian.com/goto/
  816. if(mode() != EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE");
  817. consume(TK(".")); consume(TK("@id"));
  818. emit(OP_GOTO, co()->add_name(parser->prev.str(), NAME_SPECIAL));
  819. consume_end_stmt();
  820. } else if(match(TK("raise"))){
  821. consume(TK("@id"));
  822. int dummy_t = co()->add_name(parser->prev.str(), NAME_SPECIAL);
  823. if(match(TK("("))){
  824. EXPR(); consume(TK(")"));
  825. }else{
  826. emit(OP_LOAD_NONE);
  827. }
  828. emit(OP_RAISE, dummy_t);
  829. consume_end_stmt();
  830. } else if(match(TK("del"))){
  831. EXPR();
  832. emit(OP_DELETE_REF);
  833. consume_end_stmt();
  834. } else if(match(TK("global"))){
  835. do {
  836. consume(TK("@id"));
  837. co()->global_names[parser->prev.str()] = 1;
  838. } while (match(TK(",")));
  839. consume_end_stmt();
  840. } else if(match(TK("pass"))){
  841. consume_end_stmt();
  842. } else {
  843. EXPR_ANY();
  844. consume_end_stmt();
  845. // If last op is not an assignment, pop the result.
  846. uint8_t last_op = co()->codes.back().op;
  847. if( last_op!=OP_STORE_NAME && last_op!=OP_STORE_REF){
  848. if(mode()==REPL_MODE && parser->indents.top()==0) emit(OP_PRINT_EXPR, -1, true);
  849. emit(OP_POP_TOP, -1, true);
  850. }
  851. }
  852. }
  853. void compile_class(){
  854. consume(TK("@id"));
  855. int cls_name_idx = co()->add_name(parser->prev.str(), NAME_GLOBAL);
  856. int super_cls_name_idx = -1;
  857. if(match(TK("(")) && match(TK("@id"))){
  858. super_cls_name_idx = co()->add_name(parser->prev.str(), NAME_GLOBAL);
  859. consume(TK(")"));
  860. }
  861. emit(OP_LOAD_NONE);
  862. is_compiling_class = true;
  863. compile_block_body(&Compiler::compile_function);
  864. is_compiling_class = false;
  865. if(super_cls_name_idx == -1) emit(OP_LOAD_NONE);
  866. else emit(OP_LOAD_NAME_REF, super_cls_name_idx);
  867. emit(OP_BUILD_CLASS, cls_name_idx);
  868. }
  869. void _compile_f_args(_Func func, bool enable_type_hints){
  870. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  871. do {
  872. if(state == 3) SyntaxError("**kwargs should be the last argument");
  873. match_newlines();
  874. if(match(TK("*"))){
  875. if(state < 1) state = 1;
  876. else SyntaxError("*args should be placed before **kwargs");
  877. }
  878. else if(match(TK("**"))){
  879. state = 3;
  880. }
  881. consume(TK("@id"));
  882. const _Str& name = parser->prev.str();
  883. if(func->hasName(name)) SyntaxError("duplicate argument name");
  884. // eat type hints
  885. if(enable_type_hints && match(TK(":"))) consume(TK("@id"));
  886. if(state == 0 && peek() == TK("=")) state = 2;
  887. switch (state)
  888. {
  889. case 0: func->args.push_back(name); break;
  890. case 1: func->starredArg = name; state+=1; break;
  891. case 2: {
  892. consume(TK("="));
  893. PyVarOrNull value = read_literal();
  894. if(value == nullptr){
  895. SyntaxError(_Str("expect a literal, not ") + TK_STR(parser->curr.type));
  896. }
  897. func->kwArgs[name] = value;
  898. func->kwArgsOrder.push_back(name);
  899. } break;
  900. case 3: SyntaxError("**kwargs is not supported yet"); break;
  901. }
  902. } while (match(TK(",")));
  903. }
  904. void compile_function(){
  905. if(is_compiling_class){
  906. if(match(TK("pass"))) return;
  907. consume(TK("def"));
  908. }
  909. _Func func = pkpy::make_shared<Function>();
  910. consume(TK("@id"));
  911. func->name = parser->prev.str();
  912. if (match(TK("(")) && !match(TK(")"))) {
  913. _compile_f_args(func, true);
  914. consume(TK(")"));
  915. }
  916. // eat type hints
  917. if(match(TK("->"))) consume(TK("@id"));
  918. func->code = pkpy::make_shared<CodeObject>(parser->src, func->name);
  919. this->codes.push(func->code);
  920. compile_block_body();
  921. func->code->optimize();
  922. this->codes.pop();
  923. emit(OP_LOAD_CONST, co()->add_const(vm->PyFunction(func)));
  924. if(!is_compiling_class) emit(OP_STORE_FUNCTION);
  925. }
  926. PyVarOrNull read_literal(){
  927. if(match(TK("-"))){
  928. consume(TK("@num"));
  929. PyVar val = parser->prev.value;
  930. return vm->num_negated(val);
  931. }
  932. if(match(TK("@num"))) return parser->prev.value;
  933. if(match(TK("@str"))) return parser->prev.value;
  934. if(match(TK("True"))) return vm->PyBool(true);
  935. if(match(TK("False"))) return vm->PyBool(false);
  936. if(match(TK("None"))) return vm->None;
  937. if(match(TK("..."))) return vm->Ellipsis;
  938. return nullptr;
  939. }
  940. /***** Error Reporter *****/
  941. void throw_err(_Str type, _Str msg){
  942. int lineno = parser->curr.line;
  943. const char* cursor = parser->curr.start;
  944. // if error occurs in lexing, lineno should be `parser->current_line`
  945. if(lexing_count > 0){
  946. lineno = parser->current_line;
  947. cursor = parser->curr_char;
  948. }
  949. if(parser->peekchar() == '\n') lineno--;
  950. auto e = _Exception("SyntaxError", msg);
  951. e.st_push(parser->src->snapshot(lineno, cursor));
  952. throw e;
  953. }
  954. void SyntaxError(_Str msg){ throw_err("SyntaxError", msg); }
  955. void IndentationError(_Str msg){ throw_err("IndentationError", msg); }
  956. public:
  957. _Code compile(){
  958. // can only be called once
  959. if(used) UNREACHABLE();
  960. used = true;
  961. _Code code = pkpy::make_shared<CodeObject>(parser->src, _Str("<module>"));
  962. codes.push(code);
  963. lex_token(); lex_token();
  964. match_newlines();
  965. if(mode()==EVAL_MODE) {
  966. EXPR_TUPLE();
  967. consume(TK("@eof"));
  968. code->optimize();
  969. return code;
  970. }else if(mode()==JSON_MODE){
  971. PyVarOrNull value = read_literal();
  972. if(value != nullptr) emit(OP_LOAD_CONST, code->add_const(value));
  973. else if(match(TK("{"))) exprMap();
  974. else if(match(TK("["))) exprList();
  975. else SyntaxError("expect a JSON object or array");
  976. consume(TK("@eof"));
  977. return code; // no need to optimize for JSON decoding
  978. }
  979. while (!match(TK("@eof"))) {
  980. // compile top-level statement
  981. if (match(TK("class"))) {
  982. compile_class();
  983. } else if (match(TK("def"))) {
  984. compile_function();
  985. } else if (match(TK("import"))) {
  986. compile_normal_import();
  987. } else if (match(TK("from"))) {
  988. compile_from_import();
  989. } else {
  990. compile_stmt();
  991. }
  992. match_newlines();
  993. }
  994. code->optimize();
  995. return code;
  996. }
  997. };