compiler.cpp 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421
  1. #include "pocketpy/compiler.h"
  2. namespace pkpy{
  3. NameScope Compiler::name_scope() const {
  4. auto s = contexts.size()>1 ? NAME_LOCAL : NAME_GLOBAL;
  5. if(unknown_global_scope && s == NAME_GLOBAL) s = NAME_GLOBAL_UNKNOWN;
  6. return s;
  7. }
  8. CodeObject_ Compiler::push_global_context(){
  9. CodeObject_ co = std::make_shared<CodeObject>(lexer.src, lexer.src->filename);
  10. co->start_line = i==0 ? 1 : prev().line;
  11. contexts.push(CodeEmitContext(vm, co, contexts.size()));
  12. return co;
  13. }
  14. FuncDecl_ Compiler::push_f_context(Str name){
  15. FuncDecl_ decl = std::make_shared<FuncDecl>();
  16. decl->code = std::make_shared<CodeObject>(lexer.src, name);
  17. decl->code->start_line = i==0 ? 1 : prev().line;
  18. decl->nested = name_scope() == NAME_LOCAL;
  19. contexts.push(CodeEmitContext(vm, decl->code, contexts.size()));
  20. contexts.top().func = decl;
  21. return decl;
  22. }
  23. void Compiler::pop_context(){
  24. if(!ctx()->s_expr.empty()){
  25. throw std::runtime_error("!ctx()->s_expr.empty()");
  26. }
  27. // add a `return None` in the end as a guard
  28. // previously, we only do this if the last opcode is not a return
  29. // however, this is buggy...since there may be a jump to the end (out of bound) even if the last opcode is a return
  30. ctx()->emit_(OP_RETURN_VALUE, 1, BC_KEEPLINE, true);
  31. // find the last valid token
  32. int j = i-1;
  33. while(tokens[j].type == TK("@eol") || tokens[j].type == TK("@dedent") || tokens[j].type == TK("@eof")) j--;
  34. ctx()->co->end_line = tokens[j].line;
  35. // some check here
  36. auto& codes = ctx()->co->codes;
  37. if(ctx()->co->varnames.size() > PK_MAX_CO_VARNAMES){
  38. SyntaxError("maximum number of local variables exceeded");
  39. }
  40. if(ctx()->co->consts.size() > 65530){
  41. SyntaxError("maximum number of constants exceeded");
  42. }
  43. if(codes.size() > 65530 && ctx()->co->src->mode != JSON_MODE){
  44. // json mode does not contain jump instructions, so it is safe to ignore this check
  45. SyntaxError("maximum number of opcodes exceeded");
  46. }
  47. // pre-compute LOOP_BREAK and LOOP_CONTINUE
  48. for(int i=0; i<codes.size(); i++){
  49. Bytecode& bc = codes[i];
  50. if(bc.op == OP_LOOP_CONTINUE){
  51. bc.arg = ctx()->co->blocks[bc.arg].start;
  52. }else if(bc.op == OP_LOOP_BREAK){
  53. bc.arg = ctx()->co->blocks[bc.arg].get_break_end();
  54. }
  55. }
  56. // pre-compute func->is_simple
  57. FuncDecl_ func = contexts.top().func;
  58. if(func){
  59. // check generator
  60. for(Bytecode bc: func->code->codes){
  61. if(bc.op == OP_YIELD_VALUE || bc.op == OP_FOR_ITER_YIELD_VALUE){
  62. func->type = FuncType::GENERATOR;
  63. for(Bytecode bc: func->code->codes){
  64. if(bc.op == OP_RETURN_VALUE && bc.arg == BC_NOARG){
  65. SyntaxError("'return' with argument inside generator function");
  66. }
  67. }
  68. break;
  69. }
  70. }
  71. if(func->type == FuncType::UNSET){
  72. bool is_simple = true;
  73. if(func->kwargs.size() > 0) is_simple = false;
  74. if(func->starred_arg >= 0) is_simple = false;
  75. if(func->starred_kwarg >= 0) is_simple = false;
  76. if(is_simple){
  77. func->type = FuncType::SIMPLE;
  78. bool is_empty = false;
  79. if(func->code->codes.size() == 1){
  80. Bytecode bc = func->code->codes[0];
  81. if(bc.op == OP_RETURN_VALUE && bc.arg == 1){
  82. is_empty = true;
  83. }
  84. }
  85. if(is_empty) func->type = FuncType::EMPTY;
  86. }
  87. else func->type = FuncType::NORMAL;
  88. }
  89. PK_ASSERT(func->type != FuncType::UNSET);
  90. }
  91. contexts.pop();
  92. }
  93. void Compiler::init_pratt_rules(){
  94. PK_LOCAL_STATIC bool initialized = false;
  95. if(initialized) return;
  96. initialized = true;
  97. // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
  98. #define PK_METHOD(name) &Compiler::name
  99. #define PK_NO_INFIX nullptr, PREC_LOWEST
  100. for(TokenIndex i=0; i<kTokenCount; i++) rules[i] = { nullptr, PK_NO_INFIX };
  101. rules[TK(".")] = { nullptr, PK_METHOD(exprAttrib), PREC_PRIMARY };
  102. rules[TK("(")] = { PK_METHOD(exprGroup), PK_METHOD(exprCall), PREC_PRIMARY };
  103. rules[TK("[")] = { PK_METHOD(exprList), PK_METHOD(exprSubscr), PREC_PRIMARY };
  104. rules[TK("{")] = { PK_METHOD(exprMap), PK_NO_INFIX };
  105. rules[TK("%")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
  106. rules[TK("+")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_TERM };
  107. rules[TK("-")] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_TERM };
  108. rules[TK("*")] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_FACTOR };
  109. rules[TK("~")] = { PK_METHOD(exprUnaryOp), nullptr, PREC_UNARY };
  110. rules[TK("/")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
  111. rules[TK("//")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
  112. rules[TK("**")] = { PK_METHOD(exprUnaryOp), PK_METHOD(exprBinaryOp), PREC_EXPONENT };
  113. rules[TK(">")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  114. rules[TK("<")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  115. rules[TK("==")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  116. rules[TK("!=")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  117. rules[TK(">=")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  118. rules[TK("<=")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  119. rules[TK("in")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  120. rules[TK("is")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  121. rules[TK("<<")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  122. rules[TK(">>")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_SHIFT };
  123. rules[TK("&")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_AND };
  124. rules[TK("|")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_OR };
  125. rules[TK("^")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_BITWISE_XOR };
  126. rules[TK("@")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_FACTOR };
  127. rules[TK("if")] = { nullptr, PK_METHOD(exprTernary), PREC_TERNARY };
  128. rules[TK("not in")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  129. rules[TK("is not")] = { nullptr, PK_METHOD(exprBinaryOp), PREC_COMPARISION };
  130. rules[TK("and") ] = { nullptr, PK_METHOD(exprAnd), PREC_LOGICAL_AND };
  131. rules[TK("or")] = { nullptr, PK_METHOD(exprOr), PREC_LOGICAL_OR };
  132. rules[TK("not")] = { PK_METHOD(exprNot), nullptr, PREC_LOGICAL_NOT };
  133. rules[TK("True")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
  134. rules[TK("False")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
  135. rules[TK("None")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
  136. rules[TK("...")] = { PK_METHOD(exprLiteral0), PK_NO_INFIX };
  137. rules[TK("lambda")] = { PK_METHOD(exprLambda), PK_NO_INFIX };
  138. rules[TK("@id")] = { PK_METHOD(exprName), PK_NO_INFIX };
  139. rules[TK("@num")] = { PK_METHOD(exprLiteral), PK_NO_INFIX };
  140. rules[TK("@str")] = { PK_METHOD(exprLiteral), PK_NO_INFIX };
  141. rules[TK("@fstr")] = { PK_METHOD(exprFString), PK_NO_INFIX };
  142. rules[TK("@long")] = { PK_METHOD(exprLong), PK_NO_INFIX };
  143. rules[TK("@imag")] = { PK_METHOD(exprImag), PK_NO_INFIX };
  144. rules[TK("@bytes")] = { PK_METHOD(exprBytes), PK_NO_INFIX };
  145. rules[TK(":")] = { PK_METHOD(exprSlice0), PK_METHOD(exprSlice1), PREC_PRIMARY };
  146. #undef PK_METHOD
  147. #undef PK_NO_INFIX
  148. }
  149. bool Compiler::match(TokenIndex expected) {
  150. if (curr().type != expected) return false;
  151. advance();
  152. return true;
  153. }
  154. void Compiler::consume(TokenIndex expected) {
  155. if (!match(expected)){
  156. SyntaxError(
  157. _S("expected '", TK_STR(expected), "', got '", TK_STR(curr().type), "'")
  158. );
  159. }
  160. }
  161. bool Compiler::match_newlines_repl(){
  162. return match_newlines(mode()==REPL_MODE);
  163. }
  164. bool Compiler::match_newlines(bool repl_throw) {
  165. bool consumed = false;
  166. if (curr().type == TK("@eol")) {
  167. while (curr().type == TK("@eol")) advance();
  168. consumed = true;
  169. }
  170. if (repl_throw && curr().type == TK("@eof")){
  171. throw NeedMoreLines(ctx()->is_compiling_class);
  172. }
  173. return consumed;
  174. }
  175. bool Compiler::match_end_stmt() {
  176. if (match(TK(";"))) { match_newlines(); return true; }
  177. if (match_newlines() || curr().type == TK("@eof")) return true;
  178. if (curr().type == TK("@dedent")) return true;
  179. return false;
  180. }
  181. void Compiler::consume_end_stmt() {
  182. if (!match_end_stmt()) SyntaxError("expected statement end");
  183. }
  184. void Compiler::EXPR() {
  185. parse_expression(PREC_LOWEST+1);
  186. }
  187. void Compiler::EXPR_TUPLE(bool allow_slice) {
  188. parse_expression(PREC_LOWEST+1, allow_slice);
  189. if(!match(TK(","))) return;
  190. // tuple expression
  191. Expr_vector items;
  192. items.push_back(ctx()->s_expr.popx());
  193. do {
  194. if(curr().brackets_level) match_newlines_repl();
  195. if(!is_expression(allow_slice)) break;
  196. parse_expression(PREC_LOWEST+1, allow_slice);
  197. items.push_back(ctx()->s_expr.popx());
  198. if(curr().brackets_level) match_newlines_repl();
  199. } while(match(TK(",")));
  200. ctx()->s_expr.push(make_expr<TupleExpr>(std::move(items)));
  201. }
  202. // special case for `for loop` and `comp`
  203. Expr_ Compiler::EXPR_VARS(){
  204. Expr_vector items;
  205. do {
  206. consume(TK("@id"));
  207. items.push_back(make_expr<NameExpr>(prev().str(), name_scope()));
  208. } while(match(TK(",")));
  209. if(items.size()==1) return std::move(items[0]);
  210. return make_expr<TupleExpr>(std::move(items));
  211. }
  212. void Compiler::exprLiteral(){
  213. ctx()->s_expr.push(make_expr<LiteralExpr>(prev().value));
  214. }
  215. void Compiler::exprLong(){
  216. ctx()->s_expr.push(make_expr<LongExpr>(prev().str()));
  217. }
  218. void Compiler::exprImag(){
  219. ctx()->s_expr.push(make_expr<ImagExpr>(std::get<f64>(prev().value)));
  220. }
  221. void Compiler::exprBytes(){
  222. ctx()->s_expr.push(make_expr<BytesExpr>(std::get<Str>(prev().value)));
  223. }
  224. void Compiler::exprFString(){
  225. ctx()->s_expr.push(make_expr<FStringExpr>(std::get<Str>(prev().value)));
  226. }
  227. void Compiler::exprLambda(){
  228. FuncDecl_ decl = push_f_context("<lambda>");
  229. auto e = make_expr<LambdaExpr>(decl);
  230. if(!match(TK(":"))){
  231. _compile_f_args(e->decl, false);
  232. consume(TK(":"));
  233. }
  234. // https://github.com/pocketpy/pocketpy/issues/37
  235. parse_expression(PREC_LAMBDA + 1);
  236. ctx()->emit_expr();
  237. ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  238. pop_context();
  239. ctx()->s_expr.push(std::move(e));
  240. }
  241. void Compiler::exprOr(){
  242. auto e = make_expr<OrExpr>();
  243. e->lhs = ctx()->s_expr.popx();
  244. parse_expression(PREC_LOGICAL_OR + 1);
  245. e->rhs = ctx()->s_expr.popx();
  246. ctx()->s_expr.push(std::move(e));
  247. }
  248. void Compiler::exprAnd(){
  249. auto e = make_expr<AndExpr>();
  250. e->lhs = ctx()->s_expr.popx();
  251. parse_expression(PREC_LOGICAL_AND + 1);
  252. e->rhs = ctx()->s_expr.popx();
  253. ctx()->s_expr.push(std::move(e));
  254. }
  255. void Compiler::exprTernary(){
  256. auto e = make_expr<TernaryExpr>();
  257. e->true_expr = ctx()->s_expr.popx();
  258. // cond
  259. parse_expression(PREC_TERNARY + 1);
  260. e->cond = ctx()->s_expr.popx();
  261. consume(TK("else"));
  262. // if false
  263. parse_expression(PREC_TERNARY + 1);
  264. e->false_expr = ctx()->s_expr.popx();
  265. ctx()->s_expr.push(std::move(e));
  266. }
  267. void Compiler::exprBinaryOp(){
  268. auto e = make_expr<BinaryExpr>();
  269. e->op = prev().type;
  270. e->lhs = ctx()->s_expr.popx();
  271. parse_expression(rules[e->op].precedence + 1);
  272. e->rhs = ctx()->s_expr.popx();
  273. ctx()->s_expr.push(std::move(e));
  274. }
  275. void Compiler::exprNot() {
  276. parse_expression(PREC_LOGICAL_NOT + 1);
  277. ctx()->s_expr.push(make_expr<NotExpr>(ctx()->s_expr.popx()));
  278. }
  279. void Compiler::exprUnaryOp(){
  280. TokenIndex op = prev().type;
  281. parse_expression(PREC_UNARY + 1);
  282. switch(op){
  283. case TK("-"):
  284. ctx()->s_expr.push(make_expr<NegatedExpr>(ctx()->s_expr.popx()));
  285. break;
  286. case TK("~"):
  287. ctx()->s_expr.push(make_expr<InvertExpr>(ctx()->s_expr.popx()));
  288. break;
  289. case TK("*"):
  290. ctx()->s_expr.push(make_expr<StarredExpr>(1, ctx()->s_expr.popx()));
  291. break;
  292. case TK("**"):
  293. ctx()->s_expr.push(make_expr<StarredExpr>(2, ctx()->s_expr.popx()));
  294. break;
  295. default: PK_FATAL_ERROR();
  296. }
  297. }
  298. void Compiler::exprGroup(){
  299. match_newlines_repl();
  300. EXPR_TUPLE(); // () is just for change precedence
  301. match_newlines_repl();
  302. consume(TK(")"));
  303. if(ctx()->s_expr.top()->is_tuple()) return;
  304. Expr_ g = make_expr<GroupedExpr>(ctx()->s_expr.popx());
  305. ctx()->s_expr.push(std::move(g));
  306. }
  307. void Compiler::consume_comp(unique_ptr_128<CompExpr> ce, Expr_ expr){
  308. ce->expr = std::move(expr);
  309. ce->vars = EXPR_VARS();
  310. consume(TK("in"));
  311. parse_expression(PREC_TERNARY + 1);
  312. ce->iter = ctx()->s_expr.popx();
  313. match_newlines_repl();
  314. if(match(TK("if"))){
  315. parse_expression(PREC_TERNARY + 1);
  316. ce->cond = ctx()->s_expr.popx();
  317. }
  318. ctx()->s_expr.push(std::move(ce));
  319. match_newlines_repl();
  320. }
  321. void Compiler::exprList() {
  322. int line = prev().line;
  323. Expr_vector items;
  324. do {
  325. match_newlines_repl();
  326. if (curr().type == TK("]")) break;
  327. EXPR();
  328. items.push_back(ctx()->s_expr.popx());
  329. match_newlines_repl();
  330. if(items.size()==1 && match(TK("for"))){
  331. consume_comp(make_expr<ListCompExpr>(), std::move(items[0]));
  332. consume(TK("]"));
  333. return;
  334. }
  335. match_newlines_repl();
  336. } while (match(TK(",")));
  337. consume(TK("]"));
  338. auto e = make_expr<ListExpr>(std::move(items));
  339. e->line = line; // override line
  340. ctx()->s_expr.push(std::move(e));
  341. }
  342. void Compiler::exprMap() {
  343. bool parsing_dict = false; // {...} may be dict or set
  344. Expr_vector items;
  345. do {
  346. match_newlines_repl();
  347. if (curr().type == TK("}")) break;
  348. EXPR();
  349. int star_level = ctx()->s_expr.top()->star_level();
  350. if(star_level==2 || curr().type == TK(":")){
  351. parsing_dict = true;
  352. }
  353. if(parsing_dict){
  354. auto dict_item = make_expr<DictItemExpr>();
  355. if(star_level == 2){
  356. dict_item->key = nullptr;
  357. dict_item->value = ctx()->s_expr.popx();
  358. }else{
  359. consume(TK(":"));
  360. EXPR();
  361. dict_item->key = ctx()->s_expr.popx();
  362. dict_item->value = ctx()->s_expr.popx();
  363. }
  364. items.push_back(std::move(dict_item));
  365. }else{
  366. items.push_back(ctx()->s_expr.popx());
  367. }
  368. match_newlines_repl();
  369. if(items.size()==1 && match(TK("for"))){
  370. if(parsing_dict) consume_comp(make_expr<DictCompExpr>(), std::move(items[0]));
  371. else consume_comp(make_expr<SetCompExpr>(), std::move(items[0]));
  372. consume(TK("}"));
  373. return;
  374. }
  375. match_newlines_repl();
  376. } while (match(TK(",")));
  377. consume(TK("}"));
  378. if(items.size()==0 || parsing_dict){
  379. auto e = make_expr<DictExpr>(std::move(items));
  380. ctx()->s_expr.push(std::move(e));
  381. }else{
  382. auto e = make_expr<SetExpr>(std::move(items));
  383. ctx()->s_expr.push(std::move(e));
  384. }
  385. }
  386. void Compiler::exprCall() {
  387. auto e = make_expr<CallExpr>();
  388. e->callable = ctx()->s_expr.popx();
  389. do {
  390. match_newlines_repl();
  391. if (curr().type==TK(")")) break;
  392. if(curr().type==TK("@id") && next().type==TK("=")) {
  393. consume(TK("@id"));
  394. Str key = prev().str();
  395. consume(TK("="));
  396. EXPR();
  397. e->kwargs.push_back({key, ctx()->s_expr.popx()});
  398. } else{
  399. EXPR();
  400. if(ctx()->s_expr.top()->star_level() == 2){
  401. // **kwargs
  402. e->kwargs.push_back({"**", ctx()->s_expr.popx()});
  403. }else{
  404. // positional argument
  405. if(!e->kwargs.empty()) SyntaxError("positional argument follows keyword argument");
  406. e->args.push_back(ctx()->s_expr.popx());
  407. }
  408. }
  409. match_newlines_repl();
  410. } while (match(TK(",")));
  411. consume(TK(")"));
  412. if(e->args.size() > 32767) SyntaxError("too many positional arguments");
  413. if(e->kwargs.size() > 32767) SyntaxError("too many keyword arguments");
  414. ctx()->s_expr.push(std::move(e));
  415. }
  416. void Compiler::exprName(){
  417. Str name = prev().str();
  418. NameScope scope = name_scope();
  419. if(ctx()->global_names.count(name)){
  420. scope = NAME_GLOBAL;
  421. }
  422. ctx()->s_expr.push(make_expr<NameExpr>(name, scope));
  423. }
  424. void Compiler::exprAttrib() {
  425. consume(TK("@id"));
  426. ctx()->s_expr.push(
  427. make_expr<AttribExpr>(ctx()->s_expr.popx(), StrName::get(prev().sv()))
  428. );
  429. }
  430. void Compiler::exprSlice0() {
  431. auto slice = make_expr<SliceExpr>();
  432. if(is_expression()){ // :<stop>
  433. EXPR();
  434. slice->stop = ctx()->s_expr.popx();
  435. // try optional step
  436. if(match(TK(":"))){ // :<stop>:<step>
  437. EXPR();
  438. slice->step = ctx()->s_expr.popx();
  439. }
  440. }else if(match(TK(":"))){
  441. if(is_expression()){ // ::<step>
  442. EXPR();
  443. slice->step = ctx()->s_expr.popx();
  444. } // else ::
  445. } // else :
  446. ctx()->s_expr.push(std::move(slice));
  447. }
  448. void Compiler::exprSlice1() {
  449. auto slice = make_expr<SliceExpr>();
  450. slice->start = ctx()->s_expr.popx();
  451. if(is_expression()){ // <start>:<stop>
  452. EXPR();
  453. slice->stop = ctx()->s_expr.popx();
  454. // try optional step
  455. if(match(TK(":"))){ // <start>:<stop>:<step>
  456. EXPR();
  457. slice->step = ctx()->s_expr.popx();
  458. }
  459. }else if(match(TK(":"))){ // <start>::<step>
  460. EXPR();
  461. slice->step = ctx()->s_expr.popx();
  462. } // else <start>:
  463. ctx()->s_expr.push(std::move(slice));
  464. }
  465. void Compiler::exprSubscr() {
  466. auto e = make_expr<SubscrExpr>();
  467. match_newlines_repl();
  468. e->a = ctx()->s_expr.popx(); // a
  469. EXPR_TUPLE(true);
  470. e->b = ctx()->s_expr.popx(); // a[<expr>]
  471. match_newlines_repl();
  472. consume(TK("]"));
  473. ctx()->s_expr.push(std::move(e));
  474. }
  475. void Compiler::exprLiteral0() {
  476. ctx()->s_expr.push(make_expr<Literal0Expr>(prev().type));
  477. }
  478. void Compiler::compile_block_body(void (Compiler::*callback)()) {
  479. if(callback == nullptr) callback = &Compiler::compile_stmt;
  480. consume(TK(":"));
  481. if(curr().type!=TK("@eol") && curr().type!=TK("@eof")){
  482. compile_stmt(); // inline block
  483. return;
  484. }
  485. if(!match_newlines(mode()==REPL_MODE)){
  486. SyntaxError("expected a new line after ':'");
  487. }
  488. consume(TK("@indent"));
  489. while (curr().type != TK("@dedent")) {
  490. match_newlines();
  491. (this->*callback)();
  492. match_newlines();
  493. }
  494. consume(TK("@dedent"));
  495. }
  496. // import a [as b]
  497. // import a [as b], c [as d]
  498. void Compiler::compile_normal_import() {
  499. do {
  500. consume(TK("@id"));
  501. Str name = prev().str();
  502. ctx()->emit_(OP_IMPORT_PATH, ctx()->add_const_string(name.sv()), prev().line);
  503. if (match(TK("as"))) {
  504. consume(TK("@id"));
  505. name = prev().str();
  506. }
  507. ctx()->emit_store_name(name_scope(), StrName(name), prev().line);
  508. } while (match(TK(",")));
  509. consume_end_stmt();
  510. }
  511. // from a import b [as c], d [as e]
  512. // from a.b import c [as d]
  513. // from . import a [as b]
  514. // from .a import b [as c]
  515. // from ..a import b [as c]
  516. // from .a.b import c [as d]
  517. // from xxx import *
  518. void Compiler::compile_from_import() {
  519. int dots = 0;
  520. while(true){
  521. switch(curr().type){
  522. case TK("."): dots+=1; break;
  523. case TK(".."): dots+=2; break;
  524. case TK("..."): dots+=3; break;
  525. default: goto __EAT_DOTS_END;
  526. }
  527. advance();
  528. }
  529. __EAT_DOTS_END:
  530. SStream ss;
  531. for(int i=0; i<dots; i++) ss << '.';
  532. if(dots > 0){
  533. // @id is optional if dots > 0
  534. if(match(TK("@id"))){
  535. ss << prev().sv();
  536. while (match(TK("."))) {
  537. consume(TK("@id"));
  538. ss << "." << prev().sv();
  539. }
  540. }
  541. }else{
  542. // @id is required if dots == 0
  543. consume(TK("@id"));
  544. ss << prev().sv();
  545. while (match(TK("."))) {
  546. consume(TK("@id"));
  547. ss << "." << prev().sv();
  548. }
  549. }
  550. ctx()->emit_(OP_IMPORT_PATH, ctx()->add_const_string(ss.str().sv()), prev().line);
  551. consume(TK("import"));
  552. if (match(TK("*"))) {
  553. if(name_scope() != NAME_GLOBAL) SyntaxError("from <module> import * can only be used in global scope");
  554. // pop the module and import __all__
  555. ctx()->emit_(OP_POP_IMPORT_STAR, BC_NOARG, prev().line);
  556. consume_end_stmt();
  557. return;
  558. }
  559. do {
  560. ctx()->emit_(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
  561. consume(TK("@id"));
  562. Str name = prev().str();
  563. ctx()->emit_(OP_LOAD_ATTR, StrName(name).index, prev().line);
  564. if (match(TK("as"))) {
  565. consume(TK("@id"));
  566. name = prev().str();
  567. }
  568. ctx()->emit_store_name(name_scope(), StrName(name), prev().line);
  569. } while (match(TK(",")));
  570. ctx()->emit_(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  571. consume_end_stmt();
  572. }
  573. bool Compiler::is_expression(bool allow_slice){
  574. PrattCallback prefix = rules[curr().type].prefix;
  575. return prefix != nullptr && (allow_slice || curr().type!=TK(":"));
  576. }
  577. void Compiler::parse_expression(int precedence, bool allow_slice) {
  578. PrattCallback prefix = rules[curr().type].prefix;
  579. if (prefix==nullptr || (curr().type==TK(":") && !allow_slice)){
  580. SyntaxError(Str("expected an expression, got ") + TK_STR(curr().type));
  581. }
  582. advance();
  583. (this->*prefix)();
  584. while (rules[curr().type].precedence >= precedence && (allow_slice || curr().type!=TK(":"))) {
  585. TokenIndex op = curr().type;
  586. advance();
  587. PrattCallback infix = rules[op].infix;
  588. PK_ASSERT(infix != nullptr);
  589. (this->*infix)();
  590. }
  591. }
  592. void Compiler::compile_if_stmt() {
  593. EXPR(); // condition
  594. ctx()->emit_expr();
  595. int patch = ctx()->emit_(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  596. compile_block_body();
  597. if (match(TK("elif"))) {
  598. int exit_patch = ctx()->emit_(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  599. ctx()->patch_jump(patch);
  600. compile_if_stmt();
  601. ctx()->patch_jump(exit_patch);
  602. } else if (match(TK("else"))) {
  603. int exit_patch = ctx()->emit_(OP_JUMP_ABSOLUTE, BC_NOARG, prev().line);
  604. ctx()->patch_jump(patch);
  605. compile_block_body();
  606. ctx()->patch_jump(exit_patch);
  607. } else {
  608. ctx()->patch_jump(patch);
  609. }
  610. }
  611. void Compiler::compile_while_loop() {
  612. CodeBlock* block = ctx()->enter_block(CodeBlockType::WHILE_LOOP);
  613. EXPR(); // condition
  614. ctx()->emit_expr();
  615. int patch = ctx()->emit_(OP_POP_JUMP_IF_FALSE, BC_NOARG, prev().line);
  616. compile_block_body();
  617. ctx()->emit_(OP_LOOP_CONTINUE, ctx()->get_loop(), BC_KEEPLINE, true);
  618. ctx()->patch_jump(patch);
  619. ctx()->exit_block();
  620. // optional else clause
  621. if (match(TK("else"))) {
  622. compile_block_body();
  623. block->end2 = ctx()->co->codes.size();
  624. }
  625. }
  626. void Compiler::compile_for_loop() {
  627. Expr_ vars = EXPR_VARS();
  628. consume(TK("in"));
  629. EXPR_TUPLE(); ctx()->emit_expr();
  630. ctx()->emit_(OP_GET_ITER, BC_NOARG, BC_KEEPLINE);
  631. CodeBlock* block = ctx()->enter_block(CodeBlockType::FOR_LOOP);
  632. int for_codei = ctx()->emit_(OP_FOR_ITER, BC_NOARG, BC_KEEPLINE);
  633. bool ok = vars->emit_store(ctx());
  634. if(!ok) SyntaxError(); // this error occurs in `vars` instead of this line, but...nevermind
  635. ctx()->try_merge_for_iter_store(for_codei);
  636. compile_block_body();
  637. ctx()->emit_(OP_LOOP_CONTINUE, ctx()->get_loop(), BC_KEEPLINE, true);
  638. ctx()->exit_block();
  639. // optional else clause
  640. if (match(TK("else"))) {
  641. compile_block_body();
  642. block->end2 = ctx()->co->codes.size();
  643. }
  644. }
  645. void Compiler::compile_try_except() {
  646. ctx()->enter_block(CodeBlockType::TRY_EXCEPT);
  647. compile_block_body();
  648. pod_vector<int> patches = {
  649. ctx()->emit_(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE)
  650. };
  651. ctx()->exit_block();
  652. int finally_entry = -1;
  653. if(curr().type != TK("finally")){
  654. do {
  655. StrName as_name;
  656. consume(TK("except"));
  657. if(is_expression()){
  658. EXPR(); // push assumed type on to the stack
  659. ctx()->emit_expr();
  660. ctx()->emit_(OP_EXCEPTION_MATCH, BC_NOARG, prev().line);
  661. if(match(TK("as"))){
  662. consume(TK("@id"));
  663. as_name = StrName(prev().sv());
  664. }
  665. }else{
  666. ctx()->emit_(OP_LOAD_TRUE, BC_NOARG, BC_KEEPLINE);
  667. }
  668. int patch = ctx()->emit_(OP_POP_JUMP_IF_FALSE, BC_NOARG, BC_KEEPLINE);
  669. // on match
  670. if(!as_name.empty()){
  671. ctx()->emit_(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
  672. ctx()->emit_store_name(name_scope(), as_name, BC_KEEPLINE);
  673. }
  674. // pop the exception
  675. ctx()->emit_(OP_POP_EXCEPTION, BC_NOARG, BC_KEEPLINE);
  676. compile_block_body();
  677. patches.push_back(ctx()->emit_(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE));
  678. ctx()->patch_jump(patch);
  679. }while(curr().type == TK("except"));
  680. }
  681. if(match(TK("finally"))){
  682. int patch = ctx()->emit_(OP_JUMP_ABSOLUTE, BC_NOARG, BC_KEEPLINE);
  683. finally_entry = ctx()->co->codes.size();
  684. compile_block_body();
  685. ctx()->emit_(OP_JUMP_ABSOLUTE_TOP, BC_NOARG, BC_KEEPLINE);
  686. ctx()->patch_jump(patch);
  687. }
  688. // no match, re-raise
  689. if(finally_entry != -1){
  690. i64 target = ctx()->co->codes.size()+2;
  691. ctx()->emit_(OP_LOAD_CONST, ctx()->add_const(VAR(target)), BC_KEEPLINE);
  692. ctx()->emit_(OP_JUMP_ABSOLUTE, finally_entry, BC_KEEPLINE);
  693. }
  694. ctx()->emit_(OP_RE_RAISE, BC_NOARG, BC_KEEPLINE);
  695. // no exception or no match, jump to the end
  696. for (int patch : patches) ctx()->patch_jump(patch);
  697. if(finally_entry != -1){
  698. i64 target = ctx()->co->codes.size()+2;
  699. ctx()->emit_(OP_LOAD_CONST, ctx()->add_const(VAR(target)), BC_KEEPLINE);
  700. ctx()->emit_(OP_JUMP_ABSOLUTE, finally_entry, BC_KEEPLINE);
  701. }
  702. }
  703. void Compiler::compile_decorated(){
  704. Expr_vector decorators;
  705. do{
  706. EXPR();
  707. decorators.push_back(ctx()->s_expr.popx());
  708. if(!match_newlines_repl()) SyntaxError();
  709. }while(match(TK("@")));
  710. if(match(TK("class"))){
  711. compile_class(decorators);
  712. }else{
  713. consume(TK("def"));
  714. compile_function(decorators);
  715. }
  716. }
  717. bool Compiler::try_compile_assignment(){
  718. switch (curr().type) {
  719. case TK("+="): case TK("-="): case TK("*="): case TK("/="): case TK("//="): case TK("%="):
  720. case TK("<<="): case TK(">>="): case TK("&="): case TK("|="): case TK("^="): {
  721. Expr* lhs_p = ctx()->s_expr.top().get();
  722. if(lhs_p->is_starred()) SyntaxError();
  723. if(ctx()->is_compiling_class) SyntaxError("can't use inplace operator in class definition");
  724. advance();
  725. auto e = make_expr<BinaryExpr>();
  726. e->op = prev().type - 1; // -1 to remove =
  727. e->lhs = ctx()->s_expr.popx();
  728. EXPR_TUPLE();
  729. e->rhs = ctx()->s_expr.popx();
  730. if(e->is_starred()) SyntaxError();
  731. e->emit_(ctx());
  732. bool ok = lhs_p->emit_store(ctx());
  733. if(!ok) SyntaxError();
  734. } return true;
  735. case TK("="): {
  736. int n = 0;
  737. while(match(TK("="))){
  738. EXPR_TUPLE();
  739. n += 1;
  740. }
  741. // stack size is n+1
  742. Expr_ val = ctx()->s_expr.popx();
  743. val->emit_(ctx());
  744. for(int j=1; j<n; j++) ctx()->emit_(OP_DUP_TOP, BC_NOARG, BC_KEEPLINE);
  745. for(int j=0; j<n; j++){
  746. auto e = ctx()->s_expr.popx();
  747. if(e->is_starred()) SyntaxError();
  748. bool ok = e->emit_store(ctx());
  749. if(!ok) SyntaxError();
  750. }
  751. } return true;
  752. default: return false;
  753. }
  754. }
  755. void Compiler::compile_stmt() {
  756. if(match(TK("class"))){
  757. compile_class();
  758. return;
  759. }
  760. advance();
  761. int kw_line = prev().line; // backup line number
  762. int curr_loop_block = ctx()->get_loop();
  763. switch(prev().type){
  764. case TK("break"):
  765. if (curr_loop_block < 0) SyntaxError("'break' outside loop");
  766. ctx()->emit_(OP_LOOP_BREAK, curr_loop_block, kw_line);
  767. consume_end_stmt();
  768. break;
  769. case TK("continue"):
  770. if (curr_loop_block < 0) SyntaxError("'continue' not properly in loop");
  771. ctx()->emit_(OP_LOOP_CONTINUE, curr_loop_block, kw_line);
  772. consume_end_stmt();
  773. break;
  774. case TK("yield"):
  775. if (contexts.size() <= 1) SyntaxError("'yield' outside function");
  776. EXPR_TUPLE(); ctx()->emit_expr();
  777. ctx()->emit_(OP_YIELD_VALUE, BC_NOARG, kw_line);
  778. consume_end_stmt();
  779. break;
  780. case TK("yield from"):
  781. if (contexts.size() <= 1) SyntaxError("'yield from' outside function");
  782. EXPR_TUPLE(); ctx()->emit_expr();
  783. ctx()->emit_(OP_GET_ITER, BC_NOARG, kw_line);
  784. ctx()->enter_block(CodeBlockType::FOR_LOOP);
  785. ctx()->emit_(OP_FOR_ITER_YIELD_VALUE, BC_NOARG, kw_line);
  786. ctx()->emit_(OP_LOOP_CONTINUE, ctx()->get_loop(), kw_line);
  787. ctx()->exit_block();
  788. consume_end_stmt();
  789. break;
  790. case TK("return"):
  791. if (contexts.size() <= 1) SyntaxError("'return' outside function");
  792. if(match_end_stmt()){
  793. ctx()->emit_(OP_RETURN_VALUE, 1, kw_line);
  794. }else{
  795. EXPR_TUPLE(); ctx()->emit_expr();
  796. consume_end_stmt();
  797. ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, kw_line);
  798. }
  799. break;
  800. /*************************************************/
  801. case TK("if"): compile_if_stmt(); break;
  802. case TK("while"): compile_while_loop(); break;
  803. case TK("for"): compile_for_loop(); break;
  804. case TK("import"): compile_normal_import(); break;
  805. case TK("from"): compile_from_import(); break;
  806. case TK("def"): compile_function(); break;
  807. case TK("@"): compile_decorated(); break;
  808. case TK("try"): compile_try_except(); break;
  809. case TK("pass"): consume_end_stmt(); break;
  810. /*************************************************/
  811. case TK("++"):{
  812. consume(TK("@id"));
  813. StrName name(prev().sv());
  814. NameScope scope = name_scope();
  815. bool is_global = ctx()->global_names.count(name.sv());
  816. if(is_global) scope = NAME_GLOBAL;
  817. switch(scope){
  818. case NAME_LOCAL:
  819. ctx()->emit_(OP_INC_FAST, ctx()->add_varname(name), prev().line);
  820. break;
  821. case NAME_GLOBAL:
  822. ctx()->emit_(OP_INC_GLOBAL, name.index, prev().line);
  823. break;
  824. default: SyntaxError(); break;
  825. }
  826. consume_end_stmt();
  827. break;
  828. }
  829. case TK("--"):{
  830. consume(TK("@id"));
  831. StrName name(prev().sv());
  832. switch(name_scope()){
  833. case NAME_LOCAL:
  834. ctx()->emit_(OP_DEC_FAST, ctx()->add_varname(name), prev().line);
  835. break;
  836. case NAME_GLOBAL:
  837. ctx()->emit_(OP_DEC_GLOBAL, name.index, prev().line);
  838. break;
  839. default: SyntaxError(); break;
  840. }
  841. consume_end_stmt();
  842. break;
  843. }
  844. case TK("assert"):{
  845. EXPR(); // condition
  846. ctx()->emit_expr();
  847. int index = ctx()->emit_(OP_POP_JUMP_IF_TRUE, BC_NOARG, kw_line);
  848. int has_msg = 0;
  849. if(match(TK(","))){
  850. EXPR(); // message
  851. ctx()->emit_expr();
  852. has_msg = 1;
  853. }
  854. ctx()->emit_(OP_RAISE_ASSERT, has_msg, kw_line);
  855. ctx()->patch_jump(index);
  856. consume_end_stmt();
  857. break;
  858. }
  859. case TK("global"):
  860. do {
  861. consume(TK("@id"));
  862. ctx()->global_names.insert(prev().str());
  863. } while (match(TK(",")));
  864. consume_end_stmt();
  865. break;
  866. case TK("raise"): {
  867. EXPR(); ctx()->emit_expr();
  868. ctx()->emit_(OP_RAISE, BC_NOARG, kw_line);
  869. consume_end_stmt();
  870. } break;
  871. case TK("del"): {
  872. EXPR_TUPLE();
  873. Expr_ e = ctx()->s_expr.popx();
  874. bool ok = e->emit_del(ctx());
  875. if(!ok) SyntaxError();
  876. consume_end_stmt();
  877. } break;
  878. case TK("with"): {
  879. EXPR(); // [ <expr> ]
  880. ctx()->emit_expr();
  881. ctx()->enter_block(CodeBlockType::CONTEXT_MANAGER);
  882. Expr_ as_name;
  883. if(match(TK("as"))){
  884. consume(TK("@id"));
  885. as_name = make_expr<NameExpr>(prev().str(), name_scope());
  886. }
  887. ctx()->emit_(OP_WITH_ENTER, BC_NOARG, prev().line);
  888. // [ <expr> <expr>.__enter__() ]
  889. if(as_name != nullptr){
  890. bool ok = as_name->emit_store(ctx());
  891. if(!ok) SyntaxError();
  892. }else{
  893. ctx()->emit_(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  894. }
  895. compile_block_body();
  896. ctx()->emit_(OP_WITH_EXIT, BC_NOARG, prev().line);
  897. ctx()->exit_block();
  898. } break;
  899. /*************************************************/
  900. case TK("=="): {
  901. consume(TK("@id"));
  902. if(mode()!=EXEC_MODE) SyntaxError("'label' is only available in EXEC_MODE");
  903. bool ok = ctx()->add_label(prev().str());
  904. consume(TK("=="));
  905. if(!ok) SyntaxError("label " + prev().str().escape() + " already exists");
  906. consume_end_stmt();
  907. } break;
  908. case TK("->"):
  909. consume(TK("@id"));
  910. if(mode()!=EXEC_MODE) SyntaxError("'goto' is only available in EXEC_MODE");
  911. ctx()->emit_(OP_GOTO, StrName(prev().sv()).index, prev().line);
  912. consume_end_stmt();
  913. break;
  914. /*************************************************/
  915. // handle dangling expression or assignment
  916. default: {
  917. advance(-1); // do revert since we have pre-called advance() at the beginning
  918. EXPR_TUPLE();
  919. bool is_typed_name = false; // e.g. x: int
  920. // eat variable's type hint if it is a single name
  921. if(ctx()->s_expr.top()->is_name()){
  922. if(match(TK(":"))){
  923. consume_type_hints();
  924. is_typed_name = true;
  925. if(ctx()->is_compiling_class){
  926. NameExpr* ne = static_cast<NameExpr*>(ctx()->s_expr.top().get());
  927. ctx()->emit_(OP_ADD_CLASS_ANNOTATION, ne->name.index, BC_KEEPLINE);
  928. }
  929. }
  930. }
  931. if(!try_compile_assignment()){
  932. if(!ctx()->s_expr.empty() && ctx()->s_expr.top()->is_starred()){
  933. SyntaxError();
  934. }
  935. if(!is_typed_name){
  936. ctx()->emit_expr();
  937. if((mode()==CELL_MODE || mode()==REPL_MODE) && name_scope()==NAME_GLOBAL){
  938. ctx()->emit_(OP_PRINT_EXPR, BC_NOARG, BC_KEEPLINE);
  939. }else{
  940. ctx()->emit_(OP_POP_TOP, BC_NOARG, BC_KEEPLINE);
  941. }
  942. }else{
  943. PK_ASSERT(ctx()->s_expr.size() == 1)
  944. ctx()->s_expr.pop();
  945. }
  946. }
  947. consume_end_stmt();
  948. }
  949. }
  950. }
  951. void Compiler::consume_type_hints(){
  952. EXPR();
  953. ctx()->s_expr.pop();
  954. }
  955. void Compiler::_add_decorators(const Expr_vector& decorators){
  956. // [obj]
  957. for(auto it=decorators.rbegin(); it!=decorators.rend(); ++it){
  958. (*it)->emit_(ctx()); // [obj, f]
  959. ctx()->emit_(OP_ROT_TWO, BC_NOARG, (*it)->line); // [f, obj]
  960. ctx()->emit_(OP_LOAD_NULL, BC_NOARG, BC_KEEPLINE); // [f, obj, NULL]
  961. ctx()->emit_(OP_ROT_TWO, BC_NOARG, BC_KEEPLINE); // [obj, NULL, f]
  962. ctx()->emit_(OP_CALL, 1, (*it)->line); // [obj]
  963. }
  964. }
  965. void Compiler::compile_class(const Expr_vector& decorators){
  966. consume(TK("@id"));
  967. int namei = StrName(prev().sv()).index;
  968. Expr_ base = nullptr;
  969. if(match(TK("("))){
  970. if(is_expression()){
  971. EXPR();
  972. base = ctx()->s_expr.popx();
  973. }
  974. consume(TK(")"));
  975. }
  976. if(base == nullptr){
  977. ctx()->emit_(OP_LOAD_NONE, BC_NOARG, prev().line);
  978. }else {
  979. base->emit_(ctx());
  980. }
  981. ctx()->emit_(OP_BEGIN_CLASS, namei, BC_KEEPLINE);
  982. for(auto& c: this->contexts.container()){
  983. if(c.is_compiling_class){
  984. SyntaxError("nested class is not allowed");
  985. }
  986. }
  987. ctx()->is_compiling_class = true;
  988. compile_block_body();
  989. ctx()->is_compiling_class = false;
  990. if(!decorators.empty()){
  991. ctx()->emit_(OP_BEGIN_CLASS_DECORATION, BC_NOARG, BC_KEEPLINE);
  992. _add_decorators(decorators);
  993. ctx()->emit_(OP_END_CLASS_DECORATION, BC_NOARG, BC_KEEPLINE);
  994. }
  995. ctx()->emit_(OP_END_CLASS, namei, BC_KEEPLINE);
  996. }
  997. void Compiler::_compile_f_args(FuncDecl_ decl, bool enable_type_hints){
  998. int state = 0; // 0 for args, 1 for *args, 2 for k=v, 3 for **kwargs
  999. do {
  1000. if(state > 3) SyntaxError();
  1001. if(state == 3) SyntaxError("**kwargs should be the last argument");
  1002. match_newlines();
  1003. if(match(TK("*"))){
  1004. if(state < 1) state = 1;
  1005. else SyntaxError("*args should be placed before **kwargs");
  1006. }
  1007. else if(match(TK("**"))){
  1008. state = 3;
  1009. }
  1010. consume(TK("@id"));
  1011. StrName name = prev().str();
  1012. // check duplicate argument name
  1013. for(int j: decl->args){
  1014. if(decl->code->varnames[j] == name) {
  1015. SyntaxError("duplicate argument name");
  1016. }
  1017. }
  1018. for(auto& kv: decl->kwargs){
  1019. if(decl->code->varnames[kv.index] == name){
  1020. SyntaxError("duplicate argument name");
  1021. }
  1022. }
  1023. if(decl->starred_arg!=-1 && decl->code->varnames[decl->starred_arg] == name){
  1024. SyntaxError("duplicate argument name");
  1025. }
  1026. if(decl->starred_kwarg!=-1 && decl->code->varnames[decl->starred_kwarg] == name){
  1027. SyntaxError("duplicate argument name");
  1028. }
  1029. // eat type hints
  1030. if(enable_type_hints && match(TK(":"))) consume_type_hints();
  1031. if(state == 0 && curr().type == TK("=")) state = 2;
  1032. int index = ctx()->add_varname(name);
  1033. switch (state)
  1034. {
  1035. case 0:
  1036. decl->args.push_back(index);
  1037. break;
  1038. case 1:
  1039. decl->starred_arg = index;
  1040. state+=1;
  1041. break;
  1042. case 2: {
  1043. consume(TK("="));
  1044. PyObject* value = read_literal();
  1045. if(value == nullptr){
  1046. SyntaxError(Str("default argument must be a literal"));
  1047. }
  1048. decl->add_kwarg(index, name, value);
  1049. } break;
  1050. case 3:
  1051. decl->starred_kwarg = index;
  1052. state+=1;
  1053. break;
  1054. }
  1055. } while (match(TK(",")));
  1056. }
  1057. void Compiler::compile_function(const Expr_vector& decorators){
  1058. consume(TK("@id"));
  1059. Str decl_name = prev().str();
  1060. FuncDecl_ decl = push_f_context(decl_name);
  1061. consume(TK("("));
  1062. if (!match(TK(")"))) {
  1063. _compile_f_args(decl, true);
  1064. consume(TK(")"));
  1065. }
  1066. if(match(TK("->"))) consume_type_hints();
  1067. compile_block_body();
  1068. pop_context();
  1069. decl->docstring = nullptr;
  1070. if(decl->code->codes.size()>=2 && decl->code->codes[0].op == OP_LOAD_CONST && decl->code->codes[1].op == OP_POP_TOP){
  1071. PyObject* c = decl->code->consts[decl->code->codes[0].arg];
  1072. if(is_type(c, vm->tp_str)){
  1073. decl->code->codes[0].op = OP_NO_OP;
  1074. decl->code->codes[1].op = OP_NO_OP;
  1075. decl->docstring = PK_OBJ_GET(Str, c).c_str();
  1076. }
  1077. }
  1078. ctx()->emit_(OP_LOAD_FUNCTION, ctx()->add_func_decl(decl), prev().line);
  1079. _add_decorators(decorators);
  1080. if(!ctx()->is_compiling_class){
  1081. auto e = make_expr<NameExpr>(decl_name, name_scope());
  1082. e->emit_store(ctx());
  1083. }else{
  1084. int index = StrName(decl_name).index;
  1085. ctx()->emit_(OP_STORE_CLASS_ATTR, index, prev().line);
  1086. }
  1087. }
  1088. PyObject* Compiler::to_object(const TokenValue& value){
  1089. PyObject* obj = nullptr;
  1090. if(std::holds_alternative<i64>(value)){
  1091. obj = VAR(std::get<i64>(value));
  1092. }
  1093. if(std::holds_alternative<f64>(value)){
  1094. obj = VAR(std::get<f64>(value));
  1095. }
  1096. if(std::holds_alternative<Str>(value)){
  1097. obj = VAR(std::get<Str>(value));
  1098. }
  1099. PK_ASSERT(obj != nullptr)
  1100. return obj;
  1101. }
  1102. PyObject* Compiler::read_literal(){
  1103. advance();
  1104. switch(prev().type){
  1105. case TK("-"): {
  1106. consume(TK("@num"));
  1107. PyObject* val = to_object(prev().value);
  1108. return vm->py_negate(val);
  1109. }
  1110. case TK("@num"): return to_object(prev().value);
  1111. case TK("@str"): return to_object(prev().value);
  1112. case TK("True"): return VAR(true);
  1113. case TK("False"): return VAR(false);
  1114. case TK("None"): return vm->None;
  1115. case TK("..."): return vm->Ellipsis;
  1116. case TK("("): {
  1117. List cpnts;
  1118. while(true) {
  1119. cpnts.push_back(read_literal());
  1120. if(curr().type == TK(")")) break;
  1121. consume(TK(","));
  1122. if(curr().type == TK(")")) break;
  1123. }
  1124. consume(TK(")"));
  1125. return VAR(Tuple(std::move(cpnts)));
  1126. }
  1127. default: break;
  1128. }
  1129. return nullptr;
  1130. }
  1131. Compiler::Compiler(VM* vm, std::string_view source, const Str& filename, CompileMode mode, bool unknown_global_scope)
  1132. :lexer(vm, std::make_shared<SourceData>(source, filename, mode)){
  1133. this->vm = vm;
  1134. this->unknown_global_scope = unknown_global_scope;
  1135. init_pratt_rules();
  1136. }
  1137. Str Compiler::precompile(){
  1138. auto tokens = lexer.run();
  1139. SStream ss;
  1140. ss << "pkpy:" PK_VERSION << '\n'; // L1: version string
  1141. ss << (int)mode() << '\n'; // L2: mode
  1142. std::map<std::string_view, int> token_indices;
  1143. for(auto token: tokens){
  1144. if(is_raw_string_used(token.type)){
  1145. auto it = token_indices.find(token.sv());
  1146. if(it == token_indices.end()){
  1147. token_indices[token.sv()] = 0;
  1148. // assert no '\n' in token.sv()
  1149. for(char c: token.sv()) if(c=='\n') PK_FATAL_ERROR();
  1150. }
  1151. }
  1152. }
  1153. ss << "=" << (int)token_indices.size() << '\n'; // L3: raw string count
  1154. int index = 0;
  1155. for(auto& kv: token_indices){
  1156. ss << kv.first << '\n'; // L4: raw strings
  1157. kv.second = index++;
  1158. }
  1159. ss << "=" << (int)tokens.size() << '\n'; // L5: token count
  1160. for(int i=0; i<tokens.size(); i++){
  1161. const Token& token = tokens[i];
  1162. ss << (int)token.type << ',';
  1163. if(is_raw_string_used(token.type)){
  1164. ss << token_indices[token.sv()] << ',';
  1165. }
  1166. if(i>0 && tokens[i-1].line == token.line) ss << ',';
  1167. else ss << token.line << ',';
  1168. if(i>0 && tokens[i-1].brackets_level == token.brackets_level) ss << ',';
  1169. else ss << token.brackets_level << ',';
  1170. // visit token value
  1171. std::visit([&ss](auto&& arg){
  1172. using T = std::decay_t<decltype(arg)>;
  1173. if constexpr(std::is_same_v<T, i64>){
  1174. ss << 'I' << arg;
  1175. }else if constexpr(std::is_same_v<T, f64>){
  1176. ss << 'F' << arg;
  1177. }else if constexpr(std::is_same_v<T, Str>){
  1178. ss << 'S';
  1179. for(char c: arg) ss.write_hex((unsigned char)c);
  1180. }
  1181. ss << '\n';
  1182. }, token.value);
  1183. }
  1184. return ss.str();
  1185. }
  1186. void Compiler::from_precompiled(const char* source){
  1187. TokenDeserializer deserializer(source);
  1188. deserializer.curr += 5; // skip "pkpy:"
  1189. std::string_view version = deserializer.read_string('\n');
  1190. if(version != PK_VERSION){
  1191. SyntaxError(_S("precompiled version mismatch: ", version, "!=" PK_VERSION));
  1192. }
  1193. if(deserializer.read_uint('\n') != (i64)mode()){
  1194. SyntaxError("precompiled mode mismatch");
  1195. }
  1196. int count = deserializer.read_count();
  1197. std::vector<Str>& precompiled_tokens = lexer.src->_precompiled_tokens;
  1198. for(int i=0; i<count; i++){
  1199. precompiled_tokens.push_back(deserializer.read_string('\n'));
  1200. }
  1201. count = deserializer.read_count();
  1202. for(int i=0; i<count; i++){
  1203. Token t;
  1204. t.type = (unsigned char)deserializer.read_uint(',');
  1205. if(is_raw_string_used(t.type)){
  1206. i64 index = deserializer.read_uint(',');
  1207. t.start = precompiled_tokens[index].c_str();
  1208. t.length = precompiled_tokens[index].size;
  1209. }else{
  1210. t.start = nullptr;
  1211. t.length = 0;
  1212. }
  1213. if(deserializer.match_char(',')){
  1214. t.line = tokens.back().line;
  1215. }else{
  1216. t.line = (int)deserializer.read_uint(',');
  1217. }
  1218. if(deserializer.match_char(',')){
  1219. t.brackets_level = tokens.back().brackets_level;
  1220. }else{
  1221. t.brackets_level = (int)deserializer.read_uint(',');
  1222. }
  1223. char type = deserializer.read_char();
  1224. switch(type){
  1225. case 'I': t.value = deserializer.read_uint('\n'); break;
  1226. case 'F': t.value = deserializer.read_float('\n'); break;
  1227. case 'S': t.value = deserializer.read_string_from_hex('\n'); break;
  1228. default: t.value = {}; break;
  1229. }
  1230. tokens.push_back(t);
  1231. }
  1232. }
  1233. CodeObject_ Compiler::compile(){
  1234. PK_ASSERT(i == 0) // make sure it is the first time to compile
  1235. if(lexer.src->is_precompiled){
  1236. from_precompiled(lexer.src->source.c_str());
  1237. }else{
  1238. this->tokens = lexer.run();
  1239. }
  1240. CodeObject_ code = push_global_context();
  1241. advance(); // skip @sof, so prev() is always valid
  1242. match_newlines(); // skip possible leading '\n'
  1243. if(mode()==EVAL_MODE) {
  1244. EXPR_TUPLE(); ctx()->emit_expr();
  1245. consume(TK("@eof"));
  1246. ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  1247. pop_context();
  1248. return code;
  1249. }else if(mode()==JSON_MODE){
  1250. EXPR();
  1251. Expr_ e = ctx()->s_expr.popx();
  1252. if(!e->is_json_object()) SyntaxError("expect a JSON object, literal or array");
  1253. consume(TK("@eof"));
  1254. e->emit_(ctx());
  1255. ctx()->emit_(OP_RETURN_VALUE, BC_NOARG, BC_KEEPLINE);
  1256. pop_context();
  1257. return code;
  1258. }
  1259. while (!match(TK("@eof"))) {
  1260. compile_stmt();
  1261. match_newlines();
  1262. }
  1263. pop_context();
  1264. return code;
  1265. }
  1266. // TODO: refactor this
  1267. void Lexer::throw_err(StrName type, Str msg, int lineno, const char* cursor){
  1268. PyObject* e_obj = vm->call(vm->builtins->attr(type), VAR(msg));
  1269. Exception& e = PK_OBJ_GET(Exception, e_obj);
  1270. e.st_push(src, lineno, cursor, "");
  1271. throw e;
  1272. }
  1273. std::string_view TokenDeserializer::read_string(char c){
  1274. const char* start = curr;
  1275. while(*curr != c) curr++;
  1276. std::string_view retval(start, curr-start);
  1277. curr++; // skip the delimiter
  1278. return retval;
  1279. }
  1280. Str TokenDeserializer::read_string_from_hex(char c){
  1281. std::string_view s = read_string(c);
  1282. char* buffer = (char*)pool64_alloc(s.size()/2 + 1);
  1283. for(int i=0; i<s.size(); i+=2){
  1284. char c = 0;
  1285. if(s[i]>='0' && s[i]<='9') c += s[i]-'0';
  1286. else if(s[i]>='a' && s[i]<='f') c += s[i]-'a'+10;
  1287. else PK_FATAL_ERROR();
  1288. c <<= 4;
  1289. if(s[i+1]>='0' && s[i+1]<='9') c += s[i+1]-'0';
  1290. else if(s[i+1]>='a' && s[i+1]<='f') c += s[i+1]-'a'+10;
  1291. else PK_FATAL_ERROR();
  1292. buffer[i/2] = c;
  1293. }
  1294. buffer[s.size()/2] = 0;
  1295. return std::pair<char*, int>(buffer, s.size()/2);
  1296. }
  1297. int TokenDeserializer::read_count(){
  1298. PK_ASSERT(*curr == '=')
  1299. curr++;
  1300. return read_uint('\n');
  1301. }
  1302. i64 TokenDeserializer::read_uint(char c){
  1303. i64 out = 0;
  1304. while(*curr != c){
  1305. out = out*10 + (*curr-'0');
  1306. curr++;
  1307. }
  1308. curr++; // skip the delimiter
  1309. return out;
  1310. }
  1311. f64 TokenDeserializer::read_float(char c){
  1312. std::string_view sv = read_string(c);
  1313. return std::stod(std::string(sv));
  1314. }
  1315. } // namespace pkpy