// lexer.c — pocketpy tokenizer implementation
  1. #include "pocketpy/common/sstream.h"
  2. #include "pocketpy/common/vector.h"
  3. #include "pocketpy/compiler/lexer.h"
  4. #include "pocketpy/objects/sourcedata.h"
  5. #include <ctype.h>
  6. #define is_raw_string_used(t) ((t) == TK_ID)
/* Lexer state: walks the source buffer once, producing a flat token stream. */
typedef struct Lexer {
    SourceData_ src;         // ref-counted source buffer (INCREF in ctor, DECREF in dtor)
    const char* token_start; // start of the token currently being scanned
    const char* curr_char;   // scan cursor into src->source->data
    int current_line;        // 1-based line number at curr_char
    int brackets_level;      // nesting depth of (), [], {}; >0 disables indentation tracking
    c11_vector /*T=Token*/ nexts;   // tokens produced so far
    c11_vector /*T=int*/ indents;   // indentation stack in columns (tab counts as 4)
} Lexer;
  16. const static TokenValue EmptyTokenValue;
  17. static Error* lex_one_token(Lexer* self, bool* eof, bool is_fstring);
  18. static void Lexer__ctor(Lexer* self, SourceData_ src) {
  19. PK_INCREF(src);
  20. self->src = src;
  21. self->curr_char = self->token_start = src->source->data;
  22. self->current_line = 1;
  23. self->brackets_level = 0;
  24. c11_vector__ctor(&self->nexts, sizeof(Token));
  25. c11_vector__ctor(&self->indents, sizeof(int));
  26. }
  27. static void Lexer__dtor(Lexer* self) {
  28. PK_DECREF(self->src);
  29. c11_vector__dtor(&self->nexts);
  30. c11_vector__dtor(&self->indents);
  31. }
  32. static char eatchar(Lexer* self) {
  33. char c = *self->curr_char;
  34. assert(c != '\n'); // eatchar() cannot consume a newline
  35. self->curr_char++;
  36. return c;
  37. }
  38. static char eatchar_include_newline(Lexer* self) {
  39. char c = *self->curr_char;
  40. self->curr_char++;
  41. if(c == '\n') {
  42. self->current_line++;
  43. c11_vector__push(const char*, &self->src->line_starts, self->curr_char);
  44. }
  45. return c;
  46. }
  47. static int eat_spaces(Lexer* self) {
  48. int count = 0;
  49. while(true) {
  50. switch(*self->curr_char) {
  51. case ' ': count += 1; break;
  52. case '\t': count += 4; break;
  53. default: return count;
  54. }
  55. eatchar(self);
  56. }
  57. }
  58. static bool matchchar(Lexer* self, char c) {
  59. if(*self->curr_char != c) return false;
  60. eatchar_include_newline(self);
  61. return true;
  62. }
  63. static bool match_n_chars(Lexer* self, int n, char c0) {
  64. const char* c = self->curr_char;
  65. for(int i = 0; i < n; i++) {
  66. if(*c == '\0') return false;
  67. if(*c != c0) return false;
  68. c++;
  69. }
  70. for(int i = 0; i < n; i++)
  71. eatchar_include_newline(self);
  72. return true;
  73. }
  74. static void skip_line_comment(Lexer* self) {
  75. while(*self->curr_char) {
  76. if(*self->curr_char == '\n') return;
  77. eatchar(self);
  78. }
  79. }
  80. static void add_token_with_value(Lexer* self, TokenIndex type, TokenValue value) {
  81. switch(type) {
  82. case TK_LBRACE:
  83. case TK_LBRACKET:
  84. case TK_LPAREN: self->brackets_level++; break;
  85. case TK_RPAREN:
  86. case TK_RBRACKET:
  87. case TK_RBRACE: self->brackets_level--; break;
  88. default: break;
  89. }
  90. Token token = {type,
  91. self->token_start,
  92. (int)(self->curr_char - self->token_start),
  93. self->current_line - ((type == TK_EOL) ? 1 : 0),
  94. self->brackets_level,
  95. value};
  96. // handle "not in", "is not", "yield from"
  97. if(self->nexts.length > 0) {
  98. Token* back = &c11_vector__back(Token, &self->nexts);
  99. if(back->type == TK_NOT_KW && type == TK_IN) {
  100. back->type = TK_NOT_IN;
  101. return;
  102. }
  103. if(back->type == TK_IS && type == TK_NOT_KW) {
  104. back->type = TK_IS_NOT;
  105. return;
  106. }
  107. if(back->type == TK_YIELD && type == TK_FROM) {
  108. back->type = TK_YIELD_FROM;
  109. return;
  110. }
  111. c11_vector__push(Token, &self->nexts, token);
  112. }
  113. }
/* Append a value-less token of `type`. */
static void add_token(Lexer* self, TokenIndex type) {
    add_token_with_value(self, type, EmptyTokenValue);
}
  117. static void add_token_2(Lexer* self, char c, TokenIndex one, TokenIndex two) {
  118. if(matchchar(self, c))
  119. add_token(self, two);
  120. else
  121. add_token(self, one);
  122. }
/* Handle indentation at the start of a logical line, pushing TK_INDENT /
 * TK_DEDENT tokens as the indentation stack grows or shrinks.
 * Returns false when a dedent lands between two recorded levels. */
static bool eat_indentation(Lexer* self) {
    if(self->brackets_level > 0) return true; // indentation is ignored inside brackets
    int spaces = eat_spaces(self);
    if(*self->curr_char == '#') skip_line_comment(self);
    // blank and comment-only lines do not affect indentation
    if(*self->curr_char == '\0' || *self->curr_char == '\n') { return true; }
    // https://docs.python.org/3/reference/lexical_analysis.html#indentation
    int indents_back = c11_vector__back(int, &self->indents);
    if(spaces > indents_back) {
        c11_vector__push(int, &self->indents, spaces);
        Token t = {TK_INDENT,
                   self->token_start,
                   0,
                   self->current_line,
                   self->brackets_level,
                   EmptyTokenValue};
        c11_vector__push(Token, &self->nexts, t);
    } else if(spaces < indents_back) {
        // pop levels until we reach one <= spaces, emitting one DEDENT per pop
        do {
            c11_vector__pop(&self->indents);
            Token t = {TK_DEDENT,
                       self->token_start,
                       0,
                       self->current_line,
                       self->brackets_level,
                       EmptyTokenValue};
            c11_vector__push(Token, &self->nexts, t);
            indents_back = c11_vector__back(int, &self->indents);
        } while(spaces < indents_back);
        // inconsistent dedent: did not land exactly on an outer level
        if(spaces != indents_back) { return false; }
    }
    return true;
}
  155. static bool is_possible_number_char(char c) {
  156. switch(c) {
  157. // clang-format off
  158. case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
  159. case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
  160. case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
  161. case '.': case 'x': case 'o': case 'j':
  162. return true;
  163. default: return false;
  164. // clang-format on
  165. }
  166. }
  167. /******************************/
/* Allocate an Error describing a failure at the current position.
 * `fmt` and the varargs are printf-style. Takes a strong reference to the
 * source; ownership of the returned Error passes to the caller. */
static Error* LexerError(Lexer* self, const char* fmt, ...) {
    Error* err = PK_MALLOC(sizeof(Error));
    err->src = self->src;
    PK_INCREF(self->src);
    err->lineno = self->current_line;
    // if the cursor already sits on a newline, report the line that just ended
    if(*self->curr_char == '\n') { err->lineno--; }
    va_list args;
    va_start(args, fmt);
    vsnprintf(err->msg, sizeof(err->msg), fmt, args);
    va_end(args);
    return err;
}
/* Scan an identifier or keyword whose first char the caller has already
 * consumed. Accepts ASCII alnum/'_' plus multi-byte UTF-8 chars in the
 * unicode Lo ("Letter, other") category. Emits the matching keyword token
 * when the name is found in the sorted keyword range of TokenSymbols,
 * otherwise TK_ID. Returns NULL on success. */
static Error* eat_name(Lexer* self) {
    self->curr_char--; // back up onto the already-consumed first char
    while(true) {
        unsigned char c = *self->curr_char;
        int u8bytes = c11__u8_header(c, true);
        if(u8bytes == 0) return LexerError(self, "invalid char: %c", c);
        if(u8bytes == 1) {
            // single-byte (ASCII): identifier chars are alnum or '_'
            if(isalnum(c) || c == '_') {
                self->curr_char++;
                continue;
            } else {
                break;
            }
        }
        // multi-byte UTF-8: only unicode Lo chars may appear in identifiers
        int value = c11__u8_value(u8bytes, self->curr_char);
        if(c11__is_unicode_Lo_char(value)) {
            self->curr_char += u8bytes;
        } else {
            break;
        }
    }
    int length = (int)(self->curr_char - self->token_start);
    if(length == 0) return LexerError(self, "@id contains invalid char");
    c11_sv name = {self->token_start, length};
    // binary-search the keyword range [TK_FALSE, TK__COUNT__) of TokenSymbols,
    // which must be kept sorted in ascending order
    const char** KW_BEGIN = TokenSymbols + TK_FALSE;
    int KW_COUNT = TK__COUNT__ - TK_FALSE;
#define less(a, b) (c11_sv__cmp2(b, a) > 0)
    int out;
    c11__lower_bound(const char*, KW_BEGIN, KW_COUNT, name, less, &out);
#undef less
    if(out != KW_COUNT && c11__sveq2(name, KW_BEGIN[out])) {
        add_token(self, (TokenIndex)(out + TK_FALSE));
    } else {
        add_token(self, TK_ID);
    }
    return NULL;
}
/* String literal kinds: plain, raw (no escape processing), f-string, bytes. */
enum StringType { NORMAL_STRING, RAW_STRING, F_STRING, NORMAL_BYTES };

/* Scan a string literal body into `buff`, the opening quote having already
 * been consumed by the caller. Handles triple quotes, escape sequences, and
 * (for f-strings) interleaved "{expr}" sub-token streams and "{{"/"}}"
 * escapes. Emits the appropriate TK_STR / TK_BYTES / TK_FSTR_* tokens.
 * Returns NULL on success, an Error otherwise. */
static Error* _eat_string(Lexer* self, c11_sbuf* buff, char quote, enum StringType type) {
    bool is_raw = type == RAW_STRING;
    bool is_fstring = type == F_STRING;
    if(is_fstring) { add_token(self, TK_FSTR_BEGIN); }
    // previous char is quote
    bool quote3 = match_n_chars(self, 2, quote); // triple-quoted literal?
    while(true) {
        char c = eatchar_include_newline(self);
        if(c == quote) {
            // inside a triple-quoted string a lone quote is literal text
            if(quote3 && !match_n_chars(self, 2, quote)) {
                c11_sbuf__write_char(buff, c);
                continue;
            }
            // end of string
            break;
        }
        if(c == '\0') { return LexerError(self, "EOL while scanning string literal"); }
        if(c == '\n') {
            // bare newlines are only legal inside triple-quoted strings
            if(!quote3)
                return LexerError(self, "EOL while scanning string literal");
            else {
                c11_sbuf__write_char(buff, c);
                continue;
            }
        }
        if(!is_raw && c == '\\') {
            switch(eatchar_include_newline(self)) {
                case '"': c11_sbuf__write_char(buff, '"'); break;
                case '\'': c11_sbuf__write_char(buff, '\''); break;
                case '\\': c11_sbuf__write_char(buff, '\\'); break;
                case 'n': c11_sbuf__write_char(buff, '\n'); break;
                case 'r': c11_sbuf__write_char(buff, '\r'); break;
                case 't': c11_sbuf__write_char(buff, '\t'); break;
                case 'b': c11_sbuf__write_char(buff, '\b'); break;
                case 'x': {
                    // NOTE(review): eatchar() does not check for end of input;
                    // a literal ending in "\x" appears to advance past the
                    // terminator before sscanf rejects it — verify upstream.
                    char hex[3] = {eatchar(self), eatchar(self), '\0'};
                    int code;
                    if(sscanf(hex, "%x", &code) != 1 || code > 0xFF) {
                        return LexerError(self, "invalid hex escape");
                    }
                    if(type == NORMAL_BYTES) {
                        // Bytes literals: write raw byte
                        c11_sbuf__write_char(buff, (char)code);
                    } else {
                        // Regular strings: encode as UTF-8
                        if(code <= 0x7F) {
                            c11_sbuf__write_char(buff, (char)code);
                        } else {
                            // Encode as 2-byte UTF-8 for code points 0x80-0xFF
                            c11_sbuf__write_char(buff, 0xC0 | (code >> 6));    // Leading byte
                            c11_sbuf__write_char(buff, 0x80 | (code & 0x3F));  // Continuation byte
                        }
                    }
                } break;
                default: return LexerError(self, "invalid escape char");
            }
        } else {
            if(is_fstring) {
                if(c == '{') {
                    if(matchchar(self, '{')) {
                        // '{{' -> '{'
                        c11_sbuf__write_char(buff, '{');
                    } else {
                        // submit previous string
                        c11_string* res = c11_sbuf__submit(buff);
                        if(res->size > 0) {
                            TokenValue value = {TokenValue_STR, ._str = res};
                            add_token_with_value(self, TK_FSTR_CPNT, value);
                        } else {
                            c11_string__delete(res);
                        }
                        c11_sbuf__ctor(buff); // re-init buffer
                        // submit {expr} tokens by recursing into the lexer
                        bool eof = false;
                        int token_count = self->nexts.length;
                        while(!eof) {
                            Error* err = lex_one_token(self, &eof, true);
                            if(err) return err;
                        }
                        if(self->nexts.length == token_count) {
                            // f'{}' is not allowed
                            return LexerError(self, "f-string: empty expression not allowed");
                        }
                    }
                } else if(c == '}') {
                    if(matchchar(self, '}')) {
                        // '}}' -> '}'
                        c11_sbuf__write_char(buff, '}');
                    } else {
                        return LexerError(self, "f-string: single '}' is not allowed");
                    }
                } else {
                    c11_sbuf__write_char(buff, c);
                }
            } else {
                c11_sbuf__write_char(buff, c);
            }
        }
    }
    // flush whatever text remains in the buffer as the final token
    c11_string* res = c11_sbuf__submit(buff);
    TokenValue value = {TokenValue_STR, ._str = res};
    if(is_fstring) {
        if(res->size > 0) {
            add_token_with_value(self, TK_FSTR_CPNT, value);
        } else {
            c11_string__delete(res);
        }
        add_token(self, TK_FSTR_END);
        return NULL;
    }
    if(type == NORMAL_BYTES) {
        add_token_with_value(self, TK_BYTES, value);
    } else {
        add_token_with_value(self, TK_STR, value);
    }
    return NULL;
}
  335. static Error* eat_string(Lexer* self, char quote, enum StringType type) {
  336. c11_sbuf buff;
  337. c11_sbuf__ctor(&buff);
  338. Error* err = _eat_string(self, &buff, quote, type);
  339. c11_sbuf__dtor(&buff);
  340. return err;
  341. }
/* Scan a numeric literal (integer, float, or imaginary) starting at
 * token_start, whose first char the caller already consumed.
 * Emits TK_NUM or TK_IMAG. Returns NULL on success. */
static Error* eat_number(Lexer* self) {
    const char* i = self->token_start;
    // greedily take every char that might belong to a number
    while(is_possible_number_char(*i))
        i++;
    bool is_scientific_notation = false;
    // extend over an exponent with an explicit sign, e.g. "1e-3"
    // (the sign char is what stopped the scan above)
    if(*(i - 1) == 'e' && (*i == '+' || *i == '-')) {
        i++;
        while(isdigit(*i) || *i == 'j')
            i++;
        is_scientific_notation = true;
    }
    c11_sv text = {self->token_start, i - self->token_start};
    self->curr_char = i;
    if(text.data[0] != '.' && !is_scientific_notation) {
        // try integer
        TokenValue value = {.index = TokenValue_I64};
        switch(c11__parse_uint(text, &value._i64, -1)) {
            case IntParsing_SUCCESS: add_token_with_value(self, TK_NUM, value); return NULL;
            case IntParsing_OVERFLOW: return LexerError(self, "int literal is too large");
            case IntParsing_FAILURE: break; // do nothing; fall through to float
        }
    }
    // try float
    double float_out;
    char* p_end;
    // NOTE: text.data is not NUL-terminated (it points into the source
    // buffer); the p_end comparisons below verify that strtod consumed
    // exactly the span we scanned
    float_out = strtod(text.data, &p_end);
    if(p_end == text.data + text.size) {
        TokenValue value = {.index = TokenValue_F64, ._f64 = float_out};
        add_token_with_value(self, TK_NUM, value);
        return NULL;
    }
    // a trailing 'j' marks an imaginary literal
    if(i[-1] == 'j' && p_end == text.data + text.size - 1) {
        TokenValue value = {.index = TokenValue_F64, ._f64 = float_out};
        add_token_with_value(self, TK_IMAG, value);
        return NULL;
    }
    return LexerError(self, "invalid number literal");
}
  380. static Error* eat_fstring_spec(Lexer* self, bool* eof) {
  381. while(true) {
  382. char c = eatchar_include_newline(self);
  383. if(c == '\n' || c == '\0') {
  384. return LexerError(self, "EOL while scanning f-string format spec");
  385. }
  386. if(c == '}') {
  387. add_token(self, TK_FSTR_SPEC);
  388. *eof = true;
  389. break;
  390. }
  391. }
  392. return NULL;
  393. }
/* Produce the token(s) for the next lexical element. Sets *eof at the end
 * of input — or, when is_fstring is true, at the '}' closing an f-string
 * expression. Returns NULL on success, an Error on lex failure. */
static Error* lex_one_token(Lexer* self, bool* eof, bool is_fstring) {
    *eof = false;
    while(*self->curr_char) {
        self->token_start = self->curr_char;
        char c = eatchar_include_newline(self);
        switch(c) {
            case '\'':
            case '"': {
                Error* err = eat_string(self, c, NORMAL_STRING);
                if(err) return err;
                return NULL;
            }
            // comments produce no token; keep scanning
            case '#': skip_line_comment(self); break;
            case '~': add_token(self, TK_INVERT); return NULL;
            case '{': add_token(self, TK_LBRACE); return NULL;
            case '}': {
                // inside an f-string expression, '}' ends the expression
                if(is_fstring) {
                    *eof = true;
                    return NULL;
                }
                add_token(self, TK_RBRACE);
                return NULL;
            }
            case ',': add_token(self, TK_COMMA); return NULL;
            case ':': {
                if(is_fstring) {
                    // BUG: f"{stack[2:]}" — a ':' inside a subscript is
                    // misread as the start of a format spec
                    return eat_fstring_spec(self, eof);
                }
                add_token(self, TK_COLON);
                return NULL;
            }
            case ';': add_token(self, TK_SEMICOLON); return NULL;
            case '(': add_token(self, TK_LPAREN); return NULL;
            case ')': add_token(self, TK_RPAREN); return NULL;
            case '[': add_token(self, TK_LBRACKET); return NULL;
            case ']': add_token(self, TK_RBRACKET); return NULL;
            case '@': add_token(self, TK_DECORATOR); return NULL;
            case '\\': {
                // line continuation character
                char c = eatchar_include_newline(self);
                if(c != '\n') {
                    return LexerError(self, "expected newline after line continuation character");
                }
                eat_spaces(self);
                return NULL;
            }
            case '%': add_token_2(self, '=', TK_MOD, TK_IMOD); return NULL;
            case '&': add_token_2(self, '=', TK_AND, TK_IAND); return NULL;
            case '|': add_token_2(self, '=', TK_OR, TK_IOR); return NULL;
            case '^': add_token_2(self, '=', TK_XOR, TK_IXOR); return NULL;
            case '.': {
                // '.', '..', '...', or a float literal like '.5'
                if(matchchar(self, '.')) {
                    if(matchchar(self, '.')) {
                        add_token(self, TK_DOTDOTDOT);
                    } else {
                        add_token(self, TK_DOTDOT);
                    }
                } else {
                    char next_char = *self->curr_char;
                    if(next_char >= '0' && next_char <= '9') {
                        Error* err = eat_number(self);
                        if(err) return err;
                    } else {
                        add_token(self, TK_DOT);
                    }
                }
                return NULL;
            }
            case '=': add_token_2(self, '=', TK_ASSIGN, TK_EQ); return NULL;
            case '+': add_token_2(self, '=', TK_ADD, TK_IADD); return NULL;
            case '>': {
                if(matchchar(self, '='))
                    add_token(self, TK_GE);
                else if(matchchar(self, '>'))
                    add_token_2(self, '=', TK_RSHIFT, TK_IRSHIFT);
                else
                    add_token(self, TK_GT);
                return NULL;
            }
            case '<': {
                if(matchchar(self, '='))
                    add_token(self, TK_LE);
                else if(matchchar(self, '<'))
                    add_token_2(self, '=', TK_LSHIFT, TK_ILSHIFT);
                else
                    add_token(self, TK_LT);
                return NULL;
            }
            case '-': {
                if(matchchar(self, '='))
                    add_token(self, TK_ISUB);
                else if(matchchar(self, '>'))
                    add_token(self, TK_ARROW);
                else
                    add_token(self, TK_SUB);
                return NULL;
            }
            case '!':
                // f-string conversion "!r" introduces a format spec
                if(is_fstring) {
                    if(matchchar(self, 'r')) { return eat_fstring_spec(self, eof); }
                }
                if(matchchar(self, '=')) {
                    add_token(self, TK_NE);
                    return NULL;
                } else {
                    return LexerError(self, "expected '=' after '!'");
                }
            case '*':
                if(matchchar(self, '*')) {
                    add_token(self, TK_POW); // '**'
                } else {
                    add_token_2(self, '=', TK_MUL, TK_IMUL);
                }
                return NULL;
            case '/':
                if(matchchar(self, '/')) {
                    add_token_2(self, '=', TK_FLOORDIV, TK_IFLOORDIV);
                } else {
                    add_token_2(self, '=', TK_DIV, TK_IDIV);
                }
                return NULL;
            // inter-token whitespace produces no token; keep scanning
            case ' ':
            case '\t': eat_spaces(self); break;
            case '\n': {
                add_token(self, TK_EOL);
                if(!eat_indentation(self)) {
                    return LexerError(self, "unindent does not match any outer indentation level");
                }
                return NULL;
            }
            default: {
                // string prefixes: f"", r"", b""
                if(c == 'f') {
                    if(matchchar(self, '\'')) return eat_string(self, '\'', F_STRING);
                    if(matchchar(self, '"')) return eat_string(self, '"', F_STRING);
                } else if(c == 'r') {
                    if(matchchar(self, '\'')) return eat_string(self, '\'', RAW_STRING);
                    if(matchchar(self, '"')) return eat_string(self, '"', RAW_STRING);
                } else if(c == 'b') {
                    if(matchchar(self, '\'')) return eat_string(self, '\'', NORMAL_BYTES);
                    if(matchchar(self, '"')) return eat_string(self, '"', NORMAL_BYTES);
                }
                if(c >= '0' && c <= '9') return eat_number(self);
                return eat_name(self);
            }
        }
    }
    // reached end of input
    if(is_fstring) return LexerError(self, "unterminated f-string expression");
    self->token_start = self->curr_char;
    // drain the indentation stack one DEDENT per call; subsequent calls
    // re-enter here (curr_char is at '\0') until only the base level remains
    while(self->indents.length > 1) {
        c11_vector__pop(&self->indents);
        add_token(self, TK_DEDENT);
        return NULL;
    }
    add_token(self, TK_EOF);
    *eof = true;
    return NULL;
}
  552. Error* Lexer__process(SourceData_ src, Token** out_tokens, int* out_length) {
  553. Lexer lexer;
  554. Lexer__ctor(&lexer, src);
  555. // push initial tokens
  556. Token sof =
  557. {TK_SOF, lexer.token_start, 0, lexer.current_line, lexer.brackets_level, EmptyTokenValue};
  558. c11_vector__push(Token, &lexer.nexts, sof);
  559. c11_vector__push(int, &lexer.indents, 0);
  560. bool eof = false;
  561. while(!eof) {
  562. void* err = lex_one_token(&lexer, &eof, false);
  563. if(err) {
  564. Lexer__dtor(&lexer);
  565. return err;
  566. }
  567. }
  568. // set out_tokens
  569. *out_tokens = c11_vector__submit(&lexer.nexts, out_length);
  570. Lexer__dtor(&lexer);
  571. return NULL;
  572. }
/* Spelling of every TokenIndex value; array order MUST match the TokenIndex
 * enum. The keyword range beginning at "False" (TK_FALSE) must stay sorted
 * in ascending order — eat_name() binary-searches it. */
const char* TokenSymbols[] = {
    "@eof",
    "@eol",
    "@sof",
    "@id",
    "@num",
    "@str",
    "@fstr-begin", // TK_FSTR_BEGIN
    "@fstr-cpnt",  // TK_FSTR_CPNT
    "@fstr-spec",  // TK_FSTR_SPEC
    "@fstr-end",   // TK_FSTR_END
    "@bytes",
    "@imag",
    "@indent",
    "@dedent",
    // These 3 are compound keywords which are generated on the fly
    "is not",
    "not in",
    "yield from",
    /*****************************************/
    "+",
    "+=",
    "-",
    "-=", // (INPLACE_OP - 1) can get '=' removed
    "*",
    "*=",
    "/",
    "/=",
    "//",
    "//=",
    "%",
    "%=",
    "&",
    "&=",
    "|",
    "|=",
    "^",
    "^=",
    "<<",
    "<<=",
    ">>",
    ">>=",
    /*****************************************/
    "(",
    ")",
    "[",
    "]",
    "{",
    "}",
    ".",
    "..",
    "...",
    ",",
    ":",
    ";",
    "**",
    "->",
    "#",
    "@",
    ">",
    "<",
    "=",
    "==",
    "!=",
    ">=",
    "<=",
    "~",
    /** KW_BEGIN **/
    // NOTE: These keywords should be sorted in ascending order!!
    "False",
    "None",
    "True",
    "and",
    "as",
    "assert",
    "break",
    "class",
    "continue",
    "def",
    "del",
    "elif",
    "else",
    "except",
    "finally",
    "for",
    "from",
    "global",
    "if",
    "import",
    "in",
    "is",
    "lambda",
    "match",
    "not",
    "or",
    "pass",
    "raise",
    "return",
    "try",
    "while",
    "with",
    "yield",
};
  676. #undef is_raw_string_used