// lexer.c — token symbol table and token-stream deserializer helpers.
  1. #include "pocketpy/common/config.h"
  2. #include "pocketpy/common/str.h"
  3. #include "pocketpy/common/smallmap.h"
  4. #include "pocketpy/compiler/lexer.h"
// Printable names for every token kind, indexed by the TkValue enum in
// lexer.h. The order of this table is part of the lexer's contract:
// each entry's position must match its enum value exactly.
const char* pk_TokenSymbols[] = {
    // Stream-control pseudo-tokens.
    "@eof", "@eol", "@sof",
    // Literal / identifier pseudo-tokens.
    "@id", "@num", "@str", "@fstr", "@long", "@bytes", "@imag",
    // Indentation pseudo-tokens.
    "@indent", "@dedent",
    // These 3 are compound keywords which are generated on the fly
    "is not", "not in", "yield from",
    /*****************************************/
    "+", "+=", "-", "-=",       // (INPLACE_OP - 1) can get '=' removed
    "*", "*=", "/", "/=", "//", "//=", "%", "%=",
    "&", "&=", "|", "|=", "^", "^=",
    "<<", "<<=", ">>", ">>=",
    /*****************************************/
    "(", ")", "[", "]", "{", "}",
    ".", "..", "...", ",", ":", ";",
    "**", "->", "#", "@",
    ">", "<", "=", "==", "!=", ">=", "<=", "~",
    /** KW_BEGIN **/
    // NOTE: These keywords should be sorted in ascending order!!
    // (keyword lookup presumably binary-searches this range — TODO confirm)
    "False", "None", "True", "and", "as", "assert", "break", "class", "continue",
    "def", "del", "elif", "else", "except", "finally", "for", "from", "global",
    "if", "import", "in", "is", "lambda", "not", "or", "pass", "raise", "return",
    "try", "while", "with", "yield",
};
  28. void pkpy_TokenDeserializer__ctor(pkpy_TokenDeserializer* self, const char* source){
  29. self->curr = source;
  30. self->source = source;
  31. }
  32. bool pkpy_TokenDeserializer__match_char(pkpy_TokenDeserializer* self, char c){
  33. if(*self->curr == c) {
  34. self->curr++;
  35. return true;
  36. }
  37. return false;
  38. }
  39. c11_string pkpy_TokenDeserializer__read_string(pkpy_TokenDeserializer* self, char c){
  40. const char* start = self->curr;
  41. while(*self->curr != c)
  42. self->curr++;
  43. c11_string retval = {start, (int)(self->curr-start)};
  44. self->curr++; // skip the delimiter
  45. return retval;
  46. }
  47. pkpy_Str pkpy_TokenDeserializer__read_string_from_hex(pkpy_TokenDeserializer* self, char c){
  48. c11_string sv = pkpy_TokenDeserializer__read_string(self, c);
  49. const char* s = sv.data;
  50. char* buffer = (char*)malloc(sv.size / 2 + 1);
  51. for(int i = 0; i < sv.size; i += 2) {
  52. char c = 0;
  53. if(s[i] >= '0' && s[i] <= '9')
  54. c += s[i] - '0';
  55. else if(s[i] >= 'a' && s[i] <= 'f')
  56. c += s[i] - 'a' + 10;
  57. else
  58. assert(false);
  59. c <<= 4;
  60. if(s[i + 1] >= '0' && s[i + 1] <= '9')
  61. c += s[i + 1] - '0';
  62. else if(s[i + 1] >= 'a' && s[i + 1] <= 'f')
  63. c += s[i + 1] - 'a' + 10;
  64. else
  65. assert(false);
  66. buffer[i / 2] = c;
  67. }
  68. buffer[sv.size / 2] = 0;
  69. return (pkpy_Str){
  70. .size = sv.size / 2,
  71. .is_ascii = c11__isascii(buffer, sv.size / 2),
  72. .is_sso = false,
  73. ._ptr = buffer
  74. };
  75. }
  76. int pkpy_TokenDeserializer__read_count(pkpy_TokenDeserializer* self){
  77. assert(*self->curr == '=');
  78. self->curr++;
  79. return pkpy_TokenDeserializer__read_uint(self, '\n');
  80. }
  81. int64_t pkpy_TokenDeserializer__read_uint(pkpy_TokenDeserializer* self, char c){
  82. int64_t out = 0;
  83. while(*self->curr != c) {
  84. out = out * 10 + (*self->curr - '0');
  85. self->curr++;
  86. }
  87. self->curr++; // skip the delimiter
  88. return out;
  89. }
  90. double pkpy_TokenDeserializer__read_float(pkpy_TokenDeserializer* self, char c){
  91. c11_string sv = pkpy_TokenDeserializer__read_string(self, c);
  92. pkpy_Str nullterm;
  93. pkpy_Str__ctor2(&nullterm, sv.data, sv.size);
  94. char* end;
  95. double retval = strtod(pkpy_Str__data(&nullterm), &end);
  96. pkpy_Str__dtor(&nullterm);
  97. assert(*end == 0);
  98. return retval;
  99. }