str.h 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. #pragma once
  2. #include "common.h"
  3. #include "memory.h"
  4. #include "vector.h"
  5. namespace pkpy {
  // SStream is defined further down in this header; it is declared early
  // because Str::escape_ takes an SStream& parameter.
  struct SStream;

  // Defined out of line. NOTE(review): presumably returns the byte length of the
  // UTF-8 sequence introduced by lead byte `c`, with `suppress` silencing the
  // error path for invalid lead bytes - confirm against the definition.
  int utf8len(unsigned char c, bool suppress = false);
  8. struct Str{
  9. int size;
  10. bool is_ascii;
  11. char* data;
  12. char _inlined[16];
  13. mutable const char* _cached_c_str = nullptr;
  14. bool is_inlined() const { return data == _inlined; }
  15. Str(): size(0), is_ascii(true), data(_inlined) {}
  16. Str(int size, bool is_ascii);
  17. Str(const std::string& s);
  18. Str(std::string_view s);
  19. Str(std::nullptr_t) { FATAL_ERROR(); }
  20. Str(const char* s);
  21. Str(const char* s, int len);
  22. Str(const Str& other);
  23. Str(Str&& other);
  24. void _alloc();
  25. const char* begin() const { return data; }
  26. const char* end() const { return data + size; }
  27. char operator[](int idx) const { return data[idx]; }
  28. int length() const { return size; }
  29. bool empty() const { return size == 0; }
  30. size_t hash() const{ return std::hash<std::string_view>()(sv()); }
  31. Str& operator=(const Str& other);
  32. Str operator+(const Str& other) const;
  33. Str operator+(const char* p) const;
  34. bool operator==(const Str& other) const;
  35. bool operator!=(const Str& other) const;
  36. bool operator==(const std::string_view other) const;
  37. bool operator!=(const std::string_view other) const;
  38. bool operator==(const char* p) const;
  39. bool operator!=(const char* p) const;
  40. bool operator<(const Str& other) const;
  41. bool operator>(const Str& other) const;
  42. bool operator<=(const Str& other) const;
  43. bool operator>=(const Str& other) const;
  44. bool operator<(const std::string_view other) const;
  45. ~Str();
  46. friend Str operator+(const char* p, const Str& str);
  47. friend std::ostream& operator<<(std::ostream& os, const Str& str);
  48. friend bool operator<(const std::string_view other, const Str& str);
  49. Str substr(int start, int len) const;
  50. Str substr(int start) const;
  51. char* c_str_dup() const;
  52. const char* c_str() const;
  53. std::string_view sv() const;
  54. std::string str() const;
  55. Str lstrip() const;
  56. Str strip() const;
  57. Str lower() const;
  58. Str upper() const;
  59. Str escape(bool single_quote=true) const;
  60. void escape_(SStream& ss, bool single_quote=true) const;
  61. int index(const Str& sub, int start=0) const;
  62. Str replace(char old, char new_) const;
  63. Str replace(const Str& old, const Str& new_, int count=-1) const;
  64. std::vector<std::string_view> split(const Str& sep) const;
  65. int count(const Str& sub) const;
  66. /*************unicode*************/
  67. int _unicode_index_to_byte(int i) const;
  68. int _byte_index_to_unicode(int n) const;
  69. Str u8_getitem(int i) const;
  70. Str u8_slice(int start, int stop, int step) const;
  71. int u8_length() const;
  72. };
  73. struct StrName {
  74. uint16_t index;
  75. StrName();
  76. explicit StrName(uint16_t index);
  77. StrName(const char* s);
  78. StrName(const Str& s);
  79. std::string_view sv() const;
  80. bool empty() const { return index == 0; }
  81. friend std::ostream& operator<<(std::ostream& os, const StrName& sn);
  82. Str escape() const;
  83. bool operator==(const StrName& other) const noexcept {
  84. return this->index == other.index;
  85. }
  86. bool operator!=(const StrName& other) const noexcept {
  87. return this->index != other.index;
  88. }
  89. bool operator<(const StrName& other) const noexcept {
  90. return this->index < other.index;
  91. }
  92. bool operator>(const StrName& other) const noexcept {
  93. return this->index > other.index;
  94. }
  95. static bool is_valid(int index);
  96. static StrName get(std::string_view s);
  97. static std::map<std::string, uint16_t, std::less<>>& _interned();
  98. static std::map<uint16_t, std::string>& _r_interned();
  99. static uint32_t _pesudo_random_index;
  100. };
  101. struct SStream{
  102. PK_ALWAYS_PASS_BY_POINTER(SStream)
  103. // pod_vector<T> is allocated by pool64 so the buffer can be moved into Str without a copy
  104. pod_vector<char> buffer;
  105. bool empty() const { return buffer.empty(); }
  106. SStream(){}
  107. SStream(int guess_size){ buffer.reserve(guess_size); }
  108. Str str(){
  109. // after this call, the buffer is no longer valid
  110. auto detached = buffer.detach();
  111. return Str(detached.first, detached.second);
  112. }
  113. SStream& operator<<(const Str& s){
  114. buffer.extend(s.begin(), s.end());
  115. return *this;
  116. }
  117. SStream& operator<<(const char* s){
  118. buffer.extend(s, s + strlen(s));
  119. return *this;
  120. }
  121. SStream& operator<<(i64 val){
  122. // str(-2**64).__len__() == 21
  123. buffer.reserve(buffer.size() + 24);
  124. if(val == 0){
  125. buffer.push_back('0');
  126. return *this;
  127. }
  128. if(val < 0){
  129. buffer.push_back('-');
  130. val = -val;
  131. }
  132. char* begin = buffer.end();
  133. while(val){
  134. buffer.push_back('0' + val % 10);
  135. val /= 10;
  136. }
  137. std::reverse(begin, buffer.end());
  138. return *this;
  139. }
  140. SStream& operator<<(const std::string& s){
  141. buffer.extend(s.data(), s.data() + s.size());
  142. return *this;
  143. }
  144. SStream& operator<<(std::string_view s){
  145. buffer.extend(s.data(), s.data() + s.size());
  146. return *this;
  147. }
  148. SStream& operator<<(char c){
  149. buffer.push_back(c);
  150. return *this;
  151. }
  152. template<typename T>
  153. SStream& operator<<(T val){
  154. (*this) << std::to_string(val);
  155. return *this;
  156. }
  157. };
  158. template<typename... Args>
  159. Str fmt(Args&&... args) {
  160. SStream ss;
  161. (ss << ... << args);
  162. return ss.str();
  163. }
  // Thin wrapper around a C-string pointer. The pointer is stored as-is (no copy,
  // nothing is freed) and the wrapper converts implicitly to and from const char*.
  struct CString{
      const char* ptr;
      CString(const char* ptr): ptr{ptr} {}
      operator const char*() const { return this->ptr; }
  };
  169. // unary operators
  170. const StrName __repr__ = StrName::get("__repr__");
  171. const StrName __str__ = StrName::get("__str__");
  172. const StrName __hash__ = StrName::get("__hash__"); // unused
  173. const StrName __len__ = StrName::get("__len__");
  174. const StrName __iter__ = StrName::get("__iter__");
  175. const StrName __next__ = StrName::get("__next__"); // unused
  176. const StrName __neg__ = StrName::get("__neg__"); // unused
  177. const StrName __bool__ = StrName::get("__bool__"); // unused
  178. // logical operators
  179. const StrName __eq__ = StrName::get("__eq__");
  180. const StrName __lt__ = StrName::get("__lt__");
  181. const StrName __le__ = StrName::get("__le__");
  182. const StrName __gt__ = StrName::get("__gt__");
  183. const StrName __ge__ = StrName::get("__ge__");
  184. const StrName __contains__ = StrName::get("__contains__");
  185. // binary operators
  186. const StrName __add__ = StrName::get("__add__");
  187. const StrName __radd__ = StrName::get("__radd__");
  188. const StrName __sub__ = StrName::get("__sub__");
  189. const StrName __rsub__ = StrName::get("__rsub__");
  190. const StrName __mul__ = StrName::get("__mul__");
  191. const StrName __rmul__ = StrName::get("__rmul__");
  192. const StrName __truediv__ = StrName::get("__truediv__");
  193. const StrName __floordiv__ = StrName::get("__floordiv__");
  194. const StrName __mod__ = StrName::get("__mod__");
  195. const StrName __pow__ = StrName::get("__pow__");
  196. const StrName __matmul__ = StrName::get("__matmul__");
  197. const StrName __lshift__ = StrName::get("__lshift__");
  198. const StrName __rshift__ = StrName::get("__rshift__");
  199. const StrName __and__ = StrName::get("__and__");
  200. const StrName __or__ = StrName::get("__or__");
  201. const StrName __xor__ = StrName::get("__xor__");
  202. const StrName __invert__ = StrName::get("__invert__");
  203. // indexer
  204. const StrName __getitem__ = StrName::get("__getitem__");
  205. const StrName __setitem__ = StrName::get("__setitem__");
  206. const StrName __delitem__ = StrName::get("__delitem__");
  207. // specials
  208. const StrName __new__ = StrName::get("__new__");
  209. const StrName __init__ = StrName::get("__init__");
  210. const StrName __call__ = StrName::get("__call__");
  211. const StrName __divmod__ = StrName::get("__divmod__");
  212. const StrName __enter__ = StrName::get("__enter__");
  213. const StrName __exit__ = StrName::get("__exit__");
  214. const StrName __name__ = StrName::get("__name__");
  215. const StrName __all__ = StrName::get("__all__");
  216. const StrName __package__ = StrName::get("__package__");
  217. const StrName __path__ = StrName::get("__path__");
  218. const StrName __class__ = StrName::get("__class__");
  219. const StrName pk_id_add = StrName::get("add");
  220. const StrName pk_id_set = StrName::get("set");
  221. const StrName pk_id_eval = StrName::get("eval");
  // Declares a `static const StrName` called `name`, constructed from the
  // identifier's own spelling (`#name` stringifies the macro argument).
  #define DEF_SNAME(name) static const StrName name(#name)
  223. } // namespace pkpy