str.cpp 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. #include "pocketpy/common/str.hpp"
  2. #include "pocketpy/common/gil.hpp"
  3. #include <cassert>
  4. #include <ostream>
  5. #include <algorithm>
  6. #include <cmath>
  7. #include <map>
  8. namespace pkpy {
  9. Str::Str(pair<char*, int> detached) {
  10. this->size = detached.second;
  11. this->is_ascii = true;
  12. this->is_sso = false;
  13. this->_ptr = detached.first;
  14. for(int i = 0; i < size; i++) {
  15. if(!isascii(_ptr[i])) {
  16. is_ascii = false;
  17. break;
  18. }
  19. }
  20. assert(_ptr[size] == '\0');
  21. }
  22. Str Str::strip(bool left, bool right, const Str& chars) const {
  23. int L = 0;
  24. int R = u8_length();
  25. if(left) {
  26. while(L < R && chars.index(u8_getitem(L)) != -1)
  27. L++;
  28. }
  29. if(right) {
  30. while(L < R && chars.index(u8_getitem(R - 1)) != -1)
  31. R--;
  32. }
  33. return u8_slice(L, R, 1);
  34. }
  35. Str Str::strip(bool left, bool right) const {
  36. const char* data = pkpy_Str__data(this);
  37. if(is_ascii) {
  38. int L = 0;
  39. int R = size;
  40. if(left) {
  41. while(L < R && (data[L] == ' ' || data[L] == '\t' || data[L] == '\n' || data[L] == '\r'))
  42. L++;
  43. }
  44. if(right) {
  45. while(L < R && (data[R - 1] == ' ' || data[R - 1] == '\t' || data[R - 1] == '\n' || data[R - 1] == '\r'))
  46. R--;
  47. }
  48. return substr(L, R - L);
  49. } else {
  50. return strip(left, right, " \t\n\r");
  51. }
  52. }
  53. vector<std::string_view> Str::split(const Str& sep) const {
  54. vector<std::string_view> result;
  55. std::string_view tmp;
  56. int start = 0;
  57. while(true) {
  58. int i = index(sep, start);
  59. if(i == -1) break;
  60. tmp = sv().substr(start, i - start);
  61. if(!tmp.empty()) result.push_back(tmp);
  62. start = i + sep.size;
  63. }
  64. tmp = sv().substr(start, size - start);
  65. if(!tmp.empty()) result.push_back(tmp);
  66. return result;
  67. }
  68. vector<std::string_view> Str::split(char sep) const {
  69. vector<std::string_view> result;
  70. const char* data = pkpy_Str__data(this);
  71. int i = 0;
  72. for(int j = 0; j < size; j++) {
  73. if(data[j] == sep) {
  74. if(j > i) result.emplace_back(data + i, j - i);
  75. i = j + 1;
  76. continue;
  77. }
  78. }
  79. if(size > i) result.emplace_back(data + i, size - i);
  80. return result;
  81. }
  // Accessor for the name -> index table used by StrName::get().
  // The table is a function-local static, so it is created on first use and
  // every call returns the same object.
  static std::map<std::string_view, uint16_t>& _interned() {
      using NameToIndex = std::map<std::string_view, uint16_t>;
      static NameToIndex table;
      return table;
  }
  // Accessor for the index -> text table; it owns the std::string copies that
  // StrName::sv() and StrName::c_str() hand out.
  // The table is a function-local static, so it is created on first use and
  // every call returns the same object.
  static std::map<uint16_t, std::string>& _r_interned() {
      using IndexToName = std::map<uint16_t, std::string>;
      static IndexToName table;
      return table;
  }
  90. std::string_view StrName::sv() const { return _r_interned()[index]; }
  91. const char* StrName::c_str() const { return _r_interned()[index].c_str(); }
  92. uint32_t StrName::_pesudo_random_index = 0;
  93. StrName StrName::get(std::string_view s) {
  94. PK_GLOBAL_SCOPE_LOCK()
  95. auto it = _interned().find(s);
  96. if(it != _interned().end()) return StrName(it->second);
  97. // generate new index
  98. // https://github.com/python/cpython/blob/3.12/Objects/dictobject.c#L175
  99. uint16_t index = ((_pesudo_random_index * 5) + 1) & 65535;
  100. if(index == 0) PK_FATAL_ERROR("StrName index overflow\n")
  101. auto res = _r_interned().emplace(index, s);
  102. assert(res.second);
  103. s = std::string_view(res.first->second);
  104. _interned()[s] = index;
  105. _pesudo_random_index = index;
  106. return StrName(index);
  107. }
  108. Str SStream::str() {
  109. // after this call, the buffer is no longer valid
  110. buffer.push_back('\0');
  111. auto detached = buffer.detach();
  112. detached.second--; // remove the last '\0'
  113. return Str(detached);
  114. }
  115. SStream& SStream::operator<< (const Str& s) {
  116. for(char c: s)
  117. buffer.push_back(c);
  118. return *this;
  119. }
  120. SStream& SStream::operator<< (const char* s) {
  121. while(*s)
  122. buffer.push_back(*s++);
  123. return *this;
  124. }
  125. SStream& SStream::operator<< (const std::string& s) {
  126. for(char c: s)
  127. buffer.push_back(c);
  128. return *this;
  129. }
  130. SStream& SStream::operator<< (std::string_view s) {
  131. for(char c: s)
  132. buffer.push_back(c);
  133. return *this;
  134. }
  135. SStream& SStream::operator<< (char c) {
  136. buffer.push_back(c);
  137. return *this;
  138. }
  139. SStream& SStream::operator<< (StrName sn) { return *this << sn.sv(); }
  140. SStream& SStream::operator<< (size_t val) {
  141. // size_t could be out of range of `i64`, use `std::to_string` instead
  142. return (*this) << std::to_string(val);
  143. }
  144. SStream& SStream::operator<< (int val) { return (*this) << static_cast<i64>(val); }
  145. SStream& SStream::operator<< (i64 val) {
  146. // str(-2**64).__len__() == 21
  147. buffer.reserve(buffer.size() + 24);
  148. if(val == 0) {
  149. buffer.push_back('0');
  150. return *this;
  151. }
  152. if(val < 0) {
  153. buffer.push_back('-');
  154. val = -val;
  155. }
  156. auto begin = buffer.end();
  157. while(val) {
  158. buffer.push_back('0' + val % 10);
  159. val /= 10;
  160. }
  161. std::reverse(begin, buffer.end());
  162. return *this;
  163. }
  164. SStream& SStream::operator<< (f64 val) {
  165. if(std::isinf(val)) { return (*this) << (val > 0 ? "inf" : "-inf"); }
  166. if(std::isnan(val)) { return (*this) << "nan"; }
  167. char b[32];
  168. if(_precision == -1) {
  169. int prec = std::numeric_limits<f64>::max_digits10 - 1;
  170. snprintf(b, sizeof(b), "%.*g", prec, val);
  171. } else {
  172. int prec = _precision;
  173. snprintf(b, sizeof(b), "%.*f", prec, val);
  174. }
  175. (*this) << b;
  176. if(std::all_of(b + 1, b + strlen(b), isdigit)) (*this) << ".0";
  177. return *this;
  178. }
  179. void SStream::write_hex(unsigned char c, bool non_zero) {
  180. unsigned char high = c >> 4;
  181. unsigned char low = c & 0xf;
  182. if(non_zero) {
  183. if(high) (*this) << PK_HEX_TABLE[high];
  184. if(high || low) (*this) << PK_HEX_TABLE[low];
  185. } else {
  186. (*this) << PK_HEX_TABLE[high];
  187. (*this) << PK_HEX_TABLE[low];
  188. }
  189. }
  190. void SStream::write_hex(void* p) {
  191. if(p == nullptr) {
  192. (*this) << "0x0";
  193. return;
  194. }
  195. (*this) << "0x";
  196. uintptr_t p_t = reinterpret_cast<uintptr_t>(p);
  197. bool non_zero = true;
  198. for(int i = sizeof(void*) - 1; i >= 0; i--) {
  199. unsigned char cpnt = (p_t >> (i * 8)) & 0xff;
  200. write_hex(cpnt, non_zero);
  201. if(cpnt != 0) non_zero = false;
  202. }
  203. }
  204. void SStream::write_hex(i64 val) {
  205. if(val == 0) {
  206. (*this) << "0x0";
  207. return;
  208. }
  209. if(val < 0) {
  210. (*this) << "-";
  211. val = -val;
  212. }
  213. (*this) << "0x";
  214. bool non_zero = true;
  215. for(int i = 56; i >= 0; i -= 8) {
  216. unsigned char cpnt = (val >> i) & 0xff;
  217. write_hex(cpnt, non_zero);
  218. if(cpnt != 0) non_zero = false;
  219. }
  220. }
  221. #undef PK_STR_ALLOCATE
  222. #undef PK_STR_COPY_INIT
  223. // unary operators
  224. const StrName __repr__ = StrName::get("__repr__");
  225. const StrName __str__ = StrName::get("__str__");
  226. const StrName __hash__ = StrName::get("__hash__");
  227. const StrName __len__ = StrName::get("__len__");
  228. const StrName __iter__ = StrName::get("__iter__");
  229. const StrName __next__ = StrName::get("__next__");
  230. const StrName __neg__ = StrName::get("__neg__");
  231. // logical operators
  232. const StrName __eq__ = StrName::get("__eq__");
  233. const StrName __lt__ = StrName::get("__lt__");
  234. const StrName __le__ = StrName::get("__le__");
  235. const StrName __gt__ = StrName::get("__gt__");
  236. const StrName __ge__ = StrName::get("__ge__");
  237. const StrName __contains__ = StrName::get("__contains__");
  238. // binary operators
  239. const StrName __add__ = StrName::get("__add__");
  240. const StrName __radd__ = StrName::get("__radd__");
  241. const StrName __sub__ = StrName::get("__sub__");
  242. const StrName __rsub__ = StrName::get("__rsub__");
  243. const StrName __mul__ = StrName::get("__mul__");
  244. const StrName __rmul__ = StrName::get("__rmul__");
  245. const StrName __truediv__ = StrName::get("__truediv__");
  246. const StrName __floordiv__ = StrName::get("__floordiv__");
  247. const StrName __mod__ = StrName::get("__mod__");
  248. const StrName __pow__ = StrName::get("__pow__");
  249. const StrName __matmul__ = StrName::get("__matmul__");
  250. const StrName __lshift__ = StrName::get("__lshift__");
  251. const StrName __rshift__ = StrName::get("__rshift__");
  252. const StrName __and__ = StrName::get("__and__");
  253. const StrName __or__ = StrName::get("__or__");
  254. const StrName __xor__ = StrName::get("__xor__");
  255. const StrName __invert__ = StrName::get("__invert__");
  256. // indexer
  257. const StrName __getitem__ = StrName::get("__getitem__");
  258. const StrName __setitem__ = StrName::get("__setitem__");
  259. const StrName __delitem__ = StrName::get("__delitem__");
  260. // specials
  261. const StrName __new__ = StrName::get("__new__");
  262. const StrName __init__ = StrName::get("__init__");
  263. const StrName __call__ = StrName::get("__call__");
  264. const StrName __divmod__ = StrName::get("__divmod__");
  265. const StrName __enter__ = StrName::get("__enter__");
  266. const StrName __exit__ = StrName::get("__exit__");
  267. const StrName __name__ = StrName::get("__name__");
  268. const StrName __all__ = StrName::get("__all__");
  269. const StrName __package__ = StrName::get("__package__");
  270. const StrName __path__ = StrName::get("__path__");
  271. const StrName __class__ = StrName::get("__class__");
  272. const StrName __missing__ = StrName::get("__missing__");
  273. const StrName pk_id_add = StrName::get("add");
  274. const StrName pk_id_set = StrName::get("set");
  275. const StrName pk_id_long = StrName::get("long");
  276. const StrName pk_id_complex = StrName::get("complex");
  277. } // namespace pkpy