str.h 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. #pragma once
  2. #include "__stl__.h"
  3. typedef std::stringstream _StrStream;
  4. class _Str {
  5. private:
  6. mutable bool utf8_initialized = false;
  7. mutable std::vector<uint16_t> _u8_index; // max_len is 65535
  8. std::string _s;
  9. mutable bool hash_initialized = false;
  10. mutable size_t _hash;
  11. void utf8_lazy_init() const{
  12. if(utf8_initialized) return;
  13. for(uint16_t i = 0; i < size(); i++){
  14. // https://stackoverflow.com/questions/3911536/utf-8-unicode-whats-with-0xc0-and-0x80
  15. if((_s[i] & 0xC0) != 0x80)
  16. _u8_index.push_back(i);
  17. }
  18. utf8_initialized = true;
  19. }
  20. public:
  21. _Str(const char* s): _s(s) {}
  22. _Str(const char* s, size_t len): _s(s, len) {}
  23. _Str(int n, char fill): _s(n, fill) {}
  24. _Str(const std::string& s): _s(s) {}
  25. _Str(std::string&& s): _s(std::move(s)) {}
  26. _Str(const _StrStream& ss): _s(ss.str()) {}
  27. _Str(){}
  28. size_t hash() const{
  29. if(!hash_initialized){
  30. _hash = std::hash<std::string>()(_s);
  31. hash_initialized = true;
  32. }
  33. return _hash;
  34. }
  35. int u8_length() const {
  36. utf8_lazy_init();
  37. return _u8_index.size();
  38. }
  39. _Str u8_getitem(int i) const{
  40. return u8_substr(i, i+1);
  41. }
  42. _Str u8_substr(int start, int end) const{
  43. utf8_lazy_init();
  44. if(start >= end) return _Str();
  45. int c_end = end >= _u8_index.size() ? size() : _u8_index[end];
  46. return _s.substr(_u8_index.at(start), c_end - _u8_index.at(start));
  47. }
  48. int size() const {
  49. return _s.size();
  50. }
  51. bool empty() const {
  52. return _s.empty();
  53. }
  54. bool operator==(const _Str& other) const {
  55. return _s == other._s;
  56. }
  57. bool operator!=(const _Str& other) const {
  58. return _s != other._s;
  59. }
  60. bool operator==(const char* other) const {
  61. return _s == other;
  62. }
  63. bool operator!=(const char* other) const {
  64. return _s != other;
  65. }
  66. bool operator<(const _Str& other) const {
  67. return _s < other._s;
  68. }
  69. bool operator>(const _Str& other) const {
  70. return _s > other._s;
  71. }
  72. char operator[](int i) const {
  73. return _s[i];
  74. }
  75. friend std::ostream& operator<<(std::ostream& os, const _Str& s){
  76. os << s._s;
  77. return os;
  78. }
  79. _Str operator+(const _Str& other) const {
  80. return _Str(_s + other._s);
  81. }
  82. _Str operator+(const char* other) const {
  83. return _Str(_s + other);
  84. }
  85. _Str operator+(const std::string& other) const {
  86. return _Str(_s + other);
  87. }
  88. friend _Str operator+(const char* other, const _Str& s){
  89. return _Str(other + s._s);
  90. }
  91. friend _Str operator+(const std::string& other, const _Str& s){
  92. return _Str(other + s._s);
  93. }
  94. const std::string& str() const {
  95. return _s;
  96. }
  97. const char* c_str() const {
  98. return _s.c_str();
  99. }
  100. static const std::size_t npos = std::string::npos;
  101. _Str __lstrip() const {
  102. std::string copy(_s);
  103. copy.erase(copy.begin(), std::find_if(copy.begin(), copy.end(), [](char c) {
  104. return !std::isspace(c);
  105. }));
  106. return _Str(copy);
  107. }
  108. };
  109. namespace std {
  110. template<>
  111. struct hash<_Str> {
  112. std::size_t operator()(const _Str& s) const {
  113. return s.hash();
  114. }
  115. };
  116. }
  117. const _Str& __class__ = _Str("__class__");
  118. const _Str& __base__ = _Str("__base__");
  119. const _Str& __new__ = _Str("__new__");
  120. const _Str& __iter__ = _Str("__iter__");
  121. const _Str& __str__ = _Str("__str__");
  122. const _Str& __repr__ = _Str("__repr__");
  123. const _Str& __neg__ = _Str("__neg__");
  124. const _Str& __module__ = _Str("__module__");
  125. const _Str& __getitem__ = _Str("__getitem__");
  126. const _Str& __setitem__ = _Str("__setitem__");
  127. const _Str& __delitem__ = _Str("__delitem__");
  128. const _Str& __contains__ = _Str("__contains__");
  129. const _Str& __init__ = _Str("__init__");
  130. const _Str CMP_SPECIAL_METHODS[] = {
  131. "__lt__", "__le__", "__eq__", "__ne__", "__gt__", "__ge__"
  132. }; // __ne__ should not be used
  133. const _Str BIN_SPECIAL_METHODS[] = {
  134. "__add__", "__sub__", "__mul__", "__truediv__", "__floordiv__", "__mod__", "__pow__"
  135. };