str.h 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. #pragma once
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <functional>
#include <ostream>
#include <regex>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
  6. typedef std::stringstream _StrStream;
  7. class _Str {
  8. private:
  9. mutable bool utf8_initialized = false;
  10. mutable std::vector<uint16_t> _u8_index; // max_len is 65535
  11. std::string _s;
  12. mutable bool hash_initialized = false;
  13. mutable size_t _hash;
  14. void utf8_lazy_init() const{
  15. if(utf8_initialized) return;
  16. for(uint16_t i = 0; i < size(); i++){
  17. // https://stackoverflow.com/questions/3911536/utf-8-unicode-whats-with-0xc0-and-0x80
  18. if((_s[i] & 0xC0) != 0x80)
  19. _u8_index.push_back(i);
  20. }
  21. utf8_initialized = true;
  22. }
  23. public:
  24. _Str(const char* s): _s(s) {}
  25. _Str(const char* s, size_t len): _s(s, len) {}
  26. _Str(int n, char fill): _s(n, fill) {}
  27. _Str(const std::string& s): _s(s) {}
  28. _Str(std::string&& s): _s(std::move(s)) {}
  29. _Str(const _StrStream& ss): _s(ss.str()) {}
  30. _Str(){}
  31. size_t hash() const{
  32. if(!hash_initialized){
  33. _hash = std::hash<std::string>()(_s);
  34. hash_initialized = true;
  35. }
  36. return _hash;
  37. }
  38. int u8_length() const {
  39. utf8_lazy_init();
  40. return _u8_index.size();
  41. }
  42. _Str u8_getitem(int i) const{
  43. return u8_substr(i, i+1);
  44. }
  45. _Str u8_substr(int start, int end) const{
  46. utf8_lazy_init();
  47. if(start >= end) return _Str();
  48. int c_end = end >= _u8_index.size() ? size() : _u8_index[end];
  49. return _s.substr(_u8_index.at(start), c_end - _u8_index.at(start));
  50. }
  51. int size() const {
  52. return _s.size();
  53. }
  54. bool empty() const {
  55. return _s.empty();
  56. }
  57. bool operator==(const _Str& other) const {
  58. return _s == other._s;
  59. }
  60. bool operator!=(const _Str& other) const {
  61. return _s != other._s;
  62. }
  63. bool operator==(const char* other) const {
  64. return _s == other;
  65. }
  66. bool operator!=(const char* other) const {
  67. return _s != other;
  68. }
  69. bool operator<(const _Str& other) const {
  70. return _s < other._s;
  71. }
  72. bool operator>(const _Str& other) const {
  73. return _s > other._s;
  74. }
  75. char operator[](int i) const {
  76. return _s[i];
  77. }
  78. friend std::ostream& operator<<(std::ostream& os, const _Str& s){
  79. os << s._s;
  80. return os;
  81. }
  82. _Str operator+(const _Str& other) const {
  83. return _Str(_s + other._s);
  84. }
  85. _Str operator+(const char* other) const {
  86. return _Str(_s + other);
  87. }
  88. _Str operator+(const std::string& other) const {
  89. return _Str(_s + other);
  90. }
  91. friend _Str operator+(const char* other, const _Str& s){
  92. return _Str(other + s._s);
  93. }
  94. friend _Str operator+(const std::string& other, const _Str& s){
  95. return _Str(other + s._s);
  96. }
  97. const std::string& str() const {
  98. return _s;
  99. }
  100. const char* c_str() const {
  101. return _s.c_str();
  102. }
  103. static const std::size_t npos = std::string::npos;
  104. _Str __lstrip() const {
  105. std::string copy(_s);
  106. copy.erase(copy.begin(), std::find_if(copy.begin(), copy.end(), [](char c) {
  107. return !std::isspace(c);
  108. }));
  109. return _Str(copy);
  110. }
  111. };
  112. namespace std {
  113. template<>
  114. struct hash<_Str> {
  115. std::size_t operator()(const _Str& s) const {
  116. return s.hash();
  117. }
  118. };
  119. }
  120. const _Str& __class__ = _Str("__class__");
  121. const _Str& __base__ = _Str("__base__");
  122. const _Str& __new__ = _Str("__new__");
  123. const _Str& __iter__ = _Str("__iter__");
  124. const _Str& __str__ = _Str("__str__");
  125. const _Str& __repr__ = _Str("__repr__");
  126. const _Str& __neg__ = _Str("__neg__");
  127. const _Str& __getitem__ = _Str("__getitem__");
  128. const _Str& __setitem__ = _Str("__setitem__");
  129. const _Str& __delitem__ = _Str("__delitem__");
  130. const _Str& __contains__ = _Str("__contains__");
  131. const _Str& __init__ = _Str("__init__");
  132. const _Str CMP_SPECIAL_METHODS[] = {
  133. "__lt__", "__le__", "__eq__", "__ne__", "__gt__", "__ge__"
  134. }; // __ne__ should not be used
  135. const _Str BIN_SPECIAL_METHODS[] = {
  136. "__add__", "__sub__", "__mul__", "__truediv__", "__floordiv__", "__mod__", "__pow__"
  137. };