str.h 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. #pragma once
  #include <cstdint>
  #include <sstream>
  #include <string>
  #include <vector>
  // Stream type accepted by the _Str(const _StrStream&) constructor.
  using _StrStream = std::stringstream;

  // String value type wrapping a std::string.
  //
  // On top of the plain byte string it provides:
  //   * hash()   - the std::hash of the bytes, computed on first use and cached;
  //   * u8_*()   - access by UTF-8 code point, backed by a lazily built table
  //                of the byte offset at which each code point starts.
  //
  // Both caches live in `mutable` members so they can be filled from const
  // methods. No member function modifies _s after construction.
  class _Str {
  private:
      mutable bool utf8_initialized = false;
      // Byte offset of the first byte of every UTF-8 code point in _s.
      // Offsets are stored as 32-bit values so strings longer than 65535
      // bytes are indexed correctly.
      mutable std::vector<uint32_t> _u8_index;
      std::string _s;
      mutable bool hash_initialized = false;
      mutable size_t _hash = 0;

      // Builds _u8_index on first call; later calls return immediately.
      void utf8_lazy_init() const {
          if (utf8_initialized) return;
          const size_t byte_count = _s.size();
          // The loop index is a size_t: a 16-bit counter would wrap around
          // for inputs of 65536+ bytes and the loop would never terminate.
          for (size_t i = 0; i < byte_count; ++i) {
              // https://stackoverflow.com/questions/3911536/utf-8-unicode-whats-with-0xc0-and-0x80
              // Bytes of the form 10xxxxxx are continuation bytes; every
              // other byte starts a new code point.
              if ((_s[i] & 0xC0) != 0x80)
                  _u8_index.push_back(static_cast<uint32_t>(i));
          }
          utf8_initialized = true;
      }

  public:
      // Construct from a NUL-terminated C string.
      _Str(const char* s): _s(s) {}
      // Construct from the first `len` bytes at `s`.
      _Str(const char* s, size_t len): _s(s, len) {}
      // Construct a string of `n` copies of `fill`.
      _Str(int n, char fill = ' '): _s(n, fill) {}
      // Copy / move from a std::string.
      _Str(const std::string& s): _s(s) {}
      _Str(std::string&& s): _s(std::move(s)) {}
      // Construct from the current contents of a string stream.
      _Str(const _StrStream& ss): _s(ss.str()) {}
      // Construct an empty string.
      _Str() {}

      // Returns std::hash<std::string> of the bytes; computed once and cached.
      size_t hash() const {
          if (!hash_initialized) {
              _hash = std::hash<std::string>()(_s);
              hash_initialized = true;
          }
          return _hash;
      }

      // Number of UTF-8 code points (non-continuation bytes) in the string.
      int u8_length() const {
          utf8_lazy_init();
          return _u8_index.size();
      }

      // Returns the single code point at code-point index `i`.
      // Throws std::out_of_range (from vector::at) if `i` is not a valid index.
      _Str u8_getitem(int i) const {
          return u8_substr(i, i + 1);
      }

      // Returns the code points in [start, end), both given as code-point
      // indices. Returns an empty string when start >= end. An `end` at or
      // beyond u8_length() means "up to the end of the string".
      // Throws std::out_of_range (from vector::at) if `start` is not a valid
      // code-point index.
      _Str u8_substr(int start, int end) const {
          utf8_lazy_init();
          if (start >= end) return _Str();
          // The comparison is done in size_t, as in the implicit conversion
          // the expression would otherwise perform; a negative `end` therefore
          // also selects "end of string".
          const bool to_end = static_cast<size_t>(end) >= _u8_index.size();
          const int c_end = to_end ? size() : static_cast<int>(_u8_index[end]);
          const int c_begin = static_cast<int>(_u8_index.at(start));
          return _s.substr(c_begin, c_end - c_begin);
      }

      // Length in bytes (not code points).
      int size() const {
          return _s.size();
      }

      bool empty() const {
          return _s.empty();
      }

      // Comparisons delegate to std::string (byte-wise comparison).
      bool operator==(const _Str& other) const {
          return _s == other._s;
      }

      bool operator!=(const _Str& other) const {
          return _s != other._s;
      }

      bool operator<(const _Str& other) const {
          return _s < other._s;
      }

      bool operator>(const _Str& other) const {
          return _s > other._s;
      }

      // Byte at byte index `i`; no bounds checking is performed.
      char operator[](int i) const {
          return _s[i];
      }

      // Writes the raw bytes to the stream.
      friend std::ostream& operator<<(std::ostream& os, const _Str& s) {
          os << s._s;
          return os;
      }

      // Concatenation; every overload returns a new _Str.
      _Str operator+(const _Str& other) const {
          return _Str(_s + other._s);
      }

      _Str operator+(const char* other) const {
          return _Str(_s + other);
      }

      _Str operator+(const std::string& other) const {
          return _Str(_s + other);
      }

      friend _Str operator+(const char* other, const _Str& s) {
          return _Str(other + s._s);
      }

      friend _Str operator+(const std::string& other, const _Str& s) {
          return _Str(other + s._s);
      }

      // Read-only access to the underlying std::string.
      const std::string& str() const {
          return _s;
      }

      static const std::size_t npos = std::string::npos;

      // Implicit conversion to a C string. The pointer comes from
      // std::string::c_str() on the internal buffer, so it must not be used
      // after this _Str is destroyed.
      operator const char*() const {
          return _s.c_str();
      }
  };
  98. namespace std {
  99. template<>
  100. struct hash<_Str> {
  101. std::size_t operator()(const _Str& s) const {
  102. return s.hash();
  103. }
  104. };
  105. }
  106. const _Str& __class__ = _Str("__class__");
  107. const _Str& __base__ = _Str("__base__");
  108. const _Str& __new__ = _Str("__new__");
  109. const _Str& __iter__ = _Str("__iter__");
  110. const _Str& __str__ = _Str("__str__");
  111. const _Str& __repr__ = _Str("__repr__");
  112. const _Str& __neg__ = _Str("__neg__");
  113. const _Str& __getitem__ = _Str("__getitem__");
  114. const _Str& __setitem__ = _Str("__setitem__");
  115. const _Str& __delitem__ = _Str("__delitem__");
  116. const _Str& __contains__ = _Str("__contains__");
  117. const _Str& __init__ = _Str("__init__");
  118. const _Str CMP_SPECIAL_METHODS[] = {
  119. "__lt__", "__le__", "__eq__", "__ne__", "__gt__", "__ge__"
  120. }; // __ne__ should not be used
  121. const _Str BIN_SPECIAL_METHODS[] = {
  122. "__add__", "__sub__", "__mul__", "__truediv__", "__floordiv__", "__mod__", "__pow__"
  123. };