str.cpp 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. #include "pocketpy/str.h"
  2. namespace pkpy {
  3. Str& Str::operator=(const Str& other){
  4. if(!is_inlined()) pool64.dealloc(data);
  5. size = other.size;
  6. is_ascii = other.is_ascii;
  7. _alloc();
  8. memcpy(data, other.data, size);
  9. return *this;
  10. }
  11. Str Str::operator+(const Str& other) const {
  12. Str ret(size + other.size, is_ascii && other.is_ascii);
  13. memcpy(ret.data, data, size);
  14. memcpy(ret.data + size, other.data, other.size);
  15. return ret;
  16. }
  17. Str Str::operator+(const char* p) const {
  18. Str other(p);
  19. return *this + other;
  20. }
  21. bool Str::operator==(const Str& other) const {
  22. if(size != other.size) return false;
  23. return memcmp(data, other.data, size) == 0;
  24. }
  25. bool Str::operator!=(const Str& other) const {
  26. if(size != other.size) return true;
  27. return memcmp(data, other.data, size) != 0;
  28. }
  29. bool Str::operator==(const std::string_view other) const {
  30. if(size != (int)other.size()) return false;
  31. return memcmp(data, other.data(), size) == 0;
  32. }
  33. bool Str::operator!=(const std::string_view other) const {
  34. if(size != (int)other.size()) return true;
  35. return memcmp(data, other.data(), size) != 0;
  36. }
  37. bool Str::operator==(const char* p) const {
  38. return *this == std::string_view(p);
  39. }
  40. bool Str::operator!=(const char* p) const {
  41. return *this != std::string_view(p);
  42. }
  43. bool Str::operator<(const Str& other) const {
  44. int ret = strncmp(data, other.data, std::min(size, other.size));
  45. if(ret != 0) return ret < 0;
  46. return size < other.size;
  47. }
  48. bool Str::operator<(const std::string_view other) const {
  49. int ret = strncmp(data, other.data(), std::min(size, (int)other.size()));
  50. if(ret != 0) return ret < 0;
  51. return size < (int)other.size();
  52. }
  53. bool Str::operator>(const Str& other) const {
  54. int ret = strncmp(data, other.data, std::min(size, other.size));
  55. if(ret != 0) return ret > 0;
  56. return size > other.size;
  57. }
  58. bool Str::operator<=(const Str& other) const {
  59. int ret = strncmp(data, other.data, std::min(size, other.size));
  60. if(ret != 0) return ret < 0;
  61. return size <= other.size;
  62. }
  63. bool Str::operator>=(const Str& other) const {
  64. int ret = strncmp(data, other.data, std::min(size, other.size));
  65. if(ret != 0) return ret > 0;
  66. return size >= other.size;
  67. }
  68. Str::~Str(){
  69. if(!is_inlined()) pool64.dealloc(data);
  70. if(_cached_c_str != nullptr) free((void*)_cached_c_str);
  71. }
  72. Str Str::substr(int start, int len) const {
  73. Str ret(len, is_ascii);
  74. memcpy(ret.data, data + start, len);
  75. return ret;
  76. }
  77. Str Str::substr(int start) const {
  78. return substr(start, size - start);
  79. }
  80. char* Str::c_str_dup() const {
  81. char* p = (char*)malloc(size + 1);
  82. memcpy(p, data, size);
  83. p[size] = 0;
  84. return p;
  85. }
  86. const char* Str::c_str() const{
  87. if(_cached_c_str == nullptr){
  88. _cached_c_str = c_str_dup();
  89. }
  90. return _cached_c_str;
  91. }
  92. std::string_view Str::sv() const {
  93. return std::string_view(data, size);
  94. }
  95. std::string Str::str() const {
  96. return std::string(data, size);
  97. }
  98. Str Str::lstrip() const {
  99. std::string copy(data, size);
  100. copy.erase(copy.begin(), std::find_if(copy.begin(), copy.end(), [](char c) {
  101. // std::isspace(c) does not working on windows (Debug)
  102. return c != ' ' && c != '\t' && c != '\r' && c != '\n';
  103. }));
  104. return Str(copy);
  105. }
  106. Str Str::strip() const {
  107. std::string copy(data, size);
  108. copy.erase(copy.begin(), std::find_if(copy.begin(), copy.end(), [](char c) {
  109. return c != ' ' && c != '\t' && c != '\r' && c != '\n';
  110. }));
  111. copy.erase(std::find_if(copy.rbegin(), copy.rend(), [](char c) {
  112. return c != ' ' && c != '\t' && c != '\r' && c != '\n';
  113. }).base(), copy.end());
  114. return Str(copy);
  115. }
  116. Str Str::lower() const{
  117. std::string copy(data, size);
  118. std::transform(copy.begin(), copy.end(), copy.begin(), [](unsigned char c){ return std::tolower(c); });
  119. return Str(copy);
  120. }
  121. Str Str::upper() const{
  122. std::string copy(data, size);
  123. std::transform(copy.begin(), copy.end(), copy.begin(), [](unsigned char c){ return std::toupper(c); });
  124. return Str(copy);
  125. }
  126. Str Str::escape(bool single_quote) const {
  127. std::stringstream ss;
  128. ss << (single_quote ? '\'' : '"');
  129. for (int i=0; i<length(); i++) {
  130. char c = this->operator[](i);
  131. switch (c) {
  132. case '"':
  133. if(!single_quote) ss << '\\';
  134. ss << '"';
  135. break;
  136. case '\'':
  137. if(single_quote) ss << '\\';
  138. ss << '\'';
  139. break;
  140. case '\\': ss << '\\' << '\\'; break;
  141. case '\n': ss << "\\n"; break;
  142. case '\r': ss << "\\r"; break;
  143. case '\t': ss << "\\t"; break;
  144. default:
  145. if ('\x00' <= c && c <= '\x1f') {
  146. ss << "\\x" << std::hex << std::setw(2) << std::setfill('0') << (int)c;
  147. } else {
  148. ss << c;
  149. }
  150. }
  151. }
  152. ss << (single_quote ? '\'' : '"');
  153. return ss.str();
  154. }
  155. int Str::index(const Str& sub, int start) const {
  156. auto p = std::search(data + start, data + size, sub.data, sub.data + sub.size);
  157. if(p == data + size) return -1;
  158. return p - data;
  159. }
  160. Str Str::replace(const Str& old, const Str& new_, int count) const {
  161. std::stringstream ss;
  162. int start = 0;
  163. while(true){
  164. int i = index(old, start);
  165. if(i == -1) break;
  166. ss << substr(start, i - start);
  167. ss << new_;
  168. start = i + old.size;
  169. if(count != -1 && --count == 0) break;
  170. }
  171. ss << substr(start, size - start);
  172. return ss.str();
  173. }
  174. int Str::_unicode_index_to_byte(int i) const{
  175. if(is_ascii) return i;
  176. int j = 0;
  177. while(i > 0){
  178. j += utf8len(data[j]);
  179. i--;
  180. }
  181. return j;
  182. }
  183. int Str::_byte_index_to_unicode(int n) const{
  184. if(is_ascii) return n;
  185. int cnt = 0;
  186. for(int i=0; i<n; i++){
  187. if((data[i] & 0xC0) != 0x80) cnt++;
  188. }
  189. return cnt;
  190. }
  191. Str Str::u8_getitem(int i) const{
  192. i = _unicode_index_to_byte(i);
  193. return substr(i, utf8len(data[i]));
  194. }
  195. Str Str::u8_slice(int start, int stop, int step) const{
  196. std::stringstream ss;
  197. if(is_ascii){
  198. for(int i=start; step>0?i<stop:i>stop; i+=step) ss << data[i];
  199. }else{
  200. for(int i=start; step>0?i<stop:i>stop; i+=step) ss << u8_getitem(i);
  201. }
  202. return ss.str();
  203. }
  204. int Str::u8_length() const {
  205. return _byte_index_to_unicode(size);
  206. }
  207. StrName StrName::get(std::string_view s){
  208. auto it = _interned.find(s);
  209. if(it != _interned.end()) return StrName(it->second);
  210. uint16_t index = (uint16_t)(_r_interned.size() + 1);
  211. _interned[s] = index;
  212. _r_interned.push_back(s);
  213. return StrName(index);
  214. }
  215. Str StrName::escape() const {
  216. return _r_interned[index-1].escape();
  217. }
  218. bool StrName::is_valid(int index) {
  219. // check _r_interned[index-1] is valid
  220. return index > 0 && index <= _r_interned.size();
  221. }
  222. StrName::StrName(): index(0) {}
  223. StrName::StrName(uint16_t index): index(index) {}
  224. StrName::StrName(const char* s): index(get(s).index) {}
  225. StrName::StrName(const Str& s){
  226. index = get(s.sv()).index;
  227. }
  228. std::string_view StrName::sv() const { return _r_interned[index-1].sv(); }
  229. } // namespace pkpy