|
@@ -9,159 +9,20 @@
|
|
|
|
|
|
|
|
namespace pkpy {
|
|
namespace pkpy {
|
|
|
|
|
|
|
|
-int utf8len(unsigned char c, bool suppress) {
|
|
|
|
|
- if((c & 0b10000000) == 0) return 1;
|
|
|
|
|
- if((c & 0b11100000) == 0b11000000) return 2;
|
|
|
|
|
- if((c & 0b11110000) == 0b11100000) return 3;
|
|
|
|
|
- if((c & 0b11111000) == 0b11110000) return 4;
|
|
|
|
|
- if((c & 0b11111100) == 0b11111000) return 5;
|
|
|
|
|
- if((c & 0b11111110) == 0b11111100) return 6;
|
|
|
|
|
- if(!suppress) PK_FATAL_ERROR("invalid utf8 char\n")
|
|
|
|
|
- return 0;
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-#define PK_STR_ALLOCATE() \
|
|
|
|
|
- if(this->size < (int)sizeof(this->_inlined)) { \
|
|
|
|
|
- this->data = this->_inlined; \
|
|
|
|
|
- } else { \
|
|
|
|
|
- this->data = (char*)std::malloc(this->size + 1); \
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
-#define PK_STR_COPY_INIT(__s) \
|
|
|
|
|
- for(int i = 0; i < this->size; i++) { \
|
|
|
|
|
- this->data[i] = __s[i]; \
|
|
|
|
|
- if(!isascii(__s[i])) is_ascii = false; \
|
|
|
|
|
- } \
|
|
|
|
|
- this->data[this->size] = '\0';
|
|
|
|
|
-
|
|
|
|
|
-Str::Str() : size(0), is_ascii(true), data(_inlined) { _inlined[0] = '\0'; }
|
|
|
|
|
-
|
|
|
|
|
-Str::Str(int size, bool is_ascii) :
|
|
|
|
|
- size(size), is_ascii(is_ascii){PK_STR_ALLOCATE()}
|
|
|
|
|
-
|
|
|
|
|
- Str::Str(const std::string& s) :
|
|
|
|
|
- size(s.size()), is_ascii(true){PK_STR_ALLOCATE() PK_STR_COPY_INIT(s)}
|
|
|
|
|
-
|
|
|
|
|
- Str::Str(std::string_view s) :
|
|
|
|
|
- size(s.size()), is_ascii(true){PK_STR_ALLOCATE() PK_STR_COPY_INIT(s)}
|
|
|
|
|
-
|
|
|
|
|
- Str::Str(const char* s) :
|
|
|
|
|
- size(strlen(s)), is_ascii(true){PK_STR_ALLOCATE() PK_STR_COPY_INIT(s)}
|
|
|
|
|
-
|
|
|
|
|
- Str::Str(const char* s, int len) :
|
|
|
|
|
- size(len), is_ascii(true){PK_STR_ALLOCATE() PK_STR_COPY_INIT(s)}
|
|
|
|
|
-
|
|
|
|
|
- Str::Str(pair<char*, int> detached) : size(detached.second), is_ascii(true) {
|
|
|
|
|
- this->data = detached.first;
|
|
|
|
|
|
|
+Str::Str(pair<char*, int> detached) {
|
|
|
|
|
+ this->size = detached.second;
|
|
|
|
|
+ this->is_ascii = true;
|
|
|
|
|
+ this->is_sso = false;
|
|
|
|
|
+ this->_ptr = detached.first;
|
|
|
for(int i = 0; i < size; i++) {
|
|
for(int i = 0; i < size; i++) {
|
|
|
- if(!isascii(data[i])) {
|
|
|
|
|
|
|
+ if(!isascii(_ptr[i])) {
|
|
|
is_ascii = false;
|
|
is_ascii = false;
|
|
|
break;
|
|
break;
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
- assert(data[size] == '\0');
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-Str::Str(const Str& other) : size(other.size), is_ascii(other.is_ascii) {
|
|
|
|
|
- PK_STR_ALLOCATE()
|
|
|
|
|
- std::memcpy(data, other.data, size);
|
|
|
|
|
- data[size] = '\0';
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-Str::Str(Str&& other) : size(other.size), is_ascii(other.is_ascii) {
|
|
|
|
|
- if(other.is_inlined()) {
|
|
|
|
|
- data = _inlined;
|
|
|
|
|
- for(int i = 0; i < size; i++)
|
|
|
|
|
- _inlined[i] = other._inlined[i];
|
|
|
|
|
- data[size] = '\0';
|
|
|
|
|
- } else {
|
|
|
|
|
- data = other.data;
|
|
|
|
|
- // zero out `other`
|
|
|
|
|
- other.data = other._inlined;
|
|
|
|
|
- other.data[0] = '\0';
|
|
|
|
|
- other.size = 0;
|
|
|
|
|
- }
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-Str operator+ (const char* p, const Str& str) {
|
|
|
|
|
- Str other(p);
|
|
|
|
|
- return other + str;
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-std::ostream& operator<< (std::ostream& os, const Str& str) { return os << str.sv(); }
|
|
|
|
|
-
|
|
|
|
|
-bool operator< (const std::string_view other, const Str& str) { return other < str.sv(); }
|
|
|
|
|
-
|
|
|
|
|
-Str& Str::operator= (const Str& other) {
|
|
|
|
|
- if(!is_inlined()) std::free(data);
|
|
|
|
|
- size = other.size;
|
|
|
|
|
- is_ascii = other.is_ascii;
|
|
|
|
|
- PK_STR_ALLOCATE()
|
|
|
|
|
- std::memcpy(data, other.data, size);
|
|
|
|
|
- data[size] = '\0';
|
|
|
|
|
- return *this;
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-Str Str::operator+ (const Str& other) const {
|
|
|
|
|
- Str ret(size + other.size, is_ascii && other.is_ascii);
|
|
|
|
|
- std::memcpy(ret.data, data, size);
|
|
|
|
|
- std::memcpy(ret.data + size, other.data, other.size);
|
|
|
|
|
- ret.data[ret.size] = '\0';
|
|
|
|
|
- return ret;
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-Str Str::operator+ (const char* p) const {
|
|
|
|
|
- Str other(p);
|
|
|
|
|
- return *this + other;
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-bool Str::operator== (const Str& other) const {
|
|
|
|
|
- if(size != other.size) return false;
|
|
|
|
|
- return memcmp(data, other.data, size) == 0;
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-bool Str::operator!= (const Str& other) const {
|
|
|
|
|
- if(size != other.size) return true;
|
|
|
|
|
- return memcmp(data, other.data, size) != 0;
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-bool Str::operator== (const std::string_view other) const {
|
|
|
|
|
- if(size != (int)other.size()) return false;
|
|
|
|
|
- return memcmp(data, other.data(), size) == 0;
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-bool Str::operator!= (const std::string_view other) const {
|
|
|
|
|
- if(size != (int)other.size()) return true;
|
|
|
|
|
- return memcmp(data, other.data(), size) != 0;
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-bool Str::operator== (const char* p) const { return *this == std::string_view(p); }
|
|
|
|
|
-
|
|
|
|
|
-bool Str::operator!= (const char* p) const { return *this != std::string_view(p); }
|
|
|
|
|
-
|
|
|
|
|
-bool Str::operator< (const Str& other) const { return this->sv() < other.sv(); }
|
|
|
|
|
-
|
|
|
|
|
-bool Str::operator< (const std::string_view other) const { return this->sv() < other; }
|
|
|
|
|
-
|
|
|
|
|
-bool Str::operator> (const Str& other) const { return this->sv() > other.sv(); }
|
|
|
|
|
-
|
|
|
|
|
-bool Str::operator<= (const Str& other) const { return this->sv() <= other.sv(); }
|
|
|
|
|
-
|
|
|
|
|
-bool Str::operator>= (const Str& other) const { return this->sv() >= other.sv(); }
|
|
|
|
|
-
|
|
|
|
|
-Str::~Str() {
|
|
|
|
|
- if(!is_inlined()) std::free(data);
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-Str Str::substr(int start, int len) const {
|
|
|
|
|
- Str ret(len, is_ascii);
|
|
|
|
|
- std::memcpy(ret.data, data + start, len);
|
|
|
|
|
- ret.data[len] = '\0';
|
|
|
|
|
- return ret;
|
|
|
|
|
|
|
+ assert(_ptr[size] == '\0');
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-Str Str::substr(int start) const { return substr(start, size - start); }
|
|
|
|
|
-
|
|
|
|
|
Str Str::strip(bool left, bool right, const Str& chars) const {
|
|
Str Str::strip(bool left, bool right, const Str& chars) const {
|
|
|
int L = 0;
|
|
int L = 0;
|
|
|
int R = u8_length();
|
|
int R = u8_length();
|
|
@@ -177,6 +38,7 @@ Str Str::strip(bool left, bool right, const Str& chars) const {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
Str Str::strip(bool left, bool right) const {
|
|
Str Str::strip(bool left, bool right) const {
|
|
|
|
|
+ const char* data = pkpy_Str__data(this);
|
|
|
if(is_ascii) {
|
|
if(is_ascii) {
|
|
|
int L = 0;
|
|
int L = 0;
|
|
|
int R = size;
|
|
int R = size;
|
|
@@ -194,24 +56,6 @@ Str Str::strip(bool left, bool right) const {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-Str Str::lower() const {
|
|
|
|
|
- std::string copy(data, size);
|
|
|
|
|
- std::transform(copy.begin(), copy.end(), copy.begin(), [](unsigned char c) {
|
|
|
|
|
- if('A' <= c && c <= 'Z') return c + ('a' - 'A');
|
|
|
|
|
- return (int)c;
|
|
|
|
|
- });
|
|
|
|
|
- return Str(copy);
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-Str Str::upper() const {
|
|
|
|
|
- std::string copy(data, size);
|
|
|
|
|
- std::transform(copy.begin(), copy.end(), copy.begin(), [](unsigned char c) {
|
|
|
|
|
- if('a' <= c && c <= 'z') return c - ('a' - 'A');
|
|
|
|
|
- return (int)c;
|
|
|
|
|
- });
|
|
|
|
|
- return Str(copy);
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
Str Str::escape(bool single_quote) const {
|
|
Str Str::escape(bool single_quote) const {
|
|
|
SStream ss;
|
|
SStream ss;
|
|
|
escape_(ss, single_quote);
|
|
escape_(ss, single_quote);
|
|
@@ -220,7 +64,7 @@ Str Str::escape(bool single_quote) const {
|
|
|
|
|
|
|
|
void Str::escape_(SStream& ss, bool single_quote) const {
|
|
void Str::escape_(SStream& ss, bool single_quote) const {
|
|
|
ss << (single_quote ? '\'' : '"');
|
|
ss << (single_quote ? '\'' : '"');
|
|
|
- for(int i = 0; i < length(); i++) {
|
|
|
|
|
|
|
+ for(int i = 0; i < size; i++) {
|
|
|
char c = this->operator[] (i);
|
|
char c = this->operator[] (i);
|
|
|
switch(c) {
|
|
switch(c) {
|
|
|
case '"':
|
|
case '"':
|
|
@@ -249,71 +93,6 @@ void Str::escape_(SStream& ss, bool single_quote) const {
|
|
|
ss << (single_quote ? '\'' : '"');
|
|
ss << (single_quote ? '\'' : '"');
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-int Str::index(const Str& sub, int start) const {
|
|
|
|
|
- auto p = std::search(data + start, data + size, sub.data, sub.data + sub.size);
|
|
|
|
|
- if(p == data + size) return -1;
|
|
|
|
|
- return p - data;
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-Str Str::replace(char old, char new_) const {
|
|
|
|
|
- Str copied = *this;
|
|
|
|
|
- for(int i = 0; i < copied.size; i++) {
|
|
|
|
|
- if(copied.data[i] == old) copied.data[i] = new_;
|
|
|
|
|
- }
|
|
|
|
|
- return copied;
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-Str Str::replace(const Str& old, const Str& new_, int count) const {
|
|
|
|
|
- SStream ss;
|
|
|
|
|
- int start = 0;
|
|
|
|
|
- while(true) {
|
|
|
|
|
- int i = index(old, start);
|
|
|
|
|
- if(i == -1) break;
|
|
|
|
|
- ss << substr(start, i - start);
|
|
|
|
|
- ss << new_;
|
|
|
|
|
- start = i + old.size;
|
|
|
|
|
- if(count != -1 && --count == 0) break;
|
|
|
|
|
- }
|
|
|
|
|
- ss << substr(start, size - start);
|
|
|
|
|
- return ss.str();
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-int Str::_unicode_index_to_byte(int i) const {
|
|
|
|
|
- if(is_ascii) return i;
|
|
|
|
|
- int j = 0;
|
|
|
|
|
- while(i > 0) {
|
|
|
|
|
- j += utf8len(data[j]);
|
|
|
|
|
- i--;
|
|
|
|
|
- }
|
|
|
|
|
- return j;
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-int Str::_byte_index_to_unicode(int n) const {
|
|
|
|
|
- if(is_ascii) return n;
|
|
|
|
|
- int cnt = 0;
|
|
|
|
|
- for(int i = 0; i < n; i++) {
|
|
|
|
|
- if((data[i] & 0xC0) != 0x80) cnt++;
|
|
|
|
|
- }
|
|
|
|
|
- return cnt;
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-Str Str::u8_getitem(int i) const {
|
|
|
|
|
- i = _unicode_index_to_byte(i);
|
|
|
|
|
- return substr(i, utf8len(data[i]));
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-Str Str::u8_slice(int start, int stop, int step) const {
|
|
|
|
|
- SStream ss;
|
|
|
|
|
- if(is_ascii) {
|
|
|
|
|
- PK_SLICE_LOOP(i, start, stop, step) ss << data[i];
|
|
|
|
|
- } else {
|
|
|
|
|
- PK_SLICE_LOOP(i, start, stop, step) ss << u8_getitem(i);
|
|
|
|
|
- }
|
|
|
|
|
- return ss.str();
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
-int Str::u8_length() const { return _byte_index_to_unicode(size); }
|
|
|
|
|
-
|
|
|
|
|
vector<std::string_view> Str::split(const Str& sep) const {
|
|
vector<std::string_view> Str::split(const Str& sep) const {
|
|
|
vector<std::string_view> result;
|
|
vector<std::string_view> result;
|
|
|
std::string_view tmp;
|
|
std::string_view tmp;
|
|
@@ -332,6 +111,7 @@ vector<std::string_view> Str::split(const Str& sep) const {
|
|
|
|
|
|
|
|
vector<std::string_view> Str::split(char sep) const {
|
|
vector<std::string_view> Str::split(char sep) const {
|
|
|
vector<std::string_view> result;
|
|
vector<std::string_view> result;
|
|
|
|
|
+ const char* data = pkpy_Str__data(this);
|
|
|
int i = 0;
|
|
int i = 0;
|
|
|
for(int j = 0; j < size; j++) {
|
|
for(int j = 0; j < size; j++) {
|
|
|
if(data[j] == sep) {
|
|
if(data[j] == sep) {
|
|
@@ -344,19 +124,6 @@ vector<std::string_view> Str::split(char sep) const {
|
|
|
return result;
|
|
return result;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-int Str::count(const Str& sub) const {
|
|
|
|
|
- if(sub.empty()) return size + 1;
|
|
|
|
|
- int cnt = 0;
|
|
|
|
|
- int start = 0;
|
|
|
|
|
- while(true) {
|
|
|
|
|
- int i = index(sub, start);
|
|
|
|
|
- if(i == -1) break;
|
|
|
|
|
- cnt++;
|
|
|
|
|
- start = i + sub.size;
|
|
|
|
|
- }
|
|
|
|
|
- return cnt;
|
|
|
|
|
-}
|
|
|
|
|
-
|
|
|
|
|
static std::map<std::string_view, uint16_t>& _interned() {
|
|
static std::map<std::string_view, uint16_t>& _interned() {
|
|
|
static std::map<std::string_view, uint16_t> interned;
|
|
static std::map<std::string_view, uint16_t> interned;
|
|
|
return interned;
|
|
return interned;
|