blueloveTH 1 tahun lalu
induk
melakukan
3787a1da1d

+ 8 - 1
CMakeLists.txt

@@ -2,23 +2,30 @@ cmake_minimum_required(VERSION 3.10)
 
 project(pocketpy)
 
+set(CMAKE_C_STANDARD 11)
+set(CMAKE_C_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
 if(MSVC)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /utf-8 /Ox /jumptablerdata /GS-")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /utf-8 /Ox /jumptablerdata /GS-")
     add_compile_options(/wd4267 /wd4244)
 else()
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions -frtti -O2")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2")
 
     # disable -Wshorten-64-to-32 for apple
     if(APPLE)
         set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-shorten-64-to-32")
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-shorten-64-to-32")
     endif()
 endif()
 
 include_directories(${CMAKE_CURRENT_LIST_DIR}/include)
-file(GLOB_RECURSE POCKETPY_SRC ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
+file(GLOB_RECURSE POCKETPY_SRC_CPP ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
+file(GLOB_RECURSE POCKETPY_SRC_C ${CMAKE_CURRENT_LIST_DIR}/src/*.c)
+set(POCKETPY_SRC ${POCKETPY_SRC_CPP} ${POCKETPY_SRC_C})
 
 option(PK_USE_CJSON "" OFF)
 if(PK_USE_CJSON)

+ 3 - 1
build.sh

@@ -18,7 +18,9 @@ if [ $? -ne 0 ]; then
     exit 1
 fi
 
-SRC=$(find src/ -name "*.cpp")
+SRC_C=$(find src/ -name "*.c")
+SRC_CPP=$(find src/ -name "*.cpp")
+SRC="$SRC_C $SRC_CPP"
 
 echo "> Compiling and linking source files... "
 

+ 5 - 2
build_g.sh

@@ -1,7 +1,10 @@
 python prebuild.py
 
-SRC=$(find src/ -name "*.cpp")
+SRC_C=$(find src/ -name "*.c")
+SRC_CPP=$(find src/ -name "*.cpp")
+SRC="$SRC_C $SRC_CPP"
 
-FLAGS="-std=c++17 -O0 -stdlib=libc++ -Iinclude -frtti -Wfatal-errors -g -DDEBUG"
+FLAGS="-std=c++17 -O0 -stdlib=libc++ -Iinclude -frtti -Wfatal-errors -g -DDEBUG -DPK_ENABLE_OS=1"
 
 clang++ $FLAGS -o main src2/main.cpp $SRC
+

+ 4 - 1
build_web.sh

@@ -3,5 +3,8 @@ python prebuild.py
 rm -rf web/lib
 mkdir web/lib
 
-SRC=$(find src/ -name "*.cpp")
+SRC_C=$(find src/ -name "*.c")
+SRC_CPP=$(find src/ -name "*.cpp")
+SRC="$SRC_C $SRC_CPP"
+
 em++ $SRC -Iinclude/ -fexceptions -frtti -s -Os -sEXPORTED_FUNCTIONS=_pkpy_new_repl,_pkpy_repl_input,_pkpy_new_vm -sEXPORTED_RUNTIME_METHODS=ccall -o web/lib/pocketpy.js

+ 52 - 0
include/pocketpy/common/str.h

@@ -0,0 +1,52 @@
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+
+typedef struct pkpy_Str{
+    int size;               // number of bytes stored, not counting the trailing '\0'
+    bool is_ascii;          // cleared by the constructor when any byte is outside the ASCII range
+    bool is_sso;            // true: bytes live in `_inlined`; false: bytes live in the heap buffer `_ptr`
+    union{
+        char* _ptr;         // malloc'ed buffer of size + 1 bytes, valid only when !is_sso
+        char _inlined[16];  // in-struct storage, valid only when is_sso (size < 16)
+    };
+} pkpy_Str;
+
+// Returns the string's bytes: the in-struct buffer for small strings, the heap
+// buffer otherwise.
+// Declared `static inline` rather than plain `inline`: in C a plain `inline`
+// definition does not provide an external definition, so any call the compiler
+// decides not to inline (e.g. at -O0) would be an undefined reference at link time.
+static inline const char* pkpy_Str__data(const pkpy_Str* self){
+    return self->is_sso ? self->_inlined : self->_ptr;
+}
+
+// Returns the length of the string in bytes.
+// `static inline` so that every translation unit carries its own definition;
+// a plain C `inline` would leave non-inlined calls without a symbol to link to.
+static inline int pkpy_Str__size(const pkpy_Str* self){
+    return self->size;
+}
+
+int pkpy_utils__u8len(unsigned char c, bool suppress);
+void pkpy_Str__ctor(pkpy_Str* self, const char* data);
+void pkpy_Str__ctor2(pkpy_Str* self, const char* data, int size);
+void pkpy_Str__dtor(pkpy_Str* self);
+pkpy_Str pkpy_Str__copy(const pkpy_Str* self);
+pkpy_Str pkpy_Str__concat(const pkpy_Str* self, const pkpy_Str* other);
+pkpy_Str pkpy_Str__concat2(const pkpy_Str* self, const char* other, int size);
+pkpy_Str pkpy_Str__substr(const pkpy_Str* self, int start);
+pkpy_Str pkpy_Str__substr2(const pkpy_Str* self, int start, int size);
+pkpy_Str pkpy_Str__lower(const pkpy_Str* self);
+pkpy_Str pkpy_Str__upper(const pkpy_Str* self);
+pkpy_Str pkpy_Str__replace(const pkpy_Str* self, char old, char new_);
+pkpy_Str pkpy_Str__replace2(const pkpy_Str* self, const pkpy_Str* old, const pkpy_Str* new_);
+pkpy_Str pkpy_Str__u8_getitem(const pkpy_Str* self, int i);
+pkpy_Str pkpy_Str__u8_slice(const pkpy_Str* self, int start, int stop, int step);
+int pkpy_Str__u8_length(const pkpy_Str* self);
+int pkpy_Str__cmp(const pkpy_Str* self, const pkpy_Str* other);
+int pkpy_Str__cmp2(const pkpy_Str* self, const char* other, int size);
+int pkpy_Str__unicode_index_to_byte(const pkpy_Str* self, int i);
+int pkpy_Str__byte_index_to_unicode(const pkpy_Str* self, int n);
+int pkpy_Str__index(const pkpy_Str* self, const pkpy_Str* sub, int start);
+int pkpy_Str__count(const pkpy_Str* self, const pkpy_Str* sub);
+
+#ifdef __cplusplus
+}
+#endif

+ 176 - 65
include/pocketpy/common/str.hpp

@@ -1,80 +1,162 @@
 #pragma once
 
-#include "pocketpy/common/utils.hpp"
+#include "pocketpy/common/utils.h"
 #include "pocketpy/common/memorypool.hpp"
 #include "pocketpy/common/vector.hpp"
+#include "pocketpy/common/str.h"
 
 #include <string_view>
+#include <ostream>
 
 namespace pkpy {
 
-int utf8len(unsigned char c, bool suppress = false);
 struct SStream;
 
-struct Str {
-    int size;
-    bool is_ascii;
-    char* data;
-    char _inlined[16];
+struct Str: pkpy_Str {
+    bool is_inlined() const { return is_sso; }
 
-    bool is_inlined() const { return data == _inlined; }
+    Str(){
+        pkpy_Str__ctor2(this, "", 0);
+    }
 
-    Str();
-    Str(int size, bool is_ascii);
-    Str(const std::string& s);
-    Str(std::string_view s);
-    Str(const char* s);
-    Str(const char* s, int len);
-    Str(pair<char*, int>);      // take ownership
-    Str(const Str& other);
-    Str(Str&& other);
-
-    operator std::string_view () const { return sv(); }
-
-    const char* begin() const { return data; }
-
-    const char* end() const { return data + size; }
-
-    char operator[] (int idx) const { return data[idx]; }
-
-    int length() const { return size; }
-
-    bool empty() const { return size == 0; }
-
-    size_t hash() const { return std::hash<std::string_view>()(sv()); }
+    Str(pkpy_Str&& s){
+        std::memcpy(this, &s, sizeof(pkpy_Str));
+    }
 
-    Str& operator= (const Str&);
-    Str operator+ (const Str&) const;
-    Str operator+ (const char*) const;
-    friend Str operator+ (const char*, const Str&);
+    Str(const std::string& s){
+        pkpy_Str__ctor2(this, s.data(), s.size());
+    }
 
-    bool operator== (const std::string_view other) const;
-    bool operator!= (const std::string_view other) const;
-    bool operator< (const std::string_view other) const;
-    friend bool operator< (const std::string_view other, const Str& str);
+    Str(std::string_view s){
+        pkpy_Str__ctor2(this, s.data(), s.size());
+    }
 
-    bool operator== (const char* p) const;
-    bool operator!= (const char* p) const;
+    Str(const char* s){
+        pkpy_Str__ctor2(this, s, strlen(s));
+    }
 
-    bool operator== (const Str& other) const;
-    bool operator!= (const Str& other) const;
-    bool operator< (const Str& other) const;
-    bool operator> (const Str& other) const;
-    bool operator<= (const Str& other) const;
-    bool operator>= (const Str& other) const;
+    Str(const char* s, int len){
+        pkpy_Str__ctor2(this, s, len);
+    }
 
-    ~Str();
+    Str(pair<char*, int>);      // take ownership
 
-    friend std::ostream& operator<< (std::ostream& os, const Str& str);
+    Str(const Str& other){
+        pkpy_Str__ctor2(this, pkpy_Str__data(&other), other.size);
+    }
 
-    const char* c_str() const { return data; }
+    // Move constructor: takes over `other`'s representation bytewise, then
+    // resets `other` to a valid empty string so its destructor frees nothing.
+    Str(Str&& other){
+        std::memcpy(this, &other, sizeof(pkpy_Str));
+        other.size = 0;
+        other.is_ascii = true;
+        other.is_sso = true;
+        // `_inlined` shares storage with the old pointer / old text; terminate
+        // it so c_str() on the moved-from object yields "" instead of stale bytes.
+        other._inlined[0] = '\0';
+    }
 
-    std::string_view sv() const { return std::string_view(data, size); }
+    operator std::string_view () const { return sv(); }
+    const char* begin() const { return pkpy_Str__data(this); }
+    const char* end() const { return pkpy_Str__data(this) + size; }
+    int length() const { return size; }
+    char operator[] (int idx) const { return pkpy_Str__data(this)[idx]; }
+    bool empty() const { return size == 0; }
+    size_t hash() const { return std::hash<std::string_view>()(sv()); }
 
-    std::string str() const { return std::string(data, size); }
+    // Copy assignment: releases the current contents and deep-copies `other`.
+    // Self-assignment must be a no-op: pkpy_Str__dtor() frees a heap buffer and
+    // resets size to 0, so copying from `other` afterwards would copy nothing
+    // and silently turn the string into "".
+    Str& operator= (const Str& other){
+        if(this == &other) return *this;
+        pkpy_Str__dtor(this);
+        pkpy_Str__ctor2(this, pkpy_Str__data(&other), other.size);
+        return *this;
+    }
+
+    Str operator+ (const Str& other) const{
+        return pkpy_Str__concat(this, &other);
+    }
+
+    Str operator+ (const char* other) const{
+        return pkpy_Str__concat2(this, other, strlen(other));
+    }
+
+    friend Str operator+ (const char* self, const Str& other){
+        pkpy_Str tmp;
+        pkpy_Str__ctor2(&tmp, self, strlen(self));
+        pkpy_Str retval = pkpy_Str__concat(&tmp, &other);
+        pkpy_Str__dtor(&tmp);
+        return retval;
+    }
+
+    bool operator== (const std::string_view other) const{
+        int res = pkpy_Str__cmp2(this, other.data(), other.size());
+        return res == 0;
+    }
+
+    bool operator!= (const std::string_view other) const{
+        int res = pkpy_Str__cmp2(this, other.data(), other.size());
+        return res != 0;
+    }
+
+    bool operator< (const std::string_view other) const{
+        int res = pkpy_Str__cmp2(this, other.data(), other.size());
+        return res < 0;
+    }
+
+    friend bool operator< (const std::string_view other, const Str& str){
+        int res = pkpy_Str__cmp2(&str, other.data(), other.size());
+        return res > 0;
+    }
+
+    bool operator== (const char* p) const{
+        int res = pkpy_Str__cmp2(this, p, strlen(p));
+        return res == 0;
+    }
+
+    bool operator!= (const char* p) const{
+        int res = pkpy_Str__cmp2(this, p, strlen(p));
+        return res != 0;
+    }
+
+    bool operator== (const Str& other) const{
+        return pkpy_Str__cmp(this, &other) == 0;
+    }
+    bool operator!= (const Str& other) const{
+        return pkpy_Str__cmp(this, &other) != 0;
+    }
+    bool operator< (const Str& other) const{
+        return pkpy_Str__cmp(this, &other) < 0;
+    }
+    bool operator> (const Str& other) const{
+        return pkpy_Str__cmp(this, &other) > 0;
+    }
+    bool operator<= (const Str& other) const{
+        return pkpy_Str__cmp(this, &other) <= 0;
+    }
+    bool operator>= (const Str& other) const{
+        return pkpy_Str__cmp(this, &other) >= 0;
+    }
+
+    ~Str(){
+        pkpy_Str__dtor(this);
+    }
+
+    friend std::ostream& operator<< (std::ostream& os, const Str& self){
+        os.write(pkpy_Str__data(&self), self.size);
+        return os;
+    }
+
+    const char* c_str() const { return pkpy_Str__data(this); }
+
+    std::string_view sv() const {
+        return std::string_view(pkpy_Str__data(this), size);
+    }
+
+    std::string str() const {
+        return std::string(pkpy_Str__data(this), size);
+    }
+
+    Str substr(int start, int size) const{
+        return pkpy_Str__substr2(this, start, size);
+    }
+
+    Str substr(int start) const{
+        return pkpy_Str__substr(this, start);
+    }
 
-    Str substr(int start, int len) const;
-    Str substr(int start) const;
     Str strip(bool left, bool right, const Str& chars) const;
     Str strip(bool left = true, bool right = true) const;
 
@@ -82,23 +164,52 @@ struct Str {
 
     Str rstrip() const { return strip(false, true); }
 
-    Str lower() const;
-    Str upper() const;
+    Str lower() const{
+        return pkpy_Str__lower(this);
+    }
+    Str upper() const{
+        return pkpy_Str__upper(this);
+    }
+    Str replace(char old, char new_) const{
+        return pkpy_Str__replace(this, old, new_);
+    }
+    Str replace(const Str& old, const Str& new_) const{
+        return pkpy_Str__replace2(this, &old, &new_);
+    }
+
     Str escape(bool single_quote = true) const;
     void escape_(SStream& ss, bool single_quote = true) const;
-    int index(const Str& sub, int start = 0) const;
-    Str replace(char old, char new_) const;
-    Str replace(const Str& old, const Str& new_, int count = -1) const;
     vector<std::string_view> split(const Str& sep) const;
     vector<std::string_view> split(char sep) const;
-    int count(const Str& sub) const;
+
+    int index(const Str& sub, int start = 0) const{
+        return pkpy_Str__index(this, &sub, start);
+    }
+
+    int count(const Str& sub) const{
+        return pkpy_Str__count(this, &sub);
+    }
 
     /*************unicode*************/
-    int _unicode_index_to_byte(int i) const;
-    int _byte_index_to_unicode(int n) const;
-    Str u8_getitem(int i) const;
-    Str u8_slice(int start, int stop, int step) const;
-    int u8_length() const;
+    int _unicode_index_to_byte(int i) const{
+        return pkpy_Str__unicode_index_to_byte(this, i);
+    }
+
+    int _byte_index_to_unicode(int n) const{
+        return pkpy_Str__byte_index_to_unicode(this, n);
+    }
+
+    Str u8_getitem(int i) const{
+        return pkpy_Str__u8_getitem(this, i);
+    }
+
+    Str u8_slice(int start, int stop, int step) const{
+        return pkpy_Str__u8_slice(this, start, stop, step);
+    }
+
+    int u8_length() const{
+        return pkpy_Str__u8_length(this);
+    }
 };
 
 struct StrName {

+ 7 - 0
include/pocketpy/common/traits.hpp

@@ -37,4 +37,11 @@ struct has_gc_marker<T, std::void_t<decltype(&T::_gc_mark)>> : std::true_type {}
 
 template <typename T>
 constexpr inline int py_sizeof = 16 + sizeof(T);
+
+#define PK_ALWAYS_PASS_BY_POINTER(T)                                                                                   \
+    T(const T&) = delete;                                                                                              \
+    T& operator= (const T&) = delete;                                                                                  \
+    T(T&&) = delete;                                                                                                   \
+    T& operator= (T&&) = delete;
+
 }  // namespace pkpy

+ 29 - 0
include/pocketpy/common/utils.h

@@ -0,0 +1,29 @@
+#pragma once
+
+// PK_FATAL_ERROR expands to fprintf()/abort(); pull in their declarations here
+// so the header works no matter what the including file included first.
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Always expands to 1; written as `#if PK_REGION("label")` to mark a named region.
+#define PK_REGION(name) 1
+
+// Declares `int i` and iterates from `start` towards `stop` in increments of
+// `step` (upwards when step > 0, downwards otherwise). Arguments are
+// parenthesized so that expressions can be passed safely; `stop` and `step`
+// are re-evaluated on every iteration.
+#define PK_SLICE_LOOP(i, start, stop, step) for(int i = (start); (step) > 0 ? i < (stop) : i > (stop); i += (step))
+
+// global constants
+#define PK_HEX_TABLE "0123456789abcdef"
+
+// Defined in a source file outside this header.
+extern const char* kPlatformStrings[];
+
+// Tells the compiler the current point is never reached.
+// The trailing semicolon is part of the macro and is kept for existing call sites.
+#ifdef _MSC_VER
+#define PK_UNREACHABLE() __assume(0);
+#else
+#define PK_UNREACHABLE() __builtin_unreachable();
+#endif
+
+// Prints a printf-style message to stderr and aborts the process.
+// Expands to a braced block, so call sites may omit the trailing semicolon.
+#define PK_FATAL_ERROR(...) { fprintf(stderr, __VA_ARGS__); abort(); }
+
+// NOTE: both macros evaluate their arguments more than once.
+#define PK_MIN(a, b) ((a) < (b) ? (a) : (b))
+#define PK_MAX(a, b) ((a) > (b) ? (a) : (b))
+
+#ifdef __cplusplus
+}
+#endif

+ 0 - 36
include/pocketpy/common/utils.hpp

@@ -1,36 +0,0 @@
-#pragma once
-
-#define PK_REGION(name) 1
-
-#define PK_ALWAYS_PASS_BY_POINTER(T)                                                                                   \
-    T(const T&) = delete;                                                                                              \
-    T& operator= (const T&) = delete;                                                                                  \
-    T(T&&) = delete;                                                                                                   \
-    T& operator= (T&&) = delete;
-
-#define PK_SLICE_LOOP(i, start, stop, step) for(int i = start; step > 0 ? i < stop : i > stop; i += step)
-
-namespace pkpy {
-
-// global constants
-const inline char* PK_HEX_TABLE = "0123456789abcdef";
-
-const inline char* kPlatformStrings[] = {
-    "win32",       // 0
-    "emscripten",  // 1
-    "ios",         // 2
-    "darwin",      // 3
-    "android",     // 4
-    "linux",       // 5
-    "unknown"      // 6
-};
-
-#ifdef _MSC_VER
-#define PK_UNREACHABLE() __assume(0);
-#else
-#define PK_UNREACHABLE() __builtin_unreachable();
-#endif
-
-#define PK_FATAL_ERROR(...) { fprintf(stderr, __VA_ARGS__); std::abort(); }
-
-}  // namespace pkpy

+ 55 - 0
include/pocketpy/common/vector.h

@@ -0,0 +1,55 @@
+#pragma once
+
+// c11_vector__extend expands to memcpy(); declare it here so the macro works
+// in every translation unit that includes this header.
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Untyped array descriptor: `count` elements of `elem_size` bytes each at `data`.
+// NOTE(review): the functions below are implemented outside this header;
+// allocation and ownership rules should be confirmed there.
+typedef struct c11_array{
+    void* data;
+    int count;
+    int elem_size;
+} c11_array;
+
+void c11_array__ctor(c11_array* self, int elem_size, int count);
+void c11_array__dtor(c11_array* self);
+c11_array c11_array__copy(const c11_array* self);
+void* c11_array__at(c11_array* self, int index);
+
+// Untyped growable vector: `count` elements in use out of `capacity` slots,
+// each `elem_size` bytes.
+typedef struct c11_vector{
+    void* data;
+    int count;
+    int capacity;
+    int elem_size;
+} c11_vector;
+
+void c11_vector__ctor(c11_vector* self, int elem_size);
+void c11_vector__dtor(c11_vector* self);
+c11_vector c11_vector__copy(const c11_vector* self);
+void* c11_vector__at(c11_vector* self, int index);
+void c11_vector__reserve(c11_vector* self, int capacity);
+
+// Typed element access; `self` is a pointer to a c11_array or c11_vector.
+// c11__setitem carries its own trailing semicolon.
+#define c11__getitem(T, self, index) ((T*)(self)->data)[index]
+#define c11__setitem(T, self, index, value) ((T*)(self)->data)[index] = value;
+
+// Appends `elem`, growing the storage when it is full. Growth doubles the
+// capacity; a capacity of 0 is bumped to a small non-zero value because
+// doubling 0 would never make room (c11_vector__ctor's initial capacity is not
+// visible from this header).
+#define c11_vector__push_back(T, self, elem) \
+    do{ \
+        if((self)->count == (self)->capacity){ \
+            c11_vector__reserve((self), (self)->capacity > 0 ? (self)->capacity*2 : 4); \
+        } \
+        ((T*)(self)->data)[(self)->count] = (elem); \
+        (self)->count++; \
+    }while(0)
+
+// Drops the last element by decrementing `count`; does not check for emptiness.
+#define c11_vector__pop_back(T, self) \
+    do{ \
+        (self)->count--; \
+    }while(0)
+
+// Appends `size` elements of type T copied from `p`.
+#define c11_vector__extend(T, self, p, size) \
+    do{ \
+        c11_vector__reserve((self), (self)->count + (size)); \
+        memcpy((T*)(self)->data + (self)->count, (p), (size) * sizeof(T)); \
+        (self)->count += (size); \
+    }while(0)
+
+#ifdef __cplusplus
+}
+#endif

+ 1 - 1
include/pocketpy/interpreter/gc.hpp

@@ -2,7 +2,7 @@
 
 #include "pocketpy/common/config.h"
 #include "pocketpy/common/vector.hpp"
-#include "pocketpy/common/utils.hpp"
+#include "pocketpy/common/utils.h"
 #include "pocketpy/objects/object.hpp"
 #include "pocketpy/objects/namedict.hpp"
 

+ 2 - 2
include/pocketpy/interpreter/vm.hpp

@@ -319,8 +319,8 @@ public:
 #endif
 
 #if PK_REGION("Logging Methods")
-    virtual void stdout_write(const Str& s){ _stdout(s.data, s.size); }
-    virtual void stderr_write(const Str& s){ _stderr(s.data, s.size); }
+    virtual void stdout_write(const Str& s){ _stdout(s.c_str(), s.size); }
+    virtual void stderr_write(const Str& s){ _stderr(s.c_str(), s.size); }
 #endif
 
 #if PK_REGION("Magic Bindings")

+ 1 - 1
include/pocketpy/objects/namedict.hpp

@@ -2,7 +2,7 @@
 
 #include "pocketpy/common/config.h"
 #include "pocketpy/common/str.hpp"
-#include "pocketpy/common/utils.hpp"
+#include "pocketpy/common/utils.h"
 #include "pocketpy/objects/object.hpp"
 
 namespace pkpy {

+ 1 - 1
include/pocketpy/objects/sourcedata.hpp

@@ -1,6 +1,6 @@
 #pragma once
 
-#include "pocketpy/common/utils.hpp"
+#include "pocketpy/common/utils.h"
 #include "pocketpy/common/str.hpp"
 
 namespace pkpy {

+ 1 - 1
src/common/any.cpp

@@ -1,5 +1,5 @@
 #include "pocketpy/common/any.hpp"
-#include "pocketpy/common/utils.hpp"
+#include "pocketpy/common/utils.h"
 
 #include <cstdio>
 

+ 250 - 0
src/common/str.c

@@ -0,0 +1,250 @@
+#include "pocketpy/common/str.h"
+#include "pocketpy/common/vector.h"
+#include "pocketpy/common/utils.h"
+
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <stdio.h>
+
+// Number of bytes (1..6) in the UTF-8 sequence introduced by lead byte `c`.
+// For an unrecognised lead byte: returns 0 when `suppress` is set, otherwise
+// reports a fatal error.
+int pkpy_utils__u8len(unsigned char c, bool suppress) {
+    if((c & 0x80) == 0x00) return 1;
+    if((c & 0xE0) == 0xC0) return 2;
+    if((c & 0xF0) == 0xE0) return 3;
+    if((c & 0xF8) == 0xF0) return 4;
+    if((c & 0xFC) == 0xF8) return 5;
+    if((c & 0xFE) == 0xFC) return 6;
+    if(suppress) return 0;
+    PK_FATAL_ERROR("invalid utf8 char\n")
+    return 0;
+}
+
+// Initialises `self` with a copy of the NUL-terminated string `data`.
+void pkpy_Str__ctor(pkpy_Str *self, const char *data){
+    int length = strlen(data);
+    pkpy_Str__ctor2(self, data, length);
+}
+
+// Initialises `self` with a copy of the first `size` bytes of `data` and
+// appends a terminating '\0'. Strings shorter than the inline buffer are
+// stored inside the struct; longer ones go to a malloc'ed buffer.
+void pkpy_Str__ctor2(pkpy_Str *self, const char *data, int size){
+    assert(size >= 0);
+    self->size = size;
+    self->is_ascii = true;
+    // Compare as int: against the unsigned sizeof a negative size would
+    // silently become a huge positive value.
+    self->is_sso = size < (int)sizeof(self->_inlined);
+    char* p;
+    if(self->is_sso){
+        p = self->_inlined;
+    }else{
+        self->_ptr = (char*)malloc(size + 1);
+        p = self->_ptr;
+    }
+    memcpy(p, data, size);
+    p[size] = '\0';
+    // check is_ascii: a byte is non-ASCII exactly when its top bit is set.
+    // (isascii() is a POSIX extension, not part of ISO C.)
+    for(int i = 0; i < size; i++){
+        if((unsigned char)p[i] & 0x80){
+            self->is_ascii = false;
+            break;
+        }
+    }
+}
+
+// Releases the heap buffer, if any, and marks the string as empty/inline.
+// Inline strings own no memory and are left untouched.
+void pkpy_Str__dtor(pkpy_Str *self){
+    if(self->is_sso) return;
+    free(self->_ptr);
+    self->is_sso = true;
+    self->size = 0;
+}
+
+// Returns a deep copy of `self`. Inline strings are copied by value; heap
+// strings get a freshly allocated buffer of size + 1 bytes.
+pkpy_Str pkpy_Str__copy(const pkpy_Str *self){
+    pkpy_Str retval = *self;
+    if(self->is_sso) return retval;
+    int nbytes = self->size + 1;
+    retval._ptr = (char*)malloc(nbytes);
+    memcpy(retval._ptr, self->_ptr, nbytes);
+    retval._ptr[retval.size] = '\0';
+    return retval;
+}
+
+// Returns a new string holding the bytes of `self` followed by those of
+// `other`, NUL-terminated. The result is ASCII only if both inputs are.
+pkpy_Str pkpy_Str__concat(const pkpy_Str *self, const pkpy_Str *other){
+    int total = self->size + other->size;
+    pkpy_Str retval = {
+        .size = total,
+        .is_ascii = self->is_ascii && other->is_ascii,
+        .is_sso = total < sizeof(retval._inlined),
+    };
+    char* dst = retval._inlined;
+    if(!retval.is_sso){
+        retval._ptr = (char*)malloc(retval.size + 1);
+        dst = retval._ptr;
+    }
+    memcpy(dst, pkpy_Str__data(self), self->size);
+    memcpy(dst + self->size, pkpy_Str__data(other), other->size);
+    dst[retval.size] = '\0';
+    return retval;
+}
+
+// Concatenates `self` with the `size` bytes at `other` by wrapping the raw
+// bytes in a temporary string, which is destroyed before returning.
+pkpy_Str pkpy_Str__concat2(const pkpy_Str *self, const char *other, int size){
+    pkpy_Str rhs;
+    pkpy_Str__ctor2(&rhs, other, size);
+    pkpy_Str joined = pkpy_Str__concat(self, &rhs);
+    pkpy_Str__dtor(&rhs);
+    return joined;
+}
+
+// Returns the suffix of `self` that begins at byte offset `start`.
+pkpy_Str pkpy_Str__substr(const pkpy_Str *self, int start){
+    int remaining = self->size - start;
+    return pkpy_Str__substr2(self, start, remaining);
+}
+
+// Returns a new string made of `size` bytes of `self` starting at byte
+// offset `start`. No bounds checking is performed.
+pkpy_Str pkpy_Str__substr2(const pkpy_Str *self, int start, int size){
+    const char* from = pkpy_Str__data(self) + start;
+    pkpy_Str piece;
+    pkpy_Str__ctor2(&piece, from, size);
+    return piece;
+}
+
+// Returns a copy of `self` with the ASCII letters 'A'..'Z' lowercased;
+// every other byte is left as is.
+pkpy_Str pkpy_Str__lower(const pkpy_Str *self){
+    pkpy_Str retval = pkpy_Str__copy(self);
+    char* cur = (char*)pkpy_Str__data(&retval);
+    for(char* end = cur + retval.size; cur != end; cur++){
+        if(*cur >= 'A' && *cur <= 'Z') *cur += 32;
+    }
+    return retval;
+}
+
+// Returns a copy of `self` with the ASCII letters 'a'..'z' uppercased;
+// every other byte is left as is.
+pkpy_Str pkpy_Str__upper(const pkpy_Str *self){
+    pkpy_Str retval = pkpy_Str__copy(self);
+    char* cur = (char*)pkpy_Str__data(&retval);
+    for(char* end = cur + retval.size; cur != end; cur++){
+        if(*cur >= 'a' && *cur <= 'z') *cur -= 32;
+    }
+    return retval;
+}
+
+// Returns a copy of `self` in which every byte equal to `old` is replaced
+// by `new_`.
+pkpy_Str pkpy_Str__replace(const pkpy_Str *self, char old, char new_){
+    pkpy_Str retval = pkpy_Str__copy(self);
+    char* cur = (char*)pkpy_Str__data(&retval);
+    for(char* end = cur + retval.size; cur != end; cur++){
+        if(*cur != old) continue;
+        *cur = new_;
+    }
+    return retval;
+}
+
+// Returns a new heap-allocated string in which every non-overlapping
+// occurrence of `old` in `self` is replaced by `new_`.
+// The result takes ownership of the vector's buffer and is NUL-terminated
+// like every other pkpy_Str, so pkpy_Str__data() can be used as a C string.
+pkpy_Str pkpy_Str__replace2(const pkpy_Str *self, const pkpy_Str *old, const pkpy_Str *new_){
+    // An empty pattern matches at `start` without ever advancing it, which
+    // would loop forever while growing the buffer; return the input unchanged.
+    // NOTE(review): Python inserts `new_` between characters here — confirm
+    // whether callers need that behaviour.
+    if(old->size == 0) return pkpy_Str__copy(self);
+    const char* data = pkpy_Str__data(self);
+    c11_vector buffer;
+    c11_vector__ctor(&buffer, sizeof(char));
+    int start = 0;
+    while(true) {
+        int i = pkpy_Str__index(self, old, start);
+        if(i == -1) break;
+        // copy the untouched bytes before the match, then the replacement
+        c11_vector__extend(char, &buffer, data + start, i - start);
+        c11_vector__extend(char, &buffer, pkpy_Str__data(new_), new_->size);
+        start = i + old->size;
+    }
+    // copy the tail after the last match
+    c11_vector__extend(char, &buffer, data + start, self->size - start);
+    // terminator is stored in the buffer but not counted in `size`
+    c11_vector__push_back(char, &buffer, '\0');
+    pkpy_Str retval = {
+        .size = buffer.count - 1,
+        .is_ascii = self->is_ascii && old->is_ascii && new_->is_ascii,
+        .is_sso = false,
+        ._ptr = (char*)buffer.data,
+    };
+    return retval;
+}
+
+// Three-way comparison of two strings; see pkpy_Str__cmp2 for the result.
+int pkpy_Str__cmp(const pkpy_Str *self, const pkpy_Str *other){
+    const char* rhs = pkpy_Str__data(other);
+    return pkpy_Str__cmp2(self, rhs, other->size);
+}
+
+// Three-way comparison of `self` against the `size` bytes at `other`.
+// Returns <0, 0 or >0. The common prefix is compared bytewise (as unsigned
+// char); if it is equal, the shorter string orders first.
+// memcmp is used instead of strncmp because strings are length-prefixed and
+// may contain '\0' bytes: strncmp would stop at the first NUL and report
+// strings that differ after it as equal.
+int pkpy_Str__cmp2(const pkpy_Str *self, const char *other, int size){
+    int res = memcmp(pkpy_Str__data(self), other, PK_MIN(self->size, size));
+    if(res != 0) return res;
+    return self->size - size;
+}
+
+// Returns the `i`-th code point of `self` as a new string: translates the
+// code point index to a byte offset, then copies as many bytes as the lead
+// byte announces.
+pkpy_Str pkpy_Str__u8_getitem(const pkpy_Str *self, int i){
+    int offset = pkpy_Str__unicode_index_to_byte(self, i);
+    int width = pkpy_utils__u8len(pkpy_Str__data(self)[offset], false);
+    return pkpy_Str__substr2(self, offset, width);
+}
+
+// Returns a new heap-allocated string made of the code points of `self` at
+// indices start, start+step, ... up to (excluding) `stop`. `step` must be
+// non-zero. ASCII strings are sliced bytewise; otherwise each code point is
+// fetched with pkpy_Str__u8_getitem.
+// The result takes ownership of the vector's buffer and is NUL-terminated
+// like every other pkpy_Str.
+pkpy_Str pkpy_Str__u8_slice(const pkpy_Str *self, int start, int stop, int step){
+    assert(step != 0);
+    c11_vector buffer;
+    c11_vector__ctor(&buffer, sizeof(char));
+    if(self->is_ascii){
+        const char* p = pkpy_Str__data(self);
+        for (int i=start; step>0 ? i<stop : i>stop; i+=step) {
+            c11_vector__push_back(char, &buffer, p[i]);
+        }
+    }else{
+        for (int i=start; step>0 ? i<stop : i>stop; i+=step) {
+            pkpy_Str unicode = pkpy_Str__u8_getitem(self, i);
+            const char* p = pkpy_Str__data(&unicode);
+            for(int j = 0; j < unicode.size; j++){
+                c11_vector__push_back(char, &buffer, p[j]);
+            }
+            pkpy_Str__dtor(&unicode);
+        }
+    }
+    // terminator is stored in the buffer but not counted in `size`
+    c11_vector__push_back(char, &buffer, '\0');
+    pkpy_Str retval = {
+        .size = buffer.count - 1,
+        .is_ascii = self->is_ascii,
+        .is_sso = false,
+        ._ptr = (char*)buffer.data,
+    };
+    return retval;
+}
+
+// Number of code points in `self`, i.e. the code point index that
+// corresponds to the byte offset one past the end.
+int pkpy_Str__u8_length(const pkpy_Str *self){
+    int nbytes = self->size;
+    return pkpy_Str__byte_index_to_unicode(self, nbytes);
+}
+
+// Converts code point index `i` to a byte offset. For ASCII strings the two
+// coincide; otherwise the first `i` sequences are skipped, each by the
+// length its lead byte announces.
+int pkpy_Str__unicode_index_to_byte(const pkpy_Str* self, int i) {
+    if(self->is_ascii) return i;
+    const char* bytes = pkpy_Str__data(self);
+    int offset = 0;
+    for(; i > 0; i--) {
+        offset += pkpy_utils__u8len(bytes[offset], false);
+    }
+    return offset;
+}
+
+// Converts byte offset `n` to a code point index by counting, among the
+// first `n` bytes, those that are not UTF-8 continuation bytes (10xxxxxx).
+// For ASCII strings the offset is returned unchanged.
+int pkpy_Str__byte_index_to_unicode(const pkpy_Str* self, int n) {
+    if(self->is_ascii) return n;
+    const char* bytes = pkpy_Str__data(self);
+    int cnt = 0;
+    for(int i = 0; i < n; i++) {
+        if((bytes[i] & 0xC0) == 0x80) continue;
+        cnt++;
+    }
+    return cnt;
+}
+
+// Returns the byte offset of the first occurrence of `sub` in `self` at or
+// after byte offset `start`, or -1 if there is none. An empty `sub` matches
+// immediately at `start`.
+int pkpy_Str__index(const pkpy_Str *self, const pkpy_Str *sub, int start){
+    if(sub->size == 0) return start;
+    const char* haystack = pkpy_Str__data(self);
+    const char* needle = pkpy_Str__data(sub);
+    int last = self->size - sub->size;
+    int i = start;
+    while(i <= last){
+        if(memcmp(haystack + i, needle, sub->size) == 0) return i;
+        i++;
+    }
+    return -1;
+}
+
+// Counts the non-overlapping occurrences of `sub` in `self`. An empty `sub`
+// yields size + 1.
+int pkpy_Str__count(const pkpy_Str *self, const pkpy_Str *sub){
+    if(sub->size == 0) return self->size + 1;
+    int cnt = 0;
+    int pos = pkpy_Str__index(self, sub, 0);
+    while(pos != -1) {
+        cnt++;
+        // resume the search just past the match so occurrences do not overlap
+        pos = pkpy_Str__index(self, sub, pos + sub->size);
+    }
+    return cnt;
+}
+

+ 10 - 243
src/common/str.cpp

@@ -9,159 +9,20 @@
 
 namespace pkpy {
 
-int utf8len(unsigned char c, bool suppress) {
-    if((c & 0b10000000) == 0) return 1;
-    if((c & 0b11100000) == 0b11000000) return 2;
-    if((c & 0b11110000) == 0b11100000) return 3;
-    if((c & 0b11111000) == 0b11110000) return 4;
-    if((c & 0b11111100) == 0b11111000) return 5;
-    if((c & 0b11111110) == 0b11111100) return 6;
-    if(!suppress) PK_FATAL_ERROR("invalid utf8 char\n")
-    return 0;
-}
-
-#define PK_STR_ALLOCATE()                                                                                              \
-    if(this->size < (int)sizeof(this->_inlined)) {                                                                     \
-        this->data = this->_inlined;                                                                                   \
-    } else {                                                                                                           \
-        this->data = (char*)std::malloc(this->size + 1);                                                               \
-    }
-
-#define PK_STR_COPY_INIT(__s)                                                                                          \
-    for(int i = 0; i < this->size; i++) {                                                                              \
-        this->data[i] = __s[i];                                                                                        \
-        if(!isascii(__s[i])) is_ascii = false;                                                                         \
-    }                                                                                                                  \
-    this->data[this->size] = '\0';
-
-Str::Str() : size(0), is_ascii(true), data(_inlined) { _inlined[0] = '\0'; }
-
-Str::Str(int size, bool is_ascii) :
-    size(size), is_ascii(is_ascii){PK_STR_ALLOCATE()}
-
-    Str::Str(const std::string& s) :
-    size(s.size()), is_ascii(true){PK_STR_ALLOCATE() PK_STR_COPY_INIT(s)}
-
-    Str::Str(std::string_view s) :
-    size(s.size()), is_ascii(true){PK_STR_ALLOCATE() PK_STR_COPY_INIT(s)}
-
-    Str::Str(const char* s) :
-    size(strlen(s)), is_ascii(true){PK_STR_ALLOCATE() PK_STR_COPY_INIT(s)}
-
-    Str::Str(const char* s, int len) :
-    size(len), is_ascii(true){PK_STR_ALLOCATE() PK_STR_COPY_INIT(s)}
-
-    Str::Str(pair<char*, int> detached) : size(detached.second), is_ascii(true) {
-    this->data = detached.first;
+Str::Str(pair<char*, int> detached) {
+    this->size = detached.second;
+    this->is_ascii = true;
+    this->is_sso = false;
+    this->_ptr = detached.first;
     for(int i = 0; i < size; i++) {
-        if(!isascii(data[i])) {
+        if(!isascii(_ptr[i])) {
             is_ascii = false;
             break;
         }
     }
-    assert(data[size] == '\0');
-}
-
-Str::Str(const Str& other) : size(other.size), is_ascii(other.is_ascii) {
-    PK_STR_ALLOCATE()
-    std::memcpy(data, other.data, size);
-    data[size] = '\0';
-}
-
-Str::Str(Str&& other) : size(other.size), is_ascii(other.is_ascii) {
-    if(other.is_inlined()) {
-        data = _inlined;
-        for(int i = 0; i < size; i++)
-            _inlined[i] = other._inlined[i];
-        data[size] = '\0';
-    } else {
-        data = other.data;
-        // zero out `other`
-        other.data = other._inlined;
-        other.data[0] = '\0';
-        other.size = 0;
-    }
-}
-
-Str operator+ (const char* p, const Str& str) {
-    Str other(p);
-    return other + str;
-}
-
-std::ostream& operator<< (std::ostream& os, const Str& str) { return os << str.sv(); }
-
-bool operator< (const std::string_view other, const Str& str) { return other < str.sv(); }
-
-Str& Str::operator= (const Str& other) {
-    if(!is_inlined()) std::free(data);
-    size = other.size;
-    is_ascii = other.is_ascii;
-    PK_STR_ALLOCATE()
-    std::memcpy(data, other.data, size);
-    data[size] = '\0';
-    return *this;
-}
-
-Str Str::operator+ (const Str& other) const {
-    Str ret(size + other.size, is_ascii && other.is_ascii);
-    std::memcpy(ret.data, data, size);
-    std::memcpy(ret.data + size, other.data, other.size);
-    ret.data[ret.size] = '\0';
-    return ret;
-}
-
-Str Str::operator+ (const char* p) const {
-    Str other(p);
-    return *this + other;
-}
-
-bool Str::operator== (const Str& other) const {
-    if(size != other.size) return false;
-    return memcmp(data, other.data, size) == 0;
-}
-
-bool Str::operator!= (const Str& other) const {
-    if(size != other.size) return true;
-    return memcmp(data, other.data, size) != 0;
-}
-
-bool Str::operator== (const std::string_view other) const {
-    if(size != (int)other.size()) return false;
-    return memcmp(data, other.data(), size) == 0;
-}
-
-bool Str::operator!= (const std::string_view other) const {
-    if(size != (int)other.size()) return true;
-    return memcmp(data, other.data(), size) != 0;
-}
-
-bool Str::operator== (const char* p) const { return *this == std::string_view(p); }
-
-bool Str::operator!= (const char* p) const { return *this != std::string_view(p); }
-
-bool Str::operator< (const Str& other) const { return this->sv() < other.sv(); }
-
-bool Str::operator< (const std::string_view other) const { return this->sv() < other; }
-
-bool Str::operator> (const Str& other) const { return this->sv() > other.sv(); }
-
-bool Str::operator<= (const Str& other) const { return this->sv() <= other.sv(); }
-
-bool Str::operator>= (const Str& other) const { return this->sv() >= other.sv(); }
-
-Str::~Str() {
-    if(!is_inlined()) std::free(data);
-}
-
-Str Str::substr(int start, int len) const {
-    Str ret(len, is_ascii);
-    std::memcpy(ret.data, data + start, len);
-    ret.data[len] = '\0';
-    return ret;
+    assert(_ptr[size] == '\0');
 }
 
-Str Str::substr(int start) const { return substr(start, size - start); }
-
 Str Str::strip(bool left, bool right, const Str& chars) const {
     int L = 0;
     int R = u8_length();
@@ -177,6 +38,7 @@ Str Str::strip(bool left, bool right, const Str& chars) const {
 }
 
 Str Str::strip(bool left, bool right) const {
+    const char* data = pkpy_Str__data(this);
     if(is_ascii) {
         int L = 0;
         int R = size;
@@ -194,24 +56,6 @@ Str Str::strip(bool left, bool right) const {
     }
 }
 
-Str Str::lower() const {
-    std::string copy(data, size);
-    std::transform(copy.begin(), copy.end(), copy.begin(), [](unsigned char c) {
-        if('A' <= c && c <= 'Z') return c + ('a' - 'A');
-        return (int)c;
-    });
-    return Str(copy);
-}
-
-Str Str::upper() const {
-    std::string copy(data, size);
-    std::transform(copy.begin(), copy.end(), copy.begin(), [](unsigned char c) {
-        if('a' <= c && c <= 'z') return c - ('a' - 'A');
-        return (int)c;
-    });
-    return Str(copy);
-}
-
 Str Str::escape(bool single_quote) const {
     SStream ss;
     escape_(ss, single_quote);
@@ -220,7 +64,7 @@ Str Str::escape(bool single_quote) const {
 
 void Str::escape_(SStream& ss, bool single_quote) const {
     ss << (single_quote ? '\'' : '"');
-    for(int i = 0; i < length(); i++) {
+    for(int i = 0; i < size; i++) {
         char c = this->operator[] (i);
         switch(c) {
             case '"':
@@ -249,71 +93,6 @@ void Str::escape_(SStream& ss, bool single_quote) const {
     ss << (single_quote ? '\'' : '"');
 }
 
-int Str::index(const Str& sub, int start) const {
-    auto p = std::search(data + start, data + size, sub.data, sub.data + sub.size);
-    if(p == data + size) return -1;
-    return p - data;
-}
-
-Str Str::replace(char old, char new_) const {
-    Str copied = *this;
-    for(int i = 0; i < copied.size; i++) {
-        if(copied.data[i] == old) copied.data[i] = new_;
-    }
-    return copied;
-}
-
-Str Str::replace(const Str& old, const Str& new_, int count) const {
-    SStream ss;
-    int start = 0;
-    while(true) {
-        int i = index(old, start);
-        if(i == -1) break;
-        ss << substr(start, i - start);
-        ss << new_;
-        start = i + old.size;
-        if(count != -1 && --count == 0) break;
-    }
-    ss << substr(start, size - start);
-    return ss.str();
-}
-
-int Str::_unicode_index_to_byte(int i) const {
-    if(is_ascii) return i;
-    int j = 0;
-    while(i > 0) {
-        j += utf8len(data[j]);
-        i--;
-    }
-    return j;
-}
-
-int Str::_byte_index_to_unicode(int n) const {
-    if(is_ascii) return n;
-    int cnt = 0;
-    for(int i = 0; i < n; i++) {
-        if((data[i] & 0xC0) != 0x80) cnt++;
-    }
-    return cnt;
-}
-
-Str Str::u8_getitem(int i) const {
-    i = _unicode_index_to_byte(i);
-    return substr(i, utf8len(data[i]));
-}
-
-Str Str::u8_slice(int start, int stop, int step) const {
-    SStream ss;
-    if(is_ascii) {
-        PK_SLICE_LOOP(i, start, stop, step) ss << data[i];
-    } else {
-        PK_SLICE_LOOP(i, start, stop, step) ss << u8_getitem(i);
-    }
-    return ss.str();
-}
-
-int Str::u8_length() const { return _byte_index_to_unicode(size); }
-
 vector<std::string_view> Str::split(const Str& sep) const {
     vector<std::string_view> result;
     std::string_view tmp;
@@ -332,6 +111,7 @@ vector<std::string_view> Str::split(const Str& sep) const {
 
 vector<std::string_view> Str::split(char sep) const {
     vector<std::string_view> result;
+    const char* data = pkpy_Str__data(this);
     int i = 0;
     for(int j = 0; j < size; j++) {
         if(data[j] == sep) {
@@ -344,19 +124,6 @@ vector<std::string_view> Str::split(char sep) const {
     return result;
 }
 
-int Str::count(const Str& sub) const {
-    if(sub.empty()) return size + 1;
-    int cnt = 0;
-    int start = 0;
-    while(true) {
-        int i = index(sub, start);
-        if(i == -1) break;
-        cnt++;
-        start = i + sub.size;
-    }
-    return cnt;
-}
-
 static std::map<std::string_view, uint16_t>& _interned() {
     static std::map<std::string_view, uint16_t> interned;
     return interned;

+ 9 - 0
src/common/utils.c

@@ -0,0 +1,9 @@
+// Table of platform name strings. The trailing comment on each entry records
+// its array index.
+// NOTE(review): presumably looked up by a numeric platform id computed
+// elsewhere, which would make the entry order significant -- confirm against
+// the code that reads this table before reordering or inserting entries.
+const char* kPlatformStrings[] = {
+    "win32",       // 0
+    "emscripten",  // 1
+    "ios",         // 2
+    "darwin",      // 3
+    "android",     // 4
+    "linux",       // 5
+    "unknown"      // 6
+};

+ 61 - 0
src/common/vector.c

@@ -0,0 +1,61 @@
+#include "pocketpy/common/vector.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+// Initializes `self` as a fixed-size array of `count` elements of `elem_size`
+// bytes each, backed by a single malloc'd buffer whose contents are left
+// uninitialized. The malloc result is stored unchecked, so `self->data` is
+// NULL if the allocation fails (malloc may also return NULL for a zero size).
+void c11_array__ctor(c11_array* self, int elem_size, int count){
+    // Multiply in size_t: as an int product, `elem_size * count` overflows
+    // (undefined behavior) once the buffer would exceed INT_MAX bytes.
+    self->data = malloc((size_t)elem_size * (size_t)count);
+    self->count = count;
+    self->elem_size = elem_size;
+}
+
+// Releases the buffer owned by `self` and leaves the array empty:
+// `data` becomes NULL and `count` becomes 0. `elem_size` is not modified.
+void c11_array__dtor(c11_array* self){
+    void* buffer = self->data;
+    self->count = 0;
+    self->data = NULL;
+    free(buffer);
+}
+
+// Returns a new array with the same `elem_size` and `count` as `self`, whose
+// freshly allocated buffer holds a byte-for-byte copy of the elements.
+// The caller owns the result and releases it with c11_array__dtor.
+c11_array c11_array__copy(const c11_array* self){
+    c11_array retval;
+    c11_array__ctor(&retval, self->elem_size, self->count);
+    // Compute the byte count in size_t; the int product overflows for
+    // buffers larger than INT_MAX bytes.
+    size_t nbytes = (size_t)self->elem_size * (size_t)self->count;
+    // memcpy requires valid pointers even when asked to copy 0 bytes, and an
+    // empty array may legitimately have a NULL buffer, so skip the call.
+    if(nbytes > 0) memcpy(retval.data, self->data, nbytes);
+    return retval;
+}
+
+// Returns the address of element `index`, i.e. the buffer start advanced by
+// `elem_size * index` bytes. No bounds checking is performed.
+void* c11_array__at(c11_array* self, int index){
+    char* base = (char*)self->data;
+    int byte_offset = self->elem_size * index;
+    return base + byte_offset;
+}
+
+// Initializes `self` as an empty vector of `elem_size`-byte elements.
+// Nothing is allocated here: `data` starts as NULL with zero count/capacity.
+void c11_vector__ctor(c11_vector* self, int elem_size){
+    self->elem_size = elem_size;
+    self->capacity = 0;
+    self->count = 0;
+    self->data = NULL;
+}
+
+// Releases the vector's buffer and resets `data`, `count` and `capacity`
+// to their empty values. `elem_size` is not modified.
+void c11_vector__dtor(c11_vector* self){
+    // free(NULL) is a no-op, so a never-grown vector needs no special case.
+    free(self->data);
+    self->capacity = 0;
+    self->count = 0;
+    self->data = NULL;
+}
+
+// Returns a new vector with the same `elem_size` and `count` as `self` and a
+// byte-for-byte copy of its first `count` elements. The copy's buffer is
+// obtained through c11_vector__reserve(self->capacity). The caller owns the
+// result and releases it with c11_vector__dtor.
+c11_vector c11_vector__copy(const c11_vector* self){
+    c11_vector retval;
+    c11_vector__ctor(&retval, self->elem_size);
+    c11_vector__reserve(&retval, self->capacity);
+    // A vector that never grew has data == NULL and count == 0; memcpy must
+    // not be handed a NULL source even for a zero-length copy, so skip it.
+    if(self->count > 0){
+        // size_t math avoids int overflow for buffers above INT_MAX bytes.
+        memcpy(retval.data, self->data, (size_t)self->elem_size * (size_t)self->count);
+    }
+    retval.count = self->count;
+    return retval;
+}
+
+// Returns the address of element `index`, i.e. the buffer start advanced by
+// `elem_size * index` bytes. Neither `count` nor `capacity` is consulted.
+void* c11_vector__at(c11_vector* self, int index){
+    char* base = (char*)self->data;
+    int byte_offset = self->elem_size * index;
+    return base + byte_offset;
+}
+
+// Grows the vector's buffer so it can hold at least `capacity` elements.
+// Requests below 4 are raised to 4, and the buffer is never shrunk: a request
+// not larger than the current capacity returns without doing anything.
+// Existing elements are preserved by realloc. Aborts the process if the
+// allocation fails.
+void c11_vector__reserve(c11_vector* self, int capacity){
+    if(capacity < 4) capacity = 4;
+    if(capacity <= self->capacity) return;
+    // Multiply in size_t; the int product overflows above INT_MAX bytes.
+    void* grown = realloc(self->data, (size_t)self->elem_size * (size_t)capacity);
+    // Assigning realloc's result straight to self->data would leak the old
+    // buffer on failure and leave `capacity` describing memory that does not
+    // exist. Fail fast instead of deferring the crash to a later access.
+    if(grown == NULL) abort();
+    self->data = grown;
+    self->capacity = capacity;
+}

+ 2 - 1
src/compiler/lexer.cpp

@@ -1,6 +1,7 @@
 #include "pocketpy/compiler/lexer.hpp"
 #include "pocketpy/common/gil.hpp"
 #include "pocketpy/common/version.h"
+#include "pocketpy/common/str.h"
 
 #include <cstdarg>
 
@@ -107,7 +108,7 @@ Error* Lexer::eat_name() noexcept{
     curr_char--;
     while(true) {
         unsigned char c = peekchar();
-        int u8bytes = utf8len(c, true);
+        int u8bytes = pkpy_utils__u8len(c, true);
         if(u8bytes == 0) return SyntaxError("invalid char: %c", c);
         if(u8bytes == 1) {
             if(isalpha(c) || c == '_' || isdigit(c)) {

+ 1 - 1
src/interpreter/ceval.cpp

@@ -446,7 +446,7 @@ PyVar VM::__run_top_frame() {
                     case OP_BUILD_BYTES: {
                         const Str& s = CAST(Str&, TOP());
                         unsigned char* p = (unsigned char*)std::malloc(s.size);
-                        std::memcpy(p, s.data, s.size);
+                        std::memcpy(p, s.c_str(), s.size);
                         TOP() = VAR(Bytes(p, s.size));
                     }
                         DISPATCH()

+ 1 - 1
src/interpreter/iter.cpp

@@ -49,7 +49,7 @@ void StringIter::_register(VM* vm, PyObject* mod, PyObject* type) {
         Str& s = PK_OBJ_GET(Str, self.ref);
         if(self.i == s.size) return 0;
         int start = self.i;
-        int len = utf8len(s.data[self.i]);
+        int len = pkpy_utils__u8len(s[self.i], false);
         self.i += len;
         vm->s_data.push(VAR(s.substr(start, len)));
         return 1;

+ 1 - 1
src/modules/io.cpp

@@ -85,7 +85,7 @@ void FileIO::_register(VM* vm, PyObject* mod, PyObject* type) {
         FileIO& io = PK_OBJ_GET(FileIO, args[0]);
         if(io.is_text) {
             Str& s = CAST(Str&, args[1]);
-            fwrite(s.data, 1, s.length(), io.fp);
+            fwrite(s.c_str(), 1, s.length(), io.fp);
         } else {
             Bytes& buffer = CAST(Bytes&, args[1]);
             fwrite(buffer.data(), 1, buffer.size(), io.fp);

+ 5 - 6
src/pocketpy.cpp

@@ -539,7 +539,7 @@ void __init_builtins(VM* _vm) {
         double float_out;
         char* p_end;
         try {
-            float_out = std::strtod(s.data, &p_end);
+            float_out = std::strtod(s.c_str(), &p_end);
             if(p_end != s.end()) throw 1;
         } catch(...) { vm->ValueError("invalid literal for float(): " + s.escape()); }
         return VAR(float_out);
@@ -636,13 +636,12 @@ void __init_builtins(VM* _vm) {
         return VAR(self.u8_getitem(i));
     });
 
-    _vm->bind(_vm->_t(VM::tp_str), "replace(self, old, new, count=-1)", [](VM* vm, ArgsView args) {
+    _vm->bind(_vm->_t(VM::tp_str), "replace(self, old, new)", [](VM* vm, ArgsView args) {
         const Str& self = _CAST(Str&, args[0]);
         const Str& old = CAST(Str&, args[1]);
         if(old.empty()) vm->ValueError("empty substring");
         const Str& new_ = CAST(Str&, args[2]);
-        int count = CAST(int, args[3]);
-        return VAR(self.replace(old, new_, count));
+        return VAR(self.replace(old, new_));
     });
 
     _vm->bind(_vm->_t(VM::tp_str), "split(self, sep=' ')", [](VM* vm, ArgsView args) {
@@ -705,14 +704,14 @@ void __init_builtins(VM* _vm) {
         const Str& suffix = CAST(Str&, args[1]);
         int offset = self.length() - suffix.length();
         if(offset < 0) return vm->False;
-        bool ok = memcmp(self.data + offset, suffix.data, suffix.length()) == 0;
+        bool ok = memcmp(self.c_str() + offset, suffix.c_str(), suffix.length()) == 0;
         return VAR(ok);
     });
 
     _vm->bind_func(VM::tp_str, "encode", 1, [](VM* vm, ArgsView args) {
         const Str& self = _CAST(Str&, args[0]);
         Bytes retval(self.length());
-        std::memcpy(retval.data(), self.data, self.length());
+        std::memcpy(retval.data(), self.c_str(), self.length());
         return VAR(std::move(retval));
     });
 

+ 2 - 2
tests/04_str.py

@@ -39,8 +39,8 @@ assert t[-5:] == 'ow!!!'
 assert t[3:-3] == 's is string example....wow'
 assert s > q;assert s < r
 assert s.replace("o","") == "ftball"
-assert s.replace("o","O",1) == "fOotball"
-assert s.replace("foo","ball",1) == "balltball"
+assert s.replace("o","O") == "fOOtball"
+assert s.replace("foo","ball") == "balltball"
 assert s.startswith('f') == True;assert s.endswith('o') == False
 assert t.startswith('this') == True;