blueloveTH hai 1 ano
pai
achega
a8db1cc5e1

+ 2 - 2
include/pocketpy/common/smallmap.h

@@ -18,8 +18,8 @@ extern "C" {
 
 #define SMALLMAP_T__HEADER
 #define K c11_string
-#define V int
-#define TAG s2i
+#define V uint16_t
+#define TAG s2n
 #define less(a, b)  (c11_string__cmp((a.key), (b)) < 0)
 #define equal(a, b)  (c11_string__cmp((a), (b)) == 0)
 #include "pocketpy/xmacros/smallmap.h"

+ 4 - 0
include/pocketpy/common/str.h

@@ -32,6 +32,10 @@ PK_INLINE const char* pkpy_Str__data(const pkpy_Str* self){
     return self->is_sso ? self->_inlined : self->_ptr;
 }
 
+// Returns a non-owning c11_string view {data pointer, size} over the bytes of `self`.
+// The pointer is whatever pkpy_Str__data yields (`self->_inlined` or `self->_ptr`);
+// nothing is copied.
+PK_INLINE c11_string pkpy_Str__sv(const pkpy_Str* self){
+    return (c11_string){pkpy_Str__data(self), self->size};
+}
+
 void pkpy_Str__ctor(pkpy_Str* self, const char* data);
 void pkpy_Str__ctor2(pkpy_Str* self, const char* data, int size);
 void pkpy_Str__dtor(pkpy_Str* self);

+ 15 - 5
include/pocketpy/common/str.hpp

@@ -6,6 +6,7 @@
 #include "pocketpy/common/vector.h"
 #include "pocketpy/common/vector.hpp"
 #include "pocketpy/common/str.h"
+#include "pocketpy/common/strname.h"
 
 #include <cassert>
 #include <string_view>
@@ -261,9 +262,6 @@ struct StrName {
 
     StrName(const Str& s) : index(get(s.sv()).index) {}
 
-    std::string_view sv() const;
-    const char* c_str() const;
-
     bool empty() const { return index == 0; }
 
     Str escape() const { return Str(sv()).escape(); }
@@ -276,8 +274,20 @@ struct StrName {
 
     bool operator> (const StrName& other) const noexcept { return sv() > other.sv(); }
 
-    static StrName get(std::string_view s);
-    static uint32_t _pesudo_random_index;
+    // Interns `s` through the C name table (pkpy_StrName__map) and wraps the
+    // returned 16-bit index in a StrName.
+    inline static StrName get(std::string_view s){
+        uint16_t index = pkpy_StrName__map({s.data(), (int)s.size()});
+        return StrName(index);
+    }
+
+    // Text of this name, looked up by index with pkpy_StrName__rmap.
+    std::string_view sv() const{
+        c11_string s = pkpy_StrName__rmap(index);
+        return std::string_view(s.data, s.size);
+    }
+
+    // Raw character pointer for this name, as returned by pkpy_StrName__rmap.
+    // NOTE(review): presumably NUL-terminated because the table stores every name
+    // with a trailing '\0' -- confirm against pkpy_StrName__map.
+    const char* c_str() const{
+        c11_string s = pkpy_StrName__rmap(index);
+        return s.data;
+    }
 };
 
 struct SStream: pkpy_SStream {

+ 18 - 0
include/pocketpy/common/strname.h

@@ -0,0 +1,18 @@
+#pragma once
+
+#include <stdint.h>
+#include "pocketpy/common/str.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Maps `name` to its 16-bit index, registering the name first if it is not known yet.
+uint16_t pkpy_StrName__map(c11_string name);
+// Reverse lookup: returns the text registered under `index`.
+c11_string pkpy_StrName__rmap(uint16_t index);
+
+// Sets up the name tables; does nothing if they are already set up.
+void pkpy_StrName__initialize();
+// Destroys the name tables; does nothing if they were never set up.
+void pkpy_StrName__finalize();
+
+#ifdef __cplusplus
+}
+#endif

+ 4 - 0
include/pocketpy/common/vector.h

@@ -33,6 +33,7 @@ void c11_vector__dtor(c11_vector* self);
 c11_vector c11_vector__copy(const c11_vector* self);
 void c11_vector__reserve(c11_vector* self, int capacity);
 void c11_vector__clear(c11_vector* self);
+void* c11_vector__emplace(c11_vector* self);
 
 #define c11__getitem(T, self, index) (((T*)(self)->data)[index])
 #define c11__setitem(T, self, index, value) ((T*)(self)->data)[index] = value;
@@ -50,6 +51,9 @@ void c11_vector__clear(c11_vector* self);
         (self)->count--; \
     }while(0)
 
+// Last element of the vector, viewed as type T. No bounds check is performed:
+// with count == 0 this indexes element -1, so the vector must be non-empty.
+#define c11_vector__back(T, self) \
+    (((T*)(self)->data)[(self)->count - 1])
+
 #define c11_vector__extend(T, self, p, size) \
     do{ \
         c11_vector__reserve((self), (self)->count + (size)); \

+ 2 - 2
src/common/smallmap.c

@@ -12,8 +12,8 @@
 
 #define SMALLMAP_T__SOURCE
 #define K c11_string
-#define V int
-#define TAG s2i
+#define V uint16_t
+#define TAG s2n
 #define less(a, b)  (c11_string__cmp((a.key), (b)) < 0)
 #define equal(a, b)  (c11_string__cmp((a), (b)) == 0)
 #include "pocketpy/xmacros/smallmap.h"

+ 0 - 31
src/common/str.cpp

@@ -8,37 +8,6 @@
 
 namespace pkpy {
 
-static std::map<std::string_view, uint16_t>& _interned() {
-    static std::map<std::string_view, uint16_t> interned;
-    return interned;
-}
-
-static std::map<uint16_t, std::string>& _r_interned() {
-    static std::map<uint16_t, std::string> r_interned;
-    return r_interned;
-}
-
-std::string_view StrName::sv() const { return _r_interned()[index]; }
-const char* StrName::c_str() const { return _r_interned()[index].c_str(); }
-
-uint32_t StrName::_pesudo_random_index = 0;
-
-StrName StrName::get(std::string_view s) {
-    // TODO: PK_GLOBAL_SCOPE_LOCK()
-    auto it = _interned().find(s);
-    if(it != _interned().end()) return StrName(it->second);
-    // generate new index
-    // https://github.com/python/cpython/blob/3.12/Objects/dictobject.c#L175
-    uint16_t index = ((_pesudo_random_index * 5) + 1) & 65535;
-    if(index == 0) PK_FATAL_ERROR("StrName index overflow\n")
-    auto res = _r_interned().emplace(index, s);
-    assert(res.second);
-    s = std::string_view(res.first->second);
-    _interned()[s] = index;
-    _pesudo_random_index = index;
-    return StrName(index);
-}
-
 // unary operators
 const StrName __repr__ = StrName::get("__repr__");
 const StrName __str__ = StrName::get("__str__");

+ 57 - 0
src/common/strname.c

@@ -0,0 +1,57 @@
+#include "pocketpy/common/strname.h"
+#include "pocketpy/common/smallmap.h"
+#include "pocketpy/common/utils.h"
+#include "pocketpy/common/vector.h"
+
+#include <stdio.h>
+
+// TODO: use a more efficient data structure
+static c11_smallmap_s2n _interned;
+static c11_vector/*T=char* */ _r_interned;
+static bool _initialized = false;
+
+// Sets up the interning tables. Safe to call repeatedly: once the tables are live
+// further calls return immediately. Also invoked lazily by pkpy_StrName__map.
+void pkpy_StrName__initialize(){
+    if(_initialized) return;
+    c11_smallmap_s2n__ctor(&_interned);
+    // _r_interned stores `char*` elements (see the push in pkpy_StrName__map),
+    // so the element size must be sizeof(char*), not sizeof(c11_string).
+    c11_vector__ctor(&_r_interned, sizeof(char*));
+    _initialized = true;
+}
+
+// Releases everything owned by the interning tables and marks them as torn down,
+// so that a later initialize (explicit or lazy) starts again from fresh containers.
+// Indices and pointers handed out before this call must not be used afterwards.
+void pkpy_StrName__finalize(){
+    if(!_initialized) return;
+    // The map keys point into the interned strings, so drop the map first.
+    c11_smallmap_s2n__dtor(&_interned);
+    // Free each stored string. c11__getitem yields the stored char* itself;
+    // c11__at would yield the address of the slot inside the vector buffer.
+    for(int i=0; i<_r_interned.count; i++){
+        free(c11__getitem(char*, &_r_interned, i));
+    }
+    c11_vector__dtor(&_r_interned);
+    _initialized = false;
+}
+
+// Maps `name` to its 16-bit index, interning it on first use.
+// Returned indices are 1-based; 0 is used internally as the "not found" value,
+// so it is never handed out. Aborts via PK_FATAL_ERROR when the table is full
+// or when the copy of the name cannot be allocated.
+uint16_t pkpy_StrName__map(c11_string name){
+    // TODO: PK_GLOBAL_SCOPE_LOCK()
+    if(!_initialized){
+        pkpy_StrName__initialize(); // lazy init
+    }
+    uint16_t index = c11_smallmap_s2n__get(&_interned, name, 0);
+    if(index != 0) return index;
+    // generate new index
+    if(_interned.count > 65530){
+        PK_FATAL_ERROR("StrName index overflow\n");
+    }
+    // NOTE: each name gets its own heap buffer so that the pointer stored as the
+    // map key (and returned by pkpy_StrName__rmap) stays valid when the vector grows
+    char* p = malloc(name.size + 1);
+    if(p == NULL){
+        PK_FATAL_ERROR("StrName allocation failed\n");
+    }
+    memcpy(p, name.data, name.size);
+    p[name.size] = '\0';
+    c11_vector__push(char*, &_r_interned, p);
+    index = _r_interned.count;  // 1-based
+    // save to _interned
+    c11_smallmap_s2n__set(&_interned, (c11_string){p, name.size}, index);
+    assert(_interned.count == _r_interned.count);
+    return index;
+}
+
+// Reverse lookup: returns the text registered under `index`.
+// Indices are 1-based, so slot `index - 1` of _r_interned is read. The tables must
+// already be initialized and `index` must be in range; both are checked only by assert.
+// The size is recomputed with strlen, so a name containing an embedded '\0'
+// would come back truncated at that byte.
+c11_string pkpy_StrName__rmap(uint16_t index){
+    assert(_initialized);
+    assert(index > 0 && index <= _interned.count);
+    char* p = c11__getitem(char*, &_r_interned, index - 1);
+    return (c11_string){p, strlen(p)};
+}

+ 7 - 0
src/common/vector.c

@@ -55,3 +55,10 @@ void c11_vector__reserve(c11_vector* self, int capacity){
 void c11_vector__clear(c11_vector* self){
     self->count = 0;
 }
+
+// Appends one uninitialized slot and returns a pointer to it; the caller is
+// expected to write an element of `elem_size` bytes there.
+// When the vector is full, a doubled capacity is requested before the slot is taken.
+// NOTE(review): with capacity == 0 the request is reserve(0); this relies on
+// c11_vector__reserve enforcing a non-zero minimum -- confirm.
+void* c11_vector__emplace(c11_vector* self){
+    if(self->count == self->capacity) c11_vector__reserve(self, self->capacity*2);
+    void* p = (char*)self->data + self->elem_size * self->count;
+    self->count++;
+    return p;
+}

+ 10 - 10
src/compiler/lexer.cpp

@@ -620,14 +620,14 @@ Error* Lexer::precompile(Str* out) noexcept{
     ss << "pkpy:" PK_VERSION << '\n';       // L1: version string
     ss << (int)src->mode << '\n';           // L2: mode
 
-    c11_smallmap_s2i token_indices;
-    c11_smallmap_s2i__ctor(&token_indices);
+    c11_smallmap_s2n token_indices;
+    c11_smallmap_s2n__ctor(&token_indices);
 
     for(auto token: nexts) {
         if(is_raw_string_used(token.type)) {
             c11_string token_sv = {token.start, token.length};
-            if(!c11_smallmap_s2i__contains(&token_indices, token_sv)) {
-                c11_smallmap_s2i__set(&token_indices, token_sv, 0);
+            if(!c11_smallmap_s2n__contains(&token_indices, token_sv)) {
+                c11_smallmap_s2n__set(&token_indices, token_sv, 0);
                 // assert no '\n' in token.sv()
                 for(char c: token.sv())
                     assert(c != '\n');
@@ -635,9 +635,9 @@ Error* Lexer::precompile(Str* out) noexcept{
         }
     }
     ss << "=" << (int)token_indices.count << '\n';  // L3: raw string count
-    int index = 0;
+    uint16_t index = 0;
     for(int i=0; i<token_indices.count; i++){
-        c11_smallmap_entry_s2i* kv = c11__at(c11_smallmap_entry_s2i, &token_indices, i);
+        c11_smallmap_entry_s2n* kv = c11__at(c11_smallmap_entry_s2n, &token_indices, i);
         ss << kv->key << '\n';  // L4: raw strings
         kv->value = index++;
     }
@@ -647,9 +647,9 @@ Error* Lexer::precompile(Str* out) noexcept{
         const Token& token = nexts[i];
         ss << (int)token.type << ',';
         if(is_raw_string_used(token.type)) {
-            int index = c11_smallmap_s2i__get(&token_indices, {token.start, token.length}, -1);
-            assert(index >= 0);
-            ss << index << ',';
+            uint16_t *p = c11_smallmap_s2n__try_get(&token_indices, {token.start, token.length});
+            assert(p != NULL);
+            ss << (int)*p << ',';
         }
         if(i > 0 && nexts[i - 1].line == token.line)
             ss << ',';
@@ -677,7 +677,7 @@ Error* Lexer::precompile(Str* out) noexcept{
             token.value);
     }
     *out = ss.str();
-    c11_smallmap_s2i__dtor(&token_indices);
+    c11_smallmap_s2n__dtor(&token_indices);
     return NULL;
 }
 

+ 1 - 0
src/interpreter/vm.cpp

@@ -83,6 +83,7 @@ struct JsonSerializer {
 
 VM::VM(bool enable_os) : heap(this), enable_os(enable_os) {
     Pools_initialize();
+    pkpy_StrName__initialize();
     
     this->vm = this;
     this->__c.error = nullptr;

+ 3 - 1
src/objects/namedict.cpp

@@ -116,7 +116,9 @@ bool NameDict::contains(StrName key) const {
 
+// Returns the value stored under `key`. A missing key is reported through
+// PK_FATAL_ERROR, with both the numeric index and the escaped name in the message.
 PyVar NameDict::operator[] (StrName key) const {
     PyVar* val = try_get_2_likely_found(key);
-    if(val == nullptr) PK_FATAL_ERROR("NameDict key not found: %s\n", key.escape().c_str())
+    if(val == nullptr){
+        PK_FATAL_ERROR("NameDict key not found: %d (%s)\n", (int)key.index, key.escape().c_str())
+    }
     return *val;
 }