Explorar el Código

improve memory profile

blueloveTH hace 3 meses
padre
commit
ea14ec8463

+ 2 - 0
include/pocketpy/common/algorithm.h

@@ -52,3 +52,5 @@ bool c11__stable_sort(void* ptr,
                       int elem_size,
                       int (*f_lt)(const void* a, const void* b, void* extra),
                       void* extra);
+
+int c11__bit_length(unsigned long x);

+ 2 - 1
include/pocketpy/interpreter/heap.h

@@ -8,6 +8,7 @@ typedef struct ManagedHeap {
     MultiPool small_objects;
     c11_vector /* PyObject_p */ large_objects;
     c11_vector /* PyObject_p */ gc_roots;
+    size_t large_total_size;
 
     int freed_ma[3];
     int gc_threshold;  // threshold for gc_counter
@@ -44,7 +45,7 @@ void ManagedHeap__dtor(ManagedHeap* self);
 ManagedHeapSwpetInfo* ManagedHeapSwpetInfo__new();
 void ManagedHeapSwpetInfo__delete(ManagedHeapSwpetInfo* self);
 
-void ManagedHeap__collect_hint(ManagedHeap* self);
+int ManagedHeap__collect_hint(ManagedHeap* self);
 int ManagedHeap__collect(ManagedHeap* self);
 int ManagedHeap__sweep(ManagedHeap* self, ManagedHeapSwpetInfo* out_info);
 

+ 1 - 0
include/pocketpy/interpreter/objectpool.h

@@ -34,4 +34,5 @@ void* MultiPool__alloc(MultiPool* self, int size);
 int MultiPool__sweep_dealloc(MultiPool* self, int* out_types);
 void MultiPool__ctor(MultiPool* self);
 void MultiPool__dtor(MultiPool* self);
+size_t MultiPool__total_allocated_bytes(MultiPool* self);
 c11_string* MultiPool__summary(MultiPool* self);

+ 1 - 1
include/pocketpy/objects/object.h

@@ -5,7 +5,7 @@
 
 typedef struct PyObject {
     py_Type type;  // we have a duplicated type here for convenience
-    // bool _;
+    uint8_t size_8b;
     bool gc_marked;
     int slots;  // number of slots in the object
     char flex[];

+ 1 - 1
include/typings/gc.pyi

@@ -15,7 +15,7 @@ def collect() -> int:
     Returns an integer indicating the number of unreachable objects found.
     """
 
-def collect_hint() -> None:
+def collect_hint() -> int:
     """Hint the garbage collector to run a collection.
 
     The typical usage scenario for this function is in frame-driven games,

+ 2 - 2
include/typings/pkpy.pyi

@@ -14,8 +14,8 @@ class TValue[T]:
     
 configmacros: dict[str, int]
 
-def memory_usage() -> str:
-    """Return a summary of the memory usage."""
+def memory_usage_info() -> str: ...
+def memory_usage() -> int: ...
 
 
 def currentvm() -> int:

+ 1 - 1
src/common/chunkedvector.c

@@ -6,7 +6,7 @@
 #include <intrin.h>
 #endif
 
-PK_INLINE static int c11__bit_length(unsigned long x) {
+PK_INLINE int c11__bit_length(unsigned long x) {
 #if(defined(__clang__) || defined(__GNUC__))
     return x == 0 ? 0 : (int)sizeof(unsigned long) * 8 - __builtin_clzl(x);
 #elif defined(_MSC_VER)

+ 30 - 3
src/interpreter/heap.c

@@ -6,10 +6,30 @@
 #include "pocketpy/pocketpy.h"
 #include <assert.h>
 
+static uint8_t encode_size_8b(int size, int* out_size) {  // pack `size` into one lossy byte; also writes the quantized size the byte stands for to *out_size
+    int bit_length = c11__bit_length(size);  // NOTE(review): size <= 0 would make the shift below invalid; presumably callers always pass a positive size — confirm
+    int min_val = 1 << (bit_length - 1);  // power of two with the same bit length as `size` (lower bound of its bucket)
+    int gap = min_val;  // bucket width: distance from min_val to the next power of two
+    float ratio = (float)(size - min_val) / gap;  // fractional position of `size` inside the bucket
+    int ratio_3bit = (int)(ratio * 7.999f);  // 7.999f rather than 8 so the truncated result stays within 3 bits (0..7)
+    *out_size = min_val + (int)(gap * ((float)ratio_3bit / 7.999f));  // same formula as decode_size_8b, so the amount reported here matches what decoding the byte yields
+    return (uint8_t)((bit_length << 3) | ratio_3bit);  // layout: upper bits = bit length, low 3 bits = bucket position
+}
+
+static int decode_size_8b(uint8_t byte) {  // expand a byte produced by encode_size_8b back into its quantized size
+    int bit_length = byte >> 3;  // upper bits hold the bit length of the original size
+    int ratio_3bit = byte & 0x07;  // low 3 bits hold the position inside the power-of-two bucket
+    int min_val = 1 << (bit_length - 1);  // NOTE(review): byte == 0 gives bit_length 0 and an invalid shift; presumably only called for bytes from encode_size_8b — confirm
+    int gap = min_val;  // bucket width, equal to its lower bound
+    float ratio = (float)ratio_3bit / 7.999f;  // inverse of the scaling applied when encoding
+    return min_val + (int)(gap * ratio);  // quantized size: bucket lower bound plus the scaled offset
+}
+
 void ManagedHeap__ctor(ManagedHeap* self) {
     MultiPool__ctor(&self->small_objects);
     c11_vector__ctor(&self->large_objects, sizeof(PyObject*));
     c11_vector__ctor(&self->gc_roots, sizeof(PyObject*));
+    self->large_total_size = 0;
 
     for(int i = 0; i < c11__count_array(self->freed_ma); i++) {
         self->freed_ma[i] = PK_GC_MIN_THRESHOLD;
@@ -107,8 +127,8 @@ static void ManagedHeap__fire_debug_callback_stop(ManagedHeap* self,
     }
 }
 
-void ManagedHeap__collect_hint(ManagedHeap* self) {
-    if(self->gc_counter < self->gc_threshold) return;
+int ManagedHeap__collect_hint(ManagedHeap* self) {
+    if(self->gc_counter < self->gc_threshold) return 0;
     self->gc_counter = 0;
 
     ManagedHeapSwpetInfo* out_info = NULL;
@@ -127,7 +147,7 @@ void ManagedHeap__collect_hint(ManagedHeap* self) {
     self->freed_ma[1] = self->freed_ma[2];
     self->freed_ma[2] = freed;
     int avg_freed = (self->freed_ma[0] + self->freed_ma[1] + self->freed_ma[2]) / 3;
-    const int upper = PK_GC_MIN_THRESHOLD * 16;
+    const int upper = PK_GC_MIN_THRESHOLD * 8;
     const int lower = PK_GC_MIN_THRESHOLD / 2;
     float free_ratio = (float)avg_freed / self->gc_threshold;
     int new_threshold = self->gc_threshold * (1.5f / free_ratio);
@@ -145,6 +165,7 @@ void ManagedHeap__collect_hint(ManagedHeap* self) {
         ManagedHeap__fire_debug_callback_stop(self, out_info);
         ManagedHeapSwpetInfo__delete(out_info);
     }
+    return freed;
 }
 
 int ManagedHeap__collect(ManagedHeap* self) {
@@ -187,6 +208,7 @@ int ManagedHeap__sweep(ManagedHeap* self, ManagedHeapSwpetInfo* out_info) {
             large_living_count++;
         } else {
             if(out_info) out_info->large_types[obj->type]++;
+            self->large_total_size -= decode_size_8b(obj->size_8b);
             PyObject__dtor(obj);
             PK_FREE(obj);
         }
@@ -206,11 +228,16 @@ PyObject* ManagedHeap__gcnew(ManagedHeap* self, py_Type type, int slots, int uds
     // header + slots + udsize
     int size = sizeof(PyObject) + PK_OBJ_SLOTS_SIZE(slots) + udsize;
     PyObject* obj = MultiPool__alloc(&self->small_objects, size);
+    uint8_t size_8b = 0;
     if(obj == NULL) {
         obj = PK_MALLOC(size);
+        int quantized_size;
+        size_8b = encode_size_8b(size, &quantized_size);
+        self->large_total_size += quantized_size;
         c11_vector__push(PyObject*, &self->large_objects, obj);
     }
     obj->type = type;
+    obj->size_8b = size_8b;
     obj->gc_marked = false;
     obj->slots = slots;
 

+ 10 - 0
src/interpreter/objectpool.c

@@ -177,6 +177,16 @@ void MultiPool__dtor(MultiPool* self) {
     }
 }
 
+size_t MultiPool__total_allocated_bytes(MultiPool* self) {  // total bytes across all arenas of every pool: arena count times kPoolArenaSize
+    size_t total = 0;
+    for(int i = 0; i < kMultiPoolCount; i++) {
+        Pool* item = &self->pools[i];
+        int arena_count = item->arenas.length + item->no_free_arenas.length;  // counts arenas from both of the pool's lists
+        total += (size_t)arena_count * kPoolArenaSize;  // widen to size_t before multiplying so the product is not computed in int
+    }
+    return total;
+}
+
 c11_string* MultiPool__summary(MultiPool* self) {
     c11_sbuf sbuf;
     c11_sbuf__ctor(&sbuf);

+ 4 - 4
src/modules/gc.c

@@ -27,16 +27,16 @@ static bool gc_isenabled(int argc, py_Ref argv) {
 static bool gc_collect(int argc, py_Ref argv) {  // binding for gc.collect(): runs a collection and returns its integer result
     PY_CHECK_ARGC(0);  // presumably rejects the call unless it has zero arguments — confirm against the macro
     ManagedHeap* heap = &pk_current_vm->heap;
-    int res = ManagedHeap__collect(heap);
-    py_newint(py_retval(), res);
+    int freed = ManagedHeap__collect(heap);  // the gc.pyi stub describes this value as the number of unreachable objects found
+    py_newint(py_retval(), freed);  // store the count as the Python return value
     return true;
 }
 
 static bool gc_collect_hint(int argc, py_Ref argv) {  // binding for gc.collect_hint(): forwards to ManagedHeap__collect_hint
     PY_CHECK_ARGC(0);  // presumably rejects the call unless it has zero arguments — confirm against the macro
     ManagedHeap* heap = &pk_current_vm->heap;
-    ManagedHeap__collect_hint(heap);
-    py_newnone(py_retval());
+    int freed = ManagedHeap__collect_hint(heap);  // ManagedHeap__collect_hint returns 0 when gc_counter is below gc_threshold
+    py_newint(py_retval(), freed);  // now returns an int where the removed lines returned None
     return true;
 }
 

+ 21 - 0
src/modules/pkpy.c

@@ -37,17 +37,37 @@ DEF_TVALUE_METHODS(float, _f64)
 DEF_TVALUE_METHODS(vec2, _vec2)
 DEF_TVALUE_METHODS(vec2i, _vec2i)
 
+
 static bool pkpy_memory_usage(int argc, py_Ref argv) {  // binding for pkpy.memory_usage(): returns a byte total as an int
+    PY_CHECK_ARGC(0);  // presumably rejects the call unless it has zero arguments — confirm against the macro
+    ManagedHeap* heap = &pk_current_vm->heap;
+    py_i64 size = MultiPool__total_allocated_bytes(&heap->small_objects);  // bytes held by the small-object pool arenas
+    size += heap->large_total_size;  // running total of quantized large-object sizes, so an approximation rather than exact malloc sizes
+    size += sizeof(VM);  // size of the VM struct itself
+    py_newint(py_retval(), size);
+    return true;
+}
+
+static bool pkpy_memory_usage_info(int argc, py_Ref argv) {
     PY_CHECK_ARGC(0);
     ManagedHeap* heap = &pk_current_vm->heap;
     c11_string* small_objects_usage = MultiPool__summary(&heap->small_objects);
     int large_object_count = heap->large_objects.length;
     c11_sbuf buf;
     c11_sbuf__ctor(&buf);
+    c11_sbuf__write_cstr(&buf, "== pre-allocated ==\n");
+    double vm_size_mb = sizeof(VM) / 1024.0 / 1024.0;
+    c11_sbuf__write_cstr(&buf, "VM: ");
+    c11_sbuf__write_f64(&buf, vm_size_mb, 2);
+    c11_sbuf__write_cstr(&buf, " MB\n");
     c11_sbuf__write_cstr(&buf, "== heap.small_objects ==\n");
     c11_sbuf__write_cstr(&buf, small_objects_usage->data);
     c11_sbuf__write_cstr(&buf, "== heap.large_objects ==\n");
     pk_sprintf(&buf, "len(large_objects)=%d\n", large_object_count);
+    double large_total_size_mb = (size_t)(heap->large_total_size / 1024) / 1024.0;
+    c11_sbuf__write_cstr(&buf, "Total: ~");
+    c11_sbuf__write_f64(&buf, large_total_size_mb, 2);
+    c11_sbuf__write_cstr(&buf, " MB\n");
     c11_sbuf__write_cstr(&buf, "== heap.gc ==\n");
     pk_sprintf(&buf, "gc_counter=%d\n", heap->gc_counter);
     pk_sprintf(&buf, "gc_threshold=%d", heap->gc_threshold);
@@ -522,6 +542,7 @@ void pk__add_module_pkpy() {
     py_pop();
 
     py_bindfunc(mod, "memory_usage", pkpy_memory_usage);
+    py_bindfunc(mod, "memory_usage_info", pkpy_memory_usage_info);
 
     py_bindfunc(mod, "currentvm", pkpy_currentvm);