blueloveTH 2 年 前
コミット
9634e5c402
13 ファイル変更205 行追加98 行削除
  1. 4 0
      src/codeobject.h
  2. 32 2
      src/common.h
  3. 1 1
      src/compiler.h
  4. 2 2
      src/error.h
  5. 11 0
      src/frame.h
  6. 71 24
      src/gc.h
  7. 17 0
      src/iter.h
  8. 1 6
      src/namedict.h
  9. 14 11
      src/obj.h
  10. 2 2
      src/parser.h
  11. 2 2
      src/pocketpy.h
  12. 15 1
      src/ref.h
  13. 33 47
      src/vm.h

+ 4 - 0
src/codeobject.h

@@ -89,6 +89,10 @@ struct CodeObject {
         return consts.size() - 1;
     }
 
+    void _mark() const {   // GC mark hook: visits every entry of `consts`
+        for(PyObject* v : consts) OBJ_MARK(v);   // OBJ_MARK calls v->_mark() unless is_tagged(v)
+    }
+
     /************************************************/
     int _curr_block_i = 0;
     int _rvalue = 0;

+ 32 - 2
src/common.h

@@ -9,7 +9,6 @@
 
 #include <sstream>
 #include <regex>
-#include <stack>
 #include <cmath>
 #include <cstdlib>
 #include <stdexcept>
@@ -18,7 +17,6 @@
 #include <cstring>
 #include <chrono>
 #include <string_view>
-#include <queue>
 #include <iomanip>
 #include <memory>
 #include <functional>
@@ -28,6 +26,7 @@
 #include <algorithm>
 #include <random>
 #include <initializer_list>
+#include <list>
 
 #define PK_VERSION				"0.9.5"
 #define PK_EXTRA_CHECK 			0
@@ -101,4 +100,35 @@ inline bool is_both_int(PyObject* a, PyObject* b) noexcept {
     return is_int(a) && is_int(b);
 }
 
+
+template <typename T>
+class queue{   // FIFO wrapper over std::list<T>: push() appends at the back, pop()/front() act on the front
+	std::list<T> list;
+public:
+	void push(const T& t){ list.push_back(t); }   // copies t to the back
+	void push(T&& t){ list.push_back(std::move(t)); }   // moves t to the back
+	void pop(){ list.pop_front(); }   // drops the front element without returning it; no emptiness check
+	void clear(){ list.clear(); }
+	bool empty() const { return list.empty(); }
+	size_t size() const { return list.size(); }
+	T& front(){ return list.front(); }   // no emptiness check: std::list::front() on an empty list is undefined
+	const T& front() const { return list.front(); }
+	const std::list<T>& data() const { return list; }   // read-only access to the underlying list, front element first
+};
+
+template <typename T>
+class stack{   // LIFO wrapper over std::vector<T>: push()/pop()/top() all act on the back of the vector
+	std::vector<T> vec;
+public:
+	void push(const T& t){ vec.push_back(t); }   // copies t onto the top
+	void push(T&& t){ vec.push_back(std::move(t)); }   // moves t onto the top
+	void pop(){ vec.pop_back(); }   // drops the top element without returning it; no emptiness check
+	void clear(){ vec.clear(); }
+	bool empty() const { return vec.empty(); }
+	size_t size() const { return vec.size(); }
+	T& top(){ return vec.back(); }   // no emptiness check: std::vector::back() on an empty vector is undefined
+	const T& top() const { return vec.back(); }
+	const std::vector<T>& data() const { return vec; }   // read-only access to the underlying vector, bottom element first
+};
+
 } // namespace pkpy

+ 1 - 1
src/compiler.h

@@ -21,7 +21,7 @@ enum StringType { NORMAL_STRING, RAW_STRING, F_STRING };
 
 class Compiler {
     std::unique_ptr<Parser> parser;
-    std::stack<CodeObject_> codes;
+    stack<CodeObject_> codes;
     int lexing_count = 0;
     bool used = false;
     VM* vm;

+ 2 - 2
src/error.h

@@ -72,7 +72,7 @@ struct SourceData {
 class Exception {
     StrName type;
     Str msg;
-    std::stack<Str> stacktrace;
+    stack<Str> stacktrace;
 public:
     Exception(StrName type, Str msg): type(type), msg(msg) {}
     bool match_type(StrName type) const { return this->type == type;}
@@ -84,7 +84,7 @@ public:
     }
 
     Str summary() const {
-        std::stack<Str> st(stacktrace);
+        stack<Str> st(stacktrace);
         StrStream ss;
         if(is_re) ss << "Traceback (most recent call last):\n";
         while(!st.empty()) { ss << st.top() << '\n'; st.pop(); }

+ 11 - 0
src/frame.h

@@ -159,6 +159,17 @@ struct Frame {
         for(int i=n-1; i>=0; i--) v[i] = pop();
         return v;
     }
+
+    void _mark() const {   // GC mark hook: visits the objects this frame refers to
+        for(PyObject* obj : _data) OBJ_MARK(obj);
+        if(_locals != nullptr) _locals->_mark();   // optional
+        if(_closure != nullptr) _closure->_mark();   // optional
+        OBJ_MARK(_module);   // no null check here, unlike _locals/_closure
+        for(auto& p : s_try_block){
+            for(PyObject* obj : p.second) OBJ_MARK(obj);   // p.second is iterated as a range of PyObject*
+        }
+        co->_mark();   // co is dereferenced unconditionally
+    }
 };
 
 }; // namespace pkpy

+ 71 - 24
src/gc.h

@@ -1,37 +1,84 @@
 #pragma once
 
 #include "obj.h"
+#include "codeobject.h"
+#include "namedict.h"
 
 namespace pkpy {
-    struct ManagedHeap{
-        std::vector<PyObject*> heap;
+struct ManagedHeap{   // tracks every object created through gcnew()/_new() and deletes unmarked ones in sweep()
+    std::vector<PyObject*> gen;   // all objects currently tracked by this heap
 
-        void _add(PyObject* obj){
-            obj->gc.enabled = true;
-            heap.push_back(obj);
-        }
+    template<typename T>
+    PyObject* gcnew(Type type, T&& val){   // allocates a Py_<decay_t<T>> holding val, records it in `gen`, returns it
+        PyObject* obj = new Py_<std::decay_t<T>>(type, std::forward<T>(val));
+        gen.push_back(obj);
+        return obj;
+    }
+
+    template<typename T>
+    PyObject* _new(Type type, T&& val){   // thin forwarder to gcnew<T>; the object is recorded in `gen` the same way
+        return gcnew<T>(type, std::forward<T>(val));
+    }
 
-        void sweep(){
-            std::vector<PyObject*> alive;
-            for(PyObject* obj: heap){
-                if(obj->gc.marked){
-                    obj->gc.marked = false;
-                    alive.push_back(obj);
-                }else{
-                    delete obj;
-                }
+    int sweep(){   // deletes every tracked object whose gc.marked is false; returns how many were deleted
+        std::vector<PyObject*> alive;
+        for(PyObject* obj: gen){
+            if(obj->gc.marked){
+                obj->gc.marked = false;   // reset the flag so the next collection starts unmarked
+                alive.push_back(obj);
+            }else{
+                delete obj;
             }
-            heap.clear();
-            heap.swap(alive);
         }
+        int freed = gen.size() - alive.size();   // size_t difference narrowed to int
+        gen.clear();
+        gen.swap(alive);   // `gen` now holds only the survivors
+        return freed;
+    }
 
-        void collect(VM* vm){
-            std::vector<PyObject*> roots = get_roots(vm);
-            for(PyObject* obj: roots) obj->mark();
-            sweep();
-        }
+    int collect(VM* vm){   // one full cycle: mark(vm) followed by sweep(); returns sweep()'s count
+        mark(vm);
+        return sweep();
+    }
+
+    void mark(VM* vm);   // declared only; the definition is not part of this struct body
+};
+
+
+inline void NameDict::_mark(){   // GC mark hook: marks the value of every slot whose key is non-empty
+    for(uint16_t i=0; i<_capacity; i++){
+        if(_items[i].first.empty()) continue;   // slots with an empty key are skipped
+        OBJ_MARK(_items[i].second);
+    }
+}
+
+template<> inline void _mark<List>(List& t){   // GC mark hook for List payloads
+    for(PyObject* obj: t) OBJ_MARK(obj);   // every element is visited
+}
+
+template<> inline void _mark<Tuple>(Tuple& t){   // GC mark hook for Tuple payloads
+    for(int i=0; i<t.size(); i++) OBJ_MARK(t[i]);   // index-based walk over all t.size() elements
+}
+
+template<> inline void _mark<Function>(Function& t){   // GC mark hook for Function payloads
+    t.code->_mark();   // code is dereferenced unconditionally
+    t.kwargs._mark();
+    if(t._module != nullptr) OBJ_MARK(t._module);   // optional
+    if(t._closure != nullptr) t._closure->_mark();   // optional
+}
+
+template<> inline void _mark<BoundMethod>(BoundMethod& t){   // GC mark hook for BoundMethod payloads
+    OBJ_MARK(t.obj);   // the bound receiver
+    OBJ_MARK(t.method);   // the method object itself
+}
+
+template<> inline void _mark<StarWrapper>(StarWrapper& t){   // GC mark hook for StarWrapper payloads
+    OBJ_MARK(t.obj);   // the wrapped object is the only PyObject* visited
+}
 
-        std::vector<PyObject*> get_roots(VM* vm);
-    };
+template<> inline void _mark<Super>(Super& t){   // GC mark hook for Super, a std::pair<PyObject*, Type>
+    OBJ_MARK(t.first);   // only the PyObject* half is visited; .second is a Type
+}
+// NOTE: a std::function may capture PyObject* values; those captured pointers cannot be marked
 
 }   // namespace pkpy

+ 17 - 0
src/iter.h

@@ -65,4 +65,21 @@ inline PyObject* Generator::next(){
     }
 }
 
+inline void BaseIter::_mark() {   // GC mark hook for the base iterator state; both pointers are optional
+    if(_ref != nullptr) OBJ_MARK(_ref);
+    if(loop_var != nullptr) OBJ_MARK(loop_var);   // NOTE(review): loop_var has no initializer in the visible BaseIter declaration -- confirm it is always assigned before a collection can run
+}
+
+inline void Generator::_mark(){   // GC mark hook: base iterator state plus the generator's own frame
+    BaseIter::_mark();
+    frame->_mark();   // NOTE(review): frame is dereferenced without a null check -- confirm it is never reset while the generator is alive
+}
+
+template<typename T>
+void _mark(T& t){   // generic fallback used when no explicit specialization of _mark exists for T
+    if constexpr(std::is_base_of_v<BaseIter, T>){
+        t._mark();   // BaseIter-derived payloads dispatch to their own virtual _mark()
+    }   // for every other T this function does nothing
+}
+
 } // namespace pkpy

+ 1 - 6
src/namedict.h

@@ -180,12 +180,7 @@ while(!_items[i].first.empty()) {       \
         return v;
     }
 
-    void apply_v(void(*f)(PyObject*)) {
-        for(uint16_t i=0; i<_capacity; i++){
-            if(_items[i].first.empty()) continue;
-            f(_items[i].second);
-        }
-    }
+    void _mark();
 #undef HASH_PROBE
 #undef _hash
 };

+ 14 - 11
src/obj.h

@@ -63,6 +63,8 @@ struct StarWrapper {
     StarWrapper(PyObject* obj, bool rvalue): obj(obj), rvalue(rvalue) {}
 };
 
+using Super = std::pair<PyObject*, Type>;   // (object, type) pair; constructed as Super(args[1], base) by the `super` builtin
+
 struct Slice {
     int start = 0;
     int stop = 0x7fffffff; 
@@ -84,16 +86,13 @@ public:
     virtual PyObject* next() = 0;
     PyObject* loop_var;
     BaseIter(VM* vm, PyObject* _ref) : vm(vm), _ref(_ref) {}
+    virtual void _mark();
     virtual ~BaseIter() = default;
 };
 
-template <typename, typename=void> struct is_container_gc : std::false_type {};
-template <typename T> struct is_container_gc<T, std::void_t<decltype(T::_mark)>> : std::true_type {};
-
 struct GCHeader {   // per-object GC bookkeeping, accessed as obj->gc
-    bool enabled;   // whether this object is managed by GC
     bool marked;    // whether this object is marked
-    GCHeader() : enabled(false), marked(false) {}
+    GCHeader() : marked(false) {}   // objects start unmarked
 };
 
 struct PyObject {
@@ -105,12 +104,15 @@ struct PyObject {
     NameDict& attr() noexcept { return *_attr; }
     PyObject* attr(StrName name) const noexcept { return (*_attr)[name]; }
     virtual void* value() = 0;
-    virtual void mark() = 0;
+    virtual void _mark() = 0;
 
     PyObject(Type type) : type(type) {}
     virtual ~PyObject() { delete _attr; }
 };
 
+template<typename T>
+void _mark(T& t);   // forward declaration so Py_<T>::_mark can call pkpy::_mark<T>; not defined in this header
+
 template <typename T>
 struct Py_ : PyObject {
     T _value;
@@ -131,16 +133,17 @@ struct Py_ : PyObject {
     }
     void* value() override { return &_value; }
 
-    void mark() override {
-        if(!gc.enabled || gc.marked) return;
+    void _mark() override {   // GC mark hook: flags this object, then visits its attributes and its payload
+        if(gc.marked) return;   // already-marked objects are not traversed again
         gc.marked = true;   // set before descending, so a re-entrant visit returns at the check above
-        if(is_attr_valid()) attr().apply_v([](PyObject* v){ v->mark(); });
-        if constexpr (is_container_gc<T>::value) _value._mark();
+        if(is_attr_valid()) attr()._mark();   // attribute dict is visited only when is_attr_valid()
+        pkpy::_mark<T>(_value);   // handle PyObject* inside _value `T`
     }
 };
 
 #define OBJ_GET(T, obj) (((Py_<T>*)(obj))->_value)
 #define OBJ_NAME(obj) OBJ_GET(Str, vm->getattr(obj, __name__))
+#define OBJ_MARK(obj) if(!is_tagged(obj)) obj->_mark()
 
 const int kTpIntIndex = 2;
 const int kTpFloatIndex = 3;
@@ -210,7 +213,7 @@ __T _py_cast(VM* vm, PyObject* obj) {
 }
 
 #define VAR(x) py_var(vm, x)
-#define VAR_T(T, ...) vm->gcnew<T>(T::_type(vm), T(__VA_ARGS__))
+#define VAR_T(T, ...) vm->heap.gcnew<T>(T::_type(vm), T(__VA_ARGS__))
 #define CAST(T, x) py_cast<T>(vm, x)
 #define _CAST(T, x) _py_cast<T>(vm, x)
 

+ 2 - 2
src/parser.h

@@ -101,8 +101,8 @@ struct Parser {
     const char* curr_char;
     int current_line = 1;
     Token prev, curr;
-    std::queue<Token> nexts;
-    std::stack<int> indents;
+    queue<Token> nexts;
+    stack<int> indents;
 
     int brackets_level = 0;
 

+ 2 - 2
src/pocketpy.h

@@ -69,7 +69,7 @@ inline void init_builtins(VM* _vm) {
             vm->TypeError("super(type, obj): obj must be an instance or subtype of type");
         }
         Type base = vm->_all_types[type].base;
-        return vm->gcnew(vm->tp_super, Super(args[1], base));
+        return vm->heap.gcnew(vm->tp_super, Super(args[1], base));
     });
 
     _vm->bind_builtin_func<2>("isinstance", [](VM* vm, Args& args) {
@@ -757,7 +757,7 @@ inline void add_module_random(VM* vm){
 
 inline void add_module_gc(VM* vm){   // creates the `gc` module and binds its functions
     PyObject* mod = vm->new_module("gc");
-    vm->bind_func<0>(mod, "collect", CPP_LAMBDA(VAR(vm->gc_collect())));
+    vm->bind_func<0>(mod, "collect", CPP_LAMBDA(VAR(vm->heap.collect(vm))));   // 0-argument `collect`; its result is VAR(heap.collect(vm))
 }
 
 inline void VM::post_init(){

+ 15 - 1
src/ref.h

@@ -152,7 +152,7 @@ struct TupleRef : BaseRef {
 template<typename P>
 PyObject* VM::PyRef(P&& value) {   // stores `value` in a new heap object of type tp_ref
     static_assert(std::is_base_of_v<BaseRef, std::decay_t<P>>);   // compile-time guard: P must derive from BaseRef
-    return gcnew<P>(tp_ref, std::forward<P>(value));
+    return heap.gcnew<P>(tp_ref, std::forward<P>(value));   // allocation now goes through the VM's `heap` member
 }
 
 inline const BaseRef* VM::PyRef_AS_C(PyObject* obj)
@@ -166,4 +166,18 @@ inline void Frame::try_deref(VM* vm, PyObject*& v){
     if(is_type(v, vm->tp_ref)) v = vm->PyRef_AS_C(v)->get(vm, this);
 }
 
+/***** GC's Impl *****/
+template<> inline void _mark<AttrRef>(AttrRef& t){   // GC mark hook for AttrRef payloads
+    OBJ_MARK(t.obj);   // was `obj`, which names nothing in this free function; assumes AttrRef exposes its target as member `obj` -- confirm against the struct
+}
+
+template<> inline void _mark<IndexRef>(IndexRef& t){   // GC mark hook for IndexRef payloads
+    OBJ_MARK(t.obj);   // was `obj`, which names nothing in this free function; assumes IndexRef has a member `obj` -- confirm against the struct
+    OBJ_MARK(t.index);   // was `index`, same problem; assumes IndexRef has a member `index` -- confirm against the struct
+}
+
+template<> inline void _mark<TupleRef>(TupleRef& t){   // GC mark hook for TupleRef payloads
+    _mark<Tuple>(t.objs);   // delegates to the Tuple specialization for the member `objs`
+}
+
 }   // namespace pkpy

+ 33 - 47
src/vm.h

@@ -24,8 +24,8 @@ Str _read_file_cwd(const Str& name, bool* ok);
     template<> inline ctype& _py_cast<ctype&>(VM* vm, PyObject* obj) {  \
         return OBJ_GET(ctype, obj);                                     \
     }                                                                   \
-    inline PyObject* py_var(VM* vm, const ctype& value) { return vm->gcnew(vm->ptype, value);}     \
-    inline PyObject* py_var(VM* vm, ctype&& value) { return vm->gcnew(vm->ptype, std::move(value));}
+    inline PyObject* py_var(VM* vm, const ctype& value) { return vm->heap.gcnew(vm->ptype, value);}     \
+    inline PyObject* py_var(VM* vm, ctype&& value) { return vm->heap.gcnew(vm->ptype, std::move(value));}
 
 
 class Generator: public BaseIter {
@@ -35,7 +35,8 @@ public:
     Generator(VM* vm, std::unique_ptr<Frame>&& frame)
         : BaseIter(vm, nullptr), frame(std::move(frame)), state(0) {}
 
-    PyObject* next();
+    PyObject* next() override;
+    void _mark() override;
 };
 
 struct PyTypeInfo{
@@ -46,9 +47,9 @@ struct PyTypeInfo{
 
 class VM {
     VM* vm;     // self reference for simplify code
-    ManagedHeap heap;
 public:
-    std::stack< std::unique_ptr<Frame> > callstack;
+    ManagedHeap heap;
+    stack< std::unique_ptr<Frame> > callstack;
     std::vector<PyTypeInfo> _all_types;
 
     PyObject* run_frame(Frame* frame);
@@ -56,15 +57,12 @@ public:
     NameDict _modules;                          // loaded modules
     std::map<StrName, Str> _lazy_modules;       // lazy loaded modules
 
-    // singleton objects, need_gc=false
     PyObject* _py_op_call;
     PyObject* _py_op_yield;
     PyObject* None;
     PyObject* True;
     PyObject* False;
     PyObject* Ellipsis;
-
-    // managed by _modules, need_gc=false
     PyObject* builtins;         // builtins module
     PyObject* _main;            // __main__ module
 
@@ -73,6 +71,13 @@ public:
     std::ostream* _stderr;
     int recursionlimit = 1000;
 
+    // for quick access
+    Type tp_object, tp_type, tp_int, tp_float, tp_bool, tp_str;
+    Type tp_list, tp_tuple;
+    Type tp_function, tp_native_function, tp_iterator, tp_bound_method;
+    Type tp_slice, tp_range, tp_module, tp_ref;
+    Type tp_super, tp_exception, tp_star_wrapper;
+
     VM(bool use_stdio){
         this->vm = this;
         this->use_stdio = use_stdio;
@@ -118,7 +123,7 @@ public:
         do{
             val = cls->attr().try_get(name);
             if(val != nullptr) return val;
-            Type cls_t = static_cast<Py_<Type>*>(cls)->_value;
+            Type cls_t = OBJ_GET(Type, cls);
             Type base = _all_types[cls_t].base;
             if(base.index == -1) break;
             cls = _all_types[base].obj;
@@ -144,18 +149,6 @@ public:
         return nullptr;
     }
 
-    i64 gc_collect(){
-        heap.collect(this);
-        return 0;
-    }
-
-    template<typename T>
-    PyObject* gcnew(Type type, T&& val){
-        PyObject* obj = new Py_<std::decay_t<T>>(type, std::forward<T>(val));
-        heap._add(obj);
-        return obj;
-    }
-
     template<typename ArgT>
     std::enable_if_t<std::is_same_v<std::decay_t<ArgT>, Args>, PyObject*>
     call(PyObject* callable, ArgT&& args){
@@ -200,12 +193,12 @@ public:
 
     PyObject* property(NativeFuncRaw fget){   // returns the result of calling the builtin `property` with a native function built from fget
         PyObject* p = builtins->attr("property");   // looked up in the builtins module's attributes
-        PyObject* method = gcnew(tp_native_function, NativeFunc(fget, 1, false));
+        PyObject* method = heap.gcnew(tp_native_function, NativeFunc(fget, 1, false));   // fget wrapped as a tp_native_function object
         return call(p, Args{method});
     }
     }
 
     PyObject* new_type_object(PyObject* mod, StrName name, Type base){
-        PyObject* obj = new Py_<Type>(tp_type, _all_types.size());
+        PyObject* obj = heap._new<Type>(tp_type, _all_types.size());
         PyTypeInfo info{
             .obj = obj,
             .base = base,
@@ -263,17 +256,10 @@ public:
         return index;
     }
 
-    // for quick access
-    Type tp_object, tp_type, tp_int, tp_float, tp_bool, tp_str;
-    Type tp_list, tp_tuple;
-    Type tp_function, tp_native_function, tp_iterator, tp_bound_method;
-    Type tp_slice, tp_range, tp_module, tp_ref;
-    Type tp_super, tp_exception, tp_star_wrapper;
-
     template<typename P>
     PyObject* PyIter(P&& value) {   // stores `value` in a new heap object of type tp_iterator
         static_assert(std::is_base_of_v<BaseIter, std::decay_t<P>>);   // compile-time guard: P must derive from BaseIter
-        return gcnew<P>(tp_iterator, std::forward<P>(value));
+        return heap.gcnew<P>(tp_iterator, std::forward<P>(value));   // allocation now goes through the VM's `heap` member
     }
 
     BaseIter* PyIter_AS_C(PyObject* obj)
@@ -323,6 +309,7 @@ public:
     }
 
     ~VM() {
+        heap.collect(this);
         if(!use_stdio){
             delete _stdout;
             delete _stderr;
@@ -578,7 +565,7 @@ inline PyObject* VM::asRepr(PyObject* obj){
 }
 
 inline PyObject* VM::new_module(StrName name) {
-    PyObject* obj = new Py_<DummyModule>(tp_module, DummyModule());
+    PyObject* obj = heap._new<DummyModule>(tp_module, DummyModule());
     obj->attr().set(__name__, VAR(name.str()));
     // we do not allow override in order to avoid memory leak
     // NOTE(review): modules are now created via heap._new (see above), so they are tracked by the GC heap; re-check whether this restriction is still needed
@@ -666,8 +653,8 @@ inline void VM::init_builtin_types(){
     // PyTypeObject is managed by _all_types
     // PyModuleObject is managed by _modules
     // NOTE(review): stale -- the objects below are created via heap._new, which registers them in the GC heap
-    _all_types.push_back({.obj = new Py_<Type>(Type(1), Type(0)), .base = -1, .name = "object"});
-    _all_types.push_back({.obj = new Py_<Type>(Type(1), Type(1)), .base = 0, .name = "type"});
+    _all_types.push_back({.obj = heap._new<Type>(Type(1), Type(0)), .base = -1, .name = "object"});
+    _all_types.push_back({.obj = heap._new<Type>(Type(1), Type(1)), .base = 0, .name = "type"});
     tp_object = 0; tp_type = 1;
 
     tp_int = _new_type_object("int");
@@ -690,12 +677,12 @@ inline void VM::init_builtin_types(){
     tp_super = _new_type_object("super");
     tp_exception = _new_type_object("Exception");
 
-    this->None = new Py_<Dummy>(_new_type_object("NoneType"), {});
-    this->Ellipsis = new Py_<Dummy>(_new_type_object("ellipsis"), {});
-    this->True = new Py_<Dummy>(tp_bool, {});
-    this->False = new Py_<Dummy>(tp_bool, {});
-    this->_py_op_call = new Py_<Dummy>(_new_type_object("_py_op_call"), {});
-    this->_py_op_yield = new Py_<Dummy>(_new_type_object("_py_op_yield"), {});
+    this->None = heap._new<Dummy>(_new_type_object("NoneType"), {});
+    this->Ellipsis = heap._new<Dummy>(_new_type_object("ellipsis"), {});
+    this->True = heap._new<Dummy>(tp_bool, {});
+    this->False = heap._new<Dummy>(tp_bool, {});
+    this->_py_op_call = heap._new<Dummy>(_new_type_object("_py_op_call"), {});
+    this->_py_op_yield = heap._new<Dummy>(_new_type_object("_py_op_yield"), {});
 
     this->builtins = new_module("builtins");
     this->_main = new_module("__main__");
@@ -723,7 +710,7 @@ inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, boo
         if(new_f != nullptr){
             obj = call(new_f, std::move(args), kwargs, false);
         }else{
-            obj = gcnew<DummyInstance>(OBJ_GET(Type, callable), {});
+            obj = heap.gcnew<DummyInstance>(OBJ_GET(Type, callable), {});
             PyObject* init_f = getattr(obj, __init__, false, true);
             if (init_f != nullptr) call(init_f, std::move(args), kwargs, false);
         }
@@ -812,8 +799,6 @@ inline void VM::unpack_args(Args& args){
     args = Args(std::move(unpacked));
 }
 
-using Super = std::pair<PyObject*, Type>;
-
 // https://docs.python.org/3/howto/descriptor.html#invocation-from-an-instance
 inline PyObject* VM::getattr(PyObject* obj, StrName name, bool throw_err, bool class_only){
     PyObject* objtype = _t(obj);
@@ -936,10 +921,11 @@ inline PyObject* VM::_exec(){
     }
 }
 
-inline std::vector<PyObject*> ManagedHeap::get_roots(VM *vm) {
-    std::vector<PyObject*> roots;
-    // ...
-    return roots;
+inline void ManagedHeap::mark(VM *vm) {   // mark phase: starts from the frames on the VM call stack
+    // roots: every frame currently held in vm->callstack
+    for(auto& frame : vm->callstack.data()){
+        frame->_mark();   // NOTE(review): nothing else is visited here (e.g. vm->_modules, vm->_all_types, None/True/False) -- confirm those are reachable from a frame or add them as roots
+    }
 }
 
 }   // namespace pkpy