Browse Source

add a fast path test

blueloveTH 2 years ago
parent
commit
8bb3cefb34
5 changed files with 103 additions and 62 deletions
  1. +1 -1
      run_profile.sh
  2. +14 -4
      src/ceval.h
  3. +4 -0
      src/frame.h
  4. +27 -9
      src/tuplelist.h
  5. +57 -48
      src/vm.h

+ 1 - 1
run_profile.sh

@@ -1,5 +1,5 @@
 clang++ -pg -O2 -std=c++17 -fno-rtti -stdlib=libc++ -Wall -o pocketpy src/main.cpp
-time ./pocketpy benchmarks/primes.py
+time ./pocketpy benchmarks/fib.py
 mv benchmarks/gmon.out .
 gprof pocketpy gmon.out > gprof.txt
 rm gmon.out

+ 14 - 4
src/ceval.h

@@ -343,14 +343,24 @@ __NEXT_STEP:;
     TARGET(CALL)
     TARGET(CALL_UNPACK) {
         int ARGC = byte.arg;
-
+        PyObject* callable = frame->top_n(ARGC+1);
         bool method_call = frame->top_n(ARGC) != _py_null;
-        if(method_call) ARGC++;         // add self into args
-        Args args = frame->popx_n_reversed(ARGC);
+
+        // fast path
+        if(byte.op==OP_CALL && is_type(callable, tp_function)){
+            ArgsView args = frame->top_n_view(ARGC + int(method_call));
+            PyObject* ret = _py_call(callable, args, {});
+            frame->pop_n(ARGC + 2);
+            if(ret == nullptr) goto __PY_OP_CALL;
+            else frame->push(ret);      // a generator
+            DISPATCH();
+        }
+
+        Args args = frame->popx_n_reversed(ARGC + int(method_call));
         if(!method_call) frame->pop();
 
         if(byte.op == OP_CALL_UNPACK) unpack_args(args);
-        PyObject* callable = frame->popx();
+        frame->pop();
         PyObject* ret = call(callable, std::move(args), no_arg(), true);
         if(ret == _py_op_call) { __ret=ret; goto __PY_OP_CALL; }
         frame->push(ret);

+ 4 - 0
src/frame.h

@@ -254,6 +254,10 @@ struct Frame {
         _data.pop_back_n(n);
     }
 
+    ArgsView top_n_view(int n){
+        return ArgsView(_data.end()-n, _data.end());
+    }
+
     void _gc_mark() const {
         // do return if this frame has been moved
         if(_data._data == nullptr) return;

+ 27 - 9
src/tuplelist.h

@@ -9,7 +9,7 @@ namespace pkpy {
 
 using List = pod_vector<PyObject*>;
 
-class Args {
+class Tuple {
     PyObject** _args;
     int _size;
 
@@ -19,26 +19,26 @@ class Args {
     }
 
 public:
-    Args(int n){ _alloc(n); }
+    Tuple(int n){ _alloc(n); }
 
-    Args(const Args& other){
+    Tuple(const Tuple& other){
         _alloc(other._size);
         for(int i=0; i<_size; i++) _args[i] = other._args[i];
     }
 
-    Args(Args&& other) noexcept {
+    Tuple(Tuple&& other) noexcept {
         this->_args = other._args;
         this->_size = other._size;
         other._args = nullptr;
         other._size = 0;
     }
 
-    Args(std::initializer_list<PyObject*> list) : Args(list.size()){
+    Tuple(std::initializer_list<PyObject*> list) : Tuple(list.size()){
         int i = 0;
         for(PyObject* p : list) _args[i++] = p;
     }
 
-    Args(List&& other) noexcept : Args(other.size()){
+    Tuple(List&& other) noexcept : Tuple(other.size()){
         for(int i=0; i<_size; i++) _args[i] = other[i];
         other.clear();
     }
@@ -46,7 +46,7 @@ public:
     PyObject*& operator[](int i){ return _args[i]; }
     PyObject* operator[](int i) const { return _args[i]; }
 
-    Args& operator=(Args&& other) noexcept {
+    Tuple& operator=(Tuple&& other) noexcept {
         if(_args!=nullptr) pool64.dealloc(_args);
         this->_args = other._args;
         this->_size = other._size;
@@ -57,6 +57,9 @@ public:
 
     int size() const { return _size; }
 
+    PyObject** begin() const { return _args; }
+    PyObject** end() const { return _args + _size; }
+
     List to_list() noexcept {
         List ret(_size);
         // TODO: use move/memcpy
@@ -73,14 +76,29 @@ public:
         if(old_args!=nullptr) pool64.dealloc(old_args);
     }
 
-    ~Args(){ if(_args!=nullptr) pool64.dealloc(_args); }
+    ~Tuple(){ if(_args!=nullptr) pool64.dealloc(_args); }
 };
 
+using Args = Tuple;
 inline const Args& no_arg() {
     static const Args _zero(0);
     return _zero;
 }
 
-typedef Args Tuple;
+// a lightweight view for function args, it does not own the memory
+struct ArgsView{
+    PyObject** _begin;
+    PyObject** _end;
+
+    ArgsView(PyObject** begin, PyObject** end) : _begin(begin), _end(end) {}
+    ArgsView(const Tuple& t) : _begin(t.begin()), _end(t.end()) {}
+    ArgsView(): _begin(nullptr), _end(nullptr) {}
+
+    PyObject** begin() const { return _begin; }
+    PyObject** end() const { return _end; }
+    int size() const { return _end - _begin; }
+    bool empty() const { return _begin == _end; }
+    PyObject* operator[](int i) const { return _begin[i]; }
+};
 
 }   // namespace pkpy

+ 57 - 48
src/vm.h

@@ -338,6 +338,7 @@ public:
     Str disassemble(CodeObject_ co);
     void init_builtin_types();
     PyObject* call(PyObject* callable, Args args, const Args& kwargs, bool opCall);
+    PyObject* _py_call(PyObject* callable, ArgsView args, ArgsView kwargs);
     void unpack_args(Args& args);
     PyObject* getattr(PyObject* obj, StrName name, bool throw_err=true);
     PyObject* get_unbound_method(PyObject* obj, StrName name, PyObject** self, bool throw_err=true, bool fallback=false);
@@ -687,6 +688,59 @@ inline void VM::init_builtin_types(){
     for(auto [k, v]: _modules.items()) v->attr()._try_perfect_rehash();
 }
 
+inline PyObject* VM::_py_call(PyObject* callable, ArgsView args, ArgsView kwargs){
+    // callable is a `function` object
+    const Function& fn = CAST(Function&, callable);
+    const CodeObject* co = fn.decl->code.get();
+    FastLocals locals(co);
+
+    int i = 0;
+    for(int index: fn.decl->args){
+        if(i < args.size()){
+            locals[index] = args[i++];
+        }else{
+            StrName name = co->varnames[index];
+            TypeError(fmt("missing positional argument ", name.escape()));
+        }
+    }
+
+    // prepare kwdefaults
+    for(auto& kv: fn.decl->kwargs) locals[kv.key] = kv.value;
+    
+    // handle *args
+    if(fn.decl->starred_arg != -1){
+        List vargs;        // handle *args
+        while(i < args.size()) vargs.push_back(args[i++]);
+        locals[fn.decl->starred_arg] = VAR(Tuple(std::move(vargs)));
+    }else{
+        // kwdefaults override
+        for(auto& kv: fn.decl->kwargs){
+            if(i < args.size()){
+                locals[kv.key] = args[i++];
+            }else{
+                break;
+            }
+        }
+        if(i < args.size()) TypeError("too many arguments");
+    }
+    
+    for(int i=0; i<kwargs.size(); i+=2){
+        StrName key = CAST(int, kwargs[i]);
+        // try_set has nullptr check
+        // TODO: optimize this
+        bool ok = locals.try_set(key, kwargs[i+1]);
+        if(!ok){
+            TypeError(fmt(key.escape(), " is an invalid keyword argument for ", co->name, "()"));
+        }
+    }
+    PyObject* _module = fn._module != nullptr ? fn._module : top_frame()->_module;
+    if(co->is_generator){
+        return PyIter(Generator(this, Frame(co, _module, std::move(locals), fn._closure)));
+    }
+    _push_new_frame(co, _module, std::move(locals), fn._closure);
+    return nullptr;
+}
+
 // TODO: callable/args here may be garbage collected accidentally
 inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, bool opCall){
     if(is_type(callable, tp_bound_method)){
@@ -700,54 +754,9 @@ inline PyObject* VM::call(PyObject* callable, Args args, const Args& kwargs, boo
         if(kwargs.size() != 0) TypeError("native_function does not accept keyword arguments");
         return f(this, args);
     } else if(is_type(callable, tp_function)){
-        const Function& fn = CAST(Function&, callable);
-        const CodeObject* co = fn.decl->code.get();
-        FastLocals locals(co);
-
-        int i = 0;
-        for(int index: fn.decl->args){
-            if(i < args.size()){
-                locals[index] = args[i++];
-            }else{
-                StrName name = co->varnames[index];
-                TypeError(fmt("missing positional argument ", name.escape()));
-            }
-        }
-
-        // prepare kwdefaults
-        for(auto& kv: fn.decl->kwargs) locals[kv.key] = kv.value;
-        
-        // handle *args
-        if(fn.decl->starred_arg != -1){
-            List vargs;        // handle *args
-            while(i < args.size()) vargs.push_back(args[i++]);
-            locals[fn.decl->starred_arg] = VAR(Tuple(std::move(vargs)));
-        }else{
-            // kwdefaults override
-            for(auto& kv: fn.decl->kwargs){
-                if(i < args.size()){
-                    locals[kv.key] = args[i++];
-                }else{
-                    break;
-                }
-            }
-            if(i < args.size()) TypeError("too many arguments");
-        }
-        
-        for(int i=0; i<kwargs.size(); i+=2){
-            StrName key = CAST(int, kwargs[i]);
-            // try_set has nullptr check
-            // TODO: optimize this
-            bool ok = locals.try_set(key, kwargs[i+1]);
-            if(!ok){
-                TypeError(fmt(key.escape(), " is an invalid keyword argument for ", co->name, "()"));
-            }
-        }
-        PyObject* _module = fn._module != nullptr ? fn._module : top_frame()->_module;
-        if(co->is_generator){
-            return PyIter(Generator(this, Frame(co, _module, std::move(locals), fn._closure)));
-        }
-        _push_new_frame(co, _module, std::move(locals), fn._closure);
+        // ret is nullptr or a generator
+        PyObject* ret = _py_call(callable, args, kwargs);
+        if(ret != nullptr) return ret;
         if(opCall) return _py_op_call;
         return _run_top_frame();
     }