Pārlūkot izejas kodu

add fastpath for simple calls

blueloveTH 2 gadi atpakaļ
vecāks
revīzija
814ce3e465
5 mainīti faili ar 44 papildinājumiem un 7 dzēšanām
  1. 1 0
      include/pocketpy/codeobject.h
  2. 1 0
      include/pocketpy/expr.h
  3. 1 1
      run_profile.sh
  4. 12 1
      src/compiler.cpp
  5. 29 5
      src/vm.cpp

+ 1 - 0
include/pocketpy/codeobject.h

@@ -131,6 +131,7 @@ struct FuncDecl {
 
     Str signature;              // signature of this function
     Str docstring;              // docstring of this function
+    bool is_simple;
     void _gc_mark() const;
 };
 

+ 1 - 0
include/pocketpy/expr.h

@@ -42,6 +42,7 @@ struct Expr{
 
 struct CodeEmitContext{
     VM* vm;
+    FuncDecl_ func;     // optional
     CodeObject_ co;
     // some bugs on MSVC (error C2280) when using std::vector<Expr_>
     // so we use stack_no_copy instead

+ 1 - 1
run_profile.sh

@@ -1,7 +1,7 @@
 python3 prebuild.py
 SRC=$(find src/ -name "*.cpp")
 clang++ -pg -O1 -std=c++17 -stdlib=libc++ -Wfatal-errors -o main $SRC src2/main.cpp -Iinclude
-time ./main benchmarks/primes.py
+time ./main benchmarks/fib.py
 mv benchmarks/gmon.out .
 gprof main gmon.out > gprof.txt
 rm gmon.out

+ 12 - 1
src/compiler.cpp

@@ -19,6 +19,7 @@ namespace pkpy{
         decl->code = std::make_shared<CodeObject>(lexer->src, name);
         decl->nested = name_scope() == NAME_LOCAL;
         contexts.push(CodeEmitContext(vm, decl->code, contexts.size()));
+        contexts.top().func = decl;
         return decl;
     }
 
@@ -35,11 +36,21 @@ namespace pkpy{
         if(ctx()->co->varnames.size() > PK_MAX_CO_VARNAMES){
             SyntaxError("maximum number of local variables exceeded");
         }
+        FuncDecl_ func = contexts.top().func;
+        if(func){
+            func->is_simple = true;
+            if(func->code->is_generator) func->is_simple = false;
+            if(func->kwargs.size() > 0) func->is_simple = false;
+            if(func->starred_arg >= 0) func->is_simple = false;
+            if(func->starred_kwarg >= 0) func->is_simple = false;
+        }
         contexts.pop();
     }
 
     void Compiler::init_pratt_rules(){
-        if(rules[TK(".")].precedence != PREC_NONE) return;
+        PK_LOCAL_STATIC unsigned int count = 0;
+        if(count > 0) return;
+        count += 1;
 // http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
 #define METHOD(name) &Compiler::name
 #define NO_INFIX nullptr, PREC_NONE

+ 29 - 5
src/vm.cpp

@@ -739,15 +739,15 @@ void VM::_prepare_py_call(PyObject** buffer, ArgsView args, ArgsView kwargs, con
 
     if(args.size() < decl_argc){
         vm->TypeError(fmt(
-            "expected ", decl_argc, " positional arguments, got ", args.size(),
-            " (", co->name, ')'
+            co->name, "() takes ", decl_argc, " positional arguments but ", args.size(), " were given"
         ));
+        UNREACHABLE();
     }
 
     int i = 0;
     // prepare args
     for(int index: decl->args) buffer[index] = args[i++];
-    // set extra varnames to nullptr
+    // set extra varnames to PY_NULL
     for(int j=i; j<co_nlocals; j++) buffer[j] = PY_NULL;
     // prepare kwdefaults
     for(auto& kv: decl->kwargs) buffer[kv.key] = kv.value;
@@ -843,6 +843,28 @@ PyObject* VM::vectorcall(int ARGC, int KWARGC, bool op_call){
         const CodeObject* co = decl->code.get();
         int co_nlocals = co->varnames.size();
 
+        PyObject** _base = args.begin();
+
+        if(decl->is_simple){
+            if(args.size() != decl->args.size()){
+                TypeError(fmt(
+                    co->name, "() takes ", decl->args.size(), " positional arguments but ", args.size(), " were given"
+                ));
+                UNREACHABLE();
+            }
+            if(!kwargs.empty()){
+                TypeError(fmt(co->name, "() takes no keyword arguments"));
+                UNREACHABLE();
+            }
+            s_data.reset(_base + co_nlocals);
+            int i = 0;
+            // prepare args
+            for(int index: decl->args) _base[index] = args[i++];
+            // set extra varnames to PY_NULL
+            for(int j=i; j<co_nlocals; j++) _base[j] = PY_NULL;
+            goto __FAST_CALL;
+        }
+
         _prepare_py_call(buffer, args, kwargs, decl);
         
         if(co->is_generator){
@@ -854,8 +876,10 @@ PyObject* VM::vectorcall(int ARGC, int KWARGC, bool op_call){
         }
 
         // copy buffer back to stack
-        s_data.reset(args.begin());
-        for(int j=0; j<co_nlocals; j++) PUSH(buffer[j]);
+        s_data.reset(_base + co_nlocals);
+        for(int j=0; j<co_nlocals; j++) _base[j] = buffer[j];
+
+__FAST_CALL:
         callstack.emplace(&s_data, p0, co, fn._module, callable, FastLocals(co, args.begin()));
         if(op_call) return PY_OP_CALL;
         return _run_top_frame();