C++/Python/Matlab執行效率分析
以前一直覺得C++效率最高,速度最快,但是今天做的一個實驗結果大大出乎我的意料—Python使用向量處理效率一點都不慢,甚至高於C++在O2優化後的效率。 Matlab效率更高。 這為以後選取語言提供了一個很好的參考。
問題起源與對場內期權MC定價時,一步到最後與按天到最後在計算精度上有無差別,映象問題是FDM定價一步到最後,與按天到最後計算精度上有無差別。我的觀點顯然是前者無差,後者影響顯著。無奈,領導觀點竟然相反,為了說明問題,講理論肯定是不行了,只能用資料說話。為了驗證MC的結果,有了本實驗。
實驗初期,先採用Python寫的,因為簡單,就用了最簡單低階的程式碼思路。寫完後執行發現速度還可以(軌道數目較少,1k條)。於是萌生了與C++比較下效率的想法。於是速度照搬改成C++版本。為了能體現出差距,將軌道數目都改成10w條,結果發現,在簡單程式碼結構下,python不是一般的慢!
廢話不多說,先列出結果資料。表中資料單位為秒。
圖中列出了幾個相關實驗:
- C++原始程式碼及經過O2優化後的程式碼。
- C++通過OMP並行後的程式碼及經過O2優化過的程式碼
- Python 序列程式碼及向量化程式碼
- Matlab 序列及向量化程式碼
圖中給出的結果是讓人驚奇的:Python經過向量化後代碼效率極大的提高,這跟Matlab一樣,兩者都要好於經過O2優化過的C++並行程式碼
附件一 C++ omp並行與非並行程式碼(註釋掉pragma那行即可)
#include <random>
#include <cmath>
#include <ctime>
#include <omp.h>
using namespace std;
default_random_engine eng(time(NULL));
normal_distribution<double> norm(0,1);
double payoff(double &s, double &k)
{
return fmax(s-k,0.);
}
double pnode(double &s0, double& u, double& q, double& vol, double& t)
{
return s0*exp((u-q-0.5*vol*vol)*t + sqrt(t)*vol*norm(eng));
}
double mc(int& npath, const int& msteps)
{
double u = 0;
double q = 0;
double vol = 0.2;
double r = 0.035;
double s0 = 1.;
double t = 1.;
double k = 1.;
double val = 0.;
#pragma omp parallel for reduction(+:val)
for(int i = 0; i<npath; ++i)
{
double dt = t / msteps;
double st = s0;
for(int j = 0; j<msteps; ++j)
st=pnode(st,u,q,vol,dt);
double v=payoff(st,k)*exp(-r * t);
val +=v;
}
printf("%d steps to terminal with %d paths : v=%f\n", msteps, npath, val/npath);
}
int main()
{
clock_t t1 = clock();
double ompt1 = omp_get_wtime();
int n= 100000;
int m = 100;
mc(n, 1);
mc(n,m);
double ompt2 = omp_get_wtime();
printf("Time cost %f\n", ompt2-ompt1);
return 0;
}
附件二 序列Python程式
# -*- coding: utf-8 -*-
"""
Created on Sat Aug 12 10:52:40 2017
@author: kt
"""
import numpy as np
import matplotlib.pyplot as plt
import time
def payoff(s, k):
# Calculate the tereminal payoff
return np.max([s-k,0.])
def pnode(s0, u, q, vol, t):
return s0*np.exp((u-q-0.5*vol**2)*t+vol*np.sqrt(t)*np.random.normal(0,1))
def mc(npath, msteps):
u = 0
q = 0
vol = 0.2
r = 0.035
s0 = 1.0
t = 1
k = 1.0
val = 0.;
for i in range(npath):
dt = t/msteps
st = s0
for j in range(msteps):
st = pnode(st,u,q,vol,dt)
val += payoff(st,k)*np.exp(-r*t)
print('%d Step to Terminal: v=%f' % (msteps,val/npath))
t1 = time.clock()
npath = 100000
msteps=100
mc(npath,1)
mc(npath, msteps)
print('Time cost %f' % (time.clock() - t1))
附件三 向量化Python程式
# -*- coding: utf-8 -*-
"""
Created on Sat Aug 12 10:52:40 2017
@author: kt
"""
import numpy as np
import matplotlib.pyplot as plt
import time
def payoff(s, k):
# Calculate the tereminal payoff
v = s-k
v[s-k<0] = 0
return v
def TermV(s0, u, q, vol, t, npath, msteps):
dt = t/msteps
s = s0*np.ones(npath)
return s*np.exp((u-q-0.5*vol**2)*t+ \
np.sum(vol*np.sqrt(dt)*\
np.random.normal(size=[npath,msteps]),1))
def mc(npath, msteps):
u = 0
q = 0
vol = 0.2
r = 0.035
s0 = 1.0
t = 1
k = 1.0
st = TermV(s0,u,q,vol,t,npath,msteps)
v= np.sum(payoff(st,k))/npath *np.exp(-r*t)
print('%d Step to Terminal: v=%f' % (msteps,v))
t1 = time.clock()
npath = 100000
msteps=100
mc(npath,1)
mc(npath, msteps)
print('Time cost %f' % (time.clock() - t1))
附件四 序列Matlab程式碼
function mc()
tic;
npath= 100000;
msteps = 100;
mc_s(npath,1);
mc_s(npath,msteps);
toc
end
function y=payoff(s,k)
y = max(s-k,0);
end
function y=pnode(s0,u,q,vol,t)
y=s0*exp((u-q-0.5*vol^2)*t+vol*sqrt(t)*randn());
end
function mc_s(npath, msteps)
u = 0;
q=0;
vol=0.2;
r=0.035;
s0=1;
t=1;
k=1;
val = 0;
for i = 1:npath
dt = t/msteps;
st = s0;
for j = 1:msteps
st = pnode(st,u,q,vol,dt);
end
val = val + payoff(st,k)*exp(-r*t);
end
fprintf('%d step to Terminal :v=%f\n', msteps, val/npath);
end
附件五 向量化Matlab程式碼
function mc2()
tic;
npath= 100000;
msteps = 100;
mc_s(npath,1);
mc_s(npath,msteps);
toc
end
function y=payoff(s,k)
y=s-k;
y(s-k<0) = 0;
end
function y=TermV(s0,u,q,vol,t,npath, msteps)
dt = t/msteps;
s = s0*ones(npath,1);
y=s.*exp((u-q-0.5*vol^2)*t+vol*sqrt(dt)*sum(randn(npath,msteps),2));
end
function mc_s(npath, msteps)
u = 0;
q=0;
vol=0.2;
r=0.035;
s0=1;
t=1;
k=1;
st = TermV(s0,u,q,vol,t,npath,msteps);
v= sum(payoff(st,k))/npath *exp(-r*t);
fprintf('%d step to Terminal :v=%f\n', msteps, v);
end
結論
實驗結果說明,在處理好for的關係後,Python的效率還是挺高的,當然Matlab效率更高!
- Obviously, 不要忘了實驗的初心:對於MC方法定價場內期權,一步到位與中間詳細刻畫路徑結果精度是一樣的,理論依據就是:MC方法是基於概率分佈的方法,而布朗運動增量是獨立同分布的,他們和的分佈與一步到位終端變數的分佈是同分布的。
1 step to Terminal :v=0.076928
100 step to Terminal :v=0.076514
時間已過 0.632972 秒。