『Python』Numpy学习指南第三章__常用函数
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了『Python』Numpy学习指南第三章__常用函数相关的知识,希望对你有一定的参考价值。
感觉心情渐渐变好了,加油!
np.eye(2)
np.savetxt('eye.txt', i2)
c, v = np.loadtxt('data.csv', delimiter=',', usecols=(6, 7), unpack=True)
# -*- coding: utf-8 -*-

import numpy as np

# Build a 2x2 identity matrix.
i2 = np.eye(2)
print(i2)

# Save the matrix as plain text.
np.savetxt('eye.txt', i2)

# Read two columns of the CSV file; unpack=True returns one array per
# selected column so they can be bound to separate names.
# Column layout (0-based):
#   0 ticker, 1 date, 2 (blank), 3 open, 4 high, 5 low, 6 close, 7 volume
# c holds the closing prices, v the traded volumes.
c, v = np.loadtxt('data.csv', delimiter=',', usecols=(6, 7), unpack=True)
print(c, v)
[[ 1. 0.] [ 0. 1.]] [ 336.1 339.32 345.03 344.32 343.44 346.5 351.88 355.2 358.16 354.54 356.85 359.18 359.9 363.13 358.3 350.56 338.61 342.62 342.88 348.16 353.21 349.31 352.12 359.56 360. 355.36 355.76 352.47 346.67 351.99] [ 21144800. 13473000. 15236800. 9242600. 14064100. 11494200. 17322100. 13608500. 17240800. 33162400. 13127500. 11086200. 10149000. 17184100. 18949000. 29144500. 31162200. 23994700. 17853500. 13572000. 14395400. 16290300. 21521000. 17885200. 16188000. 19504300. 12718000. 16192700. 18138800. 16824200.]
np.average(c, weights=v)
np.mean(c)
# --- Averages ---

# Volume-weighted average price (VWAP): closing prices c weighted by the
# traded volumes v.
vwap = np.average(c, weights=v)
print(vwap)

# Arithmetic mean; np.average without weights gives the same number.
print(np.mean(c), np.average(c))

# Time-weighted average price (TWAP): weights grow linearly with the row
# index, so later rows count more.
t = np.arange(len(c))
twap = np.average(c, weights=t)
print(twap)
350.589549353 351.037666667 351.037666667 352.428321839
np.max(h)
np.min(l)
np.median(c)
np.ptp(h)
# --- Maximum and minimum ---

# Columns 4 and 5 of data.csv are the daily high and low prices.
h, l = np.loadtxt('data.csv', delimiter=',', usecols=(4, 5), unpack=True)
print('最大值:', np.max(h))
print('最小值:', np.min(l))

# Median of the closing prices loaded earlier into c.
print('中位数:', np.median(c))

# np.ptp returns the peak-to-peak spread (max - min) of an array.
print('最高价区间:', np.ptp(h))
print('最低价区间:', np.ptp(l))
最大值: 364.9 最小值: 333.53 中位数: 352.055 最高价区间: 24.86 最低价区间: 26.97
np.var(c)
np.msort(c)
# --- Basic statistics ---

# usecols=(6,) selects only the closing-price column.
c = np.loadtxt('data.csv', delimiter=',', usecols=(6,), unpack=False)
print('统计中位数:', np.median(c))

# Variance, first via NumPy and then from its definition (mean squared
# deviation from the mean); both lines print the same value.
print('方差:', np.var(c))
print('方差:', np.mean((c - c.mean()) ** 2))

# Sorted copy of the closing prices.  np.msort was removed in NumPy 2.0;
# np.sort(..., axis=0) is its documented replacement.
sorted_close = np.sort(c, axis=0)
print(sorted_close)
# NOTE(review): the original snippet ended with a call to print_line(), a
# helper that is not defined anywhere in this post; it is omitted here so
# the snippet runs on its own.
统计中位数: 352.055 方差: 50.1265178889 方差: 50.1265178889 [ 336.1 338.61 339.32 342.62 342.88 343.44 344.32 345.03 346.5 346.67 348.16 349.31 350.56 351.88 351.99 352.12 352.47 353.21 354.54 355.2 355.36 355.76 356.85 358.16 358.3 359.18 359.56 359.9 360. 363.13]
np.diff(c)
np.log(c)
np.where(returns>0)
np.std(logreturns)
# --- Stock returns ---

# np.diff returns the differences between neighbouring elements, so the
# result is one element shorter than its input.
# Simple return: (c[i + 1] - c[i]) / c[i].
returns = np.diff(c) / c[:-1]
# Standard deviation of the simple returns.
print(np.std(returns))

# Log return: log(c[i + 1] / c[i]); differencing the logarithms performs the
# division implicitly.
logreturns = np.diff(np.log(c))
print(logreturns)

# np.where on a boolean array yields the indices where it is True; here,
# the positions with a positive simple return.
print(np.where(returns > 0))

# Historical volatility as computed in this tutorial: std of the log returns
# divided by their mean, then divided by the square root of 1/252.
# presumably 252 is the number of trading days in a year, as the original
# comment ("reciprocal of trading days") suggests.
print((np.std(logreturns) / np.mean(logreturns) / np.sqrt(1 / 252)))
0.0129221344368 [ 0.00953488 0.01668775 -0.00205991 -0.00255903 0.00887039 0.01540739 0.0093908 0.0082988 -0.01015864 0.00649435 0.00650813 0.00200256 0.00893468 -0.01339027 -0.02183875 -0.03468287 0.01177296 0.00075857 0.01528161 0.01440064 -0.011103 0.00801225 0.02090904 0.00122297 -0.01297267 0.00112499 -0.00929083 -0.01659219 0.01522945] (array([ 0, 1, 4, 5, 6, 7, 9, 10, 11, 12, 16, 17, 18, 19, 21, 22, 23, 25, 28]),) 129.274789911
datetime.datetime.strptime(s.decode('utf-8'), "%d-%m-%Y").date().weekday()
np.take(close, indices)
np.argmax(averages)
np.argmin(averages)
# --- Date analysis ---

import datetime


def datestr2num(s):
    """Convert a ``dd-mm-YYYY`` date field to its weekday number.

    :param s: the raw date field handed over by ``np.loadtxt``. Depending on
        the NumPy version and the ``encoding`` argument this is either
        ``bytes`` or ``str``; both are accepted.
    :return: an int in 0..6, where 0 is Monday and 6 is Sunday.
    """
    # bytes must be decoded explicitly: str(b'...') would produce the
    # "b'...'" representation instead of the text itself.
    if isinstance(s, bytes):
        s = s.decode('utf-8')
    return datetime.datetime.strptime(s, "%d-%m-%Y").date().weekday()


# Column 1 (the date) is converted to a weekday number while loading;
# column 6 is the closing price.
dates, close = np.loadtxt('data.csv', delimiter=',', usecols=(1, 6),
                          unpack=True, converters={1: datestr2num})
print(dates)

# Average closing price for each weekday, Monday (0) to Friday (4).
averages = np.zeros(5)
for i in range(5):
    indices = np.where(dates == i)
    # np.take(a, idx) gathers the elements of a at the given indices into a
    # new array.
    prices = np.take(close, indices)
    avg = np.mean(prices)
    averages[i] = avg
    print(i, prices, avg)

# +1 turns the 0-based weekday index into 1 (Monday) .. 5 (Friday).
print('最大工作日日期', np.argmax(averages) + 1)
print('最小工作日日期', np.argmin(averages) + 1)
[ 4. 0. 1. 2. 3. 4. 0. 1. 2. 3. 4. 0. 1. 2. 3. 4. 1. 2. 3. 4. 0. 1. 2. 3. 4. 0. 1. 2. 3. 4.] 0 [[ 339.32 351.88 359.18 353.21 355.36]] 351.79 1 [[ 345.03 355.2 359.9 338.61 349.31 355.76]] 350.635 2 [[ 344.32 358.16 363.13 342.62 352.12 352.47]] 352.136666667 3 [[ 343.44 354.54 358.3 342.88 359.56 346.67]] 350.898333333 4 [[ 336.1 346.5 356.85 350.56 348.16 360. 351.99]] 350.022857143 最大工作日日期 3 最小工作日日期 5
np.ravel(np.where(dates == 0))
np.max(np.take(h, a))
np.min(np.take(l, a))
np.apply_along_axis(summrize, 1, week_indices, o, h, l, c)
np.savetxt('weeksummary.csv', weeksummary, delimiter=',', fmt='%s')
# --- Weekly summary ---

# Only the first 16 rows are used for this example.
close = close[:16]
dates = dates[:16]
print(np.where(dates == 0))
print(np.where(dates == 4))

# Index of the first Monday (weekday 0) and of the last Friday (weekday 4).
first_monday = np.ravel(np.where(dates == 0))[0]
print('第一个星期一', first_monday)
last_friday = np.ravel(np.where(dates == 4))[-1]
print('最后一个星期五', last_friday)

# Row indices from the first Monday through the last Friday, cut at
# positions 5 and 10 into three sub-arrays (one per week).
week_indices = np.arange(first_monday, last_friday + 1)
week_indices = np.split(week_indices, [5, 10])
print(week_indices)


def summrize(a, o, h, l, c):
    """Summarise one week of price data.

    :param a: row indices belonging to the week, in chronological order.
    :param o: opening prices, indexed by row.
    :param h: daily highs, indexed by row.
    :param l: daily lows, indexed by row.
    :param c: closing prices, indexed by row.
    :return: tuple ``("APPL", open of the first row, highest high,
        lowest low, close of the last row)``.
    """
    monday_open = o[a[0]]
    week_high = np.max(np.take(h, a))
    week_low = np.min(np.take(l, a))
    friday_close = c[a[-1]]
    return ("APPL", monday_open, week_high, week_low, friday_close)


# Column 3 holds the opening prices.
o = np.loadtxt("data.csv", delimiter=',', usecols=(3,), unpack=True)

# np.apply_along_axis(func, axis, arr, *args) calls func on every 1-D slice
# of arr along the given axis and forwards the extra arguments.  axis=1
# passes one row (one week of indices) per call; axis=0 would pass columns
# instead, which is not what is wanted here.
weeksummary = np.apply_along_axis(summrize, 1, week_indices, o, h, l, c)
# The per-week results are stacked into a new array.
print(weeksummary)
np.savetxt('weeksummary.csv', weeksummary, delimiter=',', fmt='%s')
(array([ 1, 6, 11]),) (array([ 0, 5, 10, 15]),) 第一个星期一 1 最后一个星期五 15 [array([1, 2, 3, 4, 5]), array([ 6, 7, 8, 9, 10]), array([11, 12, 13, 14, 15])] [[‘APPL‘ ‘335.8‘ ‘346.7‘ ‘334.3‘ ‘346.5‘] [‘APPL‘ ‘347.8‘ ‘360.0‘ ‘347.6‘ ‘356.8‘] [‘APPL‘ ‘356.7‘ ‘364.9‘ ‘349.5‘ ‘350.5‘]]
np.maximum(np.maximum(h-l, h-previousclose), previousclose-l)  # np.maximum 只比较两个数组,第三个位置参数是 out
np.zeros(N)
# --- Average true range (ATR) ---

N = 20
# Highs and lows of the last N rows, and for each of those rows the close of
# the row before it.
h = h[-N:]
l = l[-N:]
previousclose = c[-N - 1:-1]

# Element-wise maximum of the three candidate ranges.  np.maximum compares
# exactly two arrays (a third positional argument is the *output* buffer,
# not another operand), so the calls are nested.
truerange = np.maximum(np.maximum(h - l, h - previousclose),
                       previousclose - l)
# NOTE(review): the sample output shown after this snippet in the article was
# produced with the original three-argument call and may differ.
print(truerange)  # true range per row

# Recursive moving average of the true range, seeded with the plain mean:
# atr[i] = ((N - 1) * atr[i - 1] + truerange[i]) / N
atr = np.zeros(N)
atr[0] = np.mean(truerange)
for i in range(1, N):
    atr[i] = (N - 1) * atr[i - 1] + truerange[i]
    atr[i] /= N
print(atr)  # moving average of the true range
[ 4.26 2.77 2.42 5. 3.75 9.98 7.68 6.03 6.78 5.55
6.89 8.04 5.95 7.67 2.54 10.36 5.15 4.16 4.87 7.32]
[ 5.8585 5.704075 5.53987125 5.51287769 5.4247338 5.65249711
5.75387226 5.76767864 5.81829471 5.80487998 5.85913598 5.96817918
5.96727022 6.05240671 5.87678637 6.10094705 6.0533997 5.95872972
5.90429323 5.97507857]
np.convolve(weights,c)
# --- Simple moving average (SMA) ---

import matplotlib.pyplot as plt

N = 5
# N equal weights that sum to 1.
weights = np.ones(N) / N

c = np.loadtxt('data.csv', delimiter=',', usecols=(6,), unpack=True)
# Convolving the prices with the weights gives the moving average.  The full
# convolution has len(c) + N - 1 values; the first and last N - 1 of them
# come from partial overlaps and are sliced off.
sma = np.convolve(weights, c)[N - 1:-N + 1]
print(len(sma))

# The first complete window ends at index N - 1, so both curves are plotted
# from there.
t = np.arange(N - 1, len(c))
plt.plot(t, c[N - 1:], lw=1.0)
plt.plot(t, sma, lw=2.0)
# plt.show()
26
np.exp(x)
np.linspace(-1,0,5)
array.sum() # 仅return,不改值
# --- Exponential moving average (EMA) ---

# Element-wise exponential.
x = np.arange(5)
print('EXP:', np.exp(x))
# Five evenly spaced values from -1 to 0 inclusive.
print(np.linspace(-1, 0, 5))

# Exponentially spaced weights, normalised so that they sum to 1.
# ndarray.sum() only returns the total; the in-place division does the
# normalising.
weights = np.exp(np.linspace(-1, 0, N))
weights /= weights.sum()
print('Weights:', weights)

c = np.loadtxt('data.csv', delimiter=',', usecols=(6,), unpack=True)
# Same trimming of the partial-overlap edges as for the simple moving
# average.
ema = np.convolve(weights, c)[N - 1:-N + 1]
t = np.arange(N - 1, len(c))
plt.plot(t, c[N - 1:], lw=1)
plt.plot(t, ema, lw=2)
# plt.show()
EXP: [ 1. 2.71828183 7.3890561 20.08553692 54.59815003] [-1. -0.75 -0.5 -0.25 0. ] Weights: [ 0.11405072 0.14644403 0.18803785 0.24144538 0.31002201]
array.fill(sma[i-N-1])
# --- Bollinger bands ---

deviation = []
c = np.loadtxt('data.csv', delimiter=',', usecols=(6,), unpack=True)
C = len(c)

# sma[k] is the mean of c[k:k + N], i.e. the moving average that belongs to
# row k + N - 1.  For every row i with a complete window, measure the
# root-mean-square deviation of those same N closes from that average.
# (The original loop used a one-element window c[i:i + 1], the index
# sma[i - N - 1], which goes negative and wraps around, and a misspelled
# "averges" buffer.)
for i in range(N - 1, C):
    start = i - N + 1
    window = c[start:i + 1]
    dev = np.sqrt(np.mean((window - sma[start]) ** 2))
    deviation.append(dev)

# The bands lie two deviations above and below the moving average.
deviation = 2 * np.array(deviation)
upperBB = sma + deviation
lowerBB = sma - deviation

t = np.arange(N - 1, C)
plt.plot(t, c[N - 1:], lw=1.)
plt.plot(t, sma, lw=2.)
plt.plot(t, upperBB, lw=3.)
plt.plot(t, lowerBB, lw=4.)
# plt.show()
np.linalg.lstsq(A,b)[0] # 解Ax=b
np.dot(x,b) # 点积
# --- Linear model ---

# The last N closing prices, newest first.
b = c[-N:]
b = b[::-1]
print('b:', b)

A = np.zeros((N, N), float)
print(A)
# Fill A: row i holds the N closes that end i + 1 rows before the end of c.
for i in range(N):
    A[i] = c[-N - 1 - i:-1 - i]
print(A)

# Least-squares solution of A x = b.  lstsq returns the coefficient vector,
# the residuals, the rank of A and the singular values of A.
(x, resilduals, rank, s) = np.linalg.lstsq(A, b)
print(x, resilduals, rank, s)

# Dot product of the coefficients with b.
print(np.dot(x, b))
b: [ 351.99 346.67 352.47 355.76 355.36] [[ 0. 0. 0. 0. 0.] [ 0. 0. 0. 0. 0.] [ 0. 0. 0. 0. 0.] [ 0. 0. 0. 0. 0.] [ 0. 0. 0. 0. 0.]] [[ 360. 355.36 355.76 352.47 346.67] [ 359.56 360. 355.36 355.76 352.47] [ 352.12 359.56 360. 355.36 355.76] [ 349.31 352.12 359.56 360. 355.36] [ 353.21 349.31 352.12 359.56 360. ]] [ 0.78111069 -1.44411737 1.63563225 -0.89905126 0.92009049] [] 5 [ 1.77736601e+03 1.49622969e+01 8.75528492e+00 5.15099261e+00 1.75199608e+00] 357.939161015
np.ones_like(t)
np.vstack([t,np.ones_like(t)])
np.vstack([t,np.ones_like(t)]).T
np.intersect1d(c[c>support],c[c<resistance])
# --- Trend lines ---

h, l, c = np.loadtxt('data.csv', delimiter=',', usecols=(4, 5, 6),
                     unpack=True)
# Pivot point: mean of high, low and close for each row.
pivots = (h + l + c) / 3
print("Pivots:", pivots)


def fit_line(t, y):
    """Fit the straight line ``y = a * t + b`` by least squares.

    The model is written as ``[t, 1] @ [a, b].T = y``.

    :param t: 1-D array of x values.
    :param y: 1-D array of y values, same length as ``t``.
    :return: the least-squares solution ``[a, b]``.
    """
    # Two-column design matrix: t and a column of ones (np.ones_like builds
    # an array of ones with the shape and dtype of t).
    A = np.vstack([t, np.ones_like(t)]).T
    # First element of lstsq's result is the least-squares solution.
    return np.linalg.lstsq(A, y)[0]


t = np.arange(len(c))
# Support line fitted through pivot - (high - low), resistance line through
# pivot + (high - low).
sa, sb = fit_line(t, pivots - (h - l))
ra, rb = fit_line(t, pivots + (h - l))
support = sa * t + sb
resistance = ra * t + rb
print(support)

# Rows whose close lies strictly between the two lines.
condition = (c > support) & (c < resistance)
between_bands = np.where(condition)
print(condition)

# Both lines extrapolated one step past the last row.
print(sa * (t[-1] + 1) + sb)
print(ra * (t[-1] + 1) + rb)

# Same selection via set intersection of the two filtered price arrays.
print(np.intersect1d(c[c > support], c[c < resistance]))

plt.plot(t, c)
plt.plot(t, support)
plt.plot(t, resistance)
# plt.show()
Pivots: [ 338.01 337.88666667 343.88666667 344.37333333 342.07666667 345.57 350.92333333 354.29 357.34333333 354.18 356.06333333 358.45666667 359.14 362.84333333 358.36333333 353.19333333 340.57666667 341.95666667 342.13333333 347.13 353.12666667 350.90333333 351.62333333 358.42333333 359.34666667 356.11333333 355.13666667 352.61 347.11333333 349.77 ] [ 341.39100358 341.6576087 341.92421382 342.19081893 342.45742405 342.72402917 342.99063429 343.2572394 343.52384452 343.79044964 344.05705475 344.32365987 344.59026499 344.8568701 345.12347522 345.39008034 345.65668545 345.92329057 346.18989569 346.4565008 346.72310592 346.98971104 347.25631615 347.52292127 347.78952639 348.0561315 348.32273662 348.58934174 348.85594685 349.12255197] [False False True True True True True False False True False False False False False True False False False True True True True False False True True True False True] 349.389157088 360.749340996 [ 343.44 344.32 345.03 346.5 348.16 349.31 350.56 351.88 351.99 352.12 352.47 353.21 354.54 355.36 355.76]
array.clip(1,2)
array.compress(a>2)
# --- Clipping and compressing arrays ---

a = np.arange(5)
print("a=", a)
# clip(lo, hi) returns a copy with values below lo raised to lo and values
# above hi lowered to hi.
print('a=', a.clip(1, 2))

a = np.arange(5)
print("a=", a)
# compress(mask) returns only the elements where the mask is True.
print('a=', a.compress(a > 2))
a= [0 1 2 3 4] a= [1 1 2 2 2] a= [0 1 2 3 4] a= [3 4]
array.prod()
array.cumprod()
# --- Factorials ---

b = np.arange(1, 9)
# Product of all elements: 1 * 2 * ... * 8 = 8!.
print(b.prod())
# Cumulative product: element k is (k + 1)!.
print(b.cumprod())
40320
[ 1 2 6 24 120 720 5040 40320]
以上是关于『Python』Numpy学习指南第三章__常用函数的主要内容,如果未能解决你的问题,请参考以下文章