python 数据分析
2022/6/20 1:20:49
本文主要是介绍python 数据分析,对大家解决编程问题具有一定的参考价值,需要的程序猿们随着小编来一起学习吧!
基本环境安装
安装Anaconda
Matplotlib绘图架构
Scripting(脚本) -> Artist(美工) -> Backend(后端)
折线图
点击查看代码
import random

import matplotlib.pyplot as plt

# Select a CJK-capable font so the Chinese labels below can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']

# Create the drawing area: figsize sets the size, dpi the sharpness.
plt.figure(figsize=(20, 8), dpi=80)

# One sample per minute between 11:00 and 12:00.
x = range(60)
# Chinese tick labels for the x axis.
x_ch = ['11点{}分'.format(i) for i in x]
# Candidate tick positions for the y axis.
y_ticks = range(40)

# Show only every 5th tick on both axes.
plt.xticks(x[::5], x_ch[::5])
plt.yticks(y_ticks[::5])

# Axis descriptions and title.
plt.xlabel('时间')
plt.ylabel('温度')
plt.title('某些城市11点到12点之间的温度变化')

# Random temperature series for Shanghai and Beijing.
y_shanghai = [random.uniform(15, 18) for i in x]
y_beijing = [random.uniform(1, 2) for i in x]

# Draw both series; the second one is a red dashed line.
plt.plot(x, y_shanghai, label='上海')
plt.plot(x, y_beijing, color='r', linestyle='--', label='北京')

# The legend uses the label= values passed to plot().
plt.legend(loc='best')

# plt.savefig('test.png')  # uncomment to save the figure to disk
plt.show()
颜色字符 | 风格字符 | 位置信息 |
---|---|---|
r 红色 | - 实线 | 'best' 0 |
g 绿色 | -- 虚线 | 'upper right' 1 |
b 蓝色 | -. 点画线 | 'upper left' 2 |
w 白色 | :点虚线 | 'lower left' 3 |
c 青色 | ''留空 空格 | 'lower right' 4 |
m 洋红 | | 'right' 5 |
y 黄色 | | 'center left' 6 |
k 黑色 | | 'center right' 7 |
| | | 'lower center' 8 |
| | | 'upper center' 9 |
| | | 'center' 10 |
多个坐标系绘制
点击查看代码
import random

import matplotlib.pyplot as plt

# Select a CJK-capable font so the Chinese labels below can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']

# Plot the per-minute temperature between 11:00 and 12:00 for two cities,
# each city on its own axes.
# plt.subplots replaces plt.figure(figsize=(20, 8), dpi=80): it returns the
# figure together with an array holding one Axes object per grid cell.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(20, 8))

# One sample per minute.
x = range(60)
# Random temperature series for Shanghai and Beijing.
y_shanghai = [random.uniform(15, 18) for i in x]
y_beijing = [random.uniform(1, 3) for i in x]

# Chinese tick labels for the x axis and candidate y tick positions.
x_ch = ['11点{}分'.format(i) for i in x]
y_ticks = range(40)

# plt.* functions act on the current axes; the ax[i].* methods target one
# specific axes. Each city goes to its own axes here.
ax[0].plot(x, y_shanghai, label='上海')
ax[1].plot(x, y_beijing, color='r', linestyle='--', label='北京')

# Both axes get the same ticks, labels, title and legend position.
# pyplot equivalents: plt.xticks / plt.yticks / plt.xlabel / plt.ylabel /
# plt.title.
for axis in ax:
    # Positions and labels are set separately: set_xticks(ticks, labels)
    # is only accepted by Matplotlib 3.5 and later.
    axis.set_xticks(x[::5])
    axis.set_xticklabels(x_ch[::5])
    axis.set_yticks(y_ticks[::5])
    axis.set_xlabel('时间')
    axis.set_ylabel("温度")
    axis.set_title("中午11点到12点之间的温度变化显示")
    axis.legend(loc='upper left')

plt.show()
柱状图
点击查看代码
import matplotlib.pyplot as plt

# Basic call shape: plt.bar(x, height, width=...)

# Select a CJK-capable font so the Chinese movie titles can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']

# Create the figure.
plt.figure(figsize=(20, 8))

# Data: one bar height per movie title.
movie_name = ['雷神3:诸神黄昏', '正义联盟', '东方快车谋杀案', '寻梦环游记',
              '全球风暴', '降魔传', '追捕', '七十七天', '密战', '狂兽', '其它']
y = [73853, 57767, 22354, 15969, 14839, 8725, 8716, 8318, 7916, 6764, 52222]

# Numeric bar positions; the titles are attached as tick labels below.
x = range(len(movie_name))

# One colour code per bar.
plt.bar(x, y, width=0.5,
        color=['b', 'r', 'g', 'y', 'c', 'm', 'y', 'k', 'c', 'g', 'g'])

# Replace the numeric tick labels with the movie titles.
plt.xticks(x, movie_name)

plt.show()
点击查看代码
import matplotlib.pyplot as plt

# Select a CJK-capable font so the Chinese labels below can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']

# Create the figure.
plt.figure(figsize=(20, 8))

movie_name = ['雷神3:诸神黄昏', '正义联盟', '寻梦环游记']
first_day = [10587.6, 10062.5, 1275.7]
first_weekend = [36224.9, 34479.6, 11830]

x = range(len(movie_name))

# Width of one bar. The second series is shifted right by exactly one bar
# width so the two bars of each movie sit side by side.
bar_width = 0.2

plt.bar(x, first_day, width=bar_width, label='首日票房')
plt.bar([i + bar_width for i in x], first_weekend, width=bar_width,
        label='首周票房')

# Put each tick halfway between the two bars of its group.
plt.xticks([i + bar_width / 2 for i in x], movie_name)

plt.legend(loc='best')
plt.show()
直方图
点击查看代码
import matplotlib.pyplot as plt

# Number of groups: the data is split into ranges;
#     groups = value range / bin width = (max - min) / bin width
# Bin width: the difference between the two end points of one group.

# Select a CJK-capable font so the Chinese labels below can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']

plt.figure(figsize=(20, 8))

# Movie running times.
time = [
    131, 98, 125, 131, 124, 139, 131, 117, 128, 108, 135, 138, 131, 102, 107,
    114, 119, 128, 121, 142, 127, 130, 124, 101, 110, 116, 117, 110, 128, 128,
    115, 99, 136, 126, 134, 95, 138, 117, 111, 78, 132, 124, 113, 150, 110,
    117, 86, 95, 144, 105, 126, 130, 126, 130, 126, 116, 123, 106, 112, 138,
    123, 86, 101, 99, 136, 123, 117, 119, 105, 137, 123, 128, 125, 104, 109,
    134, 125, 127, 105, 120, 107, 129, 116, 108, 132, 103, 136, 118, 102, 120,
    114, 105, 115, 132, 145, 119, 121, 112, 139, 125, 138, 109, 132, 134, 156,
    106, 117, 127, 144, 139, 139, 119, 140, 83, 110, 102, 123, 107, 143, 115,
    136, 118, 139, 123, 112, 118, 125, 109, 119, 133, 112, 114, 122, 109, 106,
    123, 116, 131, 127, 115, 118, 112, 135, 115, 146, 137, 116, 103, 144, 83,
    123, 111, 110, 111, 100, 154, 136, 100, 118, 119, 133, 134, 106, 129, 126,
    110, 111, 109, 141, 120, 117, 106, 149, 122, 122, 110, 118, 127, 121, 114,
    125, 126, 114, 140, 103, 130, 141, 117, 106, 114, 121, 114, 133, 137, 92,
    121, 112, 146, 97, 137, 105, 98, 117, 112, 81, 97, 139, 113, 134, 106,
    144, 110, 137, 137, 111, 104, 117, 100, 111, 101, 110, 105, 129, 137, 112,
    120, 113, 133, 112, 83, 94, 146, 133, 101, 131, 116, 111, 84, 137, 115,
    122, 106, 144, 109, 123, 116, 111, 111, 133, 150,
]

# NOTE: despite its name, `bins` holds the bin WIDTH (2 minutes), not the
# number of bins; the number of bins is `groups`.
bins = 2
groups = int((max(time) - min(time)) / bins)

# Draw the histogram with `groups` equal-width bins. The y axis shows counts;
# pass density=True for a normalised histogram (the old `normed` argument no
# longer exists).
plt.hist(time, bins=groups)

# One tick per bin edge. The stop is max + 1 so the right-most edge
# (max(time)) is included; range(min, max) would leave it out.
plt.xticks(range(min(time), max(time) + 1, bins))

plt.xlabel('电影时长大小')
plt.ylabel('电影的数据量')

# Dashed, fully opaque grid lines.
plt.grid(True, linestyle='--', alpha=1)

plt.show()
饼图
点击查看代码
import matplotlib.pyplot as plt
import pandas as pd
from mplfinance.original_flavor import candlestick_ochl

# pandas and candlestick_ochl are not used by this pie chart; they are
# imported here for the candlestick snippet further down the article.

# Select a CJK-capable font so the Chinese labels below can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']

plt.figure(figsize=(20, 8))

# Data: one wedge per movie, sized by its value in place_count.
movie_name = ['雷神3:诸神黄昏', '正义联盟', '东方快车谋杀案', '寻梦环游记',
              '全球风暴', '降魔传', '追捕', '七十七天', '密战', '狂兽', '其它']
place_count = [60605, 54546, 45819, 28243, 13270, 9945, 7679, 6799, 6101,
               4621, 20105]

# Draw the pie; autopct prints each wedge's share with two decimals.
plt.pie(place_count, labels=movie_name, autopct='%1.2f%%',
        colors=['b', 'r', 'g', 'y', 'c', 'm', 'y', 'r', 'c', 'g', 'g'])

# Equal axis scaling so the pie is drawn as a circle, not an ellipse.
plt.axis('equal')

plt.legend(loc='best')
plt.title('排片占比示意图')
plt.show()
点击查看代码
import matplotlib.pyplot as plt

labels = 'Frogs', 'Hogs', 'Dogs', 'Logs'
sizes = [15, 30, 45, 10]

# "Explode" one wedge: each entry is the offset of the matching label's wedge
# from the centre. Only the second wedge ('Hogs') is pulled out here; the
# value is the size of the gap to the other wedges.
explode = (0, 0.1, 0, 0)

fig1, ax1 = plt.subplots()

# autopct prints each wedge's share with one decimal. pctdistance (not set
# here) is the distance of that percentage text from the centre.
ax1.pie(sizes, explode=explode, labels=labels, autopct='%1.1f%%',
        shadow=True, startangle=90)

# Equal axis scaling so the pie is drawn as a circle.
ax1.axis('equal')

plt.show()
K线图
点击查看代码
import matplotlib.pyplot as plt
import pandas as pd
from mplfinance.original_flavor import candlestick_ochl

# Background material only.

# Read the first 100 rows of the four daily price tables.
data = pd.read_hdf("./stock_plot/day_open.h5")[:100]
data1 = pd.read_hdf("./stock_plot/day_close.h5")[:100]
data2 = pd.read_hdf("./stock_plot/day_high.h5")[:100]
data3 = pd.read_hdf("./stock_plot/day_low(1).h5")[:100]

# Take the "000001.SZ" column from each table and join them side by side.
day = pd.concat(
    [data["000001.SZ"], data1["000001.SZ"], data2["000001.SZ"],
     data3["000001.SZ"]],
    axis=1,
)
day.columns = ["open", "close", "high", "low"]

# reset_index() moves the row index into the first column, so every row of
# the resulting array is (index, open, close, high, low).
# NOTE(review): presumably the index holds values candlestick_ochl can use as
# x positions - confirm against the .h5 files.
day = day.reset_index().values

# Draw the chart; the first argument of candlestick_ochl is the target axes.
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(20, 8), dpi=80)
candlestick_ochl(axes, day, width=0.2, colorup='r', colordown='g')

plt.show()
Numpy
了解Numpy
import random
import time

import numpy as np

# Compare summing 100 million floats with the built-in sum() on a Python list
# against np.sum() on an ndarray.
# NOTE: a Python list of this size needs a few GB of RAM; lower the count if
# memory is tight.
a = [random.random() for _ in range(100000000)]

# time.perf_counter() is the clock intended for measuring short durations;
# time.time() can be coarse and may jump if the system clock is adjusted.
t1 = time.perf_counter()
sum1 = sum(a)
t2 = time.perf_counter()

# The list -> ndarray conversion is deliberately left outside the timed part.
b = np.array(a)
t4 = time.perf_counter()
sum3 = np.sum(b)
t5 = time.perf_counter()

# Elapsed seconds: built-in sum, then np.sum.
print(t2 - t1, t5 - t4)
1.6841034889221191 0.5198299884796143
ndarray n维数组
import numpy as np

# ndarray: NumPy's n-dimensional array.
# It stores a collection of values that all have the same type.

# Create a two-dimensional array.
a = np.array([[1, 2, 3], [4, 5, 6]])

a.shape     # shape of the array
# (2, 3)
a.ndim      # number of dimensions
# 2
a.size      # number of elements
# 6
a.itemsize  # bytes per element
# 4   (with the int32 dtype shown further down; 8 where the default
#      integer dtype is int64)
a.nbytes    # size * itemsize = 6 * 4
# 24
a.flags
#   C_CONTIGUOUS : True
#   F_CONTIGUOUS : False
#   OWNDATA : True
#   WRITEABLE : True
#   ALIGNED : True
#   WRITEBACKIFCOPY : False
#   UPDATEIFCOPY : False
# (the exact set of flags printed depends on the NumPy version)

a = np.array([[1, 2, 3], [4, 5, 6]])
b = np.array([7, 8, 9, 10])
c = np.array([[[1, 2, 3], [4, 5, 6]], [[11, 22, 33], [44, 55, 66]]])

a.shape
b.shape
c.shape
# (2, 2, 3)

# N-dimensional arrays
# 0-D: a single value such as 1, 2 or 3
# 1-D: [7, 8, 9, 10]
# 2-D: [[1, 2, 3], [4, 5, 6]]
# 3-D: [[[1, 2, 3], [4, 5, 6]], [[11, 22, 33], [44, 55, 66]]]

a
a.dtype  # element type of the array
# dtype('int32')   (platform dependent; int64 on many systems)

# The element type can be chosen explicitly at creation time.
a = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
a.dtype
# dtype('float32')
数组之间的运算
import numpy as np

# Array with scalar: the operation is applied to every element.
arr = np.array([1, 2, 3, 4])
arr + 1
# array([2, 3, 4, 5])

# Array with array: shapes are matched by the broadcasting rules.
a = np.array([[4, 5, 6], [7, 8, 9]])
b = np.array([[2, 10], [2, 15]])
# a * b would be element-wise, but it is left commented out: shapes (2, 3)
# and (2, 2) cannot be broadcast together.

score = np.array([[80, 86],
                  [82, 80],
                  [85, 78],
                  [90, 90],
                  [86, 82],
                  [82, 90],
                  [78, 80],
                  [92, 94]])
percent = np.array([[0.3, 0.7]])

# (8, 2) * (1, 2): the single row of `percent` is applied to every row.
score * percent
# array([[24. , 60.2],
#        [24.6, 56. ],
#        [25.5, 54.6],
#        [27. , 63. ],
#        [25.8, 57.4],
#        [24.6, 63. ],
#        [23.4, 56. ],
#        [27.6, 65.8]])

# Matrices: what is special about them is how they multiply.
# np.asmatrix is used because the np.mat alias was removed in NumPy 2.0.
np.asmatrix(score)
# matrix([[80, 86],
#         [82, 80],
#         [85, 78],
#         [90, 90],
#         [86, 82],
#         [82, 90],
#         [78, 80],
#         [92, 94]])

c = np.array([[0.3], [0.7]])
np.asmatrix(c)
# matrix([[0.3],
#         [0.7]])

# Matrix multiplication
# (8, 2) * (2, 1) = (8, 1)
np.matmul(score, c)
# array([[84.2],
#        [80.6],
#        [80.1],
#        [90. ],
#        [83.2],
#        [87.6],
#        [79.4],
#        [93.4]])

stock_day_rise = np.random.normal(0, 1, [500, 504])
stock_day_rise.shape
# (500, 504)

stock1 = stock_day_rise[:10, :100]
stock2 = stock_day_rise[10:20, :100]
stock2

# Joining
# axis=0: the arrays are joined along the rows (stacked vertically)
# axis=1: the arrays are joined along the columns (side by side)
all_ = np.concatenate([stock1, stock2], axis=0)
# hstack: join along columns, same as axis=1
# vstack: join along rows, same as axis=0

# Splitting: all_ has 20 rows, so this yields 20 one-row arrays.
np.split(all_, 20, axis=0)

# Reading a CSV file: the text header row and the empty cells come back as
# nan.
np.genfromtxt('test.csv', delimiter=',')
# array([[  nan,   nan,   nan,   nan],
#        [  1. , 123. ,   1.4,  23. ],
#        [  2. , 110. ,   nan,  18. ],
#        [  3. ,   nan,   2.1,  19. ]])

type(np.nan)
# float

# Euler's number is about 2.718 (not 2.73), so take it from NumPy.
e = np.e
1 / e
# ≈ 0.3679

np.exp(2)
# 7.38905609893065

1 / np.exp(2)
# 0.1353352832366127

# 1 / (1 + e**-x) evaluated at x = 2
1 / (1 + 1 / np.exp(2))
# 0.8807970779778823

np.exp(2)
# 7.38905609893065

# The same expression applied element-wise to an array.
m = np.array([1, 2, 3])
1 / (1 + 1 / np.exp(m))
# array([0.73105858, 0.88079708, 0.95257413])
Pandas
pandas数据结构
import numpy as np import pandas as pd stock_day_rise = np.random.normal(0, 1, [500, 504]) stock_day_rise #array([[-0.51275272, 0.94026123, -0.28734351, ..., -1.80535228, # 1.12647759, -0.34482647], # [-0.11082195, -0.61753087, 0.51247014, ..., -0.71336186, # -0.75038013, 1.23107248], # [ 1.30920002, -0.86247187, -0.18046507, ..., 0.41082344, # 0.36615753, -1.15248877], # ..., # [-0.64597353, 0.98051196, 0.21157511, ..., 0.3901954 , # 0.44220279, 0.7628329 ], # [-0.45372471, 0.74978987, 1.14269309, ..., -0.9227356 , # -0.64413556, -0.36949079], # [-0.7002719 , 0.57790589, -1.65279998, ..., -1.57232142, # -0.51782955, 0.13426912]]) stock_df = pd.DataFrame(stock_day_rise) stock_df | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | | ---- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | ---- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | | 0 | -0.512753 | 0.940261 | -0.287344 | 0.531760 | 0.012567 | 0.709473 | 0.239689 | -1.779217 | -0.501474 | -0.507617 | ... | 1.377147 | 1.783230 | 0.196377 | 1.594897 | 0.619660 | -1.876187 | 1.279120 | -1.805352 | 1.126478 | | 1 | -0.110822 | -0.617531 | 0.512470 | 0.581689 | 0.711916 | 0.813071 | 1.521003 | -0.290721 | -0.156604 | -1.124984 | ... | 0.948753 | 1.402447 | 0.294993 | -0.802038 | -1.067637 | -0.223470 | 0.445096 | -0.713362 | -0.750380 | | 2 | 1.309200 | -0.862472 | -0.180465 | 0.028584 | 0.037257 | 0.051052 | 1.629817 | -1.133528 | -0.987510 | -1.585423 | ... | 0.245225 | 1.909723 | -1.403577 | -0.406025 | -1.327163 | -0.608240 | 0.755096 | 0.410823 | 0.366158 | | 3 | 0.920909 | -0.473799 | -1.925638 | -0.989393 | 0.837138 | 0.948183 | 0.011733 | 0.466019 | 0.258141 | 0.270631 | ... 
| 1.028244 | 0.550098 | -0.168381 | 0.029352 | 0.652068 | -1.366157 | 2.141130 | -0.391050 | -0.524698 | | 4 | -0.319762 | 0.599024 | -0.154454 | -1.954056 | -1.672396 | -0.158403 | 1.369486 | 1.294337 | 0.920220 | 0.784408 | ... | -0.694639 | -0.250066 | 0.229763 | -1.020350 | 0.725860 | -0.062765 | -0.071443 | -0.708495 | -1.298314 | | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | | 495 | -0.189318 | 0.680488 | 0.696482 | -0.480230 | -1.868852 | -0.012383 | -1.519626 | -1.279518 | 0.804979 | 1.390888 | ... | 1.013731 | -1.506497 | -0.326615 | -1.552188 | 0.427825 | -0.533029 | 0.143934 | 0.192034 | 1.304076 | | 496 | 0.838544 | -0.455677 | -0.874880 | 0.494403 | -0.196655 | -0.738068 | -2.619937 | -0.151928 | -1.533008 | -2.134869 | ... | -0.575703 | -0.237983 | -1.551520 | 0.825470 | 0.186887 | -0.449823 | 1.406305 | 1.347674 | 0.058468 | | 497 | -0.645974 | 0.980512 | 0.211575 | -0.397760 | -0.926155 | -0.628815 | 0.407839 | -0.002652 | 0.106013 | 0.377582 | ... | -0.984033 | 0.882435 | 0.741889 | 1.084276 | -0.514312 | 1.374642 | 0.186176 | 0.390195 | 0.442203 | | 498 | -0.453725 | 0.749790 | 1.142693 | -0.058502 | 0.327256 | 1.752110 | 0.535332 | 1.743112 | -0.459879 | -2.108713 | ... | 0.119614 | -0.412215 | 0.209263 | 0.313788 | 0.216358 | -1.119070 | 1.067892 | -0.922736 | -0.644136 | | 499 | -0.700272 | 0.577906 | -1.652800 | -0.523849 | -0.342849 | -0.937188 | 0.835102 | 0.269253 | -0.754492 | -0.169862 | ... | -0.792549 | -0.159701 | 0.900721 | -0.909817 | -1.044447 | -1.155437 | 0.309660 | -1.572321 | -0.517830 | type(stock_df) #pandas.core.frame.DataFrame # 添加行索引 stock_code = ['股票' + str(i) for i in range(stock_day_rise.shape[0])] # stcok_code stock_df = pd.DataFrame(stock_day_rise,index=stock_code) stock_df | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... 
| 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | | ----- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | ---- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | | 股票0 | -0.512753 | 0.940261 | -0.287344 | 0.531760 | 0.012567 | 0.709473 | 0.239689 | -1.779217 | -0.501474 | -0.507617 | ... | 1.377147 | 1.783230 | 0.196377 | 1.594897 | 0.619660 | -1.876187 | 1.279120 | -1.805352 | 1.126478 | | 股票1 | -0.110822 | -0.617531 | 0.512470 | 0.581689 | 0.711916 | 0.813071 | 1.521003 | -0.290721 | -0.156604 | -1.124984 | ... | 0.948753 | 1.402447 | 0.294993 | -0.802038 | -1.067637 | -0.223470 | 0.445096 | -0.713362 | -0.750380 | | 股票2 | 1.309200 | -0.862472 | -0.180465 | 0.028584 | 0.037257 | 0.051052 | 1.629817 | -1.133528 | -0.987510 | -1.585423 | ... | 0.245225 | 1.909723 | -1.403577 | -0.406025 | -1.327163 | -0.608240 | 0.755096 | 0.410823 | 0.366158 | | 股票3 | 0.920909 | -0.473799 | -1.925638 | -0.989393 | 0.837138 | 0.948183 | 0.011733 | 0.466019 | 0.258141 | 0.270631 | ... | 1.028244 | 0.550098 | -0.168381 | 0.029352 | 0.652068 | -1.366157 | 2.141130 | -0.391050 | -0.524698 | | 股票4 | -0.319762 | 0.599024 | -0.154454 | -1.954056 | -1.672396 | -0.158403 | 1.369486 | 1.294337 | 0.920220 | 0.784408 | ... | -0.694639 | -0.250066 | 0.229763 | -1.020350 | 0.725860 | -0.062765 | -0.071443 | -0.708495 | -1.298314 | | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | | 股票495 | -0.189318 | 0.680488 | 0.696482 | -0.480230 | -1.868852 | -0.012383 | -1.519626 | -1.279518 | 0.804979 | 1.390888 | ... | 1.013731 | -1.506497 | -0.326615 | -1.552188 | 0.427825 | -0.533029 | 0.143934 | 0.192034 | 1.304076 | | 股票496 | 0.838544 | -0.455677 | -0.874880 | 0.494403 | -0.196655 | -0.738068 | -2.619937 | -0.151928 | -1.533008 | -2.134869 | ... 
| -0.575703 | -0.237983 | -1.551520 | 0.825470 | 0.186887 | -0.449823 | 1.406305 | 1.347674 | 0.058468 | | 股票497 | -0.645974 | 0.980512 | 0.211575 | -0.397760 | -0.926155 | -0.628815 | 0.407839 | -0.002652 | 0.106013 | 0.377582 | ... | -0.984033 | 0.882435 | 0.741889 | 1.084276 | -0.514312 | 1.374642 | 0.186176 | 0.390195 | 0.442203 | | 股票498 | -0.453725 | 0.749790 | 1.142693 | -0.058502 | 0.327256 | 1.752110 | 0.535332 | 1.743112 | -0.459879 | -2.108713 | ... | 0.119614 | -0.412215 | 0.209263 | 0.313788 | 0.216358 | -1.119070 | 1.067892 | -0.922736 | -0.644136 | # freq='B' 默认略过周六周日 date = pd.date_range('2017-01-01', periods=504, freq='B') stock_df = pd.DataFrame(stock_day_rise, index=stock_code, columns=date)
pandas的索引与修改
import pandas as pd import numpy as np import matplotlib.pyplot as plt stock_day_rise = np.random.normal(0, 1, [500, 504]) # stock_day_rise stock_code = ['股票' + str(i) for i in range(stock_day_rise.shape[0])] data = pd.date_range('2017-01-01', periods=504, freq='B') stock_dataframe = pd.DataFrame(stock_day_rise, index=stock_code, columns=data) stock_dataframe | 2017-01-02 | 2017-01-03 | 2017-01-04 | 2017-01-05 | 2017-01-06 | 2017-01-09 | 2017-01-10 | 2017-01-11 | 2017-01-12 | 2017-01-13 | ... | 2018-11-23 | 2018-11-26 | 2018-11-27 | 2018-11-28 | 2018-11-29 | 2018-11-30 | 2018-12-03 | 2018-12-04 | 2018-12-05 | 2018-12-06 | | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | --------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | | 股票0 | -0.336502 | -0.283818 | -1.833312 | -0.034063 | -0.273923 | -0.013894 | 0.571314 | -0.685192 | -0.844952 | 0.697230 | ... | -0.897525 | 2.295753 | 0.726545 | -0.332880 | -0.707125 | 0.301560 | -1.315805 | 1.038277 | 0.232298 | | 股票1 | 0.431983 | -0.128563 | 0.430541 | 0.260152 | 0.885598 | 1.659742 | 0.407230 | 0.011112 | 0.624398 | -1.356692 | ... | 0.433011 | -0.468825 | 0.536704 | -0.796652 | 0.972271 | 1.537066 | -0.146411 | 1.468827 | 1.733275 | | 股票2 | 1.068510 | 0.637716 | -1.626844 | -0.985523 | 0.745854 | -0.359343 | 0.889808 | 1.364657 | -1.017752 | -0.772868 | ... | -0.310762 | 0.420062 | 0.903381 | -0.804816 | -0.444837 | 1.373565 | -1.688836 | -0.853804 | 1.056135 | | 股票3 | 1.650343 | -0.921815 | -0.068494 | 1.043372 | -1.766311 | -1.018881 | -1.031309 | 1.024690 | -0.533850 | 0.350309 | ... | -1.010353 | 0.614537 | -0.511354 | -0.752013 | -1.017201 | -0.886048 | 0.680733 | 1.063538 | -0.383206 | | 股票4 | -1.128249 | -1.282252 | -0.928848 | 0.075446 | -1.358604 | 1.602723 | -0.966502 | 2.256386 | 0.925430 | -1.027316 | ... 
| | | | | | | | | | stock_dataframe.values array([[-0.33650197, -0.28381791, -1.83331156, ..., 1.03827662, 0.23229771, 0.50349308], [ 0.43198327, -0.12856302, 0.4305411 , ..., 1.46882666, 1.73327538, 0.44540417], [ 1.06851021, 0.63771568, -1.6268439 , ..., -0.8538035 , 1.05613455, 1.13792046], stock_dataframe.T | 股票0 | 股票1 | 股票2 | 股票3 | 股票4 | 股票5 | 股票6 | 股票7 | 股票8 | 股票9 | ... | 股票490 | 股票491 | 股票492 | 股票493 | 股票494 | 股票495 | 股票496 | 股票497 | 股票498 | 股票499 | | ---------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | ----- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | | 2017-01-02 | -0.336502 | 0.431983 | 1.068510 | 1.650343 | -1.128249 | -0.605703 | -0.551460 | 0.019854 | -2.092409 | -0.495476 | ... | 0.451665 | -0.328315 | -0.289311 | 0.204668 | 2.794759 | 0.877930 | 1.944935 | -0.657216 | 1.261522 | | 2017-01-03 | -0.283818 | -0.128563 | 0.637716 | -0.921815 | -1.282252 | 0.427100 | -1.296923 | 0.767681 | -0.621305 | 0.122074 | ... | 2.337562 | -0.350175 | -0.424671 | -1.011431 | 0.184091 | 0.242851 | 0.164125 | 0.910831 | -2.520630 | | 2017-01-04 | -1.833312 | 0.430541 | -1.626844 | -0.068494 | -0.928848 | 0.030197 | -0.171296 | -0.267061 | -0.285124 | -0.212287 | ... | -0.301684 | 0.015821 | 0.582552 | -0.349317 | 2.052757 | 0.056201 | 1.028949 | -0.730406 | -0.275469 | | 2017-01-05 | -0.034063 | 0.260152 | -0.985523 | 1.043372 | 0.075446 | -0.282063 | 0.939964 | -1.005864 | -0.536240 | -0.521829 | ... | 0.487618 | 0.211755 | 1.134300 | -1.530601 | -1.129824 | -0.106915 | -0.757018 | -0.306077 | 0.088784 | | 2017-01-06 | -0.273923 | 0.885598 | 0.745854 | -1.766311 | -1.358604 | -1.407985 | -1.195100 | -0.552709 | -1.014346 | -0.442240 | | | | | | | | | | | stock_dataframe.head(10) 2017-01-02 2017-01-03 2017-01-04 2017-01-05 2017-01-06 2017-01-09 2017-01-10 2017-01-11 2017-01-12 2017-01-13 ... 
2018-11-23 2018-11-26 2018-11-27 2018-11-28 2018-11-29 2018-11-30 2018-12-03 2018-12-04 2018-12-05 2018-12-06 股票0 -0.336502 -0.283818 -1.833312 -0.034063 -0.273923 -0.013894 0.571314 -0.685192 -0.844952 0.697230 ... -0.897525 2.295753 0.726545 -0.332880 -0.707125 0.301560 -1.315805 1.038277 0.232298 0.503493 股票1 0.431983 -0.128563 0.430541 0.260152 0.885598 1.659742 0.407230 0.011112 0.624398 -1.356692 ... 0.433011 -0.468825 0.536704 -0.796652 0.972271 1.537066 -0.146411 1.468827 1.733275 0.445404 股票2 1.068510 0.637716 -1.626844 -0.985523 0.745854 -0.359343 0.889808 1.364657 -1.017752 -0.772868 ... -0.310762 0.420062 0.903381 -0.804816 -0.444837 1.373565 -1.688836 -0.853804 1.056135 1.137920 股票3 1.650343 -0.921815 -0.068494 1.043372 -1.766311 -1.018881 -1.031309 1.024690 -0.533850 0.350309 # stock_dataframe.tail(10) # DataFrame索引操作 # 重设索引 # stock_dataframe.reset_index(drop=True) df = pd.DataFrame({'month':[1,4,7,10], 'year':[1, 1, 2, 2], 'sale':[55, 40, 84, 31]}) df = df.set_index(['year', 'month']) # df df sale year month 1 1 55 4 40 2 7 84 10 31 df.index MultiIndex([(1, 1), (1, 4), (2, 7), (2, 10)], names=['year', 'month']) # MultiIndex stock_dataframe = stock_dataframe.T stock_dataframe | 股票0 | 股票1 | 股票2 | 股票3 | 股票4 | 股票5 | 股票6 | 股票7 | 股票8 | 股票9 | ... | 股票490 | 股票491 | 股票492 | 股票493 | 股票494 | 股票495 | 股票496 | 股票497 | 股票498 | 股票499 | | ---------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | ----- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | --------- | | 2017-01-02 | -0.336502 | 0.431983 | 1.068510 | 1.650343 | -1.128249 | -0.605703 | -0.551460 | 0.019854 | -2.092409 | -0.495476 | ... | 0.451665 | -0.328315 | -0.289311 | 0.204668 | 2.794759 | 0.877930 | 1.944935 | -0.657216 | 1.261522 | | 2017-01-03 | -0.283818 | -0.128563 | 0.637716 | -0.921815 | -1.282252 | 0.427100 | -1.296923 | 0.767681 | -0.621305 | 0.122074 | ...
| 2.337562 | -0.350175 | -0.424671 | -1.011431 | 0.184091 | 0.242851 | 0.164125 | 0.910831 | -2.520630 | | 2017-01-04 | -1.833312 | 0.430541 | -1.626844 | -0.068494 | -0.928848 | 0.030197 | -0.171296 | -0.267061 | -0.285124 | -0.212287 | ... | -0.301684 | 0.015821 | 0.582552 | -0.349317 | 2.052757 | 0.056201 | 1.028949 | -0.730406 | -0.275469 | | 2017-01-05 | -0.034063 | 0.260152 | -0.985523 | 1.043372 | 0.075446 | -0.282063 | 0.939964 | -1.005864 | -0.536240 | -0.521829 | ... | 0.487618 | 0.211755 | 1.134300 | -1.530601 | -1.129824 | -0.106915 | -0.757018 | -0.306077 | 0.088784 | | 2017-01-06 | -0.273923 | 0.885598 | 0.745854 | -1.766311 | | | | | | | | | | | | | | | | | stock_dataframe['股票0']['2017-01-02'] # -0.33650197255654596 pd.Series(np.arange(10)) 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 dtype: int32 pd.Series({'red':100, 'blue':200, 'green': 500, 'yellow':1000}) red 100 blue 200 green 500 yellow 1000 dtype: int64 data = pd.read_csv('./stock_day.csv') data.head() open high close low volume price_change p_change ma5 ma10 ma20 v_ma5 v_ma10 v_ma20 turnover 2018-02-27 23.53 25.88 24.16 23.53 95578.03 0.63 2.68 22.942 22.142 22.875 53782.64 46738.65 55576.11 2.39 2018-02-26 22.80 23.78 23.53 22.80 60985.11 0.69 3.02 22.406 21.955 22.942 40827.52 42736.34 56007.50 1.53 2018-02-23 22.88 23.37 22.82 22.71 52914.01 0.54 2.42 21.938 21.929 23.022 35119.58 41871.97 56372.85 1.32 2018-02-22 22.25 22.76 22.28 22.02 36105.01 0.36 1.64 21.446 21.909 23.137 35397.58 39904.78 60149.60 0.90 2018-02-14 21.49 21.99 21.92 21.48 23331.04 0.44 2.05 21.366 21.923 23.253 33590.21 42935.74 61716.11 0.58 data[['open', 'high', 'close']] open high close 2018-02-27 23.53 25.88 24.16 2018-02-26 22.80 23.78 23.53 2018-02-23 22.88 23.37 22.82 2018-02-22 22.25 22.76 22.28 2018-02-14 21.49 21.99 21.92 ... ... ... ... 
2015-03-06 13.17 14.48 14.28 2015-03-05 12.88 13.45 13.16 2015-03-04 12.80 12.92 12.90 2015-03-03 12.52 13.06 12.70 2015-03-02 12.25 12.67 12.52 643 rows × 3 columns # 使用行列索引的方式取值,必须按照先列后行的顺序 data['open']['2018-02-27'] 23.53 # data[:1, :2] # loc: 只能指定行列索引的名字 # iloc: 可以通过索引的下标获取,索引是时间或者指标的名字 data.loc['2018-02-27': '2018-02-23', 'open'] 2018-02-27 23.53 2018-02-26 22.80 2018-02-23 22.88 Name: open, dtype: float64 data.iloc[0:3, 0:4] # 相当于取到一个DataFrame open high close low 2018-02-27 23.53 25.88 24.16 23.53 2018-02-26 22.80 23.78 23.53 22.80 2018-02-23 22.88 23.37 22.82 22.71 # ix, 在1.0.0版本之后就删除这个方法 # 排序 data.sort_index() # ascending=False 按照升序还是降序的顺序排序,默认从小到大 data.sort_values(by='p_change', ascending=False) data.sort_values(by=['open', 'close'], ascending=False).head(10) # 统计分析 # 求出最小值 data.idxmin(axis=0) # cumsum data = data.sort_index() data data.p_change.cumsum().plot() plt.show() # 逻辑运算 # 通过运算符 data['p_change'] > 2 data[data['p_change'] > 2] data[(data['p_change']>2) & (data['turnover']>5)] data.query('p_change>2&turnover>5') # isin data[data['turnover'].isin([4.19, 2.39])] # 数学运算 data open_ = data['open'] close_ = data['close'] # add 加法 sub 减法 data['my_price_change'] = close_.sub(open_) # 自定义运算 data[['open', 'close']].apply(lambda x: x.max() - x.min(), axis=0)
这篇关于python 数据分析的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!
- 2024-11-21Python编程基础教程
- 2024-11-20Python编程基础与实践
- 2024-11-20Python编程基础与高级应用
- 2024-11-19Python 基础编程教程
- 2024-11-19Python基础入门教程
- 2024-11-17在FastAPI项目中添加一个生产级别的数据库——本地环境搭建指南
- 2024-11-16`PyMuPDF4LLM`:提取PDF数据的神器
- 2024-11-16四种数据科学Web界面框架快速对比:Rio、Reflex、Streamlit和Plotly Dash
- 2024-11-14获取参数学习:Python编程入门教程
- 2024-11-14Python编程基础入门