1500字范文 > python大数据股票分析

Python大数据股票分析

时间：2022-04-07 13:47:40

相关推荐

python大数据股票分析

# Stock-quote analysis exercises with pandas.
#
# The script walks through several ways of doing the same task (while loop,
# for loop, vectorised column arithmetic) on a daily quote workbook:
#   1. daily change in percent
#   2. trading-volume activity labels and their distribution
#   3. a simple score built from change and activity
#   4. index sorting
#   5. hand-rolled ranking versus Series.rank
#   6. sort_values / groupby on small samples and a weekly workbook
#
# Pure helpers live at module level; everything that touches the Excel files
# runs only when the file is executed as a script.

import numpy as np
import pandas as pd

# Workbook locations used by the exercises.
QUOTES_XLSX = r'D:\火狐下载\股票行情0406.xlsx'
WEEKLY_QUOTES_XLSX = r'D:\火狐下载\HQ0508-0512.xlsx'
WEEKLY_MEAN_XLSX = r'D:\火狐下载\HQ5.xlsx'

# Positional index of the volume column in the quote workbook.
# NOTE(review): presumably this is the "成交量(万股)" column - confirm
# against the sheet layout.
VOLUME_COL = 9


def change_pct(close, prev_close):
    """Return the change from *prev_close* to *close* in percent (2 decimals).

    Works on scalars and, element-wise, on pandas Series.
    """
    return round((close / prev_close - 1) * 100, 2)


def parse_volume(cell):
    """Convert a volume cell to float.

    Text cells may carry thousands separators ("1,234.5"); numeric cells are
    passed straight to ``float``.
    """
    if isinstance(cell, str):
        cell = cell.replace(",", "")
    return float(cell)


def activity_label(volume):
    """Map a numeric volume to its activity label.

    Below 1000 -> "不活跃", below 2000 -> "一般", otherwise "活跃".
    """
    if volume < 1000:
        return "不活跃"
    if volume < 2000:
        return "一般"
    return "活跃"


def score_stock(change, activity):
    """Score one stock: 1 point for a positive change, plus 3 / 2 / 0 points
    for the activity labels "活跃" / "一般" / anything else.
    """
    if change > 0:
        if activity == "活跃":
            return 4
        if activity == "一般":
            return 3
        return 1
    if activity == "活跃":
        return 3
    if activity == "一般":
        return 2
    return 0


def dense_rank_descending(values):
    """Rank *values* (a Series) so the largest value gets 1.

    Ties share a rank and ranks are consecutive (dense ranking). Missing
    values keep the placeholder rank 0. The input Series is never modified:
    the working set is a filtered copy, not the DataFrame column itself.
    """
    ranks = pd.Series(0.0, index=values.index)
    remaining = values.dropna()  # dropping NaN guarantees max() is a real value
    rank = 0
    while not remaining.empty:
        rank += 1
        top = remaining.max()
        ranks.loc[values == top] = rank
        remaining = remaining[remaining != top]
    return ranks


def rank_from_sorted_copy(scores, sorted_desc):
    """Rank each score by its first position in *sorted_desc* (1-based).

    *sorted_desc* must be *scores* sorted in descending order. Tied scores
    share the rank of their first occurrence.
    """
    first_position = {}
    for position, score in enumerate(sorted_desc, start=1):
        first_position.setdefault(score, position)
    return [first_position[score] for score in scores]


if __name__ == "__main__":
    # ------------------------------------------------------------------
    # Daily change (%)
    # ------------------------------------------------------------------
    # Read an Excel file: pd.read_excel(r'<folder>\<file>.xlsx')
    hq = pd.read_excel(QUOTES_XLSX)  # hq = quotes
    print(hq)
    print(hq.shape)  # (2807, 13) for the original workbook

    # Re-run this line after each method to reset the column. The length is
    # taken from the data instead of being hard-coded.
    hq["涨跌幅"] = np.zeros(hq.shape[0])

    # Method 1: while loop
    row = 0
    while row < hq.shape[0]:
        hq.loc[row, "涨跌幅"] = change_pct(hq.loc[row, "今收"], hq.loc[row, "前收"])
        row += 1
    print(hq)

    # Method 2: for loop
    for row in range(hq.shape[0]):
        hq.loc[row, "涨跌幅"] = change_pct(hq.loc[row, "今收"], hq.loc[row, "前收"])
    print(hq)

    # Method 3: plain column arithmetic
    hq["涨跌幅"] = change_pct(hq["今收"], hq["前收"])
    print(hq)

    # ------------------------------------------------------------------
    # Volume analysis
    # ------------------------------------------------------------------
    hq = pd.read_excel(QUOTES_XLSX)
    # The column holds text labels, so start from an empty string rather than
    # float zeros (writing strings into a float column is rejected by newer
    # pandas versions).
    hq["成交量分析"] = ""

    # With while
    row = 0
    while row < hq.shape[0]:
        hq.loc[row, "成交量分析"] = activity_label(parse_volume(hq.iloc[row, VOLUME_COL]))
        row += 1

    # With for
    for row in range(hq.shape[0]):
        hq.loc[row, "成交量分析"] = activity_label(parse_volume(hq.iloc[row, VOLUME_COL]))

    # Count the stocks in each bucket.
    # Method 1: count while labelling
    small, medium, large = 0, 0, 0
    for row in range(hq.shape[0]):
        label = activity_label(parse_volume(hq.iloc[row, VOLUME_COL]))
        hq.loc[row, "成交量分析"] = label
        if label == "不活跃":
            small += 1
        elif label == "一般":
            medium += 1
        else:
            large += 1
    print(small, medium, large)

    # Method 2: copy the column into a list and use list.count
    labels = list(hq["成交量分析"])
    print(labels.count("不活跃"), labels.count("一般"), labels.count("活跃"))

    # Method 3: same, without the intermediate variable
    print(list(hq["成交量分析"]).count("不活跃"))
    print(list(hq["成交量分析"]).count("一般"))
    print(list(hq["成交量分析"]).count("活跃"))

    # Arrange the counts and shares as a table.
    dic = {
        "不活跃": [labels.count("不活跃"), labels.count("不活跃") / hq.shape[0]],
        "一般": [labels.count("一般"), labels.count("一般") / hq.shape[0]],
        "活跃": [labels.count("活跃"), labels.count("活跃") / hq.shape[0]],
    }
    df = pd.DataFrame(dic)
    df.index = ["数量", "占比"]  # row labels: count, share

    # ------------------------------------------------------------------
    # Scoring: positive change 1 point, otherwise 0;
    # active 3 points, average 2 points, inactive 0 points.
    # ------------------------------------------------------------------
    hq = pd.read_excel(QUOTES_XLSX)
    hq["成交量分析"] = [
        activity_label(parse_volume(cell)) for cell in hq.iloc[:, VOLUME_COL]
    ]

    # Variant 1: two independent if-chains, then add the partial scores.
    hq["分数1"] = np.zeros(hq.shape[0])
    for row in range(hq.shape[0]):
        if hq.loc[row, "涨跌幅（%）"] > 0:
            hq.loc[row, "分数1"] = 1
        else:
            hq.loc[row, "分数1"] = 0
    hq["分数2"] = np.zeros(hq.shape[0])
    for row in range(hq.shape[0]):
        if hq.loc[row, "成交量分析"] == "活跃":
            hq.loc[row, "分数2"] = 3
        elif hq.loc[row, "成交量分析"] == "一般":
            hq.loc[row, "分数2"] = 2
        else:
            hq.loc[row, "分数2"] = 0
    hq["总分"] = hq["分数1"] + hq["分数2"]

    # Variant 2: nested ifs (see score_stock).
    hq["分数"] = np.zeros(hq.shape[0])
    for row in range(hq.shape[0]):
        hq.loc[row, "分数"] = score_stock(
            hq.loc[row, "涨跌幅（%）"], hq.loc[row, "成交量分析"]
        )

    # Variant 3: conditional expressions.
    hq["分数"] = np.zeros(hq.shape[0])
    for row in range(hq.shape[0]):
        activity = hq.loc[row, "成交量分析"]
        if hq.loc[row, "涨跌幅（%）"] > 0:
            hq.loc[row, "分数"] = 4 if activity == "活跃" else 3 if activity == "一般" else 1
        else:
            hq.loc[row, "分数"] = 3 if activity == "活跃" else 2 if activity == "一般" else 0

    # sort_index: axis=1 sorts by column labels, axis=0 by row labels;
    # ascending picks the direction; inplace=True sorts the frame itself
    # instead of returning a sorted copy.
    hq.sort_index(axis=1, ascending=False, inplace=True)
    hq.sort_index(axis=1, ascending=True, inplace=True)
    hq.sort_index(axis=0, ascending=True, inplace=True)
    hq.sort_index(axis=0, ascending=False, inplace=True)

    # Clean up the scratch columns, then the frame itself.
    del hq["分数1"]
    del hq["分数2"]
    del hq["总分"]
    del hq["分数"]
    del hq

    # ------------------------------------------------------------------
    # Sorting and ranking (1): hand-rolled dense ranking
    # ------------------------------------------------------------------
    hq = pd.read_excel(QUOTES_XLSX)
    hq["排名"] = dense_rank_descending(hq["涨跌幅（%）"])

    # ------------------------------------------------------------------
    # Sorting and ranking (2): keep a descending copy next to the data
    # ------------------------------------------------------------------
    hq = pd.read_excel(QUOTES_XLSX)
    hq["排名"] = np.zeros(hq.shape[0])
    # sort_values puts missing values last; to_numpy() makes the assignment
    # positional instead of re-aligning on the original index.
    hq["复制"] = hq["涨跌幅（%）"].sort_values(ascending=False).to_numpy()

    # Test 1: dense ranking on a small sample
    dic = {"名字": ["张三", "李四", "王五", "赵六"], "成绩": [70, 60, 90, 80]}
    df = pd.DataFrame(dic)
    df["排名"] = dense_rank_descending(df["成绩"])

    # Test 2: rank via the position in a descending copy
    dic = {"名字": ["张三", "李四", "王五", "赵六"], "成绩": [70, 60, 90, 80]}
    df = pd.DataFrame(dic)
    df["复制"] = sorted(df["成绩"], reverse=True)  # reverse=True: descending
    df["排名"] = rank_from_sorted_copy(df["成绩"], df["复制"])

    # ------------------------------------------------------------------
    # Ranking, reference answer 0515
    # ------------------------------------------------------------------
    hq = pd.read_excel(QUOTES_XLSX)
    hq["涨跌幅排名"] = dense_rank_descending(hq["涨跌幅（%）"])

    # The same with the built-in rank method.
    hq = pd.read_excel(QUOTES_XLSX)
    hq["涨跌幅排名"] = hq["涨跌幅（%）"].rank(method="dense", ascending=False)  # highest change first
    hq["市盈率排名"] = hq["市盈率"].rank(method="dense", ascending=True)  # lowest P/E first
    # NOTE(review): if this column holds text such as "1,234" (the volume
    # analysis above strips commas from column 9), rank() compares strings,
    # not numbers - confirm the column dtype.
    hq["成交量排名"] = hq["成交量(万股)"].rank(method="dense", ascending=False)  # highest volume first
    hq["临时"] = (hq["最高"] + hq["最低"]) / 2
    hq["价格排名"] = hq["临时"].rank(method="dense", ascending=True)  # lowest average price first
    del hq["临时"]
    hq["总排名"] = hq["涨跌幅排名"] + hq["市盈率排名"] + hq["成交量排名"] + hq["价格排名"]
    hq["总排名"] = hq["总排名"] / hq["总排名"].max() * 100

    # Test 1: sort a sample by name
    dic = {
        "名字": ["张三", "李四", "王五", "赵六", "张三", "李四", "王五", "赵六"],
        "测试": ["测试一", "测试一", "测试一", "测试一", "测试二", "测试二", "测试二", "测试二"],
        "成绩": [80, 90, 60, 70, 100, 80, 75, 85],
    }
    df = pd.DataFrame(dic)
    df.sort_values(by="名字", axis=0, ascending=True, inplace=True)  # ascending by name

    # Test 2: same, with an uneven number of rows per name
    dic = {
        "名字": ["张三", "李四", "王五", "赵六", "张三", "王五", "赵六", "张三", "赵六"],
        "测试": ["测试一", "测试一", "测试一", "测试一", "测试二", "测试二", "测试二", "测试三", "测试三"],
        "成绩": [80, 90, 60, 70, 100, 75, 85, 100, 85],
    }
    df = pd.DataFrame(dic)
    df.sort_values(by="名字", axis=0, ascending=True, inplace=True)  # ascending by name

    # ------------------------------------------------------------------
    # 0524: mean closing price per security over the week
    # ------------------------------------------------------------------
    HQ = pd.read_excel(WEEKLY_QUOTES_XLSX)  # HQ = quotes
    HQ.sort_values(by="证券代码", axis=0, ascending=True, inplace=True)  # optional
    HQ5 = HQ.groupby(["证券代码", "证券简称"])  # group by security code and short name
    HQ5 = HQ5["今收"].mean()  # mean of the closing price
    HQ5 = pd.DataFrame(HQ5)
    HQ5.to_excel(WEEKLY_MEAN_XLSX)

本内容不代表本网观点和政治立场，如有侵犯您的权益，请联系我们处理。

网友评论

网友评论仅供其表达个人看法，并不表明网站立场。