#삼성전자 주가 정보 다운로드 및 descriptive statistic 작성
# import functions
import datetime

import matplotlib.pyplot as plt
import pandas as pd
import pandas_datareader.data as web

try:
    # Current pandas releases expose the plotting helpers under pandas.plotting.
    from pandas.plotting import scatter_matrix
except ImportError:
    # Legacy location used by older pandas releases.
    from pandas.tools.plotting import scatter_matrix
# download and load dataset
def download_stock_data(file_name, company_code, year1, month1, date1, year2, month2, date2):
    """Download daily price data for one company and cache it on disk.

    The ticker requested from the "yahoo" data source is ``company_code``
    with the ``.KS`` suffix appended.

    Args:
        file_name: Path the downloaded DataFrame is pickled to.
        company_code: Stock code without the ``.KS`` suffix (e.g. '005930').
        year1, month1, date1: Year, month and day of the first date requested.
        year2, month2, date2: Year, month and day of the last date requested.

    Returns:
        The DataFrame returned by ``web.DataReader``.
    """
    start = datetime.datetime(year1, month1, date1)
    end = datetime.datetime(year2, month2, date2)
    # One-element tuple keeps the %-format correct even if company_code is a tuple.
    ticker = "%s.KS" % (company_code,)
    df = web.DataReader(ticker, "yahoo", start, end)
    # NOTE: the data is written in pickle format regardless of file_name's extension.
    df.to_pickle(file_name)
    return df
def load_stock_data(file_name):
    """Load a DataFrame previously saved with ``DataFrame.to_pickle``.

    Args:
        file_name: Path of the pickle file to read.

    Returns:
        The object stored in the pickle file.
    """
    # NOTE(security): unpickling can execute arbitrary code; only load files
    # that this script (or another trusted source) produced.
    df = pd.read_pickle(file_name)
    return df
# Fetch and cache the price history of both companies for the same date range.
# NOTE: the files are written in pickle format despite the '.csv' extension.
for cache_file, stock_code in (
    ('samsung_2010to2017.csv', '005930'),
    ('hyundai_2010to2017.csv', '005380'),
):
    download_stock_data(cache_file, stock_code, 2010, 1, 1, 2017, 4, 14)

# Only the Samsung data set is analysed below.
data = load_stock_data('samsung_2010to2017.csv')

# Plot every column of the frame in one figure.
data.plot()
plt.show()

# Dump the raw frame for a quick sanity check.
print(data)
# Sample output:
#                 Open      High       Low     Close  Volume  Adj Close
# Date
# 2010-01-04  803000.0  809000.0  800000.0  809000.0  239000  751191.79
# 2010-01-05  826000.0  829000.0  815000.0  822000.0  558500  763262.86
# 2010-01-06  829000.0  841000.0  826000.0  841000.0  458900  780905.19
# 2010-01-07  841000.0  841000.0  813000.0  813000.0  442100  754905.97
# 2010-01-08  820000.0  821000.0  806000.0  821000.0  295500  762334.32
# 2010-01-11  821000.0  823000.0  797000.0  797000.0  397900  740049.27
# print descriptive statistics
print(data.describe())
# Sample output:
#                Open          High           Low         Close        Volume  \
# count  1.886000e+03  1.886000e+03  1.886000e+03  1.886000e+03  1.886000e+03
# mean   1.235572e+06  1.247218e+06  1.223781e+06  1.235772e+06  2.786047e+05
# std    2.936847e+05  2.962787e+05  2.925842e+05  2.949172e+05  1.427531e+05
# min    6.840000e+05  6.970000e+05  6.720000e+05  6.800000e+05  0.000000e+00
# 25%    9.822500e+05  9.960000e+05  9.712500e+05  9.822500e+05  1.947250e+05
# 50%    1.280000e+06  1.291000e+06  1.268000e+06  1.280000e+06  2.523500e+05
# 75%    1.410000e+06  1.423000e+06  1.399000e+06  1.410000e+06  3.370000e+05
# max    2.110000e+06  2.134000e+06  2.094000e+06  2.128000e+06  1.276000e+06
#
#           Adj Close
# count  1.886000e+03
# mean   1.181318e+06
# std    3.017163e+05
# min    6.351751e+05
# 25%    9.169498e+05
# 50%    1.226299e+06
# 75%    1.344629e+06
# max    2.128000e+06

# check summary of quantile scores
print(data.quantile([.25,.5,.75,1]))
# Sample output:
#            Open       High        Low      Close     Volume    Adj Close
# 0.25   982250.0   996000.0   971250.0   982250.0   194725.0   916949.800
# 0.50  1280000.0  1291000.0  1268000.0  1280000.0   252350.0  1226299.135
# 0.75  1410000.0  1423000.0  1399000.0  1410000.0   337000.0  1344628.580
# 1.00  2110000.0  2134000.0  2094000.0  2128000.0  1276000.0  2128000.000
# check histogram of the 'Open' column
# plt.hist returns the per-bin counts, the bin edges and the drawn patches.
(n, bins, patched) = plt.hist(data['Open'])
# Overlay a kernel density estimate and mark the mean with a red vertical line.
data['Open'].plot(kind='kde')
plt.axvline(data['Open'].mean(), color='red')
plt.show()
# bins has one more entry than n; zip stops at the last count, so every
# frequency is paired with the left edge of its bin.
for bin_start, frequency in zip(bins, n):
    print("Bin : %0.f, Frequency = %0.f" % (bin_start, frequency))
# Sample output:
# Bin : 684000, Frequency = 243
# Bin : 826600, Frequency = 219
# Bin : 969200, Frequency = 98
# Bin : 1111800, Frequency = 265
# Bin : 1254400, Frequency = 560
# Bin : 1397000, Frequency = 306
# Bin : 1539600, Frequency = 95
# Bin : 1682200, Frequency = 29
# Bin : 1824800, Frequency = 33
# Bin : 1967400, Frequency = 38
# draw scatter_matrix of the price columns, leaving out 'Volume'
scatter_matrix(data[['Open','High','Low','Close']], alpha=0.2, figsize=(6, 6), diagonal='kde')
# draw box plot of the price columns
data[['Open','High','Low','Close','Adj Close']].plot(kind='box')
plt.show()
0 Comment to "descriptive statistic in python"
Post a Comment