03/05/2017

Descriptive statistics in Python

#Download Samsung Electronics stock data and compute descriptive statistics
#삼성전자 주가 정보 다운로드 및 descriptive statistic 작성

#import functions
import datetime
import pandas as pd
import pandas_datareader.data as web
import matplotlib.pyplot as plt
from pandas.tools.plotting import scatter_matrix


# download and load dataset
def download_stock_data(file_name,company_code,year1,month1,date1,year2,month2,date2):
    """Download price data for one company and save it to ``file_name``.

    The date range runs from ``year1``/``month1``/``date1`` to
    ``year2``/``month2``/``date2``; both are turned into ``datetime``
    objects and passed to ``web.DataReader`` with the ``"yahoo"`` source.

    Args:
        file_name: Path the downloaded frame is written to. The data is
            stored with ``DataFrame.to_pickle`` whatever the extension of
            ``file_name`` is, so read it back with ``pd.read_pickle``.
        company_code: Stock code; ``".KS"`` is appended to form the ticker
            (presumably the Yahoo suffix for Korea Exchange listings --
            confirm before using codes from another market).
        year1, month1, date1: Start of the requested period.
        year2, month2, date2: End of the requested period.

    Returns:
        The object returned by ``web.DataReader`` (the same one that was
        written to ``file_name``).
    """
    start = datetime.datetime(year1, month1, date1)
    end = datetime.datetime(year2, month2, date2)
    df = web.DataReader("%s.KS" % (company_code), "yahoo", start, end)

    # Cache the result on disk so later runs can reload it without
    # hitting the network again.
    df.to_pickle(file_name)

    return df

def load_stock_data(file_name):
    """Load a pickled pandas object from ``file_name`` and return it.

    This is the counterpart of ``DataFrame.to_pickle``: the file must
    contain pickle data, regardless of what its extension suggests.

    Args:
        file_name: Path of the pickle file to read.

    Returns:
        Whatever object was pickled into the file.
    """
    # NOTE: unpickling can execute arbitrary code, so only load files that
    # were written by a trusted source (e.g. this script itself).
    return pd.read_pickle(file_name)

# Both downloads cover the same period: 2010-01-01 through 2017-04-14.
# The target files are named *.csv, but download_stock_data stores them
# with to_pickle, so they must be read back with load_stock_data.
period = (2010, 1, 1, 2017, 4, 14)
samsung_file = 'samsung_2010to2017.csv'
hyundai_file = 'hyundai_2010to2017.csv'

download_stock_data(samsung_file, '005930', *period)
download_stock_data(hyundai_file, '005380', *period)

# Only the first file is analysed below.
data = load_stock_data(samsung_file)

# Draw every column of the loaded frame on one figure.
data.plot()
plt.show()




# Check the data: print the loaded frame to inspect its columns and rows.
print(data)

                 Open       High        Low      Close  Volume   Adj Close
Date                                                                      
2010-01-04   803000.0   809000.0   800000.0   809000.0  239000   751191.79
2010-01-05   826000.0   829000.0   815000.0   822000.0  558500   763262.86
2010-01-06   829000.0   841000.0   826000.0   841000.0  458900   780905.19
2010-01-07   841000.0   841000.0   813000.0   813000.0  442100   754905.97
2010-01-08   820000.0   821000.0   806000.0   821000.0  295500   762334.32
2010-01-11   821000.0   823000.0   797000.0   797000.0  397900   740049.27



# Print descriptive statistics for the loaded frame via DataFrame.describe().
print(data.describe())

               Open          High           Low         Close        Volume  \
count  1.886000e+03  1.886000e+03  1.886000e+03  1.886000e+03  1.886000e+03   
mean   1.235572e+06  1.247218e+06  1.223781e+06  1.235772e+06  2.786047e+05   
std    2.936847e+05  2.962787e+05  2.925842e+05  2.949172e+05  1.427531e+05   
min    6.840000e+05  6.970000e+05  6.720000e+05  6.800000e+05  0.000000e+00   
25%    9.822500e+05  9.960000e+05  9.712500e+05  9.822500e+05  1.947250e+05   
50%    1.280000e+06  1.291000e+06  1.268000e+06  1.280000e+06  2.523500e+05   
75%    1.410000e+06  1.423000e+06  1.399000e+06  1.410000e+06  3.370000e+05   
max    2.110000e+06  2.134000e+06  2.094000e+06  2.128000e+06  1.276000e+06   

          Adj Close  
count  1.886000e+03  
mean   1.181318e+06  
std    3.017163e+05  
min    6.351751e+05  
25%    9.169498e+05  
50%    1.226299e+06  
75%    1.344629e+06  
max    2.128000e+06  
# Check the quantile summary: the 25%, 50%, 75% and 100% quantiles of each column.
print(data.quantile([.25,.5,.75,1]))
           Open       High        Low      Close     Volume    Adj Close
0.25   982250.0   996000.0   971250.0   982250.0   194725.0   916949.800
0.50  1280000.0  1291000.0  1268000.0  1280000.0   252350.0  1226299.135
0.75  1410000.0  1423000.0  1399000.0  1410000.0   337000.0  1344628.580
1.00  2110000.0  2134000.0  2094000.0  2128000.0  1276000.0  2128000.000

# Check the histogram of the 'Open' column.
# plt.hist returns the per-bin counts, the bin edges and the drawn patches;
# only the counts and edges are used below.
(n, bins, patched) = plt.hist(data['Open'])
# NOTE(review): the KDE curve is a density while the histogram shows raw
# counts, so the two share one axis at very different scales -- confirm the
# figure looks as intended.
data['Open'].plot(kind='kde')
# Mark the mean of 'Open' with a red vertical line.
plt.axvline(data['Open'].mean(),color='red')
plt.show()

# bins has one more entry than n (it lists edges, not bins); zip stops at the
# shorter sequence, so every count is paired with the left edge of its bin.
for left_edge, frequency in zip(bins, n):
    print("Bin : %0.f, Frequency = %0.f" % (left_edge, frequency))


Bin : 684000, Frequency = 243
Bin : 826600, Frequency = 219
Bin : 969200, Frequency = 98
Bin : 1111800, Frequency = 265
Bin : 1254400, Frequency = 560
Bin : 1397000, Frequency = 306
Bin : 1539600, Frequency = 95
Bin : 1682200, Frequency = 29
Bin : 1824800, Frequency = 33
Bin : 1967400, Frequency = 38

# Scatter matrix of the four price columns ('Volume' is left out), with a
# KDE curve on the diagonal.
# NOTE(review): scatter_matrix is imported from pandas.tools.plotting at the
# top of the file; newer pandas releases expose it as pandas.plotting --
# confirm against the installed version.
price_columns = ['Open','High','Low','Close']
scatter_matrix(data[price_columns], alpha=0.2, figsize=(6, 6), diagonal='kde')

# Box plot of the same price columns plus 'Adj Close'.
box_columns = price_columns + ['Adj Close']
data[box_columns].plot(kind='box')
plt.show()

Share this

0 Comment to "descriptive statistic in python"

Post a Comment