Descriptive statistics in Python

# Download Samsung stock data and compute descriptive statistics
#삼성전자 주가 정보 다운로드 및 descriptive statistic 작성

# import modules
import datetime
import pandas as pd
import pandas_datareader.data as web
import matplotlib.pyplot as plt
from pandas.tools.plotting import scatter_matrix

# download and load dataset
def download_stock_data(file_name,company_code,year1,month1,date1,year2,month2,date2):
    """Download price history for a ".KS" ticker from Yahoo and cache it on disk.

    Args:
        file_name: Path the downloaded data is written to. The data is stored
            with ``to_pickle``, i.e. in pickle format, whatever extension the
            path carries.
        company_code: Ticker code without the exchange suffix; ".KS" is
            appended before the request is made.
        year1, month1, date1: Year, month and day of the start of the range.
        year2, month2, date2: Year, month and day of the end of the range.

    Returns:
        The object returned by ``web.DataReader`` (expected to be a pandas
        DataFrame), which is also what gets written to ``file_name``.

    Raises:
        ValueError: If either year/month/day triple is not a valid calendar
            date (raised by ``datetime.datetime``).
    """
    start = datetime.datetime(year1, month1, date1)
    end = datetime.datetime(year2, month2, date2)
    df = web.DataReader("%s.KS" % (company_code), "yahoo", start, end)

    # Persist the result so later runs can reload it with load_stock_data()
    # instead of hitting the network again.
    df.to_pickle(file_name)

    return df

def load_stock_data(file_name):
    """Load a previously pickled pandas object from ``file_name``.

    Args:
        file_name: Path of a file written in pickle format (for example by
            ``DataFrame.to_pickle``).

    Returns:
        The unpickled object, as returned by ``pandas.read_pickle``.
    """
    # SECURITY NOTE: unpickling can execute arbitrary code. Only pass paths to
    # files that this script (or another trusted source) produced.
    df = pd.read_pickle(file_name)
    return df

# Fetch 2010-01-01 .. 2017-04-14 prices for Samsung (005930) and Hyundai
# (005380). The targets are named *.csv, but download_stock_data() writes them
# with to_pickle(), so they are pickle files.
for pickle_path, ticker in (
    ('samsung_2010to2017.csv', '005930'),
    ('hyundai_2010to2017.csv', '005380'),
):
    download_stock_data(pickle_path, ticker, 2010, 1, 1, 2017, 4, 14)

# Only the Samsung data is analysed below.
data = load_stock_data('samsung_2010to2017.csv')

# draw all columns in one figure
data.plot()
plt.show()

# check data
print(data)

                 Open       High        Low      Close  Volume   Adj Close
Date                                                                      
2010-01-04   803000.0   809000.0   800000.0   809000.0  239000   751191.79
2010-01-05   826000.0   829000.0   815000.0   822000.0  558500   763262.86
2010-01-06   829000.0   841000.0   826000.0   841000.0  458900   780905.19
2010-01-07   841000.0   841000.0   813000.0   813000.0  442100   754905.97
2010-01-08   820000.0   821000.0   806000.0   821000.0  295500   762334.32
2010-01-11   821000.0   823000.0   797000.0   797000.0  397900   740049.27

# print descriptive statistics (count, mean, std, min, quartiles, max) per column
summary_stats = data.describe()
print(summary_stats)

               Open          High           Low         Close        Volume  \
count  1.886000e+03  1.886000e+03  1.886000e+03  1.886000e+03  1.886000e+03   
mean   1.235572e+06  1.247218e+06  1.223781e+06  1.235772e+06  2.786047e+05   
std    2.936847e+05  2.962787e+05  2.925842e+05  2.949172e+05  1.427531e+05   
min    6.840000e+05  6.970000e+05  6.720000e+05  6.800000e+05  0.000000e+00   
25%    9.822500e+05  9.960000e+05  9.712500e+05  9.822500e+05  1.947250e+05   
50%    1.280000e+06  1.291000e+06  1.268000e+06  1.280000e+06  2.523500e+05   
75%    1.410000e+06  1.423000e+06  1.399000e+06  1.410000e+06  3.370000e+05   
max    2.110000e+06  2.134000e+06  2.094000e+06  2.128000e+06  1.276000e+06   

          Adj Close  
count  1.886000e+03  
mean   1.181318e+06  
std    3.017163e+05  
min    6.351751e+05  
25%    9.169498e+05  
50%    1.226299e+06  
75%    1.344629e+06  
max    2.128000e+06  
# check the quantile summary: the 25%, 50%, 75% and 100% levels of each column
quantile_levels = [.25, .5, .75, 1]
print(data.quantile(quantile_levels))
           Open       High        Low      Close     Volume    Adj Close
0.25   982250.0   996000.0   971250.0   982250.0   194725.0   916949.800
0.50  1280000.0  1291000.0  1268000.0  1280000.0   252350.0  1226299.135
0.75  1410000.0  1423000.0  1399000.0  1410000.0   337000.0  1344628.580
1.00  2110000.0  2134000.0  2094000.0  2128000.0  1276000.0  2128000.000

# check histogram of the opening price
open_prices = data['Open']

# plt.hist returns the per-bin counts, the bin edges (one more entry than the
# counts) and the drawn patches.
(n, bins, patched) = plt.hist(open_prices)
# NOTE(review): the histogram is in raw counts while the KDE is a density, so
# the two curves are on very different y scales - confirm this overlay is
# intended.
open_prices.plot(kind='kde')
plt.axvline(open_prices.mean(), color='red')
plt.show()

# zip stops at the shorter sequence, so every count is paired with the left
# edge of its bin and the final (right-most) edge is skipped.
for bin_start, frequency in zip(bins, n):
    print("Bin : %0.f, Frequency = %0.f" % (bin_start, frequency))




Bin : 684000, Frequency = 243
Bin : 826600, Frequency = 219
Bin : 969200, Frequency = 98
Bin : 1111800, Frequency = 265
Bin : 1254400, Frequency = 560
Bin : 1397000, Frequency = 306
Bin : 1539600, Frequency = 95
Bin : 1682200, Frequency = 29
Bin : 1824800, Frequency = 33
Bin : 1967400, Frequency = 38


# draw scatter_matrix of the four price columns, leaving out 'Volume'
price_columns = ['Open', 'High', 'Low', 'Close']
scatter_matrix(data[price_columns], alpha=0.2, figsize=(6, 6), diagonal='kde')

# draw box plot of the same price columns plus the adjusted close
box_columns = price_columns + ['Adj Close']
data[box_columns].plot(kind='box')
plt.show()

Investment

Coding

Daily Life

VIEWS

Follow by Email

03/05/2017

descriptive statistic in python

Share this

ForecasterJ

0 Comment to "descriptive statistic in python"

Post a Comment

Popular

Recommended

2019_12_Forecast Model_Performance_Presentation