# Prerequisite: an understanding of stationarity, (auto)covariance, and (auto)correlation.
import os,sys,datetime
import numpy as np
import pandas as pd
import pandas_datareader.data as web
import matplotlib.pyplot as plt
from pandas.compat import range, lrange, lmap, map, zip
from pandas.tools.plotting import scatter_matrix,autocorrelation_plot
def load_stock_data(file_name):
    """Load a pickled pandas object from disk.

    Args:
        file_name: Path to a file previously written with ``to_pickle``.

    Returns:
        Whatever object ``pd.read_pickle`` reconstructs from the file
        (the rest of this script treats it as a DataFrame with a
        ``'Close'`` column).
    """
    # NOTE: unpickling can execute arbitrary code, so only load files that
    # come from a trusted source.
    return pd.read_pickle(file_name)
def get_autocorrelation_dataframe(series):
    """Compute the sample autocorrelation of ``series`` at lags 1..n.

    For each lag ``h`` the coefficient is

        r(h) = sum((x[t] - mean) * (x[t + h] - mean)) / n / c0

    where ``c0`` is the lag-0 autocovariance (the biased sample variance).

    Args:
        series: One-dimensional sequence of numbers (list, ndarray or
            pandas Series).

    Returns:
        A DataFrame with a single column ``0`` holding the coefficients,
        indexed by the lag ``1..n``. The coefficient at lag ``n`` is
        always 0 because no overlapping pairs remain. A zero-variance
        series produces NaN coefficients.
    """
    n = len(series)
    data = np.asarray(series)
    mean = np.mean(data)
    # Centre the data once instead of re-subtracting the mean for every lag.
    deviations = data - mean
    c0 = np.sum(deviations ** 2) / float(n)

    def r(h):
        # Autocovariance at lag h, normalised by the lag-0 autocovariance.
        return (deviations[:n - h] * deviations[h:]).sum() / float(n) / c0

    lags = np.arange(n) + 1
    coefficients = [r(h) for h in lags]
    return pd.DataFrame(coefficients, index=lags)
# Load the price data for both tickers, Samsung first and then Hyundai.
# NOTE(review): the paths end in ".csv" but load_stock_data reads them with
# pd.read_pickle — confirm the files really are pickles.
df_samsung, df_hyundai = (
    load_stock_data(path)
    for path in ('samsung_2010to2017.csv', 'hyundai_2010to2017.csv')
)

# Autocorrelation of the Samsung closing price across all lags.
df_samsung_corr = get_autocorrelation_dataframe(df_samsung['Close'])
print(df_samsung_corr)
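# Printed output (first rows; index is the lag, column 0 is the autocorrelation):
#           0
# 1  0.994718
# 2  0.989251
# 3  0.984319
# 4  0.979771
# 5  0.975310
# 6  0.970901
# 7  0.966464
# 8  0.961763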
# Covariance and correlation between the Samsung and Hyundai closing prices.
print(df_samsung['Close'].cov(df_hyundai['Close']))
print(df_samsung['Close'].corr(df_hyundai['Close']))
# Printed output: 512188378.7 (covariance), 0.0408314240626 (correlation)
# Two stacked panels: the closing price on top, its autocorrelation below.
fig, axs = plt.subplots(2, 1)
# Hide the x axis of the lower (bar chart) panel.
axs[1].xaxis.set_visible(False)
df_samsung['Close'].plot(ax=axs[0])
df_samsung_corr[0].plot(kind='bar', ax=axs[1])
plt.show()
# (Blog page footer) 0 comments on "Correlation in python" — Post a Comment