03/05/2017

Correlation in Python

# Import libraries and define the helper functions used to load the dataset.
# Prerequisite: an understanding of stationarity, (auto)covariance, and (auto)correlation.

import os,sys,datetime
import numpy as np
import pandas as pd
import pandas_datareader.data as web
import matplotlib.pyplot as plt
from pandas.compat import range, lrange, lmap, map, zip
from pandas.tools.plotting import scatter_matrix,autocorrelation_plot

def load_stock_data(file_name):
    """Load a pickled pandas object from ``file_name`` and return it.

    The file is read with ``pd.read_pickle``, so it must be a pandas pickle
    regardless of its extension; a plain-text CSV file will not load here.

    NOTE: unpickling can execute arbitrary code, so only pass files that
    come from a trusted source.

    Args:
        file_name: Path of the pickle file to read.

    Returns:
        The unpickled object (a price DataFrame in this script).
    """
    df = pd.read_pickle(file_name)
    return df

def get_autocorrelation_dataframe(series):
    """Return the sample autocorrelation of ``series`` for lags 1..n.

    For each lag ``h`` the value is::

        sum((x[t] - mean) * (x[t + h] - mean)) / n / c0

    where ``c0`` is the lag-0 autocovariance ``sum((x - mean) ** 2) / n``.
    The last lag (``h == n``) has no overlapping pairs, so its numerator
    is zero.  A constant series has ``c0 == 0`` and therefore yields NaN.

    Args:
        series: One-dimensional sequence of numbers (list, array or
            pandas Series).

    Returns:
        A DataFrame with a single column ``0`` holding the autocorrelation
        values, indexed by the lag ``1..n``.
    """
    n = len(series)
    data = np.asarray(series)

    mean = np.mean(data)
    # Centre the data once; every lag reuses the same deviations.
    centered = data - mean
    c0 = np.sum(centered ** 2) / float(n)

    def r(h):
        # Pair each observation with the one h steps later.
        return (centered[:n - h] * centered[h:]).sum() / float(n) / c0

    x = np.arange(n) + 1
    # A plain comprehension avoids pandas.compat.lmap, which newer pandas
    # releases no longer provide.
    y = [r(h) for h in x]

    return pd.DataFrame(y, index=x)


# Load the two price tables.
# NOTE(review): load_stock_data reads with pd.read_pickle, yet these file
# names end in .csv — confirm the files really are pandas pickles.
df_samsung = load_stock_data('samsung_2010to2017.csv')
df_hyundai = load_stock_data('hyundai_2010to2017.csv')

# Samsung autocorrelation: one value per lag, computed from the 'Close' column

df_samsung_corr = get_autocorrelation_dataframe(df_samsung['Close'])

# Print the lag-indexed autocorrelation table.
print(df_samsung_corr)

             0
1     0.994718
2     0.989251
3     0.984319
4     0.979771
5     0.975310
6     0.970901
7     0.966464
8     0.961763


# Covariance and correlation between the Samsung and Hyundai 'Close' columns

# Series.cov gives the covariance of the two 'Close' columns.
print(df_samsung['Close'].cov(df_hyundai['Close']))

# Series.corr gives their correlation coefficient.
print(df_samsung['Close'].corr(df_hyundai['Close']))

512188378.7
0.0408314240626

# Two stacked axes: 'Close' values on top, autocorrelation bars below.
fig, axs = plt.subplots(2,1)
# Hide the x axis of the bar chart — presumably because one tick label per
# lag would be unreadable; confirm the intent.
axs[1].xaxis.set_visible(False) 

# Top panel: the Samsung 'Close' column.
df_samsung['Close'].plot(ax=axs[0])
# Bottom panel: column 0 of the autocorrelation table, one bar per lag.
df_samsung_corr[0].plot(kind='bar',ax=axs[1])

# Display the figure.
plt.show()

Share this

0 Comment to "Correlation in python"

Post a Comment