# Pairwise correlation matrix of the columns of `data`.
data_corr = data.corr()

# Minimum absolute correlation for a pair to be reported.
threshold = 0.5

# Each entry is [correlation, first column index, second column index].
corr_list = []

# Walk the upper triangle of the matrix (j > i) so that every pair is
# visited once and the diagonal is skipped.
for i in range(size):
    for j in range(i + 1, size):
        value = data_corr.iloc[i, j]
        # A correlation of exactly +1 is left out; negative values have no
        # lower bound, so -1 is kept.
        strongly_positive = threshold <= value < 1
        strongly_negative = value < 0 and value <= -threshold
        if strongly_positive or strongly_negative:
            corr_list.append([value, i, j])

# Strongest correlations (by absolute value) first.
s_corr_list = sorted(corr_list, key=lambda entry: abs(entry[0]), reverse=True)

# Report each pair by column name together with its correlation.
for v, i, j in s_corr_list:
    print("%s and %s = %.2f" % (cols[i], cols[j], v))
# Correlation amongst selected features and target
corr = df.corr()

# Order the labels by their correlation with "target" (highest first) and
# keep every fifth one.
# NOTE(review): `[::5]` is a stride of five, not "the top five" - confirm
# this sampling is what was intended.
abb = corr["target"].sort_values(ascending=False)[::5].index.tolist()

# Restrict the matrix to the selected labels: rows are filtered in their
# existing order, columns are reordered to follow `abb`.
corr = corr[corr.index.isin(abb)]
corr = corr[abb]

# `.ix` was removed from pandas; `.iloc[:, 0]` selects the first column by
# position, which is what `.ix[:, 0]` resolved to for non-integer labels.
# The result is not assigned - presumably meant for interactive display.
corr.iloc[:, 0].abs().sort_values(ascending=False)
corr.shape
# Pairwise correlation matrix of the columns of `data`.
data_corr = data.corr()

# Minimum absolute correlation for a pair to be reported.
threshold = 0.5

# Each entry is [correlation, first column index, second column index].
corr_list = []

# Walk the upper triangle of the matrix (j > i) so that every pair is
# visited once and the diagonal is skipped.
for i in range(size):
    for j in range(i + 1, size):
        value = data_corr.iloc[i, j]
        # A correlation of exactly +1 is left out; negative values have no
        # lower bound, so -1 is kept.
        strongly_positive = threshold <= value < 1
        strongly_negative = value < 0 and value <= -threshold
        if strongly_positive or strongly_negative:
            corr_list.append([value, i, j])

# Strongest correlations (by absolute value) first.
s_corr_list = sorted(corr_list, key=lambda entry: abs(entry[0]), reverse=True)

# Report each pair by column name together with its correlation.
for v, i, j in s_corr_list:
    print("%s and %s = %.2f" % (cols[i], cols[j], v))
# Search for the highly correlated pairs.
# Start from an empty list: appending to a `corr_list` that an earlier pass
# has already filled would record every pair a second time.
corr_list = []
for i in range(size):
    for j in range(i + 1, size):  # j > i: skip the diagonal and mirrored pairs
        value = data_corr.iloc[i, j]
        # A correlation of exactly +1 is left out; negative values have no
        # lower bound, so -1 is kept.
        if (threshold <= value < 1) or (value < 0 and value <= -threshold):
            # Store the correlation together with both column indices.
            corr_list.append([value, i, j])

# Sort to show the strongest correlations (by absolute value) first.
s_corr_list = sorted(corr_list, key=lambda entry: abs(entry[0]), reverse=True)
# Correlation amongst selected features and target
corr = df.corr()

# Order the labels by their correlation with "target" (highest first) and
# keep every fifth one.
# NOTE(review): `[::5]` is a stride of five, not "the top five" - confirm
# this sampling is what was intended.
abb = corr["target"].sort_values(ascending=False)[::5].index.tolist()

# Restrict the matrix to the selected labels: rows are filtered in their
# existing order, columns are reordered to follow `abb`.
corr = corr[corr.index.isin(abb)]
corr = corr[abb]

# `.ix` was removed from pandas; `.iloc[:, 0]` selects the first column by
# position, which is what `.ix[:, 0]` resolved to for non-integer labels.
# The result is not assigned - presumably meant for interactive display.
corr.iloc[:, 0].abs().sort_values(ascending=False)
corr.shape