Dropbox Paper

Table Exploration

Please sign in to edit freely; fast sign-up with Google is an option.

June 2018

Feel free to add your tips and tricks. Anything useful or semi-useful will have a home here, and the page will be periodically updated and improved by its users.

Table of Strong Correlations

# List every pair of columns in `data` whose correlation is strong
# (|r| >= threshold), strongest first.
# `data` is presumably a pandas DataFrame defined earlier - confirm.
data_corr = data.corr()

# Take the labels and the feature count from the correlation matrix itself,
# so the integer positions i, j used below always map to the right names.
cols = data_corr.columns
size = len(cols)

# Set the threshold to select only highly correlated attributes
threshold = 0.5

# List of pairs along with correlation above threshold
corr_list = []

# Search for the highly correlated pairs
for i in range(size):  # for 'size' features
    for j in range(i + 1, size):  # avoid repetition (upper triangle only)
        value = data_corr.iloc[i, j]  # read the cell once instead of four times
        # Keep strong positive values below 1, or strong negative values.
        if (threshold <= value < 1) or (value < 0 and value <= -threshold):
            corr_list.append([value, i, j])  # store correlation and columns index

# Sort to show higher ones first (by absolute value, descending)
s_corr_list = sorted(corr_list, key=lambda x: -abs(x[0]))

# Print correlations and column names
for v, i, j in s_corr_list:
    print("%s and %s = %.2f" % (cols[i], cols[j], v))

Search for Highly Correlated Pairs

# Search for the highly correlated pairs.
# This excerpt expects `data_corr` (correlation matrix), `threshold`,
# `size` (number of features) and `corr_list` (a list) to exist already.
for i in range(size):  # for 'size' features
    for j in range(i + 1, size):  # avoid repetition (upper triangle only)
        value = data_corr.iloc[i, j]  # read the cell once instead of four times
        # Keep strong positive values below 1, or strong negative values.
        if (threshold <= value < 1) or (value < 0 and value <= -threshold):
            corr_list.append([value, i, j])  # store correlation and columns index

# Sort to show higher ones first (by absolute value, descending)
s_corr_list = sorted(corr_list, key=lambda x: -abs(x[0]))

Correlation with Target

# Correlation amongst top features and target
# `df` is presumably a pandas DataFrame with a "target" column - confirm.
corr = df.corr()

# Rank the columns by their signed correlation with "target", highest first,
# and keep a subset of the labels.
# NOTE(review): `[::5]` keeps every fifth label of that ranking, not the
# first five; if the top five were intended this should be `[:5]` - confirm.
abb = corr["target"].sort_values(ascending=False)[::5].index.values.tolist()

# Restrict the matrix to the selected rows, then to the selected columns
# (columns end up in ranked order).
corr = corr[corr.index.isin(abb)]
corr = corr[abb]

# Absolute correlations against the first selected column, strongest first.
# `.iloc` replaces `.ix`, which was removed in pandas 1.0.
corr.iloc[:, 0].abs().sort_values(ascending=False)

corr.shape

Classification Report