# Parse day-first, dot-separated date strings (format '%d.%m.%Y') into datetimes.
df.date = pd.to_datetime(df.date, format='%d.%m.%Y')
# Vectorized day-of-month extraction via the .dt accessor (no per-row lambda).
df['day'] = df.date.dt.day
# NOTE(review): assumes df already has 'year' and 'month' columns -- they are
# not created in this snippet; confirm they are derived upstream.
# .copy() makes the subset an independent frame, so adding a column below does
# not write into a slice of df (avoids SettingWithCopyWarning).
df_sep14 = df[(df['year'] == 2014) & (df['month'] == 9)].copy()
# Compute revenue from the filtered rows themselves rather than from all of df.
df_sep14['revenue'] = df_sep14['item_price'] * df_sep14['item_cnt_day']
# Calling sum() on a groupby object yields per-group totals.
df_sep14_grouped = df_sep14.groupby('shop_id')
# numeric_only=True skips non-numeric columns such as the datetime 'date'
# column, which cannot be summed.
df_sep14_grouped.sum(numeric_only=True)
# SQL-style IN is spelled isin() in pandas.
df_summer14 = df[(df['year'] == 2014) & (df['month'].isin([6, 7, 8]))]
# Equivalent of COUNT(DISTINCT ...).
nu = df['state'].nunique()
# np.percentile returns values in the order requested: the 1st percentile is
# the lower bound and the 99th the upper bound.
lowerbound, upperbound = np.percentile(x, [1, 99])
# Clip x into [lowerbound, upperbound]; np.clip takes (a, a_min, a_max).
y = np.clip(x, lowerbound, upperbound)
# Reminder of the rank-transform helper; referenced only, not called here.
scipy.stats.rankdata
WEEK 1
MLの基礎
pandas
# Parse day-first, dot-separated date strings (format '%d.%m.%Y') into datetimes.
df.date = pd.to_datetime(df.date, format='%d.%m.%Y')
# Vectorized day-of-month extraction via the .dt accessor (no per-row lambda).
df['day'] = df.date.dt.day
# NOTE(review): assumes df already has 'year' and 'month' columns -- they are
# not created in this snippet; confirm they are derived upstream.
# .copy() makes the subset an independent frame, so adding a column below does
# not write into a slice of df (avoids SettingWithCopyWarning).
df_sep14 = df[(df['year'] == 2014) & (df['month'] == 9)].copy()
# Compute revenue from the filtered rows themselves rather than from all of df.
df_sep14['revenue'] = df_sep14['item_price'] * df_sep14['item_cnt_day']
# Calling sum() on a groupby object yields per-group totals.
df_sep14_grouped = df_sep14.groupby('shop_id')
# numeric_only=True skips non-numeric columns such as the datetime 'date'
# column, which cannot be summed.
df_sep14_grouped.sum(numeric_only=True)
# SQL-style IN is spelled isin() in pandas.
df_summer14 = df[(df['year'] == 2014) & (df['month'].isin([6, 7, 8]))]
# Equivalent of COUNT(DISTINCT ...).
nu = df['state'].nunique()
数値データの前処理
# np.percentile returns values in the order requested: the 1st percentile is
# the lower bound and the 99th the upper bound.
lowerbound, upperbound = np.percentile(x, [1, 99])
# Clip x into [lowerbound, upperbound]; np.clip takes (a, a_min, a_max).
y = np.clip(x, lowerbound, upperbound)
# Reminder of the rank-transform helper; referenced only, not called here.
scipy.stats.rankdata