import seaborn as sns
# Load the Titanic sample dataset through seaborn's dataset loader.
titanic = sns.load_dataset('titanic')
# Keep every row but only the five columns used in the rest of this script.
df = titanic.loc[:, ['age', 'sex', 'class', 'fare', 'survived']]
# Report (rows, columns) of the trimmed frame, then preview its first rows.
print(df.shape)
df.head()
(891, 5)
| | age | sex | class | fare | survived |
---|---|---|---|---|---|
0 | 22.0 | male | Third | 7.2500 | 0 |
1 | 38.0 | female | First | 71.2833 | 1 |
2 | 26.0 | female | Third | 7.9250 | 1 |
3 | 35.0 | female | First | 53.1000 | 1 |
4 | 35.0 | male | Third | 8.0500 | 0 |
# Group by several columns at once: every (class, sex) pair becomes one group.
# `observed=False` is the long-standing pandas default for categorical keys;
# it is spelled out because newer pandas releases deprecate relying on the
# implicit default (FutureWarning) and plan to switch it to True.
# NOTE(review): seaborn appears to load `class` as a categorical column —
# confirm with `df.dtypes`.
grouped_two = df.groupby(['class', 'sex'], observed=False)
grouped_two
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000028DD0426508>
# Check the number of rows in each group
grouped_two.size()
class sex First female 94 male 122 Second female 76 male 108 Third female 144 male 347 dtype: int64
# Check the key of each group
grouped_two.groups.keys()
dict_keys([('First', 'female'), ('First', 'male'), ('Second', 'female'), ('Second', 'male'), ('Third', 'female'), ('Third', 'male')])
# Check each group's key and the shape of its DataFrame.
# Iterating over the GroupBy object yields (key, sub-DataFrame) pairs; with two
# grouping columns the key is a (class, sex) tuple.
for key, group in grouped_two:
    print(f'key: {key}')
    print(f'shape: {group.shape}')
    # display(group.head(2))
    print()  # blank line between groups
key: ('First', 'female') shape: (94, 5) key: ('First', 'male') shape: (122, 5) key: ('Second', 'female') shape: (76, 5) key: ('Second', 'male') shape: (108, 5) key: ('Third', 'female') shape: (144, 5) key: ('Third', 'male') shape: (347, 5)
# Select specific columns and apply various operations to them
# (here: the per-group mean of 'age' and 'fare')
grouped_two[['age', 'fare']].mean()
| | | age | fare |
---|---|---|---|
class | sex | | |
First | female | 34.611765 | 106.125798 |
| | male | 41.281386 | 67.226127 |
Second | female | 28.722973 | 21.970121 |
| | male | 30.740707 | 19.741782 |
Third | female | 21.750000 | 16.118810 |
| | male | 26.507589 | 12.661633 |
# Other basic pandas functions that can be applied
# - mean(), max(), min(), sum(), count(), size()
# - var(), std(), describe(), info(), first(), last(), etc.
# NOTE(review): info() is a DataFrame method; confirm it is actually available
# on a GroupBy object before relying on this list.
# reset_index() can be applied as well; it moves the group keys
# (class, sex) from the index back into ordinary columns.
grouped_two[['age', 'fare']].mean().reset_index()
| | class | sex | age | fare |
---|---|---|---|---|
0 | First | female | 34.611765 | 106.125798 |
1 | First | male | 41.281386 | 67.226127 |
2 | Second | female | 28.722973 | 21.970121 |
3 | Second | male | 30.740707 | 19.741782 |
4 | Third | female | 21.750000 | 16.118810 |
5 | Third | male | 26.507589 | 12.661633 |