In [1]:
import seaborn as sns

# Load the 'titanic' example dataset through seaborn and keep a five-column subset.
titanic = sns.load_dataset('titanic')
# .loc[:, [...]] keeps every row and only the listed columns.
df = titanic.loc[:, ['age', 'sex', 'class', 'fare', 'survived']]
print(df.shape)  # (rows, columns) of the subset
df.head()  # last expression of the cell: preview of the first rows
(891, 5)
Out[1]:
age sex class fare survived
0 22.0 male Third 7.2500 0
1 38.0 female First 71.2833 1
2 26.0 female Third 7.9250 1
3 35.0 female First 53.1000 1
4 35.0 male Third 8.0500 0
In [2]:
# Group by several columns at once: every (class, sex) combination forms a group.
# `observed` is passed explicitly because 'class' is loaded as a categorical
# column by seaborn (NOTE(review): confirm for your seaborn version). With a
# categorical key, pandas >= 2.1 emits a FutureWarning when `observed` is left
# implicit, and the default is slated to change. observed=False matches the
# historical default, so the results are the same on old and new pandas.
grouped_two = df.groupby(['class', 'sex'], observed=False)
grouped_two
Out[2]:
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000028DD0426508>
In [3]:
# Number of rows in each (class, sex) group
grouped_two.size()
Out[3]:
class   sex   
First   female     94
        male      122
Second  female     76
        male      108
Third   female    144
        male      347
dtype: int64
In [4]:
# Inspect the group keys: one (class, sex) tuple per group
grouped_two.groups.keys()
Out[4]:
dict_keys([('First', 'female'), ('First', 'male'), ('Second', 'female'), ('Second', 'male'), ('Third', 'female'), ('Third', 'male')])
In [5]:
# Walk through the groups, printing each key and the shape of its sub-DataFrame
for key, group in grouped_two:
    # A single print with newline separators; the trailing '' produces the
    # blank line that separates consecutive groups.
    print(f'key: {key}', f'shape: {group.shape}', '', sep='\n')
    # Tip: call display(group.head(2)) here to also preview each group's rows.
key: ('First', 'female')
shape: (94, 5)

key: ('First', 'male')
shape: (122, 5)

key: ('Second', 'female')
shape: (76, 5)

key: ('Second', 'male')
shape: (108, 5)

key: ('Third', 'female')
shape: (144, 5)

key: ('Third', 'male')
shape: (347, 5)

In [6]:
# Pick specific columns from the grouped object, then apply an aggregation:
# here, the per-group mean of 'age' and 'fare'
grouped_two[['age', 'fare']].mean()
Out[6]:
age fare
class sex
First female 34.611765 106.125798
male 41.281386 67.226127
Second female 28.722973 21.970121
male 30.740707 19.741782
Third female 21.750000 16.118810
male 26.507589 12.661633
In [9]:
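# Other built-in pandas methods that can be applied to a grouped object:
# - mean(), max(), min(), sum(), count(), size()
# - var(), std(), describe(), first(), last(), etc.
# NOTE: info() is a DataFrame method; it is not available on GroupBy objects.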
In [10]:
# Same per-group mean, followed by reset_index() so that the (class, sex)
# index levels become ordinary columns of a flat DataFrame
(
    grouped_two[['age', 'fare']]
    .mean()
    .reset_index()
)
Out[10]:
class sex age fare
0 First female 34.611765 106.125798
1 First male 41.281386 67.226127
2 Second female 28.722973 21.970121
3 Second male 30.740707 19.741782
4 Third female 21.750000 16.118810
5 Third male 26.507589 12.661633
In [ ]: