import pandas as pd
import seaborn as sns

# Load the 'titanic' example dataset through seaborn, print its
# (rows, columns) shape, and preview the first two rows.
titanic = sns.load_dataset('titanic')
print(titanic.shape)
titanic.head(2)

(891, 15)


# Narrow the data down to the five columns used in the group-by examples,
# then report the resulting shape and preview the first rows.
selected_columns = ['age', 'sex', 'class', 'fare', 'survived']
df = titanic.loc[:, selected_columns]
print(df.shape)
df.head()

(891, 5)


# Check which values appear in the 'class' column and how often
df['class'].value_counts()

Third     491
First     216
Second    184
Name: class, dtype: int64


# Group the rows by the values of the 'class' column.
# The key is passed as a scalar rather than a one-element list: with
# ['class'], pandas >= 2.0 yields 1-tuples such as ('First',) when iterating
# over the groups and expects a tuple in get_group(), whereas a scalar key
# keeps plain labels like 'First' on every pandas version.
# observed=True is spelled out because seaborn presumably loads 'class' as a
# categorical column (TODO confirm); recent pandas versions warn when the
# argument is omitted for categorical groupers. All three classes are present
# in the data, so the resulting groups are the same either way.
grouped = df.groupby('class', observed=True)
grouped

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x00000241F1D1AD08>


# Check the number of rows in each group
grouped.size()

class
First     216
Second    184
Third     491
dtype: int64


# Walk through the groups one by one, showing each group's key, the shape of
# its DataFrame, and a two-row preview.
for class_label, class_rows in grouped:
    print(f'key: {class_label}')
    print(f'shape: {class_rows.shape}')
    display(class_rows.head(2))

key: First
shape: (216, 5)

key: Second
shape: (184, 5)

key: Third
shape: (491, 5)


# Compute the per-group mean of every numeric column.
# numeric_only=True is required because df also holds the string column
# 'sex': pandas >= 2.0 no longer drops non-numeric columns silently and
# raises TypeError when asked to average them.
grouped.mean(numeric_only=True)


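# Other built-in pandas functions that can be applied to a GroupBy object
# - mean(), max(), min(), sum(), count(), size()
# - var(), std(), describe(), first(), last(), etc.
# - note: info() is a DataFrame method only; GroupBy objects do not provide it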


# Inspect one specific group: the first two rows of the 'Third' class group
grouped.get_group('Third').head(2)

	survived	pclass	sex	age	sibsp	parch	fare	embarked	class	who	adult_male	deck	embark_town	alive	alone
0	0	3	male	22.0	1	0	7.2500	S	Third	man	True	NaN	Southampton	no	False
1	1	1	female	38.0	1	0	71.2833	C	First	woman	False	C	Cherbourg	yes	False

	age	sex	class	fare	survived
0	22.0	male	Third	7.2500	0
1	38.0	female	First	71.2833	1
2	26.0	female	Third	7.9250	1
3	35.0	female	First	53.1000	1
4	35.0	male	Third	8.0500	0