import pandas as pd
import seaborn as sns
# Load the 'titanic' example dataset through seaborn.
titanic = sns.load_dataset('titanic')
# Print the (rows, columns) shape, then preview the first two rows.
print(titanic.shape)
titanic.head(2)
(891, 15)
survived | pclass | sex | age | sibsp | parch | fare | embarked | class | who | adult_male | deck | embark_town | alive | alone | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S | Third | man | True | NaN | Southampton | no | False |
1 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C | First | woman | False | C | Cherbourg | yes | False |
# Keep only the five columns used by the grouping examples below.
selected_columns = ['age', 'sex', 'class', 'fare', 'survived']
df = titanic.loc[:, selected_columns]
# Print the (rows, columns) shape, then preview the first rows.
print(df.shape)
df.head()
(891, 5)
age | sex | class | fare | survived | |
---|---|---|---|---|---|
0 | 22.0 | male | Third | 7.2500 | 0 |
1 | 38.0 | female | First | 71.2833 | 1 |
2 | 26.0 | female | Third | 7.9250 | 1 |
3 | 35.0 | female | First | 53.1000 | 1 |
4 | 35.0 | male | Third | 8.0500 | 0 |
# Check which values occur in the class column and how often.
class_column = df['class']
class_column.value_counts()
Third 491 First 216 Second 184 Name: class, dtype: int64
# Group the rows by the value of the class column.
# The key is passed as a plain label, not a one-element list: with ['class'],
# pandas 2.x yields 1-tuple keys such as ('First',) when iterating over the
# groups and expects a tuple in get_group(), which breaks the scalar-key
# usage (f'key: {key}', get_group('Third')) elsewhere in this file.
grouped = df.groupby('class')
grouped
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x00000241F1D1AD08>
# Check the number of rows in each group.
grouped.size()
class First 216 Second 184 Third 491 dtype: int64
# Inspect each group's key and its DataFrame.
# Iterating a GroupBy object yields (key, sub-DataFrame) pairs; the loop body
# must be indented for this to be valid Python.
for key, group in grouped:
    print(f'key: {key}')
    print(f'shape: {group.shape}')
    # display() is provided by the IPython/Jupyter environment.
    display(group.head(2))
key: First shape: (216, 5)
age | sex | class | fare | survived | |
---|---|---|---|---|---|
1 | 38.0 | female | First | 71.2833 | 1 |
3 | 35.0 | female | First | 53.1000 | 1 |
key: Second shape: (184, 5)
age | sex | class | fare | survived | |
---|---|---|---|---|---|
9 | 14.0 | female | Second | 30.0708 | 1 |
15 | 55.0 | female | Second | 16.0000 | 1 |
key: Third shape: (491, 5)
age | sex | class | fare | survived | |
---|---|---|---|---|---|
0 | 22.0 | male | Third | 7.250 | 0 |
2 | 26.0 | female | Third | 7.925 | 1 |
# Compute the mean of each numeric column per group.
# numeric_only=True skips the string-valued 'sex' column: pandas >= 2.0 no
# longer drops non-numeric columns silently and raises TypeError instead.
grouped.mean(numeric_only=True)
age | fare | survived | |
---|---|---|---|
class | |||
First | 38.233441 | 84.154687 | 0.629630 |
Second | 29.877630 | 20.662183 | 0.472826 |
Third | 25.140620 | 13.675550 | 0.242363 |
# Other basic pandas functions that can be applied to the groups:
# - mean(), max(), min(), sum(), count(), size()
# - var(), std(), describe(), info(), first(), last(), etc.
# NOTE(review): info() does not appear to be a GroupBy method - confirm.
# Look at one specific group: select it by key, then preview two rows.
third_class = grouped.get_group('Third')
third_class.head(2)
age | sex | class | fare | survived | |
---|---|---|---|---|---|
0 | 22.0 | male | Third | 7.250 | 0 |
2 | 26.0 | female | Third | 7.925 | 1 |