import pandas as pd
import seaborn as sns
# Load seaborn's 'titanic' example dataset into a DataFrame.
df = sns.load_dataset('titanic')
# Preview the first two rows.
df.head(2)
| | survived | pclass | sex | age | sibsp | parch | fare | embarked | class | who | adult_male | deck | embark_town | alive | alone |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S | Third | man | True | NaN | Southampton | no | False |
1 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C | First | woman | False | C | Cherbourg | yes | False |
# Keep every row but only the 'age' and 'fare' columns.
df = df.loc[:, ['age', 'fare']]
# Preview the first rows of the reduced frame.
df.head()
| | age | fare |
---|---|---|
0 | 22.0 | 7.2500 |
1 | 38.0 | 71.2833 |
2 | 26.0 | 7.9250 |
3 | 35.0 | 53.1000 |
4 | 35.0 | 8.0500 |
def missing_value(series: pd.Series) -> pd.Series:
    """Return a boolean mask flagging the missing entries of ``series``.

    Args:
        series: The values to inspect (one column when used with
            ``DataFrame.apply`` along ``axis=0``).

    Returns:
        A boolean Series with the same index as ``series``: ``True``
        where the value is missing (NaN/None), ``False`` elsewhere.
    """
    return series.isnull()
result = df.apply(missing_value, axis=0) # axis=0 is the default, so it can be omitted
result.head() # returns a DataFrame
| | age | fare |
---|---|---|
0 | False | False |
1 | False | False |
2 | False | False |
3 | False | False |
4 | False | False |
def min_max(x: pd.Series) -> float:
    """Return the range of ``x``: its maximum minus its minimum.

    Despite the name, this does not perform min-max scaling; it reduces
    the whole Series to a single number.

    Args:
        x: The values to reduce (one column when used with
            ``DataFrame.apply`` along ``axis=0``).

    Returns:
        ``x.max() - x.min()``. With pandas' default ``skipna=True``,
        missing values are ignored by both reductions.
    """
    return x.max() - x.min()
result = df.apply(min_max) # axis=0 is the default, so it can be omitted
result # returns a Series; each column name becomes an index label of the Series
age      79.5800
fare    512.3292
dtype: float64