import pandas as pd
import seaborn as sns
# Load seaborn's 'titanic' example dataset into a DataFrame.
df = sns.load_dataset(name='titanic')
# Preview the first two rows.
df.head(n=2)
| | survived | pclass | sex | age | sibsp | parch | fare | embarked | class | who | adult_male | deck | embark_town | alive | alone |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S | Third | man | True | NaN | Southampton | no | False |
| 1 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C | First | woman | False | C | Cherbourg | yes | False |
# Keep only the two numeric columns used in the examples below.
df = df[['age', 'fare']]
# Preview the first five rows (the default for head()).
df.head(n=5)
| | age | fare |
|---|---|---|
| 0 | 22.0 | 7.2500 |
| 1 | 38.0 | 71.2833 |
| 2 | 26.0 | 7.9250 |
| 3 | 35.0 | 53.1000 |
| 4 | 35.0 | 8.0500 |
def missing_value(series: pd.Series) -> pd.Series:
    """Return a boolean mask marking the missing entries of *series*.

    Args:
        series: The Series to inspect.

    Returns:
        A Series with the same index as *series*, True where the value is
        missing (as reported by ``Series.isnull``) and False elsewhere.
    """
    return series.isnull()
result = df.apply(missing_value)  # axis=0 is the default, so it is left out here
result.head(n=5)  # a DataFrame is returned
| | age | fare |
|---|---|---|
| 0 | False | False |
| 1 | False | False |
| 2 | False | False |
| 3 | False | False |
| 4 | False | False |
def min_max(x: pd.Series) -> float:
    """Return the range of *x*, i.e. its maximum minus its minimum.

    Args:
        x: The Series to summarise.

    Returns:
        A single scalar, ``x.max() - x.min()``. Because a scalar is
        returned, ``DataFrame.apply(min_max)`` collapses each column to
        one value.
    """
    return x.max() - x.min()
result = df.apply(min_max, axis=0)  # axis=0 is the default, so it could be omitted
result  # a Series is returned; each column name becomes an index label of the Series
age      79.5800
fare    512.3292
dtype: float64