Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added __pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file not shown.
Binary file added q01_missing_value/__pycache__/build.cpython-36.pyc
Binary file not shown.
28 changes: 28 additions & 0 deletions q01_missing_value/build.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# %load q01_missing_value/build.py
# Default imports
import pandas as pd
import numpy as np
from sklearn.preprocessing import Imputer


# Data loading
ny_housing = pd.read_csv('data/train.csv')
Expand All @@ -8,3 +12,27 @@


# Write your code here:
def imputation(housing_data):
    """Impute missing values: column mean for numeric data, mode for the rest.

    The input frame is left untouched. Both results are new frames that keep
    the input's row index, so they can be re-joined afterwards.

    Args:
        housing_data: DataFrame with numeric and/or non-numeric columns.

    Returns:
        Tuple ``(df_num, df_cat)``. ``df_num`` holds the numeric columns as
        floats with NaN replaced by the column mean. ``df_cat`` holds the
        remaining columns (in sorted name order, as produced by
        ``Index.difference``) with NaN replaced by the most frequent value.
        A column that is entirely NaN has neither a mean nor a mode and is
        returned unchanged.
    """
    # Separate numerical and categorical columns.
    num_col = housing_data._get_numeric_data().columns
    cat_col = housing_data.columns.difference(num_col)

    # Mean-impute with pandas itself: ``sklearn.preprocessing.Imputer`` was
    # removed in scikit-learn 0.22, and round-tripping through a bare ndarray
    # discarded the row index.
    df_num = housing_data.loc[:, num_col].astype(float)
    df_num = df_num.fillna(df_num.mean())

    # Mode-impute on a copy. ``df.loc[:, col].fillna(..., inplace=True)``
    # fills a temporary Series, which is not guaranteed to write back to the
    # frame (and never does under copy-on-write).
    df_cat = housing_data.loc[:, cat_col].copy()
    for col in cat_col:
        modes = df_cat[col].mode()
        # An all-NaN column has no mode; leave it alone instead of raising.
        if not modes.empty:
            df_cat[col] = df_cat[col].fillna(modes.iloc[0])

    return df_num, df_cat

# Run the imputation once at import time; the returned (df_num, df_cat) pair
# is discarded here.
# NOTE(review): housing_data is not assigned in the visible part of this
# file - presumably it is defined in the collapsed lines above; confirm.
imputation(housing_data)



Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added q02_outlier_removal/__pycache__/build.cpython-36.pyc
Binary file not shown.
19 changes: 19 additions & 0 deletions q02_outlier_removal/build.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
# %load q02_outlier_removal/build.py
# Default imports

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import warnings
warnings.filterwarnings('ignore')


# Data
ny_housing = pd.read_csv('data/train.csv')
Expand All @@ -8,3 +17,13 @@


# Write your code here:
def outlier_removal(housing_data, quantile=0.95):
    """Drop rows whose value exceeds an upper quantile in any numeric column.

    Numeric columns are processed one after another, and each column's
    cut-off is computed on the rows that survived the previous columns.
    Rows holding NaN in a column are never dropped for that column, because
    ``NaN > cutoff`` is False. The input frame is not modified.

    Args:
        housing_data: DataFrame to filter.
        quantile: Upper quantile used as the cut-off for every numeric
            column. Defaults to 0.95.

    Returns:
        A DataFrame containing only the rows at or below the cut-off in
        every numeric column.
    """
    num_col = housing_data._get_numeric_data().columns
    for col in num_col:
        cutoff = housing_data[col].quantile(quantile)
        # Filter with a boolean mask rather than ``drop(index_labels)``:
        # dropping by label also removes rows that merely share an index
        # label with an outlier when the index is not unique.
        housing_data = housing_data[~(housing_data[col] > cutoff)]
    return housing_data

# Keep the returned frame: outlier_removal returns a filtered frame and does
# not modify its argument, so printing housing_data.shape would only report
# the unfiltered size.
housing_data_trimmed = outlier_removal(housing_data)
print(housing_data_trimmed.shape)


Binary file not shown.
Binary file not shown.
Binary file added q03_skewness_log/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file added q03_skewness_log/__pycache__/build.cpython-36.pyc
Binary file not shown.
7 changes: 7 additions & 0 deletions q03_skewness_log/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q03_skewness_log/build.py
from scipy.stats import skew
import pandas as pd
import numpy as np
Expand All @@ -6,3 +7,9 @@


# Write code here:
def skewness_log(data):
    """Return the skewness of the log-transformed living area and sale price.

    Args:
        data: Frame exposing 'GrLivArea' and 'SalePrice' columns.

    Returns:
        Tuple ``(skew of log GrLivArea, skew of log SalePrice)``.
    """
    return tuple(
        skew(np.log(data[column])) for column in ('GrLivArea', 'SalePrice')
    )

# Evaluate the log-skewness once at import time; the returned pair is discarded.
# NOTE(review): `data` is not assigned in the visible part of this file -
# presumably it is loaded in the collapsed lines above; confirm.
skewness_log(data)


Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added q03_skewness_sqrt/__pycache__/build.cpython-36.pyc
Binary file not shown.
10 changes: 10 additions & 0 deletions q03_skewness_sqrt/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q03_skewness_sqrt/build.py
# Default imports
from scipy.stats import skew
import pandas as pd
Expand All @@ -8,3 +9,12 @@

# Write your Solution Here:

def skewness_sqrt(ny_housing):
    """Return the skewness of the square-root-transformed area and price.

    Args:
        ny_housing: Frame exposing 'GrLivArea' and 'SalePrice' columns.

    Returns:
        Tuple ``(skew of sqrt GrLivArea, skew of sqrt SalePrice)``.
    """
    # SalePrice is evaluated first, but reported second.
    price_skew, area_skew = (
        skew(np.sqrt(ny_housing[column])) for column in ('SalePrice', 'GrLivArea')
    )
    return area_skew, price_skew

# Evaluate the sqrt-skewness once at import time; the returned pair is discarded.
# NOTE(review): ny_housing is not assigned in the visible part of this file -
# presumably it is loaded in the collapsed lines above; confirm.
skewness_sqrt(ny_housing)



Binary file not shown.
Binary file not shown.