diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..db1f765 Binary files /dev/null and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_missing_value/__pycache__/__init__.cpython-36.pyc b/q01_missing_value/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..fe24846 Binary files /dev/null and b/q01_missing_value/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_missing_value/__pycache__/build.cpython-36.pyc b/q01_missing_value/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..1a6f588 Binary files /dev/null and b/q01_missing_value/__pycache__/build.cpython-36.pyc differ diff --git a/q01_missing_value/build.py b/q01_missing_value/build.py index 7dc4d18..f146876 100644 --- a/q01_missing_value/build.py +++ b/q01_missing_value/build.py @@ -1,3 +1,4 @@ +# %load q01_missing_value/build.py # Default imports import pandas as pd @@ -6,5 +7,24 @@ # Selecting 4 most relevant variables along with target variable from the dataset fot the Cleaning and Preprocessing. housing_data = ny_housing[['MasVnrArea', 'GrLivArea', 'LotShape', 'GarageType', 'SalePrice']] - +df = housing_data.copy() +df1 =df.select_dtypes(include=['int','float']) # Write your code here: + +def imputation(df): + df1=df.select_dtypes(include=['int','float']) + df2=df.select_dtypes(include=['object']) + df2['LotShape']=df2['LotShape'].fillna(df2['LotShape'].mode().index[0]) + df2['GarageType']=df2['GarageType'].fillna(df2['GarageType'].value_counts().index[0]) + for col in df1.columns: + df1[col] = df1[col].fillna(df1[col].mean()) + + return df1,df2 + + + + + + + + diff --git a/q01_missing_value/tests/__pycache__/__init__.cpython-36.pyc b/q01_missing_value/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..4d990b2 Binary files /dev/null and b/q01_missing_value/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_missing_value/tests/__pycache__/test_q01_imputation.cpython-36.pyc b/q01_missing_value/tests/__pycache__/test_q01_imputation.cpython-36.pyc new file mode 100644 index 0000000..86df0ed Binary files /dev/null and b/q01_missing_value/tests/__pycache__/test_q01_imputation.cpython-36.pyc differ diff --git a/q02_outlier_removal/__pycache__/__init__.cpython-36.pyc b/q02_outlier_removal/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..75d97f2 Binary files /dev/null and b/q02_outlier_removal/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_outlier_removal/__pycache__/build.cpython-36.pyc b/q02_outlier_removal/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..fa64036 Binary files /dev/null and b/q02_outlier_removal/__pycache__/build.cpython-36.pyc differ diff --git a/q02_outlier_removal/build.py b/q02_outlier_removal/build.py index 74df5f2..9a7f32f 100644 --- a/q02_outlier_removal/build.py +++ b/q02_outlier_removal/build.py @@ -1,3 +1,4 @@ +# %load q02_outlier_removal/build.py # Default imports import pandas as pd @@ -8,3 +9,19 @@ # Write your code here: +def outlier_removal(df): + #quantile_value = 0.95 + t = dict(df.quantile(.95)) + for item in t: + df = df.drop(df[df[item] > t[item]].index) + return df + + + + + + + + + + diff --git a/q02_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc b/q02_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..6f77271 Binary files /dev/null and b/q02_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_outlier_removal/tests/__pycache__/test_q02_outlier_removal.cpython-36.pyc b/q02_outlier_removal/tests/__pycache__/test_q02_outlier_removal.cpython-36.pyc new file mode 100644 index 0000000..bf08b9f Binary files /dev/null and b/q02_outlier_removal/tests/__pycache__/test_q02_outlier_removal.cpython-36.pyc differ diff --git a/q03_skewness_log/__pycache__/__init__.cpython-36.pyc b/q03_skewness_log/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..4fed81d Binary files /dev/null and b/q03_skewness_log/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_skewness_log/__pycache__/build.cpython-36.pyc b/q03_skewness_log/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..ccef3a6 Binary files /dev/null and b/q03_skewness_log/__pycache__/build.cpython-36.pyc differ diff --git a/q03_skewness_log/build.py b/q03_skewness_log/build.py index f008d0f..241c65e 100644 --- a/q03_skewness_log/build.py +++ b/q03_skewness_log/build.py @@ -1,3 +1,4 @@ +# %load q03_skewness_log/build.py from scipy.stats import skew import pandas as pd import numpy as np @@ -6,3 +7,13 @@ # Write code here: +def skewness_log(data): + data['GrLivArea'] = np.log(data['GrLivArea']) + data['SalePrice'] = np.log(data['SalePrice']) + sk_val1 = skew(data['GrLivArea']) + sk_val2 = skew(data['SalePrice']) + return sk_val1, sk_val2 + + + + diff --git a/q03_skewness_log/tests/__pycache__/__init__.cpython-36.pyc b/q03_skewness_log/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..6286ed2 Binary files /dev/null and b/q03_skewness_log/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_skewness_log/tests/__pycache__/test_q03_skewness_log.cpython-36.pyc b/q03_skewness_log/tests/__pycache__/test_q03_skewness_log.cpython-36.pyc new file mode 100644 index 0000000..1b34065 Binary files /dev/null and b/q03_skewness_log/tests/__pycache__/test_q03_skewness_log.cpython-36.pyc differ diff --git a/q03_skewness_sqrt/__pycache__/__init__.cpython-36.pyc b/q03_skewness_sqrt/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..1d33499 Binary files /dev/null and b/q03_skewness_sqrt/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_skewness_sqrt/__pycache__/build.cpython-36.pyc b/q03_skewness_sqrt/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..6147347 Binary files /dev/null and b/q03_skewness_sqrt/__pycache__/build.cpython-36.pyc differ diff --git a/q03_skewness_sqrt/build.py b/q03_skewness_sqrt/build.py index 4bdb0e4..8189147 100644 --- a/q03_skewness_sqrt/build.py +++ b/q03_skewness_sqrt/build.py @@ -1,3 +1,4 @@ +# %load q03_skewness_sqrt/build.py # Default imports from scipy.stats import skew import pandas as pd @@ -7,4 +8,16 @@ # Write your Solution Here: +def skewness_sqrt(data): + data['GrLivArea'] = np.sqrt(data['GrLivArea']) + data['SalePrice'] = np.sqrt(data['SalePrice']) + sk_val1 = skew(data['GrLivArea']) + sk_val2 = skew(data['SalePrice']) + return sk_val1, sk_val2 + + + + + + diff --git a/q03_skewness_sqrt/tests/__pycache__/__init__.cpython-36.pyc b/q03_skewness_sqrt/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..e4c87fb Binary files /dev/null and b/q03_skewness_sqrt/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_skewness_sqrt/tests/__pycache__/test-q03_skewness_sqrt.cpython-36.pyc b/q03_skewness_sqrt/tests/__pycache__/test-q03_skewness_sqrt.cpython-36.pyc new file mode 100644 index 0000000..3bf52a2 Binary files /dev/null and b/q03_skewness_sqrt/tests/__pycache__/test-q03_skewness_sqrt.cpython-36.pyc differ diff --git a/q04_encoding/__pycache__/__init__.cpython-36.pyc b/q04_encoding/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..21acdfd Binary files /dev/null and b/q04_encoding/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_encoding/__pycache__/build.cpython-36.pyc b/q04_encoding/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..844d219 Binary files /dev/null and b/q04_encoding/__pycache__/build.cpython-36.pyc differ diff --git a/q04_encoding/build.py b/q04_encoding/build.py index a52c57f..b9d5911 100644 --- a/q04_encoding/build.py +++ b/q04_encoding/build.py @@ -1,3 +1,4 @@ +# %load q04_encoding/build.py # Default imports import pandas as pd from sklearn.preprocessing import LabelEncoder @@ -8,3 +9,23 @@ # Write your code here: +def encoding(dataset): + le = LabelEncoder() + dataset['LotShape'] = dataset['LotShape'].fillna(dataset['LotShape'].mode()[0]) + dataset['GarageType'] = dataset['GarageType'].fillna(dataset['GarageType'].mode()[0]) + dataset['LotShape_Label'] = le.fit_transform(dataset['LotShape']) + df_dummy = pd.get_dummies(dataset['GarageType']) + dataset = dataset.drop(['GarageType'],1) + dataset = pd.concat([dataset,df_dummy],1) + return dataset + + + + + + + + + + + diff --git a/q04_encoding/tests/__pycache__/__init__.cpython-36.pyc b/q04_encoding/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..8db3d32 Binary files /dev/null and b/q04_encoding/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_encoding/tests/__pycache__/test_q04_encoding.cpython-36.pyc b/q04_encoding/tests/__pycache__/test_q04_encoding.cpython-36.pyc new file mode 100644 index 0000000..78a532b Binary files /dev/null and b/q04_encoding/tests/__pycache__/test_q04_encoding.cpython-36.pyc differ