diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..2ad1e31 Binary files /dev/null and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_missing_value/__pycache__/__init__.cpython-36.pyc b/q01_missing_value/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..36e7358 Binary files /dev/null and b/q01_missing_value/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_missing_value/__pycache__/build.cpython-36.pyc b/q01_missing_value/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..b36b6e1 Binary files /dev/null and b/q01_missing_value/__pycache__/build.cpython-36.pyc differ diff --git a/q01_missing_value/build.py b/q01_missing_value/build.py index 7dc4d18..e7f1f87 100644 --- a/q01_missing_value/build.py +++ b/q01_missing_value/build.py @@ -1,3 +1,4 @@ +# %load q01_missing_value/build.py # Default imports import pandas as pd @@ -7,4 +8,29 @@ housing_data = ny_housing[['MasVnrArea', 'GrLivArea', 'LotShape', 'GarageType', 'SalePrice']] + # Write your code here: +def imputation(dataset): + + #housing_data.isnull().sum(axis=0) Gives missing values count before imputing values. + all_columns = dataset.columns + numeric_columns = dataset._get_numeric_data().columns + categoric_columns = list(set(all_columns)-set(numeric_columns)) + + for num_col in numeric_columns: + dataset[num_col].fillna(dataset[num_col].mean(),inplace=True) + + + for cat_col in categoric_columns: + dataset[cat_col].fillna(dataset[cat_col].mode()[0],inplace=True) + + #housing_data.isnull().sum(axis=0) Gives missing values count post imputing values. + return pd.DataFrame(dataset[numeric_columns]),pd.DataFrame(dataset[categoric_columns]) + + +#Call to the function +x,y = imputation(housing_data) +y + + + diff --git a/q01_missing_value/tests/__pycache__/__init__.cpython-36.pyc b/q01_missing_value/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..c8c38fa Binary files /dev/null and b/q01_missing_value/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_missing_value/tests/__pycache__/test_q01_imputation.cpython-36.pyc b/q01_missing_value/tests/__pycache__/test_q01_imputation.cpython-36.pyc new file mode 100644 index 0000000..319d3e1 Binary files /dev/null and b/q01_missing_value/tests/__pycache__/test_q01_imputation.cpython-36.pyc differ diff --git a/q02_outlier_removal/__pycache__/__init__.cpython-36.pyc b/q02_outlier_removal/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..bc40732 Binary files /dev/null and b/q02_outlier_removal/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_outlier_removal/__pycache__/build.cpython-36.pyc b/q02_outlier_removal/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..30247ad Binary files /dev/null and b/q02_outlier_removal/__pycache__/build.cpython-36.pyc differ diff --git a/q02_outlier_removal/build.py b/q02_outlier_removal/build.py index 74df5f2..c5308fc 100644 --- a/q02_outlier_removal/build.py +++ b/q02_outlier_removal/build.py @@ -1,3 +1,4 @@ +# %load q02_outlier_removal/build.py # Default imports import pandas as pd @@ -8,3 +9,23 @@ # Write your code here: +def outlier_removal(dataset): + + #Dropping Null value rows + dataset = dataset.dropna() + + #Variable to store quantile value + Target_95_quantile = dataset.iloc[:,-1].quantile(0.95) + + #Taking values less than or equal to 95th quantile cutoff for SalePrice. + #i.e. indirectly filtering out values greater than o.95 quantile + dataset=dataset[dataset.iloc[:,-1]<=Target_95_quantile] + + return dataset + + +#Call to the function +outlier_removal(housing_data) + + + diff --git a/q02_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc b/q02_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..c5dd28b Binary files /dev/null and b/q02_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_outlier_removal/tests/__pycache__/test_q02_outlier_removal.cpython-36.pyc b/q02_outlier_removal/tests/__pycache__/test_q02_outlier_removal.cpython-36.pyc new file mode 100644 index 0000000..a9d743b Binary files /dev/null and b/q02_outlier_removal/tests/__pycache__/test_q02_outlier_removal.cpython-36.pyc differ diff --git a/q03_skewness_log/__pycache__/__init__.cpython-36.pyc b/q03_skewness_log/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..06ef712 Binary files /dev/null and b/q03_skewness_log/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_skewness_log/__pycache__/build.cpython-36.pyc b/q03_skewness_log/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..4c8058f Binary files /dev/null and b/q03_skewness_log/__pycache__/build.cpython-36.pyc differ diff --git a/q03_skewness_log/build.py b/q03_skewness_log/build.py index f008d0f..c74cf42 100644 --- a/q03_skewness_log/build.py +++ b/q03_skewness_log/build.py @@ -1,3 +1,4 @@ +# %load q03_skewness_log/build.py from scipy.stats import skew import pandas as pd import numpy as np @@ -6,3 +7,13 @@ # Write code here: +def skewness_log(data): + + data['log_SalePrice']=np.log(data['SalePrice']) + data['log_GrLivArea']=np.log(data['GrLivArea']) + + return skew(data['log_GrLivArea']),skew(data['log_SalePrice']) +skewness_log(data) + + + diff --git a/q03_skewness_log/tests/__pycache__/__init__.cpython-36.pyc b/q03_skewness_log/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..e1f6689 Binary files /dev/null and b/q03_skewness_log/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_skewness_log/tests/__pycache__/test_q03_skewness_log.cpython-36.pyc b/q03_skewness_log/tests/__pycache__/test_q03_skewness_log.cpython-36.pyc new file mode 100644 index 0000000..26a877b Binary files /dev/null and b/q03_skewness_log/tests/__pycache__/test_q03_skewness_log.cpython-36.pyc differ diff --git a/q03_skewness_sqrt/__pycache__/__init__.cpython-36.pyc b/q03_skewness_sqrt/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..b1fea3a Binary files /dev/null and b/q03_skewness_sqrt/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_skewness_sqrt/__pycache__/build.cpython-36.pyc b/q03_skewness_sqrt/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..d08b5a4 Binary files /dev/null and b/q03_skewness_sqrt/__pycache__/build.cpython-36.pyc differ diff --git a/q03_skewness_sqrt/build.py b/q03_skewness_sqrt/build.py index 4bdb0e4..0bfb9f3 100644 --- a/q03_skewness_sqrt/build.py +++ b/q03_skewness_sqrt/build.py @@ -1,3 +1,4 @@ +# %load q03_skewness_sqrt/build.py # Default imports from scipy.stats import skew import pandas as pd @@ -7,4 +8,13 @@ # Write your Solution Here: +def skewness_sqrt(data): + data['sqrt_SalePrice']=np.sqrt(data['SalePrice']) + data['sqrt_GrLivArea']=np.sqrt(data['GrLivArea']) + + return skew(data['sqrt_GrLivArea']),skew(data['sqrt_SalePrice']) + + +#Call to the function +skewness_sqrt(ny_housing) diff --git a/q03_skewness_sqrt/tests/__pycache__/__init__.cpython-36.pyc b/q03_skewness_sqrt/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..4c1833b Binary files /dev/null and b/q03_skewness_sqrt/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_skewness_sqrt/tests/__pycache__/test-q03_skewness_sqrt.cpython-36.pyc b/q03_skewness_sqrt/tests/__pycache__/test-q03_skewness_sqrt.cpython-36.pyc new file mode 100644 index 0000000..cebd67b Binary files /dev/null and b/q03_skewness_sqrt/tests/__pycache__/test-q03_skewness_sqrt.cpython-36.pyc differ diff --git a/q04_encoding/__pycache__/__init__.cpython-36.pyc b/q04_encoding/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..9c3ff2e Binary files /dev/null and b/q04_encoding/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_encoding/__pycache__/build.cpython-36.pyc b/q04_encoding/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..e94ede6 Binary files /dev/null and b/q04_encoding/__pycache__/build.cpython-36.pyc differ diff --git a/q04_encoding/build.py b/q04_encoding/build.py index a52c57f..98b8476 100644 --- a/q04_encoding/build.py +++ b/q04_encoding/build.py @@ -1,3 +1,4 @@ +# %load q04_encoding/build.py # Default imports import pandas as pd from sklearn.preprocessing import LabelEncoder @@ -7,4 +8,22 @@ # Write your code here: +def encoding(dataset): + + #Label Encoding for LotShape. + label_encoder = LabelEncoder() + dataset['LotShape_Label'] = label_encoder.fit_transform(dataset['LotShape']) + + #One Hot Encoding for GarageType + One_Hot_Encoding_Columns = pd.get_dummies(dataset['GarageType'], drop_first=True) + + #Concatenating the encoding columns to actual dataframe. + dataset = pd.concat([dataset, One_Hot_Encoding_Columns],axis=1) + + return dataset + + +#Call to the actual function. +encoding(housing_data) + diff --git a/q04_encoding/tests/__pycache__/__init__.cpython-36.pyc b/q04_encoding/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..4e005e7 Binary files /dev/null and b/q04_encoding/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_encoding/tests/__pycache__/test_q04_encoding.cpython-36.pyc b/q04_encoding/tests/__pycache__/test_q04_encoding.cpython-36.pyc new file mode 100644 index 0000000..56ceed9 Binary files /dev/null and b/q04_encoding/tests/__pycache__/test_q04_encoding.cpython-36.pyc differ