diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..ee22e7c Binary files /dev/null and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_missing_value/__pycache__/__init__.cpython-36.pyc b/q01_missing_value/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..a531f76 Binary files /dev/null and b/q01_missing_value/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_missing_value/__pycache__/build.cpython-36.pyc b/q01_missing_value/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..cde5d94 Binary files /dev/null and b/q01_missing_value/__pycache__/build.cpython-36.pyc differ diff --git a/q01_missing_value/build.py b/q01_missing_value/build.py index 7dc4d18..0caeecf 100644 --- a/q01_missing_value/build.py +++ b/q01_missing_value/build.py @@ -1,3 +1,4 @@ +# %load q01_missing_value/build.py # Default imports import pandas as pd @@ -5,6 +6,13 @@ ny_housing = pd.read_csv('data/train.csv') # Selecting 4 most relevant variables along with target variable from the dataset fot the Cleaning and Preprocessing. 
def imputation(housing_data):
    """Fill missing values in the numeric and categorical housing features.

    The original implementation chained ``.notnull()`` and ``.notna()``;
    the second call ran on an already-boolean frame, so it returned two
    all-``True`` frames and never imputed anything.

    Parameters
    ----------
    housing_data : pandas.DataFrame
        Frame containing at least the columns ``MasVnrArea``,
        ``GrLivArea``, ``LotShape`` and ``GarageType``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(numeric, categorical)``: the numeric columns with gaps replaced
        by the column mean, and the categorical columns with gaps replaced
        by the column's most frequent value. Row index and column order
        match the input selection. The input frame is not modified.

    Raises
    ------
    KeyError
        If any of the four expected columns is absent.
    """
    numeric_columns = ['MasVnrArea', 'GrLivArea']
    categorical_columns = ['LotShape', 'GarageType']

    # Selecting with a list of labels yields new frames, so the caller's
    # data is left untouched by the fills below.
    numeric = housing_data.loc[:, numeric_columns]
    categorical = housing_data.loc[:, categorical_columns]

    # Mean imputation; a column that is entirely missing has a NaN mean
    # and is therefore left as-is.
    numeric = numeric.fillna(numeric.mean())

    # mode() returns one row per tied value; the first row is used as the
    # fill value. It is empty when there is no non-missing data at all.
    most_frequent = categorical.mode()
    if not most_frequent.empty:
        categorical = categorical.fillna(most_frequent.iloc[0])

    return numeric, categorical
relevant variables from the dataset fot the Cleaning and Preprocessing. housing_data = ny_housing[['MasVnrArea', 'GrLivArea', 'LotShape', 'GarageType', 'SalePrice']] +#housing_data.describe() +def outlier_removal(housing_data): + housing_data1=housing_data[housing_data.loc[:,['MasVnrArea','GrLivArea','LotShape','GarageType','SalePrice']].notnull()] + housing_data1=housing_data1.dropna() + housing_data2=housing_data1.sort_values(['MasVnrArea', 'GrLivArea','SalePrice']) + upper_quartile = np.percentile(housing_data2['SalePrice'],95) + h2=(housing_data2[housing_data2['SalePrice']