diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index ebbd53a..5e39e74 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/__init__.cpython-36.pyc b/q01_load_data/__pycache__/__init__.cpython-36.pyc index 745b533..59d1d24 100644 Binary files a/q01_load_data/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/build.cpython-36.pyc b/q01_load_data/__pycache__/build.cpython-36.pyc index 108e4a3..1ae7450 100644 Binary files a/q01_load_data/__pycache__/build.cpython-36.pyc and b/q01_load_data/__pycache__/build.cpython-36.pyc differ diff --git a/q01_load_data/build.py b/q01_load_data/build.py index e4cd8e3..1b6f17d 100644 --- a/q01_load_data/build.py +++ b/q01_load_data/build.py @@ -1,10 +1,17 @@ -# Default imports import pandas as pd from sklearn.model_selection import train_test_split - path = 'data/house_prices_multivariate.csv' +def load_data(path, test_size=0.33, randomst=9): + df = pd.read_csv(path) + X = df.iloc[:,:-1] + y = df['SalePrice'] + X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=randomst, train_size=0.67) + + return df, X_train, X_test, y_train, y_test + + + -# Write your solution here diff --git a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc index 133357e..2c6b4ce 100644 Binary files a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/tests/__pycache__/test_q01_load_data.cpython-36.pyc b/q01_load_data/tests/__pycache__/test_q01_load_data.cpython-36.pyc index 689755b..a8bac7c 100644 Binary files a/q01_load_data/tests/__pycache__/test_q01_load_data.cpython-36.pyc and b/q01_load_data/tests/__pycache__/test_q01_load_data.cpython-36.pyc differ diff --git a/q02_Max_important_feature/__pycache__/__init__.cpython-36.pyc b/q02_Max_important_feature/__pycache__/__init__.cpython-36.pyc index 93c9119..3ca4e50 100644 Binary files a/q02_Max_important_feature/__pycache__/__init__.cpython-36.pyc and b/q02_Max_important_feature/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_Max_important_feature/__pycache__/build.cpython-36.pyc b/q02_Max_important_feature/__pycache__/build.cpython-36.pyc index 2b7cfd4..f23e901 100644 Binary files a/q02_Max_important_feature/__pycache__/build.cpython-36.pyc and b/q02_Max_important_feature/__pycache__/build.cpython-36.pyc differ diff --git a/q02_Max_important_feature/build.py b/q02_Max_important_feature/build.py index 51fbde6..07f4ffc 100644 --- a/q02_Max_important_feature/build.py +++ b/q02_Max_important_feature/build.py @@ -1,8 +1,18 @@ -# Default imports +import pandas as pd +import numpy as np from greyatomlib.advanced_linear_regression.q01_load_data.build import load_data -# We have already loaded the data for you data_set, X_train, X_test, y_train, y_test = load_data('data/house_prices_multivariate.csv') +def Max_important_feature(data_set, target_variable='SalePrice', n=4): + + cols = data_set.corr().nlargest(n+1, target_variable)[target_variable].drop([target_variable]).index + + cm = data_set[cols].corr() + + return cm + +Max_important_feature(data_set, 'SalePrice') + + -# Write your code here diff --git a/q02_Max_important_feature/tests/__pycache__/__init__.cpython-36.pyc b/q02_Max_important_feature/tests/__pycache__/__init__.cpython-36.pyc index cec58d4..8a6f000 100644 Binary files a/q02_Max_important_feature/tests/__pycache__/__init__.cpython-36.pyc and b/q02_Max_important_feature/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_Max_important_feature/tests/__pycache__/test_q02max_important_feature.cpython-36.pyc b/q02_Max_important_feature/tests/__pycache__/test_q02max_important_feature.cpython-36.pyc index cb6849b..4b8a060 100644 Binary files a/q02_Max_important_feature/tests/__pycache__/test_q02max_important_feature.cpython-36.pyc and b/q02_Max_important_feature/tests/__pycache__/test_q02max_important_feature.cpython-36.pyc differ diff --git a/q03_polynomial/__pycache__/__init__.cpython-36.pyc b/q03_polynomial/__pycache__/__init__.cpython-36.pyc index aa42922..478bd96 100644 Binary files a/q03_polynomial/__pycache__/__init__.cpython-36.pyc and b/q03_polynomial/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_polynomial/__pycache__/build.cpython-36.pyc b/q03_polynomial/__pycache__/build.cpython-36.pyc index 3be41d0..285d42d 100644 Binary files a/q03_polynomial/__pycache__/build.cpython-36.pyc and b/q03_polynomial/__pycache__/build.cpython-36.pyc differ diff --git a/q03_polynomial/build.py b/q03_polynomial/build.py index 26d8971..ba09723 100644 --- a/q03_polynomial/build.py +++ b/q03_polynomial/build.py @@ -1,11 +1,21 @@ -# Default imports +import pandas as pd +import numpy as np from greyatomlib.advanced_linear_regression.q01_load_data.build import load_data from sklearn.preprocessing import PolynomialFeatures from sklearn.pipeline import make_pipeline from sklearn.linear_model import LinearRegression -# We have already loaded the data for you -data_set, X_train, X_test, y_train, y_test = load_data('data/house_prices_multivariate.csv') +data_set, X_train, X_test, y_train, y_test = load_data('data/house_prices_multivariate.csv', 0.33, 9) + +def polynomial(power = 5, randomst=9): + cols = data_set.corr().nlargest(5, 'SalePrice')['SalePrice'].index + poly_model = make_pipeline(PolynomialFeatures(degree=power, include_bias=False), LinearRegression()) + poly_model.fit(X_train[cols[1:]], y_train) + + return poly_model + +polynomial() + + -# Write your solution here diff --git a/q03_polynomial/tests/__pycache__/__init__.cpython-36.pyc b/q03_polynomial/tests/__pycache__/__init__.cpython-36.pyc index 6e20876..d0d6573 100644 Binary files a/q03_polynomial/tests/__pycache__/__init__.cpython-36.pyc and b/q03_polynomial/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_polynomial/tests/__pycache__/test_q03_polynomial.cpython-36.pyc b/q03_polynomial/tests/__pycache__/test_q03_polynomial.cpython-36.pyc index ef8c88b..6f6e67b 100644 Binary files a/q03_polynomial/tests/__pycache__/test_q03_polynomial.cpython-36.pyc and b/q03_polynomial/tests/__pycache__/test_q03_polynomial.cpython-36.pyc differ diff --git a/q04_ridge/__pycache__/__init__.cpython-36.pyc b/q04_ridge/__pycache__/__init__.cpython-36.pyc index 4342136..dd2d346 100644 Binary files a/q04_ridge/__pycache__/__init__.cpython-36.pyc and b/q04_ridge/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_ridge/__pycache__/build.cpython-36.pyc b/q04_ridge/__pycache__/build.cpython-36.pyc index ea08c01..0c3ba91 100644 Binary files a/q04_ridge/__pycache__/build.cpython-36.pyc and b/q04_ridge/__pycache__/build.cpython-36.pyc differ diff --git a/q04_ridge/build.py b/q04_ridge/build.py index 9ee00b1..b7c7867 100644 --- a/q04_ridge/build.py +++ b/q04_ridge/build.py @@ -1,15 +1,28 @@ -# Default imports from sklearn.linear_model import Ridge import pandas as pd import numpy as np from sklearn.metrics import mean_squared_error from greyatomlib.advanced_linear_regression.q01_load_data.build import load_data -np.random.seed(9) -# We have already loaded the data for you data_set, X_train, X_test, y_train, y_test = load_data('data/house_prices_multivariate.csv') +np.random.seed(9) + +def ridge(al=0.01): + + ridge_model = Ridge(alpha=al, normalize=True, random_state=9) + ridge_model.fit(X_train, y_train) + + y_pred_train = ridge_model.predict(X_train) + y_pred_test = ridge_model.predict(X_test) + + rmse_train = mean_squared_error(y_train, y_pred_train)**0.5 + rmse_test = mean_squared_error(y_test, y_pred_test)**0.5 + + return rmse_train, rmse_test, ridge_model + +ridge(0.01) + -# Write your solution here diff --git a/q04_ridge/tests/__pycache__/__init__.cpython-36.pyc b/q04_ridge/tests/__pycache__/__init__.cpython-36.pyc index 6d021b5..bd4ea53 100644 Binary files a/q04_ridge/tests/__pycache__/__init__.cpython-36.pyc and b/q04_ridge/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_ridge/tests/__pycache__/test_q04_ridge.cpython-36.pyc b/q04_ridge/tests/__pycache__/test_q04_ridge.cpython-36.pyc index 0549421..aed97f3 100644 Binary files a/q04_ridge/tests/__pycache__/test_q04_ridge.cpython-36.pyc and b/q04_ridge/tests/__pycache__/test_q04_ridge.cpython-36.pyc differ diff --git a/q05_lasso/__pycache__/__init__.cpython-36.pyc b/q05_lasso/__pycache__/__init__.cpython-36.pyc index 1005306..628f927 100644 Binary files a/q05_lasso/__pycache__/__init__.cpython-36.pyc and b/q05_lasso/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_lasso/__pycache__/build.cpython-36.pyc b/q05_lasso/__pycache__/build.cpython-36.pyc index b4ea629..dc3169f 100644 Binary files a/q05_lasso/__pycache__/build.cpython-36.pyc and b/q05_lasso/__pycache__/build.cpython-36.pyc differ diff --git a/q05_lasso/build.py b/q05_lasso/build.py index fb30d50..d8a601c 100644 --- a/q05_lasso/build.py +++ b/q05_lasso/build.py @@ -1,14 +1,28 @@ -# Default imports from sklearn.linear_model import Lasso import pandas as pd import numpy as np from sklearn.metrics import mean_squared_error from greyatomlib.advanced_linear_regression.q01_load_data.build import load_data -np.random.seed(9) # We have already loaded the data for you data_set, X_train, X_test, y_train, y_test = load_data('data/house_prices_multivariate.csv') +np.random.seed(9) + +def lasso(al=0.01): + + lasso_model = Lasso(alpha=al, normalize=True, random_state=9) + lasso_model.fit(X_train, y_train) + + y_pred_train = lasso_model.predict(X_train) + y_pred_test = lasso_model.predict(X_test) + + rmse_train = mean_squared_error(y_train, y_pred_train)**0.5 + rmse_test = mean_squared_error(y_test, y_pred_test)**0.5 + + return rmse_train, rmse_test + +lasso(0.01) + -# Write your solution here diff --git a/q05_lasso/tests/__pycache__/__init__.cpython-36.pyc b/q05_lasso/tests/__pycache__/__init__.cpython-36.pyc index 8869434..cb6cbc2 100644 Binary files a/q05_lasso/tests/__pycache__/__init__.cpython-36.pyc and b/q05_lasso/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_lasso/tests/__pycache__/test_q05_lasso.cpython-36.pyc b/q05_lasso/tests/__pycache__/test_q05_lasso.cpython-36.pyc index 438235e..bb7b47b 100644 Binary files a/q05_lasso/tests/__pycache__/test_q05_lasso.cpython-36.pyc and b/q05_lasso/tests/__pycache__/test_q05_lasso.cpython-36.pyc differ diff --git a/q06_cross_validation/__pycache__/__init__.cpython-36.pyc b/q06_cross_validation/__pycache__/__init__.cpython-36.pyc index fa7d8bf..ac1de30 100644 Binary files a/q06_cross_validation/__pycache__/__init__.cpython-36.pyc and b/q06_cross_validation/__pycache__/__init__.cpython-36.pyc differ diff --git a/q06_cross_validation/__pycache__/build.cpython-36.pyc b/q06_cross_validation/__pycache__/build.cpython-36.pyc index 19e8bd8..db77ca4 100644 Binary files a/q06_cross_validation/__pycache__/build.cpython-36.pyc and b/q06_cross_validation/__pycache__/build.cpython-36.pyc differ diff --git a/q06_cross_validation/build.py b/q06_cross_validation/build.py index e39b93b..958f926 100644 --- a/q06_cross_validation/build.py +++ b/q06_cross_validation/build.py @@ -1,13 +1,20 @@ -# Default imports from sklearn.model_selection import cross_val_score import numpy as np from greyatomlib.advanced_linear_regression.q01_load_data.build import load_data -np.random.seed(9) # We have already loaded the data for you data_set, X_train, X_test, y_train, y_test = load_data('data/house_prices_multivariate.csv') +np.random.seed(9) + +def cross_validation(model, X, y): + + model.fit(X_train, y_train) + scores = cross_val_score(model, X, y, scoring='neg_mean_squared_error', cv=5) + neg_mse = scores.mean() + + return neg_mse + -# Write your solution here diff --git a/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc b/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc index ca3f5cd..b11cd3d 100644 Binary files a/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc and b/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q06_cross_validation/tests/__pycache__/test_q06_cross_validation.cpython-36.pyc b/q06_cross_validation/tests/__pycache__/test_q06_cross_validation.cpython-36.pyc index e7acaaf..e258e70 100644 Binary files a/q06_cross_validation/tests/__pycache__/test_q06_cross_validation.cpython-36.pyc and b/q06_cross_validation/tests/__pycache__/test_q06_cross_validation.cpython-36.pyc differ