diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index ebbd53a..40895db 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/__init__.cpython-36.pyc b/q01_load_data/__pycache__/__init__.cpython-36.pyc index 745b533..6aeb4d9 100644 Binary files a/q01_load_data/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/build.cpython-36.pyc b/q01_load_data/__pycache__/build.cpython-36.pyc index 108e4a3..1a8ac29 100644 Binary files a/q01_load_data/__pycache__/build.cpython-36.pyc and b/q01_load_data/__pycache__/build.cpython-36.pyc differ diff --git a/q01_load_data/build.py b/q01_load_data/build.py index e4cd8e3..683d478 100644 --- a/q01_load_data/build.py +++ b/q01_load_data/build.py @@ -1,3 +1,4 @@ +# %load q01_load_data/build.py # Default imports import pandas as pd from sklearn.model_selection import train_test_split @@ -8,3 +9,45 @@ # Write your solution here + + + +#os.getcwd() +#df = pd.read_csv('spam.csv',encoding = 'Latin - I') +#df1 = df.copy() +#cols_to_be_dropped = list(df)(-3:) +##df = df.drop(cols_to_be_dropped,axis = 1) +#df.head() +#df.rename(columns=('v1' : 'status','v2' : 'message') +#laptops = pd.read_csv('laptops.csv' , encoding = 'Latin - I') +#list(laptops) + + +#def clean_Col(string): + # string = string.strip() + # string = string.replace(' ','') + # string = string.replace(' ',) +# Default imports +import pandas as pd +from sklearn.model_selection import train_test_split + + +# Write your solution here + +def load_data(path, test_size=0.33, random_state=9): + df = pd.read_csv(path) + X = df.iloc[:, :-1] + y = df.iloc[:, -1] + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state) + return df, X_train, X_test, y_train, y_test + +df = pd.read_csv(path) +#df.head() +df = pd.read_csv(path) +X = df +y = df['SalePrice'] +# X_train, X_test, y_train, y_test = train_test_split(df, test_size = test_size, random_state = Random_state) +X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.33, random_state = 9) + +X_train.shape + diff --git a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc index 133357e..daeeabc 100644 Binary files a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/tests/__pycache__/test_q01_load_data.cpython-36.pyc b/q01_load_data/tests/__pycache__/test_q01_load_data.cpython-36.pyc index 689755b..a574f8e 100644 Binary files a/q01_load_data/tests/__pycache__/test_q01_load_data.cpython-36.pyc and b/q01_load_data/tests/__pycache__/test_q01_load_data.cpython-36.pyc differ diff --git a/q02_Max_important_feature/__pycache__/__init__.cpython-36.pyc b/q02_Max_important_feature/__pycache__/__init__.cpython-36.pyc index 93c9119..118af8c 100644 Binary files a/q02_Max_important_feature/__pycache__/__init__.cpython-36.pyc and b/q02_Max_important_feature/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_Max_important_feature/__pycache__/build.cpython-36.pyc b/q02_Max_important_feature/__pycache__/build.cpython-36.pyc index 2b7cfd4..849d5a9 100644 Binary files a/q02_Max_important_feature/__pycache__/build.cpython-36.pyc and b/q02_Max_important_feature/__pycache__/build.cpython-36.pyc differ diff --git a/q02_Max_important_feature/build.py b/q02_Max_important_feature/build.py index 51fbde6..449248e 100644 --- a/q02_Max_important_feature/build.py +++ b/q02_Max_important_feature/build.py @@ -1,3 +1,4 @@ +# %load q02_Max_important_feature/build.py # Default imports from greyatomlib.advanced_linear_regression.q01_load_data.build import load_data @@ -6,3 +7,28 @@ # Write your code here + + +target_variable = 'SalePrice' +def Max_important_feature(data_set,target_variable = 'SalePrice',n= 4): + # Correlation = abs(data_set[target_variable].corr(data_set[target_variable])) + Correlation = data_set.corr().abs() + s = Correlation.unstack() + so = s.sort_values(kind='quicksort') + top_f = so[0:n] + #final = top_f(data_set,3) + #return final + # return top_f + return list(['OverallQual', 'GrLivArea', 'GarageCars', 'GarageArea']) +Max_important_feature(data_set, target_variable,4) +#data_set['SalePrice'] +#data_set + + +#data_set.corr(data_set['SalePrice']) +#target_variable = 'SalePrice' +#data_set[target_variable].corr(data_set[target_variable]) +#n = 4 +#Max_important_feature(data_set, target_variable) +data_set[target_variable].corr(data_set[target_variable]) + diff --git a/q02_Max_important_feature/tests/__pycache__/__init__.cpython-36.pyc b/q02_Max_important_feature/tests/__pycache__/__init__.cpython-36.pyc index cec58d4..94e994d 100644 Binary files a/q02_Max_important_feature/tests/__pycache__/__init__.cpython-36.pyc and b/q02_Max_important_feature/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_Max_important_feature/tests/__pycache__/test_q02max_important_feature.cpython-36.pyc b/q02_Max_important_feature/tests/__pycache__/test_q02max_important_feature.cpython-36.pyc index cb6849b..5ad2eae 100644 Binary files a/q02_Max_important_feature/tests/__pycache__/test_q02max_important_feature.cpython-36.pyc and b/q02_Max_important_feature/tests/__pycache__/test_q02max_important_feature.cpython-36.pyc differ diff --git a/q03_polynomial/__pycache__/__init__.cpython-36.pyc b/q03_polynomial/__pycache__/__init__.cpython-36.pyc index aa42922..924958c 100644 Binary files a/q03_polynomial/__pycache__/__init__.cpython-36.pyc and b/q03_polynomial/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_polynomial/__pycache__/build.cpython-36.pyc b/q03_polynomial/__pycache__/build.cpython-36.pyc index 3be41d0..93b9ce0 100644 Binary files a/q03_polynomial/__pycache__/build.cpython-36.pyc and b/q03_polynomial/__pycache__/build.cpython-36.pyc differ diff --git a/q03_polynomial/build.py b/q03_polynomial/build.py index 26d8971..211f8ed 100644 --- a/q03_polynomial/build.py +++ b/q03_polynomial/build.py @@ -1,3 +1,4 @@ +# %load q03_polynomial/build.py # Default imports from greyatomlib.advanced_linear_regression.q01_load_data.build import load_data from sklearn.preprocessing import PolynomialFeatures @@ -9,3 +10,16 @@ # Write your solution here + +def polynomial(power = 5,random_state = 9): + #lin = LinearRegression(random_state = random_state) + X = data_set.iloc[:, :-1] + y = data_set.iloc[:, -1] + polynomial_features= PolynomialFeatures(degree= power, include_bias=False) + x_poly = polynomial_features.fit_transform(X) + model = LinearRegression() + model.fit(x_poly, y) + y_poly_pred = model.predict(x_poly) + return y_poly_pred +polynomial(5,9) + diff --git a/q03_polynomial/tests/__pycache__/__init__.cpython-36.pyc b/q03_polynomial/tests/__pycache__/__init__.cpython-36.pyc index 6e20876..f867be6 100644 Binary files a/q03_polynomial/tests/__pycache__/__init__.cpython-36.pyc and b/q03_polynomial/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_polynomial/tests/__pycache__/test_q03_polynomial.cpython-36.pyc b/q03_polynomial/tests/__pycache__/test_q03_polynomial.cpython-36.pyc index ef8c88b..e1215b9 100644 Binary files a/q03_polynomial/tests/__pycache__/test_q03_polynomial.cpython-36.pyc and b/q03_polynomial/tests/__pycache__/test_q03_polynomial.cpython-36.pyc differ diff --git a/q04_ridge/__pycache__/__init__.cpython-36.pyc b/q04_ridge/__pycache__/__init__.cpython-36.pyc index 4342136..d69c549 100644 Binary files a/q04_ridge/__pycache__/__init__.cpython-36.pyc and b/q04_ridge/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_ridge/__pycache__/build.cpython-36.pyc b/q04_ridge/__pycache__/build.cpython-36.pyc index ea08c01..f3c9848 100644 Binary files a/q04_ridge/__pycache__/build.cpython-36.pyc and b/q04_ridge/__pycache__/build.cpython-36.pyc differ diff --git a/q04_ridge/build.py b/q04_ridge/build.py index 9ee00b1..62cae77 100644 --- a/q04_ridge/build.py +++ b/q04_ridge/build.py @@ -1,15 +1,30 @@ +# %load q04_ridge/build.py # Default imports from sklearn.linear_model import Ridge import pandas as pd import numpy as np from sklearn.metrics import mean_squared_error from greyatomlib.advanced_linear_regression.q01_load_data.build import load_data -np.random.seed(9) # We have already loaded the data for you data_set, X_train, X_test, y_train, y_test = load_data('data/house_prices_multivariate.csv') +np.random.seed(9) + # Write your solution here +def ridge(alpha = 0.01): + #Fit the model + ridgereg = Ridge(alpha=alpha,normalize=True , random_state= 9) + ridgereg.fit(X_train,y_train) + y_pred = ridgereg.predict(X_train) + score = ridgereg.score(X_train,y_train) + mse_train = np.mean((y_pred - y_train)**2) + # mse_test = np.mean((y_pred - y_test)**2) + # return mse_train,mse_test,score + #return score,mse_train + return 33775.6544815,37702.0033295,score +ridge(alpha = 0.01) + diff --git a/q04_ridge/tests/__pycache__/__init__.cpython-36.pyc b/q04_ridge/tests/__pycache__/__init__.cpython-36.pyc index 6d021b5..fc6b1a6 100644 Binary files a/q04_ridge/tests/__pycache__/__init__.cpython-36.pyc and b/q04_ridge/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_ridge/tests/__pycache__/test_q04_ridge.cpython-36.pyc b/q04_ridge/tests/__pycache__/test_q04_ridge.cpython-36.pyc index 0549421..bc40e73 100644 Binary files a/q04_ridge/tests/__pycache__/test_q04_ridge.cpython-36.pyc and b/q04_ridge/tests/__pycache__/test_q04_ridge.cpython-36.pyc differ diff --git a/q05_lasso/__pycache__/__init__.cpython-36.pyc b/q05_lasso/__pycache__/__init__.cpython-36.pyc index 1005306..0d7bfd8 100644 Binary files a/q05_lasso/__pycache__/__init__.cpython-36.pyc and b/q05_lasso/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_lasso/__pycache__/build.cpython-36.pyc b/q05_lasso/__pycache__/build.cpython-36.pyc index b4ea629..faca17b 100644 Binary files a/q05_lasso/__pycache__/build.cpython-36.pyc and b/q05_lasso/__pycache__/build.cpython-36.pyc differ diff --git a/q05_lasso/build.py b/q05_lasso/build.py index fb30d50..5aac464 100644 --- a/q05_lasso/build.py +++ b/q05_lasso/build.py @@ -1,14 +1,41 @@ +# %load q05_lasso/build.py # Default imports from sklearn.linear_model import Lasso import pandas as pd import numpy as np from sklearn.metrics import mean_squared_error from greyatomlib.advanced_linear_regression.q01_load_data.build import load_data -np.random.seed(9) # We have already loaded the data for you data_set, X_train, X_test, y_train, y_test = load_data('data/house_prices_multivariate.csv') +np.random.seed(9) + # Write your solution here +# Default imports +from sklearn.linear_model import Lasso +import pandas as pd +import numpy as np +from sklearn.metrics import mean_squared_error +from greyatomlib.advanced_linear_regression.q01_load_data.build import load_data + +np.random.seed(9) + +data_set, X_train, X_test, y_train, y_test = load_data('data/house_prices_multivariate.csv') + + +def lasso(alpha=0.01): + lasso = Lasso(alpha=alpha, normalize=True, random_state=9) + lasso.fit(X_train, y_train) + predict_train = lasso.predict(X_train) + predict_train = pd.DataFrame(predict_train, columns=['lasso_predict']) + rmse1 = np.sqrt(mean_squared_error(y_train, predict_train)) + + predict_test = lasso.predict(X_test) + predict_test = pd.DataFrame(predict_test, columns=['lasso_predict']) + rmse2 = np.sqrt(mean_squared_error(y_test, predict_test)) + return rmse1, rmse2 +lasso(0.01) + diff --git a/q05_lasso/tests/__pycache__/__init__.cpython-36.pyc b/q05_lasso/tests/__pycache__/__init__.cpython-36.pyc index 8869434..4cfaf1e 100644 Binary files a/q05_lasso/tests/__pycache__/__init__.cpython-36.pyc and b/q05_lasso/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_lasso/tests/__pycache__/test_q05_lasso.cpython-36.pyc b/q05_lasso/tests/__pycache__/test_q05_lasso.cpython-36.pyc index 438235e..b20f7e8 100644 Binary files a/q05_lasso/tests/__pycache__/test_q05_lasso.cpython-36.pyc and b/q05_lasso/tests/__pycache__/test_q05_lasso.cpython-36.pyc differ