Regularization Example: King County housing

5. Regularization Example: King County housing#

import pandas as pd

# Notebook-wide display settings: floats print with two decimals and
# long cell contents are truncated to 15 characters.
pd.options.display.float_format = '{:.2f}'.format
pd.options.display.max_colwidth = 15

import numpy as np
import matplotlib.pyplot as plt

# House-sale records, fetched over HTTP from the GettysburgDataScience/datasets
# GitHub repository (requires network access).
KC_HOUSING_URL = 'https://raw.githubusercontent.com/GettysburgDataScience/datasets/refs/heads/main/kc_house_data.csv'

housing_df = pd.read_csv(KC_HOUSING_URL)
housing_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21613 entries, 0 to 21612
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 id             21613 non-null  int64  
 date           21613 non-null  object 
 price          21613 non-null  float64
 bedrooms       21613 non-null  int64  
 bathrooms      21613 non-null  float64
 sqft_living    21613 non-null  int64  
 sqft_lot       21613 non-null  int64  
 floors         21613 non-null  float64
 waterfront     21613 non-null  int64  
 view           21613 non-null  int64  
condition      21613 non-null  int64  
grade          21613 non-null  int64  
sqft_above     21613 non-null  int64  
sqft_basement  21613 non-null  int64  
yr_built       21613 non-null  int64  
yr_renovated   21613 non-null  int64  
zipcode        21613 non-null  int64  
lat            21613 non-null  float64
long           21613 non-null  float64
sqft_living15  21613 non-null  int64  
sqft_lot15     21613 non-null  int64  
dtypes: float64(5), int64(15), object(1)
memory usage: 3.5+ MB

# Summary statistics (count, mean, std, min, quartiles, max) for every numeric column.
housing_df.describe()

	id	price	bedrooms	bathrooms	sqft_living	sqft_lot	floors	waterfront	view	condition	grade	sqft_above	sqft_basement	yr_built	yr_renovated	zipcode	lat	long	sqft_living15	sqft_lot15
count	21613.00	21613.00	21613.00	21613.00	21613.00	21613.00	21613.00	21613.00	21613.00	21613.00	21613.00	21613.00	21613.00	21613.00	21613.00	21613.00	21613.00	21613.00	21613.00	21613.00
mean	4580301520.86	540088.14	3.37	2.11	2079.90	15106.97	1.49	0.01	0.23	3.41	7.66	1788.39	291.51	1971.01	84.40	98077.94	47.56	-122.21	1986.55	12768.46
std	2876565571.31	367127.20	0.93	0.77	918.44	41420.51	0.54	0.09	0.77	0.65	1.18	828.09	442.58	29.37	401.68	53.51	0.14	0.14	685.39	27304.18
min	1000102.00	75000.00	0.00	0.00	290.00	520.00	1.00	0.00	0.00	1.00	1.00	290.00	0.00	1900.00	0.00	98001.00	47.16	-122.52	399.00	651.00
25%	2123049194.00	321950.00	3.00	1.75	1427.00	5040.00	1.00	0.00	0.00	3.00	7.00	1190.00	0.00	1951.00	0.00	98033.00	47.47	-122.33	1490.00	5100.00
50%	3904930410.00	450000.00	3.00	2.25	1910.00	7618.00	1.50	0.00	0.00	3.00	7.00	1560.00	0.00	1975.00	0.00	98065.00	47.57	-122.23	1840.00	7620.00
75%	7308900445.00	645000.00	4.00	2.50	2550.00	10688.00	2.00	0.00	0.00	4.00	8.00	2210.00	560.00	1997.00	0.00	98118.00	47.68	-122.12	2360.00	10083.00
max	9900000190.00	7700000.00	33.00	8.00	13540.00	1651359.00	3.50	1.00	4.00	5.00	13.00	9410.00	4820.00	2015.00	2015.00	98199.00	47.78	-121.31	6210.00	871200.00

# Distribution of sale prices, 100 bins.
price_ax = housing_df['price'].hist(bins=100)
price_ax.set_xlabel('Price ($)')

Text(0.5, 0, 'Price ($)')

../_images/34935cd6cc2324f54bae732b41a098214f8b5b6c05d31af7befad69d37f1fe4b.png

# Keep only listings with at most 8 bedrooms (describe() reports a maximum of 33).
# .copy() makes the result an independent frame: later cells write into it with
# .loc and add new columns, which on a bare query() result triggers pandas'
# SettingWithCopyWarning and leaves it ambiguous which object is being modified.
housing_df = housing_df.query('bedrooms <= 8').copy()

# Distinct renovation years; note the 0 entry alongside real calendar years.
np.sort(housing_df['yr_renovated'].unique())

array([   0, 1934, 1940, 1944, 1945, 1946, 1948, 1950, 1951, 1953, 1954,
       1955, 1956, 1957, 1958, 1959, 1960, 1962, 1963, 1964, 1965, 1967,
       1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978,
       1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989,
       1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
       2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
       2012, 2013, 2014, 2015])

# yr_renovated == 0 is a placeholder rather than a real year (presumably "never
# renovated" - confirm against the data dictionary). Replace it with the build
# year so that a renovation age can be computed for every house.
reno_is_zero = housing_df['yr_renovated'] == 0
housing_df.loc[reno_is_zero, 'yr_renovated'] = housing_df.loc[reno_is_zero, 'yr_built']

# Distinct build years, for comparison with the renovation years above.
np.sort(housing_df['yr_built'].unique())

array([1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910,
       1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920, 1921,
       1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932,
       1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943,
       1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954,
       1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965,
       1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976,
       1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987,
       1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
       1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
       2010, 2011, 2012, 2013, 2014, 2015])

# The sale year is the first four characters of the 'date' string.
sale_year = housing_df['date'].str[:4].astype('int64')

# Replace absolute years with ages at the time of sale, then discard the
# identifier/date columns and the raw year columns the ages were derived from.
columns_to_drop = ['id','date', 'zipcode', 'yr_built', 'yr_renovated']
housing_df = (
    housing_df
    .assign(
        yr_sold=sale_year,
        age_built=sale_year - housing_df['yr_built'],
        age_reno=sale_year - housing_df['yr_renovated'],
    )
    .drop(columns=columns_to_drop)
)

# Separate the target from the features; `target` is a list, so y stays a
# one-column DataFrame (2-D), which is what StandardScaler expects.
target = ['price']

y = housing_df[target]
X = housing_df.drop(columns=target)

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet, RidgeCV, LassoCV, ElasticNetCV
from sklearn.metrics import mean_squared_error, r2_score

import warnings
# NOTE(review): this silences every warning category for the rest of the
# session, including convergence and data-conversion warnings raised while
# fitting the models below - consider filtering specific categories instead.
warnings.filterwarnings("ignore")

# Hold out half of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=42
)

# Standardize the features with statistics learned from the training rows only,
# then apply the same transformation to the test rows.
ss = StandardScaler().fit(X_train)
X_train_scaled = ss.transform(X_train)
X_test_scaled = ss.transform(X_test)

# Standardize the target the same way; ss_y is kept so that predictions can be
# mapped back to dollars with inverse_transform.
ss_y = StandardScaler().fit(y_train)
y_train_scaled = ss_y.transform(y_train)
y_test_scaled = ss_y.transform(y_test)

# Baseline: ordinary least squares with no regularization.
lr = LinearRegression().fit(X_train_scaled, y_train_scaled)

y_pred_lr_scaled = lr.predict(X_test_scaled)
y_pred_lr = ss_y.inverse_transform(y_pred_lr_scaled)  # back to dollars

# Test-set R^2, computed on the standardized target.
R2_lr = r2_score(y_test_scaled, y_pred_lr_scaled)

R2_lr

0.6956974268521209

# np.logspace(start, stop, num): `num` values evenly spaced on a log scale,
# from 10**start to 10**stop (here 1, 10, ..., 100000).
np.logspace(0, 5, 6)

array([1.e+00, 1.e+01, 1.e+02, 1.e+03, 1.e+04, 1.e+05])

def _fit_and_score(model):
    """Fit `model` on the scaled training data and evaluate it on the test data.

    Returns
    -------
    r2 : float
        R^2 of the model's predictions against the scaled test target.
    coefs : list
        The fitted coefficients rounded to two decimals and ordered by
        decreasing absolute value (feature identity is not retained).
    """
    model.fit(X_train_scaled, y_train_scaled)
    r2 = r2_score(y_test_scaled, model.predict(X_test_scaled))
    coefs = sorted(model.coef_.flatten().round(2), key=abs, reverse=True)
    return r2, coefs


# Regularization strengths to try: 1e-4, 1e-3, ..., 1e3.
alpha_values = np.logspace(-4, 3, 8)

# For each model family: {column label for alpha -> magnitude-sorted coefficients}.
ridge_dict = {}
lasso_dict = {}
elastic_dict = {}

# Test-set R^2 per alpha for each model family.
r2_by_model = {'alpha': alpha_values,
               'ridge': [],
               'lasso': [],
               'elastic': []}

for a in alpha_values:
    # One label for all three tables. No space flag in the format spec:
    # '{a: .1e}' would render 'alpha_ 1.0e-04' instead of 'alpha_1.0e-04'.
    label = f'alpha_{a:.1e}'

    candidates = {
        'ridge': (Ridge(alpha=a), ridge_dict),
        'lasso': (Lasso(alpha=a), lasso_dict),
        'elastic': (ElasticNet(alpha=a, l1_ratio=0.5), elastic_dict),
    }

    for family, (model, coef_table) in candidates.items():
        r2, coefs = _fit_and_score(model)
        r2_by_model[family].append(r2)
        coef_table[label] = coefs

ridge_df = pd.DataFrame(ridge_dict)
lasso_df = pd.DataFrame(lasso_dict)
elastic_df = pd.DataFrame(elastic_dict)

# The name R2_dict is kept for the final table so that anything referring to it
# afterwards still finds the DataFrame of scores.
R2_dict = pd.DataFrame(r2_by_model)
display(R2_dict)

print('Ridge Regression')
display(ridge_df)

print('Lasso Regression')
display(lasso_df)

print('ElasticNet Regression')
display(elastic_df)

	alpha	ridge	lasso	elastic
0	0.00	0.70	0.70	0.70
1	0.00	0.70	0.70	0.70
2	0.01	0.70	0.69	0.69
3	0.10	0.70	0.63	0.67
4	1.00	0.70	-0.00	0.19
5	10.00	0.70	-0.00	-0.00
6	100.00	0.70	-0.00	-0.00
7	1000.00	0.69	-0.00	-0.00

Ridge Regression

	alpha_1.0e-04	alpha_1.0e-03	alpha_1.0e-02	alpha_1.0e-01	alpha_1.0e+00	alpha_1.0e+01	alpha_1.0e+02	alpha_1.0e+03
0	0.33	0.33	0.33	0.33	0.33	0.33	0.33	0.29
1	0.25	0.25	0.25	0.25	0.25	0.25	0.24	0.20
2	0.21	0.21	0.21	0.21	0.21	0.21	0.21	0.20
3	0.21	0.21	0.21	0.21	0.21	0.21	0.21	0.17
4	0.19	0.19	0.19	0.19	0.19	0.19	0.19	0.17
5	0.14	0.14	0.14	0.14	0.14	0.14	0.14	0.13
6	0.11	0.11	0.11	0.11	0.11	0.11	0.11	0.11
7	-0.09	-0.09	-0.09	-0.09	-0.09	-0.09	0.10	0.10
8	0.09	0.09	0.09	0.09	0.09	0.09	-0.09	0.08
9	0.08	0.08	0.08	0.08	0.08	0.08	0.08	-0.07
10	0.05	0.05	0.05	0.05	0.05	0.05	0.05	0.07
11	0.04	0.04	0.04	0.04	0.04	0.04	0.05	0.05
12	-0.04	-0.04	-0.04	-0.04	-0.04	-0.04	-0.03	-0.04
13	-0.03	-0.03	-0.03	-0.03	-0.03	-0.03	-0.03	0.03
14	-0.03	-0.03	-0.03	-0.03	-0.03	-0.03	0.03	0.02
15	0.03	0.03	0.03	0.03	0.03	0.03	-0.03	-0.02
16	0.01	0.01	0.01	0.01	0.01	0.01	0.01	0.02
17	0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.01

Lasso Regression

	alpha_ 1.0e-04	alpha_ 1.0e-03	alpha_ 1.0e-02	alpha_ 1.0e-01	alpha_ 1.0e+00	alpha_ 1.0e+01	alpha_ 1.0e+02	alpha_ 1.0e+03
0	0.48	0.38	0.38	0.37	0.00	0.00	0.00	0.00
1	0.33	0.34	0.34	0.26	0.00	0.00	0.00	0.00
2	0.25	0.24	0.20	0.15	0.00	0.00	0.00	0.00
3	0.21	0.21	0.20	0.11	0.00	0.00	0.00	0.00
4	0.14	0.14	0.13	0.07	0.00	0.00	0.00	0.00
5	0.11	0.11	0.11	0.05	0.00	0.00	0.00	0.00
6	-0.09	-0.09	0.08	-0.00	0.00	0.00	0.00	0.00
7	0.09	0.09	-0.06	0.00	0.00	0.00	0.00	0.00
8	0.05	0.05	0.04	-0.00	0.00	0.00	0.00	0.00
9	-0.05	0.04	0.03	0.00	0.00	0.00	0.00	0.00
10	-0.04	0.04	0.02	0.00	0.00	0.00	0.00	0.00
11	0.04	-0.03	-0.02	0.00	0.00	0.00	0.00	0.00
12	-0.04	0.03	0.02	0.00	0.00	0.00	0.00	0.00
13	-0.03	-0.03	-0.01	-0.00	0.00	0.00	0.00	0.00
14	-0.03	-0.02	-0.00	0.00	0.00	0.00	0.00	0.00
15	0.03	0.01	0.00	-0.00	0.00	0.00	0.00	0.00
16	0.01	0.00	0.00	0.00	-0.00	-0.00	-0.00	-0.00
17	0.00	0.00	0.00	0.00	-0.00	-0.00	-0.00	-0.00

ElasticNet Regression

	alpha_ 1.0e-04	alpha_ 1.0e-03	alpha_ 1.0e-02	alpha_ 1.0e-01	alpha_ 1.0e+00	alpha_ 1.0e+01	alpha_ 1.0e+02	alpha_ 1.0e+03
0	0.55	0.38	0.37	0.34	0.10	0.00	0.00	0.00
1	0.33	0.33	0.33	0.30	0.06	0.00	0.00	0.00
2	0.25	0.24	0.21	0.18	0.00	0.00	0.00	0.00
3	0.21	0.21	0.20	0.12	0.00	0.00	0.00	0.00
4	0.14	0.14	0.13	0.12	0.00	0.00	0.00	0.00
5	0.11	0.11	0.11	0.10	0.00	0.00	0.00	0.00
6	-0.11	-0.09	0.09	0.03	0.00	0.00	0.00	0.00
7	-0.09	0.09	-0.08	0.03	0.00	0.00	0.00	0.00
8	0.09	0.05	0.04	0.01	0.00	0.00	0.00	0.00
9	-0.08	0.04	0.04	0.01	0.00	0.00	0.00	0.00
10	0.05	0.04	0.04	-0.00	0.00	0.00	0.00	0.00
11	0.04	-0.04	-0.03	-0.00	0.00	0.00	0.00	0.00
12	-0.04	-0.03	0.03	0.00	-0.00	0.00	0.00	0.00
13	-0.03	0.03	-0.02	0.00	0.00	0.00	0.00	0.00
14	-0.03	-0.02	-0.00	-0.00	0.00	0.00	0.00	0.00
15	0.03	0.01	0.00	-0.00	0.00	0.00	0.00	0.00
16	0.01	0.00	0.00	0.00	0.00	-0.00	-0.00	-0.00
17	0.00	0.00	-0.00	0.00	-0.00	-0.00	-0.00	-0.00

5.1. Choosing the best models#

# Refit one model per family at a single hand-picked alpha (presumably read off
# the sweep table above). Each model is trained on the standardized data;
# predictions are kept both on the standardized scale (for R^2) and converted
# back to dollars (for plotting against y_test).

# Ridge
ridge = Ridge(alpha=100).fit(X_train_scaled, y_train_scaled)
y_pred_ridge_scaled = ridge.predict(X_test_scaled)
y_pred_ridge = ss_y.inverse_transform(y_pred_ridge_scaled.reshape(-1, 1))
R2_ridge = r2_score(y_test_scaled, y_pred_ridge_scaled)

# Lasso
lasso = Lasso(alpha=0.1).fit(X_train_scaled, y_train_scaled)
y_pred_lasso_scaled = lasso.predict(X_test_scaled)
y_pred_lasso = ss_y.inverse_transform(y_pred_lasso_scaled.reshape(-1, 1))
R2_lasso = r2_score(y_test_scaled, y_pred_lasso_scaled)

# ElasticNet (equal mix of L1 and L2 penalties)
elastic = ElasticNet(alpha=0.1, l1_ratio=0.5).fit(X_train_scaled, y_train_scaled)
y_pred_elastic_scaled = elastic.predict(X_test_scaled)
y_pred_elastic = ss_y.inverse_transform(y_pred_elastic_scaled.reshape(-1, 1))
R2_elastic = r2_score(y_test_scaled, y_pred_elastic_scaled)

# One predicted-vs-actual panel per model, on shared axes so they compare directly.
fig, ax = plt.subplots(1, 4, figsize=(20, 4), sharex=True, sharey=True)

model_names = ['Linear Regression', 'Ridge', 'Lasso', 'ElasticNet']
model_predictions = [y_pred_lr, y_pred_ridge, y_pred_lasso, y_pred_elastic]
R2 = [R2_lr, R2_ridge, R2_lasso, R2_elastic]

for panel, title, predicted, score in zip(ax, model_names, model_predictions, R2):
    panel.scatter(y_test, predicted, s=5, alpha=0.1)
    # Dashed diagonal: where a perfect prediction would fall.
    panel.plot([0, 4000000], [0, 4000000], 'k--')

    panel.set_title(title)
    panel.set_xlabel('Actual Selling Price ($)')
    panel.text(0.05, 0.9, f'R2 = {score:.3f}', transform=panel.transAxes)

# The y axis is shared, so only the left-most panel is labelled.
ax[0].set_ylabel('Predicted Price ($)')

plt.show()

../_images/c1cc345da8a2374219085fb6a77dc7f37de4d26e05426347cce721d87fc596f8.png

5.2. Hyper-parameter Search and Validation#

5.2.1. Grid Search#

How do we explore this space? Suppose I want to try ElasticNet models with these options:

alpha = [0.01, 0.1, 1, 10]
l1_ratio= [0.25, 0.5, 0.75]

How many models will I be testing?

GridSearch does just this in an automated way, testing every combination (the Cartesian product) of the parameter values you’d like to test. Here that is 4 × 3 = 12 models:

alpha	l1_ratio	Cartesian Product
0.01	0.25	(0.01, 0.25)
0.01	0.50	(0.01, 0.50)
0.01	0.75	(0.01, 0.75)
0.1	0.25	(0.1, 0.25)
0.1	0.50	(0.1, 0.50)
0.1	0.75	(0.1, 0.75)
1.	0.25	(1, 0.25)
1.	0.50	(1, 0.50)
1.	0.75	(1, 0.75)
10	0.25	(10, 0.25)
10	0.50	(10, 0.50)
10	0.75	(10, 0.75)

5.2.2. Cross-Validation#

Validation is used to select from a set of candidate models (e.g. different learning algorithms, variations on the same algorithm with different hyperparameters). In the simplest form of validation, we split off a portion of the training data and compare models based on their performance on this validation set. But more commonly, we use K-fold Cross-Validation:

Split the training data into K “folds”
Set the first fold aside as a validation set and train on the remaining data.
Validate using that first fold as a validation set.
Repeat the process (K times in total), each time using a different fold as the validation set.
Average the performance across all the training-validation iterations.

5.2.3. Grid Search + Cross-Validation#

Grid Search and Cross-Validation are used in tandem so commonly that sklearn packages them together in some very convenient estimators (e.g. RidgeCV, LassoCV, ElasticNetCV).

# Each *CV estimator picks its regularization strength by 5-fold cross-validation
# on the training data. The models are fit on the standardized target, so their
# raw predictions are in standardized units; they are converted back to dollars
# before being stored in y_pred_* / y_train_*, because those arrays are compared
# with the dollar-valued y_test when RMSE and R^2 are computed. The standardized
# predictions remain available under the *_scaled names.
cv_alphas = np.logspace(-3, 3, 7)


def _to_dollars(scaled_prediction):
    """Map standardized predictions back to dollars as an (n, 1) array."""
    return ss_y.inverse_transform(np.asarray(scaled_prediction).reshape(-1, 1))


# RidgeCV
ridge = RidgeCV(alphas=cv_alphas, cv=5)
ridge.fit(X_train_scaled, y_train_scaled)
y_pred_ridge_scaled = ridge.predict(X_test_scaled)
y_train_ridge_scaled = ridge.predict(X_train_scaled)
y_pred_ridge = _to_dollars(y_pred_ridge_scaled)
y_train_ridge = _to_dollars(y_train_ridge_scaled)

# LassoCV (expects a 1-D target; a column vector is only accepted with a
# conversion warning, so it is flattened explicitly)
lasso = LassoCV(alphas=cv_alphas, cv=5, max_iter=10000)
lasso.fit(X_train_scaled, np.ravel(y_train_scaled))
y_pred_lasso_scaled = lasso.predict(X_test_scaled)
y_train_lasso_scaled = lasso.predict(X_train_scaled)
y_pred_lasso = _to_dollars(y_pred_lasso_scaled)
y_train_lasso = _to_dollars(y_train_lasso_scaled)

# ElasticNetCV (searches over the L1/L2 mix as well as alpha)
elastic = ElasticNetCV(alphas=cv_alphas, l1_ratio=[.1, .5, .7, .9, .95, .99, 1], cv=5, max_iter=10000)
elastic.fit(X_train_scaled, np.ravel(y_train_scaled))
y_pred_elastic_scaled = elastic.predict(X_test_scaled)
y_train_elastic_scaled = elastic.predict(X_train_scaled)
y_pred_elastic = _to_dollars(y_pred_elastic_scaled)
y_train_elastic = _to_dollars(y_train_elastic_scaled)

# Dump everything stored on the fitted LassoCV object: the constructor
# parameters plus the fitted attributes (alpha_, alphas_, coef_, mse_path_, ...).
lasso.__dict__

{'eps': 0.001,
 'n_alphas': 100,
 'alphas': array([1.e-03, 1.e-02, 1.e-01, 1.e+00, 1.e+01, 1.e+02, 1.e+03]),
 'fit_intercept': True,
 'precompute': 'auto',
 'max_iter': 10000,
 'tol': 0.0001,
 'copy_X': True,
 'cv': 5,
 'verbose': False,
 'n_jobs': None,
 'positive': False,
 'random_state': None,
 'selection': 'cyclic',
 'n_features_in_': 18,
 'mse_path_': array([[1.17493944, 1.08348981, 0.91379629, 0.94334548, 0.88546225],
        [1.17493944, 1.08348981, 0.91379629, 0.94334548, 0.88546225],
        [1.17493944, 1.08348981, 0.91379629, 0.94334548, 0.88546225],
        [1.17493944, 1.08348981, 0.91379629, 0.94334548, 0.88546225],
        [0.47139429, 0.39173577, 0.31993276, 0.3258653 , 0.31450938],
        [0.391673  , 0.31529641, 0.27752907, 0.27940606, 0.27739959],
        [0.38621254, 0.31204547, 0.27755557, 0.27871435, 0.27824047]]),
 'alpha_': np.float64(0.001),
 'alphas_': array([1.e+03, 1.e+02, 1.e+01, 1.e+00, 1.e-01, 1.e-02, 1.e-03]),
 'coef_': array([-0.08903972,  0.09425803,  0.38298852,  0.00168326,  0.00694186,
         0.13634189,  0.11170771,  0.05152892,  0.33509464,  0.03526533,
         0.        ,  0.20924649, -0.02938209,  0.03984716, -0.02259535,
         0.03311764,  0.2397004 , -0.03350463]),
 'intercept_': np.float64(-2.1721301169984707e-15),
 'dual_gap_': np.float64(8.889033968068597e-05),
 'n_iter_': 202}

def _rmse_and_r2(y_true, y_pred):
    """Return (root-mean-squared error, R^2) of y_pred against y_true."""
    return np.sqrt(mean_squared_error(y_true, y_pred)), r2_score(y_true, y_pred)


# NOTE(review): y_test is in dollars, so every y_pred_* passed here must be on
# the dollar scale as well - confirm where each prediction array was produced.

# Vanilla linear
rmse_lr, r2_lr = _rmse_and_r2(y_test, y_pred_lr)

# Ridge
rmse_ridge, r2_ridge = _rmse_and_r2(y_test, y_pred_ridge)

# Lasso
rmse_lasso, r2_lasso = _rmse_and_r2(y_test, y_pred_lasso)

# ElasticNet
rmse_elastic, r2_elastic = _rmse_and_r2(y_test, y_pred_elastic)

# Feature names, in the column order the models were trained on.
feature_names = np.array(X_train.columns)


def _rank_by_magnitude(coef):
    """Round coefficients to 2 decimals and order them by decreasing |weight|.

    Returns the ordering indices, the feature names in that order, and the
    rounded weights in that order.
    """
    rounded = coef.flatten().round(2)
    order = np.argsort(np.abs(rounded))[::-1]
    return order, feature_names[order], rounded[order]


lr_idx, lr_features, lr_weights = _rank_by_magnitude(lr.coef_)
ridge_idx, ridge_features, ridge_weights = _rank_by_magnitude(ridge.coef_)
lasso_idx, lasso_features, lasso_weights = _rank_by_magnitude(lasso.coef_)
elastic_idx, elastic_features, elastic_weights = _rank_by_magnitude(elastic.coef_)

# Side-by-side table with two-level columns: (model, 'feature' | 'weight').
ranked_by_model = {
    'Linear': (lr_features, lr_weights),
    'Ridge': (ridge_features, ridge_weights),
    'Lasso': (lasso_features, lasso_weights),
    'Elastic': (elastic_features, elastic_weights),
}

feature_df = pd.DataFrame({
    (model_label, field): values
    for model_label, (features, weights) in ranked_by_model.items()
    for field, values in (('feature', features), ('weight', weights))
})

feature_df.head(200)

	Linear		Ridge		Lasso		Elastic
	feature	weight	feature	weight	feature	weight	feature	weight
0	grade	0.33	grade	0.33	sqft_living	0.38	grade	0.33
1	age_built	0.25	age_built	0.24	grade	0.34	sqft_living	0.25
2	sqft_living	0.21	sqft_living	0.21	age_built	0.24	age_built	0.25
3	lat	0.21	lat	0.21	lat	0.21	lat	0.21
4	sqft_above	0.19	sqft_above	0.19	waterfront	0.14	sqft_above	0.15
5	waterfront	0.14	waterfront	0.14	view	0.11	waterfront	0.14
6	view	0.11	view	0.11	bathrooms	0.09	view	0.11
7	bathrooms	0.09	bathrooms	0.10	bedrooms	-0.09	bathrooms	0.09
8	bedrooms	-0.09	bedrooms	-0.09	condition	0.05	bedrooms	-0.09
9	sqft_basement	0.08	sqft_basement	0.08	sqft_above	0.04	sqft_basement	0.06
10	condition	0.05	condition	0.05	sqft_living15	0.04	condition	0.05
11	sqft_living15	0.04	sqft_living15	0.05	long	-0.03	sqft_living15	0.04
12	age_reno	-0.04	long	-0.03	yr_sold	0.03	age_reno	-0.04
13	long	-0.03	sqft_lot15	-0.03	age_reno	-0.03	long	-0.03
14	sqft_lot15	-0.03	yr_sold	0.03	sqft_lot15	-0.02	sqft_lot15	-0.03
15	yr_sold	0.03	age_reno	-0.03	floors	0.01	yr_sold	0.03
16	floors	0.01	floors	0.01	sqft_basement	0.00	floors	0.01
17	sqft_lot	0.00	sqft_lot	0.00	sqft_lot	0.00	sqft_lot	0.00

# Feature names in training-column order, i.e. the order of each model's coef_
# before the ranking above was applied.
feature_names

array(['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors',
       'waterfront', 'view', 'condition', 'grade', 'sqft_above',
       'sqft_basement', 'lat', 'long', 'sqft_living15', 'sqft_lot15',
       'yr_sold', 'age_built', 'age_reno'], dtype=object)