Model Families

In [20]:
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn.neighbors as knn
In [14]:
ads = pd.read_csv("https://gist.githubusercontent.com/krisrs1128/28c59d0222dcb494d00aaaeaa6498c9e/raw/15a9881ee0c43ef5ae41a1c8765d51e9aa149768/advertising.csv").iloc[:, 1:]
ads
Out[14]:
TV Radio Newspaper Sales
0 230.1 37.8 69.2 22.1
1 44.5 39.3 45.1 10.4
2 17.2 45.9 69.3 9.3
3 151.5 41.3 58.5 18.5
4 180.8 10.8 58.4 12.9
... ... ... ... ...
195 38.2 3.7 13.8 7.6
196 94.2 4.9 8.1 9.7
197 177.0 9.3 6.4 12.8
198 283.6 42.0 66.2 25.5
199 232.1 8.6 8.7 13.4

200 rows × 4 columns

In [12]:
sns.scatterplot(x="Radio", y="Sales", hue="TV", data=ads)
Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x11610f470>
In [ ]:
def fit_and_plot(model, ads, eval_grid):
    model.fit(ads[["TV", "Radio"]], ads[["Sales"]])
    eval_grid["y_hat"] = model.predict(eval_grid[["tv", "radio"]])
    return sns.scatterplot(data=eval_grid, x="radio", y="y_hat", hue="tv")
In [93]:
import sklearn.linear_model as lm
from itertools import product

tv_grid = np.arange(0.7, 296, 50)
radio_grid = np.arange(0, 50, 1)
eval_grid = pd.DataFrame(
    list(product(tv_grid, radio_grid)),
    columns=['tv', 'radio']
)

fit_and_plot(lm.LinearRegression(), ads, eval_grid)
Out[93]:
<matplotlib.axes._subplots.AxesSubplot at 0x11be53518>
In [94]:
fit_and_plot(knn.KNeighborsRegressor(), ads, eval_grid)
Out[94]:
<matplotlib.axes._subplots.AxesSubplot at 0x11bf4c8d0>
In [97]:
from pygam import LinearGAM, s, te
fit_and_plot(LinearGAM(s(0) + s(1) + te(0, 1)), ads, eval_grid)
Out[97]:
<matplotlib.axes._subplots.AxesSubplot at 0x11c044390>
In [92]:
from sklearn import tree
fit_and_plot(tree.DecisionTreeRegressor(), ads, eval_grid)
Out[92]:
<matplotlib.axes._subplots.AxesSubplot at 0x11b9ae9e8>
In [110]:
ads[["Sales"]].values.squeeze()
Out[110]:
array([22.1, 10.4,  9.3, 18.5, 12.9,  7.2, 11.8, 13.2,  4.8, 10.6,  8.6,
       17.4,  9.2,  9.7, 19. , 22.4, 12.5, 24.4, 11.3, 14.6, 18. , 12.5,
        5.6, 15.5,  9.7, 12. , 15. , 15.9, 18.9, 10.5, 21.4, 11.9,  9.6,
       17.4,  9.5, 12.8, 25.4, 14.7, 10.1, 21.5, 16.6, 17.1, 20.7, 12.9,
        8.5, 14.9, 10.6, 23.2, 14.8,  9.7, 11.4, 10.7, 22.6, 21.2, 20.2,
       23.7,  5.5, 13.2, 23.8, 18.4,  8.1, 24.2, 15.7, 14. , 18. ,  9.3,
        9.5, 13.4, 18.9, 22.3, 18.3, 12.4,  8.8, 11. , 17. ,  8.7,  6.9,
       14.2,  5.3, 11. , 11.8, 12.3, 11.3, 13.6, 21.7, 15.2, 12. , 16. ,
       12.9, 16.7, 11.2,  7.3, 19.4, 22.2, 11.5, 16.9, 11.7, 15.5, 25.4,
       17.2, 11.7, 23.8, 14.8, 14.7, 20.7, 19.2,  7.2,  8.7,  5.3, 19.8,
       13.4, 21.8, 14.1, 15.9, 14.6, 12.6, 12.2,  9.4, 15.9,  6.6, 15.5,
        7. , 11.6, 15.2, 19.7, 10.6,  6.6,  8.8, 24.7,  9.7,  1.6, 12.7,
        5.7, 19.6, 10.8, 11.6,  9.5, 20.8,  9.6, 20.7, 10.9, 19.2, 20.1,
       10.4, 11.4, 10.3, 13.2, 25.4, 10.9, 10.1, 16.1, 11.6, 16.6, 19. ,
       15.6,  3.2, 15.3, 10.1,  7.3, 12.9, 14.4, 13.3, 14.9, 18. , 11.9,
       11.9,  8. , 12.2, 17.1, 15. ,  8.4, 14.5,  7.6, 11.7, 11.5, 27. ,
       20.2, 11.7, 11.8, 12.6, 10.5, 12.2,  8.7, 26.2, 17.6, 22.6, 10.3,
       17.3, 15.9,  6.7, 10.8,  9.9,  5.9, 19.6, 17.3,  7.6,  9.7, 12.8,
       25.5, 13.4])

Polynomial basis (intentionally absurdly high degree)

In [146]:
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=7)
x = poly.fit_transform(ads[["TV", "Radio"]])
model = lm.LinearRegression().fit(x, ads[["Sales"]])
eval_grid["y_hat"] = model.predict(poly.fit_transform(eval_grid[["tv", "radio"]]))
sns.scatterplot(data=eval_grid, x="radio", y="y_hat", hue="tv")
Out[146]:
<matplotlib.axes._subplots.AxesSubplot at 0x11cd8ac18>
In [147]:
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(interaction_only=True)
x = poly.fit_transform(ads[["TV", "Radio"]])
model = lm.LinearRegression().fit(x, ads[["Sales"]])
eval_grid["y_hat"] = model.predict(poly.fit_transform(eval_grid[["tv", "radio"]]))
sns.scatterplot(data=eval_grid, x="radio", y="y_hat", hue="tv")
Out[147]:
<matplotlib.axes._subplots.AxesSubplot at 0x11ce76a58>