import pandas as pd
import numpy as np
import seaborn as sns
# Download the advertising CSV and drop its first column; the remaining
# columns include "TV", "Radio" and "Sales", which the code below relies on.
ads_url = "https://gist.githubusercontent.com/krisrs1128/28c59d0222dcb494d00aaaeaa6498c9e/raw/15a9881ee0c43ef5ae41a1c8765d51e9aa149768/advertising.csv"
raw_ads = pd.read_csv(ads_url)
ads = raw_ads.iloc[:, 1:]

# Bare expression kept from the notebook: it displays the frame in an
# interactive session and is a no-op when run as a script.
ads

# Sales against radio spend, coloured by TV spend.
sns.scatterplot(data=ads, x="Radio", y="Sales", hue="TV")
def fit_and_plot(model, ads, eval_grid):
    """Fit ``model`` on the advertising data and plot its predictions on a grid.

    Parameters
    ----------
    model
        Unfitted regressor exposing ``fit(X, y)`` and ``predict(X)``.
    ads
        DataFrame with "TV" and "Radio" predictor columns and a "Sales"
        response column.
    eval_grid
        DataFrame with "tv" and "radio" columns at which to evaluate the
        fitted model. It is modified in place: a "y_hat" column holding the
        predictions is added (or overwritten).

    Returns
    -------
    The object returned by ``sns.scatterplot`` for the prediction plot
    (radio on the x-axis, prediction on the y-axis, coloured by tv).
    """
    # Hand the model plain arrays rather than DataFrames: the training
    # columns are named "TV"/"Radio" while the grid uses "tv"/"radio", and
    # estimators that remember feature names from ``fit`` reject (or warn
    # about) differently named columns at ``predict`` time.
    train_features = ads[["TV", "Radio"]].to_numpy()
    # A 1-D target makes ``predict`` return a 1-D vector, which maps cleanly
    # onto a single DataFrame column.
    train_response = ads["Sales"].to_numpy()
    model.fit(train_features, train_response)

    grid_features = eval_grid[["tv", "radio"]].to_numpy()
    eval_grid["y_hat"] = model.predict(grid_features)
    return sns.scatterplot(data=eval_grid, x="radio", y="y_hat", hue="tv")
import sklearn.linear_model as lm
from itertools import product
# Evaluation grid: every pairing of a coarse set of TV values (0.7 up to, but
# not including, 296 in steps of 50) with a fine set of radio values (0..49).
tv_grid = np.arange(0.7, 296, 50)
radio_grid = np.arange(50)

# Rows are ordered with tv varying slowest and radio varying fastest.
grid_rows = [(tv, radio) for tv in tv_grid for radio in radio_grid]
eval_grid = pd.DataFrame(grid_rows, columns=["tv", "radio"])
# Fit several regression families to the same data and plot each one's
# predictions over the shared evaluation grid.

# Ordinary least squares.
fit_and_plot(lm.LinearRegression(), ads, eval_grid)

# k-nearest neighbours. The `knn` alias must be bound before use; it refers
# to scikit-learn's neighbors module.
import sklearn.neighbors as knn
fit_and_plot(knn.KNeighborsRegressor(), ads, eval_grid)

# Generalised additive model: one smooth term per predictor (columns 0 and 1)
# plus a tensor-product term over the pair.
from pygam import LinearGAM, s, te
fit_and_plot(LinearGAM(s(0) + s(1) + te(0, 1)), ads, eval_grid)

# Regression tree with default settings.
from sklearn import tree
fit_and_plot(tree.DecisionTreeRegressor(), ads, eval_grid)

# Bare expression kept from the notebook: the "Sales" column squeezed to a
# 1-D array. It displays in an interactive session and is discarded when run
# as a script.
ads[["Sales"]].values.squeeze()
# Polynomial basis (intentionally absurdly high degree)
from sklearn.preprocessing import PolynomialFeatures

# Linear regression on a degree-7 polynomial expansion of (TV, Radio).
poly = PolynomialFeatures(degree=7)
# Plain arrays are used throughout because the training columns are named
# "TV"/"Radio" while the evaluation grid uses "tv"/"radio".
x = poly.fit_transform(ads[["TV", "Radio"]].to_numpy())
# A 1-D target makes `predict` return a 1-D vector for the "y_hat" column.
model = lm.LinearRegression().fit(x, ads["Sales"])

# Expand the grid with the transformer fitted on the training data
# (`transform`) instead of re-fitting it on the evaluation points.
grid_features = poly.transform(eval_grid[["tv", "radio"]].to_numpy())
eval_grid["y_hat"] = model.predict(grid_features)
sns.scatterplot(data=eval_grid, x="radio", y="y_hat", hue="tv")
from sklearn.preprocessing import PolynomialFeatures

# Linear regression on an interaction-only expansion of (TV, Radio): with
# `interaction_only=True` the expansion contains products of distinct inputs
# but no powers of a single input.
poly = PolynomialFeatures(interaction_only=True)
# Plain arrays are used throughout because the training columns are named
# "TV"/"Radio" while the evaluation grid uses "tv"/"radio".
x = poly.fit_transform(ads[["TV", "Radio"]].to_numpy())
# A 1-D target makes `predict` return a 1-D vector for the "y_hat" column.
model = lm.LinearRegression().fit(x, ads["Sales"])

# Expand the grid with the transformer fitted on the training data
# (`transform`) instead of re-fitting it on the evaluation points.
grid_features = poly.transform(eval_grid[["tv", "radio"]].to_numpy())
eval_grid["y_hat"] = model.predict(grid_features)
sns.scatterplot(data=eval_grid, x="radio", y="y_hat", hue="tv")