Skip to content

Commit 8a66b8c

Browse files
remove assign in reg1
1 parent 19b31b8 commit 8a66b8c

File tree

1 file changed

+35
-42
lines changed

1 file changed

+35
-42
lines changed

source/regression1.md

Lines changed: 35 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -294,17 +294,15 @@ of a house that is 2,000 square feet.
294294
```
295295

296296
```{code-cell} ipython3
297-
nearest_neighbors = (
298-
small_sacramento.assign(diff=(2000 - small_sacramento["sqft"]).abs())
299-
.nsmallest(5, "diff")
300-
)
301-
302-
nearest_neighbors
297+
small_sacramento["dist"] = (2000 - small_sacramento["sqft"]).abs()
298+
small_sacramento.nsmallest(5, "dist")
303299
```
304300

305301
```{code-cell} ipython3
306302
:tags: [remove-cell]
307303
304+
nearest_neighbors = small_sacramento.nsmallest(5, "dist")
305+
308306
nn_plot = small_plot + rule
309307
310308
# plot horizontal lines which are perpendicular to x=2000
@@ -609,16 +607,15 @@ sacr_gridsearch.fit(
609607
)
610608
611609
# Retrieve the CV scores
612-
sacr_results = pd.DataFrame(sacr_gridsearch.cv_results_)[[
613-
"param_kneighborsregressor__n_neighbors",
614-
"mean_test_score",
615-
"std_test_score"
616-
]]
610+
sacr_results = pd.DataFrame(sacr_gridsearch.cv_results_)
611+
sacr_results["sem_test_score"] = sacr_results["std_test_score"] / 5**(1/2)
617612
sacr_results = (
618-
sacr_results
619-
.assign(sem_test_score=sacr_results["std_test_score"] / 5**(1/2))
613+
sacr_results[[
614+
"param_kneighborsregressor__n_neighbors",
615+
"mean_test_score",
616+
"sem_test_score"
617+
]]
620618
.rename(columns={"param_kneighborsregressor__n_neighbors": "n_neighbors"})
621-
.drop(columns=["std_test_score"])
622619
)
623620
sacr_results
624621
```
@@ -834,12 +831,10 @@ model uses a different default scoring metric than the RMSPE.
834831
```{code-cell} ipython3
835832
from sklearn.metrics import mean_squared_error
836833
837-
sacr_preds = sacramento_test.assign(
838-
predicted = sacr_gridsearch.predict(sacramento_test)
839-
)
834+
sacramento_test["predicted"] = sacr_gridsearch.predict(sacramento_test)
840835
RMSPE = mean_squared_error(
841-
y_true = sacr_preds["price"],
842-
y_pred=sacr_preds["predicted"]
836+
y_true = sacramento_test["price"],
837+
y_pred = sacramento_test["predicted"]
843838
)**(1/2)
844839
RMSPE
845840
```
@@ -890,9 +885,7 @@ sqft_prediction_grid = pd.DataFrame({
890885
"sqft": np.arange(sacramento["sqft"].min(), sacramento["sqft"].max(), 10)
891886
})
892887
# Predict the price for each of the sqft values in the grid
893-
sacr_preds = sqft_prediction_grid.assign(
894-
predicted = sacr_gridsearch.predict(sqft_prediction_grid)
895-
)
888+
sqft_prediction_grid["predicted"] = sacr_gridsearch.predict(sqft_prediction_grid)
896889
897890
# Plot all the houses
898891
base_plot = alt.Chart(sacramento).mark_circle(opacity=0.4).encode(
@@ -905,11 +898,14 @@ base_plot = alt.Chart(sacramento).mark_circle(opacity=0.4).encode(
905898
)
906899
907900
# Add the predictions as a line
908-
sacr_preds_plot = base_plot + alt.Chart(sacr_preds, title=f"K = {best_k_sacr}").mark_line(
909-
color="#ff7f0e"
901+
sacr_preds_plot = base_plot + alt.Chart(
902+
sqft_prediction_grid,
903+
title=f"K = {best_k_sacr}"
904+
).mark_line(
905+
color="#ff7f0e"
910906
).encode(
911-
x="sqft",
912-
y="predicted"
907+
x="sqft",
908+
y="predicted"
913909
)
914910
915911
sacr_preds_plot
@@ -1018,25 +1014,24 @@ sacr_gridsearch = GridSearchCV(
10181014
cv=5,
10191015
scoring="neg_root_mean_squared_error"
10201016
)
1017+
10211018
sacr_gridsearch.fit(
10221019
sacramento_train[["sqft", "beds"]],
10231020
sacramento_train["price"]
10241021
)
10251022
10261023
# Retrieve the CV scores
1027-
sacr_results = pd.DataFrame(sacr_gridsearch.cv_results_)[[
1028-
"param_kneighborsregressor__n_neighbors",
1029-
"mean_test_score",
1030-
"std_test_score"
1031-
]]
1032-
1024+
sacr_results = pd.DataFrame(sacr_gridsearch.cv_results_)
1025+
sacr_results["sem_test_score"] = sacr_results["std_test_score"] / 5**(1/2)
1026+
sacr_results["mean_test_score"] = -sacr_results["mean_test_score"]
10331027
sacr_results = (
1034-
sacr_results
1035-
.assign(sem_test_score=sacr_results["std_test_score"] / 5**(1/2))
1036-
.rename(columns={"param_kneighborsregressor__n_neighbors" : "n_neighbors"})
1037-
.drop(columns=["std_test_score"])
1028+
sacr_results[[
1029+
"param_kneighborsregressor__n_neighbors",
1030+
"mean_test_score",
1031+
"sem_test_score"
1032+
]]
1033+
.rename(columns={"param_kneighborsregressor__n_neighbors" : "n_neighbors"})
10381034
)
1039-
sacr_results["mean_test_score"] = -sacr_results["mean_test_score"]
10401035
10411036
# show only the row of minimum RMSPE
10421037
sacr_results.nsmallest(1, "mean_test_score")
@@ -1069,12 +1064,10 @@ via the `predict` method of the fit `GridSearchCV` object. Finally, we will use
10691064
to compute the RMSPE.
10701065

10711066
```{code-cell} ipython3
1072-
sacr_preds = sacramento_test.assign(
1073-
predicted = sacr_gridsearch.predict(sacramento_test)
1074-
)
1067+
sacramento_test["predicted"] = sacr_gridsearch.predict(sacramento_test)
10751068
RMSPE_mult = mean_squared_error(
1076-
y_true = sacr_preds["price"],
1077-
y_pred=sacr_preds["predicted"]
1069+
y_true = sacramento_test["price"],
1070+
y_pred = sacramento_test["predicted"]
10781071
)**(1/2)
10791072
RMSPE_mult
10801073

0 commit comments

Comments
 (0)