Commit e5cad17

Commit message: Pushing the docs to dev/ for branch: master, commit 94f2e9c5d86263c4c9abb412d44135b4e6bc1ef5

1 parent: c053680

1,240 files changed: +3819 additions, -3841 deletions

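Nearly all of the churn in these regenerated files comes from one mechanical change in the example sources: the sphinx-gallery text-block separator, a long row of `#` characters, is replaced by the `# %%` cell marker. sphinx-gallery accepts both forms, and `# %%` is also what Jupyter-style editors use to delimit runnable cells. A minimal sketch of the new convention (this file and its contents are hypothetical, not part of the commit):

```python
"""
Example title
=============

The module docstring becomes the intro of the rendered page.
"""

# %%
# First text block
# ----------------
# Comment lines directly under a ``# %%`` marker render as narrative text.

import numpy as np  # code between markers runs as one notebook-style cell
x = np.linspace(0, 1, 5)

# %%
# Each further marker starts a new text block and code cell.
print(x.sum())
```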

dev/_downloads/00a5ddd24a9ad44708f4ab3b157ef0ff/plot_stack_predictors.py
Lines changed: 9 additions & 9 deletions

@@ -22,9 +22,9 @@
 # License: BSD 3 clause


-###############################################################################
+# %%
 # Download the dataset
-###############################################################################
+##############################################################################
 #
 # We will use `Ames Housing`_ dataset which was first compiled by Dean De Cock
 # and became better known after it was used in Kaggle challenge. It is a set
@@ -68,9 +68,9 @@ def load_ames_housing():
 X, y = load_ames_housing()


-###############################################################################
+# %%
 # Make pipeline to preprocess the data
-###############################################################################
+##############################################################################
 #
 # Before we can use Ames dataset we still need to do some preprocessing.
 # First, the dataset has many missing values. To impute them, we will exchange
@@ -136,9 +136,9 @@ def load_ames_housing():
 remainder='passthrough')


-###############################################################################
+# %%
 # Stack of predictors on a single data set
-###############################################################################
+##############################################################################
 #
 # It is sometimes tedious to find the model which will best perform on a given
 # dataset. Stacking provide an alternative by combining the outputs of several
@@ -181,9 +181,9 @@ def load_ames_housing():
 final_estimator=RidgeCV())


-###############################################################################
+# %%
 # Measure and plot the results
-###############################################################################
+##############################################################################
 #
 # Now we can use Ames Housing dataset to make the predictions. We check the
 # performance of each individual predictor as well as of the stack of the
@@ -250,7 +250,7 @@ def plot_regression_results(ax, y_true, y_pred, title, scores, elapsed_time):
 plt.subplots_adjust(top=0.9)
 plt.show()

-###############################################################################
+# %%
 # The stacked regressor will combine the strengths of the different regressors.
 # However, we also see that training the stacked regressor is much more
 # computationally expensive.
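The example regenerated here combines several base regressors through a final `RidgeCV`, as the hunks above show. A minimal sketch of that `StackingRegressor` pattern, with synthetic data and an illustrative estimator set rather than the example's exact one:

```python
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.linear_model import LassoCV, RidgeCV

# Synthetic stand-in for the Ames Housing data the example downloads.
X, y = make_regression(n_samples=200, n_features=10, random_state=0)

stacker = StackingRegressor(
    estimators=[('lasso', LassoCV()),
                ('rf', RandomForestRegressor(random_state=0))],
    final_estimator=RidgeCV(),  # learns how to weight the base predictions
)
print(stacker.fit(X, y).score(X, y))
```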

dev/_downloads/0469b1db532e2049dcabff76dcfa3407/plot_cv_indices.py
Lines changed: 5 additions & 5 deletions

@@ -22,7 +22,7 @@
 cmap_cv = plt.cm.coolwarm
 n_splits = 4

-###############################################################################
+# %%
 # Visualize our data
 # ------------------
 #
@@ -61,7 +61,7 @@ def visualize_groups(classes, groups, name):

 visualize_groups(y, groups, 'no groups')

-###############################################################################
+# %%
 # Define a function to visualize cross-validation behavior
 # --------------------------------------------------------
 #
@@ -102,15 +102,15 @@ def plot_cv_indices(cv, X, y, group, ax, n_splits, lw=10):
 return ax


-###############################################################################
+# %%
 # Let's see how it looks for the :class:`~sklearn.model_selection.KFold`
 # cross-validation object:

 fig, ax = plt.subplots()
 cv = KFold(n_splits)
 plot_cv_indices(cv, X, y, groups, ax, n_splits)

-###############################################################################
+# %%
 # As you can see, by default the KFold cross-validation iterator does not
 # take either datapoint class or group into consideration. We can change this
 # by using the ``StratifiedKFold`` like so.
@@ -119,7 +119,7 @@ def plot_cv_indices(cv, X, y, group, ax, n_splits, lw=10):
 cv = StratifiedKFold(n_splits)
 plot_cv_indices(cv, X, y, groups, ax, n_splits)

-###############################################################################
+# %%
 # In this case, the cross-validation retained the same ratio of classes across
 # each CV split. Next we'll visualize this behavior for a number of CV
 # iterators.
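The behavior this example visualizes can also be checked numerically: plain `KFold` ignores the labels, while `StratifiedKFold` preserves the class ratio in every split. A small sketch with illustrative imbalanced data, not the example's generated dataset:

```python
import numpy as np
from sklearn.model_selection import KFold, StratifiedKFold

X = np.zeros((12, 2))
y = np.array([0] * 9 + [1] * 3)  # 25% of the samples are class 1

for name, cv in [('KFold', KFold(3)), ('StratifiedKFold', StratifiedKFold(3))]:
    ratios = [y[test].mean() for _, test in cv.split(X, y)]
    print(name, ratios)  # only StratifiedKFold keeps ~0.25 in every split
```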

dev/_downloads/0537f2a914bc9e8cc2d431935d152a79/plot_roc_curve_visualization_api.py
Lines changed: 3 additions & 3 deletions

@@ -9,7 +9,7 @@
 """
 print(__doc__)

-##############################################################################
+# %%
 # Load Data and Train a SVC
 # -------------------------
 # First, we load the wine dataset and convert it to a binary classification
@@ -28,7 +28,7 @@
 svc = SVC(random_state=42)
 svc.fit(X_train, y_train)

-##############################################################################
+# %%
 # Plotting the ROC Curve
 # ----------------------
 # Next, we plot the ROC curve with a single call to
@@ -38,7 +38,7 @@
 svc_disp = plot_roc_curve(svc, X_test, y_test)
 plt.show()

-##############################################################################
+# %%
 # Training a Random Forest and Plotting the ROC Curve
 # --------------------------------------------------------
 # We train a random forest classifier and create a plot comparing it to the SVC
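The comparison the last hunk describes hinges on the display object returned by `plot_roc_curve`: reusing its axes draws both curves on one figure. A sketch of that pattern, assuming the `plot_roc_curve` helper available in scikit-learn at the time of this commit:

```python
import matplotlib.pyplot as plt
from sklearn.datasets import load_wine
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import plot_roc_curve
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

X, y = load_wine(return_X_y=True)
y = y == 2  # binarize the problem, as the example does
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

svc = SVC(random_state=42).fit(X_train, y_train)
rfc = RandomForestClassifier(random_state=42).fit(X_train, y_train)

svc_disp = plot_roc_curve(svc, X_test, y_test)        # first curve
plot_roc_curve(rfc, X_test, y_test, ax=svc_disp.ax_)  # same axes, second curve
plt.show()
```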

dev/_downloads/08d5dd49f57f99e9638e3f76f11e28ac/plot_multi_metric_evaluation.py
Lines changed: 2 additions & 2 deletions

@@ -29,7 +29,7 @@

 print(__doc__)

-###############################################################################
+# %%
 # Running ``GridSearchCV`` using multiple evaluation metrics
 # ----------------------------------------------------------
 #
@@ -51,7 +51,7 @@
 gs.fit(X, y)
 results = gs.cv_results_

-###############################################################################
+# %%
 # Plotting the result
 # -------------------
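For reference, the multi-metric search the hunks above document follows this shape; the scorers and parameter grid below are illustrative, not the example's exact configuration:

```python
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=200, random_state=0)

gs = GridSearchCV(
    DecisionTreeClassifier(random_state=0),
    param_grid={'min_samples_split': range(2, 20, 4)},
    scoring={'AUC': 'roc_auc', 'Accuracy': 'accuracy'},
    refit='AUC',  # with several metrics, refit must name the one to optimize
    return_train_score=True,
)
gs.fit(X, y)
# one mean_test_* column per scorer appears in cv_results_
print(sorted(k for k in gs.cv_results_ if k.startswith('mean_test_')))
```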

dev/_downloads/0d59ba71a84b25ededa8e1298aed7cf2/plot_transformed_target.ipynb
Lines changed: 1 addition & 8 deletions

@@ -105,14 +105,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"Real-world data set\n##############################################################################\n\n"
-]
-},
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"In a similar manner, the Ames housing data set is used to show the impact\nof transforming the targets before learning a model. In this example, the\ntarget to be predicted is the selling price of each house.\n\n"
+"Real-world data set\n##############################################################################\n\n In a similar manner, the Ames housing data set is used to show the impact\n of transforming the targets before learning a model. In this example, the\n target to be predicted is the selling price of each house.\n\n"
 ]
 },
 {
Binary file not shown.

dev/_downloads/34da82a8792cf79d06c7598ae139cc1c/plot_permutation_importance_multicollinear.py
Lines changed: 4 additions & 4 deletions

@@ -29,7 +29,7 @@
 from sklearn.inspection import permutation_importance
 from sklearn.model_selection import train_test_split

-##############################################################################
+# %%
 # Random Forest Feature Importance on Breast Cancer Data
 # ------------------------------------------------------
 # First, we train a random forest on the breast cancer dataset and evaluate
@@ -42,7 +42,7 @@
 clf.fit(X_train, y_train)
 print("Accuracy on test data: {:.2f}".format(clf.score(X_test, y_test)))

-##############################################################################
+# %%
 # Next, we plot the tree based feature importance and the permutation
 # importance. The permutation importance plot shows that permuting a feature
 # drops the accuracy by at most `0.012`, which would suggest that none of the
@@ -68,7 +68,7 @@
 fig.tight_layout()
 plt.show()

-##############################################################################
+# %%
 # Handling Multicollinear Features
 # --------------------------------
 # When features are collinear, permutating one feature will have little
@@ -92,7 +92,7 @@
 fig.tight_layout()
 plt.show()

-##############################################################################
+# %%
 # Next, we manually pick a threshold by visual inspection of the dendrogram
 # to group our features into clusters and choose a feature from each cluster to
 # keep, select those features from our dataset, and train a new random forest.
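The measurement this example is built around is a single call to `permutation_importance`. A minimal sketch on the same breast-cancer data; the model settings are illustrative rather than the example's exact ones:

```python
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

clf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
result = permutation_importance(clf, X_test, y_test, n_repeats=10,
                                random_state=42)
# With many collinear features, even the largest accuracy drop stays small.
print('max mean importance: {:.3f}'.format(result.importances_mean.max()))
```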

dev/_downloads/388641d133587cc11aa26f2dbef4b950/plot_document_classification_20newsgroups.py
Lines changed: 3 additions & 3 deletions

@@ -93,7 +93,7 @@ def is_interactive():
 print()


-##############################################################################
+# %%
 # Load data from the training set
 # ------------------------------------
 # Let's load data from the newsgroups dataset which comprises around 18000
@@ -199,7 +199,7 @@ def trim(s):
 return s if len(s) <= 80 else s[:77] + "..."


-##############################################################################
+# %%
 # Benchmark classifiers
 # ------------------------------------
 # We train and test the datasets with 15 different classification models
@@ -297,7 +297,7 @@ def benchmark(clf):
 ('classification', LinearSVC(penalty="l2"))])))


-##############################################################################
+# %%
 # Add plots
 # ------------------------------------
 # The bar plot indicates the accuracy, training time (normalized) and test time
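The benchmark this example runs reduces, per classifier, to a vectorize-then-fit pattern. A small sketch on two of the newsgroups (a deliberately tiny subset; the example benchmarks 15 models across more categories):

```python
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC

categories = ['sci.space', 'talk.religion.misc']  # illustrative subset
train = fetch_20newsgroups(subset='train', categories=categories)
test = fetch_20newsgroups(subset='test', categories=categories)

vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5)
X_train = vectorizer.fit_transform(train.data)
X_test = vectorizer.transform(test.data)

clf = LinearSVC(penalty='l2').fit(X_train, train.target)
print('accuracy: {:.3f}'.format(clf.score(X_test, test.target)))
```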

dev/_downloads/418020d5fd9dd75fcb92704f51fc42db/plot_display_object_visualization.py
Lines changed: 5 additions & 5 deletions

@@ -15,7 +15,7 @@
 """
 print(__doc__)

-##############################################################################
+# %%
 # Load Data and train model
 # -------------------------
 # For this example, we load a blood transfusion service center data set from
@@ -35,7 +35,7 @@
 clf = make_pipeline(StandardScaler(), LogisticRegression(random_state=0))
 clf.fit(X_train, y_train)

-##############################################################################
+# %%
 # Create :class:`ConfusionMatrixDisplay`
 ##############################################################################
 # With the fitted model, we compute the predictions of the model on the test
@@ -50,7 +50,7 @@
 cm_display = ConfusionMatrixDisplay(cm).plot()


-##############################################################################
+# %%
 # Create :class:`RocCurveDisplay`
 ##############################################################################
 # The roc curve requires either the probabilities or the non-thresholded
@@ -63,7 +63,7 @@
 fpr, tpr, _ = roc_curve(y_test, y_score, pos_label=clf.classes_[1])
 roc_display = RocCurveDisplay(fpr=fpr, tpr=tpr).plot()

-##############################################################################
+# %%
 # Create :class:`PrecisionRecallDisplay`
 ##############################################################################
 # Similarly, the precision recall curve can be plotted using `y_score` from
@@ -75,7 +75,7 @@
 pos_label=clf.classes_[1])
 pr_display = PrecisionRecallDisplay(precision=prec, recall=recall).plot()

-##############################################################################
+# %%
 # Combining the display objects into a single plot
 ##############################################################################
 # The display objects store the computed values that were passed as arguments.
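The point of the display objects the hunks above introduce is that plotting is decoupled from computation: a display constructed from stored values can be redrawn onto any axes. A sketch with dummy curve values standing in for the example's computed metrics:

```python
import matplotlib.pyplot as plt
from sklearn.metrics import PrecisionRecallDisplay, RocCurveDisplay

roc_display = RocCurveDisplay(fpr=[0.0, 0.5, 1.0], tpr=[0.0, 0.8, 1.0])
pr_display = PrecisionRecallDisplay(precision=[1.0, 0.8, 0.5],
                                    recall=[0.0, 0.5, 1.0])

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
roc_display.plot(ax=ax1)  # redraws from stored values; nothing is recomputed
pr_display.plot(ax=ax2)
plt.show()
```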

dev/_downloads/443833bbe6d96acdb0f3242b81ef41c3/plot_missing_values.py
Lines changed: 9 additions & 9 deletions

@@ -32,7 +32,7 @@
 # Authors: Maria Telenczuk <https://github.com/maikia>
 # License: BSD 3 clause

-###############################################################################
+# %%
 # Download the data and make missing values sets
 ################################################
 #
@@ -83,7 +83,7 @@ def add_missing_values(X_full, y_full):
 X_diabetes, y_diabetes)


-###############################################################################
+# %%
 # Impute the missing data and score
 # #################################
 # Now we will write a function which will score the results on the differently
@@ -104,7 +104,7 @@ def add_missing_values(X_full, y_full):
 N_SPLITS = 5
 regressor = RandomForestRegressor(random_state=0)

-###############################################################################
+# %%
 # Missing information
 # -------------------
 # In addition to imputing the missing values, the imputers have an
@@ -132,7 +132,7 @@ def get_scores_for_imputer(imputer, X_missing, y_missing):
 mses_diabetes = np.zeros(5)
 stds_diabetes = np.zeros(5)

-###############################################################################
+# %%
 # Estimate the score
 # ------------------
 # First, we want to estimate the score on the original data:
@@ -151,7 +151,7 @@ def get_full_score(X_full, y_full):
 mses_diabetes[0], stds_diabetes[0] = get_full_score(X_diabetes, y_diabetes)


-###############################################################################
+# %%
 # Replace missing values by 0
 # ---------------------------
 #
@@ -174,7 +174,7 @@ def get_impute_zero_score(X_missing, y_missing):
 y_miss_diabetes)


-###############################################################################
+# %%
 # kNN-imputation of the missing values
 # ------------------------------------
 #
@@ -193,7 +193,7 @@ def get_impute_knn_score(X_missing, y_missing):
 y_miss_diabetes)


-###############################################################################
+# %%
 # Impute missing values with mean
 # -------------------------------
 #
@@ -211,7 +211,7 @@ def get_impute_mean(X_missing, y_missing):
 y_miss_diabetes)


-###############################################################################
+# %%
 # Iterative imputation of the missing values
 # ------------------------------------------
 #
@@ -241,7 +241,7 @@ def get_impute_iterative(X_missing, y_missing):
 mses_diabetes = mses_diabetes * -1
 mses_california = mses_california * -1

-###############################################################################
+# %%
 # Plot the results
 # ################
 #
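Each strategy this example scores boils down to an imputer-plus-regressor pipeline evaluated by cross-validation. A minimal sketch of one such scoring run; the missingness mechanism and imputer choice here are illustrative:

```python
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline

X, y = load_diabetes(return_X_y=True)
rng = np.random.RandomState(0)
X_missing = X.copy()
X_missing[rng.rand(*X.shape) < 0.25] = np.nan  # knock out ~25% of the entries

pipe = make_pipeline(
    SimpleImputer(strategy='mean', add_indicator=True),
    RandomForestRegressor(random_state=0),
)
scores = cross_val_score(pipe, X_missing, y,
                         scoring='neg_mean_squared_error', cv=5)
print('mean negated MSE: {:.1f}'.format(scores.mean()))
```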
