
Commit 6d6066e

Pushing the docs to 0.22/ for branch: 0.22.X, commit bfa2750ad3ae2ec8c87195786abd8c5cf4448758
1 parent 36aa9db commit 6d6066e

3,557 files changed: +636,012 −0 lines changed


0.22/.buildinfo

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 506805eb51c13bf5284500a7ba8b5be1
tags: 645f666f9bcd5a90fca523b33c5a78b7
Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Lasso path using LARS\n\n\nComputes Lasso Path along the regularization parameter using the LARS\nalgorithm on the diabetes dataset. Each color represents a different\nfeature of the coefficient vector, and this is displayed as a function\nof the regularization parameter.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "print(__doc__)\n\n# Author: Fabian Pedregosa <[email protected]>\n# Alexandre Gramfort <[email protected]>\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn import linear_model\nfrom sklearn import datasets\n\nX, y = datasets.load_diabetes(return_X_y=True)\n\nprint(\"Computing regularization path using the LARS ...\")\n_, _, coefs = linear_model.lars_path(X, y, method='lasso', verbose=True)\n\nxx = np.sum(np.abs(coefs.T), axis=1)\nxx /= xx[-1]\n\nplt.plot(xx, coefs.T)\nymin, ymax = plt.ylim()\nplt.vlines(xx, ymin, ymax, linestyle='dashed')\nplt.xlabel('|coef| / max|coef|')\nplt.ylabel('Coefficients')\nplt.title('LASSO Path')\nplt.axis('tight')\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.5"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
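For reference, a minimal sketch (not part of the committed files; it assumes scikit-learn >= 0.22 and the same diabetes data) of how the LARS-based path above relates to the coordinate-descent path returned by linear_model.lasso_path:

import numpy as np
from sklearn import datasets, linear_model

X, y = datasets.load_diabetes(return_X_y=True)

# LARS path: regularization values, active set, and coefficients along the path
alphas_lars, _, coefs_lars = linear_model.lars_path(X, y, method='lasso')

# Coordinate-descent path on the same data, for comparison
alphas_cd, coefs_cd, _ = linear_model.lasso_path(X, y)

# Both return one column of coefficients per regularization value
print(coefs_lars.shape, coefs_cd.shape)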
Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Label Propagation digits active learning\n\n\nDemonstrates an active learning technique to learn handwritten digits\nusing label propagation.\n\nWe start by training a label propagation model with only 10 labeled points,\nthen we select the top five most uncertain points to label. Next, we train\nwith 15 labeled points (original 10 + 5 new ones). We repeat this process\nfour times to have a model trained with 30 labeled examples. Note you can\nincrease this to label more than 30 by changing `max_iterations`. Labeling\nmore than 30 can be useful to get a sense for the speed of convergence of\nthis active learning technique.\n\nA plot will appear showing the top 5 most uncertain digits for each iteration\nof training. These may or may not contain mistakes, but we will train the next\nmodel with their true labels.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
"print(__doc__)\n\n# Authors: Clay Woolam <[email protected]>\n# License: BSD\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom scipy import stats\n\nfrom sklearn import datasets\nfrom sklearn.semi_supervised import LabelSpreading\nfrom sklearn.metrics import classification_report, confusion_matrix\n\ndigits = datasets.load_digits()\nrng = np.random.RandomState(0)\nindices = np.arange(len(digits.data))\nrng.shuffle(indices)\n\nX = digits.data[indices[:330]]\ny = digits.target[indices[:330]]\nimages = digits.images[indices[:330]]\n\nn_total_samples = len(y)\nn_labeled_points = 40\nmax_iterations = 5\n\nunlabeled_indices = np.arange(n_total_samples)[n_labeled_points:]\nf = plt.figure()\n\nfor i in range(max_iterations):\n if len(unlabeled_indices) == 0:\n print(\"No unlabeled items left to label.\")\n break\n y_train = np.copy(y)\n y_train[unlabeled_indices] = -1\n\n lp_model = LabelSpreading(gamma=0.25, max_iter=20)\n lp_model.fit(X, y_train)\n\n predicted_labels = lp_model.transduction_[unlabeled_indices]\n true_labels = y[unlabeled_indices]\n\n cm = confusion_matrix(true_labels, predicted_labels,\n labels=lp_model.classes_)\n\n print(\"Iteration %i %s\" % (i, 70 * \"_\"))\n print(\"Label Spreading model: %d labeled & %d unlabeled (%d total)\"\n % (n_labeled_points, n_total_samples - n_labeled_points,\n n_total_samples))\n\n print(classification_report(true_labels, predicted_labels))\n\n print(\"Confusion matrix\")\n print(cm)\n\n # compute the entropies of transduced label distributions\n pred_entropies = stats.distributions.entropy(\n lp_model.label_distributions_.T)\n\n # select up to 5 digit examples that the classifier is most uncertain about\n uncertainty_index = np.argsort(pred_entropies)[::-1]\n uncertainty_index = uncertainty_index[\n np.in1d(uncertainty_index, unlabeled_indices)][:5]\n\n # keep track of indices that we get labels for\n delete_indices = np.array([], dtype=int)\n\n # for more than 5 iterations, visualize the gain only on the first 5\n if i < 5:\n f.text(.05, (1 - (i + 1) * .183),\n \"model %d\\n\\nfit with\\n%d labels\" %\n ((i + 1), i * 5 + 10), size=10)\n for index, image_index in enumerate(uncertainty_index):\n image = images[image_index]\n\n # for more than 5 iterations, visualize the gain only on the first 5\n if i < 5:\n sub = f.add_subplot(5, 5, index + 1 + (5 * i))\n sub.imshow(image, cmap=plt.cm.gray_r, interpolation='none')\n sub.set_title(\"predict: %i\\ntrue: %i\" % (\n lp_model.transduction_[image_index], y[image_index]), size=10)\n sub.axis('off')\n\n # labeling 5 points, remote from labeled set\n delete_index, = np.where(unlabeled_indices == image_index)\n delete_indices = np.concatenate((delete_indices, delete_index))\n\n unlabeled_indices = np.delete(unlabeled_indices, delete_indices)\n n_labeled_points += len(uncertainty_index)\n\nf.suptitle(\"Active learning with Label Propagation.\\nRows show 5 most \"\n \"uncertain labels to learn with the next model.\", y=1.15)\nplt.subplots_adjust(left=0.2, bottom=0.03, right=0.9, top=0.9, wspace=0.2,\n hspace=0.85)\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.5"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
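For reference, a minimal sketch (not part of the committed files; assumptions: scikit-learn >= 0.22, the digits data, an arbitrary budget of 50 initial labels) isolating the entropy-based query step that the example above repeats at every iteration:

import numpy as np
from scipy.stats import entropy
from sklearn import datasets
from sklearn.semi_supervised import LabelSpreading

X, y = datasets.load_digits(return_X_y=True)
y_train = np.copy(y)
y_train[50:] = -1      # pretend only the first 50 points are labeled

model = LabelSpreading(gamma=0.25, max_iter=20)
model.fit(X, y_train)

# Entropy of the transduced label distributions: high entropy means uncertain
uncertainty = entropy(model.label_distributions_.T)
query = np.argsort(uncertainty)[::-1][:5]   # the 5 most uncertain samples to label next
print("indices to label next:", query)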
Lines changed: 123 additions & 0 deletions
@@ -0,0 +1,123 @@
"""
=================================
Combine predictors using stacking
=================================

Stacking refers to a method of blending estimators. In this strategy, some
estimators are individually fitted on some training data while a final
estimator is trained using the stacked predictions of these base estimators.

In this example, we illustrate the use case in which different regressors are
stacked together and a final linear penalized regressor is used to output the
prediction. We compare the performance of each individual regressor with the
stacking strategy. Stacking slightly improves the overall performance.

"""
print(__doc__)

# Authors: Guillaume Lemaitre <[email protected]>
# License: BSD 3 clause

###############################################################################
# The function ``plot_regression_results`` is used to plot the predicted and
# true targets.

import matplotlib.pyplot as plt


def plot_regression_results(ax, y_true, y_pred, title, scores, elapsed_time):
    """Scatter plot of the predicted vs true targets."""
    ax.plot([y_true.min(), y_true.max()],
            [y_true.min(), y_true.max()],
            '--r', linewidth=2)
    ax.scatter(y_true, y_pred, alpha=0.2)

    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()
    ax.spines['left'].set_position(('outward', 10))
    ax.spines['bottom'].set_position(('outward', 10))
    ax.set_xlim([y_true.min(), y_true.max()])
    ax.set_ylim([y_true.min(), y_true.max()])
    ax.set_xlabel('Measured')
    ax.set_ylabel('Predicted')
    extra = plt.Rectangle((0, 0), 0, 0, fc="w", fill=False,
                          edgecolor='none', linewidth=0)
    ax.legend([extra], [scores], loc='upper left')
    title = title + '\n Evaluation in {:.2f} seconds'.format(elapsed_time)
    ax.set_title(title)


###############################################################################
# Stack of predictors on a single data set
###############################################################################
# It is sometimes tedious to find the model which performs best on a given
# dataset. Stacking provides an alternative by combining the outputs of several
# learners, without the need to choose a model specifically. The performance of
# stacking is usually close to that of the best model, and sometimes it can
# outperform the prediction performance of each individual model.
#
# Here, we combine 3 learners (linear and non-linear) and use a ridge regressor
# to combine their outputs together.

from sklearn.ensemble import StackingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.linear_model import LassoCV
from sklearn.linear_model import RidgeCV

estimators = [
    ('Random Forest', RandomForestRegressor(random_state=42)),
    ('Lasso', LassoCV()),
    ('Gradient Boosting', HistGradientBoostingRegressor(random_state=0))
]
stacking_regressor = StackingRegressor(
    estimators=estimators, final_estimator=RidgeCV()
)


###############################################################################
# We use the Boston dataset (prediction of house prices). We check the
# performance of each individual predictor as well as of the stack of the
# regressors.

import time
import numpy as np
from sklearn.datasets import load_boston
from sklearn.model_selection import cross_validate, cross_val_predict

X, y = load_boston(return_X_y=True)

fig, axs = plt.subplots(2, 2, figsize=(9, 7))
axs = np.ravel(axs)

for ax, (name, est) in zip(axs, estimators + [('Stacking Regressor',
                                               stacking_regressor)]):
    start_time = time.time()
    score = cross_validate(est, X, y,
                           scoring=['r2', 'neg_mean_absolute_error'],
                           n_jobs=-1, verbose=0)
    elapsed_time = time.time() - start_time

    y_pred = cross_val_predict(est, X, y, n_jobs=-1, verbose=0)
    plot_regression_results(
        ax, y, y_pred,
        name,
        (r'$R^2={:.2f} \pm {:.2f}$' + '\n' + r'$MAE={:.2f} \pm {:.2f}$')
        .format(np.mean(score['test_r2']),
                np.std(score['test_r2']),
                -np.mean(score['test_neg_mean_absolute_error']),
                np.std(score['test_neg_mean_absolute_error'])),
        elapsed_time)

plt.suptitle('Single predictors versus stacked predictors')
plt.tight_layout()
plt.subplots_adjust(top=0.9)
plt.show()

###############################################################################
# The stacked regressor combines the strengths of the different regressors.
# However, we also see that training the stacked regressor is much more
# computationally expensive.
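For reference, a minimal sketch (not part of the committed files; assuming scikit-learn >= 0.22 and a reduced pair of base learners) of fitting a stacked regressor once on a held-out split rather than cross-validating it:

from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.linear_model import LassoCV, RidgeCV
from sklearn.model_selection import train_test_split

X, y = load_boston(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

reg = StackingRegressor(
    estimators=[('rf', RandomForestRegressor(random_state=42)),
                ('lasso', LassoCV())],
    final_estimator=RidgeCV())
reg.fit(X_train, y_train)

# The final RidgeCV is trained on out-of-fold predictions of the base
# estimators, so this single-split score is a reasonable sanity check.
print("held-out R^2: %.2f" % reg.score(X_test, y_test))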
Lines changed: 84 additions & 0 deletions
@@ -0,0 +1,84 @@
"""
Online learning of a dictionary of parts of faces
==================================================

This example uses a large dataset of faces to learn a set of 20 x 20
image patches that constitute faces.

From the programming standpoint, it is interesting because it shows how
to use the online API of scikit-learn to process a very large
dataset by chunks. The way we proceed is that we load an image at a time
and randomly extract 50 patches from this image. Once we have accumulated
500 of these patches (using 10 images), we run the
:func:`~sklearn.cluster.MiniBatchKMeans.partial_fit` method
of the online KMeans object, MiniBatchKMeans.

The verbose setting on the MiniBatchKMeans enables us to see that some
clusters are reassigned during the successive calls to
partial_fit. This is because the number of patches that they represent
has become too low, and it is better to choose a random new
cluster.
"""
print(__doc__)

import time

import matplotlib.pyplot as plt
import numpy as np


from sklearn import datasets
from sklearn.cluster import MiniBatchKMeans
from sklearn.feature_extraction.image import extract_patches_2d

faces = datasets.fetch_olivetti_faces()

# #############################################################################
# Learn the dictionary of images

print('Learning the dictionary... ')
rng = np.random.RandomState(0)
kmeans = MiniBatchKMeans(n_clusters=81, random_state=rng, verbose=True)
patch_size = (20, 20)

buffer = []
t0 = time.time()

# The online learning part: cycle over the whole dataset 6 times
index = 0
for _ in range(6):
    for img in faces.images:
        data = extract_patches_2d(img, patch_size, max_patches=50,
                                  random_state=rng)
        data = np.reshape(data, (len(data), -1))
        buffer.append(data)
        index += 1
        if index % 10 == 0:
            data = np.concatenate(buffer, axis=0)
            data -= np.mean(data, axis=0)
            data /= np.std(data, axis=0)
            kmeans.partial_fit(data)
            buffer = []
        if index % 100 == 0:
            print('Partial fit of %4i out of %i'
                  % (index, 6 * len(faces.images)))

dt = time.time() - t0
print('done in %.2fs.' % dt)

# #############################################################################
# Plot the results
plt.figure(figsize=(4.2, 4))
for i, patch in enumerate(kmeans.cluster_centers_):
    plt.subplot(9, 9, i + 1)
    plt.imshow(patch.reshape(patch_size), cmap=plt.cm.gray,
               interpolation='nearest')
    plt.xticks(())
    plt.yticks(())

# 6 passes over the dataset, 50 patches extracted per image
plt.suptitle('Patches of faces\nTrain time %.1fs on %d patches' %
             (dt, 6 * 50 * len(faces.images)), fontsize=16)
plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)

plt.show()
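For reference, a minimal sketch (not part of the committed files; synthetic data standing in for the buffered face patches) of the chunked partial_fit pattern the example above relies on:

import numpy as np
from sklearn.cluster import MiniBatchKMeans

rng = np.random.RandomState(0)
kmeans = MiniBatchKMeans(n_clusters=8, random_state=rng)

for _ in range(20):                   # stream 20 chunks, one at a time
    chunk = rng.randn(500, 400)       # stand-in for a buffer of flattened patches
    chunk -= chunk.mean(axis=0)       # same per-chunk normalization as above
    chunk /= chunk.std(axis=0)
    kmeans.partial_fit(chunk)

print(kmeans.cluster_centers_.shape)  # (8, 400): one centroid per cluster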
