linuxdevhub
diff --git a/‎dev/_downloads/3409d9766d352cc9f9b169d4a799a87a/auto_examples_python.zip
4.54 KB b/‎dev/_downloads/3409d9766d352cc9f9b169d4a799a87a/auto_examples_python.zip
4.54 KB
diff --git a/‎dev/_downloads/7ee55c12f8d3eb1dd8d2005d9dd7b6f1/plot_release_highlights_0_22_0.py
Lines changed: 119 additions & 0 deletions b/‎dev/_downloads/7ee55c12f8d3eb1dd8d2005d9dd7b6f1/plot_release_highlights_0_22_0.py
Lines changed: 119 additions & 0 deletions
diff --git a/‎dev/_downloads/c101b602d0b3510ef47dd19d64a4a92b/plot_release_highlights_0_22_0.ipynb
Lines changed: 133 additions & 0 deletions b/‎dev/_downloads/c101b602d0b3510ef47dd19d64a4a92b/plot_release_highlights_0_22_0.ipynb
Lines changed: 133 additions & 0 deletions
diff --git a/‎dev/_downloads/d34667f097c619f8afda4bc936e7af21/auto_examples_jupyter.zip
6.38 KB b/‎dev/_downloads/d34667f097c619f8afda4bc936e7af21/auto_examples_jupyter.zip
6.38 KB
diff --git a/‎dev/_downloads/scikit-learn-docs.pdf
35.4 KB b/‎dev/_downloads/scikit-learn-docs.pdf
35.4 KB
diff --git a/‎dev/_images/iris.png
0 Bytes b/‎dev/_images/iris.png
0 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
-268 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
-268 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0011.png
-268 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0011.png
-268 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_002.png
-290 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_002.png
-290 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0021.png
-290 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0021.png
-290 Bytes
@@ -0,0 +1,119 @@
+"""
+========================================
+Release Highlights for scikit-learn 0.22
+========================================
+
+We are pleased to announce the release of scikit-learn 0.22, which comes
+with many bug fixes and new features! We detail below a few of the major
+features of this release. For an exhaustive list of all the changes, please
+refer to the :ref:`release notes <changes_0_22>`.
+
+To install the latest version (with pip)::
+
+    pip install --upgrade scikit-learn
+
+or with conda::
+
+    conda install scikit-learn
+"""
+
+##############################################################################
+# Permutation-based feature importance
+# ------------------------------------
+#
+# The :func:`~sklearn.inspection.permutation_importance` can be used to get an
+# estimate of the importance of each feature, for any fitted estimator:
+
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.datasets import make_classification
+from sklearn.inspection import permutation_importance
+import matplotlib.pyplot as plt
+
+X, y = make_classification(random_state=0, n_features=5, n_informative=3)
+rf = RandomForestClassifier(random_state=0).fit(X, y)
+result = permutation_importance(rf, X, y, n_repeats=10, random_state=0,
+                                n_jobs=-1)
+
+fig, ax = plt.subplots()
+sorted_idx = result.importances_mean.argsort()
+ax.boxplot(result.importances[sorted_idx].T,
+           vert=False, labels=range(X.shape[1]))
+ax.set_title("Permutation Importance of each feature")
+ax.set_ylabel("Features")
+fig.tight_layout()
+plt.show()
+
+##############################################################################
+# Native support for missing values for gradient boosting
+# -------------------------------------------------------
+#
+# The :class:`~sklearn.ensemble.HistGradientBoostingClassifier`
+# and :class:`~sklearn.ensemble.HistGradientBoostingRegressor` now have native
+# support for missing values (NaNs). This means that there is no need for
+# imputing data when training or predicting.
+
+from sklearn.experimental import enable_hist_gradient_boosting  # noqa
+from sklearn.ensemble import HistGradientBoostingClassifier
+import numpy as np
+
+X = np.array([0, 1, 2, np.nan]).reshape(-1, 1)  # (4, 1) array ending in NaN
+y = [0, 0, 1, 1]  # one label per row of X
+
+gbdt = HistGradientBoostingClassifier(min_samples_leaf=1).fit(X, y)
+print(gbdt.predict(X))  # X still contains the NaN row; nothing was imputed
+
+##############################################################################
+# New plotting API
+# ----------------
+#
+# A new plotting API is available for creating visualizations. This new API
+# allows for quickly adjusting the visuals of a plot without involving any
+# recomputation. It is also possible to add different plots to the same
+# figure. See more examples in the :ref:`User Guide <visualizations>`.
+
+from sklearn.model_selection import train_test_split
+from sklearn.svm import SVC
+from sklearn.metrics import plot_roc_curve
+
+X, y = make_classification(random_state=0)
+X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
+
+svc = SVC(random_state=42)
+svc.fit(X_train, y_train)
+rfc = RandomForestClassifier(random_state=42)
+rfc.fit(X_train, y_train)
+
+svc_disp = plot_roc_curve(svc, X_test, y_test)  # no ax given
+rfc_disp = plot_roc_curve(rfc, X_test, y_test, ax=svc_disp.ax_)  # same axes
+rfc_disp.figure_.suptitle("ROC curve comparison")
+
+plt.show()
+
+#############################################################################
+# Tree pruning
+# ------------
+#
+# It is now possible to prune most tree-based estimators once the trees are
+# built. The pruning is based on minimal cost-complexity. Read more in the
+# :ref:`User Guide <minimal_cost_complexity_pruning>`.
+
+X, y = make_classification(random_state=0)
+
+rf = RandomForestClassifier(random_state=0, ccp_alpha=0).fit(X, y)  # unpruned
+print("Average number of nodes without pruning {:.1f}".format(
+    np.mean([e.tree_.node_count for e in rf.estimators_])))  # mean over trees
+
+rf = RandomForestClassifier(random_state=0, ccp_alpha=0.05).fit(X, y)  # pruned
+print("Average number of nodes with pruning {:.1f}".format(
+    np.mean([e.tree_.node_count for e in rf.estimators_])))
+
+############################################################################
+# Retrieve dataframes from OpenML
+# -------------------------------
+# :func:`~sklearn.datasets.fetch_openml` can now return a pandas dataframe and
+# thus properly handle datasets with heterogeneous data:
+
+from sklearn.datasets import fetch_openml
+
+titanic = fetch_openml('titanic', version=1, as_frame=True)  # pandas output
+print(titanic.data.head()[['pclass', 'embarked']])  # first rows, two columns
@@ -0,0 +1,133 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "%matplotlib inline"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "\n========================================\nRelease Highlights for scikit-learn 0.22\n========================================\n\nWe are pleased to announce the release of scikit-learn 0.22, which comes\nwith many bug fixes and new features! We detail below a few of the major\nfeatures of this release. For an exhaustive list of all the changes, please\nrefer to the `release notes <changes_0_22>`.\n\nTo install the latest version (with pip)::\n\n    pip install --upgrade scikit-learn\n\nor with conda::\n\n    conda install scikit-learn\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Permutation-based feature importance\n------------------------------------\n\nThe :func:`~sklearn.inspection.permutation_importance` can be used to get an\nestimate of the importance of each feature, for any fitted estimator:\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "from sklearn.ensemble import RandomForestClassifier\nfrom sklearn.datasets import make_classification\nfrom sklearn.inspection import permutation_importance\nimport matplotlib.pyplot as plt\n\nX, y = make_classification(random_state=0, n_features=5, n_informative=3)\nrf = RandomForestClassifier(random_state=0).fit(X, y)\nresult = permutation_importance(rf, X, y, n_repeats=10, random_state=0,\n                                n_jobs=-1)\n\nfig, ax = plt.subplots()\nsorted_idx = result.importances_mean.argsort()\nax.boxplot(result.importances[sorted_idx].T,\n           vert=False, labels=range(X.shape[1]))\nax.set_title(\"Permutation Importance of each feature\")\nax.set_ylabel(\"Features\")\nfig.tight_layout()\nplt.show()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Native support for missing values for gradient boosting\n-------------------------------------------------------\n\nThe :class:`~sklearn.ensemble.HistGradientBoostingClassifier`\nand :class:`~sklearn.ensemble.HistGradientBoostingRegressor` now have native\nsupport for missing values (NaNs). This means that there is no need for\nimputing data when training or predicting.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "from sklearn.experimental import enable_hist_gradient_boosting  # noqa\nfrom sklearn.ensemble import HistGradientBoostingClassifier\nimport numpy as np\n\nX = np.array([0, 1, 2, np.nan]).reshape(-1, 1)\ny = [0, 0, 1, 1]\n\ngbdt = HistGradientBoostingClassifier(min_samples_leaf=1).fit(X, y)\nprint(gbdt.predict(X))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "New plotting API\n----------------\n\nA new plotting API is available for creating visualizations. This new API\nallows for quickly adjusting the visuals of a plot without involving any\nrecomputation. It is also possible to add different plots to the same\nfigure. See more examples in the `User Guide <visualizations>`.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "from sklearn.model_selection import train_test_split\nfrom sklearn.svm import SVC\nfrom sklearn.metrics import plot_roc_curve\n\nX, y = make_classification(random_state=0)\nX_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)\n\nsvc = SVC(random_state=42)\nsvc.fit(X_train, y_train)\nrfc = RandomForestClassifier(random_state=42)\nrfc.fit(X_train, y_train)\n\nsvc_disp = plot_roc_curve(svc, X_test, y_test)\nrfc_disp = plot_roc_curve(rfc, X_test, y_test, ax=svc_disp.ax_)\nrfc_disp.figure_.suptitle(\"ROC curve comparison\")\n\nplt.show()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Tree pruning\n------------\n\nIt is now possible to prune most tree-based estimators once the trees are\nbuilt. The pruning is based on minimal cost-complexity. Read more in the\n`User Guide <minimal_cost_complexity_pruning>`.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "X, y = make_classification(random_state=0)\n\nrf = RandomForestClassifier(random_state=0, ccp_alpha=0).fit(X, y)\nprint(\"Average number of nodes without pruning {:.1f}\".format(\n    np.mean([e.tree_.node_count for e in rf.estimators_])))\n\nrf = RandomForestClassifier(random_state=0, ccp_alpha=0.05).fit(X, y)\nprint(\"Average number of nodes with pruning {:.1f}\".format(\n    np.mean([e.tree_.node_count for e in rf.estimators_])))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Retrieve dataframes from OpenML\n-------------------------------\n:func:`~sklearn.datasets.fetch_openml` can now return a pandas dataframe and\nthus properly handle datasets with heterogeneous data:\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "from sklearn.datasets import fetch_openml\n\ntitanic = fetch_openml('titanic', version=1, as_frame=True)\nprint(titanic.data.head()[['pclass', 'embarked']])"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.7.4"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}