Commit cdf815e

Pushing the docs to dev/ for branch: master, commit 8a8e21b2a395aea950f878c572771541d097ac16
1 parent 72fdc01 commit cdf815e

File tree

1,106 files changed (+3352, -3369 lines)

(Two binary files changed, -344 bytes and -339 bytes; binary content not shown.)

dev/_downloads/plot_compare_calibration.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
    },
    "outputs": [],
    "source": [
-     "print(__doc__)\n\n# Author: Jan Hendrik Metzen <[email protected]>\n# License: BSD Style.\n\nimport numpy as np\nnp.random.seed(0)\n\nimport matplotlib.pyplot as plt\n\nfrom sklearn import datasets\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.svm import LinearSVC\nfrom sklearn.calibration import calibration_curve\n\nX, y = datasets.make_classification(n_samples=100000, n_features=20,\n                                    n_informative=2, n_redundant=2)\n\ntrain_samples = 100  # Samples used for training the models\n\nX_train = X[:train_samples]\nX_test = X[train_samples:]\ny_train = y[:train_samples]\ny_test = y[train_samples:]\n\n# Create classifiers\nlr = LogisticRegression()\ngnb = GaussianNB()\nsvc = LinearSVC(C=1.0)\nrfc = RandomForestClassifier(n_estimators=100)\n\n\n# #############################################################################\n# Plot calibration plots\n\nplt.figure(figsize=(10, 10))\nax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)\nax2 = plt.subplot2grid((3, 1), (2, 0))\n\nax1.plot([0, 1], [0, 1], \"k:\", label=\"Perfectly calibrated\")\nfor clf, name in [(lr, 'Logistic'),\n                  (gnb, 'Naive Bayes'),\n                  (svc, 'Support Vector Classification'),\n                  (rfc, 'Random Forest')]:\n    clf.fit(X_train, y_train)\n    if hasattr(clf, \"predict_proba\"):\n        prob_pos = clf.predict_proba(X_test)[:, 1]\n    else:  # use decision function\n        prob_pos = clf.decision_function(X_test)\n        prob_pos = \\\n            (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())\n    fraction_of_positives, mean_predicted_value = \\\n        calibration_curve(y_test, prob_pos, n_bins=10)\n\n    ax1.plot(mean_predicted_value, fraction_of_positives, \"s-\",\n             label=\"%s\" % (name, ))\n\n    ax2.hist(prob_pos, range=(0, 1), bins=10, label=name,\n             histtype=\"step\", lw=2)\n\nax1.set_ylabel(\"Fraction of positives\")\nax1.set_ylim([-0.05, 1.05])\nax1.legend(loc=\"lower right\")\nax1.set_title('Calibration plots  (reliability curve)')\n\nax2.set_xlabel(\"Mean predicted value\")\nax2.set_ylabel(\"Count\")\nax2.legend(loc=\"upper center\", ncol=2)\n\nplt.tight_layout()\nplt.show()"
+     "print(__doc__)\n\n# Author: Jan Hendrik Metzen <[email protected]>\n# License: BSD Style.\n\nimport numpy as np\nnp.random.seed(0)\n\nimport matplotlib.pyplot as plt\n\nfrom sklearn import datasets\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.svm import LinearSVC\nfrom sklearn.calibration import calibration_curve\n\nX, y = datasets.make_classification(n_samples=100000, n_features=20,\n                                    n_informative=2, n_redundant=2)\n\ntrain_samples = 100  # Samples used for training the models\n\nX_train = X[:train_samples]\nX_test = X[train_samples:]\ny_train = y[:train_samples]\ny_test = y[train_samples:]\n\n# Create classifiers\nlr = LogisticRegression()\ngnb = GaussianNB()\nsvc = LinearSVC(C=1.0)\nrfc = RandomForestClassifier()\n\n\n# #############################################################################\n# Plot calibration plots\n\nplt.figure(figsize=(10, 10))\nax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)\nax2 = plt.subplot2grid((3, 1), (2, 0))\n\nax1.plot([0, 1], [0, 1], \"k:\", label=\"Perfectly calibrated\")\nfor clf, name in [(lr, 'Logistic'),\n                  (gnb, 'Naive Bayes'),\n                  (svc, 'Support Vector Classification'),\n                  (rfc, 'Random Forest')]:\n    clf.fit(X_train, y_train)\n    if hasattr(clf, \"predict_proba\"):\n        prob_pos = clf.predict_proba(X_test)[:, 1]\n    else:  # use decision function\n        prob_pos = clf.decision_function(X_test)\n        prob_pos = \\\n            (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())\n    fraction_of_positives, mean_predicted_value = \\\n        calibration_curve(y_test, prob_pos, n_bins=10)\n\n    ax1.plot(mean_predicted_value, fraction_of_positives, \"s-\",\n             label=\"%s\" % (name, ))\n\n    ax2.hist(prob_pos, range=(0, 1), bins=10, label=name,\n             histtype=\"step\", lw=2)\n\nax1.set_ylabel(\"Fraction of positives\")\nax1.set_ylim([-0.05, 1.05])\nax1.legend(loc=\"lower right\")\nax1.set_title('Calibration plots  (reliability curve)')\n\nax2.set_xlabel(\"Mean predicted value\")\nax2.set_ylabel(\"Count\")\nax2.legend(loc=\"upper center\", ncol=2)\n\nplt.tight_layout()\nplt.show()"
    ]
   }
  ],
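
The only change in this notebook (and in the matching script below) is dropping the explicit n_estimators=100 from RandomForestClassifier. A minimal sketch to check that this is behavior-preserving, assuming a scikit-learn version (0.22 dev or later) in which the default n_estimators is already 100:

import sklearn
from sklearn.ensemble import RandomForestClassifier

# Assumption: on the scikit-learn dev branch these docs track,
# the default n_estimators equals the value the examples used to pass.
explicit = RandomForestClassifier(n_estimators=100)
implicit = RandomForestClassifier()
assert explicit.get_params()["n_estimators"] == \
    implicit.get_params()["n_estimators"]
print(sklearn.__version__, implicit.get_params()["n_estimators"])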

dev/_downloads/plot_compare_calibration.py

Lines changed: 1 addition & 1 deletion
@@ -78,7 +78,7 @@
 lr = LogisticRegression()
 gnb = GaussianNB()
 svc = LinearSVC(C=1.0)
-rfc = RandomForestClassifier(n_estimators=100)
+rfc = RandomForestClassifier()
 
 
 # #############################################################################
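
For context, the surrounding example feeds each model's scores to calibration_curve to build the reliability curves. A standalone sketch of that call with hypothetical toy inputs (the example itself derives prob_pos from predict_proba, or from a min-max scaled decision_function):

import numpy as np
from sklearn.calibration import calibration_curve

# Toy labels and predicted positive-class probabilities (hypothetical).
y_test = np.array([0, 0, 1, 1, 1, 0, 1, 0])
prob_pos = np.array([0.1, 0.3, 0.8, 0.9, 0.6, 0.2, 0.7, 0.4])

# Bin the predictions, then compare the mean prediction in each bin with
# the observed fraction of positives; perfect calibration lies on y = x.
fraction_of_positives, mean_predicted_value = calibration_curve(
    y_test, prob_pos, n_bins=2)
print(fraction_of_positives, mean_predicted_value)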

dev/_downloads/plot_document_classification_20newsgroups.ipynb

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

dev/_downloads/plot_document_classification_20newsgroups.py

Lines changed: 1 addition & 1 deletion
@@ -251,7 +251,7 @@ def benchmark(clf):
         (PassiveAggressiveClassifier(max_iter=50, tol=1e-3),
          "Passive-Aggressive"),
         (KNeighborsClassifier(n_neighbors=10), "kNN"),
-        (RandomForestClassifier(n_estimators=100), "Random forest")):
+        (RandomForestClassifier(), "Random forest")):
     print('=' * 80)
     print(name)
     results.append(benchmark(clf))
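
The hunk above sits inside a loop that hands each classifier to a benchmark helper defined earlier in the file, outside this diff. Its exact body is not shown here; a plausible minimal version of such a helper, timing fit and predict and reporting accuracy on synthetic stand-in data, might look like this:

from time import time

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Stand-in data; the real example uses the 20 newsgroups text corpus.
X, y = make_classification(n_samples=1000, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

def benchmark(clf):
    # Time training and prediction, then score the predictions.
    t0 = time()
    clf.fit(X_train, y_train)
    train_time = time() - t0

    t0 = time()
    pred = clf.predict(X_test)
    test_time = time() - t0

    score = accuracy_score(y_test, pred)
    return clf.__class__.__name__, score, train_time, test_time

print(benchmark(LogisticRegression()))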

dev/_downloads/plot_ensemble_oob.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
    },
    "outputs": [],
    "source": [
-     "import matplotlib.pyplot as plt\n\nfrom collections import OrderedDict\nfrom sklearn.datasets import make_classification\nfrom sklearn.ensemble import RandomForestClassifier\n\n# Author: Kian Ho <[email protected]>\n#         Gilles Louppe <[email protected]>\n#         Andreas Mueller <[email protected]>\n#\n# License: BSD 3 Clause\n\nprint(__doc__)\n\nRANDOM_STATE = 123\n\n# Generate a binary classification dataset.\nX, y = make_classification(n_samples=500, n_features=25,\n                           n_clusters_per_class=1, n_informative=15,\n                           random_state=RANDOM_STATE)\n\n# NOTE: Setting the `warm_start` construction parameter to `True` disables\n# support for parallelized ensembles but is necessary for tracking the OOB\n# error trajectory during training.\nensemble_clfs = [\n    (\"RandomForestClassifier, max_features='sqrt'\",\n        RandomForestClassifier(n_estimators=100,\n                               warm_start=True, oob_score=True,\n                               max_features=\"sqrt\",\n                               random_state=RANDOM_STATE)),\n    (\"RandomForestClassifier, max_features='log2'\",\n        RandomForestClassifier(n_estimators=100,\n                               warm_start=True, max_features='log2',\n                               oob_score=True,\n                               random_state=RANDOM_STATE)),\n    (\"RandomForestClassifier, max_features=None\",\n        RandomForestClassifier(n_estimators=100,\n                               warm_start=True, max_features=None,\n                               oob_score=True,\n                               random_state=RANDOM_STATE))\n]\n\n# Map a classifier name to a list of (<n_estimators>, <error rate>) pairs.\nerror_rate = OrderedDict((label, []) for label, _ in ensemble_clfs)\n\n# Range of `n_estimators` values to explore.\nmin_estimators = 15\nmax_estimators = 175\n\nfor label, clf in ensemble_clfs:\n    for i in range(min_estimators, max_estimators + 1):\n        clf.set_params(n_estimators=i)\n        clf.fit(X, y)\n\n        # Record the OOB error for each `n_estimators=i` setting.\n        oob_error = 1 - clf.oob_score_\n        error_rate[label].append((i, oob_error))\n\n# Generate the \"OOB error rate\" vs. \"n_estimators\" plot.\nfor label, clf_err in error_rate.items():\n    xs, ys = zip(*clf_err)\n    plt.plot(xs, ys, label=label)\n\nplt.xlim(min_estimators, max_estimators)\nplt.xlabel(\"n_estimators\")\nplt.ylabel(\"OOB error rate\")\nplt.legend(loc=\"upper right\")\nplt.show()"
+     "import matplotlib.pyplot as plt\n\nfrom collections import OrderedDict\nfrom sklearn.datasets import make_classification\nfrom sklearn.ensemble import RandomForestClassifier\n\n# Author: Kian Ho <[email protected]>\n#         Gilles Louppe <[email protected]>\n#         Andreas Mueller <[email protected]>\n#\n# License: BSD 3 Clause\n\nprint(__doc__)\n\nRANDOM_STATE = 123\n\n# Generate a binary classification dataset.\nX, y = make_classification(n_samples=500, n_features=25,\n                           n_clusters_per_class=1, n_informative=15,\n                           random_state=RANDOM_STATE)\n\n# NOTE: Setting the `warm_start` construction parameter to `True` disables\n# support for parallelized ensembles but is necessary for tracking the OOB\n# error trajectory during training.\nensemble_clfs = [\n    (\"RandomForestClassifier, max_features='sqrt'\",\n        RandomForestClassifier(warm_start=True, oob_score=True,\n                               max_features=\"sqrt\",\n                               random_state=RANDOM_STATE)),\n    (\"RandomForestClassifier, max_features='log2'\",\n        RandomForestClassifier(warm_start=True, max_features='log2',\n                               oob_score=True,\n                               random_state=RANDOM_STATE)),\n    (\"RandomForestClassifier, max_features=None\",\n        RandomForestClassifier(warm_start=True, max_features=None,\n                               oob_score=True,\n                               random_state=RANDOM_STATE))\n]\n\n# Map a classifier name to a list of (<n_estimators>, <error rate>) pairs.\nerror_rate = OrderedDict((label, []) for label, _ in ensemble_clfs)\n\n# Range of `n_estimators` values to explore.\nmin_estimators = 15\nmax_estimators = 175\n\nfor label, clf in ensemble_clfs:\n    for i in range(min_estimators, max_estimators + 1):\n        clf.set_params(n_estimators=i)\n        clf.fit(X, y)\n\n        # Record the OOB error for each `n_estimators=i` setting.\n        oob_error = 1 - clf.oob_score_\n        error_rate[label].append((i, oob_error))\n\n# Generate the \"OOB error rate\" vs. \"n_estimators\" plot.\nfor label, clf_err in error_rate.items():\n    xs, ys = zip(*clf_err)\n    plt.plot(xs, ys, label=label)\n\nplt.xlim(min_estimators, max_estimators)\nplt.xlabel(\"n_estimators\")\nplt.ylabel(\"OOB error rate\")\nplt.legend(loc=\"upper right\")\nplt.show()"
    ]
   }
  ],

dev/_downloads/plot_ensemble_oob.py

Lines changed: 3 additions & 6 deletions
@@ -45,18 +45,15 @@
 # error trajectory during training.
 ensemble_clfs = [
     ("RandomForestClassifier, max_features='sqrt'",
-        RandomForestClassifier(n_estimators=100,
-                               warm_start=True, oob_score=True,
+        RandomForestClassifier(warm_start=True, oob_score=True,
                                max_features="sqrt",
                                random_state=RANDOM_STATE)),
     ("RandomForestClassifier, max_features='log2'",
-        RandomForestClassifier(n_estimators=100,
-                               warm_start=True, max_features='log2',
+        RandomForestClassifier(warm_start=True, max_features='log2',
                                oob_score=True,
                                random_state=RANDOM_STATE)),
     ("RandomForestClassifier, max_features=None",
-        RandomForestClassifier(n_estimators=100,
-                               warm_start=True, max_features=None,
+        RandomForestClassifier(warm_start=True, max_features=None,
                                oob_score=True,
                                random_state=RANDOM_STATE))
 ]
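
The warm_start pattern this example relies on, visible in the notebook source above: raising n_estimators and refitting adds trees to the existing forest rather than retraining from scratch, which is what makes tracing OOB error against ensemble size cheap. A minimal sketch, with arbitrary checkpoints standing in for the example's full 15 to 175 sweep:

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

X, y = make_classification(n_samples=500, n_features=25,
                           n_clusters_per_class=1, n_informative=15,
                           random_state=123)

# warm_start=True keeps already-fitted trees, so each fit() call only
# grows the forest up to the new n_estimators value.
clf = RandomForestClassifier(warm_start=True, oob_score=True,
                             random_state=123)
for n_estimators in (25, 75, 150):
    clf.set_params(n_estimators=n_estimators)
    clf.fit(X, y)
    print(n_estimators, 1 - clf.oob_score_)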
