Skip to content

Commit f601d5b

Browse files
committed
Pushing the docs to dev/ for branch: main, commit 2d9fa48aafa8e43304b7fd2b0cb76eabc48806ef
1 parent 54df66d commit f601d5b

File tree

1,319 files changed

+5785
-5780
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,319 files changed

+5785
-5780
lines changed

dev/.buildinfo

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
# Sphinx build info version 1
22
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
3-
config: 64552944cc0ffe4efdd50963e6027564
3+
config: a0f54c044cc4c39c7016296a66a9f00f
44
tags: 645f666f9bcd5a90fca523b33c5a78b7
Binary file not shown.

dev/_downloads/15dc6d7a809edf988a7328336a25faec/plot_column_transformer.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
},
1616
"outputs": [],
1717
"source": [
18-
"# Author: Matt Terry <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\n\nfrom sklearn.compose import ColumnTransformer\nfrom sklearn.datasets import fetch_20newsgroups\nfrom sklearn.decomposition import TruncatedSVD\nfrom sklearn.feature_extraction import DictVectorizer\nfrom sklearn.feature_extraction.text import TfidfVectorizer\nfrom sklearn.metrics import classification_report\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.preprocessing import FunctionTransformer\nfrom sklearn.svm import LinearSVC"
18+
"# Author: Matt Terry <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\n\nfrom sklearn.compose import ColumnTransformer\nfrom sklearn.datasets import fetch_20newsgroups\nfrom sklearn.decomposition import PCA\nfrom sklearn.feature_extraction import DictVectorizer\nfrom sklearn.feature_extraction.text import TfidfVectorizer\nfrom sklearn.metrics import classification_report\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.preprocessing import FunctionTransformer\nfrom sklearn.svm import LinearSVC"
1919
]
2020
},
2121
{
@@ -105,7 +105,7 @@
105105
},
106106
"outputs": [],
107107
"source": [
108-
"pipeline = Pipeline(\n [\n # Extract subject & body\n (\"subjectbody\", subject_body_transformer),\n # Use ColumnTransformer to combine the subject and body features\n (\n \"union\",\n ColumnTransformer(\n [\n # bag-of-words for subject (col 0)\n (\"subject\", TfidfVectorizer(min_df=50), 0),\n # bag-of-words with decomposition for body (col 1)\n (\n \"body_bow\",\n Pipeline(\n [\n (\"tfidf\", TfidfVectorizer()),\n (\"best\", TruncatedSVD(n_components=50)),\n ]\n ),\n 1,\n ),\n # Pipeline for pulling text stats from post's body\n (\n \"body_stats\",\n Pipeline(\n [\n (\n \"stats\",\n text_stats_transformer,\n ), # returns a list of dicts\n (\n \"vect\",\n DictVectorizer(),\n ), # list of dicts -> feature matrix\n ]\n ),\n 1,\n ),\n ],\n # weight above ColumnTransformer features\n transformer_weights={\n \"subject\": 0.8,\n \"body_bow\": 0.5,\n \"body_stats\": 1.0,\n },\n ),\n ),\n # Use a SVC classifier on the combined features\n (\"svc\", LinearSVC(dual=False)),\n ],\n verbose=True,\n)"
108+
"pipeline = Pipeline(\n [\n # Extract subject & body\n (\"subjectbody\", subject_body_transformer),\n # Use ColumnTransformer to combine the subject and body features\n (\n \"union\",\n ColumnTransformer(\n [\n # bag-of-words for subject (col 0)\n (\"subject\", TfidfVectorizer(min_df=50), 0),\n # bag-of-words with decomposition for body (col 1)\n (\n \"body_bow\",\n Pipeline(\n [\n (\"tfidf\", TfidfVectorizer()),\n (\"best\", PCA(n_components=50, svd_solver=\"arpack\")),\n ]\n ),\n 1,\n ),\n # Pipeline for pulling text stats from post's body\n (\n \"body_stats\",\n Pipeline(\n [\n (\n \"stats\",\n text_stats_transformer,\n ), # returns a list of dicts\n (\n \"vect\",\n DictVectorizer(),\n ), # list of dicts -> feature matrix\n ]\n ),\n 1,\n ),\n ],\n # weight above ColumnTransformer features\n transformer_weights={\n \"subject\": 0.8,\n \"body_bow\": 0.5,\n \"body_stats\": 1.0,\n },\n ),\n ),\n # Use a SVC classifier on the combined features\n (\"svc\", LinearSVC(dual=False)),\n ],\n verbose=True,\n)"
109109
]
110110
},
111111
{

dev/_downloads/3e8abcbcde21489054beb05cb87da525/plot_column_transformer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727
from sklearn.compose import ColumnTransformer
2828
from sklearn.datasets import fetch_20newsgroups
29-
from sklearn.decomposition import TruncatedSVD
29+
from sklearn.decomposition import PCA
3030
from sklearn.feature_extraction import DictVectorizer
3131
from sklearn.feature_extraction.text import TfidfVectorizer
3232
from sklearn.metrics import classification_report
@@ -141,7 +141,7 @@ def text_stats(posts):
141141
Pipeline(
142142
[
143143
("tfidf", TfidfVectorizer()),
144-
("best", TruncatedSVD(n_components=50)),
144+
("best", PCA(n_components=50, svd_solver="arpack")),
145145
]
146146
),
147147
1,
Binary file not shown.

dev/_downloads/scikit-learn-docs.zip

-18 KB
Binary file not shown.

0 commit comments

Comments
 (0)