codeur66
diff --git a/‎dev/_downloads/auto_examples_jupyter.zip
38 Bytes b/‎dev/_downloads/auto_examples_jupyter.zip
38 Bytes
diff --git a/‎dev/_downloads/auto_examples_python.zip
38 Bytes b/‎dev/_downloads/auto_examples_python.zip
38 Bytes
diff --git a/‎dev/_downloads/plot_column_transformer_mixed_types.ipynb
Lines changed: 1 addition & 1 deletion b/‎dev/_downloads/plot_column_transformer_mixed_types.ipynb
Lines changed: 1 addition & 1 deletion
diff --git a/‎dev/_downloads/plot_column_transformer_mixed_types.py
Lines changed: 7 additions & 7 deletions b/‎dev/_downloads/plot_column_transformer_mixed_types.py
Lines changed: 7 additions & 7 deletions
diff --git a/‎dev/_downloads/scikit-learn-docs.pdf
5.14 KB b/‎dev/_downloads/scikit-learn-docs.pdf
5.14 KB
diff --git a/‎dev/_images/iris.png
0 Bytes b/‎dev/_images/iris.png
0 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
98 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
98 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0011.png
98 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0011.png
98 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_002.png
0 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_002.png
0 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0021.png
0 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0021.png
0 Bytes
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "# Author: Pedro Morales <[email protected]>\n#\n# License: BSD 3 clause\n\nimport pandas as pd\nimport numpy as np\n\nfrom sklearn.compose import ColumnTransformer\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.impute import SimpleImputer\nfrom sklearn.preprocessing import StandardScaler, OneHotEncoder\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import train_test_split, GridSearchCV\n\nnp.random.seed(0)\n\n# Read data from Titanic dataset.\ntitanic_url = ('https://raw.githubusercontent.com/amueller/'\n               'scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv')\ndata = pd.read_csv(titanic_url)\n\n# We will train our classifier with the following features:\n# Numeric Features:\n# - age: float.\n# - fare: float.\n# Categorical Features:\n# - embarked: categories encoded as strings {'C', 'S', 'Q'}.\n# - sex: categories encoded as strings {'female', 'male'}.\n# - pclass: ordinal integers {1, 2, 3}.\n\n# We create the preprocessing pipelines for both numeric and categorical data.\nnumeric_features = ['age', 'fare']\nnumeric_transformer = Pipeline(steps=[\n    ('imputer', SimpleImputer(strategy='median')),\n    ('scaler', StandardScaler())])\n\ncategorical_features = ['embarked', 'sex', 'pclass']\ncategorical_transformer = Pipeline(steps=[\n    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),\n    ('onehot', OneHotEncoder(handle_unknown='ignore'))])\n\npreprocessor = ColumnTransformer(\n    transformers=[\n        ('num', numeric_transformer, numeric_features),\n        ('cat', categorical_transformer, categorical_features)])\n\n# Append classifier to preprocessing pipeline.\n# Now we have a full prediction pipeline.\nclf = Pipeline(steps=[('preprocessor', preprocessor),\n                      ('classifier', LogisticRegression())])\n\nX = data.drop('survived', axis=1)\ny = data['survived']\n\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n\nclf.fit(X_train, y_train)\nprint(\"model score: %.3f\" % clf.score(X_test, y_test))"
+        "# Author: Pedro Morales <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\n\nfrom sklearn.compose import ColumnTransformer\nfrom sklearn.datasets import fetch_openml\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.impute import SimpleImputer\nfrom sklearn.preprocessing import StandardScaler, OneHotEncoder\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import train_test_split, GridSearchCV\n\nnp.random.seed(0)\n\n# Read data from Titanic dataset.\ntitanic = fetch_openml(data_id=40945, as_frame=True)\nX, y = titanic.data, titanic.target\n\n# Alternatively X and y can be obtained directly from the frame attribute:\n# X = titanic.frame.drop('survived', axis=1)\n# y = titanic.frame['survived']\n\n# We will train our classifier with the following features:\n# Numeric Features:\n# - age: float.\n# - fare: float.\n# Categorical Features:\n# - embarked: categories encoded as strings {'C', 'S', 'Q'}.\n# - sex: categories encoded as strings {'female', 'male'}.\n# - pclass: ordinal integers {1, 2, 3}.\n\n# We create the preprocessing pipelines for both numeric and categorical data.\nnumeric_features = ['age', 'fare']\nnumeric_transformer = Pipeline(steps=[\n    ('imputer', SimpleImputer(strategy='median')),\n    ('scaler', StandardScaler())])\n\ncategorical_features = ['embarked', 'sex', 'pclass']\ncategorical_transformer = Pipeline(steps=[\n    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),\n    ('onehot', OneHotEncoder(handle_unknown='ignore'))])\n\npreprocessor = ColumnTransformer(\n    transformers=[\n        ('num', numeric_transformer, numeric_features),\n        ('cat', categorical_transformer, categorical_features)])\n\n# Append classifier to preprocessing pipeline.\n# Now we have a full prediction pipeline.\nclf = Pipeline(steps=[('preprocessor', preprocessor),\n                      ('classifier', LogisticRegression())])\n\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n\nclf.fit(X_train, y_train)\nprint(\"model score: %.3f\" % clf.score(X_test, y_test))"
       ]
     },
     {
 
@@ -24,10 +24,10 @@
 #
 # License: BSD 3 clause
 
-import pandas as pd
 import numpy as np
 
 from sklearn.compose import ColumnTransformer
+from sklearn.datasets import fetch_openml
 from sklearn.pipeline import Pipeline
 from sklearn.impute import SimpleImputer
 from sklearn.preprocessing import StandardScaler, OneHotEncoder
@@ -37,9 +37,12 @@
 np.random.seed(0)
 
 # Read data from Titanic dataset.
-titanic_url = ('https://raw.githubusercontent.com/amueller/'
-               'scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv')
-data = pd.read_csv(titanic_url)
+titanic = fetch_openml(data_id=40945, as_frame=True)
+X, y = titanic.data, titanic.target
+
+# Alternatively X and y can be obtained directly from the frame attribute:
+# X = titanic.frame.drop('survived', axis=1)
+# y = titanic.frame['survived']
 
 # We will train our classifier with the following features:
 # Numeric Features:
@@ -71,9 +74,6 @@
 clf = Pipeline(steps=[('preprocessor', preprocessor),
                       ('classifier', LogisticRegression())])
 
-X = data.drop('survived', axis=1)
-y = data['survived']
-
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
 
 clf.fit(X_train, y_train)
Original file line number	Diff line number	Diff line change
`@@ -26,7 +26,7 @@`
`26`	`26`	`},`
`27`	`27`	`"outputs": [],`
`28`	`28`	`"source": [`
`29`		- "# Author: Pedro Morales <[email protected]>\n#\n# License: BSD 3 clause\n\nimport pandas as pd\nimport numpy as np\n\nfrom sklearn.compose import ColumnTransformer\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.impute import SimpleImputer\nfrom sklearn.preprocessing import StandardScaler, OneHotEncoder\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import train_test_split, GridSearchCV\n\nnp.random.seed(0)\n\n# Read data from Titanic dataset.\ntitanic_url = ('https://raw.githubusercontent.com/amueller/'\n 'scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv')\ndata = pd.read_csv(titanic_url)\n\n# We will train our classifier with the following features:\n# Numeric Features:\n# - age: float.\n# - fare: float.\n# Categorical Features:\n# - embarked: categories encoded as strings {'C', 'S', 'Q'}.\n# - sex: categories encoded as strings {'female', 'male'}.\n# - pclass: ordinal integers {1, 2, 3}.\n\n# We create the preprocessing pipelines for both numeric and categorical data.\nnumeric_features = ['age', 'fare']\nnumeric_transformer = Pipeline(steps=[\n ('imputer', SimpleImputer(strategy='median')),\n ('scaler', StandardScaler())])\n\ncategorical_features = ['embarked', 'sex', 'pclass']\ncategorical_transformer = Pipeline(steps=[\n ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),\n ('onehot', OneHotEncoder(handle_unknown='ignore'))])\n\npreprocessor = ColumnTransformer(\n transformers=[\n ('num', numeric_transformer, numeric_features),\n ('cat', categorical_transformer, categorical_features)])\n\n# Append classifier to preprocessing pipeline.\n# Now we have a full prediction pipeline.\nclf = Pipeline(steps=[('preprocessor', preprocessor),\n ('classifier', LogisticRegression())])\n\nX = data.drop('survived', axis=1)\ny = data['survived']\n\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n\nclf.fit(X_train, y_train)\nprint(\"model score: %.3f\" % clf.score(X_test, y_test))"
	`29`	+ "# Author: Pedro Morales <[email protected]>\n#\n# License: BSD 3 clause\n\nimport numpy as np\n\nfrom sklearn.compose import ColumnTransformer\nfrom sklearn.datasets import fetch_openml\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.impute import SimpleImputer\nfrom sklearn.preprocessing import StandardScaler, OneHotEncoder\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import train_test_split, GridSearchCV\n\nnp.random.seed(0)\n\n# Read data from Titanic dataset.\ntitanic = fetch_openml(data_id=40945, as_frame=True)\nX, y = titanic.data, titanic.target\n\n# Alternatively X and y can be obtained directly from the frame attribute:\n# X = titanic.frame.drop('survived', axis=1)\n# y = titanic.frame['survived']\n\n# We will train our classifier with the following features:\n# Numeric Features:\n# - age: float.\n# - fare: float.\n# Categorical Features:\n# - embarked: categories encoded as strings {'C', 'S', 'Q'}.\n# - sex: categories encoded as strings {'female', 'male'}.\n# - pclass: ordinal integers {1, 2, 3}.\n\n# We create the preprocessing pipelines for both numeric and categorical data.\nnumeric_features = ['age', 'fare']\nnumeric_transformer = Pipeline(steps=[\n ('imputer', SimpleImputer(strategy='median')),\n ('scaler', StandardScaler())])\n\ncategorical_features = ['embarked', 'sex', 'pclass']\ncategorical_transformer = Pipeline(steps=[\n ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),\n ('onehot', OneHotEncoder(handle_unknown='ignore'))])\n\npreprocessor = ColumnTransformer(\n transformers=[\n ('num', numeric_transformer, numeric_features),\n ('cat', categorical_transformer, categorical_features)])\n\n# Append classifier to preprocessing pipeline.\n# Now we have a full prediction pipeline.\nclf = Pipeline(steps=[('preprocessor', preprocessor),\n ('classifier', LogisticRegression())])\n\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n\nclf.fit(X_train, y_train)\nprint(\"model score: %.3f\" % clf.score(X_test, y_test))"
`30`	`30`	`]`
`31`	`31`	`},`
`32`	`32`	`{`