Commit 7b9d997

Pushing the docs to dev/ for branch: master, commit 9ae41ab26ec2777da9b2f290a1583a9b79f1f376
1 parent df28534 commit 7b9d997

1,211 files changed (+3577, -3577 lines)


dev/_downloads/32c0e5b7a88834865bd9387a29b1efdf/plot_compare_cross_decomposition.ipynb

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.cross_decomposition import PLSCanonical, PLSRegression, CCA\n\n# #############################################################################\n# Dataset based latent variables model\n\nn = 500\n# 2 latents vars:\nl1 = np.random.normal(size=n)\nl2 = np.random.normal(size=n)\n\nlatents = np.array([l1, l1, l2, l2]).T\nX = latents + np.random.normal(size=4 * n).reshape((n, 4))\nY = latents + np.random.normal(size=4 * n).reshape((n, 4))\n\nX_train = X[:n // 2]\nY_train = Y[:n // 2]\nX_test = X[n // 2:]\nY_test = Y[n // 2:]\n\nprint(\"Corr(X)\")\nprint(np.round(np.corrcoef(X.T), 2))\nprint(\"Corr(Y)\")\nprint(np.round(np.corrcoef(Y.T), 2))\n\n# #############################################################################\n# Canonical (symmetric) PLS\n\n# Transform data\n# ~~~~~~~~~~~~~~\nplsca = PLSCanonical(n_components=2)\nplsca.fit(X_train, Y_train)\nX_train_r, Y_train_r = plsca.transform(X_train, Y_train)\nX_test_r, Y_test_r = plsca.transform(X_test, Y_test)\n\n# Scatter plot of scores\n# ~~~~~~~~~~~~~~~~~~~~~~\n# 1) On diagonal plot X vs Y scores on each components\nplt.figure(figsize=(12, 8))\nplt.subplot(221)\nplt.scatter(X_train_r[:, 0], Y_train_r[:, 0], label=\"train\",\n marker=\"o\", c=\"b\", s=25)\nplt.scatter(X_test_r[:, 0], Y_test_r[:, 0], label=\"test\",\n marker=\"o\", c=\"r\", s=25)\nplt.xlabel(\"x scores\")\nplt.ylabel(\"y scores\")\nplt.title('Comp. 1: X vs Y (test corr = %.2f)' %\n np.corrcoef(X_test_r[:, 0], Y_test_r[:, 0])[0, 1])\nplt.xticks(())\nplt.yticks(())\nplt.legend(loc=\"best\")\n\nplt.subplot(224)\nplt.scatter(X_train_r[:, 1], Y_train_r[:, 1], label=\"train\",\n marker=\"o\", c=\"b\", s=25)\nplt.scatter(X_test_r[:, 1], Y_test_r[:, 1], label=\"test\",\n marker=\"o\", c=\"r\", s=25)\nplt.xlabel(\"x scores\")\nplt.ylabel(\"y scores\")\nplt.title('Comp. 2: X vs Y (test corr = %.2f)' %\n np.corrcoef(X_test_r[:, 1], Y_test_r[:, 1])[0, 1])\nplt.xticks(())\nplt.yticks(())\nplt.legend(loc=\"best\")\n\n# 2) Off diagonal plot components 1 vs 2 for X and Y\nplt.subplot(222)\nplt.scatter(X_train_r[:, 0], X_train_r[:, 1], label=\"train\",\n marker=\"*\", c=\"b\", s=50)\nplt.scatter(X_test_r[:, 0], X_test_r[:, 1], label=\"test\",\n marker=\"*\", c=\"r\", s=50)\nplt.xlabel(\"X comp. 1\")\nplt.ylabel(\"X comp. 2\")\nplt.title('X comp. 1 vs X comp. 2 (test corr = %.2f)'\n % np.corrcoef(X_test_r[:, 0], X_test_r[:, 1])[0, 1])\nplt.legend(loc=\"best\")\nplt.xticks(())\nplt.yticks(())\n\nplt.subplot(223)\nplt.scatter(Y_train_r[:, 0], Y_train_r[:, 1], label=\"train\",\n marker=\"*\", c=\"b\", s=50)\nplt.scatter(Y_test_r[:, 0], Y_test_r[:, 1], label=\"test\",\n marker=\"*\", c=\"r\", s=50)\nplt.xlabel(\"Y comp. 1\")\nplt.ylabel(\"Y comp. 2\")\nplt.title('Y comp. 1 vs Y comp. 2 , (test corr = %.2f)'\n % np.corrcoef(Y_test_r[:, 0], Y_test_r[:, 1])[0, 1])\nplt.legend(loc=\"best\")\nplt.xticks(())\nplt.yticks(())\nplt.show()\n\n# #############################################################################\n# PLS regression, with multivariate response, a.k.a. PLS2\n\nn = 1000\nq = 3\np = 10\nX = np.random.normal(size=n * p).reshape((n, p))\nB = np.array([[1, 2] + [0] * (p - 2)] * q).T\n# each Yj = 1*X1 + 2*X2 + noize\nY = np.dot(X, B) + np.random.normal(size=n * q).reshape((n, q)) + 5\n\npls2 = PLSRegression(n_components=3)\npls2.fit(X, Y)\nprint(\"True B (such that: Y = XB + Err)\")\nprint(B)\n# compare pls2.coef_ with B\nprint(\"Estimated B\")\nprint(np.round(pls2.coef_, 1))\npls2.predict(X)\n\n# PLS regression, with univariate response, a.k.a. PLS1\n\nn = 1000\np = 10\nX = np.random.normal(size=n * p).reshape((n, p))\ny = X[:, 0] + 2 * X[:, 1] + np.random.normal(size=n * 1) + 5\npls1 = PLSRegression(n_components=3)\npls1.fit(X, y)\n# note that the number of components exceeds 1 (the dimension of y)\nprint(\"Estimated betas\")\nprint(np.round(pls1.coef_, 1))\n\n# #############################################################################\n# CCA (PLS mode B with symmetric deflation)\n\ncca = CCA(n_components=2)\ncca.fit(X_train, Y_train)\nX_train_r, Y_train_r = cca.transform(X_train, Y_train)\nX_test_r, Y_test_r = cca.transform(X_test, Y_test)"
+"print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.cross_decomposition import PLSCanonical, PLSRegression, CCA\n\n# #############################################################################\n# Dataset based latent variables model\n\nn = 500\n# 2 latents vars:\nl1 = np.random.normal(size=n)\nl2 = np.random.normal(size=n)\n\nlatents = np.array([l1, l1, l2, l2]).T\nX = latents + np.random.normal(size=4 * n).reshape((n, 4))\nY = latents + np.random.normal(size=4 * n).reshape((n, 4))\n\nX_train = X[:n // 2]\nY_train = Y[:n // 2]\nX_test = X[n // 2:]\nY_test = Y[n // 2:]\n\nprint(\"Corr(X)\")\nprint(np.round(np.corrcoef(X.T), 2))\nprint(\"Corr(Y)\")\nprint(np.round(np.corrcoef(Y.T), 2))\n\n# #############################################################################\n# Canonical (symmetric) PLS\n\n# Transform data\n# ~~~~~~~~~~~~~~\nplsca = PLSCanonical(n_components=2)\nplsca.fit(X_train, Y_train)\nX_train_r, Y_train_r = plsca.transform(X_train, Y_train)\nX_test_r, Y_test_r = plsca.transform(X_test, Y_test)\n\n# Scatter plot of scores\n# ~~~~~~~~~~~~~~~~~~~~~~\n# 1) On diagonal plot X vs Y scores on each components\nplt.figure(figsize=(12, 8))\nplt.subplot(221)\nplt.scatter(X_train_r[:, 0], Y_train_r[:, 0], label=\"train\",\n marker=\"o\", s=25)\nplt.scatter(X_test_r[:, 0], Y_test_r[:, 0], label=\"test\",\n marker=\"o\", s=25)\nplt.xlabel(\"x scores\")\nplt.ylabel(\"y scores\")\nplt.title('Comp. 1: X vs Y (test corr = %.2f)' %\n np.corrcoef(X_test_r[:, 0], Y_test_r[:, 0])[0, 1])\nplt.xticks(())\nplt.yticks(())\nplt.legend(loc=\"best\")\n\nplt.subplot(224)\nplt.scatter(X_train_r[:, 1], Y_train_r[:, 1], label=\"train\",\n marker=\"o\", s=25)\nplt.scatter(X_test_r[:, 1], Y_test_r[:, 1], label=\"test\",\n marker=\"o\", s=25)\nplt.xlabel(\"x scores\")\nplt.ylabel(\"y scores\")\nplt.title('Comp. 2: X vs Y (test corr = %.2f)' %\n np.corrcoef(X_test_r[:, 1], Y_test_r[:, 1])[0, 1])\nplt.xticks(())\nplt.yticks(())\nplt.legend(loc=\"best\")\n\n# 2) Off diagonal plot components 1 vs 2 for X and Y\nplt.subplot(222)\nplt.scatter(X_train_r[:, 0], X_train_r[:, 1], label=\"train\",\n marker=\"*\", s=50)\nplt.scatter(X_test_r[:, 0], X_test_r[:, 1], label=\"test\",\n marker=\"*\", s=50)\nplt.xlabel(\"X comp. 1\")\nplt.ylabel(\"X comp. 2\")\nplt.title('X comp. 1 vs X comp. 2 (test corr = %.2f)'\n % np.corrcoef(X_test_r[:, 0], X_test_r[:, 1])[0, 1])\nplt.legend(loc=\"best\")\nplt.xticks(())\nplt.yticks(())\n\nplt.subplot(223)\nplt.scatter(Y_train_r[:, 0], Y_train_r[:, 1], label=\"train\",\n marker=\"*\", s=50)\nplt.scatter(Y_test_r[:, 0], Y_test_r[:, 1], label=\"test\",\n marker=\"*\", s=50)\nplt.xlabel(\"Y comp. 1\")\nplt.ylabel(\"Y comp. 2\")\nplt.title('Y comp. 1 vs Y comp. 2 , (test corr = %.2f)'\n % np.corrcoef(Y_test_r[:, 0], Y_test_r[:, 1])[0, 1])\nplt.legend(loc=\"best\")\nplt.xticks(())\nplt.yticks(())\nplt.show()\n\n# #############################################################################\n# PLS regression, with multivariate response, a.k.a. PLS2\n\nn = 1000\nq = 3\np = 10\nX = np.random.normal(size=n * p).reshape((n, p))\nB = np.array([[1, 2] + [0] * (p - 2)] * q).T\n# each Yj = 1*X1 + 2*X2 + noize\nY = np.dot(X, B) + np.random.normal(size=n * q).reshape((n, q)) + 5\n\npls2 = PLSRegression(n_components=3)\npls2.fit(X, Y)\nprint(\"True B (such that: Y = XB + Err)\")\nprint(B)\n# compare pls2.coef_ with B\nprint(\"Estimated B\")\nprint(np.round(pls2.coef_, 1))\npls2.predict(X)\n\n# PLS regression, with univariate response, a.k.a. PLS1\n\nn = 1000\np = 10\nX = np.random.normal(size=n * p).reshape((n, p))\ny = X[:, 0] + 2 * X[:, 1] + np.random.normal(size=n * 1) + 5\npls1 = PLSRegression(n_components=3)\npls1.fit(X, y)\n# note that the number of components exceeds 1 (the dimension of y)\nprint(\"Estimated betas\")\nprint(np.round(pls1.coef_, 1))\n\n# #############################################################################\n# CCA (PLS mode B with symmetric deflation)\n\ncca = CCA(n_components=2)\ncca.fit(X_train, Y_train)\nX_train_r, Y_train_r = cca.transform(X_train, Y_train)\nX_test_r, Y_test_r = cca.transform(X_test, Y_test)"
 ]
 }
 ],

dev/_downloads/44597bf49ecba5eec700d262a093340f/plot_compare_cross_decomposition.py

Lines changed: 8 additions & 8 deletions
@@ -62,9 +62,9 @@
 plt.figure(figsize=(12, 8))
 plt.subplot(221)
 plt.scatter(X_train_r[:, 0], Y_train_r[:, 0], label="train",
-            marker="o", c="b", s=25)
+            marker="o", s=25)
 plt.scatter(X_test_r[:, 0], Y_test_r[:, 0], label="test",
-            marker="o", c="r", s=25)
+            marker="o", s=25)
 plt.xlabel("x scores")
 plt.ylabel("y scores")
 plt.title('Comp. 1: X vs Y (test corr = %.2f)' %
@@ -75,9 +75,9 @@
 
 plt.subplot(224)
 plt.scatter(X_train_r[:, 1], Y_train_r[:, 1], label="train",
-            marker="o", c="b", s=25)
+            marker="o", s=25)
 plt.scatter(X_test_r[:, 1], Y_test_r[:, 1], label="test",
-            marker="o", c="r", s=25)
+            marker="o", s=25)
 plt.xlabel("x scores")
 plt.ylabel("y scores")
 plt.title('Comp. 2: X vs Y (test corr = %.2f)' %
@@ -89,9 +89,9 @@
 # 2) Off diagonal plot components 1 vs 2 for X and Y
 plt.subplot(222)
 plt.scatter(X_train_r[:, 0], X_train_r[:, 1], label="train",
-            marker="*", c="b", s=50)
+            marker="*", s=50)
 plt.scatter(X_test_r[:, 0], X_test_r[:, 1], label="test",
-            marker="*", c="r", s=50)
+            marker="*", s=50)
 plt.xlabel("X comp. 1")
 plt.ylabel("X comp. 2")
 plt.title('X comp. 1 vs X comp. 2 (test corr = %.2f)'
@@ -102,9 +102,9 @@
 
 plt.subplot(223)
 plt.scatter(Y_train_r[:, 0], Y_train_r[:, 1], label="train",
-            marker="*", c="b", s=50)
+            marker="*", s=50)
 plt.scatter(Y_test_r[:, 0], Y_test_r[:, 1], label="test",
-            marker="*", c="r", s=50)
+            marker="*", s=50)
 plt.xlabel("Y comp. 1")
 plt.ylabel("Y comp. 2")
 plt.title('Y comp. 1 vs Y comp. 2 , (test corr = %.2f)'
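
All eight changed lines in this file (and the matching notebook cell above) follow one pattern: the hard-coded c="b" (blue) and c="r" (red) arguments are dropped from the plt.scatter calls, presumably so colors come from Matplotlib's default property cycle instead of being pinned to specific color letters. A minimal standalone sketch of that behavior, using made-up toy data (not part of this commit):

import numpy as np
import matplotlib.pyplot as plt

rng = np.random.RandomState(0)
train = rng.normal(size=(50, 2))
test = rng.normal(size=(50, 2))

# With no explicit c=..., each successive scatter call on the same axes
# is assigned the next color of the default property cycle (C0, C1, ...),
# so "train" and "test" stay visually distinct while matching whatever
# style sheet is active.
plt.scatter(train[:, 0], train[:, 1], label="train", marker="o", s=25)
plt.scatter(test[:, 0], test[:, 1], label="test", marker="o", s=25)
plt.legend(loc="best")
plt.show()

Under the default style this keeps a two-color train/test contrast (C0 blue, C1 orange) while letting the docs' plots follow any configured Matplotlib style.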

dev/_downloads/scikit-learn-docs.pdf

374 Bytes
Binary file not shown.

dev/_images/iris.png

0 Bytes
