- "print(__doc__)\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.cross_decomposition import PLSCanonical, PLSRegression, CCA\n\n# #############################################################################\n# Dataset based latent variables model\n\nn = 500\n# 2 latents vars:\nl1 = np.random.normal(size=n)\nl2 = np.random.normal(size=n)\n\nlatents = np.array([l1, l1, l2, l2]).T\nX = latents + np.random.normal(size=4 * n).reshape((n, 4))\nY = latents + np.random.normal(size=4 * n).reshape((n, 4))\n\nX_train = X[:n // 2]\nY_train = Y[:n // 2]\nX_test = X[n // 2:]\nY_test = Y[n // 2:]\n\nprint(\"Corr(X)\")\nprint(np.round(np.corrcoef(X.T), 2))\nprint(\"Corr(Y)\")\nprint(np.round(np.corrcoef(Y.T), 2))\n\n# #############################################################################\n# Canonical (symmetric) PLS\n\n# Transform data\n# ~~~~~~~~~~~~~~\nplsca = PLSCanonical(n_components=2)\nplsca.fit(X_train, Y_train)\nX_train_r, Y_train_r = plsca.transform(X_train, Y_train)\nX_test_r, Y_test_r = plsca.transform(X_test, Y_test)\n\n# Scatter plot of scores\n# ~~~~~~~~~~~~~~~~~~~~~~\n# 1) On diagonal plot X vs Y scores on each components\nplt.figure(figsize=(12, 8))\nplt.subplot(221)\nplt.scatter(X_train_r[:, 0], Y_train_r[:, 0], label=\"train\",\n marker=\"o\", c=\"b\", s=25)\nplt.scatter(X_test_r[:, 0], Y_test_r[:, 0], label=\"test\",\n marker=\"o\", c=\"r\", s=25)\nplt.xlabel(\"x scores\")\nplt.ylabel(\"y scores\")\nplt.title('Comp. 1: X vs Y (test corr = %.2f)' %\n np.corrcoef(X_test_r[:, 0], Y_test_r[:, 0])[0, 1])\nplt.xticks(())\nplt.yticks(())\nplt.legend(loc=\"best\")\n\nplt.subplot(224)\nplt.scatter(X_train_r[:, 1], Y_train_r[:, 1], label=\"train\",\n marker=\"o\", c=\"b\", s=25)\nplt.scatter(X_test_r[:, 1], Y_test_r[:, 1], label=\"test\",\n marker=\"o\", c=\"r\", s=25)\nplt.xlabel(\"x scores\")\nplt.ylabel(\"y scores\")\nplt.title('Comp. 2: X vs Y (test corr = %.2f)' %\n np.corrcoef(X_test_r[:, 1], Y_test_r[:, 1])[0, 1])\nplt.xticks(())\nplt.yticks(())\nplt.legend(loc=\"best\")\n\n# 2) Off diagonal plot components 1 vs 2 for X and Y\nplt.subplot(222)\nplt.scatter(X_train_r[:, 0], X_train_r[:, 1], label=\"train\",\n marker=\"*\", c=\"b\", s=50)\nplt.scatter(X_test_r[:, 0], X_test_r[:, 1], label=\"test\",\n marker=\"*\", c=\"r\", s=50)\nplt.xlabel(\"X comp. 1\")\nplt.ylabel(\"X comp. 2\")\nplt.title('X comp. 1 vs X comp. 2 (test corr = %.2f)'\n % np.corrcoef(X_test_r[:, 0], X_test_r[:, 1])[0, 1])\nplt.legend(loc=\"best\")\nplt.xticks(())\nplt.yticks(())\n\nplt.subplot(223)\nplt.scatter(Y_train_r[:, 0], Y_train_r[:, 1], label=\"train\",\n marker=\"*\", c=\"b\", s=50)\nplt.scatter(Y_test_r[:, 0], Y_test_r[:, 1], label=\"test\",\n marker=\"*\", c=\"r\", s=50)\nplt.xlabel(\"Y comp. 1\")\nplt.ylabel(\"Y comp. 2\")\nplt.title('Y comp. 1 vs Y comp. 2 , (test corr = %.2f)'\n % np.corrcoef(Y_test_r[:, 0], Y_test_r[:, 1])[0, 1])\nplt.legend(loc=\"best\")\nplt.xticks(())\nplt.yticks(())\nplt.show()\n\n# #############################################################################\n# PLS regression, with multivariate response, a.k.a. PLS2\n\nn = 1000\nq = 3\np = 10\nX = np.random.normal(size=n * p).reshape((n, p))\nB = np.array([[1, 2] + [0] * (p - 2)] * q).T\n# each Yj = 1*X1 + 2*X2 + noize\nY = np.dot(X, B) + np.random.normal(size=n * q).reshape((n, q)) + 5\n\npls2 = PLSRegression(n_components=3)\npls2.fit(X, Y)\nprint(\"True B (such that: Y = XB + Err)\")\nprint(B)\n# compare pls2.coef_ with B\nprint(\"Estimated B\")\nprint(np.round(pls2.coef_, 1))\npls2.predict(X)\n\n# PLS regression, with univariate response, a.k.a. PLS1\n\nn = 1000\np = 10\nX = np.random.normal(size=n * p).reshape((n, p))\ny = X[:, 0] + 2 * X[:, 1] + np.random.normal(size=n * 1) + 5\npls1 = PLSRegression(n_components=3)\npls1.fit(X, y)\n# note that the number of components exceeds 1 (the dimension of y)\nprint(\"Estimated betas\")\nprint(np.round(pls1.coef_, 1))\n\n# #############################################################################\n# CCA (PLS mode B with symmetric deflation)\n\ncca = CCA(n_components=2)\ncca.fit(X_train, Y_train)\nX_train_r, Y_train_r = cca.transform(X_train, Y_train)\nX_test_r, Y_test_r = cca.transform(X_test, Y_test)"
0 commit comments