krishnatray
diff --git a/‎dev/_downloads/plot_prediction_latency.py
Lines changed: 17 additions & 19 deletions b/‎dev/_downloads/plot_prediction_latency.py
Lines changed: 17 additions & 19 deletions
diff --git a/‎dev/_images/plot_agglomerative_clustering.png
155 Bytes b/‎dev/_images/plot_agglomerative_clustering.png
155 Bytes
diff --git a/‎dev/_images/plot_agglomerative_clustering1.png
155 Bytes b/‎dev/_images/plot_agglomerative_clustering1.png
155 Bytes
diff --git a/‎dev/_images/plot_agglomerative_clustering_001.png
736 Bytes b/‎dev/_images/plot_agglomerative_clustering_001.png
736 Bytes
diff --git a/‎dev/_images/plot_agglomerative_clustering_0011.png
736 Bytes b/‎dev/_images/plot_agglomerative_clustering_0011.png
736 Bytes
diff --git a/‎dev/_images/plot_agglomerative_clustering_002.png
678 Bytes b/‎dev/_images/plot_agglomerative_clustering_002.png
678 Bytes
diff --git a/‎dev/_images/plot_agglomerative_clustering_0021.png
678 Bytes b/‎dev/_images/plot_agglomerative_clustering_0021.png
678 Bytes
diff --git a/‎dev/_images/plot_agglomerative_clustering_003.png
191 Bytes b/‎dev/_images/plot_agglomerative_clustering_003.png
191 Bytes
diff --git a/‎dev/_images/plot_agglomerative_clustering_0031.png
191 Bytes b/‎dev/_images/plot_agglomerative_clustering_0031.png
191 Bytes
diff --git a/‎dev/_images/plot_agglomerative_clustering_004.png
108 Bytes b/‎dev/_images/plot_agglomerative_clustering_004.png
108 Bytes
@@ -24,12 +24,15 @@
 import numpy as np
 import matplotlib.pyplot as plt
 
+from sklearn.preprocessing import StandardScaler
+from sklearn.model_selection import train_test_split
 from scipy.stats import scoreatpercentile
 from sklearn.datasets.samples_generator import make_regression
 from sklearn.ensemble.forest import RandomForestRegressor
 from sklearn.linear_model.ridge import Ridge
 from sklearn.linear_model.stochastic_gradient import SGDRegressor
 from sklearn.svm.classes import SVR
+from sklearn.utils import shuffle
 
 
 def _not_in_sphinx():
@@ -93,27 +96,22 @@ def generate_dataset(n_train, n_test, n_features, noise=0.1, verbose=False):
     """Generate a regression dataset with the given parameters."""
     if verbose:
         print("generating dataset...")
+
     X, y, coef = make_regression(n_samples=n_train + n_test,
                                  n_features=n_features, noise=noise, coef=True)
-    X_train = X[:n_train]
-    y_train = y[:n_train]
-    X_test = X[n_train:]
-    y_test = y[n_train:]
-    idx = np.arange(n_train)
-    np.random.seed(13)
-    np.random.shuffle(idx)
-    X_train = X_train[idx]
-    y_train = y_train[idx]
-
-    std = X_train.std(axis=0)
-    mean = X_train.mean(axis=0)
-    X_train = (X_train - mean) / std
-    X_test = (X_test - mean) / std
-
-    std = y_train.std(axis=0)
-    mean = y_train.mean(axis=0)
-    y_train = (y_train - mean) / std
-    y_test = (y_test - mean) / std
+
+    random_seed = 13
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, train_size=n_train, random_state=random_seed)
+    X_train, y_train = shuffle(X_train, y_train, random_state=random_seed)
+
+    X_scaler = StandardScaler()
+    X_train = X_scaler.fit_transform(X_train)
+    X_test = X_scaler.transform(X_test)
+
+    y_scaler = StandardScaler()
+    y_train = y_scaler.fit_transform(y_train[:, None])[:, 0]
+    y_test = y_scaler.transform(y_test[:, None])[:, 0]
 
     gc.collect()
     if verbose: