8
8
replace KNeighborsTransformer and perform approximate nearest neighbors.
9
9
These packages can be installed with `pip install annoy nmslib`.
10
10
11
- Note: Currently `TSNE(metric='precomputed')` does not modify the precomputed
12
- distances, and thus assumes that precomputed euclidean distances are squared.
13
- In future versions, a parameter in TSNE will control the optional squaring of
14
- precomputed distances (see #12401).
15
-
16
11
Note: In KNeighborsTransformer we use the definition which includes each
17
12
training point as its own neighbor in the count of `n_neighbors`, and for
18
13
compatibility reasons, one extra neighbor is computed when
@@ -91,7 +86,6 @@ def fit(self, X):
91
86
# see more metric in the manual
92
87
# https://github.com/nmslib/nmslib/tree/master/manual
93
88
space = {
94
- 'sqeuclidean' : 'l2' ,
95
89
'euclidean' : 'l2' ,
96
90
'cosine' : 'cosinesimil' ,
97
91
'l1' : 'l1' ,
@@ -115,9 +109,6 @@ def transform(self, X):
115
109
indices , distances = zip (* results )
116
110
indices , distances = np .vstack (indices ), np .vstack (distances )
117
111
118
- if self .metric == 'sqeuclidean' :
119
- distances **= 2
120
-
121
112
indptr = np .arange (0 , n_samples_transform * n_neighbors + 1 ,
122
113
n_neighbors )
123
114
kneighbors_graph = csr_matrix ((distances .ravel (), indices .ravel (),
@@ -139,8 +130,7 @@ def __init__(self, n_neighbors=5, metric='euclidean', n_trees=10,
139
130
140
131
def fit (self , X ):
141
132
self .n_samples_fit_ = X .shape [0 ]
142
- metric = self .metric if self .metric != 'sqeuclidean' else 'euclidean'
143
- self .annoy_ = annoy .AnnoyIndex (X .shape [1 ], metric = metric )
133
+ self .annoy_ = annoy .AnnoyIndex (X .shape [1 ], metric = self .metric )
144
134
for i , x in enumerate (X ):
145
135
self .annoy_ .add_item (i , x .tolist ())
146
136
self .annoy_ .build (self .n_trees )
@@ -177,9 +167,6 @@ def _transform(self, X):
177
167
x .tolist (), n_neighbors , self .search_k ,
178
168
include_distances = True )
179
169
180
- if self .metric == 'sqeuclidean' :
181
- distances **= 2
182
-
183
170
indptr = np .arange (0 , n_samples_transform * n_neighbors + 1 ,
184
171
n_neighbors )
185
172
kneighbors_graph = csr_matrix ((distances .ravel (), indices .ravel (),
@@ -209,7 +196,7 @@ def test_transformers():
209
196
210
197
def load_mnist (n_samples ):
211
198
"""Load MNIST, shuffle the data, and return only n_samples."""
212
- mnist = fetch_openml ("mnist_784" )
199
+ mnist = fetch_openml ("mnist_784" , as_frame = False )
213
200
X , y = shuffle (mnist .data , mnist .target , random_state = 2 )
214
201
return X [:n_samples ] / 255 , y [:n_samples ]
215
202
@@ -222,34 +209,39 @@ def run_benchmark():
222
209
223
210
n_iter = 500
224
211
perplexity = 30
212
+ metric = "euclidean"
225
213
# TSNE requires a certain number of neighbors which depends on the
226
214
# perplexity parameter.
227
215
# Add one since we include each sample as its own neighbor.
228
216
n_neighbors = int (3. * perplexity + 1 ) + 1
229
217
218
+ tsne_params = dict (perplexity = perplexity , method = "barnes_hut" ,
219
+ random_state = 42 , n_iter = n_iter ,
220
+ square_distances = True )
221
+
230
222
transformers = [
231
- ('AnnoyTransformer' , AnnoyTransformer (n_neighbors = n_neighbors ,
232
- metric = 'sqeuclidean' )),
233
- ('NMSlibTransformer' , NMSlibTransformer (n_neighbors = n_neighbors ,
234
- metric = 'sqeuclidean' )),
235
- ('KNeighborsTransformer' , KNeighborsTransformer (
236
- n_neighbors = n_neighbors , mode = 'distance' , metric = 'sqeuclidean' )),
237
- ('TSNE with AnnoyTransformer' , make_pipeline (
238
- AnnoyTransformer (n_neighbors = n_neighbors , metric = 'sqeuclidean' ),
239
- TSNE (metric = 'precomputed' , perplexity = perplexity ,
240
- method = "barnes_hut" , random_state = 42 , n_iter = n_iter ), )),
241
- ('TSNE with NMSlibTransformer' , make_pipeline (
242
- NMSlibTransformer (n_neighbors = n_neighbors , metric = 'sqeuclidean' ),
243
- TSNE (metric = 'precomputed' , perplexity = perplexity ,
244
- method = "barnes_hut" , random_state = 42 , n_iter = n_iter ), )),
245
- ('TSNE with KNeighborsTransformer' , make_pipeline (
246
- KNeighborsTransformer (n_neighbors = n_neighbors , mode = 'distance' ,
247
- metric = 'sqeuclidean' ),
248
- TSNE (metric = 'precomputed' , perplexity = perplexity ,
249
- method = "barnes_hut" , random_state = 42 , n_iter = n_iter ), )),
223
+ ('AnnoyTransformer' ,
224
+ AnnoyTransformer (n_neighbors = n_neighbors , metric = metric )),
225
+ ('NMSlibTransformer' ,
226
+ NMSlibTransformer (n_neighbors = n_neighbors , metric = metric )),
227
+ ('KNeighborsTransformer' ,
228
+ KNeighborsTransformer (n_neighbors = n_neighbors , mode = 'distance' ,
229
+ metric = metric )),
230
+ ('TSNE with AnnoyTransformer' ,
231
+ make_pipeline (
232
+ AnnoyTransformer (n_neighbors = n_neighbors , metric = metric ),
233
+ TSNE (metric = 'precomputed' , ** tsne_params ))),
234
+ ('TSNE with NMSlibTransformer' ,
235
+ make_pipeline (
236
+ NMSlibTransformer (n_neighbors = n_neighbors , metric = metric ),
237
+ TSNE (metric = 'precomputed' , ** tsne_params ))),
238
+ ('TSNE with KNeighborsTransformer' ,
239
+ make_pipeline (
240
+ KNeighborsTransformer (n_neighbors = n_neighbors , mode = 'distance' ,
241
+ metric = metric ),
242
+ TSNE (metric = 'precomputed' , ** tsne_params ))),
250
243
('TSNE with internal NearestNeighbors' ,
251
- TSNE (metric = 'sqeuclidean' , perplexity = perplexity , method = "barnes_hut" ,
252
- random_state = 42 , n_iter = n_iter )),
244
+ TSNE (metric = metric , ** tsne_params )),
253
245
]
254
246
255
247
# init the plot
0 commit comments