@@ -243,6 +243,11 @@ def fd_not_exceeding_threadpool_size(threadpool_size: int) -> None:
243
243
len ([p .path for p in open_files if "sqlite3" in p .path ]) - 1 <= threadpool_size
244
244
)
245
245
246
def get_space(collection: Collection):
    """Return the name of the distance space configured for ``collection``.

    The value is read from ``collection._model.configuration_json``. A
    non-null ``"spann"`` entry takes precedence; otherwise the ``"hnsw"``
    entry is consulted.

    Args:
        collection: The collection whose index configuration is inspected.

    Returns:
        The configured ``"space"`` value, or ``"l2"`` when neither index
        entry is present or the selected entry stores no space.
    """
    configuration = collection._model.configuration_json

    # A single ``.get`` covers both "key missing" and "key present but null",
    # so no separate membership test is needed.
    index_configuration = configuration.get("spann")
    if index_configuration is None:
        index_configuration = configuration.get("hnsw")

    space = None
    if index_configuration is not None:
        space = index_configuration.get("space")

    # l2 is treated as the default distance function elsewhere in this file,
    # so fall back to it instead of failing on a missing entry or returning
    # None to a caller that only recognises "cosine", "ip" and "l2".
    return "l2" if space is None else space
246
251
247
252
def ann_accuracy (
248
253
collection : Collection ,
@@ -267,26 +272,26 @@ def ann_accuracy(
267
272
assert isinstance (normalized_record_set ["documents" ], list )
268
273
# Compute the embeddings for the documents
269
274
embeddings = embedding_function (normalized_record_set ["documents" ])
275
+
276
+ space = get_space (collection )
277
+ if space == "cosine" :
278
+ distance_function = distance_functions .cosine
279
+ if space == "ip" :
280
+ distance_function = distance_functions .ip
281
+ if space == "l2" :
282
+ distance_function = distance_functions .l2
270
283
271
284
# l2 is the default distance function
272
- distance_function = distance_functions .l2
273
285
accuracy_threshold = 1e-6
274
286
assert collection .metadata is not None
275
287
assert embeddings is not None
276
- if "hnsw:space" in collection .metadata :
277
- space = collection .metadata ["hnsw:space" ]
278
- # TODO: ip and cosine are numerically unstable in HNSW.
279
- # The higher the dimensionality, the more noise is introduced, since each float element
280
- # of the vector has noise added, which is then subsequently included in all normalization calculations.
281
- # This means that higher dimensions will have more noise, and thus more error.
282
- assert all (isinstance (e , (list , np .ndarray )) for e in embeddings )
283
- dim = len (embeddings [0 ])
284
- accuracy_threshold = accuracy_threshold * math .pow (10 , int (math .log10 (dim )))
285
-
286
- if space == "cosine" :
287
- distance_function = distance_functions .cosine
288
- if space == "ip" :
289
- distance_function = distance_functions .ip
288
+ # TODO: ip and cosine are numerically unstable in HNSW.
289
+ # The higher the dimensionality, the more noise is introduced, since each float element
290
+ # of the vector has noise added, which is then subsequently included in all normalization calculations.
291
+ # This means that higher dimensions will have more noise, and thus more error.
292
+ assert all (isinstance (e , (list , np .ndarray )) for e in embeddings )
293
+ dim = len (embeddings [0 ])
294
+ accuracy_threshold = accuracy_threshold * math .pow (10 , int (math .log10 (dim )))
290
295
291
296
# Perform exact distance computation
292
297
if query_embeddings is None :
0 commit comments