Skip to content

Commit f26839d

Browse files
committed
[BUG] Fix ANN accuracy calculation in tests after collection config change + handle None collection config
1 parent f871782 commit f26839d

File tree

2 files changed

+28
-16
lines changed

2 files changed

+28
-16
lines changed

chromadb/test/property/invariants.py

+20-15
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,11 @@ def fd_not_exceeding_threadpool_size(threadpool_size: int) -> None:
243243
len([p.path for p in open_files if "sqlite3" in p.path]) - 1 <= threadpool_size
244244
)
245245

246+
def get_space(collection: Collection):
247+
if 'spann' in collection._model.configuration_json and collection._model.configuration_json.get('spann') is not None:
248+
return collection._model.configuration_json.get('spann').get('space')
249+
else:
250+
return collection._model.configuration_json.get('hnsw').get('space')
246251

247252
def ann_accuracy(
248253
collection: Collection,
@@ -267,26 +272,26 @@ def ann_accuracy(
267272
assert isinstance(normalized_record_set["documents"], list)
268273
# Compute the embeddings for the documents
269274
embeddings = embedding_function(normalized_record_set["documents"])
275+
276+
space = get_space(collection)
277+
if space == "cosine":
278+
distance_function = distance_functions.cosine
279+
if space == "ip":
280+
distance_function = distance_functions.ip
281+
if space == "l2":
282+
distance_function = distance_functions.l2
270283

271284
# l2 is the default distance function
272-
distance_function = distance_functions.l2
273285
accuracy_threshold = 1e-6
274286
assert collection.metadata is not None
275287
assert embeddings is not None
276-
if "hnsw:space" in collection.metadata:
277-
space = collection.metadata["hnsw:space"]
278-
# TODO: ip and cosine are numerically unstable in HNSW.
279-
# The higher the dimensionality, the more noise is introduced, since each float element
280-
# of the vector has noise added, which is then subsequently included in all normalization calculations.
281-
# This means that higher dimensions will have more noise, and thus more error.
282-
assert all(isinstance(e, (list, np.ndarray)) for e in embeddings)
283-
dim = len(embeddings[0])
284-
accuracy_threshold = accuracy_threshold * math.pow(10, int(math.log10(dim)))
285-
286-
if space == "cosine":
287-
distance_function = distance_functions.cosine
288-
if space == "ip":
289-
distance_function = distance_functions.ip
288+
# TODO: ip and cosine are numerically unstable in HNSW.
289+
# The higher the dimensionality, the more noise is introduced, since each float element
290+
# of the vector has noise added, which is then subsequently included in all normalization calculations.
291+
# This means that higher dimensions will have more noise, and thus more error.
292+
assert all(isinstance(e, (list, np.ndarray)) for e in embeddings)
293+
dim = len(embeddings[0])
294+
accuracy_threshold = accuracy_threshold * math.pow(10, int(math.log10(dim)))
290295

291296
# Perform exact distance computation
292297
if query_embeddings is None:

rust/frontend/src/server.rs

+8-1
Original file line numberDiff line numberDiff line change
@@ -910,7 +910,14 @@ async fn create_collection(
910910
c,
911911
server.config.frontend.default_knn_index,
912912
)?),
913-
None => None,
913+
None => Some(InternalCollectionConfiguration::try_from_config(
914+
CollectionConfiguration {
915+
hnsw: None,
916+
spann: None,
917+
embedding_function: None,
918+
},
919+
server.config.frontend.default_knn_index,
920+
)?),
914921
};
915922

916923
let request = CreateCollectionRequest::try_new(

0 commit comments

Comments
 (0)