Add BM25 hybrid search (dense + sparse vectors with RRF)
Some checks failed
Some checks failed
- Add SparseTextEmbedding("Qdrant/bm25") to FastEmbedProvider for BM25 tokenization
- Add sparse vector config (IDF modifier) to collection creation
- Store both dense and sparse vectors per document
- Use Qdrant prefetch + Reciprocal Rank Fusion for hybrid search
- Add HYBRID_SEARCH env var (default: false) for backward compatibility: dense-only search remains the default unless hybrid search is explicitly enabled
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -23,6 +23,9 @@ class Entry(BaseModel):
|
||||
metadata: Metadata | None = None
|
||||
|
||||
|
||||
# Name of the sparse (BM25) vector. The same key is used in the collection's
# sparse vector config, in each stored point's vector dict, and as the `using`
# target of the sparse prefetch at query time.
SPARSE_VECTOR_NAME = "bm25"
|
||||
|
||||
|
||||
class QdrantConnector:
|
||||
"""
|
||||
Encapsulates the connection to a Qdrant server and all the methods to interact with it.
|
||||
@@ -32,6 +35,7 @@ class QdrantConnector:
|
||||
the collection name to be provided.
|
||||
:param embedding_provider: The embedding provider to use.
|
||||
:param qdrant_local_path: The path to the storage directory for the Qdrant client, if local mode is used.
|
||||
:param hybrid_search: Whether to enable hybrid search (dense + BM25 sparse vectors with RRF).
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -42,15 +46,19 @@ class QdrantConnector:
|
||||
embedding_provider: EmbeddingProvider,
|
||||
qdrant_local_path: str | None = None,
|
||||
field_indexes: dict[str, models.PayloadSchemaType] | None = None,
|
||||
hybrid_search: bool = False,
|
||||
):
|
||||
self._qdrant_url = qdrant_url.rstrip("/") if qdrant_url else None
|
||||
self._qdrant_api_key = qdrant_api_key
|
||||
self._default_collection_name = collection_name
|
||||
self._embedding_provider = embedding_provider
|
||||
self._hybrid_search = hybrid_search and embedding_provider.supports_sparse()
|
||||
self._client = AsyncQdrantClient(
|
||||
location=qdrant_url, api_key=qdrant_api_key, path=qdrant_local_path
|
||||
)
|
||||
self._field_indexes = field_indexes
|
||||
if self._hybrid_search:
|
||||
logger.info("Hybrid search enabled (dense + BM25 sparse vectors with RRF)")
|
||||
|
||||
async def get_collection_names(self) -> list[str]:
|
||||
"""
|
||||
@@ -72,19 +80,30 @@ class QdrantConnector:
|
||||
await self._ensure_collection_exists(collection_name)
|
||||
|
||||
# Embed the document
|
||||
# TODO: instead of embedding text explicitly, use `models.Document`,
|
||||
# it should unlock usage of server-side inference.
|
||||
embeddings = await self._embedding_provider.embed_documents([entry.content])
|
||||
|
||||
# Add to Qdrant
|
||||
# Build vector dict
|
||||
vector_name = self._embedding_provider.get_vector_name()
|
||||
vector_data: dict = {vector_name: embeddings[0]}
|
||||
|
||||
# Add sparse vector if hybrid search is enabled
|
||||
if self._hybrid_search:
|
||||
sparse_embeddings = await self._embedding_provider.embed_documents_sparse(
|
||||
[entry.content]
|
||||
)
|
||||
sparse = sparse_embeddings[0]
|
||||
vector_data[SPARSE_VECTOR_NAME] = models.SparseVector(
|
||||
indices=sparse.indices, values=sparse.values
|
||||
)
|
||||
|
||||
# Add to Qdrant
|
||||
payload = {"document": entry.content, METADATA_PATH: entry.metadata}
|
||||
await self._client.upsert(
|
||||
collection_name=collection_name,
|
||||
points=[
|
||||
models.PointStruct(
|
||||
id=uuid.uuid4().hex,
|
||||
vector={vector_name: embeddings[0]},
|
||||
vector=vector_data,
|
||||
payload=payload,
|
||||
)
|
||||
],
|
||||
@@ -113,21 +132,43 @@ class QdrantConnector:
|
||||
if not collection_exists:
|
||||
return []
|
||||
|
||||
# Embed the query
|
||||
# TODO: instead of embedding text explicitly, use `models.Document`,
|
||||
# it should unlock usage of server-side inference.
|
||||
|
||||
query_vector = await self._embedding_provider.embed_query(query)
|
||||
vector_name = self._embedding_provider.get_vector_name()
|
||||
|
||||
# Search in Qdrant
|
||||
search_results = await self._client.query_points(
|
||||
collection_name=collection_name,
|
||||
query=query_vector,
|
||||
using=vector_name,
|
||||
limit=limit,
|
||||
query_filter=query_filter,
|
||||
)
|
||||
# Hybrid search: prefetch dense + sparse, fuse with RRF
|
||||
if self._hybrid_search:
|
||||
sparse_vector = await self._embedding_provider.embed_query_sparse(query)
|
||||
search_results = await self._client.query_points(
|
||||
collection_name=collection_name,
|
||||
prefetch=[
|
||||
models.Prefetch(
|
||||
query=query_vector,
|
||||
using=vector_name,
|
||||
limit=limit,
|
||||
filter=query_filter,
|
||||
),
|
||||
models.Prefetch(
|
||||
query=models.SparseVector(
|
||||
indices=sparse_vector.indices,
|
||||
values=sparse_vector.values,
|
||||
),
|
||||
using=SPARSE_VECTOR_NAME,
|
||||
limit=limit,
|
||||
filter=query_filter,
|
||||
),
|
||||
],
|
||||
query=models.FusionQuery(fusion=models.Fusion.RRF),
|
||||
limit=limit,
|
||||
)
|
||||
else:
|
||||
# Dense-only search (original behavior)
|
||||
search_results = await self._client.query_points(
|
||||
collection_name=collection_name,
|
||||
query=query_vector,
|
||||
using=vector_name,
|
||||
limit=limit,
|
||||
query_filter=query_filter,
|
||||
)
|
||||
|
||||
return [
|
||||
Entry(
|
||||
@@ -149,6 +190,16 @@ class QdrantConnector:
|
||||
|
||||
# Use the vector name as defined in the embedding provider
|
||||
vector_name = self._embedding_provider.get_vector_name()
|
||||
|
||||
# Sparse vectors config for hybrid search (BM25)
|
||||
sparse_config = None
|
||||
if self._hybrid_search:
|
||||
sparse_config = {
|
||||
SPARSE_VECTOR_NAME: models.SparseVectorParams(
|
||||
modifier=models.Modifier.IDF,
|
||||
)
|
||||
}
|
||||
|
||||
await self._client.create_collection(
|
||||
collection_name=collection_name,
|
||||
vectors_config={
|
||||
@@ -157,6 +208,7 @@ class QdrantConnector:
|
||||
distance=models.Distance.COSINE,
|
||||
)
|
||||
},
|
||||
sparse_vectors_config=sparse_config,
|
||||
)
|
||||
|
||||
# Create payload indexes if configured
|
||||
|
||||
Reference in New Issue
Block a user