Add BM25 hybrid search (dense + sparse vectors with RRF)
Some checks failed
Some checks failed
- Add SparseTextEmbedding("Qdrant/bm25") to FastEmbedProvider for BM25 tokenization
- Add sparse vector config (IDF modifier) to collection creation
- Store both dense and sparse vectors per document
- Use Qdrant prefetch + Reciprocal Rank Fusion for hybrid search
- Add HYBRID_SEARCH env var (default: false) for backward compatibility
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,4 +1,13 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class SparseVector:
    """Sparse vector described by a list of indices and a list of values."""

    # Integer indices of the sparse vector.
    indices: list[int]
    # Float values of the sparse vector.
    # NOTE(review): presumably one value per entry in ``indices`` — confirm with callers.
    values: list[float]
|
||||
|
||||
|
||||
class EmbeddingProvider(ABC):
|
||||
@@ -23,3 +32,15 @@ class EmbeddingProvider(ABC):
|
||||
def get_vector_size(self) -> int:
    """
    Return the vector size to use for the Qdrant collection.

    No implementation is provided at this level; the body is empty.
    """
    # NOTE(review): the enclosing class derives from ABC, so this is presumably
    # decorated with @abstractmethod just above the visible lines — confirm.
|
||||
|
||||
def supports_sparse(self) -> bool:
    """
    Report whether this provider can produce sparse (BM25) embeddings.

    :return: Always False at this level; providers that implement the
        sparse embedding methods override this.
    """
    sparse_available = False
    return sparse_available
|
||||
|
||||
async def embed_documents_sparse(self, documents: list[str]) -> list[SparseVector]:
    """
    Embed documents into sparse vectors.

    Override this when supports_sparse() returns True.

    :param documents: The documents to embed.
    :return: One sparse vector per document (in overriding providers).
    :raises NotImplementedError: Always, in this default implementation.
    """
    # Name the concrete provider so the failure is diagnosable from the message alone.
    raise NotImplementedError(
        f"{type(self).__name__} does not support sparse document embeddings"
    )
|
||||
|
||||
async def embed_query_sparse(self, query: str) -> SparseVector:
    """
    Embed a query into a sparse vector.

    Override this when supports_sparse() returns True.

    :param query: The query text to embed.
    :return: The sparse vector for the query (in overriding providers).
    :raises NotImplementedError: Always, in this default implementation.
    """
    # Name the concrete provider so the failure is diagnosable from the message alone.
    raise NotImplementedError(
        f"{type(self).__name__} does not support sparse query embeddings"
    )
|
||||
|
||||
@@ -3,15 +3,18 @@ from mcp_server_qdrant.embeddings.types import EmbeddingProviderType
|
||||
from mcp_server_qdrant.settings import EmbeddingProviderSettings
|
||||
|
||||
|
||||
def create_embedding_provider(
    settings: EmbeddingProviderSettings, enable_sparse: bool = False
) -> EmbeddingProvider:
    """
    Build the embedding provider selected by the settings.

    :param settings: The settings for the embedding provider.
    :param enable_sparse: Whether to enable sparse (BM25) embeddings.
    :return: An instance of the specified embedding provider.
    :raises ValueError: If the configured provider type is not supported.
    """
    requested_type = settings.provider_type
    if requested_type == EmbeddingProviderType.FASTEMBED:
        # Imported here rather than at module level, as in the original.
        from mcp_server_qdrant.embeddings.fastembed import FastEmbedProvider

        provider = FastEmbedProvider(settings.model_name, enable_sparse=enable_sparse)
        return provider

    raise ValueError(f"Unsupported embedding provider: {settings.provider_type}")
|
||||
|
||||
@@ -1,24 +1,31 @@
|
||||
import asyncio
|
||||
|
||||
from fastembed import TextEmbedding
|
||||
from fastembed import SparseTextEmbedding, TextEmbedding
|
||||
from fastembed.common.model_description import DenseModelDescription
|
||||
|
||||
from mcp_server_qdrant.embeddings.base import EmbeddingProvider
|
||||
from mcp_server_qdrant.embeddings.base import EmbeddingProvider, SparseVector
|
||||
|
||||
|
||||
class FastEmbedProvider(EmbeddingProvider):
|
||||
"""
|
||||
FastEmbed implementation of the embedding provider.
|
||||
:param model_name: The name of the FastEmbed model to use.
|
||||
:param enable_sparse: Whether to enable BM25 sparse embeddings for hybrid search.
|
||||
"""
|
||||
|
||||
def __init__(self, model_name: str, enable_sparse: bool = False):
    """
    Set up the dense model and, on request, the BM25 sparse model.

    :param model_name: The name of the FastEmbed model to use.
    :param enable_sparse: Whether to enable BM25 sparse embeddings for hybrid search.
    """
    self.model_name = model_name
    self.embedding_model = TextEmbedding(model_name)
    self._enable_sparse = enable_sparse
    # The sparse model is only constructed when sparse embeddings were requested.
    self._sparse_model = SparseTextEmbedding("Qdrant/bm25") if enable_sparse else None
|
||||
|
||||
def supports_sparse(self) -> bool:
    """
    Report whether BM25 sparse embeddings are available on this provider.

    :return: True only when sparse embeddings were enabled and the sparse
        model has been created.
    """
    # bool() keeps the result a strict bool: a bare `and` would hand back
    # a falsy non-bool flag (e.g. None) unchanged.
    return bool(self._enable_sparse) and self._sparse_model is not None
|
||||
|
||||
async def embed_documents(self, documents: list[str]) -> list[list[float]]:
|
||||
"""Embed a list of documents into vectors."""
|
||||
# Run in a thread pool since FastEmbed is synchronous
|
||||
loop = asyncio.get_event_loop()
|
||||
embeddings = await loop.run_in_executor(
|
||||
None, lambda: list(self.embedding_model.passage_embed(documents))
|
||||
@@ -27,13 +34,37 @@ class FastEmbedProvider(EmbeddingProvider):
|
||||
|
||||
async def embed_query(self, query: str) -> list[float]:
    """
    Embed a query into a vector.

    :param query: The query text to embed.
    :return: The first embedding produced for the query, converted via ``tolist()``.
    """
    # Run in a thread pool since FastEmbed is synchronous.
    # get_running_loop() is the supported way to obtain the loop from inside
    # a coroutine; get_event_loop() is discouraged there.
    loop = asyncio.get_running_loop()
    embeddings = await loop.run_in_executor(
        None, lambda: list(self.embedding_model.query_embed([query]))
    )
    return embeddings[0].tolist()
|
||||
|
||||
async def embed_documents_sparse(self, documents: list[str]) -> list[SparseVector]:
    """
    Embed documents into BM25 sparse vectors.

    :param documents: The documents to embed.
    :return: One SparseVector per result produced by the sparse model.
    :raises NotImplementedError: If the sparse model was not created
        (the provider was built without enable_sparse=True).
    """
    sparse_model = self._sparse_model
    if sparse_model is None:
        # Fail up front with a clear message instead of an AttributeError
        # on None raised from inside the worker thread.
        raise NotImplementedError(
            "Sparse embeddings are not enabled; "
            "create the provider with enable_sparse=True"
        )

    # The sparse model is called synchronously, so run it in the default executor.
    loop = asyncio.get_running_loop()
    results = await loop.run_in_executor(
        None, lambda: list(sparse_model.passage_embed(documents))
    )
    return [
        SparseVector(
            indices=r.indices.tolist(),
            values=r.values.tolist(),
        )
        for r in results
    ]
|
||||
|
||||
async def embed_query_sparse(self, query: str) -> SparseVector:
    """
    Embed a query into a BM25 sparse vector.

    :param query: The query text to embed.
    :return: A SparseVector built from the first result of the sparse model.
    :raises NotImplementedError: If the sparse model was not created
        (the provider was built without enable_sparse=True).
    """
    sparse_model = self._sparse_model
    if sparse_model is None:
        # Fail up front with a clear message instead of an AttributeError
        # on None raised from inside the worker thread.
        raise NotImplementedError(
            "Sparse embeddings are not enabled; "
            "create the provider with enable_sparse=True"
        )

    # The sparse model is called synchronously, so run it in the default executor.
    loop = asyncio.get_running_loop()
    results = await loop.run_in_executor(
        None, lambda: list(sparse_model.query_embed([query]))
    )
    first = results[0]
    return SparseVector(
        indices=first.indices.tolist(),
        values=first.values.tolist(),
    )
|
||||
|
||||
def get_vector_name(self) -> str:
|
||||
"""
|
||||
Return the name of the vector for the Qdrant collection.
|
||||
|
||||
Reference in New Issue
Block a user