Add BM25 hybrid search (dense + sparse vectors with RRF)
Some checks failed
Some checks failed
- Add SparseTextEmbedding("Qdrant/bm25") to FastEmbedProvider for BM25 tokenization
- Add sparse vector config (IDF modifier) to collection creation
- Store both dense and sparse vectors per document
- Use Qdrant prefetch + Reciprocal Rank Fusion (RRF) to combine dense and sparse results for hybrid search
- Add HYBRID_SEARCH env var (default: false) so hybrid search is opt-in, for backward compatibility
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -3,15 +3,18 @@ from mcp_server_qdrant.embeddings.types import EmbeddingProviderType
|
||||
from mcp_server_qdrant.settings import EmbeddingProviderSettings
|
||||
|
||||
|
||||
def create_embedding_provider(settings: EmbeddingProviderSettings) -> EmbeddingProvider:
|
||||
def create_embedding_provider(
|
||||
settings: EmbeddingProviderSettings, enable_sparse: bool = False
|
||||
) -> EmbeddingProvider:
|
||||
"""
|
||||
Create an embedding provider based on the specified type.
|
||||
:param settings: The settings for the embedding provider.
|
||||
:param enable_sparse: Whether to enable sparse (BM25) embeddings.
|
||||
:return: An instance of the specified embedding provider.
|
||||
"""
|
||||
if settings.provider_type == EmbeddingProviderType.FASTEMBED:
|
||||
from mcp_server_qdrant.embeddings.fastembed import FastEmbedProvider
|
||||
|
||||
return FastEmbedProvider(settings.model_name)
|
||||
return FastEmbedProvider(settings.model_name, enable_sparse=enable_sparse)
|
||||
else:
|
||||
raise ValueError(f"Unsupported embedding provider: {settings.provider_type}")
|
||||
|
||||
Reference in New Issue
Block a user