Add metadata support and integration tests for QdrantConnector (#25)
This commit is contained in:
15
README.md
15
README.md
@@ -19,16 +19,17 @@ It acts as a semantic memory layer on top of the Qdrant database.
|
|||||||
|
|
||||||
### Tools
|
### Tools
|
||||||
|
|
||||||
1. `qdrant-store-memory`
|
1. `qdrant-store`
|
||||||
- Store a memory in the Qdrant database
|
- Store some information in the Qdrant database
|
||||||
- Input:
|
- Input:
|
||||||
- `information` (string): Memory to store
|
- `information` (string): Information to store
|
||||||
|
- `metadata` (JSON): Optional metadata to store
|
||||||
- Returns: Confirmation message
|
- Returns: Confirmation message
|
||||||
2. `qdrant-find-memories`
|
2. `qdrant-find`
|
||||||
- Retrieve a memory from the Qdrant database
|
- Retrieve relevant information from the Qdrant database
|
||||||
- Input:
|
- Input:
|
||||||
- `query` (string): Query to retrieve a memory
|
- `query` (string): Query to use for searching
|
||||||
- Returns: Memories stored in the Qdrant database as separate messages
|
- Returns: Information stored in the Qdrant database as separate messages
|
||||||
|
|
||||||
## Installation in Claude Desktop
|
## Installation in Claude Desktop
|
||||||
|
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ dependencies = [
|
|||||||
"mcp[cli]>=1.3.0",
|
"mcp[cli]>=1.3.0",
|
||||||
"fastembed>=0.6.0",
|
"fastembed>=0.6.0",
|
||||||
"qdrant-client>=1.12.0",
|
"qdrant-client>=1.12.0",
|
||||||
|
"pydantic>=2.10.6",
|
||||||
]
|
]
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
from mcp_server_qdrant.embeddings import EmbeddingProvider
|
from mcp_server_qdrant.embeddings.base import EmbeddingProvider
|
||||||
from mcp_server_qdrant.embeddings.types import EmbeddingProviderType
|
from mcp_server_qdrant.embeddings.types import EmbeddingProviderType
|
||||||
from mcp_server_qdrant.settings import EmbeddingProviderSettings
|
from mcp_server_qdrant.settings import EmbeddingProviderSettings
|
||||||
|
|
||||||
|
|||||||
@@ -1,9 +1,24 @@
|
|||||||
|
import logging
|
||||||
import uuid
|
import uuid
|
||||||
from typing import Optional
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel
|
||||||
from qdrant_client import AsyncQdrantClient, models
|
from qdrant_client import AsyncQdrantClient, models
|
||||||
|
|
||||||
from .embeddings.base import EmbeddingProvider
|
from mcp_server_qdrant.embeddings.base import EmbeddingProvider
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
Metadata = Dict[str, Any]
|
||||||
|
|
||||||
|
|
||||||
|
class Entry(BaseModel):
|
||||||
|
"""
|
||||||
|
A single entry in the Qdrant collection.
|
||||||
|
"""
|
||||||
|
|
||||||
|
content: str
|
||||||
|
metadata: Optional[Metadata] = None
|
||||||
|
|
||||||
|
|
||||||
class QdrantConnector:
|
class QdrantConnector:
|
||||||
@@ -53,30 +68,31 @@ class QdrantConnector:
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
async def store(self, information: str):
|
async def store(self, entry: Entry):
|
||||||
"""
|
"""
|
||||||
Store some information in the Qdrant collection.
|
Store some information in the Qdrant collection, along with the specified metadata.
|
||||||
:param information: The information to store.
|
:param entry: The entry to store in the Qdrant collection.
|
||||||
"""
|
"""
|
||||||
await self._ensure_collection_exists()
|
await self._ensure_collection_exists()
|
||||||
|
|
||||||
# Embed the document
|
# Embed the document
|
||||||
embeddings = await self._embedding_provider.embed_documents([information])
|
embeddings = await self._embedding_provider.embed_documents([entry.content])
|
||||||
|
|
||||||
# Add to Qdrant
|
# Add to Qdrant
|
||||||
vector_name = self._embedding_provider.get_vector_name()
|
vector_name = self._embedding_provider.get_vector_name()
|
||||||
|
payload = {"document": entry.content, "metadata": entry.metadata}
|
||||||
await self._client.upsert(
|
await self._client.upsert(
|
||||||
collection_name=self._collection_name,
|
collection_name=self._collection_name,
|
||||||
points=[
|
points=[
|
||||||
models.PointStruct(
|
models.PointStruct(
|
||||||
id=uuid.uuid4().hex,
|
id=uuid.uuid4().hex,
|
||||||
vector={vector_name: embeddings[0]},
|
vector={vector_name: embeddings[0]},
|
||||||
payload={"document": information},
|
payload=payload,
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
async def search(self, query: str) -> list[str]:
|
async def search(self, query: str) -> list[Entry]:
|
||||||
"""
|
"""
|
||||||
Find points in the Qdrant collection. If there are no entries found, an empty list is returned.
|
Find points in the Qdrant collection. If there are no entries found, an empty list is returned.
|
||||||
:param query: The query to use for the search.
|
:param query: The query to use for the search.
|
||||||
@@ -97,4 +113,10 @@ class QdrantConnector:
|
|||||||
limit=10,
|
limit=10,
|
||||||
)
|
)
|
||||||
|
|
||||||
return [result.payload["document"] for result in search_results]
|
return [
|
||||||
|
Entry(
|
||||||
|
content=result.payload["document"],
|
||||||
|
metadata=result.payload.get("metadata"),
|
||||||
|
)
|
||||||
|
for result in search_results
|
||||||
|
]
|
||||||
|
|||||||
@@ -1,12 +1,13 @@
|
|||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
from typing import AsyncIterator, List
|
from typing import AsyncIterator, List, Optional
|
||||||
|
|
||||||
from mcp.server import Server
|
from mcp.server import Server
|
||||||
from mcp.server.fastmcp import Context, FastMCP
|
from mcp.server.fastmcp import Context, FastMCP
|
||||||
|
|
||||||
from mcp_server_qdrant.embeddings.factory import create_embedding_provider
|
from mcp_server_qdrant.embeddings.factory import create_embedding_provider
|
||||||
from mcp_server_qdrant.qdrant import QdrantConnector
|
from mcp_server_qdrant.qdrant import Entry, Metadata, QdrantConnector
|
||||||
from mcp_server_qdrant.settings import EmbeddingProviderSettings, QdrantSettings
|
from mcp_server_qdrant.settings import EmbeddingProviderSettings, QdrantSettings
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -57,28 +58,32 @@ mcp = FastMCP("mcp-server-qdrant", lifespan=server_lifespan)
|
|||||||
|
|
||||||
|
|
||||||
@mcp.tool(
|
@mcp.tool(
|
||||||
name="qdrant-store-memory",
|
name="qdrant-store",
|
||||||
description=(
|
description=(
|
||||||
"Keep the memory for later use, when you are asked to remember something."
|
"Keep the memory for later use, when you are asked to remember something."
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
async def store(information: str, ctx: Context) -> str:
|
async def store(
|
||||||
|
ctx: Context, information: str, metadata: Optional[Metadata] = None
|
||||||
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Store a memory in Qdrant.
|
Store a memory in Qdrant.
|
||||||
:param information: The information to store.
|
|
||||||
:param ctx: The context for the request.
|
:param ctx: The context for the request.
|
||||||
|
:param information: The information to store.
|
||||||
|
:param metadata: JSON metadata to store with the information, optional.
|
||||||
:return: A message indicating that the information was stored.
|
:return: A message indicating that the information was stored.
|
||||||
"""
|
"""
|
||||||
await ctx.debug(f"Storing information {information} in Qdrant")
|
await ctx.debug(f"Storing information {information} in Qdrant")
|
||||||
qdrant_connector: QdrantConnector = ctx.request_context.lifespan_context[
|
qdrant_connector: QdrantConnector = ctx.request_context.lifespan_context[
|
||||||
"qdrant_connector"
|
"qdrant_connector"
|
||||||
]
|
]
|
||||||
await qdrant_connector.store(information)
|
entry = Entry(content=information, metadata=metadata)
|
||||||
|
await qdrant_connector.store(entry)
|
||||||
return f"Remembered: {information}"
|
return f"Remembered: {information}"
|
||||||
|
|
||||||
|
|
||||||
@mcp.tool(
|
@mcp.tool(
|
||||||
name="qdrant-find-memories",
|
name="qdrant-find",
|
||||||
description=(
|
description=(
|
||||||
"Look up memories in Qdrant. Use this tool when you need to: \n"
|
"Look up memories in Qdrant. Use this tool when you need to: \n"
|
||||||
" - Find memories by their content \n"
|
" - Find memories by their content \n"
|
||||||
@@ -86,11 +91,11 @@ async def store(information: str, ctx: Context) -> str:
|
|||||||
" - Get some personal information about the user"
|
" - Get some personal information about the user"
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
async def find(query: str, ctx: Context) -> List[str]:
|
async def find(ctx: Context, query: str) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Find memories in Qdrant.
|
Find memories in Qdrant.
|
||||||
:param query: The query to use for the search.
|
|
||||||
:param ctx: The context for the request.
|
:param ctx: The context for the request.
|
||||||
|
:param query: The query to use for the search.
|
||||||
:return: A list of entries found.
|
:return: A list of entries found.
|
||||||
"""
|
"""
|
||||||
await ctx.debug(f"Finding points for query {query}")
|
await ctx.debug(f"Finding points for query {query}")
|
||||||
@@ -104,5 +109,9 @@ async def find(query: str, ctx: Context) -> List[str]:
|
|||||||
f"Memories for the query '{query}'",
|
f"Memories for the query '{query}'",
|
||||||
]
|
]
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
content.append(f"<entry>{entry}</entry>")
|
# Format the metadata as a JSON string and produce XML-like output
|
||||||
|
entry_metadata = json.dumps(entry.metadata) if entry.metadata else ""
|
||||||
|
content.append(
|
||||||
|
f"<entry><content>{entry.content}</content><metadata>{entry_metadata}</metadata></entry>"
|
||||||
|
)
|
||||||
return content
|
return content
|
||||||
|
|||||||
161
tests/test_qdrant_integration.py
Normal file
161
tests/test_qdrant_integration.py
Normal file
@@ -0,0 +1,161 @@
|
|||||||
|
import uuid
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from mcp_server_qdrant.embeddings.fastembed import FastEmbedProvider
|
||||||
|
from mcp_server_qdrant.qdrant import Entry, QdrantConnector
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
async def embedding_provider():
|
||||||
|
"""Fixture to provide a FastEmbed embedding provider."""
|
||||||
|
return FastEmbedProvider(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
async def qdrant_connector(embedding_provider):
|
||||||
|
"""Fixture to provide a QdrantConnector with in-memory Qdrant client."""
|
||||||
|
# Use a random collection name to avoid conflicts between tests
|
||||||
|
collection_name = f"test_collection_{uuid.uuid4().hex}"
|
||||||
|
|
||||||
|
# Create connector with in-memory Qdrant
|
||||||
|
connector = QdrantConnector(
|
||||||
|
qdrant_url=":memory:",
|
||||||
|
qdrant_api_key=None,
|
||||||
|
collection_name=collection_name,
|
||||||
|
embedding_provider=embedding_provider,
|
||||||
|
)
|
||||||
|
|
||||||
|
yield connector
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_store_and_search(qdrant_connector):
|
||||||
|
"""Test storing an entry and then searching for it."""
|
||||||
|
# Store a test entry
|
||||||
|
test_entry = Entry(
|
||||||
|
content="The quick brown fox jumps over the lazy dog",
|
||||||
|
metadata={"source": "test", "importance": "high"},
|
||||||
|
)
|
||||||
|
await qdrant_connector.store(test_entry)
|
||||||
|
|
||||||
|
# Search for the entry
|
||||||
|
results = await qdrant_connector.search("fox jumps")
|
||||||
|
|
||||||
|
# Verify results
|
||||||
|
assert len(results) == 1
|
||||||
|
assert results[0].content == test_entry.content
|
||||||
|
assert results[0].metadata == test_entry.metadata
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_search_empty_collection(qdrant_connector):
|
||||||
|
"""Test searching in an empty collection."""
|
||||||
|
# Search in an empty collection
|
||||||
|
results = await qdrant_connector.search("test query")
|
||||||
|
|
||||||
|
# Verify results
|
||||||
|
assert len(results) == 0
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_multiple_entries(qdrant_connector):
|
||||||
|
"""Test storing and searching multiple entries."""
|
||||||
|
# Store multiple entries
|
||||||
|
entries = [
|
||||||
|
Entry(
|
||||||
|
content="Python is a programming language",
|
||||||
|
metadata={"topic": "programming"},
|
||||||
|
),
|
||||||
|
Entry(content="The Eiffel Tower is in Paris", metadata={"topic": "landmarks"}),
|
||||||
|
Entry(content="Machine learning is a subset of AI", metadata={"topic": "AI"}),
|
||||||
|
]
|
||||||
|
|
||||||
|
for entry in entries:
|
||||||
|
await qdrant_connector.store(entry)
|
||||||
|
|
||||||
|
# Search for programming-related entries
|
||||||
|
programming_results = await qdrant_connector.search("Python programming")
|
||||||
|
assert len(programming_results) > 0
|
||||||
|
assert any("Python" in result.content for result in programming_results)
|
||||||
|
|
||||||
|
# Search for landmark-related entries
|
||||||
|
landmark_results = await qdrant_connector.search("Eiffel Tower Paris")
|
||||||
|
assert len(landmark_results) > 0
|
||||||
|
assert any("Eiffel" in result.content for result in landmark_results)
|
||||||
|
|
||||||
|
# Search for AI-related entries
|
||||||
|
ai_results = await qdrant_connector.search(
|
||||||
|
"artificial intelligence machine learning"
|
||||||
|
)
|
||||||
|
assert len(ai_results) > 0
|
||||||
|
assert any("machine learning" in result.content.lower() for result in ai_results)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_ensure_collection_exists(qdrant_connector):
|
||||||
|
"""Test that the collection is created if it doesn't exist."""
|
||||||
|
# The collection shouldn't exist yet
|
||||||
|
assert not await qdrant_connector._client.collection_exists(
|
||||||
|
qdrant_connector._collection_name
|
||||||
|
)
|
||||||
|
|
||||||
|
# Storing an entry should create the collection
|
||||||
|
test_entry = Entry(content="Test content")
|
||||||
|
await qdrant_connector.store(test_entry)
|
||||||
|
|
||||||
|
# Now the collection should exist
|
||||||
|
assert await qdrant_connector._client.collection_exists(
|
||||||
|
qdrant_connector._collection_name
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_metadata_handling(qdrant_connector):
|
||||||
|
"""Test that metadata is properly stored and retrieved."""
|
||||||
|
# Store entries with different metadata
|
||||||
|
metadata1 = {"source": "book", "author": "Jane Doe", "year": 2023}
|
||||||
|
metadata2 = {"source": "article", "tags": ["science", "research"]}
|
||||||
|
|
||||||
|
await qdrant_connector.store(
|
||||||
|
Entry(content="Content with structured metadata", metadata=metadata1)
|
||||||
|
)
|
||||||
|
await qdrant_connector.store(
|
||||||
|
Entry(content="Content with list in metadata", metadata=metadata2)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Search and verify metadata is preserved
|
||||||
|
results = await qdrant_connector.search("metadata")
|
||||||
|
|
||||||
|
assert len(results) == 2
|
||||||
|
|
||||||
|
# Check that both metadata objects are present in the results
|
||||||
|
found_metadata1 = False
|
||||||
|
found_metadata2 = False
|
||||||
|
|
||||||
|
for result in results:
|
||||||
|
if result.metadata.get("source") == "book":
|
||||||
|
assert result.metadata.get("author") == "Jane Doe"
|
||||||
|
assert result.metadata.get("year") == 2023
|
||||||
|
found_metadata1 = True
|
||||||
|
elif result.metadata.get("source") == "article":
|
||||||
|
assert "science" in result.metadata.get("tags", [])
|
||||||
|
assert "research" in result.metadata.get("tags", [])
|
||||||
|
found_metadata2 = True
|
||||||
|
|
||||||
|
assert found_metadata1
|
||||||
|
assert found_metadata2
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_entry_without_metadata(qdrant_connector):
|
||||||
|
"""Test storing and retrieving entries without metadata."""
|
||||||
|
# Store an entry without metadata
|
||||||
|
await qdrant_connector.store(Entry(content="Entry without metadata"))
|
||||||
|
|
||||||
|
# Search and verify
|
||||||
|
results = await qdrant_connector.search("without metadata")
|
||||||
|
|
||||||
|
assert len(results) == 1
|
||||||
|
assert results[0].content == "Entry without metadata"
|
||||||
|
assert results[0].metadata is None
|
||||||
2
uv.lock
generated
2
uv.lock
generated
@@ -497,6 +497,7 @@ source = { editable = "." }
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "fastembed" },
|
{ name = "fastembed" },
|
||||||
{ name = "mcp", extra = ["cli"] },
|
{ name = "mcp", extra = ["cli"] },
|
||||||
|
{ name = "pydantic" },
|
||||||
{ name = "qdrant-client" },
|
{ name = "qdrant-client" },
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -513,6 +514,7 @@ dev = [
|
|||||||
requires-dist = [
|
requires-dist = [
|
||||||
{ name = "fastembed", specifier = ">=0.6.0" },
|
{ name = "fastembed", specifier = ">=0.6.0" },
|
||||||
{ name = "mcp", extras = ["cli"], specifier = ">=1.3.0" },
|
{ name = "mcp", extras = ["cli"], specifier = ">=1.3.0" },
|
||||||
|
{ name = "pydantic", specifier = ">=2.10.6" },
|
||||||
{ name = "qdrant-client", specifier = ">=1.12.0" },
|
{ name = "qdrant-client", specifier = ">=1.12.0" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user