# Source: 03Rag/chromadb/05query_collection.py
# Commit: a17c65c4bc by heyong.fu, "feat: rag" (2026-05-06 11:35:10 +08:00)
# Size: 46 lines, 1.2 KiB, Python

# Query an existing Chroma collection and print the closest matches.
import chromadb
# Open a persistent Chroma client whose data lives in ./chromadb_store.
client = chromadb.PersistentClient(path="./chromadb_store")

# Look up the collection that is expected to exist already in that store.
collection = client.get_collection(name="knowledge_base")

# query_texts: the natural-language queries to search with (a single one here).
# n_results: how many of the closest matches to return for each query.
results = collection.query(
    query_texts=["如何入门机器学习"],
    n_results=2,
)
# Example of what print(results) shows for the query above:
# {
#     "ids": [["doc_1", "doc_2"]],
#     "embeddings": None,
#     "documents": [
#         ["机器学习包含监督学习和无监督学习", "Python 拥有丰富的数据科学生态"]
#     ],
#     "uris": None,
#     "included": ["metadatas", "documents", "distances"],
#     "data": None,
#     "metadatas": [
#         [{"level": "intro", "topic": "ml"}, {"topic": "python", "level": "beginner"}]
#     ],
#     "distances": [[0.24633410573005676, 0.8512163758277893]],
# }
# Print every hit for the single query text. Each field in `results` is a
# list holding one inner list per query, so [0] selects the hits that belong
# to the first (and only) query. zip() walks the four parallel lists in
# lockstep, and enumerate(..., 1) numbers the hits starting from 1.
for idx, (doc, metadata, distance, doc_id) in enumerate(
    zip(
        results["documents"][0],
        results["metadatas"][0],
        results["distances"][0],
        results["ids"][0],
    ),
    1,
):
    print(f"结果{idx}")
    print(f"文档ID{doc_id}")
    print(f"匹配文档{doc}")
    print(f"附加信息{metadata}")
    print(f"相似度距离{distance}")
    # Separator line after each hit so consecutive results are easy to tell apart.
    print("-" * 50)