46 lines
1.2 KiB
Python
46 lines
1.2 KiB
Python
# Query data from an existing ChromaDB collection and print the matches.
import chromadb

# Create a persistent client whose storage path is ./chromadb_store.
client = chromadb.PersistentClient(path="./chromadb_store")

# Fetch the collection that already exists.
collection = client.get_collection(name="knowledge_base")

# query_texts: the text(s) to search for.
# n_results: return the two most similar results.
results = collection.query(query_texts=["如何入门机器学习"], n_results=2)

# Sample output of print(results):
# {
#     "ids": [["doc_1", "doc_2"]],
#     "embeddings": None,
#     "documents": [
#         ["机器学习包含监督学习和无监督学习", "Python 拥有丰富的数据科学生态"]
#     ],
#     "uris": None,
#     "included": ["metadatas", "documents", "distances"],
#     "data": None,
#     "metadatas": [
#         [{"level": "intro", "topic": "ml"}, {"topic": "python", "level": "beginner"}]
#     ],
#     "distances": [[0.24633410573005676, 0.8512163758277893]],
# }

# Each field is a list of lists (see the sample above); a single query text
# was submitted, so index 0 selects its hits.
documents = results["documents"][0]
metadatas = results["metadatas"][0]
distances = results["distances"][0]
ids = results["ids"][0]

# Walk the parallel columns in lockstep, numbering the hits from 1.
for rank, (text, meta, distance, doc_id) in enumerate(
    zip(documents, metadatas, distances, ids), start=1
):
    print(f"结果{rank}")
    print(f"文档ID{doc_id}")
    print(f"匹配文档{text}")
    print(f"附加信息{meta}")
    print(f"相似度距离{distance}")
    print("-" * 50)