# Complete workflow: store a few documents in a persistent ChromaDB
# collection, read one back by id, and run a text query against it.

from chromadb import PersistentClient

# Create a persistent client; the store location is ./chromadb_store.
client = PersistentClient(path="./chromadb_store")

# Get the "example" collection, creating it if it does not exist yet.
collection = client.get_or_create_collection(name="example")

# Documents to store in the collection.
documents = [
    "机器学习包含监督学习和无监督学习",
    "Python 拥有丰富的数据科学生态",
    "数据库可以持久化结构化或非结构化数据",
]

# Metadata entries, listed in the same order as `documents`.
metadatas = [
    {"topic": "ml", "level": "intro"},
    {"topic": "python", "level": "beginner"},
    {"topic": "database", "level": "intro"},
]

# One id per document, listed in the same order as `documents`.
# The third id previously read "dic3", a typo that broke the docN naming.
ids = ["doc1", "doc2", "doc3"]

# Write the data. upsert() is used instead of add() because the store is
# persistent: on a second run the same ids already exist, and upsert()
# updates them in place, keeping the script idempotent.
collection.upsert(documents=documents, metadatas=metadatas, ids=ids)

# Read a single record back by id and show it.
fetched = collection.get(ids=["doc2"])
print(fetched)

# Query the collection with one query text, asking for 2 results.
result = collection.query(query_texts=["如何入门机器学习"], n_results=2)

# Example of what print(result) showed, as recorded in the original script:
# {
#     "ids": [["doc1", "doc2"]],
#     "embeddings": None,
#     "documents": [
#         ["机器学习包含监督学习和无监督学习", "Python 拥有丰富的数据科学生态"]
#     ],
#     "uris": None,
#     "included": ["metadatas", "documents", "distances"],
#     "data": None,
#     "metadatas": [
#         [{"topic": "ml", "level": "intro"}, {"topic": "python", "level": "beginner"}]
#     ],
#     "distances": [[0.24633410573005676, 0.8512163758277893]],
# }

# Disabled pretty-printer for the matches of the first (only) query text.
# Index [0] selects that query's result list; numbering starts at 1.
# for index, (doc_id, doc, metadata, distance) in enumerate(
#     zip(
#         result["ids"][0],
#         result["documents"][0],
#         result["metadatas"][0],
#         result["distances"][0],
#     ),
#     1,
# ):
#     print(f"匹配结果 {index}:")
#     print(f" 文档:{doc}")
#     print(f" 元数据:{metadata}")
#     print(f" 距离:{distance:.4f}")
#     print()