# End-to-end ChromaDB walkthrough: open a persistent store, get or create a
# collection, write three documents with metadata, fetch one record by id,
# and run a text query against the collection.
from chromadb import PersistentClient

# Create a persistent client backed by the local ./chromadb_store directory.
client = PersistentClient(path="./chromadb_store")

# Get the collection if it already exists, otherwise create it.
collection = client.get_or_create_collection(name="example")

# Documents to store.
documents = [
    "机器学习包含监督学习和无监督学习",
    "Python 拥有丰富的数据科学生态",
    "数据库可以持久化结构化或非结构化数据",
]

# Metadata entries, listed in the same order as `documents`.
metadatas = [
    {"topic": "ml", "level": "intro"},
    {"topic": "python", "level": "beginner"},
    {"topic": "database", "level": "intro"},
]

# Record ids, listed in the same order as `documents`.
# NOTE: the third id used to be misspelled "dic3"; a store written by that
# version will still hold a record under the old id.
ids = ["doc1", "doc2", "doc3"]

# Write the records into the collection.
collection.add(documents=documents, metadatas=metadatas, ids=ids)

# Fetch a single record by id and show it.
doc2_record = collection.get(ids=["doc2"])
print(doc2_record)

# Query the collection with a free-text question, asking for two results.
result = collection.query(query_texts=["如何入门机器学习"], n_results=2)

# Example of what `print(result)` produced for the query above:
# print(result)
# {
#     "ids": [["doc1", "doc2"]],
#     "embeddings": None,
#     "documents": [
#         ["机器学习包含监督学习和无监督学习", "Python 拥有丰富的数据科学生态"]
#     ],
#     "uris": None,
#     "included": ["metadatas", "documents", "distances"],
#     "data": None,
#     "metadatas": [
#         [{"topic": "ml", "level": "intro"}, {"topic": "python", "level": "beginner"}]
#     ],
#     "distances": [[0.24633410573005676, 0.8512163758277893]],
# }

# Optional: uncomment to print each match on its own lines. Every field in
# `result` is a list with one inner list per query text, hence the `[0]`.
# for index, (doc_id, doc, metadata, distance) in enumerate(
#     zip(
#         result["ids"][0],
#         result["documents"][0],
#         result["metadatas"][0],
#         result["distances"][0],
#     ),
#     1,
# ):
#     print(f"匹配结果 {index}:")
#     print(f"  文档:{doc}")
#     print(f"  元数据:{metadata}")
#     print(f"  距离:{distance:.4f}")
#     print()