# Query an existing ChromaDB collection and print the closest matches.
import chromadb

# Open the on-disk (persistent) client rooted at ./chromadb_store.
client = chromadb.PersistentClient(path="./chromadb_store")

# Fetch a collection that is expected to exist already.
collection = client.get_collection(name="knowledge_base")

# query_texts: the text(s) to search for.
# n_results:   how many of the most similar entries to return per query text.
results = collection.query(query_texts=["如何入门机器学习"], n_results=2)

# Sample shape of `results` (uncomment the print to inspect a live response):
# print(results)
# {
#     "ids": [["doc_1", "doc_2"]],
#     "embeddings": None,
#     "documents": [
#         ["机器学习包含监督学习和无监督学习", "Python 拥有丰富的数据科学生态"]
#     ],
#     "uris": None,
#     "included": ["metadatas", "documents", "distances"],
#     "data": None,
#     "metadatas": [
#         [{"level": "intro", "topic": "ml"}, {"topic": "python", "level": "beginner"}]
#     ],
#     "distances": [[0.24633410573005676, 0.8512163758277893]],
# }

# Every field holds one inner list per query text; only a single query text
# was submitted, so index 0 selects its hits.
matched_docs = results["documents"][0]
matched_metadatas = results["metadatas"][0]
matched_distances = results["distances"][0]
matched_ids = results["ids"][0]

hits = zip(matched_docs, matched_metadatas, matched_distances, matched_ids)

# Number the hits from 1 for display.
for rank, (text, meta, distance, hit_id) in enumerate(hits, start=1):
    print(
        f"结果{rank}",
        f"文档ID{hit_id}",
        f"匹配文档{text}",
        f"附加信息{meta}",
        f"相似度距离{distance}",
        "-" * 50,
        sep="\n",
    )