feat: rag
This commit is contained in:
@@ -0,0 +1,42 @@
|
||||
# Write sample data into a persistent ChromaDB collection.
import chromadb

# Persistent client rooted at ./chromadb_store.
client = chromadb.PersistentClient(path="./chromadb_store")

# Open the "knowledge_base" collection, creating it when it is missing.
collection = client.get_or_create_collection(name="knowledge_base")

# Each record keeps the id, the document text and its metadata side by side,
# so the three parallel lists handed to add() cannot drift out of alignment.
records = [
    (
        "doc_1",
        "机器学习包含监督学习和无监督学习",
        {"topic": "ml", "level": "intro"},
    ),
    (
        "doc_2",
        "Python 拥有丰富的数据科学生态",
        {"topic": "python", "level": "beginner"},
    ),
    (
        "doc_3",
        "数据库可以持久化结构化或非结构化数据",
        {"topic": "database", "level": "intro"},
    ),
]

# Document texts to store.
documents = [text for _, text, _ in records]

# Metadata dictionaries, one per document.
metadatas = [meta for _, _, meta in records]

# Unique identifiers, one per document.
# NOTE(review): the original comment claimed ChromaDB generates ids when they
# are omitted -- verify against the installed chromadb version before relying
# on that.
ids = [record_id for record_id, _, _ in records]

# Add the records to the collection. No embeddings are passed here, so
# presumably add() turns the documents into vectors itself -- confirm against
# the chromadb documentation.
collection.add(documents=documents, metadatas=metadatas, ids=ids)

# List the collections known to this client.
collections = client.list_collections()
print(collections)

# Report how many documents the collection holds.
doc_count = collection.count()
print(doc_count)
|
||||
Reference in New Issue
Block a user