"""Write a few sample documents into a persistent ChromaDB collection.

Running this script opens the on-disk store at ``./chromadb_store``, adds
three documents (with metadata and explicit IDs) to the ``knowledge_base``
collection, then prints the client's collection list and the collection's
record count.
"""

import chromadb

# Create a persistent client; its data lives under the given local path.
client = chromadb.PersistentClient(path="./chromadb_store")

# Fetch the collection by name, creating it if it does not exist yet.
collection = client.get_or_create_collection(name="knowledge_base")

# Document texts to store.
documents = [
    "机器学习包含监督学习和无监督学习",
    "Python 拥有丰富的数据科学生态",
    "数据库可以持久化结构化或非结构化数据",
]

# Metadata: one dict per document, in the same order as `documents`.
metadatas = [
    {"topic": "ml", "level": "intro"},
    {"topic": "python", "level": "beginner"},
    {"topic": "database", "level": "intro"},
]

# Unique identifiers: one ID per document, in the same order as `documents`.
# NOTE(review): the original comment said ChromaDB generates IDs automatically
# when none are supplied -- verify against the installed chromadb version
# before relying on that; explicit IDs are passed here either way.
ids = ["doc_1", "doc_2", "doc_3"]

# Add the records to the collection. No embeddings are passed, so per the
# original note add() is expected to turn the documents into vectors itself.
# NOTE(review): the store is persistent and the IDs are fixed, so every re-run
# submits the same IDs again; if the texts may change between runs, consider
# `collection.upsert(...)` instead -- confirm the desired behavior.
collection.add(documents=documents, metadatas=metadatas, ids=ids)

# List the collections known to this client.
collections = client.list_collections()
print(collections)

# Print how many records the collection currently holds.
doc_count = collection.count()
print(doc_count)