feat: rag
This commit is contained in:
@@ -0,0 +1,23 @@
|
||||
from langchain_text_splitters import CharacterTextSplitter
|
||||
|
||||
|
||||
# Build a character-based splitter: chunks of at most 100 characters,
# no overlap between chunks, and an empty string as the separator.
text_splitters = CharacterTextSplitter(
    separator="",  # empty string used as the separator
    chunk_size=100,  # maximum length of each chunk, in characters
    chunk_overlap=0,  # consecutive chunks do not overlap
)
|
||||
|
||||
# 构建一个长文本
|
||||
# Sample input: three runs of 100 identical digits ("1", "2", "3"), 300
# characters in total, so each run is exactly as long as chunk_size above.
document = "".join(digit * 100 for digit in "123")
|
||||
|
||||
# Split the original text into chunks using the splitter's split_text method.
texts = text_splitters.split_text(document)

# Report the length of the original text.
print(f"原文长度{len(document)}")
# Report how many chunks were produced. The count is printed via len();
# interpolating `texts` directly would dump the whole list into the message.
print(f"分割为{len(texts)}个块")

# Show every chunk with a 1-based index, its length, and its repr.
for i, text in enumerate(texts, 1):
    print(f"\n块{i}({len(text)}字符):{text!r}")
|
||||
Reference in New Issue
Block a user