feat: rag

This commit is contained in:
heyong.fu
2026-05-06 11:35:10 +08:00
commit a17c65c4bc
75 changed files with 5196 additions and 0 deletions
+23
View File
@@ -0,0 +1,23 @@
from langchain_text_splitters import CharacterTextSplitter
# Demo: split a long string into fixed-size chunks with CharacterTextSplitter.
# Configure the splitter for chunks of at most 100 characters, with no overlap,
# and an empty string as the separator.
text_splitters = CharacterTextSplitter(
    chunk_size=100,  # maximum length of each chunk, in characters
    chunk_overlap=0,  # adjacent chunks share no characters
    separator="",  # empty string (not whitespace) as the separator
)
# Build a 300-character sample text: 100 "1"s, then 100 "2"s, then 100 "3"s.
document = f"""{"1"*100}{"2"*100}{"3"*100}"""
# Split the sample text into a list of chunk strings.
texts = text_splitters.split_text(document)
# Report the length of the original text.
print(f"原文长度{len(document)}")
# Report how many chunks were produced (the count, not the chunk list itself).
print(f"分割为{len(texts)}个块")
# Show each chunk with its 1-based index, its length, and its repr.
for i, text in enumerate(texts, 1):
    print(f"\n{i}({len(text)}字符){repr(text)}")