24 lines
805 B
Python
24 lines
805 B
Python
from langchain_text_splitters import CharacterTextSplitter
|
||
|
||
|
||
# Demo: split a long string into fixed-size chunks with CharacterTextSplitter.
#
# Configure the splitter for chunks of at most 100 characters with no overlap
# between neighbouring chunks. The separator is the empty string, so no
# specific delimiter (such as a newline) is used to decide where to cut.
text_splitters = CharacterTextSplitter(
    chunk_size=100,  # maximum length of each chunk, in characters
    chunk_overlap=0,  # adjacent chunks share no characters
    separator="",  # empty string as the separator
)

# Build a 300-character sample text: 100 "1"s, then 100 "2"s, then 100 "3"s,
# so chunk boundaries are easy to see in the printed output.
document = "1" * 100 + "2" * 100 + "3" * 100

# Cut the original text into a list of chunk strings.
texts = text_splitters.split_text(document)

# Report the length of the original text.
print(f"原文长度{len(document)}")
# Report how many chunks were produced. This must be the count, len(texts),
# not the list itself, which would dump every chunk into the message.
print(f"分割为{len(texts)}个块")

# Print each chunk with its 1-based index, its length, and its repr so that
# whitespace or empty chunks would be visible.
for i, text in enumerate(texts, 1):
    print(f"\n块{i}({len(text)}字符):{text!r}")
|