feat: rag
This commit is contained in:
@@ -0,0 +1,22 @@
|
||||
# Read text from a Word (.docx) file
|
||||
from docx import Document
|
||||
|
||||
|
||||
# Function definition
|
||||
def extract_text_from_word(file_path):
    """
    Collect the text of every paragraph in a Word document into one string.

    param file_path: location of the Word file, passed straight to ``Document``
    return: the paragraph texts joined by newline characters
    """
    # Open the document with python-docx.
    document = Document(file_path)
    # One entry per paragraph, in the order ``document.paragraphs`` yields them.
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return "\n".join(paragraph_texts)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual run: print the text extracted from example/example.docx.
    print(extract_text_from_word("example/example.docx"))
|
||||
Reference in New Issue
Block a user