Files
03Rag/02word.py
T
heyong.fu a17c65c4bc feat: rag
2026-05-06 11:35:10 +08:00

23 lines
513 B
Python

# 读取word文件
from docx import Document
# 定义函数
def extract_text_from_word(file_path):
    """
    Collect the text of the paragraphs in a Word document into one string.

    Only the paragraphs exposed by the document's ``paragraphs`` attribute
    are read; their texts are joined in order with newline characters.

    param file_path: location of the Word file, passed directly to ``Document``
    return: a single string with one paragraph's text per line
    """
    # Open the document
    word_doc = Document(file_path)

    # Gather each paragraph's text in document order
    paragraph_texts = []
    for paragraph in word_doc.paragraphs:
        paragraph_texts.append(paragraph.text)

    return "\n".join(paragraph_texts)
if __name__ == "__main__":
    # When run as a script, extract the text of the sample path and print it.
    sample_path = "example/example.docx"
    print(extract_text_from_word(sample_path))