23 lines
513 B
Python
23 lines
513 B
Python
# Read the text of a Word (.docx) file
|
|
from docx import Document
|
|
|
|
|
|
# Define the extraction function
|
|
def extract_text_from_word(file_path):
    """
    Extract the text of every paragraph in a Word document.

    The paragraph texts are joined with newline characters, in the order
    in which the document's ``paragraphs`` sequence yields them.

    param file_path: location of the Word file, passed to ``Document``
    return: one string containing the paragraph texts
    """
    # Load the document
    document = Document(file_path)

    # Collect one text entry per paragraph
    paragraph_texts = []
    for paragraph in document.paragraphs:
        paragraph_texts.append(paragraph.text)

    return "\n".join(paragraph_texts)
|
|
|
|
|
|
if __name__ == "__main__":
    # Demo run: print the extracted text of the bundled sample document
    print(extract_text_from_word("example/example.docx"))
|