31 lines
761 B
Python
31 lines
761 B
Python
# 读取xml文件格式
|
|
|
|
from lxml import etree
|
|
|
|
|
|
def extract_xml_text(file_path: str) -> str:
    """Read an XML file and return all of its text content.

    The file is read as raw bytes and passed to the XML parser unchanged, so
    the parser determines the character encoding from the byte-order mark or
    the XML declaration. Decoding the file as UTF-8 up front would reject any
    document saved in another encoding before the parser ever saw it.

    Args:
        file_path: Path of the XML file to read.

    Returns:
        The text fragments yielded by ``itertext()`` on the root element,
        joined with single spaces. Fragments are not stripped, so whitespace
        contained in the document is kept as-is.

    Raises:
        OSError: If the file cannot be opened or read (for example
            ``FileNotFoundError`` when the path does not exist).

        Any parse error raised by ``etree.fromstring`` for malformed or empty
        input propagates unchanged.
    """
    # Binary mode: leave encoding detection to the XML parser.
    with open(file_path, "rb") as f:
        xml_bytes = f.read()

    # Parse the raw document into an element tree and get its root element.
    root = etree.fromstring(xml_bytes)

    # Walk the tree and join every text fragment with a single space.
    return " ".join(root.itertext())
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: extract the text of the bundled sample document
    # and print it to standard output.
    sample_path = "example/example.xml"
    print(extract_xml_text(sample_path))
|