# Read an HTML file and extract its text content.
from bs4 import BeautifulSoup


def extract_text_html(file_path: str) -> str:
    """Extract all text content from the given HTML file.

    Args:
        file_path: Path to the HTML file. The file is opened in text mode
            and decoded as UTF-8.

    Returns:
        The text of the parsed document, with a newline used as the
        separator between the individual pieces of text.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        # Read the whole file into one string; the handle is released as
        # soon as the read completes.
        html = f.read()

    # Parse the markup with the "html.parser" backend.
    soup = BeautifulSoup(html, "html.parser")

    # Join every piece of text in the document using a newline separator.
    return soup.get_text(separator="\n")


if __name__ == "__main__":
    file_path = "example/example.html"
    result = extract_text_html(file_path)
    print(result)