feat: rag
This commit is contained in:
+28
@@ -0,0 +1,28 @@
|
||||
import openpyxl
|
||||
|
||||
|
||||
def extract_text_from_excel(file_path):
    """Return the active worksheet of an Excel workbook as plain text.

    Only the workbook's active worksheet is read. The values in each row are
    joined with tab characters and the rows are joined with newlines. A cell
    whose value is ``None`` contributes an empty string; every other value is
    converted with ``str()``.

    :param file_path: path of the Excel file, passed to ``openpyxl.load_workbook``
    :return: the worksheet contents as a single string
    """
    workbook = openpyxl.load_workbook(file_path)
    sheet = workbook.active

    def render_cell(value):
        # Map a missing value to "" so the literal text "None" never appears.
        return "" if value is None else str(value)

    # values_only=True makes iter_rows yield tuples of cell values rather
    # than Cell objects.
    lines = (
        "\t".join(map(render_cell, record))
        for record in sheet.iter_rows(values_only=True)
    )
    return "\n".join(lines)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual run: print the text extracted from the sample workbook.
    sample_workbook = "example/example.xlsx"
    print(extract_text_from_excel(sample_workbook))
|
||||
Reference in New Issue
Block a user