feat: python

2026-05-06 11:21:42 +08:00
commit 0abf1ad3c4
62 changed files with 7598 additions and 0 deletions
@@ -0,0 +1,411 @@
+# 概述
+# Python 提供了丰富的内置函数和方法来处理文件，支持文本文件、二进制文件、CSV、JSON 等多种格式的读写操作。掌握文件操作是 Python 编程的重要技能。
+
+# 2.核心概念
+# 文件对象：通过 open() 函数创建，用于文件读写操作
+# 文件模式：指定文件的打开方式（读取、写入、追加等）
+# 编码：处理文本文件时指定字符编码，避免乱码
+# 上下文管理：使用 with 语句自动管理文件资源
+
+# 打开文件
+# 3.1.基本语法
+# 使用 open() 函数打开文件，返回文件对象用于后续操作：
+# file = open(file, mode='r', encoding=None)
+# 参数说明：
+# file：文件路径（字符串），可以是相对路径或绝对路径
+# mode：打开模式（字符串），默认为 'r'（只读）
+# encoding：字符编码（字符串），文本文件推荐使用 'utf-8'
+# 模式	    描述	            说明
+# 'r'	        只读（默认）	     文件必须存在，否则报错
+# 'w'	        写入	            覆盖已有文件，不存在则创建
+# 'a'	        追加	            在文件末尾添加内容，不存在则创建
+# 'x'	        创建	            创建新文件，已存在则失败
+# 'b'	        二进制	            与上述模式组合使用（如 'rb', 'wb'）
+# 't'	        文本（默认）	     文本模式，处理字符串
+# '+'	        读写	            与上述模式组合使用（如 'r+', 'w+'）
+# 只读模式打开文本文件
+file = open("data.txt", "r", encoding="utf-8")
+
+# 写入模式打开文件
+file = open("output.txt", "w", encoding="utf-8")
+
+# 二进制模式打开文件
+file = open("image.png", "rb")
+
+# 3.2.重要注意事项
+# 文件关闭：操作完成后必须关闭文件，避免资源泄露
+# 编码指定：处理中文等非ASCII字符时务必指定编码
+# 异常处理：文件不存在或权限不足时会抛出异常
+# 推荐使用 with 语句：自动管理文件资源，更安全可靠
+
+# 4.读取文件内容
+# 4.1.读取方法概述
+# Python 提供了多种读取文件内容的方法：
+# 方法	                描述	            适用场景
+# read()	            读取整个文件	    小文件，需要完整内容
+# readline()	        读取一行	        逐行处理，大文件
+# readlines()	        读取所有行到列表	需要随机访问行
+# for line in file	    遍历文件对象	    推荐方式，简洁高效
+# 重要注意事项
+# 使用 with 语句：自动管理文件资源，避免泄露
+# 大文件处理：建议逐行读取，避免内存溢出
+# 编码指定：处理中文时务必指定 encoding='utf-8'
+# 异常处理：文件不存在或权限不足时会抛出异常
+
+# 读取整个文件
+# Read the entire file into memory as one string and print it
+with open("example.txt", "r", encoding="utf-8") as file:
+    content = file.read()  # no size argument: reads through to the end of the file
+    print(content)
+# 适用场景：
+
+# 文件较小，可以一次性加载到内存
+# 需要完整的文件内容进行处理
+# 配置文件、小文本文件等
+
+# 逐行读取
+# 逐行读取适合处理大文件或需要按行处理内容的场景
+
+# 方法1：使用 readline()
+# 使用 readline() 逐行读取
+# with open("example.txt", "r", encoding="utf-8") as file:
+#     line = file.readline()
+#     while line:
+#         print(line.strip())
+#         line = file.readline()
+
+# 方法2：使用 readlines()
+# Load every line into a list with readlines(), then print each one stripped
+with open("example.txt", "r", encoding="utf-8") as file:
+    lines = file.readlines()  # the whole file is held in memory as a list of lines
+    for line in lines:
+        print(line.strip())  # strip() drops surrounding whitespace, including the trailing newline
+
+# 方法3：直接遍历文件对象（推荐）
+# 直接遍历文件对象（推荐方式）
+# with open("example.txt", "r", encoding="utf-8") as file:
+#     for line in file:
+#         print(line.strip())
+# 方法对比：
+
+# 方法	             优点	            缺点	        适用场景
+# readline()	    内存效率高	        代码较复杂	    大文件逐行处理
+# readlines()	    代码简洁	        内存占用大	    小文件，需要随机访问
+# for line in file	代码最简洁，效率高	    -	        推荐使用
+
+# 读取指定字符数或字节数
+# 使用 read(size) 方法可以读取指定数量的字符或字节。
+# 基本用法
+# Read at most the first 100 characters (text mode counts characters, not bytes)
+with open("example.txt", "r", encoding="utf-8") as file:
+    chunk = file.read(100)
+    print(chunk)
+# 分块读取大文件
+# Stream a large file in fixed-size chunks instead of loading it all at once
+with open("large_file.txt", "r", encoding="utf-8") as file:
+    while True:
+        chunk = file.read(1000)  # read up to 1000 characters per iteration
+        if not chunk:  # an empty string means the end of the file was reached
+            break
+        print(chunk)
+# 二进制文件读取
+# Read the first chunk of a binary file
+with open("image.jpg", "rb") as file:
+    chunk = file.read(1024)  # read up to 1024 bytes (binary mode counts bytes)
+    print(chunk)
+# 重要说明：
+# 文本模式：size 参数表示字符数
+# 二进制模式：size 参数表示字节数
+# 适合处理大文件，避免内存溢出
+# 结合循环可以实现流式处理
+
+
+# 写入文件
+# 方法	        描述	        特点
+# write()	        写入字符串	    不自动换行，需要手动添加 \n
+# writelines()	写入字符串序列	不自动换行，需要序列中自带 \n
+# 写入模式
+# 覆盖写入（'w'）：清空原内容，重新写入
+# 追加写入（'a'）：在文件末尾添加内容
+# 创建写入（'x'）：创建新文件，已存在则失败
+# 5.3.重要注意事项
+# 使用 with 语句：自动管理文件资源，避免数据丢失
+# 编码指定：处理中文时务必指定 encoding='utf-8'
+# 换行符：写入方法不会自动添加换行符，需要手动添加
+# 异常处理：权限不足或磁盘空间不够时会抛出异常
+
+# 写入字符串
+# 单独写入字符串
+# Overwrite mode: any existing content of the file is replaced
+with open("output.txt", "w", encoding="utf-8") as file:
+    file.write("Hello, World!\n")  # write() adds no newline itself, hence the explicit \n
+    file.write("This is a new line.\n")
+
+# Append mode: new content is added at the end of the file
+with open("output.txt", "a", encoding="utf-8") as file:
+    file.write("This line is appended.\n")
+
+# 批量写入字符串
+# Prepare the list of strings to write; each item already ends with a newline
+lines = ["Line 1\n", "Line 2\n", "Line 3\n"]
+
+# Write the whole list in one call with writelines()
+with open("output.txt", "w", encoding="utf-8") as file:
+    file.writelines(lines)
+
+# Equivalent alternative: write the items one at a time in a loop
+with open("output.txt", "w", encoding="utf-8") as file:
+    for line in lines:
+        file.write(line)
+
+# 写入要点：
+# 写入方法不会自动添加换行符
+# 需要手动在字符串末尾添加 \n
+# 使用 with 语句确保文件正确关闭
+# 指定编码避免中文乱码
+
+
+# 使用 with 语句（推荐）
+# 基本用法
+# with 语句是 Python 推荐的文件操作方式，可以自动管理文件资源：
+# Let the with statement manage the lifetime of the file object
+with open("example.txt", "r", encoding="utf-8") as file:
+    content = file.read()
+    print(content)
+# The file is closed automatically here; no explicit file.close() call is needed
+# 优势
+# 自动关闭：无论是否发生异常，文件都会被正确关闭
+# 代码简洁：无需手动管理文件资源
+# 异常安全：即使发生异常也能确保资源释放
+# 推荐使用：Python 官方推荐的文件操作方式
+
+# 对比传统方式
+# Traditional approach (not recommended): explicit try/finally
+file = open("example.txt", "r", encoding="utf-8")
+try:
+    content = file.read()
+    print(content)
+finally:
+    file.close()  # must be closed by hand; the finally clause runs even if read() raises
+
+# with statement (recommended)
+with open("example.txt", "r", encoding="utf-8") as file:
+    content = file.read()
+    print(content)
+# Closed automatically on leaving the block: shorter and safer
+
+# 文件位置操作
+# 获取当前位置
+# Use tell() to report the current position of the file pointer
+with open("example.txt", "r", encoding="utf-8") as file:
+    print(f"初始位置: {file.tell()}")  # 0
+    content = file.read(5)  # reads 5 characters (text mode)
+    print(f"读取后位置: {file.tell()}")  # 5 for single-byte (ASCII) text; NOTE(review): text-mode tell() is an opaque value and may differ for multi-byte characters
+
+# 移动文件指针
+# Use seek() to move the file pointer; binary mode so offsets are plain byte counts
+with open("example.txt", "rb") as file:
+    # Move to the start of the file
+    file.seek(0)
+    print(f"位置: {file.tell()}")  # 0
+
+    # Move to byte offset 10, counted from the start of the file
+    file.seek(10)
+    print(f"位置: {file.tell()}")  # 10
+
+    # Move 5 bytes forward from the current position (whence=1)
+    file.seek(5, 1)
+    print(f"位置: {file.tell()}")  # 15
+
+    # Move to 10 bytes before the end of the file (whence=2)
+    file.seek(-10, 2)  # NOTE(review): presumably example.txt is at least 10 bytes long — confirm
+    print(f"位置: {file.tell()}")  # file size - 10
+
+# seek() 参数说明
+# offset：偏移量（字节为单位）
+# whence：相对位置
+# 0：从文件开头计算（默认）
+# 1：从当前位置计算
+# 2：从文件末尾计算
+
+# 重要注意事项
+# 文本模式：seek() 行为可能受编码影响，建议使用二进制模式
+# 二进制模式：seek() 行为更可预测，适合精确定位
+# 应用场景：断点续传、日志处理、大文件分块处理
+
+# 文件属性检查
+# 基本属性检查
+# 使用 os.path 模块检查文件的基本属性：
+import os
+
+# Check whether the path exists
+if os.path.exists("demo.txt"):
+    print("文件存在")
+else:
+    print("文件不存在")
+
+# Distinguish a regular file from a directory
+if os.path.isfile("demo.txt"):
+    print("是文件")
+if os.path.isdir("test_folder"):
+    print("是目录")
+
+# Get the file size in bytes
+size = os.path.getsize("demo.txt")  # NOTE(review): not guarded by the exists() check above; fails if demo.txt is missing
+print(f"文件大小: {size} 字节")
+
+# 详细属性信息
+# 使用 os.stat() 获取文件的详细信息：
+import os
+from datetime import datetime
+
+# 获取文件详细信息
+info = os.stat("demo.txt")
+
+print(f"文件大小: {info.st_size} 字节")
+print(f"创建时间: {datetime.fromtimestamp(info.st_ctime)}")
+print(f"最后访问: {datetime.fromtimestamp(info.st_atime)}")
+print(f"最后修改: {datetime.fromtimestamp(info.st_mtime)}")
+print(f"文件权限: {oct(info.st_mode)}")
+
+
+# 常用属性检查方法
+# 方法	                    描述	            返回值
+# os.path.exists()	        检查路径是否存在	 True/False
+# os.path.isfile()	        检查是否为文件	     True/False
+# os.path.isdir()	        检查是否为目录	     True/False
+# os.path.getsize()	        获取文件大小	     字节数
+# os.stat()	                获取详细信息	     stat_result 对象
+
+
+# 完整实例
+# 1. Create the file and write three lines
+with open("demo.txt", "w", encoding="utf-8") as file:
+    file.write("这是第一行\n")
+    file.write("这是第二行\n")
+    file.write("这是第三行\n")
+
+# 2. Read the file back and print each line with its number
+print("文件内容:")
+with open("demo.txt", "r", encoding="utf-8") as file:
+    for line_num, line in enumerate(file, 1):  # number the lines starting from 1
+        print(f"第{line_num}行: {line.strip()}")
+
+# 3. Append one more line to the file
+with open("demo.txt", "a", encoding="utf-8") as file:
+    file.write("这是追加的内容\n")
+
+# 4. Print the content again after the append
+print("\n追加后的内容:")
+with open("demo.txt", "r", encoding="utf-8") as file:
+    print(file.read())
+
+
+# 处理不同类型的文件
+# CSV 文件
+# CSV（逗号分隔值）文件是常见的表格数据格式，适合数据交换和存储。
+# CSV 文件格式示例
+# 姓名, 年龄, 城市
+# 张三, 25, 北京
+# 李四, 30, 上海
+
+# 读写 CSV 文件
+# import csv
+
+# # 写入 CSV 文件
+# with open("data.csv", "w", newline="", encoding="utf-8") as file:
+#     writer = csv.writer(file)
+#     writer.writerow(["姓名", "年龄", "城市"])  # 写入表头
+#     writer.writerow(["张三", "25", "北京"])  # 写入数据
+#     writer.writerow(["李四", "30", "上海"])
+
+# # 读取 CSV 文件
+# with open("data.csv", "r", newline="", encoding="utf-8") as file:
+#     reader = csv.reader(file)
+#     for row in reader:
+#         print(row)
+# CSV 文件特点
+# 格式简单：逗号分隔字段，换行分隔记录
+# 兼容性好：Excel、数据库等广泛支持
+# 易于处理：Python csv 模块提供便捷操作
+# 注意编码：处理中文时务必指定 utf-8 编码
+
+# JSON 文件
+# JSON（JavaScript 对象表示法）是轻量级的数据交换格式，广泛用于数据存储和传输。
+# JSON 文件特点
+# 格式简洁：易于阅读和编写
+# 跨平台：支持多种编程语言
+# 结构化：支持复杂的数据结构
+# 广泛应用：网络通信、配置文件等
+# 读写 JSON 文件
+import json
+
+# Sample data to serialize
+data = {"name": "张三", "age": 25, "cities": ["北京", "上海", "广州"]}
+
+# Write the data to a JSON file
+with open("data.json", "w", encoding="utf-8") as file:
+    json.dump(data, file, ensure_ascii=False, indent=4)  # ensure_ascii=False keeps non-ASCII text unescaped; indent=4 pretty-prints
+
+# Read the JSON file back into Python objects and print the result
+with open("data.json", "r", encoding="utf-8") as file:
+    loaded_data = json.load(file)
+    print(loaded_data)
+# JSON 文件优势
+# 数据完整性：保持 Python 对象的完整结构
+# 可读性强：格式化后易于阅读和调试
+# 兼容性好：与 Web API 和数据库无缝对接
+# 类型映射：自动在 JSON 与 Python 基本类型之间转换（注意：元组会变为列表，非字符串键会变为字符串）
+
+# 错误处理
+# 11.1.常见文件操作异常
+# 文件操作中可能遇到的各种异常
+# 异常类型	             描述	        常见原因
+# FileNotFoundError	    文件不存在	    路径错误、文件被删除
+# PermissionError	    权限不足	    文件被占用、无写入权限
+# OSError（IOError 是其别名）	输入输出错误	磁盘空间不足、设备错误
+# UnicodeDecodeError	编码错误	    文件编码与指定编码不匹配
+
+# 异常处理示例
+try:
+    with open("nonexistent.txt", "r", encoding="utf-8") as file:
+        content = file.read()
+        print(content)
+except FileNotFoundError:
+    print("文件不存在！")
+except PermissionError:
+    print("权限不足，无法访问文件！")
+except IOError as e:
+    print(f"文件操作错误: {e}")
+except UnicodeDecodeError:
+    print("文件编码错误！")
+finally:
+    print("文件操作完成")
+
+
+# 错误处理最佳实践
+# 具体异常：捕获具体的异常类型，避免使用过于宽泛的 except
+# 异常信息：提供有意义的错误信息，便于调试
+# 资源清理：使用 with 语句自动管理资源
+# 日志记录：记录异常信息，便于问题排查
+# 用户友好：向用户提供清晰的错误提示
+
+# 12.最佳实践
+# 12.1.文件操作原则
+# 使用 with 语句：自动管理文件资源，确保正确关闭
+# 指定编码：处理中文等非ASCII字符时务必指定 utf-8
+# 异常处理：捕获并处理文件操作异常，提高程序健壮性
+# 选择合适模式：根据需求选择读取、写入或追加模式
+# 考虑文件大小：大文件使用分块读取，避免内存溢出
+
+# 12.2.性能优化建议
+# 大文件处理：使用生成器或分块读取
+# 批量操作：使用 writelines() 批量写入
+# 内存管理：及时释放不需要的文件对象
+# 缓存策略：合理使用文件缓存提高性能
+
+# 12.3.安全注意事项
+# 路径验证：检查文件路径的有效性
+# 权限检查：确保有足够的文件操作权限
+# 资源清理：使用 with 语句确保资源释放
+# 异常处理：妥善处理各种异常情况