feat: rag

This commit is contained in:
heyong.fu
2026-05-06 11:35:10 +08:00
commit a17c65c4bc
75 changed files with 5196 additions and 0 deletions
+47
View File
@@ -0,0 +1,47 @@
from openai import OpenAI
# client = OpenAI(
# base_url="https://api.deepseek.com/v1",
# api_key="<REDACTED>",  # NOTE: a real key was committed here; revoke it and load keys from the environment instead
# )
# response = client.chat.completions.create(
# model="deepseek-chat",
# messages=[
# {"role": "system", "content": "你是谁"},
# ],
# stream=True,
# )
# for chunk in response:
# if chunk.choices[0].delta.content is not None:
# print(chunk.choices[0].delta.content, end="", flush=True)
# Use the hand-written client from openai_client instead of the official SDK.
import os

from openai_client import OpenAI

# Never commit credentials: the key is read from the environment.
# NOTE(review): when DEEPSEEK_API_KEY is unset this passes None; the client
# constructor is then expected to apply its own fallback or reject the call.
client = OpenAI(
    base_url="https://api.deepseek.com",
    api_key=os.getenv("DEEPSEEK_API_KEY"),
)
response = client.chat.completions.create(
    model="deepseek-chat",
    messages=[
        {"role": "system", "content": "西游记作者是谁"},
    ],
    stream=True,
)
# Non-streaming equivalent: print(response.choices[0].message.content)
for chunk in response:
    # A chunk may arrive with an empty `choices` list; indexing it would raise.
    if not chunk.choices:
        continue
    text = chunk.choices[0].delta.content
    # Deltas without text (content is None) are skipped.
    if text is not None:
        print(text, end="", flush=True)
# ChatCompletion(
# id='f8170d75-875c-4b46-bd3b-82a93d6be4c0',
# choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='你好!我是DeepSeek,一个由深度求索公司创造的AI助手。😊\n\n我是一个纯文本模型,虽然不支持多模态识别功能,但我有文件上传功能,可以帮你处理图像、txt、pdf、ppt、word、excel等文件,并从中读取文字信息进行分析处理。我完全免费使用,拥有128K的上下文长度,还支持联网搜索功能(需要你在Web/App中手动点开联网搜索按键)。\n\n你可以通过官方应用商店下载我的App来使用我。我很乐意帮助你解答问题、处理文档、进行对话交流等等!\n\n有什么我可以帮助你的吗?无论是学习、工作还是日常问题,我都很愿意为你提供帮助!✨', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None))],
# created=1765348625,
# model='deepseek-chat',
# object='chat.completion',
# service_tier=None,
# system_fingerprint='fp_eaab8d114b_prod0820_fp8_kvcache',
# usage=CompletionUsage(completion_tokens=143, prompt_tokens=4, total_tokens=147, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetails(audio_tokens=None, cached_tokens=0), prompt_cache_hit_tokens=0, prompt_cache_miss_tokens=4))
+20
View File
@@ -0,0 +1,20 @@
from openai import OpenAI
from dotenv import load_dotenv
import os
# Pull settings from a local .env file into the process environment.
load_dotenv()

# NOTE(review): despite its name, DATABASE_URL is passed as the API base URL.
DATABASE_URL = os.environ.get("DATABASE_URL")
API_KEY = os.environ.get("API_KEY")

client = OpenAI(api_key=API_KEY, base_url=DATABASE_URL)

# One-shot (non-streaming) request; the whole response object is printed.
response = client.chat.completions.create(
    messages=[{"role": "system", "content": "你是谁"}],
    model="doubao-seed-1-6-lite-251015",
)
print(response)
+175
View File
@@ -0,0 +1,175 @@
# A small wrapper client that provides one unified way to call OpenAI-compatible chat APIs.
from typing import Optional, Iterator
import os
import requests
import json
class Message:
    """A complete chat message read from a raw response dict."""

    def __init__(self, data):
        # Absent keys become None rather than raising.
        self.content, self.role = data.get("content"), data.get("role")
class Choice:
    """One entry of the `choices` list in a non-streaming response."""

    def __init__(self, choice):
        message_payload = choice.get("message", {})
        self.index = choice.get("index")
        self.finish_reason = choice.get("finish_reason")
        # Wrap the raw message dict so callers can use attribute access.
        self.message = Message(message_payload)
class ChatCompletionResponse:
    """Attribute-style view over the decoded JSON of a non-streaming completion.

    Args:
        data: The decoded JSON object (a dict) of the response body.

    Attributes:
        id, object, created, model: Copied from ``data`` (None when absent).
        choices: List of ``Choice`` objects, empty when ``choices`` is
            missing or null.
        usage: Dict with ``prompt_tokens``, ``completion_tokens`` and
            ``total_tokens``; each value is None when not reported.
    """

    _USAGE_KEYS = ("prompt_tokens", "completion_tokens", "total_tokens")

    def __init__(self, data) -> None:
        self.id = data.get("id")
        self.object = data.get("object")
        self.created = data.get("created")
        self.model = data.get("model")
        # `or` instead of a .get() default: the default only covers a missing
        # key, while an explicit JSON null would still come back as None.
        choices_data = data.get("choices") or []
        self.choices = [Choice(choice) for choice in choices_data]
        usage_data = data.get("usage") or {}
        self.usage = {key: usage_data.get(key) for key in self._USAGE_KEYS}
class DeltaMessage:
    """The `delta` fragment carried by one choice of a streamed chunk."""

    def __init__(self, data) -> None:
        content = data.get("content")
        role = data.get("role")
        # Either field may be None when the fragment does not include it.
        self.content = content
        self.role = role
class DeltaChoice:
    """One entry of the `choices` list in a streamed chunk."""

    def __init__(self, data):
        delta_payload = data.get("delta", {})
        self.index = data.get("index")
        self.finish_reason = data.get("finish_reason")
        # Wrap the raw delta dict for attribute access.
        self.delta = DeltaMessage(delta_payload)
class StreamChunk:
    """One parsed data event of a streaming chat completion."""

    def __init__(self, data):
        # Scalar metadata is copied verbatim; absent keys become None.
        for field in ("id", "object", "created", "model"):
            setattr(self, field, data.get(field))
        raw_choices = data.get("choices", [])
        self.choices = [DeltaChoice(entry) for entry in raw_choices]
class Stream:
    """Iterable over the chunks of a streaming (SSE) chat-completion response.

    Can be used as a context manager; the underlying HTTP response is closed
    on exit and also whenever iteration finishes or is abandoned.

    Args:
        response: The streaming HTTP response to read lines from. It must
            provide ``iter_lines()`` and ``close()``.
    """

    def __init__(self, response: "requests.Response"):
        self.response = response

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.response.close()

    def __iter__(self) -> "Iterator[StreamChunk]":
        """Yield one ``StreamChunk`` per ``data:`` line until ``[DONE]``.

        Example of a line as sent by the server:
        data: {"id":"...","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"观点"},"finish_reason":null}]}
        """
        try:
            # Read raw bytes and decode here as UTF-8 (the encoding SSE uses).
            # Letting the HTTP library decode relies on the charset guessed
            # from the headers, which can garble non-ASCII text, and splits
            # lines on Unicode separators that may occur inside JSON strings.
            for raw_line in self.response.iter_lines():
                if isinstance(raw_line, bytes):
                    line = raw_line.decode("utf-8", errors="replace")
                else:
                    line = raw_line
                # Blank separators, comments and other SSE fields are ignored.
                # Both "data: x" and "data:x" are valid, so match without
                # requiring the space.
                if not line.startswith("data:"):
                    continue
                payload = line[len("data:"):].strip()
                if not payload:
                    continue
                # The [DONE] sentinel marks the end of the stream.
                if payload == "[DONE]":
                    break
                try:
                    data = json.loads(payload)
                except json.JSONDecodeError:
                    # Best effort: skip a malformed event instead of aborting.
                    continue
                yield StreamChunk(data)
        finally:
            self.response.close()
class ChatCompletions:
    """Calls the `/chat/completions` endpoint using a client's settings."""

    def __init__(self, client):
        # The client supplies base_url, api_key and timeout.
        self._client = client

    def create(
        self,
        model,
        messages,
        max_tokens=1024,
        temperature=0.7,
        stream: bool = False,
        **kwargs,
    ):
        """Send one chat-completion request.

        Args:
            model: Model name placed in the request body.
            messages: Message list placed in the request body.
            max_tokens: Sent only when not None.
            temperature: Sent only when not None.
            stream: When truthy, request a streamed response.
            **kwargs: Extra body fields; merged last, so they override the
                fields above on a name clash.

        Returns:
            A ``Stream`` when ``stream`` is truthy, otherwise a
            ``ChatCompletionResponse`` built from the JSON body.

        Raises:
            Whatever ``raise_for_status()`` raises for a non-success status.
        """
        config = self._client
        endpoint = f"{config.base_url}/chat/completions"

        payload = {"model": model, "messages": messages}
        optional_fields = (("max_tokens", max_tokens), ("temperature", temperature))
        for field_name, field_value in optional_fields:
            if field_value is not None:
                payload[field_name] = field_value
        if stream:
            payload["stream"] = True
        payload.update(kwargs)

        request_options = {
            "headers": {
                "Content-Type": "application/json",
                "Accept": "application/json",
                "Authorization": f"Bearer {config.api_key}",
            },
            "json": payload,
            "timeout": config.timeout,
        }
        if stream:
            # Keep the connection open so the body can be read incrementally.
            request_options["stream"] = True

        http_response = requests.post(endpoint, **request_options)
        # Fail fast on 4xx/5xx before trying to interpret the body.
        http_response.raise_for_status()
        if stream:
            return Stream(http_response)
        return ChatCompletionResponse(http_response.json())
class ChatResource:
    """Groups the chat-related endpoints of a client."""

    def __init__(self, client):
        self.client = client

    @property
    def completions(self):
        """Return a new ``ChatCompletions`` bound to the stored client."""
        endpoint = ChatCompletions(self.client)
        return endpoint
class OpenAI:
    """Connection settings plus the `chat` entry point of the client.

    Args:
        base_url: API root; trailing "/" characters are stripped.
        api_key: Secret key. When falsy, the ``OPENAI_API_KEY`` environment
            variable is used instead.
        timeout: Stored as ``timeout`` on the instance.

    Raises:
        ValueError: If neither ``api_key`` nor the environment variable
            provides a non-empty key.
    """

    def __init__(
        self,
        base_url: str = "https://api.deepseek.com/v1",
        api_key: Optional[str] = None,
        timeout: float = 60.0,
    ):
        resolved_key = api_key if api_key else os.getenv("OPENAI_API_KEY")
        self.api_key = resolved_key
        if not resolved_key:
            missing_key_message = (
                "API秘钥未设置,请设置api_key参数或设置环境变量OPENAI_API_KEY"
            )
            raise ValueError(missing_key_message)
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout

    # Exposed as a property so calls read as `client.chat.completions...`.
    @property
    def chat(self):
        """Return a new ``ChatResource`` bound to this client."""
        resource = ChatResource(self)
        return resource
+136
View File
@@ -0,0 +1,136 @@
from typing import Optional,Iterator
import os
import requests
import json
class Message:
    """A complete chat message read from a raw response dict.

    Args:
        data: Dict that may contain ``role`` and ``content``; absent keys
            are stored as None.
    """

    def __init__(self, data):
        # No trailing comma here: `x = value,` would store a 1-tuple
        # instead of the role string.
        self.role = data.get("role")
        self.content = data.get("content")
class Choice:
    """One entry of the `choices` list in a non-streaming response."""

    def __init__(self, data):
        message_payload = data.get("message", {})
        self.index = data.get("index")
        # Wrap the raw message dict so callers can use attribute access.
        self.message = Message(message_payload)
        self.finish_reason = data.get("finish_reason")
class ChatCompletionResponse:
    """Attribute-style view over the decoded JSON of a non-streaming completion.

    Args:
        data: The decoded JSON object (a dict) of the response body.

    Attributes:
        id, object, created, model: Copied from ``data`` (None when absent).
        choices: List of ``Choice`` objects, empty when ``choices`` is
            missing or null.
        usage: Dict with ``prompt_tokens``, ``completion_tokens`` and
            ``total_tokens``; each value is None when not reported.
    """

    _USAGE_KEYS = ("prompt_tokens", "completion_tokens", "total_tokens")

    def __init__(self, data):
        self.id = data.get("id")
        self.object = data.get("object")
        self.created = data.get("created")
        self.model = data.get("model")
        # `or` instead of a .get() default: the default only covers a missing
        # key, while an explicit JSON null would still come back as None.
        choices_data = data.get("choices") or []
        self.choices = [Choice(choice_data) for choice_data in choices_data]
        usage_data = data.get("usage") or {}
        self.usage = {key: usage_data.get(key) for key in self._USAGE_KEYS}
class DeltaMessage:
    """The `delta` fragment carried by one choice of a streamed chunk."""

    def __init__(self, data):
        # Either field may be None when the fragment does not include it.
        self.content, self.role = data.get("content"), data.get("role")
class DeltaChoice:
    """One entry of the `choices` list in a streamed chunk."""

    def __init__(self, data):
        delta_payload = data.get("delta", {})
        self.index = data.get("index")
        # Wrap the raw delta dict for attribute access.
        self.delta = DeltaMessage(delta_payload)
        self.finish_reason = data.get("finish_reason")
# A streamed data block: represents one chunk of a streaming response.
class StreamChunk:
    """One parsed data event of a streaming chat completion."""

    def __init__(self, data):
        # Scalar metadata is copied verbatim; absent keys become None.
        for field in ("id", "object", "created", "model"):
            setattr(self, field, data.get(field))
        raw_choices = data.get("choices", [])
        self.choices = [DeltaChoice(entry) for entry in raw_choices]
class Stream:
    """Iterable over the chunks of a streaming (SSE) chat-completion response.

    Can be used as a context manager; the underlying HTTP response is closed
    on exit and also whenever iteration finishes or is abandoned.

    Args:
        response: The streaming HTTP response to read lines from. It must
            provide ``iter_lines()`` and ``close()``.
    """

    def __init__(self, response: "requests.Response"):
        self.response = response

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.response.close()

    def __iter__(self) -> "Iterator[StreamChunk]":
        """Yield one ``StreamChunk`` per ``data:`` line until ``[DONE]``."""
        try:
            # Read raw bytes and decode here as UTF-8 (the encoding SSE uses).
            # Letting the HTTP library decode relies on the charset guessed
            # from the headers, which can garble non-ASCII text, and splits
            # lines on Unicode separators that may occur inside JSON strings.
            for raw_line in self.response.iter_lines():
                if isinstance(raw_line, bytes):
                    line = raw_line.decode("utf-8", errors="replace")
                else:
                    line = raw_line
                # Blank separators, comments and other SSE fields are ignored.
                # Both "data: x" and "data:x" are valid, so match without
                # requiring the space.
                if not line.startswith("data:"):
                    continue
                payload = line[len("data:"):].strip()
                if not payload:
                    continue
                # The [DONE] sentinel marks the end of the streamed output.
                if payload == "[DONE]":
                    break
                try:
                    data = json.loads(payload)
                except json.JSONDecodeError:
                    # Best effort: skip a malformed event instead of aborting.
                    continue
                yield StreamChunk(data)
        finally:
            self.response.close()
class ChatCompletions:
    """Calls the `/chat/completions` endpoint using a client's settings."""

    def __init__(self, client):
        # The client supplies base_url, api_key and timeout.
        self._client = client

    def create(
        self,
        model,
        messages,
        max_tokens=1024,
        temperature=0.7,
        stream: bool = False,
        **kwargs,
    ):
        """Send one chat-completion request.

        Args:
            model: Model name placed in the request body.
            messages: Message list placed in the request body.
            max_tokens: Sent only when not None.
            temperature: Sent only when not None.
            stream: When truthy, request a streamed response.
            **kwargs: Extra body fields; merged last, so they override the
                fields above on a name clash.

        Returns:
            A ``Stream`` when ``stream`` is truthy, otherwise a
            ``ChatCompletionResponse`` built from the JSON body.

        Raises:
            Whatever ``raise_for_status()`` raises for a non-2xx status.
        """
        config = self._client
        endpoint = f"{config.base_url}/chat/completions"

        payload = {"model": model, "messages": messages}
        optional_fields = (("max_tokens", max_tokens), ("temperature", temperature))
        for field_name, field_value in optional_fields:
            if field_value is not None:
                payload[field_name] = field_value
        if stream:
            payload["stream"] = True
        payload.update(kwargs)

        request_options = {
            "headers": {
                "Authorization": f"Bearer {config.api_key}",
                "Content-Type": "application/json",
            },
            "json": payload,
            "timeout": config.timeout,
        }
        if stream:
            # Keep the connection open so the body can be read incrementally.
            request_options["stream"] = True

        http_response = requests.post(endpoint, **request_options)
        # Raise on any non-2xx status before interpreting the body.
        http_response.raise_for_status()
        if stream:
            return Stream(http_response)
        return ChatCompletionResponse(http_response.json())
class ChatResource:
    """Groups the chat-related endpoints of a client."""

    def __init__(self, client):
        self._client = client

    @property
    def completions(self) -> ChatCompletions:
        """Return a new ``ChatCompletions`` bound to the stored client."""
        endpoint = ChatCompletions(self._client)
        return endpoint
class OpenAI:
    """Connection settings plus the `chat` entry point of the client.

    Args:
        api_key: Secret key. When falsy, the ``OPENAI_API_KEY`` environment
            variable is used instead.
        base_url: API root; trailing "/" characters are stripped.
        timeout: Stored as ``timeout`` on the instance.

    Raises:
        ValueError: If neither ``api_key`` nor the environment variable
            provides a non-empty key.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: str = "https://api.openai.com/v1",
        timeout: float = 60.0,
    ):
        resolved_key = api_key if api_key else os.getenv("OPENAI_API_KEY")
        self.api_key = resolved_key
        if not resolved_key:
            missing_key_message = (
                "API密钥未设置,请设置api_key参数或者环境变量OPENAI_API_KEY"
            )
            raise ValueError(missing_key_message)
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout

    @property
    def chat(self) -> ChatResource:
        """Return a new ``ChatResource`` bound to this client."""
        resource = ChatResource(self)
        return resource