commit 4354e28163
parent 2762ae9e13
Date: 2025-07-23 19:18:53 +08:00

    basically OK


@@ -91,17 +91,14 @@ class Pipe:
             }
         }

-    def _calculate_tokens_and_cost(self, messages: list, response_text: str, model_id: str) -> dict:
+    def _calculate_tokens_and_cost(self, messages: list, response_text: str, model_name: str) -> dict:
         """Calculate token count and cost using tiktoken"""
         try:
             # Get appropriate encoding for the model
-            if "gpt-4" in model_id.lower():
-                encoding = tiktoken.encoding_for_model("gpt-4")
-            elif "gpt-3.5" in model_id.lower():
-                encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
-            else:
-                # Default to gpt-4 encoding for other models
-                encoding = tiktoken.encoding_for_model("gpt-4")
+            try:
+                encoding = tiktoken.encoding_for_model(model_name)
+            except KeyError:
+                encoding = tiktoken.encoding_for_model("gpt-4o")

             # Calculate input tokens from messages
             input_tokens = 0
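The rewritten lookup lets tiktoken map the model name itself and only falls back when the name is unknown (`encoding_for_model` raises `KeyError` in that case). A minimal standalone sketch of the same pattern, with an illustrative model name:

```python
import tiktoken

def resolve_encoding(model_name: str):
    """Ask tiktoken for the model's encoding; fall back to gpt-4o's
    encoding (o200k_base) for names tiktoken does not recognize."""
    try:
        return tiktoken.encoding_for_model(model_name)
    except KeyError:
        return tiktoken.encoding_for_model("gpt-4o")

enc = resolve_encoding("claude-3-5-sonnet")  # unknown to tiktoken -> fallback
print(len(enc.encode("hello world")))
```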
@@ -121,7 +118,7 @@ class Pipe:
             output_tokens = len(encoding.encode(response_text)) if response_text else 0

             # Get pricing for the model
-            pricing = self.pricing_dict.get(model_id, {"input": 2.5, "output": 10})  # Default to gpt-4o pricing
+            pricing = self.pricing_dict.get(model_name)

             # Calculate cost (pricing is per 1M tokens)
             input_cost = (input_tokens / 1_000_000) * pricing["input"]
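Note that the new lookup drops the old `{"input": 2.5, "output": 10}` default, so a model missing from `pricing_dict` now yields `pricing = None` and the cost lines below raise `TypeError`. A hedged sketch of a guard that keeps the removed fallback (the function and names are illustrative, not from the diff):

```python
def get_pricing(pricing_dict: dict, model_name: str) -> dict:
    """Per-1M-token pricing with the old gpt-4o default restored,
    so an unlisted model never produces a None subscript error."""
    return pricing_dict.get(model_name) or {"input": 2.5, "output": 10}

pricing = get_pricing({"gpt-4o": {"input": 2.5, "output": 10}}, "unlisted-model")
input_cost = (1_000 / 1_000_000) * pricing["input"]  # cost of 1k input tokens
print(input_cost)
```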
@@ -287,12 +284,14 @@ class Pipe:
        headers["X-Title"] = "Open WebUI via Pipe"

        url = f"{self.valves.NEWAPI_BASE_URL}/chat/completions"
+        model_name = payload['model']
+        print(f"model name in body is {model_name}")

        try:
            if body.get("stream", False):
-                return self.stream_response(url, headers, payload, user_email, model_id, __event_emitter__)
+                return self.stream_response(url, headers, payload, user_email, model_id, __event_emitter__, model_name)
            else:
-                return await self.non_stream_response(url, headers, payload, user_email, model_id, __event_emitter__)
+                return await self.non_stream_response(url, headers, payload, user_email, model_id, __event_emitter__, model_name)
        except requests.exceptions.RequestException as e:
            print(f"Request failed: {e}")
            return f"Error: Request failed: {e}"
@@ -300,7 +299,7 @@ class Pipe:
            print(f"Error in pipe method: {e}")
            return f"Error: {e}"

-    async def non_stream_response(self, url, headers, payload, user_email, model_id, __event_emitter__: Callable[[Any], Awaitable[None]]):
+    async def non_stream_response(self, url, headers, payload, user_email, model_id, __event_emitter__: Callable[[Any], Awaitable[None]], model_name: str):
        """Handle non-streaming responses and wrap reasoning in <think> tags if present"""
        try:
            print(
@@ -361,7 +360,7 @@ class Pipe:
            # Calculate usage information using tiktoken
            if user_email and model_id:
                messages = payload.get("messages", [])
-                usage_info = self._calculate_tokens_and_cost(messages, final_response, model_id)
+                usage_info = self._calculate_tokens_and_cost(messages, final_response, model_name)

                try:
                    await self._report_api_call_direct(usage_info, user_email, model_id, __event_emitter__)
@@ -374,7 +373,7 @@ class Pipe:
            print(f"Error in non_stream_response: {e}")
            return f"Error: {e}"

-    async def stream_response(self, url, headers, payload, user_email, model_id, __event_emitter__: Callable[[Any], Awaitable[None]]):
+    async def stream_response(self, url, headers, payload, user_email, model_id, __event_emitter__: Callable[[Any], Awaitable[None]], model_name: str):
        """Stream reasoning tokens in real-time with proper tag management"""
        try:
            response = requests.post(
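The body of `stream_response` is not shown beyond the `requests.post(` call, but for an OpenAI-compatible endpoint the loop it runs presumably resembles the following sketch (SSE framing and field names follow the standard chat-completions stream; nothing here is taken from the hidden body of the method):

```python
import json
import requests

def stream_chat(url: str, headers: dict, payload: dict):
    """Sketch of consuming a chat-completions SSE stream: POST with
    stream=True, yield each delta's content, stop at [DONE]."""
    with requests.post(url, headers=headers, json=payload, stream=True) as resp:
        resp.raise_for_status()
        for raw in resp.iter_lines():
            if not raw or not raw.startswith(b"data: "):
                continue
            data = raw[len(b"data: "):]
            if data == b"[DONE]":
                break
            delta = json.loads(data)["choices"][0].get("delta", {})
            if delta.get("content"):
                yield delta["content"]
```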
@@ -424,7 +423,7 @@ class Pipe:
            elif accumulated_content:
                final_response = accumulated_content

-                usage_info = self._calculate_tokens_and_cost(messages, final_response, model_id)
+                usage_info = self._calculate_tokens_and_cost(messages, final_response, model_name)

                try:
                    await self._report_api_call_direct(usage_info, user_email, model_id, __event_emitter__)
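Taken together, the hunks reroute token accounting from `model_id` to the payload's model name end to end. A self-contained sketch of the resulting flow (the return-dict fields are assumed; only the call shapes come from the diff):

```python
import tiktoken

def calculate_tokens_and_cost(messages: list, response_text: str,
                              model_name: str, pricing_dict: dict) -> dict:
    """Standalone version of the reworked path: encoding and pricing
    both keyed by the payload's model name."""
    try:
        encoding = tiktoken.encoding_for_model(model_name)
    except KeyError:
        encoding = tiktoken.encoding_for_model("gpt-4o")
    input_tokens = sum(len(encoding.encode(m["content"]))
                       for m in messages if isinstance(m.get("content"), str))
    output_tokens = len(encoding.encode(response_text)) if response_text else 0
    pricing = pricing_dict.get(model_name) or {"input": 2.5, "output": 10}
    return {
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "total_cost": (input_tokens / 1_000_000) * pricing["input"]
                      + (output_tokens / 1_000_000) * pricing["output"],
    }

print(calculate_tokens_and_cost(
    [{"role": "user", "content": "hello"}], "hi there", "gpt-4o",
    {"gpt-4o": {"input": 2.5, "output": 10}},
))
```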