Add full async support for NewAPI

2025-07-23 19:51:20 +08:00
parent 1c05f934fe
commit d97813ec1a
1 changed files with 176 additions and 157 deletions
--- a/newapi_provider.py
+++ b/newapi_provider.py
@@ -8,7 +8,7 @@ author_url: https://zhuangyumin.dev
 """
 import re
-import requests
+import aiohttp
 import json
 import time
 import tiktoken
@@ -150,26 +150,29 @@ class Pipe:
                "cost": 0.0
            }
-    def pipes(self) -> List[dict]:
+    async def pipes(self) -> List[dict]:
-        """Fetch available models from OpenRouter API"""
+        """Fetch available models from NewAPI asynchronously"""
        if not self.valves.NEWAPI_API_KEY:
            return [{"id": "error", "name": "API Key not provided"}]
        try:
            headers = {"Authorization": f"Bearer {self.valves.NEWAPI_API_KEY}"}
            response = requests.get(
                f"{self.valves.NEWAPI_BASE_URL}/models", headers=headers
            )
-            if response.status_code != 200:
+            async with aiohttp.ClientSession() as session:
-                return [
+                async with session.get(
-                    {
+                    f"{self.valves.NEWAPI_BASE_URL}/models",
-                        "id": "error",
+                    headers=headers,
-                        "name": f"Error fetching models: {response.status_code}",
+                    timeout=aiohttp.ClientTimeout(total=30)
-                    }
+                ) as response:
-                ]
+                    if response.status != 200:
                        return [
                            {
                                "id": "error",
                                "name": f"Error fetching models: {response.status}",
                            }
                        ]
-            models_data = response.json()
+                    models_data = await response.json()
            # Extract model information
            models = []
@@ -193,7 +196,7 @@ class Pipe:
            return [{"id": "error", "name": f"Error: {str(e)}"}]
    async def _report_api_call_direct(self, usage_info: dict, user_email: str, model_id: str, __event_emitter__: Callable[[Any], Awaitable[None]]):
-        """Report API call to upstream reporting service using direct usage information"""
+        """Report API call to upstream reporting service using direct usage information asynchronously"""
        if not self.valves.REPORT_API_URL or not self.valves.REPORT_API_KEY:
            return
@@ -214,7 +217,7 @@ class Pipe:
                "cost_usd": cost_usd
            }
-            # Send to reporting API
+            # Send to reporting API asynchronously
            headers = {
                "Authorization": f"Bearer {self.valves.REPORT_API_KEY}",
                "Content-Type": "application/json"
@@ -222,17 +225,17 @@ class Pipe:
            report_url = f"{self.valves.REPORT_API_URL.rstrip('/')}/api/record_api_call"
-            response = requests.post(
+            async with aiohttp.ClientSession() as session:
-                report_url,
+                async with session.post(
-                headers=headers,
+                    report_url,
-                json=api_call_record,
+                    headers=headers,
-                timeout=30
+                    json=api_call_record,
-            )
+                    timeout=aiohttp.ClientTimeout(total=30)
-            
+                ) as response:
-            if response.status_code == 200:
+                    if response.status == 200:
-                print(f"Successfully reported API call for user {user_email}")
+                        print(f"Successfully reported API call for user {user_email}")
-            else:
+                    else:
-                print(f"Failed to report API call: {response.status_code}")
+                        print(f"Failed to report API call: {response.status}")
            info = f"input: {input_tokens} | output: {output_tokens} | cost: {cost_usd:.6f}"
            await __event_emitter__(
@@ -300,7 +303,7 @@ class Pipe:
                return self.stream_response(url, headers, payload, user_email, model_id, __event_emitter__, model_name)
            else:
                return await self.non_stream_response(url, headers, payload, user_email, model_id, __event_emitter__, model_name)
-        except requests.exceptions.RequestException as e:
+        except aiohttp.ClientError as e:
            print(f"Request failed: {e}")
            return f"Error: Request failed: {e}"
        except Exception as e:
@@ -311,33 +314,40 @@ class Pipe:
        """Handle non-streaming responses and wrap reasoning in <think> tags if present"""
        try:
            print(
-                f"Sending non-streaming request to OpenRouter: {json.dumps(payload)[:200]}..."
+                f"Sending non-streaming request to NewAPI: {json.dumps(payload)[:200]}..."
            )
            response = requests.post(url, headers=headers, json=payload, timeout=90)
-            if response.status_code != 200:
+            async with aiohttp.ClientSession() as session:
-                error_message = f"HTTP Error {response.status_code}"
+                async with session.post(
-                try:
+                    url,
-                    error_data = response.json()
+                    headers=headers,
-                    print(f"Error response: {json.dumps(error_data)}")
+                    json=payload,
-                    if "error" in error_data:
+                    timeout=aiohttp.ClientTimeout(total=90)
-                        if (
+                ) as response:
-                            isinstance(error_data["error"], dict)
+                    if response.status != 200:
-                            and "message" in error_data["error"]
+                        error_message = f"HTTP Error {response.status}"
-                        ):
+                        try:
-                            error_message += f": {error_data['error']['message']}"
+                            error_data = await response.json()
-                        else:
+                            print(f"Error response: {json.dumps(error_data)}")
-                            error_message += f": {error_data['error']}"
+                            if "error" in error_data:
-                except Exception as e:
+                                if (
-                    print(f"Failed to parse error response: {e}")
+                                    isinstance(error_data["error"], dict)
-                    error_message += f": {response.text[:500]}"
+                                    and "message" in error_data["error"]
                                ):
                                    error_message += f": {error_data['error']['message']}"
                                else:
                                    error_message += f": {error_data['error']}"
                        except Exception as e:
                            print(f"Failed to parse error response: {e}")
                            error_text = await response.text()
                            error_message += f": {error_text[:500]}"
-                # Log request payload for debugging
+                        # Log request payload for debugging
-                print(f"Request that caused error: {json.dumps(payload)}")
+                        print(f"Request that caused error: {json.dumps(payload)}")
-                raise Exception(error_message)
+                        raise Exception(error_message)
-            res = response.json()
+                    res = await response.json()
-            print(f"OpenRouter response keys: {list(res.keys())}")
+                    print(f"NewAPI response keys: {list(res.keys())}")
            # Check if we have choices in the response
            if not res.get("choices") or len(res["choices"]) == 0:
@@ -384,122 +394,131 @@ class Pipe:
    async def stream_response(self, url, headers, payload, user_email, model_id, __event_emitter__: Callable[[Any], Awaitable[None]], model_name: str):
        """Stream reasoning tokens in real-time with proper tag management"""
        try:
-            response = requests.post(
+            async with aiohttp.ClientSession() as session:
-                url, headers=headers, json=payload, stream=True, timeout=90
+                async with session.post(
-            )
+                    url,
-
+                    headers=headers,
-            if response.status_code != 200:
+                    json=payload,
-                error_message = f"HTTP Error {response.status_code}"
+                    timeout=aiohttp.ClientTimeout(total=90)
-                try:
+                ) as response:
-                    error_data = response.json()
+                    if response.status != 200:
-                    error_message += (
+                        error_message = f"HTTP Error {response.status}"
                        f": {error_data.get('error', {}).get('message', '')}"
                    )
                except:
                    pass
                raise Exception(error_message)
            # State tracking
            in_reasoning_state = False  # True if we've output the opening <think> tag
            latest_citations = []  # The latest citations list
            accumulated_content = ""  # Accumulate all content for token calculation
            accumulated_reasoning = ""  # Accumulate all reasoning for token calculation
            # Process the response stream
            for line in response.iter_lines():
                if not line:
                    continue
                line_text = line.decode("utf-8")
                if not line_text.startswith("data: "):
                    continue
                elif line_text == "data: [DONE]":
                    # Handle citations at the end
                    if latest_citations:
                        citation_list = [f"1. {l}" for l in latest_citations]
                        citation_list_str = "\n".join(citation_list)
                        yield f"\n\n---\nCitations:\n{citation_list_str}"
                    # Calculate usage information using tiktoken and report
                    if user_email and model_id:
                        messages = payload.get("messages", [])
                        final_response = ""
                        if accumulated_reasoning and accumulated_content:
                            final_response = f"<think>\n{accumulated_reasoning}\n</think>\n\n{accumulated_content}"
                        elif accumulated_reasoning:
                            final_response = f"<think>\n{accumulated_reasoning}\n</think>\n\n"
                        elif accumulated_content:
                            final_response = accumulated_content
                        usage_info = self._calculate_tokens_and_cost(messages, final_response, model_name)
                        try:
-                            await self._report_api_call_direct(usage_info, user_email, model_id, __event_emitter__)
+                            error_data = await response.json()
-                        except Exception as e:
+                            error_message += (
-                            print(f"Error reporting API call: {e}")
+                                f": {error_data.get('error', {}).get('message', '')}"
-                            yield f"Error: {e}"
+                            )
                        except:
                            pass
                        raise Exception(error_message)
-                    # Stop processing after [DONE]
+                    # State tracking
-                    break
+                    in_reasoning_state = False  # True if we've output the opening <think> tag
                    latest_citations = []  # The latest citations list
                    accumulated_content = ""  # Accumulate all content for token calculation
                    accumulated_reasoning = ""  # Accumulate all reasoning for token calculation
-                try:
+                    # Process the response stream asynchronously
-                    chunk = json.loads(line_text[6:])
+                    async for line_bytes in response.content:
                        if not line_bytes:
                            continue
-                    if "choices" in chunk and chunk["choices"]:
+                        line_text = line_bytes.decode("utf-8").strip()
                        choice = chunk["choices"][0]
                        citations = chunk.get("citations") or []
-                        # Update the citation list
+                        # Handle multiple lines in a single chunk
-                        if citations:
+                        for line in line_text.split('\n'):
-                            latest_citations = citations
+                            if not line.strip():
                                continue
-                        # Check for reasoning tokens
+                            if not line.startswith("data: "):
-                        reasoning_text = None
+                                continue
-                        if "delta" in choice and "reasoning" in choice["delta"]:
+                            elif line == "data: [DONE]":
-                            reasoning_text = choice["delta"]["reasoning"]
+                                # Handle citations at the end
-                        elif "message" in choice and "reasoning" in choice["message"]:
+                                if latest_citations:
-                            reasoning_text = choice["message"]["reasoning"]
+                                    citation_list = [f"1. {l}" for l in latest_citations]
                                    citation_list_str = "\n".join(citation_list)
                                    yield f"\n\n---\nCitations:\n{citation_list_str}"
-                        # Check for content tokens
+                                # Calculate usage information using tiktoken and report
-                        content_text = None
+                                if user_email and model_id:
-                        if "delta" in choice and "content" in choice["delta"]:
+                                    messages = payload.get("messages", [])
-                            content_text = choice["delta"]["content"]
+                                    final_response = ""
-                        elif "message" in choice and "content" in choice["message"]:
+                                    if accumulated_reasoning and accumulated_content:
-                            content_text = choice["message"]["content"]
+                                        final_response = f"<think>\n{accumulated_reasoning}\n</think>\n\n{accumulated_content}"
                                    elif accumulated_reasoning:
                                        final_response = f"<think>\n{accumulated_reasoning}\n</think>\n\n"
                                    elif accumulated_content:
                                        final_response = accumulated_content
-                        # Handle reasoning tokens
+                                    usage_info = self._calculate_tokens_and_cost(messages, final_response, model_name)
                        if reasoning_text:
                            # Accumulate reasoning for token calculation
                            accumulated_reasoning += reasoning_text
-                            # If first reasoning token, output opening tag
+                                    try:
-                            if not in_reasoning_state:
+                                        await self._report_api_call_direct(usage_info, user_email, model_id, __event_emitter__)
-                                yield "<think>\n"
+                                    except Exception as e:
-                                in_reasoning_state = True
+                                        print(f"Error reporting API call: {e}")
                                        yield f"Error: {e}"
-                            # Output the reasoning token
+                                # Stop processing after [DONE]
-                            yield _insert_citations(reasoning_text, citations)
+                                break
-                        # Handle content tokens
+                            try:
-                        if content_text:
+                                chunk = json.loads(line[6:])
                            # Accumulate content for token calculation
                            accumulated_content += content_text
-                            # If transitioning from reasoning to content, close the thinking tag
+                                if "choices" in chunk and chunk["choices"]:
-                            if in_reasoning_state:
+                                    choice = chunk["choices"][0]
-                                yield "\n</think>\n\n"
+                                    citations = chunk.get("citations") or []
                                in_reasoning_state = False
-                            # Output the content
+                                    # Update the citation list
-                            if content_text:
+                                    if citations:
-                                yield _insert_citations(content_text, citations)
+                                        latest_citations = citations
-                except Exception as e:
+                                    # Check for reasoning tokens
-                    print(f"Error processing chunk: {e}")
+                                    reasoning_text = None
                                    if "delta" in choice and "reasoning" in choice["delta"]:
                                        reasoning_text = choice["delta"]["reasoning"]
                                    elif "message" in choice and "reasoning" in choice["message"]:
                                        reasoning_text = choice["message"]["reasoning"]
-            # If we're still in reasoning state at the end, close the tag
+                                    # Check for content tokens
-            if in_reasoning_state:
+                                    content_text = None
-                yield "\n</think>\n\n"
+                                    if "delta" in choice and "content" in choice["delta"]:
                                        content_text = choice["delta"]["content"]
                                    elif "message" in choice and "content" in choice["message"]:
                                        content_text = choice["message"]["content"]
                                    # Handle reasoning tokens
                                    if reasoning_text:
                                        # Accumulate reasoning for token calculation
                                        accumulated_reasoning += reasoning_text
                                        # If first reasoning token, output opening tag
                                        if not in_reasoning_state:
                                            yield "<think>\n"
                                            in_reasoning_state = True
                                        # Output the reasoning token
                                        yield _insert_citations(reasoning_text, citations)
                                    # Handle content tokens
                                    if content_text:
                                        # Accumulate content for token calculation
                                        accumulated_content += content_text
                                        # If transitioning from reasoning to content, close the thinking tag
                                        if in_reasoning_state:
                                            yield "\n</think>\n\n"
                                            in_reasoning_state = False
                                        # Output the content
                                        if content_text:
                                            yield _insert_citations(content_text, citations)
                            except Exception as e:
                                print(f"Error processing chunk: {e}")
                    # If we're still in reasoning state at the end, close the tag
                    if in_reasoning_state:
                        yield "\n</think>\n\n"
        except Exception as e:
            print(f"Error in stream_response: {e}")