Commit 1dee3f1

ahmedkhaleel2004#33 temporarily move to o1-mini
1 parent 3a2391d commit 1dee3f1

7 files changed: +210 -197 lines

.env.example

Lines changed: 6 additions & 5 deletions
@@ -1,10 +1,11 @@
 POSTGRES_URL="postgresql://postgres:password@localhost:5432/gitdiagram"
 NEXT_PUBLIC_API_DEV_URL=http://localhost:8000
 
-# backend
-ANTHROPIC_API_KEY=
-OPENROUTER_API_KEY=
-# could probably also add OPENAI_API_KEY since its an easy swap
+OPENAI_API_KEY=
 
 # OPTIONAL: providing your own GitHub PAT increases rate limits from 60/hr to 5000/hr to the GitHub API
-GITHUB_PAT=
+GITHUB_PAT=
+
+# old implementation
+# OPENROUTER_API_KEY=
+# ANTHROPIC_API_KEY=
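
After this change the backend reads a single OPENAI_API_KEY. A minimal sketch of how the key would be picked up at startup, assuming python-dotenv (generate.py below already calls load_dotenv):

    # sketch: reading the new key, assuming python-dotenv as used in the backend
    import os
    from dotenv import load_dotenv

    load_dotenv()  # loads .env into the process environment
    openai_api_key = os.getenv("OPENAI_API_KEY")  # None if unset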

backend/app/routers/generate.py

Lines changed: 14 additions & 174 deletions
@@ -2,7 +2,7 @@
 from fastapi.responses import StreamingResponse
 from dotenv import load_dotenv
 from app.services.github_service import GitHubService
-from app.services.o3_mini_openrouter_service import OpenRouterO3Service
+from app.services.o1_mini_openai_service import OpenAIO1Service
 from app.prompts import (
     SYSTEM_FIRST_PROMPT,
     SYSTEM_SECOND_PROMPT,
@@ -25,7 +25,7 @@
 
 # Initialize services
 # claude_service = ClaudeService()
-o3_service = OpenRouterO3Service()
+o1_service = OpenAIO1Service()
 
 
 # cache github data to avoid double API calls from cost and generate
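
The call sites below use o1_service.call_o1_api, o1_service.call_o1_api_stream, and o1_service.count_tokens, but app/services/o1_mini_openai_service.py itself is among the seven changed files not rendered on this page. A minimal sketch of the interface those call sites assume; the openai SDK usage, the o200k_base encoding, and the system-prompt folding are assumptions of this sketch, not code from the commit:

    # Hypothetical sketch of app/services/o1_mini_openai_service.py.
    # Only the method names and signatures are taken from the call sites
    # in this diff; everything else is assumed.
    import os

    import tiktoken
    from openai import AsyncOpenAI, OpenAI


    class OpenAIO1Service:
        def __init__(self):
            self.default_key = os.getenv("OPENAI_API_KEY")
            self.encoding = tiktoken.get_encoding("o200k_base")  # assumed encoding

        def count_tokens(self, text: str) -> int:
            return len(self.encoding.encode(text))

        def _user_message(self, system_prompt: str, data: dict) -> str:
            # o1-mini does not accept a system role, so fold the system prompt
            # into the user message (assumed workaround, not shown in the diff)
            payload = "\n".join(f"<{k}>{v}</{k}>" for k, v in data.items())
            return f"{system_prompt}\n\n{payload}"

        def call_o1_api(self, system_prompt: str, data: dict, api_key: str | None = None) -> str:
            client = OpenAI(api_key=api_key or self.default_key)
            response = client.chat.completions.create(
                model="o1-mini",
                messages=[{"role": "user", "content": self._user_message(system_prompt, data)}],
            )
            return response.choices[0].message.content

        async def call_o1_api_stream(self, system_prompt: str, data: dict, api_key: str | None = None):
            client = AsyncOpenAI(api_key=api_key or self.default_key)
            stream = await client.chat.completions.create(
                model="o1-mini",
                messages=[{"role": "user", "content": self._user_message(system_prompt, data)}],
                stream=True,
            )
            async for chunk in stream:
                if chunk.choices and chunk.choices[0].delta.content:
                    yield chunk.choices[0].delta.content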
@@ -52,163 +52,6 @@ class ApiRequest(BaseModel):
     github_pat: str | None = None
 
 
-# OLD NON STREAMING VERSION
-@router.post("")
-# @limiter.limit("1/minute;5/day") # TEMP: disable rate limit for growth??
-async def generate(request: Request, body: ApiRequest):
-    try:
-        # Check instructions length
-        if len(body.instructions) > 1000:
-            return {"error": "Instructions exceed maximum length of 1000 characters"}
-
-        if body.repo in [
-            "fastapi",
-            "streamlit",
-            "flask",
-            "api-analytics",
-            "monkeytype",
-        ]:
-            return {"error": "Example repos cannot be regenerated"}
-
-        # Get cached github data with PAT if provided
-        github_data = get_cached_github_data(body.username, body.repo, body.github_pat)
-
-        # Get default branch first
-        default_branch = github_data["default_branch"]
-        file_tree = github_data["file_tree"]
-        readme = github_data["readme"]
-
-        # Check combined token count
-        combined_content = f"{file_tree}\n{readme}"
-        # token_count = claude_service.count_tokens(combined_content)
-        token_count = o3_service.count_tokens(combined_content)
-
-        # CLAUDE: Modified token limit check
-        # if 50000 < token_count < 190000 and not body.api_key:
-        #     return {
-        #         "error": f"File tree and README combined exceeds token limit (50,000). Current size: {token_count} tokens. This GitHub repository is too large for my wallet, but you can continue by providing your own Anthropic API key.",
-        #         "token_count": token_count,
-        #         "requires_api_key": True,
-        #     }
-        # elif token_count > 200000:
-        #     return {
-        #         "error": f"Repository is too large (>200k tokens) for analysis. Claude 3.5 Sonnet's max context length is 200k tokens. Current size: {token_count} tokens."
-        #     }
-
-        # O3: Modified token limit check
-        if 50000 < token_count < 195000 and not body.api_key:
-            return {
-                "error": f"File tree and README combined exceeds token limit (50,000). Current size: {token_count} tokens. This GitHub repository is too large for my wallet, but you can continue by providing your own OpenRouter API key.",
-                "token_count": token_count,
-                "requires_api_key": True,
-            }
-        elif token_count > 195000:
-            return {
-                "error": f"Repository is too large (>195k tokens) for analysis. OpenAI o3-mini's max context length is 200k tokens. Current size: {token_count} tokens."
-            }
-
-        # Prepare system prompts with instructions if provided
-        first_system_prompt = SYSTEM_FIRST_PROMPT
-        third_system_prompt = SYSTEM_THIRD_PROMPT
-        if body.instructions:
-            first_system_prompt = (
-                first_system_prompt + "\n" + ADDITIONAL_SYSTEM_INSTRUCTIONS_PROMPT
-            )
-            third_system_prompt = (
-                third_system_prompt + "\n" + ADDITIONAL_SYSTEM_INSTRUCTIONS_PROMPT
-            )
-
-        # get the explanation for sysdesign from claude
-        # explanation = claude_service.call_claude_api(
-        #     system_prompt=first_system_prompt,
-        #     data={
-        #         "file_tree": file_tree,
-        #         "readme": readme,
-        #         "instructions": body.instructions,
-        #     },
-        #     api_key=body.api_key,
-        # )
-
-        # get the explanation for sysdesign from o3
-        explanation = o3_service.call_o3_api(
-            system_prompt=first_system_prompt,
-            data={
-                "file_tree": file_tree,
-                "readme": readme,
-                "instructions": body.instructions,
-            },
-            api_key=body.api_key,
-            reasoning_effort="medium",
-        )
-
-        # Check for BAD_INSTRUCTIONS response
-        if "BAD_INSTRUCTIONS" in explanation:
-            return {"error": "Invalid or unclear instructions provided"}
-
-        # full_second_response = claude_service.call_claude_api(
-        #     system_prompt=SYSTEM_SECOND_PROMPT,
-        #     data={"explanation": explanation, "file_tree": file_tree},
-        #     api_key=body.api_key,
-        # )
-
-        full_second_response = o3_service.call_o3_api(
-            system_prompt=SYSTEM_SECOND_PROMPT,
-            data={"explanation": explanation, "file_tree": file_tree},
-            api_key=body.api_key,
-        )
-
-        # Extract component mapping from the response
-        start_tag = "<component_mapping>"
-        end_tag = "</component_mapping>"
-        component_mapping_text = full_second_response[
-            full_second_response.find(start_tag) : full_second_response.find(end_tag)
-        ]
-
-        # get mermaid.js code from claude
-        # mermaid_code = claude_service.call_claude_api(
-        #     system_prompt=third_system_prompt,
-        #     data={
-        #         "explanation": explanation,
-        #         "component_mapping": component_mapping_text,
-        #         "instructions": body.instructions,
-        #     },
-        #     api_key=body.api_key,
-        # )
-
-        # get mermaid.js code from o3
-        mermaid_code = o3_service.call_o3_api(
-            system_prompt=third_system_prompt,
-            data={
-                "explanation": explanation,
-                "component_mapping": component_mapping_text,
-                "instructions": body.instructions,
-            },
-            api_key=body.api_key,
-            reasoning_effort="medium",
-        )
-
-        # check for and remove code block tags
-        mermaid_code = mermaid_code.replace("```mermaid", "").replace("```", "")
-
-        # Check for BAD_INSTRUCTIONS response
-        if "BAD_INSTRUCTIONS" in mermaid_code:
-            return {"error": "Invalid or unclear instructions provided"}
-
-        # Process click events to include full GitHub URLs
-        processed_diagram = process_click_events(
-            mermaid_code, body.username, body.repo, default_branch
-        )
-
-        return {"diagram": processed_diagram, "explanation": explanation}
-    except RateLimitError as e:
-        raise HTTPException(
-            status_code=429,
-            detail="Service is currently experiencing high demand. Please try again in a few minutes.",
-        )
-    except Exception as e:
-        return {"error": str(e)}
-
-
 @router.post("/cost")
 # @limiter.limit("5/minute") # TEMP: disable rate limit for growth??
 async def get_generation_cost(request: Request, body: ApiRequest):
@@ -222,8 +65,8 @@ async def get_generation_cost(request: Request, body: ApiRequest):
         # file_tree_tokens = claude_service.count_tokens(file_tree)
         # readme_tokens = claude_service.count_tokens(readme)
 
-        file_tree_tokens = o3_service.count_tokens(file_tree)
-        readme_tokens = o3_service.count_tokens(readme)
+        file_tree_tokens = o1_service.count_tokens(file_tree)
+        readme_tokens = o1_service.count_tokens(readme)
 
         # CLAUDE: Calculate approximate cost
         # Input cost: $3 per 1M tokens ($0.000003 per token)
@@ -232,7 +75,7 @@ async def get_generation_cost(request: Request, body: ApiRequest):
         # output_cost = 3500 * 0.000015
         # estimated_cost = input_cost + output_cost
 
-        # O3: Calculate approximate cost
+        # O3: Calculate approximate cost temp: o1-mini, same price as o3-mini
         # Input cost: $1.1 per 1M tokens ($0.0000011 per token)
         # Output cost: $4.4 per 1M tokens ($0.0000044 per token)
         input_cost = ((file_tree_tokens * 2 + readme_tokens) + 3000) * 0.0000011
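
Plugging sample numbers into the line above (the 2x file-tree weighting and the 3,000-token padding come from the diff; the token counts are made up):

    # worked example of the input-cost estimate above, with made-up counts
    file_tree_tokens = 10_000
    readme_tokens = 2_000

    # the file tree is weighted twice, plus ~3k tokens of prompt overhead,
    # billed at $1.10 per 1M input tokens
    input_cost = ((file_tree_tokens * 2 + readme_tokens) + 3000) * 0.0000011
    print(round(input_cost, 4))  # 0.0275 -> roughly $0.03 of input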
@@ -306,13 +149,13 @@ async def event_generator():
 
             # Token count check
             combined_content = f"{file_tree}\n{readme}"
-            token_count = o3_service.count_tokens(combined_content)
+            token_count = o1_service.count_tokens(combined_content)
 
             if 50000 < token_count < 195000 and not body.api_key:
-                yield f"data: {json.dumps({'error': f'File tree and README combined exceeds token limit (50,000). Current size: {token_count} tokens. This GitHub repository is too large for my wallet, but you can continue by providing your own OpenRouter API key.'})}\n\n"
+                yield f"data: {json.dumps({'error': f'File tree and README combined exceeds token limit (50,000). Current size: {token_count} tokens. This GitHub repository is too large for my wallet, but you can continue by providing your own OpenAI API key.'})}\n\n"
                 return
             elif token_count > 195000:
-                yield f"data: {json.dumps({'error': f'Repository is too large (>195k tokens) for analysis. OpenAI o3-mini\'s max context length is 200k tokens. Current size: {token_count} tokens.'})}\n\n"
+                yield f"data: {json.dumps({'error': f'Repository is too large (>195k tokens) for analysis. OpenAI o1-mini\'s max context length is 200k tokens. Current size: {token_count} tokens.'})}\n\n"
                 return
 
             # Prepare prompts
@@ -331,19 +174,18 @@ async def event_generator():
             )
 
             # Phase 1: Get explanation
-            yield f"data: {json.dumps({'status': 'explanation_sent', 'message': 'Sending explanation request to o3-mini...'})}\n\n"
+            yield f"data: {json.dumps({'status': 'explanation_sent', 'message': 'Sending explanation request to o1-mini...'})}\n\n"
             await asyncio.sleep(0.1)
             yield f"data: {json.dumps({'status': 'explanation', 'message': 'Analyzing repository structure...'})}\n\n"
             explanation = ""
-            async for chunk in o3_service.call_o3_api_stream(
+            async for chunk in o1_service.call_o1_api_stream(
                 system_prompt=first_system_prompt,
                 data={
                     "file_tree": file_tree,
                     "readme": readme,
                     "instructions": body.instructions,
                 },
                 api_key=body.api_key,
-                reasoning_effort="medium",
             ):
                 explanation += chunk
                 yield f"data: {json.dumps({'status': 'explanation_chunk', 'chunk': chunk})}\n\n"
@@ -353,15 +195,14 @@ async def event_generator():
                 return
 
             # Phase 2: Get component mapping
-            yield f"data: {json.dumps({'status': 'mapping_sent', 'message': 'Sending component mapping request to o3-mini...'})}\n\n"
+            yield f"data: {json.dumps({'status': 'mapping_sent', 'message': 'Sending component mapping request to o1-mini...'})}\n\n"
             await asyncio.sleep(0.1)
             yield f"data: {json.dumps({'status': 'mapping', 'message': 'Creating component mapping...'})}\n\n"
             full_second_response = ""
-            async for chunk in o3_service.call_o3_api_stream(
+            async for chunk in o1_service.call_o1_api_stream(
                 system_prompt=SYSTEM_SECOND_PROMPT,
                 data={"explanation": explanation, "file_tree": file_tree},
                 api_key=body.api_key,
-                reasoning_effort="medium",
             ):
                 full_second_response += chunk
                 yield f"data: {json.dumps({'status': 'mapping_chunk', 'chunk': chunk})}\n\n"
@@ -377,19 +218,18 @@ async def event_generator():
             ]
 
             # Phase 3: Generate Mermaid diagram
-            yield f"data: {json.dumps({'status': 'diagram_sent', 'message': 'Sending diagram generation request to o3-mini...'})}\n\n"
+            yield f"data: {json.dumps({'status': 'diagram_sent', 'message': 'Sending diagram generation request to o1-mini...'})}\n\n"
             await asyncio.sleep(0.1)
             yield f"data: {json.dumps({'status': 'diagram', 'message': 'Generating diagram...'})}\n\n"
             mermaid_code = ""
-            async for chunk in o3_service.call_o3_api_stream(
+            async for chunk in o1_service.call_o1_api_stream(
                 system_prompt=third_system_prompt,
                 data={
                     "explanation": explanation,
                     "component_mapping": component_mapping_text,
                     "instructions": body.instructions,
                 },
                 api_key=body.api_key,
-                reasoning_effort="medium",
             ):
                 mermaid_code += chunk
                 yield f"data: {json.dumps({'status': 'diagram_chunk', 'chunk': chunk})}\n\n"

backend/app/routers/modify.py

Lines changed: 3 additions & 3 deletions
@@ -6,7 +6,7 @@
 from anthropic._exceptions import RateLimitError
 from app.prompts import SYSTEM_MODIFY_PROMPT
 from pydantic import BaseModel
-from app.services.o3_mini_openrouter_service import OpenRouterO3Service
+from app.services.o1_mini_openai_service import OpenAIO1Service
 
 
 load_dotenv()
@@ -15,7 +15,7 @@
 
 # Initialize services
 # claude_service = ClaudeService()
-o3_service = OpenRouterO3Service()
+o1_service = OpenAIO1Service()
 
 
 # Define the request body model
@@ -59,7 +59,7 @@ async def modify(request: Request, body: ModifyRequest):
         #     },
         # )
 
-        modified_mermaid_code = o3_service.call_o3_api(
+        modified_mermaid_code = o1_service.call_o1_api(
             system_prompt=SYSTEM_MODIFY_PROMPT,
             data={
                 "instructions": body.instructions,
