Coding Agent

A workflow for AI-powered code generation that clarifies the task with the user, generates tests from the agreed requirements, and iterates on the implementation until those tests pass.

Coding Agent workflow diagram

Diagram Explanation

The diagram illustrates a comprehensive coding agent workflow with two main loops. The first loop ('Until tasks clear') handles task clarification between the human and LLM through the interface. The second loop ('Until tests pass') manages the code generation and testing process, where the LLM interacts with the environment to write, test, and refine code until it meets the specified requirements.

Use Cases

  • Automated Code Generation: Generate code based on natural language descriptions with built-in testing and validation.
  • Code Refactoring: Analyze existing code and suggest improvements while maintaining functionality.
  • Test-Driven Development: Generate test cases and implementation code iteratively.

Implementation

from typing import List, Dict, Any, Optional
from pydantic import BaseModel
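# run_llm, JSON_llm, and execute_command are the recipe's shared helper
# functions. Assumed behavior: run_llm returns the model's text reply,
# JSON_llm parses the reply into the schema passed as its second argument,
# and execute_command returns an object exposing .returncode and .output.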
from helpers import run_llm, JSON_llm, execute_command

class Requirement(BaseModel):
    """Represents a clarified requirement"""
    description: str
    acceptance_criteria: List[str]
    test_cases: List[Dict[str, Any]]

class TestResult(BaseModel):
    """Represents the result of running tests"""
    passed: bool
    failed_tests: List[str]
    error_messages: List[str]
    coverage: float

class CodingAgent:
    def __init__(self):
        self.requirements: List[Requirement] = []
        self.current_code: str = ""
        self.test_code: str = ""
        
    async def clarify_requirements(self, task_description: str) -> List[Requirement]:
        """Clarify requirements through interaction with LLM"""
        CLARIFICATION_PROMPT = """Analyze this coding task and break it down into clear requirements.
        For each requirement:
        1. Provide a clear description
        2. List specific acceptance criteria
        3. Define test cases with inputs and expected outputs
        
        Task: {task}
        
        Return as JSON array of requirements."""
        
        self.requirements = await JSON_llm(
            CLARIFICATION_PROMPT.format(task=task_description),
            List[Requirement]
        )
        
        return self.requirements
    
    async def generate_test_code(self) -> str:
        """Generate test code based on requirements"""
        TEST_PROMPT = """Generate pytest test code for these requirements:
        {requirements}
        
        Include:
        1. Test cases for each requirement
        2. Edge cases and error conditions
        3. Proper test organization and setup
        
        The implementation under test will be saved as temp_implementation.py,
        so import the functions under test from the temp_implementation module.
        
        Return only the test code."""
        
        self.test_code = await run_llm(
            TEST_PROMPT.format(requirements=self.requirements)
        )
        
        return self.test_code
    
    async def generate_implementation(self) -> str:
        """Generate implementation code based on requirements and tests"""
        IMPLEMENTATION_PROMPT = """Generate implementation code that satisfies these requirements and passes these tests:
        
        Requirements:
        {requirements}
        
        Test Code:
        {test_code}
        
        Follow these guidelines:
        1. Write clean, well-documented code
        2. Include type hints
        3. Handle edge cases
        4. Follow PEP 8 style guide
        
        Return only the implementation code."""
        
        self.current_code = await run_llm(
            IMPLEMENTATION_PROMPT.format(
                requirements=self.requirements,
                test_code=self.test_code
            )
        )
        
        return self.current_code
    
    async def run_tests(self) -> TestResult:
        """Run tests and return results"""
        try:
            # Save code to temporary files
            with open("temp_implementation.py", "w") as f:
                f.write(self.current_code)
            with open("temp_test.py", "w") as f:
                f.write(self.test_code)
            
            # Run pytest with coverage
            result = await execute_command(
                "pytest temp_test.py --cov=temp_implementation -v"
            )
            
            # Parse test results
            return TestResult(
                passed=result.returncode == 0,
                failed_tests=self._parse_failed_tests(result.output),
                error_messages=self._parse_error_messages(result.output),
                coverage=self._parse_coverage(result.output)
            )
        finally:
            # Cleanup temporary files
            await execute_command("rm temp_implementation.py temp_test.py")
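
    # The parsing helpers below are a minimal sketch: they scrape the
    # plain-text output of `pytest -v` with pytest-cov and may need adjusting
    # to the exact report format produced in your environment.
    def _parse_failed_tests(self, output: str) -> List[str]:
        """Extract the node IDs of failed tests from pytest's verbose output."""
        return [line.split()[0] for line in output.splitlines() if " FAILED" in line]
    
    def _parse_error_messages(self, output: str) -> List[str]:
        """Collect assertion/error lines (pytest prefixes them with 'E ')."""
        return [line.strip() for line in output.splitlines() if line.strip().startswith("E ")]
    
    def _parse_coverage(self, output: str) -> float:
        """Read the total coverage percentage from the pytest-cov summary table."""
        for line in output.splitlines():
            if line.startswith("TOTAL"):
                return float(line.split()[-1].rstrip("%"))
        return 0.0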
    
    async def fix_issues(self, test_result: TestResult) -> str:
        """Fix issues based on test results"""
        FIX_PROMPT = """Fix the following issues in the code:
        
        Current Code:
        {code}
        
        Test Results:
        Failed Tests: {failed_tests}
        Error Messages: {errors}
        
        Return the fixed implementation code."""
        
        self.current_code = await run_llm(
            FIX_PROMPT.format(
                code=self.current_code,
                failed_tests=test_result.failed_tests,
                errors=test_result.error_messages
            )
        )
        
        return self.current_code
    
    async def process_task(self, task_description: str, max_iterations: int = 5) -> str:
        """Process a coding task from start to finish"""
        print("1. Clarifying Requirements...")
        requirements = await self.clarify_requirements(task_description)
        for i, req in enumerate(requirements, 1):
            print(f"\nRequirement {i}:")
            print(f"Description: {req.description}")
            print("Acceptance Criteria:")
            for ac in req.acceptance_criteria:
                print(f"- {ac}")
        
        print("\n2. Generating Tests...")
        test_code = await self.generate_test_code()
        print("Test code generated.")
        
        print("\n3. Implementing Solution...")
        implementation = await self.generate_implementation()
        print("Initial implementation complete.")
        
        iteration = 0
        while iteration < max_iterations:
            print(f"\n4. Running Tests (Iteration {iteration + 1})...")
            test_result = await self.run_tests()
            
            print(f"Tests Passed: {test_result.passed}")
            print(f"Coverage: {test_result.coverage}%")
            
            if test_result.passed and test_result.coverage >= 90:
                print("\nAll tests passed with good coverage!")
                break
            
            print("\n5. Fixing Issues...")
            await self.fix_issues(test_result)
            iteration += 1
        
        if not test_result.passed:
            print("\nWarning: reached the iteration limit without all tests passing.")
        
        return self.current_code

# Example usage
async def main():
    task = """
    Create a function that processes a list of financial transactions.
    Requirements:
    1. Calculate total amount, average transaction size
    2. Identify outliers (transactions > 2 standard deviations from mean)
    3. Group transactions by category
    4. Handle invalid inputs appropriately
    """
    
    agent = CodingAgent()
    final_code = await agent.process_task(task)
    print("\nFinal Implementation:\n", final_code)

if __name__ == "__main__":
    import asyncio
    asyncio.run(main())