Evaluator-Optimizer Agent Pattern in Python
An iterative refinement loop in which an 'Evaluator' provides granular feedback on an 'Optimizer's' output until quality thresholds are met.
How to Implement the Evaluator-Optimizer Agent Pattern in Python
Step 1: Define the Feedback dataclass and protocol types
from dataclasses import dataclass
from typing import Protocol


@dataclass
class Feedback:
    is_pass: bool
    critique: str
    score: float


class Optimizer(Protocol):
    async def generate(self, task: str) -> str: ...
    async def refine(self, current: str, feedback: str) -> str: ...


class Evaluator(Protocol):
    async def check(self, output: str) -> Feedback: ...
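Because Optimizer and Evaluator are typing.Protocol classes, any object whose async methods match these signatures satisfies them structurally; no inheritance is required. As a minimal sketch, a hypothetical EchoOptimizer stub (not part of the pattern itself) type-checks as an Optimizer and is handy for unit-testing the loop:

class EchoOptimizer:
    """Hypothetical stub that satisfies the Optimizer protocol."""

    async def generate(self, task: str) -> str:
        return f"Draft for: {task}"

    async def refine(self, current: str, feedback: str) -> str:
        # Fold the critique into the text so each iteration visibly changes the output
        return f"{current}\n[revised per: {feedback}]"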
Step 2: Implement the iterative refinement loop

MAX_ITERATIONS = 5


async def refinement_loop(
    task: str,
    optimizer: Optimizer,
    evaluator: Evaluator,
) -> str:
    current_output = await optimizer.generate(task)
    for _ in range(MAX_ITERATIONS):
        feedback = await evaluator.check(current_output)
        if feedback.is_pass:
            return current_output
        current_output = await optimizer.refine(
            current_output, feedback.critique
        )
    return current_output
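To see the loop run end to end before wiring in real LLM calls, here is a small driver under stated assumptions: EchoOptimizer is the hypothetical stub sketched above, and ThresholdEvaluator is a likewise hypothetical evaluator that fails the first two checks and then passes.

import asyncio

class ThresholdEvaluator:
    """Hypothetical stub: fails the first two checks, then passes."""

    def __init__(self) -> None:
        self._calls = 0

    async def check(self, output: str) -> Feedback:
        self._calls += 1
        passed = self._calls >= 3
        return Feedback(
            is_pass=passed,
            critique="Meets the bar" if passed else "Add more detail",
            score=90.0 if passed else 55.0,
        )

result = asyncio.run(
    refinement_loop("Write a haiku about autumn", EchoOptimizer(), ThresholdEvaluator())
)
print(result)  # the initial draft plus two rounds of revision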
The complete module below combines both steps and adds configuration dataclasses, rubric scoring, progress callbacks, and cancellation support.

"""Evaluator-Optimizer pattern with rubric scoring and cancellation support."""
import asyncio
import json
import logging
from dataclasses import dataclass, field
from typing import Callable

logger = logging.getLogger(__name__)


# [step] Define feedback and configuration dataclasses
@dataclass(frozen=True)
class Feedback:
    is_pass: bool
    critique: str
    score: float
    categories: dict[str, float] = field(default_factory=dict)

    def __post_init__(self) -> None:
        if not 0 <= self.score <= 100:
            raise ValueError(f"Score must be 0-100, got {self.score}")


@dataclass(frozen=True)
class OptimizerConfig:
    model: str
    temperature: float = 0.7
    max_tokens: int = 4096


@dataclass(frozen=True)
class EvaluatorConfig:
    model: str
    rubric: str
    pass_threshold: float = 80.0


@dataclass(frozen=True)
class RefinementResult:
    output: str
    iterations: int
    final_score: float


# [step] Implement the Optimizer agent
class OptimizerAgent:
    def __init__(self, config: OptimizerConfig) -> None:
        self._config = config

    async def generate(self, task: str) -> str:
        logger.info("Generating initial output for task: %s", task[:80])
        return await self._call_llm(f"Generate: {task}")

    async def refine(self, current: str, critique: str) -> str:
        prompt = (
            "Improve the following output based on feedback.\n"
            f"\nCurrent output:\n{current}\n"
            f"\nFeedback:\n{critique}\n"
            "\nProvide the improved version:"
        )
        return await self._call_llm(prompt)

    async def _call_llm(self, prompt: str) -> str:
        # Stub; replace with an actual LLM API call
        return f"[LLM Response for: {prompt[:50]}...]"


# [step] Implement the Evaluator agent with rubric parsing
class EvaluatorAgent:
    def __init__(self, config: EvaluatorConfig) -> None:
        self._config = config

    async def check(self, output: str) -> Feedback:
        prompt = (
            f"Evaluate against rubric: {self._config.rubric}\n"
            f"\nOutput:\n{output}\n"
            "\nRespond with JSON containing keys: is_pass, critique, score, categories"
        )
        raw = await self._call_llm(prompt)
        data = json.loads(raw)
        # Derive pass/fail from the configured threshold rather than trusting the model
        data["is_pass"] = float(data["score"]) >= self._config.pass_threshold
        return Feedback(**data)

    async def _call_llm(self, prompt: str) -> str:
        # Stub; replace with an actual LLM API call
        return json.dumps({
            "is_pass": True,
            "critique": "Meets all criteria",
            "score": 95,
            "categories": {"accuracy": 96, "completeness": 94},
        })


# [step] Implement the refinement loop with progress callbacks
async def refinement_loop(
    task: str,
    optimizer: OptimizerAgent,
    evaluator: EvaluatorAgent,
    *,
    max_iterations: int = 5,
    on_iteration: Callable[[int, Feedback], None] | None = None,
    cancel_event: asyncio.Event | None = None,
) -> RefinementResult:
    current_output = await optimizer.generate(task)
    for i in range(max_iterations):
        if cancel_event and cancel_event.is_set():
            raise asyncio.CancelledError("Refinement loop cancelled")
        feedback = await evaluator.check(current_output)
        if on_iteration:
            on_iteration(i + 1, feedback)
        if feedback.is_pass:
            return RefinementResult(
                output=current_output,
                iterations=i + 1,
                final_score=feedback.score,
            )
        current_output = await optimizer.refine(
            current_output, feedback.critique
        )
    # Score the final attempt so the result reflects where iteration stopped
    final_feedback = await evaluator.check(current_output)
    return RefinementResult(
        output=current_output,
        iterations=max_iterations,
        final_score=final_feedback.score,
    )
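A usage sketch, appended to the module above (so it reuses its imports and logger). The model name, rubric text, and task are placeholders:

def log_progress(iteration: int, feedback: Feedback) -> None:
    logger.info(
        "Iteration %d: score=%.1f pass=%s", iteration, feedback.score, feedback.is_pass
    )


async def main() -> None:
    optimizer = OptimizerAgent(OptimizerConfig(model="gpt-4o"))  # placeholder model name
    evaluator = EvaluatorAgent(EvaluatorConfig(
        model="gpt-4o",  # placeholder model name
        rubric="Accuracy, completeness, clarity; each scored 0-100",
        pass_threshold=85.0,
    ))
    cancel = asyncio.Event()  # set from another task (e.g., on a timeout) to abort

    result = await refinement_loop(
        "Write a product description for a solar charger",
        optimizer,
        evaluator,
        max_iterations=3,
        on_iteration=log_progress,
        cancel_event=cancel,
    )
    print(f"Finished in {result.iterations} iteration(s), score {result.final_score}")


if __name__ == "__main__":
    asyncio.run(main())

With the stub _call_llm responses this finishes on the first iteration; swapping in a real provider changes only the two _call_llm methods.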
Evaluator-Optimizer Agent Pattern Architecture

[Diagram placeholder: task → Optimizer.generate → Evaluator.check → pass? return RefinementResult : Optimizer.refine → repeat, up to max_iterations]
Evaluator-Optimizer Agent Pattern in the Real World
“Think of a student writing an essay (Optimizer) and a teacher grading it with detailed feedback (Evaluator). The student revises based on the red-ink comments and resubmits. This cycle repeats until the essay meets the teacher’s standards—or the deadline (max iterations) is reached.”