Evaluator-Optimizer Agent Pattern in Python
An iterative refinement loop in which an 'Evaluator' provides granular feedback on an 'Optimizer's' output until quality thresholds are met.
How to Implement the Evaluator-Optimizer Agent Pattern in Python
Step 1: Define the Feedback dataclass and protocol types
from dataclasses import dataclass
from typing import Protocol


@dataclass
class Feedback:
    is_pass: bool
    critique: str
    score: float


class Optimizer(Protocol):
    async def generate(self, task: str) -> str: ...
    async def refine(self, current: str, feedback: str) -> str: ...


class Evaluator(Protocol):
    async def check(self, output: str) -> Feedback: ...
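Because Optimizer and Evaluator are typing.Protocol classes, any object whose async methods match these signatures satisfies them structurally; no inheritance is required. As a minimal sketch, a hypothetical EchoOptimizer stub (not part of the pattern itself) type-checks as an Optimizer and is handy for unit-testing the loop:

class EchoOptimizer:
    """Hypothetical stub that satisfies the Optimizer protocol."""

    async def generate(self, task: str) -> str:
        return f"Draft for: {task}"

    async def refine(self, current: str, feedback: str) -> str:
        # Fold the critique into the text so each iteration visibly changes the output
        return f"{current}\n[revised per: {feedback}]"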
Step 2: Implement the iterative refinement loop

MAX_ITERATIONS = 5


async def refinement_loop(
    task: str,
    optimizer: Optimizer,
    evaluator: Evaluator,
) -> str:
    current_output = await optimizer.generate(task)
    for _ in range(MAX_ITERATIONS):
        feedback = await evaluator.check(current_output)
        if feedback.is_pass:
            return current_output
        current_output = await optimizer.refine(
            current_output, feedback.critique
        )
    return current_output
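To see the loop run end to end before wiring in real LLM calls, here is a small driver under stated assumptions: EchoOptimizer is the hypothetical stub sketched above, and ThresholdEvaluator is a likewise hypothetical evaluator that fails the first two checks and then passes.

import asyncio

class ThresholdEvaluator:
    """Hypothetical stub: fails the first two checks, then passes."""

    def __init__(self) -> None:
        self._calls = 0

    async def check(self, output: str) -> Feedback:
        self._calls += 1
        passed = self._calls >= 3
        return Feedback(
            is_pass=passed,
            critique="Meets the bar" if passed else "Add more detail",
            score=90.0 if passed else 55.0,
        )

result = asyncio.run(
    refinement_loop("Write a haiku about autumn", EchoOptimizer(), ThresholdEvaluator())
)
print(result)  # the initial draft plus two rounds of revision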
The complete module below combines both steps and adds configuration dataclasses, rubric scoring, progress callbacks, and cancellation support.

"""Evaluator-Optimizer pattern with rubric scoring and cancellation support."""
import asyncio
import json
import logging
from dataclasses import dataclass, field
from typing import Callable

logger = logging.getLogger(__name__)


# [step] Define feedback and configuration dataclasses
@dataclass(frozen=True)
class Feedback:
    is_pass: bool
    critique: str
    score: float
    categories: dict[str, float] = field(default_factory=dict)

    def __post_init__(self) -> None:
        if not 0 <= self.score <= 100:
            raise ValueError(f"Score must be 0-100, got {self.score}")


@dataclass(frozen=True)
class OptimizerConfig:
    model: str
    temperature: float = 0.7
    max_tokens: int = 4096


@dataclass(frozen=True)
class EvaluatorConfig:
    model: str
    rubric: str
    pass_threshold: float = 80.0


@dataclass(frozen=True)
class RefinementResult:
    output: str
    iterations: int
    final_score: float


# [step] Implement the Optimizer agent
class OptimizerAgent:
    def __init__(self, config: OptimizerConfig) -> None:
        self._config = config

    async def generate(self, task: str) -> str:
        logger.info("Generating initial output for task: %s", task[:80])
        return await self._call_llm(f"Generate: {task}")

    async def refine(self, current: str, critique: str) -> str:
        prompt = (
            "Improve the following output based on feedback.\n"
            f"\nCurrent output:\n{current}\n"
            f"\nFeedback:\n{critique}\n"
            "\nProvide the improved version:"
        )
        return await self._call_llm(prompt)

    async def _call_llm(self, prompt: str) -> str:
        # Stub; replace with an actual LLM API call
        return f"[LLM Response for: {prompt[:50]}...]"


# [step] Implement the Evaluator agent with rubric parsing
class EvaluatorAgent:
    def __init__(self, config: EvaluatorConfig) -> None:
        self._config = config

    async def check(self, output: str) -> Feedback:
        prompt = (
            f"Evaluate against rubric: {self._config.rubric}\n"
            f"\nOutput:\n{output}\n"
            "\nRespond with JSON containing keys: is_pass, critique, score, categories"
        )
        raw = await self._call_llm(prompt)
        data = json.loads(raw)
        # Derive pass/fail from the configured threshold rather than trusting the model
        data["is_pass"] = float(data["score"]) >= self._config.pass_threshold
        return Feedback(**data)

    async def _call_llm(self, prompt: str) -> str:
        # Stub; replace with an actual LLM API call
        return json.dumps({
            "is_pass": True,
            "critique": "Meets all criteria",
            "score": 95,
            "categories": {"accuracy": 96, "completeness": 94},
        })


# [step] Implement the refinement loop with progress callbacks
async def refinement_loop(
    task: str,
    optimizer: OptimizerAgent,
    evaluator: EvaluatorAgent,
    *,
    max_iterations: int = 5,
    on_iteration: Callable[[int, Feedback], None] | None = None,
    cancel_event: asyncio.Event | None = None,
) -> RefinementResult:
    current_output = await optimizer.generate(task)
    for i in range(max_iterations):
        if cancel_event and cancel_event.is_set():
            raise asyncio.CancelledError("Refinement loop cancelled")
        feedback = await evaluator.check(current_output)
        if on_iteration:
            on_iteration(i + 1, feedback)
        if feedback.is_pass:
            return RefinementResult(
                output=current_output,
                iterations=i + 1,
                final_score=feedback.score,
            )
        current_output = await optimizer.refine(
            current_output, feedback.critique
        )
    # Score the final attempt so the result reflects where iteration stopped
    final_feedback = await evaluator.check(current_output)
    return RefinementResult(
        output=current_output,
        iterations=max_iterations,
        final_score=final_feedback.score,
    )
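A usage sketch, appended to the module above (so it reuses its imports and logger). The model name, rubric text, and task are placeholders:

def log_progress(iteration: int, feedback: Feedback) -> None:
    logger.info(
        "Iteration %d: score=%.1f pass=%s", iteration, feedback.score, feedback.is_pass
    )


async def main() -> None:
    optimizer = OptimizerAgent(OptimizerConfig(model="gpt-4o"))  # placeholder model name
    evaluator = EvaluatorAgent(EvaluatorConfig(
        model="gpt-4o",  # placeholder model name
        rubric="Accuracy, completeness, clarity; each scored 0-100",
        pass_threshold=85.0,
    ))
    cancel = asyncio.Event()  # set from another task (e.g., on a timeout) to abort

    result = await refinement_loop(
        "Write a product description for a solar charger",
        optimizer,
        evaluator,
        max_iterations=3,
        on_iteration=log_progress,
        cancel_event=cancel,
    )
    print(f"Finished in {result.iterations} iteration(s), score {result.final_score}")


if __name__ == "__main__":
    asyncio.run(main())

With the stub _call_llm responses this finishes on the first iteration; swapping in a real provider changes only the two _call_llm methods.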
Evaluator-Optimizer Agent Pattern Architecture

[Diagram placeholder: task → Optimizer.generate → Evaluator.check → pass? return RefinementResult : Optimizer.refine → repeat, up to max_iterations]
Evaluator-Optimizer Agent Pattern in the Real World
“Think of a student writing an essay (Optimizer) and a teacher grading it with detailed feedback (Evaluator). The student revises based on the red-ink comments and resubmits. This cycle repeats until the essay meets the teacher’s standards—or the deadline (max iterations) is reached.”