async def evolve_one(
    self, skill_id: str, triggered_by: str,
) -> EvolutionReport:
    """Run one evolution attempt for a single skill and report the outcome.

    Steps, in order:

    1. Fetch the skill; skip if it is missing or not in the ``degraded``
       state.
    2. Run the budget pre-check when a budget guard is configured.
    3. Build the input package and ask the router for a rewritten version.
    4. Check that the reply is a dict carrying all required keys.
    5. Run the structural, targeted-case, fixture-regression and
       recent-success gates, stopping at the first one that does not pass.
    6. Persist the new version, transition the skill to DRAFT, emit a
       fallback alert, and write the evolution log entry.

    Args:
        skill_id: Identifier of the skill to evolve.
        triggered_by: Forwarded unchanged to the evolution log entry and to
            the rejection record.

    Returns:
        An ``EvolutionReport``. Outcomes produced directly here are
        ``"skipped"``, ``"budget_exhausted"``, ``"error"``,
        ``"manual_handoff_pending"``, ``"rejected_validation"`` and
        ``"success"``. When a gate fails, the value returned by
        ``self._record_rejection`` is passed through as-is.

    Note:
        Only the budget check, the input build and the router call have
        exception handlers. Errors raised by the gates, persistence, the
        lifecycle transition, the alert or the log write propagate to the
        caller.
    """
    skill = await self._fetch_skill(skill_id)
    if skill is None:
        return EvolutionReport(
            skill_id=skill_id,
            outcome="skipped",
            rationale="skill not found",
        )

    current_state = skill["state"]
    if current_state != "degraded":
        return EvolutionReport(
            skill_id=skill_id,
            outcome="skipped",
            rationale=f"skill state is {current_state!r}, not 'degraded'",
        )

    # Budget pre-check; a missing guard means there is nothing to check.
    try:
        guard = self._budget_guard
        if guard is not None:
            await guard.check_pre_call(user_id="system")
    except BudgetPausedError:
        return EvolutionReport(
            skill_id=skill_id,
            outcome="budget_exhausted",
        )

    # Gather everything the prompt needs.
    try:
        package = await self._input_builder.build(skill_id=skill_id)
    except (LookupError, ValueError) as build_error:
        detail = str(build_error)
        logger.warning(
            "skill_evolution_input_failed",
            skill_id=skill_id,
            error=detail,
        )
        return EvolutionReport(
            skill_id=skill_id,
            outcome="error",
            rationale=detail,
        )

    # Ask the model for a rewritten skill version.
    try:
        # Slice 21: the skill identity is threaded through so the
        # claude_code escalation gate can fill in
        # ``escalation_request.originating_entity_*``. Manual evolution
        # edits the skill in place; the diff validator maps
        # skill_id → capability_name to render target_paths.
        prompt = self._build_prompt(package)
        parsed, metadata = await self._router.complete(
            prompt=prompt,
            task_type=TASK_TYPE,
            task_id=None,
            user_id="system",
            originating_entity=("skill", skill_id),
        )
    except BudgetPausedError:
        return EvolutionReport(
            skill_id=skill_id,
            outcome="budget_exhausted",
        )
    except EscalationDecisionError as escalation:
        # Slice 17/21: the gate replaced the autonomous call. For
        # claude_code / chat the work is handed to the user, and the
        # poller later transitions the existing skill from its current
        # state once the manual build validates.
        logger.info(
            "skill_evolution_escalation_resolved",
            skill_id=skill_id,
            mode=escalation.mode,
            escalation_request_id=escalation.escalation_request_id,
        )
        if escalation.mode in ("claude_code", "chat"):
            outcome_label = "manual_handoff_pending"
        else:
            outcome_label = "budget_exhausted"
        return EvolutionReport(
            skill_id=skill_id,
            outcome=outcome_label,
            rationale=f"escalation_resolved={escalation.mode!r}",
        )
    except TokenLimitReachedError as limit_hit:
        # The extension token cap truncated the rewrite (enforce mode).
        # The router already logged the real spend before raising, so this
        # is reported as budget-exhausted: the next cycle can retry and it
        # is not treated as a crash.
        logger.warning(
            "skill_evolution_token_limit_reached",
            skill_id=skill_id,
            escalation_request_id=limit_hit.escalation_request_id,
        )
        return EvolutionReport(
            skill_id=skill_id,
            outcome="budget_exhausted",
            rationale="token_limit_reached; re-escalation required",
        )
    except Exception as llm_error:
        logger.warning(
            "skill_evolution_llm_failed",
            skill_id=skill_id,
            error=str(llm_error),
        )
        return EvolutionReport(
            skill_id=skill_id,
            outcome="error",
            rationale=f"llm call failed: {llm_error}",
        )

    # Call metadata is optional: tolerate missing attributes and a falsy cost.
    invocation_id = getattr(metadata, "invocation_id", None)
    cost_usd = float(getattr(metadata, "cost_usd", 0.0) or 0.0)
    latency_ms = getattr(metadata, "latency_ms", None)

    # Shape check: the reply must be a dict that carries every required key.
    required_keys = (
        "diagnosis",
        "new_skill_version",
        "changelog",
        "targeted_failure_cases",
    )
    if not isinstance(parsed, dict) or any(
        key not in parsed for key in required_keys
    ):
        await self._log_repo.record(
            skill_id=skill_id,
            from_version_id=skill["current_version_id"],
            to_version_id=None,
            triggered_by=triggered_by,
            claude_invocation_id=invocation_id,
            diagnosis=None,
            targeted_case_ids=None,
            validation_results={"malformed_output": True},
            outcome="rejected_validation",
        )
        await self._maybe_demote_after_failure(skill_id)
        return EvolutionReport(
            skill_id=skill_id,
            outcome="rejected_validation",
            rationale="malformed llm output",
            cost_usd=cost_usd,
            latency_ms=latency_ms,
        )

    new_version = parsed["new_skill_version"]
    targeted = parsed["targeted_failure_cases"] or []
    diagnosis = parsed.get("diagnosis")

    # Gate phase: four gates, evaluated in a fixed order, first failure wins.
    executor = self._executor_factory()
    gates = EvolutionGates(self._conn, self._config, executor)
    gate_results: dict[str, GateResult] = {}

    async def reject(reason):
        # Single rejection path shared by all gates; only the rationale
        # differs. ``gate_results`` holds whatever has been collected so far.
        return await self._record_rejection(
            skill_id=skill_id,
            from_version_id=skill["current_version_id"],
            triggered_by=triggered_by,
            invocation_id=invocation_id,
            diagnosis=diagnosis,
            targeted=targeted,
            gate_results=gate_results,
            rationale=reason,
            cost_usd=cost_usd,
            latency_ms=latency_ms,
        )

    # Gate 1 is synchronous and reports its own failure reason.
    structural = run_structural_gate(new_version)
    gate_results["structural"] = structural
    if not structural.passed:
        return await reject(
            f"structural gate failed: {structural.failure_reason}"
        )

    # Gates 2-4 are awaited one at a time. Each entry is started lazily so a
    # later gate never runs once an earlier one has failed.
    staged_gates = (
        (
            "targeted",
            lambda: gates.run_targeted_case_gate(
                new_version=new_version,
                skill_id=skill_id,
                targeted_case_ids=targeted,
            ),
            "targeted case gate failed",
        ),
        (
            "fixture_regression",
            lambda: gates.run_fixture_regression_gate(
                new_version=new_version,
                skill_id=skill_id,
            ),
            "fixture regression gate failed",
        ),
        (
            "recent_success",
            lambda: gates.run_recent_success_gate(
                new_version=new_version,
                skill_id=skill_id,
            ),
            "recent success gate failed",
        ),
    )
    for gate_key, start_gate, failure_note in staged_gates:
        verdict = await start_gate()
        gate_results[gate_key] = verdict
        if not verdict.passed:
            return await reject(failure_note)

    # Every gate passed: store the new version, then move the skill.
    new_version_id = await self._persist_new_version(
        skill_id=skill_id,
        current_version_id=skill["current_version_id"],
        new_version=new_version,
        changelog=parsed.get("changelog", ""),
    )

    # The evolved version lands in DRAFT (spec §6.6 / §23.5). Moving on from
    # draft to sandbox is a deliberate human gate: the transition table only
    # accepts ``human_approval`` / ``manual_override`` for
    # ``DRAFT → SANDBOX``, never an automated ``gate_passed``. Earlier code
    # tried that hop inside ``contextlib.suppress`` — a call known to always
    # fail, which broke the no-suppress rule (Fable critique #5). The skill
    # therefore parks in draft until a human acts, and an alert is raised
    # loudly instead of a failure being swallowed silently.
    await self._lifecycle.transition(
        skill_id=skill_id,
        to_state=SkillState.DRAFT,
        reason="gate_passed",
        actor="system",
        notes=f"evolution {new_version_id}",
    )
    logger.info(
        "skill_evolution_parked_in_draft_awaiting_approval",
        skill_id=skill_id,
        new_version_id=new_version_id,
    )
    await emit_fallback_alert(
        self._fallback_alert,
        component="skill_evolution",
        error="evolved skill version cannot auto-advance past draft",
        fallback=(
            "evolved version parked in draft awaiting human approval "
            "(draft→sandbox requires human_approval)"
        ),
        context={"skill_id": skill_id, "new_version_id": new_version_id},
    )

    # Flatten each gate result into {"passed": ..., <details...>} for the log.
    validation_summary = {
        gate_name: {"passed": outcome.passed, **outcome.details}
        for gate_name, outcome in gate_results.items()
    }
    await self._log_repo.record(
        skill_id=skill_id,
        from_version_id=skill["current_version_id"],
        to_version_id=new_version_id,
        triggered_by=triggered_by,
        claude_invocation_id=invocation_id,
        diagnosis=diagnosis,
        targeted_case_ids=targeted,
        validation_results=validation_summary,
        outcome="success",
    )
    return EvolutionReport(
        skill_id=skill_id,
        outcome="success",
        new_version_id=new_version_id,
        rationale="all 4 gates passed",
        cost_usd=cost_usd,
        latency_ms=latency_ms,
    )