# donna_models.yaml
#
# Source: config/donna_models.yaml

# Model routing configuration
# See docs/model-layer.md for full documentation

# Named model aliases. The alias names defined here (parser, reasoner, ...)
# are the values used by `model:` / `fallback:` / `shadow:` under `routing`
# and by `judge_model` under `quality_monitoring`.
models:
  # parser, reasoner and fallback currently all resolve to the same
  # Anthropic model; they are kept as separate aliases.
  parser:
    provider: anthropic
    model: claude-sonnet-4-20250514
  reasoner:
    provider: anthropic
    model: claude-sonnet-4-20250514
  # NOTE(review): no entry in this file references the `fallback` alias by
  # name (routing `fallback:` values are parser/reasoner) — presumably a
  # global last resort used by the loader; confirm.
  fallback:
    provider: anthropic
    model: claude-sonnet-4-20250514
  # The only alias served by the local Ollama instance (see `ollama` below).
  local_parser:
    provider: ollama
    model: qwen2.5:32b-instruct-q6_K
    estimated_cost_per_1k_tokens: 0.0001  # hardware amortization
    # Same value as ollama.default_num_ctx.
    num_ctx: 8192

# Ollama local LLM connection settings (RTX 3090)
# Applies to models whose provider is `ollama` (here: models.local_parser).
ollama:
  # Local Ollama HTTP endpoint.
  base_url: http://localhost:11434
  # Seconds, per the `_s` suffix.
  timeout_s: 120
  # NOTE(review): presumably forwarded as Ollama's keep_alive duration (how
  # long the model stays loaded after a request) — confirm.
  keepalive: 5m
  # Matches num_ctx on models.local_parser.
  default_num_ctx: 8192
  # NOTE(review): presumably the number of context tokens held back for the
  # model's output — confirm against the consumer.
  default_output_reserve: 1024

# Per-task-type routing table. Each entry sets:
#   model                - primary model alias (a key under `models`)
#   fallback             - optional second model alias
#   confidence_threshold - optional score in 0-1
#                          NOTE(review): presumably results below it are
#                          re-run on `fallback` — confirm
#   shadow               - optional secondary alias; runs alongside, output
#                          logged only
routing:
  # Keys match task_types.yaml entries exactly
  parse_task:
    model: parser
    fallback: reasoner
    confidence_threshold: 0.7
  classify_priority:
    model: parser
    fallback: reasoner
    confidence_threshold: 0.7
  generate_digest:
    model: parser
    shadow: reasoner              # production monitoring: secondary model runs, output logged only
  prep_research:
    model: reasoner
  dedup_check:
    model: parser
    fallback: reasoner
    confidence_threshold: 0.7
  task_decompose:
    model: reasoner
  extract_preferences:
    model: reasoner
  # Local model eval routing — use with: donna eval --task-type parse_task_local
  parse_task_local:
    model: local_parser

  # Local LLM agents — zero marginal cost
  # (no API provider involved; models.local_parser carries only an amortized
  # hardware estimate). These fall back to the hosted `parser` alias.
  generate_nudge:
    model: local_parser
    fallback: parser
    confidence_threshold: 0.5
  generate_reminder:
    model: local_parser
    fallback: parser
    confidence_threshold: 0.5
  # NOTE(review): the next two entries set `fallback` but no
  # confidence_threshold, unlike generate_nudge/generate_reminder — confirm
  # what triggers the fallback here.
  challenge_task:
    model: local_parser
    fallback: parser
  generate_weekly_digest:
    model: local_parser
    fallback: parser

  # Chat interface
  # The first three run on the local model only (no fallback);
  # chat_escalation goes to the hosted `parser` alias.
  classify_chat_intent:
    model: local_parser
  chat_respond:
    model: local_parser
  chat_summarize:
    model: local_parser
  chat_escalation:
    model: parser
  # Failure triage (not a chat task type). Fallback without
  # confidence_threshold, as with challenge_task above.
  triage_failure:
    model: local_parser
    fallback: parser
  # Skill tasks (equivalence judge, auto-draft, evolution) — all on reasoner.
  skill_equivalence_judge:
    model: reasoner
  skill_auto_draft:
    model: reasoner
  skill_evolution:
    model: reasoner

  # Slice 15 — template-driven vault writes
  draft_meeting_note:
    model: reasoner

  # Slice 16 — cadence-driven template writes
  draft_daily_reflection:
    model: reasoner
  draft_weekly_review:
    model: reasoner
  draft_person_profile:
    model: reasoner
  extract_commitments:
    model: reasoner

  # Wave 2: prefix-routing for dynamic skill + validation task_types.
  # NOTE(review): presumably these two keys are matched as prefixes of
  # dynamically named task types, unlike the exact-match keys above — confirm.
  skill_step:
    model: parser
    fallback: reasoner
    confidence_threshold: 0.7
  skill_validation:
    model: parser
    fallback: reasoner
    confidence_threshold: 0.7

# Cost tracking
# Spend limits. Keys ending in `_usd` are US-dollar amounts.
cost:
  monthly_budget_usd: 100.00
  # NOTE(review): presumably paid calls are paused once the day's spend
  # reaches this amount — confirm.
  daily_pause_threshold_usd: 20.00
  # NOTE(review): presumably a single task estimated above this amount needs
  # explicit approval — confirm.
  task_approval_threshold_usd: 5.00
  # Stored as a fraction despite the `_pct` name: 0.90, presumably meaning
  # "warn at 90% of monthly_budget_usd" — confirm.
  monthly_warning_pct: 0.90

# Quality monitoring (disabled Phase 1–2, enable when local model handles traffic)
# NOTE(review): `routing` already sends several task types to local_parser
# while `enabled` is still false — check whether the enable condition is met.
quality_monitoring:
  # Fraction in 0-1; presumably the share of outputs sampled for review —
  # confirm.
  spot_check_rate: 0.05
  # Model alias from `models` used as the judge.
  judge_model: reasoner
  judge_batch_schedule: weekly
  # NOTE(review): presumably judge scores below this value are flagged —
  # confirm direction and scale.
  flag_threshold: 0.7
  # Currently switched off.
  enabled: false