# OpenAPI contract for the Swarmy agent-run endpoint.
# Version scalars are quoted so they are always read as strings.
openapi: '3.0.3'
info:
  version: '1.0.0'
  title: 'Swarmy Agent API'
  description: |
    Single SSE endpoint for driving Swarmy browsing tasks from external
    agents, automation systems, and first-time users.

    Call `POST /api/agent/run` with a token + profile (or container_id
    to resume). The response is a Server-Sent Events stream. Every
    error response includes action URLs so the caller can direct a
    human to the right place (sign up, create token, create profile,
    fix a BLOCKED browser via VNC).

# Base URLs that the entries under `paths` are resolved against.
servers:
  - url: 'https://swarmy.firsttofly.com'
  - url: 'http://localhost:5174'

# Top-level security requirement: operations use the `bearerAuth` scheme
# declared under components.securitySchemes.
security:
  - bearerAuth: []

paths:
  # The only operation: POST a RunRequest, receive an SSE stream (200) or a
  # JSON ApiError (4xx/5xx) when the request is rejected before streaming.
  /api/agent/run:
    post:
      summary: Run (or resume) a browsing task
      description: |
        Provision a new container and run the task, or resume an existing
        container by passing `container_id`. Streams progress, activity,
        delta (stdout), and either `result` (success) or `blocked`
        (human intervention needed) events.
      requestBody:
        required: true
        content:
          application/json:
            schema: { $ref: '#/components/schemas/RunRequest' }
            examples:
              newTask:
                summary: New task with profile name
                value:
                  instruction: "Use swarmy-chrome-agent to navigate to https://example.com and summarise it."
                  profile: "claude-default"
              resumeAfterBlock:
                summary: Resume after a BLOCKED event was resolved via VNC
                value:
                  instruction: "Continue the original task: ..."
                  container_id: "d6a5d720-..."
      responses:
        '200':
          description: |
            SSE stream (`text/event-stream`). The frame catalogue is the
            set of event schemas under `components/schemas`
            (`ReadyEvent` … `DoneEvent`). All frames use named events;
            payloads are single-line JSON.

            **Happy path**: `ready` → `progress`* → `activity`* →
            `delta`* → `result` → `done`.

            **Blocked**: `ready` → `progress`* → `activity`* → `delta`*
            → `blocked`. Stream closes, container stays alive.

            **Error**: `(ready?)` → `error`. Stream closes.

            When a capture flag (`keyframes`, `final_screenshot`,
            `final_video`) is set on the request, `capture` frames are
            additionally emitted during the run.

            A `:keepalive` comment frame is sent every 10 s to keep
            proxies from closing idle connections. Standard SSE clients
            ignore comments.
          content:
            text/event-stream:
              schema:
                # anyOf rather than oneOf: the event schemas are open
                # objects and overlap (DoneEvent declares no properties so
                # it accepts any object; a `result` payload also satisfies
                # DeltaEvent). oneOf's exactly-one rule would therefore
                # reject valid frames.
                anyOf:
                  - $ref: '#/components/schemas/ReadyEvent'
                  # Only emitted for tunnel requests; see the schema's
                  # own description.
                  - $ref: '#/components/schemas/TunnelEvent'
                  - $ref: '#/components/schemas/CaptureEvent'
                  - $ref: '#/components/schemas/ProgressEvent'
                  - $ref: '#/components/schemas/ActivityEvent'
                  - $ref: '#/components/schemas/DeltaEvent'
                  - $ref: '#/components/schemas/ResultEvent'
                  - $ref: '#/components/schemas/BlockedEvent'
                  - $ref: '#/components/schemas/ErrorEvent'
                  - $ref: '#/components/schemas/DoneEvent'
        # Pre-stream failures: each returns a JSON ApiError body.
        '400':
          $ref: '#/components/responses/BadRequest'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '403':
          $ref: '#/components/responses/Forbidden'
        '404':
          $ref: '#/components/responses/NotFound'
        '409':
          $ref: '#/components/responses/Conflict'
        '503':
          $ref: '#/components/responses/ServiceUnavailable'

components:
  # HTTP bearer authentication; referenced by name from the top-level
  # `security` requirement.
  securitySchemes:
    bearerAuth:
      description: |
        Swarmy API token. Generate at **Settings → API Tokens**.
        Format: `swm_...`. See `/settings` on the manager.
      scheme: bearer
      type: http

  schemas:
    # Request body for POST /api/agent/run. Unknown keys are rejected
    # (additionalProperties: false).
    RunRequest:
      type: object
      required: [instruction]
      # Encodes the documented rule "profile is required unless
      # container_id is set": at least one of the two must be present.
      # When both are sent, container_id wins (see its description).
      anyOf:
        - required: [profile]
        - required: [container_id]
      # NOTE(review): TunnelEvent says it is emitted when the request
      # supplied `tunnel.ports`, but no `tunnel` property is declared here
      # and additionalProperties: false would reject it — confirm whether
      # the tunnel option should be documented in this schema.
      properties:
        instruction:
          type: string
          description: |
            Natural-language task for the in-container agent. The
            in-container agent has ZERO outer context — make the
            instruction self-contained. Include full URLs and desired
            output format.
        profile:
          type: string
          description: |
            Profile name OR id. Required unless `container_id` is set.
            Name is resolved to the single visible match; ambiguous
            names return `profile_ambiguous`.
        container_id:
          type: string
          description: |
            Resume an existing container (e.g. after a human fixed a
            BLOCKED browser via VNC). When set, `profile` is ignored.
        # --- Optional capture flags; all default to off. ---
        keyframes:
          type: boolean
          default: false
          description: |
            Capture a PNG of the running tab around every agent tool
            boundary (plan / execute / revise / followup / navigate
            before & after). Surfaced via `capture` SSE events.
        final_screenshot:
          type: boolean
          default: false
          description: |
            Capture a single PNG of the running tab right before stop.
            URL appears in `result.captures.final_screenshot`.
        final_video:
          type: boolean
          default: false
          description: |
            Capture a change-driven CDP screencast and JIT-encode an
            MP4 (H.264 / yuv420p) at end-of-run. Starts at the moment
            the agent leaves `claude.ai/*` extension-setup pages
            (cold-start prep is excluded). URL appears in
            `result.captures.final_video`.
        final_video_max_seconds:
          type: integer
          minimum: 0
          maximum: 3600
          default: 60
          description: |
            Tail-cap on the screencast ring buffer. Time-based: only
            the last N seconds of real wall-clock time are encoded.
      additionalProperties: false

    # Stream-opening frame: identifies the container and the run.
    ReadyEvent:
      type: object
      description: |
        Emitted exactly once at the start of the stream. Stash
        `container_id` — you'll need it to resume if `blocked` fires.
      # Every property except profile_id is mandatory.
      required:
        - container_id
        - worker_id
        - vnc_url
        - web_ui_url
        - resumed
        - run_id
      properties:
        container_id:
          type: string
          format: uuid
        profile_id:
          type: string
          format: uuid
          nullable: true
        worker_id:
          type: string
        vnc_url:
          type: string
          format: uri
        web_ui_url:
          type: string
          format: uri
        resumed:
          type: boolean
          description: |
            true if the container came from the pool or from an explicit
            `container_id`. false if it was freshly provisioned.
        run_id:
          type: string
          description: |
            32-hex-char identifier for this run. Capture artifacts (if
            any) are addressable at `/api/captures/<run_id>/<filename>`.

    # Tunnel-readiness frame; all three properties are mandatory.
    TunnelEvent:
      type: object
      description: |
        Emitted only when the request supplied `tunnel.ports`. Fires
        once per run, AFTER the worker confirms the in-container
        forwarders are bound. The CLI uses this to know when it's
        safe to dial the tunnel WS — the prior `ready` event lands
        BEFORE tunnel readiness, so don't try to use the tunnel based
        on `ready` alone.
      required:
        - id
        - ws_url
        - ports
      properties:
        id:
          description: Tunnel identifier; pair with `ws_url` to dial.
          type: string
          format: uuid
        ws_url:
          description: |
            Path to the tunnel WS (joins to manager base URL,
            ws/wss scheme matching request).
          type: string
        ports:
          type: array
          # Each entry pairs a worker-side port with a user-side port.
          items:
            type: object
            required:
              - workerPort
              - userPort
            properties:
              workerPort:
                type: integer
              userPort:
                type: integer

    # Announces one capture artifact (kind + fetch URL + timestamp).
    # NOTE(review): `kind` allows `turn_marker`, while `marker` is
    # documented as keyframe-only and lists `turn_response` among its
    # examples — confirm how turn markers are reported.
    CaptureEvent:
      type: object
      description: |
        Emitted only when the request enabled `keyframes`,
        `final_screenshot`, or `final_video`. Each event signals that
        a capture artifact has landed and is fetchable at the given
        URL. Supports `?token=<bearer>` query for `<img>` / `<video>`
        tag use.
      required:
        - kind
        - url
        - at
      properties:
        kind:
          type: string
          enum:
            - keyframe
            - turn_marker
            - final_screenshot
            - final_video
        url:
          type: string
          format: uri-reference
        at:
          type: string
          format: date-time
        marker:
          type: string
          description: |
            Present on `keyframe` events only. Names the agent action
            boundary the keyframe was captured at, e.g. `plan_before`,
            `execute_after`, `navigate_before`, `turn_response`.

    # Phase marker with a free-text message; both fields are mandatory.
    ProgressEvent:
      type: object
      description: 'Coarse manager-side phase transition. Human-readable; safe to ignore.'
      required:
        - phase
        - message
      properties:
        phase:
          type: string
          enum:
            - provisioning
            - starting
            - running
            - finishing
        message:
          type: string

    # High-frequency sidepanel update; all three fields are mandatory.
    ActivityEvent:
      type: object
      description: |
        Fine-grained Claude-in-Chrome sidepanel update. Can fire many
        times per second during a busy plan-execute loop. `level: main`
        = top-level step; `level: sub` = tool-call label.
      required:
        - level
        - text
        - at
      properties:
        level:
          type: string
          enum:
            - main
            - sub
        text:
          type: string
        # Integer timestamp here, unlike CaptureEvent.at which is a
        # date-time string.
        at:
          type: integer
          description: 'Unix epoch ms'

    # One chunk of agent stdout, carried in the mandatory `content` field.
    DeltaEvent:
      type: object
      description: |
        Incremental stdout from the in-container agent. Concatenate for
        a running transcript. Includes thinking / tool-call output as
        well as the final answer; use `result.content` for the cleanly
        parsed answer.
      required:
        - content
      properties:
        content:
          type: string

    # Success payload: the parsed answer plus optional capture URLs.
    ResultEvent:
      type: object
      description: |
        Final parsed answer (last non-empty line of stdout, per the
        parseable-tail convention). Emitted once at the end on success,
        immediately followed by `done`.
      required:
        - content
        - container_id
      properties:
        content:
          type: string
        container_id:
          type: string
          format: uuid
        captures:
          type: object
          # Closed object: only the two artifact keys below are allowed.
          additionalProperties: false
          description: |
            Present iff capture flags were set on the request. Final
            artifact URLs (also emitted via `capture` SSE events
            during the run, but consolidated here so single-shot
            consumers don't have to track the stream).
          properties:
            final_screenshot:
              type: string
              format: uri-reference
            final_video:
              type: string
              format: uri-reference

    # Terminal frame for the "needs a human" outcome.
    BlockedEvent:
      type: object
      description: |
        Human intervention needed (captcha / login / MFA / consent
        screen). Container is kept alive. Send a human to `vnc_url` to
        fix it; then re-POST `/api/agent/run` with `{"instruction":
        "continue...", "container_id": "<this>"}`.
      # current_url and resume_hint are optional.
      required:
        - blocked_state
        - container_id
        - vnc_url
      properties:
        blocked_state:
          type: string
          enum:
            - captcha_required
            - login_required
            - mfa_required
            - consent_screen
            - identity_verification
            - auth_tab_missing
            - authorize_button_missing
            - still_logged_out
        current_url:
          type: string
          format: uri
          nullable: true
        container_id:
          type: string
          format: uuid
        vnc_url:
          type: string
          format: uri
        resume_hint:
          type: string

    # In-stream failure frame; `code` and `message` are mandatory.
    ErrorEvent:
      type: object
      description: 'Unrecoverable error on an already-open stream. Stream closes.'
      required:
        - code
        - message
      properties:
        code:
          type: string
          enum:
            - container_start_failed
            - container_start_timeout
            - profile_fetch_failed
            - chrome_restart_failed
            - worker_offline
            - container_at_terminal_limit
            - terminal_spawn_failed
            - task_failed
            - internal_error
        message:
          type: string
        container_id:
          type: string
          format: uuid
          nullable: true
        hint:
          type: string

    # No properties are declared, so any JSON object validates against
    # this schema.
    DoneEvent:
      type: object
      description: |
        Final sentinel on success — always emitted after `result`.
        Mirrors OpenAI's `[DONE]`. Not emitted on `blocked` or `error`.

    # JSON error body; only `code` and `message` are guaranteed, the
    # remaining fields are populated depending on the error code.
    ApiError:
      type: object
      description: 'Pre-stream error payload (HTTP 4xx/5xx with JSON).'
      required:
        - code
        - message
      properties:
        code:
          type: string
        message:
          type: string
        # Action URLs a caller can hand to a human.
        sign_up_url:
          type: string
          format: uri
        token_settings_url:
          type: string
          format: uri
        profiles_url:
          type: string
          format: uri
        containers_url:
          type: string
          format: uri
        docs_url:
          type: string
          format: uri
        field:
          type: string
        reason:
          type: string
        matches:
          type: array
          description: 'Populated on `profile_ambiguous`.'
          items:
            type: object
            properties:
              id:
                type: string
                format: uuid
              name:
                type: string
              owner:
                type: string
        running:
          type: array
          description: 'Populated on `user_quota_exceeded` — your currently running containers.'
          items:
            type: object
            properties:
              id:
                type: string
                format: uuid
              name:
                type: string
              worker_id:
                type: string
        retry_after_seconds:
          type: integer
          description: 'Populated on `no_workers_available`.'

  responses:
    # JSON ApiError body; the description lists the possible `code` values.
    BadRequest:
      description: |
        `bad_request` · `profile_not_found` · `profile_ambiguous` ·
        `profile_not_ready`
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ApiError'
          examples:
            ambiguous:
              value:
                code: profile_ambiguous
                message: 'Multiple visible profiles named "claude-default"'
                matches:
                  - id: 'abc-...'
                    name: 'claude-default'
                    owner: 'user-a'
                  - id: 'def-...'
                    name: 'claude-default'
                    owner: 'user-b'
    # JSON ApiError body; the description lists the possible `code` values.
    Unauthorized:
      description: "`unauthorized` · `token_revoked`"
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ApiError'
          examples:
            noToken:
              value:
                code: unauthorized
                message: 'Missing or invalid Authorization header'
                sign_up_url: 'https://swarmy.firsttofly.com/'
                token_settings_url: 'https://swarmy.firsttofly.com/settings'
                docs_url: 'https://swarmy.firsttofly.com/api/agent/docs'
    # JSON ApiError body; the description names the `code` value.
    Forbidden:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ApiError'
      description: "`user_pending_approval`"
    # JSON ApiError body; the description names the `code` value.
    NotFound:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ApiError'
      description: "`container_not_found`"
    # JSON ApiError body; the description lists the possible `code` values.
    Conflict:
      description: "`user_quota_exceeded` · `container_not_running`"
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ApiError'
          examples:
            quota:
              value:
                code: user_quota_exceeded
                message: 'You already have 3 active containers (limit 3)'
                containers_url: 'https://swarmy.firsttofly.com/'
                running:
                  - id: 'a-...'
                    name: 'swarmy-w1-a'
                    worker_id: 'w1'
                  - id: 'b-...'
                    name: 'swarmy-w1-b'
                    worker_id: 'w1'
                  - id: 'c-...'
                    name: 'swarmy-w1-c'
                    worker_id: 'w1'
    # JSON ApiError body; the description names the `code` value.
    ServiceUnavailable:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ApiError'
      description: "`no_workers_available`"
