pelagia-portal/automation/claude-issue-watcher.sh
Hardik 12e6d16061 feat(automation): test DB mirror + dev-server env for autofix verification
- automation/refresh-test-db.sh: daily pg_dump of prod (pelagia) into a throwaway
  mirror (pelagia_test) on pms1; cron at 03:30. ~10MB, refresh ~1s.
- Autofix clone ~/pelagia-autofix/App/.env points DATABASE_URL at pelagia_test in
  safe dev mode (no Resend/SSO secrets -> console email, local storage), port 3100.
- Fix prompt: Claude may run integration tests against the test DB and start a dev
  server on port 3100 ONLY; stop it by port (fuser -k 3100/tcp), never broad pkill
  (production also runs a next-server on 3000).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-19 04:49:15 +05:30

337 lines
16 KiB
Bash

#!/usr/bin/env bash
# Claude issue watcher -- Linux port (runs on pms1 via cron). Two phases per run:
#
# 1. TRIAGE -- find open `portal` issues with no decision label yet. Claude
# reads each (analysis only), writes a label + a markdown breakdown, the
# watcher posts the breakdown as a comment and adds `claude-queue` or
# `interactive`.
# 2. FIX -- find open `claude-queue` issues. Claude implements a fix on a
# dedicated clone, pushes `claude/issue-N`, and opens a PR.
#
# Label lifecycle:
# portal -> (triage) -> claude-queue | interactive
# claude-queue -> claude-working -> claude-pr | claude-failed
#
# Config: watcher.config.json next to this script (or pass a path as $1).
# Mirrors the Windows claude-issue-watcher.ps1; see automation/README.md.
# Unset variables are errors and a pipeline fails if any stage fails. `-e` is
# not enabled, so a failing command does not abort the run by itself.
set -uo pipefail

# Absolute directory of this script; the default config path is derived from it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Config file: first CLI argument if given, else watcher.config.json beside the script.
CONFIG="${1:-$SCRIPT_DIR/watcher.config.json}"
if [ ! -f "$CONFIG" ]; then
  echo "Config not found: $CONFIG (copy watcher.config.example.json and fill in the token)"
  exit 1
fi
# cfg JQ_FILTER -- print the raw (unquoted) result of JQ_FILTER applied to $CONFIG.
cfg() {
  local filter=$1
  jq -r "$filter" "$CONFIG"
}
# --- settings read from $CONFIG ---
# Filters of the form `.key // N` fall back to N when the key is absent.
FORGEJO_URL=$(cfg .forgejoUrl)
REPO=$(cfg .repo)
TOKEN=$(cfg .token)
WORKDIR=$(cfg .workDir)
BASE_BRANCH=$(cfg .baseBranch)
BRANCH_PREFIX=$(cfg .branchPrefix)
MAX_FIX=$(cfg '.maxIssuesPerRun // 1')
MAX_TRIAGE=$(cfg '.maxTriagePerRun // 3')
CLAUDE=$(cfg .claudeExe)
FIX_TURNS=$(cfg '.claudeMaxTurns // 150')
TRIAGE_TURNS=$(cfg '.triageMaxTurns // 80')

# `jq -r` prints the literal string "null" for a missing key (and nothing at
# all when jq itself fails), so reject both up front instead of cloning into a
# directory called "null" or pushing a branch named "null<N>".
# The offending value is never echoed: one of these settings is the API token.
for _cfg_pair in forgejoUrl:FORGEJO_URL repo:REPO token:TOKEN workDir:WORKDIR \
  baseBranch:BASE_BRANCH branchPrefix:BRANCH_PREFIX claudeExe:CLAUDE; do
  _cfg_key=${_cfg_pair%%:*}
  _cfg_var=${_cfg_pair#*:}
  _cfg_val=${!_cfg_var}
  if [ -z "$_cfg_val" ] || [ "$_cfg_val" = "null" ]; then
    echo "Config $CONFIG: required setting '$_cfg_key' is missing or empty" >&2
    exit 1
  fi
done

# The limits are spliced into jq programs (e.g. `.[:$MAX_FIX]`) and passed to
# --max-turns, so they must be plain non-negative integers.
for _cfg_pair in maxIssuesPerRun:MAX_FIX maxTriagePerRun:MAX_TRIAGE \
  claudeMaxTurns:FIX_TURNS triageMaxTurns:TRIAGE_TURNS; do
  _cfg_key=${_cfg_pair%%:*}
  _cfg_var=${_cfg_pair#*:}
  case "${!_cfg_var}" in
    '' | *[!0-9]*)
      echo "Config $CONFIG: setting '$_cfg_key' must be a non-negative integer" >&2
      exit 1
      ;;
  esac
done
unset _cfg_pair _cfg_key _cfg_var _cfg_val

# Derived paths: REST base URL and a per-day log file under logs/ next to the script.
API="$FORGEJO_URL/api/v1"
LOG_DIR="$SCRIPT_DIR/logs"
mkdir -p "$LOG_DIR"
LOG_FILE="$LOG_DIR/watcher-$(date +%F).log"
# log MESSAGE... -- prefix the message with the current time (HH:MM:SS), print
# it to stdout and append the same line to $LOG_FILE.
log() {
  local stamp
  stamp=$(date +%T)
  echo "$stamp $*" | tee -a "$LOG_FILE"
}
# HTML comment placed on the first line of the status comments this watcher
# posts, so they can be recognised later.
BOT_MARKER='<!-- ppms-bot -->'
# Bot status comments are excluded from the context fed back to Claude. New ones
# carry the marker; legacy ones are matched by stable phrases.
# The value is a regular expression: comments_block passes it to jq's test().
BOT_PATTERN='ppms-bot|has started working on this issue|Claude opened PR \[#|Automated fix attempt did not produce'
# --- single-instance lock ---
# File descriptor 9 stays open on the lock file for the rest of the run; if the
# lock cannot be taken without blocking, another run holds it and we exit 0.
exec 9>"$SCRIPT_DIR/.watcher.lock"
flock -n 9 || { log "Another watcher run is active; exiting."; exit 0; }

# --- preflight: idle until Claude Code is authenticated on this host ---
# Cron can be enabled before sign-in: with neither a credentials file nor an
# API key the run is a no-op, which avoids wrongly marking issues claude-failed.
if [ -z "${ANTHROPIC_API_KEY:-}" ] && [ ! -f "$HOME/.claude/.credentials.json" ]; then
  log "Claude Code not authenticated yet (no ~/.claude/.credentials.json or ANTHROPIC_API_KEY); skipping."
  exit 0
fi
# --- Forgejo API helpers (curl + jq; UTF-8 and JSON arrays are handled natively) ---
# api METHOD PATH [JSON_BODY]
# Call the Forgejo REST API at $API$PATH with token authentication and write
# the response to stdout. A non-empty JSON_BODY is sent as application/json.
# curl runs with -f, so an HTTP error status yields a non-zero exit code.
api() { # METHOD PATH [JSON_BODY]
  local verb=$1 endpoint=$2 payload=${3:-}
  local -a curl_args=(-fsS -X "$verb" "$API$endpoint" -H "Authorization: token $TOKEN")
  if [ -n "$payload" ]; then
    curl_args+=(-H "Content-Type: application/json" --data "$payload")
  fi
  curl "${curl_args[@]}"
}
# issues_by_label LABEL -- print the API response listing open issues
# (type=issues) of $REPO that carry LABEL; the request asks for at most 50.
issues_by_label() {
  local label=$1
  api GET "/repos/$REPO/issues?state=open&labels=${label}&type=issues&limit=50"
}
# add_comment NUMBER TEXT -- post TEXT as a new comment on issue NUMBER.
# jq builds the JSON payload so quotes and newlines in TEXT are escaped; the
# API response is discarded.
add_comment() { # NUMBER TEXT
  local issue_no=$1 text=$2 payload
  payload=$(jq -nc --arg b "$text" '{body:$b}')
  api POST "/repos/$REPO/issues/$issue_no/comments" "$payload" >/dev/null
}
# Build {"labels":[ids]} for the given label names from the live label list.
#
# Arguments: label names, one per argument (names may contain spaces).
# Output:    one JSON object on stdout. It is ALWAYS valid JSON: when the label
#            list cannot be fetched or parsed the result is {"labels":[]} and
#            the return status is 1, so callers' "no ids resolved" guards fire
#            instead of tripping over empty output.
# Globals:   issues_labels_cache -- reused when already set. Callers run this
#            function inside $(...), so a value assigned here does not survive
#            the call; the cache only helps if it is populated at top level.
# NOTE(review): only the first 50 repository labels are requested; names
# beyond that page would not resolve -- confirm the repo stays under that.
label_ids_body() { # NAME...
  local names labels_json result
  names=$(printf '%s\n' "$@" | jq -R . | jq -sc .)
  labels_json=${issues_labels_cache:-}
  if [ -z "$labels_json" ]; then
    labels_json=$(api GET "/repos/$REPO/labels?limit=50") || labels_json=""
    issues_labels_cache=$labels_json
  fi
  # jq prints nothing (and still exits 0) on empty input, so test the output
  # rather than the exit status.
  result=$(printf '%s' "$labels_json" \
    | jq -c --argjson want "$names" '{labels: [ .[] | select(.name as $n | $want|index($n)) | .id ]}')
  if [ -z "$result" ]; then
    printf '%s\n' '{"labels":[]}'
    return 1
  fi
  printf '%s\n' "$result"
}
# Additive: never clears existing labels.
#
# add_labels NUMBER NAME...
# Resolves the names to ids and POSTs them to the issue's label collection.
# Returns 1 without calling the API when no id could be resolved -- including
# the case where the label lookup produced no output or invalid JSON, which
# previously broke the integer test and fell through to a bodiless POST.
add_labels() { # NUMBER NAME...
  local num=$1
  shift
  local body count
  body=$(label_ids_body "$@")
  count=$(printf '%s' "$body" | jq '.labels|length' 2>/dev/null)
  case "$count" in
    '' | *[!0-9]* | 0)
      log "add_labels: no ids resolved for [$*] on #$num"
      return 1
      ;;
  esac
  api POST "/repos/$REPO/issues/$num/labels" "$body" >/dev/null
}
# Replace the label set: (current - remove) + add. Guards against wiping.
#
# set_labels NUMBER "remove names" "add names"
#   NUMBER        issue number
#   remove names  whitespace-separated label names to drop (may be empty)
#   add names     whitespace-separated label names to add (may be empty)
# Returns 1 and leaves the issue untouched when the current labels cannot be
# read or when none of the wanted names resolves to an id.
#
# The current label names are kept as whole strings (a JSON array, then a bash
# array), so a label such as "help wanted" is not split into two unknown names
# and silently dropped by the PUT.
set_labels() { # NUMBER "remove names" "add names"
  local num=$1 remove=${2:-} add=${3:-}
  local issue_json wanted_json body n wn
  local -a wanted=()

  # A failed read must not be mistaken for "issue has no labels": the PUT
  # below replaces the whole set and would wipe everything but `add`.
  if ! issue_json=$(api GET "/repos/$REPO/issues/$num") || [ -z "$issue_json" ]; then
    log "set_labels: could not read current labels of #$num; leaving them untouched"
    return 1
  fi

  # Normalise tabs/newlines so the jq side only has to split on spaces.
  remove=${remove//[$'\t\n']/ }
  add=${add//[$'\t\n']/ }
  if ! wanted_json=$(printf '%s' "$issue_json" | jq -c --arg remove "$remove" --arg add "$add" '
      def words: split(" ") | map(select(. != ""));
      (([.labels[]?.name] - ($remove | words)) + ($add | words)) | unique') \
    || [ -z "$wanted_json" ]; then
    log "set_labels: could not parse current labels of #$num; leaving them untouched"
    return 1
  fi

  mapfile -t wanted < <(printf '%s' "$wanted_json" | jq -r '.[]')
  wn=${#wanted[@]}
  if [ "$wn" -eq 0 ]; then
    # Nothing left and nothing to add: an empty set is what was asked for.
    body='{"labels":[]}'
  else
    body=$(label_ids_body "${wanted[@]}")
  fi

  n=$(printf '%s' "$body" | jq '.labels|length' 2>/dev/null)
  case "$n" in
    '' | *[!0-9]*) n=0 ;;
  esac
  if [ "$wn" -gt 0 ] && [ "$n" -eq 0 ]; then
    log "set_labels: refusing to clear all labels on #$num"
    return 1
  fi
  if [ "$n" -lt "$wn" ]; then
    log "set_labels: only $n of $wn wanted label(s) resolved to ids on #$num; the rest will be dropped"
  fi
  api PUT "/repos/$REPO/issues/$num/labels" "$body" >/dev/null
}
# Human comments as a markdown block (bot status comments excluded). Empty if none.
#
# comments_block NUMBER
# Fetches up to 50 comments of the issue, drops those with a null body or a
# body matching $BOT_PATTERN, and prints a heading followed by one
# "**<login> commented:**" entry per remaining comment.
# Prints nothing (status 0) when there are no such comments OR when the
# comments cannot be fetched/parsed, so a failed request no longer yields a
# bare heading in the prompt. Diagnostics go to stderr because callers capture
# this function's stdout.
comments_block() { # NUMBER
  local raw human count
  if ! raw=$(api GET "/repos/$REPO/issues/$1/comments?limit=50"); then
    log "comments_block: could not fetch comments for #$1" >&2
    return 0
  fi
  human=$(printf '%s' "$raw" \
    | jq -c --arg pat "$BOT_PATTERN" '[.[] | select(.body != null) | select(.body | test($pat) | not)]')
  count=$(printf '%s' "$human" | jq 'length' 2>/dev/null)
  case "$count" in
    '' | *[!0-9]* | 0) return 0 ;;
  esac
  printf '## Comments on the issue (read these -- they refine the scope/repro)\n\n'
  printf '%s' "$human" | jq -r '.[] | "**\(.user.login) commented:**\n\(.body)\n"'
}
# run_claude PROMPT_FILE LOG_FILE MAX_TURNS
# Run the Claude CLI ($CLAUDE) non-interactively from inside $WORKDIR: the
# prompt is read from PROMPT_FILE on stdin, stdout and stderr both go to
# LOG_FILE. Runs in a subshell so the caller's working directory is unchanged.
# Returns the CLI's exit status (or cd's, if $WORKDIR cannot be entered).
run_claude() { # PROMPT_FILE LOG_FILE MAX_TURNS
  local prompt_path=$1 output_path=$2 turn_limit=$3
  local -a claude_args=(-p --dangerously-skip-permissions
    --max-turns "$turn_limit" --output-format text)
  (
    cd "$WORKDIR" || exit
    "$CLAUDE" "${claude_args[@]}" <"$prompt_path" >"$output_path" 2>&1
  )
}
# reset_clone -- bring the work clone back to a pristine origin/$BASE_BRANCH:
# fetch, force-checkout the remote base branch (discarding local edits; stderr
# silenced), then delete untracked files and directories. `clean` is run
# without -x, so ignored files are kept.
reset_clone() {
  local -a in_clone=(git -C "$WORKDIR")
  "${in_clone[@]}" fetch origin -q
  "${in_clone[@]}" checkout -f "origin/$BASE_BRANCH" -q 2>/dev/null
  "${in_clone[@]}" clean -fdq
}
# --- prepare the dedicated work clone (needed by both phases) ---
# Strip a leading http:// or https:// from the Forgejo URL to get host[:port][/path].
case "$FORGEJO_URL" in
  https://*) host_no_scheme=${FORGEJO_URL#https://} ;;
  http://*) host_no_scheme=${FORGEJO_URL#http://} ;;
  *) host_no_scheme=$FORGEJO_URL ;;
esac
# The repository owner (text before the first "/") doubles as the git user name.
owner=${REPO%%/*}
# Clone over https only when the configured URL starts with "https"; else http.
# NOTE(review): the token is embedded in the clone URL, which git keeps as the
# origin remote of the work clone -- confirm that is acceptable on this host.
case "$FORGEJO_URL" in
  https*) CLONE_URL="https://${owner}:${TOKEN}@${host_no_scheme}/${REPO}.git" ;;
  *) CLONE_URL="http://${owner}:${TOKEN}@${host_no_scheme}/${REPO}.git" ;;
esac

# First run only: create the clone and give it a commit identity.
if [ ! -d "$WORKDIR/.git" ]; then
  log "Cloning $REPO into $WORKDIR"
  git clone -q "$CLONE_URL" "$WORKDIR" || { log "git clone failed"; exit 1; }
  git -C "$WORKDIR" config user.name "Claude (auto-fix)"
  git -C "$WORKDIR" config user.email "claude-autofix@pelagiamarine.com"
fi

# Labels that mean an issue has already been routed; triage skips issues carrying any of them.
DECISION_LABELS="claude-queue interactive claude-working claude-pr claude-failed"
# =====================================================================
# Phase 1: triage new portal issues
# =====================================================================
# Candidates are open `portal` issues that carry none of $DECISION_LABELS,
# oldest first (by issue number), capped at $MAX_TRIAGE per run.
dl_json=$(printf '%s\n' $DECISION_LABELS | jq -R . | jq -sc .)
untriaged=$(issues_by_label portal | jq -c --argjson dl "$dl_json" \
  '[ .[] | select((.labels|map(.name)) as $have | ($dl | any(. as $d | $have|index($d))) | not) ] | sort_by(.number)')
to_triage=$(printf '%s' "$untriaged" | jq -c ".[:$MAX_TRIAGE]")
n_triage=$(printf '%s' "$to_triage" | jq 'length')
log "Triage: $n_triage portal issue(s) awaiting triage"
# Index of the next entry of $to_triage to process.
t=0
# Triage each selected issue: build a prompt, let Claude analyse the code
# (read-only), then read back the two files it was asked to write --
# CLAUDE_TRIAGE_LABEL.txt (the routing decision) and CLAUDE_TRIAGE.md (the
# breakdown) -- and publish them as a label plus a comment.
while [ "$t" -lt "${n_triage:-0}" ]; do
  issue=$(printf '%s' "$to_triage" | jq -c ".[$t]")
  t=$((t+1))
  num=$(printf '%s' "$issue" | jq -r .number)
  title=$(printf '%s' "$issue" | jq -r .title)
  body=$(printf '%s' "$issue" | jq -r '.body // ""')
  log "-- Triaging #$num: $title"

  # Start from a pristine base branch and make sure no output files from an
  # earlier run can be mistaken for this run's answer.
  reset_clone
  comments=$(comments_block "$num")
  rm -f "$WORKDIR/CLAUDE_TRIAGE_LABEL.txt" "$WORKDIR/CLAUDE_TRIAGE.md"

  prompt_file=$(mktemp)
  {
    printf '%s\n' "You are TRIAGING issue #$num of the Pelagia Portal (PPMS), a Next.js 15 purchase-order"
    printf '%s\n' "management system for a maritime company. The web app is in App/ -- read App/CLAUDE.md and"
    printf '%s\n' "explore the relevant code to judge feasibility. This is ANALYSIS ONLY: do NOT modify any"
    printf '%s\n' "existing file, do NOT run builds or tests, do NOT commit. You only create two output files."
    printf '\n## Issue #%s: %s\n\n' "$num" "$title"
    printf '%s\n\n' "$body"
    printf '%s\n\n' "$comments"
    printf '%s\n' "## Your job"
    printf '%s\n' "1. Interpret the request and break it into concrete technical action item(s), the way a"
    printf '%s\n' " developer would in review -- note the files/areas likely involved and any open questions."
    printf '%s\n' "2. Decide whether an UNATTENDED automated coding run can safely and verifiably implement it:"
    printf '%s\n' " - claude-queue = localized change, clear acceptance, verifiable by type-check / lint / unit"
    printf '%s\n' " tests, and NOT touching DB migrations, auth/permissions, payments/money, external live"
    printf '%s\n' " systems (e.g. the GST website), or large multi-file features."
    printf '%s\n' " - interactive = needs human steering: ambiguous or underspecified, needs business content"
    printf '%s\n' " or a design decision, a schema migration, permissions/payments changes, an external"
    printf '%s\n' " dependency, or a large feature needing visual verification."
    printf '%s\n' "3. Write TWO files in the repository root, nothing else:"
    printf '%s\n' " - CLAUDE_TRIAGE_LABEL.txt -- a single line with EXACTLY one word: claude-queue OR interactive"
    printf '%s\n' " - CLAUDE_TRIAGE.md -- your requirements breakdown as markdown: action items, files/areas"
    printf '%s\n' " involved, open questions, and a final one-line 'Routing rationale: ...'."
  } > "$prompt_file"

  tlog="$LOG_DIR/claude-triage-$num-$(date +%Y%m%d-%H%M%S).log"
  log "Running Claude triage on #$num (log: $tlog)"
  run_claude "$prompt_file" "$tlog" "$TRIAGE_TURNS"; rc=$?
  log "Claude triage exited with code $rc for #$num"
  rm -f "$prompt_file"

  # Read the decision. `interactive` is checked first, so a file that mentions
  # both words is routed to a human.
  label=""
  if [ -f "$WORKDIR/CLAUDE_TRIAGE_LABEL.txt" ]; then
    raw=$(cat "$WORKDIR/CLAUDE_TRIAGE_LABEL.txt")
    case "$raw" in
      *interactive*) label=interactive ;;
      *claude-queue*) label=claude-queue ;;
    esac
  fi
  breakdown=""
  [ -f "$WORKDIR/CLAUDE_TRIAGE.md" ] && breakdown=$(cat "$WORKDIR/CLAUDE_TRIAGE.md")
  reset_clone

  if [ -z "$label" ]; then
    # Without a decision label the issue would be selected again on every run:
    # another Claude session, another copy of this comment, and -- because the
    # oldest issues are taken first -- newer issues would never get a turn.
    # Route it to a human explicitly instead. Label first, as below.
    log "Triage for #$num produced no valid decision; routing to interactive for a human"
    add_labels "$num" interactive
    add_comment "$num" "$BOT_MARKER
[Claude triage] Could not auto-triage this issue, so it has been routed to \`interactive\`. A human should review it; replace that label with \`claude-queue\` to hand it to the automated fixer."
    continue
  fi

  # Label FIRST so a comment failure cannot trigger a re-triage that double-posts.
  add_labels "$num" "$label"
  # No bot marker on the breakdown: it is genuine refined requirements and SHOULD
  # be fed to the fix stage (comments_block includes it).
  note=${breakdown:-"(no breakdown produced)"}
  add_comment "$num" "## Claude triage
$note
**Routing:** \`$label\`"
  log "Triaged #$num -> $label"
done
# =====================================================================
# Phase 2: fix queued issues
# =====================================================================
# Open `claude-queue` issues, oldest first (by issue number), capped at
# $MAX_FIX per run.
queued=$(issues_by_label claude-queue | jq -c "sort_by(.number) | .[:$MAX_FIX]")
n_fix=$(printf '%s' "$queued" | jq 'length')
if [ "$n_fix" -eq 0 ]; then
  log "No queued issues to fix."
else
  queued_refs=$(printf '%s' "$queued" | jq -r '[.[].number|"#\(.)"]|join(", ")')
  log "Found $n_fix queued issue(s) to fix: $queued_refs"
fi
# Index of the next entry of $queued to process.
f=0
# Work each queued issue: mark it claude-working, let Claude implement a fix
# on branch ${BRANCH_PREFIX}<number> cut from origin/$BASE_BRANCH, then either
# push + open a PR (claude-pr) or report the failure (claude-failed).
# Every exit path moves the issue out of claude-working so it cannot get stuck.
while [ "$f" -lt "${n_fix:-0}" ]; do
  issue=$(printf '%s' "$queued" | jq -c ".[$f]")
  f=$((f+1))
  num=$(printf '%s' "$issue" | jq -r .number)
  title=$(printf '%s' "$issue" | jq -r .title)
  body=$(printf '%s' "$issue" | jq -r '.body // ""')
  branch="${BRANCH_PREFIX}${num}"
  log "-- Working issue #$num: $title"
  set_labels "$num" "claude-queue claude-failed" "claude-working"
  add_comment "$num" "$BOT_MARKER
[Claude] Started working on this issue on branch \`$branch\`."

  # Fetch and wipe the work tree first. Uncommitted edits or stray files left
  # by a previous attempt would otherwise be carried onto the new branch by
  # `checkout -B` and could end up in this issue's commits.
  reset_clone
  if ! git -C "$WORKDIR" checkout -B "$branch" "origin/$BASE_BRANCH" -q 2>>"$LOG_FILE"; then
    log "checkout failed for #$num; marking claude-failed"
    set_labels "$num" "claude-working" "claude-failed"
    add_comment "$num" "$BOT_MARKER
[Claude] Automated fix attempt did not produce a change.
Could not prepare branch \`$branch\` in the work clone. See watcher logs on pms1: $LOG_FILE"
    continue
  fi

  comments=$(comments_block "$num")
  [ -n "$comments" ] && log "Including human comment(s) for #$num"

  prompt_file=$(mktemp)
  {
    printf '%s\n' "You are working autonomously on issue #$num of the Pelagia Portal (PPMS), a Next.js 15"
    printf '%s\n' "purchase-order management system. The web app lives in App/ -- read App/CLAUDE.md first."
    printf '\n## Issue #%s: %s\n\n' "$num" "$title"
    printf '%s\n\n' "$body"
    printf '%s\n\n' "$comments"
    printf '%s\n' "## Test environment available to you"
    printf '%s\n' "- App/.env points DATABASE_URL at a TEST database (pelagia_test) -- a daily mirror of"
    printf '%s\n' " production, safe to read and write. It is NOT production. Email is console-logged and"
    printf '%s\n' " storage is local in this dev mode (no real emails/uploads)."
    printf '%s\n' "- To run integration tests against it, load the env first:"
    printf '%s\n' " cd App && set -a && . ./.env && set +a && pnpm test:integration"
    printf '%s\n' "- If you need runtime verification, you MAY start a dev server ON PORT 3100 ONLY:"
    printf '%s\n' " cd App && pnpm dev -p 3100 (production runs on 3000 -- NEVER touch 3000)"
    printf '%s\n' " When done, stop ONLY your own server by port: 'fuser -k 3100/tcp' (or kill its exact PID)."
    printf '%s\n' " NEVER use a broad 'pkill -f next' -- it would kill the production app."
    printf '%s\n' "- Never connect to or modify the production database or the production app."
    printf '%s\n' ""
    printf '%s\n' "## Your job"
    printf '%s\n' "1. Investigate the issue and implement a focused, minimal fix in this repository."
    printf '%s\n' "2. Verify: run 'pnpm type-check' and 'pnpm lint' in App/. If behaviour is covered by unit"
    printf '%s\n' " tests, run them; for DB-backed behaviour, run integration tests against the test DB above."
    printf '%s\n' "3. Add or adjust tests when it makes sense."
    printf '%s\n' "4. Commit ALL changes to the current branch with a conventional message ending: Fixes #$num"
    printf '%s\n' "5. Do NOT push, do NOT create tags, do NOT switch branches. The supervisor handles push and PR."
    printf '%s\n' "If the issue is unclear, too risky (migrations, payments, permissions), or you cannot verify"
    printf '%s\n' "the fix, make NO commits and write a short explanation to CLAUDE_RESULT.md in the repo root."
  } > "$prompt_file"

  clog="$LOG_DIR/claude-issue-$num-$(date +%Y%m%d-%H%M%S).log"
  log "Running Claude Code on #$num (log: $clog)"
  run_claude "$prompt_file" "$clog" "$FIX_TURNS"; rc=$?
  log "Claude exited with code $rc for #$num"
  rm -f "$prompt_file"

  # CLAUDE_RESULT.md is Claude's way of declining: keep its text for the
  # failure comment, then drop the file and any uncommitted edits.
  abort_note=""
  if [ -f "$WORKDIR/CLAUDE_RESULT.md" ]; then
    abort_note=$(cat "$WORKDIR/CLAUDE_RESULT.md")
    rm -f "$WORKDIR/CLAUDE_RESULT.md"
    git -C "$WORKDIR" checkout -- . 2>/dev/null
  fi

  # Commits on HEAD that are not on the base branch; an empty result (git
  # failed) is treated as zero so the comparison below stays well-formed.
  commits=$(git -C "$WORKDIR" rev-list "origin/$BASE_BRANCH..HEAD" --count)
  commits=${commits:-0}

  if [ "$commits" -gt 0 ]; then
    log "Claude made $commits commit(s); pushing $branch"
    if ! git -C "$WORKDIR" push -f -u origin "$branch" -q 2>>"$LOG_FILE"; then
      log "push failed for #$num"; set_labels "$num" "claude-working" "claude-failed"; continue
    fi
    pr_title="fix: $(printf '%s' "$title" | sed 's/^\[Issue\]: //')"
    pr_body="Automated fix by Claude Code for #$num.
Closes #$num
Review, merge, then create a release tag (vX.Y.Z) to deploy."
    pr_request=$(jq -nc --arg base "$BASE_BRANCH" --arg head "$branch" --arg t "$pr_title" --arg b "$pr_body" \
      '{base:$base,head:$head,title:$t,body:$b}')
    # The request can be rejected; `api` then prints nothing and returns
    # non-zero. Do not announce a PR that was never created.
    pr=$(api POST "/repos/$REPO/pulls" "$pr_request") || pr=""
    prnum=$(printf '%s' "$pr" | jq -r '.number // empty' 2>/dev/null)
    prurl=$(printf '%s' "$pr" | jq -r '.html_url // empty' 2>/dev/null)
    if [ -z "$prnum" ]; then
      log "PR creation failed for #$num (branch $branch was pushed); marking claude-failed"
      set_labels "$num" "claude-working" "claude-failed"
      add_comment "$num" "$BOT_MARKER
[Claude] Pushed branch \`$branch\` with a proposed fix, but opening the PR failed. Open a PR from that branch manually, or re-queue the issue."
      continue
    fi
    set_labels "$num" "claude-working" "claude-pr"
    add_comment "$num" "$BOT_MARKER
[Claude] Opened PR [#$prnum]($prurl) with a proposed fix. Review and merge it, then create a release tag to deploy."
    log "PR #$prnum opened for issue #$num"
  else
    log "No commits produced for #$num; marking claude-failed"
    set_labels "$num" "claude-working" "claude-failed"
    reason=${abort_note:-"Claude did not produce a verified fix. See watcher logs on pms1: $clog"}
    add_comment "$num" "$BOT_MARKER
[Claude] Automated fix attempt did not produce a change.
$reason"
  fi
done