import express from "express";
import { chromium, type Browser, type BrowserContext, type Page } from "playwright";

// ── Config ────────────────────────────────────────────────────────────────────

/**
 * Parse a numeric environment value.
 *
 * @param raw      Raw environment string (may be undefined).
 * @param fallback Value returned when `raw` is missing, blank, not a finite
 *                 number, or below `min`.
 * @param min      Smallest acceptable value (inclusive).
 * @returns The parsed number, or `fallback`.
 */
function numberFromEnv(raw: string | undefined, fallback: number, min: number): number {
  if (raw === undefined || raw.trim() === "") return fallback;
  const parsed = Number(raw);
  // A NaN TTL would make every `expires < now` comparison false, so sessions
  // would never be pruned; reject anything that is not a usable number.
  return Number.isFinite(parsed) && parsed >= min ? parsed : fallback;
}

const PORT = numberFromEnv(process.env.PORT, 3003, 0);
const SESSION_TTL_MS = numberFromEnv(process.env.SESSION_TTL_MS, 3 * 60 * 1000, 1); // 3 min default

// ── Structured logger ─────────────────────────────────────────────────────────

type LogLevel = "DEBUG" | "INFO" | "WARN" | "ERROR";
type LogContext = Record<string, unknown>;

const LEVEL_RANK: Record<LogLevel, number> = { DEBUG: 0, INFO: 1, WARN: 2, ERROR: 3 };

/**
 * Normalise a LOG_LEVEL setting.
 *
 * Matching is case-insensitive and ignores surrounding whitespace; anything
 * that is not a known level falls back to "INFO" so that the rank comparison
 * in `log` never sees `undefined`.
 */
function parseLogLevel(raw: string | undefined): LogLevel {
  const candidate = (raw ?? "").trim().toUpperCase();
  return Object.prototype.hasOwnProperty.call(LEVEL_RANK, candidate)
    ? (candidate as LogLevel)
    : "INFO";
}

const LOG_LEVEL = parseLogLevel(process.env.LOG_LEVEL);

/**
 * Emit one JSON log line if `level` is at or above the configured LOG_LEVEL.
 *
 * @param level Severity of the entry.
 * @param msg   Human-readable message.
 * @param ctx   Extra fields merged into the entry (they are spread last, so a
 *              key named `ts`, `level` or `msg` overrides the default).
 */
function log(level: LogLevel, msg: string, ctx?: LogContext): void {
  if (LEVEL_RANK[level] < LEVEL_RANK[LOG_LEVEL]) return;
  const entry: LogContext = { ts: new Date().toISOString(), level, msg, ...ctx };
  const line = JSON.stringify(entry);
  // Errors and warnings go to stderr so process supervisors can separate them
  if (level === "ERROR" || level === "WARN") process.stderr.write(line + "\n");
  else process.stdout.write(line + "\n");
}

/** Level-specific convenience wrappers around `log`. */
const logger = {
  debug: (msg: string, ctx?: LogContext) => log("DEBUG", msg, ctx),
  info: (msg: string, ctx?: LogContext) => log("INFO", msg, ctx),
  warn: (msg: string, ctx?: LogContext) => log("WARN", msg, ctx),
  error: (msg: string, ctx?: LogContext) => log("ERROR", msg, ctx),
};

/** Milliseconds elapsed since `startMs` (a `Date.now()` timestamp). */
function elapsed(startMs: number): number {
  return Date.now() - startMs;
}
/** Extract loggable fields from any thrown value. */
function errCtx(e: unknown): Record<string, unknown> {
  if (e instanceof Error) {
    return {
      errName: e.name,
      errMsg: e.message,
      // First 6 frames — enough for diagnosis without flooding logs
      stack: e.stack?.split("\n").slice(0, 6).map((l) => l.trim()),
    };
  }
  return { err: String(e) };
}

// ── Process-level resilience ──────────────────────────────────────────────────

// Log instead of crashing on stray errors; the process keeps running afterwards.
process.on("uncaughtException", (e) => logger.error("Uncaught exception", errCtx(e)));
process.on("unhandledRejection", (reason) =>
  logger.error("Unhandled promise rejection", errCtx(reason))
);

// ── Singleton browser ─────────────────────────────────────────────────────────

let _browser: Browser | null = null;
/** In-flight launch, shared so concurrent callers do not each start Chromium. */
let _browserLaunch: Promise<Browser> | null = null;

/** Launch Chromium, publish it as the singleton and wire up disconnect handling. */
async function launchBrowser(): Promise<Browser> {
  const t = Date.now();
  logger.info("Launching Chromium");
  const launched = await chromium.launch({
    headless: true,
    args: ["--no-sandbox", "--disable-setuid-sandbox"],
  });
  launched.on("disconnected", () => {
    logger.warn("Browser disconnected — will relaunch on next request");
    // Only clear the slot if it still holds THIS browser; a late event from an
    // old instance must not discard a newer one.
    if (_browser === launched) _browser = null;
  });
  _browser = launched;
  logger.info("Chromium ready", { ms: elapsed(t) });
  return launched;
}

/**
 * Return the shared Chromium instance, launching it on first use or after a
 * disconnect.
 *
 * Callers that arrive while a launch is in progress receive the same pending
 * launch instead of starting another browser.
 *
 * @returns A connected browser.
 * @throws Whatever `chromium.launch` rejects with; the next call retries.
 */
async function getBrowser(): Promise<Browser> {
  if (_browser?.isConnected()) return _browser;
  if (_browserLaunch === null) {
    _browserLaunch = launchBrowser().finally(() => {
      _browserLaunch = null;
    });
  }
  return _browserLaunch;
}

// ── Session store ─────────────────────────────────────────────────────────────

/** One captcha image fetched within a session. */
type CaptchaEntry = {
  captchaId: string;
  b64: string;
  fetchedAt: number;
};

/**
 * A browser context + page kept alive for one GST lookup.
 * Multiple captcha images can be fetched into `captchas` without reloading
 * the page — useful when the first image is unreadable or the user types it
 * wrong and wants to retry without paying the cost of a full page reload.
 */
type Session = {
  sessionId: string;
  ctx: BrowserContext;
  page: Page;
  captchas: CaptchaEntry[]; // ordered oldest→newest
  createdAt: number;
  lastUsedAt: number;
  expires: number;
};

const sessions = new Map<string, Session>();

/**
 * Generate a short base-36 identifier from `Math.random()` plus the current time.
 *
 * NOTE(review): `Math.random()` is not cryptographically strong; if these ids
 * are meant to be unguessable session tokens, consider `crypto.randomUUID()`.
 */
function makeId(): string {
  return Math.random().toString(36).slice(2) + Date.now().toString(36);
}
/**
 * True once a session's TTL has passed.
 * Written as a negated `>=` so that a NaN `expires` counts as expired rather
 * than keeping the session alive forever.
 */
function isExpired(s: Session, now: number): boolean {
  return !(s.expires >= now);
}

/** Close a browser context in the background, logging (not throwing) on failure. */
function closeCtxQuietly(ctx: BrowserContext, sessionId: string, failureMsg: string): void {
  ctx.close().catch((e) => logger.warn(failureMsg, { sessionId, ...errCtx(e) }));
}

/** Close + remove all sessions whose TTL has passed. */
function pruneExpired(): void {
  const now = Date.now();
  let pruned = 0;
  for (const [id, s] of sessions) {
    if (!isExpired(s, now)) continue;
    closeCtxQuietly(s.ctx, id, "Error closing expired ctx");
    sessions.delete(id);
    pruned++;
  }
  if (pruned > 0) logger.info("Pruned expired sessions", { pruned, remaining: sessions.size });
}

/**
 * Look up a live session; returns null and cleans up if missing or expired.
 * A successful lookup refreshes `lastUsedAt` (but not `expires`).
 */
function getSession(sessionId: string): Session | null {
  const s = sessions.get(sessionId);
  if (!s) return null;
  if (isExpired(s, Date.now())) {
    sessions.delete(sessionId);
    closeCtxQuietly(s.ctx, sessionId, "Error closing expired ctx on access");
    logger.info("Session expired on access", { sessionId });
    return null;
  }
  s.lastUsedAt = Date.now();
  return s;
}

/**
 * Close the browser context and remove from the map.
 *
 * @param sessionId Session to close; unknown ids are ignored.
 * @param reason    Free-form label recorded in the "Session closed" log entry.
 */
function closeSession(sessionId: string, reason = "normal"): void {
  const s = sessions.get(sessionId);
  if (!s) return;
  sessions.delete(sessionId);
  logger.info("Session closed", { sessionId, reason, captchaCount: s.captchas.length });
  closeCtxQuietly(s.ctx, sessionId, "Error closing ctx");
}

// ── Playwright page helpers ───────────────────────────────────────────────────
/** Wire up page events so Playwright activity surfaces in structured logs. */
function attachPageListeners(page: Page, sessionId: string): void {
  page.on("console", (msg) => {
    const type = msg.type();
    if (type === "error" || type === "warning") {
      logger.debug("Page console", { sessionId, type, text: msg.text() });
    }
  });

  page.on("pageerror", (err) => {
    logger.warn("Uncaught JS error on page", { sessionId, ...errCtx(err) });
  });

  page.on("requestfailed", (req) => {
    logger.warn("Network request failed", {
      sessionId,
      url: req.url(),
      failure: req.failure()?.errorText,
    });
  });

  page.on("response", (resp) => {
    const url = resp.url();
    const status = resp.status();
    // Only the endpoints this service depends on are worth a warning.
    const isKeyEndpoint =
      url.includes("captcha") || url.includes("searchtp") || url.includes("/api/search");
    if (status >= 400 && isKeyEndpoint) {
      logger.warn("GST portal returned HTTP error on key endpoint", { sessionId, url, status });
    }
  });
}

/**
 * Call /services/captcha from within the page context to get a fresh image.
 *
 * The request is issued by the page itself (via `page.evaluate`), and the
 * image bytes come back as the base64 payload of a data URL.
 *
 * @param page      Page already navigated to the GST portal.
 * @param sessionId Used for log correlation only.
 * @returns Base64-encoded image data (no `data:` prefix).
 * @throws If the endpoint answers with a non-OK status, the blob cannot be
 *         read, or the payload is shorter than 100 base64 characters.
 */
async function fetchCaptchaFromPage(page: Page, sessionId: string): Promise<string> {
  const t = Date.now();
  logger.debug("Fetching captcha image from GST portal", { sessionId });

  // Everything inside evaluate() runs in the browser, not in Node.
  const b64: string = await page.evaluate(() =>
    fetch("/services/captcha", { headers: { Accept: "image/png,image/*" } })
      .then((r) => {
        if (!r.ok) throw new Error(`Captcha endpoint returned HTTP ${r.status}`);
        return r.blob();
      })
      .then(
        (blob) =>
          new Promise<string>((resolve, reject) => {
            const reader = new FileReader();
            // reader.result is "data:<mime>;base64,<payload>" — keep the payload.
            // A missing payload becomes "" and is rejected by the length check below.
            reader.onload = () => resolve((reader.result as string).split(",")[1] ?? "");
            reader.onerror = () => reject(new Error("FileReader failed reading captcha blob"));
            reader.readAsDataURL(blob);
          })
      )
  );

  if (!b64 || b64.length < 100) {
    throw new Error(`Captcha response looks invalid (base64 length=${b64?.length ?? 0})`);
  }

  logger.debug("Captcha image ready", { sessionId, b64Len: b64.length, ms: elapsed(t) });
  return b64;
}

// ── Express app ───────────────────────────────────────────────────────────────

const app = express();
app.use(express.json());

// CORS — allow Pelagia portal to call us from any origin
app.use((_req, res, next) => {
  res.setHeader("Access-Control-Allow-Origin", "*");
  res.setHeader("Access-Control-Allow-Headers", "Content-Type");
  next();
});

// ── Per-request ID + response logging ────────────────────────────────────────

/** Express request augmented with a correlation id and start timestamp. */
type TrackedReq = express.Request & { reqId: string; startMs: number };

app.use((req, res, next) => {
  const r = req as TrackedReq;
  r.reqId = makeId();
  r.startMs = Date.now();

  // Log once the response has been sent; severity follows the status class.
  res.on("finish", () => {
    const level: LogLevel =
      res.statusCode >= 500 ? "ERROR" : res.statusCode >= 400 ? "WARN" : "INFO";
    log(level, `${req.method} ${req.path}`, {
      reqId: r.reqId,
      status: res.statusCode,
      ms: elapsed(r.startMs),
      sessions: sessions.size,
    });
  });

  next();
});

// ── GET /health ───────────────────────────────────────────────────────────────

// NOTE(review): this response lists live session ids; confirm the endpoint is
// not reachable by untrusted callers, since a session id is all /search needs.
app.get("/health", (_req, res) => {
  const now = Date.now();
  res.json({
    ok: true,
    browserConnected: _browser?.isConnected() ?? false,
    sessionCount: sessions.size,
    activeSessions: [...sessions.values()].map((s) => ({
      sessionId: s.sessionId,
      captchaCount: s.captchas.length,
      expiresInMs: s.expires - now,
      lastUsedMsAgo: now - s.lastUsedAt,
    })),
  });
});

// ── GET /captcha — create new session ─────────────────────────────────────────
/**
 * Loads the GST search page in a fresh browser context, fetches the first
 * captcha image, and returns a session that can be reused for retries.
 *
 * Response: { sessionId, captchaId, captchaBase64 }
 *
 * On any failure the context is closed and a 502 is returned.
 */
app.get("/captcha", async (req, res) => {
  // Opportunistic cleanup: creating a session is the only place expired ones are swept.
  pruneExpired();

  const { reqId } = req as TrackedReq;
  const t = Date.now();
  let ctx: BrowserContext | undefined;

  try {
    const browser = await getBrowser();

    logger.debug("Creating browser context", { reqId });
    ctx = await browser.newContext({
      userAgent:
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
      viewport: { width: 1280, height: 900 },
    });

    const page = await ctx.newPage();
    const sessionId = makeId();
    attachPageListeners(page, sessionId);

    // Navigate — this establishes the portal's session cookies
    logger.info("Navigating to GST search page", { reqId, sessionId });
    const navT = Date.now();
    await page.goto("https://services.gst.gov.in/services/searchtp", {
      waitUntil: "networkidle",
      timeout: 30_000,
    });
    logger.info("GST portal loaded", { reqId, sessionId, ms: elapsed(navT) });

    // First captcha
    const b64 = await fetchCaptchaFromPage(page, sessionId);
    const captchaId = makeId();

    sessions.set(sessionId, {
      sessionId,
      ctx,
      page,
      captchas: [{ captchaId, b64, fetchedAt: Date.now() }],
      createdAt: Date.now(),
      lastUsedAt: Date.now(),
      expires: Date.now() + SESSION_TTL_MS,
    });

    logger.info("Session created", { reqId, sessionId, captchaId, totalMs: elapsed(t) });
    return res.json({ sessionId, captchaId, captchaBase64: b64 });
  } catch (e) {
    logger.error("GET /captcha failed", { reqId, totalMs: elapsed(t), ...errCtx(e) });
    // The context was never stored in `sessions`, so it must be closed here.
    // Cleanup stays best-effort, but a failure is logged rather than swallowed.
    ctx?.close().catch((closeErr) =>
      logger.warn("Error closing ctx after failed /captcha", { reqId, ...errCtx(closeErr) })
    );
    return res
      .status(502)
      .json({ error: "Failed to fetch CAPTCHA from GST portal. Please try again." });
  }
});

// ── GET /captcha/:sessionId — refresh captcha within existing session ──────────
/**
 * Fetches a new captcha image using the SAME browser context and page —
 * no page reload. The GST portal's /services/captcha endpoint issues a fresh
 * image (and updates CaptchaCookie) on each call.
 *
 * Use this after a SWEB_9034 (wrong captcha) response so the user can retry
 * without the latency of a new page load.
 *
 * Response: { captchaId, captchaBase64, totalCaptchas }
 */
app.get("/captcha/:sessionId", async (req, res) => {
  const { sessionId } = req.params;
  const { reqId } = req as unknown as TrackedReq;
  const t = Date.now();

  const session = getSession(sessionId);
  if (!session) {
    logger.warn("Captcha refresh: session not found or expired", { reqId, sessionId });
    return res.status(410).json({ error: "Session expired — please start a new lookup." });
  }

  try {
    const b64 = await fetchCaptchaFromPage(session.page, sessionId);
    const captchaId = makeId();
    session.captchas.push({ captchaId, b64, fetchedAt: Date.now() });
    session.expires = Date.now() + SESSION_TTL_MS; // reset TTL on activity

    logger.info("Captcha refreshed", {
      reqId,
      sessionId,
      captchaId,
      totalCaptchas: session.captchas.length,
      ms: elapsed(t),
    });
    return res.json({ captchaId, captchaBase64: b64, totalCaptchas: session.captchas.length });
  } catch (e) {
    logger.error("GET /captcha/:sessionId failed", {
      reqId,
      sessionId,
      ms: elapsed(t),
      ...errCtx(e),
    });
    return res.status(502).json({ error: "Failed to refresh CAPTCHA." });
  }
});

// ── POST /search ───────────────────────────────────────────────────────────────

/** True for plain JSON objects (non-null, non-array). */
function isRecord(v: unknown): v is Record<string, unknown> {
  return typeof v === "object" && v !== null && !Array.isArray(v);
}

/**
 * Normalise a request-body field to text: strings pass through unchanged,
 * finite numbers are stringified, anything else becomes "" (treated as missing).
 */
function bodyText(v: unknown): string {
  if (typeof v === "string") return v;
  if (typeof v === "number" && Number.isFinite(v)) return String(v);
  return "";
}

/** Map a successful portal reply onto this service's response shape. */
function toTaxpayerResult(raw: Record<string, unknown>) {
  const pradrField = raw.pradr;
  const pradr = isRecord(pradrField) && typeof pradrField.adr === "string" ? pradrField.adr : "";
  // The pincode is taken to be the first standalone 6-digit run in the address.
  const pincode = pradr.match(/\b(\d{6})\b/)?.[1] ?? "";
  // Keep the text before the first comma of `stj`, minus a leading "State - ".
  const state = (String(raw.stj ?? "").split(",")[0] ?? "").replace(/^State\s*-\s*/i, "");

  return {
    legalName: raw.lgnm ?? "",
    tradeName: raw.tradeNam ?? raw.lgnm ?? "",
    address: pradr,
    state,
    pincode,
    gstin: raw.gstin,
    status: raw.sts ?? "",
    businessType: raw.ctb ?? raw.dty ?? "",
    registrationDate: raw.rgdt ?? "",
  };
}

/**
 * Body: { sessionId, gstin, captcha }
 *
 * Success: closes session, returns taxpayer data.
 * SWEB_9034: keeps session alive, returns { canRefresh: true, sessionId }
 *            — caller should GET /captcha/:sessionId for a fresh image.
 * Other error: closes session, returns { error }.
 */
app.post("/search", async (req, res) => {
  // req.body is untrusted JSON: it may be absent, a non-object, or carry
  // non-string fields, so normalise before touching `.length` or the session map.
  const body: Record<string, unknown> = isRecord(req.body) ? req.body : {};
  const sessionId = typeof body.sessionId === "string" ? body.sessionId : "";
  const gstin = bodyText(body.gstin);
  const captcha = bodyText(body.captcha);
  const { reqId } = req as TrackedReq;
  const t = Date.now();

  if (!sessionId || !gstin || !captcha) {
    return res.status(400).json({ error: "sessionId, gstin and captcha are required" });
  }

  const session = getSession(sessionId);
  if (!session) {
    logger.warn("Search: session not found or expired", { reqId, sessionId, gstin });
    return res.status(410).json({ error: "Session expired — please fetch a new CAPTCHA." });
  }

  logger.info("Submitting GST search", {
    reqId,
    sessionId,
    gstin,
    captchaLen: captcha.length,
    captchaCount: session.captchas.length,
  });

  try {
    const searchT = Date.now();
    // The POST is issued from inside the page. Transport and JSON-parse
    // failures are folded into an `{ error }` object instead of rejecting.
    const reply: unknown = await session.page.evaluate(
      ([g, c]: [string, string]) =>
        fetch("/services/api/search/tp", {
          method: "POST",
          headers: {
            Accept: "application/json, text/plain",
            "Content-Type": "application/json;charset=UTF-8",
          },
          body: JSON.stringify({ gstin: g, captcha: c }),
        })
          .then(async (r) => r.json().catch(() => ({ error: `HTTP ${r.status}` })))
          .catch((e: Error) => ({ error: e.message })),
      [gstin, captcha] as [string, string]
    );

    // A reply that is not a JSON object (e.g. `null`) is handled as a portal
    // failure rather than letting property access throw.
    const raw: Record<string, unknown> = isRecord(reply)
      ? reply
      : { error: "Unexpected response from GST portal" };

    logger.debug("GST portal raw response", {
      reqId,
      sessionId,
      gstin,
      ms: elapsed(searchT),
      raw,
    });

    // Wrong captcha — keep session alive so caller can refresh
    if (raw.errorCode === "SWEB_9034") {
      logger.warn("Wrong captcha — session kept alive for refresh", {
        reqId,
        sessionId,
        gstin,
        captchaCount: session.captchas.length,
        ms: elapsed(t),
      });
      return res.status(422).json({
        error: "Wrong CAPTCHA — please try again.",
        canRefresh: true,
        sessionId,
      });
    }

    // Portal session/auth expired
    if (raw.errorCode === "SWEB_9000") {
      logger.warn("GST portal session expired (SWEB_9000)", {
        reqId,
        sessionId,
        gstin,
        ms: elapsed(t),
      });
      closeSession(sessionId, "SWEB_9000");
      return res
        .status(502)
        .json({ error: "GST portal session expired. Please start a new lookup." });
    }

    // Other portal error codes
    if (raw.errorCode) {
      logger.warn("GST portal error code", {
        reqId,
        sessionId,
        gstin,
        errorCode: raw.errorCode,
        ms: elapsed(t),
      });
      closeSession(sessionId, `errorCode:${raw.errorCode}`);
      return res.status(422).json({ error: `GST portal error: ${raw.errorCode}` });
    }

    // Network / fetch error from page.evaluate
    if (raw.error) {
      logger.error("GST search network error", {
        reqId,
        sessionId,
        gstin,
        error: raw.error,
        ms: elapsed(t),
      });
      closeSession(sessionId, "network-error");
      return res.status(502).json({ error: String(raw.error) });
    }

    // Empty / unexpected response
    if (!raw.gstin) {
      logger.warn("No GSTIN in GST portal response", { reqId, sessionId, gstin, ms: elapsed(t) });
      closeSession(sessionId, "no-data");
      return res.status(422).json({ error: "No taxpayer data found for that GSTIN." });
    }

    // Success
    closeSession(sessionId, "success");
    const result = toTaxpayerResult(raw);

    logger.info("GST search successful", {
      reqId,
      sessionId,
      gstin,
      legalName: result.legalName,
      totalMs: elapsed(t),
    });
    return res.json(result);
  } catch (e) {
    logger.error("POST /search failed unexpectedly", {
      reqId,
      sessionId,
      gstin,
      ms: elapsed(t),
      ...errCtx(e),
    });
    closeSession(sessionId, "exception");
    return res.status(500).json({ error: "Internal error during GST lookup." });
  }
});

// ── Start ─────────────────────────────────────────────────────────────────────

app.listen(PORT, () => {
  logger.info("GstService listening", {
    port: PORT,
    sessionTtlMs: SESSION_TTL_MS,
    logLevel: LOG_LEVEL,
  });
});