/**
 * Interactive GST taxpayer lookup via services.gst.gov.in/services/searchtp
 * Run: npx tsx scripts/test-gst-scrape.ts <GSTIN>
 *
 * Flow:
 *   1. Load the search page (establishes session)
 *   2. Fetch the CAPTCHA image → save to scripts/captcha.png
 *   3. Prompt you to open the image and type the 6 digits
 *   4. Submit GSTIN + captcha → print the result
 */
import { chromium } from "playwright";
import * as fs from "fs";
import * as readline from "readline";

// GSTIN to look up, taken from the first CLI argument and upper-cased.
const GSTIN = (process.argv[2] ?? "").trim().toUpperCase();
if (!GSTIN) {
  console.error("Usage: npx tsx scripts/test-gst-scrape.ts <GSTIN>");
  process.exit(1);
}

/**
 * Prompts on stdout and resolves with the trimmed line typed on stdin.
 *
 * @param q Prompt text shown to the user.
 * @returns The user's answer with surrounding whitespace removed.
 */
function ask(q: string): Promise<string> {
  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
  return new Promise<string>(resolve =>
    rl.question(q, a => {
      rl.close();
      resolve(a.trim());
    })
  );
}

/** Returns true when `value` is a non-null, non-array object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** Parses `raw` as JSON, falling back to the raw text when it is not valid JSON. */
function parseBody(raw: string): unknown {
  try {
    return JSON.parse(raw) as unknown;
  } catch {
    return raw;
  }
}

/**
 * Runs the four-step lookup flow described in the file header.
 * The browser is always closed, including when a step throws.
 */
async function main(): Promise<void> {
  console.log(`\nGSTIN: ${GSTIN}\n`);

  const browser = await chromium.launch({ headless: true });
  try {
    const ctx = await browser.newContext({
      userAgent:
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
      viewport: { width: 1280, height: 900 },
    });
    const page = await ctx.newPage();

    // ── Step 1: load search page ──────────────────────────────────────────────
    process.stdout.write("Loading GST portal… ");
    await page.goto("https://services.gst.gov.in/services/searchtp", {
      waitUntil: "networkidle",
      timeout: 30000,
    });
    console.log("done.");

    // ── Step 2: fetch captcha from same origin (sets CaptchaCookie) ───────────
    // The fetch runs inside the page so it shares the page's origin and cookies.
    process.stdout.write("Fetching CAPTCHA… ");
    const captchaB64: string = await page.evaluate(() =>
      fetch("/services/captcha", { headers: { Accept: "image/png,image/*" } })
        .then(r => {
          // Fail loudly instead of saving an error page as the CAPTCHA image.
          if (!r.ok) throw new Error(`CAPTCHA request failed with HTTP ${r.status}`);
          return r.blob();
        })
        .then(
          blob =>
            new Promise<string>((res, rej) => {
              const reader = new FileReader();
              // reader.result is a data URL; keep only the base64 payload after the comma.
              reader.onload = () => res((reader.result as string).split(",")[1] ?? "");
              reader.onerror = () => rej(reader.error ?? new Error("Failed to read CAPTCHA image"));
              reader.readAsDataURL(blob);
            })
        )
    );
    if (!captchaB64) {
      throw new Error("CAPTCHA response was empty");
    }
    const imgPath = "scripts/captcha.png";
    fs.writeFileSync(imgPath, Buffer.from(captchaB64, "base64"));
    console.log(`saved → ${imgPath}`);

    const cookies = await ctx.cookies("https://services.gst.gov.in");
    const capCookie = cookies.find(c => c.name === "CaptchaCookie");
    console.log(`CaptchaCookie: ${capCookie?.value ?? "NOT SET"}`);

    // ── Step 3: ask for captcha answer ────────────────────────────────────────
    console.log("\nOpen scripts/captcha.png and read the 6-digit number.");
    const captcha = await ask("Enter CAPTCHA (6 digits): ");
    if (!/^\d{6}$/.test(captcha)) {
      console.error("Expected exactly 6 digits. Got:", captcha);
      // Exit code 1 once the `finally` below has closed the browser.
      process.exitCode = 1;
      return;
    }

    // ── Step 4: submit search ─────────────────────────────────────────────────
    process.stdout.write(`\nSubmitting { gstin: "${GSTIN}", captcha: "${captcha}" }… `);
    // Read the body exactly once as text: a Response body cannot be re-read
    // after a failed json() call, so JSON parsing happens afterwards in Node.
    const response: { status: number; raw: string } = await page.evaluate(
      ([gstin, cap]: [string, string]) =>
        fetch("/services/api/search/tp", {
          method: "POST",
          headers: {
            "Accept": "application/json, text/plain",
            "Content-Type": "application/json;charset=UTF-8",
          },
          body: JSON.stringify({ gstin, captcha: cap }),
        })
          .then(r => r.text().then(raw => ({ status: r.status, raw })))
          // status 0 marks a request that never produced an HTTP response.
          .catch((e: unknown) => ({
            status: 0,
            raw: JSON.stringify({ error: e instanceof Error ? e.message : String(e) }),
          })),
      [GSTIN, captcha] as [string, string]
    );
    const result: { status: number; body: unknown } = {
      status: response.status,
      body: parseBody(response.raw),
    };
    console.log("done.\n");

    console.log("=== Response ===");
    console.log(JSON.stringify(result.body, null, 2));

    // If wrong captcha, SWEB_9034; if GSTIN not found, different code; on success → data
    const body = result.body;
    const errorCode = isRecord(body) ? body.errorCode : undefined;
    const httpOk = result.status >= 200 && result.status < 300;
    if (errorCode === "SWEB_9034") {
      console.log("\n→ Wrong CAPTCHA. Re-run to get a fresh image.");
    } else if (errorCode === "SWEB_9000") {
      console.log("\n→ SWEB_9000 (session/auth issue — not a captcha problem).");
    } else if (errorCode) {
      console.log(`\n→ Error code: ${String(errorCode)}`);
    } else if (!httpOk || !isRecord(body)) {
      // No error code, but also no usable payload: network failure, HTTP error
      // status, or a non-JSON body. Do not report this as a success.
      const reason = result.status === 0 ? "network error" : `HTTP ${result.status}`;
      console.log(`\n→ Request failed (${reason}) — no taxpayer data returned.`);
    } else {
      console.log("\n✅ Success — taxpayer data above.");
    }
  } finally {
    await browser.close();
  }
}

main().catch((e: unknown) => {
  console.error("\nError:", e instanceof Error ? e.message : e);
  process.exit(1);
});