diff --git a/skills/vana-connect/CREATE.md b/skills/vana-connect/CREATE.md index e12dc8a..e61f959 100644 --- a/skills/vana-connect/CREATE.md +++ b/skills/vana-connect/CREATE.md @@ -7,7 +7,7 @@ Build a data connector for a platform that isn't in the registry yet. - `reference/PAGE-API.md` -- full `page` object API - `reference/PATTERNS.md` -- data extraction approaches and code examples -All `node scripts/...` commands refer to `skills/vana-connect/scripts/` in the data-connectors repo. `run-connector.cjs` is at `~/.dataconnect/run-connector.cjs` (installed by SETUP.md). +All `node scripts/...` commands refer to `skills/vana-connect/scripts/` in the data-connectors repo. Use the `vana` CLI to exercise connectors; only fall back to raw scripts when debugging connector internals. ## Connector Format @@ -167,7 +167,7 @@ Run the connector and validate in one step: ```bash node scripts/validate.cjs /-playwright.js && \ - node ~/.dataconnect/run-connector.cjs /-playwright.js [start-url] && \ + vana connect && \ node scripts/validate.cjs /-playwright.js --check-result ~/.dataconnect/last-result.json ``` diff --git a/skills/vana-connect/RECIPES.md b/skills/vana-connect/RECIPES.md index 60a0b4d..41c5769 100644 --- a/skills/vana-connect/RECIPES.md +++ b/skills/vana-connect/RECIPES.md @@ -130,7 +130,7 @@ console.log([header, ...rows].join('\n')); Run the connector on a schedule (cron, agent heartbeat, etc.) and timestamp each export: ```bash -node run-connector.cjs +vana connect cp ~/.dataconnect/last-result.json ~/backups/-$(date +%Y-%m-%d).json ``` diff --git a/skills/vana-connect/SETUP.md b/skills/vana-connect/SETUP.md index e330829..4303110 100644 --- a/skills/vana-connect/SETUP.md +++ b/skills/vana-connect/SETUP.md @@ -1,63 +1,114 @@ # Connect -- Setup -Skip if `~/.dataconnect/playwright-runner/index.cjs` and `~/.dataconnect/run-connector.cjs` both exist. 
+This setup exists to let the skill use a real installed `vana` CLI when available, with the published canary CLI as the fallback. -## Prerequisites +## Preferred path -- Node.js v18+ -- Git +If `vana` is already on `PATH`, use it directly: -## Install +```bash +command -v vana +``` -Run the setup script from the data-connectors repo root: +Then use: ```bash -bash skills/vana-connect/scripts/setup.sh +vana +``` + +Skip runtime setup if `vana status --json` reports `"runtime":"installed"` or `"runtime":{"installed":true,...}`. + +## Fallback path + +If `vana` is not installed yet, prefer the official installer so the user gets a real installed CLI: + +```bash +curl -fsSL https://raw.githubusercontent.com/vana-com/vana-connect/main/install/install.sh | sh +``` + +Then verify: + +```bash +vana --help +``` + +If the installer path is unavailable or the released CLI does not yet contain the needed behavior, use the published canary package: + +```bash +npx -y @opendatalabs/connect@canary +``` + +Skip runtime setup if `npx -y @opendatalabs/connect@canary status --json` reports `"runtime":"installed"` or `"runtime":{"installed":true,...}`. + +## Verify the published CLI + +```bash +npx -y @opendatalabs/connect@canary --help ``` -This installs the playwright-runner, Chromium, and run-connector.cjs in a single step. If the user needs to approve commands, this is one approval instead of many. +## Verify an installed CLI -**Before running**, tell the user: setup will download a browser engine and some Node.js dependencies into `~/.dataconnect/`. This is a one-time step. 
+```bash +vana --help +``` -## Manual install +## Local development fallback -If the setup script doesn't work for your environment, follow these steps individually: +From your local checkout of the `vana-connect` repository (for example, `/home/tnunamak/code/vana-connect`): ```bash -mkdir -p ~/.dataconnect/connectors -cd ~/.dataconnect +pnpm install +pnpm build +``` + +Verify: -git clone --depth 1 --filter=blob:none --sparse --branch main \ - https://github.com/vana-com/data-connect.git _data-connect -cd _data-connect && git sparse-checkout set playwright-runner -cp -r playwright-runner ../playwright-runner -cd .. && rm -rf _data-connect -cd ~/.dataconnect/playwright-runner && npm install -npx playwright install chromium +```bash +ls /home/tnunamak/code/vana-connect/dist/cli/bin.js ``` -Then copy run-connector.cjs from the skill's scripts/ directory: +## Install the runtime + +Use the installed CLI when possible: ```bash -cp skills/vana-connect/scripts/run-connector.cjs ~/.dataconnect/run-connector.cjs +vana setup --yes ``` -> **Do not** use `curl` to fetch this file from GitHub — the repo root contains a symlink that GitHub raw serves as a text pointer, not the actual script. +If `vana` is not installed, use the published canary fallback: + +```bash +npx -y @opendatalabs/connect@canary --help +npx -y @opendatalabs/connect@canary setup --yes +``` + +Before running, tell the user this downloads a browser engine and some dependencies into `~/.dataconnect/`. This is a one-time step. ## Verify ```bash -ls ~/.dataconnect/playwright-runner/index.cjs ~/.dataconnect/run-connector.cjs +vana status ``` -Both files should exist. +You should see `Runtime: installed`. If `vana` is unavailable, run `npx -y @opendatalabs/connect@canary status` instead. +If setup still fails, inspect the log path surfaced by the CLI and only fall back to the older script-level flow if the CLI setup path is blocked. 
+ +## Legacy fallback + +Only use this if the CLI setup path is broken and you are debugging the underlying runtime: + +```bash +bash skills/vana-connect/scripts/setup.sh +``` ## File Locations | Path | Purpose | |------|---------| -| `~/.dataconnect/playwright-runner/` | Runner process | -| `~/.dataconnect/run-connector.cjs` | Batch-mode runner wrapper | +| `vana` | Preferred installed CLI entrypoint | +| `npx -y @opendatalabs/connect@canary` | Published canary CLI entrypoint | +| `/home/tnunamak/code/vana-connect/dist/cli/bin.js` | Local development fallback | | `~/.dataconnect/connectors/` | Connector scripts | | `~/.dataconnect/browser-profiles/` | Persistent sessions (cookies) | | `~/.dataconnect/last-result.json` | Most recent result | +| `~/.dataconnect/logs/` | Setup and run logs surfaced by the CLI | diff --git a/skills/vana-connect/SKILL.md b/skills/vana-connect/SKILL.md index 777eec7..fa94239 100644 --- a/skills/vana-connect/SKILL.md +++ b/skills/vana-connect/SKILL.md @@ -10,85 +10,165 @@ description: > # Connect -Connect personal data from web platforms using local browser automation. +Connect personal data from web platforms using the `vana` CLI and local browser automation. ## Setup -If `~/.dataconnect/playwright-runner/index.cjs` or `~/.dataconnect/run-connector.cjs` does not exist, setup is needed. Tell the user: "I need to do a one-time setup first — this downloads a browser engine and some dependencies to `~/.dataconnect/`. It'll take about a minute." Then follow `SETUP.md` (co-located with this file). 
+Prefer an installed `vana` binary when it is available: + +```bash +command -v vana +``` + +If that succeeds, use: + +```bash +vana +``` + +If `vana` is not on `PATH`, prefer installing the real CLI before falling back to a transient package runner: + +```bash +curl -fsSL https://raw.githubusercontent.com/vana-com/vana-connect/main/install/install.sh | sh +``` + +Then use: + +```bash +vana +``` + +If the installer path is unavailable, the release channel is too old for the needed CLI behavior, or the user is explicitly testing prerelease changes, fall back to the published canary CLI: + +```bash +npx -y @opendatalabs/connect@canary +``` + +If the canary CLI is unavailable or the user is explicitly testing local changes, fall back to: + +```bash +node /home/tnunamak/code/vana-connect/dist/cli/bin.js +``` + +If none of these paths is available, follow `SETUP.md` in this folder. + +Before connecting a source, check runtime state with the highest-priority available CLI: + +```bash +vana status --json +``` + +If `vana` is unavailable, use: + +```bash +npx -y @opendatalabs/connect@canary status --json +``` + +If the runtime is missing, tell the user: "I need to do a one-time setup first. This downloads a browser engine and some dependencies into `~/.dataconnect/` and usually takes about a minute." Then run: + +```bash +vana setup --yes +``` + +If `vana` is unavailable, use: + +```bash +npx -y @opendatalabs/connect@canary setup --yes +``` ## Flow -### 1. Find a connector +### 1. Explore available sources + +Use `vana sources --json` if `vana` is installed: + +```bash +vana sources --json +``` + +Otherwise use: ```bash -node scripts/fetch-connector.cjs +npx -y @opendatalabs/connect@canary sources --json ``` -This searches the registry and downloads the connector + metadata + schemas in one step. It prints JSON: `{ "found": true, "connectorPath": "..." }` on success, `{ "found": false }` if no connector exists. +This is the source of truth for what the CLI can currently connect. 
Prefer it over inspecting repo files manually. + +If the requested platform is present, use the CLI flow below. -If found, let the user know there's an existing connector and this should be quick. +**If no connector exists for the platform,** tell the user you'll build one — this involves researching the platform's data APIs, writing the extraction code, and testing it. Let them know it'll take a bit and they're welcome to do something else while you work. Then read `CREATE.md` and follow it. -**If no connector exists for the platform,** tell the user you'll build one — this involves researching the platform's data APIs, writing the extraction code, and testing it. Let them know it'll take a bit and they're welcome to do something else while you work. Then read `CREATE.md` and follow it. Continue from step 2 with the newly created connector. +### 2. Connect with the CLI -### 2. Read the connector +Start with the agent-safe probe: -Before running, read the connector script to understand: -- What URL it starts from (`page.goto()` or `connectURL` in metadata) -- Whether it uses `requestInput` (batch-compatible) or `showBrowser`/`promptUser` (browser login) -- What data it collects +Use `vana connect <platform> --json --no-input` if `vana` is installed: + +```bash +vana connect <platform> --json --no-input +``` -### 3. 
Run it +Otherwise use: ```bash -node ~/.dataconnect/run-connector.cjs [start-url] -node ~/.dataconnect/run-connector.cjs [start-url] --inputs '{"username":"x","password":"y"}' +npx -y @opendatalabs/connect@canary connect <platform> --json --no-input ``` -**Stdout** is line-delimited JSON: +This will: + +- ensure the runtime is installed +- resolve and cache the connector +- try a saved session if one exists +- return structured events and a final outcome such as `needs_input`, `legacy_auth`, `connected_local_only`, or `connected_and_ingested` + +If the outcome is `needs_input`, rerun interactively: + +Use `vana connect <platform>` if `vana` is installed: -| type | meaning | action | -|------|---------|--------| -| `need-input` | Connector needs credentials or 2FA | Ask user, write response file (see below) | -| `legacy-auth` | Legacy auth, can't run headless | See legacy section | -| `result` | Data saved to `resultPath` | Read the file | -| `error` | Failure | Report to user | +```bash +vana connect <platform> +``` -Exit codes: 0 = success, 2 = needs input, 3 = legacy auth, 1 = error. +Otherwise use: + +```bash +npx -y @opendatalabs/connect@canary connect <platform> +``` -### 4. Handle auth +If the user specifically wants to inspect current state before rerunning, use: -1. Check if `~/.dataconnect/browser-profiles/{script-filename}/` exists -- try without `--inputs` first (session may still be valid) -2. If `need-input` appears in stdout: the connector is paused, waiting for input. Two options: +Use `vana status` if `vana` is installed: - **Option A (preferred): File-based response.** The `need-input` message includes `pendingInputPath` and `responseInputPath`. Ask the user for the requested fields, then write the response as JSON to `responseInputPath`. The connector resumes automatically — no restart needed. This works for multi-step auth (credentials first, then 2FA). 
+```bash +vana status +``` - ```bash - # Connector emits: {"type":"need-input","message":"Enter credentials","pendingInputPath":"~/.dataconnect/pending-input-steam-playwright.json","responseInputPath":"~/.dataconnect/input-response-steam-playwright.json",...} - # After asking the user, write the response to the path from the message: - echo '{"username":"alice","password":"secret"}' > ~/.dataconnect/input-response-steam-playwright.json - # Connector picks it up within 1 second and continues. - # If it later needs 2FA, another need-input appears — write again. - ``` +Otherwise use: - **Option B: Pre-supply with `--inputs`.** If you know all inputs upfront, pass them at launch. Fields are consumed as each `requestInput` call is made. +```bash +npx -y @opendatalabs/connect@canary status +``` - ```bash - node ~/.dataconnect/run-connector.cjs --inputs '{"username":"x","password":"y"}' - ``` +### 3. Handle outcomes -TOTP codes expire in ~30 seconds — write the response file immediately after receiving a code. Cookies saved in browser profiles persist for days to weeks. +The CLI emits structured JSON events in `--json` mode. -#### Legacy connectors +Key outcomes: -Exit code 3 means the connector uses `showBrowser`/`promptUser` instead of `requestInput`: +- `needs_input` + The connector needs a live login or another manual step. Explain that you'll rerun interactively. +- `legacy_auth` + The connector still depends on `showBrowser` / `promptUser`. Explain that this source still needs a headed/manual session path and may not work in fully headless batch mode yet. +- `connected_local_only` + Data was collected locally but no Personal Server target was available. +- `connected_and_ingested` + Data was collected and synced to the Personal Server. -1. Try without `--inputs` -- if a browser profile exists, login may be skipped. -2. Check for a migrated version on the `main` branch. -3. Write a login script to establish a session, then run the stock connector. 
+If setup, fetch, or run output is truncated, the CLI may point to a full log file under `~/.dataconnect/logs/`. Use that rather than re-running blindly. -### 5. Validate, present results, and offer to contribute +### 4. Validate, present results, and offer to contribute -On success, immediately run validation — before presenting results to the user: +If you built or modified a connector, immediately run validation — before presenting results to the user: ```bash node scripts/validate.cjs /-playwright.js --check-result ~/.dataconnect/last-result.json @@ -104,7 +184,7 @@ If you built a new connector (not one from the registry), ask the user: If yes, run `node scripts/validate.cjs /-playwright.js --contribute`. If no, move on. -### 6. Suggest what to do with the data +### 5. Suggest what to do with the data After the contribution question is resolved (or if using an existing connector), suggest use cases from `RECIPES.md`: user profile generation, personal knowledge base, data backup, cross-platform synthesis, activity analytics. @@ -121,7 +201,7 @@ The user can't see what you're doing behind the scenes. Keep them informed at ke 3. **After collection**, summarize results in human terms — not file paths: - Good: "Connected! I collected 249 issues, 63 projects, 9 teams, and your profile from Linear." - Bad: "Data saved to ~/.dataconnect/last-result.json" - - Read the result file and build the summary from `exportSummary` and the scoped keys. + - Prefer the CLI outcome plus the result file. Build the summary from `exportSummary` and the scoped keys. 4. **On failure**, explain what went wrong and what the user can do: - Auth failed → "Login didn't work. Can you double-check your credentials?" @@ -134,3 +214,12 @@ The user can't see what you're doing behind the scenes. Keep them informed at ke 3. **One platform at a time** 4. **Check session first** -- try without credentials if a browser profile exists 5. **Read connectors before running them** +6. 
**Use the CLI as the primary interface** -- only drop to raw scripts when debugging or updating connector internals + +## CLI fallback order + +Use this order when choosing the CLI entrypoint: + +1. `vana` if it is already installed and on `PATH` +2. `npx -y @opendatalabs/connect@canary` +3. `node /home/tnunamak/code/vana-connect/dist/cli/bin.js` only for local development or debugging diff --git a/skills/vana-connect/scripts/fetch-connector.cjs b/skills/vana-connect/scripts/fetch-connector.cjs deleted file mode 100644 index de91c7d..0000000 --- a/skills/vana-connect/scripts/fetch-connector.cjs +++ /dev/null @@ -1,125 +0,0 @@ -#!/usr/bin/env node -/** - * fetch-connector.cjs — Download a connector from the registry. - * - * Usage: node scripts/fetch-connector.cjs - * - * Searches the registry for the platform, downloads the connector script - * and metadata to ~/.dataconnect/connectors/. Prints the local path on success. - * - * Exit codes: 0 = found and downloaded, 1 = not found or error. 
- */ - -const https = require('https'); -const fs = require('fs'); -const path = require('path'); -const os = require('os'); - -const platform = process.argv[2]; -if (!platform) { - console.error('Usage: node scripts/fetch-connector.cjs '); - process.exit(1); -} - -const REGISTRY_URL = 'https://raw.githubusercontent.com/vana-com/data-connectors/main/registry.json'; -const BASE_URL = 'https://raw.githubusercontent.com/vana-com/data-connectors/main'; -const CONNECTORS_DIR = path.join(os.homedir(), '.dataconnect', 'connectors'); - -function fetch(url) { - return new Promise((resolve, reject) => { - https.get(url, { headers: { 'User-Agent': 'dataconnect' } }, (res) => { - if (res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) { - return fetch(res.headers.location).then(resolve, reject); - } - if (res.statusCode !== 200) { - reject(new Error(`HTTP ${res.statusCode} for ${url}`)); - res.resume(); - return; - } - const chunks = []; - res.on('data', (c) => chunks.push(c)); - res.on('end', () => resolve(Buffer.concat(chunks).toString())); - res.on('error', reject); - }).on('error', reject); - }); -} - -async function main() { - // Fetch registry - let registry; - try { - registry = JSON.parse(await fetch(REGISTRY_URL)); - } catch (e) { - console.error('Failed to fetch registry:', e.message); - process.exit(1); - } - - // Search for platform (case-insensitive, partial match) - const search = platform.toLowerCase(); - const match = (registry.connectors || []).find((c) => { - const name = (c.name || '').toLowerCase(); - const id = (c.id || '').toLowerCase(); - return name === search || id === search || name.includes(search) || id.includes(search); - }); - - if (!match) { - console.log(JSON.stringify({ found: false, platform })); - process.exit(1); - } - - // Download connector files - const scriptPath = match.scriptPath || match.script_path; - const metadataPath = scriptPath.replace(/\.js$/, '.json'); - const company = path.dirname(scriptPath); - - const 
localDir = path.join(CONNECTORS_DIR, company); - fs.mkdirSync(localDir, { recursive: true }); - - const files = [scriptPath, metadataPath]; - const downloaded = []; - - for (const filePath of files) { - try { - const content = await fetch(`${BASE_URL}/${filePath}`); - const localPath = path.join(CONNECTORS_DIR, filePath); - fs.writeFileSync(localPath, content); - downloaded.push(localPath); - } catch (e) { - // Metadata might not exist, that's OK - if (filePath === metadataPath) continue; - console.error(`Failed to download ${filePath}:`, e.message); - process.exit(1); - } - } - - // Download schemas if referenced in metadata - try { - const metaLocal = path.join(CONNECTORS_DIR, metadataPath); - if (fs.existsSync(metaLocal)) { - const meta = JSON.parse(fs.readFileSync(metaLocal, 'utf-8')); - if (meta.scopes && Array.isArray(meta.scopes)) { - const schemasDir = path.join(localDir, 'schemas'); - fs.mkdirSync(schemasDir, { recursive: true }); - for (const scope of meta.scopes) { - const scopeName = scope.scope || scope.name; - if (!scopeName) continue; - try { - const schemaContent = await fetch(`${BASE_URL}/${company}/schemas/${scopeName}.json`); - fs.writeFileSync(path.join(schemasDir, `${scopeName}.json`), schemaContent); - downloaded.push(path.join(schemasDir, `${scopeName}.json`)); - } catch {} // Schema might not exist yet - } - } - } - } catch {} // Non-critical - - const connectorPath = path.join(CONNECTORS_DIR, scriptPath); - console.log(JSON.stringify({ - found: true, - platform: match.name || platform, - connectorPath, - files: downloaded, - })); -} - -main();