Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,16 @@ desktop/build/icon-master.png

# Bundled service runtimes — produced by yarn dist:mac
desktop/resources/bin/node
desktop/resources/bin/ax-capture
desktop/resources/relay-server-bundle/
desktop/resources/openclaw/
desktop/resources/.openclaw-pack/

# Swift package build output for the AX-capture sidecar — regenerable
# from `node desktop/scripts/build-ax-capture.mjs`.
desktop/native/ax-capture/.build/
desktop/native/ax-capture/.swiftpm/

# electron-builder app staging dir — produced by scripts/stage-app.mjs
desktop/dist-staging/

Expand Down
17 changes: 17 additions & 0 deletions desktop/native/ax-capture/Package.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// swift-tools-version:5.9
import PackageDescription

// SwiftPM manifest for the ax-capture sidecar: one executable target,
// AXCapture, whose sources live under Sources/AXCapture.
let package = Package(
name: "ax-capture",
platforms: [
// Match Electron 41's minimum (macOS 11). Bumping forces unnecessary
// OS-version gating on the smallest piece of the bundle.
// NOTE(review): confirm macOS 11 is really Electron 41's floor — TODO
// verify against Electron's supported-platforms list before relying on it.
.macOS(.v11)
],
targets: [
// The target name is also the name of the built executable.
.executableTarget(
name: "AXCapture",
path: "Sources/AXCapture"
)
]
)
187 changes: 187 additions & 0 deletions desktop/native/ax-capture/Sources/AXCapture/main.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
import AppKit
import ApplicationServices
import Foundation

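// JSON-line stdio protocol.
//
// Read one command per line on stdin, write one response per line on stdout.
//
// Commands:
// {"cmd":"ping"} -> {"ok":true,"version":"1"}
// {"cmd":"permission"} -> {"ok":true,"granted":bool}
// {"cmd":"capture"} -> {"ok":true,"app":string,"pid":int,"window":string,
// "elements":[{role,text,frame?:{x,y,w,h}}],"truncated":bool}
// or {"ok":false,"error":"permission_denied"|"no_frontmost"|"no_window"|"ax_failed"}
// (an "ax_failed" response may also carry "axError", the raw AXError code)
//
// Any other command gets {"ok":false,"error":"unknown_cmd"}. A line that is not
// a JSON object gets {"ok":false,"error":"bad_json"}, which never carries an "id"
// because the request could not be parsed.
//
// All other responses include "id" if the command had one (for request matching on
// the Electron side). stdout is unbuffered, so responses are flushed immediately.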

// Cap on the number of elements collected per capture; walk() stops as soon
// as the output array reaches this size.
let MAX_ELEMENTS = 500
// Maximum number of characters kept from an element's text before truncate()
// cuts it and appends an ellipsis.
let MAX_TEXT_LEN = 400
// Deepest level walk() will visit; the window itself is depth 0.
let MAX_DEPTH = 25

// Disable stdio buffering so every response line reaches the parent process
// as soon as it is printed.
setbuf(stdout, nil)
setbuf(stderr, nil)

// Command loop: one JSON object per stdin line, one JSON response per line.
// Ends when stdin reaches EOF.
while let line = readLine(strippingNewline: true) {
    guard let payload = line.data(using: .utf8),
          let request = (try? JSONSerialization.jsonObject(with: payload)) as? [String: Any]
    else {
        // Nothing could be parsed, so there is no "id" to echo back.
        write(["ok": false, "error": "bad_json"])
        continue
    }

    let requestID = request["id"]
    let response: [String: Any]
    switch request["cmd"] as? String {
    case "ping":
        response = ["ok": true, "version": "1"]
    case "permission":
        response = ["ok": true, "granted": AXIsProcessTrusted()]
    case "capture":
        response = capture()
    default:
        response = ["ok": false, "error": "unknown_cmd"]
    }
    write(merge(response, requestID))
}

// --- helpers below ---

/// Captures the readable accessibility elements of the frontmost app's window.
///
/// - Returns: On success, a dictionary with "ok": true plus "app", "pid",
///   "window", "elements" and "truncated". On failure, "ok": false with an
///   "error" of "permission_denied", "no_frontmost", "no_window" or
///   "ax_failed" (the latter with "axError" when the windows-list call failed).
func capture() -> [String: Any] {
    guard AXIsProcessTrusted() else {
        return ["ok": false, "error": "permission_denied"]
    }
    guard let frontmost = NSWorkspace.shared.frontmostApplication else {
        return ["ok": false, "error": "no_frontmost"]
    }
    let pid = frontmost.processIdentifier
    let appName = frontmost.localizedName ?? frontmost.bundleIdentifier ?? "unknown"
    let appElement = AXUIElementCreateApplication(pid)

    var candidate: CFTypeRef?
    let focusStatus = AXUIElementCopyAttributeValue(
        appElement, kAXFocusedWindowAttribute as CFString, &candidate)
    if focusStatus != .success || candidate == nil {
        // Some apps don't expose AXFocusedWindow until interaction, so fall
        // back to the first entry of AXWindows.
        var listRef: CFTypeRef?
        let listStatus = AXUIElementCopyAttributeValue(
            appElement, kAXWindowsAttribute as CFString, &listRef)
        guard listStatus == .success else {
            // Report the status of the windows-list call, since that is the
            // call whose failure ends the capture here.
            return ["ok": false, "error": "ax_failed", "axError": listStatus.rawValue]
        }
        guard let firstWindow = (listRef as? [AXUIElement])?.first else {
            return ["ok": false, "error": "no_window"]
        }
        candidate = firstWindow
    }

    // Verify the CF type before the cast: a misbehaving app could hand back
    // something that is not an AXUIElement, and a blind force-cast would
    // take the sidecar down.
    guard let windowRef = candidate, CFGetTypeID(windowRef) == AXUIElementGetTypeID() else {
        return ["ok": false, "error": "ax_failed"]
    }
    let window = windowRef as! AXUIElement

    // A missing or non-string title is reported as the empty string.
    let windowTitle = stringAttr(window, kAXTitleAttribute) ?? ""

    var elements: [[String: Any]] = []
    walk(window, depth: 0, out: &elements)

    return [
        "ok": true,
        "app": appName,
        "pid": Int(pid),
        "window": windowTitle,
        "elements": elements,
        "truncated": elements.count >= MAX_ELEMENTS,
    ]
}

/// Depth-first walk of the accessibility tree rooted at `el`, appending one
/// entry per element that has readable text.
///
/// - Parameters:
///   - el: Element to visit.
///   - depth: Depth of `el`; elements deeper than `MAX_DEPTH` are skipped.
///   - out: Accumulator of `{role, text, frame?}` entries; the walk stops once
///     it holds `MAX_ELEMENTS` entries.
func walk(_ el: AXUIElement, depth: Int, out: inout [[String: Any]]) {
    guard out.count < MAX_ELEMENTS, depth <= MAX_DEPTH else { return }

    let role = stringAttr(el, kAXRoleAttribute) ?? ""
    let text = readableText(el)
    if !text.isEmpty {
        var entry: [String: Any] = ["role": role, "text": truncate(text, MAX_TEXT_LEN)]
        // The frame is optional: elements without a usable position/size
        // are still reported, just without "frame".
        if let frame = elementFrame(el) {
            entry["frame"] = frame
        }
        out.append(entry)
        guard out.count < MAX_ELEMENTS else { return }
    }

    var childrenRef: CFTypeRef?
    guard AXUIElementCopyAttributeValue(el, kAXChildrenAttribute as CFString, &childrenRef) == .success,
          let children = childrenRef as? [AXUIElement]
    else { return }

    for child in children {
        // Re-check the cap before each child: a sibling's subtree may have
        // filled the output already.
        guard out.count < MAX_ELEMENTS else { return }
        walk(child, depth: depth + 1, out: &out)
    }
}

/// Returns the first non-empty string among the element's text-bearing
/// attributes, or "" when none of them yields a non-empty string.
///
/// Attributes are tried in priority order: AXValue is for fields/text areas;
/// AXTitle for buttons/menus; AXDescription for icons/images; AXHelp and
/// AXPlaceholderValue are fallbacks. Attributes whose value is not a string
/// are skipped.
///
/// - Parameter el: The accessibility element to read.
/// - Returns: The chosen text, or the empty string.
func readableText(_ el: AXUIElement) -> String {
    // A typed [String] list: no type-erased array and no force-cast needed
    // just to hand the names to stringAttr.
    let candidates: [String] = [
        kAXValueAttribute,
        kAXTitleAttribute,
        kAXDescriptionAttribute,
        kAXHelpAttribute,
        kAXPlaceholderValueAttribute,
    ]
    for name in candidates {
        if let text = stringAttr(el, name), !text.isEmpty {
            return text
        }
    }
    return ""
}

/// Reads an accessibility attribute and returns it only if it is a string.
///
/// - Parameters:
///   - el: The accessibility element to query.
///   - key: Attribute name, e.g. `kAXTitleAttribute`.
/// - Returns: The attribute's value as a `String`, or `nil` when the AX call
///   does not succeed or the value is not a string.
func stringAttr(_ el: AXUIElement, _ key: String) -> String? {
    var value: CFTypeRef?
    guard AXUIElementCopyAttributeValue(el, key as CFString, &value) == .success else {
        return nil
    }
    return value as? String
}

/// Reads an element's frame from its AXPosition and AXSize attributes.
///
/// - Parameter el: The accessibility element to query.
/// - Returns: A dictionary with keys "x", "y", "w" and "h", or `nil` when
///   either attribute is unavailable, is not an `AXValue`, cannot be decoded
///   as a point/size, or contains a non-finite number.
func elementFrame(_ el: AXUIElement) -> [String: Double]? {
    var posRef: CFTypeRef?
    var sizeRef: CFTypeRef?
    guard AXUIElementCopyAttributeValue(el, kAXPositionAttribute as CFString, &posRef) == .success,
          AXUIElementCopyAttributeValue(el, kAXSizeAttribute as CFString, &sizeRef) == .success,
          let posVal = posRef, let sizeVal = sizeRef
    else { return nil }

    // Check the CF type before force-casting. Apps occasionally return
    // malformed types for these attributes; skipping the frame is strictly
    // preferable to taking down the sidecar.
    guard CFGetTypeID(posVal) == AXValueGetTypeID(),
          CFGetTypeID(sizeVal) == AXValueGetTypeID()
    else { return nil }

    var pos = CGPoint.zero
    var size = CGSize.zero
    // AXValueGetValue returns false when the wrapped value is not of the
    // requested kind. Without this check a failed decode would be reported
    // as a genuine all-zero frame.
    guard AXValueGetValue(posVal as! AXValue, .cgPoint, &pos),
          AXValueGetValue(sizeVal as! AXValue, .cgSize, &size)
    else { return nil }

    let frame = [
        "x": Double(pos.x),
        "y": Double(pos.y),
        "w": Double(size.width),
        "h": Double(size.height),
    ]
    // NaN/infinity cannot be represented in JSON and would make the response
    // unserializable, so drop the frame rather than lose the whole capture.
    guard frame.values.allSatisfy(\.isFinite) else { return nil }
    return frame
}

/// Shortens `s` to at most `n` characters, appending "…" when anything was cut.
///
/// Only the first `n` characters are inspected, so the cost does not grow
/// with the length of very large inputs.
///
/// - Parameters:
///   - s: The text to shorten.
///   - n: Maximum number of characters (grapheme clusters) to keep. Negative
///     values are treated as 0.
/// - Returns: `s` unchanged when it has `n` or fewer characters; otherwise its
///   first `n` characters followed by "…".
func truncate(_ s: String, _ n: Int) -> String {
    let limit = max(n, 0)
    // `limitedBy` yields nil when the string is shorter than `limit`, and
    // `endIndex` when it is exactly `limit` long: nothing to cut either way.
    guard let cut = s.index(s.startIndex, offsetBy: limit, limitedBy: s.endIndex),
          cut != s.endIndex
    else { return s }
    return String(s[..<cut]) + "…"
}

/// Returns `base` with the request id attached under "id".
///
/// - Parameters:
///   - base: The response payload.
///   - id: The id from the request, if it had one.
/// - Returns: `base` unchanged when `id` is nil; otherwise a copy whose "id"
///   entry is set to `id` (replacing any existing "id").
func merge(_ base: [String: Any], _ id: Any?) -> [String: Any] {
    guard let requestID = id else { return base }
    return base.merging(["id": requestID]) { _, incoming in incoming }
}

/// Writes `obj` to stdout as a single line of JSON.
///
/// If `obj` cannot be serialized, a fallback
/// `{"ok":false,"error":"serialize_failed"}` line is written instead (with the
/// original "id" when that is itself serializable), so the requester always
/// gets exactly one response and is never left waiting.
///
/// - Parameter obj: The response payload.
func write(_ obj: [String: Any]) {
    /// Encodes a payload as a JSON string, or nil when it is not valid JSON.
    func encode(_ payload: [String: Any]) -> String? {
        // Validate first: JSONSerialization raises an Objective-C exception,
        // which `try?` cannot catch, when handed an invalid object.
        guard JSONSerialization.isValidJSONObject(payload),
              let data = try? JSONSerialization.data(withJSONObject: payload, options: [])
        else { return nil }
        return String(data: data, encoding: .utf8)
    }

    if let line = encode(obj) {
        print(line)
        return
    }

    fputs("[ax-capture] response was not JSON-serializable\n", stderr)
    var fallback: [String: Any] = ["ok": false, "error": "serialize_failed"]
    if let id = obj["id"], JSONSerialization.isValidJSONObject(["id": id]) {
        fallback["id"] = id
    }
    if let line = encode(fallback) {
        print(line)
    }
}
5 changes: 3 additions & 2 deletions desktop/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
"private": true,
"main": "./out/main/index.js",
"scripts": {
"dev": "tsx scripts/patch-dev-electron-name.ts && electron-vite dev",
"dev": "yarn build:ax-capture && tsx scripts/patch-dev-electron-name.ts && electron-vite dev",
"build": "electron-vite build",
"build:ax-capture": "node scripts/build-ax-capture.mjs",
"test": "vitest run",
"typecheck": "tsc --noEmit",
"postinstall": "electron-rebuild -f -w better-sqlite3 --module-dir .",
"dist:mac": "node scripts/build-services.mjs && electron-vite build && node scripts/stage-app.mjs && node scripts/with-build-env.mjs electron-builder --mac",
"dist:mac": "AX_REQUIRE_UNIVERSAL=1 node scripts/build-services.mjs && electron-vite build && node scripts/stage-app.mjs && node scripts/with-build-env.mjs electron-builder --mac",
"smoke:mac": "node scripts/smoke-test.mjs",
"build:icon": "tsx scripts/build-mac-icon.ts ../mobile/assets/images/ios/icon-1024x1024.png build/icon-master.png build/icon.iconset && iconutil -c icns build/icon.iconset -o build/icon.icns && cp build/icon.iconset/icon_512x512.png resources/dock/icon.png",
"build:tray-icon": "tsx scripts/build-tray-icon.ts",
Expand Down
112 changes: 112 additions & 0 deletions desktop/scripts/build-ax-capture.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#!/usr/bin/env node
// Build the ax-capture Swift sidecar as a universal binary and stage it
// under desktop/resources/bin/ so electron-builder picks it up via
// extraResources.
//
// Output: desktop/resources/bin/ax-capture (arm64+x86_64 universal)
import { execSync, spawnSync } from "node:child_process"
import { mkdirSync, copyFileSync, existsSync, readdirSync, statSync } from "node:fs"
import { dirname, resolve, join } from "node:path"
import { fileURLToPath } from "node:url"

// Paths are resolved relative to this script so it works from any cwd.
const here = dirname(fileURLToPath(import.meta.url))
const desktopRoot = resolve(here, "..")
const pkgDir = resolve(desktopRoot, "native/ax-capture")
const sourcesDir = resolve(pkgDir, "Sources")
const packageManifest = resolve(pkgDir, "Package.swift")
const outDir = resolve(desktopRoot, "resources/bin")
const outBin = resolve(outDir, "ax-capture")
const archs = ["arm64", "x86_64"]

if (!existsSync(pkgDir)) {
  console.error(`[build-ax-capture] package missing: ${pkgDir}`)
  process.exit(1)
}

mkdirSync(outDir, { recursive: true })

// Distribution builds MUST produce a universal binary; shipping a silent
// arm64-only sidecar would crash on Intel installs. `yarn dist:mac` opts in
// with AX_REQUIRE_UNIVERSAL=1, and ELECTRON_BUILDER_RUNNING=1 is honored too.
// Dev builds may fall back to arm64-only when the x86_64 toolchain is missing.
const requireUniversal =
  process.env.ELECTRON_BUILDER_RUNNING === "1" ||
  process.env.AX_REQUIRE_UNIVERSAL === "1"

// Skip the rebuild when no Swift source has changed since the last output.
// Keeps `yarn dev` tight — the Swift compile is ~4 s otherwise, paid on
// every Electron restart even when nothing has moved.
if (existsSync(outBin) && process.env.AX_FORCE_REBUILD !== "1") {
  const outMtime = statSync(outBin).mtimeMs
  const newest = newestSourceMtime(sourcesDir, statSync(packageManifest).mtimeMs)
  if (newest <= outMtime) {
    // A cached binary may be an arm64-only dev fallback. When a universal
    // binary is required, only reuse the cache if it contains every arch
    // (`lipo -verify_arch` exits 0 only when all listed archs are present).
    const cacheUsable =
      !requireUniversal ||
      spawnSync("lipo", [outBin, "-verify_arch", ...archs], { stdio: "ignore" }).status === 0
    if (cacheUsable) {
      console.log("[build-ax-capture] up-to-date, skipping (AX_FORCE_REBUILD=1 to override)")
      process.exit(0)
    }
    console.log("[build-ax-capture] cached binary is not universal; rebuilding")
  }
}

const builtBins = []
for (const arch of archs) {
  console.log(`[build-ax-capture] swift build --arch ${arch}`)
  const buildArgs = ["build", "-c", "release", "--arch", arch, "--package-path", pkgDir]
  const r = spawnSync("swift", buildArgs, { stdio: "inherit" })
  if (r.status !== 0) {
    if (arch === "x86_64" && !requireUniversal) {
      console.warn(
        "[build-ax-capture] x86_64 build failed (likely missing toolchain on Apple Silicon dev). Falling back to arm64-only.",
      )
      continue
    }
    console.error(
      `[build-ax-capture] ${arch} build failed; aborting (universal required for distribution builds)`,
    )
    process.exit(r.status ?? 1)
  }
  // Ask SwiftPM where it put the products. Arguments are passed as an array
  // (no shell), so a package path containing quotes, spaces or `$` is safe.
  const binPath = spawnSync("swift", [...buildArgs, "--show-bin-path"], {
    encoding: "utf8",
    stdio: ["ignore", "pipe", "inherit"],
  })
  if (binPath.status !== 0) {
    console.error(`[build-ax-capture] could not resolve ${arch} bin path; aborting`)
    process.exit(binPath.status ?? 1)
  }
  builtBins.push(resolve(binPath.stdout.trim(), "AXCapture"))
}

if (builtBins.length === 0) {
  console.error("[build-ax-capture] no architectures built")
  process.exit(1)
}

if (builtBins.length === 1) {
  // Single-arch (dev fallback): the thin binary is staged as-is.
  copyFileSync(builtBins[0], outBin)
} else {
  const lipo = spawnSync("lipo", ["-create", "-output", outBin, ...builtBins], { stdio: "inherit" })
  if (lipo.status !== 0) {
    console.error("[build-ax-capture] lipo failed; aborting")
    process.exit(lipo.status ?? 1)
  }
}
const chmod = spawnSync("chmod", ["+x", outBin])
if (chmod.status !== 0) {
  console.error("[build-ax-capture] chmod failed; aborting")
  process.exit(chmod.status ?? 1)
}
if (!existsSync(outBin)) {
  console.error(`[build-ax-capture] expected ${outBin} after build but it is missing`)
  process.exit(1)
}

console.log(`[build-ax-capture] -> ${outBin}`)

/**
 * Returns the most recent modification time found under a directory tree.
 *
 * @param {string} root Directory to scan recursively.
 * @param {number} seed Starting value (ms); the result is never below it.
 * @returns {number} The largest `mtimeMs` among `seed` and every
 *   non-directory entry beneath `root`.
 */
function newestSourceMtime(root, seed) {
  return readdirSync(root, { withFileTypes: true }).reduce((latest, entry) => {
    const full = join(root, entry.name)
    // Directories contribute only through their contents; everything else is
    // stat'ed directly.
    return entry.isDirectory()
      ? newestSourceMtime(full, latest)
      : Math.max(latest, statSync(full).mtimeMs)
  }, seed)
}
1 change: 1 addition & 0 deletions desktop/scripts/build-services.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ step("build relay-server bundle", () =>
run("yarn", ["workspace", "relay-server", "run", "build:bundle"], repoRoot),
)
step("build openclaw bundle", () => run("node", ["scripts/build-openclaw-bundle.mjs"], desktopRoot))
step("build ax-capture sidecar", () => run("node", ["scripts/build-ax-capture.mjs"], desktopRoot))

function step(label, fn) {
console.log(`\n[build-services] ${label}`)
Expand Down
Loading
Loading