diff --git a/CHANGELOG.md b/CHANGELOG.md index 62f9345..8687f06 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Fixed +- Keep backup Git commits inside the configured repository root and pin hashed backup text files to LF line endings. (#79 - thanks @rodriguez46p-ui) - Persist profile avatars exposed by Bird's full live-sync payloads. (#75 - thanks @RajvardhanPatil07) - Persist quoted tweet payloads returned by Bird-backed live syncs so quote cards render without a separate hydrate. (#76 - thanks @lukaskawerau) - Show full tweet text in Today citation popovers instead of truncating long posts after six lines. diff --git a/docs/backup.md b/docs/backup.md index 07eac85..af2005e 100644 --- a/docs/backup.md +++ b/docs/backup.md @@ -10,6 +10,7 @@ birdclaw can write the canonical SQLite store as deterministic JSONL shards that ## Layout ```text +.gitattributes manifest.json data/accounts.jsonl data/profiles.jsonl @@ -42,6 +43,7 @@ Design rules: - **profile bio entities** preserve extracted `@handle`, domain, and company-phrase identity hints, including inactive historical values - **follow graph** shards preserve followers/following snapshots, snapshot members, current edges, and append-only churn events - **no SQLite WAL/SHM, FTS shadow tables, or transient live cache rows** ever land in the backup +- **line endings** for hashed JSONL and manifest files stay LF on every platform via the generated `.gitattributes` The manifest pins per-shard byte counts, row counts, and SHA hashes. Validation walks every shard and verifies they line up. @@ -82,6 +84,8 @@ What `sync` does: 4. exports the local union back into deterministic text shards 5. commits and pushes the backup repo +Git operations are rooted at the configured `repoPath`. If that directory sits inside another Git worktree, birdclaw initializes (or reuses) a separate repository at `repoPath` itself instead of staging backup files into the enclosing project. 
+ This is what makes birdclaw safe across multiple machines: each machine can sync independently, and the merge step preserves rows that only one side has. ## `backup import` diff --git a/src/lib/backup.test.ts b/src/lib/backup.test.ts index 1e2b629..eccf74f 100644 --- a/src/lib/backup.test.ts +++ b/src/lib/backup.test.ts @@ -5,6 +5,7 @@ import { existsSync, mkdirSync, readFileSync, + realpathSync, rmSync, symlinkSync, writeFileSync, @@ -748,6 +749,83 @@ describe("text backup", () => { { encoding: "utf8" }, ).trim(), ).toBe("1"); + expect( + execFileSync( + "git", + [ + "-C", + secondRepoPath, + "show-ref", + "--verify", + "refs/remotes/origin/main", + ], + { encoding: "utf8" }, + ).trim(), + ).toContain("refs/remotes/origin/main"); + }, 20000); + + it("isolates backup commits from an enclosing Git worktree", async () => { + const parentPath = makeTempDir("birdclaw-parent-worktree-"); + execFileSync("git", ["-C", parentPath, "init"]); + const repoPath = path.join(parentPath, "backup"); + mkdirSync(repoPath); + writeFileSync( + path.join(repoPath, ".gitattributes"), + "*.md text eol=lf\ndata/**/*.jsonl text eol=crlf\n", + ); + switchHome("birdclaw-nested-backup-"); + seedBackupFixture(); + + const result = await exportBackup({ repoPath, commit: true }); + + expect(result.git?.committed).toBe(true); + expect( + execFileSync("git", ["-C", repoPath, "rev-parse", "--show-toplevel"], { + encoding: "utf8", + }).trim(), + ).toBe(realpathSync(repoPath)); + expect( + execFileSync( + "git", + ["-C", parentPath, "diff", "--cached", "--name-only"], + { + encoding: "utf8", + }, + ), + ).toBe(""); + expect(readFileSync(path.join(repoPath, ".gitattributes"), "utf8")).toBe( + [ + "*.md text eol=lf", + "data/**/*.jsonl text eol=crlf", + "", + "# BEGIN birdclaw backup attributes", + "# Backup hashes use the raw LF-delimited bytes written by Birdclaw.", + "data/**/*.jsonl text eol=lf", + "manifest.json text eol=lf", + "# END birdclaw backup attributes", + "", + ].join("\n"), + ); + 
expect( + execFileSync("git", ["-C", repoPath, "ls-files", ".gitattributes"], { + encoding: "utf8", + }).trim(), + ).toBe(".gitattributes"); + expect( + execFileSync( + "git", + [ + "-C", + repoPath, + "check-attr", + "eol", + "--", + "data/tweets/2026.jsonl", + "manifest.json", + ], + { encoding: "utf8" }, + ), + ).toBe("data/tweets/2026.jsonl: eol: lf\nmanifest.json: eol: lf\n"); }, 20000); it("does not inherit commit signing for generated backup commits", async () => { diff --git a/src/lib/backup.ts b/src/lib/backup.ts index 3e766b3..c3ac7c7 100644 --- a/src/lib/backup.ts +++ b/src/lib/backup.ts @@ -30,6 +30,7 @@ const BACKUP_SCHEMA_VERSION = 2; const MIN_SUPPORTED_BACKUP_SCHEMA_VERSION = 1; const MANIFEST_PATH = "manifest.json"; const DATA_DIR = "data"; +const GITATTRIBUTES_PATH = ".gitattributes"; const AUTO_SYNC_CACHE_KEY = "backup:auto-sync"; const DEFAULT_STALE_AFTER_SECONDS = 15 * 60; let autoUpdateInFlight: Promise | null = null; @@ -438,6 +439,40 @@ function readPreviousManifestEffect( ); } +function ensureBackupGitattributesEffect(repoPath: string) { + return Effect.gen(function* () { + const attributesPath = yield* trySync(() => + resolveBackupFilePath(repoPath, GITATTRIBUTES_PATH), + ); + yield* assertNoSymlinkAncestorEffect(repoPath, attributesPath); + const requiredLines = [ + "data/**/*.jsonl text eol=lf", + `${MANIFEST_PATH} text eol=lf`, + ]; + const generatedBlock = [ + "# BEGIN birdclaw backup attributes", + "# Backup hashes use the raw LF-delimited bytes written by Birdclaw.", + ...requiredLines, + "# END birdclaw backup attributes", + "", + ].join("\n"); + const current = yield* tryPromise(() => + fs.readFile(attributesPath, "utf8"), + ).pipe(Effect.option); + if (current._tag === "Some" && current.value.endsWith(generatedBlock)) { + return; + } + const preserved = + current._tag === "Some" + ? current.value.replaceAll(generatedBlock, "").replace(/[\r\n]+$/u, "") + : ""; + const content = preserved + ? 
`${preserved}\n\n${generatedBlock}` + : generatedBlock; + yield* tryPromise(() => fs.writeFile(attributesPath, content, "utf8")); + }); +} + function maybeCommitAndPushEffect({ repoPath, message, @@ -454,21 +489,22 @@ function maybeCommitAndPushEffect({ } return Effect.gen(function* () { - yield* gitEffect([ - "-C", - repoPath, - "rev-parse", - "--is-inside-work-tree", - ]).pipe( - Effect.catchAll(() => - gitEffect(["-C", repoPath, "init"]).pipe(Effect.asVoid), - ), - ); + if (!(yield* isGitRepoEffect(repoPath))) { + yield* gitEffect(["-C", repoPath, "init"]); + } + if (!(yield* isGitRepoEffect(repoPath))) { + return yield* Effect.fail( + new Error( + "Backup Git operations must run at the configured repository root", + ), + ); + } yield* gitEffect([ "-C", repoPath, "add", + GITATTRIBUTES_PATH, "README.md", MANIFEST_PATH, DATA_DIR, @@ -541,6 +577,9 @@ function maybeCommitAndPushEffect({ } function isGitRepoEffect(repoPath: string) { + if (!existsSync(path.join(repoPath, ".git"))) { + return Effect.succeed(false); + } return gitEffect(["-C", repoPath, "rev-parse", "--is-inside-work-tree"]).pipe( Effect.as(true), Effect.catchAll(() => Effect.succeed(false)), @@ -604,7 +643,7 @@ function ensureBackupGitRepoEffect({ repoPath, "fetch", "origin", - "main", + "main:refs/remotes/origin/main", ]).pipe( Effect.flatMap(() => gitEffect(["-C", repoPath, "checkout", "-B", "main", "origin/main"]), @@ -667,6 +706,7 @@ export function exportBackupEffect({ new Error("Backup repository path must be a real directory"), ); } + yield* ensureBackupGitattributesEffect(resolvedRepoPath); yield* ensureBackupReadmeEffect(resolvedRepoPath); const shards = yield* trySync(() => buildShards(database));