Skip to content

Commit

Permalink
dont cache unchanged files (#356)
Browse files Browse the repository at this point in the history
* dont cache unchanged files

* order import

* v2

* Change files

* remove .only statement

Co-authored-by: Vincent Bailly <[email protected]>
  • Loading branch information
VincentBailly and Vincent Bailly authored May 6, 2021
1 parent 2aa3d66 commit 3df30b0
Show file tree
Hide file tree
Showing 12 changed files with 159 additions and 49 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"type": "patch",
"comment": "opt-in to optimize cache size",
"packageName": "backfill",
"email": "[email protected]",
"dependentChangeType": "patch",
"date": "2021-05-06T13:10:11.232Z"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"type": "patch",
"comment": "opt-in to optimize cache size",
"packageName": "backfill-cache",
"email": "[email protected]",
"dependentChangeType": "patch",
"date": "2021-05-06T13:10:11.312Z"
}
48 changes: 48 additions & 0 deletions packages/backfill/src/__tests__/backfill.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,54 @@ import { backfill } from "../index";
const logger = makeLogger("mute");

describe("backfill", () => {
it("with modified source files", async () => {
  // Arrange: the fixture's src/index.ts is matched by the output glob, and
  // the build command overwrites that same file.
  const fixtureLocation = await setupFixture("basic");
  const indexFile = path.join(fixtureLocation, "src", "index.ts");
  const readIndexFile = (): string => fs.readFileSync(indexFile).toString();

  const config = createConfig(logger, fixtureLocation);
  config.outputGlob = ["src/*"];

  const salt = "fooBar";
  const outputContent = `console.log("foo bar");`;

  // Remember the pristine content so it can be restored between the two runs.
  const initialContent = readIndexFile();

  let buildCalled = 0;
  const buildCommand = async (): Promise<void> => {
    await fs.writeFile(indexFile, outputContent);
    buildCalled += 1;
  };

  // First run: the build command is expected to execute and rewrite the file.
  await backfill(config, buildCommand, salt, logger);

  expect(buildCalled).toBe(1);
  expect(readIndexFile()).toBe(outputContent);

  // Put the source file back to its original content and reset the counter.
  buildCalled = 0;
  await fs.writeFile(indexFile, initialContent);

  // Second run: the build must not run, yet the file must end up with the
  // build output again.
  await backfill(config, buildCommand, salt, logger);

  expect(buildCalled).toBe(0);
  expect(readIndexFile()).toBe(outputContent);
});
it("with cache miss and then cache hit", async () => {
// Set up
const fixtureLocation = await setupFixture("basic");
Expand Down
4 changes: 2 additions & 2 deletions packages/backfill/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ export async function backfill(
const createPackageHash = async () =>
await computeHash(packageRoot, logger, hashSalt);
const fetch = async (hash: string) =>
await fetch_api(packageRoot, hash, logger);
await fetch_api(packageRoot, hash, logger, config);
const run = async () => {
try {
await buildCommand();
Expand All @@ -54,7 +54,7 @@ export async function backfill(
};
const put = async (hash: string) => {
try {
await put_api(packageRoot, hash, logger);
await put_api(packageRoot, hash, logger, config);
} catch (err) {
logger.error(
`Failed to persist the cache with the following error:\n\n${err}`
Expand Down
2 changes: 2 additions & 0 deletions packages/cache/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@
},
"dependencies": {
"@azure/storage-blob": "12.1.2",
"@rushstack/package-deps-hash": "^2.4.48",
"backfill-config": "^6.1.3",
"backfill-logger": "^5.1.3",
"execa": "^4.0.0",
"find-up": "^5.0.0",
"fs-extra": "^8.1.0",
"globby": "^11.0.0",
"tar-fs": "^2.1.0"
Expand Down
8 changes: 3 additions & 5 deletions packages/cache/src/AzureBlobCacheStorage.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import * as path from "path";
import { BlobServiceClient } from "@azure/storage-blob";
import tarFs from "tar-fs";
import globby from "globby";

import { Logger } from "backfill-logger";
import { AzureBlobCacheStorageOptions } from "backfill-config";
Expand Down Expand Up @@ -91,7 +90,7 @@ export class AzureBlobCacheStorage extends CacheStorage {
}
}

protected async _put(hash: string, outputGlob: string[]): Promise<void> {
protected async _put(hash: string, filesToCache: string[]): Promise<void> {
const blobClient = createBlobClient(
this.options.connectionString,
this.options.container,
Expand All @@ -100,13 +99,12 @@ export class AzureBlobCacheStorage extends CacheStorage {

const blockBlobClient = blobClient.getBlockBlobClient();

const filesToCopy = await globby(outputGlob, { cwd: this.cwd });
const tarStream = tarFs.pack(this.cwd, { entries: filesToCopy });
const tarStream = tarFs.pack(this.cwd, { entries: filesToCache });

// If there's a maxSize limit, first sum up the total size of bytes of all the outputGlobbed files
if (this.options.maxSize) {
let total = 0;
for (const file of filesToCopy) {
for (const file of filesToCache) {
total = total + (await stat(path.join(this.cwd, file))).size;
}

Expand Down
82 changes: 71 additions & 11 deletions packages/cache/src/CacheStorage.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,60 @@
import { dirname, relative } from "path";
import globby from "globby";
import findUp from "find-up";
import { getPackageDeps } from "@rushstack/package-deps-hash";

import { Logger } from "backfill-logger";

const savedHashOfRepos: { [gitRoot: string]: { [file: string]: string } } = {};

/**
 * Resolves the root folder of the git repository that contains `cwd`.
 *
 * The root is the parent of the `.git` entry located by `findUp` starting at
 * `cwd`. Only a `.git` *directory* is accepted (`type: "directory"`).
 *
 * @param cwd - Folder from which the lookup starts.
 * @returns The folder that contains the `.git` directory.
 * @throws Error when no `.git` directory is found.
 */
function getRepoRoot(cwd: string): string {
  const dotGitDirectory = findUp.sync(".git", { cwd, type: "directory" });

  if (dotGitDirectory) {
    return dirname(dotGitDirectory);
  }

  throw new Error(
    "The location that backfill is being run against is not in a git repo"
  );
}

/**
 * Makes sure the file hashes of the repository containing `cwd` are stored in
 * `savedHashOfRepos`.
 *
 * The hashes are computed with `getPackageDeps` only when no entry exists yet
 * for that repository root; an existing entry is left untouched.
 *
 * @param cwd - Any folder inside the repository.
 */
function fetchHashesFor(cwd: string) {
  const repoRoot = getRepoRoot(cwd);

  if (savedHashOfRepos[repoRoot]) {
    // Already memoized: keep the earlier snapshot.
    return;
  }

  savedHashOfRepos[repoRoot] = getPackageDeps(repoRoot).files;
}

/**
 * Returns the memoized hashes of the files located under `cwd`.
 *
 * The memoized map is keyed by path relative to the repository root; the
 * returned map is re-keyed by path relative to `cwd`, always using forward
 * slashes so keys can be compared with glob results and with the keys of
 * `getPackageDeps(cwd).files`.
 *
 * @param cwd - Folder (inside a git repository) whose files are wanted.
 * @returns A map from cwd-relative file path to its memoized hash. Files that
 *   live outside of `cwd` are left out.
 * @throws Error (from `getRepoRoot`) when `cwd` is not inside a git repo.
 */
function getMemoizedHashesFor(cwd: string): { [file: string]: string } {
  // Populate the memo for this repository if nothing was stored yet.
  fetchHashesFor(cwd);

  const gitRoot = getRepoRoot(cwd);

  const savedHashOfThisRepo = savedHashOfRepos[gitRoot] as {
    [file: string]: string;
  };

  const pathRelativeToRepo = relative(gitRoot, cwd);

  // A relative path leaves `cwd` only when its first segment is exactly "..".
  // A plain startsWith("..") would also reject files such as "..foo".
  const escapesCwd = /^\.\.(?:[\\/]|$)/;

  // Build the result with in-place assignment: spreading the accumulator on
  // every step would copy it once per file (quadratic in the repo size).
  const hashesInCwd: { [file: string]: string } = {};
  for (const file of Object.keys(savedHashOfThisRepo)) {
    const pathRelativeToCwd = relative(pathRelativeToRepo, file);
    if (escapesCwd.test(pathRelativeToCwd)) {
      continue;
    }

    // `relative` returns platform separators; normalise *every* backslash
    // (global flag), not just the first one, so nested paths match on Windows.
    hashesInCwd[pathRelativeToCwd.replace(/\\/g, "/")] =
      savedHashOfThisRepo[file];
  }

  return hashesInCwd;
}

export interface ICacheStorage {
fetch: (hash: string) => Promise<boolean>;
put: (hash: string, outputGlob: string[]) => Promise<void>;
put: (hash: string, filesToCache: string[]) => Promise<void>;
}

export abstract class CacheStorage implements ICacheStorage {
Expand All @@ -17,26 +67,36 @@ export abstract class CacheStorage implements ICacheStorage {
tracer.stop();

this.logger.setHit(result);

// Save hash of files if not already memoized
fetchHashesFor(this.cwd);

return result;
}

public async put(hash: string, outputGlob: string[]): Promise<void> {
const tracer = this.logger.setTime("putTime");

const filesBeingCached = globby.sync(outputGlob, { cwd: this.cwd });
if (filesBeingCached.length === 0) {
throw new Error(
`Couldn't find any file on disk matching the output glob (${outputGlob.join(
", "
)})`
);
}
const filesMatchingOutputGlob = await globby(outputGlob, { cwd: this.cwd });

// Get the list of files that have not changed so we don't need to cache them.
const hashesNow = getPackageDeps(this.cwd).files;
const hashesThen = getMemoizedHashesFor(this.cwd);
const unchangedFiles = Object.keys(hashesThen).filter(
(s) => hashesThen[s] === hashesNow[s]
);

// Make this feature opt-in as it has not yet been tested at scale
const excludeUnchanged = process.env["BACKFILL_EXCLUDE_UNCHANGED"] === "1";
const filesToCache = excludeUnchanged
? filesMatchingOutputGlob.filter((f) => !unchangedFiles.includes(f))
: filesMatchingOutputGlob;

await this._put(hash, outputGlob);
await this._put(hash, filesToCache);
tracer.stop();
}

protected abstract _fetch(hash: string): Promise<boolean>;

protected abstract _put(hash: string, outputGlob: string[]): Promise<void>;
protected abstract _put(hash: string, filesToCache: string[]): Promise<void>;
}
6 changes: 3 additions & 3 deletions packages/cache/src/LocalCacheStorage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,13 @@ export class LocalCacheStorage extends CacheStorage {
return true;
}

protected async _put(hash: string, outputGlob: string[]): Promise<void> {
protected async _put(hash: string, filesToCache: string[]): Promise<void> {
const localCacheFolder = this.getLocalCacheFolder(hash);

const files = globby.sync(outputGlob, { cwd: this.cwd });
await fs.mkdirp(localCacheFolder);

await Promise.all(
files.map(async (file) => {
filesToCache.map(async (file) => {
const destinationFolder = path.join(
localCacheFolder,
path.dirname(file)
Expand Down
2 changes: 1 addition & 1 deletion packages/cache/src/LocalSkipCacheStorage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ export class LocalSkipCacheStorage extends CacheStorage {
}

// eslint-disable-next-line @typescript-eslint/no-unused-vars
protected async _put(hash: string, _outputGlob: string[]): Promise<void> {
protected async _put(hash: string, _filesToCache: string[]): Promise<void> {
const localCacheFolder = this.getLocalCacheFolder("skip-cache");
const hashFile = path.join(localCacheFolder, "hash");

Expand Down
6 changes: 3 additions & 3 deletions packages/cache/src/NpmCacheStorage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ export class NpmCacheStorage extends CacheStorage {
return true;
}

protected async _put(hash: string, outputGlob: string[]) {
protected async _put(hash: string, filesToCache: string[]) {
const { npmPackageName, registryUrl, npmrcUserconfig } = this.options;

const temporaryNpmOutputFolder = path.resolve(
Expand All @@ -102,10 +102,10 @@ export class NpmCacheStorage extends CacheStorage {
version: `0.0.0-${hash}`,
});

const files = await globby(outputGlob, { cwd: this.cwd });
await fs.mkdirp(temporaryNpmOutputFolder);

await Promise.all(
files.map(async (file) => {
filesToCache.map(async (file) => {
const destinationFolder = path.join(
temporaryNpmOutputFolder,
path.dirname(file)
Expand Down
27 changes: 3 additions & 24 deletions packages/cache/src/__tests__/LocalCacheStorage.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,9 @@ async function putInCache({
);
}

// This forces collection of the hashes before we change the files
await cacheStorage.fetch(hash);

if (expectSuccess) {
filesToCache.forEach((f) => createFileInFolder(fixtureLocation, f, false));
}
Expand Down Expand Up @@ -183,18 +186,6 @@ describe("LocalCacheStorage", () => {
});
});

it("will not persist cache when folder to cache does not exist", async () => {
await putInCache({
fixtureName: "basic",
hash: "811c319a73f988d9260fbf3f1d30f0f447c2a194",
expectSuccess: false,
outputGlob: ["lib/**", "dist/**"],
filesToCache: [],
errorMessage:
"Couldn't find any file on disk matching the output glob (lib/**, dist/**)",
});
});

it("will persist file matching glob in root folder", async () => {
await putInCache({
fixtureName: "basic",
Expand All @@ -204,18 +195,6 @@ describe("LocalCacheStorage", () => {
});
});

it("will not persist file excluded by a glob", async () => {
await putInCache({
fixtureName: "basic",
hash: "811c319a73f988d9260fbf3f1d30f0f447c2a194",
expectSuccess: false,
outputGlob: ["lib/**", "!lib/qwerty"],
filesToCache: ["lib/qwerty"],
errorMessage:
"Couldn't find any file on disk matching the output glob (lib/**, !lib/qwerty)",
});
});

it("will persist file when others are excluded in the same folder", async () => {
await putInCache({
fixtureName: "basic",
Expand Down
7 changes: 7 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1667,6 +1667,13 @@
resolved "https://registry.yarnpkg.com/@types/events/-/events-1.2.0.tgz#81a6731ce4df43619e5c8c945383b3e62a89ea86"
integrity sha512-KEIlhXnIutzKwRbQkGWb/I4HFqBuUykAdHgDED6xqwXJfONCjF5VoE0cXEiurh3XauygxzeDzgtXUqvLkxFzzA==

"@types/fs-extra@^8.0.0":
version "8.1.1"
resolved "https://registry.yarnpkg.com/@types/fs-extra/-/fs-extra-8.1.1.tgz#1e49f22d09aa46e19b51c0b013cb63d0d923a068"
integrity sha512-TcUlBem321DFQzBNuz8p0CLLKp0VvF/XH9E4KHNmgwyp4E3AfgI5cjiIVZWlbfThBop2qxFIh4+LeY6hVWWZ2w==
dependencies:
"@types/node" "*"

"@types/fs-extra@^9.0.11":
version "9.0.11"
resolved "https://registry.yarnpkg.com/@types/fs-extra/-/fs-extra-9.0.11.tgz#8cc99e103499eab9f347dbc6ca4e99fb8d2c2b87"
Expand Down

0 comments on commit 3df30b0

Please sign in to comment.