Skip to content

Commit da66533

Browse files
committed
Delete stale cache files on update
This actually addresses #66, though not in the way that I originall expected. Instead of forcing pkgfile to parse through pacman.conf on every search/list invocation, cleanup bad cache entries when performing an update. This way we get eventual consistency with pacman.conf.
1 parent 6f0fee8 commit da66533

File tree

9 files changed

+104
-59
lines changed

9 files changed

+104
-59
lines changed

meson.build

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,12 +180,13 @@ if gtest.found() and gmock.found()
180180
gtest_main = static_library('gtest_main', 'src/test/gtest_main.cc')
181181

182182
test(
183-
'filter_test',
183+
'unit_tests',
184184
executable(
185-
'filter_test',
185+
'unit_tests',
186186
files(
187187
'''
188188
src/filter_test.cc
189+
src/repo_test.cc
189190
'''.split(),
190191
),
191192
link_with: [libcommon, gtest_main],

src/archive_converter.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,8 @@ bool ArchiveConverter::Finalize() {
137137
for (int i = chunk_number_ + 1;; ++i) {
138138
std::string path = MakeArchiveChunkFilename(base_filename_out_, i, false);
139139

140-
if (unlink(path.c_str()) != 0) {
140+
std::error_code ec;
141+
if (!fs::remove(path, ec)) {
141142
break;
142143
}
143144
}

src/pkgfile.cc

Lines changed: 2 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "compress.hh"
1717
#include "filter.hh"
1818
#include "queue.hh"
19+
#include "repo.hh"
1920
#include "result.hh"
2021
#include "update.hh"
2122

@@ -323,28 +324,6 @@ std::unique_ptr<filter::Filter> Pkgfile::BuildFilterFromOptions(
323324
return filter;
324325
}
325326

326-
// Repo files should always be of the form ${reponame}.files.nnn where 'n' is an
327-
// 0-indexed, zero-padded, increasing integer
328-
bool HasRepoSuffix(std::string_view path) {
329-
const auto ndots = std::count(path.begin(), path.end(), '.');
330-
if (ndots != 2) {
331-
return false;
332-
}
333-
334-
auto pos = path.rfind('.');
335-
if (!path.substr(0, pos).ends_with(".files")) {
336-
return false;
337-
}
338-
339-
for (++pos; pos < path.size(); ++pos) {
340-
if (!isdigit(path[pos])) {
341-
return false;
342-
}
343-
}
344-
345-
return true;
346-
}
347-
348327
// static
349328
Pkgfile::RepoMap Pkgfile::DiscoverRepos(std::string_view cachedir,
350329
std::error_code& ec) {
@@ -355,7 +334,7 @@ Pkgfile::RepoMap Pkgfile::DiscoverRepos(std::string_view cachedir,
355334
// they're lexicographically ordered.
356335
for (const auto& entry : fs::directory_iterator(cachedir, ec)) {
357336
const std::string pathname = entry.path();
358-
if (!entry.is_regular_file() || !HasRepoSuffix(pathname)) {
337+
if (!entry.is_regular_file() || !FilenameHasRepoSuffix(pathname)) {
359338
continue;
360339
}
361340

src/repo.cc

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,4 +166,24 @@ Repo::~Repo() {
166166
}
167167
}
168168

169+
bool FilenameHasRepoSuffix(std::string_view path) {
170+
const auto ndots = std::count(path.begin(), path.end(), '.');
171+
if (ndots != 2) {
172+
return false;
173+
}
174+
175+
auto pos = path.rfind('.');
176+
if (!path.substr(0, pos).ends_with(".files")) {
177+
return false;
178+
}
179+
180+
for (++pos; pos < path.size(); ++pos) {
181+
if (!isdigit(path[pos])) {
182+
return false;
183+
}
184+
}
185+
186+
return true;
187+
}
188+
169189
// vim: set ts=2 sw=2 et:

src/repo.hh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,8 @@ struct AlpmConfig {
6161
std::string architecture;
6262
};
6363

64+
// Verifies that the given file path has a format of "${reponame}.files.nnn"
65+
// where 'n' is an 0-indexed, zero-padded, increasing integer.
66+
bool FilenameHasRepoSuffix(std::string_view path);
67+
6468
// vim: set ts=2 sw=2 et:

src/update.cc

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <sys/utsname.h>
1111

1212
#include <algorithm>
13+
#include <filesystem>
1314
#include <format>
1415

1516
#include "archive_converter.hh"
@@ -18,6 +19,7 @@
1819
#include "repo.hh"
1920

2021
namespace chrono = std::chrono;
22+
namespace fs = std::filesystem;
2123

2224
auto now = chrono::system_clock::now;
2325

@@ -254,6 +256,24 @@ bool Updater::RepackRepoData(const struct Repo* repo) {
254256
return converter != nullptr && converter->RewriteArchive();
255257
}
256258

259+
void Updater::TidyCacheDir(const std::set<std::string>& known_repos) {
260+
std::error_code ec;
261+
for (const auto& entry : fs::directory_iterator(cachedir_, ec)) {
262+
const fs::path filename = entry.path().filename();
263+
const fs::path reponame = entry.path().stem().stem();
264+
265+
if (!FilenameHasRepoSuffix(filename.c_str()) ||
266+
!known_repos.contains(reponame)) {
267+
std::error_code ec;
268+
fs::remove(entry, ec);
269+
if (ec.value() != 0) {
270+
fprintf(stderr, "warning: failed to remove stale cache file: %s\n",
271+
entry.path().c_str());
272+
}
273+
}
274+
}
275+
}
276+
257277
void Updater::DownloadWaitLoop(CURLM* multi) {
258278
int active_handles;
259279

@@ -417,6 +437,13 @@ int Updater::Update(const std::string& alpm_config_file, bool force) {
417437
ret = 1;
418438
}
419439

440+
std::set<std::string> known_repos;
441+
for (const auto& repo : alpm_config.repos) {
442+
known_repos.insert(repo.name);
443+
}
444+
445+
TidyCacheDir(known_repos);
446+
420447
return ret;
421448
}
422449

src/update.hh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
#include <curl/curl.h>
44

5+
#include <set>
6+
57
#include "pkgfile.hh"
68
#include "repo.hh"
79

@@ -19,6 +21,7 @@ class Updater {
1921
void DownloadWaitLoop(CURLM* multi);
2022
int DownloadCheckComplete(CURLM* multi, int remaining);
2123
bool RepackRepoData(const struct Repo* repo);
24+
void TidyCacheDir(const std::set<std::string>& known_repos);
2225

2326
std::string cachedir_;
2427
int compress_;

tests/pkgfile_test.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import tempfile
99
import time
1010
import unittest
11+
from pathlib import Path
1112

1213

1314
def FindMesonBuildDir():
@@ -46,7 +47,7 @@ class TestCase(unittest.TestCase):
4647
def setUp(self):
4748
self.build_dir = FindMesonBuildDir()
4849
self._tempdir = tempfile.TemporaryDirectory()
49-
self.tempdir = self._tempdir.name
50+
self.tempdir = Path(self._tempdir.name)
5051
self.cachedir = os.path.join(
5152
os.path.dirname(os.path.realpath(__file__)), 'golden/pkgfile')
5253
self.alpmcachedir = os.path.join(

tests/update.py

Lines changed: 41 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pkgfile_test
66
import os
77
from collections import defaultdict
8+
from pathlib import Path
89

910

1011
def _sha256(path):
@@ -23,12 +24,12 @@ def setUp(self):
2324

2425
# relocate our cachedir to not overwrite the golden fileset
2526
self.goldendir = self.cachedir
26-
self.cachedir = os.path.join(self.tempdir, 'cache')
27-
os.mkdir(self.cachedir)
27+
self.cachedir = Path(self.tempdir, 'cache')
28+
self.cachedir.mkdir()
2829

2930
@staticmethod
3031
def getRepoFiles(subdir, reponame):
31-
return sorted(glob.glob('{}/{}.files.*'.format(subdir, reponame)))
32+
return sorted(Path(subdir).glob(f'{reponame}.files.*'))
3233

3334
def assertMatchesGolden(self, reponame):
3435
golden_files = self.getRepoFiles(self.goldendir, reponame)
@@ -48,15 +49,14 @@ def testUpdate(self):
4849
self.assertMatchesGolden('testing')
4950

5051
for repo in ('multilib', 'testing'):
51-
original_repo = '{}/x86_64/{repo}/{repo}.files'.format(
52-
self.alpmcachedir, repo=repo)
52+
original_repo = Path(self.alpmcachedir, 'x86_64', repo, f'{repo}.files')
5353

5454
for converted_repo in self.getRepoFiles(self.cachedir, repo):
5555
# Only compare the integer portion of the mtime. we'll only ever
5656
# get back second precision from a remote server, so any fractional
5757
# second that's present on our golden repo can be ignored.
58-
self.assertEqual(int(os.stat(original_repo).st_mtime),
59-
int(os.stat(converted_repo).st_mtime))
58+
self.assertEqual(int(original_repo.stat().st_mtime),
59+
int(converted_repo.stat().st_mtime))
6060

6161
def testUpdateForcesUpdates(self):
6262
r = self.Pkgfile(['-u'])
@@ -65,17 +65,15 @@ def testUpdateForcesUpdates(self):
6565
inodes_before = {}
6666
for r in ('multilib', 'testing'):
6767
for repofile in self.getRepoFiles(self.cachedir, r):
68-
inodes_before[os.path.basename(repofile)] = os.stat(
69-
repofile).st_ino
68+
inodes_before[repofile.name] = repofile.stat().st_ino
7069

7170
r = self.Pkgfile(['-uu'])
7271
self.assertEqual(r.returncode, 0)
7372

7473
inodes_after = {}
7574
for r in ('multilib', 'testing'):
7675
for repofile in self.getRepoFiles(self.cachedir, r):
77-
inodes_after[os.path.basename(repofile)] = os.stat(
78-
repofile).st_ino
76+
inodes_after[repofile.name] = repofile.stat().st_ino
7977

8078
for r in ('multilib', 'testing'):
8179
self.assertNotEqual(
@@ -87,25 +85,25 @@ def testUpdateSkipsUpToDate(self):
8785
r = self.Pkgfile(['-u'])
8886
self.assertEqual(r.returncode, 0)
8987

90-
# gather mtimes
88+
# gather inodes before the update
9189
inodes_before = defaultdict(dict)
9290
for r in ('multilib', 'testing'):
9391
for repofile in self.getRepoFiles(self.cachedir, r):
94-
inodes_before[r][repofile] = os.stat(repofile).st_ino
92+
inodes_before[r][repofile] = repofile.stat().st_ino
9593

9694
# set the mtime to the epoch, expect that it gets rewritten on next update
97-
os.utime(os.path.join(self.cachedir, 'testing.files.000'), (0, 0))
95+
os.utime(self.cachedir / 'testing.files.000', (0, 0))
9896

9997
r = self.Pkgfile(['-u'])
10098
self.assertEqual(r.returncode, 0)
10199

102-
# re-gather mtimes after a soft update
100+
# re-gather inodes after a soft update
103101
inodes_after = defaultdict(dict)
104102
for r in ('multilib', 'testing'):
105103
for repofile in self.getRepoFiles(self.cachedir, r):
106-
inodes_after[r][repofile] = os.stat(repofile).st_ino
104+
inodes_after[r][repofile] = repofile.stat().st_ino
107105

108-
# compare to mtimes after
106+
# compare inodes
109107
self.assertEqual(
110108
inodes_before['multilib'],
111109
inodes_after['multilib'],
@@ -117,53 +115,64 @@ def testUpdateSkipsUpToDate(self):
117115
msg='testing.files unexpectedly NOT rewritten by `pkgfile -u`')
118116

119117
def testUpdateSkipsBadServer(self):
120-
with open(os.path.join(self.tempdir, 'pacman.conf'), 'w') as f:
121-
f.write('''
118+
Path(self.tempdir / 'pacman.conf').write_text(f'''
122119
[options]
123120
Architecture = x86_64
124121
125122
[testing]
126-
Server = {fakehttp_server}/$arch/$repo/404
127-
Server = {fakehttp_server}/$arch/$repo
123+
Server = {self.baseurl}/$arch/$repo/404
124+
Server = {self.baseurl}/$arch/$repo
128125
129126
[multilib]
130-
Server = {fakehttp_server}/$arch/$repo
131-
'''.format(fakehttp_server=self.baseurl))
127+
Server = {self.baseurl}/$arch/$repo
128+
''')
132129

133130
r = self.Pkgfile(['-u'])
134131
self.assertEqual(r.returncode, 0)
135132

136133
def testUpdateFailsWhenExhaustingServers(self):
137-
with open(os.path.join(self.tempdir, 'pacman.conf'), 'w') as f:
138-
f.write('''
134+
Path(self.tempdir, 'pacman.conf').write_text(f'''
139135
[options]
140136
Architecture = x86_64
141137
142138
[testing]
143-
Server = {fakehttp_server}/$arch/$repo/404
139+
Server = {self.baseurl}/$arch/$repo/404
144140
145141
[multilib]
146-
Server = {fakehttp_server}/$arch/$repo
147-
'''.format(fakehttp_server=self.baseurl))
142+
Server = {self.baseurl}/$arch/$repo
143+
''')
148144

149145
r = self.Pkgfile(['-u'])
150146
self.assertNotEqual(r.returncode, 0)
151147

152148
def testUpdateCleansUpOldRepoChunks(self):
153149
r = self.Pkgfile(['-uu', '--repochunkbytes=5000'])
154150
self.assertEqual(r.returncode, 0)
155-
small_chunks = set(glob.glob('{}/testing.files*'.format(
156-
self.cachedir)))
151+
small_chunks = set(Path(self.cachedir).glob('testing.files*'))
157152

158153
# update again, creating ~half as many repo files
159154
r = self.Pkgfile(['-uu', '--repochunkbytes=200000'])
160155
self.assertEqual(r.returncode, 0)
161-
large_chunks = set(glob.glob('{}/testing.files*'.format(
162-
self.cachedir)))
156+
large_chunks = set(Path(self.cachedir).glob('testing.files*'))
163157

164158
# the 100k chunked fileset is a strict subset of the original 5k chunked fileset.
165159
self.assertLess(large_chunks, small_chunks)
166160

161+
def testUpdateRemovesUnknownRepos(self):
162+
expected_removed = (
163+
self.cachedir / "garbage.files",
164+
self.cachedir / "deletemebro.files.000",
165+
)
166+
167+
for p in expected_removed:
168+
p.touch()
169+
170+
r = self.Pkgfile(['-u'])
171+
self.assertEqual(r.returncode, 0)
172+
173+
for p in expected_removed:
174+
self.assertFalse(p.exists(), msg='{p} still exists, expected deleted')
175+
167176

168177
if __name__ == '__main__':
169178
pkgfile_test.main()

0 commit comments

Comments
 (0)