Skip to content

Commit edc6f86

Browse files
authored
quick_mounts_purge (#3344)
* quick_mounts_purge * self.assertRaises * address @charles-cowart comments
1 parent b021f8a commit edc6f86

File tree

3 files changed

+82
-2
lines changed

3 files changed

+82
-2
lines changed

qiita_db/test/test_util.py

+6
Original file line numberDiff line numberDiff line change
@@ -1291,6 +1291,12 @@ def test_purge_filepaths_test(self):
12911291
fps_viewed = self._get_current_filepaths()
12921292
self.assertCountEqual(fps_expected, fps_viewed)
12931293

1294+
def test_quick_mounts_purge(self):
    # Another test creates a conflicting artifact_type, so in the test
    # environment the purge is always expected to fail with ValueError
    self.assertRaises(ValueError, qdb.util.quick_mounts_purge)
1299+
12941300

12951301
STUDY_INFO = {
12961302
'study_id': 1,

qiita_db/util.py

+69-1
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,14 @@
4949
from bcrypt import hashpw, gensalt
5050
from functools import partial
5151
from os.path import join, basename, isdir, exists, getsize
52-
from os import walk, remove, listdir, rename
52+
from os import walk, remove, listdir, rename, stat
5353
from glob import glob
5454
from shutil import move, rmtree, copy as shutil_copy
5555
from openpyxl import load_workbook
5656
from tempfile import mkstemp
5757
from csv import writer as csv_writer
5858
from datetime import datetime
59+
from time import time as now
5960
from itertools import chain
6061
from contextlib import contextmanager
6162
import h5py
@@ -896,6 +897,73 @@ def purge_filepaths(delete_files=True):
896897
qdb.sql_connection.TRN.execute()
897898

898899

900+
def quick_mounts_purge():
    r"""Delete old artifact folders that are not linked to the database

    This is a "quick" purge as it only slightly relies on the database: the
    database is used to list the mountpoints that store their files in
    per-artifact subdirectories and to check whether the artifact id encoded
    in each folder name still exists.

    Returns
    -------
    str
        A report with one line per mountpoint folder name and the size that
        was removed from it, followed by a line with the totals.

    Raises
    ------
    ValueError
        If a folder maps to an existing artifact whose artifact type does
        not start with the name of the folder's parent directory.

    Notes
    -----
    Currently we delete anything older than 30 days (based on the folder
    modification time) that is not linked to the database. This number is
    intentionally hardcoded in the code. At the time of this writing this
    number seems high but we are keeping it this way to be safe. In the
    future, if needed, it can be changed.

    Folders whose parent directory is named ``FeatureData[Taxonomy]`` are
    never considered for deletion.
    """
    with qdb.sql_connection.TRN:
        main_sql = """SELECT data_directory_id FROM qiita.artifact_type at
                        LEFT JOIN qiita.data_directory dd ON (
                            dd.data_type = at.artifact_type)
                        WHERE subdirectory = true"""
        qdb.sql_connection.TRN.add(main_sql)
        mp_ids = qdb.sql_connection.TRN.execute_fetchflatten()
        mounts = [qdb.util.get_mountpoint_path_by_id(x) for x in mp_ids]
        # isdecimal (and not isnumeric) guarantees that int() can parse the
        # name; isdir skips stray files, which rmtree would refuse to delete
        folders = [join(x, f) for x in mounts for f in listdir(x)
                   if f.isdecimal() and isdir(join(x, f))]

    # getting all unlinked folders
    to_delete = []
    for f in folders:
        # paths look like .../<artifact_type>/<artifact_id>
        artifact_type, aid = f.split('/')[-2:]
        if artifact_type == 'FeatureData[Taxonomy]':
            continue

        try:
            a = qdb.artifact.Artifact(int(aid))
        except qdb.exceptions.QiitaDBUnknownIDError:
            # the artifact no longer exists so its folder is unlinked
            to_delete.append(f)
            continue
        if not a.artifact_type.startswith(artifact_type):
            raise ValueError('Review artifact type: '
                             f'{a.id} {artifact_type} {a.artifact_type}')

    # now, let's just keep those older than 30 days (in seconds)
    ignore = now() - (30*86400)
    to_delete = [x for x in to_delete if stat(x).st_mtime < ignore]

    # get stats to report: bytes to be removed per parent directory name
    stats = dict()
    for td in to_delete:
        f = td.split('/')[-2]
        size = sum(getsize(join(p, fp)) for p, ds, fs in walk(td)
                   for fp in fs)
        stats[f] = stats.get(f, 0) + size

    report = ['----------------------']
    for f, s in stats.items():
        report.append(f'{f}\t{naturalsize(s)}')
    report.append(
        f'Total files {len(to_delete)} {naturalsize(sum(stats.values()))}')
    report.append('----------------------')

    for td in to_delete:
        if exists(td):
            rmtree(td)

    return '\n'.join(report)
965+
966+
899967
def _rm_exists(fp, obj, _id, delete_files):
900968
try:
901969
_id = int(_id)

scripts/qiita-cron-job

+7-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ import click
1212

1313
from qiita_db.util import (
1414
purge_filepaths as qiita_purge_filepaths,
15-
empty_trash_upload_folder as qiita_empty_trash_upload_folder)
15+
empty_trash_upload_folder as qiita_empty_trash_upload_folder,
16+
quick_mounts_purge as qiita_quick_mounts_purge)
1617
from qiita_db.meta_util import (
1718
update_redis_stats as qiita_update_redis_stats,
1819
generate_biom_and_metadata_release as
@@ -62,5 +63,10 @@ def generate_plugin_releases():
6263
qiita_generate_plugin_releases()
6364

6465

66+
@commands.command()
def quick_mounts_purge():
    # run the purge and print the report it returns
    report = qiita_quick_mounts_purge()
    print(report)
69+
70+
6571
if __name__ == "__main__":
6672
commands()

0 commit comments

Comments
 (0)