|
49 | 49 | from bcrypt import hashpw, gensalt
|
50 | 50 | from functools import partial
|
51 | 51 | from os.path import join, basename, isdir, exists, getsize
|
52 |
| -from os import walk, remove, listdir, rename |
| 52 | +from os import walk, remove, listdir, rename, stat |
53 | 53 | from glob import glob
|
54 | 54 | from shutil import move, rmtree, copy as shutil_copy
|
55 | 55 | from openpyxl import load_workbook
|
56 | 56 | from tempfile import mkstemp
|
57 | 57 | from csv import writer as csv_writer
|
58 | 58 | from datetime import datetime
|
| 59 | +from time import time as now |
59 | 60 | from itertools import chain
|
60 | 61 | from contextlib import contextmanager
|
61 | 62 | import h5py
|
@@ -896,6 +897,73 @@ def purge_filepaths(delete_files=True):
|
896 | 897 | qdb.sql_connection.TRN.execute()
|
897 | 898 |
|
898 | 899 |
|
def quick_mounts_purge():
    r"""Quickly purge on-disk artifact folders not linked to the database

    It is "quick" because it only slightly relies on the database: the
    mountpoints are read with a single query and each candidate folder is
    checked by trying to instantiate the artifact named by the folder.

    Returns
    -------
    str
        A report with one line per mountpoint folder name (tab separated
        from the human readable size removed from it) plus a line with the
        total number of removed folders and their combined size.

    Raises
    ------
    ValueError
        If a folder maps to an existing artifact whose artifact type does not
        start with the name of the folder that contains it.

    Notes
    -----
    Currently we delete anything older than 30 days that is not linked
    to the database. This number is intentionally hardcoded in the code.
    At the time of this writing this number seems high but keeping it
    this way to be safe. In the future, if needed, it can be changed.
    """
    with qdb.sql_connection.TRN:
        main_sql = """SELECT data_directory_id FROM qiita.artifact_type at
                        LEFT JOIN qiita.data_directory dd ON (
                            dd.data_type = at.artifact_type)
                        WHERE subdirectory = true"""
        qdb.sql_connection.TRN.add(main_sql)
        mp_ids = qdb.sql_connection.TRN.execute_fetchflatten()
        mounts = [qdb.util.get_mountpoint_path_by_id(x) for x in mp_ids]

    # Only directories whose name is a plain decimal integer are candidates:
    # - isdecimal (unlike isnumeric) guarantees that int() below succeeds;
    #   isnumeric also accepts characters such as superscripts or fractions
    # - isdir avoids handing a regular file to rmtree, which would raise
    folders = [join(m, f) for m in mounts for f in listdir(m)
               if f.isdecimal() and isdir(join(m, f))]

    # getting all unlinked folders
    to_delete = []
    for folder in folders:
        # the last two path components are <mountpoint folder>/<artifact id>
        vals = folder.split('/')
        aid = int(vals[-1])
        artifact_type = vals[-2]
        # NOTE(review): this mountpoint is always skipped; presumably its
        # numeric folders are not artifact ids -- confirm
        if artifact_type == 'FeatureData[Taxonomy]':
            continue

        try:
            a = qdb.artifact.Artifact(aid)
        except qdb.exceptions.QiitaDBUnknownIDError:
            # no artifact with this id, so the folder is unlinked
            to_delete.append(folder)
            continue
        # a linked folder sitting in the wrong mountpoint is unexpected; stop
        # before deleting anything so it can be reviewed
        if not a.artifact_type.startswith(artifact_type):
            raise ValueError('Review artifact type: '
                             f'{a.id} {artifact_type} {a.artifact_type}')

    # now, let's just keep those older than 30 days (in seconds); anything
    # modified at or after the cutoff is left alone
    cutoff = now() - (30*86400)
    to_delete = {x for x in to_delete if stat(x).st_mtime < cutoff}

    # get stats to report: bytes to be removed per mountpoint folder name
    stats = dict()
    for td in to_delete:
        mount_name = td.split('/')[-2]
        size = sum(getsize(join(p, fp)) for p, _, fs in walk(td)
                   for fp in fs)
        stats[mount_name] = stats.get(mount_name, 0) + size

    report = ['----------------------']
    # sorted so the report does not depend on set iteration order
    for mount_name, size in sorted(stats.items()):
        report.append(f'{mount_name}\t{naturalsize(size)}')
    report.append(
        f'Total files {len(to_delete)} {naturalsize(sum(stats.values()))}')
    report.append('----------------------')

    for td in to_delete:
        # the folder may have been removed since it was listed
        if exists(td):
            rmtree(td)

    return '\n'.join(report)
| 965 | + |
| 966 | + |
899 | 967 | def _rm_exists(fp, obj, _id, delete_files):
|
900 | 968 | try:
|
901 | 969 | _id = int(_id)
|
|
0 commit comments