feat(store-folder): Add recursiveness and ignore
jofaval committed Oct 2, 2022
1 parent 9735c7e commit ae77716
Showing 1 changed file with 105 additions and 26 deletions.
131 changes: 105 additions & 26 deletions python/store-folder-state.py
@@ -1,21 +1,30 @@
from multiprocessing import cpu_count, Pool
import glob
from os import listdir, stat
from os.path import dirname, join
from datetime import datetime
from typing import List
from typing import List, Tuple
import re

"""
# CHANGELOG #
## 2022-06-25
# 2022-10-02
### Added
# Added
- Implemented ignore functionality with regex patterns.
- Implemented multithreading functionality.
# 2022-06-25
# Added
- Implemented the recursive functionality.
## 2022-06-24
# 2022-06-24
### Added
# Added
- Created and developed the basic prototype to store the state of a messy folder.
- Store datetime in the folder.
@@ -24,6 +33,13 @@

BASEPATH = dirname(__file__)

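# Paths matching any of these patterns are skipped by filter_files.
# Note: Python's re module takes flags as arguments (e.g. re.IGNORECASE)
# rather than JavaScript-style '/pattern/i' suffixes.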
IGNORE_REGEX = [
re.compile(r'node_modules', re.IGNORECASE),
]

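# Leave one core free for the parent process (assumes a multi-core machine,
# since Pool rejects a worker count of 0).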
MULTITHREAD_WORKERS = cpu_count() - 1
FILES_COLLECTOR: List[str] = list()


def format_datetime(
    date: datetime = None,
@@ -69,30 +85,61 @@ def get_filename(


def prepare_files(
    filename: str
    data: Tuple[List[str], str]
) -> str:
"""
Prepares the files with their sizes and last modification date
filename : str
The name of the file to evaluate
data: Tuple[list, str]
collector : List[str]
The array with filenames to append the result to
filename : str
The name of the file to evaluate
returns str
"""
    absolute_path = join(BASEPATH, filename)
    filedetails = stat(absolute_path)
    filesize = filedetails.st_size / 1024 / 1024
    creation_date = format_datetime(
        timestamp=filedetails.st_ctime,
        format='%d/%m/%y %H:%M:%S'
    )
    update_date = format_datetime(
        timestamp=filedetails.st_atime,
        format='%d/%m/%y %H:%M:%S'
    )

    real_filename = absolute_path.replace(join(BASEPATH, ''), '')
    return f'{real_filename} - {round(filesize, 2)} MB - CREATION: {creation_date} - MODIFICATION: {update_date}'
    global FILES_COLLECTOR
    _, filename = data
    try:
        absolute_path = join(BASEPATH, filename)
        filedetails = stat(absolute_path)
        filesize = filedetails.st_size / 1024 / 1024
        creation_date = format_datetime(
            timestamp=filedetails.st_ctime,
            format='%d/%m/%y %H:%M:%S'
        )
        update_date = format_datetime(
            timestamp=filedetails.st_mtime,
            format='%d/%m/%y %H:%M:%S'
        )

        real_filename = absolute_path.replace(join(BASEPATH, ''), '')
        processed_filename = f'{real_filename} - {round(filesize, 2)} MB - CREATION: {creation_date} - MODIFICATION: {update_date}'
        # FILES_COLLECTOR.append(processed_filename)

        filename = get_filename()
        full_filename_path = join(BASEPATH, filename)
        with open(full_filename_path, 'a+') as file_writer:
            file_writer.write(processed_filename)
            file_writer.write('\n')
    except OSError:
        return f'Could not compute "{filename}"'


def filter_files(filename: str) -> bool:
"""
Filters out all of the files that should be ignored
filename: str
The filename to evaluate
returns bool
"""
for path_to_ignore in IGNORE_REGEX:
if path_to_ignore.search(filename):
return False

return True


def get_files_list(
@@ -120,20 +167,52 @@ def get_files_list(
return files


def main() -> None:
def main(
    bulk_write: bool = False
) -> None:
    """
    Stores the current state of the files at the root level of this file's folder
    bulk_write : bool
        Not recommended for large batches of files:
        it may be risky, and information may be lost if the process crashes.
        False by default
    returns None
    """
    global FILES_COLLECTOR

    filename = get_filename()
    full_filename_path = join(BASEPATH, filename)
    print(f'Stored at: "{full_filename_path}"')
    print(f'Will be stored at: "{full_filename_path}"')

    with open(full_filename_path, 'w+') as file_writer:
        file_writer.write('')

    scanned_files: List[Tuple[None, str]] = [
        (None, file)
        for file in get_files_list(BASEPATH, recursive=True)
        if filter_files(file)
    ]
    print('Files to evaluate', len(scanned_files))

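    # NOTE: Pool workers run in separate processes, so appends to the global
    # FILES_COLLECTOR never reach the parent; prepare_files writes each
    # result to the output file itself instead.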
    with Pool(MULTITHREAD_WORKERS) as executor:
        executor.map(prepare_files, scanned_files)

    global FILES_COLLECTOR
    print(len(FILES_COLLECTOR))
    if not FILES_COLLECTOR:
        print('No files could be retrieved')
        return

    files = map(prepare_files, get_files_list(BASEPATH, recursive=True))
    # prepared_files = map(prepare_files, scanned_files)
    with open(full_filename_path, 'w+') as file_writer:
        file_writer.write('\n'.join(files))
        if bulk_write:
            file_writer.write('\n'.join(FILES_COLLECTOR))
        else:
            for line in FILES_COLLECTOR:
                file_writer.write(line)
                file_writer.write('\n')


if __name__ == '__main__':

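Side note on the multithreading change: Pool.map already returns each worker's return value to the parent process, so results can be gathered without a shared global like FILES_COLLECTOR, which worker processes cannot mutate across process boundaries. A minimal standalone sketch of that pattern follows; it is not part of this commit, and describe_file and the paths list are hypothetical stand-ins for prepare_files and the scanned file list:

from multiprocessing import Pool, cpu_count
from os import stat

def describe_file(path: str) -> str:
    # Runs in a worker process; returning the result (instead of appending
    # to a global) lets Pool.map carry it back to the parent.
    details = stat(path)
    return f'{path} - {round(details.st_size / 1024 / 1024, 2)} MB'

if __name__ == '__main__':
    paths = ['store-folder-state.py']  # hypothetical input list
    with Pool(max(cpu_count() - 1, 1)) as pool:
        lines = pool.map(describe_file, paths)  # results come back in input order
    print('\n'.join(lines))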