97 changes: 52 additions & 45 deletions rfi_file_monitor/engines/file_watchdog_engine.py
@@ -20,6 +20,7 @@
exported_filetype,
with_advanced_settings,
with_pango_docs,
do_bulk_upload,
)
from .file_watchdog_engine_advanced_settings import (
FileWatchdogEngineAdvancedSettings,
@@ -125,39 +126,57 @@ def should_exit(self, value: bool):

def _search_for_existing_files(self, directory: Path) -> List[RegularFile]:
rv: List[RegularFile] = list()
for child in directory.iterdir():
if (
child.is_file()
and not child.is_symlink()
and match_path(
child,
# walk the monitored directory recursively and collect regular files
# that match the configured patterns
path_tree = os.walk(directory)
for root, dirs, files in path_tree:
for fname in files:
file_path = Path(root, fname)
if not file_path.is_symlink() and match_path(
file_path,
included_patterns=self._included_patterns,
excluded_patterns=self._excluded_patterns,
case_sensitive=False,
)
):
):
relative_file_path = file_path.relative_to(
self.params.monitored_directory
)
_file = RegularFile(
str(file_path),
relative_file_path,
get_file_creation_timestamp(file_path),
FileStatus.SAVED,
)
rv.append(_file)
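# keep the queue manager's total-file count up to date via the GLib main loop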
GLib.idle_add(
self._engine._appwindow._queue_manager.get_total_files_in_path,
len(rv),
priority=GLib.PRIORITY_DEFAULT_IDLE,
)

file_path = directory.joinpath(child)
relative_file_path = file_path.relative_to(
self.params.monitored_directory
)
_file = RegularFile(
str(file_path),
relative_file_path,
get_file_creation_timestamp(file_path),
FileStatus.SAVED,
)
rv.append(_file)
elif (
self.params.monitor_recursively
and child.is_dir()
and not child.is_symlink()
):
rv.extend(
self._search_for_existing_files(directory.joinpath(child))
)
return rv

@do_bulk_upload
def process_existing_files(self, existing_files):
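# hand the existing files to the queue manager in bulk; on failure,
# clean up and abort the engine, then close the task window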
try:
GLib.idle_add(
self._engine._appwindow._queue_manager.add,
existing_files,
priority=GLib.PRIORITY_DEFAULT_IDLE,
)
except Exception as e:
self._engine.cleanup()
GLib.idle_add(
self._engine.abort,
self._task_window,
e,
priority=GLib.PRIORITY_HIGH,
)
GLib.idle_add(
self._engine.kill_task_window,
self._task_window,
priority=GLib.PRIORITY_HIGH,
)
return

def run(self):
# confirm patterns are valid
if bool(
@@ -180,24 +199,12 @@ def run(self):
self._task_window.set_text,
"<b>Processing existing files...</b>",
)
try:
existing_files = self._search_for_existing_files(
Path(self.params.monitored_directory)
)
GLib.idle_add(
self._engine._appwindow._queue_manager.add,
existing_files,
priority=GLib.PRIORITY_DEFAULT_IDLE,
)
except Exception as e:
self._engine.cleanup()
GLib.idle_add(
self._engine.abort,
self._task_window,
e,
priority=GLib.PRIORITY_HIGH,
)
return

existing_files = self._search_for_existing_files(
Path(self.params.monitored_directory)
)
self.process_existing_files(existing_files)
return

# if we get here, things should be working.
# close task_window
6 changes: 5 additions & 1 deletion rfi_file_monitor/queue_manager.py
@@ -38,6 +38,7 @@ def __init__(self, appwindow):
self._files_dict: OrderedDictType[str, File] = OrderedDict()
self._jobs_list: Final[List[Job]] = list()
self._njobs_running: int = 0
self._total_files_in_path: int = 0

kwargs = dict(
halign=Gtk.Align.FILL,
@@ -453,6 +454,9 @@ def stop(self):
self._running = False
self.notify("running")

def get_total_files_in_path(self, number_of_files: int):
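"""Record the total number of files found in the monitored path; shown in the status bar."""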
self._total_files_in_path = number_of_files

def _files_dict_timeout_cb(self, *user_data):
"""
This function runs every second, and will take action based on the status of all files in the dict
@@ -563,7 +567,7 @@ def _files_dict_timeout_cb(self, *user_data):
# update status bar
self._appwindow._status_grid.get_child_at(
0, 0
).props.label = f"Total: {len(self._files_dict)}"
).props.label = f"Total: {self._total_files_in_path}"
for _status, _counter in status_counters.items():
self._appwindow._status_grid.get_child_at(
int(_status), 0
40 changes: 40 additions & 0 deletions rfi_file_monitor/utils/decorators.py
@@ -18,6 +18,8 @@
import collections.abc
import functools
import threading
from time import sleep
from ..file import FileStatus

logger = logging.getLogger(__name__)

@@ -187,3 +189,41 @@ def wrapper(self: Operation, file: File):
raise NotImplementedError(f"{type(file)} is currently unsupported")

return wrapper


def do_bulk_upload(process_existing_files: Callable[[Engine, List], None]):
@functools.wraps(process_existing_files)
def wrapper(self: Engine, existing_files: List):
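# Submit existing files to the queue in chunks: enqueue one chunk, then
# wait until most of it has been processed before sending the next, so
# a large backlog does not overwhelm the queue manager.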

# chunk_size is hard-coded but empirically derived: it is the largest
# number of files the queue can take without a long wait on a standard
# machine with 8 CPUs and 8 GB of RAM
chunk_size = 2000
if len(existing_files) <= chunk_size:
# small batches can be handed over in a single call
process_existing_files(self, existing_files)
return
chunked_input = [
existing_files[i : i + chunk_size]
for i in range(0, len(existing_files), chunk_size)
]
n = 1
processed_files = 0
for chunk in chunked_input:
process_existing_files(self, chunk)

# wait until ~90% of the files submitted so far have been processed
# before sending the next chunk; cap the threshold at the total file
# count so the final, smaller chunk cannot stall this loop
while processed_files < min(chunk_size * n, len(existing_files)) * 0.9:
processed_files = sum(
1
for item in self._engine._appwindow._queue_manager._files_dict.values()
if item.status == FileStatus.SUCCESS
)
sleep(1)
n += 1

return wrapper
2 changes: 1 addition & 1 deletion setup.cfg
@@ -65,7 +65,7 @@ rfi_file_monitor.files =
WeightedRegularFile = rfi_file_monitor.files.regular_file:WeightedRegularFile
S3Object = rfi_file_monitor.files.s3_object:S3Object
Directory = rfi_file_monitor.files.directory:Directory
gui_scripts =
console_scripts =
rfi-file-monitor = rfi_file_monitor:main

[bdist_wheel]