Skip to content

Commit fe4ab37

Browse files
committed
desktop app fixed to new setup
1 parent 03627df commit fe4ab37

File tree

6 files changed

+64
-31
lines changed

6 files changed

+64
-31
lines changed

desktop_app/README.md

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,21 +11,42 @@ This folder contains a Tkinter-based desktop application that mirrors the origin
1111
2. Install project dependencies if you have not already:
1212
```bash
1313
python -m pip install -r requirements.txt
14-
playwright install
14+
playwright install chromium
1515
```
16-
3. Start the desktop app:
16+
3. Start the desktop app using the launch script (recommended):
1717
```bash
18-
python desktop_app/task_collector_app.py
18+
./desktop_app/launch_task_collector.sh
19+
```
20+
21+
Or manually with the correct PYTHONPATH:
22+
```bash
23+
cd /path/to/web-environments
24+
source .venv/bin/activate
25+
PYTHONPATH="$(pwd)/src:$(pwd):$PYTHONPATH" python desktop_app/task_collector_app.py
1926
```
2027

2128
The GUI will prompt for the same inputs as the CLI (source, task type, description). After clicking **Launch Task**, a Chromium browser starts recording. When the task is done, click **Complete Task** to close the browser and persist recordings/answers.
2229

2330
## Building a distributable bundle (PyInstaller)
2431

25-
The simplest way to ship the app to volunteers is to package it with [PyInstaller](https://pyinstaller.org/). Run the following from the project root on the platform you're building for (Mac builds on macOS, Windows builds on Windows, etc.):
32+
The simplest way to ship the app to collectors is to package it with [PyInstaller](https://pyinstaller.org/). Run the following from the project root on the platform you're building for (Mac builds on macOS, Windows builds on Windows, etc.):
2633

2734
```bash
2835
python -m pip install pyinstaller
36+
python desktop_app/build_release.py --target macos # or --target windows
37+
```
38+
39+
This script automatically:
40+
- Installs Playwright browsers into the bundle
41+
- Configures all necessary hidden imports
42+
- Packages everything into a ready-to-distribute ZIP file
43+
- Includes installation instructions for end users
44+
45+
The final ZIP will be created in `desktop_app/dist/` and can be shared directly with collaborators.
46+
47+
Alternatively, you can build manually with PyInstaller:
48+
49+
```bash
2950
pyinstaller desktop_app/task_collector_app.py \
3051
--name TaskCollector \
3152
--windowed \

desktop_app/build_release.py

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -132,19 +132,25 @@ def build_with_pyinstaller(
132132
"multiprocessing.connection",
133133
# Hidden imports for src modules
134134
"--hidden-import",
135-
"src.config.storage_config",
135+
"config.storage",
136136
"--hidden-import",
137-
"src.config.browser_config",
137+
"config.browser_config",
138138
"--hidden-import",
139-
"src.config.initial_tasks",
139+
"config.start",
140140
"--hidden-import",
141-
"src.browser.stealth_browser",
141+
"browser.browser",
142142
"--hidden-import",
143-
"src.source_data.database",
143+
"browser.recorder",
144144
"--hidden-import",
145-
"src.tasks.task",
145+
"db.database",
146+
"--hidden-import",
147+
"db.task",
148+
"--hidden-import",
149+
"db.models",
146150
"--hidden-import",
147151
"desktop_app.task_worker",
152+
"--hidden-import",
153+
"peewee",
148154
# Collect all submodules
149155
"--collect-all",
150156
"google.cloud",
@@ -159,6 +165,15 @@ def build_with_pyinstaller(
159165
# macOS specific
160166
]
161167

168+
# Add optional imports if available
169+
if module_importable("google.cloud._storage_v2"):
170+
cmd.extend(["--hidden-import", "google.cloud._storage_v2"])
171+
172+
if module_importable("grpc"):
173+
cmd.extend(["--hidden-import", "grpc", "--collect-all", "grpc"])
174+
if module_importable("grpc._cython.cygrpc"):
175+
cmd.extend(["--hidden-import", "grpc._cython.cygrpc"])
176+
162177
if target == "macos":
163178
cmd.extend(
164179
[
@@ -302,10 +317,3 @@ def main() -> None:
302317
except BuildError as exc:
303318
print(f"ERROR: {exc}", file=sys.stderr)
304319
sys.exit(1)
305-
if module_importable("google.cloud._storage_v2"):
306-
cmd.extend(["--hidden-import", "google.cloud._storage_v2"])
307-
308-
if module_importable("grpc"):
309-
cmd.extend(["--hidden-import", "grpc", "--collect-all", "grpc"])
310-
if module_importable("grpc._cython.cygrpc"):
311-
cmd.extend(["--hidden-import", "grpc._cython.cygrpc"])

desktop_app/launch_task_collector.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@
55
cd "$(dirname "$0")/.."
66

77
# Activate the virtual environment
8-
source venv/bin/activate
8+
source .venv/bin/activate
99

1010
# Set environment variables to prevent bus errors
1111
export DISPLAY=:0.0
1212
export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES
1313
export PYTHON_COREAUDIO_ALLOW_INSECURE_REQUESTS=1
14-
# Set Python path to include the project root
15-
export PYTHONPATH="$(pwd):$PYTHONPATH"
14+
# Set Python path to include the project root and src directory
15+
export PYTHONPATH="$(pwd)/src:$(pwd):$PYTHONPATH"
1616
echo "PYTHONPATH: $PYTHONPATH"
1717

1818
# Set Playwright browsers path

desktop_app/task_worker.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@
88
from typing import Any, Dict, Optional
99

1010
from browser.browser import StealthBrowser
11+
from browser.recorder import get_video_path
1112
from config.start import InitialTasks
12-
from src.db.task import CreateTaskDto, Task, TaskManager
13+
from db.task import CreateTaskDto, TaskManager
14+
from db.models import TaskModel
1315

1416
logger = logging.getLogger(__name__)
1517

@@ -53,8 +55,9 @@ async def runner() -> None:
5355
task_manager = TaskManager.get_instance()
5456
new_task = CreateTaskDto(description, task_type, source, website)
5557
task_id = task_manager.create_task(new_task)
56-
task = Task(task_id, description, task_type, source, website)
57-
task_manager.set_current_task(task)
58+
# Get the task we just created
59+
task_model = TaskModel.get_by_id(task_id)
60+
task_manager.set_current_task(task_model)
5861
_send_safe(pipe, {"type": "task_started", "task_id": task_id})
5962
_send_safe(
6063
pipe,
@@ -90,7 +93,7 @@ async def runner() -> None:
9093
finally:
9194
try:
9295
if task_manager and answer_to_save is not None:
93-
task_manager.save_task_answer(answer_to_save)
96+
task_manager.set_current_task_answer(answer_to_save)
9497
except Exception as answer_error: # pylint: disable=broad-except
9598
logger.warning("Failed to persist task answer: %s", answer_error)
9699

@@ -99,11 +102,9 @@ async def runner() -> None:
99102
current_task = task_manager.get_current_task()
100103
if current_task:
101104
# Save video path to database
102-
import os
103-
from config.storage import VIDEOS_DIR
104-
105-
video_path = os.path.join(VIDEOS_DIR, f"task_{current_task.id}")
106-
task_manager.set_current_task_video_path(video_path)
105+
task_manager.set_current_task_video_path(
106+
get_video_path(current_task.id)
107+
)
107108
task_manager.end_current_task()
108109
task_manager.set_current_task(None) # type: ignore[arg-type]
109110
except Exception as mgr_error: # pylint: disable=broad-except

src/browser/browser.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ async def console_handler(msg):
8383
return
8484
if "Blocked script execution in" in text:
8585
return
86+
if "Failed to load resource: net::ERR_NAME_NOT_RESOLVED" in text:
87+
return
8688
print(f"🌐 Browser console: {text}")
8789

8890
self.page.on("console", console_handler)
@@ -211,7 +213,6 @@ async def launch_browser(self, task_id: int) -> BrowserContext:
211213

212214
# TODO: collect env with further n steps depth, using replay to bypass auths sections
213215
# TODO: eval runs in parallel containers, or ran on kernel, hosting tunneled versions locally while it runs?
214-
# TODO: Fix collection tool with new changes
215216
# TODO: websockets? like e.g. ChatGPT doesn't allow for collecting anything
216217

217218
browser = await self.playwright.chromium.launch(

src/environments/launch.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,10 +241,12 @@ async def handle_routes_manually(self, route, request):
241241
# TODO: do we need to obfuscate in a more clever way?
242242
# - ?? Normalize JSON (remove volatile fields; sort keys) and hash; tolerate multipart boundary changes; ignore known nonce/timestamp params.
243243
# TODO: what if the request is sent twice, we'll be selecting the first one all the time.
244+
# semhash matching URL at times if they vary?
244245

245246
# TODO: this requires LM postprocessing selection of URLs to match or some dumb way for all POST? or smth
246247
# TODO: why when collecting, increasing/decreasing cart stuff fails
247248
# TODO: some assets in GET are also dynamic?, bunch of js/stylesheets are not found in HAR
249+
248250
urls_to_ignore_post_data = {
249251
"https://www.amazon.com/ax/claim",
250252
"https://www.amazon.com/aaut/verify/ap",
@@ -292,7 +294,7 @@ async def handle_routes_manually(self, route, request):
292294
return
293295

294296
logger.warning(
295-
"⚠️ No matching HAR entry found for %s, aborting",
297+
"⚠️ No matching HAR entry found for %s, aborting",
296298
request.url[:100] + "..." if len(request.url) > 100 else request.url,
297299
)
298300
await route.abort()

0 commit comments

Comments
 (0)