diff --git a/README.md b/README.md index d1217f0..543d468 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,8 @@ A dashboard for the Baskerville project: setup, labelling and feedback ## How to set up and run ### The backend -To have a fully functional Baskerville Dashboard you need to have Baskerville installed and set up. You will need a Baskerville config to continue. +The backend is tightly connected to your Baskerville instance: it uses the same database and relies on the `baskerville` library for some of its parts. +To have a fully functional Baskerville Dashboard you need Baskerville installed and set up, as well as a valid Baskerville config. To run the backend, rename the [`config.yaml.example`](backend/conf/config.yaml.example) and fill in the details: ```yaml @@ -31,16 +32,47 @@ APP_CONFIG: - 'test.feedback' # where test is the uuid of your organization, as provided to you by eq. It should be present in baskerville config. - 'test.registration' # you can use environment variables like: - !ENV '${ORG_UUID}.registration' ``` -The next step is to run the flask app: + +For the installation, you need to follow [Baskerville's instructions first](https://github.com/deflect-ca/baskerville/tree/develop#installation). +In short, `spark-iforest`, `esretriever` and `baskerville` should be installed in this order: +```bash +# clone and install spark-iforest +git clone https://github.com/titicaca/spark-iforest +cd spark-iforest/python +pip install . + +# clone and install esretriever - for the ElasticSearch pipeline +cd ../../ +git clone https://github.com/equalitie/esretriever.git +cd esretriever +pip install . + +# Finally, clone and install baskerville +cd ../ +git clone --branch dashboard_changes_pt2 https://github.com/deflect-ca/baskerville.git +cd baskerville +pip install -e . +``` + +Next, clone and install the dashboard: +```bash +git clone https://github.com/deflect-ca/baskerville_dashboard.git +cd baskerville_dashboard/backend +pip install -e . 
+``` + +Finally run the flask app: ```bash python app.py ``` The backend should be up and running on http://localhost:5000. You should be able to see `Baskerville-dashboard v0.0.1` in your browser. -*Note: This is the dev server. For deployment, see the options [here](https://flask.palletsprojects.com/en/1.1.x/deploying/))* +*Note: SocketIO for Python includes a production-grade web server. For deployment, see the options [here](https://flask.palletsprojects.com/en/1.1.x/deploying/).* Make sure that the communication between the backend, your Baskerville deployment (Spark Cluster, Redis, Kafka, Postgres) and the Prediction Center (through Kafka) is allowed. +*Note 2: For the feedback functionality described below, you need to have a running Baskerville feedback pipeline.* + ### The front-end The front-end is developed with Angular (11.1.0 currently). To run it: @@ -54,6 +86,38 @@ The website is served on http://localhost:4200 To deploy it and serve it through an NGINX for example you can follow the steps [here](https://angular.io/guide/deployment) +Example of deployment (NodeJS version: v14.8.0): +```bash +cd baskerville_dashboard/front-end +# install front-end packages - might take time +npm install + +# provide values for the following +export API_BASE_URL='https://api.baskerville-dashboard.deflect.ca/api/1' +export SOCKET_URL='https://api.baskerville-dashboard.deflect.ca' +export BASKERVILLE_DASH_ROOT=/root/baskerville_dashboard +export BASKERVILLE_ROOT=/root/baskerville +export ADMIN_PASS=admin_pass_very_secret +export DB_PORT=5432 +export SPARK_LOCAL_HOSTNAME=localhost + +# this builds the front-end with provided configuration and copies the result in /var/www for nginx +npm run config && ng build --prod && \ +rm -rf /var/www/baskerville_dashboard_frontend/ && \ +cp -r dist/baskerville_dashboard_frontend/ /var/www/baskerville_dashboard_frontend/ && \ + +# reload nginx +/etc/init.d/nginx reload +``` + +## Docker-compose +You can find a Dockerized version of this 
repo [here](https://github.com/deflect-ca/baskerville_client) +```bash +export DOCKER_KAFKA_HOST=$(ipconfig getifaddr en0) +docker-compose up -d dashboard +``` + + ## How to provide feedback 1. Login with the admin account. diff --git a/backend/setup.py b/backend/setup.py index 7e5cb7a..e84f1f4 100644 --- a/backend/setup.py +++ b/backend/setup.py @@ -34,5 +34,8 @@ 'baskerville_dashboard.utils', 'baskerville_dashboard.vm', ], + package_data={ + 'baskerville_dashboard': ['data/*'], + }, ) diff --git a/backend/src/baskerville_dashboard/app.py b/backend/src/baskerville_dashboard/app.py index 97b007d..09d7c14 100644 --- a/backend/src/baskerville_dashboard/app.py +++ b/backend/src/baskerville_dashboard/app.py @@ -8,11 +8,12 @@ import uuid import eventlet +eventlet.monkey_patch() + import redis from redis import Redis from pyaml_env import parse_config -eventlet.monkey_patch() from baskerville_dashboard.auth import Auth from baskerville_dashboard.db.manager import SessionManager @@ -150,8 +151,8 @@ def add_extra_users(config, session): ).first() new_user = False try: - for u in config.get('USERS'): - print(u) + for u in config.get('USERS', []): + logger.debug(f'Checking {u.get("username")}') user = session.query(User).filter_by(username=u['username']).first() if not user: user = User() @@ -203,18 +204,21 @@ def set_up_kafka_thread(app_config, baskerville_config): """ global KAFKA_CONSUMER_THREAD import threading - KAFKA_CONSUMER_THREAD = threading.Thread( - target=consume_from_kafka, - args=(app_config, baskerville_config,), - daemon=True - ) - KAFKA_CONSUMER_THREAD.start() + try: + KAFKA_CONSUMER_THREAD = threading.Thread( + target=consume_from_kafka, + args=(app_config, baskerville_config,), + daemon=True + ) + KAFKA_CONSUMER_THREAD.start() + except: + traceback.print_exc() + logger.error(f'COULD NOT CONNECT TO KAFKA. 
') def create_app(config=None, environment=None): global jwtApp, SECRET_KEY from baskerville_dashboard.db.manager import SessionManager - from baskerville.util.helpers import parse_config url_prefix = config['APP_CONFIG']['PREFIX'] SECRET_KEY = config['APP_CONFIG']['SECRET_KEY'] @@ -258,7 +262,7 @@ def create_app(config=None, environment=None): return app -app = create_app(parse_config('../config.yaml')) +app = create_app(parse_config('../../conf/config.yaml')) Session(app) CORS( app, diff --git a/backend/data/test_data_1k.json b/backend/src/baskerville_dashboard/data/test_data_1k.json similarity index 100% rename from backend/data/test_data_1k.json rename to backend/src/baskerville_dashboard/data/test_data_1k.json diff --git a/backend/data/training_config.sample.yaml b/backend/src/baskerville_dashboard/data/training_config.sample.yaml similarity index 100% rename from backend/data/training_config.sample.yaml rename to backend/src/baskerville_dashboard/data/training_config.sample.yaml diff --git a/backend/src/baskerville_dashboard/db/manager.py b/backend/src/baskerville_dashboard/db/manager.py index 8f0622c..658f02e 100644 --- a/backend/src/baskerville_dashboard/db/manager.py +++ b/backend/src/baskerville_dashboard/db/manager.py @@ -16,4 +16,4 @@ def set_engine(self, engine): @property def session(self): - return self.Session() + return self.Session diff --git a/backend/src/baskerville_dashboard/routes/retrain.py b/backend/src/baskerville_dashboard/routes/retrain.py index 27b2117..9a3ba15 100644 --- a/backend/src/baskerville_dashboard/routes/retrain.py +++ b/backend/src/baskerville_dashboard/routes/retrain.py @@ -7,9 +7,10 @@ import traceback import uuid +from pyaml_env import parse_config + from baskerville.db.dashboard_models import PendingWork from baskerville.models.config import TrainingConfig -from baskerville.util.helpers import parse_config from baskerville_dashboard.auth import login_required from baskerville_dashboard.db.manager import SessionManager from 
baskerville_dashboard.utils.helpers import ResponseEnvelope, \ diff --git a/backend/src/baskerville_dashboard/routes/try_baskerville.py b/backend/src/baskerville_dashboard/routes/try_baskerville.py index 2402991..04dcf97 100644 --- a/backend/src/baskerville_dashboard/routes/try_baskerville.py +++ b/backend/src/baskerville_dashboard/routes/try_baskerville.py @@ -23,8 +23,8 @@ get_baskerville_config, ReadLogs, ResponseEnvelope, is_compressed, \ get_extension, start_local_baskerville, get_default_data_path, \ process_details, is_process_running -from daemonize import Daemonize -from daemons.daemonizer import run +# from daemonize import Daemonize +# from daemons.daemonizer import run from requests.auth import HTTPBasicAuth from werkzeug.utils import secure_filename from flask import Blueprint, request, jsonify, session, current_app, url_for @@ -184,12 +184,14 @@ def start_baskerville_for(): else: try: args_to_action = (config, pipeline) + # TODO: get the values from the current_app config + # current_app.config kwargs_to_action = { 'BASKERVILLE_ROOT': os.environ['BASKERVILLE_ROOT'], - 'DB_HOST': os.environ['DB_HOST'], - 'DB_USER': os.environ['DB_USER'], - 'DB_PASS': os.environ['DB_PASS'], - 'DB_PORT': os.environ['DB_PORT'], + 'DB_HOST': os.environ.get('DB_HOST'), + 'DB_USER': os.environ.get('DB_USER'), + 'DB_PASS': os.environ.get('DB_PASS'), + 'DB_PORT': os.environ.get('DB_PORT'), } p = Process( daemon=True, diff --git a/backend/src/baskerville_dashboard/utils/helpers.py b/backend/src/baskerville_dashboard/utils/helpers.py index d673b77..5817595 100644 --- a/backend/src/baskerville_dashboard/utils/helpers.py +++ b/backend/src/baskerville_dashboard/utils/helpers.py @@ -112,7 +112,7 @@ def get_default_data_path(): :return: """ return os.path.join( - os.path.dirname(os.path.realpath(__file__)), '..', '..', 'data' + os.path.dirname(os.path.realpath(__file__)), '..', 'data' ) @@ -122,7 +122,7 @@ def get_default_conf_path(): :return: """ return os.path.join( - 
os.path.dirname(os.path.realpath(__file__)), '..', '..', 'conf' + os.path.dirname(os.path.realpath(__file__)), '..', '..', '..', 'conf' ) diff --git a/backend/src/baskerville_dashboard/utils/kafka.py b/backend/src/baskerville_dashboard/utils/kafka.py index b16dd30..2a30092 100644 --- a/backend/src/baskerville_dashboard/utils/kafka.py +++ b/backend/src/baskerville_dashboard/utils/kafka.py @@ -7,6 +7,8 @@ import json import traceback +import kafka.errors + from baskerville.db.dashboard_models import Message, PendingWork from baskerville.models.config import KafkaConfig from baskerville_dashboard.utils.enums import NotificationKind @@ -25,14 +27,36 @@ def value_deserializer(v): return {} +def create_kafka_topics(bootstrap_servers, topics): + from kafka.admin import KafkaAdminClient, NewTopic + admin_client = KafkaAdminClient( + bootstrap_servers=bootstrap_servers, + client_id='test' + ) + + topic_list = [] + for topic in topics: + topic_list.append( + NewTopic(name=topic, num_partitions=1, replication_factor=1) + ) + try: + admin_client.create_topics( + new_topics=topic_list, validate_only=False + ) + except kafka.errors.TopicAlreadyExistsError: + pass + + def get_kafka_consumer(kafka_config: KafkaConfig, topics=()): global KAFKA_CONSUMER from kafka import KafkaConsumer from kafka.client_async import selectors if not KAFKA_CONSUMER: + bootstrap_servers = f"{kafka_config['connection']['bootstrap_servers']}:9092" + create_kafka_topics(bootstrap_servers, topics) KAFKA_CONSUMER = KafkaConsumer( *topics, - bootstrap_servers=kafka_config['bootstrap_servers'], + bootstrap_servers=bootstrap_servers, selector=selectors.DefaultSelector, auto_offset_reset='earliest', value_deserializer=value_deserializer @@ -47,7 +71,7 @@ def get_aiokafka_consumer(kafka_config: KafkaConfig, topics=()): if not ASYNC_KAFKA_CONSUMER: ASYNC_KAFKA_CONSUMER = AIOKafkaConsumer( *topics, - bootstrap_servers=kafka_config['bootstrap_servers'], + 
bootstrap_servers=kafka_config['connection']['bootstrap_servers'], auto_offset_reset='earliest', value_deserializer=value_deserializer ) @@ -58,14 +82,15 @@ def get_kafka_producer(kafka_config: KafkaConfig): global KAFKA_PRODUCER from kafka.client_async import selectors if not KAFKA_PRODUCER: + connection = kafka_config['connection'] KAFKA_PRODUCER = KafkaProducer( - bootstrap_servers=kafka_config.bootstrap_servers, + bootstrap_servers=connection['bootstrap_servers'], selector=selectors.DefaultSelector, - security_protocol=kafka_config['security_protocol'], - ssl_check_hostname=kafka_config['ssl_check_hostname'], - ssl_cafile=kafka_config['ssl_cafile'], - ssl_certfile=kafka_config['ssl_certfile'], - ssl_keyfile=kafka_config['ssl_keyfile'] + security_protocol=connection['security_protocol'], + ssl_check_hostname=connection['ssl_check_hostname'], + ssl_cafile=connection['ssl_cafile'], + ssl_certfile=connection['ssl_certfile'], + ssl_keyfile=connection['ssl_keyfile'] ) return KAFKA_PRODUCER