Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@
**/*.iml
**/application.conf
*.conf
docker/.migrations
docker/.es-migrations
406 changes: 229 additions & 177 deletions README.md

Large diffs are not rendered by default.

55 changes: 0 additions & 55 deletions docker-compose.yml

This file was deleted.

77 changes: 77 additions & 0 deletions docker/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Local development stack: YugabyteDB (YCQL/YSQL store), JanusGraph backed by
# Yugabyte's CQL interface, a single-node Elasticsearch, Kafka in KRaft mode,
# and an optional Redis (enabled via the "redis" profile).
services:

  # YugabyteDB — serves CQL on 9042 and PostgreSQL-compatible SQL on 5433.
  # Admin UIs are remapped to 7001/9001 on the host (container 7000/9000).
  yugabyte:
    image: yugabytedb/yugabyte:2025.2.0.1-b1
    container_name: yugabyte
    ports:
      - "7001:7000"
      - "9001:9000"
      - "9042:9042"
      - "5433:5433"
    # Run in the foreground (--daemon=false, required for a container PID 1)
    # and bind both proxies on all interfaces so the janusgraph container and
    # the host can connect.
    command: >
      bin/yugabyted start --daemon=false
      --listen=0.0.0.0
      --tserver_flags=cql_proxy_bind_address=0.0.0.0:9042,pgsql_proxy_bind_address=0.0.0.0:5433
    # Healthy once the CQL proxy answers a DESCRIBE; janusgraph's depends_on
    # waits for this condition. Up to 12 * 10s = 2 minutes for startup.
    healthcheck:
      test: ["CMD", "/home/yugabyte/bin/ycqlsh", "127.0.0.1", "9042", "-u", "cassandra", "-p", "cassandra", "-e", "DESCRIBE KEYSPACES"]
      interval: 10s
      timeout: 5s
      retries: 12
    volumes:
      - yugabyte-data:/home/yugabyte/var

  # JanusGraph (Gremlin Server on 8182), configured against Yugabyte's CQL
  # endpoint via the mounted conf files (see janusgraph/conf/).
  janusgraph:
    image: ghcr.io/sunbird-spark/janusgraph:develop_b96ef55
    container_name: janusgraph
    # Image is built for amd64; on arm64 hosts this runs under emulation.
    platform: linux/amd64
    ports:
      - "8182:8182"
    environment:
      - JAVA_OPTIONS=-Xms256m -Xmx512m
    volumes:
      - ./janusgraph/conf/janusgraph.properties:/opt/bitnami/janusgraph/conf/janusgraph.properties
      - ./janusgraph/conf/gremlin-server.yaml:/opt/bitnami/janusgraph/conf/gremlin-server.yaml
      - ./janusgraph/scripts/schema_init.groovy:/opt/bitnami/janusgraph/scripts/schema_init.groovy
    depends_on:
      yugabyte:
        condition: service_healthy

  # Single-node Elasticsearch; indices are created by init-elasticsearch.sh.
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.10.2
    container_name: sunbird_es
    ports:
      - "9200:9200"
      - "9300:9300"
    environment:
      - discovery.type=single-node
    volumes:
      - es-data:/usr/share/elasticsearch/data

  # Kafka in KRaft mode (no ZooKeeper), single combined broker/controller.
  # NOTE(review): the advertised listener is 127.0.0.1, so only host-side
  # clients can bootstrap; other containers cannot reach kafka by service
  # name — confirm this matches how the services are run.
  kafka:
    image: docker.io/sanketikahub/kafka:4.0.0
    container_name: kafka
    ports:
      - "9092:9092"
    environment:
      - KAFKA_CFG_NODE_ID=1
      - KAFKA_CFG_PROCESS_ROLES=broker,controller
      - KAFKA_CFG_LISTENERS=PLAINTEXT://:9092,CONTROLLER://:9093
      - KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://127.0.0.1:9092
      - KAFKA_CFG_CONTROLLER_LISTENER_NAMES=CONTROLLER
      - KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP=CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT
      - KAFKA_CFG_CONTROLLER_QUORUM_VOTERS=1@kafka:9093
      - KAFKA_CFG_OFFSETS_TOPIC_REPLICATION_FACTOR=1
      - KAFKA_CFG_AUTO_CREATE_TOPICS_ENABLE=true

  # Redis is optional — start with: docker compose --profile redis up -d
  redis:
    image: redis:6.0.8
    container_name: sunbird_redis
    ports:
      - "6379:6379"
    profiles:
      - redis

# Named volumes so database state survives container re-creation.
volumes:
  yugabyte-data:
  es-data:
98 changes: 98 additions & 0 deletions docker/init-elasticsearch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#!/bin/bash
# Downloads ES index and mapping definitions from sunbird-devops and applies them
# against the local Elasticsearch container.
#
# Usage: ./init-elasticsearch.sh [BRANCH]
#   BRANCH: branch of sunbird-devops to use (default: release-8.0.0)
#
# Prerequisites: docker must be running with the sunbird_es container up and healthy.
#
# Exits non-zero if the clone fails, ES never becomes reachable, or any
# index/mapping operation fails.

set -e

BRANCH="${1:-release-8.0.0}"
ES_HOST="http://localhost:9200"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DOWNLOADS_DIR="${SCRIPT_DIR}/.es-migrations"
REPO_URL="https://github.com/project-sunbird/sunbird-devops.git"
REPO_PATH="ansible/roles/es7-mapping/files"

# Indices to create; each needs indices/<name>.json and mappings/<name>-mapping.json
# in the cloned repo.
INDICES=(
  "compositesearch"
)

# Remove the sparse clone on ANY exit. With `set -e` an unconditional trailing
# `rm -rf` only runs on the success path and a failed run would leave a stale
# checkout behind.
trap 'rm -rf "${DOWNLOADS_DIR}"' EXIT

echo "Downloading ES index/mapping definitions (branch: ${BRANCH})..."
rm -rf "${DOWNLOADS_DIR}"
# Sparse, blobless, shallow clone: fetch only the mapping files we need.
if ! git clone --depth 1 --branch "${BRANCH}" --filter=blob:none --sparse "${REPO_URL}" "${DOWNLOADS_DIR}"; then
  echo "ERROR: Failed to clone ${REPO_URL} (branch: ${BRANCH})"
  exit 1
fi
cd "${DOWNLOADS_DIR}"
if ! git sparse-checkout set "${REPO_PATH}"; then
  echo "ERROR: Failed to sparse-checkout ${REPO_PATH}"
  exit 1
fi
cd "${SCRIPT_DIR}"

# Wait for Elasticsearch to be ready (max 30 attempts * 2s = 60 seconds).
echo "Waiting for Elasticsearch at ${ES_HOST}..."
RETRIES=0
MAX_RETRIES=30
until curl -s "${ES_HOST}/_cluster/health" > /dev/null 2>&1; do
  RETRIES=$((RETRIES + 1))
  if [ ${RETRIES} -ge ${MAX_RETRIES} ]; then
    echo "ERROR: Elasticsearch not reachable after ${MAX_RETRIES} attempts."
    exit 1
  fi
  sleep 2
done
echo "Elasticsearch is ready."

FAILED=0
for index in "${INDICES[@]}"; do
  index_file="${DOWNLOADS_DIR}/${REPO_PATH}/indices/${index}.json"
  mapping_file="${DOWNLOADS_DIR}/${REPO_PATH}/mappings/${index}-mapping.json"

  # Create the index. Check existence explicitly (GET returns 200 for an
  # existing index) instead of interpreting any HTTP 400 from the PUT as
  # "already exists" — 400 also covers genuine client errors such as a
  # malformed settings file, which must be reported as failures.
  if [ -f "${index_file}" ]; then
    EXISTS=$(curl -s -o /dev/null -w "%{http_code}" "${ES_HOST}/${index}")
    if [ "${EXISTS}" = "200" ]; then
      echo "SKIP: index ${index} already exists"
    else
      STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X PUT "${ES_HOST}/${index}" \
        -H "Content-Type: application/json" \
        -d @"${index_file}")
      if [ "${STATUS}" = "200" ] || [ "${STATUS}" = "201" ]; then
        echo "OK: index ${index} created"
      else
        echo "FAIL: index ${index} (HTTP ${STATUS})"
        FAILED=$((FAILED + 1))
        continue
      fi
    fi
  else
    echo "SKIP: ${index}.json not found"
    continue
  fi

  # Apply (or re-apply) the mapping; this also runs when the index already
  # existed, matching PUT /_mapping's idempotent behavior for compatible maps.
  if [ -f "${mapping_file}" ]; then
    STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X PUT "${ES_HOST}/${index}/_mapping" \
      -H "Content-Type: application/json" \
      -d @"${mapping_file}")
    if [ "${STATUS}" = "200" ] || [ "${STATUS}" = "201" ]; then
      echo "OK: mapping for ${index} applied"
    else
      echo "FAIL: mapping for ${index} (HTTP ${STATUS})"
      FAILED=$((FAILED + 1))
    fi
  else
    echo "SKIP: ${index}-mapping.json not found"
  fi
done

echo ""
if [ ${FAILED} -gt 0 ]; then
  echo "${FAILED} operation(s) failed."
  exit 1
else
  echo "All ES indices and mappings created successfully."
fi
93 changes: 93 additions & 0 deletions docker/init-yugabyte.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#!/bin/bash
# Downloads CQL migration scripts from sunbird-spark-installer and runs them
# against the local YugabyteDB container.
#
# Usage: ./init-yugabyte.sh [ENVIRONMENT] [BRANCH]
#   ENVIRONMENT: keyspace prefix (default: dev)
#   BRANCH: branch of sunbird-spark-installer to use (default: develop)
#
# Prerequisites: docker must be running with the yugabyte container up.
#
# Exits non-zero if the clone fails or any migration fails.

set -e

ENV="${1:-dev}"
BRANCH="${2:-develop}"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
MIGRATIONS_DIR="${SCRIPT_DIR}/.migrations"
REPO_URL="https://github.com/Sunbird-Spark/sunbird-spark-installer.git"
REPO_BASE="scripts/sunbird-yugabyte-migrations"

# Remove the sparse clone on ANY exit. With `set -e` an unconditional trailing
# `rm -rf` only runs on the success path and a failed run would leave a stale
# checkout behind.
trap 'rm -rf "${MIGRATIONS_DIR}"' EXIT

echo "Downloading CQL migration scripts (branch: ${BRANCH})..."
rm -rf "${MIGRATIONS_DIR}"
# Sparse, blobless, shallow clone: fetch only the migration directories.
if ! git clone --depth 1 --branch "${BRANCH}" --filter=blob:none --sparse "${REPO_URL}" "${MIGRATIONS_DIR}"; then
  echo "ERROR: Failed to clone ${REPO_URL} (branch: ${BRANCH})"
  exit 1
fi
cd "${MIGRATIONS_DIR}"
if ! git sparse-checkout set "${REPO_BASE}/sunbird-knowlg" "${REPO_BASE}/sunbird-inquiry"; then
  echo "ERROR: Failed to sparse-checkout migration directories"
  exit 1
fi
cd "${SCRIPT_DIR}"

FAILED=0

# run_migrations MODULE CQL_FILE...
# Substitutes ${ENV} in each CQL file and executes it inside the yugabyte
# container via ycqlsh. Missing files are skipped; failures are counted in
# the global FAILED so the script can report a non-zero exit at the end.
run_migrations() {
  local module="$1"
  shift
  local cql_files=("$@")
  local repo_path="${REPO_BASE}/${module}"

  echo ""
  echo "Running ${module} migrations with ENV=${ENV}..."

  for cql_file in "${cql_files[@]}"; do
    src="${MIGRATIONS_DIR}/${repo_path}/${cql_file}"
    if [ ! -f "${src}" ]; then
      echo "SKIP: ${module}/${cql_file} not found"
      continue
    fi

    # mktemp avoids collisions between concurrent runs (a fixed /tmp name
    # could be clobbered). Use '|' as the sed delimiter so an ENV value
    # containing '/' cannot break the substitution.
    tmp="$(mktemp "/tmp/${module}_XXXXXX.cql")"
    sed "s|\${ENV}|${ENV}|g" "${src}" > "${tmp}"

    docker cp "${tmp}" yugabyte:/tmp/"${cql_file}"
    if docker exec yugabyte /home/yugabyte/bin/ycqlsh 127.0.0.1 9042 \
        -u cassandra -p cassandra \
        -f /tmp/"${cql_file}" 2>&1; then
      echo "OK: ${module}/${cql_file}"
    else
      echo "FAIL: ${module}/${cql_file}"
      FAILED=$((FAILED + 1))
    fi
    rm -f "${tmp}"
  done
}

# sunbird-knowlg migrations
run_migrations "sunbird-knowlg" \
  "sunbird.cql" \
  "lock_db.cql" \
  "dialcodes.cql" \
  "content_store.cql" \
  "contentstore.cql" \
  "category_store.cql" \
  "dialcode_store.cql" \
  "hierarchy_store.cql" \
  "platform_db.cql" \
  "script_store.cql"

# sunbird-inquiry migrations
run_migrations "sunbird-inquiry" \
  "hierarchy_store.cql" \
  "question_store.cql"

echo ""
if [ ${FAILED} -gt 0 ]; then
  echo "${FAILED} migration(s) failed."
  exit 1
else
  echo "All migrations completed successfully."
fi
32 changes: 32 additions & 0 deletions docker/janusgraph/conf/gremlin-server.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Gremlin Server configuration for the local JanusGraph container
# (mounted to /opt/bitnami/janusgraph/conf/ by docker-compose).
host: 0.0.0.0
port: 8182
# Max script evaluation time in milliseconds.
evaluationTimeout: 60000
channelizer: org.apache.tinkerpop.gremlin.server.channel.WebSocketChannelizer
graphManager: org.janusgraph.graphdb.management.JanusGraphManager
# Bind the "graph" alias to the JanusGraph instance configured in
# janusgraph.properties (CQL backend).
graphs: {graph: /opt/bitnami/janusgraph/conf/janusgraph.properties}
scriptEngines:
  gremlin-groovy:
    plugins:
      org.janusgraph.graphdb.tinkerpop.plugin.JanusGraphGremlinPlugin: {}
      org.apache.tinkerpop.gremlin.server.jsr223.GremlinServerGremlinPlugin: {}
      org.apache.tinkerpop.gremlin.tinkergraph.jsr223.TinkerGraphGremlinPlugin: {}
      org.apache.tinkerpop.gremlin.jsr223.ImportGremlinPlugin:
        classImports: [java.lang.Math]
        methodImports: [java.lang.Math#*]
      # Init scripts executed at server startup; schema_init.groovy is
      # mounted from docker/janusgraph/scripts/ by docker-compose.
      org.apache.tinkerpop.gremlin.jsr223.ScriptFileGremlinPlugin:
        files:
          - /opt/bitnami/janusgraph/scripts/empty-sample.groovy
          - /opt/bitnami/janusgraph/scripts/schema_init.groovy
processors:
  # Session timeout 28800000 ms = 8 hours.
  - {className: org.apache.tinkerpop.gremlin.server.op.session.SessionOpProcessor, config: {sessionTimeout: 28800000}}
  - {className: org.apache.tinkerpop.gremlin.server.op.traversal.TraversalOpProcessor, config: {cacheExpirationTime: 600000, cacheMaxSize: 1000}}
maxInitialLineLength: 4096
maxHeaderSize: 8192
maxChunkSize: 8192
maxContentLength: 65536
maxAccumulationBufferComponents: 1024
resultIterationBatchSize: 64
writeBufferLowWaterMark: 32768
writeBufferHighWaterMark: 65536
threadPoolWorker: 1
gremlinPool: 8
4 changes: 4 additions & 0 deletions docker/janusgraph/conf/janusgraph.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# JanusGraph storage configuration for the local docker-compose stack.
gremlin.graph=org.janusgraph.core.JanusGraphFactory
# CQL (Cassandra-compatible) backend, pointed at the "yugabyte" compose
# service; graph data is stored in the "janusgraph" keyspace.
storage.backend=cql
storage.hostname=yugabyte
storage.cql.keyspace=janusgraph
Loading
Loading