#! /usr/bin/env bash
# genenetwork-machines --- Guix configuration for genenetwork machines
# Copyright © 2025 Munyoki Kilyungi
#
# This file is part of genenetwork-machines.
#
# genenetwork-machines is free software: you can redistribute it
# and/or modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation, either version 3 of
# the License, or (at your option) any later version.
#
# genenetwork-machines is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with genenetwork-machines.  If not, see
# <https://www.gnu.org/licenses/>.

# Build and install genenetwork container on your local machine.
#
# Usage: pass exactly one of
#   --init-container   create directories, copy config, clone projects,
#                      build the Guix container and link it into place
#   --init-sql         download the SQL dump and import it into MySQL
#   --init-rdf         regenerate the ttl files and load them
#   --init-xapian      build the xapian index
#   --beginner-set-up  interactive walk-through of all of the above
#
# The script reads "etc/..." and the container .scm file through
# relative paths, so run it from the directory that contains them.
#
# This script uses arrays, associative arrays, here-strings and
# "pipefail", hence bash rather than /bin/sh.

set -euo pipefail

# Owner used when fixing up directory permissions; fall back to id(1)
# because USER is not guaranteed to be set and "set -u" is active.
CURRENT_USER="${USER:-$(id -un)}"

BASE_DIR="${HOME:-/home/$CURRENT_USER}/genenetwork"

SYSTEM_DIRECTORIES=(
  "$BASE_DIR"
  "$BASE_DIR/var/log"
  "$BASE_DIR/var/genenetwork"
  "$BASE_DIR/etc/genenetwork/conf"
  "$BASE_DIR/etc/genenetwork"
  "$BASE_DIR/var/lib/redis"
  "$BASE_DIR/var/lib/virtuoso"
  "$BASE_DIR/var/lib/data"
  "$BASE_DIR/var/lib/gn-uploader/data"
  "$BASE_DIR/var/lib/gn-uploader/sessions"
  "$BASE_DIR/var/lib/xapian"
  "$BASE_DIR/var/genenetwork/genotype-files/genotype/json"
  "$BASE_DIR/var/lib/genenetwork-sqlite"
  "$BASE_DIR/var/lib/genenetwork-gnqa"
  "/tmp/local-container"
)

# Entries are "<directory name>:<clone URL>"; only the first ':' separates.
GN_PROJECTS=(
  "genenetwork2:https://github.com/genenetwork/genenetwork2.git"
  "genenetwork3:https://github.com/genenetwork/genenetwork3.git"
  "gn-transform-databases:https://git.genenetwork.org/gn-transform-databases/"
  "gn-auth:https://git.genenetwork.org/gn-auth"
  "gn-docs:https://git.genenetwork.org/gn-docs"
  "gn-uploader:https://git.genenetwork.org/gn-uploader"
)

# File mappings: source -> destination
declare -A FILE_MAPPINGS=(
  ["etc/auth.db"]="$BASE_DIR/var/genenetwork/auth.db"
  ["etc/gn-uploader.db"]="$BASE_DIR/var/genenetwork/gn-uploader.db"
  ["etc/llm.db"]="$BASE_DIR/var/lib/genenetwork-sqlite/llm.db"
  ["etc/gn2-secrets.py"]="$BASE_DIR/etc/genenetwork/conf/gn2/secrets.py"
  ["etc/gn3-secrets.py"]="$BASE_DIR/etc/genenetwork/conf/gn3/secrets.py"
  ["etc/gn-auth-secrets.py"]="$BASE_DIR/etc/genenetwork/conf/gn-auth/secrets.py"
  ["etc/gn-uploader-secrets.py"]="$BASE_DIR/etc/genenetwork/conf/gn-uploader/secrets.py"
)

CONTAINER_SCM="genenetwork-local-container.scm"
CONTAINER_BIN="/usr/local/bin/genenetwork-local-container"
GC_ROOT="/var/guix/gcroots/genenetwork-local-container"

# Print a timestamped message on stderr.
# Arguments: $1 - level (INFO, ERROR, ...); remaining - message words.
log() {
  local level="$1"
  shift
  printf '[%s] %s: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$level" "$*" >&2
}

# Print the accepted command-line options on stderr.
usage() {
  cat >&2 <<EOF
Usage: ${0##*/} OPTION

Options:
  --init-container   set up directories, config files and the container
  --init-sql         download and import the SQL database
  --init-rdf         generate and load the RDF (ttl) files
  --init-xapian      build the xapian search index
  --beginner-set-up  run the interactive set-up tutorial
  -h, --help         show this message
EOF
}

# Clear the terminal when possible; a missing or unusable terminal must
# not abort the script under "set -e".
clear_screen() {
  clear 2>/dev/null || true
}

# Abort unless every external command the script relies on is present.
check_dependencies() {
  local cmd
  for cmd in git sudo diff cp grep; do
    if ! command -v "$cmd" &>/dev/null; then
      log "ERROR" "Required command '$cmd' not found"
      exit 1
    fi
  done

  if ! command -v guix &>/dev/null; then
    echo "Please install Guix to proceed. Follow the instructions at:"
    echo "https://issues.genenetwork.org/topics/octopus/set-up-guix-for-new-users"
    echo "After installing Guix, try running this script again."
    exit 1
  fi
}

# Abort unless "guix describe" lists the gn-bioinformatics channel.
check_guix_channel() {
  local description="" channel_lines=""

  # Capture the output once instead of piping into grep twice; grep is
  # allowed to find nothing without tripping "set -e"/"pipefail".
  if description=$(guix describe); then
    channel_lines=$(grep gn-bioinformatics <<<"$description" || true)
  fi

  if [ -n "$channel_lines" ]; then
    log "INFO" "guix guix-informatics $(cut -d ' ' -f 4 <<<"$channel_lines")"
    return 0
  fi

  clear_screen
  guix describe || true
  echo ""
  log "ERROR" "Please make sure your current profile has gn-bioinformatics"
  echo ""
  echo "Refer to:"
  echo " https://issues.genenetwork.org/topics/guix/guix-profiles"
  echo "to help you set your profile"
  echo ""
  exit 1
}

# Abort when HOME is unset or empty.
validate_home() {
  if [ -z "${HOME:-}" ]; then
    log "ERROR" "HOME environment variable is not set"
    exit 1
  fi
}

# Hand an existing directory to the current user when it is not writable.
# Arguments: $1 - directory that already exists.
ensure_dir_writable() {
  local dir="$1"
  if [ -w "$dir" ]; then
    log "DEBUG" "Directory exists and is writable: $dir"
  else
    log "WARNING" "Directory exists but is not writable: $dir. Making this writable"
    sudo chown -R "$CURRENT_USER" "$dir"
  fi
}

# Print the closest ancestor of $1 that exists on disk.
nearest_existing_ancestor() {
  local path
  path=$(dirname "$1")
  while [ ! -e "$path" ] && [ "$path" != "/" ] && [ "$path" != "." ]; do
    path=$(dirname "$path")
  done
  printf '%s\n' "$path"
}

# Create every entry of SYSTEM_DIRECTORIES, using sudo only when needed.
create_system_directories() {
  local dir
  log "INFO" "Creating system directories..."
  for dir in "${SYSTEM_DIRECTORIES[@]}"; do
    if [ -d "$dir" ]; then
      ensure_dir_writable "$dir"
      continue
    fi

    log "INFO" "Creating directory: $dir"
    # Test the nearest ancestor that exists, not just the immediate
    # parent: a merely missing parent must not force sudo and leave
    # root-owned intermediate directories behind.
    if [ -w "$(nearest_existing_ancestor "$dir")" ]; then
      if ! mkdir -p "$dir"; then
        log "ERROR" "Failed to create directory without sudo: $dir"
        exit 1
      fi
    else
      if ! sudo mkdir -p "$dir"; then
        log "ERROR" "Failed to create directory with sudo: $dir"
        exit 1
      fi
      # Set ownership to current user if created with sudo
      if ! sudo chown -R "$CURRENT_USER" "$dir"; then
        log "WARNING" "Failed to set ownership for: $dir"
      fi
    fi
  done
}

# Copy each FILE_MAPPINGS source over its destination when the
# destination is missing or differs from the source.
copy_configuration_files() {
  local src dest diff_output
  log "INFO" "Checking and copying configuration files..."
  for src in "${!FILE_MAPPINGS[@]}"; do
    dest="${FILE_MAPPINGS[$src]}"
    log "INFO" "Processing $src -> $dest"

    if [ ! -f "$src" ]; then
      log "ERROR" "Source file does not exist: $src"
      exit 1
    fi

    if [ ! -f "$dest" ]; then
      log "INFO" "Destination file does not exist, copying $src to $dest"
      if ! mkdir -p "$(dirname "$dest")" || ! cp "$src" "$dest"; then
        log "ERROR" "Failed to copy $src to $dest"
        exit 1
      fi
      continue
    fi

    log "INFO" "Comparing $src with $dest"
    if diff_output=$(diff -u "$dest" "$src" 2>&1); then
      log "INFO" "Files $src and $dest are identical"
    else
      log "INFO" "Differences found between $src and $dest:"
      printf '%s\n' "$diff_output" >&2
      log "INFO" "Copying $src to $dest"
      if ! cp "$src" "$dest"; then
        log "ERROR" "Failed to copy $src to $dest"
        exit 1
      fi
    fi
  done
}

# Succeed when $1 looks like a standard or bare Git repository.
is_git_repository() {
  local dir="$1"
  if [ -d "$dir/.git" ]; then
    if [ -f "$dir/.git/HEAD" ] && [ -d "$dir/.git/refs" ]; then
      log "DEBUG" "Detected standard Git repository: $dir"
      return 0
    fi
    log "ERROR" "Directory $dir/.git exists but is not a valid Git repository"
    return 1
  fi
  if [ -f "$dir/HEAD" ] && [ -d "$dir/refs" ] && [ -d "$dir/objects" ]; then
    log "DEBUG" "Detected bare Git repository: $dir"
    return 0
  fi
  log "ERROR" "$dir exists but is not a Git repository (neither standard nor bare)"
  return 1
}

# Clone every GN_PROJECTS entry that is not already present under BASE_DIR.
clone_projects() {
  local entry project repo_url dir
  log "INFO" "Cloning GeneNetwork projects..."
  for entry in "${GN_PROJECTS[@]}"; do
    project="${entry%%:*}"
    repo_url="${entry#*:}"
    dir="$BASE_DIR/$project"
    if [ ! -d "$dir" ]; then
      log "INFO" "Cloning $project from $repo_url to $dir"
      if ! git clone "$repo_url" "$dir"; then
        log "ERROR" "Failed to clone $project"
        exit 1
      fi
    else
      log "DEBUG" "Directory exists, skipping clone: $dir"
      if ! is_git_repository "$dir"; then
        log "ERROR" "$dir exists but is not a Git repository"
        exit 1
      fi
    fi
  done
}

# Create the flask_session directories inside the project checkouts.
create_flask_session_dirs() {
  local session_dir
  local -a session_dirs=(
    "$BASE_DIR/gn-uploader/flask_session"
    "$BASE_DIR/genenetwork2/flask_session"
  )
  for session_dir in "${session_dirs[@]}"; do
    if [ -d "$session_dir" ]; then
      log "DEBUG" "FLASK_SESSION directory already exists: $session_dir"
    else
      log "INFO" "Creating FLASK_SESSION directory: $session_dir"
      if ! mkdir -p "$session_dir"; then
        log "ERROR" "Failed to create FLASK_SESSION directory: $session_dir"
        exit 1
      fi
    fi
  done
}

# Prepare the host (directories, configuration, project checkouts),
# build the Guix system container and link the resulting launcher to
# CONTAINER_BIN and GC_ROOT.  Exits non-zero on the first failure.
init_container() {
  local container_script
  local -a share_opts

  create_system_directories
  copy_configuration_files
  clone_projects
  create_flask_session_dirs

  # Verify container SCM file exists
  if [ ! -f "$CONTAINER_SCM" ]; then
    log "ERROR" "Container SCM file not found: $CONTAINER_SCM"
    exit 1
  fi

  # Create Guix system container
  log "INFO" "Creating Guix system container..."
  share_opts=(
    "--share=$BASE_DIR/var/log=/var/log"
    "--share=$BASE_DIR/var/genenetwork=/var/genenetwork"
    "--share=$BASE_DIR/etc/genenetwork/conf=/etc/genenetwork/conf"
    "--share=$BASE_DIR/etc/genenetwork=/etc/genenetwork"
    "--share=$BASE_DIR/var/lib/redis=/var/lib/redis"
    "--share=$BASE_DIR/var/lib/virtuoso=/var/lib/virtuoso"
    "--share=$BASE_DIR/var/lib/data=/var/lib/data"
    "--share=$BASE_DIR/var/lib/gn-uploader/data=/var/lib/gn-uploader/data"
    "--share=$BASE_DIR/var/lib/gn-uploader/sessions=/var/lib/gn-uploader/sessions"
    "--share=$BASE_DIR/genenetwork2=/genenetwork2"
    "--share=$BASE_DIR/genenetwork3=/genenetwork3"
    "--share=$BASE_DIR/gn-uploader=/gn-uploader"
    "--share=$BASE_DIR/gn-auth=/gn-auth"
    "--share=$BASE_DIR/var/lib/xapian=/var/lib/xapian"
    "--share=$BASE_DIR/var/lib/genenetwork-sqlite=/var/lib/genenetwork-sqlite"
    "--share=$BASE_DIR/var/lib/genenetwork-gnqa=/var/lib/genenetwork-gnqa"
    "--share=/tmp/local-container=/tmp"
    "--share=$BASE_DIR/gn-docs=/var/lib/gn-docs"
    "--share=/run/mysqld=/run/mysqld"
  )
  if ! container_script=$(guix system container \
      --network --fallback \
      --load-path=. \
      --verbosity=3 \
      "${share_opts[@]}" \
      "$CONTAINER_SCM") || [ -z "$container_script" ]; then
    log "ERROR" "Failed to build the Guix system container from $CONTAINER_SCM"
    exit 1
  fi
  log "DEBUG" "guix system container output: $container_script"

  # Create symbolic links
  log "INFO" "Creating symbolic links..."
  if ! sudo ln -sf "$container_script" "$CONTAINER_BIN"; then
    log "ERROR" "Failed to create symbolic link: $CONTAINER_BIN"
    exit 1
  fi
  if ! sudo ln -sf "$container_script" "$GC_ROOT"; then
    log "ERROR" "Failed to create GC root link: $GC_ROOT"
    exit 1
  fi

  log "INFO" "Setup completed successfully!"
  log "INFO" "Container script: $container_script"
  log "INFO" "Run with: $CONTAINER_BIN"
  log "INFO" "Email: test@development.user"
  log "INFO" "Password: testpasswd"
}

# Download the SQL dump (unless the extracted file is already present),
# make sure the database user and database exist, then import the dump.
# MYSQL_ROOT_USER and MYSQL_ROOT_PASSWORD may be supplied through the
# environment; anything missing is prompted for.
init_sql() {
  # Configuration
  local url="https://files.genenetwork.org/database/db_webqtl_s-2025-02-18.sql.xz"
  local download_dir="/tmp"
  local file_name="${url##*/}"
  local extracted_file="${file_name%.xz}"
  local archive_path="$download_dir/$file_name"
  local dump_path="$download_dir/$extracted_file"
  local db_user="webqtlout"
  local db_password="webqtlout"
  local db_host="localhost"
  local db_name="db_webqtl_local"
  local mysql_root_user="${MYSQL_ROOT_USER:-}"
  local mysql_root_password="${MYSQL_ROOT_PASSWORD:-}"
  local user_exists cmd
  local -a mysql_admin

  # Check for required tools
  for cmd in wget xz mysql; do
    if ! command -v "$cmd" &>/dev/null; then
      log ERROR "Required command '$cmd' not found"
      exit 1
    fi
  done

  # Download and extract only when the extracted dump is not there yet
  if [ ! -f "$dump_path" ]; then
    log INFO "Downloading $url to $archive_path"
    if ! wget -O "$archive_path" "$url"; then
      log ERROR "Failed to download $url"
      exit 1
    fi
    log INFO "Extracting $archive_path"
    if ! xz -d "$archive_path"; then
      log ERROR "Failed to extract $archive_path"
      rm -f "$archive_path"
      exit 1
    fi
  fi

  # Prepare MySQL user and password credentials
  if [ -z "$mysql_root_user" ]; then
    log INFO "MySQL root user not set, prompting for input"
    # The user name is not a secret, so let it echo while typing.
    read -r -p "Enter MySQL user: " mysql_root_user
  fi
  if [ -z "$mysql_root_password" ]; then
    log INFO "MySQL root password not set, prompting for input"
    read -r -s -p "Enter MySQL root password: " mysql_root_password
    echo "" >&2 # "read -s" swallows the newline typed by the user
  fi
  # NOTE(review): "-p<password>" exposes the password in the process
  # list; kept as in the original invocation.
  mysql_admin=(mysql -h "$db_host" -u "$mysql_root_user" -p"$mysql_root_password")

  # Check if DB user exists, create if not
  log INFO "Checking if MySQL user $db_user exists"
  if ! user_exists=$("${mysql_admin[@]}" -e "SELECT EXISTS(SELECT 1 FROM mysql.user WHERE user = '$db_user' AND host = 'localhost') AS user_exists;" 2>/dev/null | grep -o '[0-1]$'); then
    log ERROR "Failed to query MySQL for user $db_user (check the admin credentials)"
    exit 1
  fi

  if [ "$user_exists" = "0" ]; then
    log INFO "Creating MySQL user $db_user"
    if ! "${mysql_admin[@]}" -e "CREATE USER '$db_user'@'localhost' IDENTIFIED BY '$db_password'; GRANT ALL PRIVILEGES ON $db_name.* TO '$db_user'@'localhost'; FLUSH PRIVILEGES;" 2>/dev/null; then
      log ERROR "Failed to create MySQL user $db_user"
      rm -f "$dump_path"
      exit 1
    fi
  else
    log INFO "User $db_user already exists, ensuring privileges"
    if ! "${mysql_admin[@]}" -e "GRANT ALL PRIVILEGES ON $db_name.* TO '$db_user'@'localhost'; FLUSH PRIVILEGES;" 2>/dev/null; then
      log ERROR "Failed to update privileges for $db_user"
      rm -f "$dump_path"
      exit 1
    fi
  fi

  # Create database if it doesn't exist
  log INFO "Ensuring database $db_name exists"
  if ! "${mysql_admin[@]}" -e "CREATE DATABASE IF NOT EXISTS $db_name;" 2>/dev/null; then
    log ERROR "Failed to create or verify database $db_name"
    rm -f "$dump_path"
    exit 1
  fi

  # Install the SQL dump into the database
  log INFO "Importing $dump_path into $db_name"
  if ! mysql -h "$db_host" -u "$db_user" -p"$db_password" "$db_name" <"$dump_path"; then
    log ERROR "Failed to import $dump_path into $db_name"
    rm -f "$dump_path"
    exit 1
  fi

  # Clean up
  log INFO "Removing $dump_path"
  rm -f "$dump_path"
  log INFO "Database import completed successfully"
}

# Regenerate the ttl files under BASE_DIR/var/lib/data and load them,
# using the scripts of the gn-transform-databases checkout.
init_rdf() {
  local data_dir="$BASE_DIR/var/lib/data"
  local curr_dir="$PWD"
  local -a ttl_files

  if ! command -v guix &>/dev/null; then
    log ERROR "Required command 'guix' not found"
    exit 1
  fi

  log INFO "Please make sure your container is running for this to work"
  log INFO "This takes some time to run"

  if [ -d "$data_dir" ]; then
    ensure_dir_writable "$data_dir"
  fi

  if ! cd "$BASE_DIR/gn-transform-databases"; then
    log ERROR "Cannot enter $BASE_DIR/gn-transform-databases"
    exit 1
  fi

  # An unmatched glob stays literal, so probing the first element tells
  # whether any ttl file exists without parsing ls output.
  ttl_files=("$data_dir"/*ttl)
  if [ -e "${ttl_files[0]}" ]; then
    log INFO "Removing all the ttl files and generating them again"
    rm -- "${ttl_files[@]}"
  fi

  log INFO "Generating the ttl files"
  guix shell -m "manifest.scm" -- guile "generate-ttl-files.scm" \
    --settings "$curr_dir/etc/conn.scm" --output "$data_dir"

  log INFO "Loading ttl files"
  guix shell guile-dbi -m "manifest.scm" -- guile load-rdf.scm \
    "$curr_dir/etc/conn.scm"

  cd "$curr_dir"
}

# Build the xapian index in a scratch "build" directory and move the
# result into BASE_DIR/var/lib/xapian.
init_xapian() {
  local xapian_dir="$BASE_DIR/var/lib/xapian"
  local build_dir="$xapian_dir/build"

  log INFO "Please make sure your container is running for this to work"
  log INFO "This takes some time to run"

  if [ -d "$xapian_dir" ]; then
    ensure_dir_writable "$xapian_dir"
  fi

  # Drop whatever a previous (possibly interrupted) run left behind.
  rm -rf -- "$build_dir"

  env PYTHONPATH="$BASE_DIR/genenetwork3/" guix shell python-wrapper genenetwork3 \
    --share="$xapian_dir" -- \
    "$BASE_DIR/genenetwork3/scripts/index-genenetwork" create-xapian-index \
    "$build_dir" \
    "mysql://webqtlout:webqtlout@localhost/db_webqtl_local?unix_socket=/run/mysqld/mysqld.sock&charset=utf8" \
    "http://localhost:7082/sparql"

  # The glob has to sit outside the quotes, otherwise mv looks for a
  # file literally named "*".
  if ! mv -- "$build_dir"/* "$xapian_dir/"; then
    log ERROR "Failed to move the xapian index from $build_dir to $xapian_dir"
    exit 1
  fi
  if ! rmdir -- "$build_dir"; then
    log ERROR "Failed to remove $build_dir"
    exit 1
  fi

  log INFO "Please restart the container to set the correct permissions"
}

# Function to handle yes/no prompts
# Arguments: $1 - question to display.
# Returns: 0 for yes/y, 1 for no/n (case-insensitive) or end of input.
prompt_yes_no() {
  local prompt="$1"
  local response
  while true; do
    # Treat EOF as "no" instead of asking again forever.
    if ! read -r -p "$prompt (yes/no): " response; then
      return 1
    fi
    case "$response" in
      [Yy][Ee][Ss] | [Yy]) return 0 ;;
      [Nn][Oo] | [Nn]) return 1 ;;
      *) echo "Please answer 'yes' or 'no'." ;;
    esac
  done
}

# Main tutorial function: explains each set-up step, asks whether to
# run it, and exits 0 when the walk-through is finished.
run_beginner_setup_tutorial() {
  clear_screen

  # Step 1: Initialize Container
  cat <<EOF
Step 1: Setting Up the Container
--------------------------------
Before we begin, make sure you are using the correct guix-profile that has:

 guix-bioinformatics

Otherwise, I will terminate early. If you don't know how to do that, read:

 https://issues.genenetwork.org/topics/guix/guix-profiles

This step prepares the container environment where GeneNetwork runs.
It ensures all necessary files, folders, and configurations are in place.

This step is similar to running:

./genenetwork-local-container.sh --init-container

NOTE: If you have any of the following projects:
 genenetwork2
 genenetwork3
 gn-uploader

Move/copy them to $BASE_DIR. This is where'll you'll be hacking
on them from. Because of how we mount things inside the container,
make sure the names appear as above.

Now, some things to note. AI search won't work. To get this working, if you want so,
Get into the container, and modify FAHAMU_AUTH_TOKEN in:
 /etc/genenetwork/conf/gn3/secrets.py

Similarly, e-mail functionality ala resetting passwords or confirming a new user
won't work unless you modify:
 SMTP_HOST = 'XXXX'
 SMTP_USER = 'XXXX'
 SMTP_PASSWORD = 'XXXX'
 SMTP_PORT = 'XXXX'
 EMAIL_ADDRESS = 'XXXX'
 EMAIL_DISPLAY_NAME = 'XXXX'
inside:
 /etc/genenetwork/conf/gn-auth/secrets.py

EOF
  if prompt_yes_no "Would you like to set up the container now?"; then
    init_container
    clear_screen
    echo "Container setup complete!"
    echo ""
    echo "Right now you have a working container. You need to set-up sql, upload rdf data"
    echo "and build a xapian index to have a fully functioning system akin to production and"
    echo "our test environment."
    echo ""
  else
    clear_screen
    echo "Skipping container setup. Moving to the next step."
  fi
  echo ""

  # Step 2: Initialize SQL Database
  cat <<EOF
Step 2: Setting Up the SQL Database
-----------------------------------
Please make sure you have a working MariaDB installation on your Linux system.

This step creates and configures the database for storing GeneNetwork data.

It sets up tables and initial data needed for Genenetwork to run

This step is similar to running:
./genenetwork-local-container.sh --init-sql

Make sure you have a running mysql instance for this to work

EOF
  if prompt_yes_no "Would you like to set up the database now?"; then
    init_sql
    clear_screen
    echo "Database setup complete!"
  else
    clear_screen
    echo "Skipping database setup. Moving to the next step: Importing RDF"
  fi
  echo ""

  # Step 3: Initialize RDF
  cat <<EOF

Step 3: Importing RDF into your local set-up
---------------------------------------------
This step adds RDF (Resource Description Framework) data.

RDF helps organize and link data for GeneNetwork.

IMPORTANT. This step requires you to have a running container instance.
 Otherwise, it will fail.
To run your container, in a different shell, run:
sudo /usr/local/bin/genenetwork-local-container

This step is similar to running:
./genenetwork-local-container.sh --init-rdf

Make sure your container, if not, is running for this to work:

sudo /usr/local/bin/genenetwork-local-container
EOF
  if prompt_yes_no "Would you like to set up RDF now?"; then
    init_rdf
    clear_screen
    echo "RDF setup complete!"
  else
    clear_screen
    echo "Skipping RDF setup."
  fi
  echo ""

  # Step 4: Initialize XAPIAN
  clear_screen
  cat <<EOF
Step 4: Setting up XAPIAN search locally
---------------------------------------------
This indexes all your data from GeneNetwork and indexes it using XAPIAN

Read more here:
https://issues.genenetwork.org/topics/xapian/xapian-search

IMPORTANT. This step requires you to have a running container instance.
 Otherwise, it will fail.
 To run your container, in a different shell, run:
 sudo /usr/local/bin/genenetwork-local-container

This step is similar to running:
./genenetwork-local-container.sh --init-xapian

Make sure your container, if not, is running for this to work:

sudo /usr/local/bin/genenetwork-local-container
EOF
  if prompt_yes_no "Would you like to build the xapian index now?"; then
    init_xapian
    clear_screen
    echo "XAPIAN setup complete!"
  else
    clear_screen
    echo "Skipping XAPIAN setup."
  fi
  echo ""

  # Completion Message
  cat <<EOF
=============================================================
Congratulations! You've completed the GeneNetwork Beginner Setup Tutorial!
=============================================================
You may have skipped some steps. To complete them, run the tutorial again.
Your environment is ready to the extent you chose to set it up.
If you run your container, you can access the different web-services at:
gn-guile : localhost:8091
genenetwork2 : localhost:8082
genenetwork3 : localhost:8083
gn-uploader : localhost:8085
sparql web ui: localhost:7082/sparql

To access virtuoso back-end service, run:
guix shell virtuoso-ose isql -- 7081

To login in gn-uploader/genenetwork2, use the following credentials:
 Email: test@development.user
 Password: testpasswd
EOF
  exit 0
}

# Entry point: validate the option, run the environment checks, then
# dispatch.  "${1:-}" keeps "set -u" from aborting when no option is
# given.
main() {
  local action="${1:-}"

  case "$action" in
    --init-container | --init-sql | --init-rdf | --init-xapian | --beginner-set-up) ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      usage
      exit 1
      ;;
  esac

  check_dependencies
  check_guix_channel
  validate_home

  case "$action" in
    --init-container) init_container ;;
    --init-sql) init_sql ;;
    --init-rdf) init_rdf ;;
    --init-xapian) init_xapian ;;
    --beginner-set-up) run_beginner_setup_tutorial ;;
  esac
  exit 0
}

main "$@"