Commit 12598d17 authored by Muck, Katrin

reworked ray job start

parent 1e8d6fcc
@@ -24,49 +24,85 @@
#SBATCH --job-name=ray-test
#SBATCH --qos=zen3_0512 # select zen3_0512 default qos
#SBATCH --partition=zen3_0512 # select zen3_0512 hardware
#SBATCH --nodes=2 # tell VSC slurm to allocate 2 exclusive nodes
#SBATCH --nodes=3 # tell VSC slurm to allocate 3 exclusive nodes
#SBATCH --time=00:05:00 # set time limit of 5 min for testing
#SBATCH --tasks-per-node=1 # 1 task per node (1 head + 2 workers)
# optionally activate a conda or python environment
module load openmpi/4.1.6-gcc-12.2.0-exh7lqk
module load miniconda3
eval "$(conda shell.bash hook)"
conda activate ray
# number of nodes
nodes_num=$SLURM_JOB_NUM_NODES
echo "nodes_num: $nodes_num"
# tasks per node -> 1 task per node that starts a head/worker
tasks_per_node=$SLURM_NTASKS_PER_NODE
echo "tasks_per_node: $tasks_per_node"
# (physical) cpus per node (slurm cpus on node gives us logical cores)
cpus_per_node=$(( SLURM_CPUS_ON_NODE / 2 ))
echo "cpus_per_node: $cpus_per_node"
# cpus per task
cpus_per_task=32
# cpus per task: ray itself should do the work scheduling and hardware management
# so it doesn't make much sense to start too many workers
cpus_per_task=$(( cpus_per_node / tasks_per_node ))
echo "cpus_per_task: $cpus_per_task"
# tasks per node
tasks_per_node=$(( cpus_per_node / cpus_per_task ))
echo "tasks_per_node: $tasks_per_node"
# number of workers
# number of tasks & workers
tasks_total=$(( nodes_num * tasks_per_node ))
echo "tasks_total: $tasks_total"
workers_total=$(( tasks_total - 1 ))
echo "workers_total: $workers_total"
echo ""
# print general job info
../util/print_job_info.sh
# start the ray head directly on the batch host in the background
../util/print_task_info_min.sh "ray-head" "hello from ray head!" &
echo "starting ray head in background ..."
echo "ray-head" &
# the first task we start goes to the batch host, so to get the ip address
# we can just use this node's ib0 ip address for the head task
head_node_ip=$( ip address show ib0 | awk '/inet / {print $2}' | cut -d "/" -f 1 )
# head_node_ip=$( hostname --ip-address )
export head_node_ip
export head_node_port=6379
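# note: 6379 is ray's default head port; head_node_ip and head_node_port are
# exported so that the test script below can read them via os.environ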
logfile="slurm-${SLURM_JOB_ID}-head.out"
srun_args_head="--nodes=1 --ntasks=1 --cpus-per-task=$cpus_per_task --input=none --output=$logfile"
echo "starting head with: $srun_args_head"
srun $srun_args_head \
    ray start --head --log-color=false \
    --node-ip-address="$head_node_ip" --port="$head_node_port" \
    --num-cpus="0" --num-gpus="0" \
    --block &
sleep 10s
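# optional sanity check (a sketch, not part of the original script): ask the head
# for the cluster status before any workers are started
#ray status --address="$head_node_ip:$head_node_port"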
# next we start all workers with the configured number of cpus per task
srun --ntasks-per-node=$tasks_per_node --ntasks=$tasks_total --cpus-per-task=$cpus_per_task \
    ../util/print_task_info_min.sh "ray-worker" "hello from ray worker!" &
# start workers
logfile="slurm-${SLURM_JOB_ID}-worker-%t.out"
srun_args_worker="--relative=1 --nodes=$workers_total --ntasks=$workers_total --cpus-per-task=$cpus_per_task --input=none --output=$logfile"
echo "starting $workers_total workers with: $srun_args_worker"
srun $srun_args_worker \
    ray start --log-color=false \
    --address="$head_node_ip:$head_node_port" \
    --num-cpus="$cpus_per_task" --num-gpus="0" \
    --block &
# wait for 10s to give the workers time to start
sleep 10s
# note: the started tasks will run as long as this batch process exists (or until they exit on their own)
# so either wait for all child processes to finish
wait
# wait
#
# OR
#
# run your own program (on the batch host)
#
#module load miniconda3
#eval "$(conda shell.bash hook)"
#conda activate my-env
#python my_program.py
python3 ray-test.py "$head_node_ip:$head_node_port"
#
# OR
#
......
name: ray
channels:
- bioconda
- conda-forge
- defaults
dependencies:
- python=3.11
- libblas=*=*mkl
- ray-all
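The environment activated with "conda activate ray" above can be created from this file before submitting the job; a minimal sketch, assuming the file is saved as environment.yml (the file name itself is not part of the commit):

module load miniconda3
eval "$(conda shell.bash hook)"
conda env create -f environment.yml   # creates the 'ray' environment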
#!/usr/bin/env python3
import socket
import time
import os
import ray
@ray.remote
def hello_world():
    return "hello world"

@ray.remote
def get_hostname():
    time.sleep(10)
    return socket.gethostname()
address = f'{os.environ["head_node_ip"]}:{os.environ["head_node_port"]}'
print("Calling ray init ...")
context = ray.init(address=address)
print(f"Context: {context}")
print("Nodes in the Ray cluster:")
print(ray.nodes())
print("Cluster resources:")
print(ray.cluster_resources())
print("Call function:")
print(ray.get(hello_world.remote()))
print("Test parallelism:")
start = time.time()
tasks = [get_hostname.remote()
         for _ in range(len(ray.nodes()) * 100)]
scheduling_end = time.time()
results = ray.get(tasks)
end = time.time()
# if everything is set up correctly the total time should be
# much less than nodes * cpus * 10s
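# rough estimate (not from the original script): with 3 nodes the list comprehension
# above submits 3 * 100 = 300 tasks of 10 s each; 2 workers with 64 cpus give 128
# parallel slots, so about ceil(300 / 128) = 3 waves of 10 s, i.e. on the order of
# 30 s in total instead of nodes * cpus * 10 s = 3 * 64 * 10 s = 1920 s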
print(f"Got {len(results)} results:")
print(set(results))
print(f"Scheduling took: {scheduling_end-start}s")
print(f"Total time: {end-start}s")
#!/bin/bash
##############################################################################
# User Request:
# - allocate multiple full nodes
# - run ray head and ray workers on each node
# - run python script on batch host
#
# Provided Allocation:
# - 3 exclusive nodes
# - 3x 64 physical cores / 128 logical cores
# - 3x 512 GB memory
#
# VSC policy:
# - '--nodes' flag set -> exclusive node allocation
# - '--ntasks-per-node' & '--ntasks' implicitly set to 64
# but overridden with srun arguments
#
# Accounting:
# - 3x 64 core hours / hour
##############################################################################
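# worked example (illustration only): a full hour of this 3 node job is billed
# 3 * 64 = 192 core hours; the 5 minute test limit below amounts to roughly
# 192 * 5/60 = 16 core hours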
#SBATCH --job-name=ray-test
#SBATCH --qos=zen3_0512 # select zen3_0512 default qos
#SBATCH --partition=zen3_0512 # select zen3_0512 hardware
#SBATCH --nodes=3 # tell VSC slurm to allocate 3 exclusive nodes
#SBATCH --time=00:05:00 # set time limit of 5 min for testing
# use the slurm job num nodes env variable
# since we use full nodes for a worker
# first task is reserved for head task
worker_num=$((SLURM_JOB_NUM_NODES - 1))
# we need to tell srun how we want to start the tasks
# - nodes to use (--nodes=1)
# - tasks to start (--ntasks=1)
# - cpus to bind to the task (--cpus-per-task=128)
# (- tasks per node implicitly defaults to 1)
srun_args="--nodes=1 --ntasks=1 --cpus-per-task=128"
# note: this will start the head in its own task on a full node;
# that node is the first host in the node list, which
# is usually the batch host itself.
# you may also want to just run the head directly on the batch host
# in the background instead;
# don't forget to adjust the indices in such a case
echo "srun ray head in background"
srun $srun_args -r 0 ../util/print_task_info_min.sh "ray-head" "hello from ray head (task 0)!" &
for ((index = 1; index <= worker_num; index++)); do
    echo "srun ray worker in background"
    srun $srun_args -r $index ../util/print_task_info_min.sh "ray-worker-$index" "hello from ray worker (task $index)!" &
done
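# worked example: with the 3 nodes allocated above worker_num = 2, so the loop
# places workers at relative node indices 1 and 2, while the head task uses index 0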
# note: the started tasks will run as long as this batch process exists (or until they exit on their own)
# so either wait for all child processes to finish
wait
#
# OR
#
# run your own program (on the batch host)
#
#module load miniconda3
#eval "$(conda shell.bash hook)"
#conda activate my-env
#python my_program.py
#
# OR
#
# start your own program in another task
# don't forget to adjust resource allocation accordingly
# e.g. allocate another node and start workers at index 2
#
#srun $srun_args -r 1 ../my_program_script.sh
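A minimal usage sketch (the file name ray-job.sh is an assumption, not part of the commit):

sbatch ray-job.sh        # submit the batch script above
squeue -u $USER          # check that the job is running
# all task output ends up in the job's default slurm-<jobid>.out unless --output
# is given; the first variant above redirects head and worker logs to
# slurm-<jobid>-head.out and slurm-<jobid>-worker-<rank>.out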