Commit 3a3b8f7b authored by Pfister, Martin

Add VSC5 scripts for FSDP llama3.1-70b

parent 7e9cf1bb
#!/bin/bash
#SBATCH --partition=zen3_0512_a100x2
#SBATCH --qos=zen3_0512_a100x2
## Specify resources:
#SBATCH --nodes=1
#SBATCH --gres=gpu:2 # up to 2 on VSC5/A100
#SBATCH --ntasks-per-node=1
## No need to specify RAM on VSC5, as it will be automatically
## allocated depending on the number of GPUs requested.
#SBATCH --time=3:00:00
# Set conda environment:
CONDA_ENV=finetuning
# Load conda:
module purge
module load miniconda3
# Include commands in output:
set -x
# Print current time and date:
date
# Print host name:
hostname
# List available GPUs:
nvidia-smi
# Set environment variables for communication between nodes:
export MASTER_PORT=24998
export MASTER_ADDR=$(scontrol show hostnames ${SLURM_JOB_NODELIST} | head -n 1)
# Print statistics:
echo "Using $((SLURM_NNODES * SLURM_GPUS_ON_NODE)) GPUs on $SLURM_NNODES nodes."
# Run the training script via Accelerate:
srun bash -c "conda run -n $CONDA_ENV --no-capture-output accelerate launch \
--num_machines $SLURM_NNODES \
--num_processes $((SLURM_NNODES * SLURM_GPUS_ON_NODE)) \
--num_cpu_threads_per_process 8 \
--main_process_ip $MASTER_ADDR \
--main_process_port $MASTER_PORT \
--machine_rank \$SLURM_PROCID \
--config_file \"fsdp_config.yml\" \
llama3.1-70b_train.py"
#!/bin/bash
#SBATCH --partition=zen2_0256_a40x2
#SBATCH --qos=zen2_0256_a40x2
## Specify resources:
#SBATCH --nodes=1
#SBATCH --gres=gpu:2 # up to 2 on VSC5/A40
#SBATCH --ntasks-per-node=1
## No need to specify RAM on VSC5, as it will be automatically
## allocated depending on the number of GPUs requested.
#SBATCH --time=3:00:00
# Set conda environment:
CONDA_ENV=finetuning
# Load conda:
module purge
module load miniconda3
# Include commands in output:
set -x
# Print current time and date:
date
# Print host name:
hostname
# List available GPUs:
nvidia-smi
# Set environment variables for communication between nodes:
export MASTER_PORT=24998
export MASTER_ADDR=$(scontrol show hostnames ${SLURM_JOB_NODELIST} | head -n 1)
# Print statistics:
echo "Using $((SLURM_NNODES * SLURM_GPUS_ON_NODE)) GPUs on $SLURM_NNODES nodes."
# Run the training script via Accelerate:
srun bash -c "conda run -n $CONDA_ENV --no-capture-output accelerate launch \
--num_machines $SLURM_NNODES \
--num_processes $((SLURM_NNODES * SLURM_GPUS_ON_NODE)) \
--num_cpu_threads_per_process 8 \
--main_process_ip $MASTER_ADDR \
--main_process_port $MASTER_PORT \
--machine_rank \$SLURM_PROCID \
--config_file \"fsdp_config.yml\" \
llama3.1-70b_train.py"