1 | #!/bin/bash
|
---|
2 |
|
---|
#######################################
# Perform program exit housekeeping: delete the temporary file(s) of this
# script and terminate the shell.
# Globals:
#   TMP_FILE        (read) path of a temporary file, if set
#   SLURM_HOSTFILE  (read) path of the temporary hostfile, if set
# Arguments:
#   $1 - optional exit status (defaults to 0)
# Returns:
#   never returns; exits the shell with the requested status
#######################################
function clean_up {
  local tmp_path

  # The script keeps the path returned by mktemp in SLURM_HOSTFILE, so that
  # variable has to be cleaned up as well; TMP_FILE alone would leave the
  # temporary hostfile behind.
  for tmp_path in "${TMP_FILE:-}" "${SLURM_HOSTFILE:-}"; do
    if [[ -n "$tmp_path" ]]; then
      # Quoted and terminated with "--" so paths containing spaces or a
      # leading dash are handled correctly.
      rm -f -- "$tmp_path"
    fi
  done

  exit "${1:-0}"
}
9 |
|
---|
#######################################
# Display an error message on stderr and exit.
# Globals:
#   PROGNAME (read) prefix of the message; the script name is used when it
#            is unset or empty
# Arguments:
#   $1 - error message (defaults to "Unknown Error")
# Outputs:
#   writes "<prefix>: <message>" to stderr
# Returns:
#   does not return as long as clean_up exits; clean_up is called with
#   status 1
#######################################
function error_exit {
  # printf with %s prints the message literally, whatever characters it
  # contains.
  printf '%s: %s\n' "${PROGNAME:-${0##*/}}" "${1:-Unknown Error}" >&2
  clean_up 1
}
15 |
|
---|
# Prefix used by error_exit for its messages.
PROGNAME=srun

# Number of hostfile lines written per listed hostname: the first host gets
# this many tasks, every other host twice as many.
TASKS_PER_HALF_NODE=12

# create tmp file
# TMP_FILE is the variable clean_up removes; it is cleared first so that an
# inherited value is never deleted by accident. The assignment is kept
# separate from "export" so that a mktemp failure is not masked.
TMP_FILE=
SLURM_HOSTFILE=$(mktemp) || error_exit "cannot create temporary hostfile"
TMP_FILE=$SLURM_HOSTFILE
export SLURM_HOSTFILE

# Remove the temporary file when interrupted and report the conventional
# 128+signal exit status instead of success.
trap 'clean_up 129' SIGHUP
trap 'clean_up 130' SIGINT
trap 'clean_up 143' SIGTERM

if [[ ! -f "$SLURM_HOSTFILE" ]]; then
  error_exit "tmp file ${SLURM_HOSTFILE} cannot be read"
fi

# allocate resources
#salloc --partition=vis --nodes=3 --ntasks-per-node=24 --gres=gpu:2 --time=24:00:00 --mem=512000

# Without these variables neither the hostfile nor the task count below can
# be computed.
if [[ -z "${SLURM_NODELIST:-}" || -z "${SLURM_NNODES:-}" ]]; then
  error_exit "SLURM_NODELIST/SLURM_NNODES are not set: run this script inside a Slurm allocation"
fi

# create a hostfile for srun
#  * each line sets the hostname of a task
#  * 24 tasks per hostname
#  * only 12 tasks on the first hostname (host with vnc and paraview gui)
#  => the first hostname is written 12 times, every other one 2 * 12 times
scontrol show hostname "$SLURM_NODELIST" \
  | awk -v per="$TASKS_PER_HALF_NODE" '
      {
        copies = (NR == 1) ? per : 2 * per
        for (i = 0; i < copies; i++) print
      }' \
  > "$SLURM_HOSTFILE"
#nodeset -e -S\\n $SLURM_NODELIST | sort -r | awk '1;1' | head -n -1 > $SLURM_HOSTFILE

if [[ ! -s "$SLURM_HOSTFILE" ]]; then
  error_exit "could not expand node list '${SLURM_NODELIST}' into a hostfile"
fi

# srun will use $SLURM_HOSTFILE if set
# The inline script is passed in single quotes, so it must not contain any
# single quote characters itself.
srun --ntasks=$(( TASKS_PER_HALF_NODE + ((SLURM_NNODES - 1) * 2) * TASKS_PER_HALF_NODE )) \
  --cpu_bind=sockets \
  --distribution=arbitrary \
  bash -c '
    export TBB_NUM_THREADS=2
    export OMP_NUM_THREADS=2

    # Collect the display numbers from the socket names /tmp/.X11-unix/X<n>.
    DISPLAYS=
    for sock in /tmp/.X11-unix/X*; do
      if [ -e "$sock" ]; then
        DISPLAYS="$DISPLAYS ${sock##*/X}"
      fi
    done
    #DISPLAY_TYPES=(); for i in $DISPLAYS; do DISPLAY_TYPES+=($(export DISPLAY=:$i.0; glxinfo | grep "OpenGL renderer string")) ; done;

    echo -n "$HOSTNAME: $SLURM_NODEID:$SLURM_PROCID-$SLURM_CPUS_PER_TASK,$DISPLAYS"

    # http://www.paraview.org/Wiki/Setting_up_a_ParaView_Server
    # Tasks with an even SLURM_PROCID use display :0, odd ones display :1.
    if [ $(( SLURM_PROCID % 2 )) -eq 0 ]; then
      echo "... even"
      task_display=:0
    else
      echo "... odd"
      task_display=:1
    fi
    export DISPLAY=$task_display
    export VGL_DISPLAY=$task_display
    pvserver --disable-xdisplay-test --use-offscreen-rendering
  '
SRUN_RC=$?

# check exit code
if (( SRUN_RC != 0 )); then
  # Real newlines are used because error_exit prints the message literally.
  error_exit "Did you try to run pvserver on a login node?
Did you try to run this script twice?
This will fail: srun returns with error code ${SRUN_RC}"
fi

clean_up
78 |
|
---|