Hi Guilhem -
Thanks for the response.
To answer your first request, here is the output. I do have the CPU count set to use all of the CPUs. I am running this on a single 96-CPU instance; the way I have it set up, SLURM can't launch additional instances for me.
Here is the all.ctrl output:
*************************************************************** Job Resource Configuration ****************************************************************
job_letter=a
batchsystem=SLURM
partition=project
timelimit=0-05:00:00
steps_per_job=1
cpus_per_step=90
queues_per_step=90
cpus_per_queue=1
********************************************************************* Workflow Options ********************************************************************
central_todo_list_splitting_size=10000
ligands_todo_per_queue=1000
ligands_per_refilling_step=1000
collection_folder=…/…/ligand_library/
minimum_time_remaining=10
dispersion_time_min=3
dispersion_time_max=10
verbosity_commands=standard
verbosity_logfiles=debug
store_queue_log_files=all_uncompressed
keep_ligand_summary_logs=true
error_sensitivity=high
error_response=ignore
tempdir=/dev/shm
***************************************************************** Virtual Screening Options ***************************************************************
docking_scenario_names=qvina02_rigid_receptor1:smina_rigid_receptor1
docking_scenario_programs=qvina02:smina_rigid
docking_scenario_replicas=1:1
docking_scenario_inputfolders=…/input-files/qvina02_rigid_receptor1:…/input-files/smina_rigid_receptor1
******************************************************************* Terminating Variables *****************************************************************
stop_after_next_check_interval=false
ligand_check_interval=100
stop_after_collection=false
stop_after_job=false
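For reference, my understanding is that the resource section of all.ctrl above corresponds roughly to the following SLURM request. This is only a sketch of the equivalent sbatch header; the job files VirtualFlow actually generates may look different.

#!/bin/bash
# Sketch only: rough sbatch equivalent of steps_per_job=1 / cpus_per_step=90
# on the "project" partition with the 5-hour time limit from all.ctrl
#SBATCH --partition=project
#SBATCH --time=0-05:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=90

As I understand it, with cpus_per_queue=1 that gives 90 single-CPU docking queues inside the one job step, i.e. 90 of the 96 hardware threads on the node.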
And here is the slurm.conf output (with the more sensitive info xxxxxx'd out):
ClusterName=virtualflow
SlurmctldHost=xxxxxxxxxxxxxxxxx
AuthType=auth/munge
CacheGroups=0
CryptoType=crypto/munge
EnforcePartLimits=ALL
JobCheckpointDir=/var/lib/slurm-llnl/checkpoint
MpiDefault=none
ProctrackType=proctrack/pgid
ReturnToService=1
SlurmctldPidFile=/var/run/slurm-llnl/slurmctld.pid
SlurmctldPort=6817
SrunPortRange=60001-63000
SlurmdPidFile=/var/run/slurm-llnl/slurmd.pid
SlurmdPort=6818
SlurmdSpoolDir=/var/lib/slurm-llnl/slurmd
SlurmUser=slurm
StateSaveLocation=/home/slurm
SwitchType=switch/none
TaskPlugin=task/cgroup
TmpFS=/scratch
InactiveLimit=0
KillWait=30
MinJobAge=300
SlurmctldTimeout=120
SlurmdTimeout=300
Waittime=0
FastSchedule=1
SchedulerType=sched/backfill
SchedulerPort=7321
SelectType=select/cons_res
SelectTypeParameters=CR_Core
PriorityType=priority/multifactor
PriorityDecayHalfLife=7-0
PriorityCalcPeriod=60
PriorityFavorSmall=YES
PriorityMaxAge=7-0
PriorityUsageResetPeriod=WEEKLY
PriorityWeightAge=1000
PriorityWeightFairshare=2000
PriorityWeightJobSize=3000
PriorityWeightPartition=5000
PriorityWeightQOS=0
MaxArraySize=10000
MaxJobCount=100000
AccountingStoreJobComment=YES
AccountingStorageEnforce=limits
JobCompType=jobcomp/filetxt
JobCompLoc=/var/log/slurm-llnl/job_completions
JobCompHost=xx.xx.xx.xx (removed this)
JobAcctGatherFrequency=30
JobAcctGatherType=jobacct_gather/none
SlurmctldDebug=7
SlurmctldLogFile=/var/log/slurm-llnl/slurmctld.log
SlurmdDebug=7
SlurmdLogFile=/var/log/slurm-llnl/slurmd.log
NodeName=xxxxxxxxxxxxxxxxxxxxx NodeAddr=xxxxxxxx RealMemory=84000 Sockets=2 CoresPerSocket=24 ThreadsPerCore=2 Procs=96
PartitionName=debug Priority=8000 Nodes=xxxxxxxxxxxxxxx AllowQOS=restrained Default=NO MaxTime=INFINITE State=UP Shared=NO
PartitionName=project Priority=9000 Nodes=xxxxxxxxxxxxx MaxTime=INFINITE AllowQOS=maxjobs Default=NO State=UP Shared=NO
PartitionName=cpu Priority=5000 Nodes=xxxxxxxxxxxxxxx MaxTime=INFINITE AllowQOS=cpuonly Default=YES State=UP Shared=NO
PartitionName=gpu Priority=4000 Nodes=xxxxxxxxxxxxxx MaxTime=INFINITE MaxCPUsPerNode=8 Default=NO State=UP Shared=NO
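If it helps with diagnosis, these are the standard SLURM commands I can run to show what the node and partitions actually report; happy to post their output as well:

# What SLURM thinks the node provides (CPUs, sockets, cores, threads)
scontrol show node

# Limits on the partition the VirtualFlow jobs run in
scontrol show partition project

# How many CPUs the running jobs were actually allocated (%C = CPU count)
squeue -u $USER -o "%.10i %.9P %.8T %.10M %C"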
Many thanks for any suggestions you can offer.
-Byron