% Workshop paper on a Slurm simulator (see booktitle/series below).
% Cleaned from an automatic export: literal braces removed from author names,
% DOI stored bare, export boilerplate moved out of the printed note field into
% eventtitle / eventdate / copyright.
% NOTE(review): address holds a country rather than the publisher's city, and
% the publisher name differs from the one in the copyright line -- confirm both
% against the publisher's record before relying on them.
@inproceedings{e3e1a6d146b34defb8ef5c344b5bd082,
  author     = {Simakov, Nikolay A. and Innus, Martins D. and Jones, Matthew D. and DeLeon, Robert L. and White, Joseph P. and Gallo, Steven M. and Patra, Abani K. and Furlani, Thomas R.},
  title      = {A {Slurm} Simulator: Implementation and Parametric Analysis},
  editor     = {Hammond, Simon and Jarvis, Stephen and Wright, Steven},
  booktitle  = {High Performance Computing Systems. Performance Modeling, Benchmarking, and Simulation -- 8th International Workshop, Proceedings},
  series     = {Lecture Notes in Computer Science},
  publisher  = {Springer Verlag},
  address    = {Germany},
  year       = {2018},
  pages      = {197--217},
  isbn       = {9783319729701},
  doi        = {10.1007/978-3-319-72971-8_10},
  language   = {english},
  keywords   = {Batch jobs scheduler, HPC, SLURM, Simulator},
  abstract   = {Slurm is an open-source resource manager for HPC that provides high configurability for inhomogeneous resources and job scheduling. Various Slurm parametric settings can significantly influence HPC resource utilization and job wait time, however in many cases it is hard to judge how these options will affect the overall HPC resource performance. The Slurm simulator can be a very helpful tool to aid parameter selection for a particular HPC resource. Here, we report our implementation of a Slurm simulator and the impact of parameter choice on HPC resource performance. The simulator is based on a real Slurm instance with modifications to allow simulation of historical jobs and to improve the simulation speed. The simulator speed heavily depends on job composition, HPC resource size and Slurm configuration. For an 8000 cores heterogeneous cluster, we achieve about 100 times acceleration, e.g. 20 days can be simulated in 5h. Several parameters affecting job placement were studied. Disabling node sharing on our 8000 core cluster showed a 45\% increase in the time needed to complete the same workload. For a large system ($>$6000 nodes) comprised of two distinct sub-clusters, two separate Slurm controllers and adding node sharing can cut waiting times nearly in half.},
  eventtitle = {8th International Workshop on Performance Modeling, Benchmarking and Simulation of High Performance Computing Systems, {PMBS} 2017},
  eventdate  = {2017-11-13},
  copyright  = {{\textcopyright} Springer International Publishing AG 2018},
}