% Conference paper from ICAC 2015 (citation key kept from the original export).
% Cleanup notes: the title is stored in Title Case with the proper noun braced;
% names use the unambiguous "Last, First" form; acronyms in booktitle are
% brace-protected; the former series field was a verbatim duplicate of
% booktitle (it made styles print the proceedings name twice) and was dropped.
@inproceedings{becec65d8a4f4032a1c05550e1c0b7ff,
  author    = {Gandhi, Anshul and Dube, Parijat and Kochut, Andrzej and Zhang, Li},
  title     = {Model-Driven Autoscaling for {Hadoop} Clusters},
  booktitle = {Proceedings - {IEEE} International Conference on Autonomic Computing, {ICAC} 2015},
  editor    = {Lalanda, Philippe and Kounev, Samuel and Diaconescu, Ada and Cherkasova, Lucy},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {155--156},
  year      = {2015},
  month     = sep,
  day       = {14},
  doi       = {10.1109/ICAC.2015.50},
  language  = {English},
  keywords  = {Auto Scaling, Hadoop, Performance Modeling},
  abstract  = {In this paper, we present the design and implementation of a model-driven auto scaling solution for Hadoop clusters. We first develop novel performance models for Hadoop workloads that relate job completion times to various workload and system parameters such as input size and resource allocation. We then employ statistical techniques to tune the models for specific workloads, including Terasort and K-means. Finally, we employ the tuned models to determine the resources required to successfully complete the Hadoop jobs as per the user-specified response time SLA. We implement our solution on an Open Stack-based cloud cluster running Hadoop. Our experimental results across different workloads demonstrate the auto scaling capabilities of our solution, and enable significant resource savings without compromising performance.},
  note      = {Publisher Copyright: {\textcopyright} 2015 IEEE.; 12th IEEE International Conference on Autonomic Computing, ICAC 2015 ; Conference date: 07-07-2015 Through 10-07-2015},
}