@inproceedings{e306ed693d4a4dd1af998746bbb52d11,
  title     = {A Weighted Aggregating {SGD} for Scalable Parallelization in Deep Learning},
  abstract  = {We investigate the stochastic optimization problem and develop a scalable parallel computing algorithm for deep learning tasks. The key of our study involves a reformation of the objective function for the stochastic optimization in neural network models. We propose a novel update rule, named weighted aggregating stochastic gradient descent, after theoretically analyzing the characteristics of the newly formalized objective function. The new rule introduces a weighted aggregation scheme based on the performance of local workers and does not require a center variable. It assesses the relative importance of local workers and accepts them according to their contributions. Our new rule also allows the implementation of both synchronous and asynchronous parallelization and can result in varying convergence rates. For method evaluation, we benchmark our schemes against the mainstream algorithms, including the elastic averaging SGD in training deep neural networks for classification tasks. We conduct extensive experiments on several classic datasets, and the results confirm the strength of our scheme in accelerating the training of deep architecture and scalable parallelization.},
  keywords  = {Deep learning, Parallel Computing, Stochastic gradient descent},
  author    = {Guo, Pengzhan and Ye, Zeyang and Xiao, Keli},
  note      = {Publisher Copyright: {\textcopyright} 2019 IEEE.; 19th IEEE International Conference on Data Mining, ICDM 2019 ; Conference date: 08-11-2019 Through 11-11-2019},
  year      = {2019},
  month     = nov,
  doi       = {10.1109/ICDM.2019.00126},
  language  = {English},
  series    = {Proceedings - {IEEE} International Conference on Data Mining, {ICDM}},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {1072--1077},
  editor    = {Wang, Jianyong and Shim, Kyuseok and Wu, Xindong},
  booktitle = {Proceedings - 19th {IEEE} International Conference on Data Mining, {ICDM} 2019},
}