% Journal article record: Sapoval et al., Nature Communications 13(1), 2022.
% `pages` holds the single value 1728 (presumably the article number rather
% than a page range -- confirm against the publisher record).
% Fields such as abstract, keywords, day and language are not part of the
% classic BibTeX @article schema; standard styles ignore unknown fields.
@article{bb8ec83b33c545688fa307460d493d4f,
  author    = {Sapoval, Nicolae and Aghazadeh, Amirali and Nute, Michael G. and Antunes, Dinler A. and Balaji, Advait and Baraniuk, Richard and Barberan, C. J. and Dannenfelser, Ruth and Dun, Chen and Edrisi, Mohammadamin and Elworth, R. A. Leo and Kille, Bryce and Kyrillidis, Anastasios and Nakhleh, Luay and Wolfe, Cameron R. and Yan, Zhi and Yao, Vicky and Treangen, Todd J.},
  title     = {Current progress and open challenges for applying deep learning across the biosciences},
  journal   = {Nature Communications},
  volume    = {13},
  number    = {1},
  pages     = {1728},
  year      = {2022},
  month     = apr,
  day       = {1},
  publisher = {Nature Publishing Group},
  issn      = {2041-1723},
  doi       = {10.1038/s41467-022-29268-7},
  language  = {English (US)},
  keywords  = {Computational Biology, Deep Learning, Phylogeny, Proteins, Systems Biology},
  abstract  = {Deep Learning (DL) has recently enabled unprecedented advances in one of the grand challenges in computational biology: the half-century-old problem of protein structure prediction. In this paper we discuss recent advances, limitations, and future perspectives of DL on five broad areas: protein structure prediction, protein function prediction, genome engineering, systems biology and data integration, and phylogenetic inference. We discuss each application area and cover the main bottlenecks of DL approaches, such as training data, problem scope, and the ability to leverage existing DL architectures in new contexts. To conclude, we provide a summary of the subject-specific and general challenges for DL across the biosciences.},
  note      = {Funding Information: A.A. is supported by the ARO (W911NF2110117). R.B. and CJ.B. are supported by NSF grants CCF-1911094, IIS-1838177, and IIS-1730574; ONR grants N00014-18-12571, N00014-20-1-2534, and MURI N00014-20-1-2787; AFOSR grant FA9550-18-1-0478; and a Vannevar Bush Faculty Fellowship, ONR grant N00014-18-1-2047. M.N and R.A.L.E. are supported by a training fellowship from the Gulf Coast Consortia, on the NLM Training Program in Biomedical Informatics \& Data Science (T15LM007093). D.A.A. is partially supported by funds from the University of Houston. A.B., B.K., N.S., and T.J.T are partially supported by funds from the FunGCAT program from the Office of the Director of National Intelligence (ODNI), Intelligence Advanced Research Projects Activity (IARPA), via the Army Research Office (ARO) under Federal Award No. W911NF-17-2-0089. T.J.T is supported by NIH grant P01AI152999 and by NSF grant EF-212638. B.K. is supported by a fellowship from the National Library of Medicine Training Program in Biomedical Informatics and Data Science (5T15LM007093-30, PI: Kavraki). Z.Y., M.E., and L.N. are supported by NSF grants DBI-2030604 and IIS-2106837. R.D. and V.Y. are supported by Cancer Prevention \& Research Institute of Texas (CPRIT) Award (RR190065). V.Y. is a CPRIT Scholar in Cancer Research and also supported by NIH grant RF1AG054564. A.K. is supported by NSF grants CCF-1907936, CNS-2003137. Publisher Copyright: {\textcopyright} 2022, The Author(s).},
}