% Conference paper (FORTE) in the proceedings of IEEE Big Data 2021.
% Export metadata (funding, copyright, conference dates) is stored in
% `annote` rather than `note`, so that standard styles do not print it.
% The duplicate `series` field (identical to `booktitle`) was dropped.
% NOTE(review): `address` holds a country, not the publisher's city; confirm and refine.
@inproceedings{dad372c2b5394409bc8366c2466b9a83,
  author    = {Wang, Zichao and Zhang, Mengxue and Baraniuk, Richard G. and Lan, Andrew S.},
  title     = {Scientific Formula Retrieval via Tree Embeddings},
  booktitle = {Proceedings - 2021 {IEEE} International Conference on Big Data, Big Data 2021},
  editor    = {Chen, Yixin and Ludwig, Heiko and Tu, Yicheng and Fayyad, Usama and Zhu, Xingquan and Hu, Xiaohua Tony and Byna, Suren and Liu, Xiong and Zhang, Jianping and Pan, Shirui and Papalexakis, Vagelis and Wang, Jianwu and Cuzzocrea, Alfredo and Ordonez, Carlos},
  pages     = {1493--1503},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2021},
  doi       = {10.1109/BigData52589.2021.9671942},
  language  = {English (US)},
  keywords  = {generative models, information retrieval, representation learning, scientific formulae understanding, tree-structured data},
  abstract  = {Exploiting the ever-growing corpus of scientific content calls for new ways and means to effectively organize, search, and retrieve scientific formulae. We propose a new data-driven framework for retrieving similar scientific formulae via learned formula representations based on tree embeddings. FORTE (for FOrmula Representation learning via Tree Embeddings) leverages operator tree representations of symbolic scientific formulae (such as math equations) to explicitly capture their inherent structural and semantic properties. FORTE employs i) a tree encoder that encodes the formula's operator tree into an embedding vector and ii) a tree decoder that directly generates a formula's operator tree from the embedding vector. We also develop a novel tree beam search algorithm that improves the quality of the decoded operator trees. We demonstrate that FORTE (sometimes significantly) outperforms various baseline methods on formula reconstruction and retrieval using a real-world dataset comprising 770k scientific formulae collected on-line.},
  annote    = {Funding Information: This work is supported by NSF grants 1842378, 1937134, 1917713, 2118706, ONR grant N0014-20-1-2534, AFOSR grant FA9550-18-1-0478, and a Vannevar Bush Faculty Fellowship, ONR grant N00014-18-1-2047. Publisher Copyright: {\textcopyright} 2021 IEEE.; 2021 IEEE International Conference on Big Data, Big Data 2021 ; Conference date: 15-12-2021 Through 18-12-2021},
}