% Bibliography database: journal articles and preprints; every entry in this
% section lists Zixuan Cang as an author.
%
% Conventions used below:
%   - one field per line, lowercase field names, month as an unquoted macro;
%   - page ranges use a double hyphen;
%   - acronyms and proper nouns in titles are brace-protected against recasing;
%   - doi holds the bare DOI, url the resolver link;
%   - abstractNote is not a standard field; bibliography styles ignore it.
% Lines starting with a percent sign sit outside entries and are ignored.

% NOTE(review): bioRxiv preprint (DOI prefix 10.1101); journal added so the
% required field is not empty. The exported abstract was truncated and had lost
% the gene symbol; the closing "(LMNA) gene." is an inferred completion --
% re-import the full abstract from the DOI record.
@article{zaragoza_bui_widyastuti_mehrabi_cang_sha_grosberg_nie_2024,
  author       = {Zaragoza, Michael V. and Bui, Thuy-Anh and Widyastuti, Halida P. and Mehrabi, Mehrsa and Cang, Zixuan and Sha, Yutong and Grosberg, Anna and Nie, Qing},
  title        = {{LMNA}-Related Dilated Cardiomyopathy: Single-Cell Transcriptomics during Patient-derived {iPSC} Differentiation Support Cell type and Lineage-specific Dysregulation of Gene Expression and Development for Cardiomyocytes and Epicardium-Derived Cells with {Lamin A/C} Haploinsufficiency},
  journal      = {bioRxiv},
  year         = {2024},
  month        = jun,
  note         = {Preprint},
  doi          = {10.1101/2024.06.12.598335},
  url          = {https://doi.org/10.1101/2024.06.12.598335},
  abstractNote = {LMNA-Related Dilated Cardiomyopathy (DCM) is an autosomal-dominant genetic condition with cardiomyocyte and conduction system dysfunction often resulting in heart failure or sudden death. The condition is caused by mutation in the Lamin A/C (LMNA) gene.},
}

% NOTE(review): journal was missing from the export; added from the DOI
% prefix 10.1093/bib -- confirm, and add volume/number/pages when available.
@article{huynh_cang_2024,
  author       = {Huynh, Tram and Cang, Zixuan},
  title        = {Topological and geometric analysis of cell states in single-cell transcriptomic data},
  journal      = {Briefings in Bioinformatics},
  year         = {2024},
  month        = apr,
  doi          = {10.1093/bib/bbae176},
  url          = {https://doi.org/10.1093/bib/bbae176},
}

% NOTE(review): volume was exported as 1 (the month number); 20 is the
% Nature Methods volume for 2023 -- confirm against the DOI record.
@article{cang_nie_2023,
  author       = {Cang, Zixuan and Nie, Qing},
  title        = {A mathematical method and software for spatially mapping intercellular communication},
  journal      = {Nature Methods},
  volume       = {20},
  year         = {2023},
  month        = jan,
  issn         = {1548-7105},
  doi          = {10.1038/s41592-022-01729-3},
}

@article{dover_cang_ma_nie_vershynin_2023,
  author       = {Dover, Kathryn and Cang, Zixuan and Ma, Anna and Nie, Qing and Vershynin, Roman},
  title        = {{AVIDA}: An alternating method for visualizing and integrating data},
  journal      = {Journal of Computational Science},
  volume       = {68},
  pages        = {101998},
  year         = {2023},
  month        = apr,
  issn         = {1877-7511},
  doi          = {10.1016/j.jocs.2023.101998},
  abstractNote = {High-dimensional multimodal data arise in many scientific fields. The integration of multimodal data becomes challenging when there is no known correspondence between the samples and the features of different datasets. To tackle this challenge, we introduce AVIDA, a framework for simultaneously performing data alignment and dimension reduction. In the numerical experiments, Gromov–Wasserstein optimal transport and t-distributed stochastic neighbor embedding are used as the alignment and dimension reduction modules respectively. We show that by alternating dimension reduction and alignment, AVIDA aligns the representations of high-dimensional datasets without common features with four synthesized datasets and two real multimodal single-cell datasets. Compared to several existing methods, we demonstrate that AVIDA better preserves structures of individual datasets, especially distinct local structures in the joint low-dimensional representation, while achieving comparable alignment performance. Such a property is important in multimodal single-cell data analysis as some biological processes are uniquely captured by one of the datasets. In general applications, other methods can be used for the alignment and dimension reduction modules.},
}

% NOTE(review): volume was exported as 1 (the month number); 20 is the
% Nature Methods volume for 2023 -- confirm against the DOI record.
% Published version of cang_zhao_almet_stabell_ramos_plikus_atwood_nie_2022.
@article{cang_zhao_almet_stabell_ramos_plikus_atwood_nie_2023,
  author       = {Cang, Zixuan and Zhao, Yanxiang and Almet, Axel A. and Stabell, Adam and Ramos, Raul and Plikus, Maksim V. and Atwood, Scott X. and Nie, Qing},
  title        = {Screening cell-cell communication in spatial transcriptomics via collective optimal transport},
  journal      = {Nature Methods},
  volume       = {20},
  year         = {2023},
  month        = jan,
  issn         = {1548-7105},
  doi          = {10.1038/s41592-022-01728-4},
  url          = {https://doi.org/10.1038/s41592-022-01728-4},
}

@article{walker_cang_ren_bourgain-chang_nie_2022,
  author       = {Walker, Benjamin L. and Cang, Zixuan and Ren, Honglei and Bourgain-Chang, Eric and Nie, Qing},
  title        = {Deciphering tissue structure and function using spatial transcriptomics},
  journal      = {Communications Biology},
  year         = {2022},
  month        = mar,
  doi          = {10.1038/s42003-022-03175-5},
  url          = {https://doi.org/10.1038/s42003-022-03175-5},
}

@article{ren_walker_cang_nie_2022,
  author       = {Ren, Honglei and Walker, Benjamin L. and Cang, Zixuan and Nie, Qing},
  title        = {Identifying multicellular spatiotemporal organization of cells with {SpaceFlow}},
  journal      = {Nature Communications},
  volume       = {13},
  number       = {1},
  year         = {2022},
  month        = jul,
  issn         = {2041-1723},
  doi          = {10.1038/s41467-022-31739-w},
  url          = {https://doi.org/10.1038/s41467-022-31739-w},
}

% bioRxiv preprint (DOI prefix 10.1101) of
% cang_zhao_almet_stabell_ramos_plikus_atwood_nie_2023.
@article{cang_zhao_almet_stabell_ramos_plikus_atwood_nie_2022,
  author       = {Cang, Zixuan and Zhao, Yanxiang and Almet, Axel A. and Stabell, Adam and Ramos, Raul and Plikus, Maksim and Atwood, Scott X. and Nie, Qing},
  title        = {Screening cell-cell communication in spatial transcriptomics via collective optimal transport},
  journal      = {bioRxiv},
  year         = {2022},
  month        = aug,
  note         = {Preprint},
  doi          = {10.1101/2022.08.24.505185},
  url          = {https://doi.org/10.1101/2022.08.24.505185},
}

@article{cang_wang_wang_cho_holmes_nie_2021,
  author       = {Cang, Zixuan and Wang, Yangyang and Wang, Qixuan and Cho, Ken W. Y. and Holmes, William and Nie, Qing},
  title        = {A multiscale model via single-cell transcriptomics reveals robust patterning mechanisms during early mammalian embryo development},
  journal      = {PLOS Computational Biology},
  volume       = {17},
  number       = {3},
  pages        = {e1008571},
  year         = {2021},
  month        = mar,
  editor       = {Umulis, David},
  publisher    = {Public Library of Science (PLoS)},
  doi          = {10.1371/journal.pcbi.1008571},
  url          = {https://doi.org/10.1371/journal.pcbi.1008571},
  abstractNote = {During early mammalian embryo development, a small number of cells make robust fate decisions at particular spatial locations in a tight time window to form inner cell mass (ICM), and later epiblast (Epi) and primitive endoderm (PE). While recent single-cell transcriptomics data allows scrutinization of heterogeneity of individual cells, consistent spatial and temporal mechanisms the early embryo utilize to robustly form the Epi/PE layers from ICM remain elusive. Here we build a multiscale three-dimensional model for mammalian embryo to recapitulate the observed patterning process from zygote to late blastocyst. By integrating the spatiotemporal information reconstructed from multiple single-cell transcriptomic datasets, the data-informed modeling analysis suggests two major processes critical to the formation of Epi/PE layers: a selective cell-cell adhesion mechanism (via EphA4/EphrinB2) for fate-location coordination and a temporal attenuation mechanism of cell signaling (via Fgf). Spatial imaging data and distinct subsets of single-cell gene expression data are then used to validate the predictions. Together, our study provides a multiscale framework that incorporates single-cell gene expression datasets to analyze gene regulations, cell-cell communications, and physical interactions among cells in complex geometries at single-cell resolution, with direct application to late-stage development of embryogenesis.},
}

@article{zhu_du_nomura_gao_cang_wei_gordon_gurevitz_groome_dong_2021,
  author       = {Zhu, Qing and Du, Yuzhe and Nomura, Yoshiko and Gao, Rong and Cang, Zixuan and Wei, Guo-Wei and Gordon, Dalia and Gurevitz, Michael and Groome, James and Dong, Ke},
  title        = {Charge substitutions at the voltage-sensing module of domain {III} enhance actions of site-3 and site-4 toxins on an insect sodium channel},
  journal      = {Insect Biochemistry and Molecular Biology},
  volume       = {137},
  pages        = {103625},
  year         = {2021},
  month        = oct,
  publisher    = {Elsevier BV},
  doi          = {10.1016/j.ibmb.2021.103625},
  url          = {https://doi.org/10.1016/j.ibmb.2021.103625},
  abstractNote = {Scorpion α-toxins bind at the pharmacologically-defined site-3 on the sodium channel and inhibit channel inactivation by preventing the outward movement of the voltage sensor in domain IV (IVS4), whereas scorpion β-toxins bind at site-4 on the sodium channel and enhance channel activation by trapping the voltage sensor of domain II (IIS4) in its outward position. However, limited information is available on the role of the voltage-sensing modules (VSM, comprising S1-S4) of domains I and III in toxin actions. We have previously shown that charge reversing substitutions of the innermost positively-charged residues in IIIS4 (R4E, R5E) increase the activity of an insect-selective site-4 scorpion toxin, Lqh-dprIT3-c, on BgNav1-1a, a cockroach sodium channel. Here we show that substitutions R4E and R5E in IIIS4 also increase the activity of two site-3 toxins, LqhαIT from Leiurus quinquestriatus hebraeus and insect-selective Av3 from Anemonia viridis. Furthermore, charge reversal of either of two conserved negatively-charged residues, D1K and E2K, in IIIS2 also increase the action of the site-3 and site-4 toxins. Homology modeling suggests that S2-D1 and S2-E2 interact with S4-R4 and S4-R5 in the VSM of domain III (III-VSM), respectively, in the activated state of the channel. However, charge swapping between S2-D1 and S4-R4 had no compensatory effects on gating or toxin actions, suggesting that charged residue interactions are complex. Collectively, our results highlight the involvement of III-VSM in the actions of both site 3 and site 4 toxins, suggesting that charge reversing substitutions in III-VSM allosterically facilitate IIS4 or IVS4 voltage sensor trapping by these toxins.},
}

@article{maseda_cang_nie_2021,
  author       = {Maseda, Floyd and Cang, Zixuan and Nie, Qing},
  title        = {{DEEPsc}: A Deep Learning-Based Map Connecting Single-Cell Transcriptomics and Spatial Imaging Data},
  journal      = {Frontiers in Genetics},
  volume       = {12},
  year         = {2021},
  month        = mar,
  publisher    = {Frontiers Media SA},
  doi          = {10.3389/fgene.2021.636743},
  url          = {https://doi.org/10.3389/fgene.2021.636743},
  abstractNote = {Single-cell RNA sequencing (scRNA-seq) data provides unprecedented information on cell fate decisions; however, the spatial arrangement of cells is often lost. Several recent computational methods have been developed to impute spatial information onto a scRNA-seq dataset through analyzing known spatial expression patterns of a small subset of genes known as a reference atlas. However, there is a lack of comprehensive analysis of the accuracy, precision, and robustness of the mappings, along with the generalizability of these methods, which are often designed for specific systems. We present a system-adaptive deep learning-based method (DEEPsc) to impute spatial information onto a scRNA-seq dataset from a given spatial reference atlas. By introducing a comprehensive set of metrics that evaluate the spatial mapping methods, we compare DEEPsc with four existing methods on four biological systems. We find that while DEEPsc has comparable accuracy to other methods, an improved balance between precision and robustness is achieved. DEEPsc provides a data-adaptive tool to connect scRNA-seq datasets and spatial imaging datasets to analyze cell fate decisions. Our implementation with a uniform API can serve as a portal with access to all the methods investigated in this work for spatial exploration of cell fate decisions in scRNA-seq data. All methods evaluated in this work are implemented as an open-source software with a uniform interface.},
}

@article{tatarakis_cang_wu_sharma_karikomi_maclean_nie_schilling_2021,
  author       = {Tatarakis, David and Cang, Zixuan and Wu, Xiaojun and Sharma, Praveer P. and Karikomi, Matthew and MacLean, Adam L. and Nie, Qing and Schilling, Thomas F.},
  title        = {Single-cell transcriptomic analysis of zebrafish cranial neural crest reveals spatiotemporal regulation of lineage decisions during development},
  journal      = {Cell Reports},
  year         = {2021},
  month        = dec,
  doi          = {10.1016/j.celrep.2021.110140},
  url          = {https://doi.org/10.1016/j.celrep.2021.110140},
  abstractNote = {Neural crest (NC) cells migrate throughout vertebrate embryos to give rise to a huge variety of cell types, but when and where lineages emerge and their regulation remain unclear. We have performed single-cell RNA sequencing (RNA-seq) of cranial NC cells from the first pharyngeal arch in zebrafish over several stages during migration. Computational analysis combining pseudotime and real-time data reveals that these NC cells first adopt a transitional state, becoming specified mid-migration, with the first lineage decisions being skeletal and pigment, followed by neural and glial progenitors. In addition, by computationally integrating these data with RNA-seq data from a transgenic Wnt reporter line, we identify gene cohorts with similar temporal responses to Wnts during migration and show that one, Atp6ap2, is required for melanocyte differentiation. Together, our results show that cranial NC cell lineages arise progressively and uncover a series of spatially restricted cell interactions likely to regulate such cell-fate decisions.},
}

@article{almet_cang_jin_nie_2021,
  author       = {Almet, Axel A. and Cang, Zixuan and Jin, Suoqin and Nie, Qing},
  title        = {The landscape of cell--cell communication through single-cell transcriptomics},
  journal      = {Current Opinion in Systems Biology},
  year         = {2021},
  month        = jun,
  doi          = {10.1016/j.coisb.2021.03.007},
  url          = {https://doi.org/10.1016/j.coisb.2021.03.007},
  abstractNote = {Cell-cell communication is a fundamental process that shapes biological tissue. Historically, studies of cell-cell communication have been feasible for one or two cell types and a few genes. With the emergence of single-cell transcriptomics, we are now able to examine the genetic profiles of individual cells at unprecedented scale and depth. The availability of such data presents an exciting opportunity to construct a more comprehensive description of cell-cell communication. This review discusses the recent explosion of methods that have been developed to infer cell-cell communication from non-spatial and spatial single-cell transcriptomics, two promising technologies which have complementary strengths and limitations. We propose several avenues to propel this rapidly expanding field forward in meaningful ways.},
}

@article{nguyen_cang_wei_2020,
  author       = {Nguyen, Duc Duy and Cang, Zixuan and Wei, Guo-Wei},
  title        = {A review of mathematical representations of biomolecular data},
  journal      = {Physical Chemistry Chemical Physics},
  year         = {2020},
  publisher    = {Royal Society of Chemistry (RSC)},
  doi          = {10.1039/C9CP06554G},
  url          = {https://doi.org/10.1039/C9CP06554G},
  abstractNote = {Recently, machine learning (ML) has established itself in various worldwide benchmarking competitions in computational biology, including Critical Assessment of Structure Prediction (CASP) and Drug Design Data Resource (D3R) Grand Challenges.},
}

@article{wang_cang_wei_2020,
  author       = {Wang, Menglun and Cang, Zixuan and Wei, Guo-Wei},
  title        = {A topology-based network tree for the prediction of protein--protein binding affinity changes following mutation},
  journal      = {Nature Machine Intelligence},
  year         = {2020},
  month        = feb,
  doi          = {10.1038/s42256-020-0149-6},
  url          = {https://doi.org/10.1038/s42256-020-0149-6},
  abstractNote = {The ability to predict protein–protein interactions is crucial to our understanding of a wide range of biological activities and functions in the human body, and for guiding drug discovery. Despite considerable efforts to develop suitable computational methods, predicting protein–protein interaction binding affinity changes following mutation (ΔΔG) remains a severe challenge. Algebraic topology, a champion in recent worldwide competitions for protein–ligand binding affinity predictions, is a promising approach to simplifying the complexity of biological structures. Here we introduce element- and site-specific persistent homology (a new branch of algebraic topology) to simplify the structural complexity of protein–protein complexes and embed crucial biological information into topological invariants. We also propose a new deep learning algorithm called NetTree to take advantage of convolutional neural networks and gradient-boosting trees. A topology-based network tree is constructed by integrating the topological representation and NetTree for predicting protein–protein interaction ΔΔG. Tests on major benchmark datasets indicate that the proposed topology-based network tree is an important improvement over the current state of the art in predicting ΔΔG. Persistent homology provides an efficient approach to simplifying the complexity of protein structure. Wang et al. combine this approach with convolutional neural networks and gradient-boosting trees to improve predictions of protein–protein interactions.},
}

% The exported key contained a space ("..._et al._2020"), which BibTeX cannot
% parse as a key; it is now "..._etal_2020". The truncated author list is
% expressed with the standard "and others" token.
@article{haensel_jin_sun_cinco_dragan_nguyen_cang_gong_vu_maclean_etal_2020,
  author       = {Haensel, Daniel and Jin, Suoqin and Sun, Peng and Cinco, Rachel and Dragan, Morgan and Nguyen, Quy and Cang, Zixuan and Gong, Yanwen and Vu, Remy and MacLean, Adam L. and others},
  title        = {Defining Epidermal Basal Cell States during Skin Homeostasis and Wound Healing Using Single-Cell Transcriptomics},
  journal      = {Cell Reports},
  volume       = {30},
  number       = {11},
  pages        = {3932--3947.e6},
  year         = {2020},
  month        = mar,
  publisher    = {Elsevier BV},
  doi          = {10.1016/j.celrep.2020.02.091},
  url          = {https://doi.org/10.1016/j.celrep.2020.02.091},
  abstractNote = {Our knowledge of transcriptional heterogeneities in epithelial stem and progenitor cell compartments is limited. Epidermal basal cells sustain cutaneous tissue maintenance and drive wound healing. Previous studies have probed basal cell heterogeneity in stem and progenitor potential, but a comprehensive dissection of basal cell dynamics during differentiation is lacking. Using single-cell RNA sequencing coupled with RNAScope and fluorescence lifetime imaging, we identify three non-proliferative and one proliferative basal cell state in homeostatic skin that differ in metabolic preference and become spatially partitioned during wound re-epithelialization. Pseudotemporal trajectory and RNA velocity analyses predict a quasi-linear differentiation hierarchy where basal cells progress from Col17a1Hi/Trp63Hi state to early-response state, proliferate at the juncture of these two states, or become growth arrested before differentiating into spinous cells. Wound healing induces plasticity manifested by dynamic basal-spinous interconversions at multiple basal transcriptional states. Our study provides a systematic view of epidermal cellular dynamics, supporting a revised “hierarchical-lineage” model of homeostasis.},
}

@article{cang_munch_wei_2020,
  author       = {Cang, Zixuan and Munch, Elizabeth and Wei, Guo-Wei},
  title        = {Evolutionary homology on coupled dynamical systems with applications to protein flexibility analysis},
  journal      = {Journal of Applied and Computational Topology},
  volume       = {4},
  number       = {4},
  pages        = {481--507},
  year         = {2020},
  month        = dec,
  publisher    = {Springer Science and Business Media LLC},
  doi          = {10.1007/s41468-020-00057-9},
  url          = {https://doi.org/10.1007/s41468-020-00057-9},
  abstractNote = {While the spatial topological persistence is naturally constructed from a radius-based filtration, it has hardly been derived from a temporal filtration. Most topological models are designed for the global topology of a given object as a whole. There is no method reported in the literature for the topology of an individual component in an object to the best of our knowledge. For many problems in science and engineering, the topology of an individual component is important for describing its properties. We propose evolutionary homology (EH) constructed via a time evolution-based filtration and topological persistence. Our approach couples a set of dynamical systems or chaotic oscillators by the interactions of a physical system, such as a macromolecule. The interactions are approximated by weighted graph Laplacians. Simplices, simplicial complexes, algebraic groups and topological persistence are defined on the coupled trajectories of the chaotic oscillators. The resulting EH gives rise to time-dependent topological invariants or evolutionary barcodes for an individual component of the physical system, revealing its topology-function relationship. In conjunction with Wasserstein metrics, the proposed EH is applied to protein flexibility analysis, an important problem in computational biophysics. Numerical results for the B-factor prediction of a benchmark set of 364 proteins indicate that the proposed EH outperforms all the other state-of-the-art methods in the field.},
}

@article{cang_nie_2020,
  author       = {Cang, Zixuan and Nie, Qing},
  title        = {Inferring spatial and signaling relationships between cells from single cell transcriptomic data},
  journal      = {Nature Communications},
  year         = {2020},
  month        = apr,
  doi          = {10.1038/s41467-020-15968-5},
  url          = {https://doi.org/10.1038/s41467-020-15968-5},
}

@article{cang_wei_2020,
  author       = {Cang, Zixuan and Wei, Guo-Wei},
  title        = {Persistent Cohomology for Data With Multicomponent Heterogeneous Information},
  journal      = {SIAM Journal on Mathematics of Data Science},
  volume       = {2},
  number       = {2},
  pages        = {396--418},
  year         = {2020},
  month        = jan,
  publisher    = {Society for Industrial \& Applied Mathematics (SIAM)},
  doi          = {10.1137/19M1272226},
  url          = {https://doi.org/10.1137/19M1272226},
  abstractNote = {Persistent homology is a powerful tool for characterizing the topology of a data set at various geometric scales. When applied to the description of molecular structures, persistent homology can capture the multiscale geometric features and reveal certain interaction patterns in terms of topological invariants. However, in addition to the geometric information, there is a wide variety of nongeometric information of molecular structures, such as element types, atomic partial charges, atomic pairwise interactions, and electrostatic potential functions, that is not described by persistent homology. Although element-specific homology and electrostatic persistent homology can encode some nongeometric information into geometry based topological invariants, it is desirable to have a mathematical paradigm to systematically embed both geometric and nongeometric information, i.e., multicomponent heterogeneous information, into unified topological representations. To this end, we propose a persistent cohomology based framework for the enriched representation of data. In our framework, nongeometric information can either be distributed globally or reside locally on the datasets in the geometric sense and can be properly defined on topological spaces, i.e., simplicial complexes. Using the proposed persistent cohomology based framework, enriched barcodes are extracted from datasets to represent heterogeneous information. We consider a variety of datasets to validate the present formulation and illustrate the usefulness of the proposed method based on persistent cohomology. It is found that the proposed framework outperforms or at least matches the state-of-the-art methods in the protein-ligand binding affinity prediction from massive biomolecular datasets without resorting to any deep learning formulation.},
}

@article{guo_cang_yao_kim_deans_wei_kang_hong_2020,
  author       = {Guo, Ruiqiong and Cang, Zixuan and Yao, Jiaqi and Kim, Miyeon and Deans, Erin and Wei, Guo-Wei and Kang, Seung-gu and Hong, Heedeok},
  title        = {Structural cavities are critical to balancing stability and activity of a membrane-integral enzyme},
  journal      = {Proceedings of the National Academy of Sciences},
  volume       = {117},
  number       = {36},
  pages        = {22146--22156},
  year         = {2020},
  month        = sep,
  publisher    = {Proceedings of the National Academy of Sciences},
  doi          = {10.1073/pnas.1917770117},
  url          = {https://doi.org/10.1073/pnas.1917770117},
}

@article{nguyen_cang_wu_wang_cao_wei_2019,
  author       = {Nguyen, Duc Duy and Cang, Zixuan and Wu, Kedi and Wang, Menglun and Cao, Yin and Wei, Guo-Wei},
  title        = {Mathematical deep learning for pose and binding affinity prediction and ranking in {D3R} {Grand} {Challenges}},
  journal      = {Journal of Computer-Aided Molecular Design},
  volume       = {33},
  number       = {1},
  pages        = {71--82},
  year         = {2019},
  month        = jan,
  publisher    = {Springer Science and Business Media LLC},
  doi          = {10.1007/s10822-018-0146-6},
  url          = {https://doi.org/10.1007/s10822-018-0146-6},
  abstractNote = {Advanced mathematics, such as multiscale weighted colored subgraph and element specific persistent homology, and machine learning including deep neural networks were integrated to construct mathematical deep learning models for pose and binding affinity prediction and ranking in the last two D3R Grand Challenges in computer-aided drug design and discovery. D3R Grand Challenge 2 focused on the pose prediction, binding affinity ranking and free energy prediction for Farnesoid X receptor ligands. Our models obtained the top place in absolute free energy prediction for free energy set 1 in stage 2. The latest competition, D3R Grand Challenge 3 (GC3), is considered as the most difficult challenge so far. It has five subchallenges involving Cathepsin S and five other kinase targets, namely VEGFR2, JAK2, p38-α, TIE2, and ABL1. There is a total of 26 official competitive tasks for GC3. Our predictions were ranked 1st in 10 out of these 26 tasks.},
}

@article{cang_wei_2018,
  author       = {Cang, Zixuan and Wei, Guo-Wei},
  title        = {Integration of element specific persistent homology and machine learning for protein-ligand binding affinity prediction},
  journal      = {International Journal for Numerical Methods in Biomedical Engineering},
  year         = {2018},
  month        = feb,
  doi          = {10.1002/cnm.2914},
  url          = {https://doi.org/10.1002/cnm.2914},
}

@article{zhao_cang_tong_wei_2018,
  author       = {Zhao, Rundong and Cang, Zixuan and Tong, Yiying and Wei, Guo-Wei},
  title        = {Protein pocket detection via convex hull surface evolution and associated {Reeb} graph},
  journal      = {Bioinformatics},
  volume       = {34},
  number       = {17},
  pages        = {i830--i837},
  year         = {2018},
  month        = sep,
  publisher    = {Oxford University Press (OUP)},
  doi          = {10.1093/bioinformatics/bty598},
  url          = {https://doi.org/10.1093/bioinformatics/bty598},
}

@article{cang_mu_wei_2018,
  author       = {Cang, Zixuan and Mu, Lin and Wei, Guo-Wei},
  title        = {Representability of algebraic topology for biomolecules in machine learning based scoring and virtual screening},
  journal      = {PLOS Computational Biology},
  volume       = {14},
  number       = {1},
  pages        = {e1005929},
  year         = {2018},
  month        = jan,
  editor       = {Peng, Jian},
  publisher    = {Public Library of Science (PLoS)},
  doi          = {10.1371/journal.pcbi.1005929},
  url          = {https://doi.org/10.1371/journal.pcbi.1005929},
  abstractNote = {This work introduces a number of algebraic topology approaches, including multi-component persistent homology, multi-level persistent homology, and electrostatic persistence for the representation, characterization, and description of small molecules and biomolecular complexes. In contrast to the conventional persistent homology, multi-component persistent homology retains critical chemical and biological information during the topological simplification of biomolecular geometric complexity. Multi-level persistent homology enables a tailored topological description of inter- and/or intra-molecular interactions of interest. Electrostatic persistence incorporates partial charge information into topological invariants. These topological methods are paired with Wasserstein distance to characterize similarities between molecules and are further integrated with a variety of machine learning algorithms, including k-nearest neighbors, ensemble of trees, and deep convolutional neural networks, to manifest their descriptive and predictive powers for protein-ligand binding analysis and virtual screening of small molecules. Extensive numerical experiments involving 4,414 protein-ligand complexes from the PDBBind database and 128,374 ligand-target and decoy-target pairs in the DUD database are performed to test respectively the scoring power and the discriminatory power of the proposed topological learning strategies. It is demonstrated that the present topological learning outperforms other existing methods in protein-ligand binding affinity prediction and ligand-decoy discrimination.},
}

% NOTE(review): volume was exported as 7 (the month number); 33 is the
% Bioinformatics volume for 2017 -- confirm against the DOI record and add
% number/pages. This key was also used by the TopologyNet entry below; the
% first occurrence keeps it.
@article{cang_wei_2017,
  author       = {Cang, Zixuan and Wei, Guo-Wei},
  title        = {Analysis and prediction of protein folding energy changes upon mutation by element specific persistent homology},
  journal      = {Bioinformatics},
  volume       = {33},
  year         = {2017},
  month        = jul,
  publisher    = {Oxford University Press (OUP)},
  doi          = {10.1093/bioinformatics/btx460},
  url          = {https://doi.org/10.1093/bioinformatics/btx460},
  abstractNote = {Motivation Site directed mutagenesis is widely used to understand the structure and function of biomolecules. Computational prediction of mutation impacts on protein stability offers a fast, economical and potentially accurate alternative to laboratory mutagenesis. Most existing methods rely on geometric descriptions, this work introduces a topology based approach to provide an entirely new representation of mutation induced protein stability changes that could not be obtained from conventional techniques. Results Topology based mutation predictor (T-MP) is introduced to dramatically reduce the geometric complexity and number of degrees of freedom of proteins, while element specific persistent homology is proposed to retain essential biological information. The present approach is found to outperform other existing methods in the predictions of globular protein stability changes upon mutation. A Pearson correlation coefficient of 0.82 with an RMSE of 0.92 kcal/mol is obtained on a test set of 350 mutation samples. For the prediction of membrane protein stability changes upon mutation, the proposed topological approach has a 84% higher Pearson correlation coefficient than the current state-of-the-art empirical methods, achieving a Pearson correlation of 0.57 and an RMSE of 1.09 kcal/mol in a 5-fold cross validation on a set of 223 membrane protein mutation samples. Availability and implementation http://weilab.math.msu.edu/TML/TML-MP/ Contact wei@math.msu.edu Supplementary information Supplementary data are available at Bioinformatics online.},
}

% Key disambiguated with a "b" suffix: the export reused cang_wei_2017 for
% this second 2017 paper, which BibTeX rejects as a repeated entry.
@article{cang_wei_2017b,
  author       = {Cang, Zixuan and Wei, Guo-Wei},
  title        = {{TopologyNet}: Topology based deep convolutional and multi-task neural networks for biomolecular property predictions},
  journal      = {PLOS Computational Biology},
  volume       = {13},
  number       = {7},
  pages        = {e1005690},
  year         = {2017},
  month        = jul,
  editor       = {Dunbrack, Roland L.},
  publisher    = {Public Library of Science (PLoS)},
  doi          = {10.1371/journal.pcbi.1005690},
  abstractNote = {Although deep learning approaches have had tremendous success in image, video and audio processing, computer vision, and speech recognition, their applications to three-dimensional (3D) biomolecular structural data sets have been hindered by the geometric and biological complexity. To address this problem we introduce the element-specific persistent homology (ESPH) method. ESPH represents 3D complex geometry by one-dimensional (1D) topological invariants and retains important biological information via a multichannel image-like representation. This representation reveals hidden structure-function relationships in biomolecules. We further integrate ESPH and deep convolutional neural networks to construct a multichannel topological neural network (TopologyNet) for the predictions of protein-ligand binding affinities and protein stability changes upon mutation. To overcome the deep learning limitations from small and noisy training sets, we propose a multi-task multichannel topological convolutional neural network (MM-TCNN). We demonstrate that TopologyNet outperforms the latest methods in the prediction of protein-ligand binding affinities, mutation induced globular protein folding free energy changes, and mutation induced membrane protein folding free energy changes. Availability: weilab.math.msu.edu/TDL/},
}

@article{cang_mu_wu_opron_xia_wei_2015,
  author       = {Cang, Zixuan and Mu, Lin and Wu, Kedi and Opron, Kristopher and Xia, Kelin and Wei, Guo-Wei},
  title        = {A topological approach for protein classification},
  journal      = {Computational and Mathematical Biophysics},
  volume       = {3},
  number       = {1},
  year         = {2015},
  month        = nov,
  publisher    = {Walter de Gruyter GmbH},
  doi          = {10.1515/mlbmb-2015-0009},
  url          = {https://doi.org/10.1515/mlbmb-2015-0009},
}