@article{bortner_gross_meshkat_shiu_sullivant_2023, title={Identifiability of linear compartmental tree models and a general formula for input-output equations}, volume={146}, ISSN={["1090-2074"]}, DOI={10.1016/j.aam.2023.102490}, abstractNote={A foundational question in the theory of linear compartmental models is how to assess whether a model is structurally identifiable – that is, whether parameter values can be inferred from noiseless data – directly from the combinatorics of the model. Our main result completely answers this question for models (with one input and one output) in which the underlying graph is a bidirectional tree; moreover, identifiability of such models can be verified visually. Models of this structure include two families of models often appearing in biological applications: catenary and mammillary models. Our analysis of such models is enabled by two supporting results, which are significant in their own right. One result gives the first general formula for the coefficients of input-output equations (certain equations that can be used to determine identifiability) that allows for input and output to be in distinct compartments. In another supporting result, we prove that identifiability is preserved when a model is enlarged and altered in specific ways involving adding a new compartment with a bidirected edge to an existing compartment.}, journal={ADVANCES IN APPLIED MATHEMATICS}, author={Bortner, Cashous and Gross, Elizabeth and Meshkat, Nicolette and Shiu, Anne and Sullivant, Seth}, year={2023}, month={May} } @article{stanley_sullivant_2023, title={On mixing behavior of a family of random walks determined by a linear recurrence}, volume={346}, ISSN={["1872-681X"]}, DOI={10.1016/j.disc.2022.113166}, abstractNote={We study random walks on the integers mod Gn that are determined by an integer sequence {Gn}n≥1 generated by a linear recurrence relation. Fourier analysis provides explicit formulas to compute the eigenvalues of the transition matrices and we use this to bound the mixing time of the random walks.}, number={1}, journal={DISCRETE MATHEMATICS}, author={Stanley, Caprice and Sullivant, Seth}, year={2023}, month={Jan} } @article{coons_sullivant_2023, title={The h*-polynomial of the order polytope of the zig-zag poset}, volume={30}, ISSN={["1077-8926"]}, DOI={10.37236/11526}, abstractNote={ We construct a family of shellings for the canonical triangulation of the order polytope of the zig-zag poset. This gives a new combinatorial interpretation for the coefficients in the numerator of the Ehrhart series of this order polytope in terms of the swap statistic on alternating permutations. We also offer an alternate proof of this result using the techniques of rank selection. Finally, we show that the sequence of coefficients of the numerator of this Ehrhart series is symmetric and unimodal. }, number={2}, journal={ELECTRONIC JOURNAL OF COMBINATORICS}, author={Coons, Jane Ivy and Sullivant, Seth}, year={2023}, month={Jun} } @article{misra_sullivant_2022, title={Directed Gaussian graphical models with toric vanishing ideals}, volume={138}, ISSN={["1090-2074"]}, DOI={10.1016/j.aam.2022.102345}, abstractNote={Directed Gaussian graphical models are statistical models that use a directed acyclic graph (DAG) to represent the conditional independence structures between a set of jointly normal random variables. The DAG specifies the model through recursive factorization of the parametrization, via restricted conditional distributions. In this paper, we make an attempt to characterize the DAGs whose vanishing ideals are toric ideals. In particular, we give some combinatorial criteria to construct such DAGs from smaller DAGs which have toric vanishing ideals. An associated monomial map called the shortest trek map plays an important role in our description of toric Gaussian DAG models. For DAGs whose vanishing ideal is toric, we prove results about the generating sets of those toric ideals.}, journal={ADVANCES IN APPLIED MATHEMATICS}, author={Misra, Pratik and Sullivant, Seth}, year={2022}, month={Jul} } @article{hollering_sullivant_2022, title={Exchangeable and sampling-consistent distributions on rooted binary trees}, ISSN={["1475-6072"]}, DOI={10.1017/jpr.2021.28}, abstractNote={Abstract We introduce a notion of finite sampling consistency for phylogenetic trees and show that the set of finitely sampling-consistent and exchangeable distributions on n-leaf phylogenetic trees is a polytope. We use this polytope to show that the set of all exchangeable and sampling-consistent distributions on four-leaf phylogenetic trees is exactly Aldous’ beta-splitting model, and give a description of some of the vertices for the polytope of distributions on five leaves. We also introduce a new semialgebraic set of exchangeable and sampling consistent models we call the multinomial model and use it to characterize the set of exchangeable and sampling-consistent distributions. Using this new model, we obtain a finite de Finetti-type theorem for rooted binary trees in the style of Diaconis’ theorem on finite exchangeable sequences.}, journal={JOURNAL OF APPLIED PROBABILITY}, author={Hollering, Benjamin and Sullivant, Seth}, year={2022}, month={Jan} } @article{bortner_sullivant_2022, title={Structural identifiability of series-parallel LCR systems}, volume={112}, ISSN={["1095-855X"]}, DOI={10.1016/j.jsc.2022.01.002}, abstractNote={We consider the identifiability problem for the parameters of series-parallel LCR circuit networks. We prove that for networks with only two classes of components (inductor-capacitor (LC), inductor-resistor (LR), and capacitor-resistor (RC)), the parameters are identifiable if and only if the number of non-monic coefficients of the constitutive equations equals the number of parameters. The notion of the “type” of the constitutive equations plays a key role in the identifiability of LC, LR, and RC networks. We also investigate the general series-parallel LCR circuits (with all three classes of components), and classify the types of constitutive equations that can arise, showing that there are 22 different types. However, we produce an example that shows that the basic notion of type that works to classify identifiability of two class networks is not sufficient to classify the identifiability of general series-parallel LCR circuits.}, journal={JOURNAL OF SYMBOLIC COMPUTATION}, author={Bortner, Cashous and Sullivant, Seth}, year={2022}, pages={79–104} } @article{hollering_sullivant_2021, title={Identifiability in phylogenetics using algebraic matroids}, volume={104}, ISSN={["1095-855X"]}, DOI={10.1016/j.jsc.2020.04.012}, abstractNote={Identifiability is a crucial property for a statistical model since distributions in the model uniquely determine the parameters that produce them. In phylogenetics, the identifiability of the tree parameter is of particular interest since it means that phylogenetic models can be used to infer evolutionary histories from data. In this paper we introduce a new computational strategy for proving the identifiability of discrete parameters in algebraic statistical models that uses algebraic matroids naturally associated to the models. We then use this algorithm to prove that the tree parameters are generically identifiable for 2-tree CFN and K3P mixtures. We also show that the k-cycle phylogenetic network parameter is identifiable under the K2P and K3P models.}, journal={JOURNAL OF SYMBOLIC COMPUTATION}, author={Hollering, Benjamin and Sullivant, Seth}, year={2021}, pages={142–158} } @article{coons_sullivant_2021, title={Quasi-independence models with rational maximum likelihood estimator}, volume={104}, ISSN={["1095-855X"]}, DOI={10.1016/j.jsc.2020.10.006}, abstractNote={We classify the two-way quasi-independence models (independence models with structural zeros) that have rational maximum likelihood estimators, or MLEs. We give a necessary and sufficient condition on the bipartite graph associated to the model for the MLE to be rational. In this case, we give an explicit formula for the MLE in terms of combinatorial features of this graph. We also use the Horn uniformization to show that for general log-linear models M with rational MLE, any model obtained by restricting to a face of the cone of sufficient statistics of M also has rational MLE.}, journal={JOURNAL OF SYMBOLIC COMPUTATION}, author={Coons, Jane Ivy and Sullivant, Seth}, year={2021}, pages={917–941} } @article{misra_sullivant_2019, title={BOUNDS ON THE EXPECTED SIZE OF THE MAXIMUM AGREEMENT SUBTREE FOR A GIVEN TREE SHAPE}, volume={33}, ISSN={["1095-7146"]}, DOI={10.1137/18M1213695}, abstractNote={We show that the expected size of the maximum agreement subtree of two $n$-leaf trees, uniformly random among all trees with the shape, is $\Theta(\sqrt{n})$. To derive the lower bound, we prove a global structural result on a decomposition of rooted binary trees into subgroups of leaves called blobs. To obtain the upper bound, we generalize a first moment argument for random tree distributions that are exchangeable and not necessarily sampling consistent.}, number={4}, journal={SIAM JOURNAL ON DISCRETE MATHEMATICS}, author={Misra, Pratik and Sullivant, Seth}, year={2019}, pages={2316–2325} } @article{sullivant_2019, title={Strongly Robust Toric Ideals in Codimension 2}, volume={10}, ISSN={1309-3452}, url={http://dx.doi.org/10.18409/jas.v10i1.62}, DOI={10.18409/jas.v10i1.62}, abstractNote={A homogeneous ideal is robust if its universal Gröbner basis is also a minimal generating set.  For toric ideals, one has the stronger definition: A toric ideal is strongly robust if its Graver basis equals the set of indispensable binomials.  We characterize the codimension 2  strongly robust toric ideals by their Gale diagrams.  This give a positive answer to a question of Petrovic, Thoma, and Vladoiu in the case of codimension 2 toric ideals.}, number={1}, journal={Journal of Algebraic Statistics}, publisher={Paul V. Galvin Library/Illinois Institute of Technology}, author={Sullivant, Seth}, year={2019}, month={Apr}, pages={128–136} } @misc{sullivant_2018, title={Algebraic Statistics}, ISBN={9781470435172 9781470449803}, ISSN={1065-7339}, url={http://dx.doi.org/10.1090/gsm/194}, DOI={10.1090/gsm/194}, journal={Graduate Studies in Mathematics}, publisher={American Mathematical Society}, author={Sullivant, Seth}, year={2018}, month={Nov} } @article{durden_sullivant_2018, title={Identifiability of Phylogenetic Parameters from k-mer Data Under the Coalescent}, volume={81}, ISSN={0092-8240 1522-9602}, url={http://dx.doi.org/10.1007/s11538-018-0399-1}, DOI={10.1007/s11538-018-0399-1}, abstractNote={Distances between sequences based on their k-mer frequency counts can be used to reconstruct phylogenies without first computing a sequence alignment. Past work has shown that effective use of k-mer methods depends on (1) model-based corrections to distances based on k-mers and (2) breaking long sequences into blocks to obtain repeated trials from the sequence-generating process. Good performance of such methods is based on having many high-quality blocks with many homologous sites, which can be problematic to guarantee a priori. Nature provides natural blocks of sequences into homologous regions—namely, the genes. However, directly using past work in this setting is problematic because of possible discordance between different gene trees and the underlying species tree. Using the multispecies coalescent model as a basis, we derive model-based moment formulas that involve the species divergence times and the coalescent parameters. From this setting, we prove identifiability results for the tree and branch length parameters under the Jukes–Cantor model of sequence mutations.}, number={2}, journal={Bulletin of Mathematical Biology}, publisher={Springer Nature}, author={Durden, Chris and Sullivant, Seth}, year={2018}, month={Feb}, pages={431–451} } @article{gross_sullivant_2018, title={The maximum likelihood threshold of a graph}, volume={24}, ISSN={["1573-9759"]}, DOI={10.3150/16-bej881}, abstractNote={The maximum likelihood threshold of a graph is the smallest number of data points that guarantees that maximum likelihood estimates exist almost surely in the Gaussian graphical model associated to the graph. We show that this graph parameter is connected to the theory of combinatorial rigidity. In particular, if the edge set of a graph $G$ is an independent set in the $n-1$-dimensional generic rigidity matroid, then the maximum likelihood threshold of $G$ is less than or equal to $n$. This connection allows us to prove many results about the maximum likelihood threshold.}, number={1}, journal={BERNOULLI}, author={Gross, Elizabeth and Sullivant, Seth}, year={2018}, month={Feb}, pages={386–407} } @article{bernstein_sullivant_2017, title={Normal Binary Hierarchical Models}, volume={26}, ISSN={["1944-950X"]}, DOI={10.1080/10586458.2016.1142911}, abstractNote={ABSTRACT Each simplicial complex and integer vector yields a vector configuration whose combinatorial properties are important for the analysis of contingency tables. We study the normality of these vector configurations including a description of operations on simplicial complexes that preserve normality, constructions of families of minimally nonnormal complexes, and computations classifying all of the normal complexes on up to six vertices. We repeat this analysis for compressed vector configurations, classifying all of the compressed complexes on up to six vertices.}, number={2}, journal={EXPERIMENTAL MATHEMATICS}, author={Bernstein, Daniel Irving and Sullivant, Seth}, year={2017}, pages={153–164} } @article{allman_rhodes_sullivant_2017, title={Statistically Consistent k-mer Methods for Phylogenetic Tree Reconstruction}, volume={24}, ISSN={["1557-8666"]}, DOI={10.1089/cmb.2015.0216}, abstractNote={Frequencies of k-mers in sequences are sometimes used as a basis for inferring phylogenetic trees without first obtaining a multiple sequence alignment. We show that a standard approach of using the squared Euclidean distance between k-mer vectors to approximate a tree metric can be statistically inconsistent. To remedy this, we derive model-based distance corrections for orthologous sequences without gaps, which lead to consistent tree inference. The identifiability of model parameters from k-mer frequencies is also studied. Finally, we report simulations showing that the corrected distance outperforms many other k-mer methods, even when sequences are generated with an insertion and deletion process. These results have implications for multiple sequence alignment as well since k-mer methods are usually the first step in constructing a guide tree for such algorithms.}, number={2}, journal={JOURNAL OF COMPUTATIONAL BIOLOGY}, author={Allman, Elizabeth S. and Rhodes, John A. and Sullivant, Seth}, year={2017}, month={Feb}, pages={153–171} } @article{bernstein_sullivant_2017, title={Unimodular binary hierarchical models}, volume={123}, ISSN={["1096-0902"]}, DOI={10.1016/j.jctb.2016.11.003}, abstractNote={Associated to each simplicial complex is a binary hierarchical model. We classify the simplicial complexes that yield unimodular binary hierarchical models. Our main theorem provides both a construction of all unimodular binary hierarchical models, together with a characterization in terms of excluded minors, where our definition of a minor allows the taking of links and induced complexes. A key tool in the proof is the lemma that the class of unimodular binary hierarchical models is closed under the Alexander duality operation on simplicial complexes.}, journal={JOURNAL OF COMBINATORIAL THEORY SERIES B}, author={Bernstein, Daniel Irving and Sullivant, Seth}, year={2017}, month={Mar}, pages={97–125} } @article{rauh_sullivant_2016, title={Lifting Markov bases and higher codimension toric fiber products}, volume={74}, ISSN={0747-7171}, url={http://dx.doi.org/10.1016/j.jsc.2015.07.003}, DOI={10.1016/j.jsc.2015.07.003}, abstractNote={We study how to lift Markov bases and Gröbner bases along linear maps of lattices. We give a lifting algorithm that allows to compute such bases iteratively provided a certain associated semigroup is normal. Our main application is the toric fiber product of toric ideals, where lifting gives Markov bases of the factor ideals that satisfy the compatible projection property. We illustrate the technique by computing Markov bases of various infinite families of hierarchical models. The methodology also implies new finiteness results for iterated toric fiber products.}, journal={Journal of Symbolic Computation}, publisher={Elsevier BV}, author={Rauh, Johannes and Sullivant, Seth}, year={2016}, month={May}, pages={276–307} } @article{fink_rajchgot_sullivant_2016, title={Matrix Schubert varieties and Gaussian conditional independence models}, volume={44}, ISSN={["1572-9192"]}, DOI={10.1007/s10801-016-0698-2}, abstractNote={Matrix Schubert varieties are certain varieties in the affine space of square matrices which are determined by specifying rank conditions on submatrices. We study these varieties for generic matrices, symmetric matrices, and upper triangular matrices in view of two applications to algebraic statistics: We observe that special conditional independence models for Gaussian random variables are intersections of matrix Schubert varieties in the symmetric case. Consequently, we obtain a combinatorial primary decomposition algorithm for some conditional independence ideals. We also characterize the vanishing ideals of Gaussian graphical models for generalized Markov chains. In the course of this investigation, we are led to consider three related stratifications, which come from the Schubert stratification of a flag variety. We provide some combinatorial results, including describing the stratifications using the language of rank arrays and enumerating the strata in each case.}, number={4}, journal={JOURNAL OF ALGEBRAIC COMBINATORICS}, author={Fink, Alex and Rajchgot, Jenna and Sullivant, Seth}, year={2016}, month={Dec}, pages={1009–1046} } @article{bernstein_ho_long_steel_st john_sullivant_2015, title={BOUNDS ON THE EXPECTED SIZE OF THE MAXIMUM AGREEMENT SUBTREE}, volume={29}, ISSN={["1095-7146"]}, DOI={10.1137/140997750}, abstractNote={We prove polynomial upper and lower bounds on the expected size of the maximum agreement subtree of two random binary phylogenetic trees under both the uniform distribution and Yule-Harding distribution. This positively answers a question posed in earlier work. Determining tight upper and lower bounds remains an open problem.}, number={4}, journal={SIAM JOURNAL ON DISCRETE MATHEMATICS}, author={Bernstein, Daniel Irving and Ho, Lam Si Tung and Long, Colby and Steel, Mike and St John, Katherine and Sullivant, Seth}, year={2015}, pages={2065–2074} } @article{meshkat_sullivant_eisenberg_2015, title={Identifiability Results for Several Classes of Linear Compartment Models}, volume={77}, ISSN={["1522-9602"]}, DOI={10.1007/s11538-015-0098-0}, abstractNote={Identifiability concerns finding which unknown parameters of a model can be estimated, uniquely or otherwise, from given input–output data. If some subset of the parameters of a model cannot be determined given input–output data, then we say the model is unidentifiable. In this work, we study linear compartment models, which are a class of biological models commonly used in pharmacokinetics, physiology, and ecology. In past work, we used commutative algebra and graph theory to identify a class of linear compartment models that we call identifiable cycle models, which are unidentifiable but have the simplest possible identifiable functions (so-called monomial cycles). Here we show how to modify identifiable cycle models by adding inputs, adding outputs, or removing leaks, in such a way that we obtain an identifiable model. We also prove a constructive result on how to combine identifiable models, each corresponding to strongly connected graphs, into a larger identifiable model. We apply these theoretical results to several real-world biological models from physiology, cell biology, and ecology.}, number={8}, journal={BULLETIN OF MATHEMATICAL BIOLOGY}, author={Meshkat, Nicolette and Sullivant, Seth and Eisenberg, Marisa}, year={2015}, month={Aug}, pages={1620–1651} } @article{long_sullivant_2015, title={Identifiability of 3-class Jukes-Cantor mixtures}, volume={64}, ISSN={["1090-2074"]}, DOI={10.1016/j.aam.2014.12.003}, abstractNote={We prove identifiability of the tree parameters of the 3-class Jukes–Cantor mixture model. The proof uses ideas from algebraic statistics, in particular: finding phylogenetic invariants that separate the varieties associated to different triples of trees; computing dimensions of the resulting phylogenetic varieties; and using the disentangling number to reduce to trees with a small number of leaves. Symbolic computation also plays a key role in handling the many different cases and finding relevant phylogenetic invariants.}, journal={ADVANCES IN APPLIED MATHEMATICS}, author={Long, Colby and Sullivant, Seth}, year={2015}, month={Mar}, pages={89–110} } @article{long_sullivant_2015, title={Tying up loose strands: Defining equations of the strand symmetric model}, volume={6}, DOI={10.18409/jas.v6i1.34}, abstractNote={The strand symmetric model is a phylogenetic model designed to reflect the symmetry inherent in the double-stranded structure of DNA. We show that the set of known phylogenetic invariants for the general strand symmetric model of the three leaf claw tree entirely defines the ideal. This knowledge allows one to determine the vanishing ideal of the general strand symmetric model of any trivalent tree. Our proof of the main result is computational. We use the fact that the Zariski closure of the strand symmetric model is the secant variety of a toric variety to compute the dimension of the variety. We then show that the known equations generate a prime ideal of the correct dimension using elimination theory.}, number={1}, journal={Journal of Algebraic Statistics}, author={Long, C. and Sullivant, S.}, year={2015}, pages={17–23} } @article{davidson_sullivant_2014, title={Distance-Based Phylogenetic Methods Around a Polytomy}, volume={11}, ISSN={["1557-9964"]}, DOI={10.1109/tcbb.2014.2309592}, abstractNote={Distance-based phylogenetic algorithms attempt to solve the NP-hard least-squares phylogeny problem by mapping an arbitrary dissimilarity map representing biological data to a tree metric. The set of all dissimilarity maps is a Euclidean space properly containing the space of all tree metrics as a polyhedral fan. Outputs of distance-based tree reconstruction algorithms such as UPGMA and neighbor-joining are points in the maximal cones in the fan. Tree metrics with polytomies lie at the intersections of maximal cones. A phylogenetic algorithm divides the space of all dissimilarity maps into regions based upon which combinatorial tree is reconstructed by the algorithm. Comparison of phylogenetic methods can be done by comparing the geometry of these regions. We use polyhedral geometry to compare the local nature of the subdivisions induced by least-squares phylogeny, UPGMA, and neighbor-joining when the true tree has a single polytomy with exactly four neighbors. Our results suggest that in some circumstances, UPGMA and neighbor-joining poorly match least-squares phylogeny.}, number={2}, journal={IEEE-ACM TRANSACTIONS ON COMPUTATIONAL BIOLOGY AND BIOINFORMATICS}, author={Davidson, Ruth and Sullivant, Seth}, year={2014}, pages={325–335} } @article{meshkat_sullivant_2014, title={Identifiable reparametrizations of linear compartment models}, volume={63}, ISSN={["0747-7171"]}, DOI={10.1016/j.jsc.2013.11.002}, abstractNote={Structural identifiability concerns finding which unknown parameters of a model can be quantified from given input–output data. Many linear ODE models, used in systems biology and pharmacokinetics, are unidentifiable, which means that parameters can take on an infinite number of values and yet yield the same input–output data. We use commutative algebra and graph theory to study a particular class of unidentifiable models and find conditions to obtain identifiable scaling reparametrizations of these models. Our main result is that the existence of an identifiable scaling reparametrization is equivalent to the existence of a scaling reparametrization by monomial functions. We provide an algorithm for finding these reparametrizations when they exist and partial results beginning to classify graphs which possess an identifiable scaling reparametrization.}, journal={JOURNAL OF SYMBOLIC COMPUTATION}, author={Meshkat, Nicolette and Sullivant, Seth}, year={2014}, month={May}, pages={46–67} } @article{engstrom_kahle_sullivant_2014, title={Multigraded commutative algebra of graph decompositions}, volume={39}, ISSN={["1572-9192"]}, DOI={10.1007/s10801-013-0450-0}, abstractNote={The toric fiber product is a general procedure for gluing two ideals, homogeneous with respect to the same multigrading, to produce a new homogeneous ideal. Toric fiber products generalize familiar constructions in commutative algebra like adding monomial ideals and the Segre product. We describe how to obtain generating sets of toric fiber products in non-zero codimension and discuss persistence of normality and primary decompositions under toric fiber products. Several applications are discussed, including (a) the construction of Markov bases of hierarchical models in many new cases, (b) a new proof of the quartic generation of binary graph models associated to K 4-minor free graphs, and (c) the recursive computation of primary decompositions of conditional independence ideals.}, number={2}, journal={JOURNAL OF ALGEBRAIC COMBINATORICS}, author={Engstrom, Alexander and Kahle, Thomas and Sullivant, Seth}, year={2014}, month={Mar}, pages={335–372} } @article{kahle_rauh_sullivant_2014, title={POSITIVE MARGINS AND PRIMARY DECOMPOSITION}, volume={6}, ISSN={["1939-2346"]}, DOI={10.1216/jca-2014-6-2-173}, abstractNote={We study random walks on contingency tables with fixed marginals, cor- responding to a (log-linear) hierarchical model. If the set of allowed moves is not a Markov basis, then there exist tables with the same marginals that are not connected. We study linear conditions on the values of the marginals that ensure that all tables in a given fiber are connected. We show that many graphical models have the posi- tive margins property, which says that all fibers with strictly positive marginals are connected by the quadratic moves that correspond to conditional independence state- ments. The property persists under natural operations such as gluing along cliques, but we also construct examples of graphical models not enjoying this property. Our analysis of the positive margins property depends on computing the primary decomposition of the associated conditional independence ideal. The main technical results of the paper are primary decompositions of the conditional independence ideals of graphical models of the N-cycle and the complete bipartite graph K2,N 2, with various restrictions on the size of the nodes.}, number={2}, journal={JOURNAL OF COMMUTATIVE ALGEBRA}, author={Kahle, Thomas and Rauh, Johannes and Sullivant, Seth}, year={2014}, pages={173–208} } @article{mahdi_meshkat_sullivant_2014, title={Structural Identifiability of Viscoelastic Mechanical Systems}, volume={9}, ISSN={["1932-6203"]}, DOI={10.1371/journal.pone.0086411}, abstractNote={We solve the local and global structural identifiability problems for viscoelastic mechanical models represented by networks of springs and dashpots. We propose a very simple characterization of both local and global structural identifiability based on identifiability tables, with the purpose of providing a guideline for constructing arbitrarily complex, identifiable spring-dashpot networks. We illustrate how to use our results in a number of examples and point to some applications in cardiovascular modeling.}, number={2}, journal={PLOS ONE}, author={Mahdi, Adam and Meshkat, Nicolette and Sullivant, Seth}, year={2014}, month={Feb} } @article{garcía-puente_petrović_sullivant_2013, title={Graphical models}, volume={5}, ISSN={1948-7916 1948-7916}, url={http://dx.doi.org/10.2140/jsag.2013.5.1}, DOI={10.2140/jsag.2013.5.1}, abstractNote={The Macaulay2 package GraphicalModels contains algorithms for the algebraic study of graphical models associated to undirected, directed and mixed graphs, and associated collections of conditional independence statements. Among the algorithms implemented are procedures for computing the vanishing ideal of graphical models, for generating conditional independence ideals of families of independence statements associated to graphs, and for checking for identifiable parameters in Gaussian mixed graph models. These procedures can be used to study fundamental problems about graphical models. GRAPHICAL MODELS. A graphical model is a statistical model associated to a graph, where the nodes of the graph represent random variables and the edges of the graph encode relationships between the random variables. Graphical models are an important class of statistical models used in many applications (see the standard textbooks [L, W]) because of their ability to model complex interactions between several random variables, by specifying interactions using only local information about connectivity between the vertices in a graph. There are two natural ways to specify a graphical model, through either conditional independence statements specified by the graph or via a parametric representation (often called a “factorization”). Every distribution that factors according to the graph satisfies the conditional independence statements implied by the graph. This leads to the question: Which distributions satisfy the conditional independence statements implied by the graph, but do not factor? Once we specify the types of random variables under consideration (e.g. discrete random variables or Gaussian random variables) it is possible to address the questions in the preceding paragraph using (computational) algebraic geometry. Indeed, in these cases, the set of all probability distributions satisfying a family of conditional independence constraints is a semialgebraic set. For discrete random variables, that semialgebraic set is a subset of the probability simplex, and can be represented by a certain homogeneous ideal generated by quadrics. For Gaussian random variables, this set of distributions corresponds to a semialgebraic subset of the cone of positive definite matrices. Similarly, the parametrized family of probability distributions also is a semialgebraic set (of the probability simplex for discrete random variables, and of the cone of positive definite matrices for Gaussian random variables). This algebraic perspective has been studied by different authors [GSS, GMS, S], and the book [DSS] provides details. The Macaulay2 package GraphicalModels allows the user to compute the ideals of conditional independence statements for any collection of statements for discrete or Gaussian random variables. It can also compute the vanishing ideal of a graphical model in these cases. A number of auxiliary functions are useful for doing further analyses of graphical models. 2010 Mathematics Subject Classification. 13P25, 14Q99, 62H99, 68W30. GraphicalModels version 1.0. 1 García-Puente, Petrović, and Sullivant :::: Graphical Models 2 For example, consider the directed acyclic graph G with five vertices {a,b,c,d,e} and edge set {a→ d,b→ d,c→ d,c→ e,d→ e}. The following commands compute the associated conditional independence ideal for the set of global Markov statements, CIglobal(G), and the vanishing ideal IG of the Gaussian graphical model on G. Macaulay2, version 1.6 with packages: ConwayPolynomials, Elimination, IntegralClosure, LLLBases, PrimaryDecomposition, ReesAlgebra, TangentCone i1 : needsPackage "GraphicalModels"; i2 : G = digraph{{a,d},{b,d},{c,{d,e}},{d,e}}; i3 : R = gaussianRing G; i4 : gens R o4 = {s , s , s , s , s , s , s , s , s , s , s , s , a,a a,b a,c a,d a,e b,b b,c b,d b,e c,c c,d c,e --------------------------------------------------------------------------s , s , s } d,d d,e e,e o4 : List i5 : I = conditionalIndependenceIdeal(R,globalMarkov(G)); o5 : Ideal of R i6 : J = gaussianVanishingIdeal(R); o6 : Ideal of R i7 : flatten degrees J o7 = {1, 1, 1, 2, 3, 3} GraphicalModels uses the package Graphs and a number of fundamental constructs and relationships associated with graphs. First we create a polynomial ring that contains the entries of the covariance matrix Σ of a jointly normal random vector as its indeterminates. Information about the underlying graph is stored in the polynomial ring. Hence some methods take just a ring as input, but require that it be created with gaussianRing or markovRing in the discrete case. For directed acyclic graphs, it is known that V (CIglobal(G))∩PDm =V (IG)∩PDm where PDm⊂R m+1 2 ) is the cone of m×m positive definite symmetric matrices. In particular, the set of such matrices satisfying the conditional independence constraints equals the set of covariance matrices in the image of the parametrization. Unfortunately, this does not imply that CIglobal(G) = IG. In the case of Gaussian random variables, a larger ideal, the trek ideal TG, generated by all subdeterminants of the covariance matrix that vanish on the model, and satisfying CIglobal(G) ⊆ TG ⊆ IG is sometimes equal to IG (see [STD]), as the following example shows. i8 : isSubset(I,J), I == J, J == trekIdeal(R,G) o8 = (true, false, true) o8 : Sequence Similar computations can also be performed for graphical models with discrete random variables, and with other graph families. The mathematical explanation of these graphical models and their associated ideals appear in the remaining sections. García-Puente, Petrović, and Sullivant :::: Graphical Models 3 COMPUTING CONDITIONAL INDEPENDENCE IDEALS. Conditional independence constraints on discrete or Gaussian random variables translate to rank conditions on certain matrices associated to the probability densities. We briefly explain these constructions here and how to generate these constraints in Macaulay2 using GraphicalModels; see [DSS, Ch. 3] for more detail. Let X = (X1, . . . ,Xn) be a discrete random vector where each random variable Xi has state space [di] = {1,2, . . . ,di}. Let d =(d1, . . . ,dn). A probability distribution for X is a tensor in Rd1⊗·· ·⊗Rdn , all of whose coordinates are nonnegative and sum to one. The set of all such distributions is the probability simplex ∆d . Let pi1···in = P(X1 = i1, . . . ,Xn = in) denote the probability of a primitive event. The polynomial ring in these quantities is created using the command markovRing. i9 : d = (2,3,2); R = markovRing d;}, number={1}, journal={Journal of Software for Algebra and Geometry}, publisher={Mathematical Sciences Publishers}, author={García-Puente, Luis and Petrović, Sonja and Sullivant, Seth}, year={2013}, pages={1–7} } @article{davidson_sullivant_2013, title={Polyhedral combinatorics of UPGMA cones}, volume={50}, ISSN={["1090-2074"]}, DOI={10.1016/j.aam.2012.10.002}, abstractNote={Distance-based methods such as UPGMA (Unweighted Pair Group Method with Arithmetic Mean) continue to play a significant role in phylogenetic research. We use polyhedral combinatorics to analyze the natural subdivision of the positive orthant induced by classifying the input vectors according to tree topologies returned by the algorithm. The partition lattice informs the study of UPGMA trees. We give a closed form for the extreme rays of UPGMA cones on n taxa, and compute the spherical volumes of the UPGMA cones for small n .}, number={2}, journal={ADVANCES IN APPLIED MATHEMATICS}, author={Davidson, Ruth and Sullivant, Seth}, year={2013}, month={Feb}, pages={327–338} } @article{draisma_sullivant_talaska_2013, title={Positivity for Gaussian graphical models}, volume={50}, ISSN={["0196-8858"]}, DOI={10.1016/j.aam.2013.03.001}, abstractNote={Gaussian graphical models are parametric statistical models for jointly normal random variables whose dependence structure is determined by a graph. In previous work, we introduced trek separation, which gives a necessary and sufficient condition in terms of the graph for when a subdeterminant is zero for all covariance matrices that belong to the Gaussian graphical model. Here we extend this result to give explicit cancellation-free formulas for the expansions of non-zero subdeterminants.}, number={5}, journal={ADVANCES IN APPLIED MATHEMATICS}, author={Draisma, Jan and Sullivant, Seth and Talaska, Kelli}, year={2013}, month={May}, pages={661–674} } @article{hillar_sullivant_2012, title={Finite Gröbner bases in infinite dimensional polynomial rings and applications}, volume={229}, ISSN={0001-8708}, url={http://dx.doi.org/10.1016/j.aim.2011.08.009}, DOI={10.1016/j.aim.2011.08.009}, abstractNote={We introduce the theory of monoidal Gröbner bases, a concept which generalizes the familiar notion in a polynomial ring and allows for a description of Gröbner bases of ideals that are stable under the action of a monoid. The main motivation for developing this theory is to prove finiteness results in commutative algebra and applications. A basic theorem of this type is that ideals in infinitely many indeterminates stable under the action of the symmetric group are finitely generated up to symmetry. Using this machinery, we give new streamlined proofs of some classical finiteness theorems in algebraic statistics as well as a proof of the independent set conjecture of Hoşten and the second author.}, number={1}, journal={Advances in Mathematics}, publisher={Elsevier BV}, author={Hillar, Christopher J. and Sullivant, Seth}, year={2012}, month={Jan}, pages={1–25} } @article{ardila_owen_sullivant_2012, title={Geodesics in CAT(0) cubical complexes}, volume={48}, ISSN={["1090-2074"]}, DOI={10.1016/j.aam.2011.06.004}, abstractNote={We describe an algorithm to compute the geodesics in an arbitrary CAT(0) cubical complex. A key tool is a correspondence between cubical complexes of global non-positive curvature and posets with inconsistent pairs. This correspondence also gives an explicit realization of such a complex as the state complex of a reconfigurable system, and a way to embed any interval in the integer lattice cubing of its dimension.}, number={1}, journal={ADVANCES IN APPLIED MATHEMATICS}, author={Ardila, Federico and Owen, Megan and Sullivant, Seth}, year={2012}, month={Jan}, pages={142–163} } @article{rhodes_sullivant_2012, title={Identifiability of Large Phylogenetic Mixture Models}, volume={74}, ISSN={["1522-9602"]}, DOI={10.1007/s11538-011-9672-2}, abstractNote={Phylogenetic mixture models are statistical models of character evolution allowing for heterogeneity. Each of the classes in some unknown partition of the characters may evolve by different processes, or even along different trees. Such models are of increasing interest for data analysis, as they can capture the variety of evolutionary processes that may be occurring across long sequences of DNA or proteins. The fundamental question of whether parameters of such a model are identifiable is difficult to address, due to the complexity of the parameterization. Identifiability is, however, essential to their use for statistical inference.We analyze mixture models on large trees, with many mixture components, showing that both numerical and tree parameters are indeed identifiable in these models when all trees are the same. This provides a theoretical justification for some current empirical studies, and indicates that extensions to even more mixture components should be theoretically well behaved. We also extend our results to certain mixtures on different trees, using the same algebraic techniques.}, number={1}, journal={BULLETIN OF MATHEMATICAL BIOLOGY}, author={Rhodes, John A. and Sullivant, Seth}, year={2012}, month={Jan}, pages={212–231} } @article{sullivant_2012, title={THE DISENTANGLING NUMBER FOR PHYLOGENETIC MIXTURES}, volume={26}, ISSN={["0895-4801"]}, DOI={10.1137/110843459}, abstractNote={We provide a logarithmic upper bound for the disentangling number on unordered lists of leaf labeled trees. This results is useful for analyzing phylogenetic mixture models. The proof depends on interpreting multisets of trees as high dimensional contingency tables.}, number={2}, journal={SIAM JOURNAL ON DISCRETE MATHEMATICS}, author={Sullivant, Seth}, year={2012}, pages={856–859} } @article{allman_rhodes_sullivant_2012, title={When Do Phylogenetic Mixture Models Mimic Other Phylogenetic Models?}, volume={61}, ISSN={["1076-836X"]}, DOI={10.1093/sysbio/sys064}, abstractNote={Phylogenetic mixture models, in which the sites in sequences undergo different substitution processes along the same or different trees, allow the description of heterogeneous evolutionary processes. As data sets consisting of longer sequences become available, it is important to understand such models, for both theoretical insights and use in statistical analyses. Some recent articles have highlighted disturbing "mimicking" behavior in which a distribution from a mixture model is identical to one arising on a different tree or trees. Other works have indicated such problems are unlikely to occur in practice, as they require very special parameter choices. After surveying some of these works on mixture models, we give several new results. In general, if the number of components in a generating mixture is not too large and we disallow zero or infinite branch lengths, then it cannot mimic the behavior of a nonmixture on a different tree. On the other hand, if the mixture model is locally overparameterized, it is possible for a phylogenetic mixture model to mimic distributions of another tree model. Although theoretical questions remain, these sorts of results can serve as a guide to when the use of mixture models in either maximum likelihood or Bayesian frameworks is likely to lead to statistically consistent inference, and when mimicking due to heterogeneity should be considered a realistic possibility. [Phylogenetic mixture models; parameter identifiability; heterogeneous sequence evolution.].}, number={6}, journal={SYSTEMATIC BIOLOGY}, author={Allman, Elizabeth S. and Rhodes, John A. and Sullivant, Seth}, year={2012}, month={Dec}, pages={1049–1059} } @article{bidkhori_sullivant_2011, title={Eulerian-Catalan numbers}, volume={ 18}, number={1}, journal={Electronic Journal of Combinatorics}, author={Bidkhori, H. and Sullivant, S.}, year={2011} } @article{drton_foygel_sullivant_2011, title={GLOBAL IDENTIFIABILITY OF LINEAR STRUCTURAL EQUATION MODELS}, volume={39}, ISSN={["0090-5364"]}, DOI={10.1214/10-aos859}, abstractNote={Structural equation models are multivariate statistical models that are defined by specifying noisy functional relationships among random variables. We consider the classical case of linear relationships and additive Gaussian noise terms. We give a necessary and sufficient condition for global identifiability of the model in terms of a mixed graph encoding the linear structural equations and the correlation structure of the error terms. Global identifiability is understood to mean injectivity of the parametrization of the model and is fundamental in particular for applicability of standard statistical methodology.}, number={2}, journal={ANNALS OF STATISTICS}, author={Drton, Mathias and Foygel, Rina and Sullivant, Seth}, year={2011}, month={Apr}, pages={865–886} } @article{allman_petrovic_rhodes_sullivant_2011, title={Identifiability of Two-Tree Mixtures for Group-Based Models}, volume={8}, ISSN={["1545-5963"]}, DOI={10.1109/tcbb.2010.79}, abstractNote={Phylogenetic data arising on two possibly different tree topologies might be mixed through several biological mechanisms, including incomplete lineage sorting or horizontal gene transfer in the case of different topologies, or simply different substitution processes on characters in the case of the same topology. Recent work on a 2-state symmetric model of character change showed that for 4 taxa, such a mixture model has nonidentifiable parameters, and thus, it is theoretically impossible to determine the two tree topologies from any amount of data under such circumstances. Here, the question of identifiability is investigated for two-tree mixtures of the 4-state group-based models, which are more relevant to DNA sequence data. Using algebraic techniques, we show that the tree parameters are identifiable for the JC and K2P models. We also prove that generic substitution parameters for the JC mixture models are identifiable, and for the K2P and K3P models obtain generic identifiability results for mixtures on the same tree. This indicates that the full phylogenetic signal remains in such mixtures, and the 2-state symmetric result is thus a misleading guide to the behavior of other models.}, number={3}, journal={IEEE-ACM TRANSACTIONS ON COMPUTATIONAL BIOLOGY AND BIOINFORMATICS}, author={Allman, Elizabeth S. and Petrovic, Sonja and Rhodes, John A. and Sullivant, Seth}, year={2011}, pages={710–722} } @article{sullivant_2010, title={Normal binary graph models}, volume={62}, ISSN={["0020-3157"]}, DOI={10.1007/s10463-010-0296-3}, abstractNote={We show that the marginal semigroup of a binary graph model is normal if and only if the graph is free of K 4 minors. The technique, based on the interplay of normality and the geometry of the marginal cone, has potential applications to other normality questions in algebraic statistics.}, number={4}, journal={ANNALS OF THE INSTITUTE OF STATISTICAL MATHEMATICS}, author={Sullivant, Seth}, year={2010}, month={Aug}, pages={717–726} } @article{sullivant_2010, title={Normal binary graph models.}, volume={64}, number={4}, journal={Annals of the Institute of Statistical Mathematics, Special Issue: Algebraic Methods in Computational Statistics}, author={Sullivant, Seth}, year={2010}, pages={717–726} } @article{sullivant_talaska_draisma_2010, title={TREK SEPARATION FOR GAUSSIAN GRAPHICAL MODELS}, volume={38}, ISSN={["0090-5364"]}, DOI={10.1214/09-aos760}, abstractNote={Gaussian graphical models are semi-algebraic subsets of the cone of positive definite covariance matrices. Submatrices with low rank correspond to generalizations of conditional independence constraints on collections of random variables. We give a precise graph-theoretic characterization of when submatrices of the covariance matrix have small rank for a general class of mixed graphs that includes directed acyclic and undirected graphs as special cases. Our new trek separation criterion generalizes the familiar d-separation criterion. Proofs are based on the trek rule, the resulting matrix factorizations and classical theorems of algebraic combinatorics on the expansions of determinants of path polynomials.}, number={3}, journal={ANNALS OF STATISTICS}, author={Sullivant, Seth and Talaska, Kelli and Draisma, Jan}, year={2010}, month={Jun}, pages={1665–1685} } @article{sullivant_2009, title={A Gröbner basis for the secant ideal of the second hypersimplex}, volume={1}, ISSN={1939-2346}, url={http://dx.doi.org/10.1216/jca-2009-1-2-327}, DOI={10.1216/jca-2009-1-2-327}, abstractNote={We determine a Groebner basis for the secant ideal of the toric ideal associated to the second hypersimplex, with respect to any circular term order. The Groebner basis of the secant ideal requires polynomials of odd degree up to n. This shows that the circular term order is 2-delightful, resolving a conjecture of Drton, Sturmfels, and the author. The proof uses Groebner degenerations for secant ideals, combinatorial characterizations of the secant ideals of monomial ideals, and the relations between secant ideals and prolongations.}, number={2}, journal={Journal of Commutative Algebra}, publisher={Rocky Mountain Mathematics Consortium}, author={Sullivant, S.}, year={2009}, month={Jun}, pages={327–338} } @book{putinar_sullivant_2009, title={Emerging Applications of Algebraic Geometry}, ISBN={9780387096858 9780387096865}, ISSN={0940-6573}, url={http://dx.doi.org/10.1007/978-0-387-09686-5}, DOI={10.1007/978-0-387-09686-5}, abstractNote={Recent advances in both the theory and implementation of computational algebraic geometry have led to new, striking applications to a variety of fields of research. The articles in this volume highlig}, journal={The IMA Volumes in Mathematics and its Applications}, publisher={Springer New York}, year={2009} } @article{sullivant_2009, title={Gaussian conditional independence relations have no finite complete characterization}, volume={213}, ISSN={["1873-1376"]}, DOI={10.1016/j.jpaa.2008.11.026}, abstractNote={We show that there can be no finite list of conditional independence relations which can be used to deduce all conditional independence implications among Gaussian random variables. To do this, we construct, for each n>3 a family of n conditional independence statements on n random variables which together imply that X1⫫X2, and such that no subset have this same implication. The proof relies on binomial primary decomposition.}, number={8}, journal={JOURNAL OF PURE AND APPLIED ALGEBRA}, author={Sullivant, Seth}, year={2009}, month={Aug}, pages={1502–1506} } @book{drton_sturmfels_sullivant_2009, title={Lectures on Algebraic Statistics}, ISBN={9783764389048 9783764389055}, url={http://dx.doi.org/10.1007/978-3-7643-8905-5}, DOI={10.1007/978-3-7643-8905-5}, abstractNote={How does an algebraic geometer studying secant varieties further the understanding of hypothesis tests in statistics? Why would a statistician working on factor analysis raise open problems about dete}, publisher={Birkhäuser Basel}, author={Drton, Mathias and Sturmfels, Bernd and Sullivant, Seth}, year={2009} } @article{beerenwinkel_sullivant_2009, title={Markov models for accumulating mutations}, volume={96}, ISSN={["1464-3510"]}, DOI={10.1093/biomet/asp023}, abstractNote={We introduce and analyze a waiting time model for the accumulation of genetic changes. The continuous-time conjunctive Bayesian network is defined by a partially ordered set of mutations and by the rate of fixation of each mutation. The partial order encodes constraints on the order in which mutations can fixate in the population, shedding light on the mutational pathways underlying the evolutionary process. We study a censored version of the model and derive equations for an em algorithm to perform maximum likelihood estimation of the model parameters. We also show how to select the maximum likelihood partially ordered set. The model is applied to genetic data from cancer cells and from drug resistant human immunodeficiency viruses, indicating implications for diagnosis and treatment. Copyright 2009, Oxford University Press.}, number={3}, journal={BIOMETRIKA}, author={Beerenwinkel, N. and Sullivant, S.}, year={2009}, month={Sep}, pages={645–661} } @article{sidman_sullivant_2009, title={Prolongations and Computational Algebra}, volume={61}, ISSN={["1496-4279"]}, DOI={10.4153/CJM-2009-047-5}, abstractNote={Abstract We explore the geometric notion of prolongations in the setting of computational algebra, extending results of Landsberg and Manivel which relate prolongations to equations for secant varieties. We also develop methods for computing prolongations that are combinatorial in nature. As an application, we use prolongations to derive a new family of secant equations for the binary symmetric model in phylogenetics.}, number={4}, journal={CANADIAN JOURNAL OF MATHEMATICS-JOURNAL CANADIEN DE MATHEMATIQUES}, author={Sidman, Jessica and Sullivant, Seth}, year={2009}, month={Aug}, pages={930–949} } @article{sullivant_2008, title={Algebraic geometry of Gaussian Bayesian networks}, volume={40}, ISSN={0196-8858}, url={http://dx.doi.org/10.1016/j.aam.2007.04.004}, DOI={10.1016/j.aam.2007.04.004}, abstractNote={Conditional independence models in the Gaussian case are algebraic varieties in the cone of positive definite covariance matrices. We study these varieties in the case of Bayesian networks, with a view towards generalizing the recursive factorization theorem to situations with hidden variables. In the case when the underlying graph is a tree, we show that the vanishing ideal of the model is generated by the conditional independence statements implied by graph. We also show that the ideal of any Bayesian network is homogeneous with respect to a multigrading induced by a collection of upstream random variables. This has a number of important consequences for hidden variable models. Finally, we relate the ideals of Bayesian networks to a number of classical constructions in algebraic geometry including toric degenerations of the Grassmannian, matrix Schubert varieties, and secant varieties.}, number={4}, journal={Advances in Applied Mathematics}, publisher={Elsevier BV}, author={Sullivant, Seth}, year={2008}, month={May}, pages={482–513} } @article{sullivant_2008, title={Combinatorial symbolic powers}, volume={319}, ISSN={0021-8693}, url={http://dx.doi.org/10.1016/j.jalgebra.2007.09.024}, DOI={10.1016/j.jalgebra.2007.09.024}, abstractNote={Symbolic powers are studied in the combinatorial context of monomial ideals. When the ideals are generated by quadratic squarefree monomials, the generators of the symbolic powers are obstructions to vertex covering in the associated graph and its blowups. As a result, perfect graphs play an important role in the theory, dual to the role played by perfect graphs in the theory of secants of monomial ideals. We use Gröbner degenerations as a tool to reduce questions about symbolic powers of arbitrary ideals to the monomial case. Among the applications are a new, unified approach to the Gröbner bases of symbolic powers of determinantal and Pfaffian ideals.}, number={1}, journal={Journal of Algebra}, publisher={Elsevier BV}, author={Sullivant, Seth}, year={2008}, month={Jan}, pages={115–142} } @article{sturmfels_sullivant_2008, title={Toric geometry of cuts and splits}, volume={57}, ISSN={0026-2285}, url={http://dx.doi.org/10.1307/mmj/1220879432}, DOI={10.1307/mmj/1220879432}, abstractNote={Associated to any graph is a toric ideal whose generators record relations among the cuts of the graph. We study these ideals and the geometry of the corresponding toric varieties. Our theorems and conjectures relate the combinatorial structure of the graph and the corresponding cut polytope to algebraic properties of the ideal. Cut ideals generalize toric ideals arising in phylogenetics and the study of contingency tables.}, number={0}, journal={The Michigan Mathematical Journal}, publisher={Michigan Mathematical Journal}, author={Sturmfels, Bernd and Sullivant, Seth}, year={2008}, month={Aug}, pages={689–709} } @article{hoşten_sullivant_2007, title={A finiteness theorem for Markov bases of hierarchical models}, volume={114}, ISSN={0097-3165}, url={http://dx.doi.org/10.1016/j.jcta.2006.06.001}, DOI={10.1016/j.jcta.2006.06.001}, abstractNote={We show that the complexity of the Markov bases of multidimensional tables stabilizes eventually if a single table dimension is allowed to vary. In particular, if this table dimension is greater than a computable bound, the Markov bases consist of elements from Markov bases of smaller tables. We give an explicit formula for this bound in terms of Graver bases. We also compute these Markov and Graver complexities for all K×2×2×2 tables.}, number={2}, journal={Journal of Combinatorial Theory, Series A}, publisher={Elsevier BV}, author={Hoşten, Serkan and Sullivant, Seth}, year={2007}, month={Feb}, pages={311–321} } @article{drton_sullivant_2007, title={Algebraic statistical models.}, volume={17}, number={4}, journal={Statistica Sinica}, author={Drton, Mathias and Sullivant, Seth}, year={2007}, pages={1273–1297} } @article{sullivant_2007, title={Toric fiber products}, volume={316}, ISSN={0021-8693}, url={http://dx.doi.org/10.1016/j.jalgebra.2006.10.004}, DOI={10.1016/j.jalgebra.2006.10.004}, abstractNote={We introduce and study the toric fiber product of two ideals in polynomial rings that are homogeneous with respect to the same multigrading. Under the assumption that the set of degrees of the variables form a linearly independent set, we can explicitly describe generating sets and Gröbner bases for these ideals. This allows us to unify and generalize some results in algebraic statistics.}, number={2}, journal={Journal of Algebra}, publisher={Elsevier BV}, author={Sullivant, Seth}, year={2007}, month={Oct}, pages={560–577} } @article{drton_sturmfels_sullivant_2006, title={Algebraic factor analysis: tetrads, pentads and beyond}, volume={138}, ISSN={0178-8051 1432-2064}, url={http://dx.doi.org/10.1007/s00440-006-0033-2}, DOI={10.1007/s00440-006-0033-2}, abstractNote={Factor analysis refers to a statistical model in which observed variables are conditionally independent given fewer hidden variables, known as factors, and all the random variables follow a multivariate normal distribution. The parameter space of a factor analysis model is a subset of the cone of positive definite matrices. This parameter space is studied from the perspective of computational algebraic geometry. Gr\"obner bases and resultants are applied to compute the ideal of all polynomial functions that vanish on the parameter space. These polynomials, known as model invariants, arise from rank conditions on a symmetric matrix under elimination of the diagonal entries of the matrix. Besides revealing the geometry of the factor analysis model, the model invariants also furnish useful statistics for testing goodness-of-fit.}, number={3-4}, journal={Probability Theory and Related Fields}, publisher={Springer Science and Business Media LLC}, author={Drton, Mathias and Sturmfels, Bernd and Sullivant, Seth}, year={2006}, month={Nov}, pages={463–493} } @article{sturmfels_sullivant_2006, title={Combinatorial secant varieties.}, volume={2}, journal={Quarterly Journal of Pure and Applied Mathematics}, author={Sturmfels, Bernd and Sullivant, Seth}, year={2006}, pages={285–309} } @article{sullivant_2006, title={Compressed polytopes and statistical disclosure limitation}, volume={58}, ISSN={0040-8735}, url={http://dx.doi.org/10.2748/tmj/1163775139}, DOI={10.2748/tmj/1163775139}, abstractNote={We provide a characterization of the compressed lattice polytopes in terms of their facet defining inequalities and we show that every compressed lattice polytope is affinely isomorphic to a 0/1-polytope. As an application, we characterize those graphs whose cut polytopes are compressed and discuss consequences for studying linear programming relaxations in statistical disclosure limitation.}, number={3}, journal={Tohoku Mathematical Journal}, publisher={Mathematical Institute, Tohoku University}, author={Sullivant, Seth}, year={2006}, month={Sep}, pages={433–445} } @article{eriksson_fienberg_rinaldo_sullivant_2006, title={Polyhedral conditions for the nonexistence of the MLE for hierarchical log-linear models}, volume={41}, ISSN={0747-7171}, url={http://dx.doi.org/10.1016/j.jsc.2005.04.003}, DOI={10.1016/j.jsc.2005.04.003}, abstractNote={We provide a polyhedral description of the conditions for the existence of the maximum likelihood estimate (MLE) for a hierarchical log-linear model. The MLE exists if and only if the observed margins lie in the relative interior of the marginal cone. Using this description, we give an algorithm for determining if the MLE exists. If the tree width is bounded, the algorithm runs in polynomial time. We also perform a computational study of the case of three random variables under the no three-factor effect model.}, number={2}, journal={Journal of Symbolic Computation}, publisher={Elsevier BV}, author={Eriksson, Nicholas and Fienberg, Stephen E. and Rinaldo, Alessandro and Sullivant, Seth}, year={2006}, month={Feb}, pages={222–233} } @article{chen_dinwoodie_sullivant_2006, title={Sequential importance sampling for multiway tables}, volume={34}, ISSN={0090-5364}, url={http://dx.doi.org/10.1214/009053605000000822}, DOI={10.1214/009053605000000822}, abstractNote={We describe an algorithm for the sequential sampling of entries in multiway contingency tables with given constraints. The algorithm can be used for computations in exact conditional inference. To justify the algorithm, a theory relates sampling values at each step to properties of the associated toric ideal using computational commutative algebra. In particular, the property of interval cell counts at each step is related to exponents on lead indeterminates of a lexicographic Grobner basis. Also, the approximation of integer programming by linear programming for sampling is related to initial terms of a toric ideal. We apply the algorithm to examples of contingency tables which appear in the social and medical sciences. The numerical results demonstrate that the theory is applicable and that the algorithm performs well.}, number={1}, journal={The Annals of Statistics}, publisher={Institute of Mathematical Statistics}, author={Chen, Yuguo and Dinwoodie, Ian H. and Sullivant, Seth}, year={2006}, month={Feb}, pages={523–545} } @article{slavkovic_sullivant_2006, title={The space of compatible full conditionals is a unimodular toric variety}, volume={41}, ISSN={0747-7171}, url={http://dx.doi.org/10.1016/j.jsc.2005.04.006}, DOI={10.1016/j.jsc.2005.04.006}, abstractNote={The set of all m -tuples of compatible full conditional distributions on discrete random variables is an algebraic set whose defining ideal is a unimodular toric ideal. We identify the defining polynomials of these ideals with closed walks on a bipartite graph. Our algebraic characterization provides a natural generalization of the requirement that compatible conditionals have identical odds ratios and holds regardless of the patterns of zeros in the conditional arrays.}, number={2}, journal={Journal of Symbolic Computation}, publisher={Elsevier BV}, author={Slavkovic, Aleksandra B. and Sullivant, Seth}, year={2006}, month={Feb}, pages={196–209} } @article{sullivant_2005, title={Small Contingency Tables with Large Gaps}, volume={18}, ISSN={0895-4801 1095-7146}, url={http://dx.doi.org/10.1137/s0895480104444090}, DOI={10.1137/s0895480104444090}, abstractNote={We construct examples of contingency tables on n binary random variables where the gap between the linear programming lower/upper bound and the true integer lower/upper bounds on cell entries is exponentially large. These examples provide evidence that linear programming may not be an effective heuristic for detecting disclosures when releasing margins of multiway tables.}, number={4}, journal={SIAM Journal on Discrete Mathematics}, publisher={Society for Industrial & Applied Mathematics (SIAM)}, author={Sullivant, Seth}, year={2005}, month={Jan}, pages={787–793} } @article{sturmfels_sullivant_2005, title={Toric Ideals of Phylogenetic Invariants}, volume={12}, ISSN={1066-5277 1557-8666}, url={http://dx.doi.org/10.1089/cmb.2005.12.204}, DOI={10.1089/cmb.2005.12.204}, abstractNote={Statistical models of evolution are algebraic varieties in the space of joint probability distributions on the leaf colorations of a phylogenetic tree. The phylogenetic invariants of a model are the polynomials which vanish on the variety. Several widely used models for biological sequences have transition matrices that can be diagonalized by means of the Fourier transform of an Abelian group. Their phylogenetic invariants form a toric ideal in the Fourier coordinates. We determine generators and Gröbner bases for these toric ideals. For the Jukes-Cantor and Kimura models on a binary tree, our Gröbner bases consist of certain explicitly constructed polynomials of degree at most four.}, number={2}, journal={Journal of Computational Biology}, publisher={Mary Ann Liebert Inc}, author={Sturmfels, Bernd and Sullivant, Seth}, year={2005}, month={Mar}, pages={204–228} } @article{dobra_sullivant_2004, title={A Divide-and-Conquer Algorithm for Generating Markov Bases of Multi-way Tables}, volume={19}, ISSN={0943-4062 1613-9658}, url={http://dx.doi.org/10.1007/bf03372101}, DOI={10.1007/bf03372101}, number={3}, journal={Computational Statistics}, publisher={Springer Nature}, author={Dobra, Adrian and Sullivant, Seth}, year={2004}, month={Sep}, pages={347–366} } @article{hoşten_sullivant_2004, title={Ideals of adjacent minors}, volume={277}, ISSN={0021-8693}, url={http://dx.doi.org/10.1016/j.jalgebra.2004.01.027}, DOI={10.1016/j.jalgebra.2004.01.027}, abstractNote={We give a description of the minimal primes of the ideal generated by the 2×2 adjacent minors of a generic matrix. We also compute the complete prime decomposition of the ideal of adjacent m×m minors of an m×n generic matrix when the characteristic of the ground field is zero. A key intermediate result is the proof that the ideals which appear as minimal primes are, in fact, prime ideals. This introduces a large new class of mixed determinantal ideals that are prime.}, number={2}, journal={Journal of Algebra}, publisher={Elsevier BV}, author={Hoşten, Serkan and Sullivant, Seth}, year={2004}, month={Jul}, pages={615–642} } @article{sullivant_develin_2003, title={Markov Bases of Binary Graph Models}, volume={7}, ISSN={0218-0006 0219-3094}, url={http://dx.doi.org/10.1007/s00026-003-0196-9}, DOI={10.1007/s00026-003-0196-9}, abstractNote={This paper is concerned with the topological invariant of a graph given by the maximum degree of a Markov basis element for the corresponding graph model for binary contingency tables. We describe a degree four Markov basis for the model when the underlying graph is a cycle and generalize this result to the complete bipartite graph K 2,n . We also give a combinatorial classification of degree two and three Markov basis moves as well as a Buchberger-free algorithm to compute moves of arbitrary given degree. Finally, we compute the algebraic degree of the model when the underlying graph is a forest.}, number={4}, journal={Annals of Combinatorics}, publisher={Springer Science and Business Media LLC}, author={Sullivant, Seth and Develin, Mike}, year={2003}, month={Dec}, pages={441–466} } @article{hoşten_sullivant_2002, title={Gröbner Bases and Polyhedral Geometry of Reducible and Cyclic Models}, volume={100}, ISSN={0097-3165}, url={http://dx.doi.org/10.1006/jcta.2002.3301}, DOI={10.1006/jcta.2002.3301}, abstractNote={This article studies the polyhedral structure and combinatorics of polytopes that arise from hierarchical models in statistics, and shows how to construct Grobner bases of toric ideals associated to a subset of such models. We study the polytopes for Cyclic models, and we give a complete polyhedral description of these polytopes in the binary cyclic case. Further, we show how to build Grobner bases of a reducible model from the Grobner bases of its pieces. This result also gives a different proof that decomposable models have quadratic Grobner bases. Finally, we present the solution of a problem posed by Vlach (Discrete Appl. Math. 13 (1986) 61-78) concerning the dimension of fibers coming from models corresponding to the boundary of a simplex.}, number={2}, journal={Journal of Combinatorial Theory, Series A}, publisher={Elsevier BV}, author={Hoşten, Serkan and Sullivant, Seth}, year={2002}, month={Nov}, pages={277–301} }