% NOTE(review): the citation key below contains spaces and an apostrophe. Whitespace is
% unsafe in citation keys (classic BibTeX ends the key at the first blank). The key is
% kept unchanged so existing citations keep resolving; rename it in a coordinated change.
@article{del bonis-o'donnell_pinals_jeong_thakrar_wolfinger_landry_2019,
  title={Chemometric Approaches for Developing Infrared Nanosensors To Image Anthracyclines},
  volume={58},
  ISSN={0006-2960},
  DOI={10.1021/acs.biochem.8b00926},
  abstractNote={Generation, identification, and validation of optical probes to image molecular targets in a biological milieu remain a challenge. Synthetic molecular recognition approaches leveraging the intrinsic near-infrared fluorescence of single-walled carbon nanotubes are promising for long-term biochemical imaging in tissues. However, generation of nanosensors for selective imaging of molecular targets requires a heuristic approach. Here, we present a chemometric platform for rapidly screening libraries of candidate single-walled carbon nanotube nanosensors against biochemical analytes to quantify the fluorescence response to small molecules, including vitamins, neurotransmitters, and chemotherapeutics. We further show this method can be applied to identify biochemical analytes that selectively modulate the intrinsic near-infrared fluorescence of candidate nanosensors. Chemometric analysis thus enables identification of nanosensor-analyte "hits" and also nanosensor fluorescence signaling modalities such as wavelength shifts that are optimal for translation to biological imaging. Through this approach, we identify and characterize a nanosensor for the chemotherapeutic anthracycline doxorubicin (DOX), which provides a ≤17 nm fluorescence red-shift and exhibits an 8 μM limit of detection, compatible with peak circulatory concentrations of doxorubicin common in therapeutic administration. We demonstrate the selectivity of this nanosensor over dacarbazine, a chemotherapeutic commonly co-injected with doxorubicin. Lastly, we establish nanosensor tissue compatibility for imaging of doxorubicin in muscle tissue by incorporating nanosensors into the mouse hindlimb and measuring the nanosensor response to exogenous DOX administration. 
Our results motivate chemometric approaches to nanosensor discovery for chronic imaging of drug partitioning into tissues and toward real-time monitoring of drug accumulation.},
  number={1},
  journal={Biochemistry},
  author={Del Bonis-O'Donnell, Jackson Travis and Pinals, Rebecca L. and Jeong, Sanghwa and Thakrar, Arni and Wolfinger, Russ D. and Landry, Markita P.},
  year={2019},
  month=jan,
  pages={54--64}
}

% NOTE(review): the citation key below contains spaces, a period and an apostrophe; see the
% remark on whitespace in keys. Kept unchanged so existing citations keep resolving.
@article{jessri_wolfinger_lou_mary r. l'abbe_2017,
  title={Identification of dietary patterns associated with obesity in a nationally representative survey of {Canadian} adults: application of a priori, hybrid, and simplified dietary pattern techniques},
  volume={105},
  ISSN={1938-3207},
  DOI={10.3945/ajcn.116.134684},
  abstractNote={Background: Analyzing the effects of dietary patterns is an important approach for examining the complex role of nutrition in the etiology of obesity and chronic diseases. Objectives: The objectives of this study were to characterize the dietary patterns of Canadians with the use of a priori, hybrid, and simplified dietary pattern techniques, and to compare the associations of these patterns with obesity risk in individuals with and without chronic diseases (unhealthy and healthy obesity). Design: Dietary recalls from 11,748 participants (≥18 y of age) in the cross-sectional, nationally representative Canadian Community Health Survey 2.2 were used. A priori dietary pattern was characterized with the use of the previously validated 2015 Dietary Guidelines for Americans Adherence Index (DGAI). Weighted partial least squares (hybrid method) was used to derive an energy-dense (ED), high-fat (HF), low–fiber density (LFD) dietary pattern with the use of 38 food groups. The associations of derived dietary patterns with disease outcomes were then tested with the use of multinomial logistic regression. 
Results: An ED, HF, and LFD dietary pattern had high positive loadings for fast foods, carbonated drinks, and refined grains, and high negative loadings for whole fruits and vegetables (≥|0.17|). Food groups with a high loading were summed to form a simplified dietary pattern score. Moving from the first (healthiest) to the fourth (least healthy) quartiles of the ED, HF, and LFD pattern and the simplified dietary pattern scores was associated with increasingly elevated ORs for unhealthy obesity, with individuals in quartile 4 having an OR of 2.57 (95% CI: 1.75, 3.76) and 2.73 (95% CI: 1.88, 3.98), respectively (P-trend < 0.0001). Individuals who adhered the most to the 2015 DGAI recommendations (quartile 4) had a 53% lower OR of unhealthy obesity (P-trend < 0.0001). The associations of dietary patterns with healthy obesity and unhealthy nonobesity were weaker, albeit significant. Conclusions: Consuming an ED, HF, and LFD dietary pattern and lack of adherence to the recommendations of the 2015 DGAI were associated with a significantly higher risk of obesity with and without accompanying chronic diseases.},
  number={3},
  journal={American Journal of Clinical Nutrition},
  author={Jessri, Mahsa and Wolfinger, Russell D. and Lou, Wendy Y. and L'Abbe, Mary R.},
  year={2017},
  month=mar,
  pages={669--684}
}

@article{miclaus_wolfinger_czika_2009,
  title={{SNP} Selection and Multidimensional Scaling to Quantify Population Structure},
  volume={33},
  ISSN={1098-2272},
  DOI={10.1002/gepi.20401},
  abstractNote={In the new era of large-scale collaborative Genome Wide Association Studies (GWAS), population stratification has become a critical issue that must be addressed. In order to build upon the methods developed to control the confounding effect of a structured population, it is extremely important to visualize and quantify that effect. 
In this work, we develop methodology for single nucleotide polymorphism (SNP) selection and subsequent population stratification visualization based on deviation from Hardy-Weinberg equilibrium in conjunction with non-metric multidimensional scaling (MDS); a distance-based multivariate technique. Through simulation, it is shown that SNP selection based on Hardy-Weinberg disequilibrium (HWD) is robust against confounding linkage disequilibrium patterns that have been problematic in past studies and methods as well as producing a differentiated SNP set. Non-metric MDS is shown to be a multivariate visualization tool preferable to principal components in conjunction with HWD SNP selection through theoretical and empirical study from HapMap samples. The proposed selection tool offers a simple and effective way to select appropriate substructure-informative markers for use in exploring the effect that population stratification may have in association studies. Genet. Epidemiol. 33:488–496, 2009. © 2009 Wiley-Liss, Inc.},
  number={6},
  journal={Genetic Epidemiology},
  author={Miclaus, Kelci and Wolfinger, Russ and Czika, Wendy},
  year={2009},
  month=sep,
  pages={488--496}
}

@article{kennerly_ballmann_martin_wolfinger_gregory_stoskopf_gibson_2008,
  title={A gene expression signature of confinement in peripheral blood of red wolves ({Canis rufus})},
  volume={17},
  ISSN={1365-294X},
  DOI={10.1111/j.1365-294X.2008.03775.x},
  abstractNote={The stresses that animals experience as a result of modification of their ecological circumstances induce physiological changes that leave a signature in profiles of gene expression. We illustrate this concept in a comparison of free range and confined North American red wolves (Canis rufus). Transcription profiling of peripheral blood samples from 13 red wolf individuals in the Alligator River region of North Carolina revealed a strong signal of differentiation. 
Four hundred eighty-two out of 2980 transcripts detected on Illumina HumanRef8 oligonucleotide bead arrays were found to differentiate free range and confined wolves at a false discovery rate of 12.8% and P < 0.05. Over-representation of genes in focal adhesion, insulin signalling, proteasomal, and tryptophan metabolism pathways suggests the activation of pro-inflammatory and stress responses in confined animals. Consequently, characterization of differential transcript abundance in an accessible tissue such as peripheral blood identifies biomarkers that could be useful in animal management practices and for evaluating the impact of habitat changes on population health, particularly as attention turns to the impact of climate change on physiology and in turn species distributions.},
  number={11},
  journal={Molecular Ecology},
  author={Kennerly, Erin and Ballmann, Anne and Martin, Stanton and Wolfinger, Russ and Gregory, Simon and Stoskopf, Michael and Gibson, Greg},
  year={2008},
  month=jun,
  pages={2782--2791}
}

% Typed as a journal article rather than misc: the entry carries journal, volume, number
% and pages, which standard styles do not print for misc entries.
@article{ascencio-ibanez_sozzani_lee_chu_wolfinger_cella_hanley-bowdoin_2008,
  title={Global analysis of {Arabidopsis} gene expression uncovers a complex array of changes impacting pathogen response and cell cycle during geminivirus infection},
  volume={148},
  number={1},
  journal={Plant Physiology},
  author={Ascencio-Ibanez, J. T. and Sozzani, R. and Lee, T. J. and Chu, T. M. and Wolfinger, R. D. and Cella, R. and Hanley-Bowdoin, L.},
  year={2008},
  pages={436--454}
}

@article{hsieh_chu_wolfinger_2007,
  title={Comparison of statistical performance of univariate and bivariate mixed models for {Affymetrix} {(R)} probe level data},
  volume={77},
  ISSN={0094-9655},
  DOI={10.1080/10629360600826398},
  abstractNote={Half of the probes on Affymetrix® microarrays contain a single base mismatch (MM) of a known perfect match (PM) target sequence. 
While putatively designed to detect nonspecific binding, the MM data can also contain true signals and because of this, debates persist concerning how to best combine PM and MM data for statistical modeling purposes. Most current approaches involve either subtracting some function of MM from PM or ignoring MM altogether. Here, we describe a bivariate model that includes both PM and MM based on the mixed linear modelling framework. It directly models the correlation between PM and MM and thereby increases the power of significant gene detection. In this paper, we show that the bivariate mixed model offers moderate gains in power over a comparable univariate model that ignores the MM data. The gains are more prominent when the number of replicates and the array-to-array variability is small. We apply the models to a small experiment on yeast and use the data as a basis for a Monte Carlo simulation.},
  number={3},
  journal={Journal of Statistical Computation and Simulation},
  author={Hsieh, Wen-Ping and Chu, Tzu-Ming and Wolfinger, Russ},
  year={2007},
  pages={251--264}
}

@article{bushel_wolfinger_gibson_2007,
  title={Simultaneous clustering of gene expression data with clinical chemistry and pathological evaluations reveals phenotypic prototypes},
  volume={1},
  ISSN={1752-0509},
  DOI={10.1186/1752-0509-1-15},
  abstractNote={Commonly employed clustering methods for analysis of gene expression data do not directly incorporate phenotypic data about the samples. Furthermore, clustering of samples with known phenotypes is typically performed in an informal fashion. The inability of clustering algorithms to incorporate biological data in the grouping process can limit proper interpretation of the data and its underlying biology. 
We present a more formal approach, the modk-prototypes algorithm, for clustering biological samples based on simultaneously considering microarray gene expression data and classes of known phenotypic variables such as clinical chemistry evaluations and histopathologic observations. The strategy involves constructing an objective function with the sum of the squared Euclidean distances for numeric microarray and clinical chemistry data and simple matching for histopathology categorical values in order to measure dissimilarity of the samples. Separate weighting terms are used for microarray, clinical chemistry and histopathology measurements to control the influence of each data domain on the clustering of the samples. The dynamic validity index for numeric data was modified with a category utility measure for determining the number of clusters in the data sets. A cluster's prototype, formed from the mean of the values for numeric features and the mode of the categorical values of all the samples in the group, is representative of the phenotype of the cluster members. The approach is shown to work well with a simulated mixed data set and two real data examples containing numeric and categorical data types. One from a heart disease study and another from acetaminophen (an analgesic) exposure in rat liver that causes centrilobular necrosis. The modk-prototypes algorithm partitioned the simulated data into clusters with samples in their respective class group and the heart disease samples into two groups (sick and buff denoting samples having pain type representative of angina and non-angina respectively) with an accuracy of 79%. This is on par with, or better than, the assignment accuracy of the heart disease samples by several well-known and successful clustering algorithms. 
Following modk-prototypes clustering of the acetaminophen-exposed samples, informative genes from the cluster prototypes were identified that are descriptive of, and phenotypically anchored to, levels of necrosis of the centrilobular region of the rat liver. The biological processes cell growth and/or maintenance, amine metabolism, and stress response were shown to discern between no and moderate levels of acetaminophen-induced centrilobular necrosis. The use of well-known and traditional measurements directly in the clustering provides some guarantee that the resulting clusters will be meaningfully interpretable.},
  journal={BMC Systems Biology},
  author={Bushel, Pierre R. and Wolfinger, Russell D. and Gibson, Greg},
  year={2007},
  month=feb
}

@article{chu_weir_wolfinger_2004,
  title={Comparison of {Li-Wong} and loglinear mixed models for the statistical analysis of oligonucleotide arrays},
  volume={20},
  ISSN={1460-2059},
  DOI={10.1093/bioinformatics/btg435},
  abstractNote={Li and Wong have described some useful statistical models for probe-level, oligonucleotide array data based on a multiplicative parametrization. In earlier work, we proposed similar analysis-of-variance-style mixed models fit on a log scale. With only subtle differences in the specification of their mean and stochastic error components, a question arises as to whether these models could lead to varying conclusions in practical application. In this paper, we provide an empirical comparison of the two models using a real data set, and find the models perform quite similarly across most genes, but with some interesting and important distinctions. We also present results from a simulation study designed to assess inferential properties of the models, and propose a modified test statistic for the Li-Wong model that provides an improvement in Type 1 error control. 
Advantages of both methods include the ability to directly assess and account for key sources of variability in the chip data and a means to automate statistical quality control.},
  number={4},
  journal={Bioinformatics},
  author={Chu, T. M. and Weir, B. S. and Wolfinger, R. D.},
  year={2004},
  month=mar,
  pages={500--506}
}

% NOTE(review): the citation key below contains spaces ("van zyl", "von arnold"). Whitespace
% is unsafe in citation keys (classic BibTeX ends the key at the first blank). The key is
% kept unchanged so existing citations keep resolving; rename it in a coordinated change.
@article{stasolla_bozhkov_chu_van zyl_egertsdotter_suarez_craig_wolfinger_von arnold_sederoff_2004,
  title={Variation in transcript abundance during somatic embryogenesis in gymnosperms},
  volume={24},
  ISSN={1758-4469},
  DOI={10.1093/treephys/24.10.1073},
  abstractNote={Somatic embryogenesis of Norway spruce (Picea abies L.) is a versatile model system to study molecular mechanisms regulating embryo development because it proceeds through defined developmental stages corresponding to specific culture treatments. Normal embryonic development involves early differentiation of proembryogenic masses (PEMs) into somatic embryos, followed by early and late embryogeny leading to the formation of mature cotyledonary embryos. In some cell lines there is a developmental arrest at the PEM-somatic embryo transition. To learn more about the molecular mechanisms regulating embryogenesis, we compared the transcript profiles of two normal lines and one developmentally arrested line. Ribonucleic acid, extracted from these cell lines at successive developmental stages, was analyzed on DNA microarrays containing 2178 expressed sequence tags (ESTs) (corresponding to 2110 unique cDNAs) from loblolly pine (Pinus taeda L.). Hybridization between spruce and pine species on microarrays has been shown to be effective (van Zyl et al. 2002, Stasolla et al. 2003). In contrast to the developmentally arrested line, the early phases of normal embryo development are characterized by a precise pattern of gene expression, i.e., repression followed by induction. 
Comparison of transcript levels between successive stages of embryogenesis allowed us to identify several genes that showed unique expression responses during normal development. Several of these genes encode proteins involved in detoxification processes, methionine synthesis and utilization, and carbohydrate metabolism. The potential role of these genes in embryo development is discussed.},
  number={10},
  journal={Tree Physiology},
  author={Stasolla, C. and Bozhkov, P. V. and Chu, T. M. and Van Zyl, L. and Egertsdotter, U. and Suarez, M. F. and Craig, D. and Wolfinger, R. D. and Von Arnold, S. and Sederoff, R. R.},
  year={2004},
  month=oct,
  pages={1073--1085}
}

@article{chhabra_shockley_conners_scott_wolfinger_kelly_2003, title={Carbohydrate-induced differential gene expression patterns in the hyperthermophilic bacterium Thermotoga maritima}, volume={278}, ISSN={["1083-351X"]}, DOI={10.1074/jbc.M211748200}, abstractNote={The hyperthermophilic bacteriumThermotoga maritima MSB8 was grown on a variety of carbohydrates to determine the influence of carbon and energy source on differential gene expression. Despite the fact that T. maritima has been phylogenetically characterized as a primitive microorganism from an evolutionary perspective, results here suggest that it has versatile and discriminating mechanisms for regulating and effecting complex carbohydrate utilization. Growth ofT. maritima on monosaccharides was found to be slower than growth on polysaccharides, although growth to cell densities of 108 to 109 cells/ml was observed on all carbohydrates tested. Differential expression of genes encoding carbohydrate-active proteins encoded in the T. maritimagenome was followed using a targeted cDNA microarray in conjunction with mixed model statistical analysis. Coordinated regulation of genes responding to specific carbohydrates was noted. Although glucose generally repressed expression of all glycoside hydrolase genes, other sugars induced or repressed these genes to varying extents. 
Expression profiles of most endo-acting glycoside hydrolase genes correlated well with their reported biochemical properties, although exo-acting glycoside hydrolase genes displayed less specific expression patterns. Genes encoding selected putative ABC sugar transporters were found to respond to specific carbohydrates, and in some cases putative oligopeptide transporter genes were also found to respond to specific sugar substrates. Several genes encoding putative transcriptional regulators were expressed during growth on specific sugars, thus suggesting functional assignments. The transcriptional response ofT. maritima to specific carbohydrate growth substrates indicated that sugar backbone- and linkage-specific regulatory networks are operational in this organism during the uptake and utilization of carbohydrate substrates. Furthermore, the wide ranging collection of such networks in T. maritima suggests that this organism is capable of adapting to a variety of growth environments containing carbohydrate growth substrates. The hyperthermophilic bacteriumThermotoga maritima MSB8 was grown on a variety of carbohydrates to determine the influence of carbon and energy source on differential gene expression. Despite the fact that T. maritima has been phylogenetically characterized as a primitive microorganism from an evolutionary perspective, results here suggest that it has versatile and discriminating mechanisms for regulating and effecting complex carbohydrate utilization. Growth ofT. maritima on monosaccharides was found to be slower than growth on polysaccharides, although growth to cell densities of 108 to 109 cells/ml was observed on all carbohydrates tested. Differential expression of genes encoding carbohydrate-active proteins encoded in the T. maritimagenome was followed using a targeted cDNA microarray in conjunction with mixed model statistical analysis. Coordinated regulation of genes responding to specific carbohydrates was noted. 
Although glucose generally repressed expression of all glycoside hydrolase genes, other sugars induced or repressed these genes to varying extents. Expression profiles of most endo-acting glycoside hydrolase genes correlated well with their reported biochemical properties, although exo-acting glycoside hydrolase genes displayed less specific expression patterns. Genes encoding selected putative ABC sugar transporters were found to respond to specific carbohydrates, and in some cases putative oligopeptide transporter genes were also found to respond to specific sugar substrates. Several genes encoding putative transcriptional regulators were expressed during growth on specific sugars, thus suggesting functional assignments. The transcriptional response ofT. maritima to specific carbohydrate growth substrates indicated that sugar backbone- and linkage-specific regulatory networks are operational in this organism during the uptake and utilization of carbohydrate substrates. Furthermore, the wide ranging collection of such networks in T. maritima suggests that this organism is capable of adapting to a variety of growth environments containing carbohydrate growth substrates. analysis of variance carboxymethylcellulose phosphotransferase system carbon catabolite repression Saccharolytic microorganisms employ a range of proteins to hydrolyze, transport, and utilize complex carbohydrates that serve as carbon and energy sources (1de Vos W.M. Kengen S.W.M. Voorhorst W.G.B. van der Oost J. Extremophiles. 1998; 2: 201-205Crossref PubMed Scopus (38) Google Scholar). In some cases, these proteins are very specific to particular carbohydrates, whereas in other situations they mediate the processing of a broader range of glycosides. For simple sugars, such as glucose, binding and transport proteins alone mediate substrate entry into specific intracellular anabolic and catabolic pathways (2Galperin M.Y. Noll K.M. Romano A.H. Appl. Environ. Microbiol. 
1996; 62: 2915-2918PubMed Google Scholar). However, for complex carbohydrates, a series of glycoside hydrolases must first process the polysaccharide so that its backbone and side chain glycosidic linkages are hydrolyzed to the extent needed for binding, transport, and intracellular utilization. How specific organisms develop the capacity to utilize complex carbohydrates is not known, but this probably involves evolutionary pressures in addition to acquisition of this genetic potential through horizontal gene transfer events. In any case, a microorganism's capacity to utilize carbohydrates presumably reflects the availability of such substrates in its habitat. Therefore, insights into the repertoire of carbohydrate-active proteins in a given organism and how the expression of these proteins is regulated would reveal much about particular metabolic features in addition to how it interacts within a given ecosystem. Thermotoga maritima is an obligately anaerobic, heterotrophic, hyperthermophilic bacterium originally isolated from geothermal features associated with Vulcano Island, Italy (3Huber R. Langworthy T.A. Konig H. Thomm M. Woese C.R. Sleytr U.B. Stetter K.O. Arch. Microbiol. 1986; 144: 324-333Crossref Scopus (623) Google Scholar). Its capacity to utilize a wide range of simple and complex carbohydrates was confirmed by the inventory of glycoside hydrolases encoded in its genome (4Nelson K.E. Clayton R.A. Gill S.R. Gwinn M.L. Dodson R.J. Haft D.H. Hickey E.K. Peterson J.D. Nelson W.C. Ketchum K.A. McDonald L. Utterback T.R. Malek J.A. Linher K.D. Garrett M.M. Stewart A.M. Cotton M.D. Pratt M.S. Phillips C.A. Richardson D. Heidelberg J. Sutton G.G. Fleischmann R.D. Eisen J.A. Fraser C.M. et al.Nature. 1999; 399: 323-329Crossref PubMed Scopus (1206) Google Scholar). In fact, the T. maritima genome, despite its relatively small size, encodes the largest number of glycoside hydrolases of any bacterial or archaeal genome sequenced to date (see Fig. 1). 
From growth experiments and characterization of specific glycoside hydrolases (5Chhabra S.R. Shockley K.R. Ward D.E. Kelly R.M. Appl. Environ. Microbiol. 2002; 68: 545-554Crossref PubMed Scopus (91) Google Scholar), T. maritima is known to metabolize both polysaccharides and simple sugars, including carboxymethylcellulose, barley glucan, starch, galactomannan (5Chhabra S.R. Shockley K.R. Ward D.E. Kelly R.M. Appl. Environ. Microbiol. 2002; 68: 545-554Crossref PubMed Scopus (91) Google Scholar), xylan (6Bronnenmeier K. Kern A. Liebl W. Staudenbauer W.L. Appl. Environ. Microbiol. 1995; 61: 1399-1407Crossref PubMed Google Scholar), pectin, 1L. D. Kluskens, personal communication. 1L. D. Kluskens, personal communication. mannose, xylose, and glucose (2Galperin M.Y. Noll K.M. Romano A.H. Appl. Environ. Microbiol. 1996; 62: 2915-2918PubMed Google Scholar). In some cases, the proteins involved in the processing, transport, and utilization of these glycosides can be inferred from their apparent organization into operons in the T. maritimagenome sequence (4Nelson K.E. Clayton R.A. Gill S.R. Gwinn M.L. Dodson R.J. Haft D.H. Hickey E.K. Peterson J.D. Nelson W.C. Ketchum K.A. McDonald L. Utterback T.R. Malek J.A. Linher K.D. Garrett M.M. Stewart A.M. Cotton M.D. Pratt M.S. Phillips C.A. Richardson D. Heidelberg J. Sutton G.G. Fleischmann R.D. Eisen J.A. Fraser C.M. et al.Nature. 1999; 399: 323-329Crossref PubMed Scopus (1206) Google Scholar), whereas in other cases such classification is not clear. Regulation of genes encoding specific carbohydrate-active proteins in T. maritima has only been studied to a limited extent thus far (5Chhabra S.R. Shockley K.R. Ward D.E. Kelly R.M. Appl. Environ. Microbiol. 2002; 68: 545-554Crossref PubMed Scopus (91) Google Scholar, 7Nguyen T.N. Borges K.M. Romano A.H. Noll K.M. FEMS Microbiol. Lett. 
2001; 195: 79-83Crossref PubMed Google Scholar), and the coordinated regulation of related genes involved in polysaccharide utilization has not been examined. Here, a targeted cDNA microarray, based on carbohydrate-active proteins from T. maritima, was used in conjunction with mixed model analysis (8Jin W. Riley R.M. Wolfinger R.D. White K.P. Passador-Gurgel G. Gibson G. Nat. Genet. 2001; 29: 389-395Crossref PubMed Scopus (522) Google Scholar, 9Wolfinger R.D. Gibson G. Wolfinger E.D. Bennett L. Hamadeh H. Bushel P. Afshari C. Paules R.S. J. Comput. Biol. 2001; 8: 625-637Crossref PubMed Scopus (856) Google Scholar) to explore issues related to saccharide utilization by this organism. Despite the fact thatT. maritima has been phylogenetically characterized as a primitive microorganism from an evolutionary perspective (10Achenbach-Richter L. Gupta R. Stetter K.O. Woese C.R. Syst. Appl. Microbiol. 1987; 9: 34-39Crossref PubMed Scopus (180) Google Scholar), results here support that it has versatile and discriminating mechanisms for regulating and effecting complex carbohydrate utilization. The relative importance of evolutionary processes and horizontal gene transfer (4Nelson K.E. Clayton R.A. Gill S.R. Gwinn M.L. Dodson R.J. Haft D.H. Hickey E.K. Peterson J.D. Nelson W.C. Ketchum K.A. McDonald L. Utterback T.R. Malek J.A. Linher K.D. Garrett M.M. Stewart A.M. Cotton M.D. Pratt M.S. Phillips C.A. Richardson D. Heidelberg J. Sutton G.G. Fleischmann R.D. Eisen J.A. Fraser C.M. et al.Nature. 1999; 399: 323-329Crossref PubMed Scopus (1206) Google Scholar) in developing its carbohydrate utilization capacity is not known, butT. maritima's ability to respond to various substrates in its growth environment underlies its ubiquity in global geothermal settings (11Nesbo C.L. Nelson K.E. Doolittle W.F. J. Bacteriol. 2002; 184: 4475-4488Crossref PubMed Scopus (55) Google Scholar). 
Open reading frames (total of 269) of known and putative genes related to sugar processing and other related metabolic functions were identified through BLAST (12Altschul S.F. Gish W. Miller W. Myers E.W. Lipman D.J. J. Mol. Biol. 1990; 215: 403-410Crossref PubMed Scopus (69678) Google Scholar) comparisons of protein sequences from the T. maritima MSB8 genome available on the World Wide Web at www.tigr.org/ tigrscripts/CMR2/GenomePage3.spl?database=btm. DNA primers were designed with similar annealing temperatures and minimal hairpin formation using Vector NTI 7.0 (Informax, Bethesda, MD). The selected probes were PCR-amplified in a PTC-100 Thermocycler (MJ Research, Inc., Waltham, MA) using Taq polymerase (Roche Molecular Biochemicals) and T. maritima genomic DNA, isolated as described previously (5Chhabra S.R. Shockley K.R. Ward D.E. Kelly R.M. Appl. Environ. Microbiol. 2002; 68: 545-554Crossref PubMed Scopus (91) Google Scholar). The integrity and concentration of the PCR products were verified on 1% agarose gels. PCR products were purified to 100 ng/μl using 96-well QIAquick PCR purification kits (Qiagen, Valencia, CA), resuspended in 50% Me2SO, and printed onto CMT-GAPS aminosilane-coated microscope slides (Corning Glass) using a 417 Arrayer (Affymetrix, Santa Clara, CA) in the North Carolina State University Genome Research Laboratory (Raleigh, NC). Eight replicates of each gene fragment were printed onto each slide. The DNA was then attached to the slides by UV cross-linking using a GS GeneLinker UV Chamber (Bio-Rad) set at 250 mJ and baked at 75 °C for 2 h. Growth ofT. maritima MSB8 cultures in artificial sea water was followed using optical density measurements and epifluorescence microscopic cell density enumeration, as described previously (5Chhabra S.R. Shockley K.R. Ward D.E. Kelly R.M. Appl. Environ. Microbiol. 2002; 68: 545-554Crossref PubMed Scopus (91) Google Scholar). 
Growth substrates glucose, mannose, xylose, β-xylan (birchwood), laminarin (Laminaria digitata), and starch (potato) were obtained from Sigma. Galactomannan (carob), glucomannan (konjac), carboxymethylcellulose, and β-glucan (barley) were obtained from Megazyme (Wicklow, Ireland). Growth substrates were prepared as described previously (5Chhabra S.R. Shockley K.R. Ward D.E. Kelly R.M. Appl. Environ. Microbiol. 2002; 68: 545-554Crossref PubMed Scopus (91) Google Scholar) and included in the medium at a final concentration of 0.25% (w/v). Substrate purities as provided by the manufacturers varied from 95 to 99%. To ensure minimum carryover between substrates, cells were grown for at least 10 passes on each carbon source using a 0.5% (v/v) starting innoculum before obtaining the growth curves. Specific growth rates on mono- and polysaccharide substrates were determined from the slopes of semilog plots of exponential cell growth versus time. Isolation of total RNA from T. maritima was performed on cells that were grown until early- to mid-exponential phase on the various growth substrates, as described in detail previously (5Chhabra S.R. Shockley K.R. Ward D.E. Kelly R.M. Appl. Environ. Microbiol. 2002; 68: 545-554Crossref PubMed Scopus (91) Google Scholar). First-strand cDNA was prepared from T. maritima total RNA using Stratascript (Stratagene, La Jolla, CA) and random hexamer primers (Invitrogen) by the incorporation of 5-[3-aminoallyl]-2′-deoxyuridine-5′-triphosphate (Sigma) as described elsewhere (13Hasseman J. TIGR Microarray Protocols. 2001; (http://www.tigr.org/tdb/microarray/protocolsTIGR.shtml)Google Scholar). The slides were scanned using a Scanarray 4000 scanner (GSI Lumonics and Billerica) in the North Carolina State University Genome Research Laboratory. Signal intensity data were obtained using Quantarray (GSI Lumonics). A loop design was constructed (see Fig. 2) to ensure reciprocal labeling for all 10 different experimental conditions. 
Replication of treatments, arrays, dyes, and cDNA spots allowed the use of analysis of variance (ANOVA)2 models for data analysis. ANOVAs are especially appropriate for loop designs in which a large number of conditions are compared with one another, eliminating uninteresting reference samples and allowing for the collection of more information on experimental conditions (14Kerr M.K. Churchill G.A. Genet. Res. 2001; 77: 123-128Crossref PubMed Scopus (465) Google Scholar). Mixed ANOVA models, in which some effects are considered fixed and others are considered random, have been used to re-examine published microarray data sets (9Wolfinger R.D. Gibson G. Wolfinger E.D. Bennett L. Hamadeh H. Bushel P. Afshari C. Paules R.S. J. Comput. Biol. 2001; 8: 625-637Crossref PubMed Scopus (856) Google Scholar) and examine the effects of sex, genotype, and age on transcription in Drosophila melanogaster (8Jin W. Riley R.M. Wolfinger R.D. White K.P. Passador-Gurgel G. Gibson G. Nat. Genet. 2001; 29: 389-395Crossref PubMed Scopus (522) Google Scholar). Using existing SAS procedures and customized Perl code, an automated data import system was developed to merge Quantarray intensity measurements, coordinate files generated by the array printer, and corresponding T. maritima locus numbers in a SAS data set (SAS Institute, Cary, NC). The data import system was verified through independent calculations in Excel (Microsoft, Seattle, WA). A linear normalization ANOVA model (9Wolfinger R.D. Gibson G. Wolfinger E.D. Bennett L. Hamadeh H. Bushel P. Afshari C. Paules R.S. J. Comput. Biol. 2001; 8: 625-637Crossref PubMed Scopus (856) Google Scholar) of log base 2 intensities was used to estimate global variation in the form of fixed (dye, treatment) and random (array, pin within array, pin spot within array) effects and random error using the following model: $\log_2(y_{ijklmn}) = m + D_j + T_k + A_i + A_i(P_l) + A_i(S_m P_l) + \varepsilon_{ijklmn}$. 
The estimated effects calculated from this model were used to predict an expected intensity for each value, and then a residual was calculated as the difference between a replicate's observed and predicted intensity and then used as data to capture variation attributable to gene-specific effects after accounting for global variation. Gene-specific ANOVA models were then used to partition variation into gene-specific treatment effects, dye effects, and the same hierarchy of random effects described previously. Specifically, the model $r_{ijklmn} = m + D_j + T_k + A_i + A_i(P_l) + A_i(S_m P_l) + \varepsilon_{ijklmn}$ was fit separately to the residuals for each gene, and the resulting parameter estimates and S.E. values were then used for statistical inference. Volcano plots were used to visualize interesting contrasts or comparisons between two treatments or two groups of treatments (9Wolfinger R.D. Gibson G. Wolfinger E.D. Bennett L. Hamadeh H. Bushel P. Afshari C. Paules R.S. J. Comput. Biol. 2001; 8: 625-637Crossref PubMed Scopus (856) Google Scholar). A Bonferroni correction was utilized to adjust for the expected increase in false positives due to multiple comparisons (9Wolfinger R.D. Gibson G. Wolfinger E.D. Bennett L. Hamadeh H. Bushel P. Afshari C. Paules R.S. J. Comput. Biol. 2001; 8: 625-637Crossref PubMed Scopus (856) Google Scholar). Genes meeting the Bonferroni significance criteria were selected for further study, ensuring that genes with inconsistent fold changes would be eliminated from further analysis. Two complementary approaches were utilized to cluster data from T. maritima growth on 10 saccharides. To visualize the relative expression levels of all genes within a treatment, hierarchical clustering was performed on least squares means calculated from the linear models for each sugar (Fig. 3). To visualize the expression pattern of each single gene across treatments, the least squares mean estimates were standardized using the mean and S.D. 
of the 10 least squares means estimates for a given gene. Each of the 10 least squares means estimates were standardized accordingly with the formula $Y_i = (X_i - \mu)/\sigma$, where $Y_i$ = the standardized least squares means variable, $\mu = \sum X_i/n$, and $\sigma = (\sum (X_i - \mu)^2)^{1/2}$. The standardized variable was then utilized for clustering (Fig. 3). For complete information on signal intensity, significance of expression changes, -fold changes, pairwise volcano plots, and hierarchical clustering for all of the genes included on the array, see the Supplemental Material. A targeted cDNA microarray for T. maritima was constructed that included 269 known and putative genes or about 15% of the total open reading frames in the T. maritima genome. This included the known set of genes related to glycoside utilization and modification (65 genes), proteolysis (40 genes), stress response, and proteolytic fermentation. Genes related to sugar transport (21 genes) or transcriptional regulation (69 genes) and 66 other genes of interest were also included. Genes apparently related to glycoside utilization and modification in T. maritima include 41 glycoside hydrolases, 17 glycosyl transferases, 6 carbohydrate esters, and 1 polysaccharide lyase. The corresponding encoded proteins have been classified into several families, based on amino acid sequence homology (15Henrissat B. Bairoch A. Biochem. J. 1996; 316: 695-696Crossref PubMed Scopus (1179) Google Scholar) (available on the World Wide Web at afmb.cnrs-mrs.fr/CAZY). There are over 130 T. maritima proteins with sufficient BLAST homology to be classified into transcriptional regulatory or signal transduction COG categories (16Tatusov R.L. Natale D.A. Garkavtsev I.V. Tatusova T.A. Shankavaram U.T. Rao B.S. Kiryutin B. Galperin M.Y. Fedorova N.D. Koonin E.V. Nucleic Acids Res. 2001; 29: 22-28Crossref PubMed Scopus (1539) Google Scholar). 
These regulatory proteins have been assigned to families based on sequence homology; however, different proteins in the same families may have different DNA and substrate-binding specificities (17Mirny L.A. Gelfand M.S. J. Mol. Biol. 2002; 321: 7-20Crossref PubMed Scopus (116) Google Scholar). Also, proteins placed in different families may share the same name because of their regulon composition, as in the case of the Escherichia coli and Bacillus subtilis xylR protein (18Song S. Park C. J. Bacteriol. 1997; 179: 7025-7032Crossref PubMed Scopus (120) Google Scholar, 19Kreuzer P. Gartner D. Allmansberger R. Hillen W. J. Bacteriol. 1989; 171: 3840-3845Crossref PubMed Google Scholar). Of the 69 transcription/transduction genes on the array, six share similarity with the ROK (receptor, open reading frame,kinase) family of transcriptional regulators, which include glucokinases, B. subtilis XylR, and E. coli NagC (COG1940) (20Titgemeyer F. J. Cell. Biochem. 1993; 51: 69-74Crossref PubMed Scopus (19) Google Scholar). Six members of the PurR/LacI superfamily (COG1609) were included (21Mirny L.A. Gelfand M.S. Nucleic Acids Res. 2002; 30: 1704-1711Crossref PubMed Scopus (58) Google Scholar) along with the T. maritima IclR transcriptional regulator, whose structure was recently solved (22Zhang R.G. Kim Y. Skarina T. Beasley S. Laskowski R. Arrowsmith C. Edwards A. Joachimiak A. Savchenko A. J. Biol. Chem. 2002; 277: 19183-19190Abstract Full Text Full Text PDF PubMed Scopus (59) Google Scholar). Several pairs of sensor histidine kinases and response regulators of putative two-component regulatory systems were included, as were regulators from the MarR (23Cohen S.P. Hachler H. Levy S.B. J. Bacteriol. 1993; 175: 1484-1492Crossref PubMed Scopus (274) Google Scholar), AraC (24Martin R.G. Rosner J.L. Curr. Opin. Microbiol. 2001; 4: 132-137Crossref PubMed Scopus (182) Google Scholar), TroR (25Hardham J.M. Stamm L.V. Porcella S.F. Frye J.G. Barnes N.Y. Howell J.K. Mueller S.L. 
Radolf J.D. Weinstock G.M. Norris S.J. Gene (Amst.). 1997; 197: 47-64Crossref PubMed Scopus (62) Google Scholar), LytR (26Nikolskaya A.N. Galperin M.Y. Nucleic Acids Res. 2002; 30: 2453-2459Crossref PubMed Scopus (147) Google Scholar), ArsR (27Diorio C. Cai J. Marmor J. Shinder R. DuBow M.S. J. Bacteriol. 1995; 177: 2050-2056Crossref PubMed Google Scholar), and CspC (28Phadtare S. Alsina J. Inouye M. Curr. Opin. Microbiol. 1999; 2: 175-180Crossref PubMed Scopus (271) Google Scholar) families. The T. maritima genome contains ∼120 genes involved in oligopeptide/sugar transport. In the targeted microarray used here, 21 genes related to sugar transport were included on the basis of their proximity to the genes involved in glycoside utilization. This targeted microarray was used to examine the differential response of T. maritima grown on a range of mono- and polysaccharides at its optimal growth temperature of 80 °C. Growth conditions were analyzed based on an incomplete loop design (Fig. 2). Treatments in the loop design were balanced with respect to dyes so that treatment effects were not confounded with dye effects. T. maritima cultures were grown on a variety of saccharides, including the monosaccharides glucose, mannose, and xylose. The polysaccharides investigated differed in backbone sugar type (glucose, mannose, and xylose), backbone linkage type (β-1,3; β-1,4; or α-1,4), and side chain residue type (galactose, glucuronic acid, or glucose) (see Table I). Included in these were a mixed backbone (konjac glucomannan: glucose/mannose) and a mixed linkage (barley glucan: β-1,4/1,3) polysaccharide. Final cell densities were in the range of $10^{8}$ to $10^{9}$ cells/ml in all cases. Doubling times (min) for galactomannan (carob), β-glucan (barley), laminarin (L. digitata), β-xylan (birchwood), starch (potato), glucomannan (konjac), and carboxymethylcellulose were estimated to be 85, 72, 143, 61, 117, 74, and 78, respectively. 
On the monosaccharides, the doubling times (min) were 162, 253, and 188, for glucose, mannose, and xylose, respectively. Under identical conditions, the average doubling time for growth on monosaccharides (201 min) was observed to be substantially higher than that on the corresponding polysaccharide substrates (90 min).Table ICarbon sources used in this studyPoly/monosaccharideSourceBackbone structureSide chainMassDaGlucoseNAaNA, not available.Glc180MannoseNAMan180XyloseNAXylbXyl, xylose.150GalactomannanCarob(Man β1→4 Man)nGal (α1→6)NAGlucomannanKonjac(Glc β1→4 Man)n100,000Carboxymethyl celluloseNA(Glc β1→4 Glc)n90,000β-1,3/1,4-GlucanBarley(Glc β1→3,4 Glc)n250,000LaminarinL. digitata(Glc β1→3 Glc)n5,000StarchPotato(Glc α1→4 Glc)nGlc (α→16)nNAβ-XylanBirchwood(Xyl β1→4 Xyl)nGlr (α1→6)cGlr, glucuronic acid.NAa NA, not available.b Xyl, xylose.c Glr, glucuronic acid. Open table in a new tab Two hierarchical clusters are shown in Fig.3 to summarize the expression patterns of 269 T. maritima genes during growth on 10 saccharides. The first cluster is based on least squares means and compares the normalized expression levels of all genes within each treatment condition. The second cluster is based on standardized least squares means for a single gene across all 10 treatments to show the effect of different treatments on the relative expression of a particular gene. The hierarchical clustering based on standardized least squares means revealed many cases of apparent co-regulation of genes within potential operons (29McGuire A.M. Hughes J.D. Church G.M. Genome Res. 2000; 10: 744-757Crossref PubMed Scopus (160) Google Scholar). Several sets of spatially distant gene strings were observed to cluster with similar expression profiles, suggesting the presence of regulons in the T. maritima genome. Representative clusters are displayed in Fig.4. Overall expression levels of a number of genes remained consistently high or low regardless of the growth condition. 
These included constitutively expressed genes like TM0017 (pyruvate ferredoxin oxidoreductase) and TM0688 (glyceraldehyde-3-phosphate dehydrogenase) (30Blamey J.M. Adams M.W. Biochemistry. 1994; 33: 1000-1007Crossref PubMed Scopus (92) Google Scholar) as well as genes related to proteolytic activity. Both sets of genes with the corresponding known or putative functions are displayed in Fig. 5. Individual genes with high overall expression levels on only a single carbon source are indicated in Table II. Least squares means for all genes included in this study for all growth conditions are shown in Supplemental Table IV, along with the corresponding standardized values in Supplemental Table V. Below, gene regulation patterns within each functional category are examined for each monosaccharide and corresponding polysaccharide growth substrate.Figure 4Substrate-dependent regulation. Sample Clusters constructed using standardized least squares means. Known or putative functions as reported in the genome sequence are indicated.View Large Image Figure ViewerDownload (PPT)Figure 4Substrate-dependent regulation. Sample Clusters constructed using standardized least squares means. Known or putative functions as reported in the genome sequence are indicated.View Large Image Figure ViewerDownload (PPT)Figure 5Genes with overall high or low expression levels for all growth substrates. Clusters constructed using least squares means. 
Known or putative functions as reported in the genome sequence are indicated.View Large Image Figure ViewerDownload (PPT)Table IIGenes with high overall expression levels (log2R ≥ 0.6) on indicated growth substrateGrowth substrateLocusFunctionCarboxymethylcelluloseTM0963Oligoendopeptidase, putativeMannoseTM1755Phosphate butyryltransferaseTM1754Butyrate kinase, putativeTM1756Branched chain fatty acid kinase, putativeLaminarinTM0024LaminarinaseTM0032Transcriptional regulator, XylR-relatedStarchTM1835Cyclomaltodextrinase, putativeTM1840α-AmylaseTM1845PullulanaseXylanTM0055α-GlucuronidaseTM0065Transcriptional regulator, IclR familyXyloseTM0949Transcriptional regulator, LacI family Open table in a new tab Backbone- and linkage-specific gene regulation was observed in the case of endoglycoside hydrolase genes for growth on α- and β-specific glucans. Growth on carboxymethylcellulose (CMC) (see cluster 4.1), a β-1,4-linked glucose polymer, induced genes encoding extracellular endoglucanases TM1525 (cel12B) and TM0305 (cel74), as well as the intracellular endoglucanase TM1524 (cel12A) and the intracellular cellobiosyl phosphorylase, TM1848. Examination of cluster I (Fig. 3) reveals that expression levels of cel74 were substantially lower than those ofcel12A on glucan polysaccharides. Although the presence of a β-1,4-glucosidase gene (bglA) (accession number CAA52276) in T. maritima MSB8 has been reported (31Liebl W. Methods Enzymol. 2001; 330: 290-300Crossref PubMed Scopus (16) Google Scholar), the corresponding protein sequence does not show homology to deduced sequences identified in the T. maritima MSB8 genome (4Nelson K.E. Clayton R.A. Gill S.R. Gwinn M.L. Dodson R.J. Haft D.H. Hickey E.K. Peterson J.D. Nelson W.C. Ketchum K.A. McDonald L. Utterback T.R. Malek J.A. Linher K.D. Garrett M.M. 
Stewart}, number={9}, journal={JOURNAL OF BIOLOGICAL CHEMISTRY}, author={Chhabra, SR and Shockley, KR and Conners, SB and Scott, KL and Wolfinger, RD and Kelly, RM}, year={2003}, month={Feb}, pages={7540–7552} }

% Hsieh, Chu, Wolfinger and Gibson (2003), Genetics 165(2).
% The source record carries no DOI, ISSN or month; none are invented here.
@article{hsieh_chu_wolfinger_gibson_2003,
  author  = {Hsieh, W. P. and Chu, T. M. and Wolfinger, R. D. and Gibson, G.},
  title   = {Mixed-model reanalysis of primate data suggests tissue and species biases in oligonucleotide-based gene expression profiles},
  journal = {Genetics},
  volume  = {165},
  number  = {2},
  pages   = {747--757},
  year    = {2003},
}

% Chu, Weir and Wolfinger (2002), Mathematical Biosciences 176(1).
% Initials are written with separating periods so name parsing keeps every
% initial; month uses the predefined macro; ISSN is a plain string.
@article{chu_weir_wolfinger_2002,
  author       = {Chu, T. M. and Weir, B. and Wolfinger, R.},
  title        = {A systematic statistical linear modeling approach to oligonucleotide array experiments},
  journal      = {Mathematical Biosciences},
  volume       = {176},
  number       = {1},
  pages        = {35--51},
  year         = {2002},
  month        = mar,
  ISSN         = {0025-5564},
  DOI          = {10.1016/S0025-5564(01)00107-9},
  abstractNote = {We outline and describe steps for a statistically rigorous approach to analyzing probe-level Affymetrix GeneChip data. The approach employs classical linear mixed models and operates on a gene-by-gene basis. Forgoing any attempts at gene presence or absence calls, the method simultaneously considers the data across all chips in an experiment. Primary output includes precise estimates of fold change (some as low as 1.1), their statistical significance, and measures of array and probe variability. The method can accommodate complex experiments involving many kinds of treatments and can test for their effects at the probe level. Furthermore, mismatch probe data can be incorporated in different ways or ignored altogether.
    Data from an ionizing radiation experiment on human cell lines illustrate the key concepts.},
}

% Wolfinger et al. (2001), Journal of Computational Biology 8(6).
% "cDNA" is brace-protected so sentence-casing styles do not lower-case it.
% The url duplicates the DOI resolver link and is kept as exported.
@article{wolfinger_gibson_wolfinger_bennett_hamadeh_bushel_afshari_paules_2001,
  author       = {Wolfinger, R. D. and Gibson, G. and Wolfinger, E. D. and Bennett, L. and Hamadeh, H. and Bushel, P. and Afshari, C. and Paules, R. S.},
  title        = {Assessing gene significance from {cDNA} microarray expression data via mixed models},
  journal      = {Journal of Computational Biology},
  volume       = {8},
  number       = {6},
  pages        = {625--637},
  year         = {2001},
  ISSN         = {1066-5277},
  DOI          = {10.1089/106652701753307520},
  url          = {https://doi.org/10.1089/106652701753307520},
  abstractNote = {The determination of a list of differentially expressed genes is a basic objective in many cDNA microarray experiments. We present a statistical approach that allows direct control over the percentage of false positives in such a list and, under certain reasonable assumptions, improves on existing methods with respect to the percentage of false negatives. The method accommodates a wide variety of experimental designs and can simultaneously assess significant differences between multiple types of biological samples. Two interconnected mixed linear models are central to the method and provide a flexible means to properly account for variability both across and within genes. The mixed model also provides a convenient framework for evaluating the statistical power of any particular experimental design and thus enables a researcher to a priori select an appropriate number of replicates. We also suggest some basic graphics for visualizing lists of significant genes.
    Analyses of published experiments studying human cancer and yeast cells illustrate the results.},
}

% Jin et al. (2001), Nature Genetics 29(4).
% The genus name is brace-protected so its capital survives style recasing.
@article{jin_riley_wolfinger_white_passador-gurgel_gibson_2001,
  author  = {Jin, W. and Riley, R. M. and Wolfinger, R. D. and White, K. P. and Passador-Gurgel, G. and Gibson, G.},
  title   = {The contributions of sex, genotype and age to transcriptional variance in {Drosophila} melanogaster},
  journal = {Nature Genetics},
  volume  = {29},
  number  = {4},
  pages   = {389--395},
  year    = {2001},
  month   = dec,
  ISSN    = {1061-4036},
  DOI     = {10.1038/ng766},
}