@comment{
  Bibliography of journal articles, newest first.
  Conventions: one field per line; authors in Last, First form joined by and;
  page ranges use a double hyphen; month uses the standard three-letter macros;
  proper nouns and acronyms in titles are brace-protected against recasing.
  The abstractNote field is a non-standard annotation field.

  Three citation keys previously contained spaces, which BibTeX cannot parse.
  They were renamed as follows (old key -- new key):
    del bonis-o'donnell_pinals_jeong_thakrar_wolfinger_landry_2019
      -- del_bonis-odonnell_pinals_jeong_thakrar_wolfinger_landry_2019
    jessri_wolfinger_lou_mary r. l'abbe_2017
      -- jessri_wolfinger_lou_labbe_2017
    stasolla_bozhkov_chu_van zyl_egertsdotter_suarez_craig_wolfinger_von arnold_sederoff_2004
      -- stasolla_bozhkov_chu_van_zyl_egertsdotter_suarez_craig_wolfinger_von_arnold_sederoff_2004
}

@article{del_bonis-odonnell_pinals_jeong_thakrar_wolfinger_landry_2019,
  author       = {Del Bonis-O'Donnell, Jackson Travis and Pinals, Rebecca L. and Jeong, Sanghwa and Thakrar, Arni and Wolfinger, Russ D. and Landry, Markita P.},
  title        = {Chemometric Approaches for Developing Infrared Nanosensors To Image Anthracyclines},
  journal      = {Biochemistry},
  volume       = {58},
  number       = {1},
  pages        = {54--64},
  year         = {2019},
  month        = jan,
  issn         = {0006-2960},
  doi          = {10.1021/acs.biochem.8b00926},
  abstractNote = {Generation, identification, and validation of optical probes to image molecular targets in a biological milieu remain a challenge. Synthetic molecular recognition approaches leveraging the intrinsic near-infrared fluorescence of single-walled carbon nanotubes are promising for long-term biochemical imaging in tissues. However, generation of nanosensors for selective imaging of molecular targets requires a heuristic approach. Here, we present a chemometric platform for rapidly screening libraries of candidate single-walled carbon nanotube nanosensors against biochemical analytes to quantify the fluorescence response to small molecules, including vitamins, neurotransmitters, and chemotherapeutics. We further show this method can be applied to identify biochemical analytes that selectively modulate the intrinsic near-infrared fluorescence of candidate nanosensors. Chemometric analysis thus enables identification of nanosensor-analyte "hits" and also nanosensor fluorescence signaling modalities such as wavelength shifts that are optimal for translation to biological imaging. Through this approach, we identify and characterize a nanosensor for the chemotherapeutic anthracycline doxorubicin (DOX), which provides a ≤17 nm fluorescence red-shift and exhibits an 8 μM limit of detection, compatible with peak circulatory concentrations of doxorubicin common in therapeutic administration. We demonstrate the selectivity of this nanosensor over dacarbazine, a chemotherapeutic commonly co-injected with doxorubicin. Lastly, we establish nanosensor tissue compatibility for imaging of doxorubicin in muscle tissue by incorporating nanosensors into the mouse hindlimb and measuring the nanosensor response to exogenous DOX administration. Our results motivate chemometric approaches to nanosensor discovery for chronic imaging of drug partitioning into tissues and toward real-time monitoring of drug accumulation.},
}

@article{jessri_wolfinger_lou_labbe_2017,
  author       = {Jessri, Mahsa and Wolfinger, Russell D. and Lou, Wendy Y. and L'Abbe, Mary R.},
  title        = {Identification of dietary patterns associated with obesity in a nationally representative survey of {Canadian} adults: application of a priori, hybrid, and simplified dietary pattern techniques},
  journal      = {American Journal of Clinical Nutrition},
  volume       = {105},
  number       = {3},
  pages        = {669--684},
  year         = {2017},
  month        = mar,
  issn         = {1938-3207},
  doi          = {10.3945/ajcn.116.134684},
  abstractNote = {Background: Analyzing the effects of dietary patterns is an important approach for examining the complex role of nutrition in the etiology of obesity and chronic diseases. Objectives: The objectives of this study were to characterize the dietary patterns of Canadians with the use of a priori, hybrid, and simplified dietary pattern techniques, and to compare the associations of these patterns with obesity risk in individuals with and without chronic diseases (unhealthy and healthy obesity). Design: Dietary recalls from 11,748 participants (≥18 y of age) in the cross-sectional, nationally representative Canadian Community Health Survey 2.2 were used. A priori dietary pattern was characterized with the use of the previously validated 2015 Dietary Guidelines for Americans Adherence Index (DGAI). Weighted partial least squares (hybrid method) was used to derive an energy-dense (ED), high-fat (HF), low-fiber density (LFD) dietary pattern with the use of 38 food groups. The associations of derived dietary patterns with disease outcomes were then tested with the use of multinomial logistic regression. Results: An ED, HF, and LFD dietary pattern had high positive loadings for fast foods, carbonated drinks, and refined grains, and high negative loadings for whole fruits and vegetables (≥|0.17|). Food groups with a high loading were summed to form a simplified dietary pattern score. Moving from the first (healthiest) to the fourth (least healthy) quartiles of the ED, HF, and LFD pattern and the simplified dietary pattern scores was associated with increasingly elevated ORs for unhealthy obesity, with individuals in quartile 4 having an OR of 2.57 (95% CI: 1.75, 3.76) and 2.73 (95% CI: 1.88, 3.98), respectively (P-trend < 0.0001). Individuals who adhered the most to the 2015 DGAI recommendations (quartile 4) had a 53% lower OR of unhealthy obesity (P-trend < 0.0001). The associations of dietary patterns with healthy obesity and unhealthy nonobesity were weaker, albeit significant. Conclusions: Consuming an ED, HF, and LFD dietary pattern and lack of adherence to the recommendations of the 2015 DGAI were associated with a significantly higher risk of obesity with and without accompanying chronic diseases.},
}

@article{miclaus_wolfinger_czika_2009,
  author       = {Miclaus, Kelci and Wolfinger, Russ and Czika, Wendy},
  title        = {{SNP} Selection and Multidimensional Scaling to Quantify Population Structure},
  journal      = {Genetic Epidemiology},
  volume       = {33},
  number       = {6},
  pages        = {488--496},
  year         = {2009},
  month        = sep,
  issn         = {1098-2272},
  doi          = {10.1002/gepi.20401},
  abstractNote = {In the new era of large‐scale collaborative Genome Wide Association Studies (GWAS), population stratification has become a critical issue that must be addressed. In order to build upon the methods developed to control the confounding effect of a structured population, it is extremely important to visualize and quantify that effect. In this work, we develop methodology for single nucleotide polymorphism (SNP) selection and subsequent population stratification visualization based on deviation from Hardy‐Weinberg equilibrium in conjunction with non‐metric multidimensional scaling (MDS); a distance‐based multivariate technique. Through simulation, it is shown that SNP selection based on Hardy‐Weinberg disequilibrium (HWD) is robust against confounding linkage disequilibrium patterns that have been problematic in past studies and methods as well as producing a differentiated SNP set. Non‐metric MDS is shown to be a multivariate visualization tool preferable to principal components in conjunction with HWD SNP selection through theoretical and empirical study from HapMap samples. The proposed selection tool offers a simple and effective way to select appropriate substructure‐informative markers for use in exploring the effect that population stratification may have in association studies. Genet. Epidemiol. 33:488–496, 2009. © 2009 Wiley‐Liss, Inc.},
}

@article{kennerly_ballmann_martin_wolfinger_gregory_stoskopf_gibson_2008,
  author       = {Kennerly, Erin and Ballmann, Anne and Martin, Stanton and Wolfinger, Russ and Gregory, Simon and Stoskopf, Michael and Gibson, Greg},
  title        = {A gene expression signature of confinement in peripheral blood of red wolves ({Canis rufus})},
  journal      = {Molecular Ecology},
  volume       = {17},
  number       = {11},
  pages        = {2782--2791},
  year         = {2008},
  month        = jun,
  issn         = {1365-294X},
  doi          = {10.1111/j.1365-294X.2008.03775.x},
  abstractNote = {The stresses that animals experience as a result of modification of their ecological circumstances induce physiological changes that leave a signature in profiles of gene expression. We illustrate this concept in a comparison of free range and confined North American red wolves (Canis rufus). Transcription profiling of peripheral blood samples from 13 red wolf individuals in the Alligator River region of North Carolina revealed a strong signal of differentiation. Four hundred eighty‐two out of 2980 transcripts detected on Illumina HumanRef8 oligonucleotide bead arrays were found to differentiate free range and confined wolves at a false discovery rate of 12.8% and P < 0.05. Over‐representation of genes in focal adhesion, insulin signalling, proteasomal, and tryptophan metabolism pathways suggests the activation of pro‐inflammatory and stress responses in confined animals. Consequently, characterization of differential transcript abundance in an accessible tissue such as peripheral blood identifies biomarkers that could be useful in animal management practices and for evaluating the impact of habitat changes on population health, particularly as attention turns to the impact of climate change on physiology and in turn species distributions.},
}

@article{ascencio-ibanez_sozzani_lee_chu_wolfinger_cella_hanley-bowdoin_2008,
  author  = {Ascencio-Ibanez, J. T. and Sozzani, R. and Lee, T. J. and Chu, T. M. and Wolfinger, R. D. and Cella, R. and Hanley-Bowdoin, L.},
  title   = {Global analysis of {Arabidopsis} gene expression uncovers a complex array of changes impacting pathogen response and cell cycle during geminivirus infection},
  journal = {Plant Physiology},
  volume  = {148},
  number  = {1},
  pages   = {436--454},
  year    = {2008},
}

@article{hsieh_chu_wolfinger_2007,
  author       = {Hsieh, Wen-Ping and Chu, Tzu-Ming and Wolfinger, Russ},
  title        = {Comparison of statistical performance of univariate and bivariate mixed models for {Affymetrix} (R) probe level data},
  journal      = {Journal of Statistical Computation and Simulation},
  volume       = {77},
  number       = {3},
  pages        = {251--264},
  year         = {2007},
  issn         = {0094-9655},
  doi          = {10.1080/10629360600826398},
  abstractNote = {Half of the probes on Affymetrix® microarrays contain a single base mismatch (MM) of a known perfect match (PM) target sequence. While putatively designed to detect nonspecific binding, the MM data can also contain true signals and because of this, debates persist concerning how to best combine PM and MM data for statistical modeling purposes. Most current approaches involve either subtracting some function of MM from PM or ignoring MM altogether. Here, we describe a bivariate model that includes both PM and MM based on the mixed linear modelling framework. It directly models the correlation between PM and MM and thereby increases the power of significant gene detection. In this paper, we show that the bivariate mixed model offers moderate gains in power over a comparable univariate model that ignores the MM data. The gains are more prominent when the number of replicates and the array-to-array variability is small. We apply the models to a small experiment on yeast and use the data as a basis for a Monte Carlo simulation.},
}

@article{bushel_wolfinger_gibson_2007,
  author       = {Bushel, Pierre R. and Wolfinger, Russell D. and Gibson, Greg},
  title        = {Simultaneous clustering of gene expression data with clinical chemistry and pathological evaluations reveals phenotypic prototypes},
  journal      = {BMC Systems Biology},
  volume       = {1},
  year         = {2007},
  month        = feb,
  issn         = {1752-0509},
  doi          = {10.1186/1752-0509-1-15},
  abstractNote = {Commonly employed clustering methods for analysis of gene expression data do not directly incorporate phenotypic data about the samples. Furthermore, clustering of samples with known phenotypes is typically performed in an informal fashion. The inability of clustering algorithms to incorporate biological data in the grouping process can limit proper interpretation of the data and its underlying biology. We present a more formal approach, the modk-prototypes algorithm, for clustering biological samples based on simultaneously considering microarray gene expression data and classes of known phenotypic variables such as clinical chemistry evaluations and histopathologic observations. The strategy involves constructing an objective function with the sum of the squared Euclidean distances for numeric microarray and clinical chemistry data and simple matching for histopathology categorical values in order to measure dissimilarity of the samples. Separate weighting terms are used for microarray, clinical chemistry and histopathology measurements to control the influence of each data domain on the clustering of the samples. The dynamic validity index for numeric data was modified with a category utility measure for determining the number of clusters in the data sets. A cluster's prototype, formed from the mean of the values for numeric features and the mode of the categorical values of all the samples in the group, is representative of the phenotype of the cluster members. The approach is shown to work well with a simulated mixed data set and two real data examples containing numeric and categorical data types. One from a heart disease study and another from acetaminophen (an analgesic) exposure in rat liver that causes centrilobular necrosis. The modk-prototypes algorithm partitioned the simulated data into clusters with samples in their respective class group and the heart disease samples into two groups (sick and buff denoting samples having pain type representative of angina and non-angina respectively) with an accuracy of 79%. This is on par with, or better than, the assignment accuracy of the heart disease samples by several well-known and successful clustering algorithms. Following modk-prototypes clustering of the acetaminophen-exposed samples, informative genes from the cluster prototypes were identified that are descriptive of, and phenotypically anchored to, levels of necrosis of the centrilobular region of the rat liver. The biological processes cell growth and/or maintenance, amine metabolism, and stress response were shown to discern between no and moderate levels of acetaminophen-induced centrilobular necrosis. The use of well-known and traditional measurements directly in the clustering provides some guarantee that the resulting clusters will be meaningfully interpretable.},
}

@article{chu_weir_wolfinger_2004,
  author       = {Chu, T. M. and Weir, B. S. and Wolfinger, R. D.},
  title        = {Comparison of {Li-Wong} and loglinear mixed models for the statistical analysis of oligonucleotide arrays},
  journal      = {Bioinformatics},
  volume       = {20},
  number       = {4},
  pages        = {500--506},
  year         = {2004},
  month        = mar,
  issn         = {1460-2059},
  doi          = {10.1093/bioinformatics/btg435},
  abstractNote = {Motivation: Li and Wong have described some useful statistical models for probe-level, oligonucleotide array data based on a multiplicative parametrization. In earlier work, we proposed similar analysis-of-variance-style mixed models fit on a log scale. With only subtle differences in the specification of their mean and stochastic error components, a question arises as to whether these models could lead to varying conclusions in practical application. Results: In this paper, we provide an empirical comparison of the two models using a real data set, and find the models perform quite similarly across most genes, but with some interesting and important distinctions. We also present results from a simulation study designed to assess inferential properties of the models, and propose a modified test statistic for the Li–Wong model that provides an improvement in Type 1 error control. Advantages of both methods include the ability to directly assess and account for key sources of variability in the chip data and a means to automate statistical quality control. Availability: The Li–Wong models are available in dChip: http://www.biostat.harvard.edu/complab/dchip/, and both methods will be commercially available in the forthcoming SAS Microarray Solution. Supplementary information: Supplementary material is available at http://statgen.ncsu.edu/ggibson/Pubs.htm},
}

@article{stasolla_bozhkov_chu_van_zyl_egertsdotter_suarez_craig_wolfinger_von_arnold_sederoff_2004,
  author       = {Stasolla, C. and Bozhkov, P. V. and Chu, T. M. and Van Zyl, L. and Egertsdotter, U. and Suarez, M. F. and Craig, D. and Wolfinger, R. D. and Von Arnold, S. and Sederoff, R. R.},
  title        = {Variation in transcript abundance during somatic embryogenesis in gymnosperms},
  journal      = {Tree Physiology},
  volume       = {24},
  number       = {10},
  pages        = {1073--1085},
  year         = {2004},
  month        = oct,
  issn         = {1758-4469},
  doi          = {10.1093/treephys/24.10.1073},
  abstractNote = {Somatic embryogenesis of Norway spruce (Picea abies L.) is a versatile model system to study molecular mechanisms regulating embryo development because it proceeds through defined developmental stages corresponding to specific culture treatments. Normal embryonic development involves early differentiation of proembryogenic masses (PEMs) into somatic embryos, followed by early and late embryogeny leading to the formation of mature cotyledonary embryos. In some cell lines there is a developmental arrest at the PEM-somatic embryo transition. To learn more about the molecular mechanisms regulating embryogenesis, we compared the transcript profiles of two normal lines and one developmentally arrested line. Ribonucleic acid, extracted from these cell lines at successive developmental stages, was analyzed on DNA microarrays containing 2178 expressed sequence tags (ESTs) (corresponding to 2110 unique cDNAs) from loblolly pine (Pinus taeda L.). Hybridization between spruce and pine species on microarrays has been shown to be effective (van Zyl et al. 2002, Stasolla et al. 2003). In contrast to the developmentally arrested line, the early phases of normal embryo development are characterized by a precise pattern of gene expression, i.e., repression followed by induction. Comparison of transcript levels between successive stages of embryogenesis allowed us to identify several genes that showed unique expression responses during normal development. Several of these genes encode proteins involved in detoxification processes, methionine synthesis and utilization, and carbohydrate metabolism. The potential role of these genes in embryo development is discussed.},
}

@article{chhabra_shockley_conners_scott_wolfinger_kelly_2003,
  author       = {Chhabra, S. R. and Shockley, K. R. and Conners, S. B. and Scott, K. L. and Wolfinger, R. D. and Kelly, R. M.},
  title        = {Carbohydrate-induced differential gene expression patterns in the hyperthermophilic bacterium {Thermotoga maritima}},
  journal      = {Journal of Biological Chemistry},
  volume       = {278},
  number       = {9},
  pages        = {7540--7552},
  year         = {2003},
  month        = feb,
  issn         = {1083-351X},
  doi          = {10.1074/jbc.M211748200},
  abstractNote = {The hyperthermophilic bacterium Thermotoga maritima MSB8 was grown on a variety of carbohydrates to determine the influence of carbon and energy source on differential gene expression. Despite the fact that T. maritima has been phylogenetically characterized as a primitive microorganism from an evolutionary perspective, results here suggest that it has versatile and discriminating mechanisms for regulating and effecting complex carbohydrate utilization. Growth of T. maritima on monosaccharides was found to be slower than growth on polysaccharides, although growth to cell densities of 10^8 to 10^9 cells/ml was observed on all carbohydrates tested. Differential expression of genes encoding carbohydrate-active proteins encoded in the T. maritima genome was followed using a targeted cDNA microarray in conjunction with mixed model statistical analysis. Coordinated regulation of genes responding to specific carbohydrates was noted. Although glucose generally repressed expression of all glycoside hydrolase genes, other sugars induced or repressed these genes to varying extents. Expression profiles of most endo-acting glycoside hydrolase genes correlated well with their reported biochemical properties, although exo-acting glycoside hydrolase genes displayed less specific expression patterns. Genes encoding selected putative ABC sugar transporters were found to respond to specific carbohydrates, and in some cases putative oligopeptide transporter genes were also found to respond to specific sugar substrates. Several genes encoding putative transcriptional regulators were expressed during growth on specific sugars, thus suggesting functional assignments. The transcriptional response of T. maritima to specific carbohydrate growth substrates indicated that sugar backbone- and linkage-specific regulatory networks are operational in this organism during the uptake and utilization of carbohydrate substrates. Furthermore, the wide ranging collection of such networks in T. maritima suggests that this organism is capable of adapting to a variety of growth environments containing carbohydrate growth substrates.},
}

@article{hsieh_chu_wolfinger_gibson_2003,
  author  = {Hsieh, W. P. and Chu, T. M. and Wolfinger, R. D. and Gibson, G.},
  title   = {Mixed-model reanalysis of primate data suggests tissue and species biases in oligonucleotide-based gene expression profiles},
  journal = {Genetics},
  volume  = {165},
  number  = {2},
  pages   = {747--757},
  year    = {2003},
}

@article{chu_weir_wolfinger_2002,
  author       = {Chu, T. M. and Weir, B. and Wolfinger, R.},
  title        = {A systematic statistical linear modeling approach to oligonucleotide array experiments},
  journal      = {Mathematical Biosciences},
  volume       = {176},
  number       = {1},
  pages        = {35--51},
  year         = {2002},
  month        = mar,
  issn         = {0025-5564},
  doi          = {10.1016/S0025-5564(01)00107-9},
  abstractNote = {We outline and describe steps for a statistically rigorous approach to analyzing probe-level Affymetrix GeneChip data. The approach employs classical linear mixed models and operates on a gene-by-gene basis. Forgoing any attempts at gene presence or absence calls, the method simultaneously considers the data across all chips in an experiment. Primary output includes precise estimates of fold change (some as low as 1.1), their statistical significance, and measures of array and probe variability. The method can accommodate complex experiments involving many kinds of treatments and can test for their effects at the probe level. Furthermore, mismatch probe data can be incorporated in different ways or ignored altogether. Data from an ionizing radiation experiment on human cell lines illustrate the key concepts.},
}

@article{wolfinger_gibson_wolfinger_bennett_hamadeh_bushel_afshari_paules_2001,
  author       = {Wolfinger, R. D. and Gibson, G. and Wolfinger, E. D. and Bennett, L. and Hamadeh, H. and Bushel, P. and Afshari, C. and Paules, R. S.},
  title        = {Assessing gene significance from {cDNA} microarray expression data via mixed models},
  journal      = {Journal of Computational Biology},
  volume       = {8},
  number       = {6},
  pages        = {625--637},
  year         = {2001},
  issn         = {1066-5277},
  doi          = {10.1089/106652701753307520},
  url          = {https://doi.org/10.1089/106652701753307520},
  abstractNote = {The determination of a list of differentially expressed genes is a basic objective in many cDNA microarray experiments. We present a statistical approach that allows direct control over the percentage of false positives in such a list and, under certain reasonable assumptions, improves on existing methods with respect to the percentage of false negatives. The method accommodates a wide variety of experimental designs and can simultaneously assess significant differences between multiple types of biological samples. Two interconnected mixed linear models are central to the method and provide a flexible means to properly account for variability both across and within genes. The mixed model also provides a convenient framework for evaluating the statistical power of any particular experimental design and thus enables a researcher to a priori select an appropriate number of replicates. We also suggest some basic graphics for visualizing lists of significant genes. Analyses of published experiments studying human cancer and yeast cells illustrate the results.},
}

@article{jin_riley_wolfinger_white_passador-gurgel_gibson_2001,
  author       = {Jin, W. and Riley, R. M. and Wolfinger, R. D. and White, K. P. and Passador-Gurgel, G. and Gibson, G.},
  title        = {The contributions of sex, genotype and age to transcriptional variance in {Drosophila melanogaster}},
  journal      = {Nature Genetics},
  volume       = {29},
  number       = {4},
  pages        = {389--395},
  year         = {2001},
  month        = dec,
  issn         = {1061-4036},
  doi          = {10.1038/ng766},
  abstractNote = {Here we present a statistically rigorous approach to quantifying microarray expression data that allows the relative effects of multiple classes of treatment to be compared and incorporates analytical methods that are common to quantitative genetics. From the magnitude of gene effects and contributions of variance components, we find that gene expression in adult flies is affected most strongly by sex, less so by genotype and only weakly by age (for 1- and 6-wk flies); in addition, sex x genotype interactions may be present for as much as 10% of the Drosophila transcriptome. This interpretation is compromised to some extent by statistical issues relating to power and experimental design. Nevertheless, we show that changes in expression as small as 1.2-fold can be highly significant. Genotypic contributions to transcriptional variance may be of a similar magnitude to those relating to some quantitative phenotypes and should be considered when assessing the significance of experimental treatments.},
}