% Journal article: error-correcting code framework for genetic sequence
% analysis (J. Franklin Inst., vol. 341, 2004).
% The abstractNote field is not a standard BibTeX field; its text is kept
% verbatim from the export.
@article{may_vouk_bitzer_rosnick_2004,
  author       = {May, E. E. and Vouk, M. A. and Bitzer, D. L. and Rosnick, D. I.},
  title        = {An error-correcting code framework for genetic sequence analysis},
  journal      = {Journal of the Franklin Institute-Engineering and Applied Mathematics},
  year         = {2004},
  volume       = {341},
  number       = {1--2},
  pages        = {89--109},
  issn         = {1879-2693},
  doi          = {10.1016/j.jfranklin.2003.12.009},
  abstractNote = {A fundamental challenge for engineering communication systems is the problem of transmitting information from the source to the receiver over a noisy channel. This same problem exists in a biological system. How can information required for the proper functioning of a cell, an organism, or a species be transmitted in an error introducing environment? Source codes (compression codes) and channel codes (error-correcting codes) address this problem in engineering communication systems. The ability to extend these information theory concepts to study information transmission in biological systems can contribute to the general understanding of biological communication mechanisms and extend the field of coding theory into the biological domain. In this work, we review and compare existing coding theoretic methods for modeling genetic systems. We introduce a new error-correcting code framework for understanding translation initiation, at the cellular level and present research results for Escherichia coli K-12. By studying translation initiation, we hope to gain insight into potential error-correcting aspects of genomic sequences and systems.},
}

% Journal article: coding-theory models of protein translation initiation
% (Biosystems, vol. 76, 2004).
% Same authors and year as the entry keyed may_vouk_bitzer_rosnick_2004, so
% this key carries a "b" suffix to stay unique; a repeated key would make
% this entry uncitable.
@article{may_vouk_bitzer_rosnick_2004b,
  author       = {May, E. E. and Vouk, M. A. and Bitzer, D. L. and Rosnick, D. I.},
  title        = {Coding theory based models for protein translation initiation in prokaryotic organisms},
  journal      = {Biosystems},
  year         = {2004},
  volume       = {76},
  number       = {1--3},
  pages        = {249--260},
  issn         = {0303-2647},
  doi          = {10.1016/j.biosystems.2004.05.017},
  abstractNote = {Our research explores the feasibility of using communication theory, error control (EC) coding theory specifically, for quantitatively modeling the protein translation initiation mechanism. The messenger RNA (mRNA) of Escherichia coli K-12 is modeled as a noisy (errored), encoded signal and the ribosome as a minimum Hamming distance decoder, where the 16S ribosomal RNA (rRNA) serves as a template for generating a set of valid codewords (the codebook). We tested the E. coli based coding models on 5′ untranslated leader sequences of prokaryotic organisms of varying taxonomical relation to E. coli including: Salmonella typhimurium LT2, Bacillus subtilis, and Staphylococcus aureus Mu50. The model identified regions on the 5′ untranslated leader where the minimum Hamming distance values of translated mRNA sub-sequences and non-translated genomic sequences differ the most. These regions correspond to the Shine–Dalgarno domain and the non-random domain. Applying the EC coding-based models to B. subtilis, and S. aureus Mu50 yielded results similar to those for E. coli K-12. Contrary to our expectations, the behavior of S. typhimurium LT2, the more taxonomically related to E. coli, resembled that of the non-translated sequence group.},
}