Bibliography database: three journal articles, keyed as authors_year.
Text outside of entries (like these lines) is ignored by BibTeX.
Conventions used below: one field per line, lowercase field names,
page ranges written with a double hyphen, month given as the unquoted
three-letter macro, ISSN as a plain string, journal names in title case.
The abstractNote field is non-standard and is ignored by bibliography
styles; its text is carried over from the original export unchanged.

@article{ling_menzies_hazard_shu_beel_2024,
  author       = {Ling, Xiao and Menzies, Tim and Hazard, Christopher and Shu, Jack and Beel, Jacob},
  title        = {Trading Off Scalability, Privacy, and Performance in Data Synthesis},
  journal      = {{IEEE} Access},
  year         = {2024},
  volume       = {12},
  pages        = {26642--26654},
  issn         = {2169-3536},
  doi          = {10.1109/ACCESS.2024.3366556},
  url          = {https://doi.org/10.1109/ACCESS.2024.3366556},
  abstractNote = {Synthetic data has been widely applied in the real world recently. One typical example is the creation of synthetic data for privacy concerned datasets. In this scenario, synthetic data substitute the real data which contains the privacy information, and is used to public testing for machine learning models. Another typical example is the unbalance data over-sampling which the synthetic data is generated in the region of minority samples to balance the positive and negative ratio when training the machine learning models. In this study, we concentrate on the first example, and introduce (a) the Howso engine, and (b) our proposed random projection based synthetic data generation framework. We evaluate these two algorithms on the aspects of privacy preservation and accuracy, and compare them to the two state-of-the-art synthetic data generation algorithms DataSynthesizer and Synthetic Data Vault. We show that the synthetic data generated by Howso engine has good privacy and accuracy, which results in the best overall score. On the other hand, our proposed random projection based framework can generate synthetic data with highest accuracy score, and has the fastest scalability.},
}

@article{hazard_singh_2011,
  author       = {Hazard, Christopher J. and Singh, Munindar P.},
  title        = {Intertemporal Discount Factors as a Measure of Trustworthiness in Electronic Commerce},
  journal      = {{IEEE} Transactions on Knowledge and Data Engineering},
  year         = {2011},
  month        = may,
  volume       = {23},
  number       = {5},
  pages        = {699--712},
  issn         = {1558-2191},
  doi          = {10.1109/tkde.2010.141},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-79953171081&partnerID=MN8TOARS},
  abstractNote = {In multiagent interactions, such as e-commerce and file sharing, being able to accurately assess the trustworthiness of others is important for agents to protect themselves from losing utility. Focusing on rational agents in e-commerce, we prove that an agent's discount factor (time preference of utility) is a direct measure of the agent's trustworthiness for a set of reasonably general assumptions and definitions. We propose a general list of desiderata for trust systems and discuss how discount factors as trustworthiness meet these desiderata. We discuss how discount factors are a robust measure when entering commitments that exhibit moral hazards. Using an online market as a motivating example, we derive some analytical methods both for measuring discount factors and for aggregating the measurements.},
}

@article{hazard_kimport_johnson_2005,
  author       = {Hazard, C. J. and Kimport, K. R. and Johnson, D. H.},
  title        = {Emergent behavior in two complex cellular automata rule sets},
  journal      = {Complexity},
  year         = {2005},
  volume       = {10},
  number       = {5},
  pages        = {45--55},
  issn         = {1099-0526},
  doi          = {10.1002/cplx.20089},
  abstractNote = {Cellular automata systems often produce complex behavior from simple rule sets. The behaviors and results of two complex combinations of cellular automata rules are analyzed. Both two-dimensional rule sets add complexities to typical cellular automata systems by attaching attributes and rules to each cell. One of the rule sets produces gliders that reproduce upon collision, whereas the other grows into an intricate shape. Projection and entropy analysis classify the rule sets as complex for the intricate shape, but measurements indicate that the self-reproducing gliders fall between ordered and complex classification, despite their complex appearance. © 2005 Wiley Periodicals, Inc. Complexity 10: 45–55, 2005},
}