@article{li_zhong_lobaton_huang_2022, title={Fusion of Human Gaze and Machine Vision for Predicting Intended Locomotion Mode}, volume={30}, ISSN={1558-0210}, url={https://doi.org/10.1109/TNSRE.2022.3168796}, DOI={10.1109/TNSRE.2022.3168796}, abstractNote={Predicting the user’s intended locomotion mode is critical for wearable robot control to assist the user’s seamless transitions when walking on changing terrains. Although machine vision has recently proven to be a promising tool in identifying upcoming terrains in the travel path, existing approaches are limited to environment perception rather than human intent recognition that is essential for coordinated wearable robot operation. Hence, in this study, we aim to develop a novel system that fuses the human gaze (representing user intent) and machine vision (capturing environmental information) for accurate prediction of the user’s locomotion mode. The system possesses multimodal visual information and recognizes the user’s locomotion intent in a complex scene, where multiple terrains are present. Additionally, based on the dynamic time warping algorithm, a fusion strategy was developed to align temporal predictions from individual modalities while producing flexible decisions on the timing of locomotion mode transition for wearable robot control. System performance was validated using experimental data collected from five participants, showing high accuracy (over 96% on average) of intent recognition and reliable decision-making on locomotion transition with adjustable lead time. The promising results demonstrate the potential of fusing human gaze and machine vision for locomotion intent recognition of lower limb wearable robots.}, journal={IEEE Transactions on Neural Systems and Rehabilitation Engineering}, publisher={Institute of Electrical and Electronics Engineers (IEEE)}, author={Li, Minhan and Zhong, Boxuan and Lobaton, Edgar and Huang, He}, year={2022}, pages={1103–1112} }

@article{da_silva_zhong_chen_lobaton_2022, title={Improving Performance and Quantifying Uncertainty of Body-Rocking Detection Using Bayesian Neural Networks}, volume={13}, ISSN={2078-2489}, url={https://doi.org/10.3390/info13070338}, DOI={10.3390/info13070338}, abstractNote={Body-rocking is an undesired stereotypical motor movement performed by some individuals, and its detection is essential for self-awareness and habit change. We envision a pipeline that includes inertial wearable sensors and a real-time detection system for notifying the user so that they are aware of their body-rocking behavior. For this task, similarities of body rocking to other non-related repetitive activities may cause false detections, which prevent continuous engagement, leading to alarm fatigue. We present a pipeline using Bayesian Neural Networks with uncertainty quantification for jointly reducing false positives and providing accurate detection. We show that increasing model capacity does not consistently yield higher performance by itself, while pairing it with the Bayesian approach does yield significant improvements. Disparities in uncertainty quantification are better quantified by calibrating them using deep neural networks. We show that the calibrated probabilities are effective quality indicators of reliable predictions.
Altogether, we show that our approach provides additional insights into the role of Bayesian techniques in deep learning as well as aids in accurate body-rocking detection, improving our prior work on this subject.}, number={7}, journal={Information}, publisher={MDPI AG}, author={da Silva, Rafael Luiz and Zhong, Boxuan and Chen, Yuhan and Lobaton, Edgar}, year={2022}, month={Jul}, pages={338} }

@article{zhong_huang_lobaton_2022, title={Reliable Vision-Based Grasping Target Recognition for Upper Limb Prostheses}, volume={52}, ISSN={2168-2275}, url={https://doi.org/10.1109/TCYB.2020.2996960}, DOI={10.1109/TCYB.2020.2996960}, abstractNote={Computer vision has shown promising potential in wearable robotics applications (e.g., human grasping target prediction and context understanding). However, in practice, the performance of computer vision algorithms is challenged by insufficient or biased training, observation noise, cluttered background, etc. By leveraging Bayesian deep learning (BDL), we have developed a novel, reliable vision-based framework to assist upper limb prosthesis grasping during arm reaching. This framework can measure different types of uncertainties from the model and data for grasping target recognition in realistic and challenging scenarios. A probability calibration network was developed to fuse the uncertainty measures into one calibrated probability for online decision making. We formulated the problem as the prediction of the grasping target during arm reaching. Specifically, we developed a 3-D simulation platform to simulate and analyze the performance of vision algorithms under several common challenging scenarios in practice. In addition, we integrated our approach into a shared control framework of a prosthetic arm and demonstrated its potential to assist human participants with fluent target reaching and grasping tasks.}, number={3}, journal={IEEE Transactions on Cybernetics}, publisher={Institute of Electrical and Electronics Engineers (IEEE)}, author={Zhong, Boxuan and Huang, He and Lobaton, Edgar}, year={2022}, month={Mar}, pages={1750–1762} }

@article{zhong_silva_tran_huang_lobaton_2021, title={Efficient Environmental Context Prediction for Lower Limb Prostheses}, volume={6}, ISSN={2168-2232}, url={https://doi.org/10.1109/TSMC.2021.3084036}, DOI={10.1109/TSMC.2021.3084036}, abstractNote={Environmental context prediction is important for wearable robotic applications, such as terrain-adaptive control. System efficiency is critical for wearable robots, in which system resources (e.g., processors and memory) are highly constrained. This article aims to address the system efficiency of real-time environmental context prediction for lower limb prostheses. First, we develop an uncertainty-aware frame selection strategy that can dynamically select frames according to lower limb motion and uncertainty captured by Bayesian neural networks (BNNs) for environment prediction. We further propose a dynamic Bayesian gated recurrent unit (D-BGRU) network to address the inconsistent frame rate, which is a side effect of the dynamic frame selection. Second, we investigate the effects of adding additional sensing modalities (e.g., GPS and an on-glasses camera) into the system on the tradeoff between computational complexity and environment prediction accuracy. Finally, we implement and optimize our framework for embedded hardware, and evaluate the real-time inference accuracy and efficiency of classifying six types of terrains.
The experiments show that our proposed frame selection strategy can reduce more than 90% of the computations without sacrificing environment prediction accuracy, and can be easily extended to multimodality fusion. We achieve around 93% prediction accuracy with less than one frame processed per second. Our model has 6.4 million 16-bit floating-point parameters and takes 44 ms to process each frame on a lightweight embedded platform (NVIDIA Jetson TX2).}, number={6}, journal={IEEE Transactions on Systems, Man, and Cybernetics: Systems}, publisher={Institute of Electrical and Electronics Engineers (IEEE)}, author={Zhong, Boxuan and Silva, Rafael Luiz and Tran, Michael and Huang, He and Lobaton, Edgar}, year={2021}, month={Jun} }

@article{ge_richmond_zhong_marchitto_lobaton_2021, title={Enhancing the morphological segmentation of microscopic fossils through Localized Topology-Aware Edge Detection}, volume={45}, ISSN={1573-7527}, url={https://doi.org/10.1007/s10514-020-09950-9}, DOI={10.1007/s10514-020-09950-9}, number={5}, journal={Autonomous Robots}, publisher={Springer Science and Business Media LLC}, author={Ge, Qian and Richmond, Turner and Zhong, Boxuan and Marchitto, Thomas M. and Lobaton, Edgar J.}, year={2021}, month={Jun}, pages={709–723} }

@article{zhong_silva_li_huang_lobaton_2021, title={Environmental Context Prediction for Lower Limb Prostheses With Uncertainty Quantification}, volume={18}, ISSN={1558-3783}, url={https://doi.org/10.1109/TASE.2020.2993399}, DOI={10.1109/TASE.2020.2993399}, abstractNote={Reliable environmental context prediction is critical for wearable robots (e.g., prostheses and exoskeletons) to assist terrain-adaptive locomotion. This article proposed a novel vision-based context prediction framework for lower limb prostheses to simultaneously predict the human’s environmental context for multiple forecast windows. By leveraging Bayesian neural networks (BNNs), our framework can quantify the uncertainty caused by different factors (e.g., observation noise and insufficient or biased training) and produce a calibrated predicted probability for online decision-making. We compared two wearable camera locations (a pair of glasses and a lower limb device), independently and conjointly. We utilized the calibrated predicted probability for online decision-making and fusion. We demonstrated how to interpret deep neural networks with uncertainty measures and how to improve the algorithms based on the uncertainty analysis. The inference time of our framework on a portable embedded system was less than 80 ms/frame. The results in this study may lead to novel context recognition strategies in reliable decision-making, efficient sensor fusion, and improved intelligent system design in various applications. Note to Practitioners—This article was motivated by two practical problems in computer vision for wearable robots: First, the performance of deep neural networks is challenged by real-life disturbances. However, reliable confidence estimation is usually unavailable and the factors causing failures are hard to identify. Second, evaluating wearable robots by intuitive trial and error is expensive due to the need for human experiments. Our framework produces a calibrated predicted probability as well as three uncertainty measures. The calibrated probability makes it easy to customize prediction decision criteria by considering how much error the corresponding application can tolerate.
This study demonstrated a practical procedure to interpret and improve the performance of deep neural networks with uncertainty quantification. We anticipate that our methodology could be extended to other applications as a general scientific and efficient procedure for evaluating and improving intelligent systems.}, number={2}, journal={IEEE Transactions on Automation Science and Engineering}, publisher={Institute of Electrical and Electronics Engineers (IEEE)}, author={Zhong, Boxuan and Silva, Rafael Luiz and Li, Minhan and Huang, He and Lobaton, Edgar}, year={2021}, month={Apr}, pages={458–470} }

@article{li_zhong_hutmacher_liang_horrey_xu_2020, title={Detection of driver manual distraction via image-based hand and ear recognition}, volume={137}, ISSN={1879-2057}, DOI={10.1016/j.aap.2020.105432}, abstractNote={Driving distraction is a leading cause of fatal car accidents, and almost nine people are killed in the US each day because of distracting activities. Therefore, reducing the number of distraction-affected traffic accidents remains an imperative issue. A novel algorithm for detection of drivers’ manual distraction was proposed in this manuscript. The detection algorithm consists of two modules. The first module predicts the bounding boxes of the driver's right hand and right ear from RGB images. The second module takes the bounding boxes as input and predicts the type of distraction. 106,677 frames extracted from videos, which were collected from twenty participants in a driving simulator, were used for training (50%) and testing (50%). For distraction classification, the results indicated that the proposed framework could detect normal driving, using the touchscreen, and talking with a phone with F1-scores of 0.84, 0.69, and 0.82, respectively. For overall distraction detection, it achieved an F1-score of 0.74. The whole framework ran at 28 frames per second. The algorithm achieved overall accuracy comparable to similar research and was more efficient than other methods. A demo video for the algorithm can be found at https://youtu.be/NKclK1bHRd4.}, journal={Accident Analysis and Prevention}, author={Li, Li and Zhong, Boxuan and Hutmacher, Clayton, Jr. and Liang, Yulan and Horrey, William J. and Xu, Xu}, year={2020}, month={Mar} }

@article{mitra_marchitto_ge_zhong_kanakiya_cook_fehrenbacher_ortiz_tripati_lobaton_2019, title={Automated species-level identification of planktic foraminifera using convolutional neural networks, with comparison to human performance}, volume={147}, ISSN={1872-6186}, url={http://dx.doi.org/10.1016/j.marmicro.2019.01.005}, DOI={10.1016/j.marmicro.2019.01.005}, abstractNote={Picking foraminifera from sediment samples is an essential but repetitive and low-reward task that is well-suited for automation. The first step toward building a picking robot is the development of an automated identification system. We use machine learning techniques to train convolutional neural networks (CNNs) to identify six species of extant planktic foraminifera that are widely used by paleoceanographers, and to distinguish the six species from other taxa. We employ CNNs that were previously built and trained for image classification. Foraminiferal training and identification use reflected light microscope digital images taken at 16 different illumination angles using a light-emitting diode (LED) ring. Overall machine accuracy, as a combination of precision and recall, is better than 80% even with limited training.
We compare machine performance to that of human pickers (six experts and five novices) by tasking each with the identification of 540 specimens based on images. Experts achieved comparable precision but poorer recall relative to the machine, with an average accuracy of 63%. Novices scored lower than experts on both precision and recall, for an overall accuracy of 53%. The machine achieved fairly uniform performance across the six species, while participants' scores were strongly species-dependent, commensurate with their past experience and expertise. The machine was also less sensitive to specimen orientation (umbilical versus spiral views) than the humans. These results demonstrate that our approach can provide a versatile ‘brain’ for an eventual automated robotic picking system.}, journal={Marine Micropaleontology}, publisher={Elsevier BV}, author={Mitra, R. and Marchitto, T. M. and Ge, Q. and Zhong, B. and Kanakiya, B. and Cook, M. S. and Fehrenbacher, J. S. and Ortiz, J. D. and Tripati, A. and Lobaton, E.}, year={2019}, month={Mar}, pages={16–24} }

@inproceedings{zhong_ge_kanakiya_mitra_marchitto_lobaton_2017, title={A comparative study of image classification algorithms for foraminifera identification}, url={http://dx.doi.org/10.1109/ssci.2017.8285164}, DOI={10.1109/ssci.2017.8285164}, abstractNote={Identifying Foraminifera (or forams for short) is essential for oceanographic and geoscience research as well as petroleum exploration. Currently, this is mostly accomplished using trained human pickers, routinely taking weeks or even months to accomplish the task. In this paper, a foram identification pipeline is proposed to automatically identify forams based on computer vision and machine learning techniques. A microscope-based image capturing system is used to collect a labelled image data set. Several popular image classification algorithms are adapted to this specific task and evaluated under various conditions. Finally, the potential of a weighted cross-entropy loss function in adjusting the trade-off between precision and recall is tested. The classification algorithms provide competitive results when compared to human experts' labeling of the data set.}, booktitle={2017 IEEE Symposium Series on Computational Intelligence (SSCI)}, publisher={IEEE}, author={Zhong, Boxuan and Ge, Q. and Kanakiya, B. and Mitra, R. and Marchitto, T. and Lobaton, E.}, year={2017}, pages={3199–3206} }

@inproceedings{ge_zhong_kanakiya_mitra_marchitto_lobaton_2017, title={Coarse-to-fine Foraminifera image segmentation through 3D and deep features}, url={http://dx.doi.org/10.1109/ssci.2017.8280982}, DOI={10.1109/ssci.2017.8280982}, abstractNote={Foraminifera are single-celled marine organisms, which are usually less than 1 mm in diameter. One of the most common tasks associated with foraminifera is the species identification of thousands of foraminifera contained in rock or ocean sediment samples, which can be a tedious manual procedure. Thus, an automatic visual identification system is desirable. Some of the primary criteria for foraminifera species identification come from the characteristics of the shell itself. As such, segmentation of chambers and apertures in foraminifera images would provide powerful features for species identification. Nevertheless, none of the existing image-based, automatic classification approaches make use of segmentation, partly due to the lack of accurate segmentation methods for foraminifera images.
In this paper, we propose a learning-based edge detection pipeline, using a coarse-to-fine strategy, to extract the vague edges from foraminifera images for segmentation using a relatively small training set. The experiments demonstrate that our approach is able to segment chambers and apertures of foraminifera correctly and has the potential to provide useful features for species identification and other applications, such as morphological study of foraminifera shells and foraminifera dataset labeling.}, booktitle={2017 IEEE Symposium Series on Computational Intelligence (SSCI)}, publisher={IEEE}, author={Ge, Q. and Zhong, Boxuan and Kanakiya, B. and Mitra, R. and Marchitto, T. and Lobaton, E.}, year={2017} }

@inproceedings{zhong_qin_yang_chen_mudrick_taub_azevedo_lobaton_2017, title={Emotion recognition with facial expressions and physiological signals}, url={http://dx.doi.org/10.1109/ssci.2017.8285365}, DOI={10.1109/ssci.2017.8285365}, abstractNote={This paper proposes a temporal information preserving multi-modal emotion recognition framework based on physiological and facial expression data streams. The performance of each component is evaluated and compared individually and after data fusion. Specifically, we compared the effect of different camera views on facial expression-based emotion recognition, and combined these views to achieve better performance. A Temporal Information Preserving Framework (TIPF) is proposed to more accurately model the relationships between emotional and physiological states over time. Additionally, different fusion strategies are compared when combining information from different time periods and modalities. The experiments show that TIPF significantly improves emotion recognition performance when physiological signals are used, and that the best performance is achieved when fusing facial expressions and physiological data.}, booktitle={2017 IEEE Symposium Series on Computational Intelligence (SSCI)}, publisher={IEEE}, author={Zhong, Boxuan and Qin, Z. K. and Yang, S. and Chen, J. Y. and Mudrick, N. and Taub, M. and Azevedo, R. and Lobaton, E.}, year={2017}, pages={1170–1177} }

@inproceedings{lokare_samadi_zhong_gonzalez_mohammadzadeh_lobaton_2017, title={Energy-efficient activity recognition via multiple time-scale analysis}, url={http://dx.doi.org/10.1109/ssci.2017.8285176}, DOI={10.1109/ssci.2017.8285176}, abstractNote={In this work, we propose a novel power-efficient strategy for supervised human activity recognition using a multiple time-scale approach, which takes into account various window sizes. We assess the proposed methodology on our new multimodal dataset for activities of daily life (ADL), which combines the use of physiological and inertial sensors from multiple wearable devices. We aim to develop techniques that can run efficiently on wearable devices for real-time activity recognition. Our analysis shows that the proposed Sequential Maximum-Likelihood (SML) approach achieves a high F1 score across all activities while providing lower power consumption than the standard Maximum-Likelihood (ML) approach.}, booktitle={2017 IEEE Symposium Series on Computational Intelligence (SSCI)}, publisher={IEEE}, author={Lokare, N. and Samadi, S. and Zhong, Boxuan and Gonzalez, L. and Mohammadzadeh, F. and Lobaton, E.}, year={2017}, pages={1466–1472} }