@comment{
  Publication list, ordered newest first (2022 down to 2013).
  Citation keys are the exporter-generated author-surname/year keys.
  abstractNote is an export-specific field, not a standard BibTeX
  field; standard styles ignore unknown fields.
  NOTE(review): the two Communications in Computer and Information
  Science entries (2021, 2014) carry only the series name as their
  venue; the proceedings title and publisher are not present in the
  exported data and should be filled in if known.
}

@article{yang_zhu_howard_hoedt_schroeck_klaassen_freedland_williams_2022,
  author       = {Yang, Ruixin and Zhu, Di and Howard, Lauren E. and De Hoedt, Amanda and Schroeck, Florian R. and Klaassen, Zachary and Freedland, Stephen J. and Williams, Stephen B.},
  title        = {Context-Based Identification of Muscle Invasion Status in Patients With Bladder Cancer Using Natural Language Processing},
  journal      = {JCO Clinical Cancer Informatics},
  year         = {2022},
  month        = may,
  doi          = {10.1200/CCI.21.00097},
  url          = {https://doi.org/10.1200/CCI.21.00097},
  abstractNote = {PURPOSE Mortality from bladder cancer (BC) increases exponentially once it invades the muscle, with inherent challenges delineating at the population level. We sought to develop and validate a natural language processing (NLP) model for automatically identifying patients with muscle-invasive bladder cancer (MIBC). METHODS All patients with a Current Procedural Terminology code for transurethral resection of bladder tumor (TURBT; n = 76,060) were selected from the Department of Veterans Affairs (VA) database. A sample of 600 patients (with 2,337 full-text notes) who had TURBT and confirmed pathology results were selected for NLP model development and validation. The NLP performance was assessed by calculating the sensitivity, specificity, positive predictive value, negative predictive value, F1 score, and overall accuracy at the individual note and patient levels. RESULTS In the validation cohort, the NLP model had average overall accuracies of 94% and 96% at the note and patient levels. Specifically, the F1 score and overall accuracy for predicting muscle invasion at the patient level were 0.87% and 96%, respectively. The model classified nonmuscle-invasive bladder cancer (NMIBC) with overall accuracies of 90% and 93% at the note and patient levels. When applying the model to 71,200 patients VA-wide, the model classified 13,642 (19%) as having MIBC and 47,595 (66%) as NMIBC and was able to identify invasion status for 96% of patients with TURBT at the population level. Inherent limitations include a relatively small training set, given the size of the VA population. CONCLUSION This NLP model, with high accuracy, may be a practical tool for efficiently identifying BC invasion status and aid in population-based BC research. "This study developed an NLP model ->92% overall accuracy and applied to 71,200 patients VA-wide, found invasion status in 96% TURBT patients!"},
}

@article{yang_zhu_howard_hoedt_williams_freedland_klaassen_2022,
  author       = {Yang, Ruixin and Zhu, Di and Howard, Lauren E. and De Hoedt, Amanda and Williams, Stephen B. and Freedland, Stephen J. and Klaassen, Zachary},
  title        = {Identification of Patients With Metastatic Prostate Cancer With Natural Language Processing and Machine Learning},
  journal      = {JCO Clinical Cancer Informatics},
  year         = {2022},
  month        = oct,
  doi          = {10.1200/CCI.21.00071},
  url          = {https://doi.org/10.1200/CCI.21.00071},
  abstractNote = {PURPOSE Understanding treatment patterns and effectiveness for patients with metastatic prostate cancer (mPCa) is dependent on accurate assessment of metastatic status. The objective was to develop a natural language processing (NLP) model for identifying patients with mPCa and evaluate the model's performance against chart-reviewed data and an International Classification of Diseases (ICD) 9/10 code-based method. METHODS In total, 139,057 radiology reports on 6,211 unique patients from the Department of Veterans Affairs were used. The gold standard was metastases by detailed chart review of radiology reports. NLP performance was assessed by sensitivity, specificity, positive predictive value, negative predictive value, and date of metastases detection. Receiver operating characteristic curves was used to assess model performance. RESULTS When compared with chart review, the NLP model had high sensitivity and specificity (85% and 96%, respectively). The NLP model was able to predict patient-level metastasis status with a sensitivity of 91% and specificity of 81%, whereas sensitivity and specificity using ICD9/10 billing codes were 73% and 86%, respectively. For the NLP model, date of metastases detection was exactly concordant and within < 1 week in 55% and 58% of patients, compared with 8% and 17%, respectively, using the ICD9/10 billing codes method. The area under the curve for the NLP model was 0.911. A limitation is the NLP model was developed on the basis of a subset of patients with mPCa and may not be generalizable to all patients with mPCa. CONCLUSION This population-level NLP model for identifying patients with mPCa was more accurate than using ICD9/10 billing codes when compared with chart-reviewed data. Upon further validation, this model may allow for efficient population-level identification of patients with mPCa.},
}

@inproceedings{wang_huang_wu_yang_2021,
  author       = {Wang, L. and Huang, B. and Wu, X. and Yang, R.},
  title        = {Continuous Human Learning Optimization with Enhanced Exploitation},
  booktitle    = {Communications in Computer and Information Science},
  volume       = {1469},
  pages        = {472--487},
  year         = {2021},
  doi          = {10.1007/978-981-16-7213-2_46},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85118999376&partnerID=MN8TOARS},
  abstractNote = {Human Learning Optimization (HLO) is an emerging meta-heuristic with promising potential. Although HLO can be directly applied to real-coded problems as a binary algorithm, the search efficiency may be significantly spoiled due to “the curse of dimensionality”. To extend HLO, Continuous HLO (CHLO) is developed to solve real-values problems. However, the research on CHLO is still in its initial stages, and further efforts are needed to exploit the effectiveness of the CHLO. Therefore, this paper proposes a novel continuous human learning optimization with enhanced exploitation (CHLOEE), in which the social learning operator is redesigned to perform global search more efficiently so that the individual learning operator is relieved to focus on performing local search for enhancing the exploitation ability. Finally, the CHLOEE is evaluated on the benchmark problem and compared with CHLO as well as recent state-of-the-art meta-heuristics. The experimental results show that the proposed CHLOEE has better optimization performance.},
}

@inproceedings{yang_he_xu_ni_jones_samatova_2018,
  author       = {Yang, Ruixin and He, Junyi and Xu, Mingyang and Ni, Haoqi and Jones, Paul and Samatova, Nagiza},
  title        = {An Intelligent and Hybrid Weighted Fuzzy Time Series Model Based on Empirical Mode Decomposition for Financial Markets Forecasting},
  booktitle    = {Advances in Data Mining: Applications and Theoretical Aspects ({ICDM} 2018)},
  volume       = {10933},
  pages        = {104--118},
  year         = {2018},
  isbn         = {978-3-319-95785-2},
  issn         = {1611-3349},
  doi          = {10.1007/978-3-319-95786-9_8},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85049883593&partnerID=MN8TOARS},
  abstractNote = {Given the potentially high impact of accurate financial market forecasting, there has been considerable research on time series analysis for financial markets. We present a new Intelligent Hybrid Weighted Fuzzy (IHWF) time series model to improve forecasting accuracy in financial markets, which are complex nonlinear time-sensitive systems, influenced by many factors. The IHWF model uniquely combines Empirical Mode Decomposition (EMD) with a novel weighted fuzzy time series method. The model is enhanced by an Adaptive Sine-Cosine Human Learning Optimization (ASCHLO) algorithm to help find optimal parameters that further improve forecasting performance. EMD is a time series processing technique to extract the possible modes of various kinds of institutional and individual investors and traders, embedded in a given time series. Subsequently, the proposed weighted fuzzy time series method with chronological order based frequency and Neighborhood Volatility Direction (NVD) is analyzed and integrated with ASCHLO to determine the effective universe discourse, intervals and weights. In order to evaluate the performance of proposed model, we evaluate actual trading data of Taiwan Capitalization Weighted Stock Index (TAIEX) from 1990 to 2004 and the findings are compared with other well-known forecasting models. The results show that the proposed method outperforms the listing models in terms of accuracy.},
}

@inproceedings{xu_yang_ranshous_li_samatova_2018,
  author       = {Xu, M. and Yang, R. and Ranshous, S. and Li, S. and Samatova, N. F.},
  title        = {Leveraging External Knowledge for Phrase-Based Topic Modeling},
  booktitle    = {Proceedings - 2017 Conference on Technologies and Applications of Artificial Intelligence, TAAI 2017},
  pages        = {29--32},
  year         = {2018},
  doi          = {10.1109/TAAI.2017.25},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85048362739&partnerID=MN8TOARS},
  abstractNote = {Topic modeling has been widely used for extracting the major topics from a corpus. Each discovered topic contains a set of related individual words that describe the topic itself. The discovered topics summarize the major themes of the corpus. Recently, a few phrase-based topic models have been proposed, which simultaneously model phrases and topics. The topics discovered by these models consist of phrases besides individual words, as phrases are typically more meaningful. However, these models typically require large amounts of data to provide reliable statistics for phrase-based topic modeling, thus limiting their performance in scenarios with limited data. To address this limitation, we propose a knowledge-based topic model that incorporates two types of pre-identified external knowledge for topical phrase discovery: Phrase knowledge, and phrase correlation knowledge. Phrase knowledge guides the discovery of meaningful phrases by leveraging a set of pre-identified exemplary phrases; Phrase correlation knowledge guides the discovery of meaningful topics by exploiting a set of pre-identified pairs of related phrases. Experimental results show that our method outperforms the state-of-the-art baseline on both small and large datasets, extracting more meaningful phrases and coherent topics.},
}

@inproceedings{xu_yang_jones_samatova_2018,
  author       = {Xu, Mingyang and Yang, Ruixin and Jones, Paul and Samatova, Nagiza F.},
  title        = {Mining Aspect-Specific Opinions from Online Reviews Using a Latent Embedding Structured Topic Model},
  booktitle    = {Computational Linguistics and Intelligent Text Processing, {CICLing} 2017, Part {II}},
  volume       = {10762},
  pages        = {195--210},
  year         = {2018},
  issn         = {1611-3349},
  doi          = {10.1007/978-3-319-77116-8_15},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85055688176&partnerID=MN8TOARS},
  abstractNote = {Online reviews often contain user’s specific opinions on aspects (features) of items. These opinions are very useful to merchants and customers, but manually extracting them is time-consuming. Several topic models have been proposed to simultaneously extract item aspects and user’s opinions on the aspects, as well as to detect sentiment associated with the opinions. However, existing models tend to find poor aspect-opinion associations when limited examples of the required word co-occurrences are available in corpus. These models often also assign incorrect sentiment to words. In this paper, we propose a Latent embedding structured Opinion mining Topic model, called the LOT, which can simultaneously discover relevant aspect-level specific opinions from small or large numbers of reviews and to assign accurate sentiment to words. Experimental results for topic coherence, document sentiment classification, and a human evaluation all show that our proposed model achieves significant improvements over several state-of-the-art baselines.},
}

@inproceedings{yang_xu_jones_samatova_2018,
  author       = {Yang, R. and Xu, M. and Jones, P. and Samatova, N.},
  title        = {Real time utility-based recommendation for revenue optimization via an adaptive online Top-K high utility itemsets mining model},
  booktitle    = {ICNC-FSKD 2017 - 13th International Conference on Natural Computation, Fuzzy Systems and Knowledge Discovery},
  pages        = {1859--1866},
  year         = {2018},
  doi          = {10.1109/FSKD.2017.8393050},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85050186014&partnerID=MN8TOARS},
  abstractNote = {Recommender Systems (RS) in e-commerce are typically used to suggest products to online shopping customers, and now play a key role in product marketing strategies for major online retailers, such as Walmart and Amazon. The main goal of such systems is to predict likely future customer desires and to trigger purchases through the timely provision of product recommendations. Therefore, RS have become indispensable tools for both customers and retailers. However, most existing RS recommend products from the point view of customers (i.e. likelihood of customer purchase) but ignore one of the most important business goals: the optimization of revenue. Consequently, there is an increasing need to learn utility patterns online and provide near real-time utility-based recommendations. To address these challenges, we first define the utility of recommendation sets and formulate the problem of real time utility-based recommendation. Next, we consider that online transaction streams are usually accompanied with flow fluctuation, and propose an Adaptive Online Top-K (RAOTK) high utility itemsets mining model to guide the utility-based recommendations. Additionally, three variants of this algorithm are described and we provide a structural comparison of the four algorithms with discussions on their advantages and limitations. Moreover, to make our model more personalized, we also take the buying power of customers into account and propose a simple but effective method to estimate the consumers' willingness to pay. Finally, extensive empirical results on real-world datasets show that the proposed model works effectively and outperforms several baselines.},
}

@inproceedings{xu_yang_harenberg_samatova_2017,
  author       = {Xu, Mingyang and Yang, Ruixin and Harenberg, Steve and Samatova, Nagiza F.},
  title        = {A Lifelong Learning Topic Model Structured Using Latent Embeddings},
  booktitle    = {2017 11th {IEEE} International Conference on Semantic Computing ({ICSC})},
  pages        = {260--261},
  year         = {2017},
  issn         = {2325-6516},
  doi          = {10.1109/icsc.2017.15},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85018319146&partnerID=MN8TOARS},
  abstractNote = {We propose a latent-embedding-structured lifelong learning topic model, called the LLT model, to discover coherent topics from a corpus. Specifically, we exploit latent word embeddings to structure our model and mine word correlation knowledge to assist in topic modeling. During each learning iteration, our model learns new word embeddings based on the topics generated in the previous learning iteration. Experimental results demonstrate that our LLT model is able to generate more coherent topics than state-of-the-art methods.},
}

@inproceedings{yang_xu_he_ranshous_samatova_2017,
  author       = {Yang, Ruixin and Xu, Mingyang and He, Junyi and Ranshous, Stephen and Samatova, Nagiza F.},
  title        = {An Intelligent Weighted Fuzzy Time Series Model Based on a Sine-Cosine Adaptive Human Learning Optimization Algorithm and Its Application to Financial Markets Forecasting},
  booktitle    = {Advanced Data Mining and Applications, {ADMA} 2017},
  volume       = {10604},
  pages        = {595--607},
  year         = {2017},
  isbn         = {978-3-319-69178-7},
  issn         = {1611-3349},
  doi          = {10.1007/978-3-319-69179-4_42},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-85033718976&partnerID=MN8TOARS},
  abstractNote = {Financial forecasting is an extremely challenging task given the complex, nonlinear nature of financial market systems. To overcome this challenge, we present an intelligent weighted fuzzy time series model for financial forecasting, which uses a sine-cosine adaptive human learning optimization (SCHLO) algorithm to search for the optimal parameters for forecasting. New weighted operators that consider frequency based chronological order and stock volume are analyzed, and SCHLO is integrated to determine the effective intervals and weighting factors. Furthermore, a novel short-term trend repair operation is developed to complement the final forecasting process. Finally, the proposed model is applied to four world major trading markets: the Dow Jones Index (DJI), the German Stock Index (DAX), the Japanese Stock Index (NIKKEI), and Taiwan Stock Index (TAIEX). Experimental results show that our model is consistently more accurate than the state-of-the-art baseline methods. The easy implementation and effective forecasting performance suggest our proposed model could be a favorable market application prospect.},
}

@article{wang_yang_ni_ye_fei_pardalos_2015,
  author       = {Wang, Ling and Yang, Ruixin and Ni, Haoqi and Ye, Wei and Fei, Minrui and Pardalos, Panos M.},
  title        = {A human learning optimization algorithm and its application to multi-dimensional knapsack problems},
  journal      = {Applied Soft Computing},
  volume       = {34},
  pages        = {736--743},
  year         = {2015},
  month        = sep,
  issn         = {1872-9681},
  doi          = {10.1016/j.asoc.2015.06.004},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84932632587&partnerID=MN8TOARS},
  abstractNote = {Inspired by human learning mechanisms, a novel meta-heuristic algorithm named human learning optimization (HLO) is presented in this paper in which the individual learning operator, social learning operator, random exploration learning operator and re-learning operator are developed to generate new solutions and search for the optima by mimicking the human learning process. Then HLO is applied to solve the well-known 5.100 and 10.100 multi-dimensional knapsack problems from the OR-library and the performance of HLO is compared with that of other meta-heuristics collected from the recent literature. The experimental results show that the presented HLO achieves the best performance in comparison with other meta-heuristics, which demonstrates that HLO is a promising optimization tool.},
}

@article{wang_ni_yang_pardalos_du_fei_2015,
  author       = {Wang, L. and Ni, H. and Yang, R. and Pardalos, P. M. and Du, X. and Fei, M.},
  title        = {An adaptive simplified human learning optimization algorithm},
  journal      = {Information Sciences},
  volume       = {320},
  pages        = {126--139},
  year         = {2015},
  doi          = {10.1016/j.ins.2015.05.022},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84937432288&partnerID=MN8TOARS},
  abstractNote = {This paper presents a novel meta-heuristic optimization algorithm, named Adaptive Simplified Human Learning Optimization (ASHLO), which is inspired by the human learning mechanisms. Three learning operators, i.e. the random learning operator, the individual learning operator, and the social learning operator, are developed to generate new solutions and search for the optima by mimicking the learning behaviors of humans. The numerical functions, deceptive functions and 0–1 knapsack problems are adopted as benchmark problems to validate the performance of ASHLO, and the results are compared with those of binary particle swarm optimization (BPSO), modified binary differential evolution (MBDE), the binary fruit fly optimization algorithm (bFOA) and adaptive binary harmony search (ABHS). The experimental results demonstrate that the developed ASHLO significantly outperforms BPSO, MBDE, bFOA and ABHS and has a robust search ability for various problems. With the adaptive strategy, the search ability of ASHLO is improved further especially on the high-dimensional and complicated problems. Considering the ease of implementation, the excellence of global search ability and the robustness for various problems, ASHLO is a promising optimization tool for scientific research and engineering applications.},
}

@article{wang_ni_yang_pardalos_jia_fei_2015,
  author       = {Wang, L. and Ni, H. and Yang, R. and Pardalos, P. M. and Jia, L. and Fei, M.},
  title        = {Intelligent virtual reference feedback tuning and its application to heat treatment electric furnace control},
  journal      = {Engineering Applications of Artificial Intelligence},
  volume       = {46},
  pages        = {1--9},
  year         = {2015},
  doi          = {10.1016/j.engappai.2015.08.008},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84946099910&partnerID=MN8TOARS},
  abstractNote = {Virtual Reference Feedback Tuning (VRFT) is a data-driven one-shot control method which is very attractive for engineering applications. However, it cannot design controllers with the optimal control performance based on the standard VRFT approach as performance indices are not explicitly represented in its objective function. To deal with this problem, this paper presents a novel intelligent VRFT (IVRFT) based on adaptive binary ant system harmony search (ABASHS) where the reference model of VRFT, which potentially determines the control performance, is coordinately optimized with the controller by ABASHS to achieve the best control performance. Finally, the proposed ABASHS-based intelligent virtual reference feedback tuning (ABASHS-IVRFT) method is applied to the temperature control of the heat treatment electric furnace. The simulation results demonstrate that ABASHS-IVRFT is valid and can implement the optimal non-overshoot control easily and efficiently. Considering the characteristics such as ease of implementation and no need of the model information of controlled objects, ABASHS-IVRFT is a promising approach for engineering applications.},
}

@inproceedings{wang_ni_yang_fei_ye_2014,
  author       = {Wang, L. and Ni, H. and Yang, R. and Fei, M. and Ye, W.},
  title        = {A simple human learning optimization algorithm},
  booktitle    = {Communications in Computer and Information Science},
  volume       = {462},
  pages        = {56--65},
  year         = {2014},
  doi          = {10.1007/978-3-662-45261-5_7},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84908599084&partnerID=MN8TOARS},
  abstractNote = {This paper presents a novel Simple Human Learning Optimization (SHLO) algorithm, which is inspired by human learning mechanisms. Three learning operators are developed to generate new solutions and search for the optima by mimicking the learning behaviors of human. The 0-1 knapsack problems are adopted as benchmark problems to validate the performance of SHLO, and the results are compared with those of binary particle swarm optimization (BPSO), modified binary differential evolution (MBDE), binary fruit fly optimization algorithm (bFOA) and adaptive binary harmony search algorithm (ABHS). The experimental results demonstrate that SHLO significantly outperforms BPSO, MBDE, bFOA and ABHS. Considering the ease of implementation and the excellence of global search ability, SHLO is a promising optimization tool.},
}

@article{wang_ni_yang_pappu_fenn_pardalos_2014,
  author       = {Wang, L. and Ni, H. and Yang, R. and Pappu, V. and Fenn, M. B. and Pardalos, P. M.},
  title        = {Feature selection based on meta-heuristics for biomedicine},
  journal      = {Optimization Methods and Software},
  volume       = {29},
  number       = {4},
  pages        = {703--719},
  year         = {2014},
  doi          = {10.1080/10556788.2013.834900},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84894101318&partnerID=MN8TOARS},
  abstractNote = {Feature selection can efficiently improve the accuracy of classification and reduce the measurement, storage and computation demands, and thus it has been applied in biomedical research increasingly. Considering the non-deterministic polynomial-time hard characteristic of feature selection, meta-heuristics are introduced into feature selection in biomedicine on account of their excellent global search ability. However, most of biomedical problems are characterized by high dimensionality, which is a challenge for feature selection methods based on meta-heuristics due to the curse of dimensionality. Thus, six meta-heuristics, that is, a genetic algorithm, particle swarm optimization, ant colony optimization, harmony search, differential evolution, and quantum-inspired evolutionary algorithm, which are widely studied in the meta-heuristic community, are introduced into feature selection in this paper and the performance of the algorithms is analysed and compared with each other for solving feature selection in biomedicine effectively. To evaluate the search ability of the algorithms fairly and exactly, a set of feature selection benchmark problems are designed and yielded for the performance tests. The experimental results show that all the meta-heuristics are powerful enough to achieve the ideal results on low-dimensional feature selection problems, while it is essential to choose a proper algorithm for the high-dimensional ones.},
}

@article{wang_yang_pardalos_qian_fei_2013,
  author       = {Wang, L. and Yang, R. and Pardalos, P. M. and Qian, L. and Fei, M.},
  title        = {An adaptive fuzzy controller based on harmony search and its application to power plant control},
  journal      = {International Journal of Electrical Power and Energy Systems},
  volume       = {53},
  number       = {1},
  pages        = {272--278},
  year         = {2013},
  doi          = {10.1016/j.ijepes.2013.05.015},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84878279947&partnerID=MN8TOARS},
  abstractNote = {Considering the reliability and safety requirements of power stations, a novel adaptive fuzzy controller is presented to implement non-overshoot control in power plants in which the Lyapunov-based adaptive law is employed to guarantee the stability of the controller while a modified Adaptive Binary Harmony Search (ABHS) algorithm is utilized to search the optimal control parameters to improve the dynamic performance. Two strategies are analyzed and used with ABHS to guarantee the non-overshoot of control. Furthermore, a simple but efficient repair operation is developed to deal with the time-varying characteristic of the thermal process. Finally, the proposed ABHS-based Adaptive Fuzzy Control (ABHSAFC) method is applied to the bed temperature control of the Circulating Fluidized Bed Boiler (CFBB). The experimental results demonstrate that ABHSAFC can implement the expected non-overshoot control efficiently and outperforms the classical Lyapunov-based adaptive fuzzy control, ABHS-based fuzzy control and ABHS-based PID control. Considering the characteristics of the easy implementation, robustness and the guaranteed stability, ABHSAFC promises a favorable engineering application prospect.},
}

@article{wang_yang_xu_niu_pardalos_fei_2013,
  author       = {Wang, L. and Yang, R. and Xu, Y. and Niu, Q. and Pardalos, P. M. and Fei, M.},
  title        = {An improved adaptive binary Harmony Search algorithm},
  journal      = {Information Sciences},
  volume       = {232},
  pages        = {58--87},
  year         = {2013},
  doi          = {10.1016/j.ins.2012.12.043},
  url          = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84875444050&partnerID=MN8TOARS},
  abstractNote = {Harmony Search (HS), inspired by the music improvisation process, is a new meta-heuristic optimization method and has been successfully used to tackle the optimization problems in discrete or continuous space. Although the standard HS algorithm is able to solve binary-coded optimization problems, the pitch adjustment operator of HS is degenerated in the binary space, which spoils the performance of the algorithm. Based on the analysis of the drawback of the standard HS, an improved adaptive binary Harmony Search (ABHS) algorithm is proposed in this paper to solve the binary-coded problems more effectively. Various adaptive mechanisms are examined and investigated, and a scalable adaptive strategy is developed for ABHS to enhance its search ability and robustness. The experimental results on the benchmark functions and 0–1 knapsack problems demonstrate that the proposed ABHS is efficient and effective, which outperforms the binary Harmony Search, the novel global Harmony Search algorithm and the discrete binary Particle Swarm Optimization in terms of the search accuracy and convergence speed.},
}