@article{baldassarre_ernst_hermann_menzies_yedida_2023, title={(Re)Use of Research Results (Is Rampant)}, volume={66}, ISSN={["1557-7317"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85147250416&partnerID=MN8TOARS}, DOI={10.1145/3554976}, abstractNote={Prior pessimism about reuse in software engineering research may have been a result of using the wrong methods to measure the wrong things.}, number={2}, journal={COMMUNICATIONS OF THE ACM}, author={Baldassarre, Maria Teresa and Ernst, Neil and Hermann, Ben and Menzies, Tim and Yedida, Rahul}, year={2023}, month={Feb}, pages={75–81} } @article{yedida_krishna_kalia_menzies_xiao_vukovic_2023, title={An expert system for redesigning software for cloud applications}, volume={219}, ISSN={["1873-6793"]}, url={https://doi.org/10.1016/j.eswa.2023.119673}, DOI={10.1016/j.eswa.2023.119673}, abstractNote={Cloud-based software has many advantages. When services are divided into many independent components, they are easier to update. Also, during peak demand, it is easier to scale cloud services (just hire more CPUs). Hence, many organizations are partitioning their monolithic enterprise applications into cloud-based microservices. Recently there has been much work using machine learning to simplify this partitioning task. Despite much research, no single partitioning method can be recommended as generally useful. More specifically, those prior solutions are “brittle”; i.e. if they work well for one kind of goal in one dataset, then they can be sub-optimal if applied to many datasets and multiple goals. This work extends prior work and proposes DEEPLY to fix the brittleness problem. Specifically, we use (a) hyper-parameter optimization to sample from the Pareto frontier of configurations (b) a weighted loss to choose optimally from this Pareto frontier (c) the 1cycle learning rate policy to avoid local minima with Adam and (d) spectral clustering over k-means. 
Our work shows that DEEPLY outperforms other algorithms in this space across different metrics. Moreover, our ablation study reveals that of the changes, the weighted loss is the most important, followed by hyper-parameter optimization (contrary to prior belief). To enable the reuse of this research, DEEPLY is available on-line at .}, journal={EXPERT SYSTEMS WITH APPLICATIONS}, author={Yedida, Rahul and Krishna, Rahul and Kalia, Anup and Menzies, Tim and Xiao, Jin and Vukovic, Maja}, year={2023}, month={Jun} } @article{yedida_kang_tu_yang_lo_menzies_2023, title={How to Find Actionable Static Analysis Warnings: A Case Study With FindBugs}, volume={49}, ISSN={["1939-3520"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85147213634&partnerID=MN8TOARS}, DOI={10.1109/TSE.2023.3234206}, abstractNote={Automatically generated static code warnings suffer from a large number of false alarms. Hence, developers only take action on a small percent of those warnings. To better predict which static code warnings should not be ignored, we suggest that analysts need to look deeper into their algorithms to find choices that better improve the particulars of their specific problem. Specifically, we show here that effective predictors of such warnings can be created by methods that locally adjust the decision boundary (between actionable warnings and others). These methods yield a new high water-mark for recognizing actionable static code warnings. 
For eight open-source Java projects (cassandra, jmeter, commons, lucene-solr, maven, ant, tomcat, derby) we achieve perfect test results on 4/8 datasets and, overall, a median AUC (area under the true negatives, true positives curve) of 92%.}, number={4}, journal={IEEE TRANSACTIONS ON SOFTWARE ENGINEERING}, author={Yedida, Rahul and Kang, Hong Jin and Tu, Huy and Yang, Xueqi and Lo, David and Menzies, Tim}, year={2023}, month={Apr}, pages={2856–2872} } @article{yedida_kang_tu_yang_lo_menzies_2022, title={How to Find Actionable Static Analysis Warnings: A Case Study with FindBugs}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85146211964&partnerID=MN8TOARS}, DOI={10.48550/arXiv.2205.10504}, abstractNote={Automatically generated static code warnings suffer from a large number of false alarms. Hence, developers only take action on a small percent of those warnings. To better predict which static code warnings should not be ignored, we suggest that analysts need to look deeper into their algorithms to find choices that better improve the particulars of their specific problem. Specifically, we show here that effective predictors of such warnings can be created by methods that locally adjust the decision boundary (between actionable warnings and others). These methods yield a new high water-mark for recognizing actionable static code warnings. For eight open-source Java projects (cassandra, jmeter, commons, lucene-solr, maven, ant, tomcat, derby) we achieve perfect test results on 4/8 datasets and, overall, a median AUC (area under the true negatives, true positives curve) of 92%.}, journal={arXiv}, author={Yedida, R. and Kang, H.J. and Tu, H. and Yang, X. and Lo, D. 
and Menzies, T.}, year={2022} } @article{yedida_menzies_2022, title={How to Improve Deep Learning for Software Analytics (a case study with code smell detection)}, ISSN={["2160-1852"]}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85134010393&partnerID=MN8TOARS}, DOI={10.1145/3524842.3528458}, abstractNote={To reduce technical debt and make code more maintainable, it is important to be able to warn programmers about code smells. State-of-the-art code small detectors use deep learners, usually without exploring alternatives. For example, one promising alternative is GHOST (from TSE'21) that relies on a combination of hyper-parameter optimization of feedforward neural networks and a novel oversampling technique. The prior study from TSE'21 proposing this novel “fuzzy sampling” was somewhat limited in that the method was tested on defect prediction, but nothing else. Like defect prediction, code smell detection datasets have a class imbalance (which motivated “fuzzy sampling”). Hence, in this work we test if fuzzy sampling is useful for code smell detection. The results of this paper show that we can achieve better than state-of-the-art results on code smell detection with fuzzy oversampling. For example, for “feature envy”, we were able to achieve 99+% AUC across all our datasets, and on 8/10 datasets for “misplaced class”. While our specific results refer to code smell detection, they do suggest other lessons for other kinds of analytics. For example: (a) try better preprocessing before trying complex learners (b) include simpler learners as a baseline in software analytics (c) try “fuzzy sampling” as one such baseline. 
In order to support others trying to reproduce/extend/refute this work, all our code and data is available online at https://github.com/yrahul3910/code-smell-detection.}, journal={2022 MINING SOFTWARE REPOSITORIES CONFERENCE (MSR 2022)}, author={Yedida, Rahul and Menzies, Tim}, year={2022}, pages={156--166} } @article{yedida_menzies_2022b, title={How to Improve Deep Learning for Software Analytics (a case study with code smell detection)}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85171075927&partnerID=MN8TOARS}, DOI={10.48550/arxiv.2202.01322}, abstractNote={To reduce technical debt and make code more maintainable, it is important to be able to warn programmers about code smells. State-of-the-art code small detectors use deep learners, without much exploration of alternatives within that technology. One promising alternative for software analytics and deep learning is GHOST (from TSE'21) that relies on a combination of hyper-parameter optimization of feedforward neural networks and a novel oversampling technique to deal with class imbalance. The prior study from TSE'21 proposing this novel "fuzzy sampling" was somewhat limited in that the method was tested on defect prediction, but nothing else. Like defect prediction, code smell detection datasets have a class imbalance (which motivated "fuzzy sampling"). Hence, in this work we test if fuzzy sampling is useful for code smell detection. The results of this paper show that we can achieve better than state-of-the-art results on code smell detection with fuzzy oversampling. For example, for "feature envy", we were able to achieve 99+\% AUC across all our datasets, and on 8/10 datasets for "misplaced class". While our specific results refer to code smell detection, they do suggest other lessons for other kinds of analytics. 
For example: (a) try better preprocessing before trying complex learners (b) include simpler learners as a baseline in software analytics (c) try "fuzzy sampling" as one such baseline.}, journal={arXiv}, author={Yedida, R. and Menzies, T.}, year={2022} } @article{yedida_beasley_korn_abrar_melo-filho_muratov_graedon_graedon_chirkova_tropsha_2022, title={TEXT MINING OF THE PEOPLE'S PHARMACY RADIO SHOW TRANSCRIPTS CAN IDENTIFY NOVEL DRUG REPURPOSING HYPOTHESES}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85127353260&partnerID=MN8TOARS}, DOI={10.1101/2022.02.02.22270107}, abstractNote={ABSTRACT}, journal={medRxiv}, author={Yedida, R. and Beasley, J.-M. and Korn, D. and Abrar, S.M. and Melo-Filho, C.C. and Muratov, E. and Graedon, J. and Graedon, T. and Chirkova, R. and Tropsha, A.}, year={2022} } @article{yedida_krishna_kalia_menzies_xiao_vukovic_2021, title={An Expert System for Redesigning Software for Cloud Applications}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85134109536&partnerID=MN8TOARS}, DOI={10.48550/arXiv.2109.14569}, abstractNote={Cloud-based software has many advantages. When services are divided into many independent components, they are easier to update. Also, during peak demand, it is easier to scale cloud services (just hire more CPUs). Hence, many organizations are partitioning their monolithic enterprise applications into cloud-based microservices. Recently there has been much work using machine learning to simplify this partitioning task. Despite much research, no single partitioning method can be recommended as generally useful. More specifically, those prior solutions are "brittle"; i.e. if they work well for one kind of goal in one dataset, then they can be sub-optimal if applied to many datasets and multiple goals. In order to find a generally useful partitioning method, we propose DEEPLY. 
This new algorithm extends the CO-GCN deep learning partition generator with (a) a novel loss function and (b) some hyper-parameter optimization. As shown by our experiments, DEEPLY generally outperforms prior work (including CO-GCN, and others) across multiple datasets and goals. To the best of our knowledge, this is the first report in SE of such stable hyper-parameter optimization. To aid reuse of this work, DEEPLY is available on-line at https://bit.ly/2WhfFlB.}, journal={arXiv}, author={Yedida, R. and Krishna, R. and Kalia, A. and Menzies, T. and Xiao, J. and Vukovic, M.}, year={2021} } @article{yedida_saha_2021, title={Beginning with machine learning: a comprehensive primer}, volume={7}, ISSN={1951-6401}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85110638036&partnerID=MN8TOARS}, DOI={10.1140/epjs/s11734-021-00209-7}, number={10}, journal={EUROPEAN PHYSICAL JOURNAL-SPECIAL TOPICS}, author={Yedida, Rahul and Saha, Snehanshu}, year={2021}, month=jul } @book{baldassarre_ernst_hermann_menzies_yedida_2021, title={Crowdsourcing the State of the Art(ifacts)}, number={2108.06821}, author={Baldassarre, M.T. and Ernst, N. and Hermann, B. and Menzies, T. and Yedida, R.}, year={2021} } @article{baldassarre_ernst_hermann_menzies_yedida_2021b, title={Crowdsourcing the state of the art(ifacts)}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85170893555&partnerID=MN8TOARS}, DOI={10.48550/arxiv.2108.06821}, journal={arXiv}, author={Baldassarre, M.T. and Ernst, N. and Hermann, B. and Menzies, T. and Yedida, R.}, year={2021} } @article{yedida_menzies_2021, title={Documenting Evidence of a Reuse of 'A Systematic Study of the Class Imbalance Problem in Convolutional Neural Networks'}, url={https://doi.org/10.1145/3468264.3477212}, DOI={10.1145/3468264.3477212}, abstractNote={We report here the reuse of oversampling, and modifications to the basic approach, used in a recent TSE ’21 paper by Yedida \& Menzies. 
The method reused is the oversampling technique studied by Buda et al. These methods were studied in the SE domain (specifically, for defect prediction), and extended by Yedida \& Menzies.}, journal={PROCEEDINGS OF THE 29TH ACM JOINT MEETING ON EUROPEAN SOFTWARE ENGINEERING CONFERENCE AND SYMPOSIUM ON THE FOUNDATIONS OF SOFTWARE ENGINEERING (ESEC/FSE '21)}, publisher={ACM}, author={Yedida, Rahul and Menzies, Tim}, year={2021}, pages={1595--1595} } @article{yedida_menzies_2021b, title={Documenting Evidence of a Reuse of 'On the Number of Linear Regions of Deep Neural Networks'}, url={https://doi.org/10.1145/3468264.3477213}, DOI={10.1145/3468264.3477213}, abstractNote={We report here the reuse of theoretical insights from deep learning literature, used in a recent TSE '21 paper by Yedida \& Menzies. The artifact replicated is the lower bound on the number of piecewise linear regions in the decision boundary of a feedforward neural network with ReLU activations, as studied by Montufar et al. We document the reuse of Theorem 4 from Montufar et al. by Yedida \& Menzies.}, journal={PROCEEDINGS OF THE 29TH ACM JOINT MEETING ON EUROPEAN SOFTWARE ENGINEERING CONFERENCE AND SYMPOSIUM ON THE FOUNDATIONS OF SOFTWARE ENGINEERING (ESEC/FSE '21)}, publisher={ACM}, author={Yedida, Rahul and Menzies, Tim}, year={2021}, pages={1596--1596} } @article{yang_chen_yedida_yu_menzies_2021, title={Learning to recognize actionable static code warnings (is intrinsically easy)}, volume={26}, ISSN={1573-7616}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85104658664&partnerID=MN8TOARS}, DOI={10.1007/s10664-021-09948-6}, abstractNote={Static code warning tools often generate warnings that programmers ignore. Such tools can be made more useful via data mining algorithms that select the “actionable” warnings; i.e. the warnings that are usually not ignored. 
In this paper, we look for actionable warnings within a sample of 5,675 actionable warnings seen in 31,058 static code warnings from FindBugs. We find that data mining algorithms can find actionable warnings with remarkable ease. Specifically, a range of data mining methods (deep learners, random forests, decision tree learners, and support vector machines) all achieved very good results (recalls and AUC(TRN, TPR) measures usually over 95\% and false alarms usually under 5\%). Given that all these learners succeeded so easily, it is appropriate to ask if there is something about this task that is inherently easy. We report that while our data sets have up to 58 raw features, those features can be approximated by less than two underlying dimensions. For such intrinsically simple data, many different kinds of learners can generate useful models with similar performance. Based on the above, we conclude that learning to recognize actionable static code warnings is easy, using a wide range of learning algorithms, since the underlying data is intrinsically simple. If we had to pick one particular learner for this task, we would suggest linear SVMs (since, at least in our sample, that learner ran relatively quickly and achieved the best median performance) and we would not recommend deep learning (since this data is intrinsically very simple).}, number={3}, journal={EMPIRICAL SOFTWARE ENGINEERING}, publisher={Springer Science and Business Media LLC}, author={Yang, Xueqi and Chen, Jianfeng and Yedida, Rahul and Yu, Zhe and Menzies, Tim}, year={2021}, month=may } @inproceedings{yedida_krishna_kalia_menzies_xiao_vukovic_2021, place={New York}, title={Lessons learned from hyper-parameter tuning for microservice candidate identification}, booktitle={Proceedings of the thirty-sixth IEEE/ACM International Conference on Automated Software Engineering (ASE)}, publisher={Association for Computing Machinery}, author={Yedida, R. and Krishna, R. and Kalia, A. and Menzies, T. 
and Xiao, J. and Vukovic, M.}, year={2021} } @article{yedida_krishna_kalia_menzies_xiao_vukovic_2021b, title={Lessons learned from hyper-parameter tuning for microservice candidate identification}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85170462197&partnerID=MN8TOARS}, DOI={10.48550/arxiv.2106.06652}, abstractNote={When optimizing software for the cloud, monolithic applications need to be partitioned into many smaller *microservices*. While many tools have been proposed for this task, we warn that the evaluation of those approaches has been incomplete; e.g. minimal prior exploration of hyperparameter optimization. Using a set of open source Java EE applications, we show here that (a) such optimization can significantly improve microservice partitioning; and that (b) an open issue for future work is how to find which optimizer works best for different problems. To facilitate that future work, see [https://github.com/yrahul3910/ase-tuned-mono2micro](https://github.com/yrahul3910/ase-tuned-mono2micro) for a reproduction package for this research.}, journal={arXiv}, author={Yedida, R. and Krishna, R. and Kalia, A. and Menzies, T. and Xiao, J. and Vukovic, M.}, year={2021} } @inproceedings{yedida_krishna_kalia_menzies_xiao_vukovic_2021c, title={Lessons learned from hyper-parameter tuning for microservice candidate identification}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85125444531&partnerID=MN8TOARS}, DOI={10.1109/ASE51524.2021.9678704}, abstractNote={When optimizing software for the cloud, monolithic applications need to be partitioned into many smaller microservices. While many tools have been proposed for this task, we warn that the evaluation of those approaches has been incomplete; e.g. minimal prior exploration of hyperparameter optimization. 
Using a set of open source Java EE applications, we show here that (a) such optimization can significantly improve microservice partitioning; and that (b) an open issue for future work is how to find which optimizer works best for different problems. To facilitate that future work, see https://github.com/yrahul3910/ase-tuned-mono2micro for a reproduction package for this research.}, booktitle={Proceedings - 2021 36th IEEE/ACM International Conference on Automated Software Engineering, ASE 2021}, author={Yedida, R. and Krishna, R. and Kalia, A. and Menzies, T. and Xiao, J. and Vukovic, M.}, year={2021}, pages={1141–1145} } @article{yedida_saha_prashanth_2021, title={LipschitzLR: Using theoretically computed adaptive learning rates for fast convergence}, volume={51}, ISSN={["1573-7497"]}, url={https://doi.org/10.1007/s10489-020-01892-0}, DOI={10.1007/s10489-020-01892-0}, abstractNote={We present a novel theoretical framework for computing large, adaptive learning rates. Our framework makes minimal assumptions on the activations used and exploits the functional properties of the loss function. Specifically, we show that the inverse of the Lipschitz constant of the loss function is an ideal learning rate. We analytically compute formulas for the Lipschitz constant of several loss functions, and through extensive experimentation, demonstrate the strength of our approach using several architectures and datasets. In addition, we detail the computation of learning rates when other optimizers, namely, SGD with momentum, RMSprop, and Adam, are used. 
Compared to standard choices of learning rates, our approach converges faster, and yields better results.}, number={3}, journal={APPLIED INTELLIGENCE}, publisher={Springer Science and Business Media LLC}, author={Yedida, Rahul and Saha, Snehanshu and Prashanth, Tejas}, year={2021}, month={Mar}, pages={1460–1478} } @article{yedida_yang_menzies_2021, title={Old but Gold: Reconsidering the value of feedforward learners for software analytics}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85171057311&partnerID=MN8TOARS}, DOI={10.48550/arxiv.2101.06319}, abstractNote={There has been an increased interest in the use of deep learning approaches for software analytics tasks. State-of-the-art techniques leverage modern deep learning techniques such as LSTMs, yielding competitive performance, albeit at the price of longer training times. Recently, Galke and Scherp [18] showed that at least for image recognition, a decades-old feedforward neural network can match the performance of modern deep learning techniques. This motivated us to try the same in the SE literature. Specifically, in this paper, we apply feedforward networks with some preprocessing to two analytics tasks: issue close time prediction, and vulnerability detection. We test the hypothesis laid by Galke and Scherp [18], that feedforward networks suffice for many analytics tasks (which we call, the "Old but Gold" hypothesis) for these two tasks. For three out of five datasets from these tasks, we achieve new high-water mark results (that out-perform the prior state-of-the-art results) and for a fourth data set, Old but Gold performed as well as the recent state of the art. Furthermore, the old but gold results were obtained orders of magnitude faster than prior work. For example, for issue close time, old but gold found good predictors in 90 seconds (as opposed to the newer methods, which took 6 hours to run). 
Our results supports the "Old but Gold" hypothesis and leads to the following recommendation: try simpler alternatives before more complex methods. At the very least, this will produce a baseline result against which researchers can compare some other, supposedly more sophisticated, approach. And in the best case, they will obtain useful results that are as good as anything else, in a small fraction of the effort. To support open science, all our scripts and data are available on-line at https://github.com/fastidiouschipmunk/simple.}, journal={arXiv}, author={Yedida, R. and Yang, X. and Menzies, T.}, year={2021} } @article{yedida_menzies_2021c, title={On the Value of Oversampling for Deep Learning in Software Defect Prediction}, volume={48}, ISSN={0098-5589 1939-3520 2326-3881}, url={http://dx.doi.org/10.1109/TSE.2021.3079841}, DOI={10.1109/TSE.2021.3079841}, abstractNote={One truism of deep learning is that the automatic feature engineering (seen in the first layers of those networks) excuses data scientists from performing tedious manual feature engineering prior to running DL. For the specific case of deep learning for defect prediction, we show that that truism is false. Specifically, when we pre-process data with a novel oversampling technique called fuzzy sampling, as part of a larger pipeline called GHOST (Goal-oriented Hyper-parameter Optimization for Scalable Training), then we can do significantly better than the prior DL state of the art in 14/20 defect data sets. Our approach yields state-of-the-art results significantly faster deep learners. 
These results present a cogent case for the use of oversampling prior to applying deep learning on software defect prediction datasets.}, number={8}, journal={IEEE Transactions on Software Engineering}, publisher={Institute of Electrical and Electronics Engineers (IEEE)}, author={Yedida, Rahul and Menzies, Tim}, year={2021}, pages={1--1} } @article{agrawal_yang_agrawal_yedida_shen_menzies_2021, title={Simpler Hyperparameter Optimization for Software Analytics: Why, How, When}, volume={48}, ISSN={0098-5589 1939-3520 2326-3881}, url={http://dx.doi.org/10.1109/TSE.2021.3073242}, DOI={10.1109/TSE.2021.3073242}, abstractNote={How can we make software analytics simpler and faster? One method is to match the complexity of analysis to the intrinsic complexity of the data being explored. For example, hyperparameter optimizers find the control settings for data miners that improve the predictions generated via software analytics. Sometimes, very fast hyperparameter optimization can be achieved by “DODGE-ing”; i.e., simply steering way from settings that lead to similar conclusions. But when is it wise to use that simple approach and when must we use more complex (and much slower) optimizers? To answer this, we applied hyperparameter optimization to 120 SE data sets that explored bad smell detection, predicting Github issue close time, bug report analysis, defect prediction, and dozens of other non-SE problems. We find that the simple DODGE works best for data sets with low “intrinsic dimensionality” ($\mu_D \approx 3$) and very poorly for higher-dimensional data ($\mu_D > 8$). 
Nearly all the SE data seen here was intrinsically low-dimensional, indicating that DODGE is applicable for many SE analytics tasks.}, number={8}, journal={IEEE Transactions on Software Engineering}, publisher={Institute of Electrical and Electronics Engineers (IEEE)}, author={Agrawal, Amritanshu and Yang, Xueqi and Agrawal, Rishabh and Yedida, Rahul and Shen, Xipeng and Menzies, Tim}, year={2021}, pages={1--1} } @book{yedida_yang_menzies_2021b, title={When SIMPLE is better than complex: A case study on deep learning for predicting Bugzilla issue close time}, number={2101.06319}, internal-note={NOTE(review): number 2101.06319 also appears above as the arXiv DOI of the Old but Gold preprint -- verify the correct arXiv id for this title}, author={Yedida, R. and Yang, X. and Menzies, T.}, year={2021} } @misc{saha_nagaraj_mathur_yedida_sneha_2020, title={Evolution of novel activation functions in neural network training for astronomy data: habitability classification of exoplanets}, volume={229}, ISSN={1951-6401}, url={https://doi.org/10.1140/epjst/e2020-000098-9}, DOI={10.1140/epjst/e2020-000098-9}, abstractNote={We present analytical exploration of novel activation functions as consequence of integration of several ideas leading to implementation and subsequent use in habitability classification of exoplanets. Neural networks, although a powerful engine in supervised methods, often require expensive tuning efforts for optimized performance. Habitability classes are hard to discriminate, especially when attributes used as hard markers of separation are removed from the data set. The solution is approached from the point of investigating analytical properties of the proposed activation functions. The theory of ordinary differential equations and fixed point are exploited to justify the "lack of tuning efforts" to achieve optimal performance compared to traditional activation functions. Additionally, the relationship between the proposed activation functions and the more popular ones is established through extensive analytical and empirical evidence. 
Finally, the activation functions have been implemented in plain vanilla feed-forward neural network to classify exoplanets.}, number={16}, journal={EUROPEAN PHYSICAL JOURNAL-SPECIAL TOPICS}, publisher={Springer Science and Business Media LLC}, author={Saha, Snehanshu and Nagaraj, Nithin and Mathur, Archana and Yedida, Rahul and Sneha, H. R.}, year={2020}, month={Nov}, pages={2629–2738} } @article{yang_chen_yedida_yu_menzies_2020, title={How to Recognize Actionable Static Code Warnings (Using Linear SVMs)}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85171025965&partnerID=MN8TOARS}, DOI={10.48550/arxiv.2006.00444}, abstractNote={Static code warning tools often generate warnings that programmers ignore. Such tools can be made more useful via data mining algorithms that select the "actionable" warnings; i.e. the warnings that are usually not ignored. In this paper, we look for actionable warnings within a sample of 5,675 actionable warnings seen in 31,058 static code warnings from FindBugs. We find that data mining algorithms can find actionable warnings with remarkable ease. Specifically, a range of data mining methods (deep learners, random forests, decision tree learners, and support vector machines) all achieved very good results (recalls and AUC (TRN, TPR) measures usually over 95% and false alarms usually under 5%). Given that all these learners succeeded so easily, it is appropriate to ask if there is something about this task that is inherently easy. We report that while our data sets have up to 58 raw features, those features can be approximated by less than two underlying dimensions. For such intrinsically simple data, many different kinds of learners can generate useful models with similar performance. Based on the above, we conclude that learning to recognize actionable static code warnings is easy, using a wide range of learning algorithms, since the underlying data is intrinsically simple. 
If we had to pick one particular learner for this task, we would suggest linear SVMs (since, at least in our sample, that learner ran relatively quickly and achieved the best median performance) and we would not recommend deep learning (since this data is intrinsically very simple).}, journal={arXiv}, author={Yang, X. and Chen, J. and Yedida, R. and Yu, Z. and Menzies, T.}, year={2020} } @article{yedida_menzies_2020, title={On the value of oversampling for deep learning in software defect prediction}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85171025165&partnerID=MN8TOARS}, DOI={10.48550/arxiv.2008.03835}, abstractNote={One truism of deep learning is that the automatic feature engineering (seen in the first layers of those networks) excuses data scientists from performing tedious manual feature engineering prior to running DL. For the specific case of deep learning for defect prediction, we show that that truism is false. Specifically, when we preprocess data with a novel oversampling technique called fuzzy sampling, as part of a larger pipeline called GHOST (Goal-oriented Hyper-parameter Optimization for Scalable Training), then we can do significantly better than the prior DL state of the art in 14/20 defect data sets. Our approach yields state-of-the-art results significantly faster deep learners. These results present a cogent case for the use of oversampling prior to applying deep learning on software defect prediction datasets.}, journal={arXiv}, author={Yedida, R. 
and Menzies, T.}, year={2020} } @inbook{khaidem_yedida_theophilus_2020, series={Communications in Computer and Information Science}, title={Optimizing Inter-nationality of Journals: A Classical Gradient Approach Revisited via Swarm Intelligence}, volume={1290}, ISBN={9789813364622 9789813364639}, ISSN={1865-0929 1865-0937}, url={http://dx.doi.org/10.1007/978-981-33-6463-9_1}, DOI={10.1007/978-981-33-6463-9_1}, abstractNote={With the growth of a vast number of new journals, the de facto definitions of Internationality has raised debate across researchers. A robust set of metrics, not prone to manipulation, is paramount for evaluating influence when journals claim “International” status. The ScientoBASE project defines internationality in terms of publication quality and spread of influence beyond geographical boundaries. This is acheived through quantified metrics, like the NLIQ, OCQ, SNIP and ICR, passed into the Cobb Douglas Production Function to estimate the range of influence a journal has over its audience. The global optima of this range is the maximum projected internationality score, or the internationality index of the journal. The optimization, however, being multivariate and constrained presents several challenges to classical techniques, such as curvature variation, premature convergence and parameter scaling. This study approaches these issues by optimizing through the Swarm Intelligence meta-heuristic. Particle Swarm Optimization makes no assumptions on the function being optimized and does away with the need to calculate a gradient. These advantages circumvent the aforementioned issues and highlight the need for traction on machine learning in optimization. 
The model presented here observes that each journal has an associated globally optimal internationality score that fluctuates proportionally to input metrics, thereby describing a robust confluence of key influence indicators that pave way for investigating alternative criteria for attributing credits to publications.}, booktitle={Modeling, Machine Learning and Astronomy}, publisher={Springer Singapore}, author={Khaidem, Luckyson and Yedida, Rahul and Theophilus, Abhijit J.}, year={2020}, pages={3–14}, collection={Communications in Computer and Information Science} } @inproceedings{sridhar_saha_shaikh_yedida_saha_2020, title={Parsimonious Computing: A Minority Training Regime for Effective Prediction in Large Microarray Expression Data Sets}, ISBN={9781728169262}, url={http://dx.doi.org/10.1109/ijcnn48605.2020.9207083}, DOI={10.1109/ijcnn48605.2020.9207083}, abstractNote={Rigorous mathematical investigation of learning rates used in back-propagation in shallow neural networks has become a necessity. This is because experimental evidence needs to be endorsed by a theoretical background. Such theory may be helpful in reducing the volume of experimental effort to accomplish desired results. We leveraged the functional property of Mean Square Error, which is Lipschitz continuous to compute learning rate in shallow neural networks. We claim that our approach reduces tuning efforts, especially when a significant corpus of data has to be handled. We achieve remarkable improvement in saving computational cost while surpassing prediction accuracy reported in literature. The learning rate, proposed here, is the inverse of the Lipschitz constant. The work results in a novel method for carrying out gene expression inference on large microarray data sets with a shallow architecture constrained by limited computing resources. 
A combination of random sub-sampling of the dataset, an adaptive Lipschitz constant inspired learning rate and a new activation function, A-ReLU helped accomplish the results reported in the paper.}, booktitle={2020 International Joint Conference on Neural Networks (IJCNN)}, publisher={IEEE}, author={Sridhar, Shailesh and Saha, Snehanshu and Shaikh, Azhar and Yedida, Rahul and Saha, Sriparna}, year={2020}, month={Jul}, pages={1–8} } @article{parsimonious_computing_2020, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85169948146&partnerID=MN8TOARS}, DOI={10.48550/arxiv.2005.08442}, abstractNote={Rigorous mathematical investigation of learning rates used in back-propagation in shallow neural networks has become a necessity. This is because experimental evidence needs to be endorsed by a theoretical background. Such theory may be helpful in reducing the volume of experimental effort to accomplish desired results. We leveraged the functional property of Mean Square Error, which is Lipschitz continuous to compute learning rate in shallow neural networks. We claim that our approach reduces tuning efforts, especially when a significant corpus of data has to be handled. We achieve remarkable improvement in saving computational cost while surpassing prediction accuracy reported in literature. The learning rate, proposed here, is the inverse of the Lipschitz constant. The work results in a novel method for carrying out gene expression inference on large microarray data sets with a shallow architecture constrained by limited computing resources. 
A combination of random sub-sampling of the dataset, an adaptive Lipschitz constant inspired learning rate and a new activation function, A-ReLU helped accomplish the results reported in the paper.}, journal={arXiv}, year={2020} } @book{yedida_abrar_melo-filho_muratov_chirkova_tropsha_2020a, title={Text Mining to Identify and Extract Novel Disease Treatments From Unstructured Datasets}, number={2011.07959}, author={Yedida, R. and Abrar, S.M. and Melo-Filho, C. and Muratov, E. and Chirkova, R. and Tropsha, A.}, year={2020} } @article{yedida_abrar_melo-filho_muratov_chirkova_tropsha_2020b, title={Text mining to identify and extract novel disease treatments from unstructured datasets}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85171055748&partnerID=MN8TOARS}, DOI={10.48550/arxiv.2011.07959}, abstractNote={Objective: We aim to learn potential novel cures for diseases from unstructured text sources. More specifically, we seek to extract drug-disease pairs of potential cures to diseases by a simple reasoning over the structure of spoken text. Materials and Methods: We use Google Cloud to transcribe podcast episodes of an NPR radio show. We then build a pipeline for systematically pre-processing the text to ensure quality input to the core classification model, which feeds to a series of post-processing steps for obtaining filtered results. Our classification model itself uses a language model pre-trained on PubMed text. The modular nature of our pipeline allows for ease of future developments in this area by substituting higher quality components at each stage of the pipeline. As a validation measure, we use ROBOKOP, an engine over a medical knowledge graph with only validated pathways, as a ground truth source for checking the existence of the proposed pairs. For the proposed pairs not found in ROBOKOP, we provide further verification using Chemotext. Results: We found 30.4% of our proposed pairs in the ROBOKOP database. 
For example, our model successfully identified that Omeprazole can help treat heartburn. We discuss the significance of this result, showing some examples of the proposed pairs. Discussion and Conclusion: The agreement of our results with the existing knowledge source indicates a step in the right direction. Given the plug-and-play nature of our framework, it is easy to add, remove, or modify parts to improve the model as necessary. We discuss the results showing some examples, and note that this is a potentially new line of research that has further scope to be explored. Although our approach was originally oriented on radio podcast transcripts, it is input-agnostic and could be applied to any source of textual data and to any problem of interest.}, journal={arXiv}, author={Yedida, R. and Abrar, S.M. and Melo-Filho, C. and Muratov, E. and Chirkova, R. and Tropsha, A.}, year={2020} } @article{saha_nagaraj_mathur_yedida_2019, title={Evolution of novel activation functions in neural network training with applications to classification of exoplanets}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85170132364&partnerID=MN8TOARS}, DOI={10.48550/arxiv.1906.01975}, journal={arXiv}, author={Saha, S. and Nagaraj, N. and Mathur, A. and Yedida, R.}, year={2019} } @article{yedida_saha_2019, title={LipschitzLR: Using theoretically computed adaptive learning rates for fast convergence}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85170970222&partnerID=MN8TOARS}, DOI={10.48550/arxiv.1902.07399}, abstractNote={Optimizing deep neural networks is largely thought to be an empirical process, requiring manual tuning of several hyper-parameters, such as learning rate, weight decay, and dropout rate. Arguably, the learning rate is the most important of these to tune, and this has gained more attention in recent works. In this paper, we propose a novel method to compute the learning rate for training deep neural networks with stochastic gradient descent. 
We first derive a theoretical framework to compute learning rates dynamically based on the Lipschitz constant of the loss function. We then extend this framework to other commonly used optimization algorithms, such as gradient descent with momentum and Adam. We run an extensive set of experiments that demonstrate the efficacy of our approach on popular architectures and datasets, and show that commonly used learning rates are an order of magnitude smaller than the ideal value.}, journal={arXiv}, author={Yedida, R. and Saha, S.}, year={2019} } @article{agrawal_yang_agrawal_yedida_shen_menzies_2019, title={Simpler hyperparameter optimization for software analytics: Why, how, when?}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85169982985&partnerID=MN8TOARS}, DOI={10.48550/arxiv.1912.04061}, journal={arXiv}, author={Agrawal, A. and Yang, X. and Agrawal, R. and Yedida, R. and Shen, X. and Menzies, T.}, year={2019} } @misc{yedida_2018a, title={An Introduction to Data Analysis}, author={Yedida, R.}, year={2018} } @article{yedida_reddy_vahi_j._abhilash_kulkarni_2018, title={Employee attrition prediction}, url={http://www.scopus.com/inward/record.url?eid=2-s2.0-85095619264&partnerID=MN8TOARS}, journal={arXiv}, author={Yedida, R. and Reddy, R. and Vahi, R. and J., Rahul and Abhilash and Kulkarni, D.}, year={2018} } @misc{yedida_2018b, title={How to design a Flappy Bird game}, author={Yedida, R.}, year={2018} } @misc{yedida_2018c, title={Machine Learning}, author={Yedida, R.}, year={2018} } @misc{yedida_2017, title={Complexity Classes and NP-Completeness}, author={Yedida, R.}, year={2017} }