@article{baldassarre_ernst_hermann_menzies_yedida_2023, title={(Re)Use of Research Results (Is Rampant)}, volume={66}, ISSN={["1557-7317"]}, DOI={10.1145/3554976}, abstractNote={Prior pessimism about reuse in software engineering research may have been a result of using the wrong methods to measure the wrong things.}, number={2}, journal={COMMUNICATIONS OF THE ACM}, author={Baldassarre, Maria Teresa and Ernst, Neil and Hermann, Ben and Menzies, Tim and Yedida, Rahul}, year={2023}, month={Feb}, pages={75–81} } @article{yedida_krishna_kalia_menzies_xiao_vukovic_2023, title={An expert system for redesigning software for cloud applications}, volume={219}, ISSN={["1873-6793"]}, DOI={10.1016/j.eswa.2023.119673}, abstractNote={Cloud-based software has many advantages. When services are divided into many independent components, they are easier to update. Also, during peak demand, it is easier to scale cloud services (just hire more CPUs). Hence, many organizations are partitioning their monolithic enterprise applications into cloud-based microservices. Recently there has been much work using machine learning to simplify this partitioning task. Despite much research, no single partitioning method can be recommended as generally useful. More specifically, those prior solutions are “brittle”; i.e. if they work well for one kind of goal in one dataset, then they can be sub-optimal if applied to many datasets and multiple goals. This work extends prior work and proposes DEEPLY to fix the brittleness problem. Specifically, we use (a) hyper-parameter optimization to sample from the Pareto frontier of configurations (b) a weighted loss to choose optimally from this Pareto frontier (c) the 1cycle learning rate policy to avoid local minima with Adam and (d) spectral clustering over k-means. Our work shows that DEEPLY outperforms other algorithms in this space across different metrics. 
Moreover, our ablation study reveals that of the changes, the weighted loss is the most important, followed by hyper-parameter optimization (contrary to prior belief). To enable the reuse of this research, DEEPLY is available on-line at .}, journal={EXPERT SYSTEMS WITH APPLICATIONS}, author={Yedida, Rahul and Krishna, Rahul and Kalia, Anup and Menzies, Tim and Xiao, Jin and Vukovic, Maja}, year={2023}, month={Jun} } @article{yedida_kang_tu_yang_lo_menzies_2023, title={How to Find Actionable Static Analysis Warnings: A Case Study With FindBugs}, volume={49}, ISSN={["1939-3520"]}, DOI={10.1109/TSE.2023.3234206}, abstractNote={Automatically generated static code warnings suffer from a large number of false alarms. Hence, developers only take action on a small percent of those warnings. To better predict which static code warnings should not be ignored, we suggest that analysts need to look deeper into their algorithms to find choices that better improve the particulars of their specific problem. Specifically, we show here that effective predictors of such warnings can be created by methods that locally adjust the decision boundary (between actionable warnings and others). These methods yield a new high water-mark for recognizing actionable static code warnings. 
For eight open-source Java projects (cassandra, jmeter, commons, lucene-solr, maven, ant, tomcat, derby) we achieve perfect test results on 4/8 datasets and, overall, a median AUC (area under the true negatives, true positives curve) of 92%.}, number={4}, journal={IEEE TRANSACTIONS ON SOFTWARE ENGINEERING}, author={Yedida, Rahul and Kang, Hong Jin and Tu, Huy and Yang, Xueqi and Lo, David and Menzies, Tim}, year={2023}, month={Apr}, pages={2856–2872} } @article{yedida_menzies_2022, title={How to Improve Deep Learning for Software Analytics (a case study with code smell detection)}, ISSN={["2160-1852"]}, DOI={10.1145/3524842.3528458}, abstractNote={To reduce technical debt and make code more maintainable, it is important to be able to warn programmers about code smells. State-of-the-art code smell detectors use deep learners, usually without exploring alternatives. For example, one promising alternative is GHOST (from TSE'21) that relies on a combination of hyper-parameter optimization of feedforward neural networks and a novel oversampling technique.}, journal={2022 MINING SOFTWARE REPOSITORIES CONFERENCE (MSR 2022)}, author={Yedida, Rahul and Menzies, Tim}, year={2022}, pages={156–166} } @article{yedida_saha_2021, title={Beginning with machine learning: a comprehensive primer}, volume={7}, ISSN={["1951-6401"]}, DOI={10.1140/epjs/s11734-021-00209-7}, journal={EUROPEAN PHYSICAL JOURNAL-SPECIAL TOPICS}, author={Yedida, Rahul and Saha, Snehanshu}, year={2021}, month={Jul} } @book{baldassarre_ernst_hermann_menzies_yedida_2021, title={Crowdsourcing the State of the Art(ifacts)}, number={2108.06821}, author={Baldassarre, M.T. and Ernst, N. and Hermann, B. and Menzies, T. 
and Yedida, R.}, year={2021} } @article{yedida_menzies_2021a, title={Documenting Evidence of a Reuse of 'A Systematic Study of the Class Imbalance Problem in Convolutional Neural Networks'}, url={https://doi.org/10.1145/3468264.3477212}, DOI={10.1145/3468264.3477212}, abstractNote={We report here the reuse of oversampling, and modifications to the basic approach, used in a recent TSE ’21 paper by Yedida & Menzies. The method reused is the oversampling technique studied by Buda et al. These methods were studied in the SE domain (specifically, for defect prediction), and extended by Yedida & Menzies.}, journal={PROCEEDINGS OF THE 29TH ACM JOINT MEETING ON EUROPEAN SOFTWARE ENGINEERING CONFERENCE AND SYMPOSIUM ON THE FOUNDATIONS OF SOFTWARE ENGINEERING (ESEC/FSE '21)}, publisher={ACM}, author={Yedida, Rahul and Menzies, Tim}, year={2021}, pages={1595–1595} } @article{yedida_menzies_2021b, title={Documenting Evidence of a Reuse of 'On the Number of Linear Regions of Deep Neural Networks'}, url={https://doi.org/10.1145/3468264.3477213}, DOI={10.1145/3468264.3477213}, abstractNote={We report here the reuse of theoretical insights from deep learning literature, used in a recent TSE '21 paper by Yedida & Menzies. The artifact replicated is the lower bound on the number of piecewise linear regions in the decision boundary of a feedforward neural network with ReLU activations, as studied by Montufar et al. We document the reuse of Theorem 4 from Montufar et al. 
by Yedida & Menzies.}, journal={PROCEEDINGS OF THE 29TH ACM JOINT MEETING ON EUROPEAN SOFTWARE ENGINEERING CONFERENCE AND SYMPOSIUM ON THE FOUNDATIONS OF SOFTWARE ENGINEERING (ESEC/FSE '21)}, publisher={ACM}, author={Yedida, Rahul and Menzies, Tim}, year={2021}, pages={1596–1596} } @article{yang_chen_yedida_yu_menzies_2021, title={Learning to recognize actionable static code warnings (is intrinsically easy)}, volume={26}, ISSN={["1573-7616"]}, DOI={10.1007/s10664-021-09948-6}, abstractNote={Static code warning tools often generate warnings that programmers ignore. Such tools can be made more useful via data mining algorithms that select the “actionable” warnings; i.e. the warnings that are usually not ignored. In this paper, we look for actionable warnings within a sample of 5,675 actionable warnings seen in 31,058 static code warnings from FindBugs. We find that data mining algorithms can find actionable warnings with remarkable ease. Specifically, a range of data mining methods (deep learners, random forests, decision tree learners, and support vector machines) all achieved very good results (recalls and AUC(TRN, TPR) measures usually over 95% and false alarms usually under 5%). Given that all these learners succeeded so easily, it is appropriate to ask if there is something about this task that is inherently easy. We report that while our data sets have up to 58 raw features, those features can be approximated by less than two underlying dimensions. For such intrinsically simple data, many different kinds of learners can generate useful models with similar performance. Based on the above, we conclude that learning to recognize actionable static code warnings is easy, using a wide range of learning algorithms, since the underlying data is intrinsically simple. 
If we had to pick one particular learner for this task, we would suggest linear SVMs (since, at least in our sample, that learner ran relatively quickly and achieved the best median performance) and we would not recommend deep learning (since this data is intrinsically very simple).}, number={3}, journal={EMPIRICAL SOFTWARE ENGINEERING}, author={Yang, Xueqi and Chen, Jianfeng and Yedida, Rahul and Yu, Zhe and Menzies, Tim}, year={2021}, month={May} } @inproceedings{yedida_krishna_kalia_menzies_xiao_vukovic_2021, place={New York}, title={Lessons learned from hyper-parameter tuning for microservice candidate identification}, booktitle={Proceedings of the thirty-sixth IEEE/ACM International Conference on Automated Software Engineering (ASE)}, publisher={Association for Computing Machinery}, author={Yedida, R. and Krishna, R. and Kalia, A. and Menzies, T. and Xiao, J. and Vukovic, M.}, year={2021} } @article{yedida_saha_prashanth_2021, title={LipschitzLR: Using theoretically computed adaptive learning rates for fast convergence}, volume={51}, ISSN={["1573-7497"]}, url={https://doi.org/10.1007/s10489-020-01892-0}, DOI={10.1007/s10489-020-01892-0}, abstractNote={We present a novel theoretical framework for computing large, adaptive learning rates. Our framework makes minimal assumptions on the activations used and exploits the functional properties of the loss function. Specifically, we show that the inverse of the Lipschitz constant of the loss function is an ideal learning rate. We analytically compute formulas for the Lipschitz constant of several loss functions, and through extensive experimentation, demonstrate the strength of our approach using several architectures and datasets. In addition, we detail the computation of learning rates when other optimizers, namely, SGD with momentum, RMSprop, and Adam, are used. 
Compared to standard choices of learning rates, our approach converges faster, and yields better results.}, number={3}, journal={APPLIED INTELLIGENCE}, publisher={Springer Science and Business Media LLC}, author={Yedida, Rahul and Saha, Snehanshu and Prashanth, Tejas}, year={2021}, month={Mar}, pages={1460–1478} } @article{yedida_menzies_2021, title={On the Value of Oversampling for Deep Learning in Software Defect Prediction}, volume={48}, ISSN={0098-5589 1939-3520 2326-3881}, url={http://dx.doi.org/10.1109/TSE.2021.3079841}, DOI={10.1109/TSE.2021.3079841}, abstractNote={One truism of deep learning is that the automatic feature engineering (seen in the first layers of those networks) excuses data scientists from performing tedious manual feature engineering prior to running DL. For the specific case of deep learning for defect prediction, we show that that truism is false. Specifically, when we pre-process data with a novel oversampling technique called fuzzy sampling, as part of a larger pipeline called GHOST (Goal-oriented Hyper-parameter Optimization for Scalable Training), then we can do significantly better than the prior DL state of the art in 14/20 defect data sets. Our approach yields state-of-the-art results significantly faster deep learners. These results present a cogent case for the use of oversampling prior to applying deep learning on software defect prediction datasets.}, number={8}, journal={IEEE Transactions on Software Engineering}, publisher={Institute of Electrical and Electronics Engineers (IEEE)}, author={Yedida, Rahul and Menzies, Tim}, year={2021}, pages={1–1} } @article{agrawal_yang_agrawal_yedida_shen_menzies_2021, title={Simpler Hyperparameter Optimization for Software Analytics: Why, How, When}, volume={48}, ISSN={0098-5589 1939-3520 2326-3881}, url={http://dx.doi.org/10.1109/TSE.2021.3073242}, DOI={10.1109/TSE.2021.3073242}, abstractNote={How can we make software analytics simpler and faster? 
One method is to match the complexity of analysis to the intrinsic complexity of the data being explored. For example, hyperparameter optimizers find the control settings for data miners that improve the predictions generated via software analytics. Sometimes, very fast hyperparameter optimization can be achieved by "DODGE-ing"; i.e. simply steering away from settings that lead to similar conclusions. But when is it wise to use that simple approach and when must we use more complex (and much slower) optimizers? To answer this, we applied hyperparameter optimization to 120 SE data sets that explored bad smell detection, predicting Github issue close time, bug report analysis, defect prediction, and dozens of other non-SE problems. We find that the simple DODGE works best for data sets with low "intrinsic dimensionality" (u ~ 3) and very poorly for higher-dimensional data (u > 8). Nearly all the SE data seen here was intrinsically low-dimensional, indicating that DODGE is applicable for many SE analytics tasks.}, number={8}, journal={IEEE Transactions on Software Engineering}, publisher={Institute of Electrical and Electronics Engineers (IEEE)}, author={Agrawal, Amritanshu and Yang, Xueqi and Agrawal, Rishabh and Yedida, Rahul and Shen, Xipeng and Menzies, Tim}, year={2021}, pages={1–1} } @book{yedida_yang_menzies_2021, title={When SIMPLE is better than complex: A case study on deep learning for predicting Bugzilla issue close time}, number={2101.06319}, author={Yedida, R. and Yang, X. and Menzies, T.}, year={2021} } @misc{saha_nagaraj_mathur_yedida_sneha_2020, title={Evolution of novel activation functions in neural network training for astronomy data: habitability classification of exoplanets}, volume={229}, ISSN={["1951-6401"]}, url={https://doi.org/10.1140/epjst/e2020-000098-9}, DOI={10.1140/epjst/e2020-000098-9}, abstractNote={Quantification of habitability is a complex task. Previous attempts at measuring habitability are well documented. 
Classification of exoplanets, on the other hand, is a different approach and depends on quality of training data available in habitable exoplanet catalogs. Classification is the task of predicting labels of newly discovered planets based on available class labels in the catalog. We present analytical exploration of novel activation functions as consequence of integration of several ideas leading to implementation and subsequent use in habitability classification of exoplanets. Neural networks, although a powerful engine in supervised methods, often require expensive tuning efforts for optimized performance. Habitability classes are hard to discriminate, especially when attributes used as hard markers of separation are removed from the data set. The solution is approached from the point of investigating analytical properties of the proposed activation functions. The theory of ordinary differential equations and fixed point are exploited to justify the “lack of tuning efforts” to achieve optimal performance compared to traditional activation functions. Additionally, the relationship between the proposed activation functions and the more popular ones is established through extensive analytical and empirical evidence. Finally, the activation functions have been implemented in plain vanilla feed-forward neural network to classify exoplanets. The mathematical exercise supplements the grand idea of classifying exoplanets, computing habitability scores/indices and automatic grouping of the exoplanets converging at some level.}, number={16}, journal={EUROPEAN PHYSICAL JOURNAL-SPECIAL TOPICS}, publisher={Springer Science and Business Media LLC}, author={Saha, Snehanshu and Nagaraj, Nithin and Mathur, Archana and Yedida, Rahul and Sneha, H. 
R.}, year={2020}, month={Nov}, pages={2629–2738} } @inbook{khaidem_yedida_theophilus_2020, series={Communications in Computer and Information Science}, title={Optimizing Inter-nationality of Journals: A Classical Gradient Approach Revisited via Swarm Intelligence}, ISBN={9789813364622 9789813364639}, ISSN={1865-0929 1865-0937}, url={http://dx.doi.org/10.1007/978-981-33-6463-9_1}, DOI={10.1007/978-981-33-6463-9_1}, abstractNote={With the growth of a vast number of new journals, the de facto definitions of Internationality has raised debate across researchers. A robust set of metrics, not prone to manipulation, is paramount for evaluating influence when journals claim “International” status. The ScientoBASE project defines internationality in terms of publication quality and spread of influence beyond geographical boundaries. This is achieved through quantified metrics, like the NLIQ, OCQ, SNIP and ICR, passed into the Cobb Douglas Production Function to estimate the range of influence a journal has over its audience. The global optima of this range is the maximum projected internationality score, or the internationality index of the journal. The optimization, however, being multivariate and constrained presents several challenges to classical techniques, such as curvature variation, premature convergence and parameter scaling. This study approaches these issues by optimizing through the Swarm Intelligence meta-heuristic. Particle Swarm Optimization makes no assumptions on the function being optimized and does away with the need to calculate a gradient. These advantages circumvent the aforementioned issues and highlight the need for traction on machine learning in optimization. 
The model presented here observes that each journal has an associated globally optimal internationality score that fluctuates proportionally to input metrics, thereby describing a robust confluence of key influence indicators that pave way for investigating alternative criteria for attributing credits to publications.}, booktitle={Modeling, Machine Learning and Astronomy}, publisher={Springer Singapore}, author={Khaidem, Luckyson and Yedida, Rahul and Theophilus, Abhijit J.}, year={2020}, pages={3–14}, collection={Communications in Computer and Information Science} } @inproceedings{sridhar_saha_shaikh_yedida_saha_2020, title={Parsimonious Computing: A Minority Training Regime for Effective Prediction in Large Microarray Expression Data Sets}, ISBN={9781728169262}, url={http://dx.doi.org/10.1109/ijcnn48605.2020.9207083}, DOI={10.1109/ijcnn48605.2020.9207083}, abstractNote={Rigorous mathematical investigation of learning rates used in back-propagation in shallow neural networks has become a necessity. This is because experimental evidence needs to be endorsed by a theoretical background. Such theory may be helpful in reducing the volume of experimental effort to accomplish desired results. We leveraged the functional property of Mean Square Error, which is Lipschitz continuous to compute learning rate in shallow neural networks. We claim that our approach reduces tuning efforts, especially when a significant corpus of data has to be handled. We achieve remarkable improvement in saving computational cost while surpassing prediction accuracy reported in literature. The learning rate, proposed here, is the inverse of the Lipschitz constant. The work results in a novel method for carrying out gene expression inference on large microarray data sets with a shallow architecture constrained by limited computing resources. 
A combination of random sub-sampling of the dataset, an adaptive Lipschitz constant inspired learning rate and a new activation function, A-ReLU helped accomplish the results reported in the paper.}, booktitle={2020 International Joint Conference on Neural Networks (IJCNN)}, publisher={IEEE}, author={Sridhar, Shailesh and Saha, Snehanshu and Shaikh, Azhar and Yedida, Rahul and Saha, Sriparna}, year={2020}, month={Jul}, pages={1–8} } @book{yedida_abrar_melo-filho_muratov_chirkova_tropsha_2020, title={Text Mining to Identify and Extract Novel Disease Treatments From Unstructured Datasets}, number={2011.07959}, author={Yedida, R. and Abrar, S.M. and Melo-Filho, C. and Muratov, E. and Chirkova, R. and Tropsha, A.}, year={2020} } @misc{yedida_2018a, title={An Introduction to Data Analysis}, author={Yedida, R.}, year={2018} } @misc{yedida_2018b, title={How to design a Flappy Bird game}, author={Yedida, R.}, year={2018} } @misc{yedida_2018c, title={Machine Learning}, author={Yedida, R.}, year={2018} } @misc{yedida_2017, title={Complexity Classes and NP-Completeness}, author={Yedida, R.}, year={2017} }