% Bibliography database: four journal-article records that all list
% Huberman, David as an author. Text outside entries is ignored by BibTeX.
%
% Conventions used below: one field per line, lowercase field names,
% bare ISSN/DOI values, unquoted three-letter month macros, journal names
% in normal case, and "and others" for truncated author lists.
% The non-standard abstractNote field is kept under its original name so
% that standard styles keep ignoring it (the text contains a literal
% percent sign, which is unsafe if a style were to print it).

% Published correction to the 2021 article below (its DOI is quoted in the title).
% NOTE(review): volume (8) equals the month number (Aug) -- possibly an
% export artifact; verify against the publisher record.
@article{huberman_reich_bondell_2022,
  author  = {Huberman, David B. and Reich, Brian J. and Bondell, Howard D.},
  title   = {Nonparametric conditional density estimation in a deep learning framework for short-term forecasting ({May}, 10.1007/s10651-021-00499-z, 2021)},
  journal = {Environmental and Ecological Statistics},
  volume  = {8},
  year    = {2022},
  month   = aug,
  issn    = {1573-3009},
  doi     = {10.1007/s10651-022-00543-6},
}

% NOTE(review): volume (5) equals the month number (May) -- possibly an
% export artifact; verify against the publisher record.
@article{huberman_reich_bondell_2021,
  author  = {Huberman, David B. and Reich, Brian J. and Bondell, Howard D.},
  title   = {Nonparametric conditional density estimation in a deep learning framework for short-term forecasting},
  journal = {Environmental and Ecological Statistics},
  volume  = {5},
  year    = {2021},
  month   = may,
  issn    = {1573-3009},
  doi     = {10.1007/s10651-021-00499-z},
  abstractNote = {Short-term forecasting is an important tool in understanding environmental processes. In this paper, we incorporate machine learning algorithms into a conditional distribution estimator for the purposes of forecasting tropical cyclone intensity. Many machine learning techniques give a single-point prediction of the conditional distribution of the target variable, which does not give a full accounting of the prediction variability. Conditional distribution estimation can provide extra insight on predicted response behavior, which could influence decision-making and policy. We propose a technique that simultaneously estimates the entire conditional distribution and flexibly allows for machine learning techniques to be incorporated. A smooth model is fit over both the target variable and covariates, and a logistic transformation is applied on the model output layer to produce an expression of the conditional density function. We provide two examples of machine learning models that can be used, polynomial regression and deep learning models. To achieve computational efficiency, we propose a case–control sampling approximation to the conditional distribution. A simulation study for four different data distributions highlights the effectiveness of our method compared to other machine learning-based conditional distribution estimation techniques. We then demonstrate the utility of our approach for forecasting purposes using tropical cyclone data from the Atlantic Seaboard. This paper gives a proof of concept for the promise of our method, further computational developments can fully unlock its insights in more complex forecasting and other applications.},
}

% The url field duplicates the DOI via the resolver; kept as exported.
@article{huberman_reich_pacifici_collazo_2020,
  author    = {Huberman, David B. and Reich, Brian J. and Pacifici, Krishna and Collazo, Jaime A.},
  title     = {Estimating the drivers of species distributions with opportunistic data using mediation analysis},
  journal   = {Ecosphere},
  volume    = {11},
  number    = {6},
  year      = {2020},
  month     = jun,
  publisher = {Wiley},
  issn      = {2150-8925},
  doi       = {10.1002/ecs2.3165},
  url       = {https://doi.org/10.1002/ecs2.3165},
  abstractNote = {Ecological occupancy modeling has historically relied on high-quality, low-quantity designed-survey data for estimation and prediction. In recent years, there has been a large increase in the amount of high-quantity, unknown-quality opportunistic data. This has motivated research on how best to combine these two data sources in order to optimize inference. Existing methods can be infeasible for large datasets or require opportunistic data to be located where designed-survey data exist. These methods map species occupancies, motivating a need to properly evaluate covariate effects (e.g., land cover proportion) on their distributions. We describe a spatial estimation method for supplementarily including additional opportunistic data using mediation analysis concepts. The opportunistic data mediate the effect of the covariate on the designed-survey data response, decomposing it into a direct and indirect effect. A component of the indirect effect can then be quickly estimated via regressing the mediator on the covariate, while the other components are estimated through a spatial occupancy model. The regression step allows for use of large quantities of opportunistic data that can be collected in locations with no designed-survey data available. Simulation results suggest that the mediated method produces an improvement in relative MSE when the data are of reasonable quality. However, when the simulated opportunistic data are poorly correlated with the true spatial process, the standard, unmediated method is still preferable. A spatiotemporal extension of the method is also developed for analyzing the effect of deciduous forest land cover on red-eyed vireo distribution in the southeastern United States and find that including the opportunistic data do not lead to a substantial improvement. Opportunistic data quality remains an important consideration when employing this method, as with other data integration methods.},
}

% The exported key for this entry contained a space ("..._et al._2017"),
% which BibTeX cannot parse as a citation key; the space is replaced by an
% underscore. Any citation of the old key must use the new one.
% The truncated author list uses the "and others" token instead of a
% literal "et al." pseudo-author.
% NOTE(review): the colon before "Results From" was restored from memory of
% the published title -- verify against the publisher record.
% NOTE(review): the abstract text is truncated (ends with an ellipsis) and
% still carries fused reference markers such as "older.1" and "events2".
@article{orgel_wojdyla_huberman_halperin_breithardt_singer_fox_hankey_mahaffey_jones_et_al._2017,
  author  = {Orgel, Ryan and Wojdyla, Daniel and Huberman, David and Halperin, Jonathan L. and Breithardt, Guenter and Singer, Daniel E. and Fox, Keith A. A. and Hankey, Graeme J. and Mahaffey, Kenneth W. and Jones, W. Schuyler and others},
  title   = {Noncentral Nervous System Systemic Embolism in Patients With Atrial Fibrillation: Results From {ROCKET AF} (Rivaroxaban Once Daily, Oral, Direct Factor {Xa} Inhibition Compared With Vitamin {K} Antagonism for Prevention of Stroke and Embolism Trial in Atrial Fibrillation)},
  journal = {Circulation: Cardiovascular Quality and Outcomes},
  volume  = {10},
  number  = {5},
  year    = {2017},
  month   = may,
  issn    = {1941-7713},
  doi     = {10.1161/circoutcomes.116.003520},
  abstractNote = {Atrial fibrillation (AF) is common and occurs in 2% to 4% of adults 60 years of age or older.1 Thromboembolic events, including stroke and noncentral nervous system (CNS) systemic embolism (SE), are common complications. Non-CNS SE accounts for ≈10% of all thromboembolic events2 and is important to identify because they are associated with high morbidity and mortality. Using data from ROCKET AF (Rivaroxaban Once Daily, Oral, Direct Factor Xa Inhibition Compared With Vitamin K Antagonism for Prevention of Stroke and Embolism Trial in Atrial Fibrillation; NCT00403767), we describe the incidence, location, diagnosis, treatment, and outcomes in patients with non-CNS SE. Baseline characteristics of patients with non-CNS SE are presented and discussed to identify those at increased risk of such an event. The design and methods of ROCKET AF have been described.3,4 In brief, it was a multicenter, randomized, double-blind, event-driven trial conducted at 1178 participating sites in 45 countries.3 Included patients had AF and were at moderate-to-high risk for stroke as defined by a CHADS2 score (Congestive Heart Failure, Hypertension, Age, Diabetes Mellitus, Stroke 2 Score) ≥2. A total of 14 264 patients were randomly assigned to receive fixed-dose rivaroxaban 20 mg daily (15 mg daily in patients with creatinine clearance 30–49 mL/min) or dose-adjusted warfarin (target international normalized ratio 2.0–3.0). Patients were intended to continue study drug throughout the trial unless discontinuation was clinically indicated (eg, safety concern, pregnancy, stroke or non-CNS SE, HIV diagnosis, abnormal liver function, creatinine clearance <25 mL/min on 2 consecutive measurements, or need for excluded medication).4 Non-CNS SE was defined as abrupt vascular insufficiency associated with clinical or radiological evidence of arterial occlusion in the absence of other likely mechanisms. In the presence of atherosclerotic peripheral artery disease (PAD), diagnosis of embolism to the lower extremities required …},
}