<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Med.</journal-id>
<journal-title>Frontiers in Medicine</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Med.</abbrev-journal-title>
<issn pub-type="epub">2296-858X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fmed.2025.1644287</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Medicine</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Modeling the impact of social determinants on breast cancer screening: a data-driven approach</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Ma</surname>
<given-names>Guofang</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn0001"><sup>&#x2020;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3082849/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Scully</surname>
<given-names>Miranda G.</given-names>
</name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="author-notes" rid="fn0001"><sup>&#x2020;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3139019/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Luo</surname>
<given-names>Jiahui</given-names>
</name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3114357/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Feng</surname>
<given-names>Jiazuo H.</given-names>
</name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Gunn</surname>
<given-names>Christine M.</given-names>
</name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>diFlorio-Alexander</surname>
<given-names>Roberta M.</given-names>
</name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2892908/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Tosteson</surname>
<given-names>Anna N. A.</given-names>
</name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Kraft</surname>
<given-names>Sally A.</given-names>
</name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
<xref ref-type="aff" rid="aff7"><sup>7</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3112742/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Marrero</surname>
<given-names>Wesley J.</given-names>
</name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2961235/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Department of Biomedical Data Science, Geisel School of Medicine at Dartmouth</institution>, <addr-line>Lebanon, NH</addr-line>, <country>United States</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of Computer Science, Dartmouth College</institution>, <addr-line>Hanover, NH</addr-line>, <country>United States</country></aff>
<aff id="aff3"><sup>3</sup><institution>Thayer School of Engineering, Dartmouth College</institution>, <addr-line>Hanover, NH</addr-line>, <country>United States</country></aff>
<aff id="aff4"><sup>4</sup><institution>Dartmouth Cancer Center, Dartmouth Hitchcock Medical Center</institution>, <addr-line>Lebanon, NH</addr-line>, <country>United States</country></aff>
<aff id="aff5"><sup>5</sup><institution>Department of Medicine, Geisel School of Medicine at Dartmouth</institution>, <addr-line>Lebanon, NH</addr-line>, <country>United States</country></aff>
<aff id="aff6"><sup>6</sup><institution>The Dartmouth Institute for Health Policy and Clinical Practice</institution>, <addr-line>Lebanon, NH</addr-line>, <country>United States</country></aff>
<aff id="aff7"><sup>7</sup><institution>Population Health, Dartmouth Health</institution>, <addr-line>Lebanon, NH</addr-line>, <country>United States</country></aff>
<author-notes>
<fn fn-type="edited-by" id="fn0002">
<p>Edited by: Aimee Campbell, Columbia University, United States</p>
</fn>
<fn fn-type="edited-by" id="fn0003">
<p>Reviewed by: William Ian Duncombe Rae, University of New South Wales, Australia</p>
<p>Rebecca S. Koszalinski, University of Central Florida, United States</p>
</fn>
<corresp id="c001">&#x002A;Correspondence: Wesley J. Marrero, <email>wesley.marrero@dartmouth.edu</email></corresp>
<fn fn-type="equal" id="fn0001"><p><sup>&#x2020;</sup>These authors have contributed equally to this work and share first authorship</p></fn>
</author-notes>
<pub-date pub-type="epub">
<day>20</day>
<month>08</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>12</volume>
<elocation-id>1644287</elocation-id>
<history>
<date date-type="received">
<day>10</day>
<month>06</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>31</day>
<month>07</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2025 Ma, Scully, Luo, Feng, Gunn, diFlorio-Alexander, Tosteson, Kraft and Marrero.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Ma, Scully, Luo, Feng, Gunn, diFlorio-Alexander, Tosteson, Kraft and Marrero</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec id="sec1">
<title>Background</title>
<p>This study addresses the critical science challenge of operationalizing social determinants of health (SDoH) in clinical practice. We develop and validate models demonstrating how SDoH predict mammogram screening behavior within a rural population. Our work provides healthcare systems with an evidence-based framework for translating SDoH data into effective interventions.</p>
</sec>
<sec id="sec2">
<title>Methods</title>
<p>We model the relationship between SDoH and breast cancer screening adherence using data from over 63,000 patients with established primary care relationships within the Dartmouth Health System, an academic health system serving northern New England through seven hospitals and affiliated ambulatory clinics. Our analytical framework integrates multiple machine learning techniques including light gradient boosting machine, random forest, elastic-net logistic regression, Bayesian regression, and decision tree classifier with SDoH questionnaire responses, demographic information, geographic indicators, insurance status, and clinical measures to quantify and characterize the influence of SDoH on mammogram scheduling and attendance.</p>
</sec>
<sec id="sec3">
<title>Results</title>
<p>Our models achieve moderate discriminative performance in predicting screening behaviors, with an average Area Under the Receiver Operating Characteristic Curve (ROC AUC) of 71% for scheduling and 70% for attendance in validation datasets. Key social factors influencing screening behaviors include geographic accessibility measured by the Rural&#x2013;Urban Commuting Area, neighborhood socioeconomic status captured by the Area Deprivation Index, and healthcare access factors related to clinical sites. Additional influential variables include months since the last mammogram, current age, and the Charlson Comorbidity Score, which intersect with social factors influencing healthcare utilization. By systematically modeling these SDoH and related factors, we identify opportunities for healthcare organizations to transform SDoH data into targeted, facility-level intervention strategies while adapting to payer incentives and addressing screening disparities.</p>
</sec>
<sec id="sec4">
<title>Conclusion</title>
<p>Our model provides healthcare systems with a data-driven approach to understanding and addressing how SDoH shape mammogram screening behaviors, particularly among rural populations. This framework offers valuable guidance for healthcare providers to better understand and improve patients&#x2019; screening behaviors through targeted, evidence-based interventions.</p>
</sec>
</abstract>
<kwd-group>
<kwd>predictive modeling</kwd>
<kwd>machine learning</kwd>
<kwd>cancer screening</kwd>
<kwd>implementation science</kwd>
<kwd>breast cancer</kwd>
</kwd-group>
<counts>
<fig-count count="3"/>
<table-count count="0"/>
<equation-count count="0"/>
<ref-count count="39"/>
<page-count count="11"/>
<word-count count="7434"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Healthcare Professions Education</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec5">
<label>1</label>
<title>Introduction</title>
<p>The integration of social determinants of health (SDoH) into clinical practice had emerged as a vital frontier in healthcare delivery transformation. Healthcare systems increasingly recognized that addressing SDoH can significantly impact health outcomes and costs (<xref ref-type="bibr" rid="ref1">1</xref>). Recent evidence demonstrated that higher SDoH needs correlate with greater expenses across both commercial and public insurance systems (<xref ref-type="bibr" rid="ref1">1</xref>). This recognition highlighted the need for financial incentives for healthcare organizations to incorporate SDoH data into their clinical workflows and decision-making processes (<xref ref-type="bibr" rid="ref2">2</xref>). Within this evolving landscape, breast cancer screening provided an ideal context for examining SDoH integration, as mammography represented a preventive service with well-documented benefits (<xref ref-type="bibr" rid="ref3">3</xref>). However, despite being an effective early detection tool for breast cancer, the second leading cause of cancer-related deaths among women globally, mammography screening rates consistently fell below national targets (<xref ref-type="bibr" rid="ref4">4</xref>). While clinical effectiveness and established guidelines provided strong evidence for mammography benefits, achieving optimal screening rates required addressing complex social, economic, and organizational factors that influenced patient access and engagement (<xref ref-type="bibr" rid="ref5">5</xref>, <xref ref-type="bibr" rid="ref6">6</xref>). This gap between evidence-based recommendations and clinical practice, including delayed scheduling and variations in screening accessibility across healthcare settings, reflects underlying barriers that extend beyond clinical factors alone (<xref ref-type="bibr" rid="ref7">7</xref>).</p>
<p>Various obstacles to breast cancer screening adherence have been documented in the literature, including socioeconomic challenges (<xref ref-type="bibr" rid="ref5">5</xref>, <xref ref-type="bibr" rid="ref6">6</xref>), insurance status (<xref ref-type="bibr" rid="ref5">5</xref>), geographic accessibility (<xref ref-type="bibr" rid="ref5">5</xref>, <xref ref-type="bibr" rid="ref8">8</xref>), transportation limitations (<xref ref-type="bibr" rid="ref8">8</xref>), cultural beliefs (<xref ref-type="bibr" rid="ref9">9</xref>, <xref ref-type="bibr" rid="ref10">10</xref>), health literacy levels (<xref ref-type="bibr" rid="ref6">6</xref>), and provider communication effectiveness (<xref ref-type="bibr" rid="ref9">9</xref>). While these studies provided valuable evidence, they varied in methodological approach, from large-scale systematic reviews (<xref ref-type="bibr" rid="ref5">5</xref>) to smaller qualitative investigations (<xref ref-type="bibr" rid="ref9">9</xref>, <xref ref-type="bibr" rid="ref10">10</xref>), with corresponding differences in generalizability and depth of insights. This diversity in methodological approaches across the broader literature made it challenging to develop unified frameworks for understanding how multiple social determinants simultaneously influenced screening behaviors. Collectively, these studies illustrated how personal, social, and systemic factors could intertwine to create complex patterns of healthcare utilization and screening behaviors (<xref ref-type="bibr" rid="ref11">11</xref>). Understanding these patterns required recognizing that social determinants do not operate in isolation but rather form interconnected networks of influence that shape individual health decisions.</p>
<p>While the relationships between SDoH and screening behaviors were well-documented, operationalizing SDoH data to improve screening outcomes still presented significant methodological challenges (<xref ref-type="bibr" rid="ref12 ref13 ref14">12&#x2013;14</xref>). The intricate connections between various social determinants and their variable impacts on clinical outcomes demanded sophisticated analytical approaches beyond traditional methods. Qualitative research had provided valuable foundations for identifying the multi-faceted nature of social factors influencing screening behaviors. For example, prior work had explored how economic stability and healthcare access barriers shaped lung cancer screening decisions among Latino communities (<xref ref-type="bibr" rid="ref15">15</xref>), how health system organizational factors created barriers to implementing social needs screening in primary care settings (<xref ref-type="bibr" rid="ref16">16</xref>), and how geographic and socioeconomic factors influenced cancer care trajectories and access to treatment (<xref ref-type="bibr" rid="ref17">17</xref>). However, these qualitative studies are inherently limited in their ability to analyze complex interactions among these factors at scale. Qualitative approaches, while providing rich contextual insights, typically examined small sample sizes that limit statistical power for detecting interaction effects between multiple social determinants. Additionally, the context-specific nature of qualitative research findings often limited their transferability across different healthcare settings and patient populations, making it difficult to establish generalizable relationships between social factors and screening behaviors.</p>
<p>These limitations underscored the need for analytical approaches that can handle large datasets and complex variable interactions. Machine learning approaches offered promising solutions to this complexity, enabling healthcare systems to analyze patterns within SDoH data and develop targeted interventions. These analytical techniques could identify subtle relationships across multiple social determinants simultaneously, which helped to reveal insights that might remain obscured using conventional methods. When healthcare systems could identify which combinations of social factors most strongly predict screening barriers, they could more effectively allocate resources and tailor interventions to the patients who would benefit most. By applying machine learning to SDoH data in the context of breast cancer screening, healthcare organizations could potentially develop personalized approaches to improving screening rates and meet their adherence targets.</p>
<p>In striving toward operationalizing SDoH data and overcoming the limitations of traditional analytical approaches, our study presented an integrated approach to predicting breast cancer screening behaviors. We first developed a generalizable framework for modeling the relationships between social determinants and mammogram scheduling and attendance, providing a structured approach to quantifying these complex influences. We then applied machine learning techniques to transform SDoH data into actionable insights that healthcare systems could use to improve mammogram adherence rates. Through this integrated approach, we aimed to create an evidence-informed methodology for leveraging SDoH data to enhance breast cancer screening outcomes while providing a replicable model that organizations could adapt for other preventive services and health outcomes. This work contributed to the implementation science pipeline, the process of moving research discoveries into routine healthcare practice (<xref ref-type="bibr" rid="ref18">18</xref>), by providing healthcare systems with quantitative tools to systematically translate SDoH data into actionable screening interventions.</p>
</sec>
<sec sec-type="materials|methods" id="sec6">
<label>2</label>
<title>Materials and methods</title>
<sec id="sec7">
<label>2.1</label>
<title>General framework for SDoH analysis in mammogram screening behavior</title>
<p>Our generalizable framework included the following steps: data pre-processing and variable construction, model selection and implementation, performance evaluation, and model explainability analysis. We detailed these steps and presented their execution for predicting the probability of mammogram screening behaviors, including both scheduling and attendance.</p>
<p>The comprehensive methodological details for each framework component, including specific algorithms, parameter settings, and validation procedures, were provided in <xref ref-type="sec" rid="sec34">Supplementary Methods M1&#x2013;M4</xref>. While this detailed framework was designed for broader application across healthcare systems, we demonstrated its implementation through a specific case study within the Dartmouth Health System.</p>
</sec>
<sec id="sec8">
<label>2.2</label>
<title>Case study: the Dartmouth health system</title>
<p>While the framework was designed to be generalizable across different healthcare systems, we applied it specifically to the Dartmouth Health System to demonstrate its practical utility and effectiveness in a real-world setting. Dartmouth Health was an academic health system serving patients across northern New England and nearby communities through seven community hospitals, affiliated ambulatory clinics, and the academic facility Dartmouth Hitchcock Medical Center (<xref ref-type="bibr" rid="ref19">19</xref>). The system encompassed facilities across Vermont and New Hampshire and utilized an integrated Epic electronic health record (EHR) system that enabled standardized data collection across most clinical sites (<xref ref-type="bibr" rid="ref19">19</xref>). With over 16,000 employees including 2,300 providers, the system delivered approximately 3 million outpatient visits annually and was recognized as a nationwide leader in rural health (<xref ref-type="bibr" rid="ref19">19</xref>). This application allowed us to assess the framework&#x2019;s ability to generate actionable insights within a defined healthcare context before broader implementation in diverse healthcare environments.</p>
<sec id="sec9">
<label>2.2.1</label>
<title>Framework overview for operationalizing SDoH</title>
<p><xref ref-type="fig" rid="fig1">Figure 1</xref> provided a visual representation of our analytical framework for operationalizing SDoH in breast cancer screening programs. This framework, as detailed in Section 2.1, offered a structured approach to integrating diverse healthcare data sources, implementing appropriate machine learning models, validating predictive performance, generating explainable insights, and translating findings into future intervention strategies. Building upon the methodological foundation established by previous work on breast cancer screening prediction models (<xref ref-type="bibr" rid="ref20">20</xref>), we compared the performance of multiple machine learning techniques. These techniques provided analytical strengths while maintaining interpretability for healthcare practitioners.</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>Framework overview for operationalizing SDoH in the Dartmouth health system.</p>
</caption>
<graphic xlink:href="fmed-12-1644287-g001.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Flowchart of a system architecture for mammogram scheduling and attendance. It includes data sources like mammogram scheduling, clinical risk factors, and demographics. Data wrangling involves integration and variable processing. There are separate model selections for scheduling and attendance using techniques such as logistic regression and random forest. Nested cross-validation employs strategies like ROSE oversampling. Explainability analysis utilizes feature importance and dependency analysis. The outcome informs future intervention strategies for high-risk patients through texts, messages, and phone calls.</alt-text>
</graphic>
</fig>
</sec>
<sec id="sec10">
<label>2.2.2</label>
<title>Study design and data sources</title>
<p>Our study built upon data from the Dartmouth Health Cancer Screening Outreach Program to develop a framework for operationalizing SDoH into breast cancer scheduling practice. This program operated within Dartmouth Health&#x2019;s network, which spanned primary and specialty care services throughout New Hampshire and Vermont. A notable characteristic of the Dartmouth Health dataset was its relatively limited racial and ethnic diversity, reflecting the demographic composition of northern New England. This relative homogeneity created a more controlled environment for analyzing other social determinants affecting predominantly rural populations, though it might restrict the model&#x2019;s ability to capture certain disparities related to race and ethnicity.</p>
<p>The study integrated three primary data sources: (1) patient information from individuals with established primary care relationships (<italic>n</italic>&#x202F;=&#x202F;63,537), defined as those who received their usual primary care with Dartmouth Health and had had at least one visit with a Dartmouth Health primary care provider in the previous 3&#x202F;years (medically-homed patients); (2) SDoH questionnaire responses (<italic>n</italic>&#x202F;=&#x202F;18,359) capturing various dimensions of patient health related social needs; and (3) clinical risk assessment scores from the Epic electronic health record systems, including the Charlson Comorbidity Index (<xref ref-type="bibr" rid="ref21">21</xref>) and General Adult Risk Scores (<xref ref-type="bibr" rid="ref22">22</xref>), which provided standardized measures of patient health status and comorbidities.</p>
<p>For demographic variables from the patient information data source, we consolidated 66 language preferences into two groups (English and Others) given that English represented 98% of the population. The original dataset contained eight racial categories (White, American Indian, Asian, Black or African American, Choose not to Disclose, Multi-Racial, Native Hawaiian/Other Pacific Islander, and Unknown). Due to small sample sizes in several subcategories, we consolidated these into five major groups: White, Asian, Black, Hispanic, and Other.</p>
<p>SDoH data were collected using Dartmouth Health&#x2019;s standardized Adult Screener questionnaire embedded in Epic. Patients&#x2019; self-reported answers were captured through the screener administered via the MyDH patient portal or during clinical visits. The screening tool assessed 37 social determinant domains including housing stability, food security, transportation access, social isolation, financial strain, employment status, and healthcare access barriers (<xref ref-type="sec" rid="sec34">Supplementary Tables S3, S5</xref>). While Epic&#x2019;s SDoH screening modules had demonstrated implementation feasibility in clinical settings (<xref ref-type="bibr" rid="ref23">23</xref>), formal psychometric validation data for the complete screening instrument had not been published. Our primary outcome measures were mammogram scheduling status and attendance status, which served as our indicators of patient engagement with breast cancer screening.</p>
</sec>
<sec id="sec11">
<label>2.2.3</label>
<title>Study population and inclusion criteria</title>
<p>The study population encompassed female patients aged 50&#x2013;75&#x202F;years receiving active care at Dartmouth Health primary care clinics, with active care defined as having completed a primary care visit at the health system within the previous 3&#x202F;years. To maintain focus on adherence to standard Dartmouth Health breast cancer screening schedules for women with average risk, we excluded patients with breast cancer history or elevated risk factors that would necessitate different scheduling protocols. To ensure consistent screening practices across study sites, we also excluded two clinical sites that utilized different appointment scheduling protocols from the standard Dartmouth Health approach. While these sites demonstrated higher adherence rates due to automatic scheduling, their inclusion would have confounded our analysis of standard care patterns by introducing scheduling protocol variability.</p>
</sec>
<sec id="sec12">
<label>2.2.4</label>
<title>Model validation</title>
<p>To ensure the external validity of our findings, we employed our models on a hold-out test set (20% of the data) that was not used during model development or hyperparameter tuning. This approach provided an unbiased assessment of model generalizability to new patients within the Dartmouth Health System. We applied consistent performance evaluation metrics between our development and test phases, allowing us to directly compare predictive capabilities and quantify how effectively our models can identify screening patterns in previously unseen data. This evaluation on independent data helped determine whether the relationships identified during model training remained stable when applied in new contexts, providing healthcare systems with confidence that the implementation insights generated by our models would be reliable and actionable in clinical settings.</p>
</sec>
</sec>
<sec id="sec13">
<label>2.3</label>
<title>Sensitivity analyses and secondary analyses</title>
<p>To assess the robustness of our findings to different analytical assumptions, we conducted sensitivity analyses focusing on missing data handling approaches. Specifically, we performed complete case analyses using only patients with complete SDoH questionnaire data as sensitivity checks for our primary imputation-based approach. These analyses used identical modeling frameworks and performance evaluation metrics as described in Section 2.1 to ensure comparability with our primary results.</p>
<p>Additionally, we conducted comprehensive secondary analyses to provide deeper insights into factors influencing mammogram screening behaviors, including age-stratified evaluations, SDoH-only models, patient-level models, and clinic-level analyses. Detailed methodologies and results for all secondary analyses are presented in <xref ref-type="sec" rid="sec34">Supplementary materials S1.1&#x2013;S1.5</xref> (Scheduling analyses) and <xref ref-type="sec" rid="sec34">Supplementary materials S2.1&#x2013;S2.5</xref> (Attendance analyses).</p>
</sec>
</sec>
<sec sec-type="results" id="sec14">
<label>3</label>
<title>Results</title>
<sec id="sec15">
<label>3.1</label>
<title>Data structure and missingness</title>
<p>Our analysis of SDoH questionnaire data revealed substantial variation in response completeness across the 37 administered questions. Missingness rates ranged from 10.2 to 92.4%, with a median missingness of 73.8% across all questions (<xref ref-type="sec" rid="sec34">Supplementary Table S1</xref>). This evaluation identified 11 questions that exceeded our pre-established 80% missingness threshold, which were subsequently excluded from model development to ensure implementation reliability. The excluded questions primarily addressed sensitive domains such as mental health status, substance use behaviors, and detailed information regarding past scheduling experiences.</p>
<p>Examination of the dataset revealed distinct patterns in both scheduling and attendance behaviors. Scheduling rates, calculated as the proportion of all eligible women aged 50&#x2013;75 who had a mammogram scheduled, showed substantial variation across clinical sites (4.4&#x2013;21.3%), insurance types (Medicare: 16%; Commercial: 12.9%), age groups, and neighborhood deprivation levels. For attendance, missed appointment rates varied by clinical site (1.9&#x2013;9.1%), insurance status (Medicaid Managed: 13%; Blue Cross: 4%), and racial demographics (Asian: 0.8%; Hispanic: 8.1%) (<xref ref-type="sec" rid="sec34">Supplementary Table S4</xref>). We found a linear relationship between neighborhood deprivation and missed appointments (ADI 1: 1.6%; ADI 10: 11.4%) and higher attendance in urban areas compared to rural settings. SDoH questionnaire responses indicated that housing instability (multi-residence: 12.4% vs. single-residence: 4.8% missed appointments), transportation barriers (unable to work due to transportation: 18.8% vs. no barriers: 4.9%), food insecurity (often: 14.3% vs. never: 4.9%), and health literacy challenges were associated with lower scheduling rates and higher missed appointment rates (<xref ref-type="sec" rid="sec34">Supplementary Tables S2, S3, S5</xref>).</p>
</sec>
<sec id="sec16">
<label>3.2</label>
<title>Analytical framework performance</title>
<sec id="sec17">
<label>3.2.1</label>
<title>Scheduling model performance</title>
<p>The light gradient boosting model demonstrated a moderate average out-of-sample performance in our cross-validation scheme (AUC&#x202F;=&#x202F;0.709), followed by random forest (AUC&#x202F;=&#x202F;0.702) and elastic-net logistic regression (AUC&#x202F;=&#x202F;0.608). Both tree-based models significantly outperformed the logistic regression approach, with the light gradient boosting model showing a statistical advantage over logistic regression (AUC difference&#x202F;=&#x202F;0.050, <italic>p</italic>&#x202F;&#x003C;&#x202F;0.001) and random forest similarly demonstrating superior performance compared to logistic regression (AUC difference&#x202F;=&#x202F;0.05, <italic>p</italic>&#x202F;&#x003C;&#x202F;0.001). The difference between gradient boosting and random forest models was minimal (AUC difference&#x202F;=&#x202F;0.0001) and not statistically significant (<italic>p</italic>&#x202F;=&#x202F;0.972), confirming that both tree-based approaches had comparable predictive power for this scheduling behavior prediction.</p>
<p>The gradient boosting model, our best-performing approach, showed strong consistency across validation scenarios. The model&#x2019;s performance ranged from 0.707 (worst AUC on validation sets) to 0.711 (best AUC on validation sets), indicating stable predictive performance. On the held-out test set, which estimates model performance on unseen data, the model achieved a relatively similar AUC of 0.67. This stability was particularly important for healthcare systems implementing SDoH-informed scheduling programs across diverse communities.</p>
</sec>
<sec id="sec18">
<label>3.2.2</label>
<title>Attendance model performance</title>
<p>For attendance prediction, we used an identical approach to compare three machine learning models: Bayesian regression (AUC&#x202F;=&#x202F;0.702), elastic-net logistic regression (AUC&#x202F;=&#x202F;0.699), and decision tree classifier (AUC&#x202F;=&#x202F;0.666).</p>
<p>DeLong&#x2019;s test showed that these three models performed comparably (AUC difference Bayesian&#x2013;logistic: 0.004, <italic>p</italic>&#x202F;&#x003E;&#x202F;0.05; Bayesian&#x2013;tree: 0.00423, <italic>p</italic>&#x202F;&#x003E;&#x202F;0.05; logistic&#x2013;tree: 0.00323, <italic>p</italic>&#x202F;&#x003E;&#x202F;0.05). Given this comparable performance, we selected logistic regression as our final model for its computational simplicity and independence from prior assumptions. This selected model demonstrated moderate consistency across validation datasets. Performance ranged from AUC&#x202F;=&#x202F;0.6531 to AUC&#x202F;=&#x202F;0.7851, with an average validation AUC of 0.7282. When evaluated on the held-out test set, the model maintained robust performance (AUC&#x202F;=&#x202F;0.699), which showed somewhat consistent predictive power.</p>
</sec>
<sec id="sec19">
<label>3.2.3</label>
<title>Permutation-based variable importance</title>
<p>Our variable importance analysis from light gradient boosting machine (scheduling model) and elastic-net logistic regression (attendance model) using a permutation-based approach identified key social drivers for future implementation focus (<xref ref-type="fig" rid="fig2">Figure 2</xref>).</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p>Permutation importance based on percentage decrease in AUC. <bold>(A)</bold> Top 10 most important variables in the scheduling model; <bold>(B)</bold> Top 10 most important variables in the attendance model.</p>
</caption>
<graphic xlink:href="fmed-12-1644287-g002.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Chart A shows permutation importance of features related to mammogram intervals, with "Months Since the Last Mammogram" at 18.55%. Chart B shows features impacting general adult scores, with "Primary Subdivision" at 24.5%.</alt-text>
</graphic>
</fig>
<p>For the scheduling model (<xref ref-type="fig" rid="fig2">Figure 2A</xref>), months since the last mammogram emerged as the strongest individual predictor with a permutation importance value of 18.55%. Clinical site was the second most influential factor (7.15%), followed by MYDH Portal active within last year (5.6%). When considering cumulative effects, these top three features together represented more than 30% of the total permutation importance, suggesting that temporal, demographic, and organizational factors were particularly crucial for non-scheduling behavior. Geographic and socioeconomic factors also showed some influence, with RUCA4 (1.72%) and ADI state rank (0.15%) completing the top five predictors. Almost all traditional SDoH questionnaire responses, such as homelessness (0.01%), food insecurity (0.01%), and financial hardship (0.01%), showed limited predictive power in our model and were therefore excluded from the figure. This less prominent role of direct SDoH questionnaire measures compared to geographic and facility-level indicators suggested that social determinants might exert their influence through complex pathways that are better captured by community-level metrics and healthcare delivery characteristics than by individual self-reported social needs.</p>
<p>For the attendance model, clinical site was the most influential variable, contributing 21.7% to model performance, followed by current age (12.6%) and General Adult Risk Score (10.8%). This importance indicated that both site-level factors and patient health burden strongly influence attendance (<xref ref-type="fig" rid="fig2">Figure 2B</xref>). Insurance class (7%), Financial hardship (6.8%) and patient race (6.8%) also played notable roles, suggesting that insurance coverage, economic constraints, and demographic factors affected screening adherence. In contrast to the scheduling model, the attendance model excluded months since the last mammogram (the strongest scheduling predictor) to avoid data leakage, as temporal information was incorporated into the attendance outcome definition (see Methods 2.1.1).</p>
</sec>
<sec id="sec20">
<label>3.2.4</label>
<title>Partial dependence plots</title>
<p>To further examine how key social drivers influence breast cancer screening behavior, we generated partial dependence plots for the most influential predictors (<xref ref-type="fig" rid="fig3">Figure 3</xref>). For the scheduling model, months since the last mammogram showed a distinct temporal pattern with a particularly high probability of not scheduling within the first few months, followed by a marked drop around 10&#x2013;12&#x202F;months, and subsequent fluctuations that stabilized after approximately 30&#x202F;months (<xref ref-type="fig" rid="fig3">Figure 3A</xref>). Among categorical predictors, clinical site demonstrated some variation in the probability of not scheduling across different healthcare facilities, with relatively consistent predicted probabilities ranging between approximately 0.45 and 0.55 (<xref ref-type="fig" rid="fig3">Figure 3B</xref>).</p>
<fig position="float" id="fig3">
<label>Figure 3</label>
<caption>
<p>Partial dependence plots for top predictors in breast cancer scheduling and attendance prediction. <bold>(A)</bold> Top numerical variable in the scheduling model; <bold>(B)</bold> Top categorical variable in the scheduling model; <bold>(C)</bold> Top numerical variable in the attendance model; <bold>(D)</bold> Top categorical variable in the attendance model.</p>
</caption>
<graphic xlink:href="fmed-12-1644287-g003.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Chart panel with four graphs: A. Line graph showing the predicted probability of non-scheduling over months since the last mammogram, fluctuating between 0.25 and 0.50. B. Bar chart displaying predicted probability of non-scheduling across different clinical sites, ranging around 0.50. C. Line graph indicating predicted probability of non-attendance against general adult score, rising gradually. D. Bar chart illustrating predicted probability of non-attendance by primary subdivision, consistently low, close to 0.0.</alt-text>
</graphic>
</fig>
<p>For the attendance model (<xref ref-type="fig" rid="fig3">Figures 3C</xref>, <xref ref-type="fig" rid="fig3">D</xref>), the General Adult Risk Score showed a clear positive relationship with non-attendance probability, with higher scores indicating greater health complexity and comorbidity burden. These higher scores were associated with an increased likelihood of missing scheduled appointments, rising from near-zero probability at low scores to approximately 0.25 at the highest health complexity scores (<xref ref-type="fig" rid="fig3">Figure 3C</xref>). In contrast, clinical site showed minimal variation in attendance patterns, with predicted non-attendance probabilities remaining consistently low (below 0.1) across most healthcare facilities (<xref ref-type="fig" rid="fig3">Figure 3D</xref>). While clinical site was one of the most important variables in our variable importance analysis, the practical differences in attendance rates between sites were modest once patient-level factors were accounted for.</p>
<p>Additional variables examined in our analysis, including the Charlson Comorbidity Index, housing stability, patient race, age, and various social determinants of health measures, showed relatively minimal impact on mammography screening, with little variation in the predicted probability of not scheduling (<xref ref-type="sec" rid="sec34">Supplementary Figure S1</xref>) or of screening non-attendance (<xref ref-type="sec" rid="sec34">Supplementary Figure S2</xref>).</p>
</sec>
</sec>
</sec>
<sec sec-type="discussion" id="sec21">
<label>4</label>
<title>Discussion</title>
<p>Our study showed that machine learning approaches can effectively identify the factors that influence breast cancer scheduling and attendance behavior within a healthcare system. While we initially examined SDoH as potential drivers of screening patterns, our findings revealed that healthcare systems might achieve better impact by focusing on factors within their direct control. The light gradient boosting model achieved clinically meaningful performance comparable to other predictive models addressing SDoH-related scheduling outcomes (<xref ref-type="bibr" rid="ref24">24</xref>, <xref ref-type="bibr" rid="ref25">25</xref>). Furthermore, the elastic-net logistic regression model achieved modest performance relative to other predictive models when addressing SDoH related attendance outcomes (<xref ref-type="bibr" rid="ref26">26</xref>, <xref ref-type="bibr" rid="ref27">27</xref>). Our findings highlighted several key implementation domains: temporal patterns in scheduling behavior revealed the dynamic nature of patient engagement; facility-level variables emerged as important predictors, reflecting the influence of organizational characteristics; and social determinants and geographic factors demonstrated the impact of community context, though to a lesser degree than anticipated. While the model&#x2019;s performance reflected the inherent challenges of quantifying social factors, it might provide healthcare systems with actionable insights for implementing scheduling programs that address both organizational and community-level barriers while accounting for individual patient characteristics.</p>
<sec id="sec22">
<label>4.1</label>
<title>Implementation implications</title>
<p>Our analysis revealed that the relationship between social determinants and screening behavior involved multiple interacting factors. The light gradient boosting model&#x2019;s ability to capture these relationships (AUC&#x202F;=&#x202F;0.709), together with similar performance from the random forest approach (AUC&#x202F;=&#x202F;0.702), suggested that accounting for non-linear interactions between social drivers might help healthcare systems better understand scheduling behavior patterns.</p>
<p>Consistent with established literature, our models confirmed that geographic accessibility (RUCA) and socioeconomic factors (ADI state rank) influence screening behaviors (<xref ref-type="bibr" rid="ref5">5</xref>, <xref ref-type="bibr" rid="ref8">8</xref>). However, our variable importance analysis revealed a hierarchy of influence that differs from traditional approaches focused primarily on individual-level social barriers. Temporal factors (months since last mammogram) and organizational factors (clinical site) emerged as the strongest predictors, suggesting that healthcare systems might achieve more immediate impact through system-level interventions rather than attempting to address individual patients&#x2019; social circumstances. This finding highlighted a different priority than much of the existing mammography literature, which emphasized individual-level barriers such as transportation, cultural beliefs, and health literacy (<xref ref-type="bibr" rid="ref6">6</xref>, <xref ref-type="bibr" rid="ref9">9</xref>, <xref ref-type="bibr" rid="ref10">10</xref>). While these individual SDoH factors remained important in our descriptive analyses, our machine learning approach revealed that facility-level variations and care patterns were more predictive of scheduling and attendance behavior. This pattern was particularly true for traditional SDoH questionnaire responses, where individual measures such as homelessness, food insecurity, and financial hardship each contributed no more than 0.01% to variable importance. This minimal predictive power might have reflected the substantial missingness in SDoH data, potential underreporting of sensitive information in clinical settings, or the possibility that geographic and organizational indicators served as more reliable proxies for underlying social determinants.</p>
<p>For Dartmouth Health specifically, this suggested that standardizing practices across clinical sites might yield greater improvements in screening rates than traditional patient-education or transportation-assistance programs. For attendance behavior, the evidence suggested a dual approach combining system-level standardization with targeted interventions. This recommendation was supported by our clinic-level analysis for scheduling (<xref ref-type="sec" rid="sec34">Supplementary material S1.4</xref>), which confirmed substantial performance variation across sites. However, the clinic-level analysis for attendance (<xref ref-type="sec" rid="sec34">Supplementary material S2.4</xref>) showed inconsistent results, limiting conclusions about organizational effects.</p>
<p>Our findings also illuminated the complex interplay between organizational and social factors that traditional regression approaches often missed (<xref ref-type="bibr" rid="ref11">11</xref>). The elastic-net logistic regression model&#x2019;s performance in capturing attendance patterns (AUC&#x202F;=&#x202F;0.699) demonstrated that patient health complexity (General Adult Risk Score) and digital engagement (portal activity) were critical factors that complement traditional socioeconomic predictors. This insight provided healthcare systems with a more nuanced understanding of how to target interventions across different patient populations.</p>
<p>Beyond these specific findings for breast cancer screening, our systematic framework laid the groundwork for analyzing SDoH&#x2019;s influence on other preventive health behaviors, demonstrating the potential for broader applications in improving routine preventive care utilization. The methodological approach of combining individual-level social determinants with organizational and temporal factors could be adapted to examine colorectal cancer screening, cervical cancer screening, and other preventive services where similar complex interactions between social drivers and healthcare delivery factors likely influence patient engagement.</p>
</sec>
<sec id="sec23">
<label>4.2</label>
<title>Methodological contributions</title>
<p>Our analytical approach offered several methodological contributions to healthcare delivery and implementation science. First, we demonstrated a novel approach to operationalizing SDoH in breast cancer scheduling and attendance practices, providing healthcare systems with a framework to translate social determinant screening tools into actionable screening strategies. Unlike previous work that analyzed nationwide census tract-level scheduling rates and focused primarily on geographic accessibility and demographics, our study examined individual-level data integrating clinical, behavioral, and social determinants within a healthcare system context (<xref ref-type="bibr" rid="ref28">28</xref>). This focus allowed us to identify specific patient-level factors that directly influence scheduling decisions, rather than ecological correlations at the population level.</p>
<p>Second, our modeling framework effectively functioned as a poly-social risk score system, aggregating multiple social determinants to quantify their combined influence on screening adherence. This approach moved beyond examining isolated social determinants to consider how they collectively impact health behaviors. Third, the age-stratified analysis (<xref ref-type="sec" rid="sec34">Supplementary material S1.2</xref>) revealed important variations in these poly-social risk profiles across demographic groups, suggesting the need for age-specific implementation strategies that account for different SDoH impacts across the lifespan. Finally, our application of light gradient boosting and elastic-net logistic regression models, together with partial dependence plots, revealed important non-linear patterns in the relationship between months since the last mammogram and non-scheduling probability&#x2014;a critical insight that traditional regression approaches would likely miss. For the attendance model, our elastic-net logistic regression approach similarly captured complex relationships between organizational factors, patient characteristics, and social determinants, though with different key predictors than the scheduling model. These findings demonstrated the value of machine learning approaches in capturing complex relationships between certain social determinants and screening behavior.</p>
<p>The development of a unified modeling framework that incorporated both individual-level social drivers and system-level factors provided healthcare organizations with a template for analyzing their own screening programs. This approach could be particularly valuable as healthcare systems work to improve cancer screening rates for their medically-homed populations while effectively integrating SDoH data into their quality improvement initiatives.</p>
</sec>
<sec id="sec24">
<label>4.3</label>
<title>Future directions in implementation science</title>
<p>The substantial missingness in our SDoH questionnaire data, with a median of 73.8% across questions, reflected common implementation challenges in clinical settings. Our analysis excluded 11 questions that exceeded the 80% missingness threshold, which primarily addressed sensitive domains such as mental health status, substance use behaviors, and detailed information regarding past scheduling experiences. Health literacy, a factor that prior research has demonstrated to influence mammography screening adherence (<xref ref-type="bibr" rid="ref6">6</xref>), represented another domain affected by substantial missingness, restricting our ability to comprehensively assess its influence on patient screening decisions. These exclusions represented a methodological consideration because these sensitive domains might be critical drivers of patient decision-making regarding mammogram scheduling and attendance. More complete data on these domains could potentially alter our understanding of the factors driving screening behavior within healthcare systems. These data collection challenges highlighted the need for alternative approaches to capture important behavioral determinants. However, our framework was designed to be adaptable and can incorporate these variables when improved collection methods make such data available in future implementations.</p>
<p>These implementation challenges underscored the importance of systematic approaches to translating our findings into practice. The Consolidated Framework for Implementation Research (CFIR) offered a valuable lens for such efforts. Though our current work focused on quantitative modeling rather than a full CFIR implementation, our findings provided a foundation for subsequent mixed-methods approaches that could more fully leverage implementation science frameworks.</p>
<p>For example, the facility-level variations identified in our model aligned with CFIR&#x2019;s &#x2018;inner setting&#x2019; domain, suggesting that organizational culture and readiness for implementation played important roles in both scheduling and attendance behaviors. Future work could build on our quantitative findings by using qualitative methods to explore how these organizational factors influenced practices and how interventions might be tailored to different clinical settings. Similarly, our findings related to geographic and socioeconomic factors corresponded to CFIR&#x2019;s &#x2018;outer setting&#x2019; domain, highlighting the importance of understanding patient needs and resources within their communities. Further investigations could provide deeper insights into how these community factors shaped decisions and how healthcare systems might better address them.</p>
<p>As healthcare systems consider implementing SDoH-informed interventions, CFIR and other implementation science frameworks could provide valuable guidance for assessing feasibility, sustainability, and potential barriers. Our work represented an important first step in this direction by providing quantitative evidence of key relationships that future implementation efforts should consider.</p>
</sec>
<sec id="sec25">
<label>4.4</label>
<title>Future research priorities</title>
<p>Our model was developed and validated within the Dartmouth Health system, which served a population with limited racial, ethnic, and linguistic diversity. This demographic homogeneity might have limited our ability to capture important language-related barriers to screening access and communication, and might have restricted the generalizability of our findings to more diverse healthcare settings and populations. Although our findings indicated small racial differences, future work should validate these approaches in healthcare systems serving more diverse communities to ensure broader applicability. It was important to note, however, that we had designed our work as a generalizable framework that could perform well in other situations and could incorporate more diverse racial groups and other demographic aspects if the necessary data were available.</p>
<p>Moreover, our modeling approach assumed that the ratio between screening and non-screening populations would remain stable over time. This assumption might not hold in different implementation contexts or as scheduling programs evolve. Healthcare systems implementing similar approaches should carefully consider their local population characteristics and mammogram scheduling patterns. Additionally, while our model demonstrated modest predictive performance within our system, its generalizability to other healthcare settings might be limited by differences in organizational structure, population characteristics, screening protocols, and the substantial missingness in our SDoH data, which might have limited our ability to fully capture social determinant influences. Future research should explore how these models could be adapted and calibrated for different healthcare contexts.</p>
<p>Beyond expanding the population and removing assumptions, we identified several priority areas for future research. First, the development of dynamic modeling approaches that could adapt to changing population characteristics and scheduling patterns would enhance the robustness of our framework. Additionally, integrating SDoH-informed scheduling models with other preventive care programs could create more comprehensive implementation strategies. Investigation of facility-level variations in scheduling patterns would further identify best practices for implementation. Finally, extending our analytical framework to other scheduling programs, such as colorectal and cervical cancer scheduling, would increase the broader applicability of SDoH-informed modeling approaches and strengthen the overall impact of our generalizable framework across diverse healthcare settings.</p>
</sec>
<sec id="sec26">
<label>4.5</label>
<title>Conclusion</title>
<p>Our study provided healthcare systems with a data-driven approach to understanding and addressing how social determinants shape breast cancer scheduling practices. Our findings suggested that machine learning approaches could help healthcare systems develop more effective, targeted implementation strategies. As healthcare systems work to meet cancer screening targets for their medically-homed populations, approaches that systematically analyze and address social determinants of health could become increasingly valuable for improving adherence and reducing disparities.</p>
<p>For the scientific community, these findings offered two primary contributions. First, our results demonstrated the relative influence of different predictors on screening behaviors, highlighting that healthcare systems might achieve greater impact by focusing on factors within their direct control rather than attempting to address individual patients&#x2019; social circumstances alone. Second, our framework enabled identification of patients at highest risk of not scheduling or attending appointments, providing a practical tool for targeted intervention strategies.</p>
<p>Looking ahead, our quantitative findings provided a foundation for future implementation science approaches that could more fully leverage frameworks like CFIR to translate these insights into practice. By combining machine learning approaches with implementation science, healthcare systems could develop more comprehensive strategies for addressing the complex interplay between social determinants and screening behaviors, ultimately improving health outcomes for diverse patient populations. Healthcare systems and researchers could adapt this approach using their own data to develop targeted interventions and improve mammography adherence within their specific patient populations and organizational contexts. Through such systematic approaches to understanding and addressing screening behaviors, healthcare systems could potentially work toward more effective, evidence-based strategies for reducing disparities and improving preventive care delivery.</p>
</sec>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="sec27">
<title>Data availability statement</title>
<p>The data analyzed in this study is subject to the following licenses/restrictions: the dataset analyzed in this study contains de-identified patient health information from the Dartmouth Health System and cannot be made publicly available due to privacy restrictions. Data access would require formal agreements with Dartmouth Health and IRB approval. Requests to access these datasets should be directed to Wesley J. Marrero, <email>wesley.marrero@dartmouth.edu</email>.</p>
</sec>
<sec sec-type="ethics-statement" id="sec28">
<title>Ethics statement</title>
<p>The studies involving humans were approved by Dartmouth Health Institutional Review Board. The studies were conducted in accordance with the local legislation and institutional requirements. The participants provided their written informed consent to participate in this study.</p>
</sec>
<sec sec-type="author-contributions" id="sec29">
<title>Author contributions</title>
<p>GM: Writing &#x2013; review &#x0026; editing, Writing &#x2013; original draft. MS: Writing &#x2013; review &#x0026; editing, Writing &#x2013; original draft. JL: Writing &#x2013; review &#x0026; editing. JF: Writing &#x2013; review &#x0026; editing. CG: Writing &#x2013; review &#x0026; editing. Rd-A: Writing &#x2013; review &#x0026; editing. AT: Writing &#x2013; review &#x0026; editing. SK: Writing &#x2013; review &#x0026; editing. WM: Writing &#x2013; review &#x0026; editing.</p>
</sec>
<sec sec-type="funding-information" id="sec30">
<title>Funding</title>
<p>The author(s) declare that no financial support was received for the research and/or publication of this article.</p>
</sec>
<sec sec-type="COI-statement" id="sec31">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="sec32">
<title>Generative AI statement</title>
<p>The authors declare that no Gen AI was used in the creation of this manuscript.</p>
</sec>
<sec sec-type="disclaimer" id="sec33">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="sec34">
<title>Supplementary material</title>
<p>The Supplementary material for this article can be found online at: <ext-link xlink:href="https://www.frontiersin.org/articles/10.3389/fmed.2025.1644287/full#supplementary-material" ext-link-type="uri">https://www.frontiersin.org/articles/10.3389/fmed.2025.1644287/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Data_Sheet_1.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><label>1.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mohan</surname><given-names>G</given-names></name> <name><surname>Gaskin</surname><given-names>DJ</given-names></name></person-group>. <article-title>Social determinants of health and US health care expenditures by insurer</article-title>. <source>JAMA Netw Open</source>. (<year>2024</year>) <volume>7</volume>:<fpage>e2440467</fpage>. doi: <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2024.40467</pub-id>, PMID: <pub-id pub-id-type="pmid">39441597</pub-id></citation></ref>
<ref id="ref2"><label>2.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rangachari</surname><given-names>P</given-names></name> <name><surname>Thapa</surname><given-names>A</given-names></name></person-group>. <article-title>Impact of hospital and health system initiatives to address social determinants of health (SDOH) in the United States: a scoping review of the peer-reviewed literature</article-title>. <source>BMC Health Serv Res</source>. (<year>2025</year>) <volume>25</volume>:<fpage>342</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s12913-025-12494-2</pub-id>, PMID: <pub-id pub-id-type="pmid">40045246</pub-id></citation></ref>
<ref id="ref3"><label>3.</label><citation citation-type="journal"><person-group person-group-type="author"><collab id="coll1">US Preventive Services Task Force</collab></person-group>. <article-title>Screening for breast Cancer: US preventive services task force recommendation statement</article-title>. <source>JAMA</source>. (<year>2024</year>) <volume>331</volume>:<fpage>1918</fpage>&#x2013;<lpage>30</lpage>. doi: <pub-id pub-id-type="doi">10.1001/jama.2024.5534</pub-id>, PMID: <pub-id pub-id-type="pmid">38687503</pub-id></citation></ref>
<ref id="ref4"><label>4.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pace</surname><given-names>LE</given-names></name> <name><surname>Keating</surname><given-names>NL</given-names></name></person-group>. <article-title>New recommendations for breast Cancer screening&#x2014;in pursuit of health equity</article-title>. <source>JAMA Netw Open</source>. (<year>2024</year>) <volume>7</volume>:<fpage>e2411638</fpage>. doi: <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2024.11638</pub-id>, PMID: <pub-id pub-id-type="pmid">38687485</pub-id></citation></ref>
<ref id="ref5"><label>5.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Miller</surname><given-names>BC</given-names></name> <name><surname>Bowers</surname><given-names>JM</given-names></name> <name><surname>Payne</surname><given-names>JB</given-names></name> <name><surname>Moyer</surname><given-names>A</given-names></name></person-group>. <article-title>Barriers to mammography screening among racial and ethnic minority women</article-title>. <source>Soc Sci Med</source>. (<year>2019</year>) <volume>239</volume>:<fpage>112494</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.socscimed.2019.112494</pub-id>, PMID: <pub-id pub-id-type="pmid">31513931</pub-id></citation></ref>
<ref id="ref6"><label>6.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ponce-Chazarri</surname><given-names>L</given-names></name> <name><surname>Ponce-Bland&#x00F3;n</surname><given-names>JA</given-names></name> <name><surname>Immordino</surname><given-names>P</given-names></name> <name><surname>Giordano</surname><given-names>A</given-names></name> <name><surname>Morales</surname><given-names>F</given-names></name></person-group>. <article-title>Barriers to breast cancer-screening adherence in vulnerable populations</article-title>. <source>Cancers</source>. (<year>2023</year>) <volume>15</volume>:<fpage>604</fpage>. doi: <pub-id pub-id-type="doi">10.3390/cancers15030604</pub-id>, PMID: <pub-id pub-id-type="pmid">36765561</pub-id></citation></ref>
<ref id="ref7"><label>7.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Elkin</surname><given-names>EB</given-names></name> <name><surname>Ishill</surname><given-names>NM</given-names></name> <name><surname>Snow</surname><given-names>JG</given-names></name> <name><surname>Panageas</surname><given-names>KS</given-names></name> <name><surname>Bach</surname><given-names>PB</given-names></name> <name><surname>Liberman</surname><given-names>L</given-names></name> <etal/></person-group>. <article-title>Geographic access and the use of screening mammography</article-title>. <source>Med Care</source>. (<year>2010</year>) <volume>48</volume>:<fpage>349</fpage>&#x2013;<lpage>56</lpage>. doi: <pub-id pub-id-type="doi">10.1097/MLR.0b013e3181ca3ecb</pub-id>, PMID: <pub-id pub-id-type="pmid">20195174</pub-id></citation></ref>
<ref id="ref8"><label>8.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pohl</surname><given-names>AL</given-names></name> <name><surname>Aderonmu</surname><given-names>AA</given-names></name> <name><surname>Grab</surname><given-names>JD</given-names></name> <name><surname>Cohen-Tigor</surname><given-names>LA</given-names></name> <name><surname>Morris</surname><given-names>AM</given-names></name></person-group>. <article-title>Transportation insecurity, social support, and adherence to cancer screening</article-title>. <source>JAMA Netw Open</source>. (<year>2025</year>) <volume>8</volume>:<fpage>e2457336</fpage>. doi: <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2024.57336</pub-id>, PMID: <pub-id pub-id-type="pmid">39883460</pub-id></citation></ref>
<ref id="ref9"><label>9.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Albadawi</surname><given-names>RS</given-names></name> <name><surname>Alsharawneh</surname><given-names>A</given-names></name> <name><surname>Othman</surname><given-names>EH</given-names></name></person-group>. <article-title>Determinants and barriers to women&#x2019;s participation in breast cancer screening activities in Jordan: an in-depth study</article-title>. <source>BMC Public Health</source>. (<year>2025</year>) <volume>25</volume>:<fpage>1339</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s12889-025-22611-9</pub-id>, PMID: <pub-id pub-id-type="pmid">40211231</pub-id></citation></ref>
<ref id="ref10"><label>10.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lofters</surname><given-names>AK</given-names></name> <name><surname>Schuler</surname><given-names>A</given-names></name> <name><surname>Slater</surname><given-names>M</given-names></name> <name><surname>Baxter</surname><given-names>NN</given-names></name> <name><surname>Persaud</surname><given-names>N</given-names></name> <name><surname>Pinto</surname><given-names>AD</given-names></name> <etal/></person-group>. <article-title>Using self-reported data on the social determinants of health in primary care to identify cancer screening disparities: opportunities and challenges</article-title>. <source>BMC Fam Pract</source>. (<year>2017</year>) <volume>18</volume>:<fpage>31</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s12875-017-0599-z</pub-id>, PMID: <pub-id pub-id-type="pmid">28241787</pub-id></citation></ref>
<ref id="ref11"><label>11.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Coughlin</surname><given-names>SS</given-names></name></person-group>. <article-title>Social determinants of breast cancer risk, stage, and survival</article-title>. <source>Breast Cancer Res Treat</source>. (<year>2019</year>) <volume>177</volume>:<fpage>537</fpage>&#x2013;<lpage>48</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10549-019-05340-7</pub-id>, PMID: <pub-id pub-id-type="pmid">31270761</pub-id></citation></ref>
<ref id="ref12"><label>12.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Andermann</surname><given-names>A</given-names></name></person-group>. <article-title>Taking action on the social determinants of health in clinical practice: a framework for health professionals</article-title>. <source>Can Med Assoc J</source>. (<year>2016</year>) <volume>188</volume>:<fpage>E474</fpage>&#x2013;<lpage>83</lpage>. doi: <pub-id pub-id-type="doi">10.1503/cmaj.160177</pub-id>, PMID: <pub-id pub-id-type="pmid">27503870</pub-id></citation></ref>
<ref id="ref13"><label>13.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ganatra</surname><given-names>S</given-names></name> <name><surname>Khadke</surname><given-names>S</given-names></name> <name><surname>Kumar</surname><given-names>A</given-names></name> <name><surname>Khan</surname><given-names>S</given-names></name> <name><surname>Javed</surname><given-names>Z</given-names></name> <name><surname>Nasir</surname><given-names>K</given-names></name> <etal/></person-group>. <article-title>Standardizing social determinants of health data: a proposal for a comprehensive screening tool to address health equity a systematic review</article-title>. <source>Health Aff Sch</source>. (<year>2024</year>) <volume>2</volume>:<fpage>qxae151</fpage>. doi: <pub-id pub-id-type="doi">10.1093/haschl/qxae151</pub-id>, PMID: <pub-id pub-id-type="pmid">39677005</pub-id></citation></ref>
<ref id="ref14"><label>14.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Novilla</surname><given-names>MLB</given-names></name> <name><surname>Goates</surname><given-names>MC</given-names></name> <name><surname>Leffler</surname><given-names>T</given-names></name> <name><surname>Novilla</surname><given-names>NKB</given-names></name> <name><surname>Wu</surname><given-names>CY</given-names></name> <name><surname>Dall</surname><given-names>A</given-names></name> <etal/></person-group>. <article-title>Integrating social care into healthcare: a review on applying the social determinants of health in clinical settings</article-title>. <source>Int J Environ Res Public Health</source>. (<year>2023</year>) <volume>20</volume>:<fpage>6873</fpage>. doi: <pub-id pub-id-type="doi">10.3390/ijerph20196873</pub-id>, PMID: <pub-id pub-id-type="pmid">37835143</pub-id></citation></ref>
<ref id="ref15"><label>15.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Alaniz-Cant&#x00FA;</surname><given-names>EI</given-names></name> <name><surname>Goodwin</surname><given-names>K</given-names></name> <name><surname>Smith</surname><given-names>L</given-names></name> <name><surname>Acosta</surname><given-names>E</given-names></name> <name><surname>Ch&#x00E1;vez-I&#x00F1;iguez</surname><given-names>A</given-names></name> <name><surname>Evans</surname><given-names>MJ</given-names></name> <etal/></person-group>. <article-title>Understanding the perceived benefits, barriers, and cues to action for lung cancer screening among Latinos: a qualitative study</article-title>. <source>Front Oncol</source>. (<year>2024</year>) <volume>14</volume>:<fpage>1365739</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fonc.2024.1365739</pub-id>, PMID: <pub-id pub-id-type="pmid">38571494</pub-id></citation></ref>
<ref id="ref16"><label>16.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kazi</surname><given-names>S</given-names></name> <name><surname>Starling</surname><given-names>C</given-names></name> <name><surname>Milicia</surname><given-names>A</given-names></name> <name><surname>Buckley</surname><given-names>B</given-names></name> <name><surname>Grisham</surname><given-names>R</given-names></name> <name><surname>Gruber</surname><given-names>E</given-names></name> <etal/></person-group>. <article-title>Barriers and facilitators to screen for and address social needs in primary care practices in Maryland: a qualitative study</article-title>. <source>Front Health Serv</source>. (<year>2024</year>) <volume>4</volume>:<fpage>1380589</fpage>. doi: <pub-id pub-id-type="doi">10.3389/frhs.2024.1380589</pub-id>, PMID: <pub-id pub-id-type="pmid">38952646</pub-id></citation></ref>
<ref id="ref17"><label>17.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Teteh</surname><given-names>DK</given-names></name> <name><surname>Ferrell</surname><given-names>B</given-names></name> <name><surname>Okunowo</surname><given-names>O</given-names></name> <name><surname>Downie</surname><given-names>A</given-names></name> <name><surname>Erhunmwunsee</surname><given-names>L</given-names></name> <name><surname>Montgomery</surname><given-names>SB</given-names></name> <etal/></person-group>. <article-title>Social determinants of health and lung cancer surgery: a qualitative study</article-title>. <source>Front Public Health</source>. (<year>2023</year>) <volume>11</volume>:<fpage>1285419</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fpubh.2023.1285419</pub-id>, PMID: <pub-id pub-id-type="pmid">38026333</pub-id></citation></ref>
<ref id="ref18"><label>18.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Proctor</surname><given-names>E</given-names></name> <name><surname>Silmere</surname><given-names>H</given-names></name> <name><surname>Raghavan</surname><given-names>R</given-names></name> <name><surname>Hovmand</surname><given-names>P</given-names></name> <name><surname>Aarons</surname><given-names>G</given-names></name> <name><surname>Bunger</surname><given-names>A</given-names></name> <etal/></person-group>. <article-title>Outcomes for implementation research: conceptual distinctions, measurement challenges, and research agenda</article-title>. <source>Adm Policy Ment Health Ment Health Serv Res</source>. (<year>2011</year>) <volume>38</volume>:<fpage>65</fpage>&#x2013;<lpage>76</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s10488-010-0319-7</pub-id>, PMID: <pub-id pub-id-type="pmid">20957426</pub-id></citation></ref>
<ref id="ref19"><label>19.</label><citation citation-type="other"><person-group person-group-type="author"><collab id="coll2">Dartmouth Health</collab></person-group> (<year>2025</year>) Who is Dartmouth Health? Available online at: <ext-link xlink:href="https://www.dartmouth-health.org/about/who-is-dartmouth-health" ext-link-type="uri">https://www.dartmouth-health.org/about/who-is-dartmouth-health</ext-link> (Accessed April 1, 2025).</citation></ref>
<ref id="ref20"><label>20.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sacca</surname><given-names>L</given-names></name> <name><surname>Lobaina</surname><given-names>D</given-names></name> <name><surname>Burgoa</surname><given-names>S</given-names></name> <name><surname>Lotharius</surname><given-names>K</given-names></name> <name><surname>Moothedan</surname><given-names>E</given-names></name> <name><surname>Gilmore</surname><given-names>N</given-names></name> <etal/></person-group>. <article-title>Promoting artificial intelligence for global breast cancer risk prediction and screening in adult women: a scoping review</article-title>. <source>J Clin Med</source>. (<year>2024</year>) <volume>13</volume>:<fpage>2525</fpage>. doi: <pub-id pub-id-type="doi">10.3390/jcm13092525</pub-id>, PMID: <pub-id pub-id-type="pmid">38731054</pub-id></citation></ref>
<ref id="ref21"><label>21.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Charlson</surname><given-names>ME</given-names></name> <name><surname>Pompei</surname><given-names>P</given-names></name> <name><surname>Ales</surname><given-names>KL</given-names></name> <name><surname>MacKenzie</surname><given-names>CR</given-names></name></person-group>. <article-title>A new method of classifying prognostic comorbidity in longitudinal studies: development and validation</article-title>. <source>J Chronic Dis</source>. (<year>1987</year>) <volume>40</volume>:<fpage>373</fpage>&#x2013;<lpage>83</lpage>. doi: <pub-id pub-id-type="doi">10.1016/0021-9681(87)90171-8</pub-id>, PMID: <pub-id pub-id-type="pmid">3558716</pub-id></citation></ref>
<ref id="ref22"><label>22.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Decker</surname><given-names>S</given-names></name> <name><surname>Dworsky</surname><given-names>M</given-names></name> <name><surname>Gibson</surname><given-names>T</given-names></name> <name><surname>Henke</surname><given-names>R</given-names></name> <name><surname>McDermott</surname><given-names>K</given-names></name></person-group>. <article-title>The impact of the Affordable Care Act insurance expansions on opioid-related emergency department visits</article-title>. <source>Health Serv Res</source>. (<year>2021</year>) <volume>56</volume>:<fpage>64</fpage>&#x2013;<lpage>5</lpage>. doi: <pub-id pub-id-type="doi">10.1111/1475-6773.13787</pub-id></citation></ref>
<ref id="ref23"><label>23.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Berkowitz</surname><given-names>RL</given-names></name> <name><surname>Bui</surname><given-names>L</given-names></name> <name><surname>Shen</surname><given-names>Z</given-names></name> <name><surname>Pressman</surname><given-names>A</given-names></name> <name><surname>Moreno</surname><given-names>M</given-names></name> <name><surname>Brown</surname><given-names>S</given-names></name> <etal/></person-group>. <article-title>Evaluation of a social determinants of health screening questionnaire and workflow pilot within an adult ambulatory clinic</article-title>. <source>BMC Fam Pract</source>. (<year>2021</year>) <volume>22</volume>:<fpage>256</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s12875-021-01598-3</pub-id>, PMID: <pub-id pub-id-type="pmid">34952582</pub-id></citation></ref>
<ref id="ref24"><label>24.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sotudian</surname><given-names>S</given-names></name> <name><surname>Afran</surname><given-names>A</given-names></name> <name><surname>LeBedis</surname><given-names>CA</given-names></name> <name><surname>Rives</surname><given-names>AF</given-names></name> <name><surname>Paschalidis</surname><given-names>IC</given-names></name> <name><surname>Fishman</surname><given-names>MDC</given-names></name></person-group>. <article-title>Social determinants of health and the prediction of missed breast imaging appointments</article-title>. <source>BMC Health Serv Res</source>. (<year>2022</year>) <volume>22</volume>:<fpage>1454</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s12913-022-08784-8</pub-id>, PMID: <pub-id pub-id-type="pmid">36451240</pub-id></citation></ref>
<ref id="ref25"><label>25.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Stabellini</surname><given-names>N</given-names></name> <name><surname>Cullen</surname><given-names>J</given-names></name> <name><surname>Moore</surname><given-names>JX</given-names></name> <name><surname>Dent</surname><given-names>S</given-names></name> <name><surname>Sutton</surname><given-names>AL</given-names></name> <name><surname>Shanahan</surname><given-names>J</given-names></name> <etal/></person-group>. <article-title>Social determinants of health data improve the prediction of cardiac outcomes in females with breast cancer</article-title>. <source>Cancers</source>. (<year>2023</year>) <volume>15</volume>:<fpage>4630</fpage>. doi: <pub-id pub-id-type="doi">10.3390/cancers15184630</pub-id>, PMID: <pub-id pub-id-type="pmid">37760599</pub-id></citation></ref>
<ref id="ref26"><label>26.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nelson</surname><given-names>A</given-names></name> <name><surname>Herron</surname><given-names>D</given-names></name> <name><surname>Rees</surname><given-names>G</given-names></name> <name><surname>Nachev</surname><given-names>P</given-names></name></person-group>. <article-title>Predicting scheduled hospital attendance with artificial intelligence</article-title>. <source>NPJ Digit Med</source>. (<year>2019</year>) <volume>2</volume>:<fpage>26</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41746-019-0103-3</pub-id>, PMID: <pub-id pub-id-type="pmid">31304373</pub-id></citation></ref>
<ref id="ref27"><label>27.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Salazar</surname><given-names>LH</given-names></name> <name><surname>Fernandes</surname><given-names>A</given-names></name> <name><surname>Dazzi</surname><given-names>R</given-names></name> <name><surname>Garcia</surname><given-names>N</given-names></name> <name><surname>Leithardt</surname><given-names>VRQ</given-names></name></person-group>. <article-title>Using different models of machine learning to predict attendance at medical appointments</article-title>. <source>J Inf Syst Eng Manag</source>. (<year>2020</year>) <volume>5</volume>:<fpage>em0122</fpage>. doi: <pub-id pub-id-type="doi">10.29333/jisem/8430</pub-id>, PMID: <pub-id pub-id-type="pmid">40747843</pub-id></citation></ref>
<ref id="ref28"><label>28.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hashtarkhani</surname><given-names>S</given-names></name> <name><surname>Zhou</surname><given-names>Y</given-names></name> <name><surname>Kumsa</surname><given-names>FA</given-names></name> <name><surname>White-Means</surname><given-names>S</given-names></name> <name><surname>Schwartz</surname><given-names>DL</given-names></name> <name><surname>Shaban-Nejad</surname><given-names>A</given-names></name></person-group>. <article-title>Analyzing geospatial and socioeconomic disparities in breast cancer screening among populations in the United States: machine learning approach</article-title>. <source>JMIR Cancer</source>. (<year>2025</year>) <volume>11</volume>:<fpage>e59882</fpage>. doi: <pub-id pub-id-type="doi">10.2196/59882</pub-id>, PMID: <pub-id pub-id-type="pmid">39819978</pub-id></citation></ref>
<ref id="ref29"><label>29.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Altmann</surname><given-names>A</given-names></name> <name><surname>Tolo&#x015F;i</surname><given-names>L</given-names></name> <name><surname>Sander</surname><given-names>O</given-names></name> <name><surname>Lengauer</surname><given-names>T</given-names></name></person-group>. <article-title>Permutation importance: a corrected feature importance measure</article-title>. <source>Bioinformatics</source>. (<year>2010</year>) <volume>26</volume>:<fpage>1340</fpage>&#x2013;<lpage>7</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btq134</pub-id>, PMID: <pub-id pub-id-type="pmid">20385727</pub-id></citation></ref>
<ref id="ref30"><label>30.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Breiman</surname><given-names>L</given-names></name></person-group>. <article-title>Random forests</article-title>. <source>Mach Learn</source>. (<year>2001</year>) <volume>45</volume>:<fpage>5</fpage>&#x2013;<lpage>32</lpage>. doi: <pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id></citation></ref>
<ref id="ref31"><label>31.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>DeLong</surname><given-names>ER</given-names></name> <name><surname>DeLong</surname><given-names>DM</given-names></name> <name><surname>Clarke-Pearson</surname><given-names>DL</given-names></name></person-group>. <article-title>Comparing the areas under two or more correlated receiver operating characteristic curves: a nonparametric approach</article-title>. <source>Biometrics</source>. (<year>1988</year>) <volume>44</volume>:<fpage>837</fpage>&#x2013;<lpage>45</lpage>. doi: <pub-id pub-id-type="doi">10.2307/2531595</pub-id>, PMID: <pub-id pub-id-type="pmid">3203132</pub-id></citation></ref>
<ref id="ref32"><label>32.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Friedman</surname><given-names>JH</given-names></name></person-group>. <article-title>Greedy function approximation: a gradient boosting machine</article-title>. <source>Ann Stat</source>. (<year>2001</year>) <volume>29</volume>:<fpage>1189</fpage>&#x2013;<lpage>232</lpage>. doi: <pub-id pub-id-type="doi">10.1214/aos/1013203451</pub-id></citation></ref>
<ref id="ref33"><label>33.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gelman</surname><given-names>A</given-names></name> <name><surname>Jakulin</surname><given-names>A</given-names></name> <name><surname>Pittau</surname><given-names>MG</given-names></name> <name><surname>Su</surname><given-names>YS</given-names></name></person-group>. <article-title>A weakly informative default prior distribution for logistic and other regression models</article-title>. <source>Ann Appl Stat</source>. (<year>2008</year>) <volume>2</volume>:<fpage>1360</fpage>&#x2013;<lpage>83</lpage>. doi: <pub-id pub-id-type="doi">10.1214/08-AOAS191</pub-id></citation></ref>
<ref id="ref34"><label>34.</label><citation citation-type="other"><person-group person-group-type="author"><name><surname>Ke</surname><given-names>G</given-names></name> <name><surname>Meng</surname><given-names>Q</given-names></name> <name><surname>Finley</surname><given-names>T</given-names></name> <name><surname>Wang</surname><given-names>T</given-names></name> <name><surname>Chen</surname><given-names>W</given-names></name> <name><surname>Ma</surname><given-names>W</given-names></name> <etal/></person-group>. (<year>2017</year>) &#x2018;LightGBM: a highly efficient gradient boosting decision tree&#x2019;. Available online at: <ext-link xlink:href="https://papers.nips.cc/paper/6907-lightgbm-a-highly-efficient-gradient-boosting-decision-tree" ext-link-type="uri">https://papers.nips.cc/paper/6907-lightgbm-a-highly-efficient-gradient-boosting-decision-tree</ext-link> (Accessed March 28, 2025).</citation></ref>
<ref id="ref35"><label>35.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lunardon</surname><given-names>N</given-names></name> <name><surname>Menardi</surname><given-names>G</given-names></name> <name><surname>Torelli</surname><given-names>N</given-names></name></person-group>. <article-title>ROSE: a package for binary imbalanced learning</article-title>. <source>R J</source>. (<year>2014</year>) <volume>6</volume>:<fpage>79</fpage>. doi: <pub-id pub-id-type="doi">10.32614/RJ-2014-008</pub-id></citation></ref>
<ref id="ref36"><label>36.</label><citation citation-type="book"><person-group person-group-type="author"><name><surname>Sohil</surname><given-names>F</given-names></name> <name><surname>Sohail</surname><given-names>MU</given-names></name> <name><surname>Shabbir</surname><given-names>J</given-names></name></person-group>. <source>An introduction to statistical learning with applications in R</source>. <publisher-loc>New York</publisher-loc>: <publisher-name>Springer</publisher-name> (<year>2022</year>).</citation></ref>
<ref id="ref37"><label>37.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Stekhoven</surname><given-names>DJ</given-names></name> <name><surname>B&#x00FC;hlmann</surname><given-names>P</given-names></name></person-group>. <article-title>MissForest&#x2014;non-parametric missing value imputation for mixed-type data</article-title>. <source>Bioinformatics</source>. (<year>2012</year>) <volume>28</volume>:<fpage>112</fpage>&#x2013;<lpage>8</lpage>. doi: <pub-id pub-id-type="doi">10.1093/bioinformatics/btr597</pub-id>, PMID: <pub-id pub-id-type="pmid">22039212</pub-id></citation></ref>
<ref id="ref38"><label>38.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Varma</surname><given-names>S</given-names></name> <name><surname>Simon</surname><given-names>R</given-names></name></person-group>. <article-title>Bias in error estimation when using cross-validation for model selection</article-title>. <source>BMC Bioinformatics</source>. (<year>2006</year>) <volume>7</volume>:<fpage>91</fpage>. doi: <pub-id pub-id-type="doi">10.1186/1471-2105-7-91</pub-id>, PMID: <pub-id pub-id-type="pmid">16504092</pub-id></citation></ref>
<ref id="ref39"><label>39.</label><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zou</surname><given-names>H</given-names></name> <name><surname>Hastie</surname><given-names>T</given-names></name></person-group>. <article-title>Regularization and variable selection via the elastic net</article-title>. <source>J R Stat Soc Series B</source>. (<year>2005</year>) <volume>67</volume>:<fpage>301</fpage>&#x2013;<lpage>20</lpage>. doi: <pub-id pub-id-type="doi">10.1111/j.1467-9868.2005.00503.x</pub-id></citation></ref>
</ref-list>
</back>
</article>