<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Sleep</journal-id>
<journal-title>Frontiers in Sleep</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Sleep</abbrev-journal-title>
<issn pub-type="epub">2813-2890</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frsle.2024.1271167</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Sleep</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Machine learning data sources in pediatric sleep research: assessing racial/ethnic differences in electronic health record&#x02013;based clinical notes prior to model training</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Davenport</surname> <given-names>Mattina A.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2307760/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Sirrianni</surname> <given-names>Joseph W.</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Chisolm</surname> <given-names>Deena J.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1643130/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Abigail Wexner Research Institute, Center for Child Health Equity and Outcomes Research, Nationwide Children&#x00027;s Hospital</institution>, <addr-line>Columbus, OH</addr-line>, <country>United States</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of Pediatrics, College of Medicine, The Ohio State University</institution>, <addr-line>Columbus, OH</addr-line>, <country>United States</country></aff>
<aff id="aff3"><sup>3</sup><institution>Abigail Wexner Research Institute, IT Research and Innovation, Nationwide Children&#x00027;s Hospital</institution>, <addr-line>Columbus, OH</addr-line>, <country>United States</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Daniel Combs, University of Arizona, United States</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Maureen McQuillan, Indiana University Bloomington, United States</p>
<p>Stuart F. Quan, Harvard Medical School, United States</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Mattina A. Davenport <email>mattina.davenport&#x00040;nationwidechildrens.org</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>14</day>
<month>02</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>3</volume>
<elocation-id>1271167</elocation-id>
<history>
<date date-type="received">
<day>01</day>
<month>08</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>26</day>
<month>01</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2024 Davenport, Sirrianni and Chisolm.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Davenport, Sirrianni and Chisolm</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license></permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Pediatric sleep problems can be detected across racial/ethnic subpopulations in primary care settings. However, the electronic health record (EHR) data documentation that describes patients&#x00027; sleep problems may be inherently biased due to both historical biases and informed presence. This study assessed racial/ethnic differences in natural language processing (NLP) training data (e.g., pediatric sleep-related keywords in primary care clinical notes) prior to model training.</p></sec>
<sec>
<title>Methods</title>
<p>We used a predefined keyword features set containing 178 Peds B-SATED keywords. We then queried all the clinical notes from patients seen in pediatric primary care between the ages of 5 and 18 from January 2018 to December 2021. A least absolute shrinkage and selection operator (LASSO) regression model was used to investigate whether there were racial/ethnic differences in the documentation of Peds B-SATED keywords. Then, mixed-effects logistic regression was used to determine whether the odds of the presence of global Peds B-SATED dimensions also differed across racial/ethnic subpopulations.</p></sec>
<sec>
<title>Results</title>
<p>Using both LASSO and multilevel modeling approaches, the current study found that there were racial/ethnic differences in providers&#x00027; documentation of Peds B-SATED keywords and global dimensions. In addition, the most frequently documented Peds B-SATED keyword rankings qualitatively differed across racial/ethnic subpopulations.</p></sec>
<sec>
<title>Conclusion</title>
<p>This study revealed providers&#x00027; differential patterns of documenting Peds B-SATED keywords and global dimensions that may account for the under-detection of pediatric sleep problems among racial/ethnic subpopulations. In research, these findings have important implications for the equitable clinical documentation of sleep problems in pediatric primary care settings and extend prior retrospective work in pediatric sleep specialty settings.</p></sec></abstract>
<kwd-group>
<kwd>sleep</kwd>
<kwd>equity</kwd>
<kwd>public health</kwd>
<kwd>primary care</kwd>
<kwd>informatics</kwd>
<kwd>machine learning</kwd>
<kwd>population health</kwd>
</kwd-group>
<counts>
<fig-count count="3"/>
<table-count count="2"/>
<equation-count count="0"/>
<ref-count count="42"/>
<page-count count="10"/>
<word-count count="7137"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Insomnia</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1 Introduction</title>
<p>Racial/ethnic disparities are well documented and persistent in pediatric sleep at the population level (Billings et al., <xref ref-type="bibr" rid="B3">2021</xref>; Meltzer et al., <xref ref-type="bibr" rid="B24">2021</xref>; Reynolds et al., <xref ref-type="bibr" rid="B32">2023</xref>). However, these disparities are often preventable and linked to social determinants of health at the individual, family, healthcare, and broader community/societal levels (Billings et al., <xref ref-type="bibr" rid="B3">2021</xref>; Fanta et al., <xref ref-type="bibr" rid="B8">2021</xref>; Yip et al., <xref ref-type="bibr" rid="B42">2022</xref>; Clarkson-Townsend et al., <xref ref-type="bibr" rid="B7">2023</xref>; Gueye-Ndiaye et al., <xref ref-type="bibr" rid="B12">2023</xref>). Pediatric primary care is ideal for preventing pediatric sleep disparities at the population level, yet providers in this setting typically lack the time and resources necessary to identify sleep problems (Honaker and Saunders, <xref ref-type="bibr" rid="B18">2018</xref>; Mosher and Piccinini-Vallis, <xref ref-type="bibr" rid="B28">2022</xref>; Williamson et al., <xref ref-type="bibr" rid="B39">2022</xref>; Golden et al., <xref ref-type="bibr" rid="B10">2023</xref>). Efficient machine learning and clinical decision support tools embedded in the pediatric primary care electronic health record (EHR) are needed to support universal screening of pediatric sleep problems at the population level (Anan et al., <xref ref-type="bibr" rid="B1">2023</xref>). In addition, EHR-embedded machine learning tools for data collection are essential to include patient self-report and aid providers with limited personnel and time constraints in pediatric primary care (Honaker et al., <xref ref-type="bibr" rid="B19">2019</xref>; Huffstetler et al., <xref ref-type="bibr" rid="B21">2022</xref>; Willis et al., <xref ref-type="bibr" rid="B40">2022</xref>). 
Therefore, EHR-embedded machine learning tools have the potential to innovatively address modifiable pediatric sleep care gaps (Kang et al., <xref ref-type="bibr" rid="B22">2021</xref>; Ramgopal et al., <xref ref-type="bibr" rid="B31">2023</xref>).</p>
<p>Despite their promise, EHR-embedded machine learning tools also have the capability of worsening racial/ethnic disparities due to inherently biased healthcare data sources used for training machine learning models (Chen et al., <xref ref-type="bibr" rid="B6">2023</xref>). In the context of the EHR, training data commonly used for machine learning in pediatric sleep may be inherently biased for two reasons: historical biases and informed presence (Hamilton et al., <xref ref-type="bibr" rid="B13">2021</xref>). Historical biases include an overrepresentation of non-Hispanic white patients in epidemiologic pediatric sleep cohorts, which have commonly leveraged patients with confirmed sleep diagnoses (Meltzer et al., <xref ref-type="bibr" rid="B23">2010</xref>; Honaker and Meltzer, <xref ref-type="bibr" rid="B17">2016</xref>). Informed presence occurs when patients navigate the healthcare system but experience barriers that yield variable interactions across racial/ethnic subpopulations (Phelan et al., <xref ref-type="bibr" rid="B29">2017</xref>). Informed presence is critical to account for when developing EHR-embedded machine learning tools because it can have downstream effects on how racial/ethnic subpopulations are classified, measured, and/or represented in healthcare data sources (Phelan et al., <xref ref-type="bibr" rid="B29">2017</xref>). Therefore, researchers should assess for such biases (e.g., historical or induced by patients&#x00027; healthcare navigation) that may be inherent in healthcare data sources prior to training and developing models for automated solutions (Huang et al., <xref ref-type="bibr" rid="B20">2022</xref>).</p>
<p>As posited in the Peds B-SATED framework by Meltzer et al. (<xref ref-type="bibr" rid="B24">2021</xref>), pediatric sleep problems can be multidimensional and include unhealthy sleep behaviors (B), poor sleep satisfaction (S), difficulty with alertness during waking hours (A), inappropriate sleep timing (T), low sleep efficiency (E), and inadequate sleep durations for age (D). A reliance on sleep diagnoses and polysomnography data limits epidemiologic and population pediatric sleep research by failing to capture all the subclinical characteristics described in the Peds B-SATED framework (Yang et al., <xref ref-type="bibr" rid="B41">2023</xref>). Leveraging clinical note data from the EHR is a way to improve our identification of Peds B-SATED in primary care settings using EHR-embedded machine learning tools. Natural language processing (NLP), a machine learning model for understanding language and contextualized nuances in EHR free-text clinical notes, is an innovative and available approach for capturing Peds B-SATED framework subclinical characteristics (Gianfrancesco and Goldstein, <xref ref-type="bibr" rid="B9">2021</xref>; Rahman et al., <xref ref-type="bibr" rid="B30">2022</xref>). However, this data source and approach are not exempt from being inherently biased due to the reliance on clinical notes, telephone notes, patient&#x02013;provider messages, and other text-based fields that are shaped by informed presence in healthcare systems (Rozier et al., <xref ref-type="bibr" rid="B33">2022</xref>; Walk et al., <xref ref-type="bibr" rid="B38">2022</xref>). Recent NLP work used to audit clinical notes has found that the language used to describe providers&#x00027; recognition of patients&#x00027; reported characteristics (e.g., sociodemographic and clinical) varies by race/ethnicity (Thompson et al., <xref ref-type="bibr" rid="B37">2021</xref>; Sun et al., <xref ref-type="bibr" rid="B36">2022</xref>). 
Therefore, it is important to assess how Peds B-SATED keywords and global dimensions are documented and captured across racial/ethnic subpopulations in NLP training data sources prior to model training and using these methods to develop EHR-embedded machine learning tools.</p>
<p>To address this knowledge gap, utilizing clinical notes from patients seen in pediatric primary care, the current study included two objectives: (1) A least absolute shrinkage and selection operator (LASSO)&#x02013;normalized logistic regression model was used to investigate whether there were racial/ethnic differences in documentation of Peds B-SATED keywords. (2) A mixed-effects logistic regression was used to determine whether the odds for the presence of global Peds B-SATED dimensions also differed across racial/ethnic subpopulations. We hypothesized that racial/ethnic differences in the documentation of keywords and global dimensions would be observed by LASSO regression and multilevel modeling approaches.</p></sec>
<sec sec-type="methods" id="s2">
<title>2 Methods</title>
<sec>
<title>2.1 Participants</title>
<p>We conducted a cross-sectional cohort study of 44,244 patients, 5 to 18 years old, seen in a pediatric primary care network at a large academic medical center. We excluded infants and early childhood youth due to developmental reliance on caregivers to support sleep, which would require an extensive and separate set of pediatric behavioral sleep medicine keywords and phrases. The protocol was approved by the institutional review board at Nationwide Children&#x00027;s Hospital.</p></sec>
<sec>
<title>2.2 Data source and procedures</title>
<sec>
<title>2.2.1 DeepSuggest clinical note search engine</title>
<p>DeepSuggest is an internally developed and validated clinical note search engine at Nationwide Children&#x00027;s Hospital (Moosavinasab et al., <xref ref-type="bibr" rid="B27">2021</xref>). DeepSuggest queries clinical notes by a set of keywords and filters by note type, provider type, department specialty, date range, age range, and patient information such as name, date of birth, and medical record number. In addition to facilitating a search of EHR-based clinical notes, DeepSuggest expands query terms by recommending related or similar search keywords based on the similarity of keyword Word2Vec embeddings calculated across all notes in the repository on the backend (Mikolov et al., <xref ref-type="bibr" rid="B26">2013</xref>). During this process, duplicate notes are not included.</p>
<p>We utilized DeepSuggest to expand our initial keywords and retrieve clinical notes that contained at least one Peds B-SATED keyword or phrase. For vocabulary expansion, we entered our initial set of Peds B-SATED keywords into DeepSuggest, and it determined recommended keywords based on their relevance. This vocabulary expansion increased our keywords by including those with common misspellings (e.g., &#x0201C;insomia&#x0201D;), inconsistent punctuation (e.g., &#x0201C;sleepwalking&#x0201D; vs. &#x0201C;sleep-walking&#x0201D;), abbreviations, and synonyms (e.g., &#x0201C;difficulty staying awake during the day&#x0201D; vs. &#x0201C;sleepy during the day&#x0201D;).</p></sec>
<sec>
<title>2.2.2 Predefined keyword features set approach</title>
<p>We desired to cluster clinical notes into groupings based on their presence of global Peds B-SATED dimensions, so we applied a predefined keyword features set approach. This is an NLP approach that uses the presence of each of the keywords as a representation of the clinical note, rather than a predictive NLP model. To convert these Peds B-SATED keyword occurrences into a numerical representation, we searched each clinical note for an occurrence of each of the 178 keywords, using case-invariant matching, and phrases, using regular expressions. If a keyword or phrase was found, we would mark the keyword&#x00027;s corresponding index in a 178-dimensional vector with a 1; if no occurrence of that keyword was found, its value would be 0. At the end of this process, each note had a corresponding 178-dimensional binary keyword vector. In the end, using a predefined keyword features set containing 178 Peds B-SATED keywords, we queried all the clinical notes for patients between the ages of 5 and 18 from January 2018 to December 2021.</p></sec></sec>
<sec>
<title>2.3 Analyses</title>
<sec>
<title>2.3.1 LASSO regression model</title>
<p>To investigate whether there were racial/ethnic differences in the documentation of Peds B-SATED keywords, we fit a LASSO regression model predicting patients&#x00027; race/ethnicity using the occurrence of Peds B-SATED keywords as the input feature. This model assessed the Peds B-SATED keywords used in a patient&#x00027;s clinical note(s) to detect if they differed across race/ethnicity subpopulations in our cohort.</p>
<p>For this LASSO regression model, we performed two analytic steps. The first step was focused on the overall predictiveness of the Peds B-SATED keywords. We trained a LASSO regression model using 10-fold cross-validation. This included using the combined holdout subsets of data from each fold to evaluate the model&#x00027;s overall performance parameters: area under the receiver operating characteristic curve (AUC ROC), precision, recall, accuracy, and F1-scores. To ensure that our results were rigorous and meaningful, we repeated our 10-fold cross-validation training 1,000 times using bootstrapping to randomly resample the data in the training folds at each step, stratified by label (ensuring the racial/ethnic proportion of the training data remains constant). This analysis investigated the predictive capability of the Peds B-SATED keywords.</p>
<p>The second step was focused on identifying which Peds B-SATED keywords were most influential for predicting each racial/ethnic patient subpopulation. In this step, we trained another LASSO regression model using all of the data simultaneously. By examining the coefficients of each Peds B-SATED keyword, we reported the 10 most common keywords used for each racial/ethnic subpopulation. This step also included predicting patients&#x00027; race/ethnicity from the Peds B-SATED keywords in their clinical notes. To do this, we collapsed patient race/ethnicity into six categories: non-Hispanic Black, Hispanic/Latino, non-Hispanic white, non-Hispanic Asian, non-Hispanic Multiracial (more than two races), and Other Race/Ethnicity (including non-Hispanic Native Hawaiian or Other Pacific Islander, non-Hispanic American Indian or Alaska Native, and no information given/unknown/refuses to answer). Race is collected by patient report at the time of registration and entered into Epic by registration staff. Registration staff are trained to directly ask the caregiver/patient to select the race/ethnicity category they most identify with and have the option to not report this information. For predicting patients&#x00027; race/ethnicity, we aggregated each patient&#x00027;s Peds B-SATED keywords vector for all of their clinical notes and then used that in the LASSO regression model as the input features to predict patients&#x00027; race/ethnicity category.</p></sec>
<sec>
<title>2.3.2 Mixed-effects logistic regression</title>
<p>Descriptive analyses were used to count the incidence of patients that had global Peds B-SATED dimensions present. The percentages of patients with global Peds B-SATED dimensions are reported. Mixed-effects logistic regression models (e.g., clinical notes nested within patients, with a random effect for patients) were used to predict whether the presence of global Peds B-SATED dimensions differed by patients&#x00027; race/ethnicity. Statistical models were adjusted for covariates at both the note and patient levels. Note-level covariates included clinical note author type (nurse practitioner, physician, integrated pediatric psychologists, social workers, and others/trainees) and note date pandemic status, which was dichotomized (notes prior to 1 March 2020; notes after 1 March 2020). Patient-level covariates included the following sociodemographic characteristics: biological sex (female vs. male) and age. Logistic mixed-effects models were fitted using the glmer function from the lme4 package, the performance of the fitted models was compared, and the best model was selected using Akaike&#x00027;s information criterion and Bayesian information criterion. A <italic>p</italic>-value of &#x0003C;0.05 was considered statistically significant. Statistical analyses were performed using R version 4.2.2 and R Studio software (Bates et al., <xref ref-type="bibr" rid="B2">2015</xref>).</p></sec></sec></sec>
<sec sec-type="results" id="s3">
<title>3 Results</title>
<sec>
<title>3.1 Patient cohort characteristics</title>
<p>Our sample consisted of 44,244 patients with 111,078 clinical notes. Of the patients, 51.4% were classified as non-Hispanic Black, 24.6% as non-Hispanic white, 8.1% as non-Hispanic Multiracial, 4.6% as non-Hispanic Asian American, 11.4% as non-Hispanic Other Race/Ethnicity, and 15.6% as Hispanic or Latino. The mean age was 11.12 (<italic>SD</italic> = 3.78), and 49.1% were classified as female.</p></sec>
<sec>
<title>3.2 Racial/ethnic differences in documentation of Peds B-SATED keywords</title>
<p><xref ref-type="fig" rid="F1">Figure 1</xref> shows a comparison&#x02013;confusion matrix table. A confusion matrix is a table that is used to display the predictive ability of the LASSO regression model used in this study. Ideally, in this particular case, the predictive ability should be at 0 and not show the model&#x00027;s capability of predicting patients&#x00027; race/ethnicity from Peds B-SATED keywords. However, this reveals that race/ethnicity across patient subpopulations in our cohort could be predicted by primary care providers&#x00027; use of keywords in clinical notes and that this difference was most observable among non-Hispanic Black and non-Hispanic white patient subpopulations.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>The presented confusion matrix in this figure from the least absolute shrinkage and selection operator regression model revealed that providers&#x00027; use of Peds B-SATED keywords in clinical notes predicted patients&#x00027; race/ethnicity. The color spectrum on the right, ranging from 0 to 1,600, displays a near-zero predictive ability in purple and a high predictive ability in yellow. Ideally, the matrix would display that all predicted labels, race/ethnicity, were purple and near 0. NH, non-Hispanic; Multi, Multiracial.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frsle-03-1271167-g0001.tif"/>
</fig>
<p>A model with random classification ability will have an AUC ROC of 0.5, and a perfect model (which can separate everything without error) will have a value of 1.0. Our average AUC ROC score is 0.62. <xref ref-type="fig" rid="F2">Figure 2</xref> shows that in no case was the AUC ROC score at or below 0.5, indicating that this LASSO regression model with Peds B-SATED keywords was always able to learn to discriminate patients by race/ethnicity to some degree, which is an indication of racial/ethnic differences in primary care providers&#x00027; documentation of Peds B-SATED keywords. <xref ref-type="fig" rid="F3">Figure 3</xref> shows that non-Hispanic Black patients had the highest overall model F1-score (mean = 0.56) and model recall (mean = 0.68) scores. In addition, non-Hispanic Black patients were similar to non-Hispanic white patients in terms of model precision.</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>Predict patient race: bootstrap (1,000 reps) 10-fold cross-validation. The figure displays the distribution of the area under the receiver operating characteristic curve (AUC ROC) for 1,000 bootstrapped iterations of 10-fold cross-validation predicting patient race. AUC ROC is a metric that reflects how well a model can discriminate between categories. A random model will have an AUC ROC score of 0.5, and a perfect model (can separate everything without error) will have a value of 1.0. Our average AUC ROC score is 0.6194, with a standard deviation of 0.0151. In no case was the AUC ROC score at or below 0.5, indicating that the model is always able to discriminate patients&#x00027; race/ethnicity using the Peds B-SATED keywords from primary care providers&#x00027; clinical notes.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frsle-03-1271167-g0002.tif"/>
</fig>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>Distribution of scores for 10-fold cross-validation on 1,000 rep bootstrap. The figure displays the specific performance of the least absolute shrinkage and selection operator regression model prediction of racial/ethnic subpopulations across precision, recall, F1-score (the harmonic mean of precision and recall), and the model&#x00027;s overall accuracy. NH, non-Hispanic; Multi, Multiracial.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="frsle-03-1271167-g0003.tif"/>
</fig>
<p>To more specifically demonstrate how providers&#x00027; documentation of specific Peds B-SATED keywords differed for racial/ethnic subpopulations, <xref ref-type="table" rid="T1">Table 1</xref> shows the top 10 Peds B-SATED keywords for each racial/ethnic subpopulation that were documented by pediatric primary care providers, including the coefficient ranking value. The most common Peds B-SATED keyword rankings qualitatively differed across racial/ethnic subpopulations. Most interestingly, non-Hispanic white patients had distinguished Peds B-SATED keywords that may lead to follow-up support and/or a referral to specialty care to address pediatric sleep problems. However, minoritized racial/ethnic subpopulations commonly had Peds B-SATED keywords that encompassed broader descriptions and relevant daytime sequelae of pediatric sleep problems.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Top 10 Peds B-SATED keywords based on highest ranking coefficients.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919497;color:#ffffff">
<th valign="top" align="left"><bold>Rank</bold></th>
<th valign="top" align="left"><bold>Hispanic</bold></th>
<th valign="top" align="left"><bold>NH white</bold></th>
<th valign="top" align="left"><bold>NH Black</bold></th>
<th valign="top" align="left"><bold>NH Asian</bold></th>
<th valign="top" align="left"><bold>NH Multi</bold></th>
<th valign="top" align="left"><bold>NH Other</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="left">Limb movements (1.112)</td>
<td valign="top" align="left">Drink caffeine (1.727)</td>
<td valign="top" align="left">Bedtime schedule (0.943)</td>
<td valign="top" align="left">Poor focus (0.983)</td>
<td valign="top" align="left">Difficulty getting to sleep (0.825)</td>
<td valign="top" align="left">Sleeping difficulties (2.055)</td>
</tr> <tr>
<td valign="top" align="left">2</td>
<td valign="top" align="left">Sleep difficulties (1.080)</td>
<td valign="top" align="left">Anxiety at night (1.501)</td>
<td valign="top" align="left">Problems with sleeping (0.891)</td>
<td valign="top" align="left">Up to (num) hours (0.901)</td>
<td valign="top" align="left">Disruptive behavior (0.560)</td>
<td valign="top" align="left">Up at night (1.754)</td>
</tr> <tr>
<td valign="top" align="left">3</td>
<td valign="top" align="left">Less than (num) hours (0.788)</td>
<td valign="top" align="left">Limb movements (1.130)</td>
<td valign="top" align="left">Difficulties Sleep (0.720)</td>
<td valign="top" align="left">Inadequate sleep (0.794)</td>
<td valign="top" align="left">Sleeping during the day (0.512)</td>
<td valign="top" align="left">Cannot sleep (1.189)</td>
</tr> <tr>
<td valign="top" align="left">4</td>
<td valign="top" align="left">Sleep (num) hours (0.742)</td>
<td valign="top" align="left">Night waking (0.912)</td>
<td valign="top" align="left">Bedwetting (0.563)</td>
<td valign="top" align="left">Hypersomnia (0.779)</td>
<td valign="top" align="left">Melatonin (0.492)</td>
<td valign="top" align="left">Sleep habits (0.621)</td>
</tr> <tr>
<td valign="top" align="left">5</td>
<td valign="top" align="left">Misbehaves (0.659)</td>
<td valign="top" align="left">Impaired sleep (0.809)</td>
<td valign="top" align="left">Hypersomnolence (0.490)</td>
<td valign="top" align="left">Difficulties sleeping (0.709)</td>
<td valign="top" align="left">Sleep disturbances (0.402)</td>
<td valign="top" align="left">Trouble falling asleep (0.537)</td>
</tr> <tr>
<td valign="top" align="left">6</td>
<td valign="top" align="left">Poor sleep pattern (0.634)</td>
<td valign="top" align="left">Parasomnia (0.721)</td>
<td valign="top" align="left">Up at night (0.466)</td>
<td valign="top" align="left">Sleepy (0.751)</td>
<td valign="top" align="left">Early morning awakening (0.381)</td>
<td valign="top" align="left">Difficulty sleeping (0.491)</td>
</tr> <tr>
<td valign="top" align="left">7</td>
<td valign="top" align="left">Cannot sleep (0.590)</td>
<td valign="top" align="left">Trazodone (0.649)</td>
<td valign="top" align="left">Conduct problems (0.425)</td>
<td valign="top" align="left">Sleeping difficulty (0.709)</td>
<td valign="top" align="left">Napping (0.362)</td>
<td valign="top" align="left">Sleep disturbance (0.352)</td>
</tr> <tr>
<td valign="top" align="left">8</td>
<td valign="top" align="left">Restless sleep (0.569)</td>
<td valign="top" align="left">Day-time sleepiness (0.594)</td>
<td valign="top" align="left">Often awake (0.393)</td>
<td valign="top" align="left">Less than (num) hours (0.585)</td>
<td valign="top" align="left">Disturbance in sleep (0.358)</td>
<td valign="top" align="left">Naps (0.273)</td>
</tr> <tr>
<td valign="top" align="left">9</td>
<td valign="top" align="left">Grouchy (0.545)</td>
<td valign="top" align="left">Melatonin (0.510)</td>
<td valign="top" align="left">Inattention (0.391)</td>
<td valign="top" align="left">Interrupted sleep (0.576)</td>
<td valign="top" align="left">Active (0.316)</td>
<td valign="top" align="left">Difficulty focusing (0.266)</td>
</tr> <tr>
<td valign="top" align="left">10</td>
<td valign="top" align="left">Go to bed (0.540)</td>
<td valign="top" align="left">Clonidine (0.508)</td>
<td valign="top" align="left">Naps (0.382)</td>
<td valign="top" align="left">Difficulty falling asleep (0.549)</td>
<td valign="top" align="left">Inattention (0.312)</td>
<td valign="top" align="left">Wakes up at (0.215)</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>NH, non-Hispanic; Multi, Multiracial.</p>
</table-wrap-foot>
</table-wrap>
</sec>
<sec>
<title>3.3 Racial/ethnic differences in global Peds B-SATED dimensions</title>
<p>Once we unearthed the differences in primary care providers&#x00027; documentation of Peds B-SATED keywords across racial/ethnic subpopulations using LASSO regression, we sought to understand the odds of patients having the presence of global Peds B-SATED dimensions in their clinical notes. Of patients, 16.2% had one or more sleep behavior keywords present in clinical notes, 37.0% had one or more sleep satisfaction/quality keywords present, 25.2% had one or more alertness/daytime sleepiness keywords present, 13.5% had one or more sleep timing keywords present, 34.4% had one or more sleep efficiency keywords present, and only 2.1% had one or more sleep duration keywords present. <xref ref-type="table" rid="T2">Table 2</xref> shows the racial/ethnic differences in global Peds B-SATED dimensions.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Mixed-effects logistic regression models predicting racial/ethnic differences in global Peds B-SATED dimensions.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919497;color:#ffffff">
<th/>
<th valign="top" align="center"><bold>Behaviors</bold></th>
<th valign="top" align="center"><bold>Satisfaction</bold></th>
<th valign="top" align="center"><bold>Alertness</bold></th>
<th valign="top" align="center"><bold>Timing</bold></th>
<th valign="top" align="center"><bold>Efficiency</bold></th>
<th valign="top" align="center"><bold>Duration</bold></th>
</tr>
</thead>
<tbody>
<tr style="background-color:#919497;color:#ffffff">
<td/>
<td valign="top" align="center"><bold>aOR</bold></td>
<td valign="top" align="center"><bold>aOR</bold></td>
<td valign="top" align="center"><bold>aOR</bold></td>
<td valign="top" align="center"><bold>aOR</bold></td>
<td valign="top" align="center"><bold>aOR</bold></td>
<td valign="top" align="center"><bold>aOR</bold></td>
</tr> <tr>
<td valign="top" align="left">NH Black</td>
<td valign="top" align="center">1.02</td>
<td valign="top" align="center">0.59<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">1.16<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">0.89</td>
<td valign="top" align="center">0.81<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">0.51</td>
</tr> <tr>
<td valign="top" align="left">Hispanic</td>
<td valign="top" align="center">0.94</td>
<td valign="top" align="center">0.52<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">0.83<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">1.88<sup>&#x0002A;</sup></td>
<td valign="top" align="center">0.76<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">2.46</td>
</tr> <tr>
<td valign="top" align="left">NH multiracial</td>
<td valign="top" align="center">1.06</td>
<td valign="top" align="center">0.95</td>
<td valign="top" align="center">0.98</td>
<td valign="top" align="center">0.53</td>
<td valign="top" align="center">1.01</td>
<td valign="top" align="center">0.30</td>
</tr> <tr>
<td valign="top" align="left">NH Asian</td>
<td valign="top" align="center">0.68<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">0.49<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">1.04</td>
<td valign="top" align="center">1.99</td>
<td valign="top" align="center">0.62<sup>&#x0002A;&#x0002A;</sup></td>
<td valign="top" align="center">0.31</td>
</tr> <tr>
<td valign="top" align="left">NH other</td>
<td valign="top" align="center">1.22</td>
<td valign="top" align="center">0.75<sup>&#x0002A;</sup></td>
<td valign="top" align="center">0.84</td>
<td valign="top" align="center">3.24<sup>&#x0002A;</sup></td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">2.36</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>Reference = NH white; aOR, adjusted odds ratio; NH, non-Hispanic. Note-level covariates include provider author type and pandemic status. Patient-level covariates include biological sex and age. <sup>&#x0002A;</sup>p &#x0003C; 0.05. <sup>&#x0002A;&#x0002A;</sup>p &#x0003C; 0.01.</p>
</table-wrap-foot>
</table-wrap>
<p>In adjusted models, non-Hispanic Black patients had a lower adjusted odds ratio (aOR = 0.59; 95% CI [0.55, 0.63]; <italic>p</italic> = 0.00) of having the presence of one or more sleep satisfaction keywords, a higher adjusted odds ratio (aOR = 1.16; 95% CI [1.09, 1.24]; <italic>p</italic> = 0.00) of having the presence of one or more alertness/daytime sleepiness keywords, and a lower adjusted odds ratio (aOR = 0.81; 95% CI [0.77, 0.86]; <italic>p</italic> = 0.00) of having the presence of sleep efficiency keywords, compared to non-Hispanic white patients.</p>
<p>In adjusted models, Hispanic patients had a lower adjusted odds ratio (aOR = 0.52; 95% CI [0.48, 0.57]; <italic>p</italic> = 0.00) of having the presence of one or more sleep satisfaction keywords, a lower adjusted odds ratio (aOR = 0.83; 95% CI [0.77, 0.90]; <italic>p</italic> = 0.00) of having the presence of one or more alertness/daytime sleepiness keywords, a higher adjusted odds ratio (aOR = 1.88; 95% CI [1.18, 2.99]; <italic>p</italic> = 0.01) of having the presence of sleep timing keywords, and a lower adjusted odds ratio (aOR = 0.76; 95% CI [0.70, 0.82]; <italic>p</italic> = 0.00) of having the presence of sleep efficiency keywords, compared to non-Hispanic white patients.</p>
<p>In adjusted models, non-Hispanic Multiracial patients had a lower adjusted odds ratio (aOR = 0.62; 95% CI [0.54, 0.71]; <italic>p</italic> = 0.00) of having the presence of sleep efficiency keywords, compared to non-Hispanic white patients.</p>
<p>In adjusted models, non-Hispanic Asian patients had a lower adjusted odds ratio (aOR = 0.68; 95% CI [0.56, 0.83]; <italic>p</italic> = 0.00) of having the presence of one or more sleep behavior keywords, a lower adjusted odds ratio (aOR = 0.49; 95% CI [0.42, 0.58]; <italic>p</italic> = 0.00) of having the presence of one or more sleep satisfaction keywords, and a lower adjusted odds ratio (aOR = 0.62; 95% CI [0.54, 0.71]; <italic>p</italic> = 0.00) of having the presence of sleep efficiency keywords, compared to non-Hispanic white patients.</p>
<p>In adjusted models, non-Hispanic Other Race/Ethnicity patients had a lower adjusted odds ratio (aOR = 0.75; 95% CI [0.60, 0.93]; <italic>p</italic> = 0.01) of having the presence of one or more sleep satisfaction keywords and a higher adjusted odds ratio (aOR = 3.24; 95% CI [1.13, 9.27]; <italic>p</italic> = 0.03) of having the presence of sleep timing keywords, compared to non-Hispanic white patients.</p>
<p>During the COVID-19 pandemic, patients had a lower adjusted odds ratio (aOR = 0.86; 95% CI [0.83, 0.90]; <italic>p</italic> = 0.00) of having the presence of alertness keywords, a higher odds ratio of having the presence of satisfaction keywords (aOR = 1.23; 95% CI [1.18, 1.29]; <italic>p</italic> = 0.00), a lower adjusted odds ratio (aOR = 0.79; 95% CI [0.70, 0.88]; <italic>p</italic> = 0.00) of having the presence of timing keywords, a higher odds ratio of having the presence of efficiency keywords (aOR = 1.23; 95% CI [1.18, 1.29]; <italic>p</italic> = 0.00), a lower adjusted odds ratio (aOR = 0.48; 95% CI [0.34, 0.68]; <italic>p</italic> = 0.00) of having the presence of duration keywords, and a higher odds ratio of having the presence of behavior keywords (aOR = 1.37; 95% CI [1.30, 1.45]; <italic>p</italic> = 0.00), compared to prior to the onset of the pandemic.</p></sec></sec>
<sec sec-type="discussion" id="s4">
<title>4 Discussion</title>
<sec>
<title>4.1 Summary of findings</title>
<p>To our knowledge, this is the first study to assess racial/ethnic differences in NLP training data prior to model training. This study describes LASSO and multilevel modeling approaches that were used to identify these differences in pediatric primary care providers&#x00027; documentation of Peds B-SATED keywords and global dimensions across racial/ethnic subpopulations. In addition, the most frequently documented Peds B-SATED keyword rankings qualitatively differed across racial/ethnic subpopulations. These racial/ethnic differences of documented Peds B-SATED keywords and global dimensions both extend and align with previous studies of providers&#x00027; differential documentation of pediatric sleep problems in pediatric primary care settings (Honaker et al., <xref ref-type="bibr" rid="B15">2018</xref>; Carson et al., <xref ref-type="bibr" rid="B5">2023</xref>). Our findings have important implications for future practices in equitably documenting pediatric sleep problems across racial/ethnic subpopulations, which shape how patients experience the clinical workflow from identification in primary care to referral to sleep specialty settings in pediatric healthcare institutions.</p>
<p>In addition to our main findings, we found that by using the Peds B-SATED keywords documented in primary care providers&#x00027; clinical notes, patients&#x00027; race/ethnicity could be predicted. In addition, the most frequently documented Peds B-SATED keyword rankings qualitatively differed across racial/ethnic subpopulations. These findings indicated that non-Hispanic white patients had keywords that typically noted pediatric sleep problems with more specificity, particularly those that require specialized intervention (Honaker and Saunders, <xref ref-type="bibr" rid="B18">2018</xref>). Yet, among racial/ethnic minoritized patients, keywords commonly included broad descriptions and relevant daytime sequelae of a pediatric sleep problem (e.g., daytime behavior problems, napping, inattentiveness, and irritability). Although these daytime sequelae are necessary for understanding phenotypes of pediatric sleep problems, primary care providers may not have the training to interpret these subclinical characteristics as clinically meaningful proxies for an underlying sleep problem (Golden et al., <xref ref-type="bibr" rid="B10">2023</xref>; Yang et al., <xref ref-type="bibr" rid="B41">2023</xref>). Therefore, it may be a source of concern that these broad descriptions and daytime sequelae are ranked in the top 10 documented keywords for primarily racially/ethnically minoritized patient subpopulations. Future clinical research is needed to further investigate patient&#x02013;provider factors that may influence these differences in documentation observed across racial/ethnic subpopulations (e.g., patient perception of sleep problems and provider screening practices).</p>
<p>We found that the top three global Peds B-SATED dimensions included satisfaction, efficiency, and alertness/daytime sleepiness (i.e., present in &#x0007E;25%&#x02013;37% of patient clinical notes). Although sleep duration is the dimension most commonly monitored in past epidemiologic and population sleep health research, the current study found that only &#x0007E;2% of patients had the presence of keywords/phrases falling in the sleep duration dimension. We also found that the odds of certain global Peds B-SATED dimensions were both lower and higher in racial/ethnic minoritized subpopulations of patients, compared with non-Hispanic white patients. Thus, efforts to prevent racial/ethnic inequities and manage pediatric sleep problems in primary care require routine and multidimensional screening protocols (Meltzer et al., <xref ref-type="bibr" rid="B24">2021</xref>). In addition, the results highlight that it is also important for future work to consider the heterogeneity in patients&#x00027; susceptibility (e.g., social and environmental determinants; co-occurring health problems) that can influence their perception of, and providers&#x00027; identification of, pediatric sleep problems (Rubens et al., <xref ref-type="bibr" rid="B34">2016</xref>; Billings et al., <xref ref-type="bibr" rid="B3">2021</xref>; Reynolds et al., <xref ref-type="bibr" rid="B32">2023</xref>).</p></sec>
<sec>
<title>4.2 Ethical implications</title>
<p>The goal of our research was to apply novel and efficient approaches to support the assessment of racial/ethnic differences in NLP training data (e.g., keywords). We approached this study from a &#x0201C;discovery&#x0201D; perspective in our methods, but this study aligns with the future directions for ethical machine learning in pediatric healthcare settings previously outlined in the literature (Huang et al., <xref ref-type="bibr" rid="B20">2022</xref>; Chen et al., <xref ref-type="bibr" rid="B6">2023</xref>). NLP allows pediatric sleep researchers to expand their reach beyond diagnoses and polysomnography data commonly utilized in past healthcare research (Ramgopal et al., <xref ref-type="bibr" rid="B31">2023</xref>). However, these more recent advances that leverage NLP-extracted data are not exempt from racial/ethnic bias that is inherently shaped by historical bias and informed presence (Phelan et al., <xref ref-type="bibr" rid="B29">2017</xref>; Boch et al., <xref ref-type="bibr" rid="B4">2022</xref>). This highlights the important prioritization of innovation and machine learning ethics in pediatric sleep research at the population level (Mhasawade et al., <xref ref-type="bibr" rid="B25">2021</xref>). Leading scholars have recently outlined key ethical and equity-centered processes to consider when using machine learning in healthcare settings (Boch et al., <xref ref-type="bibr" rid="B4">2022</xref>; Walk et al., <xref ref-type="bibr" rid="B38">2022</xref>; Chen et al., <xref ref-type="bibr" rid="B6">2023</xref>). They deemed the process of identifying and addressing biased patterns in data collection, imbalanced or skewed datasets, to be an important step called <italic>preprocessing</italic> (Huang et al., <xref ref-type="bibr" rid="B20">2022</xref>). This preprocessing process typically occurs prior to model training and deployment to prevent biased machine learning models (Huang et al., <xref ref-type="bibr" rid="B20">2022</xref>). 
Ethical machine learning approaches such as preprocessing are a necessity, not an optional step, to confirm and address any underlying bias in training data sources. This step is particularly important when processing NLP-derived data from clinical documentation and starts at the data collection phase (e.g., clinical protocols for assessment and documentation).</p></sec>
<sec>
<title>4.3 Clinical research implications</title>
<p>Our analyses identified potential differences in clinical documentation of pediatric sleep problems that necessitate future research that examines how primary care shapes the clinical sleep outcomes of racial/ethnic subpopulations. Clinically, these observed differences in our cohort may highlight the importance of clinicians routinely asking about sleep problems in a multidimensional way, which may be a potential pathway for improving the equitable identification of racially/ethnically minoritized patients in pediatric primary care settings (Meltzer et al., <xref ref-type="bibr" rid="B24">2021</xref>). In this pediatric primary care cohort study, we identified differential documentation of Peds B-SATED keywords and global dimensions across racial/ethnic subpopulations, but also a very low presence of various global dimensions in clinical notes overall. For example, the highest prevalence was the presence of the satisfaction dimension at &#x0007E;37%, while the lowest was the duration dimension at &#x0007E;2%. These findings allude to the importance of clinical machine learning to rapidly increase the efficiency of enhancing patients&#x00027; sleep health literacy and education, patient-driven data collection, and the development of clinical decision support tools to aid pediatric primary care providers (Harada et al., <xref ref-type="bibr" rid="B14">2021</xref>; Kang et al., <xref ref-type="bibr" rid="B22">2021</xref>; Ramgopal et al., <xref ref-type="bibr" rid="B31">2023</xref>). Theoretically, efficient clinical workflows for identifying such patients in pediatric primary care can aid research efforts for universal screening of pediatric sleep problems at the population level (Goldstein et al., <xref ref-type="bibr" rid="B11">2020</xref>). 
Improving the quality and increasing the vastness of data collected in pediatric primary settings can also determine influential social determinants of pediatric sleep problems (Huffstetler et al., <xref ref-type="bibr" rid="B21">2022</xref>). With this level of population-level surveillance in reach using both patient self-report and actigraphy, researchers will be able to develop EHR-embedded machine learning tools for primary care providers to recognize predictors and profiles for distinct pediatric sleep phenotypes (Willis et al., <xref ref-type="bibr" rid="B40">2022</xref>). In the age of precision health, these enhanced clinical workflows and tools are key for improving our ability to equitably reach racial/ethnic subpopulations that may benefit the most from targeted and tailored interventions (Seixas et al., <xref ref-type="bibr" rid="B35">2020</xref>; Honaker et al., <xref ref-type="bibr" rid="B16">2022</xref>).</p></sec>
<sec>
<title>4.4 Limitations</title>
<p>Pediatric sleep problems are multidimensional, but the variation of sleep health definitions and phenotypes among race/ethnicity subpopulations remains limited in pediatrics. Therefore, future work is needed to determine whether the differential patterns observed in providers&#x00027; documentation of sleep problems are related to unique differences in symptom presentation or literacy across racial/ethnic subpopulations. More specifically, future research should examine whether variation is due to patient self-report or caregiver report, differing community beliefs and literacy about sleep as a health experience, and/or the social and environmental determinants that are potential drivers of existing pediatric sleep disparities (Reynolds et al., <xref ref-type="bibr" rid="B32">2023</xref>; Yang et al., <xref ref-type="bibr" rid="B41">2023</xref>). Using a cross-sectional retrospective cohort design and EHR data, this study is not capable of determining whether provider&#x02013;patient interactions, implicit cognitive bias of providers or patients, or informed presence influences the ways Peds B-SATED keywords or global dimensions are captured or missed in primary care providers&#x00027; documentation (Phelan et al., <xref ref-type="bibr" rid="B29">2017</xref>). However, these findings do raise some concerns about differences in Peds B-SATED keyword rankings across racial/ethnic subpopulations. The way a provider documents sleep problems influences how patients&#x00027; future providers monitor and treat their pediatric sleep problems as clinically meaningful (Honaker and Saunders, <xref ref-type="bibr" rid="B18">2018</xref>). Consequently, varied provider documentation may yield differential care outcomes across racial/ethnic subpopulations of patients. 
Future studies and replication (e.g., other time periods, clinic settings, and multiple institutions) are needed to understand the factors that cause this differential documentation pattern that we observed across racial/ethnic subpopulations in our pediatric primary care cohort. In addition, the current study included a primarily non-Hispanic Black patient population.</p></sec></sec>
<sec sec-type="conclusions" id="s5">
<title>5 Conclusion</title>
<p>Overall, the purpose of the study was to assess racial/ethnic differences in providers&#x00027; documentation of Peds B-SATED keywords and global dimensions. Our findings unearthed racial/ethnic differences in our training data, using both LASSO and multilevel modeling approaches. The three primary results related to racial/ethnic bias in our NLP training data are both informative and addressable. First, we found that primary care providers&#x00027; documentation of keywords in clinical notes was able to predict patients&#x00027; race/ethnicity and that this difference was most observable among non-Hispanic Black and non-Hispanic white patient subpopulations. Second, the Peds B-SATED keyword rankings qualitatively differed across racial/ethnic subpopulations. Finally, the results of the mixed-effects models revealed that the presence of global dimensions in clinical notes varied between racially/ethnically minoritized patients compared to non-Hispanic white patients. In the end, the findings confirmed that developing standardized guidelines for documenting pediatric sleep problems in pediatric primary care, in collaboration with specialty sleep providers, may be warranted. This also highlights implications for routine and multidimensional screening in pediatric primary care settings, due to providers&#x00027; differential patterns of documenting Peds B-SATED keywords and global dimensions that may contribute to differences in clinical outcomes across racial/ethnic subpopulations. In pediatric sleep research, these findings have important implications for identifying a potential sleep care gap that is preventable in pediatric primary care.</p></sec>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The data analyzed in this study is subject to the following licenses/restrictions: The datasets presented in this article are not readily available because the data from this study is primarily composed of pediatric electronic healthcare record data that cannot be shared for legal, ethical, and privacy restriction purposes (e.g., patient confidentiality and privacy). Requests to access these datasets should be directed to <ext-link ext-link-type="uri" xlink:href="https://www.nationwidechildrens.org/research">https://www.nationwidechildrens.org/research</ext-link>.</p></sec>
<sec sec-type="ethics-statement" id="s7">
<title>Ethics statement</title>
<p>The studies involving humans were approved by the Nationwide Children&#x00027;s Hospital. The studies were conducted in accordance with the local legislation and institutional requirements. Written informed consent for participation was not required from the participants or the participants&#x00027; legal guardians/next of kin in accordance with the national legislation and institutional requirements.</p></sec>
<sec sec-type="author-contributions" id="s8">
<title>Author contributions</title>
<p>MD: Conceptualization, Formal analysis, Funding acquisition, Methodology, Writing&#x02014;original draft, Writing&#x02014;review &#x00026; editing. JS: Conceptualization, Formal analysis, Methodology, Writing&#x02014;original draft, Writing&#x02014;review &#x00026; editing. DC: Conceptualization, Funding acquisition, Supervision, Writing&#x02014;review &#x00026; editing.</p></sec>
</body>
<back>
<sec sec-type="funding-information" id="s9">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research, authorship, and/or publication of this article. MD was supported by the National Heart, Lung, and Blood Institute (1K01HL169493-1; Principal Investigator: MD).</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s10">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Anan</surname> <given-names>Y. H.</given-names></name> <name><surname>Kahn</surname> <given-names>N. F.</given-names></name> <name><surname>Garrison</surname> <given-names>M. M.</given-names></name> <name><surname>McCarty</surname> <given-names>C. A.</given-names></name> <name><surname>Richardson</surname> <given-names>L. P.</given-names></name></person-group> (<year>2023</year>). <article-title>Associations between sleep duration and positive mental health screens during adolescent preventive visits in primary care</article-title>. <source>Acad. Pediatrics.</source> <volume>23</volume>, <fpage>1242</fpage>&#x02013;<lpage>1246</lpage>. <pub-id pub-id-type="doi">10.1016/j.acap.2023.02.013</pub-id><pub-id pub-id-type="pmid">36905952</pub-id></citation></ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bates</surname> <given-names>D.</given-names></name> <name><surname>M&#x000E4;chler</surname> <given-names>M.</given-names></name> <name><surname>Bolker</surname> <given-names>B.</given-names></name> <name><surname>Walker</surname> <given-names>S.</given-names></name></person-group> (<year>2015</year>). <article-title>Fitting linear mixed-effects models using lme4</article-title>. <source>J. Stat. Softw.</source> <volume>67</volume>, <fpage>1</fpage>&#x02013;<lpage>48</lpage>. <pub-id pub-id-type="doi">10.18637/jss.v067.i01</pub-id></citation>
</ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Billings</surname> <given-names>M. E.</given-names></name> <name><surname>Cohen</surname> <given-names>R. T.</given-names></name> <name><surname>Baldwin</surname> <given-names>C. M.</given-names></name> <name><surname>Johnson</surname> <given-names>D. A.</given-names></name> <name><surname>Palen</surname> <given-names>B. N.</given-names></name> <name><surname>Parthasarathy</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Disparities in sleep health and potential intervention models: a focused review</article-title>. <source>Chest</source> <volume>159</volume>, <fpage>1232</fpage>&#x02013;<lpage>1240</lpage>. <pub-id pub-id-type="doi">10.1016/j.chest.2020.09.249</pub-id></citation>
</ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Boch</surname> <given-names>S.</given-names></name> <name><surname>Sezgin</surname> <given-names>E.</given-names></name> <name><surname>Linwood</surname> <given-names>S. L.</given-names></name></person-group> (<year>2022</year>). <article-title>Ethical artificial intelligence in paediatrics</article-title>. <source>The Lancet Child Adoles. Health</source> <volume>6</volume>, <fpage>833</fpage>&#x02013;<lpage>835</lpage>. <pub-id pub-id-type="doi">10.1016/S2352-4642(22)00243-7</pub-id><pub-id pub-id-type="pmid">36084667</pub-id></citation></ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Carson</surname> <given-names>M.</given-names></name> <name><surname>Cicalese</surname> <given-names>O.</given-names></name> <name><surname>Bhandari</surname> <given-names>E.</given-names></name> <name><surname>Stefanovski</surname> <given-names>D.</given-names></name> <name><surname>Fiks</surname> <given-names>A. G.</given-names></name> <name><surname>Mindell</surname> <given-names>J. A.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Discrepancies between caregiver reported early childhood sleep problems and clinician documentation and referral</article-title>. <source>Acad. Pediatrics.</source> <volume>23</volume>, <fpage>1234</fpage>&#x02013;<lpage>1241</lpage>. <pub-id pub-id-type="doi">10.1016/j.acap.2023.02.001</pub-id><pub-id pub-id-type="pmid">36764578</pub-id></citation></ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>R. J.</given-names></name> <name><surname>Wang</surname> <given-names>J. J.</given-names></name> <name><surname>Williamson</surname> <given-names>D. F.</given-names></name> <name><surname>Chen</surname> <given-names>T. Y.</given-names></name> <name><surname>Lipkova</surname> <given-names>J.</given-names></name> <name><surname>Lu</surname> <given-names>M. Y.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Algorithmic fairness in artificial intelligence for medicine and healthcare</article-title>. <source>Nat. Biomed. Eng.</source> <volume>7</volume>, <fpage>719</fpage>&#x02013;<lpage>742</lpage>. <pub-id pub-id-type="doi">10.1038/s41551-023-01056-8</pub-id><pub-id pub-id-type="pmid">37380750</pub-id></citation></ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Clarkson-Townsend</surname> <given-names>D.</given-names></name> <name><surname>Gallagher</surname> <given-names>J. P.</given-names></name> <name><surname>Peterson</surname> <given-names>S. R.</given-names></name> <name><surname>Ndiaye-Gueye</surname> <given-names>S.</given-names></name> <name><surname>Fox</surname> <given-names>K.</given-names></name> <name><surname>Redline</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Is exposure to chemical pollutants associated with sleep outcomes? A systematic review</article-title>. <source>Sleep Med. Rev.</source> <volume>70</volume>, <fpage>101805</fpage>. <pub-id pub-id-type="doi">10.1016/j.smrv.2023.101805</pub-id></citation>
</ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fanta</surname> <given-names>M.</given-names></name> <name><surname>Ladzekpo</surname> <given-names>D.</given-names></name> <name><surname>Unaka</surname> <given-names>N.</given-names></name></person-group> (<year>2021</year>). <article-title>Racism and pediatric health outcomes</article-title>. <source>Curr. Prob. Pediatr. Adoles. Health Care</source> <volume>51</volume>, <fpage>101087</fpage>. <pub-id pub-id-type="doi">10.1016/j.cppeds.2021.101087</pub-id><pub-id pub-id-type="pmid">34711499</pub-id></citation></ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gianfrancesco</surname> <given-names>M. A.</given-names></name> <name><surname>Goldstein</surname> <given-names>N. D.</given-names></name></person-group> (<year>2021</year>). <article-title>A narrative review on the validity of electronic health record-based research in epidemiology</article-title>. <source>BMC Med. Res. Methodol.</source> <volume>21</volume>, <fpage>416</fpage>. <pub-id pub-id-type="doi">10.1186/s12874-021-01416-5</pub-id><pub-id pub-id-type="pmid">34706667</pub-id></citation></ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Golden</surname> <given-names>M. E.</given-names></name> <name><surname>Cosottile</surname> <given-names>M.</given-names></name> <name><surname>Meadows</surname> <given-names>T.</given-names></name> <name><surname>Parikh</surname> <given-names>M. R.</given-names></name> <name><surname>O&#x00027;Dell</surname> <given-names>S. M.</given-names></name></person-group> (<year>2023</year>). <article-title>Primary care providers&#x00027; practices regarding patient sleep: impact of integrated behavioral health</article-title>. <source>Families Syst. Health</source> <volume>41</volume>, <fpage>192</fpage>&#x02013;<lpage>200</lpage>. <pub-id pub-id-type="doi">10.1037/fsh0000695</pub-id><pub-id pub-id-type="pmid">35708923</pub-id></citation></ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Goldstein</surname> <given-names>C. A.</given-names></name> <name><surname>Berry</surname> <given-names>R. B.</given-names></name> <name><surname>Kent</surname> <given-names>D. T.</given-names></name> <name><surname>Kristo</surname> <given-names>D. A.</given-names></name> <name><surname>Seixas</surname> <given-names>A. A.</given-names></name> <name><surname>Redline</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Artificial intelligence in sleep medicine: an American Academy of Sleep Medicine position statement</article-title>. <source>J. Clin. Sleep Med.</source> <volume>16</volume>, <fpage>605</fpage>&#x02013;<lpage>607</lpage>. <pub-id pub-id-type="doi">10.5664/jcsm.8288</pub-id><pub-id pub-id-type="pmid">32022674</pub-id></citation></ref>
<ref id="B12">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gueye-Ndiaye</surname> <given-names>S.</given-names></name> <name><surname>Williamson</surname> <given-names>A. A.</given-names></name> <name><surname>Redline</surname> <given-names>S.</given-names></name></person-group> (<year>2023</year>). <article-title>Disparities in sleep-disordered breathing: upstream risk factors, mechanisms, and implications</article-title>. <source>Clin. Chest Med.</source> <volume>44</volume>, <fpage>585</fpage>&#x02013;<lpage>603</lpage>. <pub-id pub-id-type="doi">10.1016/j.ccm.2023.03.012</pub-id><pub-id pub-id-type="pmid">37517837</pub-id></citation></ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hamilton</surname> <given-names>A. J.</given-names></name> <name><surname>Strauss</surname> <given-names>A. T.</given-names></name> <name><surname>Martinez</surname> <given-names>D. A.</given-names></name> <name><surname>Hinson</surname> <given-names>J. S.</given-names></name> <name><surname>Levin</surname> <given-names>S.</given-names></name> <name><surname>Lin</surname> <given-names>G.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Machine learning and artificial intelligence: applications in healthcare epidemiology</article-title>. <source>Antimicrob. Stewardship Healthcare Epidemiol.</source> <volume>1</volume>, <fpage>192</fpage>. <pub-id pub-id-type="doi">10.1017/ash.2021.192</pub-id><pub-id pub-id-type="pmid">36168500</pub-id></citation></ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Harada</surname> <given-names>T.</given-names></name> <name><surname>Miyagami</surname> <given-names>T.</given-names></name> <name><surname>Kunitomo</surname> <given-names>K.</given-names></name> <name><surname>Shimizu</surname> <given-names>T.</given-names></name></person-group> (<year>2021</year>). <article-title>Clinical decision support systems for diagnosis in primary care: a scoping review</article-title>. <source>Int. J. Environ. Res. Pub. Health</source> <volume>18</volume>, <fpage>8435</fpage>. <pub-id pub-id-type="doi">10.3390/ijerph18168435</pub-id></citation>
</ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Honaker</surname> <given-names>S. M.</given-names></name> <name><surname>Dugan</surname> <given-names>T.</given-names></name> <name><surname>Daftary</surname> <given-names>A.</given-names></name> <name><surname>Davis</surname> <given-names>S.</given-names></name> <name><surname>Saha</surname> <given-names>C.</given-names></name> <name><surname>Baye</surname> <given-names>F.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Unexplained practice variation in primary care providers&#x00027; concern for pediatric obstructive sleep apnea</article-title>. <source>Acad. Pediatrics</source> <volume>18</volume>, <fpage>418</fpage>&#x02013;<lpage>424</lpage>. <pub-id pub-id-type="doi">10.1016/j.acap.2018.01.011</pub-id><pub-id pub-id-type="pmid">29391284</pub-id></citation></ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Honaker</surname> <given-names>S. M.</given-names></name> <name><surname>Gopalkrishnan</surname> <given-names>A.</given-names></name> <name><surname>Brann</surname> <given-names>M.</given-names></name> <name><surname>Wiehe</surname> <given-names>S.</given-names></name> <name><surname>Clark</surname> <given-names>A. A.</given-names></name> <name><surname>Chung</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>&#x02018;It made all the difference&#x02019;: a qualitative study of parental experiences with pediatric obstructive sleep apnea detection</article-title>. <source>J. Clin. Sleep Med.</source> <volume>18</volume>, <fpage>1921</fpage>&#x02013;<lpage>1931</lpage>. <pub-id pub-id-type="doi">10.5664/jcsm.10024</pub-id><pub-id pub-id-type="pmid">35499142</pub-id></citation></ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Honaker</surname> <given-names>S. M.</given-names></name> <name><surname>Meltzer</surname> <given-names>L. J.</given-names></name></person-group> (<year>2016</year>). <article-title>Sleep in pediatric primary care: a review of the literature</article-title>. <source>Sleep Med. Rev.</source> <volume>25</volume>, <fpage>31</fpage>&#x02013;<lpage>39</lpage>. <pub-id pub-id-type="doi">10.1016/j.smrv.2015.01.004</pub-id><pub-id pub-id-type="pmid">26163054</pub-id></citation></ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Honaker</surname> <given-names>S. M.</given-names></name> <name><surname>Saunders</surname> <given-names>T.</given-names></name></person-group> (<year>2018</year>). <article-title>The sleep checkup: sleep screening, guidance, and management in pediatric primary care</article-title>. <source>Clin. Practice Pediatr. Psychol.</source> <volume>6</volume>, <fpage>201</fpage>&#x02013;<lpage>210</lpage>. <pub-id pub-id-type="doi">10.1037/cpp0000227</pub-id></citation>
</ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Honaker</surname> <given-names>S. M.</given-names></name> <name><surname>Street</surname> <given-names>A.</given-names></name> <name><surname>Daftary</surname> <given-names>A. S.</given-names></name> <name><surname>Downs</surname> <given-names>S. M.</given-names></name></person-group> (<year>2019</year>). <article-title>The use of computer decision support for pediatric obstructive sleep apnea detection in primary care</article-title>. <source>J. Clin. Sleep Med.</source> <volume>15</volume>, <fpage>453</fpage>&#x02013;<lpage>462</lpage>. <pub-id pub-id-type="doi">10.5664/jcsm.7674</pub-id><pub-id pub-id-type="pmid">30853049</pub-id></citation></ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>J.</given-names></name> <name><surname>Galal</surname> <given-names>G.</given-names></name> <name><surname>Etemadi</surname> <given-names>M.</given-names></name> <name><surname>Vaidyanathan</surname> <given-names>M.</given-names></name></person-group> (<year>2022</year>). <article-title>Evaluation and mitigation of racial bias in clinical machine learning models: scoping review</article-title>. <source>JMIR Med. Inf.</source> <volume>10</volume>, <fpage>e36388</fpage>. <pub-id pub-id-type="doi">10.2196/36388</pub-id></citation>
</ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Huffstetler</surname> <given-names>A. N.</given-names></name> <name><surname>Epling</surname> <given-names>J.</given-names></name> <name><surname>Krist</surname> <given-names>A. H.</given-names></name></person-group> (<year>2022</year>). <article-title>The need for electronic health records to support delivery of behavioral health preventive services</article-title>. <source>JAMA</source> <volume>328</volume>, <fpage>707</fpage>. <pub-id pub-id-type="doi">10.1001/jama.2022.13391</pub-id><pub-id pub-id-type="pmid">35925570</pub-id></citation></ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kang</surname> <given-names>J.</given-names></name> <name><surname>Hanif</surname> <given-names>M.</given-names></name> <name><surname>Mirza</surname> <given-names>E.</given-names></name> <name><surname>Khan</surname> <given-names>M. A.</given-names></name> <name><surname>Malik</surname> <given-names>M.</given-names></name></person-group> (<year>2021</year>). <article-title>Machine learning in primary care: potential to improve public health</article-title>. <source>J. Med. Eng. Technol.</source> <volume>45</volume>, <fpage>75</fpage>&#x02013;<lpage>80</lpage>. <pub-id pub-id-type="doi">10.1080/03091902.2020.1853839</pub-id><pub-id pub-id-type="pmid">33283565</pub-id></citation></ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Meltzer</surname> <given-names>L. J.</given-names></name> <name><surname>Johnson</surname> <given-names>C.</given-names></name> <name><surname>Crosette</surname> <given-names>J.</given-names></name> <name><surname>Ramos</surname> <given-names>M.</given-names></name> <name><surname>Mindell</surname> <given-names>J. A.</given-names></name></person-group> (<year>2010</year>). <article-title>Prevalence of diagnosed sleep disorders in pediatric primary care practices</article-title>. <source>Pediatrics</source> <volume>125</volume>, <fpage>e1410</fpage>&#x02013;<lpage>e1418</lpage>. <pub-id pub-id-type="doi">10.1542/peds.2009-2725</pub-id><pub-id pub-id-type="pmid">20457689</pub-id></citation></ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Meltzer</surname> <given-names>L. J.</given-names></name> <name><surname>Williamson</surname> <given-names>A. A.</given-names></name> <name><surname>Mindell</surname> <given-names>J. A.</given-names></name></person-group> (<year>2021</year>). <article-title>Pediatric sleep health: it matters, and so does how we define it</article-title>. <source>Sleep Med. Rev.</source> <volume>57</volume>, <fpage>101425</fpage>. <pub-id pub-id-type="doi">10.1016/j.smrv.2021.101425</pub-id><pub-id pub-id-type="pmid">33601324</pub-id></citation></ref>
<ref id="B25">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mhasawade</surname> <given-names>V.</given-names></name> <name><surname>Zhao</surname> <given-names>Y.</given-names></name> <name><surname>Chunara</surname> <given-names>R.</given-names></name></person-group> (<year>2021</year>). <article-title>Machine learning and algorithmic fairness in public and population health</article-title>. <source>Nat. Mach. Int.</source> <volume>3</volume>, <fpage>659</fpage>&#x02013;<lpage>666</lpage>. <pub-id pub-id-type="doi">10.1038/s42256-021-00373-4</pub-id></citation>
</ref>
<ref id="B26">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Mikolov</surname> <given-names>T.</given-names></name> <name><surname>Chen</surname> <given-names>K.</given-names></name> <name><surname>Corrado</surname> <given-names>G.</given-names></name> <name><surname>Dean</surname> <given-names>J.</given-names></name></person-group> (<year>2013</year>). <source>Efficient Estimation of Word Representations in Vector Space</source>. <publisher-loc>Ithaca, NY</publisher-loc>: <publisher-name>Cornell University</publisher-name>.</citation>
</ref>
<ref id="B27">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Moosavinasab</surname> <given-names>S.</given-names></name> <name><surname>Sezgin</surname> <given-names>E.</given-names></name> <name><surname>Sun</surname> <given-names>H.</given-names></name> <name><surname>Hoffman</surname> <given-names>J.</given-names></name> <name><surname>Huang</surname> <given-names>Y.</given-names></name> <name><surname>Lin</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>DeepSuggest: using neural networks to suggest related keywords for a comprehensive search of clinical notes</article-title>. <source>ACI Open</source> <volume>5</volume>, <fpage>e1</fpage>&#x02013;<lpage>e12</lpage>. <pub-id pub-id-type="doi">10.1055/s-0041-1729982</pub-id></citation>
</ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mosher</surname> <given-names>W. A.</given-names></name> <name><surname>Piccinini-Vallis</surname> <given-names>H.</given-names></name></person-group> (<year>2022</year>). <article-title>Assessing the frequency with which primary care providers address sleep of infants and young children</article-title>. <source>J. Sleep Res.</source> <volume>31</volume>, <fpage>e13579</fpage>. <pub-id pub-id-type="doi">10.1111/jsr.13579</pub-id><pub-id pub-id-type="pmid">35253293</pub-id></citation></ref>
<ref id="B29">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Phelan</surname> <given-names>M.</given-names></name> <name><surname>Bhavsar</surname> <given-names>N. A.</given-names></name> <name><surname>Goldstein</surname> <given-names>B. A.</given-names></name></person-group> (<year>2017</year>). <article-title>Illustrating informed presence bias in electronic health records data: how patient interactions with a health system can impact inference</article-title>. <source>EGEMS</source> <volume>5</volume>, <fpage>22</fpage>. <pub-id pub-id-type="doi">10.5334/egems.243</pub-id><pub-id pub-id-type="pmid">29930963</pub-id></citation></ref>
<ref id="B30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rahman</surname> <given-names>N.</given-names></name> <name><surname>Mozer</surname> <given-names>R.</given-names></name> <name><surname>McHugh</surname> <given-names>R. K.</given-names></name> <name><surname>Rockett</surname> <given-names>I. R.</given-names></name> <name><surname>Chow</surname> <given-names>C. M.</given-names></name> <name><surname>Vaughan</surname> <given-names>G.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Using natural language processing to improve suicide classification requires consideration of race</article-title>. <source>Suicide Life Threat. Behav.</source> <volume>52</volume>, <fpage>782</fpage>&#x02013;<lpage>791</lpage>. <pub-id pub-id-type="doi">10.1111/sltb.12862</pub-id><pub-id pub-id-type="pmid">35384040</pub-id></citation></ref>
<ref id="B31">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ramgopal</surname> <given-names>S.</given-names></name> <name><surname>Sanchez-Pinto</surname> <given-names>L. N.</given-names></name> <name><surname>Horvat</surname> <given-names>C. M.</given-names></name> <name><surname>Carroll</surname> <given-names>M. S.</given-names></name> <name><surname>Luo</surname> <given-names>Y.</given-names></name> <name><surname>Florin</surname> <given-names>T. A.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Artificial intelligence-based clinical decision support in pediatrics</article-title>. <source>Pediatric Res.</source> <volume>93</volume>, <fpage>334</fpage>&#x02013;<lpage>341</lpage>. <pub-id pub-id-type="doi">10.1038/s41390-022-02226-1</pub-id><pub-id pub-id-type="pmid">35906317</pub-id></citation></ref>
<ref id="B32">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Reynolds</surname> <given-names>A. M.</given-names></name> <name><surname>Spaeth</surname> <given-names>A. M.</given-names></name> <name><surname>Hale</surname> <given-names>L.</given-names></name> <name><surname>Williamson</surname> <given-names>A. A.</given-names></name> <name><surname>LeBourgeois</surname> <given-names>M. K.</given-names></name> <name><surname>Wong</surname> <given-names>S. D.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Pediatric sleep: current knowledge, gaps, and opportunities for the future</article-title>. <source>Sleep</source> <volume>46</volume>, <fpage>zsad060</fpage>. <pub-id pub-id-type="doi">10.1093/sleep/zsad060</pub-id><pub-id pub-id-type="pmid">36881684</pub-id></citation></ref>
<ref id="B33">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rozier</surname> <given-names>M. D.</given-names></name> <name><surname>Patel</surname> <given-names>K. K.</given-names></name> <name><surname>Cross</surname> <given-names>D. A.</given-names></name></person-group> (<year>2022</year>). <article-title>Electronic health records as biased tools or tools against bias: a conceptual model</article-title>. <source>Milbank Q.</source> <volume>100</volume>, <fpage>134</fpage>&#x02013;<lpage>150</lpage>. <pub-id pub-id-type="doi">10.1111/1468-0009.12545</pub-id><pub-id pub-id-type="pmid">34812541</pub-id></citation></ref>
<ref id="B34">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rubens</surname> <given-names>S. L.</given-names></name> <name><surname>Patrick</surname> <given-names>K. E.</given-names></name> <name><surname>Williamson</surname> <given-names>A. A.</given-names></name> <name><surname>Moore</surname> <given-names>M.</given-names></name> <name><surname>Mindell</surname> <given-names>J. A.</given-names></name></person-group> (<year>2016</year>). <article-title>Individual and socio-demographic factors related to presenting problem and diagnostic impressions at a pediatric sleep clinic</article-title>. <source>Sleep Med.</source> <volume>25</volume>, <fpage>67</fpage>&#x02013;<lpage>72</lpage>. <pub-id pub-id-type="doi">10.1016/j.sleep.2016.06.017</pub-id><pub-id pub-id-type="pmid">27823719</pub-id></citation></ref>
<ref id="B35">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Seixas</surname> <given-names>A. A.</given-names></name> <name><surname>Moore</surname> <given-names>J.</given-names></name> <name><surname>Chung</surname> <given-names>A.</given-names></name> <name><surname>Robbins</surname> <given-names>R.</given-names></name> <name><surname>Grandner</surname> <given-names>M.</given-names></name> <name><surname>Rogers</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Benefits of community-based approaches in assessing and addressing sleep health and sleep-related cardiovascular disease risk: a precision and personalized population health approach</article-title>. <source>Curr. Hypert. Rep.</source> <volume>22</volume>, <fpage>151</fpage>. <pub-id pub-id-type="doi">10.1007/s11906-020-01051-3</pub-id><pub-id pub-id-type="pmid">32671477</pub-id></citation></ref>
<ref id="B36">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sun</surname> <given-names>M.</given-names></name> <name><surname>Oliwa</surname> <given-names>T.</given-names></name> <name><surname>Peek</surname> <given-names>M. E.</given-names></name> <name><surname>Tung</surname> <given-names>E. L.</given-names></name></person-group> (<year>2022</year>). <article-title>Negative patient descriptors: documenting racial bias in the electronic health record</article-title>. <source>Health Affairs</source> <volume>41</volume>, <fpage>203</fpage>&#x02013;<lpage>211</lpage>. <pub-id pub-id-type="doi">10.1377/hlthaff.2021.01423</pub-id><pub-id pub-id-type="pmid">35044842</pub-id></citation></ref>
<ref id="B37">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Thompson</surname> <given-names>H. M.</given-names></name> <name><surname>Sharma</surname> <given-names>B.</given-names></name> <name><surname>Bhalla</surname> <given-names>S.</given-names></name> <name><surname>Boley</surname> <given-names>R.</given-names></name> <name><surname>McCluskey</surname> <given-names>C.</given-names></name> <name><surname>Dligach</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Bias and fairness assessment of a natural language processing opioid misuse classifier: detection and mitigation of electronic health record data disadvantages across racial subgroups</article-title>. <source>J. Am. Med. Inf. Assoc.</source> <volume>28</volume>, <fpage>2393</fpage>&#x02013;<lpage>2403</lpage>. <pub-id pub-id-type="doi">10.1093/jamia/ocab148</pub-id><pub-id pub-id-type="pmid">34383925</pub-id></citation></ref>
<ref id="B38">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bear Don&#x00027;t Walk</surname> <given-names>O. J.</given-names></name> <name><surname>Reyes Nieva</surname> <given-names>H.</given-names></name> <name><surname>Lee</surname> <given-names>S. S.</given-names></name> <name><surname>Elhadad</surname> <given-names>N.</given-names></name></person-group> (<year>2022</year>). <article-title>A scoping review of ethics considerations in clinical natural language processing</article-title>. <source>JAMIA Open</source> <volume>5</volume>, <fpage>ooac039</fpage>. <pub-id pub-id-type="doi">10.1093/jamiaopen/ooac039</pub-id></citation>
</ref>
<ref id="B39">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Williamson</surname> <given-names>A. A.</given-names></name> <name><surname>Soehner</surname> <given-names>A. M.</given-names></name> <name><surname>Boyd</surname> <given-names>R. C.</given-names></name> <name><surname>Buysse</surname> <given-names>D. J.</given-names></name> <name><surname>Harvey</surname> <given-names>A. G.</given-names></name> <name><surname>Jonassaint</surname> <given-names>C. R.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>A protocol for applying health equity-informed implementation science models and frameworks to adapt a sleep intervention for adolescents at risk for suicidal thoughts and behaviors</article-title>. <source>Front. Pub. Health</source> <volume>10</volume>, <fpage>971754</fpage>. <pub-id pub-id-type="doi">10.3389/fpubh.2022.971754</pub-id><pub-id pub-id-type="pmid">36311565</pub-id></citation></ref>
<ref id="B40">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Willis</surname> <given-names>V. C.</given-names></name> <name><surname>Thomas Craig</surname> <given-names>K. J.</given-names></name> <name><surname>Jabbarpour</surname> <given-names>Y.</given-names></name> <name><surname>Scheufele</surname> <given-names>E. L.</given-names></name> <name><surname>Arriaga</surname> <given-names>Y. E.</given-names></name> <name><surname>Ajinkya</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Digital health interventions to enhance prevention in primary care: scoping review</article-title>. <source>JMIR Med. Inf.</source> <volume>10</volume>, <fpage>e33518</fpage>. <pub-id pub-id-type="doi">10.2196/33518</pub-id></citation>
</ref>
<ref id="B41">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>S.</given-names></name> <name><surname>Varghese</surname> <given-names>P.</given-names></name> <name><surname>Stephenson</surname> <given-names>E.</given-names></name> <name><surname>Tu</surname> <given-names>K.</given-names></name> <name><surname>Gronsbell</surname> <given-names>J.</given-names></name></person-group> (<year>2023</year>). <article-title>Machine learning approaches for electronic health records phenotyping: a methodical review</article-title>. <source>J. Am. Med. Inf. Assoc.</source> <volume>30</volume>, <fpage>367</fpage>&#x02013;<lpage>381</lpage>. <pub-id pub-id-type="doi">10.1093/jamia/ocac216</pub-id></citation>
</ref>
<ref id="B42">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yip</surname> <given-names>T.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Xie</surname> <given-names>M.</given-names></name> <name><surname>Ip</surname> <given-names>P. S.</given-names></name> <name><surname>Fowle</surname> <given-names>J.</given-names></name> <name><surname>Buckhalt</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>School start times, sleep, and youth outcomes: a meta-analysis</article-title>. <source>Pediatrics</source> <volume>149</volume>, <fpage>e054068</fpage>. <pub-id pub-id-type="doi">10.1542/peds.2021-054068</pub-id><pub-id pub-id-type="pmid">35593065</pub-id></citation></ref>
</ref-list>
</back>
</article> 