<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Psychiatry</journal-id>
<journal-title-group>
<journal-title>Frontiers in Psychiatry</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Psychiatry</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1664-0640</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpsyt.2026.1758503</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Cultural validation of the RCADS and use of ensemble learning for symptom profiling of anxiety and depression</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Hussain</surname><given-names>Zamir</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3220436/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Hasan</surname><given-names>Mahnoor</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3301058/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Zaman</surname><given-names>Mehwish</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3131087/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Shamsi</surname><given-names>Syeda Aneela Zahra</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2931700/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Hamdan</surname><given-names>Qurrat Ulain</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Afzal</surname><given-names>Haseeba</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3388085/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>School of Interdisciplinary Engineering and Sciences (SINES), National University of Sciences and Technology (NUST)</institution>, <city>Islamabad</city>,&#xa0;<country country="pk">Pakistan</country></aff>
<aff id="aff2"><label>2</label><institution>Department of Statistical Science, University of Padua</institution>, <city>Padova</city>,&#xa0;<country country="it">Italy</country></aff>
<aff id="aff3"><label>3</label><institution>Institute of Psychiatry, Rawalpindi Medical University</institution>, <city>Rawalpindi</city>,&#xa0;<country country="pk">Pakistan</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Mehwish Zaman, <email xlink:href="mailto:mehwish.zaman@studenti.unipd.it">mehwish.zaman@studenti.unipd.it</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-27">
<day>27</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>17</volume>
<elocation-id>1758503</elocation-id>
<history>
<date date-type="received">
<day>01</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>03</day>
<month>02</month>
<year>2026</year>
</date>
<date date-type="rev-recd">
<day>01</day>
<month>02</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Hussain, Hasan, Zaman, Shamsi, Hamdan and Afzal.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Hussain, Hasan, Zaman, Shamsi, Hamdan and Afzal</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-27">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Depression and anxiety are the most prevalent global mental health concerns, especially among children and adolescents. Numerous screening tools are available to readily detect these issues. The cultural significance of these tools in specific communities should be validated, as socio-demographic factors can influence psychopathology. Moreover, screening tools are limited to the identification of a disorder and do not highlight critical symptoms that may be more dominant in disease progression.</p>
</sec>
<sec>
<title>Methods</title>
<p>In this study, a community sample of 237 Pakistani children and adolescents was used to validate the cultural significance of the Revised Child Anxiety and Depression Scale (RCADS) and its subscales, and develop machine learning (ML) models for profiling of the most significant symptoms of anxiety and depression.</p>
</sec>
<sec>
<title>Results</title>
<p>Cronbach&#x2019;s alpha for all subscales of RCADS except Separation Anxiety Disorder (SAD) and Obsessive-Compulsive Disorder (OCD) was above 0.7. Chi-square tests between each item of RCADS and the disorders showed that only gender and grade level of patients did not have statistically significant associations with the majority of the scales. Lastly, four ML algorithms were trained, of which Random Forests exhibited the best performance with accuracies ranging from 0.85 to 0.98. The Gini importance calculated for each item in these models highlights the most dominant symptoms contributing to each disorder.</p>
</sec>
<sec>
<title>Conclusion</title>
<p>Overall, the study shows that all 47 individual items in RCADS are culturally significant for the screening of anxiety and depressive disorders in Pakistani populations; however, the subscales for SAD and OCD warrant some modifications due to low Cronbach&#x2019;s alpha values. The ML algorithms yield satisfactory to exceptional metrics, suggesting that these models may be adapted as efficient screening support systems in clinical settings. However, external validation of the models on unseen data is necessary before practical implementation.</p>
</sec>
</abstract>
<kwd-group>
<kwd>anxiety</kwd>
<kwd>depression</kwd>
<kwd>machine learning</kwd>
<kwd>mental health</kwd>
<kwd>RCADS</kwd>
</kwd-group>
<funding-group>
<award-group id="gs1">
<funding-source id="sp1">
<institution-wrap>
<institution>National University of Sciences and Technology</institution>
<institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open_funder_registry">10.13039/501100007278</institution-id>
</institution-wrap>
</funding-source>
<award-id rid="sp1">IRP-24-09</award-id>
</award-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This study was supported by the National University of Sciences and Technology under Grant Number IRP-24-09 (Date: 19-08-2024). The Open Access funding has been provided by Universit&#xe0; degli Studi di Padova | University of Padua, Open Science Committee.</funding-statement>
</funding-group>
<counts>
<fig-count count="1"/>
<table-count count="9"/>
<equation-count count="0"/>
<ref-count count="40"/>
<page-count count="12"/>
<word-count count="6246"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Computational Psychiatry</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Anxiety and depressive disorders are the most prevalent mental disorders, with 301 million and 280 million individuals affected worldwide, respectively (<xref ref-type="bibr" rid="B1">1</xref>, <xref ref-type="bibr" rid="B2">2</xref>). Mental disorders can develop at any age, where early years of life like childhood and adolescence are more vulnerable due to being turbulent and dynamic stages of growth and development (<xref ref-type="bibr" rid="B3">3</xref>). A meta-analysis published in 2015 estimates the global prevalence of anxiety and depressive disorders in children and adolescents at 6.5% and 2.6%, respectively (<xref ref-type="bibr" rid="B4">4</xref>). Similar reviews conducted after COVID-19 reveal that the prevalence of childhood anxiety and depression has increased to 20.5% and 25.2%, respectively (<xref ref-type="bibr" rid="B5">5</xref>). The presentation of these disorders at a young age is often dismissed as transient mood lability and emotional bursts during puberty. However, it is estimated that almost 50% of mental disorders diagnosed in adulthood actually begin to develop during childhood and adolescence (<xref ref-type="bibr" rid="B6">6</xref>, <xref ref-type="bibr" rid="B7">7</xref>). If these disorders remain untreated, they can become quite debilitating and affect an individual&#x2019;s quality of life by reducing productivity and socialization. Early detection and timely treatment of these disorders can lead to better outcomes and improved prognosis as compared to later treatment in adulthood (<xref ref-type="bibr" rid="B8">8</xref>).</p>
<p>Numerous screening tools are used in clinical practice to readily detect the presence of mental disorders. A few tools used for screening anxiety and depressive disorders in young populations include the Revised Child Anxiety and Depression Scale (RCADS), Screen for Child Anxiety Related Disorders (SCARED), and Spence Children&#x2019;s Anxiety Scale (SCAS) (<xref ref-type="bibr" rid="B9">9</xref>&#x2013;<xref ref-type="bibr" rid="B11">11</xref>). While these tools are convenient to quickly detect the presence or risk of potential issues, they are limited to broadly label patients as &#x201c;normal&#x201d;, &#x201c;borderline&#x201d;, &#x201c;at risk&#x201d;, and/or &#x201c;clinical&#x201d;. The specific behaviors or symptoms that predominantly contribute to the development or progression of a disorder cannot be elucidated through these screening measures alone. Such evaluation and symptom profiling is critical for the optimization and personalization of therapeutic strategies to improve patient outcomes (<xref ref-type="bibr" rid="B12">12</xref>). However, repeated sessions with patients are required to pinpoint critical behaviors. This process is also dependent on the clinician&#x2019;s experience and can be subject to clinical bias. An objective and methodical approach is required to highlight dominant symptoms and behaviors early in the screening process to readily formulate personalized treatment plans. The advent of Artificial Intelligence (AI) has introduced computational processes that are able to &#x201c;learn&#x201d; from patterns in data and extract critical information based on its mathematical significance or &#x201c;contribution&#x201d; in the process. Each variable is assigned a mathematical weight or importance metric depending on its effect upon the final outcome. 
This technique can be applied to screening tools to identify which specific questions of the tool (and subsequently, the associated symptoms) contribute significantly to disease progression.</p>
<p>Machine Learning (ML) is a field of AI which focuses on the development of computational techniques inspired by the human brain&#x2019;s ability to learn, adapt, and improve (<xref ref-type="bibr" rid="B13">13</xref>). Over the past decade, the predictive capabilities of ML have been extensively researched in the field of healthcare for the development of fast and effective screening tools that can serve as clinical decision supports for healthcare professionals (<xref ref-type="bibr" rid="B14">14</xref>, <xref ref-type="bibr" rid="B15">15</xref>). Coupled with clinicians&#x2019; insights, such ML-based approaches can also prove to be beneficial aids for the prediction of mental disorders (<xref ref-type="bibr" rid="B16">16</xref>). Many studies have examined various machine learning algorithms on multidimensional data (demographics, socio-economic information, results of screening and diagnostic tests) for the early detection of depression and anxiety. Haque et&#xa0;al. developed models for the prediction of depression in Australian children using Random Forests with an accuracy of 95% (<xref ref-type="bibr" rid="B17">17</xref>), and another study by the same authors reported models based on Gaussian Na&#xef;ve Bayes and Random Forest to be the best predictors of Obsessive-Compulsive Disorder, Separation Anxiety Disorder, and Attention-Deficit/Hyperactivity Disorder with accuracies ranging from 79% to 91% (<xref ref-type="bibr" rid="B18">18</xref>). A study on Palestinian adolescents used Support Vector Machines to develop models that can predict depression and anxiety at accuracies above 92% (<xref ref-type="bibr" rid="B19">19</xref>). Nemesure et&#xa0;al. used biometric and demographic data of university students to develop predictive models using an ensemble approach, where the models provided accuracies of 0.73 for Generalized Anxiety Disorder and 0.67 for Major Depressive Disorder (<xref ref-type="bibr" rid="B20">20</xref>). 
The primary objective of these studies was to develop a quick computational process for the prediction of mental disorders rather than focus on dominant behaviors or patterns for symptom profiling. There are a few studies that have utilized ensemble ML methods for the characterization of social risk factors that affect overall mental well-being of susceptible populations like children exposed to politically volatile circumstances (<xref ref-type="bibr" rid="B21">21</xref>). However, the utilization of such techniques for symptom profiling for anxiety and depressive disorders has not been reported thus far.</p>
<p>Moreover, there are no published studies that have analyzed these techniques on a Pakistani cohort. In addition to individual characteristics, environmental and social factors like poverty and inequality can also have detrimental effects on an individual&#x2019;s mental well-being (<xref ref-type="bibr" rid="B3">3</xref>). Such problems are quite prevalent in low- and middle-income countries (LMICs) like Pakistan, where mental health stigma and uninformed cultural beliefs exacerbate the burden of mental disorders. Mental health conditions are frequently interpreted through moral or religious lenses, often perceived as personal weakness or insufficient faith, which discourages help-seeking behavior and contributes to underdiagnosis (<xref ref-type="bibr" rid="B22">22</xref>). The prevalence of collectivist family structures also tends to impose conformity, emotional restraint, and prioritization of family honor over individual psychological needs. Additionally, socioeconomic stressors, political instability, and limited access to mental health services interact with these cultural dynamics, worsening the prevalence and persistence of depression and anxiety in the Pakistani context. Therefore, determining the cultural significance of individual interrogatories or items of screening tools is also required for the optimization of mental healthcare.</p>
<p>The following two aims were central to this study:</p>
<list list-type="simple">
<list-item>
<p>i. Develop an ML-based process for the symptom profiling of anxiety and depressive disorders to serve as a foundation for personalized mental healthcare.</p></list-item>
<list-item>
<p>ii. Use statistical techniques for the cultural evaluation of the Revised Child Anxiety and Depression Scale using a community sample of Pakistani children and adolescents.</p></list-item>
</list>
</sec>
<sec id="s2">
<label>2</label>
<title>Methods</title>
<p>All statistical and computational processes have been implemented in the programming language Python, version 3.10, using Colab notebooks.</p>
<sec id="s2_1">
<label>2.1</label>
<title>Screening tool</title>
<p>The Revised Child Anxiety and Depression Scale (RCADS) has been selected for this research as it simultaneously screens for different anxiety and depressive disorders in children and adolescents and is widely used by clinicians around the world. It has also been translated into Urdu, the national language of Pakistan, and psychometrically validated in a Pakistani cohort (<xref ref-type="bibr" rid="B23">23</xref>). RCADS is a 47-item scale used to screen for the risk of borderline and clinical anxiety and depressive disorders in children and adolescents aged 8 to 18 years (<xref ref-type="bibr" rid="B9">9</xref>). All 47 items are used to assess the risk of Total Internalizing Issues. These are then divided into two main scales for Major Depressive Disorder (MDD) and Total Anxiety. The latter scale of anxiety is further divided into 5 subscales for Generalized Anxiety Disorder (GAD), Separation Anxiety Disorder (SAD), Social Phobia or Social Anxiety Disorder (SP), Panic Disorder (PD), and Obsessive Compulsive Disorder (OCD). All these scales are scored using a 4-point system. The initial scores are then converted to corresponding T-scores according to the patient&#x2019;s gender and grade in school. T-scores below 65 are considered &#x201c;Normal&#x201d;, T-scores from 65 to 69 are &#x201c;Borderline&#x201d;, and T-scores of 70 or above might indicate &#x201c;Clinical&#x201d; cases, warranting further professional intervention.</p>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Data description</title>
<p>The study received ethical approval from the Institutional Review Board at the first author&#x2019;s affiliated institution under Application No. 2024-IRB-A-06/06 on 22-02-2024. Secondary data collected from both clinical (outpatient psychiatry department) and non-clinical (primary and secondary schools) settings under the supervision of the psychiatrist in our research team was used. For clinical settings, a consecutive sampling method was employed where patients within the age range of 8&#x2013;18 years referred for emotional problems were recruited. For non-clinical settings, a purposive sampling method was used where teachers referred students who were exhibiting emotional issues. A total of 285 samples were collected, 138 from clinical and 147 from non-clinical settings. As no statistically significant differences between clinical and non-clinical samples were obtained after applying independent-samples t-tests (<xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>), the two datasets were combined.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Results of independent sample t-tests applied between clinical and non-clinical samples.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">No.</th>
<th valign="middle" align="center">Scale/disorder</th>
<th valign="middle" align="center">Levene&#x2019;s test for equality of variances (p-value)</th>
<th valign="middle" align="center">t-test for equality of means (p-value)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">Total Internalizing Issues</td>
<td valign="middle" align="center">0.00</td>
<td valign="middle" align="center">0.06</td>
</tr>
<tr>
<td valign="middle" align="center">2</td>
<td valign="middle" align="center">Total Anxiety</td>
<td valign="middle" align="center">0.41</td>
<td valign="middle" align="center">0.75</td>
</tr>
<tr>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">MDD</td>
<td valign="middle" align="center">0.00</td>
<td valign="middle" align="center">0.02</td>
</tr>
<tr>
<td valign="middle" align="center">4</td>
<td valign="middle" align="center">GAD</td>
<td valign="middle" align="center">0.08</td>
<td valign="middle" align="center">0.37</td>
</tr>
<tr>
<td valign="middle" align="center">5</td>
<td valign="middle" align="center">SAD</td>
<td valign="middle" align="center">0.20</td>
<td valign="middle" align="center">0.12</td>
</tr>
<tr>
<td valign="middle" align="center">6</td>
<td valign="middle" align="center">SP</td>
<td valign="middle" align="center">0.00</td>
<td valign="middle" align="center">0.08</td>
</tr>
<tr>
<td valign="middle" align="center">7</td>
<td valign="middle" align="center">PD</td>
<td valign="middle" align="center">0.00</td>
<td valign="middle" align="center">0.05</td>
</tr>
<tr>
<td valign="middle" align="center">8</td>
<td valign="middle" align="center">OCD</td>
<td valign="middle" align="center">0.03</td>
<td valign="middle" align="center">0.24</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>After pre-processing and removal of missing values (see <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary File: Table 2.docx</bold></xref>), the final dataset consisted of 237 samples with 179 (75.5%) females and 58 (24.5%) males (<xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>). While limited, sample sizes in this range have been used in published research, particularly in studies employing behavioral features for classification tasks using machine learning (<xref ref-type="bibr" rid="B24">24</xref>&#x2013;<xref ref-type="bibr" rid="B28">28</xref>). The recruited participants were in Primary (25.7%), Middle (32.1%), and High School (42.2%). Their ages ranged from 8 years to 17 years, which have been grouped according to their education level. Scoring and labelling of the participants according to the criteria of RCADS revealed that the majority were Normal and not at risk of anxiety and depressive disorders, except for SAD where Normal (42.2%) and Clinical (43.5%) cases were almost the same. Borderline cases were the fewest for all disorders.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Data description.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">No.</th>
<th valign="middle" align="center">Features</th>
<th valign="middle" align="center">Categories</th>
<th valign="middle" align="center">n (%)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="2" align="center">1</td>
<td valign="middle" rowspan="2" align="center">Gender</td>
<td valign="middle" align="center">Male</td>
<td valign="middle" align="center">58 (24.5)</td>
</tr>
<tr>
<td valign="middle" align="center">Female</td>
<td valign="middle" align="center">179 (75.5)</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">2</td>
<td valign="middle" rowspan="3" align="center">Education</td>
<td valign="middle" align="center">Primary School</td>
<td valign="middle" align="center">61 (25.7)</td>
</tr>
<tr>
<td valign="middle" align="center">Middle School</td>
<td valign="middle" align="center">76 (32.1)</td>
</tr>
<tr>
<td valign="middle" align="center">High School</td>
<td valign="middle" align="center">100 (42.2)</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">3</td>
<td valign="middle" rowspan="3" align="center">Ages</td>
<td valign="middle" align="center">8&#x2013;10 years</td>
<td valign="middle" align="center">61 (25.7)</td>
</tr>
<tr>
<td valign="middle" align="center">11&#x2013;13 years</td>
<td valign="middle" align="center">76 (32.1)</td>
</tr>
<tr>
<td valign="middle" align="center">14&#x2013;17 years</td>
<td valign="middle" align="center">100 (42.2)</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">4</td>
<td valign="middle" rowspan="3" align="center">Prevalence of Internalizing Problems</td>
<td valign="middle" align="center">Normal</td>
<td valign="middle" align="center">157 (66.2)</td>
</tr>
<tr>
<td valign="middle" align="center">Borderline</td>
<td valign="middle" align="center">22 (9.3)</td>
</tr>
<tr>
<td valign="middle" align="center">Clinical</td>
<td valign="middle" align="center">58 (24.5)</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">5</td>
<td valign="middle" rowspan="3" align="center">Prevalence of Overall Anxiety</td>
<td valign="middle" align="center">Normal</td>
<td valign="middle" align="center">132 (55.7)</td>
</tr>
<tr>
<td valign="middle" align="center">Borderline</td>
<td valign="middle" align="center">23 (9.7)</td>
</tr>
<tr>
<td valign="middle" align="center">Clinical</td>
<td valign="middle" align="center">82 (34.6)</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">6</td>
<td valign="middle" rowspan="3" align="center">Prevalence of MDD</td>
<td valign="middle" align="center">Normal</td>
<td valign="middle" align="center">165 (69.6)</td>
</tr>
<tr>
<td valign="middle" align="center">Borderline</td>
<td valign="middle" align="center">21 (8.9)</td>
</tr>
<tr>
<td valign="middle" align="center">Clinical</td>
<td valign="middle" align="center">51 (21.5)</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">7</td>
<td valign="middle" rowspan="3" align="center">Prevalence of GAD</td>
<td valign="middle" align="center">Normal</td>
<td valign="middle" align="center">201 (84.8)</td>
</tr>
<tr>
<td valign="middle" align="center">Borderline</td>
<td valign="middle" align="center">14 (5.9)</td>
</tr>
<tr>
<td valign="middle" align="center">Clinical</td>
<td valign="middle" align="center">22 (9.3)</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">8</td>
<td valign="middle" rowspan="3" align="center">Prevalence of SAD</td>
<td valign="middle" align="center">Normal</td>
<td valign="middle" align="center">100 (42.2)</td>
</tr>
<tr>
<td valign="middle" align="center">Borderline</td>
<td valign="middle" align="center">34 (14.3)</td>
</tr>
<tr>
<td valign="middle" align="center">Clinical</td>
<td valign="middle" align="center">103 (43.5)</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">9</td>
<td valign="middle" rowspan="3" align="center">Prevalence of SP</td>
<td valign="middle" align="center">Normal</td>
<td valign="middle" align="center">147 (62.1)</td>
</tr>
<tr>
<td valign="middle" align="center">Borderline</td>
<td valign="middle" align="center">20 (8.4)</td>
</tr>
<tr>
<td valign="middle" align="center">Clinical</td>
<td valign="middle" align="center">70 (29.5)</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">10</td>
<td valign="middle" rowspan="3" align="center">Prevalence of PD</td>
<td valign="middle" align="center">Normal</td>
<td valign="middle" align="center">204 (86.1)</td>
</tr>
<tr>
<td valign="middle" align="center">Borderline</td>
<td valign="middle" align="center">16 (6.7)</td>
</tr>
<tr>
<td valign="middle" align="center">Clinical</td>
<td valign="middle" align="center">17 (7.2)</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">11</td>
<td valign="middle" rowspan="3" align="center">Prevalence of OCD</td>
<td valign="middle" align="center">Normal</td>
<td valign="middle" align="center">163 (68.8)</td>
</tr>
<tr>
<td valign="middle" align="center">Borderline</td>
<td valign="middle" align="center">31 (13.1)</td>
</tr>
<tr>
<td valign="middle" align="center">Clinical</td>
<td valign="middle" align="center">43 (18.1)</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Cultural evaluation of RCADS</title>
<sec id="s2_3_1">
<label>2.3.1</label>
<title>Reliability analysis</title>
<p>To evaluate the psychometric properties and reliability of RCADS for Pakistani children and adolescents, a brief reliability analysis was performed on RCADS where Cronbach&#x2019;s alpha has been estimated for each scale. This is a statistical measure for the reliability and validity of questionnaires or surveys, where values above 0.7 indicate that their constituents correlate adequately, resulting in a satisfactory measure of the problem or factor being evaluated (<xref ref-type="bibr" rid="B29">29</xref>). This analysis also describes how the deletion of any items from the questionnaire or survey affects the value of alpha, which informs if any amendments are required to improve overall reliability and validity. Item-total correlations for each constituent are also calculated, where values ranging from 0.2 to 0.6 indicate moderate correlations. This shows that the survey being evaluated focuses on the same topic or theme, but does not contain repetitive or redundant items.</p>
</sec>
<sec id="s2_3_2">
<label>2.3.2</label>
<title>Association analysis</title>
<p>The association of each item of RCADS with the presence of anxiety and depressive disorders has been investigated using Chi-square (&#x3c7;<sup>2</sup>) tests. The level of significance for these statistical tests has been set at 0.05, where results with a p-value less than 0.05 will be considered statistically significant. This analysis will determine if there are any items in RCADS that are not statistically significant in Pakistani populations. These items will be omitted resulting in a &#x3c7;<sup>2</sup>-subset of RCADS. The effect of the removal will be evaluated in subsequent steps.</p>
</sec>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Machine learning for symptom profiling</title>
<sec id="s2_4_1">
<label>2.4.1</label>
<title>Resolving class imbalance</title>
<p>From the data description in <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>, it is clear that the three categories of &#x201c;Normal&#x201d;, &#x201c;Borderline&#x201d;, and &#x201c;Clinical&#x201d; are not equally distributed in the data, which leads to a &#x201c;class imbalance&#x201d; that can impair the performance of ML algorithms due to bias. This issue has been rectified through the Synthetic Minority Over-sampling TEchnique for Nominal data (SMOTE-N). The method generates synthetic minority class instances by randomly selecting a minority instance and identifying its <italic>k</italic> nearest neighbors using the Value Difference Metric (VDM) (<xref ref-type="bibr" rid="B30">30</xref>). In this study, the default value of <italic>k</italic> = 5 has been applied. Additional synthetic instances were generated for minority classes to match the number of cases of the majority class, ensuring a more balanced distribution in the dataset (<xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref>).</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Number of instances in the data before and after resampling with SMOTE-N.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">No.</th>
<th valign="middle" align="center">Scale/disorder</th>
<th valign="middle" align="center">Categories</th>
<th valign="middle" align="center">Instances before resampling (n)</th>
<th valign="middle" align="center">Instances after resampling (n)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="3" align="center">1</td>
<td valign="middle" rowspan="3" align="center">Total Internalizing Issues</td>
<td valign="middle" align="center">Normal</td>
<td valign="middle" align="center">157</td>
<td valign="middle" align="center">157</td>
</tr>
<tr>
<td valign="middle" align="center">Borderline</td>
<td valign="middle" align="center">22</td>
<td valign="middle" align="center">157</td>
</tr>
<tr>
<td valign="middle" align="center">Clinical</td>
<td valign="middle" align="center">58</td>
<td valign="middle" align="center">157</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">2</td>
<td valign="middle" rowspan="3" align="center">Total Anxiety</td>
<td valign="middle" align="center">Normal</td>
<td valign="middle" align="center">132</td>
<td valign="middle" align="center">132</td>
</tr>
<tr>
<td valign="middle" align="center">Borderline</td>
<td valign="middle" align="center">23</td>
<td valign="middle" align="center">132</td>
</tr>
<tr>
<td valign="middle" align="center">Clinical</td>
<td valign="middle" align="center">82</td>
<td valign="middle" align="center">132</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">3</td>
<td valign="middle" rowspan="3" align="center">MDD</td>
<td valign="middle" align="center">Normal</td>
<td valign="middle" align="center">165</td>
<td valign="middle" align="center">165</td>
</tr>
<tr>
<td valign="middle" align="center">Borderline</td>
<td valign="middle" align="center">21</td>
<td valign="middle" align="center">165</td>
</tr>
<tr>
<td valign="middle" align="center">Clinical</td>
<td valign="middle" align="center">51</td>
<td valign="middle" align="center">165</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">4</td>
<td valign="middle" rowspan="3" align="center">GAD</td>
<td valign="middle" align="center">Normal</td>
<td valign="middle" align="center">201</td>
<td valign="middle" align="center">201</td>
</tr>
<tr>
<td valign="middle" align="center">Borderline</td>
<td valign="middle" align="center">14</td>
<td valign="middle" align="center">201</td>
</tr>
<tr>
<td valign="middle" align="center">Clinical</td>
<td valign="middle" align="center">22</td>
<td valign="middle" align="center">201</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">5</td>
<td valign="middle" rowspan="3" align="center">SAD</td>
<td valign="middle" align="center">Normal</td>
<td valign="middle" align="center">100</td>
<td valign="middle" align="center">103</td>
</tr>
<tr>
<td valign="middle" align="center">Borderline</td>
<td valign="middle" align="center">34</td>
<td valign="middle" align="center">103</td>
</tr>
<tr>
<td valign="middle" align="center">Clinical</td>
<td valign="middle" align="center">103</td>
<td valign="middle" align="center">103</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">6</td>
<td valign="middle" rowspan="3" align="center">SP</td>
<td valign="middle" align="center">Normal</td>
<td valign="middle" align="center">204</td>
<td valign="middle" align="center">204</td>
</tr>
<tr>
<td valign="middle" align="center">Borderline</td>
<td valign="middle" align="center">16</td>
<td valign="middle" align="center">204</td>
</tr>
<tr>
<td valign="middle" align="center">Clinical</td>
<td valign="middle" align="center">17</td>
<td valign="middle" align="center">204</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">7</td>
<td valign="middle" rowspan="3" align="center">PD</td>
<td valign="middle" align="center">Normal</td>
<td valign="middle" align="center">147</td>
<td valign="middle" align="center">147</td>
</tr>
<tr>
<td valign="middle" align="center">Borderline</td>
<td valign="middle" align="center">20</td>
<td valign="middle" align="center">147</td>
</tr>
<tr>
<td valign="middle" align="center">Clinical</td>
<td valign="middle" align="center">70</td>
<td valign="middle" align="center">147</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">8</td>
<td valign="middle" rowspan="3" align="center">OCD</td>
<td valign="middle" align="center">Normal</td>
<td valign="middle" align="center">163</td>
<td valign="middle" align="center">163</td>
</tr>
<tr>
<td valign="middle" align="center">Borderline</td>
<td valign="middle" align="center">31</td>
<td valign="middle" align="center">163</td>
</tr>
<tr>
<td valign="middle" align="center">Clinical</td>
<td valign="middle" align="center">43</td>
<td valign="middle" align="center">163</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2_4_2">
<label>2.4.2</label>
<title>ML algorithms</title>
<p>Four ML algorithms viz. Decision Tree (DT), Random Forest (RF), Support Vector Machine (SVM), and Logistic Regression (LR) are selected to develop the proposed predictive models for anxiety and depression. These algorithms have been shortlisted due to their frequent application in related studies (<xref ref-type="bibr" rid="B17">17</xref>&#x2013;<xref ref-type="bibr" rid="B19">19</xref>). Primarily, default parameters as defined in the <italic>scikit-learn</italic> library are retained for the ML algorithms after iteratively investigating different hyperparameters until best performance was achieved (<xref ref-type="bibr" rid="B31">31</xref>). However, a few modifications in the hyperparameters were made as follows:</p>
<list list-type="bullet">
<list-item>
<p>For RF, maximum depth of the trees was set to <italic>10</italic> instead of the default <italic>None</italic>.</p></list-item>
<list-item>
<p>For SVM, the <italic>radial basis function</italic> or <italic>rbf</italic> kernel has been used for multi-class classification.</p></list-item>
<list-item>
<p>For LR, models with <italic>200</italic> iterations have been developed instead of the default of <italic>100</italic>.</p></list-item>
</list>
<p>The training, testing, and validation of the algorithms have been done using 5-fold cross-validation, where the data is divided into 5 parts or &#x201c;folds&#x201d; that are iteratively used as a testing set. The average performance of the algorithms is then computed by calculating the mean of the accuracies or cross-validation (CV) scores obtained on each testing set. Such techniques limit the possibility of issues like over-fitting, which can impair the algorithm&#x2019;s performance on real-world data (<xref ref-type="bibr" rid="B32">32</xref>).</p>
<p>Average CV scores, accuracy, macro average of recall or sensitivity (proportion of Borderline and Clinical cases that are correctly predicted), and macro average of specificity (proportion of Normal cases that are correctly predicted) of the algorithms have been used to evaluate model performance. Among these, recall is of utmost importance as the efficiency of clinical decision support is based on its ability to accurately identify Borderline and Clinical cases. These metrics have been analyzed to determine which of the four algorithms results in the most efficient predictive model.</p>
<p>The best algorithm will then be trained separately on the &#x3c7;<sup>2</sup>-subset of RCADS formed after association analysis. The results obtained using all items and the &#x3c7;<sup>2</sup>-subset will be compared to determine whether the removal of insignificant items impacts predictive efficiency. In addition to the aforementioned performance metrics, Cohen&#x2019;s Kappa coefficient (&#x3ba;) will also be calculated, which is a measure of agreement between the ground truth and the predictions made by an algorithm. To ensure cohesive results, the algorithms will make predictions on the same test sets so that the obtained values of &#x3ba; for both sets of features are comparable. Then, McNemar&#x2019;s test will be performed to determine if the difference in the values of &#x3ba; is statistically significant. The significance level will be set to 0.05.</p>
<p>From the final algorithm, the significance or importance of predictive features will be determined to highlight the most significant symptoms or factors contributing to specific depressive and anxiety disorders.</p>
</sec>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Reliability analysis</title>
<p><xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref> shows the Cronbach&#x2019;s alpha calculated for all scales of RCADS. The value of alpha is above the recommended threshold of 0.7 for all except SAD and OCD, where the values are 0.63 and 0.64, respectively. While these values are below the defined threshold, they are still moderate and can be considered somewhat satisfactory. However, these results reflect that the subscales for SAD and OCD warrant slight modifications for Pakistani pediatric populations. Additionally, deletion of any item from the scales decreases the value of alpha (see <xref ref-type="supplementary-material" rid="SM1"><bold>Supplementary File: Table 1.docx</bold></xref>), indicating that the original structure of RCADS constitutes a satisfactory measure for depressive and anxiety disorders in a Pakistani cohort, except for SAD and OCD, where further studies are required, possibly with regard to the addition of more items or the modification of present items.</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Values of Cronbach&#x2019;s alpha for all scales of RCADS.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">No.</th>
<th valign="middle" align="center">Scale/disorder</th>
<th valign="middle" align="center">Cronbach&#x2019;s alpha</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">Total Internalizing Issues</td>
<td valign="middle" align="center">0.93</td>
</tr>
<tr>
<td valign="middle" align="center">2</td>
<td valign="middle" align="center">Total Anxiety</td>
<td valign="middle" align="center">0.91</td>
</tr>
<tr>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">MDD</td>
<td valign="middle" align="center">0.83</td>
</tr>
<tr>
<td valign="middle" align="center">4</td>
<td valign="middle" align="center">GAD</td>
<td valign="middle" align="center">0.72</td>
</tr>
<tr>
<td valign="middle" align="center">5</td>
<td valign="middle" align="center">SAD</td>
<td valign="middle" align="center">0.63</td>
</tr>
<tr>
<td valign="middle" align="center">6</td>
<td valign="middle" align="center">SP</td>
<td valign="middle" align="center">0.78</td>
</tr>
<tr>
<td valign="middle" align="center">7</td>
<td valign="middle" align="center">PD</td>
<td valign="middle" align="center">0.81</td>
</tr>
<tr>
<td valign="middle" align="center">8</td>
<td valign="middle" align="center">OCD</td>
<td valign="middle" align="center">0.64</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Association analysis</title>
<p>The majority of the items showed significant association with their respective disorders, which is consistent with the satisfactory alpha values obtained during the reliability analysis. Only a few variables did not have significant results; these are shown in <xref ref-type="table" rid="T5"><bold>Table&#xa0;5</bold></xref>. For all scales, the variable of Gender is not significantly associated. Grade is not significantly associated with Total Anxiety, MDD, GAD, SAD, SP, and PD, while Total Internalizing Issues is the only scale where 2 items of RCADS are not significantly associated. These variables have been removed to create &#x3c7;<sup>2</sup>-subsets of each scale, which have been subsequently used to train ML algorithms to determine whether their removal influences predictive performance.</p>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>Items of RCADS that do not have statistically significant associations with the disorders.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">No.</th>
<th valign="middle" align="center">Scale/disorder</th>
<th valign="middle" align="center">Items</th>
<th valign="middle" align="center">&#x3c7;<sup>2</sup></th>
<th valign="middle" align="center">P-value</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="3" align="center">1</td>
<td valign="middle" rowspan="3" align="center">Total Internalizing Issues</td>
<td valign="middle" align="center">Item 5: I would feel afraid of being on my own at home</td>
<td valign="middle" align="center">5.16</td>
<td valign="middle" align="center">0.52</td>
</tr>
<tr>
<td valign="middle" align="center">Item 9: I worry about being away from my parents</td>
<td valign="middle" align="center">5.15</td>
<td valign="middle" align="center">0.52</td>
</tr>
<tr>
<td valign="middle" align="center">Gender</td>
<td valign="middle" align="center">1.23</td>
<td valign="middle" align="center">0.54</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">2</td>
<td valign="middle" rowspan="2" align="center">Total Anxiety</td>
<td valign="middle" align="center">Gender</td>
<td valign="middle" align="center">0.52</td>
<td valign="middle" align="center">0.77</td>
</tr>
<tr>
<td valign="middle" align="center">Grade</td>
<td valign="middle" align="center">27.29</td>
<td valign="middle" align="center">0.13</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">3</td>
<td valign="middle" rowspan="2" align="center">MDD</td>
<td valign="middle" align="center">Gender</td>
<td valign="middle" align="center">1.07</td>
<td valign="middle" align="center">0.59</td>
</tr>
<tr>
<td valign="middle" align="center">Grade</td>
<td valign="middle" align="center">26.29</td>
<td valign="middle" align="center">0.16</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">4</td>
<td valign="middle" rowspan="2" align="center">GAD</td>
<td valign="middle" align="center">Gender</td>
<td valign="middle" align="center">1.88</td>
<td valign="middle" align="center">0.39</td>
</tr>
<tr>
<td valign="middle" align="center">Grade</td>
<td valign="middle" align="center">22.11</td>
<td valign="middle" align="center">0.34</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">5</td>
<td valign="middle" rowspan="2" align="center">SAD</td>
<td valign="middle" align="center">Gender</td>
<td valign="middle" align="center">0.16</td>
<td valign="middle" align="center">0.92</td>
</tr>
<tr>
<td valign="middle" align="center">Grade</td>
<td valign="middle" align="center">26.68</td>
<td valign="middle" align="center">0.14</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">6</td>
<td valign="middle" rowspan="2" align="center">SP</td>
<td valign="middle" align="center">Gender</td>
<td valign="middle" align="center">3.37</td>
<td valign="middle" align="center">0.18</td>
</tr>
<tr>
<td valign="middle" align="center">Grade</td>
<td valign="middle" align="center">24.07</td>
<td valign="middle" align="center">0.24</td>
</tr>
<tr>
<td valign="middle" rowspan="2" align="center">7</td>
<td valign="middle" rowspan="2" align="center">PD</td>
<td valign="middle" align="center">Gender</td>
<td valign="middle" align="center">3.46</td>
<td valign="middle" align="center">0.18</td>
</tr>
<tr>
<td valign="middle" align="center">Grade</td>
<td valign="middle" align="center">23.19</td>
<td valign="middle" align="center">0.21</td>
</tr>
<tr>
<td valign="middle" align="center">8</td>
<td valign="middle" align="center">OCD</td>
<td valign="middle" align="center">Gender</td>
<td valign="middle" align="center">4.02</td>
<td valign="middle" align="center">0.13</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Machine learning for symptom profiling</title>
<p>The performance of the four algorithms is compared with each other to deduce the most efficient predictive model (<xref ref-type="table" rid="T6"><bold>Table&#xa0;6</bold></xref>). For all scales, RF results in the highest metrics. SVM generally provides the second-best results (DT ranks second for GAD and PD), with the performance being quite comparable for the scales of Total Internalizing Issues, SAD, and SP, where both RF and SVM exhibit the same specificity.</p>
<table-wrap id="T6" position="float">
<label>Table&#xa0;6</label>
<caption>
<p>Performance metrics of the four algorithms trained on all features for all scales of RCADS.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">No.</th>
<th valign="middle" rowspan="2" align="center">Scale/disorder</th>
<th valign="middle" rowspan="2" align="center">Algorithm</th>
<th valign="middle" colspan="4" align="center">Performance metrics</th>
</tr>
<tr>
<th valign="middle" align="center">Accuracy</th>
<th valign="middle" align="center">Recall</th>
<th valign="middle" align="center">Specificity</th>
<th valign="middle" align="center">Average CVS</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="4" align="center">1</td>
<td valign="middle" rowspan="4" align="center">Total Internalizing Issues</td>
<td valign="middle" align="center"><italic>DT</italic></td>
<td valign="middle" align="center">0.88</td>
<td valign="middle" align="center">0.88</td>
<td valign="middle" align="center">0.94</td>
<td valign="middle" align="center">0.88</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>RF</italic></td>
<td valign="middle" align="center">0.95</td>
<td valign="middle" align="center">0.95</td>
<td valign="middle" align="center">0.97</td>
<td valign="middle" align="center">0.95</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>SVM</italic></td>
<td valign="middle" align="center">0.94</td>
<td valign="middle" align="center">0.94</td>
<td valign="middle" align="center">0.97</td>
<td valign="middle" align="center">0.94</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>LR</italic></td>
<td valign="middle" align="center">0.93</td>
<td valign="middle" align="center">0.93</td>
<td valign="middle" align="center">0.96</td>
<td valign="middle" align="center">0.93</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">2</td>
<td valign="middle" rowspan="4" align="center">Total Anxiety</td>
<td valign="middle" align="center"><italic>DT</italic></td>
<td valign="middle" align="center">0.84</td>
<td valign="middle" align="center">0.84</td>
<td valign="middle" align="center">0.91</td>
<td valign="middle" align="center">0.84</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>RF</italic></td>
<td valign="middle" align="center">0.90</td>
<td valign="middle" align="center">0.90</td>
<td valign="middle" align="center">0.95</td>
<td valign="middle" align="center">0.90</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>SVM</italic></td>
<td valign="middle" align="center">0.88</td>
<td valign="middle" align="center">0.88</td>
<td valign="middle" align="center">0.94</td>
<td valign="middle" align="center">0.88</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>LR</italic></td>
<td valign="middle" align="center">0.87</td>
<td valign="middle" align="center">0.87</td>
<td valign="middle" align="center">0.93</td>
<td valign="middle" align="center">0.87</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">3</td>
<td valign="middle" rowspan="4" align="center">MDD</td>
<td valign="middle" align="center"><italic>DT</italic></td>
<td valign="middle" align="center">0.90</td>
<td valign="middle" align="center">0.90</td>
<td valign="middle" align="center">0.95</td>
<td valign="middle" align="center">0.90</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>RF</italic></td>
<td valign="middle" align="center">0.96</td>
<td valign="middle" align="center">0.96</td>
<td valign="middle" align="center">0.98</td>
<td valign="middle" align="center">0.96</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>SVM</italic></td>
<td valign="middle" align="center">0.93</td>
<td valign="middle" align="center">0.93</td>
<td valign="middle" align="center">0.97</td>
<td valign="middle" align="center">0.93</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>LR</italic></td>
<td valign="middle" align="center">0.93</td>
<td valign="middle" align="center">0.93</td>
<td valign="middle" align="center">0.96</td>
<td valign="middle" align="center">0.93</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">4</td>
<td valign="middle" rowspan="4" align="center">GAD</td>
<td valign="middle" align="center"><italic>DT</italic></td>
<td valign="middle" align="center">0.96</td>
<td valign="middle" align="center">0.96</td>
<td valign="middle" align="center">0.97</td>
<td valign="middle" align="center">0.95</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>RF</italic></td>
<td valign="middle" align="center">0.97</td>
<td valign="middle" align="center">0.97</td>
<td valign="middle" align="center">0.98</td>
<td valign="middle" align="center">0.97</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>SVM</italic></td>
<td valign="middle" align="center">0.92</td>
<td valign="middle" align="center">0.92</td>
<td valign="middle" align="center">0.96</td>
<td valign="middle" align="center">0.92</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>LR</italic></td>
<td valign="middle" align="center">0.90</td>
<td valign="middle" align="center">0.90</td>
<td valign="middle" align="center">0.95</td>
<td valign="middle" align="center">0.90</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">5</td>
<td valign="middle" rowspan="4" align="center">SAD</td>
<td valign="middle" align="center"><italic>DT</italic></td>
<td valign="middle" align="center">0.78</td>
<td valign="middle" align="center">0.78</td>
<td valign="middle" align="center">0.88</td>
<td valign="middle" align="center">0.79</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>RF</italic></td>
<td valign="middle" align="center">0.85</td>
<td valign="middle" align="center">0.85</td>
<td valign="middle" align="center">0.91</td>
<td valign="middle" align="center">0.84</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>SVM</italic></td>
<td valign="middle" align="center">0.84</td>
<td valign="middle" align="center">0.84</td>
<td valign="middle" align="center">0.91</td>
<td valign="middle" align="center">0.84</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>LR</italic></td>
<td valign="middle" align="center">0.83</td>
<td valign="middle" align="center">0.83</td>
<td valign="middle" align="center">0.91</td>
<td valign="middle" align="center">0.83</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">6</td>
<td valign="middle" rowspan="4" align="center">SP</td>
<td valign="middle" align="center"><italic>DT</italic></td>
<td valign="middle" align="center">0.96</td>
<td valign="middle" align="center">0.96</td>
<td valign="middle" align="center">0.98</td>
<td valign="middle" align="center">0.96</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>RF</italic></td>
<td valign="middle" align="center">0.98</td>
<td valign="middle" align="center">0.98</td>
<td valign="middle" align="center">0.99</td>
<td valign="middle" align="center">0.98</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>SVM</italic></td>
<td valign="middle" align="center">0.97</td>
<td valign="middle" align="center">0.97</td>
<td valign="middle" align="center">0.99</td>
<td valign="middle" align="center">0.97</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>LR</italic></td>
<td valign="middle" align="center">0.97</td>
<td valign="middle" align="center">0.97</td>
<td valign="middle" align="center">0.98</td>
<td valign="middle" align="center">0.97</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">7</td>
<td valign="middle" rowspan="4" align="center">PD</td>
<td valign="middle" align="center"><italic>DT</italic></td>
<td valign="middle" align="center">0.89</td>
<td valign="middle" align="center">0.89</td>
<td valign="middle" align="center">0.94</td>
<td valign="middle" align="center">0.89</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>RF</italic></td>
<td valign="middle" align="center">0.92</td>
<td valign="middle" align="center">0.92</td>
<td valign="middle" align="center">0.96</td>
<td valign="middle" align="center">0.93</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>SVM</italic></td>
<td valign="middle" align="center">0.88</td>
<td valign="middle" align="center">0.88</td>
<td valign="middle" align="center">0.94</td>
<td valign="middle" align="center">0.88</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>LR</italic></td>
<td valign="middle" align="center">0.88</td>
<td valign="middle" align="center">0.88</td>
<td valign="middle" align="center">0.93</td>
<td valign="middle" align="center">0.88</td>
</tr>
<tr>
<td valign="middle" rowspan="4" align="center">8</td>
<td valign="middle" rowspan="4" align="center">OCD</td>
<td valign="middle" align="center"><italic>DT</italic></td>
<td valign="middle" align="center">0.88</td>
<td valign="middle" align="center">0.88</td>
<td valign="middle" align="center">0.93</td>
<td valign="middle" align="center">0.87</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>RF</italic></td>
<td valign="middle" align="center">0.93</td>
<td valign="middle" align="center">0.93</td>
<td valign="middle" align="center">0.97</td>
<td valign="middle" align="center">0.93</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>SVM</italic></td>
<td valign="middle" align="center">0.89</td>
<td valign="middle" align="center">0.89</td>
<td valign="middle" align="center">0.94</td>
<td valign="middle" align="center">0.89</td>
</tr>
<tr>
<td valign="middle" align="center"><italic>LR</italic></td>
<td valign="middle" align="center">0.87</td>
<td valign="middle" align="center">0.87</td>
<td valign="middle" align="center">0.93</td>
<td valign="middle" align="center">0.87</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In the next step, the performance of RF trained using all items of RCADS has been compared with that of RF trained using the &#x3c7;<sup>2</sup>-subsets (<xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>). In the case of Total Internalizing Issues, all metrics improve after removal of Item 5, Item 9, and Gender. In the case of Total Anxiety, accuracy and recall improve after the removal of Grade and Gender, and in the case of OCD, the same improvement is observed after removal of Gender only. For MDD and GAD, the metrics remain unchanged; however, the confusion matrices in <xref ref-type="table" rid="T7"><bold>Table&#xa0;7</bold></xref> reveal that RFs trained on the &#x3c7;<sup>2</sup>-subsets make slightly more correct predictions. In the case of SAD, SP, and PD, the performance of RF decreases after the removal of Grade and Gender.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Comparison between random forests trained on all features of RCADS and the &#x3c7;<sup>2</sup>-subsets.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyt-17-1758503-g001.tif">
<alt-text content-type="machine-generated">Composite of eight bar graphs comparing Random Forests classification metrics&#x2014;accuracy, recall, specificity, and average CVS&#x2014;for internalizing issues, anxiety, MDD, GAD, SAD, SP, PD, and OCD. Each plot presents performance for all features and chi-squared subset, with specificity generally highest and recall lower. Light blue bars represent all features; dark blue bars represent &#x3c7;2-subset. Data trends and differences are visually compared across disorders.</alt-text>
</graphic></fig>
<table-wrap id="T7" position="float">
<label>Table&#xa0;7</label>
<caption>
<p>Confusion matrices of random forests trained on all features and the &#x3c7;<sup>2</sup>-subsets for MDD and GAD.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">No.</th>
<th valign="middle" rowspan="2" align="center">Disorder</th>
<th valign="middle" rowspan="2" align="center">Set of features</th>
<th valign="middle" rowspan="2" colspan="2" align="center">Class labels</th>
<th valign="middle" colspan="3" align="center">Predicted</th>
</tr>
<tr>
<th valign="middle" align="center">Normal</th>
<th valign="middle" align="center">Borderline</th>
<th valign="middle" align="center">Clinical</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="6" align="center">1</td>
<td valign="middle" rowspan="6" align="center">MDD</td>
<td valign="middle" rowspan="3" align="center">All features</td>
<td valign="middle" rowspan="12" align="center">Actual</td>
<td valign="middle" align="center">Normal</td>
<td valign="middle" align="center">160</td>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">2</td>
</tr>
<tr>
<td valign="middle" align="center">Borderline</td>
<td valign="middle" align="center">5</td>
<td valign="middle" align="center">154</td>
<td valign="middle" align="center">6</td>
</tr>
<tr>
<td valign="middle" align="center">Clinical</td>
<td valign="middle" align="center">4</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">160</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">&#x3c7;<sup>2</sup>-subset</td>
<td valign="middle" align="center">Normal</td>
<td valign="middle" align="center">161</td>
<td valign="middle" align="center">2</td>
<td valign="middle" align="center">2</td>
</tr>
<tr>
<td valign="middle" align="center">Borderline</td>
<td valign="middle" align="center">6</td>
<td valign="middle" align="center">154</td>
<td valign="middle" align="center">5</td>
</tr>
<tr>
<td valign="middle" align="center">Clinical</td>
<td valign="middle" align="center">5</td>
<td valign="middle" align="center">0</td>
<td valign="middle" align="center">160</td>
</tr>
<tr>
<td valign="middle" rowspan="6" align="center">2</td>
<td valign="middle" rowspan="6" align="center">GAD</td>
<td valign="middle" rowspan="3" align="center">All features</td>
<td valign="middle" align="center">Normal</td>
<td valign="middle" align="center">195</td>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">3</td>
</tr>
<tr>
<td valign="middle" align="center">Borderline</td>
<td valign="middle" align="center">5</td>
<td valign="middle" align="center">194</td>
<td valign="middle" align="center">2</td>
</tr>
<tr>
<td valign="middle" align="center">Clinical</td>
<td valign="middle" align="center">8</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">192</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">&#x3c7;<sup>2</sup>-subset</td>
<td valign="middle" align="center">Normal</td>
<td valign="middle" align="center">196</td>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">4</td>
</tr>
<tr>
<td valign="middle" align="center">Borderline</td>
<td valign="middle" align="center">2</td>
<td valign="middle" align="center">196</td>
<td valign="middle" align="center">3</td>
</tr>
<tr>
<td valign="middle" align="center">Clinical</td>
<td valign="middle" align="center">6</td>
<td valign="middle" align="center">2</td>
<td valign="middle" align="center">193</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>From the previous comparison of performance metrics, a marginal difference is observed between the algorithms trained on all features and the &#x3c7;<sup>2</sup>-subset. Further evaluation was required for more conclusive results. Therefore, Cohen&#x2019;s Kappa coefficient (&#x3ba;) was also evaluated by using the RF algorithms to make predictions on the same test sets (<xref ref-type="table" rid="T8"><bold>Table&#xa0;8</bold></xref>). Comparison between the values of &#x3ba; also yields a similar trend of results as before. In case of Total Internalizing Issues, algorithms trained on the &#x3c7;<sup>2</sup>-subset have higher values of &#x3ba;, showing that this subset resulted in better model agreement between ground truth and predictions. In case of Total Anxiety, MDD, GAD, and OCD, no difference is observed in the values of &#x3ba;. For SAD, SP, and PD, the value of &#x3ba; is higher for models trained on all features. For the scales where differences in the values of &#x3ba; are observed, McNemar&#x2019;s test was also applied, where all tests had p-values above 0.05. This confirms that any difference in the performance of the algorithms has occurred by chance.</p>
<table-wrap id="T8" position="float">
<label>Table&#xa0;8</label>
<caption>
<p>Cohen&#x2019;s Kappa coefficient for random forests trained on all features and the &#x3c7;<sup>2</sup>-subset.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" rowspan="2" align="center">No.</th>
<th valign="middle" rowspan="2" align="center">Scale/disorder</th>
<th valign="middle" colspan="2" align="center">Cohen&#x2019;s kappa coefficient (&#x3ba;)</th>
<th valign="middle" rowspan="2" align="center">McNemar&#x2019;s test (p-value)</th>
</tr>
<tr>
<th valign="middle" align="center">All features</th>
<th valign="middle" align="center">&#x3c7;<sup>2</sup>-subset</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="center">1</td>
<td valign="middle" align="center">Total Internalizing Issues</td>
<td valign="middle" align="center">0.92</td>
<td valign="middle" align="center">0.94</td>
<td valign="middle" align="center">0.81</td>
</tr>
<tr>
<td valign="middle" align="center">2</td>
<td valign="middle" align="center">Total Anxiety</td>
<td valign="middle" align="center">0.87</td>
<td valign="middle" align="center">0.87</td>
<td valign="middle" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">MDD</td>
<td valign="middle" align="center">0.94</td>
<td valign="middle" align="center">0.94</td>
<td valign="middle" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="middle" align="center">4</td>
<td valign="middle" align="center">GAD</td>
<td valign="middle" align="center">0.95</td>
<td valign="middle" align="center">0.95</td>
<td valign="middle" align="center">&#x2013;</td>
</tr>
<tr>
<td valign="middle" align="center">5</td>
<td valign="middle" align="center">SAD</td>
<td valign="middle" align="center">0.77</td>
<td valign="middle" align="center">0.73</td>
<td valign="middle" align="center">0.09</td>
</tr>
<tr>
<td valign="middle" align="center">6</td>
<td valign="middle" align="center">SP</td>
<td valign="middle" align="center">0.96</td>
<td valign="middle" align="center">0.95</td>
<td valign="middle" align="center">0.72</td>
</tr>
<tr>
<td valign="middle" align="center">7</td>
<td valign="middle" align="center">PD</td>
<td valign="middle" align="center">0.89</td>
<td valign="middle" align="center">0.86</td>
<td valign="middle" align="center">0.93</td>
</tr>
<tr>
<td valign="middle" align="center">8</td>
<td valign="middle" align="center">OCD</td>
<td valign="middle" align="center">0.91</td>
<td valign="middle" align="center">0.91</td>
<td valign="middle" align="center">&#x2013;</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>For symptom profiling, the Gini importance of the features used to train the final RF models was analyzed. This is a measure for the reduction in misclassification or Gini impurity contributed by each feature in the model. Higher values of Gini importance indicate higher contribution of a feature towards the reduction of impurity or error. For each scale of RCADS, the top 3 features with the&#xa0;highest Gini importance are discussed ahead (<xref ref-type="table" rid="T9"><bold>Table&#xa0;9</bold></xref>). As Gini importance can be biased and inflate the significance of features (<xref ref-type="bibr" rid="B33">33</xref>), a permutation test was also applied at a significance level of 0.05. The p-values of the permutation test are also reported, which show that the observed importance of the features is statistically significant.</p>
<table-wrap id="T9" position="float">
<label>Table&#xa0;9</label>
<caption>
<p>Top 3 features with the highest Gini importance for each random forest of RCADS and its subscales.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="center">No.</th>
<th valign="middle" align="center">Scale/disorder</th>
<th valign="middle" align="center">Top 3 features</th>
<th valign="middle" align="center">Gini importance</th>
<th valign="middle" align="center">Permutation test (p-value)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" rowspan="3" align="center">1</td>
<td valign="middle" rowspan="3" align="center">Total Internalizing Issues</td>
<td valign="middle" align="center">Item 34: All of a sudden I feel really scared for no reason at all</td>
<td valign="middle" align="center">0.08</td>
<td valign="middle" align="center">0.00</td>
</tr>
<tr>
<td valign="middle" align="center">Item 15: I have problems with my appetite</td>
<td valign="middle" align="center">0.07</td>
<td valign="middle" align="center">0.00</td>
</tr>
<tr>
<td valign="middle" align="center">Item 41: I worry that I will suddenly get a scared feeling when there is nothing to be afraid of</td>
<td valign="middle" align="center">0.06</td>
<td valign="middle" align="center">0.02</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">2</td>
<td valign="middle" rowspan="3" align="center">Total Anxiety</td>
<td valign="middle" align="center">Item 24: When I have a problem, my heart beats really fast</td>
<td valign="middle" align="center">0.08</td>
<td valign="middle" align="center">0.01</td>
</tr>
<tr>
<td valign="middle" align="center">Item 4: I worry when I think I have done poorly at something</td>
<td valign="middle" align="center">0.08</td>
<td valign="middle" align="center">0.00</td>
</tr>
<tr>
<td valign="middle" align="center">Item 27: I worry that something bad will happen to me</td>
<td valign="middle" align="center">0.06</td>
<td valign="middle" align="center">0.00</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">3</td>
<td valign="middle" rowspan="3" align="center">MDD</td>
<td valign="middle" align="center">Item 40: I feel like I don&#x2019;t want to move</td>
<td valign="middle" align="center">0.23</td>
<td valign="middle" align="center">0.01</td>
</tr>
<tr>
<td valign="middle" align="center">Item 6: Nothing is much fun anymore</td>
<td valign="middle" align="center">0.16</td>
<td valign="middle" align="center">0.01</td>
</tr>
<tr>
<td valign="middle" align="center">Item 19: I have no energy for things</td>
<td valign="middle" align="center">0.14</td>
<td valign="middle" align="center">0.01</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">4</td>
<td valign="middle" rowspan="3" align="center">GAD</td>
<td valign="middle" align="center">Item 27: I worry that something bad will happen to me</td>
<td valign="middle" align="center">0.31</td>
<td valign="middle" align="center">0.02</td>
</tr>
<tr>
<td valign="middle" align="center">Item 22: I worry that bad things will happen to me</td>
<td valign="middle" align="center">0.19</td>
<td valign="middle" align="center">0.01</td>
</tr>
<tr>
<td valign="middle" align="center">Item 13: I worry that something awful will happen to someone in my family</td>
<td valign="middle" align="center">0.15</td>
<td valign="middle" align="center">0.01</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">5</td>
<td valign="middle" rowspan="3" align="center">SAD</td>
<td valign="middle" align="center">Item 46: I would feel scared if I had to stay away from home overnight</td>
<td valign="middle" align="center">0.18</td>
<td valign="middle" align="center">0.02</td>
</tr>
<tr>
<td valign="middle" align="center">Item 18: I have trouble going to school in the mornings because I feel nervous or afraid</td>
<td valign="middle" align="center">0.15</td>
<td valign="middle" align="center">0.01</td>
</tr>
<tr>
<td valign="middle" align="center">Item 17: I feel scared if I have to sleep on my own</td>
<td valign="middle" align="center">0.15</td>
<td valign="middle" align="center">0.02</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">6</td>
<td valign="middle" rowspan="3" align="center">SP</td>
<td valign="middle" align="center">Item 32: I worry what other people think of me</td>
<td valign="middle" align="center">0.16</td>
<td valign="middle" align="center">0.04</td>
</tr>
<tr>
<td valign="middle" align="center">Item 38: I feel afraid if I have to talk in front of my class</td>
<td valign="middle" align="center">0.15</td>
<td valign="middle" align="center">0.01</td>
</tr>
<tr>
<td valign="middle" align="center">Item 20: I worry I might look foolish</td>
<td valign="middle" align="center">0.14</td>
<td valign="middle" align="center">0.00</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">7</td>
<td valign="middle" rowspan="3" align="center">PD</td>
<td valign="middle" align="center">Item 34: All of a sudden I feel really scared for no reason at all</td>
<td valign="middle" align="center">0.22</td>
<td valign="middle" align="center">0.01</td>
</tr>
<tr>
<td valign="middle" align="center">Item 26: I suddenly start to tremble or shake when there is no reason for this</td>
<td valign="middle" align="center">0.15</td>
<td valign="middle" align="center">0.00</td>
</tr>
<tr>
<td valign="middle" align="center">Item 39: My heart suddenly starts to beat too quickly for no reason</td>
<td valign="middle" align="center">0.14</td>
<td valign="middle" align="center">0.01</td>
</tr>
<tr>
<td valign="middle" rowspan="3" align="center">8</td>
<td valign="middle" rowspan="3" align="center">OCD</td>
<td valign="middle" align="center">Item 23: I can&#x2019;t seem to get bad or silly thoughts out of my head</td>
<td valign="middle" align="center">0.22</td>
<td valign="middle" align="center">0.01</td>
</tr>
<tr>
<td valign="middle" align="center">Item 31: I have to think of special thoughts (like numbers or words) to stop bad things from happening</td>
<td valign="middle" align="center">0.19</td>
<td valign="middle" align="center">0.01</td>
</tr>
<tr>
<td valign="middle" align="center">Item 10: I get bothered by bad or silly thoughts or pictures in my mind</td>
<td valign="middle" align="center">0.16</td>
<td valign="middle" align="center">0.01</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>The top 3 features elucidated by the models show that in case of Total Internalizing Issues, two items pertaining to unexplainable anxiety and one item regarding issues with appetite are the most dominant. For Total Anxiety, accelerated heartbeat due to anxiety, being worried about poor performance, and a general fear about something bad happening to oneself are the top 3 symptoms. For MDD, the top 3 features relate to the depressive symptoms of lethargy and lack of interest. In case of GAD, general fears regarding bad things happening to oneself or a loved one dominate, and for SAD, being away from home, avoiding or being anxious about going to school, and sleeping alone appear on top. For SP, fears related to how others perceive oneself, presenting in front of an audience, and appearing foolish are the top 3 symptoms. In case of PD, the three features are related to unexplainable increase in fear, trembling, and faster heartrate. For OCD, obsessive thoughts are the dominant features as opposed to compulsive acts. For each individual, RF models can highlight a list of the most significant features or symptoms, which can be used to formulate personalized therapies for each individual, where the most critical behaviors are addressed expeditiously.</p>
<p>Similar studies on the predictive modelling or classification of anxiety and depressive disorders either utilize tree-based ensemble ML algorithms like RF and XGBoost, or report them as the best performing model from a pool of different classical and ensemble algorithms (<xref ref-type="bibr" rid="B17">17</xref>, <xref ref-type="bibr" rid="B18">18</xref>, <xref ref-type="bibr" rid="B20">20</xref>, <xref ref-type="bibr" rid="B34">34</xref>). Similar results were also obtained in the present research, where RF outperformed other algorithms. This is attributed to the ensemble nature of the algorithm which optimizes output by the aggregation of multiple decision trees, and its inherent ability to deal with potential non-linear relationships among features (<xref ref-type="bibr" rid="B35">35</xref>). This may also be attributed to the nature of the data and classification task, which is fairly straightforward as the numerical values of a few features elucidate how each instance will be classified into the three classes of Normal, Borderline, and Clinical. However, it is worth mentioning that the performance of SVM is comparable to RF in some cases. Qasrawi et&#xa0;al. also report better performance of SVM for the classification of depression and anxiety in their research (<xref ref-type="bibr" rid="B19">19</xref>). SVM is also a powerful classification algorithm that performs well on the type of dataset used in this research. However, the reason for preference of RF over SVM in this regard is not merely informed by the slightly better performance of RF and the recurrence of the algorithm in quoted literature. Random Forests are more robust and scalable algorithms which can handle non-linear data without the requirement of pre-standardization and deal with missing information via imputation (<xref ref-type="bibr" rid="B35">35</xref>). Support Vector Machines require complete and scaled data in order to perform efficiently (<xref ref-type="bibr" rid="B36">36</xref>). 
As the wholeness of medical data is not guaranteed in real-world application, Random Forests might prove to be a more practical choice as an efficient decision support system for the screening of mental disorders.</p>
<p>In the final models selected for each scale of RCADS, Gender has been removed as a predictive feature for Total Internalizing Issues, Total Anxiety, MDD, GAD, and OCD based on the performance metrics. This is interesting as the original T-scoring of RCADS is dependent on the gender of the patient, and research also explains a distinction between psychopathology of males and females (<xref ref-type="bibr" rid="B37">37</xref>). In contrast, removing the attribute of gender from the models for SAD, SP, and PD reduces their performance. Similar results are obtained for the attribute of grade level, where its removal from the models of Total Anxiety, MDD, and GAD improves model performance, while the metrics of models for SAD, SP, and PD reduce. Additionally, in case of Total Internalizing Issues, the removal of Items 5 and 9 improves model performance. These items of RCADS pertain to the specific behaviors associated with SAD; therefore, they may not be considered the primary contributors or predictors of general internalizing behavior. However, as these items are retained in the specific model for SAD, their omission from the scale of Total Internalizing Issues seems counter-intuitive. For a more robust comparison, the final RF models were further evaluated on the same test set, and Cohen&#x2019;s kappa was calculated for models trained on all features and the &#x3c7;<sup>2</sup>-subsets. This additional testing also revealed contrasting results, where values of &#x3ba; either increased, decreased, or remained unchanged for some scales. For scales where any difference in the value of &#x3ba; was observed, McNemar&#x2019;s test confirmed that the differences were not statistically significant. Due to the conflicting nature of the outcomes and the proof that any difference in model performance has occurred by chance, it is concluded that no changes need to be made in the composition of RCADS and its interrogatories based on this portion of the research alone. 
All items within RCADS are culturally significant in a Pakistani cohort, which is to be expected from a widely-used and validated psychometric scale curated by experienced professionals. However, the lower Cronbach&#x2019;s alpha values obtained for the scales of SAD and OCD indicate that further studies regarding the composition of these subscales with respect to Pakistani populations are required.</p>
<p>It is important to discern that the aim of AI-driven decision support systems is not to provide a definitive diagnosis and, ultimately, a &#x201c;replacement&#x201d; for traditional clinical practices. The role of these proposed tools is the enhancement of the existing clinical process. In resource-limited areas like Pakistan, psychiatrists are greatly outnumbered by potential patients. Reportedly, there is only one fully trained mental health professional for every 360,000 patients in Pakistan (<xref ref-type="bibr" rid="B38">38</xref>). The statistics become even more drastic for younger patients, with only 1% of the outpatient mental health facilities specializing in child and adolescent psychiatry (<xref ref-type="bibr" rid="B22">22</xref>). The implementation of an additional screening aid in the clinical workflow could alleviate the burden of mental health professionals. The speed of AI-driven tools enables them to screen multiple individuals simultaneously, and their efficiency can allow the prioritization of patients that require urgent review from a professional. As the influx of patients in child and adolescent psychiatry departments is quite low, it would be more beneficial to equip general or pediatric outpatient departments with these decision support systems. In addition to conventional check-ups, if parents or frontline healthcare providers are concerned about the mental well-being of their child or patient, they can quickly screen them using the proposed predictive models. This preliminary screening can then inform if the patient should be&#xa0;referred to a mental health specialist, and the symptom profiling can facilitate the specialist in forming patient-centered therapeutic regimes.</p>
<p>While the study establishes the importance of each item of RCADS and provides RF algorithms that can screen mental disorders efficiently, there are certain limitations that may affect the generalizability of the proposed models. Firstly, the sample size is quite small. Although similar sample sizes have been reported in related literature (<xref ref-type="bibr" rid="B24">24</xref>&#x2013;<xref ref-type="bibr" rid="B28">28</xref>), ML algorithms require extensive data for optimal performance. While the implementation of SMOTE-N slightly increases the dataset, this is an artificial or synthetic inflation of data, which is not comparable to real instances that are collected organically. Secondly, the gender distribution is quite&#xa0;imbalanced which can be attributed to the sampling methods used&#xa0;for data collection. Both consecutive and purposive sampling are&#xa0;non-probability sampling techniques which can induce bias. However, these methods were employed for convenience, as data collection for mental health research poses some hurdles due to limited clinical cases and socio-cultural implications. Additionally, the reported predisposition of affective disorders in females may also contribute to the gender imbalance (<xref ref-type="bibr" rid="B37">37</xref>). Thirdly, the data features only consist of gender, grade, and the&#xa0;47 items of RCADS. Socio-cultural attributes like family structure, socioeconomic status, and parental education levels are primary determinants of a child&#x2019;s cognitive development (<xref ref-type="bibr" rid="B39">39</xref>, <xref ref-type="bibr" rid="B40">40</xref>). Therefore, their inclusion with questions regarding behaviors and personality traits will result in inclusive and comprehensive screening tools.</p>
<p>In terms of future prospects of this study, the proposed models should be further optimized using a larger and uniform dataset. Socio-demographic and biometric indicators of patients should also be incorporated for more robust cultural adaptation of proposed decision support systems. This study serves as a foundation for the predictive modelling and symptom profiling of depressive and anxiety disorders in at-risk children and adolescents using ubiquitous and standard screening practices. It shows that simple computational approaches can provide efficient decision support systems for healthcare professionals to alleviate their workload and optimize patient outcomes. However, it is appreciated that the implementation of such technological interventions requires extensive validation on external datasets that the models have not been exposed to during initial training. This external validation will investigate the generalizability and reproducibility of the models on varying cohorts.</p>
</sec>
</body>
<back>
<sec id="s5" sec-type="data-availability">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p></sec>
<sec id="s6" sec-type="ethics-statement">
<title>Ethics statement</title>
<p>The studies involving humans were approved by Institutional Review Board (NUST-IRB), National University of Sciences and Technology (NUST). The studies were conducted in accordance with the local legislation and institutional requirements. Written informed consent for participation in this study was provided by the participants&#x2019; legal guardians/next of kin.</p></sec>
<sec id="s7" sec-type="author-contributions">
<title>Author contributions</title>
<p>ZH: Conceptualization, Supervision, Writing &#x2013; original draft, Validation, Methodology. MH: Investigation, Writing &#x2013; original draft, Methodology, Formal analysis. MZ: Writing &#x2013; review &amp; editing. SS: Writing &#x2013; review &amp; editing, Methodology. QH: Validation, Writing &#x2013; review &amp; editing, Data curation. HA: Methodology, Writing &#x2013; review &amp; editing.</p></sec>
<ack>
<title>Acknowledgments</title>
<p>The authors are grateful to the Institute of Psychiatry at Benazir Bhutto Hospital, Rawalpindi, for providing us the secondary data used in this study. We are also thankful to the professionals at Centre for Counseling and Career Advisory (C3A), NUST, Islamabad, for their clinical insights and the Deep Learning Lab in NUST, Islamabad, for their computational resources and expertise.</p>
</ack>
<sec id="s9" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
<sec id="s10" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If&#xa0;you identify any issues, please contact us.</p></sec>
<sec id="s11" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<sec id="s12" sec-type="supplementary-material">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fpsyt.2026.1758503/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fpsyt.2026.1758503/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Table1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
<supplementary-material xlink:href="Table2.docx" id="SM2" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author"><collab>GBD 2019 Mental Disorders Collaborators</collab>
</person-group>. 
<article-title>Global, regional, and national burden of 12 mental disorders in 204 countries and territories, 1990&#x2013;2019: a systematic analysis for the Global Burden of Disease Study 2019</article-title>. <source>Lancet Psychiatry</source>. (<year>2022</year>) <volume>9</volume>:<page-range>137&#x2013;50</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/S2215-0366(21)00395-3</pub-id>, PMID: <pub-id pub-id-type="pmid">35026139</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<label>2</label>
<mixed-citation publication-type="book">
<person-group person-group-type="author"><collab>Institute for Health Metrics and Evaluation</collab>
</person-group>. <source>Global health data exchange (GHDx)</source>. <publisher-loc>Seattle, Washington, USA</publisher-loc>: 
<publisher-name>Institute for Health Metrics and Evaluation</publisher-name> (<year>2021</year>).
</mixed-citation>
</ref>
<ref id="B3">
<label>3</label>
<mixed-citation publication-type="web">
<person-group person-group-type="author"><collab>World Health Organization</collab>
</person-group>. 
<article-title>World health organization</article-title> (<year>2022</year>). Available online at: <uri xlink:href="https://www.who.int/news-room/fact-sheets/detail/mental-health-strengthening-our-response">https://www.who.int/news-room/fact-sheets/detail/mental-health-strengthening-our-response</uri> (Accessed <date-in-citation content-type="access-date">February 3, 2025</date-in-citation>).
</mixed-citation>
</ref>
<ref id="B4">
<label>4</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Polanczyk</surname> <given-names>GV</given-names></name>
<name><surname>Salum</surname> <given-names>GA</given-names></name>
<name><surname>Sugaya</surname> <given-names>LS</given-names></name>
<name><surname>Caye</surname> <given-names>A</given-names></name>
<name><surname>Rohde</surname> <given-names>LA</given-names></name>
</person-group>. 
<article-title>Annual Research Review: A meta-analysis of the worldwide prevalence of mental disorders in children and adolescents</article-title>. <source>J Child Psychol Psychiatry</source>. (<year>2015</year>) <volume>56</volume>:<page-range>345&#x2013;65</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/jcpp.12381</pub-id>, PMID: <pub-id pub-id-type="pmid">25649325</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<label>5</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Racine</surname> <given-names>N</given-names></name>
<name><surname>McArthur</surname> <given-names>BA</given-names></name>
<name><surname>Cooke</surname> <given-names>JE</given-names></name>
<name><surname>Eirich</surname> <given-names>R</given-names></name>
<name><surname>Zhu</surname> <given-names>J</given-names></name>
<name><surname>Madigan</surname> <given-names>S</given-names></name>
</person-group>. 
<article-title>Global prevalence of&#xa0;depressive and anxiety symptoms in children and adolescents during COVID-19: A&#xa0;meta-analysis</article-title>. <source>JAMA Pediatr</source>. (<year>2021</year>) <volume>175</volume>:<page-range>1142&#x2013;50</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1001/jamapediatrics.2021.2482</pub-id>, PMID: <pub-id pub-id-type="pmid">34369987</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<label>6</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Kessler</surname> <given-names>RC</given-names></name>
<name><surname>Amminger</surname> <given-names>GP</given-names></name>
<name><surname>Aguilar-Gaxiola</surname> <given-names>S</given-names></name>
<name><surname>Alonso</surname> <given-names>J</given-names></name>
<name><surname>Lee</surname> <given-names>S</given-names></name>
<name><surname>&#xdc;st&#xfc;n</surname> <given-names>TB</given-names></name>
</person-group>. 
<article-title>Age of onset of mental disorders: a review of recent literature</article-title>. <source>Curr Opin Psychiatry</source>. (<year>2007</year>) <volume>20</volume>:<page-range>359&#x2013;64</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1097/YCO.0b013e32816ebc8c</pub-id>, PMID: <pub-id pub-id-type="pmid">17551351</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<label>7</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Copeland</surname> <given-names>WE</given-names></name>
<name><surname>Shanahan</surname> <given-names>L</given-names></name>
<name><surname>Costello</surname> <given-names>EJ</given-names></name>
<name><surname>Angold</surname> <given-names>A</given-names></name>
</person-group>. 
<article-title>Childhood and adolescent psychiatric disorders as predictors of young adult disorders</article-title>. <source>Arch Gen Psychiatry</source>. (<year>2009</year>) <volume>66</volume>:<page-range>764&#x2013;72</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1001/archgenpsychiatry.2009.85</pub-id>, PMID: <pub-id pub-id-type="pmid">19581568</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<label>8</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hazell</surname> <given-names>P</given-names></name>
</person-group>. 
<article-title>Does the treatment of mental disorders in childhood lead to a healthier adulthood</article-title>? <source>Curr Opin Psychiatry</source>. (<year>2007</year>) <volume>20</volume>:<page-range>315&#x2013;8</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1097/YCO.0b013e3281a7368d</pub-id>, PMID: <pub-id pub-id-type="pmid">17551343</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<label>9</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Piqueras</surname> <given-names>JA</given-names></name>
<name><surname>Mart&#xed;n-Vivar</surname> <given-names>M</given-names></name>
<name><surname>Sandin</surname> <given-names>B</given-names></name>
<name><surname>San Luis</surname> <given-names>C</given-names></name>
<name><surname>Pineda</surname> <given-names>D</given-names></name>
</person-group>. 
<article-title>The Revised Child Anxiety and Depression Scale: A systematic review and reliability generalization meta-analysis</article-title>. <source>J Affect Disord</source>. (<year>2017</year>) <volume>218</volume>:<page-range>153&#x2013;69</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jad.2017.04.022</pub-id>, PMID: <pub-id pub-id-type="pmid">28475961</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<label>10</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Hale</surname> <given-names>WW</given-names></name>
<name><surname>Crocetti</surname> <given-names>E</given-names></name>
<name><surname>Raaijmakers</surname> <given-names>QA</given-names></name>
<name><surname>Meeus</surname> <given-names>WH</given-names></name>
</person-group>. 
<article-title>A meta-analysis of the cross-cultural psychometric properties of the Screen for Child Anxiety Related Emotional Disorders (SCARED)</article-title>. <source>J Child Psychol Psychiatry</source>. (<year>2011</year>) <volume>52</volume>:<fpage>80</fpage>&#x2013;<lpage>90</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/j.1469-7610.2010.02285.x</pub-id>, PMID: <pub-id pub-id-type="pmid">20662993</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<label>11</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Orgil&#xe9;s</surname> <given-names>M</given-names></name>
<name><surname>Fern&#xe1;ndez-Mart&#xed;nez</surname> <given-names>I</given-names></name>
<name><surname>Guill&#xe9;n-Riquelme</surname> <given-names>A</given-names></name>
<name><surname>Espada</surname> <given-names>JP</given-names></name>
<name><surname>Essau</surname> <given-names>CA</given-names></name>
</person-group>. 
<article-title>A systematic review of the factor structure and reliability of the Spence Children&#x2019;s Anxiety Scale</article-title>. <source>J Affect Disord</source>. (<year>2016</year>) <volume>190</volume>:<page-range>333&#x2013;40</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jad.2015.09.055</pub-id>, PMID: <pub-id pub-id-type="pmid">26544617</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<label>12</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Moggia</surname> <given-names>D</given-names></name>
<name><surname>Lutz</surname> <given-names>W</given-names></name>
<name><surname>Brakemeier</surname> <given-names>EL</given-names></name>
<name><surname>Bickman</surname> <given-names>L</given-names></name>
</person-group>. 
<article-title>Treatment personalization and precision mental health care: Where are we and where do we want to go?</article-title> <source>Adm Policy Ment Health Ment Health Serv Res</source>. (<year>2024</year>) <volume>51</volume>:<page-range>611&#x2013;6</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s10488-024-01407-w</pub-id>, PMID: <pub-id pub-id-type="pmid">39172281</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<label>13</label>
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Zhou</surname> <given-names>ZH</given-names></name>
</person-group>. <source>Machine learning</source>. <publisher-loc>Singapore</publisher-loc>: 
<publisher-name>Springer Nature</publisher-name> (<year>2021</year>).
</mixed-citation>
</ref>
<ref id="B14">
<label>14</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Habehh</surname> <given-names>H</given-names></name>
<name><surname>Gohel</surname> <given-names>S</given-names></name>
</person-group>. 
<article-title>Machine learning in healthcare</article-title>. <source>Curr Genomics</source>. (<year>2021</year>) <volume>22</volume>:<fpage>291</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.2174/1389202922666210705124359</pub-id>, PMID: <pub-id pub-id-type="pmid">35273459</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<label>15</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Amjad</surname> <given-names>H</given-names></name>
<name><surname>Hussain</surname> <given-names>Z</given-names></name>
<name><surname>Hasan</surname> <given-names>M</given-names></name>
<name><surname>Ul Hassan</surname> <given-names>M</given-names></name>
</person-group>. 
<article-title>Machine learning-based models for screening of anemia and leukemia using features of complete blood count reports</article-title>. <source>Sci Rep</source>. (<year>2025</year>) <volume>15</volume>:<fpage>33333</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-025-21279-w</pub-id>, PMID: <pub-id pub-id-type="pmid">41023304</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<label>16</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Rogan</surname> <given-names>J</given-names></name>
<name><surname>Bucci</surname> <given-names>S</given-names></name>
<name><surname>Firth</surname> <given-names>J</given-names></name>
</person-group>. 
<article-title>Health care professionals&#x2019; views on the use of passive sensing, AI, and machine learning in mental health care: systematic review with meta-synthesis</article-title>. <source>JMIR Ment Health</source>. (<year>2024</year>) <volume>11</volume>:<elocation-id>e49577</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.2196/49577</pub-id>, PMID: <pub-id pub-id-type="pmid">38261403</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<label>17</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Haque</surname> <given-names>UM</given-names></name>
<name><surname>Kabir</surname> <given-names>E</given-names></name>
<name><surname>Khanam</surname> <given-names>R</given-names></name>
</person-group>. 
<article-title>Detection of child depression using machine learning methods</article-title>. <source>PLoS One</source>. (<year>2021</year>) <volume>16</volume>:<elocation-id>e0261131</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1371/journal.pone.0261131</pub-id>, PMID: <pub-id pub-id-type="pmid">34914728</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<label>18</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Haque</surname> <given-names>UM</given-names></name>
<name><surname>Kabir</surname> <given-names>E</given-names></name>
<name><surname>Khanam</surname> <given-names>R</given-names></name>
</person-group>. 
<article-title>Early detection of paediatric and adolescent obsessive&#x2013;compulsive, separation anxiety and attention deficit hyperactivity disorder using machine learning algorithms</article-title>. <source>Health Inf Sci Syst</source>. (<year>2023</year>) <volume>11</volume>:<fpage>31</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s13755-023-00232-z</pub-id>, PMID: <pub-id pub-id-type="pmid">37489154</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<label>19</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Qasrawi</surname> <given-names>R</given-names></name>
<name><surname>Vicuna Polo</surname> <given-names>SP</given-names></name>
<name><surname>Abu Al-Halawa</surname> <given-names>D</given-names></name>
<name><surname>Hallaq</surname> <given-names>S</given-names></name>
<name><surname>Abdeen</surname> <given-names>Z</given-names></name>
</person-group>. 
<article-title>Assessment and prediction of depression and anxiety risk factors in schoolchildren: machine learning techniques performance analysis</article-title>. <source>JMIR Form Res</source>. (<year>2022</year>) <volume>6</volume>:<elocation-id>e32736</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.2196/32736</pub-id>, PMID: <pub-id pub-id-type="pmid">35665695</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<label>20</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Nemesure</surname> <given-names>MD</given-names></name>
<name><surname>Heinz</surname> <given-names>MV</given-names></name>
<name><surname>Huang</surname> <given-names>R</given-names></name>
<name><surname>Jacobson</surname> <given-names>NC</given-names></name>
</person-group>. 
<article-title>Predictive modeling of depression and anxiety using electronic health records and a novel machine learning approach with artificial intelligence</article-title>. <source>Sci Rep</source>. (<year>2021</year>) <volume>11</volume>:<fpage>1980</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-021-81368-4</pub-id>, PMID: <pub-id pub-id-type="pmid">33479383</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<label>21</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Qasrawi</surname> <given-names>R</given-names></name>
<name><surname>Vicuna Polo</surname> <given-names>S</given-names></name>
<name><surname>Abu Khader</surname> <given-names>R</given-names></name>
<name><surname>Abu Al-Halawa</surname> <given-names>D</given-names></name>
<name><surname>Hallaq</surname> <given-names>S</given-names></name>
<name><surname>Abu Halaweh</surname> <given-names>N</given-names></name>
<etal/>
</person-group>. 
<article-title>Machine learning techniques for identifying mental health risk factor associated with schoolchildren cognitive ability living in politically violent environments</article-title>. <source>Front Psychiatry</source>. (<year>2023</year>) <volume>14</volume>:<elocation-id>1071622</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpsyt.2023.1071622</pub-id>, PMID: <pub-id pub-id-type="pmid">37304448</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<label>22</label>
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Taj</surname> <given-names>R</given-names></name>
</person-group>. 
<article-title>Mental health in Pakistan</article-title>. In: <source>Routledge handbook of psychiatry in Asia</source>. <publisher-loc>London, England</publisher-loc>: 
<publisher-name>Routledge</publisher-name> (<year>2015</year>). p. <page-range>103&#x2013;14</page-range>.
</mixed-citation>
</ref>
<ref id="B23">
<label>23</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mehmood</surname> <given-names>T</given-names></name>
<name><surname>Sultan</surname> <given-names>S</given-names></name>
</person-group>. 
<article-title>Translation and adaptation of revised children&#x2019;s anxiety and depression scale</article-title>. <source>Int J Lib Arts Soc Sci</source>. (<year>2014</year>) <volume>2</volume>:<fpage>95</fpage>&#x2013;<lpage>106</lpage>.
</mixed-citation>
</ref>
<ref id="B24">
<label>24</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Abdelwahab</surname> <given-names>MM</given-names></name>
<name><surname>Al-Karawi</surname> <given-names>KA</given-names></name>
<name><surname>Hasanin</surname> <given-names>E</given-names></name>
<name><surname>Semary</surname> <given-names>H</given-names></name>
</person-group>. 
<article-title>Autism spectrum disorder prediction in children using machine learning</article-title>. <source>J Disabil Res</source>. (<year>2024</year>) <volume>3</volume>:<fpage>20230064</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.57197/JDR-2023-0064</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<label>25</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Farooq</surname> <given-names>MS</given-names></name>
<name><surname>Tehseen</surname> <given-names>R</given-names></name>
<name><surname>Sabir</surname> <given-names>M</given-names></name>
<name><surname>Atal</surname> <given-names>Z</given-names></name>
</person-group>. 
<article-title>Detection of autism spectrum disorder (ASD) in children and adults using machine learning</article-title>. <source>Sci Rep</source>. (<year>2023</year>) <volume>13</volume>:<fpage>9605</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41598-023-35910-1</pub-id>, PMID: <pub-id pub-id-type="pmid">37311766</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<label>26</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Khudhur</surname> <given-names>DD</given-names></name>
<name><surname>Khudhur</surname> <given-names>SD</given-names></name>
</person-group>. 
<article-title>The classification of autism spectrum disorder by machine learning methods on multiple datasets for four age groups</article-title>. <source>Meas Sens</source>. (<year>2023</year>) <volume>27</volume>:<fpage>100774</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.measen.2023.100774</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<label>27</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Raj</surname> <given-names>S</given-names></name>
<name><surname>Masood</surname> <given-names>S</given-names></name>
</person-group>. 
<article-title>Analysis and detection of autism spectrum disorder using machine learning techniques</article-title>. <source>Procedia Comput Sci</source>. (<year>2020</year>) <volume>167</volume>:<fpage>994</fpage>&#x2013;<lpage>1004</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.procs.2020.03.399</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<label>28</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Thorup</surname> <given-names>E</given-names></name>
<name><surname>Nystr&#xf6;m</surname> <given-names>P</given-names></name>
<name><surname>B&#xf6;lte</surname> <given-names>S</given-names></name>
<name><surname>Falck-Ytter</surname> <given-names>T</given-names></name>
</person-group>. 
<article-title>What are you looking at? Gaze following with and without target objects in ASD and typical development</article-title>. <source>Autism</source>. (<year>2022</year>) <volume>26</volume>:<page-range>1668&#x2013;80</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1177/13623613211061940</pub-id>, PMID: <pub-id pub-id-type="pmid">34903076</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<label>29</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Tavakol</surname> <given-names>M</given-names></name>
<name><surname>Dennick</surname> <given-names>R</given-names></name>
</person-group>. 
<article-title>Making sense of Cronbach&#x2019;s alpha</article-title>. <source>Int J Med Educ</source>. (<year>2011</year>) <volume>2</volume>:<fpage>53</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.5116/ijme.4dfb.8dfd</pub-id>, PMID: <pub-id pub-id-type="pmid">28029643</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<label>30</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chawla</surname> <given-names>NV</given-names></name>
<name><surname>Bowyer</surname> <given-names>KW</given-names></name>
<name><surname>Hall</surname> <given-names>LO</given-names></name>
<name><surname>Kegelmeyer</surname> <given-names>WP</given-names></name>
</person-group>. 
<article-title>SMOTE: synthetic minority over-sampling technique</article-title>. <source>J Artif Intell Res</source>. (<year>2002</year>) <volume>16</volume>:<page-range>321&#x2013;57</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1613/jair.953</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<label>31</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Pedregosa</surname> <given-names>F</given-names></name>
<name><surname>Varoquaux</surname> <given-names>G</given-names></name>
<name><surname>Gramfort</surname> <given-names>A</given-names></name>
<name><surname>Michel</surname> <given-names>V</given-names></name>
<name><surname>Thirion</surname> <given-names>B</given-names></name>
<name><surname>Grisel</surname> <given-names>O</given-names></name>
<etal/>
</person-group>. 
<article-title>Scikit-learn: machine learning in Python</article-title>. <source>J Mach Learn Res</source>. (<year>2011</year>) <volume>12</volume>:<page-range>2825&#x2013;30</page-range>.
</mixed-citation>
</ref>
<ref id="B32">
<label>32</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Roelofs</surname> <given-names>R</given-names></name>
<name><surname>Shankar</surname> <given-names>V</given-names></name>
<name><surname>Recht</surname> <given-names>B</given-names></name>
<name><surname>Fridovich-Keil</surname> <given-names>S</given-names></name>
<name><surname>Hardt</surname> <given-names>M</given-names></name>
<name><surname>Miller</surname> <given-names>J</given-names></name>
<etal/>
</person-group>. 
<article-title>A meta-analysis of overfitting in machine learning</article-title>. <source>Adv Neural Inf Process Syst</source>. (<year>2019</year>) <volume>32</volume>:<page-range>9179&#x2013;89</page-range>.
</mixed-citation>
</ref>
<ref id="B33">
<label>33</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Altmann</surname> <given-names>A</given-names></name>
<name><surname>Tolo&#x15f;i</surname> <given-names>L</given-names></name>
<name><surname>Sander</surname> <given-names>O</given-names></name>
<name><surname>Lengauer</surname> <given-names>T</given-names></name>
</person-group>. 
<article-title>Permutation importance: a corrected feature importance measure</article-title>. <source>Bioinformatics</source>. (<year>2010</year>) <volume>26</volume>:<page-range>1340&#x2013;7</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/bioinformatics/btq134</pub-id>, PMID: <pub-id pub-id-type="pmid">20385727</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<label>34</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Richter</surname> <given-names>T</given-names></name>
<name><surname>Fishbain</surname> <given-names>B</given-names></name>
<name><surname>Fruchter</surname> <given-names>E</given-names></name>
<name><surname>Richter-Levin</surname> <given-names>G</given-names></name>
<name><surname>Okon-Singer</surname> <given-names>H</given-names></name>
</person-group>. 
<article-title>Machine learning-based diagnosis support system for differentiating between clinical anxiety and depression disorders</article-title>. <source>J Psychiatr Res</source>. (<year>2021</year>) <volume>141</volume>:<fpage>199</fpage>&#x2013;<lpage>205</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jpsychires.2021.06.044</pub-id>, PMID: <pub-id pub-id-type="pmid">34246974</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<label>35</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Rigatti</surname> <given-names>SJ</given-names></name>
</person-group>. 
<article-title>Random forest</article-title>. <source>J Insur Med</source>. (<year>2017</year>) <volume>47</volume>:<page-range>31&#x2013;9</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.17849/insm-47-01-31-39.1</pub-id>, PMID: <pub-id pub-id-type="pmid">28836909</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<label>36</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bhavsar</surname> <given-names>H</given-names></name>
<name><surname>Panchal</surname> <given-names>MH</given-names></name>
</person-group>. 
<article-title>A review on support vector machine for data classification</article-title>. <source>Int J Adv Res Comput Eng Technol IJARCET</source>. (<year>2012</year>) <volume>1</volume>:<page-range>185&#x2013;9</page-range>.
</mixed-citation>
</ref>
<ref id="B37">
<label>37</label>
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Rosenfield</surname> <given-names>S</given-names></name>
<name><surname>Mouzon</surname> <given-names>D</given-names></name>
</person-group>. 
<article-title>Gender and mental health</article-title>. In: 
<person-group person-group-type="editor">
<name><surname>Aneshensel</surname> <given-names>CS</given-names></name>
<name><surname>Phelan</surname> <given-names>JC</given-names></name>
<name><surname>Bierman</surname> <given-names>A</given-names></name>
</person-group>, editors. <source>Handbook of the sociology of mental health</source>. 
<publisher-loc>Dordrecht</publisher-loc>: <publisher-name>Springer Netherlands</publisher-name> (<year>2013</year>). p. <page-range>277&#x2013;96</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-94-007-4276-5_14</pub-id>
</mixed-citation>
</ref>
<ref id="B38">
<label>38</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ahmad</surname> <given-names>I</given-names></name>
</person-group>. 
<article-title>Highlighting Pakistan&#x2019;s psychiatrist shortage amid a persistent mental health crisis</article-title>. <source>J Pak Med Assoc</source>. (<year>2025</year>) <volume>76</volume>:<fpage>130</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.47391/JPMA.31107</pub-id>
</mixed-citation>
</ref>
<ref id="B39">
<label>39</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Sammadar</surname> <given-names>LDB</given-names></name>
<name><surname>Gaur</surname> <given-names>BS</given-names></name>
<name><surname>Gaur</surname> <given-names>SS</given-names></name>
</person-group>. 
<article-title>The sociology of mental health: an analysis of the impact of socio-cultural factors on mental health</article-title>. <source>Sociology</source>. (<year>2022</year>) <volume>11</volume>:<page-range>159&#x2013;63</page-range>. Available online at: <uri xlink:href="https://www.eduzonejournal.com/index.php/eiprmj/article/view/111">https://www.eduzonejournal.com/index.php/eiprmj/article/view/111</uri> (<date-in-citation content-type="access-date">Accessed April 15, 2025</date-in-citation>).
</mixed-citation>
</ref>
<ref id="B40">
<label>40</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Van Kamp</surname> <given-names>I</given-names></name>
<name><surname>Waye</surname> <given-names>KP</given-names></name>
<name><surname>Kanninen</surname> <given-names>K</given-names></name>
<name><surname>Gulliver</surname> <given-names>J</given-names></name>
<name><surname>Bozzon</surname> <given-names>A</given-names></name>
<name><surname>Psyllidis</surname> <given-names>A</given-names></name>
<etal/>
</person-group>. 
<article-title>Early environmental quality and life-course mental health effects: The Equal-Life project</article-title>. <source>Environ Epidemiol</source>. (<year>2022</year>) <volume>6</volume>:<elocation-id>e183</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1097/EE9.0000000000000183</pub-id>, PMID: <pub-id pub-id-type="pmid">35169662</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1825081">Sasidhar Gunturu</ext-link>, Bronx-Lebanon Hospital Center, United States</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1829490">Sifan Wang</ext-link>, The University of Hong Kong, Hong Kong SAR, China</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3340652">Kavita Kothari</ext-link>, Atrium Healthcare, United States</p></fn>
</fn-group>
</back>
</article>