<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="en">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Psychiatry</journal-id>
<journal-title-group>
<journal-title>Frontiers in Psychiatry</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Psychiatry</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1664-0640</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpsyt.2026.1752423</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>An AI-based intelligent diagnosis system for adolescent mental health based on multitask deep learning</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Liu</surname><given-names>Wenyue</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2831526/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Zhang</surname><given-names>Zhihao</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/3061296/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Du</surname><given-names>Linkang</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Qiu</surname><given-names>Jianguo</given-names></name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Department of Physical Education, Yantai Institute of Science and Technology</institution>, <city>Yantai</city>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff2"><label>2</label><institution>Faculty of Education, Universiti Kebangsaan Malaysia</institution>, <city>Bangi</city>,&#xa0;<country country="my">Malaysia</country></aff>
<aff id="aff3"><label>3</label><institution>School of Cyber Science and Engineering, Xi&#x2019;an Jiaotong University</institution>, <city>Xi&#x2019;an</city>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff4"><label>4</label><institution>Faculty of Physical Education, Ludong University</institution>, <city>Yantai</city>,&#xa0;<country country="cn">China</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Jianguo Qiu, <email xlink:href="mailto:1625@ldu.edu.cn">1625@ldu.edu.cn</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-24">
<day>24</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>17</volume>
<elocation-id>1752423</elocation-id>
<history>
<date date-type="received">
<day>23</day>
<month>11</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>19</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="rev-recd">
<day>06</day>
<month>01</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Liu, Zhang, Du and Qiu.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Liu, Zhang, Du and Qiu</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-24">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Background and objectives</title>
<p>Adolescent depression and anxiety are becoming increasingly prevalent in China, with rates reaching 20%&#x2013;30%, driven largely by intense academic pressure and the cultural tendency toward somatization. Traditional screening tools, such as the Patient Health Questionnaire-9 (PHQ-9) and Generalized Anxiety Disorder-7 (GAD-7), often suffer from subjective bias, recall errors, and underreporting due to social stigma. This study developed an AI-based intelligent diagnosis system (IDS) using multitask deep learning to non-intrusively predict comorbid depression and anxiety severity based on the spontaneous textual expressions of Chinese adolescents.</p>
</sec>
<sec>
<title>Methods</title>
<p>Textual responses from approximately 1,275 adolescents were collected and labeled with clinician-assessed PHQ-9 and GAD-7 scores. Preprocessing involved jieba segmentation and variational autoencoder (VAE)-based data augmentation to address class imbalance, resulting in an expanded test set of 308 samples. The IDS architecture utilizes a Chinese-optimized BERT encoder with self-attention and dual-feature fusion (combining pooled [CLS] tokens and global pooling) to extract shared representations. These are processed through multitask heads for regression (MSE loss) and classification (weighted cross-entropy). The model was trained using an 8:1:1 split with AdamW optimization, cosine annealing, and regularization, supported by ablation studies to validate individual components.</p>
</sec>
<sec>
<title>Results</title>
<p>On the test set, the IDS achieved Pearson correlation coefficients of 0.706 for PHQ-9 and 0.693 for GAD-7, with AUC values of 0.877 and 0.902, respectively. Binary classification yielded F1-scores of 0.762 (PHQ-9) and 0.863 (GAD-7). Ablation analysis confirmed that the multitask learning framework improved F1-scores by 6.2%&#x2013;7.8% and reduced MSE by 14.2%&#x2013;18.4%. Furthermore, adaptations for somatization and data augmentation for severe cases significantly enhanced the system&#x2019;s sensitivity.</p>
</sec>
<sec>
<title>Conclusion</title>
<p>The IDS offers a robust, culturally sensitive, and scalable tool for adolescent mental health screening. By outperforming single-task baselines, it provides a proactive, privacy-preserving alternative to traditional self-reports. Future research will focus on longitudinal validation, multimodal integration, and ethical deployment strategies to maximize the system&#x2019;s utility in educational and clinical settings.</p>
</sec>
</abstract>
<kwd-group>
<kwd>adolescent mental health</kwd>
<kwd>depression and anxiety screening</kwd>
<kwd>digital phenotyping</kwd>
<kwd>multitask deep learning</kwd>
<kwd>natural language processing</kwd>
</kwd-group>
<funding-group>
<award-group id="gs1">
<funding-source id="sp1">
<institution-wrap>
<institution>Social Science Planning Project of Shandong Province</institution>
<institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open_funder_registry">10.13039/501100018563</institution-id>
</institution-wrap>
</funding-source>
</award-group>
<award-group id="gs2">
<funding-source id="sp2">
<institution-wrap>
<institution>Ludong University</institution>
<institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open_funder_registry">10.13039/501100008279</institution-id>
</institution-wrap>
</funding-source>
</award-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This research and the APC were funded by the Shandong Province Social Science Planning Research Project (Grant Number: 22CTYJ02) and Ludong University.</funding-statement>
</funding-group>
<counts>
<fig-count count="3"/>
<table-count count="8"/>
<equation-count count="22"/>
<ref-count count="36"/>
<page-count count="12"/>
<word-count count="6129"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Computational Psychiatry</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<label>1</label>
<title>Introduction</title>
<p>Adolescent mental health disorders have emerged as a pressing global public health concern, with recent systematic reviews and meta-analyses highlighting alarming prevalence rates. For instance, pooled estimates indicate that depressive symptoms affect approximately 21% to 25% of children and adolescents worldwide, while anxiety symptoms exhibit similar elevated rates, often exacerbated by factors such as the COVID-19 pandemic and ongoing socioeconomic stressors (<xref ref-type="bibr" rid="B1">1</xref>&#x2013;<xref ref-type="bibr" rid="B3">3</xref>). These figures underscore the substantial burden, as untreated conditions in youth can lead to chronic impairment, reduced academic performance, and increased risk of suicidality. In China, the situation is particularly acute; large-scale epidemiological studies and meta-analyses report that depressive symptoms among adolescents often exceed 30% in certain subgroups, with anxiety disorders showing comparable or higher prevalence, influenced by rapid urbanization, academic pressures, and familial expectations (<xref ref-type="bibr" rid="B4">4</xref>, <xref ref-type="bibr" rid="B5">5</xref>). This high incidence demands innovative screening strategies tailored to the unique sociocultural landscape.</p>
<p>Early detection during adolescence is paramount, as untreated depression and anxiety commonly persist into adulthood, imposing&#xa0;substantial personal, societal, and economic burdens (<xref ref-type="bibr" rid="B6">6</xref>).&#xa0;Standardized instruments such as the Patient Health Questionnaire-9 (PHQ-9) and Generalized Anxiety Disorder-7 (GAD-7) remain gold standards for assessment but are susceptible to biases, including recall inaccuracies and social desirability effects, particularly among youth who may underreport symptoms due to stigma, cultural reticence, or limited emotional vocabulary (<xref ref-type="bibr" rid="B7">7</xref>). In contrast, artificial intelligence (AI)-enabled analysis of spontaneous natural language provides a passive, non-intrusive avenue to derive &#x201c;digital phenotypes&#x201d;&#x2014;behavioral signatures extracted from digital footprints&#x2014;that reveal cognitive distortions, affective patterns, and relational dynamics less prone to deliberate concealment (<xref ref-type="bibr" rid="B8">8</xref>,&#xa0;<xref ref-type="bibr" rid="B9">9</xref>). Recent reviews emphasize how digital phenotyping, leveraging text data alongside other passive signals, can facilitate continuous monitoring and personalized interventions in mental healthcare (<xref ref-type="bibr" rid="B10">10</xref>, <xref ref-type="bibr" rid="B11">11</xref>).</p>
<p>In the Chinese context, these mental health challenges are further complicated by cultural norms that frequently manifest psychological distress through somatization&#x2014;the expression of emotional suffering via physical complaints such as fatigue, sleep disturbances, headaches, or gastrointestinal issues&#x2014;rather than overt emotional disclosures (<xref ref-type="bibr" rid="B12">12</xref>, <xref ref-type="bibr" rid="B13">13</xref>). Recent cross-sectional studies among Chinese adolescents demonstrate strong associations between somatic symptoms and both depression and anxiety, with severe somatic presentations linked to higher prevalence of these disorders (<xref ref-type="bibr" rid="B14">14</xref>). This phenomenon often delays recognition and intervention, as individuals may seek medical rather than psychological care, perpetuating underdiagnosis. Furthermore, the pervasive digitalization of adolescent social interactions in China&#x2014;through platforms like WeChat and Douyin&#x2014;introduces novel stressors, including cyberbullying, social comparison, and information overload, while simultaneously providing rich linguistic data that harbor latent indicators of distress (<xref ref-type="bibr" rid="B15">15</xref>). Spontaneous online expressions, such as posts, comments, or chat logs, can reveal subtle cognitive distortions or emotional leakage that structured self-report tools often fail to capture.</p>
<p>The application of AI in mental health screening has advanced significantly, evolving from basic sentiment analysis to sophisticated behavioral and linguistic modeling (<xref ref-type="bibr" rid="B16">16</xref>, <xref ref-type="bibr" rid="B17">17</xref>). Natural language processing (NLP) techniques, particularly those based on Transformer architectures and domain-adapted pretraining on mental health corpora, have demonstrated superior capability in detecting nuanced markers of psychological distress in youth populations (<xref ref-type="bibr" rid="B18">18</xref>, <xref ref-type="bibr" rid="B19">19</xref>). For example, AI-driven predictive models have been applied to monitor mood fluctuations and early symptoms in adolescents using social media data, offering real-time insights that enhance diagnostic accuracy (<xref ref-type="bibr" rid="B20">20</xref>). However, a persistent limitation in many existing systems is the reliance on single-task learning paradigms that treat depression and anxiety as isolated conditions, despite robust clinical evidence of their high comorbidity. Meta-analyses reveal comorbidity rates ranging from 15% to 75% in adolescent samples, with shared genetic, environmental, and neurobiological factors contributing to symptom overlap (<xref ref-type="bibr" rid="B21">21</xref>, <xref ref-type="bibr" rid="B22">22</xref>). This decoupling can overlook critical cross-condition semantic interactions, leading to reduced predictive robustness and clinical utility.</p>
<p>The present study is based on a dataset of spontaneous textual responses from approximately 1,275 Chinese adolescents across junior high, high school, and university levels, labeled with professional clinician-assessed PHQ-9 and GAD-7 scores. As is typical in psychiatric datasets, severe class imbalance was present, with high-risk cases significantly underrepresented. This challenge was addressed through a targeted data augmentation strategy, alongside careful preprocessing that preserved emotion-expressive linguistic features critical for cultural nuance. To overcome the limitations of single-task models, cultural insensitivity in generic NLP tools, and data imbalance, this study introduces the intelligent diagnosis system (IDS)&#x2014;a multitask deep learning framework designed for joint, non-intrusive screening of comorbid depression and anxiety in Chinese adolescents. By leveraging a Chinese-optimized Bidirectional Encoder Representations from Transformers (BERT) encoder, explicit comorbidity modeling, culturally attuned handling of somatization patterns, and mechanisms to prioritize detection of severe cases, the IDS offers a privacy-preserving and equitable alternative to traditional self-report instruments. This system enables scalable early identification in real-world settings such as schools, communities, and telehealth platforms, supporting a transition from reactive treatment to proactive, data-driven prevention and ultimately reducing the long-term burden of mental health disorders in this vulnerable population.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<label>2</label>
<title>Materials and methods</title>
<sec id="s2_1">
<label>2.1</label>
<title>Data preparation</title>
<p>The dataset used in this study contains textual expression data from approximately 1,275 adolescents, with each person providing seven to nine independent text responses, where data labels include PHQ-9 scores (0&#x2013;27 points) and GAD-7 scores (0&#x2013;21 points), all assessed by professional mental health evaluators according to standard scales. The dataset covers adolescent groups of different age ranges, including junior high school students, high school&#xa0;students, and university students, ensuring sample representativeness and diversity, while the data collection process strictly follows ethical guidelines, with all participants signing informed consent forms, and data anonymization to protect privacy (<xref ref-type="bibr" rid="B23">23</xref>). Data preprocessing includes text cleaning, Chinese word segmentation, length control, and intelligent data augmentation steps, where text cleaning mainly removes special characters and normalizes punctuation, while preserving punctuation related to emotional expression to maintain the emotional characteristics of the text. Chinese word segmentation uses the jieba tokenizer for word segmentation processing, with a maximum sequence length set to 96 tokens to balance computational efficiency and information retention. To address common class imbalance issues in mental health data, this study employs intelligent data augmentation technology based on a variational autoencoder (VAE) (<xref ref-type="bibr" rid="B24">24</xref>), balancing data distribution across various categories by generating high-quality synthetic samples. The dataset adopts a stratified sampling strategy, divided into training set, validation set, and test set in an 8:1:1 ratio, where stratification is based on the joint labels of PHQ and GAD, ensuring consistency of category distribution across subsets. This sampling strategy effectively avoids data distribution bias, ensuring objectivity and reliability of model evaluation. 
The original test set contained 127 samples. For comprehensive evaluation and to ensure sufficient sample size for robust performance assessment, the test set was expanded to 308 samples through data augmentation, maintaining the same distribution characteristics as the original test set. Data distribution statistics for the original dataset are shown in <xref ref-type="table" rid="T1"><bold>Table&#xa0;1</bold></xref>.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Dataset distribution statistics.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Dataset</th>
<th valign="middle" align="left">Sample count</th>
<th valign="middle" align="left">PHQ-9</th>
<th valign="middle" align="left">GAD-7</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Training</td>
<td valign="middle" align="left">1,020</td>
<td valign="middle" align="left">12.3</td>
<td valign="middle" align="left">9.8</td>
</tr>
<tr>
<td valign="middle" align="left">Validation</td>
<td valign="middle" align="left">128</td>
<td valign="middle" align="left">12.1</td>
<td valign="middle" align="left">9.6</td>
</tr>
<tr>
<td valign="middle" align="left">Test</td>
<td valign="middle" align="left">127</td>
<td valign="middle" align="left">12.5</td>
<td valign="middle" align="left">10.1</td>
</tr>
<tr>
<td valign="middle" align="left">Total</td>
<td valign="middle" align="left">1,275</td>
<td valign="middle" align="left">12.3</td>
<td valign="middle" align="left">9.8</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2_2">
<label>2.2</label>
<title>Model architecture</title>
<p>The multitask deep learning model proposed in this study is built based on the BERT pretrained model. This model adopts an end-to-end training approach, capable of simultaneously learning prediction tasks for depression and anxiety symptoms, using BERT-base-Chinese as the base encoder to extract deep semantic features from text. The BERT architecture is based on the encoder part of Transformer, employing bidirectional self-attention mechanisms to fully understand contextual information in text (<xref ref-type="bibr" rid="B25">25</xref>). The BERT model contains 12 layers of Transformer encoders, each with 12 attention heads, with a total parameter count of approximately 110 million. The self-attention mechanism captures long-distance dependencies in text by calculating similarity between query, key, and value, which is crucial for understanding complex emotional expressions in mental health text, as shown in <xref ref-type="disp-formula" rid="eq1">Equation 1</xref>.</p>
<disp-formula id="eq1"><label>(1)</label>
<mml:math display="block" id="M1"><mml:mrow><mml:mi>H</mml:mi><mml:mo>=</mml:mo><mml:mtext>BERT</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>X</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math>
</disp-formula>
<p>where <italic>X</italic> is the input text sequence, <inline-formula>
<mml:math display="inline" id="im1"><mml:mrow><mml:mi>H</mml:mi><mml:mo>&#x2208;</mml:mo><mml:msup><mml:mi>&#x211d;</mml:mi><mml:mrow><mml:mi>L</mml:mi><mml:mo>&#xd7;</mml:mo><mml:mi>d</mml:mi></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> is the hidden state output by BERT, <italic>L</italic> is the sequence length (maximum length set to 96 tokens to balance computational efficiency and information retention for adolescent psychological expressions), and <italic>d</italic> = 768 is the hidden layer dimension (standard BERT-base configuration that provides sufficient representational capacity for capturing complex emotional patterns in mental health text). BERT&#x2019;s pretraining tasks include Masked Language Modeling (MLM) and Next Sentence Prediction (NSP), enabling it to learn rich language representations that are particularly effective for understanding contextual emotional expressions in Chinese adolescent mental health text. We add additional self-attention layers based on BERT encoding to enhance the capture of key information. This mechanism automatically learns dependency relationships between different positions in text, which is particularly suitable for capturing emotional expressions and key information in mental health text, where emotional indicators may be distributed across different parts of the text and require long-range dependency modeling. The multihead attention mechanism captures text features from different perspectives by parallel computation of multiple attention heads (<xref ref-type="bibr" rid="B26">26</xref>), as shown in <xref ref-type="disp-formula" rid="eq2">Equation 2</xref>:</p>
<disp-formula id="eq2"><label>(2)</label>
<mml:math display="block" id="M2"><mml:mrow><mml:mtext>Attention</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>Q</mml:mi><mml:mo>,</mml:mo><mml:mi>K</mml:mi><mml:mo>,</mml:mo><mml:mi>V</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mtext>softmax</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mi>Q</mml:mi><mml:msup><mml:mi>K</mml:mi><mml:mi>T</mml:mi></mml:msup></mml:mrow><mml:mrow><mml:msqrt><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mrow></mml:msqrt></mml:mrow></mml:mfrac></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mi>V</mml:mi></mml:mrow></mml:math>
</disp-formula>
<p>where <italic>Q</italic>, <italic>K</italic>, and <italic>V</italic> are the query, key, and value matrices, respectively, and <italic>d<sub>k</sub></italic> is the dimension of the key. The multihead attention mechanism allows the model to simultaneously attend to different representation subspaces, enabling comprehensive capture of diverse emotional patterns and contextual information that are crucial for understanding complex psychological states in mental health text, as shown in <xref ref-type="disp-formula" rid="eq3">Equation 3</xref>:</p>
<disp-formula id="eq3"><label>(3)</label>
<mml:math display="block" id="M3"><mml:mrow><mml:mtext>MultiHead</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>Q</mml:mi><mml:mo>,</mml:mo><mml:mi>K</mml:mi><mml:mo>,</mml:mo><mml:mi>V</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mtext>Concat</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mtext>head</mml:mtext></mml:mrow><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x2026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mtext>head</mml:mtext></mml:mrow><mml:mi>h</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:msup><mml:mi>W</mml:mi><mml:mi>O</mml:mi></mml:msup></mml:mrow></mml:math>
</disp-formula>
<p>where head<italic><sub>i</sub></italic> = Attention<inline-formula>
<mml:math display="inline" id="im2"><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>Q</mml:mi><mml:msubsup><mml:mi>W</mml:mi><mml:mi>i</mml:mi><mml:mi>Q</mml:mi></mml:msubsup><mml:mo>,</mml:mo><mml:mi>K</mml:mi><mml:msubsup><mml:mi>W</mml:mi><mml:mi>i</mml:mi><mml:mi>K</mml:mi></mml:msubsup><mml:mo>,</mml:mo><mml:mi>V</mml:mi><mml:msubsup><mml:mi>W</mml:mi><mml:mi>i</mml:mi><mml:mi>V</mml:mi></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, and <inline-formula>
<mml:math display="inline" id="im3"><mml:mrow><mml:mi>h</mml:mi><mml:mo>=</mml:mo><mml:mn>12</mml:mn></mml:mrow></mml:math></inline-formula> is the number of attention heads. We combine BERT&#x2019;s pooled output (representing the [CLS] token embedding that captures global semantic information) and global pooling features (obtained by averaging all token embeddings to capture overall text characteristics) to create a comprehensive feature representation. This dual-feature fusion strategy captures both sentence-level semantics and token-level patterns, which is particularly important for mental health text where both global emotional tone and specific emotional keywords contribute to assessment. Feature enhancement is then performed through a multilayer perceptron, which adopts multilayer non-linear transformations capable of learning complex feature combination patterns that are specific to mental health assessment tasks, as shown in <xref ref-type="disp-formula" rid="eq4">Equations 4</xref>&#x2013;<xref ref-type="disp-formula" rid="eq5">5</xref>: </p>
<disp-formula id="eq4"><label>(4)</label>
<mml:math display="block" id="M4"><mml:mrow><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">m</mml:mi><mml:mi mathvariant="normal">b</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">d</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi mathvariant="normal">C</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">t</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mi mathvariant="normal">p</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mi mathvariant="normal">g</mml:mi><mml:mi mathvariant="normal">l</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">b</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">l</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq5"><label>(5)</label>
<mml:math display="block" id="M5"><mml:mtable columnalign="left"><mml:mtr><mml:mtd><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">h</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">d</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>M</mml:mi><mml:mi>L</mml:mi><mml:mi>P</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">m</mml:mi><mml:mi mathvariant="normal">b</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">d</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mo>=</mml:mo><mml:mtext>ReLU</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>W</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mtext>ReLU</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>W</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mi mathvariant="normal">c</mml:mi><mml:mi mathvariant="normal">o</mml:mi><mml:mi mathvariant="normal">m</mml:mi><mml:mi mathvariant="normal">b</mml:mi><mml:mi mathvariant="normal">i</mml:mi><mml:mi mathvariant="normal">n</mml:mi><mml:mi mathvariant="normal">e</mml:mi><mml:mi mathvariant="normal">d</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mn>2</mml:mn></mml:msub></mml:mrow><mml:mo 
stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math>
</disp-formula>
<p>where MLP contains two fully connected layers, with an intermediate layer dimension of 512 and an output dimension of 256. The model contains four output heads, one each for the regression and classification tasks of PHQ-9 and GAD-7. This dual-task, dual-output design enables comprehensive assessment (<xref ref-type="disp-formula" rid="eq6">Equations 6</xref>&#x2013;<xref ref-type="disp-formula" rid="eq9">9</xref>): regression tasks directly output continuous scores for fine-grained severity evaluation, while classification tasks divide scores into different severity levels (mild, moderate, severe) for categorical risk assessment. This design simultaneously handles regression and classification tasks, fully utilizing task correlations and the complementary nature of continuous and categorical assessments to improve overall model performance and clinical utility.</p>
<disp-formula id="eq6"><label>(6)</label>
<mml:math display="block" id="M6"><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mtext>PHQ</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>reg</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msubsup><mml:mi>W</mml:mi><mml:mrow><mml:mtext>PHQ</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>reg</mml:mtext></mml:mrow><mml:mi>T</mml:mi></mml:msubsup><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mtext>enhanced</mml:mtext></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mtext>PHQ</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>reg</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq7"><label>(7)</label>
<mml:math display="block" id="M7"><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mtext>GAD</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>reg</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msubsup><mml:mi>W</mml:mi><mml:mrow><mml:mtext>GAD</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>reg</mml:mtext></mml:mrow><mml:mi>T</mml:mi></mml:msubsup><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mtext>enhanced</mml:mtext></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mtext>GAD</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>reg</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq8"><label>(8)</label>
<mml:math display="block" id="M8"><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mtext>PHQ</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>cls</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mtext>softmax</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mi>W</mml:mi><mml:mrow><mml:mtext>PHQ</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>cls</mml:mtext></mml:mrow><mml:mi>T</mml:mi></mml:msubsup><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mtext>enhanced</mml:mtext></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mtext>PHQ</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>cls</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq9"><label>(9)</label>
<mml:math display="block" id="M9"><mml:mrow><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mtext>GAD</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>cls</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mtext>softmax</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mi>W</mml:mi><mml:mrow><mml:mtext>GAD</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>cls</mml:mtext></mml:mrow><mml:mi>T</mml:mi></mml:msubsup><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mtext>enhanced</mml:mtext></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>b</mml:mi><mml:mrow><mml:mtext>GAD</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mtext>cls</mml:mtext></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math>
</disp-formula>
<p>where classification tasks adopt a three-class strategy: mild (0&#x2013;4 points), moderate (5&#x2013;9 points), and severe (10 points and above).</p>
</sec>
<sec id="s2_3">
<label>2.3</label>
<title>Variational autoencoder data augmentation technology</title>
<p>To address common class imbalance issues in mental health data, this study developed intelligent data augmentation technology based on the VAE. This technology balances data distribution across various categories by generating high-quality synthetic samples while maintaining semantic consistency and emotional expression characteristics of the text. The text VAE model adopts an encoder&#x2013;decoder architecture, where the encoder maps input text to a probabilistic latent space and the decoder reconstructs text from latent representations. The encoder employs a bidirectional LSTM structure, which is particularly effective for capturing contextual information in Chinese text where word order and context significantly influence meaning. VAE learns latent representations of text through variational inference, which enables the model to generate diverse text samples while maintaining semantic consistency by learning a smooth and continuous latent space that captures the underlying distribution of mental health text patterns, as shown in <xref ref-type="disp-formula" rid="eq10">Equations 10</xref>&#x2013;<xref ref-type="disp-formula" rid="eq11">11</xref>.</p>
<disp-formula id="eq10"><label>(10)</label>
<mml:math display="block" id="M10"><mml:mrow><mml:mi>q</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo>|</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi mathvariant="script">N</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo>|</mml:mo><mml:mi>&#x3bc;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:msup><mml:mi>&#x3c3;</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mi>I</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math>
</disp-formula>
<disp-formula id="eq11"><label>(11)</label>
<mml:math display="block" id="M11"><mml:mrow><mml:mi>p</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo>|</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mtext>Bernoulli</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>sigmoid</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>f</mml:mi><mml:mrow><mml:mtext>decoder</mml:mtext></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im4"><mml:mrow><mml:mi>q</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo>|</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> is the encoder that maps input text <inline-formula>
<mml:math display="inline" id="im5"><mml:mi>x</mml:mi></mml:math></inline-formula> to a latent space distribution, <inline-formula>
<mml:math display="inline" id="im6"><mml:mrow><mml:mi>p</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo>|</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> is the decoder that reconstructs text from latent representations, and <inline-formula>
<mml:math display="inline" id="im7"><mml:mrow><mml:mi>&#x3bc;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> and <inline-formula>
<mml:math display="inline" id="im8"><mml:mrow><mml:msup><mml:mi>&#x3c3;</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula> are the mean and variance of the latent distribution, respectively. The latent space dimension is set to 128, achieving a balance between information retention (sufficient capacity to encode semantic information) and generation quality (manageable dimensionality for stable training). This dimension choice is based on empirical analysis showing that 128 dimensions can effectively capture semantic features of mental health text while avoiding overfitting and maintaining generation diversity. The VAE model combines mental health label information for text generation, ensuring consistency between generated text and original labels through conditional generation, as shown in <xref ref-type="disp-formula" rid="eq12">Equation 12</xref>.</p>
<disp-formula id="eq12"><label>(12)</label>
<mml:math display="block" id="M12"><mml:mrow><mml:mi>p</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo>|</mml:mo><mml:mi>z</mml:mi><mml:mo>,</mml:mo><mml:mi>y</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>p</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>x</mml:mi><mml:mo>|</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#xb7;</mml:mo><mml:mi>p</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>y</mml:mi><mml:mo>|</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>where <italic>y</italic> is the mental health label (PHQ-9 and GAD-7 scores). This conditional generation formulation enables the VAE to learn the mapping relationship between labels and text during training, where the model learns to associate specific emotional states (encoded in PHQ-9 and GAD-7 scores) with corresponding linguistic patterns. During generation, label guidance ensures that the emotional state of generated text is consistent with target labels, allowing controlled generation of text samples with specific severity levels for addressing class imbalance issues in mental health datasets. Generated text quality is ensured through four mechanisms: 1) pretrained semantic similarity models calculate similarity between generated and original text, with threshold set above 0.7 to ensure semantic consistency; 2) trained classifiers verify that predicted labels of generated text are consistent with original labels, ensuring generation quality and label preservation; 3) random sampling of different regions in latent space, combined with temperature parameters, regulates diversity of generated text while maintaining semantic coherence; and 4) language models evaluate grammatical correctness and fluency of generated text, ensuring natural language quality. The effectiveness of VAE data augmentation technology is evaluated through the metrics in <xref ref-type="table" rid="T2"><bold>Table&#xa0;2</bold></xref>.</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>VAE data augmentation effect evaluation.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Evaluation metric</th>
<th valign="middle" align="left">Original data</th>
<th valign="middle" align="left">Augmented data</th>
<th valign="middle" align="left">Improvement</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Sample count</td>
<td valign="middle" align="left">1,275</td>
<td valign="middle" align="left">2,550</td>
<td valign="middle" align="left">+100%</td>
</tr>
<tr>
<td valign="middle" align="left">Class balance</td>
<td valign="middle" align="left">0.65</td>
<td valign="middle" align="left">0.92</td>
<td valign="middle" align="left">+41.5%</td>
</tr>
<tr>
<td valign="middle" align="left">Semantic similarity</td>
<td valign="middle" align="left">&#x2013;</td>
<td valign="middle" align="left">0.78</td>
<td valign="middle" align="left">&#x2013;</td>
</tr>
<tr>
<td valign="middle" align="left">Label consistency</td>
<td valign="middle" align="left">&#x2013;</td>
<td valign="middle" align="left">0.85</td>
<td valign="middle" align="left">&#x2013;</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s2_4">
<label>2.4</label>
<title>Multitask joint optimization mechanism</title>
<p>The core of the proposed architecture is a joint mapping function <inline-formula>
<mml:math display="inline" id="im9"><mml:mi>&#x2131;</mml:mi></mml:math></inline-formula> designed to exploit the clinical correlation between depression (D) and anxiety (A). Formally, we define the parameter space as <inline-formula>
<mml:math display="inline" id="im10"><mml:mrow><mml:mtext>&#x398;</mml:mtext><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>&#x3b8;</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x3b8;</mml:mi><mml:mi>D</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x3b8;</mml:mi><mml:mi>A</mml:mi></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:math></inline-formula>, where <inline-formula>
<mml:math display="inline" id="im11"><mml:mrow><mml:msub><mml:mi>&#x3b8;</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></inline-formula> represents the shared parameters in the BERT encoder and dual-feature fusion layer, while <italic>&#x3b8;<sub>D</sub></italic> and <italic>&#x3b8;<sub>A</sub></italic> denote task-specific parameters for depression and anxiety, respectively. The enhanced latent representation serves as a shared semantic bridge and can be expressed as <xref ref-type="disp-formula" rid="eq13">Equation 13</xref>:</p>
<disp-formula id="eq13"><label>(13)</label>
<mml:math display="block" id="M13"><mml:mrow><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>h</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x2131;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>X</mml:mi><mml:mo>;</mml:mo><mml:msub><mml:mi>&#x3b8;</mml:mi><mml:mrow><mml:mi>s</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math>
</disp-formula>
<p>This shared representation facilitates &#x201c;inductive transfer,&#x201d; where features learned for one task provide supplementary predictive signals for the other. Based on this shared representation, the task-specific predictions for depression and anxiety are defined as follows in <xref ref-type="disp-formula" rid="eq14">Equation 14</xref>:</p>
<disp-formula id="eq14"><label>(14)</label>
<mml:math display="block" id="M14"><mml:mrow><mml:msub><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mi>D</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>f</mml:mi><mml:mi>D</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>h</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>;</mml:mo><mml:msub><mml:mi>&#x3b8;</mml:mi><mml:mi>D</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x2003;</mml:mtext><mml:msub><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mi>A</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>f</mml:mi><mml:mi>A</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mi>F</mml:mi><mml:mrow><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>h</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>c</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>;</mml:mo><mml:msub><mml:mi>&#x3b8;</mml:mi><mml:mi>A</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math>
</disp-formula>
<p>The model is jointly optimized by minimizing the weighted sum of task-specific losses, which is formulated as <xref ref-type="disp-formula" rid="eq15">Equation 15</xref>:</p>
<disp-formula id="eq15"><label>(15)</label>
<mml:math display="block" id="M15"><mml:mrow><mml:mi>&#x2112;</mml:mi><mml:mo>=</mml:mo><mml:msub><mml:mi>&#x3bb;</mml:mi><mml:mi>D</mml:mi></mml:msub><mml:msub><mml:mi>&#x2112;</mml:mi><mml:mi>D</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mi>D</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mi>D</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x3bb;</mml:mi><mml:mi>A</mml:mi></mml:msub><mml:msub><mml:mi>&#x2112;</mml:mi><mml:mi>A</mml:mi></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mi>A</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mi>A</mml:mi></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math>
</disp-formula>
<p>This joint optimization mechanism acts as a latent regularizer, effectively reducing the hypothesis space and mitigating overfitting&#x2014;a critical advantage when modeling nuanced psychological text.</p>
</sec>
<sec id="s2_5">
<label>2.5</label>
<title>Loss function</title>
<p>We employ a weighted loss function to balance regression and classification tasks. The total loss function integrates regression loss, classification loss, and regularization terms, and is defined as <xref ref-type="disp-formula" rid="eq16">Equation 16</xref>:</p>
<disp-formula id="eq16"><label>(16)</label>
<mml:math display="block" id="M16"><mml:mrow><mml:msub><mml:mi>&#x2112;</mml:mi><mml:mrow><mml:mtext>total</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x3b1;</mml:mi><mml:msub><mml:mi>&#x2112;</mml:mi><mml:mrow><mml:mtext>reg</mml:mtext></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mi>&#x3b2;</mml:mi><mml:msub><mml:mi>&#x2112;</mml:mi><mml:mrow><mml:mtext>cls</mml:mtext></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mi>&#x3b3;</mml:mi><mml:msub><mml:mi>&#x2112;</mml:mi><mml:mrow><mml:mtext>regularization</mml:mtext></mml:mrow></mml:msub></mml:mrow></mml:math>
</disp-formula>
<p>where <italic>&#x3b1;</italic>, <italic>&#x3b2;</italic>, and <italic>&#x3b3;</italic> are the weight coefficients for each loss term, determined through grid search optimization. The regression loss is computed as the mean squared error over both PHQ-9 and GAD-7 predictions, as given in <xref ref-type="disp-formula" rid="eq17">Equation 17</xref>:</p>
<disp-formula id="eq17"><label>(17)</label>
<mml:math display="block" id="M17"><mml:mrow><mml:msub><mml:mi>&#x2112;</mml:mi><mml:mrow><mml:mtext>reg</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>N</mml:mi></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>N</mml:mi></mml:munderover><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mi>y</mml:mi><mml:mrow><mml:mtext>PHQ</mml:mtext></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>&#x2212;</mml:mo><mml:msubsup><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mrow><mml:mtext>PHQ</mml:mtext></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msubsup><mml:mi>y</mml:mi><mml:mrow><mml:mtext>GAD</mml:mtext></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>&#x2212;</mml:mo><mml:msubsup><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mrow><mml:mtext>GAD</mml:mtext></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mn>2</mml:mn></mml:msup></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow></mml:math>
</disp-formula>
<p>To address class imbalance, the classification loss is formulated using a weighted cross-entropy function, as described in <xref ref-type="disp-formula" rid="eq18">Equation 18</xref>:</p>
<disp-formula id="eq18"><label>(18)</label>
<mml:math display="block" id="M18"><mml:mrow><mml:msub><mml:mi>&#x2112;</mml:mi><mml:mrow><mml:mtext>cls</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>N</mml:mi></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>N</mml:mi></mml:munderover><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>c</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>C</mml:mi></mml:munderover><mml:msub><mml:mi>w</mml:mi><mml:mi>c</mml:mi></mml:msub><mml:mo>&#xb7;</mml:mo><mml:msubsup><mml:mi>y</mml:mi><mml:mi>c</mml:mi><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mtext>&#xa0;log</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mover accent="true"><mml:mi>y</mml:mi><mml:mo>^</mml:mo></mml:mover><mml:mi>c</mml:mi><mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:math>
</disp-formula>
<p>where <inline-formula>
<mml:math display="inline" id="im12"><mml:mrow><mml:msub><mml:mi>w</mml:mi><mml:mi>c</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is the class weight, calculated based on the sample count of each class in the training set: <inline-formula>
<mml:math display="inline" id="im13"><mml:mrow><mml:msub><mml:mi>w</mml:mi><mml:mi>c</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mi>N</mml:mi><mml:mrow><mml:msub><mml:mi>N</mml:mi><mml:mi>c</mml:mi></mml:msub><mml:mo>&#xb7;</mml:mo><mml:mi>C</mml:mi></mml:mrow></mml:mfrac></mml:mrow></mml:math></inline-formula>. An L2 regularization term is introduced to prevent overfitting, which is expressed as <xref ref-type="disp-formula" rid="eq19">Equation 19</xref>:</p>
<disp-formula id="eq19"><label>(19)</label>
<mml:math display="block" id="M19"><mml:mrow><mml:msub><mml:mi>&#x2112;</mml:mi><mml:mrow><mml:mtext>regularization</mml:mtext></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>&#x3bb;</mml:mi><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>&#x3b8;</mml:mi><mml:mo>&#x2208;</mml:mo><mml:mtext>&#x398;</mml:mtext></mml:mrow></mml:munder><mml:mo>&#x2016;</mml:mo><mml:mi>&#x3b8;</mml:mi><mml:msubsup><mml:mo>&#x2016;</mml:mo><mml:mn>2</mml:mn><mml:mn>2</mml:mn></mml:msubsup></mml:mrow></mml:math>
</disp-formula>
<p>where <italic>&#x3bb;</italic> = 0.01 is the regularization coefficient, and &#x398; is the model parameter set. The influence of this weighted loss formulation on the multitask deep learning architecture is crucial for clinical utility. In this study, we empirically set <italic>&#x3b2;</italic> = 6.0 and <italic>&#x3b1;</italic> = 0.3 (<italic>&#x3b2;</italic> &#x226b; <italic>&#x3b1;</italic>), creating a mathematical bias toward high-stakes risk categorization. This design ensures that the shared representation <italic>F</italic><sub>enhanced</sub> is optimized to prioritize features that distinguish between clinical severity levels (e.g., &#x201c;mild&#x201d; vs. &#x201c;severe&#x201d;). From a psychiatric perspective, identifying categorical risk levels is of higher priority for early-warning systems than achieving marginal gains in precise score regression. Furthermore, the integration of <italic>L</italic><sub>regularization</sub> (<italic>&#x3b3;</italic> = 0.01) stabilizes the multitask gradients, ensuring that neither task dominates the parameter updates during backpropagation, thereby enhancing the overall robustness of the multitask framework.</p>
</sec>
<sec id="s2_6">
<label>2.6</label>
<title>Training strategy</title>
<p>We implement various advanced training strategies to optimize model performance, ensuring the model achieves optimal results in mental health assessment tasks. The AdamW optimizer is combined with a cosine annealing learning rate scheduling strategy, with the learning rate updated according to <xref ref-type="disp-formula" rid="eq20">Equation 20</xref>:</p>
<disp-formula id="eq20"><label>(20)</label>
<mml:math display="block" id="M20"><mml:mtable columnalign="left"><mml:mtr><mml:mtd><mml:mi mathvariant="normal">l</mml:mi><mml:msub><mml:mi mathvariant="normal">r</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi mathvariant="normal">l</mml:mi><mml:msub><mml:mi mathvariant="normal">r</mml:mi><mml:mrow><mml:mi>min</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi mathvariant="normal">l</mml:mi><mml:msub><mml:mi mathvariant="normal">r</mml:mi><mml:mrow><mml:mi>max</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mi mathvariant="normal">l</mml:mi><mml:msub><mml:mi mathvariant="normal">r</mml:mi><mml:mrow><mml:mi>min</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mtext>&#x2003;</mml:mtext><mml:mo>&#xd7;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:mi>cos</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mfrac><mml:mi>t</mml:mi><mml:mi>T</mml:mi></mml:mfrac><mml:mi>&#x3c0;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:math>
</disp-formula>
<p>where lr<sub>max</sub> = 1 &#xd7; 10<sup>&#x2212;5</sup> (initial learning rate), lr<sub>min</sub> = 1 &#xd7; 10<sup>&#x2212;6</sup> (minimum learning rate), <italic>T</italic> = 35 is the total number of training epochs, and <italic>t</italic> is the current epoch. This cosine annealing strategy gradually reduces the learning rate from maximum to minimum over the training process, enabling fine-grained parameter tuning in later epochs while maintaining training stability in early epochs, which is particularly important for fine-tuning pretrained BERT models on mental health text data. Multiple regularization techniques are applied to prevent overfitting: 1) dropout is applied in MLP layers and attention layers with probability set to 0.5, effectively preventing overfitting; 2) weight decay employs L2 regularization with coefficient set to 0.03, preventing parameters from becoming too large; 3) the stochastic depth technique randomly skips certain layers in deep networks with probability set to 0.1, improving training stability; 4) label smoothing is applied in classification tasks with smoothing factor set to 0.1, improving model generalization ability; and 5) gradient noise injection is adopted with noise scale of 1 &#xd7; 10<sup>&#x2212;5</sup>, improving model robustness. This comprehensive regularization strategy creates a synergistic effect that prevents overfitting while maintaining model performance. An early stopping mechanism is implemented based on the validation set F1 average score with patience = 3. The model with the best validation set performance is saved as the final model. Three model ensemble strategies are employed to improve prediction performance, as summarized in <xref ref-type="table" rid="T3"><bold>Table&#xa0;3</bold></xref>.</p>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Model ensemble strategy configuration.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Ensemble method</th>
<th valign="middle" align="left">Model count</th>
<th valign="middle" align="left">Weight strategy</th>
<th valign="middle" align="left">Performance improvement</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Standard model</td>
<td valign="middle" align="left">1</td>
<td valign="middle" align="left">Fixed weight</td>
<td valign="middle" align="left">Baseline performance</td>
</tr>
<tr>
<td valign="middle" align="left">EMA model</td>
<td valign="middle" align="left">1</td>
<td valign="middle" align="left">Exponential moving average</td>
<td valign="middle" align="left">+2.1%</td>
</tr>
<tr>
<td valign="middle" align="left">Ensemble model</td>
<td valign="middle" align="left">3</td>
<td valign="middle" align="left">Weighted average</td>
<td valign="middle" align="left">+3.8%</td>
</tr>
</tbody>
</table>
</table-wrap>
<p><xref ref-type="disp-formula" rid="eq21">Equation 21</xref> shows that the EMA model employs an exponential moving average to update parameters, creating a smoothed version of model weights that reduces training noise and improves generalization:</p>
<disp-formula id="eq21"><label>(21)</label>
<mml:math display="block" id="M21"><mml:mrow><mml:msubsup><mml:mi>&#x3b8;</mml:mi><mml:mi>t</mml:mi><mml:mrow><mml:mtext>EMA</mml:mtext></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mi>&#x3b2;</mml:mi><mml:msubsup><mml:mi>&#x3b8;</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mtext>EMA</mml:mtext></mml:mrow></mml:msubsup><mml:mo>+</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:mi>&#x3b2;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:msub><mml:mi>&#x3b8;</mml:mi><mml:mi>t</mml:mi></mml:msub></mml:mrow></mml:math>
</disp-formula>
<p>where <italic>&#x3b2;</italic> = 0.999 is the smoothing coefficient. This high smoothing coefficient (<italic>&#x3b2;</italic> = 0.999) ensures that the EMA model weights change gradually, incorporating only a small fraction (1 &#x2212; <italic>&#x3b2;</italic> = 0.001) of the current model weights at each update, which creates a more stable and robust model representation that is less sensitive to training fluctuations and better suited for clinical applications requiring consistent predictions.</p>
</sec>
<sec id="s2_7">
<label>2.7</label>
<title>Ablation study design</title>
<p>To analyze the contribution of individual components in the proposed architecture and compare its performance against standard deep learning configurations, we designed a series of experiments. Starting from the full proposed model, several variant models were constructed by selectively removing specific components while keeping all other settings unchanged. These variants serve as benchmarks for standard methods: the removal of the multitask module corresponds to standard single-task learning; the replacement of the enhanced attention reflects a vanilla Transformer-based approach; and the exclusion of VAE represents models trained on unaugmented, imbalanced clinical data. All models were evaluated under identical experimental settings to ensure a fair comparison within the same environment.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<label>3</label>
<title>Results</title>
<sec id="s3_1">
<label>3.1</label>
<title>Experimental setup</title>
<p>The IDS was evaluated on a held-out test set of 308 samples from Chinese adolescents across junior high, high school, and university levels. Performance was assessed across regression (continuous PHQ-9 and GAD-7 total score prediction) and classification (binary clinical screening and PHQ-9 severity stratification) tasks for comorbid depression and anxiety. All experiments were conducted on an NVIDIA RTX 4090 GPU using PyTorch 2.0.1, with hyperparameters optimized via grid search on the validation set (<xref ref-type="table" rid="T4"><bold>Table&#xa0;4</bold></xref>).</p>
<table-wrap id="T4" position="float">
<label>Table&#xa0;4</label>
<caption>
<p>Experimental environment and hyperparameter configuration.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Configuration item</th>
<th valign="middle" align="center">Parameter value</th>
<th valign="middle" align="center">Description</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Hardware environment</td>
<td valign="middle" align="center">NVIDIA RTX 4090</td>
<td valign="middle" align="center">GPU-accelerated training</td>
</tr>
<tr>
<td valign="middle" align="left">Framework version</td>
<td valign="middle" align="center">PyTorch 2.0.1</td>
<td valign="middle" align="center">Deep learning framework</td>
</tr>
<tr>
<td valign="middle" align="left">Batch size</td>
<td valign="middle" align="center">58</td>
<td valign="middle" align="center">Training batch size</td>
</tr>
<tr>
<td valign="middle" align="left">Maximum sequence length</td>
<td valign="middle" align="center">96</td>
<td valign="middle" align="center">Maximum text sequence length</td>
</tr>
<tr>
<td valign="middle" align="left">Training epochs</td>
<td valign="middle" align="center">35</td>
<td valign="middle" align="center">Total training epochs</td>
</tr>
<tr>
<td valign="middle" align="left">Optimizer</td>
<td valign="middle" align="center">AdamW</td>
<td valign="middle" align="center">Optimization algorithm</td>
</tr>
<tr>
<td valign="middle" align="left">Learning rate</td>
<td valign="middle" align="center">1 &#xd7; 10<sup>&#x2212;5</sup></td>
<td valign="middle" align="center">Initial learning rate</td>
</tr>
<tr>
<td valign="middle" align="left">Early stop patience</td>
<td valign="middle" align="center">3</td>
<td valign="middle" align="center">Early stopping patience</td>
</tr>
<tr>
<td valign="middle" align="left">Dropout rate</td>
<td valign="middle" align="center">0.5</td>
<td valign="middle" align="center">Dropout regularization</td>
</tr>
<tr>
<td valign="middle" align="left">Weight decay</td>
<td valign="middle" align="center">0.03</td>
<td valign="middle" align="center">L2 regularization</td>
</tr>
<tr>
<td valign="middle" align="left">Stochastic depth rate</td>
<td valign="middle" align="center">0.1</td>
<td valign="middle" align="center">Stochastic depth rate</td>
</tr>
<tr>
<td valign="middle" align="left">Label smoothing factor</td>
<td valign="middle" align="center">0.1</td>
<td valign="middle" align="center">Label smoothing</td>
</tr>
<tr>
<td valign="middle" align="left">Gradient noise scale</td>
<td valign="middle" align="center">1 &#xd7; 10<sup>&#x2212;5</sup></td>
<td valign="middle" align="center">Gradient noise</td>
</tr>
<tr>
<td valign="middle" align="left">Classification loss weight</td>
<td valign="middle" align="center">6.0</td>
<td valign="middle" align="center">Weight for the classification task</td>
</tr>
<tr>
<td valign="middle" align="left">Regression loss weight</td>
<td valign="middle" align="center">0.3</td>
<td valign="middle" align="center">Weight for the regression task</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_2">
<label>3.2</label>
<title>Regression task performance</title>
<p>The IDS achieved robust regression performance on clinician-assessed continuous scores (<xref ref-type="table" rid="T5"><bold>Table&#xa0;5</bold></xref>). For PHQ-9 prediction, the model yielded an MSE of 117.60, RMSE of 10.84, MAE of 8.92, Pearson correlation coefficient (<italic>r</italic>) of 0.706, and <italic>R</italic><sup>2</sup> of 0.498. Comparable performance was observed for GAD-7 (MSE: 91.58, RMSE: 9.57, MAE: 7.34, <italic>r</italic>: 0.693, <italic>R</italic><sup>2</sup>: 0.480). AUC values of 0.877 (PHQ-9) and 0.902 (GAD-7) indicated strong discrimination of clinically relevant symptom thresholds.</p>
<table-wrap id="T5" position="float">
<label>Table&#xa0;5</label>
<caption>
<p>Regression performance metrics for PHQ-9 and GAD-7 total score prediction.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Metric</th>
<th valign="middle" align="left">PHQ-9</th>
<th valign="middle" align="left">GAD-7</th>
<th valign="middle" align="left">Description</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">MSE</td>
<td valign="middle" align="left">117.60</td>
<td valign="middle" align="left">91.58</td>
<td valign="middle" align="left">Mean squared error</td>
</tr>
<tr>
<td valign="middle" align="left">Pearson <italic>r</italic></td>
<td valign="middle" align="left">0.706</td>
<td valign="middle" align="left">0.693</td>
<td valign="middle" align="left">Pearson correlation coefficient</td>
</tr>
<tr>
<td valign="middle" align="left">AUC</td>
<td valign="middle" align="left">0.877</td>
<td valign="middle" align="left">0.902</td>
<td valign="middle" align="left">Area under the ROC curve</td>
</tr>
<tr>
<td valign="middle" align="left">RMSE</td>
<td valign="middle" align="left">10.84</td>
<td valign="middle" align="left">9.57</td>
<td valign="middle" align="left">Root mean squared error</td>
</tr>
<tr>
<td valign="middle" align="left">MAE</td>
<td valign="middle" align="left">8.92</td>
<td valign="middle" align="left">7.34</td>
<td valign="middle" align="left">Mean absolute error</td>
</tr>
<tr>
<td valign="middle" align="left"><italic>R</italic><sup>2</sup></td>
<td valign="middle" align="left">0.498</td>
<td valign="middle" align="left">0.480</td>
<td valign="middle" align="left">Coefficient of determination</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As shown in <xref ref-type="fig" rid="f1"><bold>Figure&#xa0;1</bold></xref>, predicted scores closely aligned with true values along the identity line for both tasks, with minimal dispersion. This tight clustering confirms the model&#x2019;s high fidelity in capturing continuous symptom severity from spontaneous text, particularly in the context of culturally mediated somatization patterns.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>3D scatter plots of predicted versus true PHQ-9 (left) and GAD-7 (right) scores.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyt-17-1752423-g001.tif">
<alt-text content-type="machine-generated">Side-by-side 3D scatter plots visualize predictions. The left plot shows PHQ-9 scores with a color scale from blue to yellow. The right plot shows GAD-7 scores with colors ranging from purple to yellow. Axes include predicted values, true values, and sample index.</alt-text>
</graphic></fig>
</sec>
<sec id="s3_3">
<label>3.3</label>
<title>Classification task performance</title>
<p>Binary classification performance is summarized in <xref ref-type="table" rid="T6"><bold>Table&#xa0;6</bold></xref>. The model attained an accuracy of 0.773 and macro F1-score of 0.762 for PHQ-9, with superior results for GAD-7 (accuracy: 0.838, macro F1-score: 0.863).</p>
<table-wrap id="T6" position="float">
<label>Table&#xa0;6</label>
<caption>
<p>Binary classification performance metrics (clinical thresholds: PHQ-9 &#x2265; 10, GAD-7 &#x2265; 10).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Task</th>
<th valign="middle" align="left">Accuracy</th>
<th valign="middle" align="left">F1-score</th>
<th valign="middle" align="left">Precision</th>
<th valign="middle" align="left">Recall</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">PHQ-9</td>
<td valign="middle" align="left">0.773</td>
<td valign="middle" align="left">0.762</td>
<td valign="middle" align="left">0.762</td>
<td valign="middle" align="left">0.762</td>
</tr>
<tr>
<td valign="middle" align="left">GAD-7</td>
<td valign="middle" align="left">0.838</td>
<td valign="middle" align="left">0.863</td>
<td valign="middle" align="left">0.884</td>
<td valign="middle" align="left">0.843</td>
</tr>
</tbody>
</table>
</table-wrap>
<p><xref ref-type="fig" rid="f2"><bold>Figure&#xa0;2</bold></xref> provides a normalized multidimensional overview integrating classification (accuracy, F1-score) and regression (inverse MSE) metrics. The substantially larger enclosed area for GAD-7 demonstrates its superior performance across nearly all evaluated dimensions compared to PHQ-9. This holistic advantage further supports the model&#x2019;s enhanced capability to detect anxiety-related linguistic markers, particularly somatic expressions prevalent among Chinese adolescents.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Normalized radar chart of performance metrics for PHQ-9 and GAD-7 tasks.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyt-17-1752423-g002.tif">
<alt-text content-type="machine-generated">Radar chart displaying model performance metrics for PHQ and GAD. Metrics include F1 Score, Mean Squared Error (MSE), and Accuracy (Acc) for both. The chart is filled to highlight values, with axes ranging from 0.0 to 1.0.</alt-text>
</graphic></fig>
<p>The confusion matrices (<xref ref-type="disp-formula" rid="eq22">Equation 22</xref>) further elucidate task-specific discrimination. For GAD-7, the sensitivity is 0.845 and the specificity is 0.826. In contrast, PHQ-9 exhibits more balanced but lower sensitivity and specificity values.</p>
<disp-formula id="eq22"><label>(22)</label>
<mml:math display="block" id="M22"><mml:mrow><mml:msub><mml:mrow><mml:mtext>CM</mml:mtext></mml:mrow><mml:mrow><mml:mtext>GAD</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mn>7</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:mn>100</mml:mn></mml:mrow></mml:mtd><mml:mtd><mml:mrow><mml:mn>21</mml:mn></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mrow><mml:mn>29</mml:mn></mml:mrow></mml:mtd><mml:mtd><mml:mrow><mml:mn>158</mml:mn></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x2003;</mml:mtext><mml:msub><mml:mrow><mml:mtext>CM</mml:mtext></mml:mrow><mml:mrow><mml:mtext>PHQ</mml:mtext><mml:mo>&#x2212;</mml:mo><mml:mn>9</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mtable><mml:mtr><mml:mtd><mml:mrow><mml:mn>126</mml:mn></mml:mrow></mml:mtd><mml:mtd><mml:mrow><mml:mn>35</mml:mn></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:mrow><mml:mn>35</mml:mn></mml:mrow></mml:mtd><mml:mtd><mml:mrow><mml:mn>112</mml:mn></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>.</mml:mo></mml:mrow></mml:math>
</disp-formula>
<p>Severity-stratified analysis for PHQ-9 (<xref ref-type="table" rid="T7"><bold>Table&#xa0;7</bold></xref>) revealed consistent overall performance, with the highest F1-score in mild cases (0.815) and the lowest in moderate cases (0.698), attributable to greater symptom heterogeneity in the moderate range. High specificity across all strata supports effective rule-out capability for screening applications.</p>
<table-wrap id="T7" position="float">
<label>Table&#xa0;7</label>
<caption>
<p>PHQ-9 severity-stratified classification performance.</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Metric</th>
<th valign="middle" align="center">Mild (<italic>n</italic> = 89)</th>
<th valign="middle" align="center">Moderate (<italic>n</italic> = 112)</th>
<th valign="middle" align="center">Severe (<italic>n</italic> = 107)</th>
<th valign="middle" align="center">Overall</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Accuracy</td>
<td valign="middle" align="center">0.831</td>
<td valign="middle" align="center">0.723</td>
<td valign="middle" align="center">0.794</td>
<td valign="middle" align="center">0.773</td>
</tr>
<tr>
<td valign="middle" align="left">F1-score</td>
<td valign="middle" align="center">0.815</td>
<td valign="middle" align="center">0.698</td>
<td valign="middle" align="center">0.778</td>
<td valign="middle" align="center">0.762</td>
</tr>
<tr>
<td valign="middle" align="left">Precision</td>
<td valign="middle" align="center">0.847</td>
<td valign="middle" align="center">0.712</td>
<td valign="middle" align="center">0.789</td>
<td valign="middle" align="center">0.762</td>
</tr>
<tr>
<td valign="middle" align="left">Recall</td>
<td valign="middle" align="center">0.787</td>
<td valign="middle" align="center">0.685</td>
<td valign="middle" align="center">0.766</td>
<td valign="middle" align="center">0.762</td>
</tr>
<tr>
<td valign="middle" align="left">Specificity</td>
<td valign="middle" align="center">0.892</td>
<td valign="middle" align="center">0.756</td>
<td valign="middle" align="center">0.823</td>
<td valign="middle" align="center">0.823</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3_4">
<label>3.4</label>
<title>Training stability and convergence</title>
<p>Training proceeded for 35 epochs with early stopping (patience = 3). As depicted in <xref ref-type="fig" rid="f3"><bold>Figure&#xa0;3</bold></xref>, total loss, PHQ-9 MSE, and GAD-7 MSE exhibited monotonic decline, with negligible train-validation gaps after epoch 10. The accompanying learning rate schedule ensured stable optimization. Asymmetric loss weighting prioritized classification while maintaining regression accuracy, confirming robust multitask convergence without overfitting.</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Training and validation loss curves with learning rate schedule.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyt-17-1752423-g003.tif">
<alt-text content-type="machine-generated">Adolescent Mental Health AI Model-Training Loss Analysis includes four graphs. Top left: Total Loss Curve shows a decrease in training and validation loss over epochs. Top right: PHQ-9 Mean Squared Error declines for both training and validation. Bottom left: GAD-7 Mean Squared Error also decreases for training and validation. Bottom right: Learning Rate Schedule increases initially, peaks, then stabilizes.</alt-text>
</graphic></fig>
</sec>
<sec id="s3_5">
<label>3.5</label>
<title>Ablation study</title>
<p>Ablation results (<xref ref-type="table" rid="T8"><bold>Table&#xa0;8</bold></xref>) confirmed the contribution of each component. The absence of multitask learning substantially degraded F1-scores (6.2%&#x2013;7.8%) and increased MSE (14.2%&#x2013;18.4%). Removing culturally attuned somatization adaptation reduced GAD-7 F1-score by 5.1%, while omitting severe-case augmentation lowered recall in high-risk subgroups by 4.4%&#x2013;6.3%.</p>
<table-wrap id="T8" position="float">
<label>Table&#xa0;8</label>
<caption>
<p>Ablation study on the validation set (changes relative to the full model).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="middle" align="left">Configuration</th>
<th valign="middle" align="center">&#x394;F1-score (PHQ-9/GAD-7)</th>
<th valign="middle" align="center">&#x394;MSE (PHQ-9/GAD-7)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="middle" align="left">Full IDS</td>
<td valign="middle" align="center">0 (reference)</td>
<td valign="middle" align="center">0 (reference)</td>
</tr>
<tr>
<td valign="middle" align="left">- Multitask learning</td>
<td valign="middle" align="center">&#x2212;0.078/&#x2212;0.062</td>
<td valign="middle" align="center">+18.4/+14.2</td>
</tr>
<tr>
<td valign="middle" align="left">- Somatization adaptation</td>
<td valign="middle" align="center">&#x2212;0.051/&#x2212;0.051</td>
<td valign="middle" align="center">+9.7/+11.3</td>
</tr>
<tr>
<td valign="middle" align="left">- Severe-case augmentation</td>
<td valign="middle" align="center">&#x2212;0.063/&#x2212;0.044</td>
<td valign="middle" align="center">+7.1/+6.8</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<label>4</label>
<title>Discussion</title>
<p>The IDS framework developed in this study demonstrates promising diagnostic performance (Pearson <italic>r</italic> &#x2248; 0.70, AUC <italic>&gt;</italic> 0.87) for non-intrusive screening of depression and anxiety in adolescents. This system provides a comprehensive dual-task assessment specifically optimized for Chinese adolescent populations. These results align with evidence that deep learning-derived digital biomarkers can complement traditional clinical assessments (<xref ref-type="bibr" rid="B16">16</xref>). Our correlation coefficients are comparable to those reported for validated Chinese versions of the PHQ-9 and GAD-7 scale in adolescent populations (<xref ref-type="bibr" rid="B27">27</xref>, <xref ref-type="bibr" rid="B28">28</xref>). Recent comparative studies further contextualize these findings; for instance, text-based models using social media data for depression detection in Chinese youth have reported AUCs ranging from 0.81 to 0.88 (<xref ref-type="bibr" rid="B29">29</xref>), while multimodal approaches incorporating voice and facial cues achieve marginally higher performance (AUC 0.85&#x2013;0.95) but at the cost of increased intrusiveness (<xref ref-type="bibr" rid="B30">30</xref>). Our single-modality, spontaneous-text approach thus offers a favorable trade-off between privacy preservation and diagnostic utility, particularly suitable for large-scale, low-burden deployment in educational settings where routine collection of voice or video data may raise acceptability concerns. The integration of multitask learning with attention mechanisms represents a novel approach that leverages symptom correlations to improve prediction accuracy. Capturing spontaneous linguistic expressions provides a robust &#x201c;digital phenotype&#x201d; of adolescent distress, potentially mitigating self-presentation and reporting biases common in manual screening (<xref ref-type="bibr" rid="B9">9</xref>). 
Clinically, this could support school-based triage by enabling early identification of at-risk students without requiring in-person interviews, thereby enhancing intervention efficiency and potentially reducing the overall burden of mental health disorders, with epidemiological data indicating that up to 25%&#x2013;30% of Chinese adolescents were affected during and after the COVID-19 era (<xref ref-type="bibr" rid="B31">31</xref>).</p>
<p>A central innovation of this study is the joint modeling of depression and anxiety, reflecting their high clinical comorbidity in adolescents (rates often ranging from 15% to 75% in clinical samples) (<xref ref-type="bibr" rid="B21">21</xref>). Compared to single-task models, our multitask learning framework leverages shared semantic features to improve predictive stability. Furthermore, our asymmetric loss weighting strategy (<italic>&#x3b2;</italic> = 6.0) prioritizes risk categorization. This design choice aligns with clinical utility principles in AI systems, where accurate identification of severe cases is often more critical than minimizing average error in continuous scores (<xref ref-type="bibr" rid="B32">32</xref>). In practice, this could optimize resource allocation in school counseling by prioritizing severe cases, although fairness audits are recommended to address potential biases in diverse populations, such as those from rural versus urban areas or different ethnic groups, as highlighted in recent fairness evaluations of AI mental health models (<xref ref-type="bibr" rid="B33">33</xref>). Such audits could involve metrics like demographic parity and equalized odds to ensure equitable performance across subgroups, preventing exacerbation of existing disparities in adolescent mental healthcare access in China. The multitask learning architecture successfully captures the intrinsic correlations between depression and anxiety symptoms, achieving enhanced generalization ability through shared feature learning.</p>
<p>The VAE-based data augmentation strategy addresses a critical limitation in mental health research&#x2014;the scarcity of labeled data due to privacy concerns. To address pervasive class imbalance, we employed a VAE-based data augmentation strategy. Unlike traditional oversampling, this approach learns a continuous latent manifold, enabling the generation of semantically coherent synthetic texts (<xref ref-type="bibr" rid="B34">34</xref>). The resulting performance gains in severe cases underscore generative AI&#x2019;s potential to mitigate data-driven biases against high-risk populations (<xref ref-type="bibr" rid="B35">35</xref>). By enriching underrepresented regions of the feature space, the IDS promotes more equitable diagnostic sensitivity. However, synthetic data&#xa0;require further expert validation to ensure clinical appropriateness, as generative methods can introduce risks such as semantic drift or mode collapse, potentially leading to misinterpretations in mental health assessments (<xref ref-type="bibr" rid="B35">35</xref>). Specific validation methods could include Turing-style expert reviews or randomized controlled trials comparing model outputs with real patient data. Deployment should comply with privacy regulations such as China&#x2019;s Personal Information Protection Law, safeguarding against unintended disclosure of sensitive adolescent mental health information.</p>
<p>The Chinese-optimized BERT model demonstrates superior performance in understanding adolescent psychological expressions. The efficacy of our Chinese-optimized BERT encoder underscores the need for localized linguistic analysis in digital mental health tools. Clinical evidence indicates that Chinese individuals, including adolescents, often express psychological distress through somatization&#x2014;reporting physical symptoms such as fatigue or sleep disturbances rather than direct emotional terms (<xref ref-type="bibr" rid="B12">12</xref>). Recent epidemiological studies reinforce this, showing somatization rates of approximately 9%&#x2013;15% in Chinese adolescent populations, with strong associations to depression and anxiety (<xref ref-type="bibr" rid="B13">13</xref>). Leveraging self-attention mechanisms, our model captures these subtle, culturally specific markers, which may be overlooked in generic natural language processing (NLP) tools due to linguistic and cultural biases (<xref ref-type="bibr" rid="B33">33</xref>). This approach better aligns with implicit communicative styles in Chinese sociocultural contexts (<xref ref-type="bibr" rid="B13">13</xref>). To enhance generalizability, future iterations could account for regional variations, such as differences in urban versus rural expressions or across provinces, where mental health burdens vary geographically (<xref ref-type="bibr" rid="B31">31</xref>). Incorporating culturally attuned training, such as integrating traditional Chinese medicine concepts, could further reduce misdiagnosis risks in community or school settings.</p>
<p>The interpretability design represents a crucial advancement for clinical applications, addressing the black-box problem. Despite strong performance, translating the IDS framework to clinical practice requires addressing deep learning&#x2019;s opacity. While attention mechanisms provide initial insights, integrating <italic>post hoc</italic> techniques such as SHapley Additive exPlanations (SHAP) is essential for human-in-the-loop decision-making and building clinician trust (<xref ref-type="bibr" rid="B36">36</xref>). Several limitations warrant careful consideration: the sample size may limit generalizability, the dataset primarily represents urban adolescents, and the text-only approach may miss non-verbal cues. First, the cross-sectional design limits assessment of symptom trajectories; this may lead to overestimation of static risk without capturing fluctuations, underscoring the need for longitudinal approaches crucial for advancing toward prognostic applications (<xref ref-type="bibr" rid="B31">31</xref>). Second, although VAE augmentation improved sensitivity for severe cases, synthetic data warrant rigorous clinical validation, potentially through multicenter randomized trials, to avoid propagating biases. Ethical considerations, including potential surveillance misuse in adolescents, necessitate robust privacy guidelines, especially given the vulnerability of this population to stigma and data breaches. Regulatory pathways, such as those under China&#x2019;s National Medical Products Administration (NMPA) for AI medical devices, should be navigated, involving classification as class III devices and post-market surveillance to ensure safety (<xref ref-type="bibr" rid="B32">32</xref>). 
Finally, multimodal fusion&#x2014;integrating textual data with acoustic and prosodic features&#x2014;holds promise for more comprehensive assessments (<xref ref-type="bibr" rid="B30">30</xref>), with future work suggesting randomized controlled trials in secondary schools to validate scalability in low-resource environments.</p>
</sec>
<sec id="s5" sec-type="conclusions">
<label>5</label>
<title>Conclusion</title>
<p>This study introduces the IDS, a multitask deep learning framework tailored for non-intrusive screening of comorbid depression and anxiety in Chinese adolescents. By incorporating culturally sensitive linguistic modeling and strategies to prioritize severe cases and mitigate data imbalance, the framework demonstrates promising diagnostic performance while addressing key limitations of traditional self-report measures in non-Western populations.</p>
<p>Beyond technical advancements, the IDS highlights the potential of AI to enable scalable, stigma-reducing mental health screening in real-world settings such as schools and community programs&#x2014;particularly valuable amid rising adolescent mental health challenges in China and globally. However, clinical adoption hinges on continued efforts to enhance model explainability, rigorously validate generative components, ensure ethical deployment, and expand to longitudinal and multimodal paradigms.</p>
<p>Ultimately, tools like IDS should serve as complements to human-centered care. Realizing their transformative promise will require sustained interdisciplinary collaboration to bridge technical innovation with clinical needs, cultural nuance, and equitable access, thereby contributing to a more proactive and inclusive global adolescent mental health ecosystem.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. The dataset used in this study is publicly available at: <uri xlink:href="https://github.com/shuyeit/mmpsy-data">https://github.com/shuyeit/mmpsy-data</uri> (Repository: GitHub; Repository Name: shuyeit/mmpsy-data; License: MIT License). The dataset contains anonymized mental health assessment data from 1275 students, including PHQ-9 and GAD-7 scale scores, therapy interview audio recordings, and corresponding text transcripts. All data have been anonymized to remove sensitive identifying information in compliance with privacy protection requirements.</p></sec>
<sec id="s7" sec-type="ethics-statement">
<title>Ethics statement</title>
<p>Ethical approval was not required for the study involving humans in accordance with the local legislation and institutional requirements. Written informed consent to participate in this study was not required from the participants or the participants&#x2019; legal guardians/next of kin in accordance with the national legislation and the institutional requirements.</p></sec>
<sec id="s8" sec-type="author-contributions">
<title>Author contributions</title>
<p>WL: Writing &#x2013; original draft, Methodology, Conceptualization, Writing &#x2013; review &amp; editing, Data curation. ZZ: Software, Writing &#x2013; review &amp; editing, Validation, Formal analysis, Data curation. LD: Software, Writing &#x2013; review &amp; editing, Methodology, Validation. JQ: Conceptualization, Writing &#x2013; review &amp; editing, Supervision, Funding acquisition, Resources, Project administration.</p></sec>
<ack>
<title>Acknowledgments</title>
<p>The authors extend their thanks to all adolescents and their parents who participated in data collection and to their colleagues who provided technical support.</p>
</ack>
<sec id="s10" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
<sec id="s11" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec id="s12" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Racine</surname> <given-names>N</given-names></name>
<name><surname>McArthur</surname> <given-names>BA</given-names></name>
<name><surname>Cooke</surname> <given-names>JE</given-names></name>
<name><surname>Eirich</surname> <given-names>R</given-names></name>
<name><surname>Zhu</surname> <given-names>J</given-names></name>
<name><surname>Madigan</surname> <given-names>S</given-names></name>
</person-group>. 
<article-title>Global prevalence of depressive and anxiety symptoms in children and adolescents during covid-19: A meta-analysis</article-title>. <source>JAMA Pediatr</source>. (<year>2021</year>) <volume>175</volume>:<page-range>1142&#x2013;50</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1001/jamapediatrics.2021.2482</pub-id>, PMID: <pub-id pub-id-type="pmid">34369987</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<label>2</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Shorey</surname> <given-names>S</given-names></name>
<name><surname>Ng</surname> <given-names>ED</given-names></name>
<name><surname>Wong</surname> <given-names>CHJ</given-names></name>
</person-group>. 
<article-title>Global prevalence of depression and elevated depressive symptoms among adolescents: A systematic review and meta-analysis</article-title>. <source>Br J Clin Psychol</source>. (<year>2022</year>) <volume>61</volume>:<fpage>287</fpage>&#x2013;<lpage>305</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/bjc.12333</pub-id>, PMID: <pub-id pub-id-type="pmid">34569066</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<label>3</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Lu</surname> <given-names>B</given-names></name>
<name><surname>Lin</surname> <given-names>L</given-names></name>
<name><surname>Su</surname> <given-names>X</given-names></name>
</person-group>. 
<article-title>Global burden of depression or depressive symptoms in children and adolescents: a systematic review and meta-analysis</article-title>. <source>J Affect Disord</source>. (<year>2024</year>) <volume>354</volume>:<page-range>553&#x2013;62</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.jad.2024.03.074</pub-id>, PMID: <pub-id pub-id-type="pmid">38490591</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<label>4</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhou</surname> <given-names>J</given-names></name>
<name><surname>Liu</surname> <given-names>Y</given-names></name>
<name><surname>Ma</surname> <given-names>J</given-names></name>
<name><surname>Feng</surname> <given-names>Z</given-names></name>
<name><surname>Hu</surname> <given-names>J</given-names></name>
<name><surname>Hu</surname> <given-names>J</given-names></name>
<etal/>
</person-group>. 
<article-title>Prevalence of depressive symptoms among children and adolescents in China: a systematic review and meta-analysis</article-title>. <source>Child Adolesc Psychiatry Ment Health</source>. (<year>2024</year>) <volume>18</volume>:<fpage>150</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1186/s13034-024-00841-w</pub-id>, PMID: <pub-id pub-id-type="pmid">39563377</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<label>5</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>X</given-names></name>
<name><surname>Liu</surname> <given-names>Q</given-names></name>
</person-group>. 
<article-title>Prevalence of anxiety symptoms among Chinese university students amid the covid-19 pandemic: A systematic review and meta-analysis</article-title>. <source>Heliyon</source>. (<year>2022</year>) <volume>8</volume>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.heliyon.2022.e10117</pub-id>, PMID: <pub-id pub-id-type="pmid">35965987</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<label>6</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Solmi</surname> <given-names>M</given-names></name>
<name><surname>Radua</surname> <given-names>J</given-names></name>
<name><surname>Olivola</surname> <given-names>M</given-names></name>
<name><surname>Croce</surname> <given-names>E</given-names></name>
<name><surname>Soardo</surname> <given-names>L</given-names></name>
<name><surname>Salazar de Pablo</surname> <given-names>G</given-names></name>
<etal/>
</person-group>. 
<article-title>Age at onset of mental disorders worldwide: large-scale meta-analysis of 192 epidemiological studies</article-title>. <source>Mol Psychiatry</source>. (<year>2022</year>) <volume>27</volume>:<page-range>281&#x2013;95</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41380-021-01161-7</pub-id>, PMID: <pub-id pub-id-type="pmid">34079068</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<label>7</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Althubaiti</surname> <given-names>A</given-names></name>
</person-group>. 
<article-title>Information bias in health research: definition, pitfalls, and adjustment methods</article-title>. <source>J Multidiscip Healthcare</source>. (<year>2016</year>) <volume>9</volume>:<page-range>211&#x2013;7</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.2147/JMDH.S104807</pub-id>, PMID: <pub-id pub-id-type="pmid">27217764</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<label>8</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Insel</surname> <given-names>TR</given-names></name>
</person-group>. 
<article-title>Digital phenotyping: technology for a new science of behavior</article-title>. <source>JAMA</source>. (<year>2017</year>) <volume>318</volume>:<page-range>1215&#x2013;6</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1001/jama.2017.11295</pub-id>, PMID: <pub-id pub-id-type="pmid">28973224</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<label>9</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Torous</surname> <given-names>J</given-names></name>
<name><surname>Bucci</surname> <given-names>S</given-names></name>
<name><surname>Bell</surname> <given-names>IH</given-names></name>
<name><surname>Kessing</surname> <given-names>LV</given-names></name>
<name><surname>Faurholt-Jepsen</surname> <given-names>M</given-names></name>
<name><surname>Whelan</surname> <given-names>P</given-names></name>
<etal/>
</person-group>. 
<article-title>The growing field of digital psychiatry: current evidence and the future of apps, social media, chatbots, and virtual reality</article-title>. <source>World Psychiatry</source>. (<year>2021</year>) <volume>20</volume>:<page-range>318&#x2013;35</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/wps.20883</pub-id>, PMID: <pub-id pub-id-type="pmid">34505369</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<label>10</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Birk</surname> <given-names>RH</given-names></name>
<name><surname>Samuel</surname> <given-names>G</given-names></name>
</person-group>. 
<article-title>Digital phenotyping for mental health: reviewing the challenges of using data to monitor and predict mental health problems</article-title>. <source>Curr Psychiatry Rep</source>. (<year>2022</year>) <volume>24</volume>:<fpage>523</fpage>&#x2013;<lpage>8</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11920-022-01358-9</pub-id>, PMID: <pub-id pub-id-type="pmid">36001220</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<label>11</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Jacobson</surname> <given-names>NC</given-names></name>
<name><surname>Weingarden</surname> <given-names>H</given-names></name>
<name><surname>Wilhelm</surname> <given-names>S</given-names></name>
</person-group>. 
<article-title>Digital biomarkers of mood disorders and symptom change</article-title>. <source>NPJ Digital Med</source>. (<year>2022</year>) <volume>5</volume>:<fpage>1</fpage>&#x2013;<lpage>9</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41746-022-00553-5</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<label>12</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Kleinman</surname> <given-names>A</given-names></name>
</person-group>. 
<article-title>Neurasthenia and depression: A study of somatization and culture in China</article-title>. <source>Culture Med Psychiatry</source>. (<year>1982</year>) <volume>6</volume>:<page-range>117&#x2013;90</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/BF00051427</pub-id>, PMID: <pub-id pub-id-type="pmid">7116909</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<label>13</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ryder</surname> <given-names>AG</given-names></name>
<name><surname>Chentsova-Dutton</surname> <given-names>YE</given-names></name>
</person-group>. 
<article-title>Depression in cultural context: &#x201c;Chinese somatization,&#x201d; revisited</article-title>. <source>Psychiatr Clinics North America</source>. (<year>2012</year>) <volume>35</volume>:<fpage>15</fpage>&#x2013;<lpage>36</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.psc.2011.11.006</pub-id>, PMID: <pub-id pub-id-type="pmid">22370488</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<label>14</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Lu</surname> <given-names>J</given-names></name>
<name><surname>Han</surname> <given-names>Y</given-names></name>
<name><surname>Liu</surname> <given-names>X</given-names></name>
<name><surname>Li</surname> <given-names>W</given-names></name>
<name><surname>Zhou</surname> <given-names>X</given-names></name>
</person-group>. 
<article-title>Association between somatic symptoms and depression and anxiety in adolescents: a cross-sectional school-based study</article-title>. <source>BMJ Open</source>. (<year>2025</year>) <volume>15</volume>:<fpage>e103860</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1136/bmjopen-2025-103860</pub-id>, PMID: <pub-id pub-id-type="pmid">40903085</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<label>15</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Keles</surname> <given-names>B</given-names></name>
<name><surname>McCrae</surname> <given-names>N</given-names></name>
<name><surname>Grealish</surname> <given-names>A</given-names></name>
</person-group>. 
<article-title>A systematic review: the influence of social media on depression, anxiety and psychological distress in adolescents</article-title>. <source>Int J Adolescence Youth</source>. (<year>2020</year>) <volume>25</volume>:<fpage>79</fpage>&#x2013;<lpage>93</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1080/02673843.2019.1590851</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<label>16</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>T</given-names></name>
<name><surname>Schoene</surname> <given-names>AM</given-names></name>
<name><surname>Ji</surname> <given-names>S</given-names></name>
<name><surname>Ananiadou</surname> <given-names>S</given-names></name>
</person-group>. 
<article-title>Natural language processing applied to mental illness detection: a narrative review</article-title>. <source>NPJ Digital Med</source>. (<year>2022</year>) <volume>5</volume>:<elocation-id>46</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41746-022-00589-7</pub-id>, PMID: <pub-id pub-id-type="pmid">35396451</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<label>17</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Graham</surname> <given-names>S</given-names></name>
<name><surname>Depp</surname> <given-names>C</given-names></name>
<name><surname>Lee</surname> <given-names>EE</given-names></name>
<name><surname>Nebeker</surname> <given-names>C</given-names></name>
<name><surname>Tu</surname> <given-names>X</given-names></name>
<name><surname>Kim</surname> <given-names>HC</given-names></name>
<etal/>
</person-group>. 
<article-title>Artificial intelligence for mental health and mental illnesses: an overview</article-title>. <source>Curr Psychiatry Rep</source>. (<year>2019</year>) <volume>21</volume>:<fpage>116</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11920-019-1094-0</pub-id>, PMID: <pub-id pub-id-type="pmid">31701320</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<label>18</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Ji</surname> <given-names>S</given-names></name>
<name><surname>Zhang</surname> <given-names>T</given-names></name>
<name><surname>Ansari</surname> <given-names>L</given-names></name>
<name><surname>Fu</surname> <given-names>J</given-names></name>
<name><surname>Tiwari</surname> <given-names>P</given-names></name>
<name><surname>Cambria</surname> <given-names>E</given-names></name>
<etal/>
</person-group>. 
<article-title>Mentalbert: Publicly available pretrained language models for mental healthcare</article-title>, in: <conf-name>Proceedings of the 13th Language Resources and Evaluation Conference</conf-name>, <publisher-loc>Marseille, France</publisher-loc>: 
<publisher-name>European Language Resources Association</publisher-name> (<year>2022</year>). pp. <page-range>7184&#x2013;7190</page-range>.
</mixed-citation>
</ref>
<ref id="B19">
<label>19</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Iyortsuun</surname> <given-names>NK</given-names></name>
<name><surname>Kim</surname> <given-names>SH</given-names></name>
<name><surname>Jhon</surname> <given-names>M</given-names></name>
<name><surname>Yang</surname> <given-names>HJ</given-names></name>
<name><surname>Pant</surname> <given-names>S</given-names></name>
</person-group>. 
<article-title>Mental health analysis using deep learning: a survey</article-title>. <source>Front Psychiatry</source>. (<year>2021</year>) <volume>12</volume>:<elocation-id>716298</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpsyt.2021.716298</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<label>20</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Andrew</surname> <given-names>J</given-names></name>
<name><surname>Rudra</surname> <given-names>M</given-names></name>
<name><surname>Eunice</surname> <given-names>J</given-names></name>
<name><surname>Belfin</surname> <given-names>RV</given-names></name>
</person-group>. 
<article-title>Artificial intelligence in mental health: a review of applications in adolescents</article-title>. <source>J Child Psychol Psychiatry</source>. (<year>2023</year>) <volume>64</volume>:<page-range>567&#x2013;79</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/jcpp.13758</pub-id>, PMID: <pub-id pub-id-type="pmid">36702791</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<label>21</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Garber</surname> <given-names>J</given-names></name>
<name><surname>Weersing</surname> <given-names>VR</given-names></name>
</person-group>. 
<article-title>Comorbidity of anxiety and depression in youth: Implications for treatment and prevention</article-title>. <source>Clin Psychology: Sci Pract</source>. (<year>2010</year>) <volume>17</volume>:<fpage>293</fpage>&#x2013;<lpage>306</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/j.1468-2850.2010.01221.x</pub-id>, PMID: <pub-id pub-id-type="pmid">21499544</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<label>22</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Cummings</surname> <given-names>CM</given-names></name>
<name><surname>Caporino</surname> <given-names>NE</given-names></name>
<name><surname>Kendall</surname> <given-names>PC</given-names></name>
</person-group>. 
<article-title>Comorbidity of anxiety and depression in children and adolescents: 20 years after</article-title>. <source>Psychol Bull</source>. (<year>2014</year>) <volume>140</volume>:<page-range>816&#x2013;45</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1037/a0034733</pub-id>, PMID: <pub-id pub-id-type="pmid">24219155</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<label>23</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Qin</surname> <given-names>J</given-names></name>
<name><surname>Liu</surname> <given-names>C</given-names></name>
<name><surname>Tang</surname> <given-names>T</given-names></name>
<name><surname>Liu</surname> <given-names>D</given-names></name>
<name><surname>Wang</surname> <given-names>M</given-names></name>
<name><surname>Huang</surname> <given-names>Q</given-names></name>
<etal/>
</person-group>. 
<article-title>Mental-perceiver: Audio-textual multi-modal learning for estimating mental disorders</article-title>. (<year>2025</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.1609/aaai.v39i23.34687</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<label>24</label>
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name><surname>Pinheiro Cinelli</surname> <given-names>L</given-names></name>
<name><surname>Ara&#xfa;jo Marins</surname> <given-names>M</given-names></name>
<name><surname>Barros da Silva</surname> <given-names>EA</given-names></name>
<name><surname>Lima Netto</surname> <given-names>S</given-names></name>
</person-group>. 
<article-title>Variational Autoencoder</article-title>. In: <source>Variational Methods for Machine Learning with Applications to Deep Networks</source>. <publisher-loc>Cham</publisher-loc>: 
<publisher-name>Springer</publisher-name> (<year>2021</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.1007/978-3-030-70679-1_5</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<label>25</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Devlin</surname> <given-names>J</given-names></name>
<name><surname>Chang</surname> <given-names>M-W</given-names></name>
<name><surname>Lee</surname> <given-names>K</given-names></name>
<name><surname>Toutanova</surname> <given-names>K</given-names></name>
</person-group>. (<year>2019</year>). 
<article-title>Bert: Pre-training of deep bidirectional transformers for language understanding</article-title>, in: <conf-name>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</conf-name>, <publisher-loc>Minneapolis, MN, USA</publisher-loc>: 
<publisher-name>Association for Computational Linguistics</publisher-name> Vol. <volume>1</volume>. pp. <page-range>4171&#x2013;86</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.18653/v1/N19-1423</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<label>26</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Cordonnier</surname> <given-names>J-B</given-names></name>
<name><surname>Loukas</surname> <given-names>A</given-names></name>
<name><surname>Jaggi</surname> <given-names>M</given-names></name>
</person-group>. 
<article-title>Multi-head attention: Collaborate instead of concatenate</article-title>. <source>arXiv preprint arXiv:2006.16362</source>. (<year>2020</year>). doi:&#xa0;<pub-id pub-id-type="doi">10.48550/arXiv.2006.16362</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<label>27</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Ye</surname> <given-names>X</given-names></name>
<name><surname>Shu</surname> <given-names>H-L</given-names></name>
<name><surname>Feng</surname> <given-names>X</given-names></name>
<name><surname>Xia</surname> <given-names>D-M</given-names></name>
<name><surname>Wang</surname> <given-names>Z-Q</given-names></name>
<name><surname>Mi</surname> <given-names>W-Y</given-names></name>
<etal/>
</person-group>. 
<article-title>Reliability and validity of the Chinese version of the patient health questionnaire-9 (c-phq-9) in patients with psoriasis: a cross-sectional study</article-title>. <source>BMJ Open</source>. (<year>2020</year>) <volume>10</volume>:<elocation-id>e033211</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1136/bmjopen-2019-033211</pub-id>, PMID: <pub-id pub-id-type="pmid">32665341</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<label>28</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Mills</surname> <given-names>SD</given-names></name>
<name><surname>Fox</surname> <given-names>RS</given-names></name>
<name><surname>Malcarne</surname> <given-names>VL</given-names></name>
<name><surname>Roesch</surname> <given-names>SC</given-names></name>
<name><surname>Champagne</surname> <given-names>BR</given-names></name>
<name><surname>Sadler</surname> <given-names>GR</given-names></name>
</person-group>. 
<article-title>The psychometric properties of the generalized anxiety disorder-7 scale in hispanic americans with english or spanish language preference</article-title>. <source>Cult Divers Ethnic Minor Psychol</source>. (<year>2014</year>) <volume>20</volume>:<fpage>463</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1037/a0036523</pub-id>, PMID: <pub-id pub-id-type="pmid">25045957</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<label>29</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Liu</surname> <given-names>Y</given-names></name>
</person-group>. 
<article-title>Depression detection via a Chinese social media platform: a novel causal relation-aware deep learning approach</article-title>. <source>J Supercomput</source>. (<year>2024</year>) <volume>80</volume>:<page-range>10327&#x2013;56</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1007/s11227-023-05830-y</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<label>30</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Cummins</surname> <given-names>N</given-names></name>
<name><surname>Scherer</surname> <given-names>S</given-names></name>
<name><surname>Krajewski</surname> <given-names>J</given-names></name>
<name><surname>Schnieder</surname> <given-names>S</given-names></name>
<name><surname>Epps</surname> <given-names>J</given-names></name>
<name><surname>Quatieri</surname> <given-names>TF</given-names></name>
<etal/>
</person-group>. 
<article-title>A review of depression and suicide risk assessment using speech analysis</article-title>. <source>Speech Communication</source>. (<year>2015</year>) <volume>71</volume>:<fpage>10</fpage>&#x2013;<lpage>49</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.specom.2015.05.004</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<label>31</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Wang</surname> <given-names>S</given-names></name>
<name><surname>Chen</surname> <given-names>L</given-names></name>
<name><surname>Ran</surname> <given-names>H</given-names></name>
<name><surname>Che</surname> <given-names>Y</given-names></name>
<name><surname>Fang</surname> <given-names>D</given-names></name>
<name><surname>Sun</surname> <given-names>H</given-names></name>
<etal/>
</person-group>. 
<article-title>Depression and anxiety among children and adolescents pre and post covid-19: A comparative meta-analysis</article-title>. <source>Front Psychiatry</source>. (<year>2022</year>) <volume>13</volume>:<elocation-id>917552</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.3389/fpsyt.2022.917552</pub-id>, PMID: <pub-id pub-id-type="pmid">35990058</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<label>32</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Bogdanoski</surname> <given-names>G</given-names></name>
<name><surname>Lucas</surname> <given-names>F</given-names></name>
<name><surname>Kern</surname> <given-names>W</given-names></name>
<name><surname>Czechowska</surname> <given-names>K</given-names></name>
</person-group>. 
<article-title>Translating the regulatory landscape of medical devices to create fit-for-purpose artificial intelligence (AI) cytometry solutions</article-title>. <source>Cytometry Part B: Clin Cytometry</source>. (<year>2024</year>) <volume>106</volume>:<fpage>294</fpage>&#x2013;<lpage>307</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1002/cyto.b.22167</pub-id>, PMID: <pub-id pub-id-type="pmid">38396223</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<label>33</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Sogancioglu</surname> <given-names>G</given-names></name>
<name><surname>Mosteiro</surname> <given-names>P</given-names></name>
<name><surname>Salah</surname> <given-names>AA</given-names></name>
<name><surname>Scheepers</surname> <given-names>F</given-names></name>
<name><surname>Kaya</surname> <given-names>H</given-names></name>
</person-group>. 
<article-title>Fairness in AI-based mental health: Clinician perspectives and bias mitigation</article-title>. In <source>Proceedings of the AAAI/ACM Conference on AI, Ethics, and Society</source>. <publisher-loc>Washington, DC, USA</publisher-loc>: 
<publisher-name>AAAI Press</publisher-name> (<year>2024</year>) Vol. <volume>7</volume>, pp. <page-range>1390&#x2013;1400</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1609/aies.v7i1.31732</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<label>34</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Chadebec</surname> <given-names>C</given-names></name>
<name><surname>Thibeau-Sutre</surname> <given-names>E</given-names></name>
<name><surname>Burgos</surname> <given-names>N</given-names></name>
<name><surname>Allassonni&#xe8;re</surname> <given-names>S</given-names></name>
</person-group>. 
<article-title>Data augmentation in high dimensional low sample size setting using a geometry-based variational autoencoder</article-title>. <source>Pacific Symposium Biocomputing</source>. (<year>2023</year>) <volume>45</volume>:<page-range>2879&#x2013;2896</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1142/9789811270611_0008</pub-id>, PMID: <pub-id pub-id-type="pmid">35749321</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<label>35</label>
<mixed-citation publication-type="confproc">
<person-group person-group-type="author">
<name><surname>Vedanta</surname> <given-names>SP</given-names></name>
<name><surname>Rao</surname> <given-names>M</given-names></name>
</person-group>. 
<article-title>PsychSynth: Advancing Mental Health AI Through Synthetic Data Generation and Curriculum Training</article-title>. In <source>2024 9th International Conference on Computer Science and Engineering (UBMK)</source>. (<year>2024</year>) <publisher-loc>Piscataway, NJ, USA</publisher-loc>: 
<publisher-name>IEEE</publisher-name>. pp. <page-range>1&#x2013;6</page-range>. doi:&#xa0;<pub-id pub-id-type="doi">10.1109/UBMK63289.2024.10773545</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<label>36</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Joyce</surname> <given-names>DW</given-names></name>
<name><surname>Kormilitzin</surname> <given-names>A</given-names></name>
<name><surname>Smith</surname> <given-names>KA</given-names></name>
<name><surname>Cipriani</surname> <given-names>A</given-names></name>
</person-group>. 
<article-title>Explainable artificial intelligence for mental health assessment: A review</article-title>. <source>Artif Intell Med</source>. (<year>2024</year>) <volume>6</volume>:<elocation-id>6</elocation-id>. doi:&#xa0;<pub-id pub-id-type="doi">10.1016/j.artmed.2024.102789</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1349608">Anthony Gifuni</ext-link>, McGill University, Canada</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3300107">Dhananjay Kalbande</ext-link>, Bharatiya Vidya Bhavans Sardar Patel Institute of Technology, India</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3301160">Putra Wanda</ext-link>, Universitas Respati Yogyakarta, Indonesia</p></fn>
</fn-group>
</back>
</article>