<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article article-type="research-article" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Digit. Health</journal-id>
<journal-title-group>
<journal-title>Frontiers in Digital Health</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Digit. Health</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2673-253X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fdgth.2025.1615250</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>AI-driven dynamic psychological measurement: correcting university student mental health scales using daily behavioral and cognitive data</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Tong</surname><given-names>B. G.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="cor1">&#x002A;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2858116/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role></contrib>
<contrib contrib-type="author">
<name><surname>Liang</surname><given-names>Zihong</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role></contrib>
<contrib contrib-type="author">
<name><surname>He</surname><given-names>Xuemei</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Yang</surname><given-names>Fan</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role></contrib>
<contrib contrib-type="author">
<name><surname>Yang</surname><given-names>Li</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Gao</surname><given-names>Lijia</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role></contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>Department of Psychiatry, Inner Mongolia People&#x2019;s Hospital</institution>, <city>Hohhot</city>, <country country="cn">China</country></aff>
<aff id="aff2"><label>2</label><institution>Student Management Department, Inner Mongolia People&#x2019;s Hospital</institution>, <city>Hohhot</city>, <country country="cn">China</country></aff>
<aff id="aff3"><label>3</label><institution>Inner Mongolia Clinical College, Inner Mongolia Medical University</institution>, <city>Hohhot</city>, <country country="cn">China</country></aff>
<author-notes>
<corresp id="cor1"><label>&#x002A;</label><bold>Correspondence:</bold> B. G. Tong <email xlink:href="mailto:dackmoon123@126.com">dackmoon123@126.com</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2025-11-28"><day>28</day><month>11</month><year>2025</year></pub-date>
<pub-date publication-format="electronic" date-type="collection"><year>2025</year></pub-date>
<volume>7</volume><elocation-id>1615250</elocation-id>
<history>
<date date-type="received"><day>21</day><month>04</month><year>2025</year></date>
<date date-type="rev-recd"><day>02</day><month>10</month><year>2025</year></date>
<date date-type="accepted"><day>12</day><month>11</month><year>2025</year></date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2025 Tong, Liang, He, Yang, Yang and Gao.</copyright-statement>
<copyright-year>2025</copyright-year><copyright-holder>Tong, Liang, He, Yang, Yang and Gao</copyright-holder><license><ali:license_ref start_date="2025-11-28">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p></license>
</permissions>
<abstract><sec><title>Objective</title>
<p>This study aimed to evaluate an Artificial Intelligence (AI)-driven dynamic psychological measurement method for correcting traditional mental health scales. We sought to validate its feasibility using daily behavioral and cognitive data from university students and assess its potential as an intervention tool.</p>
</sec><sec><title>Methods</title>
<p>A total of 177 university students participated in a one-and-a-half-year study. Using a WeChat mini-program, we collected data from cognitive voting (87 instances), behavioral check-ins (66 instances), and standardized psychological scales (SAS, SDS, SCL-90). Scale scores were dynamically adjusted using Large Language Models (LLMs) and Retrieval-Augmented Generation (RAG) techniques. Paired-sample <italic>t</italic>-tests, MANOVA, and Cohen&#x2019;s <italic>d</italic> were used to compare the performance of the dynamic model against traditional scales. Intervention effects were validated using the Hamilton Anxiety Rating Scale (HAM-A) and Hamilton Depression Rating Scale (HAM-D).</p>
</sec><sec><title>Results</title>
<p>The dynamic assessment demonstrated superior performance in identifying both anxiety (SAS: dynamic model AUC = 0.95 vs. traditional AUC = 0.86) and depression (SDS: dynamic model AUC = 0.93 vs. traditional AUC = 0.82). Over three semesters, participating students showed significant decreases in clinically-rated anxiety scores on the HAM-A (15.2&#x0025; reduction; 95&#x0025; CI for mean difference [1.00, 5.25], <italic>p</italic> = 0.004) and depression scores on the HAM-D (40.0&#x0025; reduction; 95&#x0025; CI for mean difference [2.71, 7.71], <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM1"><mml:mi>p</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mn>0.001</mml:mn></mml:math></inline-formula>). High student engagement was observed (cognitive voting participation: 79&#x0025;; behavioral check-ins: 42&#x0025;). While the dynamic adjustment for the SCL-90 was initially effective (<inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM2"><mml:msup><mml:mi>R</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>=</mml:mo><mml:mn>0.34</mml:mn></mml:math></inline-formula>), its specificity later decreased, potentially due to interference from life factors (dynamic model MSE = 102.74 vs. traditional MSE = 84.17).</p>
</sec><sec><title>Discussion</title>
<p>AI-driven dynamic assessment provides superior accuracy for anxiety (SAS) and depression (SDS) scales over static methods by effectively capturing psychological fluctuations. The significant reductions in clinically-rated anxiety and depression suggest the system may function as an integrated assessment-intervention loop, fostering self-awareness through continuous feedback. High user engagement confirms the method&#x2019;s feasibility. However, the model&#x2019;s diminished specificity for the complex SCL-90 scale over time highlights challenges in handling intricate, long-term symptom patterns. This research supports a shift towards continuous &#x201C;digital phenotyping&#x201D; and underscores the need for rigorous validation, multimodal data integration, and robust ethical considerations.</p>
</sec>
</abstract>
<kwd-group>
<kwd>dynamic psychological measurement</kwd>
<kwd>artificial intelligence</kwd>
<kwd>scale correction</kwd>
<kwd>university student mental health</kwd>
<kwd>behavioral and cognitive data</kwd>
</kwd-group><funding-group>
<funding-statement>The author(s) declare that financial support was received for the research and/or publication of this article. This work was supported by the 2022 Research Fund of Inner Mongolia People&#x2019;s Hospital (Grant No. 2022YN28). The funder provided financial support for this study but had no role in the research design, data collection, analysis, interpretation, or decision to publish.</funding-statement>
</funding-group>
<counts>
<fig-count count="17"/>
<table-count count="10"/><equation-count count="56"/><ref-count count="37"/><page-count count="21"/><word-count count="212313215"/></counts><custom-meta-group><custom-meta><meta-name>section-at-acceptance</meta-name><meta-value>Digital Mental Health</meta-value></custom-meta></custom-meta-group>
</article-meta>
</front>
<body><sec id="s1" sec-type="intro"><label>1</label><title>Introduction</title>
<sec id="s1a" sec-type="background"><label>1.1</label><title>Background</title>
<p>The field of psychometrics faces a fundamental challenge rooted in the philosophical nature of mental states: the inherent tension between stable, long-term traits and fluctuating, moment-to-moment psychological states (<xref ref-type="bibr" rid="B1">1</xref>). Traditional psychometric assessments, while valuable, often provide a static &#x201C;snapshot&#x201D; of an individual&#x2019;s mental health, which may not adequately capture the dynamic, real-time fluctuations of affective states such as anxiety and depression. This is particularly critical for university students, a demographic navigating a period of significant developmental and academic pressure. Recent large-scale epidemiological studies in China have highlighted the severity of this issue, with detection rates for depression and anxiety among university students reported at 9.8&#x0025; and 15.5&#x0025;, respectively (<xref ref-type="bibr" rid="B2">2</xref>). The mental health landscape for medical students can be even more demanding, characterized by a rigorous curriculum and high-stakes examinations, necessitating more sensitive and ecologically valid assessment methods (<xref ref-type="bibr" rid="B3">3</xref>). Consequently, accurately capturing the ebb and flow of <bold>state anxiety</bold> and <bold>state depression</bold> remains a critical research topic.</p>
<p>Within psychological research, measurement is broadly approached through observational, experimental, and psychometric testing methods (<xref ref-type="bibr" rid="B4">4</xref>). While observational and experimental methods offer high validity, their resource-intensive nature limits scalability. Psychometric testing, conversely, offers convenience and has been widely adopted. However, this convenience often comes at the cost of ecological validity, as a single questionnaire provides limited insight into a student&#x2019;s psychological journey. This limitation reveals a significant gap: the need for a measurement paradigm that integrates the scalability of psychometric tests with the contextual richness of observational methods. To truly understand and potentially &#x201C;correct&#x201D; a student&#x2019;s self-reported score, it is essential to look beyond the score itself and examine the underlying cognitive and behavioral patterns that drive these emotional states.</p>
<p>This perspective aligns with the principles of <bold>Cognitive-Behavioral Theory (CBT)</bold>, which posits that emotions are shaped by an interplay of thoughts and behaviors. The continuous advancement of Information and Communication Technology (ICT) and Artificial Intelligence (AI) now offers an unprecedented opportunity to operationalize this holistic view at scale. The field is already witnessing significant progress through innovations like Computerized Adaptive Testing (CAT) (<xref ref-type="bibr" rid="B5">5</xref>, <xref ref-type="bibr" rid="B6">6</xref>) and Automatic Item Generation (AIG) (<xref ref-type="bibr" rid="B7">7</xref>). The ubiquity of smartphones has further enabled the collection of rich, longitudinal data via social networks, allowing for the creation of dynamic user profiles that integrate network theory with psychological process models (<xref ref-type="bibr" rid="B8">8</xref>).</p>
<p>More recently, powerful AI, particularly large language models like ChatGPT, has emerged and is poised to act as a &#x201C;human proxy&#x201D; in interpreting complex behavioral data, heralding a quiet revolution in psychometrics (<xref ref-type="bibr" rid="B9">9</xref>, <xref ref-type="bibr" rid="B10">10</xref>). The trajectory of psychological measurement is inevitably shifting towards capturing multimodal data from daily life&#x2014;including facial expressions, voice patterns, and even brain imaging scans&#x2014;via wearable devices, long before Brain-Computer Interfaces (BCIs) might achieve true &#x201C;mind-reading&#x201D; (<xref ref-type="bibr" rid="B11">11</xref>). Crucially, this technological evolution is blurring the boundary between mental health assessment and clinical intervention (<xref ref-type="bibr" rid="B12">12</xref>). Modern intelligent applications are evolving from passive monitors into &#x201C;digital health promoters,&#x201D; capable of providing data-driven, adaptive interventions based on real-time psychometric insights (<xref ref-type="bibr" rid="B13">13</xref>, <xref ref-type="bibr" rid="B14">14</xref>). These systems create a bidirectional feedback loop, where assessment informs personalized intervention, and the resulting behavioral changes continuously update the assessment itself. Such detailed personal profiles not only guide psychological interventions (<xref ref-type="bibr" rid="B15">15</xref>) but also serve as core indicators for evaluating their effectiveness and tracking mental health status over time (<xref ref-type="bibr" rid="B16">16</xref>). This technologically-empowered, integrated paradigm forms the foundation for the novel assessment framework proposed in this study.</p>
</sec>
<sec id="s1b"><label>1.2</label><title>Research objectives</title>
<p>Against the backdrop of the inherent challenges in traditional psychometrics and the opportunities afforded by technological innovation, this research aims to pioneer and implement a novel pathway for university student mental health assessment. Utilizing a WeChat mini-program platform, we deeply integrate information technology with psychological theory. Drawing inspiration from the principles of Ecological Momentary Assessment (EMA), which emphasize capturing experiences in real-time, real-world contexts (<xref ref-type="bibr" rid="B17">17</xref>), we constructed a dynamic, interactive, and continuous mental health assessment ecosystem.</p>
<p>The core objectives of this study were threefold. First, we aimed to <bold>evaluate the accuracy</bold> of an AI-driven dynamic assessment model, testing whether integrating daily cognitive and behavioral data could correct and enhance the performance of traditional static mental health scales. Second, we sought to <bold>explore the potential intervention effect</bold> of the system, investigating whether continuous engagement and feedback could lead to measurable improvements in students&#x2019; clinically-rated mental health. Third, we intended to <bold>assess the feasibility and user engagement</bold> of this approach, examining its practicality and acceptance within a real-world university setting.</p>
<p>To achieve these objectives, our conceptual strategy was to create a multidimensional, multimodal mental health assessment framework. This framework creatively integrates three distinct data streams: (1) traditional, standardized psychometric scales for baseline assessment; (2) motivation-driven &#x201C;cognitive voting&#x201D; to capture students&#x2019; thought tendencies and emotional responses to daily events (<xref ref-type="bibr" rid="B18">18</xref>); and (3) systematic &#x201C;behavioral check-ins&#x201D; for the idiographic measurement of actual behavioral patterns (<xref ref-type="bibr" rid="B19">19</xref>). By continuously collecting and integrating these data points, our system aims to construct dynamic, real-time mental health profiles for each student.</p>
<p>Ultimately, the goal of this study extends beyond passive assessment. By providing immediate feedback and gamifying personality attributes, we aim to actively engage students in their own mental health management. This process is designed to foster self-awareness and encourage positive behavioral or cognitive adjustments, thereby creating an integrated assessment and intervention loop. In essence, this research endeavors to develop and validate an intelligent and personalized tool that is more attuned to the contemporary needs of university mental health services, achieving greater immediacy, comprehensiveness, and personalization (<xref ref-type="bibr" rid="B20">20</xref>).</p>
</sec>
</sec>
<sec id="s2" sec-type="methods"><label>2</label><title>Methods</title>
<sec id="s2a"><label>2.1</label><title>Overview</title>
<p>This study employed a longitudinal design to dynamically assess the psychological states of 177 undergraduate clinical medicine students over one and a half years. The research was conducted via a custom-developed WeChat mini-program, which served as the primary platform for data collection and user interaction. Our methodological framework integrated three core data streams: (1) periodic, systematic self-report scale screenings conducted at the beginning of each semester; (2) high-frequency ecological cognitive voting, comprising 87 instances distributed throughout the study period; and (3) event-driven behavioral check-ins, with 66 distinct instances recorded over the same period.</p>
<p>The core self-report scales included the Self-Rating Anxiety Scale (SAS), the Self-Rating Depression Scale (SDS), the Symptom Checklist-90 (SCL-90), the Eysenck Personality Questionnaire (EPQ), the Sixteen Personality Factor Questionnaire (16PF), and the Myers-Briggs Type Indicator (MBTI). Cognitive voting was designed as a method for students to express opinions and emotional attitudes regarding their campus life and current events through multiple-choice questions. Behavioral check-ins involved the systematic recording of specific events each student encountered, such as academic achievements, social conflicts, or personal milestones. These events were defined and logged by four designated &#x201C;life teachers&#x201D; (counselors), providing a semi-objective measure of students&#x2019; real-world experiences.</p>
</sec>
<sec id="s2b"><label>2.2</label><title>Measures and instruments</title>
<p>A battery of standardized psychometric scales was administered to collect baseline and periodic data on students&#x2019; mental health status. All selected instruments have been validated for use within the Chinese cultural context. The internal consistency for these scales, as reported in validation studies with Chinese university students or related populations, is detailed below.</p>
<p>
<list list-type="bullet">
<list-item>
<p><bold>Self-rating anxiety scale (SAS).</bold> The SAS is a 20-item self-report questionnaire used to measure the subjective severity of anxiety symptoms. The Chinese version of the SAS, introduced and validated by Wang (1984), has demonstrated good psychometric properties in Chinese populations (<xref ref-type="bibr" rid="B21">21</xref>). Subsequent validation studies among university students have reported high internal consistency, with Cronbach&#x2019;s alpha coefficients typically around 0.88 (<xref ref-type="bibr" rid="B22">22</xref>).</p></list-item>
<list-item>
<p><bold>Self-rating depression scale (SDS).</bold> The SDS is a 20-item self-report instrument designed to assess the level of depressive symptomatology. The Chinese version has been widely used and validated (<xref ref-type="bibr" rid="B21">21</xref>). Studies have confirmed its reliability and validity among various Chinese populations, including university students, with reported Cronbach&#x2019;s alpha values typically ranging from 0.84 to 0.89 (<xref ref-type="bibr" rid="B23">23</xref>, <xref ref-type="bibr" rid="B24">24</xref>).</p></list-item>
<list-item>
<p><bold>Symptom checklist-90 (SCL-90).</bold> The SCL-90 is a comprehensive 90-item self-report inventory that evaluates a broad range of psychological problems and symptoms of psychopathology across nine primary symptom dimensions. The Chinese version was introduced by Wang (1984) and its utility in the Chinese context has been extensively confirmed, with excellent internal consistency (Cronbach&#x2019;s alpha often &#x003E; 0.90) reported in validation studies (<xref ref-type="bibr" rid="B21">21</xref>, <xref ref-type="bibr" rid="B25">25</xref>).</p></list-item>
<list-item>
<p><bold>Eysenck personality questionnaire (EPQ).</bold> The EPQ is a self-report questionnaire designed to measure major personality dimensions: Psychoticism (P), Extraversion (E), and Neuroticism (N), along with a Lie (L) scale. We utilized the Chinese revised version developed by Gong (1986), which is well-validated for Chinese adults (<xref ref-type="bibr" rid="B26">26</xref>). Its subscales have shown acceptable to good reliability, with Cronbach&#x2019;s alpha coefficients generally ranging from 0.70 to 0.85 (<xref ref-type="bibr" rid="B27">27</xref>).</p></list-item>
<list-item>
<p><bold>Sixteen personality factor questionnaire (16PF).</bold> The 16PF is a comprehensive personality assessment tool that measures 16 primary personality factors. The Chinese revised version has been validated and normed for the Chinese population, including university students (<xref ref-type="bibr" rid="B28">28</xref>, <xref ref-type="bibr" rid="B29">29</xref>). The reliability for its various factors typically falls within an acceptable range for personality measures (Cronbach&#x2019;s alpha: 0.60&#x2013;0.80).</p></list-item>
<list-item>
<p><bold>Myers-Briggs type indicator (MBTI).</bold> The MBTI is a self-report questionnaire indicating different psychological preferences in how people perceive the world and make decisions. In this study, the MBTI was not used as a rigid diagnostic or predictive tool, but rather as an auxiliary instrument to facilitate student engagement and self-exploration. The personality type descriptions served as a dynamic &#x201C;character profile&#x201D; within the mini-program, providing a basis for the AI to generate personalized feedback and gamified interactions, thereby transforming static scores into a narrative, humanized experience. We referenced the Chinese version of the instrument, which has demonstrated adequate reliability (Cronbach&#x2019;s alpha: 0.70&#x2013;0.85) in validation studies (<xref ref-type="bibr" rid="B30">30</xref>).</p></list-item>
</list></p>
</sec>
<sec id="s2c"><label>2.3</label><title>Study design and procedure</title>
<p>This study employed a longitudinal design combining principles of psychometric testing, Ecological Momentary Assessment (EMA), and digital intervention. Over a one-and-a-half-year period, we conducted a dynamic assessment of psychological states involving 177 undergraduate clinical medicine students. The entire study was facilitated through a custom-developed WeChat mini-program, which served as the primary interface for data collection, feedback delivery, and student engagement. The research procedure involved an initial baseline assessment using standardized scales at the beginning of the first semester, followed by continuous data collection through weekly cognitive voting and event-contingent behavioral check-ins, alongside periodic reassessments with the full scale battery at the start of subsequent semesters.</p>
</sec>
<sec id="s2d"><label>2.4</label><title>The dynamic assessment framework</title>
<p>To derive both quantitative and qualitative insights from the rich, multimodal data, we developed a novel conceptual framework to structure our assessment. This framework was designed to bridge the gap between high-level psychometric scores and the granular, real-world data collected daily. Drawing inspiration from psychometric modeling approaches such as the bifactor model (<xref ref-type="bibr" rid="B31">31</xref>), testlet models (<xref ref-type="bibr" rid="B32">32</xref>), and second-order factor models (<xref ref-type="bibr" rid="B33">33</xref>), as well as the time-series nature of Vector Autoregression (VAR) models (<xref ref-type="bibr" rid="B34">34</xref>), we established an intermediate hierarchical structure. The structure was developed collaboratively by four experienced university life teachers and three clinical psychologists, resulting in a model that could systematically capture and interpret the psychological and behavioral patterns reflected in students&#x2019; daily lives.</p>
<p>The structure consists of six primary factors: Anxiety Cognition, Anxiety Behavior, Depression Cognition, Depression Behavior, Personality Cognition, and Personality Behavior. Each primary factor was further subdivided into four sub-dimensions (e.g., negative events, developmental concerns, life discipline), resulting in a total of 24 cognitive and behavioral dimensions. The complete framework is detailed in <xref ref-type="table" rid="T1">Table&#x00A0;1</xref>.</p>
<table-wrap id="T1" position="float"><label>Table&#x00A0;1</label>
<caption><p>Cognitive and behavioral dimensions and their relationship with psychometric scales.</p></caption>
<table>
<thead>
<tr>
<th valign="top" align="left">Dimensions</th>
<th valign="top" align="center">Anxiety cognition</th>
<th valign="top" align="center">Anxiety behavior</th>
<th valign="top" align="center">Depression cognition</th>
<th valign="top" align="center">Depression behavior</th>
<th valign="top" align="center">Personality cognition</th>
<th valign="top" align="center">Personality behavior</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Negative events</td>
<td valign="top" align="left">Pessimistic outlook</td>
<td valign="top" align="left">Excessive avoidance</td>
<td valign="top" align="left">Negative self-assessment</td>
<td valign="top" align="left">Somatization</td>
<td valign="top" align="left">Extreme notions</td>
<td valign="top" align="left">Defense mechanisms</td>
</tr>
<tr>
<td valign="top" align="left">Developmental concerns</td>
<td valign="top" align="left">Excessive worry</td>
<td valign="top" align="left">Neuroticism</td>
<td valign="top" align="left">Lack of emotional expression</td>
<td valign="top" align="left">Hedonistic withdrawal</td>
<td valign="top" align="left">Values</td>
<td valign="top" align="left">Paranoid behavior</td>
</tr>
<tr>
<td valign="top" align="left">Life discipline</td>
<td valign="top" align="left">Procrastination</td>
<td valign="top" align="left">Compulsion</td>
<td valign="top" align="left">Passive withdrawal</td>
<td valign="top" align="left">Absenteeism or tardiness</td>
<td valign="top" align="left">Philosophy of life</td>
<td valign="top" align="left">Individuality</td>
</tr>
<tr>
<td valign="top" align="left">Interpersonal interaction</td>
<td valign="top" align="left">Over-sensitivity</td>
<td valign="top" align="left">Stressful events</td>
<td valign="top" align="left">Social isolation</td>
<td valign="top" align="left">Negative events</td>
<td valign="top" align="left">Hostile margins</td>
<td valign="top" align="left">Interpersonal conflict events</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>This framework served two key purposes. First, it provided a structured schema to ensure that every designed cognitive vote and behavioral check-in could be systematically mapped to one or more specific dimensions. At the outset of the study, we also mapped all items from the psychometric tests to these 24 dimensions. Specifically, the 130 items from SAS, SDS, and SCL-90 were linked to the framework, primarily for weighting their original scores during the assessment process. The 328 items from EPQ, 16PF, and MBTI were also linked, serving as a reference for personality assessment and allowing students to observe continuous changes in their profiles. This entire mapping process (visualized in <xref ref-type="fig" rid="F1">Figures&#x00A0;1</xref>&#x2013;<xref ref-type="fig" rid="F4">4</xref>, which are detailed later) created a comprehensive, interconnected data ecosystem.</p>
<fig id="F1" position="float"><label>Figure&#x00A0;1</label>
<caption><p>Sankey diagram mapping 87 cognitive voting results to 12 cognitive dimensions.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1615250-g001.tif"><alt-text content-type="machine-generated">Sankey diagram illustrating connections between votes and cognitive dimensions. Votes are listed on the left, linked by colored flows to cognitive dimensions on the right. Each flow represents the association between a vote and a specific cognitive dimension, highlighting the distribution and relationship patterns.</alt-text>
</graphic>
</fig>
<fig id="F2" position="float"><label>Figure&#x00A0;2</label>
<caption><p>Sankey diagram mapping 66 behavioral check-in records to 12 behavioral dimensions.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1615250-g002.tif"><alt-text content-type="machine-generated">Sankey diagram depicting connections between various "clock in" entries and "Behavioral Dimensions." The left side lists multiple "clock in" entries, each connected by lines to different "Behavioral Dimensions" on the right, labeled from 1 to 12. The connections are represented with color-coded lines indicating different relationships or pathways.</alt-text>
</graphic>
</fig>
<fig id="F3" position="float"><label>Figure&#x00A0;3</label>
<caption><p>Sankey diagram mapping 24 cognitive and behavioral dimensions to 130 symptom scale items.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1615250-g003.tif"><alt-text content-type="machine-generated">Sankey diagram illustrating connections between modules and symptoms. Modules are represented by blue rectangles on the left, and symptoms by red rectangles on the right. Flow lines vary in thickness, indicating the strength of connections between each module and symptom.</alt-text>
</graphic>
</fig>
<fig id="F4" position="float"><label>Figure&#x00A0;4</label>
<caption><p>Sankey diagram mapping 24 cognitive and behavioral dimensions to 328 personality assessment items.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1615250-g004.tif"><alt-text content-type="machine-generated">Sankey diagram showing connections between various modules and characters. Modules labeled on the left and characters on the right, linked by colored flows indicating relationships. Blue modules and red characters are connected with lines of varying thickness, signifying different levels of association.</alt-text>
</graphic>
</fig>
<p>Second, the framework acted as a reference for the dynamic adjustment algorithm. For instance, if a student repeatedly selected pessimistic options in cognitive voting, their scores on the SDS and related SCL-90 dimensions would be algorithmically adjusted. Similarly, demonstrated self-discipline in behavioral check-ins would influence their personality trait profiles derived from the MBTI and 16PF, which are understood to be shaped by cognitive development and cultural adaptation processes during university years (<xref ref-type="bibr" rid="B35">35</xref>). These traits, in turn, are predictive of significant life outcomes (<xref ref-type="bibr" rid="B36">36</xref>). This continuous, iterative design and adjustment process aimed to produce a psychological status description that more closely approximates the reality captured by observational methods.</p>
</sec>
<sec id="s2e"><label>2.5</label><title>Application of AI models</title>
<p>To enhance assessment efficiency and accuracy, our framework leveraged a combination of Large Language Models (LLMs) and Retrieval-Augmented Generation (RAG) techniques. This AI-driven approach automated and refined the process of linking ecological data to psychometric constructs. The specific commercial LLM API utilized was Alibaba&#x2019;s Qwen series (model: qwen-max).</p>
<p>The application of AI models involved a multi-stage process:</p>
<p>
<list list-type="order">
<list-item>
<p><bold>Automated content mapping:</bold> The LLM played a crucial role in assisting our research team (comprising life teachers and clinical psychologists) in the initial design and ongoing management of the ecological assessment content. Specifically, it was used to automate the mapping of each cognitive voting item and behavioral check-in definition to the 24 dimensions of our conceptual framework. This process significantly enhanced the standardization and efficiency of our content management. Through the use of the LLM, mapping tasks that would have required hours of manual expert deliberation were completed in minutes, greatly improving the real-time scalability of the assessment. The results of this comprehensive mapping are visualized in <xref ref-type="fig" rid="F1">Figures&#x00A0;1</xref>&#x2013;<xref ref-type="fig" rid="F8">8</xref>.</p></list-item>
<list-item>
<p><bold>Retrieval-augmented generation (RAG) for dynamic adjustment:</bold> To perform dynamic adjustments on student scores and profiles, we implemented a RAG model. The knowledge base for this model was constructed from three distinct sources: (1) <bold>Personal Information,</bold> including students&#x2019; historical behavioral data, academic records, and extracurricular activities, provided by counselors; (2) <bold>Psychological Knowledge,</bold> a text library containing principles of Cognitive-Behavioral Theory, personality trait descriptions, and excerpts from key psychometric textbooks; and (3) <bold>Scale Scoring Protocols,</bold> a structured database of all items, dimensions, and scoring rules for the psychometric scales used.</p>
<p>When new data was received (e.g., a student&#x2019;s response to a cognitive vote or a free-text comment), the RAG system would first retrieve the most relevant information from this comprehensive knowledge base. This retrieved context, which might include the student&#x2019;s recent behaviors, relevant personality traits, and specific item-weighting rules, was then fed to the LLM.</p></list-item>
<list-item>
<p><bold>Personalized feedback generation:</bold> Based on the context provided by the RAG process, the LLM then performed two key functions. First, it calculated the necessary adjustments to the student&#x2019;s scores on relevant scales (e.g., SAS, SDS, SCL-90). Second, and perhaps more importantly, it generated personalized, narrative-based feedback. For example, a change in a student&#x2019;s MBTI profile based on recent data would trigger the LLM to generate a descriptive text explaining this shift in a humanized, supportive tone, transforming a simple score change into a meaningful &#x201C;character story&#x201D; for the student.</p></list-item>
</list>This integrated AI approach not only streamlined the complex process of linking daily life data to psychometric scores but also enabled the delivery of dynamic, personalized, and narrative-rich feedback, which was central to the intervention aspect of our study design.</p>
<fig id="F5" position="float"><label>Figure&#x00A0;5</label>
<caption><p>Sankey diagram mapping 87 cognitive voting results to 130 symptom scale items.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1615250-g005.tif"><alt-text content-type="machine-generated">Sankey diagram showing connections between votes and symptoms. Blue lines represent various votes numbered from 1 to 86 on the left, linked to red bars labeled with symptom numbers from 1 to 130 on the right. The lines vary in thickness, indicating the strength of the connection between each vote and symptom.</alt-text>
</graphic>
</fig>
<fig id="F6" position="float"><label>Figure&#x00A0;6</label>
<caption><p>Sankey diagram mapping 66 behavioral check-in records to 130 symptom scale items.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1615250-g006.tif"><alt-text content-type="machine-generated">A network diagram showing connections between "clock in" labels on the left and "symptom" labels on the right. Blue lines represent connections, with various intersections creating a complex web linking specific "clock in" points to corresponding "symptoms." The diagram visualizes multiple associations between these two groups.</alt-text>
</graphic>
</fig>
<fig id="F7" position="float"><label>Figure&#x00A0;7</label>
<caption><p>Sankey diagram mapping 87 cognitive voting results to 328 personality assessment items.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1615250-g007.tif"><alt-text content-type="machine-generated">Sankey diagram showing connections between votes (blue squares) and characters (red squares). Lines, in pink and blue, represent relationships, intertwining to indicate links between each vote and corresponding character.</alt-text>
</graphic>
</fig>
<fig id="F8" position="float"><label>Figure&#x00A0;8</label>
<caption><p>Sankey diagram mapping 66 behavioral check-in records to 328 personality assessment items.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1615250-g008.tif"><alt-text content-type="machine-generated">Sankey diagram displaying connections between multiple "clock in" points on the left and corresponding "character" labels on the right. Blue and red bands illustrate the flow of connections between these points, showing varying widths and overlapping paths.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s2f"><label>2.6</label><title>Gamified intervention and engagement</title>
<p>A core tenet of our study design was to move beyond passive measurement and actively promote students&#x2019; mental health through a multi-faceted, interactive environment. The mini-program was designed not only to evaluate psychological states but also to foster self-awareness and motivate positive change. This was achieved by integrating gamified feedback, community interaction, and a nuanced alert mechanism, all delivered within the students&#x2019; everyday life contexts (<xref ref-type="bibr" rid="B37">37</xref>).</p>
<sec id="s2f1"><label>2.6.1</label><title>Gamified feedback and personality development</title>
<p>Instead of presenting students with raw anxiety or depression scores, we utilized trait descriptions from personality scales (e.g., EPQ, 16PF, MBTI) to create dynamic attributes, akin to those of a virtual character. When students participated in cognitive voting or completed behavioral check-ins, their actions had direct consequences on their &#x201C;character profile,&#x201D; earning them experience points and leading to attribute changes. For example, consistently choosing proactive behaviors could increase a &#x201C;conscientiousness&#x201D; attribute. These changes were instantly reflected in their personalized profile, providing immediate, narrative-based feedback. This gamified approach was intended to motivate students&#x2019; intrinsic drive to improve their own psychological state by adjusting their daily behaviors or cognitive patterns.</p>
</sec>
<sec id="s2f2"><label>2.6.2</label><title>Community module and personalized rankings</title>
<p>To further enhance engagement, we designed a community module that provided interactive, personalized rankings. These rankings did not display raw symptom scores. Instead, they reflected psychological fluctuations in relation to recent campus events (e.g., exams, social activities), using contextual descriptions (e.g., &#x201C;recent obsessive thinking&#x201D; or &#x201C;hostile emotions&#x201D;) rather than clinical labels. This allowed students to compare their psychological state with that of their peers in a de-stigmatized manner, motivating them to make positive adjustments when they noticed significant deviations.</p>
</sec>
<sec id="s2f3"><label>2.6.3</label><title>Alert mechanism and manual intervention</title>
<p>The system incorporated a multi-layered alert mechanism. For the administrative end (life teachers and counselors), the system provided direct notifications when a student&#x2019;s dynamically adjusted symptom scores (from SAS, SDS, or specific SCL-90 dimensions) exceeded predefined clinical thresholds, prompting timely manual intervention.</p>
<p>For the student-facing end, the alert was more nuanced. When scores reached a warning level, the AI would generate descriptive feedback highlighted in red. This feedback would not simply state a high score, but would translate it into a context-aware, narrative warning. For instance, for a student with an INFP personality profile whose paranoia subscale score increased, the system might generate a message suggesting they may be feeling overly sensitive or &#x201C;as suspicious as Lin Daiyu,&#x201D; drawing a culturally relevant analogy. Non-critical fluctuations would also be reflected through real-time changes in their status description but without the red alert, ensuring a continuous stream of feedback. Crucially, the system was biased towards providing positive reinforcement for improvements, fostering an overall supportive and encouraging environment.</p>
</sec>
</sec>
<sec id="s2g"><label>2.7</label><title>Participants</title>
<p>This study was initially launched in the autumn semester of 2022, emerging from an urgent need to dynamically monitor student mental health during a campus-wide lockdown due to the COVID-19 pandemic in Hohhot. The traditional, infrequent survey-based assessments were deemed insufficient for the volatile psychological environment. Consequently, this research began as an enhanced mental health management initiative for a specific cohort of students.</p>
<p>A total of 177 undergraduate students (98 female, 79 male; mean age = 20.53 years) from four classes in their second (sophomore) and third (junior) years of a clinical medicine program at a medical university participated in the study. The sampling method employed was <bold>cluster sampling</bold>, as the initial cohort consisted of all students residing in a single dormitory building under unified management during the lockdown. All students in this building were initially enrolled in the program. The final sample of 177 participants represents those students who remained and provided continuous, complete data across the entire one-and-a-half-year (three academic semesters) study period. Students who left the cohort prematurely (e.g., due to changes in lockdown status or academic leave) were excluded from the final analysis due to incomplete longitudinal data.</p>
<p>The sample size was determined by the size of the accessible student cluster in the unique real-world context, rather than by a priori power analysis, which is appropriate for the exploratory nature of this study. Following detailed introductions by their life guidance teachers (counselors), all participants provided voluntary informed consent electronically via the WeChat mini-program, fully understanding the research objectives, procedures, and confidentiality measures. The demographic characteristics of the participants are detailed in <xref ref-type="table" rid="T2">Table&#x00A0;2</xref>.</p>
<table-wrap id="T2" position="float"><label>Table&#x00A0;2</label>
<caption><p>Participant characteristics.</p></caption>
<table>
<thead>
<tr>
<th valign="top" align="left">Class</th>
<th valign="top" align="center">Female</th>
<th valign="top" align="center">Male</th>
<th valign="top" align="center">Total</th>
<th valign="top" align="center">Average age</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">A</td>
<td valign="top" align="center">31</td>
<td valign="top" align="center">21</td>
<td valign="top" align="center">52</td>
<td valign="top" align="center">20.60</td>
</tr>
<tr>
<td valign="top" align="left">B</td>
<td valign="top" align="center">25</td>
<td valign="top" align="center">22</td>
<td valign="top" align="center">47</td>
<td valign="top" align="center">20.34</td>
</tr>
<tr>
<td valign="top" align="left">C</td>
<td valign="top" align="center">18</td>
<td valign="top" align="center">18</td>
<td valign="top" align="center">36</td>
<td valign="top" align="center">20.64</td>
</tr>
<tr>
<td valign="top" align="left">D</td>
<td valign="top" align="center">24</td>
<td valign="top" align="center">18</td>
<td valign="top" align="center">42</td>
<td valign="top" align="center">20.55</td>
</tr>
<tr>
<td valign="top" align="left">All classes</td>
<td valign="top" align="center">98</td>
<td valign="top" align="center">79</td>
<td valign="top" align="center">177</td>
<td valign="top" align="center">20.53</td>
</tr>
</tbody>
</table>
</table-wrap>
<sec id="s2g1"><label>2.7.1</label><title>Ethical statement</title>
<p>The study protocol was approved by the Ethics Committee of Inner Mongolia People&#x2019;s Hospital as part of a funded research project (Approval No. 2022LL019, Date: March 10, 2022). The research was conducted in accordance with the Declaration of Helsinki, ensuring voluntary participation and data security.</p>
</sec>
</sec>
<sec id="s2h"><label>2.8</label><title>Measurement process</title>
<p>The data collection for this longitudinal study officially commenced in September 2022 and concluded in March 2024, spanning three academic semesters. Prior to the official start, the WeChat mini-program had been fully developed and had undergone preliminary small-scale testing. The measurement process consisted of three primary components:</p>
<p>
<list list-type="order">
<list-item>
<p><bold>Periodic psychometric screening:</bold> At the beginning of each semester, participants were organized to complete a systematic self-report screening using the full battery of standardized scales (SAS, SDS, SCL-90, EPQ, 16PF, and MBTI). This provided periodic baseline data for each assessment wave.</p></list-item>
<list-item>
<p><bold>Continuous ecological data collection:</bold> Throughout the one-and-a-half-year period, we implemented weekly data collection tasks via the mini-program. These tasks included &#x201C;cognitive voting&#x201D; and &#x201C;behavioral check-ins,&#x201D; designed to encourage students&#x2019; expression of their views and emotional attitudes related to daily life events and situations.</p></list-item>
<list-item>
<p><bold>Data collection summary:</bold> Over the course of the study, a total of 87 distinct cognitive voting activities were conducted, achieving an average participation rate of 79&#x0025;. Concurrently, 66 behavioral check-in tasks were assigned, with an average participation rate of 42&#x0025;. The lower participation rate for behavioral check-ins is primarily attributed to their event-specific nature; some tasks targeted experiences (e.g., specific academic challenges or social events) that were only applicable to a subset of participants at any given time. Nevertheless, the majority of check-in tasks focused on common aspects of student life and learning, aiming to reflect general behavioral patterns.</p></list-item>
</list>This multi-modal, multi-wave data collection approach allowed for a comprehensive and dynamic monitoring of students&#x2019; mental health status, providing a rich dataset for subsequent analysis and intervention.</p>
</sec>
<sec id="s2i"><label>2.9</label><title>Statistical methods</title>
<p>All statistical analyses were conducted using Python (version 3.8.18) with the SciPy (version 1.10.1), Pandas (version 1.5.3), and Scikit-learn libraries. Data visualization was performed using Matplotlib (version 3.7.1) and Seaborn (version 0.12.2). The level of statistical significance for all tests was set at <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM3"><mml:mi>p</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mn>0.05</mml:mn></mml:math></inline-formula>. The analysis was designed to address two core research questions.</p>
<sec id="s2i1"><label>2.9.1</label><title>Analysis of assessment accuracy</title>
<p>The first objective was to evaluate whether the dynamic assessment model could more accurately reflect students&#x2019; mental health status compared to traditional static scales, using professionally administered Hamilton Rating Scales (HAM-A and HAM-D) as the clinical gold standard. The analysis varied based on the nature of the outcome.</p>
<p>
<list list-type="bullet">
<list-item>
<p><bold>For categorical classification performance (anxiety/depression):</bold> To evaluate the ability of SAS and SDS scores (both traditional and dynamic) to correctly classify students&#x2019; clinical anxiety and depression status, we constructed Receiver Operating Characteristic (ROC) curves and calculated the Area Under the Curve (AUC). We also computed standard performance metrics including accuracy, recall, and F1-score.</p></list-item>
<list-item>
<p><bold>For continuous score prediction (general symptoms):</bold> To assess the performance of SCL-90 scores in predicting continuous HAM-A and HAM-D scores, we used two primary metrics: the coefficient of determination (R-squared, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM4"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>) to quantify the proportion of variance explained, and the Mean Squared Error (MSE) to measure prediction error.</p></list-item>
<list-item>
<p><bold>For agreement analysis:</bold> To assess the agreement between traditional and dynamically adjusted scores, we performed both Pearson (for linear relationships) and Spearman (for monotonic relationships) correlation analyses. Given that Shapiro-Wilk tests indicated that most of our data did not strictly adhere to a normal distribution (<inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM5"><mml:mi>p</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mn>0.05</mml:mn></mml:math></inline-formula>), the non-parametric Spearman correlation provides a more robust measure of association.</p></list-item>
</list></p>
</sec>
<sec id="s2i2"><label>2.9.2</label><title>Analysis of intervention effects</title>
<p>The second objective was to explore the potential intervention effect of long-term engagement with the dynamic assessment system. This was analyzed through a quasi-experimental design.</p>
<p>
<list list-type="bullet">
<list-item>
<p><bold>Comparison group:</bold> The primary intervention group consisted of the 177 students who actively participated in our dynamic assessment program. A comparison group was established, comprising 92 students from two other classes of the same grade and university who were not managed by our hospital. This group only completed the routine, semester-based self-report scales (SAS, SDS, and SCL-90) and did not participate in the dynamic assessment or interactive components of the mini-program.</p></list-item>
<list-item>
<p><bold>Statistical tests:</bold> We calculated the change score (<inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM6"><mml:mi mathvariant="normal">&#x0394;</mml:mi></mml:math></inline-formula>Score) for each participant in both groups between the first (October 2022) and final (March 2024) assessments.
<list list-type="simple">
<list-item><label>&#x25CB;</label><p><bold>Independent-sample <italic>t</italic>-tests</bold> were used to compare the mean <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM7"><mml:mi mathvariant="normal">&#x0394;</mml:mi></mml:math></inline-formula>Score between the intervention and comparison groups to determine if our program led to significantly greater improvements.</p></list-item>
<list-item><label>&#x25CB;</label><p><bold>Multivariate analysis of variance (MANOVA)</bold> was employed to simultaneously evaluate the overall impact of the intervention across multiple mental health dimensions (e.g., anxiety, depression, somatization).</p></list-item>
<list-item><label>&#x25CB;</label><p>We acknowledged that prerequisite assumptions for parametric tests, such as normality and homogeneity of variances (Levene&#x2019;s test), were not always met. However, given the robustness of these tests with larger sample sizes, and supplemented by our non-parametric analyses, we proceeded while interpreting the results with appropriate caution.</p></list-item>
</list></p></list-item>
<list-item>
<p><bold>Effect size analysis:</bold> To evaluate the practical significance of any observed changes, we calculated Cohen&#x2019;s <italic>d</italic> for all key comparisons, providing a standardized measure of the magnitude of the intervention effect.</p></list-item>
</list></p>
</sec>
</sec>
</sec>
<sec id="s3" sec-type="results"><label>3</label><title>Results</title>
<sec id="s3a"><label>3.1</label><title>Comparison of SAS assessment results</title>
<sec id="s3a1"><label>3.1.1</label><title>Descriptive and inferential statistics</title>
<p>As illustrated in <xref ref-type="fig" rid="F9">Figure&#x00A0;9</xref>, both traditional and dynamically corrected Self-Rating Anxiety Scale (SAS) scores exhibited a gradual downward trend across the three semesters of the study. The mean score for the traditional assessment decreased from 48.77 (Semester 1) to 46.56 (Semester 3). In contrast, the dynamic assessment showed a more substantial reduction, from an initial mean of 56.78 to 48.67 over the same period.</p>
<fig id="F9" position="float"><label>Figure&#x00A0;9</label>
<caption><p>Distribution of traditional vs. dynamic SAS scores across semesters. The boxplots illustrate a consistent downward trend in anxiety scores for both assessment types. Notably, the dynamically adjusted scores show a more pronounced decrease over time and a reduction in score variance by the third semester, suggesting a positive longitudinal effect and greater score stabilization.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1615250-g009.tif"><alt-text content-type="machine-generated">Box plot comparing SAS scores for traditional and dynamic assessments over three semesters. Each semester shows two boxes: green for traditional and orange for dynamic assessments. The dynamic scores appear generally higher across all semesters.</alt-text>
</graphic>
</fig>
<p>Inferential statistics, detailed in <xref ref-type="table" rid="T3">Table&#x00A0;3</xref>, confirm these observations. Paired-sample <italic>t</italic>-tests revealed that the decrease in scores between semesters was statistically significant for both methods (<inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM8"><mml:mi>p</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mn>.001</mml:mn></mml:math></inline-formula> for all comparisons). Furthermore, the differences between the traditional and dynamically corrected scores were statistically significant within each of the three semesters (<inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM9"><mml:mi>p</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mn>.001</mml:mn></mml:math></inline-formula> for all), indicating that our dynamic adjustment algorithm consistently and significantly altered the assessment results.</p>
<table-wrap id="T3" position="float"><label>Table&#x00A0;3</label>
<caption><p>Comparison of SAS scores across semesters with <italic>t</italic>-test results.</p></caption>
<table>
<thead>
<tr>
<th valign="top" align="left" rowspan="2">Assessment type</th>
<th valign="top" align="center" colspan="3">Mean SAS score by semester</th>
<th valign="top" align="center" colspan="3">Paired-sample <italic>t</italic>-test (sem1 vs. sem3)</th>
</tr>
<tr>
<th valign="top" align="center">Sem 1</th>
<th valign="top" align="center">Sem 2</th>
<th valign="top" align="center">Sem 3</th>
<th valign="top" align="center">Mean diff.</th>
<th valign="top" align="center">95&#x0025; CI</th>
<th valign="top" align="center"><italic>p</italic>-value</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">SAS traditional</td>
<td valign="top" align="center">48.77</td>
<td valign="top" align="center">47.47</td>
<td valign="top" align="center">46.56</td>
<td valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM10"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>2.21</td>
<td valign="top" align="center">[<inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM11"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>2.67, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM12"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>1.75]</td>
<td valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM13"><mml:mo>&#x003C;</mml:mo><mml:mn>.001</mml:mn></mml:math></inline-formula></td>
</tr>
<tr>
<td valign="top" align="left">SAS dynamic</td>
<td valign="top" align="center">56.78</td>
<td valign="top" align="center">52.85</td>
<td valign="top" align="center">48.67</td>
<td valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM14"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>8.10</td>
<td valign="top" align="center">[<inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM15"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>9.18, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM16"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>7.02]</td>
<td valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM17"><mml:mo>&#x003C;</mml:mo><mml:mn>.001</mml:mn></mml:math></inline-formula></td>
</tr>
<tr>
<td valign="top" align="left"><italic>t</italic>-test: T vs. D</td>
<td valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM18"><mml:mi>p</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mn>.001</mml:mn></mml:math></inline-formula></td>
<td valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM19"><mml:mi>p</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mn>.001</mml:mn></mml:math></inline-formula></td>
<td valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM20"><mml:mi>p</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mn>.001</mml:mn></mml:math></inline-formula></td>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3a2"><label>3.1.2</label><title>Model evaluation metrics</title>
<p>To evaluate the predictive validity of the dynamic model against the clinical gold standard (HAM-A), we assessed its performance in both the first and third semesters. As detailed in the combined summary in <xref ref-type="table" rid="T4">Table&#x00A0;4</xref>, the dynamic correction model demonstrated consistently superior performance across both time points. In the first semester, the accuracy of the dynamic model was 94.86&#x0025;, a significant improvement over the traditional model&#x2019;s 77.14&#x0025;, with a corresponding increase in recall from 75.00&#x0025; to 96.43&#x0025;. This high level of performance was sustained into the third semester (accuracy: 92.00&#x0025; vs. 83.43&#x0025;), suggesting that the dynamic adjustment offers greater consistency and reliability for long-term tracking.</p>
<table-wrap id="T4" position="float"><label>Table&#x00A0;4</label>
<caption><p>Prediction performance metrics of SAS scores in first and third semesters.</p></caption>
<table>
<thead>
<tr>
<th valign="top" align="left" rowspan="2">Metric</th>
<th valign="top" align="center" colspan="2">First semester</th>
<th valign="top" align="center" colspan="2">Third semester</th>
</tr>
<tr>
<th valign="top" align="center">SAS traditional</th>
<th valign="top" align="center">SAS dynamic</th>
<th valign="top" align="center">SAS traditional</th>
<th valign="top" align="center">SAS dynamic</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Accuracy</td>
<td valign="top" align="center">77.14&#x0025;</td>
<td valign="top" align="center">94.86&#x0025;</td>
<td valign="top" align="center">83.43&#x0025;</td>
<td valign="top" align="center">92.00&#x0025;</td>
</tr>
<tr>
<td valign="top" align="left">Recall</td>
<td valign="top" align="center">75.00&#x0025;</td>
<td valign="top" align="center">96.43&#x0025;</td>
<td valign="top" align="center">76.70&#x0025;</td>
<td valign="top" align="center">89.32&#x0025;</td>
</tr>
<tr>
<td valign="top" align="left">F1 score</td>
<td valign="top" align="center">0.81</td>
<td valign="top" align="center">0.96</td>
<td valign="top" align="center">0.84</td>
<td valign="top" align="center">0.93</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The superiority of the dynamic model is further evidenced by the Receiver Operating Characteristic (ROC) curve analysis (<xref ref-type="fig" rid="F10">Figures&#x00A0;10</xref>, <xref ref-type="fig" rid="F11">11</xref>). In both the first and third semesters, the Area Under the Curve (AUC) for the dynamically corrected results was consistently higher (Semester 1: AUC = 0.95 vs. 0.86; Semester 3: AUC = 0.89 vs. 0.85), demonstrating that the dynamic scores discriminate more accurately between cases and non-cases as classified by the clinician-rated HAM-A assessment.</p>
<fig id="F10" position="float"><label>Figure&#x00A0;10</label>
<caption><p>ROC curves for SAS vs. HAM-A anxiety in the first semester. The graph clearly shows the superior discriminative ability of the dynamic assessment model (AUC = 0.95) compared to the traditional model (AUC = 0.86) in identifying clinically significant anxiety.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1615250-g010.tif"><alt-text content-type="machine-generated">ROC curve comparing two models, SAS Traditional and SAS Dynamic. The SAS Traditional model (blue line) has an AUC of 0.86, while the SAS Dynamic model (orange line) has an AUC of 0.95. A random guess is represented by a dashed line. The graph shows true positive rate versus false positive rate.</alt-text>
</graphic>
</fig>
<fig id="F11" position="float"><label>Figure&#x00A0;11</label>
<caption><p>ROC curves for SAS vs. HAM-A anxiety in the third semester. The dynamic model maintains its predictive advantage (AUC = 0.89) over the traditional model (AUC = 0.85), indicating sustained performance in long-term tracking.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1615250-g011.tif"><alt-text content-type="machine-generated">ROC curve comparing SAS Traditional and SAS Dynamic methods. The SAS Traditional (blue line) has an AUC of 0.85, while SAS Dynamic (orange line) has an AUC of 0.89. The diagonal line represents a random guess. True Positive Rate is on the y-axis, False Positive Rate on the x-axis.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec id="s3b"><label>3.2</label><title>Comparison of SDS assessment results</title>
<sec id="s3b1"><label>3.2.1</label><title>Descriptive and inferential statistics</title>
<p>Similar to the anxiety scores, the Self-Rating Depression Scale (SDS) scores also showed a general decline over the study period, with the dynamic model again demonstrating a more pronounced effect (<xref ref-type="fig" rid="F12">Figure&#x00A0;12</xref>). The mean score for the traditional assessment remained relatively stable, decreasing slightly from 44.43 (Semester 1) to 42.91 (Semester 3). In stark contrast, the dynamically adjusted scores decreased markedly from a mean of 47.27 in the first semester to 39.78 in the third.</p>
<fig id="F12" position="float"><label>Figure&#x00A0;12</label>
<caption><p>Distribution of traditional vs. dynamic SDS scores across semesters. The boxplots highlight a much steeper decline in depression scores for the dynamic assessment compared to the relatively stable traditional scores. This suggests the dynamic model is more sensitive to longitudinal changes in depressive symptoms.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1615250-g012.tif"><alt-text content-type="machine-generated">Box plot comparing SDS scores for traditional and dynamic assessments across three semesters. Traditional assessments are in green and dynamic in red. Each box shows median, quartiles, and range.</alt-text>
</graphic>
</fig>
<p>The statistical significance of these trends is confirmed in <xref ref-type="table" rid="T5">Table&#x00A0;5</xref>. While the longitudinal decrease in the traditional SDS scores was statistically significant (<italic>p</italic> = .019), the magnitude of change in the dynamic scores was far greater and more significant (<inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM21"><mml:mi>p</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mn>.001</mml:mn></mml:math></inline-formula>). Paired-sample <italic>t</italic>-tests also showed significant differences between the traditional and dynamic scores in the first and third semesters, underscoring the substantial impact of our real-time data integration.</p>
<table-wrap id="T5" position="float"><label>Table&#x00A0;5</label>
<caption><p>Comparison of SDS scores across semesters with <italic>t</italic>-test results.</p></caption>
<table>
<thead>
<tr>
<th valign="top" align="left" rowspan="2">Assessment type</th>
<th valign="top" align="center" colspan="3">Mean SDS score by semester</th>
<th valign="top" align="center" colspan="3">Paired-sample <italic>t</italic>-test (Sem1 vs. Sem3)</th>
</tr>
<tr>
<th valign="top" align="center">Sem 1</th>
<th valign="top" align="center">Sem 2</th>
<th valign="top" align="center">Sem 3</th>
<th valign="top" align="center">Mean diff.</th>
<th valign="top" align="center">95&#x0025; CI</th>
<th valign="top" align="center"><italic>p</italic>-value</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">SDS traditional</td>
<td valign="top" align="center">44.43</td>
<td valign="top" align="center">44.11</td>
<td valign="top" align="center">42.91</td>
<td valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM22"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>1.52</td>
<td valign="top" align="center">[<inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM23"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>2.82, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM24"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>0.22]</td>
<td valign="top" align="center">.019</td>
</tr>
<tr>
<td valign="top" align="left">SDS dynamic</td>
<td valign="top" align="center">47.27</td>
<td valign="top" align="center">45.81</td>
<td valign="top" align="center">39.78</td>
<td valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM25"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>7.49</td>
<td valign="top" align="center">[<inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM26"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>8.87, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM27"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>6.11]</td>
<td valign="top" align="center">&#x003C;.001</td>
</tr>
<tr>
<td valign="top" align="left"><italic>t</italic>-test: T vs. D</td>
<td valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM28"><mml:mi>p</mml:mi><mml:mo>=</mml:mo><mml:mn>.002</mml:mn></mml:math></inline-formula></td>
<td valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM29"><mml:mi>p</mml:mi><mml:mo>=</mml:mo><mml:mn>.054</mml:mn></mml:math></inline-formula></td>
<td valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM30"><mml:mi>p</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mn>.001</mml:mn></mml:math></inline-formula></td>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3b2"><label>3.2.2</label><title>Model evaluation metrics</title>
<p>The dynamic model&#x2019;s superior ability to predict clinician-rated depression (HAM-D) was evident across the study. As summarized in <xref ref-type="table" rid="T6">Table&#x00A0;6</xref>, the dynamic model consistently outperformed the traditional scale. In the first semester, the dynamic model achieved an accuracy of 92.57&#x0025; and a recall of 85.71&#x0025;, substantially higher than the traditional model&#x2019;s 73.71&#x0025; and 60.71&#x0025;, respectively. This enhancement is critical, as it indicates a marked reduction in false negatives (missed cases). This superior performance was maintained in the third semester, with the dynamic model showing higher accuracy (94.29&#x0025; vs. 81.14&#x0025;) and recall (78.26&#x0025; vs. 67.39&#x0025;).</p>
<table-wrap id="T6" position="float"><label>Table&#x00A0;6</label>
<caption><p>Prediction performance metrics of SDS scores in first and third semesters.</p></caption>
<table>
<thead>
<tr>
<th valign="top" align="left" rowspan="2">Metric</th>
<th valign="top" align="center" colspan="2">First semester</th>
<th valign="top" align="center" colspan="2">Third semester</th>
</tr>
<tr>
<th valign="top" align="center">SDS traditional</th>
<th valign="top" align="center">SDS dynamic</th>
<th valign="top" align="center">SDS traditional</th>
<th valign="top" align="center">SDS dynamic</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Accuracy</td>
<td valign="top" align="center">73.71&#x0025;</td>
<td valign="top" align="center">92.57&#x0025;</td>
<td valign="top" align="center">81.14&#x0025;</td>
<td valign="top" align="center">94.29&#x0025;</td>
</tr>
<tr>
<td valign="top" align="left">Recall</td>
<td valign="top" align="center">60.71&#x0025;</td>
<td valign="top" align="center">85.71&#x0025;</td>
<td valign="top" align="center">67.39&#x0025;</td>
<td valign="top" align="center">78.26&#x0025;</td>
</tr>
<tr>
<td valign="top" align="left">F1 score</td>
<td valign="top" align="center">0.69</td>
<td valign="top" align="center">0.92</td>
<td valign="top" align="center">0.65</td>
<td valign="top" align="center">0.88</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>This enhanced predictive power is also clearly visualized in the ROC curve analysis (<xref ref-type="fig" rid="F13">Figures&#x00A0;13</xref>, <xref ref-type="fig" rid="F14">14</xref>). The dynamic model consistently yielded a higher AUC in both the first semester (0.93 vs. 0.82) and the third semester (0.91 vs. 0.88), confirming its stronger discriminative ability and predictive accuracy with respect to the HAM-D assessment results.</p>
<fig id="F13" position="float"><label>Figure&#x00A0;13</label>
<caption><p>ROC curves for SDS vs. HAM-D depression in the first semester. The dynamic model (AUC = 0.93) demonstrates a substantially improved ability to discriminate between clinical and non-clinical depression compared to the traditional model (AUC = 0.82).</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1615250-g013.tif"><alt-text content-type="machine-generated">ROC curve comparing two systems: SDS Traditional in blue with an AUC of 0.82, and SDS Dynamic in orange with an AUC of 0.93. The dashed line represents random guessing. True Positive Rate is on the y-axis, and False Positive Rate is on the x-axis.</alt-text>
</graphic>
</fig>
<fig id="F14" position="float"><label>Figure&#x00A0;14</label>
<caption><p>ROC curves for SDS vs. HAM-D depression in the third semester. The dynamic model continues to show superior performance (AUC = 0.91) over the traditional model (AUC = 0.88), reinforcing its value in long-term assessment.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1615250-g014.tif"><alt-text content-type="machine-generated">ROC curve comparing SDS Traditional and SDS Dynamic models. The SDS Traditional model (blue line) has an AUC of 0.88, and the SDS Dynamic model (orange line) has an AUC of 0.91. The black dashed line represents a random guess. The plot shows True Positive Rate versus False Positive Rate.</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec id="s3c"><label>3.3</label><title>Comparison of SCL-90 assessment results</title>
<sec id="s3c1"><label>3.3.1</label><title>Descriptive and inferential statistics</title>
<p>The analysis of the Symptom Checklist-90 (SCL-90) total scores reveals a similar longitudinal trend, as shown in <xref ref-type="fig" rid="F15">Figure&#x00A0;15</xref>. Both assessment methods registered a decrease in overall psychological symptomatology over the three semesters. The traditional SCL-90 total score showed a modest, non-significant decrease from a mean of 146.80 to 132.31. In contrast, the decrease in the dynamically adjusted scores was more pronounced and statistically significant, dropping from 150.14 in Semester 1 to 123.31 in Semester 3 (<italic>p</italic> = .019), as detailed in <xref ref-type="table" rid="T7">Table&#x00A0;7</xref>. This suggests that the dynamic model was more sensitive to overall improvements in students&#x2019; self-reported psychological well-being over time.</p>
<fig id="F15" position="float"><label>Figure&#x00A0;15</label>
<caption><p>Distribution of traditional vs. dynamic SCL-90 scores across semesters. The boxplots show a general decline in overall psychological symptoms. The decrease is larger for the dynamically adjusted scores and reaches statistical significance only for them, indicating greater sensitivity to longitudinal changes.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1615250-g015.tif"><alt-text content-type="machine-generated">Box plot showing SCL90 scores across three semesters for two assessment types: Traditional (green) and Dynamic (orange). Each semester displays two adjacent box plots with similar medians around 100 and outliers above 300.</alt-text>
</graphic>
</fig>
<table-wrap id="T7" position="float"><label>Table&#x00A0;7</label>
<caption><p>Comparison of SCL-90 scores across semesters with <italic>t</italic>-test results.</p></caption>
<table>
<thead>
<tr>
<th valign="top" align="left" rowspan="2">Assessment type</th>
<th valign="top" align="center" colspan="3">Mean SCL-90 score by semester</th>
<th valign="top" align="center" colspan="3">Paired-sample <italic>t</italic>-test (Sem1 vs. Sem3)</th>
</tr>
<tr>
<th valign="top" align="center">Sem 1</th>
<th valign="top" align="center">Sem 2</th>
<th valign="top" align="center">Sem 3</th>
<th valign="top" align="center">Mean diff.</th>
<th valign="top" align="center">95&#x0025; CI</th>
<th valign="top" align="center"><italic>p</italic>-value</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">SCL-90 traditional</td>
<td valign="top" align="center">146.80</td>
<td valign="top" align="center">139.13</td>
<td valign="top" align="center">132.31</td>
<td valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM31"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>14.49</td>
<td valign="top" align="center">[<inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM32"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>30.34, 1.36]</td>
<td valign="top" align="center">.073</td>
</tr>
<tr>
<td valign="top" align="left">SCL-90 dynamic</td>
<td valign="top" align="center">150.14</td>
<td valign="top" align="center">133.43</td>
<td valign="top" align="center">123.31</td>
<td valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM33"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>26.83</td>
<td valign="top" align="center">[<inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM34"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>48.11, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM35"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>5.55]</td>
<td valign="top" align="center">.019</td>
</tr>
<tr>
<td valign="top" align="left"><italic>t</italic>-test: T vs. D</td>
<td valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM36"><mml:mi>p</mml:mi><mml:mo>=</mml:mo><mml:mn>.574</mml:mn></mml:math></inline-formula></td>
<td valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM37"><mml:mi>p</mml:mi><mml:mo>=</mml:mo><mml:mn>.278</mml:mn></mml:math></inline-formula></td>
<td valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM38"><mml:mi>p</mml:mi><mml:mo>=</mml:mo><mml:mn>.031</mml:mn></mml:math></inline-formula></td>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3c2"><label>3.3.2</label><title>Model evaluation metrics</title>
<p>The evaluation of the SCL-90 model&#x2019;s performance in predicting continuous HAM-A and HAM-D scores revealed a more complex pattern, as detailed in <xref ref-type="table" rid="T8">Tables&#x00A0;8</xref>, <xref ref-type="table" rid="T9">9</xref>.</p>
<table-wrap id="T8" position="float"><label>Table&#x00A0;8</label>
<caption><p>Top 3 SCL-90 predictors for HAM-A and HAM-D in the first semester.</p></caption>
<table>
<thead>
<tr>
<th valign="top" align="left" rowspan="2">Predictors rank</th>
<th valign="top" align="center" colspan="3">Traditional SCL-90</th>
<th valign="top" align="center" colspan="3">Dynamic SCL-90</th>
</tr>
<tr>
<th valign="top" align="center">Predicting <break/>HAM-A</th>
<th valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM39"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></th>
<th valign="top" align="center">MSE</th>
<th valign="top" align="center">Predicting <break/>HAM-A</th>
<th valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM40"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></th>
<th valign="top" align="center">MSE</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="left">Other</td>
<td valign="top" align="center">0.21</td>
<td valign="top" align="center">139.15</td>
<td valign="top" align="left">Total score</td>
<td valign="top" align="center">0.34</td>
<td valign="top" align="center">116.99</td>
</tr>
<tr>
<td valign="top" align="left">2</td>
<td valign="top" align="left">Total score</td>
<td valign="top" align="center">0.20</td>
<td valign="top" align="center">141.34</td>
<td valign="top" align="left">Obsessive</td>
<td valign="top" align="center">0.33</td>
<td valign="top" align="center">118.06</td>
</tr>
<tr>
<td valign="top" align="left">3</td>
<td valign="top" align="left">Depression</td>
<td valign="top" align="center">0.20</td>
<td valign="top" align="center">142.38</td>
<td valign="top" align="left">Phobia</td>
<td valign="top" align="center">0.33</td>
<td valign="top" align="center">119.32</td>
</tr>
<tr>
<th valign="top" align="left">Predictors rank</th>
<th valign="top" align="center">Predicting HAM-D</th>
<th valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM41"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></th>
<th valign="top" align="center">MSE</th>
<th valign="top" align="center">Predicting HAM-D</th>
<th valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM42"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></th>
<th valign="top" align="center">MSE</th>
</tr>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="left">Obsessive</td>
<td valign="top" align="center">0.07</td>
<td valign="top" align="center">200.63</td>
<td valign="top" align="left">Anxiety</td>
<td valign="top" align="center">0.28</td>
<td valign="top" align="center">154.93</td>
</tr>
<tr>
<td valign="top" align="left">2</td>
<td valign="top" align="left">Depression</td>
<td valign="top" align="center">0.07</td>
<td valign="top" align="center">200.90</td>
<td valign="top" align="left">Somatization</td>
<td valign="top" align="center">0.28</td>
<td valign="top" align="center">155.74</td>
</tr>
<tr>
<td valign="top" align="left">3</td>
<td valign="top" align="left">Phobia</td>
<td valign="top" align="center">0.07</td>
<td valign="top" align="center">201.14</td>
<td valign="top" align="left">Phobia</td>
<td valign="top" align="center">0.28</td>
<td valign="top" align="center">156.82</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T9" position="float"><label>Table&#x00A0;9</label>
<caption><p>Top 3 SCL-90 predictors for HAM-A and HAM-D in the third semester.</p></caption>
<table>
<thead>
<tr>
<th valign="top" align="left" rowspan="2">Rank</th>
<th valign="top" align="center" colspan="3">Traditional SCL-90</th>
<th valign="top" align="center" colspan="3">Dynamic SCL-90</th>
</tr>
<tr>
<th valign="top" align="center">Predicting HAM-A</th>
<th valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM43"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></th>
<th valign="top" align="center">MSE</th>
<th valign="top" align="center">Predicting HAM-A</th>
<th valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM44"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></th>
<th valign="top" align="center">MSE</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="left">Depression</td>
<td valign="top" align="center">0.17</td>
<td valign="top" align="center">110.79</td>
<td valign="top" align="left">Psychoticism</td>
<td valign="top" align="center">0.17</td>
<td valign="top" align="center">111.91</td>
</tr>
<tr>
<td valign="top" align="left">2</td>
<td valign="top" align="left">Total score</td>
<td valign="top" align="center">0.17</td>
<td valign="top" align="center">110.88</td>
<td valign="top" align="left">Anxiety</td>
<td valign="top" align="center">0.16</td>
<td valign="top" align="center">112.06</td>
</tr>
<tr>
<td valign="top" align="left">3</td>
<td valign="top" align="left">Anxiety</td>
<td valign="top" align="center">0.17</td>
<td valign="top" align="center">111.24</td>
<td valign="top" align="left">Total score</td>
<td valign="top" align="center">0.16</td>
<td valign="top" align="center">113.27</td>
</tr>
<tr>
<th valign="top" align="left">Rank</th>
<th valign="top" align="center">Predicting HAM-D</th>
<th valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM45"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></th>
<th valign="top" align="center">MSE</th>
<th valign="top" align="center">Predicting HAM-D</th>
<th valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM46"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula></th>
<th valign="top" align="center">MSE</th>
</tr>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="left">Obsessive</td>
<td valign="top" align="center">0.25</td>
<td valign="top" align="center">84.17</td>
<td valign="top" align="left">Psychoticism</td>
<td valign="top" align="center">0.08</td>
<td valign="top" align="center">102.74</td>
</tr>
<tr>
<td valign="top" align="left">2</td>
<td valign="top" align="left">Total score</td>
<td valign="top" align="center">0.21</td>
<td valign="top" align="center">87.88</td>
<td valign="top" align="left">Somatization</td>
<td valign="top" align="center">0.08</td>
<td valign="top" align="center">103.22</td>
</tr>
<tr>
<td valign="top" align="left">3</td>
<td valign="top" align="left">Interpersonal</td>
<td valign="top" align="center">0.21</td>
<td valign="top" align="center">88.01</td>
<td valign="top" align="left">Obsessive</td>
<td valign="top" align="center">0.07</td>
<td valign="top" align="center">103.59</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In the first semester, the dynamic model demonstrated a stronger correlation with the clinical ratings. For instance, when predicting HAM-A scores, the dynamic model&#x2019;s total score achieved an R-squared of 0.34, substantially higher than the traditional model&#x2019;s R-squared of 0.20. This indicates that the dynamic scores initially explained a larger proportion of the variance in clinician-rated anxiety. The dynamic model also showed a lower Mean Squared Error (MSE) for predicting HAM-A (116.99 vs. 141.34), suggesting a higher initial prediction accuracy.</p>
<p>However, this advantage diminished by the third semester. While the traditional model showed relatively strong predictive power for HAM-D (obsessive-compulsive subscale, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM47"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> = 0.25), the dynamic model&#x2019;s correlations weakened across most dimensions (e.g., psychoticism predicting HAM-D, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM48"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> = 0.08). Furthermore, the prediction error for HAM-D was higher for the dynamic model (MSE = 102.74) than for the traditional model (MSE = 84.17). This suggests that while the dynamic adjustments are effective for simpler constructs like anxiety and depression, their specificity may decrease over time for the multi-faceted SCL-90, potentially due to interference from complex life events not captured by our model.</p>
</sec>
</sec>
<sec id="s3d"><label>3.4</label><title>Comparison of HAM-A and HAM-D pre- and post-assessments</title>
<p>To assess the potential intervention effect of long-term participation in the dynamic assessment system, we analyzed the changes in clinician-rated Hamilton Anxiety (HAM-A) and Hamilton Depression (HAM-D) scores from the first semester (November 2022) to the third semester (March 2024).</p>
<p>As shown in <xref ref-type="table" rid="T10">Table&#x00A0;10</xref> and visualized in <xref ref-type="fig" rid="F16">Figures&#x00A0;16</xref>, <xref ref-type="fig" rid="F17">17</xref>, participating students exhibited statistically significant reductions in both anxiety and depression levels over the study period. The mean HAM-A score decreased by 15.2&#x0025;, from 20.61 to 17.49. The mean HAM-D score showed an even more pronounced reduction of 40.0&#x0025;, decreasing from 13.02 to 7.81. Paired-sample <italic>t</italic>-tests confirmed that both of these changes were statistically significant (HAM-A: <italic>p</italic> = .004; HAM-D: <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM49"><mml:mi>p</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mn>.001</mml:mn></mml:math></inline-formula>).</p>
<table-wrap id="T10" position="float"><label>Table&#x00A0;10</label>
<caption><p>Comparison of HAM-A and HAM-D scores between first and third semesters.</p></caption>
<table>
<thead>
<tr>
<th valign="top" align="left" rowspan="2">Scale</th>
<th valign="top" align="center" colspan="2">Mean score</th>
<th valign="top" align="center" colspan="3">Paired-sample <italic>t</italic>-test (Sem1 vs. Sem3)</th>
</tr>
<tr>
<th valign="top" align="center">Sem 1</th>
<th valign="top" align="center">Sem 3</th>
<th valign="top" align="center">Mean diff.</th>
<th valign="top" align="center">95&#x0025; CI</th>
<th valign="top" align="center"><italic>p</italic>-value</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">HAM-A</td>
<td valign="top" align="center">20.61</td>
<td valign="top" align="center">17.49</td>
<td valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM50"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>3.12</td>
<td valign="top" align="center">[<inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM51"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>5.25, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM52"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>1.00]</td>
<td valign="top" align="center">.004</td>
</tr>
<tr>
<td valign="top" align="left">HAM-D</td>
<td valign="top" align="center">13.02</td>
<td valign="top" align="center">7.81</td>
<td valign="top" align="center"><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM53"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>5.21</td>
<td valign="top" align="center">[<inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM54"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>7.71, <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM55"><mml:mo>&#x2212;</mml:mo></mml:math></inline-formula>2.71]</td>
<td valign="top" align="center">&#x003C;.001</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F16" position="float"><label>Figure&#x00A0;16</label>
<caption><p>Clinician-rated anxiety (HAM-A) score comparison between first and third semesters. The boxplot shows a statistically significant decrease in the mean anxiety scores among students who participated in the dynamic assessment system over one and a half years.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1615250-g016.tif"><alt-text content-type="machine-generated">Box plot comparing HAMA scores between two assessment types over semesters. Semester 1 shows higher scores with the traditional method in green, while semester 3 shows lower scores with the dynamic method in orange. Outliers are present in the dynamic method.</alt-text>
</graphic>
</fig>
<fig id="F17" position="float"><label>Figure&#x00A0;17</label>
<caption><p>Clinician-rated depression (HAM-D) score comparison between first and third semesters. A pronounced and statistically significant reduction in mean depression scores is observed, highlighting the potential therapeutic benefit of the dynamic assessment and feedback system.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1615250-g017.tif"><alt-text content-type="machine-generated">Boxplot comparing HAMD scores across two types of assessments for semesters one and three. The first box is labeled &#x201C;Traditional&#x201D; in green, showing higher scores and more outliers. The second, &#x201C;Dynamic&#x201D; in orange, displays lower scores with fewer outliers.</alt-text>
</graphic>
</fig>
<p>These findings are particularly noteworthy as they are based on independent clinical assessments rather than self-report measures. The results suggest that continuous engagement with the dynamic assessment system&#x2014;which provides regular feedback and promotes self-monitoring&#x2014;may have a tangible, positive therapeutic effect. This demonstrates the system&#x2019;s potential not only as a more accurate measurement tool but also as a practical, scalable intervention for improving student mental health.</p>
</sec>
</sec>
<sec id="s4" sec-type="discussion"><label>4</label><title>Discussion</title>
<p>This study introduced and evaluated an AI-driven dynamic psychological assessment system designed to correct and enhance traditional mental health scales using daily behavioral and cognitive data from university students. Our primary findings robustly support the feasibility and superiority of this novel approach. The results demonstrate that the dynamic model significantly improved the accuracy of anxiety (SAS) and depression (SDS) assessments compared to static, single-time-point measurements, showing a stronger correlation with clinician-rated gold standards. Furthermore, long-term engagement with the system was associated with significant reductions in clinician-rated anxiety (HAM-A) and depression (HAM-D) scores, suggesting the system&#x2019;s potential not only as a measurement tool but also as an integrated assessment-intervention loop. However, the study also revealed challenges, particularly in the declining predictive power of the model for the more complex, multi-faceted SCL-90 scale over time. In the following sections, we will interpret these principal findings, discuss their clinical and theoretical implications, analyze the model&#x2019;s performance limitations, and outline the study&#x2019;s limitations and future research directions.</p>
<sec id="s4a"><label>4.1</label><title>Principal findings and clinical implications</title>
<p>The central finding of this study is that integrating high-frequency, ecologically valid data from students&#x2019; daily lives significantly enhances the accuracy of traditional psychometric scales for anxiety and depression. The superior performance of our dynamic model, evidenced by higher AUCs and a stronger correlation with clinician ratings, underscores a fundamental limitation of static assessments: their inability to capture the fluid, context-dependent nature of mental states. A single questionnaire provides a cross-sectional &#x201C;snapshot,&#x201D; which can be easily biased by a student&#x2019;s mood on a particular day. In contrast, our dynamic system functions like a longitudinal &#x201C;film,&#x201D; continuously updating the assessment based on a stream of cognitive and behavioral data points, thereby creating a more robust, nuanced, and authentic psychological profile.</p>
<p>The clinical implications of this finding are profound, signaling a potential paradigm shift in mental health assessment. In an era where daily life is increasingly mediated by digital technology, platforms like WeChat or wearable devices are becoming vast, untapped repositories of behavioral and cognitive data. Our research provides a proof-of-concept that these digital footprints can be ethically harnessed to move beyond reactive, clinic-based assessments towards a model of proactive, continuous, and personalized mental health monitoring. While the tools developed in this study are still nascent, they point towards a future where &#x201C;digital phenotyping&#x201D; could become a cornerstone of clinical practice. Such systems could enable clinicians to detect subtle negative changes in a student&#x2019;s state long before they escalate into a crisis, facilitating early and targeted interventions.</p>
<p>Furthermore, the significant reduction in clinician-rated HAM-A and HAM-D scores suggests that the assessment process itself can be therapeutic. By engaging students in daily self-monitoring and providing immediate, narrative-based feedback, our system creates an active assessment-intervention loop. This process aligns with the principles of measurement-based care, where continuous data is used to inform and guide treatment. The gamified and humanized feedback encourages self-reflection and may empower students to make small but meaningful adjustments to their daily routines and thought patterns, fostering a sense of agency over their own mental well-being. This suggests that the future of digital mental health tools lies not just in their predictive power, but in their ability to function as interactive companions that promote psychological resilience.</p>
</sec>
<sec id="s4b"><label>4.2</label><title>Interpretation in light of theoretical frameworks</title>
<p>While this study was initially driven by practical needs rather than a single theoretical doctrine, its design and findings resonate strongly with several contemporary psychological frameworks. Our approach can be understood as an applied synthesis of principles rooted in the state-trait theory of personality, Ecological Momentary Assessment (EMA), and the emerging field of digital phenotyping.</p>
<p>At its core, our research directly addresses the classic <bold>state-trait distinction</bold> (<xref ref-type="bibr" rid="B1">1</xref>). Our fundamental premise is that traditional scales, while effective at capturing stable &#x201C;traits,&#x201D; are insufficient for tracking the moment-to-moment &#x201C;states&#x201D; that constitute daily emotional life. By continuously integrating daily cognitive and behavioral data, our dynamic model is a deliberate attempt to create a &#x201C;state-sensitive&#x201D; measurement tool. The finding that our dynamic scores are more predictive of clinical ratings supports the idea that an accumulation of state-level data provides a more valid picture of an individual&#x2019;s current mental health status than a single trait-level snapshot.</p>
<p>The methodology itself is a direct application of <bold>Ecological Momentary Assessment (EMA)</bold>. By collecting high-frequency data in the students&#x2019; natural environment, we mitigate the recall bias inherent in retrospective questionnaires. This ecological validity is crucial for understanding the real-world triggers and fluctuations of anxiety and depression. Furthermore, our work extends EMA by not just collecting data, but by using it to actively correct and inform psychometric scores in near real-time.</p>
<p>Finally, our study contributes to the burgeoning field of <bold>digital phenotyping</bold>. The ultimate vision, as articulated in literature on the future of psychometrics, is the use of multimodal data from daily life&#x2014;captured via smartphones and wearable devices&#x2014;to construct a comprehensive picture of mental well-being (<xref ref-type="bibr" rid="B11">11</xref>). Our WeChat mini-program serves as a direct, albeit early, implementation of this vision. By translating digital interactions (cognitive votes and behavioral check-ins) into psychometrically meaningful adjustments, we are building a data-driven &#x201C;digital phenotype&#x201D; of student mental health. The finding that this process may also be therapeutic aligns with theories of <bold>Self-Regulation</bold>, where the feedback from one&#x2019;s own data is a critical mechanism for behavioral change and goal attainment.</p>
</sec>
<sec id="s4c"><label>4.3</label><title>Understanding the model&#x2019;s performance over time</title>
<p>A particularly noteworthy and complex finding of this study was the observed decline in the predictive power of our dynamic model for the SCL-90 over time. While the model initially showed a clear advantage over the traditional scale in predicting clinical ratings, this superiority diminished by the third semester. We propose that this phenomenon is not attributable to a single cause, but rather to a multifactorial interplay of methodological, psychological, and contextual factors.</p>
<p>First, the possibility of a <bold>reactivity effect</bold> cannot be discounted. Over the course of one and a half years, it is plausible that students became &#x201C;assessment-savvy.&#x201D; They may have discerned the underlying patterns of the system, consciously or unconsciously adjusting their responses in cognitive voting and behavioral check-ins to present themselves in a more favorable light or to &#x201C;manage&#x201D; their profiles. This form of learned behavior would introduce noise into the ecological data, decoupling it from their authentic psychological state and thereby weakening the model&#x2019;s predictive accuracy.</p>
<p>Second, the decline may reflect the inherent <bold>limitations of our model in capturing the complexity of the SCL-90 construct and real-world life events</bold>. The SCL-90 measures a broad spectrum of psychopathological symptoms, which are often influenced by significant, discrete life events (e.g., academic failures, relationship breakdowns, the end of the COVID-19 lockdown). Our model, while effective at tracking the general ebb and flow of anxiety and depression, may lack the specificity to account for the impact of such major external shocks on more complex symptom dimensions like psychoticism or paranoia. The initial success of the model might have occurred during a period of relative environmental stability (i.e., the lockdown), while its later decline could coincide with a return to a more chaotic and unpredictable post-pandemic campus life, where a wider range of unmeasured confounding variables came into play.</p>
<p>Third, and perhaps counterintuitively, the model&#x2019;s declining predictive power might be a paradoxical signal of its <bold>success as an intervention</bold>. The continuous feedback and self-monitoring process may have genuinely enhanced students&#x2019; self-regulation and coping skills. As students became more psychologically resilient, their mental states may have stabilized and exhibited less variance. Consequently, their daily cognitive and behavioral patterns would become less predictive of pathology simply because there was less pathology to predict. In this view, the &#x201C;signal&#x201D; (i.e., symptom fluctuation) weakened, making it harder for the model to make accurate predictions against a baseline of improved mental health. Disentangling these three potential explanations presents a significant challenge and underscores a crucial direction for future research.</p>
</sec>
<sec id="s4d"><label>4.4</label><title>Limitations and future directions</title>
<p>Despite the promising findings, this study has several limitations that must be acknowledged. First, the research was conducted in the context of a unique historical event&#x2014;the COVID-19 pandemic and subsequent campus lockdown. This &#x201C;natural experiment&#x201D; setting, while providing a compelling rationale for the study, introduces significant <bold>confounding variables</bold>. The observed improvements in student mental health could be partially attributed to the cessation of the lockdown and a return to normal campus life (<italic>history effect</italic>), or to the natural process of student maturation over the one-and-a-half-year period (<italic>maturation effect</italic>).</p>
<p>Second, our study design lacks a <bold>randomized controlled trial (RCT)</bold> framework. The comparison group, while similar in academic background, was not randomly assigned, which introduces potential selection bias. Furthermore, the act of being continuously monitored and engaged might have induced a <bold>Hawthorne effect</bold>, where participants&#x2019; behavior changed simply because they were aware of being studied. While our results provide strong preliminary evidence, a future RCT with randomized allocation to either the dynamic assessment group or a control group (receiving only traditional assessments) would be necessary to definitively establish the causal effect of our intervention.</p>
<p>Third, our data collection, while ecologically valid, was limited to self-report (cognitive votes) and semi-objective data (counselor-logged behavioral check-ins). Future research should aim to integrate more <bold>objective, passive data streams</bold>, such as smartphone sensor data (e.g., screen time, mobility patterns) or wearable device metrics (e.g., sleep patterns, heart rate variability). This would create a more comprehensive digital phenotype, reduce the potential for reactivity effects, and further enhance the model&#x2019;s predictive power. Finally, the AI model itself, while effective, requires further refinement in its ability to interpret and adapt to complex, long-term psychological changes.</p>
</sec>
</sec>
<sec id="s5" sec-type="conclusions"><label>5</label><title>Conclusion</title>
<p>In conclusion, this study provides compelling evidence for the value of AI-driven dynamic psychological assessment as a means to correct and enhance traditional mental health scales. By integrating real-time behavioral and cognitive data, we developed a system that not only offers a more accurate and nuanced picture of university students&#x2019; mental health but also functions as a potential therapeutic tool through continuous, gamified feedback. Despite the inherent limitations of its quasi-experimental design, our research demonstrates a viable pathway towards a new paradigm of proactive, personalized, and continuous mental health care. This work underscores the immense potential of ethically harnessing digital technologies to move beyond static snapshots and create a more dynamic, responsive, and ultimately more effective system for supporting student well-being.</p>
</sec>
</body>
<back>
<sec id="s6" sec-type="data-availability"><title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec id="s7" sec-type="ethics-statement"><title>Ethics statement</title>
<p>The studies involving humans were approved by the Ethics Committee of Inner Mongolia Autonomous Region People&#x2019;s Hospital. The studies were conducted in accordance with the local legislation and institutional requirements. The participants provided their written informed consent to participate in this study.</p>
</sec>
<sec id="s8" sec-type="author-contributions"><title>Author contributions</title>
<p>BT: Writing &#x2013; review &#x0026; editing, Writing &#x2013; original draft. ZL: Writing &#x2013; review &#x0026; editing, Writing &#x2013; original draft, Resources, Validation, Project administration. XH: Conceptualization, Formal analysis, Project administration, Writing &#x2013; review &#x0026; editing. FY: Validation, Writing &#x2013; review &#x0026; editing, Conceptualization. LY: Writing &#x2013; review &#x0026; editing, Supervision, Project administration, Data curation. LG: Writing &#x2013; review &#x0026; editing, Investigation, Software.</p>
</sec>
<sec id="s10" sec-type="COI-statement"><title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s11" sec-type="ai-statement"><title>Generative AI statement</title>
<p>The author(s) declare that no Generative AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec id="s12" sec-type="disclaimer"><title>Publisher&#x0027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list><title>References</title>
<ref id="B1"><label>1.</label><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Husserl</surname> <given-names>E</given-names></name></person-group>. <source>On the Phenomenology of the Consciousness of Internal Time (1893&#x2013;1917)</source>. <publisher-loc>Dordrecht</publisher-loc>: <publisher-name>Springer Science &#x0026; Business Media</publisher-name> (<year>1991</year>).</mixed-citation></ref>
<ref id="B2"><label>2.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Han</surname> <given-names>SS</given-names></name> <name><surname>Zhang</surname> <given-names>YS</given-names></name> <name><surname>Zhu</surname> <given-names>W</given-names></name> <name><surname>Ye</surname> <given-names>YP</given-names></name> <name><surname>Li</surname> <given-names>YX</given-names></name> <name><surname>Meng</surname> <given-names>SQ</given-names></name></person-group>, et al. <article-title>Status and epidemiological characteristics of depression and anxiety among Chinese university students in 2023</article-title>. <source>BMC Public Health</source>. (<year>2025</year>) <volume>25</volume>:<fpage>1189</fpage>. <pub-id pub-id-type="doi">10.1186/s12889-025-22443-7</pub-id><pub-id pub-id-type="pmid">40155930</pub-id></mixed-citation></ref>
<ref id="B3"><label>3.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shao</surname> <given-names>R</given-names></name> <name><surname>He</surname> <given-names>P</given-names></name> <name><surname>Ling</surname> <given-names>B</given-names></name> <name><surname>Tan</surname> <given-names>L</given-names></name> <name><surname>Xu</surname> <given-names>L</given-names></name> <name><surname>Hou</surname> <given-names>Y</given-names></name></person-group>, et al. <article-title>Prevalence of depression and anxiety and correlations between depression, anxiety, family functioning, social support and coping styles among Chinese medical students</article-title>. <source>BMC Psychol</source>. (<year>2020</year>) <volume>8</volume>:<fpage>38</fpage>. <pub-id pub-id-type="doi">10.1186/s40359-020-00402-8</pub-id><pub-id pub-id-type="pmid">32321593</pub-id></mixed-citation></ref>
<ref id="B4"><label>4.</label><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Boyle</surname> <given-names>GJ</given-names></name> <name><surname>Saklofske</surname> <given-names>DH</given-names></name> <name><surname>Matthews</surname> <given-names>G</given-names></name></person-group>, editors. <source>Measures of Personality and Social Psychological Constructs</source>. <publisher-loc>San Diego, CA</publisher-loc>: <publisher-name>Academic Press</publisher-name> (<year>2015</year>).</mixed-citation></ref>
<ref id="B5"><label>5.</label><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Linden</surname> <given-names>WJVD</given-names></name> <name><surname>Glas</surname> <given-names>GAW</given-names></name></person-group>. <source>Computerized Adaptive Testing: Theory and Practice</source>. <publisher-loc>Dordrecht</publisher-loc>: <publisher-name>Springer Science &#x0026; Business Media</publisher-name> (<year>2000</year>).</mixed-citation></ref>
<ref id="B6"><label>6.</label><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Hambleton</surname> <given-names>R</given-names></name></person-group>. <source>Psychometric Models, Test Designs and Item Types for the Next Generation of Educational and Psychological Tests</source>. <publisher-loc>Chichester</publisher-loc>: <publisher-name>John Wiley &#x0026; Sons</publisher-name> (<year>2008</year>). <comment>p. 77&#x2013;89</comment>.</mixed-citation></ref>
<ref id="B7"><label>7.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Embretson</surname> <given-names>S</given-names></name> <name><surname>Yang</surname> <given-names>X</given-names></name></person-group>. <article-title>23 Automatic item generation and cognitive psychology</article-title>. <comment>In: Rao C, Sinharay S, editors. <italic>Psychometrics</italic>. Amsterdam: Elsevier (2006). Handbook of Statistics; vol. 26. p. 747&#x2013;68</comment>.</mixed-citation></ref>
<ref id="B8"><label>8.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kaplan</surname> <given-names>AM</given-names></name> <name><surname>Haenlein</surname> <given-names>M</given-names></name></person-group>. <article-title>Users of the world, unite! The challenges and opportunities of social media</article-title>. <source>Bus Horiz</source>. (<year>2010</year>) <volume>53</volume>:<fpage>59</fpage>&#x2013;<lpage>68</lpage>. <pub-id pub-id-type="doi">10.1016/j.bushor.2009.09.003</pub-id></mixed-citation></ref>
<ref id="B9"><label>9.</label><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Russell</surname> <given-names>SJ</given-names></name> <name><surname>Norvig</surname> <given-names>P</given-names></name></person-group>. <source>Artificial Intelligence: A Modern Approach</source>. <publisher-loc>Englewood Cliffs, NJ</publisher-loc>: <publisher-name>Pearson</publisher-name> (<year>2016</year>).</mixed-citation></ref>
<ref id="B10"><label>10.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kjell</surname> <given-names>ON</given-names></name> <name><surname>Kjell</surname> <given-names>K</given-names></name> <name><surname>Schwartz</surname> <given-names>HA</given-names></name></person-group>. <article-title>Beyond rating scales: with targeted evaluation, large language models are poised for psychological assessment</article-title>. <source>Psychiatry Res</source>. (<year>2024</year>) <volume>333</volume>:<fpage>115667</fpage>. <pub-id pub-id-type="doi">10.1016/j.psychres.2023.115667</pub-id><pub-id pub-id-type="pmid">38290286</pub-id></mixed-citation></ref>
<ref id="B11"><label>11.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Moura</surname> <given-names>I</given-names></name> <name><surname>Teles</surname> <given-names>A</given-names></name> <name><surname>Viana</surname> <given-names>D</given-names></name> <name><surname>Marques</surname> <given-names>J</given-names></name> <name><surname>Coutinho</surname> <given-names>L</given-names></name> <name><surname>Silva</surname> <given-names>F</given-names></name></person-group>. <article-title>Digital phenotyping of mental health using multimodal sensing of multiple situations of interest: a systematic literature review</article-title>. <source>J Biomed Inform</source>. (<year>2023</year>) <volume>138</volume>:<fpage>104278</fpage>. <pub-id pub-id-type="doi">10.1016/j.jbi.2022.104278</pub-id><pub-id pub-id-type="pmid">36586498</pub-id></mixed-citation></ref>
<ref id="B12"><label>12.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Stein</surname> <given-names>DJ</given-names></name> <name><surname>Shoptaw</surname> <given-names>SJ</given-names></name> <name><surname>Vigo</surname> <given-names>DV</given-names></name> <name><surname>Lund</surname> <given-names>C</given-names></name> <name><surname>Cuijpers</surname> <given-names>P</given-names></name> <name><surname>Bantjes</surname> <given-names>J</given-names></name></person-group>, et al. <article-title>Psychiatric diagnosis and treatment in the 21st century: paradigm shifts versus incremental integration</article-title>. <source>World Psychiatry</source>. (<year>2022</year>) <volume>21</volume>:<fpage>393</fpage>&#x2013;<lpage>414</lpage>. <pub-id pub-id-type="doi">10.1002/wps.20998</pub-id><pub-id pub-id-type="pmid">36073709</pub-id></mixed-citation></ref>
<ref id="B13"><label>13.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bone</surname> <given-names>C</given-names></name> <name><surname>Simmonds-Buckley</surname> <given-names>M</given-names></name> <name><surname>Thwaites</surname> <given-names>R</given-names></name> <name><surname>Sandford</surname> <given-names>D</given-names></name> <name><surname>Merzhvynska</surname> <given-names>M</given-names></name> <name><surname>Rubel</surname> <given-names>J</given-names></name></person-group>, et al. <article-title>Dynamic prediction of psychological treatment outcomes: development and validation of a prediction model using routinely collected symptom data</article-title>. <source>Lancet Digit Health</source>. (<year>2021</year>) <volume>3</volume>:<fpage>e231</fpage>&#x2013;<lpage>40</lpage>. <pub-id pub-id-type="doi">10.1016/S2589-7500(21)00018-2</pub-id><pub-id pub-id-type="pmid">33766287</pub-id></mixed-citation></ref>
<ref id="B14"><label>14.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Stieger</surname> <given-names>M</given-names></name> <name><surname>Wepfer</surname> <given-names>S</given-names></name> <name><surname>R&#x00FC;egger</surname> <given-names>D</given-names></name> <name><surname>Kowatsch</surname> <given-names>T</given-names></name> <name><surname>Roberts</surname> <given-names>BW</given-names></name> <name><surname>Allemand</surname> <given-names>M</given-names></name></person-group>. <article-title>Becoming more conscientious or more open to experience? Effects of a two&#x2013;week smartphone&#x2013;based intervention for personality change</article-title>. <source>Eur J Pers</source>. (<year>2020</year>) <volume>34</volume>:<fpage>345</fpage>&#x2013;<lpage>66</lpage>. <pub-id pub-id-type="doi">10.1002/per.2267</pub-id></mixed-citation></ref>
<ref id="B15"><label>15.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Matz</surname> <given-names>SC</given-names></name> <name><surname>Beck</surname> <given-names>ED</given-names></name> <name><surname>Atherton</surname> <given-names>OE</given-names></name> <name><surname>White</surname> <given-names>M</given-names></name> <name><surname>Rauthmann</surname> <given-names>JF</given-names></name> <name><surname>Mroczek</surname> <given-names>DK</given-names></name></person-group>, et al. <article-title>Personality science in the digital age: the promises and challenges of psychological targeting for personalized behavior-change interventions at scale</article-title>. <source>Perspect Psychol Sci</source>. (<year>2024</year>) <volume>19</volume>:<fpage>1031</fpage>&#x2013;<lpage>56</lpage>. <pub-id pub-id-type="doi">10.1177/17456916231191774</pub-id><pub-id pub-id-type="pmid">37642145</pub-id></mixed-citation></ref>
<ref id="B16"><label>16.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Stieger</surname> <given-names>M</given-names></name> <name><surname>Allemand</surname> <given-names>M</given-names></name> <name><surname>Roberts</surname> <given-names>BW</given-names></name> <name><surname>Davis</surname> <given-names>JP</given-names></name></person-group>. <article-title>Mindful of personality trait change: are treatment effects on personality trait change ephemeral and attributable to changes in states?</article-title> <source>J Pers</source>. (<year>2022</year>) <volume>90</volume>:<fpage>375</fpage>&#x2013;<lpage>92</lpage>. <pub-id pub-id-type="doi">10.1111/jopy.12672</pub-id><pub-id pub-id-type="pmid">34486730</pub-id></mixed-citation></ref>
<ref id="B17"><label>17.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wrzus</surname> <given-names>C</given-names></name> <name><surname>Neubauer</surname> <given-names>AB</given-names></name></person-group>. <article-title>Ecological momentary assessment: a meta-analysis on designs, samples, and compliance across research fields</article-title>. <source>Assessment</source>. (<year>2023</year>) <volume>30</volume>:<fpage>825</fpage>&#x2013;<lpage>46</lpage>. <pub-id pub-id-type="doi">10.1177/10731911211067538</pub-id><pub-id pub-id-type="pmid">35016567</pub-id></mixed-citation></ref>
<ref id="B18"><label>18.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cox</surname> <given-names>WM</given-names></name> <name><surname>Klinger</surname> <given-names>E</given-names></name></person-group>. <article-title>Assessing current concerns and goals idiographically: a review of the motivational structure questionnaire family of instruments</article-title>. <source>J Clin Psychol</source>. (<year>2023</year>) <volume>79</volume>:<fpage>667</fpage>&#x2013;<lpage>82</lpage>. <pub-id pub-id-type="doi">10.1002/jclp.23256</pub-id><pub-id pub-id-type="pmid">34599846</pub-id></mixed-citation></ref>
<ref id="B19"><label>19.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wright</surname> <given-names>AG</given-names></name> <name><surname>Zimmermann</surname> <given-names>J</given-names></name></person-group>. <article-title>Applied ambulatory assessment: integrating idiographic and nomothetic principles of measurement</article-title>. <source>Psychol Assess</source>. (<year>2019</year>) <volume>31</volume>:<fpage>1467</fpage>&#x2013;<lpage>80</lpage>. <pub-id pub-id-type="doi">10.1037/pas0000685</pub-id><pub-id pub-id-type="pmid">30896209</pub-id></mixed-citation></ref>
<ref id="B20"><label>20.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fonseca-Pedrero</surname> <given-names>E</given-names></name> <name><surname>R&#x00F3;denas-Perea</surname> <given-names>G</given-names></name> <name><surname>P&#x00E9;rez-Alb&#x00E9;niz</surname> <given-names>A</given-names></name> <name><surname>Al-Halab&#x00ED;</surname> <given-names>S</given-names></name> <name><surname>P&#x00E9;rez</surname> <given-names>M</given-names></name> <name><surname>Mu&#x00F1;iz</surname> <given-names>J</given-names></name></person-group>. <article-title>La hora de la evaluaci&#x00F3;n ambulatoria</article-title>. <source>Papeles Psic&#x00F3;l</source>. (<year>2022</year>) <volume>43</volume>:<fpage>21</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.23923/pap.psicol.2983</pub-id></mixed-citation></ref>
<ref id="B21"><label>21.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>Z</given-names></name></person-group>. <article-title>The symptom checklist-90 (SCL-90) in China</article-title>. <source>Chin Ment Health J</source>. (<year>1984</year>) <volume>1</volume>:<fpage>68</fpage>&#x2013;<lpage>70</lpage>.</mixed-citation></ref>
<ref id="B22"><label>22.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>J</given-names></name> <name><surname>Zhang</surname> <given-names>X</given-names></name></person-group>. <article-title>Study on the reliability and validity of the Chinese version of the self-rating anxiety scale</article-title>. <source>Chin J Health Psychol</source>. (<year>2012</year>) <volume>20</volume>:<fpage>1723</fpage>&#x2013;<lpage>5</lpage>.</mixed-citation></ref>
<ref id="B23"><label>23.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lee</surname> <given-names>H</given-names></name> <name><surname>Chiu</surname> <given-names>H</given-names></name> <name><surname>Wing</surname> <given-names>Y</given-names></name> <name><surname>Leung</surname> <given-names>C</given-names></name> <name><surname>Kwong</surname> <given-names>P</given-names></name> <name><surname>Chung</surname> <given-names>D</given-names></name></person-group>. <article-title>The Zung self-rating depression scale: screening for depression among the Hong Kong Chinese elderly</article-title>. <source>J Geriatr Psychiatry Neurol</source>. (<year>1994</year>) <volume>7</volume>:<fpage>216</fpage>&#x2013;<lpage>20</lpage>. <pub-id pub-id-type="doi">10.1177/089198879400700404</pub-id><pub-id pub-id-type="pmid">7826489</pub-id></mixed-citation></ref>
<ref id="B24"><label>24.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>W</given-names></name> <name><surname>Wu</surname> <given-names>Q</given-names></name> <name><surname>Luo</surname> <given-names>C</given-names></name></person-group>. <article-title>Reliability and validity of the Chinese version of the Beck depression inventory-II among depression outpatients</article-title>. <source>Chin Ment Health J</source>. (<year>2015</year>) <volume>29</volume>:<fpage>44</fpage>&#x2013;<lpage>9</lpage>.</mixed-citation></ref>
<ref id="B25"><label>25.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jin</surname> <given-names>H</given-names></name> <name><surname>Wu</surname> <given-names>W</given-names></name> <name><surname>Zhang</surname> <given-names>M</given-names></name></person-group>. <article-title>Preliminary analysis of SCL-90 assessment results in normal Chinese</article-title>. <source>Chin J Nervous Ment Dis</source>. (<year>1986</year>) <volume>12</volume>:<fpage>260</fpage>&#x2013;<lpage>3</lpage>.</mixed-citation></ref>
<ref id="B26"><label>26.</label><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Gong</surname> <given-names>Y</given-names></name></person-group>. <source>Manual for Eysenck Personality Questionnaire (Revised Chinese version)</source>. <publisher-loc>Changsha</publisher-loc>: <publisher-name>Hunan Medical College Map Press</publisher-name> (<year>1986</year>).</mixed-citation></ref>
<ref id="B27"><label>27.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Qian</surname> <given-names>M</given-names></name> <name><surname>Wu</surname> <given-names>G</given-names></name> <name><surname>Zhu</surname> <given-names>R</given-names></name> <name><surname>Zhang</surname> <given-names>S</given-names></name></person-group>. <article-title>Development of the revised Eysenck personality questionnaire short scale for Chinese (EPQ-RSC)</article-title>. <source>Acta Psychol Sin</source>. (<year>2000</year>) <volume>32</volume>:<fpage>317</fpage>&#x2013;<lpage>23</lpage>.</mixed-citation></ref>
<ref id="B28"><label>28.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dai</surname> <given-names>X</given-names></name> <name><surname>Wu</surname> <given-names>Y</given-names></name></person-group>. <article-title>A study on the revision of 16PF in China</article-title>. <source>Psychol Sci</source>. (<year>1988</year>) <volume>11</volume>:<fpage>26</fpage>&#x2013;<lpage>32</lpage>.</mixed-citation></ref>
<ref id="B29"><label>29.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>J</given-names></name> <name><surname>Yu</surname> <given-names>L</given-names></name></person-group>. <article-title>A study on the norms of 16PF for Chinese college students</article-title>. <source>Chin J Appl Psychol</source>. (<year>1998</year>) <volume>4</volume>:<fpage>22</fpage>&#x2013;<lpage>6</lpage>.</mixed-citation></ref>
<ref id="B30"><label>30.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>J</given-names></name> <name><surname>Miao</surname> <given-names>D</given-names></name></person-group>. <article-title>The reliability and validity of the Chinese version of the Myers-Briggs type indicator</article-title>. <source>Chin J Clin Psychol</source>. (<year>2007</year>) <volume>15</volume>:<fpage>240</fpage>&#x2013;<lpage>2</lpage>.</mixed-citation></ref>
<ref id="B31"><label>31.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gegenfurtner</surname> <given-names>A</given-names></name></person-group>. <article-title>Bifactor exploratory structural equation modeling: a meta-analytic review of model fit</article-title>. <source>Front Psychol</source>. (<year>2022</year>) <volume>13</volume>:<fpage>1037111</fpage>. <pub-id pub-id-type="doi">10.3389/fpsyg.2022.1037111</pub-id><pub-id pub-id-type="pmid">36389589</pub-id></mixed-citation></ref>
<ref id="B32"><label>32.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ma</surname> <given-names>W</given-names></name> <name><surname>Wang</surname> <given-names>C</given-names></name> <name><surname>Xiao</surname> <given-names>J</given-names></name></person-group>. <article-title>A testlet diagnostic classification model with attribute hierarchies</article-title>. <source>Appl Psychol Meas</source>. (<year>2023</year>) <volume>47</volume>:<fpage>183</fpage>&#x2013;<lpage>99</lpage>. <pub-id pub-id-type="doi">10.1177/01466216231165315</pub-id><pub-id pub-id-type="pmid">37113526</pub-id></mixed-citation></ref>
<ref id="B33"><label>33.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sellbom</surname> <given-names>M</given-names></name> <name><surname>Tellegen</surname> <given-names>A</given-names></name></person-group>. <article-title>Factor analysis in psychological assessment research: common pitfalls and recommendations</article-title>. <source>Psychol Assess</source>. (<year>2019</year>) <volume>31</volume>:<fpage>1428</fpage>&#x2013;<lpage>41</lpage>. <pub-id pub-id-type="doi">10.1037/pas0000623</pub-id><pub-id pub-id-type="pmid">31120298</pub-id></mixed-citation></ref>
<ref id="B34"><label>34.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Schuurman</surname> <given-names>NK</given-names></name> <name><surname>Hamaker</surname> <given-names>EL</given-names></name></person-group>. <article-title>Measurement error and person-specific reliability in multilevel autoregressive modeling</article-title>. <source>Psychol Methods</source>. (<year>2019</year>) <volume>24</volume>:<fpage>70</fpage>&#x2013;<lpage>91</lpage>. <pub-id pub-id-type="doi">10.1037/met0000188</pub-id><pub-id pub-id-type="pmid">30188157</pub-id></mixed-citation></ref>
<ref id="B35"><label>35.</label><mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Cervone</surname> <given-names>D</given-names></name> <name><surname>Pervin</surname> <given-names>LA</given-names></name></person-group>. <source>Personality: Theory and Research</source>. <publisher-loc>Hoboken, NJ</publisher-loc>: <publisher-name>John Wiley &#x0026; Sons</publisher-name> (<year>2022</year>).</mixed-citation></ref>
<ref id="B36"><label>36.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Beck</surname> <given-names>ED</given-names></name> <name><surname>Jackson</surname> <given-names>JJ</given-names></name></person-group>. <article-title>A mega-analysis of personality prediction: robustness and boundary conditions</article-title>. <source>J Pers Soc Psychol</source>. (<year>2022</year>) <volume>122</volume>:<fpage>523</fpage>&#x2013;<lpage>53</lpage>. <pub-id pub-id-type="doi">10.1037/pspp0000386</pub-id><pub-id pub-id-type="pmid">35157487</pub-id></mixed-citation></ref>
<ref id="B37"><label>37.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Russell</surname> <given-names>MA</given-names></name> <name><surname>Gajos</surname> <given-names>JM</given-names></name></person-group>. <article-title>Annual research review: ecological momentary assessment studies in child psychology and psychiatry</article-title>. <source>J Child Psychol Psychiatry</source>. (<year>2020</year>) <volume>61</volume>:<fpage>376</fpage>&#x2013;<lpage>94</lpage>. <pub-id pub-id-type="doi">10.1111/jcpp.13204</pub-id><pub-id pub-id-type="pmid">31997358</pub-id></mixed-citation></ref></ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by"><p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/776153/overview">Fabrizio Stasolla</ext-link>, Giustino Fortunato University, Italy</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by"><p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1176782/overview">Jonathan Mart&#x00ED;nez-L&#x00ED;bano</ext-link>, Universidad Andr&#x00E9;s Bello, Chile</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2144107/overview">Parimita Roy</ext-link>, Thapar Institute of Engineering and Technology (Deemed to be University), India</p></fn>
</fn-group>
<fn-group>
<fn fn-type="abbr" id="abbrev1"><label>Abbreviations:</label><p>AI, artificial intelligence; AIG, automatic item generation; AUC, area under the curve; CAT, computerized adaptive testing; Cohen&#x2019;s <italic>d</italic>, Cohen&#x2019;s <italic>d</italic> (effect size measure); EMA, ecological momentary assessment; EPQ, Eysenck personality questionnaire; HAM-A, Hamilton anxiety rating scale; HAM-D, Hamilton depression rating scale; ICT, information and communication technology; IRT, item response theory; LLM, large language model; MANOVA, multivariate analysis of variance; MBTI, Myers-Briggs type indicator; MSE, mean squared error; NLP, natural language processing; <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM56"><mml:msup><mml:mi>R</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, <italic>R</italic>-squared (coefficient of determination); RAG, retrieval augmented generation; SAS, self-rating anxiety scale; SCL-90, symptom checklist-90; SDS, self-rating depression scale; <italic>t</italic>-test, <italic>t</italic>-test (statistical hypothesis test); VAR, vector autoregression; 16PF, sixteen personality factor questionnaire.</p></fn>
</fn-group>
</back>
</article>