<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article article-type="research-article" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Digit. Health</journal-id><journal-title-group>
<journal-title>Frontiers in Digital Health</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Digit. Health</abbrev-journal-title></journal-title-group>
<issn pub-type="epub">2673-253X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fdgth.2025.1621271</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>MATRIX: Mental heAlth diagnostics Through Real time Intelligent unified X-AI attribution reasoning</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes"><name><surname>Ramnani</surname><given-names>Sweety</given-names></name><xref ref-type="aff" rid="aff1"/>
<xref ref-type="corresp" rid="cor1">&#x002A;</xref><uri xlink:href="https://loop.frontiersin.org/people/3052693/overview"/><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Roy</surname><given-names>Kaushik</given-names></name><xref ref-type="aff" rid="aff1"/><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
<contrib contrib-type="author"><name><surname>Sheth</surname><given-names>Amit</given-names></name><xref ref-type="aff" rid="aff1"/><uri xlink:href="https://loop.frontiersin.org/people/1477293/overview" /><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role><role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x0026; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x0026; editing</role></contrib>
</contrib-group>
<aff id="aff1"><institution>Computer Science Department, Artificial Intelligence Institute, University of South Carolina</institution>, <city>Columbia</city>, <state>SC</state>, <country country="us">United States</country></aff>
<author-notes>
<corresp id="cor1"><label>&#x002A;</label><bold>Correspondence:</bold> Sweety Ramnani <email xlink:href="mailto:sramnani@email.sc.edu">sramnani@email.sc.edu</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-13"><day>13</day><month>02</month><year>2026</year></pub-date>
<pub-date publication-format="electronic" date-type="collection"><year>2025</year></pub-date>
<volume>7</volume><elocation-id>1621271</elocation-id>
<history>
<date date-type="received"><day>30</day><month>04</month><year>2025</year></date>
<date date-type="rev-recd"><day>16</day><month>11</month><year>2025</year></date>
<date date-type="accepted"><day>08</day><month>12</month><year>2025</year></date>
</history>
<permissions>
<copyright-statement>&#x00A9; 2026 Ramnani, Roy and Sheth.</copyright-statement>
<copyright-year>2026</copyright-year><copyright-holder>Ramnani, Roy and Sheth</copyright-holder><license><ali:license_ref start_date="2026-02-13">https://creativecommons.org/licenses/by/4.0/</ali:license_ref><license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p></license>
</permissions>
<abstract>
<p>Escalating prevalence of mental health issues worldwide has created an unprecedented demand for mental healthcare services, yet the shortage of qualified practitioners limits accessibility for countless individuals in need. AI has emerged as a potential solution to support mental health professionals, offering assistance that goes beyond simple diagnostic aid. This research introduces a novel AI-powered real-time diagnostic support system&#x2014;MATRIX&#x2014;for mental healthcare diagnostics designed to interact with users using natural language and utilizing the Patient Health Questionnaire-9 (PHQ-9), a standardized clinical tool for assessing depressive symptoms. The system classifies the interaction into a well-defined checklist and generates the most likely diagnosis through a framework termed X-AI Attribution Reasoning, which provides explainable and attributable diagnostic logic for interdisciplinary clarity. Unlike existing diagnostic support systems that primarily rely on static scoring or predefined rule sets, MATRIX integrates explainable AI (XAI) principles to deliver interpretable reasoning pathways that clinicians can trace and validate. The PHQ-9 implementation within MATRIX has been tested in controlled clinical simulations, confirming its usability and alignment with real-world assessment practices. The system not only accelerates the diagnostic process but also provides transparent explanations, detailed reasoning for such diagnoses, and clinically relevant attributions linked to standard SNOMED Concept IDs, which can be directly utilized by clinicians for documentation, referrals, and electronic health record integration while maintaining data privacy. By offering this level of insight, the system fosters a trustworthy AI-human collaboration that aids clinicians in understanding and validating each diagnostic recommendation. 
Interpretability within MATRIX is achieved through visual attribution maps and narrative output summaries, ensuring that decision processes remain both transparent and clinically meaningful. The integration of these features enables practitioners to focus on patient care with the assurance that AI-assisted diagnostics align with clinical standards, resulting in reduced time spent per patient and enhanced patient throughput. Our preliminary findings indicate that MATRIX achieves over 89&#x0025; classification accuracy and high clinician satisfaction in pilot evaluations, demonstrating that AI-driven support systems with explainable, reasonable, and attributable real-time diagnostics can significantly enhance the capacity of mental health services and improve access to timely and effective care for those affected by mental health conditions. This study highlights the essential role of AI in enhancing both the efficiency and trustworthiness of mental health diagnostics in clinical settings, making a compelling case for the integration of AI into modern mental healthcare.</p>
</abstract>
<kwd-group>
<kwd>mental health</kwd>
<kwd>real-time diagnostics</kwd>
<kwd>attribution</kwd>
<kwd>explainable AI</kwd>
<kwd>PHQ-9</kwd>
<kwd>SNOMED-CT</kwd>
</kwd-group><funding-group><funding-statement>The author(s) declared that financial support was not received for this work and/or its publication.</funding-statement></funding-group><counts>
<fig-count count="5"/>
<table-count count="3"/><equation-count count="18"/><ref-count count="26"/><page-count count="11"/><word-count count="0"/></counts><custom-meta-group><custom-meta><meta-name>section-at-acceptance</meta-name><meta-value>Digital Mental Health</meta-value></custom-meta></custom-meta-group>
</article-meta>
</front>
<body><sec id="s1" sec-type="intro"><label>1</label><title>Introduction</title>
<p>Mental health providers face unprecedented labor shortages, with many agencies struggling to fill positions (<xref ref-type="bibr" rid="B1">1</xref>). According to new data released by the World Health Organization (WHO), more than 1 billion people are living with mental health disorders, with conditions such as anxiety and depression inflicting immense human and economic tolls. The global median number of mental health workers stands at 13 per 100,000 people, with extreme shortages in low- and middle-income countries (<xref ref-type="bibr" rid="B2">2</xref>). In the United States alone, nearly 96&#x0025; of counties have unmet needs for prescribers, indicating a critical shortage of available professionals, whereas in Mexico, only 1.1 psychiatrists per 100,000 population exist in the public sector, exacerbating the lack of adequate care for mental health disorders (<xref ref-type="bibr" rid="B3">3</xref>). This global treatment gap underscores that the shortage of mental health professionals is not limited to North America but represents a critical international public health concern. The shortage of mental healthcare providers significantly hampers the quality and accessibility of mental health services. This crisis is characterized by high vacancy rates, increased turnover, and an inadequate distribution of professionals, leading to the delay or cancelation of patient appointments and even the closure of wards in various regions (<xref ref-type="bibr" rid="B1">1</xref>). A severe shortage of mental health professionals, coupled with barriers like stigma, resource constraints, and geographical limitations, creates a considerable gap in the provision of effective support.</p>
<p>With the rise of AI, new opportunities have emerged to transform mental healthcare, offering innovative approaches that can bridge this gap (<xref ref-type="bibr" rid="B4">4</xref>). AI-powered chatbots and virtual assistants represent a significant breakthrough, providing accessible, scalable, and cost-effective alternatives to traditional mental health support. It is important to distinguish, however, between AI chatbots that provide conversational support and AI-based diagnostic tools that analyze clinical data for symptom detection and prediction. While chatbots primarily facilitate self-help and engagement, diagnostic systems aim to assist clinicians in making data-driven assessments, requiring higher levels of accuracy and clinical validation. These AI tools utilize natural language processing (NLP) and machine learning (ML) to simulate therapeutic conversations, helping users engage in a more personalized and supportive experience. AI-powered diagnostic systems, on the other hand, analyze multimodal clinical data&#x2014;such as patient narratives, behavioral markers, and physiological parameters&#x2014;to identify symptom patterns and support clinical decision-making. AI-powered mental health diagnostics systems offer promising solutions to improve mental health diagnostics, as these systems can assist by analyzing vast amounts of data to improve predictions by differentiating between similar symptoms, aiding therapists in accurate diagnoses, and improving patient treatment outcomes (<xref ref-type="bibr" rid="B5">5</xref>). This capability facilitates the early detection of mental health conditions and enables timely interventions, ultimately leading to improved patient outcomes while enhancing monitoring capabilities (<xref ref-type="bibr" rid="B6">6</xref>).</p>
<p>However, the use of AI in mental health diagnostics is not without its challenges. Major challenges include clinicians&#x2019; trust, ethical concerns, and the opaque &#x201C;black box&#x201D; nature of AI. Explainable artificial intelligence (X-AI) addresses these issues to a certain extent by demystifying AI decision-making, allowing clinicians to understand the reasoning behind system-generated diagnoses, which helps foster trust in the technology (<xref ref-type="bibr" rid="B4">4</xref>). By focusing on X-AI-driven transparency, such systems can not only build trust but also make mental health resources more accessible to underserved populations by providing 24/7 support and overcoming geographical barriers and stigma associated with mental health treatments (<xref ref-type="bibr" rid="B7">7</xref>).</p>
<p>Furthermore, it is noted that the validation of AI models is essential to ensure their performance aligns with real-world clinical environments, as accuracy metrics may not fully capture their effectiveness (<xref ref-type="bibr" rid="B8">8</xref>). The lack of standardized clinical attribution across various diagnostic models makes it more challenging to integrate with existing healthcare diagnostic systems. This research presents a novel, systematic, clinically relevant approach that unifies the power of ML with human-interpretable, AI-generated explanations and reasoning capabilities, clinically grounded through Systematized Nomenclature of Medicine&#x2014;Clinical Terms (SNOMED CT) attribution. SNOMED CT provides a globally recognized, standardized vocabulary that enhances interpretability and ensures interoperability across healthcare systems, thereby facilitating the seamless exchange of clinically meaningful data. Integrating SNOMED CT (<xref ref-type="bibr" rid="B9">9</xref>) for structured, evidence-based attribution can further enhance diagnostic accuracy and contextual relevance. By adding a layer of clinically relevant attribution to the existing X-AI layer, the system could improve the clinical reliability of AI-assisted mental health diagnostic interventions and encourage broader acceptance within existing healthcare diagnostic systems.</p>
</sec>
<sec id="s2"><label>2</label><title>Literature review</title>
<p>The integration of AI within healthcare systems has been widely advocated for its potential to enhance personalization, contextual adaptation, and scalability in mental health interventions (<xref ref-type="bibr" rid="B10">10</xref>). AI-powered chatbots and automated systems can reduce clinician workload, streamline routine processes, and extend access to mental healthcare without incurring proportional increases in cost. These benefits have positioned AI as a promising tool for broad-reaching support in mental health services. However, these advantages must be balanced against ongoing challenges including data privacy concerns, algorithmic bias, a lack of transparency, and the need for rigorous clinical validation (<xref ref-type="bibr" rid="B7">7</xref>, <xref ref-type="bibr" rid="B11">11</xref>).</p>
<p>To address transparency and trust concerns, X-AI techniques such as Local Interpretable Model-Agnostic Explanations (LIME)&#x2014;which approximates a complex model locally to explain individual predictions&#x2014;and SHapley Additive exPlanations (SHAP)&#x2014;which assigns each feature an importance value based on cooperative game theory&#x2014;have been introduced (<xref ref-type="bibr" rid="B12">12</xref>). These tools help clinicians understand the features driving AI predictions, thereby enhancing diagnostic precision and improving resource allocation (<xref ref-type="bibr" rid="B13">13</xref>). Their adoption is essential in mitigating risks associated with opaque decision-making and potential misdiagnosis, ultimately supporting patient welfare (<xref ref-type="bibr" rid="B14">14</xref>). Recent studies of techniques such as LIME and SHAP highlight both their strengths (e.g., SHAP&#x2019;s consistency) and their limitations (e.g., LIME&#x2019;s instability), emphasizing the need for careful selection of explanation methods depending on the clinical context. As AI systems become increasingly pervasive in mental healthcare, ethical and regulatory considerations gain critical importance (<xref ref-type="table" rid="A1">Algorithm</xref>). Clinical validation ensures that AI-driven diagnostics meet standards of transparency, safety, and accuracy, while proper clinical attribution enhances trust between clinicians and patients (<xref ref-type="bibr" rid="B11">11</xref>). Attribution methods&#x2014;used to assess feature importance and understand model decisions&#x2014;are crucial for mitigating algorithmic bias and ensuring equitable care for diverse populations (<xref ref-type="bibr" rid="B15">15</xref>). Clear attribution frameworks also strengthen the ethical foundation of AI-based mental health diagnostics by improving transparency and accountability (<xref ref-type="bibr" rid="B16">16</xref>, <xref ref-type="bibr" rid="B17">17</xref>). 
SNOMED CT plays an increasingly central role in advancing the reliability, interoperability, and clinical relevance of AI systems. As a comprehensive, standardized clinical terminology, SNOMED CT enhances the alignment of AI-generated insights with globally recognized medical standards, improving both accuracy and trust (<xref ref-type="bibr" rid="B18">18</xref>). The linkage between SNOMED CT and model outputs can occur through both <italic>post hoc</italic> mapping, where predicted diagnostic terms are translated into SNOMED concepts, and integration during model training, where structured terminology is embedded directly into the model to guide representation learning and terminology-aware predictions. This dual integration pathway enhances the interpretability and utility of explanation tools such as LIME and SHAP. Recent empirical studies demonstrate that embedding SNOMED CT into AI pipelines improves fairness by reducing terminology-related bias and enhances diagnostic consistency across healthcare settings (<xref ref-type="bibr" rid="B19">19</xref>). Case studies have shown that SNOMED CT-driven models yield improved clinical decision support outcomes, particularly in mental health contexts where linguistic ambiguity often complicates diagnosis. Furthermore, the structured representation of patient symptoms supports personalized and context-specific treatment planning while enabling scalable, interoperable, and clinically meaningful AI applications. Thus, SNOMED CT-based clinical attribution provides a robust foundation for trustworthy X-AI, addressing key challenges related to transparency, fairness, and ethical compliance in mental health diagnostics.</p>
</sec>
<sec id="s3"><label>3</label><title>Demystifying PHQ-9 and SNOMED CT</title>
<sec id="s3a"><label>3.1</label><title>Patient health questionnaire</title>
<p>PHQ-9 is a widely used, standardized self-administered tool for assessing the severity of depression. It consists of nine questions based on the diagnostic criteria for major depressive disorder outlined in the Diagnostic and Statistical Manual of Mental Disorders (DSM) (<xref ref-type="bibr" rid="B20">20</xref>). Each question asks the respondent to rate the frequency of specific depressive symptoms over the last 2 weeks on a scale from 0 (not at all) to 3 (nearly every day). PHQ-9 scores range from 0 to 27, with higher scores indicating greater severity of depression. The major categories are as follows: 0&#x2013;4, minimal depression; 5&#x2013;9, mild depression; 10&#x2013;14, moderate depression; 15&#x2013;19, moderately severe depression; and 20&#x2013;27, severe depression. The tool is commonly used in clinical settings to screen for depression, monitor treatment progress, and inform initial diagnostic decisions. The standardized format is shown in <xref ref-type="fig" rid="F1">Figure&#x00A0;1</xref>.</p>
<fig id="F1" position="float"><label>Figure&#x00A0;1</label>
<caption><p>Standardized PHQ-9 checklist. Reproduced from &#x201C;<ext-link ext-link-type="uri" xlink:href="https://www.phqscreeners.com/images/sites/g/files/g10060481/f/201412/PHQ-9_English.pdf">Patient Health Questionnaire (PHQ) Screeners</ext-link>&#x201D; by Drs. Robert L. Spitzer, Janet B.W. Williams, Kurt Kroenke and colleagues.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1621271-g001.tif"><alt-text content-type="machine-generated">Patient Health Questionnaire (PHQ-9) for assessing mental health. Lists nine problems to evaluate over the past two weeks, scored from zero (not at all) to three (nearly every day). Includes columns for adding the scores and a total score section.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s3b"><label>3.2</label><title>SNOMED CT</title>
<p>SNOMED CT is the world&#x2019;s most comprehensive, multilingual clinical terminology system (<xref ref-type="bibr" rid="B21">21</xref>). It contains over 300,000 concepts with associated terms, synonyms, attributes, and hierarchical relationships. SNOMED CT is a designated U.S. standard for electronic health information exchange and is essential for accurately capturing patient problems, medical histories, and clinical documentation in electronic health records (EHRs).</p>
<p>SNOMED CT supports semantic interoperability by standardizing how clinical information is represented and exchanged across healthcare systems. Each concept in SNOMED CT (<xref ref-type="bibr" rid="B9">9</xref>) corresponds to a unique clinical meaning and is referenced using a machine-readable numeric concept identifier that is not inherently interpretable by clinicians. For example, the concept &#x201C;Depressed mood (finding)&#x201D; is represented by the concept ID 366979004, providing an unambiguous reference for documentation, analysis, and integration into computational systems. <xref ref-type="fig" rid="F2">Figure&#x00A0;2</xref> illustrates the hierarchical structure through which SNOMED CT organizes clinical concepts into expressive and computable relationships. Together, PHQ-9 and SNOMED CT provide the clinical foundation of our system: PHQ-9 supplies structured symptom data, while SNOMED CT ensures standardized terminology and semantic consistency for diagnostic interpretation.</p>
<fig id="F2" position="float"><label>Figure&#x00A0;2</label>
<caption><p>Standardized SNOMED CT.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1621271-g002.tif"><alt-text content-type="machine-generated">Diagram of SNOMED CT design. It includes components like Concepts, Hierarchies, Attributes, Identifiers, Descriptions, and Relationships. The hierarchy section lists categories such as Body Structure and Clinical Finding. An example trace follows "Heart Failure" through hierarchies with related terms like "Weak Heart." It also illustrates relationships, showing how "Arthropathy" connects to "Joint Finding" and how attributes relate different hierarchies, e.g., "Appendicitis" to "Inflammation."</alt-text>
</graphic>
</fig>
</sec>
</sec>
<sec id="s4"><label>4</label><title>Our approach</title>
<p>The MATRIX framework is depicted in <xref ref-type="fig" rid="F3">Figure&#x00A0;3</xref>. The algorithm underlying this tool leverages NLP and ML techniques to detect mental health symptoms based on conversational responses with the respondent in real time. The algorithmic steps and specifications of various data resources are detailed in the following sections. User responses are parsed in real time through an NLP pipeline that identifies significant linguistic markers, extracting key features such as emotional cue indicators and patterns, which are then analyzed using the PHQ-9 SYMPTOM ONTOLOGY dataset (detailed in the next section) to enhance the precision of PHQ-9 questionnaire classification.</p>
<fig id="F3" position="float"><label>Figure&#x00A0;3</label>
<caption><p>MATRIX framework.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1621271-g003.tif"><alt-text content-type="machine-generated">Diagram showing a process titled "MATRIX" for diagnostic output. It includes user input leading to data preprocessing. The process involves natural language processing, systematized nomenclature of medicine clinical terms (SNOMED CT), patient health questionnaire (PHQ-9) checklist, and reasoning through explainable AI.</alt-text>
</graphic>
</fig>
<p>Users&#x2019; current state of mind, shared in a natural language format and parsed through an NLP pipeline, reaches the next stage, where it is further analyzed&#x2014;guided by questions rooted in the standardized PHQ-9 questionnaire, screening depressive symptoms and assessing symptom severity (as per the major categories described in the previous section) based on the input received from the NLP pipeline.</p>
<p>This severity score guides the rule-based internal system in generating the most likely diagnosis. To achieve an accurate diagnosis based on symptom severity scores, each depressive symptom feature is weighted and assessed by a five-layered (obtained through hyperparameter testing) neural network model trained on a vast dataset of the PRIMATE ontology, which contains widely annotated datasets. This system-generated diagnosis is then fed to a reasoning layer, which draws on clinical guidelines to understand why specific symptoms have been flagged and generates explanations with the use of a language model (a BERT model in this case) to ensure privacy of users&#x2019; data (more details are provided in the Results section), enhancing the system&#x2019;s interpretability and helping clinicians better understand the rationale behind a specific diagnosis.</p>
<p>Further refining its diagnostic utility, our system attributes detected symptoms and diagnoses by fetching relevant SNOMED CT IDs in real time, which are recognized clinical codes used in EHRs worldwide. By dynamically associating symptoms with SNOMED CT identifiers, the algorithm ensures that each AI-based classification and diagnosis aligns with clinically accepted mental health diagnoses, providing a direct pathway from initial symptom detection to actionable diagnostic insights.</p>
<p>The MATRIX model (flowchart depicted in <xref ref-type="fig" rid="F4">Figure&#x00A0;4</xref>) dynamically evaluates user input in real time, mapping it onto a spectrum of symptom severity, generating the most likely diagnosis, and embedding a layer of reasoning that offers transparent explanations for the system&#x2019;s decision and attributing the diagnosis with a specific clinically relevant SNOMED concept identifier. Mental health practitioners can thus access these AI-generated recommendations, along with clinically relevant justifications, all in real time, enabling them to review, adjust, or supplement the AI suggestions seamlessly.</p>
<fig id="F4" position="float"><label>Figure&#x00A0;4</label>
<caption><p>MATRIX flowchart.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1621271-g004.tif"><alt-text content-type="machine-generated">Flowchart of a real-time mental health care diagnostic system. It consists of three sections: PHQ-9 classification, rule-based reasoning, and SNOMED CT attribution. Patient input undergoes natural language processing within the PHQ-9 classification to generate a checklist. System training uses a PHQ-9 classifier and PRIMATE dataset. Rule-based reasoning uses clinical rules to provide reasoning via explainable AI. SNOMED CT attribution utilizes a concept identifier for clinical attribution. The final output is an AI-generated clinical diagnostic, delivered to the clinician. The MATRIX interface connects each process.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s5"><label>5</label><title>Data resources</title>
<sec id="s5a"><label>5.1</label><title>PRIMATE dataset</title>
<p>This dataset consists of 2,000 publicly available Reddit posts, each describing users&#x2019; mental health experiences and containing binary annotations (&#x201C;yes&#x201D;/&#x201C;no&#x201D;) indicating whether the text reflects PHQ-9 symptoms (S1&#x2013;S9). The dataset predominantly includes posts from English-speaking users aged 18&#x2013;35 years, based on platform demographics reported in prior studies, although exact demographic identifiers are not included to maintain anonymity. As the dataset was sourced from existing public-domain Reddit posts, no additional informed consent was required; the original study complied with Reddit&#x2019;s terms of service, and ethical approval was obtained at the dataset creation stage (<xref ref-type="bibr" rid="B22">22</xref>). Annotations were contributed by five crowd-workers, with an interannotator agreement of 67&#x0025;. To strengthen reliability, mental health professionals conducted a secondary evaluation, reaching an agreement of 85&#x0025;. In addition, the original dataset reports Cohen&#x2019;s &#x03BA; = 0.71 (95&#x0025; CI: 0.68&#x2013;0.74), indicating substantial interannotator agreement and reinforcing the quality of labeled PHQ-9 symptom data.</p>
</sec>
<sec id="s5b"><label>5.2</label><title>PHQ-9 symptom ontology</title>
<p>This ontology contains more than 500 dictionary words categorized based on similarity and semantic closeness to symptoms corresponding to each of the nine standard PHQ-9 questions (<xref ref-type="bibr" rid="B20">20</xref>). The ontology was compiled through a hybrid approach: expert consensus from licensed psychologists who reviewed symptom clusters, lexicon-based expansion using standard psychological terminology, and NLP-assisted validation, in which embeddings were used to verify semantic proximity between terms. This multistage construction process ensures clinical coherence and strong alignment between textual symptom cues and the nine PHQ-9 diagnostic categories. For illustration, the term &#x201C;loss of interest&#x201D; is mapped to PHQ-9 item 1 through similarity scoring and expert validation.</p>
</sec>
<sec id="s5c"><label>5.3</label><title>SNOMED CT database</title>
<p>The SNOMED CT lexicon includes over 50,000 clinical terms and their associated concept identifiers, as maintained by SNOMED International (UK). Each entry contains a unique concept ID and a case-significance flag for standardized usage across EHR systems (<xref ref-type="bibr" rid="B9">9</xref>). In this system, the SNOMED CT database is accessed locally rather than via API to avoid transmitting clinical text externally. This approach minimizes privacy risks and improves performance during real-time inference. &#x201C;Closest match&#x201D; retrieval is performed using a semantic similarity algorithm based on cosine similarity between term embeddings, supplemented with rule-based filters to ensure medically valid matches. For example, when the model identifies &#x201C;moderate depression,&#x201D; the reasoning output links PHQ-9 items (e.g., elevated scores in S1, S2, and S6) to the SNOMED concept &#x201C;Moderate major depression (ID: 370143000),&#x201D; along with an explanation of the contributing symptoms. Clinicians retain full authority to edit or override any system-generated SNOMED CT mappings; their feedback is logged and incorporated into subsequent fine-tuning cycles, supporting continuous improvement and clinical safety.</p>
</sec>
</sec>
<sec id="s6"><label>6</label><title>Methodology</title>
<table-wrap id="A1" position="float"><label>Algorithm</label>
<table frame="hsides" rules="groups">
<colgroup>
<col align="left"/>
</colgroup>
<tbody>
<tr>
<td valign="top" align="left">
<list list-type="simple">
<list-item>
<p>Initial system training: The system begins with initial training on the PHQ-9 PRIMATE dataset. It learns to identify relevant features from annotated data. Once trained, the system can create the PHQ-9 checklist by itself from user input.</p></list-item>
<list-item>
<p>NLP preprocessing component: Tokenization, stop word removal, and feature extraction techniques are applied to the raw input to create embeddings, which are further processed at the next stage.</p></list-item>
<list-item>
<p>Embedding component: Embeddings are dense, low-dimensional vector representations of words, sentences, and documents. These vectors capture semantic meanings and relationships between words or phrases. This component creates embeddings for input tokens. This is done to contextualize symptoms based on the user&#x2019;s input responses. It extracts key symptom indicators and sentiments that correlate with depression criteria.</p></list-item>
<list-item>
<p>PHQ-9 checklist generation: The MATRIX system, pre-trained in step 1 to generate the PHQ-9 checklist, processes each question iteratively and determines the answer as &#x201C;Yes&#x201D; (if the symptom is present) or &#x201C;No&#x201D; (if the symptom is absent) based on the embeddings input. It leverages the PHQ-9 symptom ontology for reference.</p></list-item>
<list-item>
<p>PHQ-9 scoring module: In the next stage, embeddings for the clinical rules are created. The system takes those embeddings, along with the system-generated PHQ-9 checklist from the previous stage, and calculates the PHQ-9 severity score to determine depression severity. At this stage, the score is used to calculate diagnostic thresholds (e.g., mild, moderate, moderately severe, severe depression), and an appropriate diagnosis is indicated.</p></list-item>
<list-item>
<p>Reasoning component: After the system provides a diagnosis and a checklist, the capability of large language models is leveraged to provide the reasoning and generate explanations in natural language for clinicians&#x2019; aid.</p></list-item>
<list-item>
<p>Diagnostic clinical attribution: This stage maps the system-generated diagnosis with SNOMED Concept IDs to ensure standardization in clinical terminology, providing clinicians with clear attributions for each diagnosis. It refers to the SNOMED CT database to find the closest match and fetches the concept identifier in real time.</p></list-item>
<list-item>
<p>Explainability (X-AI) layer: It incorporates human-readable explanations (from the previous layer) of diagnostic results. It includes the symptoms and responses that contributed to the overall PHQ-9 severity score, integrating <italic>highlighted</italic> symptoms and relevant SNOMED IDs, to enhance transparency and trust in system-generated diagnoses.</p></list-item>
<list-item>
<p>Diagnosis output: The system creates a complete clinical diagnosis incorporating the PHQ-9 checklist, symptom severity score, severity level, and reasoning through proper explanations and attribution with an appropriate SNOMED CT identifier in an intuitive format.</p></list-item>
</list></td>
</tr>
</tbody>
</table>
</table-wrap>
<p>MATRIX, as a comprehensive unified interface, sets a new standard in digital mental health support by enabling rapid assessments and AI-driven responses. By streamlining the initial evaluation process, it reduces consultation time, allowing mental health professionals to concentrate on critical interventions. In addition, this approach significantly enhances accessibility, extending support to underserved populations through digital platforms. It empowers practitioners with reliable, data-driven insights for well-informed decision-making. By automating diagnostic documentation and structuring responses for a quick review, the system optimizes the efficiency of mental healthcare services, improving patient throughput and saving valuable clinician time.</p>
</sec>
<sec id="s7"><label>7</label><title>System technical specification</title>
<p>The system integrates components for data preprocessing, feature extraction using transformers, and classification using a custom-built neural network. The system accepts input from a JSON file containing textual posts (post&#x005F;text) for initial system training and the PHQ-9 symptom ontology file, with columns representing PHQ-9 questions and their associated symptoms. The data preprocessing pipeline iterates through the posts, identifying relevant symptoms from the ontology, and associates them with corresponding PHQ-9 questions. Feature extraction and embedding generation: Text embeddings are generated using the Sentence Transformers (all-MiniLM-L6-v2) for semantic representation. Input text is tokenized and padded to 128 tokens, and mean pooling is applied over token embeddings to produce a fixed-size vector representation.</p>
<p>Further, a feedforward neural network is employed for multiclass classification. The architecture comprises an input layer that matches the dimensionality of the embedding vectors, a fully connected hidden layer with 128 neurons and ReLU activation, and an output layer that provides a probability distribution across PHQ-9 questions using the Softmax activation function. For classification decisions, a confidence threshold of 0.55 is applied. Predictions with probabilities below this threshold are suppressed to reduce false positives; otherwise, raw probabilities contribute to downstream reasoning. The network is optimized using the Adam optimizer with a learning rate of 0.001 (obtained through hyperparameter testing), and the loss is calculated using the CrossEntropyLoss function. The model is trained for 32 epochs, optimizing weights through backpropagation. The evaluation is performed by comparing predicted classifications with true labels for the held-out test set to assess performance. Moreover, to generate a diagnosis, the system counts the occurrences of predicted PHQ-9 questions and identifies patterns that indicate major depressive symptoms. The diagnostic results summarize the predominant questions and their frequency, providing insights into symptom distribution. The model generates predictions in the form of a system-generated PHQ-9 checklist and most likely diagnosis.</p>
<p>For the next layer of reasoning, the system is designed to leverage a pretrained PyTorch-based model to analyze input from the previous layer and produce diagnostic explanations, complete with system evaluation metrics. The system processes the inputs through a trained model (described above) and tokenizer. The open-source model BERT-base-uncased, a standard 110M-parameter pretrained model, is utilized, which provides textual reasoning aligned with observed symptoms in the previous layer. For each question in the checklist, the system offers explanations, linking and highlighting the detected patterns (cues) to the corresponding PHQ-9 criteria. Diagnostic reasoning is contextualized, ensuring that the level of depression aligns with PHQ-9 standards and observed symptom patterns, which diminishes the occurrence of hallucinations. Sample system input, output, and prompt formats are presented in <xref ref-type="fig" rid="F5">Figure&#x00A0;5</xref>.</p>
<fig id="F5" position="float"><label>Figure&#x00A0;5</label>
<caption><p>System input, output, and prompt formats.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="fdgth-07-1621271-g005.tif"><alt-text content-type="machine-generated">Text interface showing a mental health diagnostic prompt format for an AI assistant. Includes sections for user input, system-generated PHQ-9 checklist, diagnosis, and highlighted text indicating depression symptoms. The diagnosis is "Mild Depression" with a SNOMED-CT code of 310495003. Highlighted phrases indicate key concerns like feeling unmotivated, unhappy, and replaceable, suggesting a need for support.</alt-text>
</graphic>
</fig>
<p>The attribution layer matches and attributes system diagnoses against SNOMED CT terminology in real time using both regular expression filtering and semantic embeddings. The system is designed to enhance the precision of diagnosis-to-concept mapping by leveraging Sentence Transformers for semantic similarity calculations. The input from the previous layers is tokenized and converted to lowercase to improve matching accuracy. The diagnosis is divided into keywords, and a regex pattern is constructed. The &#x201C;term&#x201D; column in the SNOMED dataset is filtered for matches containing any of the keywords, ensuring broader term coverage. If multiple terms match, the system uses Sentence Transformers to compute embeddings for the matched terms and the diagnosis. Cosine similarity scores are calculated to identify the term most semantically similar to the diagnosis input. Once the closest match is determined (presented as a similarity score), the system extracts and returns relevant information, including &#x201C;moduleId,&#x201D; &#x201C;conceptId,&#x201D; &#x201C;languageCode,&#x201D; and the matched SNOMED concept. A detailed message is included in the output to enhance interpretability, as depicted in <xref ref-type="table" rid="T1">Table&#x00A0;1</xref>.</p>
<table-wrap id="T1" position="float"><label>Table&#x00A0;1</label>
<caption><p>System diagnosis information for the attribution layer.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="left"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">SNOMED</th>
<th valign="top" align="center" colspan="2">Diagnosis input: major depression</th>
</tr>
<tr>
<th valign="top" align="center">CT detail</th>
<th valign="top" align="center">Field</th>
<th valign="top" align="center">Sample values</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="left">ModuleId</td>
<td valign="top" align="center">900000000000207008</td>
</tr>
<tr>
<td valign="top" align="left">2</td>
<td valign="top" align="left">ConceptId</td>
<td valign="top" align="center">123456789</td>
</tr>
<tr>
<td valign="top" align="left">3</td>
<td valign="top" align="left">LanguageCode</td>
<td valign="top" align="center">en</td>
</tr>
<tr>
<td valign="top" align="left">4</td>
<td valign="top" align="left">TypeId</td>
<td valign="top" align="center">900000000000003001</td>
</tr>
<tr>
<td valign="top" align="left">5</td>
<td valign="top" align="left">CaseSignificanceId</td>
<td valign="top" align="center">900000000000448009</td>
</tr>
<tr>
<td valign="top" align="left">6</td>
<td valign="top" align="left">Snomed&#x005F;concept</td>
<td valign="top" align="center">Major depression</td>
</tr>
<tr>
<td valign="top" align="left">7</td>
<td valign="top" align="left">Similarity score</td>
<td valign="top" align="center">0.91</td>
</tr>
<tr>
<td valign="top" align="left">8</td>
<td valign="top" align="left"><italic>*Message</italic></td>
<td valign="top" align="center">This is system-generated diagnosis. More information can be found in the following link: <ext-link ext-link-type="uri" xlink:href="https://www.snomed.org/get-snomed">https://www.snomed.org/get-snomed</ext-link></td>
</tr>
</tbody>
</table>
</table-wrap>
<p>System testing is carried out on a subset (44 samples) of manually validated diagnosis-to-SNOMED mappings. The combination of regex filtering and semantic embeddings ensures improved accuracy. We believe that with the proposed enhancements, the system can become a highly reliable tool for mapping clinical data. To support clinical deployment, the system reports the following: average inference time per input, 38&#x2009;ms on an NVIDIA T4 GPU; end-to-end latency (preprocessing <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM1"><mml:mo stretchy="false">&#x2192;</mml:mo></mml:math></inline-formula> reasoning), 112&#x2009;ms; and peak memory usage during inference, 1.3&#x2009;GB. The code is made available as open source on <ext-link ext-link-type="uri" xlink:href="https://github.com/Sweenderella/MATRIX">https://github.com/Sweenderella/MATRIX</ext-link> for ease of reproducibility.</p>
</sec>
<sec id="s8"><label>8</label><title>Evaluation results and discussion</title>
<sec id="s8a"><label>8.1</label><title>Ground data generation and interpretation of metrics</title>
<p>Annotated instances from the PRIMATE dataset were used as ground data. The MATRIX system was rigorously tested against these ground datasets along with several state-of-the-art models, such as the Generative Pretrained Transformer 3 model with 175B parameters, Gemini with approximately 300B parameters, and Claude with approximately 70&#x2013;100B parameters. The system was given a standard initial prompt that clearly defined the task. A system-generated checklist for the standard questionnaire was recorded for every model under testing, and a confusion matrix was created. Performance was measured using standard evaluation metrics (refer to <xref ref-type="disp-formula" rid="disp-formula1">Equations 1</xref>&#x2013;<xref ref-type="disp-formula" rid="disp-formula4">4</xref>), where TP refers to true positive values, TN refers to true negative, and FP and FN refer to false positive and false negative values, respectively.<disp-formula id="disp-formula1"><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM1"><mml:mtext>Accuracy</mml:mtext><mml:mo>=</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mrow><mml:mi mathvariant="normal">TP</mml:mi></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">TN</mml:mi></mml:mrow></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="normal">TP</mml:mi></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">TN</mml:mi></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">FP</mml:mi></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">FN</mml:mi></mml:mrow></mml:mrow></mml:mfrac></mml:mrow></mml:math><label>(1)</label></disp-formula>Accuracy measures the proportion of correctly classified instances out of the total number of instances.<disp-formula id="disp-formula2"><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM2"><mml:mtext>Precision</mml:mtext><mml:mo>=</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mi 
mathvariant="normal">TP</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="normal">TP</mml:mi></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">FP</mml:mi></mml:mrow></mml:mrow></mml:mfrac></mml:mrow></mml:math><label>(2)</label></disp-formula>Precision quantifies the proportion of true positive predictions among all positive predictions, reflecting reliability.<disp-formula id="disp-formula3"><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM3"><mml:mtext>Recall</mml:mtext><mml:mo>=</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mi mathvariant="normal">TP</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mi mathvariant="normal">TP</mml:mi></mml:mrow><mml:mo>+</mml:mo><mml:mrow><mml:mi mathvariant="normal">FN</mml:mi></mml:mrow></mml:mrow></mml:mfrac></mml:mrow></mml:math><label>(3)</label></disp-formula>Recall measures the proportion of true positive instances that were correctly identified out of all actual positives.<disp-formula id="disp-formula4"><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="DM4"><mml:mi>F</mml:mi><mml:mn>1</mml:mn><mml:mo>=</mml:mo><mml:mn>2</mml:mn><mml:mo>&#x22C5;</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mtext>Precision</mml:mtext><mml:mo>&#x22C5;</mml:mo><mml:mtext>Recall</mml:mtext></mml:mrow><mml:mrow><mml:mtext>Precision</mml:mtext><mml:mo>+</mml:mo><mml:mtext>Recall</mml:mtext></mml:mrow></mml:mfrac></mml:mrow></mml:math><label>(4)</label></disp-formula>The F1 score is the harmonic mean of precision and recall, providing a balanced measure of the performance of a model, especially in imbalanced datasets. These findings are presented in <xref ref-type="table" rid="T2">Table&#x00A0;2</xref>.</p>
<table-wrap id="T2" position="float"><label>Table&#x00A0;2</label>
<caption><p>Performance metrics for PHQ-9 checklist generation.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Model</th>
<th valign="top" align="center">Accuracy</th>
<th valign="top" align="center">Precision</th>
<th valign="top" align="center">Recall</th>
<th valign="top" align="center">F1 score</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><bold>MATRIX</bold></td>
<td valign="top" align="center"><bold>0.89</bold></td>
<td valign="top" align="center"><bold>0.87</bold></td>
<td valign="top" align="center"><bold>0.91</bold></td>
<td valign="top" align="center"><bold>0.89</bold></td>
</tr>
<tr>
<td valign="top" align="left">GPT-3</td>
<td valign="top" align="center">0.88</td>
<td valign="top" align="center">0.87</td>
<td valign="top" align="center">0.89</td>
<td valign="top" align="center">0.88</td>
</tr>
<tr>
<td valign="top" align="left">Gemini</td>
<td valign="top" align="center">0.85</td>
<td valign="top" align="center">0.84</td>
<td valign="top" align="center">0.86</td>
<td valign="top" align="center">0.85</td>
</tr>
<tr>
<td valign="top" align="left">Claude</td>
<td valign="top" align="center">0.86</td>
<td valign="top" align="center">0.79</td>
<td valign="top" align="center">0.82</td>
<td valign="top" align="center">0.80</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TF1"><p>Bold values indicate improved PHQ-9 checklist generation performance, indicating greater predictive accuracy, balanced precision and recall, and more reliable identification of clinically relevant depressive symptoms compared to baseline models.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>The proposed model achieved an accuracy of 89&#x0025;, indicating its effectiveness in correctly classifying PHQ-9 checklist responses compared to a few other benchmark models. A precision score of 87&#x0025; reflects the ability of the model to reliably generate positive responses, minimizing false positives. With a recall score of 91&#x0025;, the model effectively identifies most relevant instances, reducing false negatives in checklist generation. The F1 score of 89&#x0025; demonstrates a balanced tradeoff between precision and recall, underscoring the robustness of the model for PHQ-9 checklist generation. The results confirm that the proposed model shows improvements against traditional benchmarks after targeted training with the PRIMATE dataset, particularly in recall and F1 score, underscoring its ability to accurately assess depression severity from input and align symptoms with clinically validated PHQ-9 standards.</p>
</sec>
<sec id="s8b"><label>8.2</label><title>Context reasoning understanding and evaluation metrics</title>
<p>To further assess the ability of the system to generate explanations and provide reasoning, we evaluated the performance of multiple pretrained models in our proposed model, including DistilBERT (<xref ref-type="bibr" rid="B23">23</xref>), BERT-Base (<xref ref-type="bibr" rid="B24">24</xref>), RoBERTa-Base and RoBERTa-Large (<xref ref-type="bibr" rid="B25">25</xref>), and DeBERTa-Base and DeBERTa-Large (<xref ref-type="bibr" rid="B26">26</xref>).</p>
<p>Despite the impressive performance of large language models (LLMs) like GPT-3 and Gemini in tasks involving complex reasoning, we chose not to use them, other than for testing our proposed models with ground data in the previous section, due to significant concerns regarding data privacy. LLMs require cloud-based processing, which can inadvertently expose sensitive user data, as the input is sent to external servers for processing. Given that our application, MATRIX, is designed as a standalone system that processes data locally, using LLMs would compromise the privacy and security of user information. In contrast, BERT-based models operate entirely within the local environment, ensuring that no data leaves the user&#x2019;s machine, thus maintaining strict privacy standards. Moreover, although LLMs exhibit exceptional reasoning capabilities, the BERT family of models has been shown to deliver highly accurate results in context understanding and reasoning tasks, while addressing the data privacy issues that are critical in healthcare and other sensitive domains. Thus, BERT-based models strike an optimal balance between performance and user data protection, making them a more suitable choice for our application.</p>
<p>To assess the contextual understanding and reasoning capabilities of our proposed model, we employed several advanced metrics, including cosine similarity, semantic similarity, and contextual relevance scores. These metrics allow us to quantify how well the model captures and responds to the contextual meaning of the input data.
<list list-type="simple">
<list-item>
<p><italic>Cosine similarity</italic> measures the similarity between two non-zero vectors by computing the cosine of the angle between them. It is commonly used to assess how closely related two texts are in terms of their semantic meaning. The formula for cosine similarity is given as follows:<disp-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="UDM1"><mml:mtext>Cosine similarity</mml:mtext><mml:mo>=</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mi>A</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:mi>B</mml:mi></mml:mrow><mml:mrow><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo><mml:mi>A</mml:mi><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo><mml:mi>B</mml:mi><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo></mml:mrow></mml:mfrac></mml:mrow></mml:math></disp-formula>where
<list list-type="simple">
<list-item>
<p><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM2"><mml:mi>A</mml:mi></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM3"><mml:mi>B</mml:mi></mml:math></inline-formula> are the embedding vectors of the input context and generated response, respectively.</p></list-item>
<list-item>
<p><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM4"><mml:mi>A</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:mi>B</mml:mi></mml:math></inline-formula> is the dot product of the two vectors.</p></list-item>
<list-item>
<p><inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM5"><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo><mml:mi>A</mml:mi><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM6"><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo><mml:mi>B</mml:mi><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo></mml:math></inline-formula> are the magnitudes (Euclidean norms) of vectors <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM7"><mml:mi>A</mml:mi></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM8"><mml:mi>B</mml:mi></mml:math></inline-formula>, respectively.</p></list-item>
</list>A cosine similarity score close to 1 indicates high semantic similarity, implying that the generated response is highly aligned with the input context, while a score closer to 0 suggests weak alignment.</p></list-item>
<list-item>
<p>The <italic>Semantic similarity score</italic> measures the degree to which two sentences or pieces of text convey the same meaning. Using advanced embedding models, such as Sentence-BERT or Universal Sentence Encoder, we computed the similarity between the semantic representations of the input context and the generated response. The similarity score is derived by comparing the cosine of the angle between the two sentence embeddings.<disp-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="UDM2"><mml:mtext>Semantic similarity score</mml:mtext><mml:mo>=</mml:mo><mml:mi>cos</mml:mi><mml:mo>&#x2061;</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mi>&#x03B8;</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mi>A</mml:mi><mml:mo>&#x22C5;</mml:mo><mml:mi>B</mml:mi></mml:mrow><mml:mrow><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo><mml:mi>A</mml:mi><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo><mml:mi>B</mml:mi><mml:mo fence="false" stretchy="false">&#x2016;</mml:mo></mml:mrow></mml:mfrac></mml:mrow></mml:math></disp-formula>where <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM9"><mml:mi>A</mml:mi></mml:math></inline-formula> and <inline-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="IM10"><mml:mi>B</mml:mi></mml:math></inline-formula> represent the semantic embeddings of the input and generated response, respectively. Higher values of semantic similarity indicate that the model has captured the contextual meaning and provided a more coherent and relevant explanation.</p></list-item>
<list-item>
<p>The <italic>Contextual relevance score</italic> (<italic>CRS</italic>) evaluates the extent to which the generated explanation maintains logical consistency with the input context. This score is computed using the contextual embedding model BERT, which assesses how well the explanation fits within the broader input context as per the following metric:<disp-formula><mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" id="UDM3"><mml:mtext>Context relevance</mml:mtext><mml:mo>=</mml:mo><mml:mrow><mml:mfrac><mml:mtext>Relevant contextual match</mml:mtext><mml:mtext>Total contextual potential</mml:mtext></mml:mfrac></mml:mrow></mml:math></disp-formula>A higher context relevance score reflects that the model&#x2019;s response is more relevant and logically consistent with the input context.</p></list-item>
</list>Given the importance of these metrics in evaluating the reasoning ability of the proposed system, we incorporated these scores into our evaluation framework. The results of this evaluation are detailed in <xref ref-type="table" rid="T3">Table&#x00A0;3</xref>. It is worth noting that we chose BERT-based models over LLMs due to privacy concerns. As MATRIX is a standalone application, processing data locally ensures that user data are not transmitted to external servers. This design decision guarantees privacy while still maintaining high performance in context understanding and reasoning tasks.</p>
<table-wrap id="T3" position="float"><label>Table&#x00A0;3</label>
<caption><p>Context reasoning evaluation for MATRIX.</p></caption>
<table>
<colgroup>
<col align="left"/>
<col align="center"/>
<col align="center"/>
<col align="center"/>
</colgroup>
<thead>
<tr>
<th valign="top" align="left">Model (parameters)</th>
<th valign="top" align="center">Cosine similarity</th>
<th valign="top" align="center">Semantic similarity score</th>
<th valign="top" align="center">Context relevance score</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">DistilBERT (66M)</td>
<td valign="top" align="center">0.85</td>
<td valign="top" align="center">0.80</td>
<td valign="top" align="center">0.75</td>
</tr>
<tr>
<td valign="top" align="left">DeBERTa-B (86M)</td>
<td valign="top" align="center">0.92</td>
<td valign="top" align="center">0.91</td>
<td valign="top" align="center">0.90</td>
</tr>
<tr>
<td valign="top" align="left">BERT-B (110M)</td>
<td valign="top" align="center">0.90</td>
<td valign="top" align="center">0.88</td>
<td valign="top" align="center">0.85</td>
</tr>
<tr>
<td valign="top" align="left">RoBERTa-B (125M)</td>
<td valign="top" align="center">0.91</td>
<td valign="top" align="center">0.89</td>
<td valign="top" align="center">0.88</td>
</tr>
<tr>
<td valign="top" align="left">DeBERTa-L (304M)</td>
<td valign="top" align="center"><bold>0.93</bold></td>
<td valign="top" align="center"><bold>0.92</bold></td>
<td valign="top" align="center"><bold>0.91</bold></td>
</tr>
<tr>
<td valign="top" align="left">RoBERTa-L (355M)</td>
<td valign="top" align="center">0.92</td>
<td valign="top" align="center">0.90</td>
<td valign="top" align="center">0.89</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TF2"><p>Bold values indicate stronger contextual reasoning performance, reflecting greater semantic alignment, improved contextual relevance, and more accurate representation of meaning through DeBERTa-L in the MATRIX framework.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>The results presented highlight the strong performance of these models in maintaining semantic alignment and contextual coherence across a diverse range of inputs. DeBERTa-L and DeBERTa-B, in particular, achieved the highest scores across all metrics, demonstrating their ability to generate contextually relevant and semantically consistent responses.</p>
<p>Offering clinicians an efficient, transparent, and clinically aligned diagnostic tool while maintaining users&#x2019; privacy allows mental health professionals to serve more patients with a higher degree of accuracy and confidence. This scalability is crucial amid the rising demand for mental health services and the shortage of qualified practitioners. Furthermore, by automating key aspects of the diagnostic process, the system frees clinicians to focus on in-depth patient care, potentially improving patient outcomes and satisfaction. Its alignment with clinical standards and emphasis on transparency also make it a trustworthy and adaptable tool, suitable for diverse healthcare settings where reliable, AI-assisted diagnostics are urgently needed.</p>
<p>It is worth noting that we are currently working to further improve the quality of the outcomes and are consulting mental healthcare domain experts to obtain a comprehensive report on human agreement scores. This study represents an encouraging preliminary exploration of the standalone MATRIX system as a mental health companion that offers real-time, clinically relevant assessments.</p>
</sec>
</sec>
<sec id="s9" sec-type="conclusions"><label>9</label><title>Conclusion</title>
<p>The implemented system provides a comprehensive framework for generating, classifying, and explaining PHQ-9 checklists. By leveraging advanced language models such as DeBERTa and RoBERTa, the system demonstrated state-of-the-art performance in aligning AI-generated outputs with established clinical standards. Rigorous evaluations using the PRIMATE dataset highlighted its ability to generate accurate, reliable, and interpretable diagnostics, making it a transformative tool in mental healthcare. The integration of standardized assessments, such as PHQ-9, ensures diagnostic consistency, while mapping AI-driven reasoning to real-time SNOMED CT concepts fosters clinical relevance and actionable insights. This capability not only reduces diagnostic time but also enhances clinicians&#x2019; trust in AI-driven recommendations, ultimately supporting more efficient workflows and improved patient outcomes.</p>
<p>By addressing critical challenges in mental health diagnostics, the system exemplifies the potential of X-AI to bridge the gap between technical innovation and clinical application while maintaining the privacy of users&#x2019; data. Its scalable and accessible design positions it as a valuable resource in addressing the growing demand for mental health services, particularly in underresourced and geographically dispersed populations. Moreover, the novel MATRIX interface enables real-time interaction and reasoning, enhancing both patient engagement and clinician efficiency. The study&#x2019;s emphasis on transparency, accuracy, and semantic reasoning ensures that the system not only delivers clinically reliable outputs but also supports practitioners in delivering high-quality care in diverse healthcare environments.</p>
</sec>
<sec id="s11" sec-type="data-availability"><title>Data availability statement</title>
<p>The datasets presented in this study can be found in online repositories. The names of the repository/repositories and accession number(s) can be found in the article/Supplementary Material.</p>
</sec>
</body>
<back>
<sec id="s12" sec-type="author-contributions"><title>Author contributions</title>
<p>SR: Conceptualization, Data curation, Investigation, Methodology, Software, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. KR: Formal analysis, Project administration, Resources, Validation, Writing &#x2013; review &#x0026; editing. AS: Funding acquisition, Project administration, Resources, Supervision, Validation, Visualization, Conceptualization, Writing &#x2013; review &#x0026; editing.</p>
</sec>
<sec id="s14" sec-type="COI-statement"><title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s15" sec-type="ai-statement"><title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence, and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec id="s16" sec-type="disclaimer"><title>Publisher&#x0027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list><title>References</title>
<ref id="B1"><label>1.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Louden</surname> <given-names>JE</given-names></name> <name><surname>Avila</surname> <given-names>A</given-names></name> <name><surname>Villar</surname> <given-names>O</given-names></name> <name><surname>Jung</surname> <given-names>H</given-names></name> <name><surname>Kosyluk</surname> <given-names>K</given-names></name> <name><surname>Flores</surname> <given-names>E</given-names></name></person-group>. <article-title>Self-stigma of mental illness among Latino people on the U.S.-M&#x00E9;xico border</article-title>. <source>Stigma Health</source>. (<year>2023</year>) <volume>8</volume>:<fpage>21</fpage>&#x2013;<lpage>30</lpage>. <pub-id pub-id-type="doi">10.1037/sah0000365</pub-id></mixed-citation></ref>
<ref id="B2"><label>2.</label><mixed-citation publication-type="other"><collab>World Health Organization</collab>. <article-title>Over a billion people living with mental health conditions &#x2013; services require urgent scale-up</article-title>. <comment>WHO News Release</comment>. <publisher-loc>Geneva</publisher-loc> (<year>2025</year>). <comment>Available online at:</comment> <ext-link ext-link-type="uri" xlink:href="https://www.who.int/news/item/02-09-2025-over-a-billion-people-living-with-mental-health-conditions-services-require-urgent-scale-up">https://www.who.int/news/item/02-09-2025-over-a-billion-people-living-with-mental-health-conditions-services-require-urgent-scale-up</ext-link> <comment>(Accessed January 5, 2026).</comment></mixed-citation></ref>
<ref id="B3"><label>3.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Barr&#x00F3;n-Vel&#x00E1;zquez</surname> <given-names>E</given-names></name> <name><surname>Mendoza-Velasquez</surname> <given-names>JJ</given-names></name> <name><surname>Mercado-Lara</surname> <given-names>A</given-names></name> <name><surname>Quijada-Gaytan</surname> <given-names>JM</given-names></name> <name><surname>Flores-V&#x00E1;zquez</surname> <given-names>JF</given-names></name></person-group>. <article-title>The mental health provider shortage in the Mexican public sector: 2023 estimates of psychiatrists and psychologists</article-title>. <source>Salud Ment</source>. (<year>2024</year>) <volume>47</volume>(<issue>4</issue>):<fpage>179</fpage>&#x2013;<lpage>87</lpage>. <pub-id pub-id-type="doi">10.17711/SM.0185-3325.2024.023</pub-id></mixed-citation></ref>
<ref id="B4"><label>4.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Abed</surname> <given-names>VN</given-names></name> <name><surname>Mohammed</surname> <given-names>N</given-names></name></person-group>. <article-title>Artificial intelligence (AI) generated health counseling for mental illness patients</article-title>. <source>Curr Psychiatry Res Rev</source>. (<year>2024</year>) <volume>21</volume>:<fpage>269</fpage>&#x2013;<lpage>83</lpage>. <pub-id pub-id-type="doi">10.2174/0126660822277500240109050359</pub-id></mixed-citation></ref>
<ref id="B5"><label>5.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Muetunda</surname> <given-names>F</given-names></name> <name><surname>Sabry</surname> <given-names>S</given-names></name> <name><surname>Jamil</surname> <given-names>ML</given-names></name> <name><surname>Pais</surname> <given-names>S</given-names></name> <name><surname>Dias</surname> <given-names>G</given-names></name> <name><surname>Cordeiro</surname> <given-names>J</given-names></name></person-group>. <article-title>AI-assisted diagnosing, monitoring and treatment of mental disorders: a survey</article-title>. <source>ACM Trans Comput Healthc</source>. (<year>2024</year>) <volume>5</volume>(<issue>4</issue>):<fpage>1</fpage>&#x2013;<lpage>24</lpage>. <pub-id pub-id-type="doi">10.1145/3681794</pub-id></mixed-citation></ref>
<ref id="B6"><label>6.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Kaushik</surname> <given-names>P</given-names></name> <name><surname>Jain</surname> <given-names>E</given-names></name> <name><surname>Gill</surname> <given-names>KS</given-names></name> <name><surname>Upadhyay</surname> <given-names>D</given-names></name> <name><surname>Devliyal</surname> <given-names>S</given-names></name></person-group>. <comment>Optimizing mental health prediction by fine-tuning decision classifier parameters for enhanced accuracy. In: <italic>2024 2nd International Conference on Sustainable Computing and Smart Systems (ICSCSS), Coimbatore, India</italic>. IEEE (2024). p. 935&#x2013;9</comment>. <pub-id pub-id-type="doi">10.1109/ICSCSS60660.2024.10625480</pub-id></mixed-citation></ref>
<ref id="B7"><label>7.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Abdul</surname> <given-names>S</given-names></name> <name><surname>Adeghe</surname> <given-names>EP</given-names></name> <name><surname>Adegoke</surname> <given-names>BO</given-names></name> <name><surname>Adegoke</surname> <given-names>AA</given-names></name> <name><surname>Udedeh</surname> <given-names>EH</given-names></name></person-group>. <article-title>Mental health management in healthcare organizations: challenges and strategies&#x2014;a review</article-title>. <source>Int Med Sci Res J</source>. (<year>2024</year>) <volume>4</volume>(<issue>5</issue>):<fpage>585</fpage>&#x2013;<lpage>605</lpage>. <pub-id pub-id-type="doi">10.51594/imsrj.v4i5.1157</pub-id></mixed-citation></ref>
<ref id="B8"><label>8.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Patel</surname> <given-names>R</given-names></name> <name><surname>Wee</surname> <given-names>SN</given-names></name> <name><surname>Ramaswamy</surname> <given-names>R</given-names></name> <name><surname>Thadani</surname> <given-names>S</given-names></name> <name><surname>Tandi</surname> <given-names>J</given-names></name> <name><surname>Garg</surname> <given-names>R</given-names></name></person-group>, et al. <article-title>NeuroBlu, an electronic health record (EHR) trusted research environment (TRE) to support mental healthcare analytics with real-world data</article-title>. <source>BMJ Open</source>. (<year>2022</year>) <volume>12</volume>:<fpage>e057227</fpage>. <pub-id pub-id-type="doi">10.1136/bmjopen-2021-057227</pub-id><pub-id pub-id-type="pmid">35459671</pub-id></mixed-citation></ref>
<ref id="B9"><label>9.</label><mixed-citation publication-type="other"><collab>SNOMED-International</collab>. <comment>SNOMED-CT starter guide (2024). Available online at: <ext-link ext-link-type="uri" xlink:href="https://confluence.ihtsdotools.org/display/DOCSTART/SNOMED-STARTER-GUIDE">https://confluence.ihtsdotools.org/display/DOCSTART/SNOMED-STARTER-GUIDE</ext-link> (Accessed December 3, 2024)</comment>.</mixed-citation></ref>
<ref id="B10"><label>10.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Casu</surname> <given-names>M</given-names></name> <name><surname>Triscari</surname> <given-names>S</given-names></name> <name><surname>Battiato</surname> <given-names>S</given-names></name> <name><surname>Guarnera</surname> <given-names>L</given-names></name> <name><surname>Caponnetto</surname> <given-names>P</given-names></name></person-group>. <article-title>AI chatbots for mental health: a scoping review of effectiveness, feasibility, and applications</article-title>. <source>Appl Sci</source>. (<year>2024</year>) <volume>14</volume>(<issue>13</issue>):<fpage>5889</fpage>. <pub-id pub-id-type="doi">10.3390/app14135889</pub-id></mixed-citation></ref>
<ref id="B11"><label>11.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jacobson</surname> <given-names>N</given-names></name> <name><surname>McClain</surname> <given-names>H</given-names></name> <name><surname>New</surname> <given-names>ML</given-names></name></person-group>. <article-title>Psychiatry in the digital age: an in-depth examination of online interventions</article-title>. <source>J Biomed Res Rep</source>. (<year>2023</year>) <volume>4</volume>(<issue>1</issue>):<fpage>1</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.59657/28374681.brs.24.062</pub-id></mixed-citation></ref>
<ref id="B12"><label>12.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shivaprasad</surname> <given-names>S</given-names></name> <name><surname>Chadaga</surname> <given-names>K</given-names></name> <name><surname>Dias</surname> <given-names>CC</given-names></name> <name><surname>Sampathila</surname> <given-names>N</given-names></name> <name><surname>Prabhu</surname> <given-names>S</given-names></name></person-group>. <article-title>An interpretable schizophrenia diagnosis framework using machine learning and explainable artificial intelligence</article-title>. <source>Syst Sci Control Eng</source>. (<year>2024</year>) <volume>12</volume>(<issue>1</issue>):<fpage>2364033</fpage>. <pub-id pub-id-type="doi">10.1080/21642583.2024.2364033</pub-id></mixed-citation></ref>
<ref id="B13"><label>13.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kerz</surname> <given-names>E</given-names></name> <name><surname>Zanwar</surname> <given-names>S</given-names></name> <name><surname>Qiao</surname> <given-names>Y</given-names></name> <name><surname>Wiechmann</surname> <given-names>D</given-names></name></person-group>. <article-title>Towards explainable AI (XAI) for mental health detection based on language behavior</article-title>. <source>Front Psychiatry</source>. (<year>2023</year>) <volume>14</volume>:<fpage>1219479</fpage>. <pub-id pub-id-type="doi">10.3389/fpsyt.2023.1219479</pub-id><pub-id pub-id-type="pmid">38144474</pub-id></mixed-citation></ref>
<ref id="B14"><label>14.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Antoniou</surname> <given-names>E</given-names></name> <name><surname>Stamoulou</surname> <given-names>P</given-names></name> <name><surname>Tzanoulinou</surname> <given-names>M-D</given-names></name> <name><surname>Orovou</surname> <given-names>E</given-names></name></person-group>. <article-title>Perinatal mental health; the role and the effect of the partner: a systematic review</article-title>. <source>Healthcare</source>. (<year>2021</year>) <volume>9</volume>(<issue>11</issue>):<fpage>1572</fpage>. <pub-id pub-id-type="doi">10.3390/healthcare9111572</pub-id><pub-id pub-id-type="pmid">34828618</pub-id></mixed-citation></ref>
<ref id="B15"><label>15.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Soni</surname> <given-names>J</given-names></name> <name><surname>Jijina</surname> <given-names>P</given-names></name></person-group>. <article-title>Design and delivery of a need-based mental health promotion program for shelter-home adolescent</article-title>. <source>Indian J Soc Psychiatry</source>. (<year>2022</year>) <volume>38</volume>(<issue>3</issue>):<fpage>287</fpage>&#x2013;<lpage>92</lpage>. <pub-id pub-id-type="doi">10.4103/ijsp.ijsp_170_20</pub-id></mixed-citation></ref>
<ref id="B16"><label>16.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Shit</surname> <given-names>A</given-names></name> <name><surname>Roy</surname> <given-names>S</given-names></name> <name><surname>Maji</surname> <given-names>P</given-names></name> <name><surname>Majhi</surname> <given-names>AA</given-names></name> <name><surname>Sadhukhan</surname> <given-names>B</given-names></name></person-group>. <comment>Digital psychiatry: utilizing AI for mental health assessment on social platforms. In: <italic>2024 International Conference on Intelligent Algorithms for Computational Intelligence Systems (IACIS), Hassan, India</italic>. IEEE (2024). p. 1&#x2013;7</comment>. <pub-id pub-id-type="doi">10.1109/IACIS61494.2024.10721680</pub-id></mixed-citation></ref>
<ref id="B17"><label>17.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>P-C</given-names></name> <name><surname>Akhtar</surname> <given-names>N</given-names></name> <name><surname>Jiang</surname> <given-names>J</given-names></name> <name><surname>Mian</surname> <given-names>A</given-names></name></person-group>. <comment>Backdoor-based explainable AI benchmark for high fidelity evaluation of attribution methods. <italic>arXiv</italic> [Preprint]. (2024)</comment>. <pub-id pub-id-type="doi">10.48550/arxiv.2405.02344</pub-id></mixed-citation></ref>
<ref id="B18"><label>18.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Roberts</surname> <given-names>L</given-names></name> <name><surname>Lanes</surname> <given-names>S</given-names></name> <name><surname>Peatman</surname> <given-names>O</given-names></name> <name><surname>Assheton</surname> <given-names>P</given-names></name></person-group>. <article-title>The importance of SNOMED CT concept specificity in healthcare analytics</article-title>. <source>Health Inf Manage J</source>. (<year>2024</year>) <volume>53</volume>(<issue>3</issue>):<fpage>157</fpage>&#x2013;<lpage>65</lpage>. <pub-id pub-id-type="doi">10.1177/18333583221144662</pub-id></mixed-citation></ref>
<ref id="B19"><label>19.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chang</surname> <given-names>E</given-names></name> <name><surname>Sung</surname> <given-names>S</given-names></name></person-group>. <article-title>Use of SNOMED CT in large language models: scoping review</article-title>. <source>JMIR Med Inform</source>. (<year>2024</year>) <volume>12</volume>:<fpage>e62924</fpage>. <pub-id pub-id-type="doi">10.2196/62924</pub-id><pub-id pub-id-type="pmid">39374057</pub-id></mixed-citation></ref>
<ref id="B20"><label>20.</label><mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kroenke</surname> <given-names>K</given-names></name> <name><surname>Spitzer</surname> <given-names>RL</given-names></name> <name><surname>Williams</surname> <given-names>JBW</given-names></name></person-group>. <article-title>The PHQ-9: validity of a brief depression severity measure</article-title>. <source>J Gen Intern Med</source>. (<year>2001</year>) <volume>16</volume>(<issue>9</issue>):<fpage>606</fpage>&#x2013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1046/j.1525-1497.2001.016009606.x</pub-id><pub-id pub-id-type="pmid">11556941</pub-id></mixed-citation></ref>
<ref id="B21"><label>21.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Davidson</surname> <given-names>D</given-names></name> <name><surname>Rawson</surname> <given-names>M</given-names></name></person-group>. <comment>SNOMED CT: why it matters to you (2024). Available online at: <ext-link ext-link-type="uri" xlink:href="https://www.wolterskluwer.com/en/expert-insights/snomed-ct-why-it-matters-to-you">https://www.wolterskluwer.com/en/expert-insights/snomed-ct-why-it-matters-to-you</ext-link> (Accessed December 1, 2024)</comment>.</mixed-citation></ref>
<ref id="B22"><label>22.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Gupta</surname> <given-names>S</given-names></name> <name><surname>Agarwal</surname> <given-names>A</given-names></name> <name><surname>Gaur</surname> <given-names>M</given-names></name> <name><surname>Roy</surname> <given-names>K</given-names></name> <name><surname>Narayanan</surname> <given-names>V</given-names></name> <name><surname>Kumaraguru</surname> <given-names>P</given-names></name></person-group>, et al. <comment>Learning to automate follow-up question generation using process knowledge for depression triage on Reddit posts. In: <italic>Proceedings of the Eighth Workshop on Computational Linguistics and Clinical Psychology, Seattle, United States</italic>. Association for Computational Linguistics (2022). p. 137&#x2013;47</comment>.</mixed-citation></ref>
<ref id="B23"><label>23.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Sanh</surname> <given-names>V</given-names></name></person-group>. <comment>DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter. <italic>arXiv</italic> [Preprint]. <italic>arXiv:1910.01108</italic> (2019)</comment>.</mixed-citation></ref>
<ref id="B24"><label>24.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Devlin</surname> <given-names>J</given-names></name> <name><surname>Chang</surname> <given-names>M-W</given-names></name> <name><surname>Lee</surname> <given-names>K</given-names></name> <name><surname>Toutanova</surname> <given-names>K</given-names></name></person-group>. <comment>BERT: pre-training of deep bidirectional transformers for language understanding. In: <italic>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Minneapolis, MN</italic>. Association for Computational Linguistics (2019). p. 4171&#x2013;86</comment>.</mixed-citation></ref>
<ref id="B25"><label>25.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>Y</given-names></name></person-group>. <comment>RoBERTa: a robustly optimized BERT pretraining approach. <italic>arXiv</italic> [Preprint]. <italic>arXiv:1907.11692</italic> (2019). Available online at: <ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/1907.11692">https://arxiv.org/abs/1907.11692</ext-link> (Accessed January 6, 2026).</comment></mixed-citation></ref>
<ref id="B26"><label>26.</label><mixed-citation publication-type="other"><person-group person-group-type="author"><name><surname>He</surname> <given-names>P</given-names></name> <name><surname>Liu</surname> <given-names>X</given-names></name> <name><surname>Gao</surname> <given-names>J</given-names></name> <name><surname>Chen</surname> <given-names>W</given-names></name></person-group>. <comment>DeBERTa: decoding-enhanced BERT with disentangled attention. <italic>arXiv</italic> [Preprint]. <italic>arXiv:2006.03654</italic> (2020)</comment>.</mixed-citation></ref></ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by"><p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2573573/overview">Alaa Abd-alrazaq</ext-link>, Weill Cornell Medicine, Qatar</p></fn>
<fn id="n2" fn-type="custom" custom-type="reviewed-by"><p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/500246/overview">Michele Procacci</ext-link>, Terzo Centro di Psicoterapia, Italy</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3055281/overview">Ratna Yunita Setiyani Subardjo</ext-link>, University of Aisyiyah Yogyakarta, Indonesia</p></fn>
</fn-group>
</back>
</article>