<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Psychol.</journal-id>
<journal-title>Frontiers in Psychology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Psychol.</abbrev-journal-title>
<issn pub-type="epub">1664-1078</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpsyg.2025.1612769</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Psychology</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Enhancing TextGCN for depression detection on social media with emotion representation</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Mao</surname><given-names>Huimin</given-names></name>
<uri xlink:href="https://loop.frontiersin.org/people/3104497/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Han</surname><given-names>Qing</given-names></name>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2278385/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/funding-acquisition/"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
</contrib>
</contrib-group>
<aff><institution>School of Medical Technology and Information Engineering, Zhejiang Chinese Medical University</institution>, <addr-line>Hangzhou</addr-line>, <country>China</country></aff>
<author-notes>
<fn fn-type="edited-by" id="fn0001">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/156789/overview">Eleni Peristeri</ext-link>, Aristotle University of Thessaloniki, Greece</p></fn>
<fn fn-type="edited-by" id="fn0002">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/2946302/overview">Abdelmoniem Helmy</ext-link>, Cairo University, Egypt</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3105583/overview">Luis Roberto Garcia-Noguez</ext-link>, Autonomous University of Queretaro, Mexico</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3106125/overview">Amit Aylani</ext-link>, Vidyalankar Institute of Technology, India</p></fn>
<corresp id="c001">&#x002A;Correspondence: Qing Han, <email>hanqing@zcmu.edu.cn</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>26</day>
<month>08</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>16</volume>
<elocation-id>1612769</elocation-id>
<history>
<date date-type="received">
<day>16</day>
<month>04</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>05</day>
<month>08</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2025 Mao and Han.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Mao and Han</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec id="sec1">
<title>Background</title>
<p>Depression, also known as depressive disorder, is a pervasive mental health condition that affects individuals across diverse backgrounds and demographics. The detection of depression has emerged as a critical area of research in response to the growing global burden of mental health disorders.</p>
</sec>
<sec id="sec2">
<title>Objective</title>
<p>This study aims to augment the performance of TextGCN for depression detection by leveraging social media posts that have been enriched with emotional representation.</p>
</sec>
<sec id="sec3">
<title>Methods</title>
<p>We propose an enhanced TextGCN model that incorporates emotion representation learned from fine-tuned pre-trained language models, including MentalBERT, MentalRoBERTa, and RoBERTaDepressionDetection. Our approach involves integrating these models into TextGCN to capitalize on their emotional representation capabilities. Furthermore, unlike previous studies that discard emoticons and emojis as noise, we retain them as individual tokens during preprocessing to preserve potential affective cues.</p>
</sec>
<sec id="sec4">
<title>Results</title>
<p>The results demonstrate a significant improvement in performance achieved by the enhanced TextGCN models, when integrated with embeddings learned from MentalBERT, MentalRoBERTa, and RoBERTaDepressionDetection, compared to baseline models on five benchmark datasets.</p>
</sec>
<sec id="sec5">
<title>Conclusion</title>
<p>Our research highlights the potential of pre-trained models to enhance emotional representation in TextGCN, leading to improved detection accuracy, and can serve as a foundation for future research and applications in the mental health domain. In the forthcoming stages, we intend to refine our model by incorporating more balanced and targeted datasets, with the goal of exploring its potential applications in mental health.</p>
</sec>
</abstract>
<kwd-group>
<kwd>graph convolutional networks</kwd>
<kwd>depression detection</kwd>
<kwd>emotion representation</kwd>
<kwd>social media</kwd>
<kwd>pre-trained language models</kwd>
<kwd>mental health</kwd>
<kwd>psychology</kwd>
</kwd-group>
<contract-num rid="cn1">2024B014</contract-num>
<contract-num rid="cn2">2024ZF059</contract-num>
<contract-sponsor id="cn1">Project of Zhejiang Federation of Humanities and Social Sciences</contract-sponsor>
<contract-sponsor id="cn2">Zhejiang Provincial Traditional Chinese Medicine Science and Technology Plan Project</contract-sponsor>
<counts>
<fig-count count="5"/>
<table-count count="3"/>
<equation-count count="9"/>
<ref-count count="57"/>
<page-count count="14"/>
<word-count count="9718"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Emotion Science</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec6">
<label>1</label>
<title>Introduction</title>
<p>Depressive disorder, commonly referred to as depression, is a widespread mental health condition that affects individuals of all backgrounds and demographics. According to global statistics, approximately 280 million people worldwide were affected by depression in 2019, with a significant proportion of this population comprising children and adolescents, numbering 23 million (<xref ref-type="bibr" rid="ref54">World Health Organization, 2025</xref>). Characterized by a persistent decline in mood or a marked diminution of pleasure in activities over an extended period, depressive disorder has far-reaching consequences that permeate various aspects of life, including academic performance, workplace productivity, and interpersonal relationships with family, peers, and the broader community. Furthermore, depression poses a substantial risk of suicidal behavior, which remains a leading cause of mortality among individuals aged 15&#x2013;29, underscoring the imperative need for comprehensive support and intervention strategies. There is an imperative need to address this condition, as a substantial number of individuals suffering from mental disorders, particularly depression, remain underserved by adequate care. Despite the availability of proven prevention and treatment strategies, a considerable proportion of individuals with mental health issues continue to lack access to comprehensive care (<xref ref-type="bibr" rid="ref2">Altamura et al., 2008</xref>; <xref ref-type="bibr" rid="ref8">Bukh et al., 2013</xref>; <xref ref-type="bibr" rid="ref17">Fekadu et al., 2022</xref>; <xref ref-type="bibr" rid="ref10">Carey et al., 2014</xref>; <xref ref-type="bibr" rid="ref56">Yang et al., 2022</xref>). Untreated depression can have a detrimental impact on the disease progression and prognosis, ultimately resulting in suboptimal outcomes. 
Consequently, early detection of depression is crucial, as it not only optimizes treatment efficacy but also alleviates the burden on healthcare systems, reduces reliance on specialized providers, and mitigates the associated social stigma (<xref ref-type="bibr" rid="ref52">Uddin et al., 2022</xref>).</p>
<p>The detection of depression has emerged as a critical area of research in response to the growing global burden of mental health disorders. Traditionally, the diagnosis of depression has relied heavily on the International Classification of Diseases (ICD) and the Diagnostic and Statistical Manual of Mental Disorders (DSM), which are susceptible to subjective bias and the clinician&#x2019;s diagnostic expertise (<xref ref-type="bibr" rid="ref46">Rubin, 2018</xref>). However, recent initiatives have sought to revolutionize the field by harnessing advancements in artificial intelligence (AI) and machine learning to enhance early and accurate detection. The integration of AI in mental health care is increasingly acknowledged by practicing psychiatrists, who foresee its pivotal role in shaping the future of care (<xref ref-type="bibr" rid="ref16">Doraiswamy et al., 2020</xref>; <xref ref-type="bibr" rid="ref23">Han and Zhao, 2025</xref>). These cutting-edge tools leverage a diverse array of data sources, including clinical interviews, speech patterns, electroencephalograms, wearable device metrics, and electronic health records, to identify subtle indicators of depression (<xref ref-type="bibr" rid="ref50">Squires et al., 2023</xref>). Researchers anticipate discovering a disparity in social media activity between individuals with typical social behaviors and those with depression. The extensive usage of social media platforms has led to a significant increase in users sharing their thoughts and personal experiences, which can provide valuable insights into the detection of depression (<xref ref-type="bibr" rid="ref26">Islam et al., 2018</xref>). Longitudinal data from social media platforms has been recognized as a highly valuable resource, offering broad self-disclosure opportunities (<xref ref-type="bibr" rid="ref5">Balani and De Choudhury, 2024</xref>). 
By leveraging this data, numerous studies have proposed depression detection models based on traditional machine learning approaches. However, manual feature extraction from user posts is often necessary, utilizing various technologies such as Linguistic Inquiry and Word Count (LIWC), N-grams, Term Frequency-Inverse Document Frequency (TF-IDF), and Bag of Words (BOW) (<xref ref-type="bibr" rid="ref20">Guntuku et al., 2017</xref>). The integration of these technologies enables researchers to develop detection models using a range of traditional machine learning algorithms, including Naive Bayes, Logistic Regression, Support Vector Machines (SVM), and Random Forest, among others (<xref ref-type="bibr" rid="ref43">Philip Thekkekara et al., 2024</xref>; <xref ref-type="bibr" rid="ref40">Manna and Nakai, 2019</xref>).</p>
<p>Despite notable progress achieved by traditional machine learning algorithms in the field of depression detection, several limitations persist. The manual construction of features necessitates substantial expertise and domain knowledge, as well as considerable effort to identify relevant features for training (<xref ref-type="bibr" rid="ref39">Malhotra and Jindal, 2022</xref>). In contrast, deep learning techniques can automatically extract features from raw text vectors and provide abstract summaries of information, whereas traditional approaches often rely on shallow semantic features or statistical text models. Recent studies have leveraged deep learning algorithms, including Convolutional Neural Networks (CNN), Recurrent Neural Networks (RNN), and algorithms incorporating attention components and transformer-based architectures such as BERT (<xref ref-type="bibr" rid="ref14">Devlin et al., 2018</xref>), to improve depression detection outcomes based on social media text data (<xref ref-type="bibr" rid="ref42">Orabi et al., 2018</xref>). CNN models have demonstrated exceptional proficiency in extracting local features and patterns from text data through the application of convolutional operations. Conversely, Graph Neural Networks (GNNs) have shown remarkable efficacy in capturing global semantic relationships and structured dependencies within text data, which can be effectively represented as graphs. The Text Graph Convolutional Network (TextGCN) (<xref ref-type="bibr" rid="ref33">Liang et al., 2018</xref>) leverages the strengths of both CNN and GNN approaches, thereby enabling the integration of word-level and document-level semantic information in a comprehensive manner. This innovative methodology facilitates the effective comprehension of complex human emotions, particularly those associated with depression, rendering TextGCN a valuable tool for this application. 
However, the current implementation of TextGCN relies on one-hot vector representations of individual words or documents, without the incorporation of pre-trained word embeddings or external knowledge (<xref ref-type="bibr" rid="ref33">Liang et al., 2018</xref>). This limitation underscores the need for the development of more advanced representation techniques that can accurately capture the intricate emotional nuances embedded within text data.</p>
<p>Building a model to identify depression through social media data typically involves a multi-step approach: data acquisition, data preprocessing, the creation of word representations, model training, and model validation. Emotion representation is a critical component of natural language processing (NLP), as it enables models to accurately capture the emotional nuances of human language, thereby enhancing their comprehension of emotional dimensions. The application of various techniques, such as word embeddings (e.g., Word2Vec, GloVe) and contextualized embeddings (e.g., BERT, RoBERTa), has been widely adopted to encode emotional cues in text, resulting in improved performance in tasks like text classification (<xref ref-type="bibr" rid="ref3">Aydo&#x011F;an and Karci, 2020</xref>), sentiment analysis (<xref ref-type="bibr" rid="ref45">Rezaeinia et al., 2019</xref>), emotion detection, and depression detection (<xref ref-type="bibr" rid="ref29">Lara et al., 2021</xref>; <xref ref-type="bibr" rid="ref30">Lestandy and Abdurrahim, 2023</xref>). Recent research has integrated emotion representation into depression detection models, aiming to enhance accuracy and contribute to advancements in mental health research and practice. For example, models like TextGCN with Emotion Graph Representation have demonstrated promise in abstracting both explicit and implicit emotional signals associated with mental health, showcasing the potential of emotion representation in depression detection (<xref ref-type="bibr" rid="ref9">Cabral et al., 2024</xref>).</p>
<p>This study concentrates on augmenting the performance of TextGCN, subsequently developing an improved model based on TextGCN for depression detection by harnessing social media posts enriched with emotion representation. To achieve this goal, we use pre-trained emotion representations to replace the basic input of TextGCN. We also compare the contextual representations generated by various pre-trained large language models for depression detection. Combining the embedding technique and the TextGCN approach, we propose our models as a depression detection tool, incorporating posts from social media platforms such as Twitter, Reddit, and web forums.</p>
<p>Our approach and key contributions can be summarized as follows.</p><list list-type="order">
<list-item>
<p>We explore the applicability of emotion representations in depression detection, contributing to future work in this field.</p>
</list-item>
<list-item>
<p>We propose a new framework for depression detection with higher accuracy and availability, which achieves better performance on three publicly available social media depression detection datasets. The framework consists of two parts: the pre-trained emotion representation and the Textual Graph Convolutional Networks (TextGCN) classification.</p>
</list-item>
<list-item>
<p>Comprehensive experiments across three datasets yield the following key findings: Utilizing pre-trained word embeddings significantly outperforms using one-hot vectors as input to TextGCN. Models leveraging RoBERTa-based embeddings consistently outperform those based on BERT embeddings. Fine-tuning further enhances the performance of both BERT- and RoBERTa-based models compared to their base versions. Models trained on longer texts achieve better results than those trained on shorter texts.</p>
</list-item>
</list>
<p>The remainder of this paper is organized as follows: Section 2 presents the methods that have been employed for depression detection, particularly approaches such as Textual Graph Convolutional Networks classification (TextGCN) and emotion representation. Section 3 introduces the specific technical methods in our model. The full experimental setup, from datasets to models, is described in Section 4. Results are discussed in Section 5. Lastly, Section 6 concludes the paper.</p>
</sec>
<sec id="sec7">
<label>2</label>
<title>Related works</title>
<sec id="sec8">
<label>2.1</label>
<title>Depression detection</title>
<p>Depression is a pervasive mental health disorder that affects millions of individuals worldwide, often without being consciously acknowledged. It can lead to a significant diminishment of interest in everyday activities, potentially culminating in suicidal ideation. Traditionally, depression diagnosis is based on standardized clinical criteria, which encompass current symptomatology and medical history (<xref ref-type="bibr" rid="ref49">Smith et al., 2013</xref>). The integration of big data and machine learning algorithms presents an effective and efficient means of automating depression detection, providing valuable support to healthcare professionals and patients alike. At its core, classic depression detection constitutes a classification problem, wherein the distinction between healthy and depressed individuals or the prediction of severity is paramount (<xref ref-type="bibr" rid="ref15">Dinkel et al., 2019</xref>).</p>
<p>Researchers have conducted an in-depth examination of behavioral indicators, encompassing facial expressions, speech patterns, and other multi-modal signals. For example, <xref ref-type="bibr" rid="ref48">Shin et al. (2022)</xref> leveraged clinical interview transcripts to train a machine learning model for the detection of depression and suicidal risk. This study employed the Naive Bayes classifier, yielding an area under the curve (AUC) of 0.905, sensitivity of 0.699, and specificity of 0.964 for diagnosing depression. A study conducted by <xref ref-type="bibr" rid="ref21">Had&#x017E;i&#x0107; et al. (2024)</xref> explored the potential of artificial intelligence, specifically machine learning, as a diagnostic tool for depression in clinical interviews. This investigation focused on the performance of a BERT-based natural language processing (NLP) model, which achieved an accuracy of 0.71. In contrast, an untrained GPT-3.5 model demonstrated superior performance, achieving an accuracy of 0.88.</p>
<p>The integration of social media into mental health studies presents a novel avenue for accessing data from a broader population beyond traditional clinical settings. This expansion enables researchers to gather insights from a wider subset of individuals, including those who may not have been previously accounted for in clinical studies (<xref ref-type="bibr" rid="ref9">Cabral et al., 2024</xref>). Younger generations have been found to be more inclined to express suicidal ideation on social media, rather than disclosing it to healthcare professionals or family members (<xref ref-type="bibr" rid="ref28">John et al., 2018</xref>). In contrast to clinical data, social media data offers several advantages, including ease of access, richer content, and lower concealment potential (<xref ref-type="bibr" rid="ref39">Malhotra and Jindal, 2022</xref>). Several studies have demonstrated the potential of social media data in detecting mental health issues. For instance, <xref ref-type="bibr" rid="ref19">Govindasamy and Palanichamy (2021)</xref> utilized Twitter data to develop a depression detection model, employing a combination of Na&#x00EF;ve Bayes and NBTree classifiers to achieve optimal results. <xref ref-type="bibr" rid="ref4">Ayyalasomayajula&#x2019;s (2024)</xref> investigation into the impact of linguistic features on depression detection through Reddit posts yielded impressive outcomes, with an exceptional <italic>F</italic><sub>1</sub>-score achieved using a TF-IDF-based logistic regression model on the same dataset. This research highlights the potential of social media data in mental health studies, and its ability to outperform traditional methodologies. The findings of this study contribute to the growing body of evidence on the use of social media data in mental health research, and underscore the importance of exploring this avenue in future studies.</p>
<p>Beyond traditional machine learning approaches, deep learning technologies have been increasingly employed in the detection of depression. <xref ref-type="bibr" rid="ref42">Orabi et al. (2018)</xref> leveraged Convolutional Neural Networks (CNNs) and Recurrent Neural Networks (RNNs) to identify individuals exhibiting signs of mental illness, utilizing limited amounts of unstructured data sourced from Twitter. Their CNNWithMax models demonstrated a higher accuracy of 87.957%, with optimized embedding, and were found to outperform RNN models in depression detection. In contrast, <xref ref-type="bibr" rid="ref41">Mihov et al. (2022)</xref> utilized heterogeneous graph convolution to develop a depression detection model, MentalNet, by representing users&#x2019; social circles, including analysis of user interactions and the intimacy of user contacts. The framework achieved excellent performance on Twitter data. <xref ref-type="bibr" rid="ref35">Liu (2024)</xref> integrated XLM-RoBERTa and TextGCN to propose a depression clinical detection model based on data from Twitter, Reddit, and Weibo. XLM-RoBERTa was employed to extract semantic insights from multilingual text, while TextGCN was leveraged to acquire knowledge of the multilingual text structure. The study demonstrated that the incorporation of TextGCN significantly enhanced the performance of the model. TextGCN represents a pioneering approach, wherein the entire corpus is represented as a heterogeneous graph, enabling the simultaneous construction of word and document representations via Graph Neural Networks (GNNs). This novel framework outperforms state-of-the-art text classification methods, without relying on pre-trained word embeddings or external knowledge, and has been successfully evaluated on a range of benchmark datasets. 
In contrast, prior studies (<xref ref-type="bibr" rid="ref34">Lin et al., 2021</xref>; <xref ref-type="bibr" rid="ref22">Han et al., 2022</xref>; <xref ref-type="bibr" rid="ref32">Li et al., 2023</xref>) have sought to augment GCN models by incorporating pre-trained representations or integrating external knowledge.</p>
</sec>
<sec id="sec9">
<label>2.2</label>
<title>Emotion representation</title>
<p>Embeddings are numerical vector representations of text data, enabling efficient processing and analysis by computers. This technique is commonly employed in natural language processing tasks, such as sentiment analysis, and serves as a crucial component in deep learning methods. Embeddings have been developed in various forms, including character, word, and sentence levels, and have been widely explored in existing studies (<xref ref-type="bibr" rid="ref9001">Zhang and Han, 2025</xref>).</p>
<p>Research has demonstrated the potential of word embeddings in detecting depression on social media platforms, such as Reddit. A study by <xref ref-type="bibr" rid="ref30">Lestandy and Abdurrahim (2023)</xref> investigated the impact of word embedding dimensions on the detection of depression using a Bidirectional Long Short-Term Memory (BiLSTM) model in conjunction with Word2Vec and GloVe methods. The findings highlight the efficacy of combining word embeddings with Recurrent Neural Network (RNN) technology for depression identification, with the Word2Vec approach yielding significant advantages. <xref ref-type="bibr" rid="ref13">Couto et al. (2022)</xref> proposed a framework for early depression detection by analyzing changes in language use on social media, utilizing temporal word embeddings. The proposed method employed two temporal word embedding models: TWEC, based on word2vec, and DCWE, built on pre-trained language models like BERT. The experimental results demonstrated that the DCWE model outperformed most participants in the CLEF eRisk tasks of 2017 and 2018, achieving top performance in ERDE5 and ERDE50 metrics, and even surpassing state-of-the-art methods in some cases. <xref ref-type="bibr" rid="ref18">Goswami et al. (2024)</xref> explored depression detection by combining embeddings extracted by BERT or RoBERTa with an RNN classifier; their models achieved a validation accuracy of 99.9%. <xref ref-type="bibr" rid="ref24">Hong et al. (2022)</xref> proposed a novel approach to node attributes within a Graph Neural Network (GNN)-based model, where node-specific embeddings are captured for each word in the vocabulary to measure the severity of depression symptoms. <xref ref-type="bibr" rid="ref55">Xin et al. (2024)</xref> compared the suitability of various pre-trained language models, including BERT, MentalBERT, MentalLongformer, Llama 2-7B, and Llama 3-8B, across three different segmentations of interview transcripts. 
The findings revealed that LLM embeddings facilitated efficient classification of outcomes in qualitative studies on adolescent depression, and confirmed their potential in future detection and treatment of depression. <xref ref-type="bibr" rid="ref7">Bucur&#x2019;s (2024)</xref> study compared two transformer-based embedding methods, MentalRoBERTa and an MPNet variant, for representing Reddit social media posts and BDI-II questionnaire responses. The results showed that the model designed for semantic search performed better than the mental health pre-trained model in embedding tasks, highlighting the performance differences of various embedding methods in depression symptom detection.</p>
<p>In brief, numerous studies have explored models that integrate emotion representation with machine learning or deep learning techniques for depression detection, with some success, and GNNs can improve a model&#x2019;s ability to generate representations. In this study, we attempt to detect depression from different social media platforms and from websites specifically designed to support patients with depression, to further expand the scope of application of our model; we apply a deep learning method that can use those measures to detect individuals who are suffering from depression; and we propose a novel computational framework that can improve the accuracy of automatic depression detection.</p>
</sec>
</sec>
<sec sec-type="methods" id="sec10">
<label>3</label>
<title>Methods</title>
<sec id="sec11">
<label>3.1</label>
<title>Overview</title>
<p>We enhance the TextGCN (<xref ref-type="bibr" rid="ref33">Liang et al., 2018</xref>) by incorporating emotion representation to investigate the trend of depression detection in social media. At its core, TextGCN posits that a text corpus can be effectively represented as a graph, <inline-formula>
<mml:math id="M1">
<mml:mi>G</mml:mi>
<mml:mo>=</mml:mo>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>V</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>E</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</inline-formula>, where <inline-formula>
<mml:math id="M2">
<mml:mi>V</mml:mi>
</mml:math>
</inline-formula> comprises a set of word and document nodes. The edges <inline-formula>
<mml:math id="M3">
<mml:mi>E</mml:mi>
</mml:math>
</inline-formula> consist of word-word connections <inline-formula>
<mml:math id="M4">
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula>, word-document relationships <inline-formula>
<mml:math id="M5">
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula>, and document-document connections <inline-formula>
<mml:math id="M6">
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula>. <inline-formula>
<mml:math id="M7">
<mml:mi>A</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mi>R</mml:mi>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#x00D7;</mml:mo>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> is the graph adjacency matrix, representing the weights between nodes in the graph. <xref ref-type="fig" rid="fig1">Figure 1</xref> illustrates the architecture of our depression detection classification model enhanced by emotion representation.</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>Overview of our model&#x2019;s architecture.</p>
</caption>
<graphic xlink:href="fpsyg-16-1612769-g001.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Diagram of a mental health detection model using language processing and TextGCN. At the top, mental health materials and social media posts fine-tune a pre-trained language model (PLM). The fine-tuned PLM processes input text, generating initial word/document representations. A post-text example, "Depressed and lonely&#x2026;," is shown. TextGCN processes these representations through hidden layers, resulting in word document graphs and final word document representation. Connections between document and word nodes, labeled with PMI, TF-IDF, and Jaccard, are visualized. The output classifies texts into "Depression" or "Non-Depression."</alt-text>
</graphic>
</fig>
<p>To inform our model, we leverage pre-trained models that have been domain-adapted or fine-tuned using mental health materials or other relevant information to generate emotion embeddings.</p>
<p>We construct a text graph, where nodes represent words and documents in the corpus, and edges represent their interconnections. Our fine-tuned pre-trained language model generates emotion representations as initial weights for the TextGCN model. The constructed graph is then passed through two layers of Graph Convolutional Networks (GCN) for depression detection tasks.</p>
</sec>
<sec id="sec12">
<label>3.2</label>
<title>TextGCN</title>
<sec id="sec13">
<label>3.2.1</label>
<title>Node-edge construction</title>
<sec id="sec14">
<label>3.2.1.1</label>
<title>Node construction</title>
<p>The node set <inline-formula>
<mml:math id="M8">
<mml:mi>V</mml:mi>
</mml:math>
</inline-formula> comprises all documents and unique words within the corpus. Assuming there are <inline-formula>
<mml:math id="M9">
<mml:mi>D</mml:mi>
</mml:math>
</inline-formula> documents and <inline-formula>
<mml:math id="M10">
<mml:mi>M</mml:mi>
</mml:math>
</inline-formula> unique words in the corpus, the total number of nodes is <inline-formula>
<mml:math id="M11">
<mml:mo>&#x2223;</mml:mo>
<mml:mi>V</mml:mi>
<mml:mo>&#x2223;</mml:mo>
<mml:mo>=</mml:mo>
<mml:mi>n</mml:mi>
<mml:mo>=</mml:mo>
<mml:mi>D</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>M</mml:mi>
</mml:math>
</inline-formula>. Building upon the work of <xref ref-type="bibr" rid="ref22">Han et al. (2022)</xref>, we leverage pre-trained embeddings to enhance the TextGCN model. For example, we could utilize BERT to generate the initial node representation, where the learned vector for the [CLS] token serves as the basis for initializing each document node. Each document <inline-formula>
<mml:math id="M12">
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is processed through the BERT, yielding a sequence representation <inline-formula>
<mml:math id="M13">
<mml:msub>
<mml:mi>B</mml:mi>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:msub>
</mml:math>
</inline-formula> for that document. For example, a document <inline-formula>
<mml:math id="M14">
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> such as &#x201C;Sunny feels happy&#x201D; results in <inline-formula>
<mml:math id="M15">
<mml:msub>
<mml:mi>B</mml:mi>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:msub>
</mml:math>
</inline-formula> as <inline-formula>
<mml:math id="M16">
<mml:msubsup>
<mml:mi>B</mml:mi>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="true">[</mml:mo>
<mml:mi>CLS</mml:mi>
<mml:mo stretchy="true">]</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:msubsup>
<mml:mi>B</mml:mi>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mtext>Sunny</mml:mtext>
</mml:msubsup>
<mml:msubsup>
<mml:mi>B</mml:mi>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mtext>feels</mml:mtext>
</mml:msubsup>
<mml:msubsup>
<mml:mi>B</mml:mi>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mtext>happy</mml:mtext>
</mml:msubsup>
<mml:msubsup>
<mml:mi>B</mml:mi>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="true">[</mml:mo>
<mml:mi>SEP</mml:mi>
<mml:mo stretchy="true">]</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> after representation learning.</p>
<p>We utilize the [CLS] representation <inline-formula>
<mml:math id="M17">
<mml:msubsup>
<mml:mi>B</mml:mi>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:mrow>
<mml:mo stretchy="true">[</mml:mo>
<mml:mi>CLS</mml:mi>
<mml:mo stretchy="true">]</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula> as the node embedding for <inline-formula>
<mml:math id="M18">
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
</mml:math>
</inline-formula>. Next, we construct the document context by aggregating all documents containing the word <inline-formula>
<mml:math id="M19">
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:math>
</inline-formula>, represented as <inline-formula>
<mml:math id="M20">
<mml:msub>
<mml:mi>D</mml:mi>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:msub>
</mml:math>
</inline-formula>. Subsequently, a min-pooling operation is applied to all BERT representations <inline-formula>
<mml:math id="M21">
<mml:msubsup>
<mml:mi>B</mml:mi>
<mml:msub>
<mml:mi>D</mml:mi>
<mml:mi>d</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:msubsup>
</mml:math>
</inline-formula> of the word <inline-formula>
<mml:math id="M22">
<mml:msub>
<mml:mi>W</mml:mi>
<mml:mi>m</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> across the document collection.</p>
</sec>
<sec id="sec15">
<label>3.2.1.2</label>
<title>Edge construction</title>
<p>Inspired by the work of <xref ref-type="bibr" rid="ref22">Han et al. (2022)</xref>, we utilize all co-occurrence relations between every pair of node types.</p>
<p>The set of edges is <inline-formula>
<mml:math id="M23">
<mml:mi>E</mml:mi>
<mml:mo>=</mml:mo>
<mml:mo stretchy="true">{</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="true">}</mml:mo>
</mml:math>
</inline-formula>, where <inline-formula>
<mml:math id="M24">
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula> represents Pointwise Mutual Information (PMI), <inline-formula>
<mml:math id="M25">
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>w</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula> represents Term Frequency-Inverse Document Frequency (TF-IDF), and <inline-formula>
<mml:math id="M26">
<mml:msub>
<mml:mi>E</mml:mi>
<mml:mrow>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>i</mml:mi>
</mml:msub>
<mml:msub>
<mml:mi>d</mml:mi>
<mml:mi>j</mml:mi>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula> represents Jaccard similarity. Finally, we get the graph adjacency matrix A as follows.<disp-formula id="E1">
<mml:math id="M27">
<mml:msub>
<mml:mi>A</mml:mi>
<mml:mi mathvariant="italic">ij</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mo stretchy="true">{</mml:mo>
<mml:mtable equalrows="true" equalcolumns="true" displaystyle="true">
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mi>PMI</mml:mi>
<mml:mi mathvariant="italic">ij</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
<mml:mspace width="0.33em"/>
<mml:mtext>are</mml:mtext>
<mml:mspace width="0.33em"/>
<mml:mtext>words</mml:mtext>
<mml:mo>;</mml:mo>
<mml:mi>PMI</mml:mi>
<mml:mo>&#x003E;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mtext>TF-IDF</mml:mtext>
<mml:mi mathvariant="italic">ij</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
<mml:mspace width="0.33em"/>
<mml:mtext>is word</mml:mtext>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
<mml:mspace width="0.33em"/>
<mml:mtext>is document</mml:mtext>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mtext>Jaccard</mml:mtext>
<mml:mi mathvariant="italic">ij</mml:mi>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
<mml:mspace width="0.33em"/>
<mml:mtext>are</mml:mtext>
<mml:mspace width="0.33em"/>
<mml:mtext>documents</mml:mtext>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mtext>otherwise</mml:mtext>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:math>
</disp-formula></p>
</sec>
</sec>
<sec id="sec16">
<label>3.2.2</label>
<title>GCN for text classification</title>
<p>The central concept of GCN (<xref ref-type="bibr" rid="ref48">Shin et al., 2022</xref>) is to consolidate information from a node&#x2019;s neighboring nodes via graph convolution operations. Each layer within the GCN disseminates information according to the following formula (<xref ref-type="disp-formula" rid="EQ1">Equation 1</xref>):</p><disp-formula id="EQ1">
<label>(1)</label>
<mml:math id="M28">
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>l</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mi>f</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>l</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mi>&#x03C3;</mml:mi>
<mml:mo stretchy="true">(</mml:mo>
<mml:mover accent="true">
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>l</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>l</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</disp-formula><p>where <inline-formula>
<mml:math id="M29">
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>l</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> is the node feature matrix at the <inline-formula>
<mml:math id="M30">
<mml:mi>l</mml:mi>
</mml:math>
</inline-formula>-th layer, and <inline-formula>
<mml:math id="M31">
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> is the initial node feature matrix. <inline-formula>
<mml:math id="M32">
<mml:mover accent="true">
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:mo>=</mml:mo>
<mml:msup>
<mml:mover accent="true">
<mml:mi>D</mml:mi>
<mml:mo stretchy="true">&#x02DC;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mn>2</mml:mn>
</mml:mfrac>
</mml:mrow>
</mml:msup>
<mml:mover accent="true">
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">&#x02DC;</mml:mo>
</mml:mover>
<mml:msup>
<mml:mover accent="true">
<mml:mi>D</mml:mi>
<mml:mo stretchy="true">&#x02DC;</mml:mo>
</mml:mover>
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mn>1</mml:mn>
<mml:mn>2</mml:mn>
</mml:mfrac>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> is the normalized symmetric adjacency matrix, where <inline-formula>
<mml:math id="M33">
<mml:mover accent="true">
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">&#x02DC;</mml:mo>
</mml:mover>
<mml:mo>=</mml:mo>
<mml:mi>A</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>I</mml:mi>
</mml:math>
</inline-formula>, and <inline-formula>
<mml:math id="M34">
<mml:mi>I</mml:mi>
</mml:math>
</inline-formula> is the identity matrix used to add self-connections. <inline-formula>
<mml:math id="M35">
<mml:mover accent="true">
<mml:mi>D</mml:mi>
<mml:mo stretchy="true">&#x02DC;</mml:mo>
</mml:mover>
</mml:math>
</inline-formula> is the degree matrix, where <inline-formula>
<mml:math id="M36">
<mml:msub>
<mml:mover accent="true">
<mml:mi>D</mml:mi>
<mml:mo stretchy="true">&#x02DC;</mml:mo>
</mml:mover>
<mml:mi mathvariant="italic">ii</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mo>&#x2211;</mml:mo>
<mml:mi>j</mml:mi>
</mml:msub>
<mml:msub>
<mml:mover accent="true">
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">&#x02DC;</mml:mo>
</mml:mover>
<mml:mi mathvariant="italic">ij</mml:mi>
</mml:msub>
</mml:math>
</inline-formula>. <inline-formula>
<mml:math id="M37">
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mi>l</mml:mi>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> is the trainable weight matrix at the <inline-formula>
<mml:math id="M38">
<mml:mi>l</mml:mi>
</mml:math>
</inline-formula>-th layer. <inline-formula>
<mml:math id="M39">
<mml:mi>&#x03C3;</mml:mi>
</mml:math>
</inline-formula> is a non-linear activation function, typically ReLU.</p>
<p>We evaluate our models through a depression detection task. This approach offers a more straightforward and feasible implementation compared to alternative methods. We utilize a diverse set of initial word-document representations, including one-hot encoding, three large language models (two fundamental and three pre-trained), and three models specifically designed for mental health classification and depression detection. To construct co-occurrence information, we leverage pre-defined methods. Following the construction of the text graph, we apply a two-layer Graph Convolutional Network (GCN) architecture. The output of the first layer is processed through a Rectified Linear Unit (ReLU) activation function, while the output of the second layer is routed through a softmax function to facilitate classification. The specific formulas are as follows (<xref ref-type="disp-formula" rid="EQ2">Equations 2</xref>, <xref ref-type="disp-formula" rid="EQ3">3</xref>):</p><disp-formula id="EQ2">
<label>(2)</label>
<mml:math id="M40">
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mtext>ReLU</mml:mtext>
<mml:mo stretchy="true">(</mml:mo>
<mml:mover accent="true">
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</disp-formula><disp-formula id="EQ3">
<label>(3)</label>
<mml:math id="M41">
<mml:mi>Z</mml:mi>
<mml:mo>=</mml:mo>
<mml:mtext>softmax</mml:mtext>
<mml:mo stretchy="true">(</mml:mo>
<mml:mover accent="true">
<mml:mi>A</mml:mi>
<mml:mo stretchy="true">&#x0302;</mml:mo>
</mml:mover>
<mml:msup>
<mml:mi>H</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:msup>
<mml:mi>W</mml:mi>
<mml:mrow>
<mml:mo stretchy="true">(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo stretchy="true">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo stretchy="true">)</mml:mo>
</mml:math>
</disp-formula>
<p>The final classification loss function is the cross-entropy loss (<xref ref-type="disp-formula" rid="EQ4">Equation 4</xref>):</p><disp-formula id="EQ4">
<label>(4)</label>
<mml:math id="M42">
<mml:mi>L</mml:mi>
<mml:mo>=</mml:mo>
<mml:mo>&#x2212;</mml:mo>
<mml:munder>
<mml:mo movablelimits="false">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>d</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>D</mml:mi>
</mml:msub>
</mml:mrow>
</mml:munder>
<mml:munderover>
<mml:mo movablelimits="false">&#x2211;</mml:mo>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mi>F</mml:mi>
</mml:munderover>
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi mathvariant="italic">df</mml:mi>
</mml:msub>
<mml:mi>ln</mml:mi>
<mml:msub>
<mml:mi>Z</mml:mi>
<mml:mi mathvariant="italic">df</mml:mi>
</mml:msub>
</mml:math>
</disp-formula><p>where <inline-formula>
<mml:math id="M43">
<mml:msub>
<mml:mi>y</mml:mi>
<mml:mi>D</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the set of labeled document nodes, whose documents were collected from social media. <inline-formula>
<mml:math id="M44">
<mml:mi>F</mml:mi>
</mml:math>
</inline-formula> is the output feature dimension which is equal to 2 in depression detection, <inline-formula>
<mml:math id="M45">
<mml:mi>Y</mml:mi>
</mml:math>
</inline-formula> is the label indicator matrix, <inline-formula>
<mml:math id="M46">
<mml:msub>
<mml:mi>Y</mml:mi>
<mml:mi mathvariant="italic">df</mml:mi>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:math>
</inline-formula> indicates that document <inline-formula>
<mml:math id="M47">
<mml:mi>d</mml:mi>
</mml:math>
</inline-formula> belongs to category <inline-formula>
<mml:math id="M48">
<mml:mi>f</mml:mi>
</mml:math>
</inline-formula>. <inline-formula>
<mml:math id="M49">
<mml:msub>
<mml:mi>Z</mml:mi>
<mml:mi mathvariant="italic">df</mml:mi>
</mml:msub>
</mml:math>
</inline-formula> is the probability output by GCN that document <inline-formula>
<mml:math id="M50">
<mml:mi>d</mml:mi>
</mml:math>
</inline-formula> belongs to category <inline-formula>
<mml:math id="M51">
<mml:mi>f</mml:mi>
</mml:math>
</inline-formula>. Our framework is designed to ultimately yield superior results in tasks related to depression detection.</p>
</sec>
</sec>
<sec id="sec17">
<label>3.3</label>
<title>Emotion representation</title>
<p>MentalBERT and MentalRoBERTa are pre-trained language models that are tailored variants of the BERT and RoBERTa architectures, respectively, specifically designed for processing mental health-related text data. <xref ref-type="bibr" rid="ref27">Ji et al. (2021)</xref> employed a domain-adaptive pretraining approach, wherein they continued to train the models on a mental health corpus after initializing them with general pre-trained weights to adapt the models to the distinctive characteristics of mental health-related text. The mental health corpus utilized in their study comprises 13,671,785 sentences extracted from mental health-related posts on Reddit, sourced from various subreddits, such as &#x201C;r/depression,&#x201D; &#x201C;r/SuicideWatch,&#x201D; and &#x201C;r/Anxiety.&#x201D; The pre-training process was conducted using Huggingface&#x2019;s Transformers library, with the Masked Language Modeling (MLM) task employed, consistent with BERT and RoBERTa; MentalBERT utilized static masking, whereas MentalRoBERTa employed dynamic masking. The experimental results demonstrate that MentalBERT and MentalRoBERTa outperform baseline models across a range of mental health detection tasks, with notable superiority in depression detection.</p>
<p>Additionally, RoBERTaDepressionDetection (<xref ref-type="bibr" rid="ref25">Hugging Face, 2025</xref>) is a fine-tuned variant of the twitter-roberta-base model, specifically designed for depression detection tasks. This model is initialized with the twitter-roberta-base model, a RoBERTa-base variant that has undergone extensive pre-training on approximately 58 million tweets. The model was further refined through fine-tuning by the developer, who utilized a corpus from the LT-EDI 2022 shared task, a dataset aimed at detecting depression through social media text analysis. In accordance with the task&#x2019;s requirements, the model was trained to categorize text into three distinct classes: moderate depression, severe depression, and non-depression. The training dataset comprises 53,909 sentences, with 7,884 sentences classified as non-depression, 36,114 sentences categorized as moderate depression, and 9,911 sentences labeled as severe depression. The average document length for each class is 4, 6, and 11 sentences, respectively, with corresponding average sentence lengths of 78, 100, and 140 words. During the fine-tuning process, the developer employed a Multilayer Perceptron (MLP) as the classifier and utilized the Adam optimizer.</p>
<p>Compared to baseline models, MentalBERT, MentalRoBERTa, and RoBERTaDepressionDetection offer significant advantages in generating emotion representations. These models achieve superior performance in capturing domain-specific semantic features through domain-adaptive pretraining or task-specific fine-tuning, thereby enhancing their accuracy. MentalBERT and MentalRoBERTa demonstrate particular strength in producing embeddings that effectively represent domain-specific terminology, emotional expressions, and contextual subtleties. In contrast, RoBERTaDepressionDetection excels in decoding depression-related linguistic patterns, such as mood fluctuations and symptom descriptions, present in social media texts. The domain-specific training employed in these models enables them to adapt more robustly to nuanced mental health expressions, including self-negation and anxiety, as well as common noise patterns found in social media texts, such as abbreviations and informal syntax. Furthermore, the emotion representations generated by these models can be directly applied to downstream tasks with reduced fine-tuning requirements, and have been shown to outperform general models in mental health detection tasks.</p>
</sec>
<sec id="sec18">
<label>3.4</label>
<title>Depression detection classification</title>
<p>We conduct an evaluation of the enhanced TextGCN model, which leverages emotion representation learned from fine-tuned pre-trained language models to perform depression detection in a post-based framework. This approach allows for the benefits of post-based classification to be realized without the necessity of incorporating social media components, such as historical posts. By constructing a graph of words and documents, we then pass it through a two-layer Graph Convolutional Network (GCN) designed for depression detection. Utilizing categorical cross-entropy as the loss function, we employ single-label classification to achieve accurate depression detection outcomes.</p>
</sec>
</sec>
<sec sec-type="results" id="sec19">
<label>4</label>
<title>Results</title>
<sec id="sec20">
<label>4.1</label>
<title>Datasets</title>
<p>We employed a diverse set of publicly available datasets to assess the efficacy of our model. The statistics and distribution of categories for each dataset are summarized in <xref ref-type="table" rid="tab1">Table 1</xref>, while <xref ref-type="fig" rid="fig2">Figure 2</xref> provides a visual representation of the categorical distribution.</p>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>Dataset statistics.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Dataset No.</th>
<th align="center" valign="top">Original dataset name</th>
<th align="center" valign="top">Source</th>
<th align="center" valign="top">Number of posts</th>
<th align="center" valign="top">Positive/Negative</th>
<th align="center" valign="top">Average text length</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Dataset 1</td>
<td align="center" valign="middle">The Twitter depression dataset</td>
<td align="center" valign="middle">Twitter</td>
<td align="char" valign="middle" char=",">3,200</td>
<td align="char" valign="middle" char="/">843/2,357</td>
<td align="center" valign="middle">96</td>
</tr>
<tr>
<td align="left" valign="middle">Dataset 2</td>
<td align="center" valign="middle">The multiple languages twitter datasets (English-language)</td>
<td align="center" valign="middle">Twitter</td>
<td align="char" valign="middle" char=",">1,000</td>
<td align="char" valign="middle" char="/">500/500</td>
<td align="center" valign="middle">133</td>
</tr>
<tr>
<td align="left" valign="middle">Dataset 3</td>
<td align="center" valign="middle">The identifying-depression datasets</td>
<td align="center" valign="middle">Web Forums</td>
<td align="char" valign="middle" char=",">1,323</td>
<td align="char" valign="middle" char="/">390/933</td>
<td align="center" valign="middle">1,101</td>
</tr>
<tr>
<td align="left" valign="middle">Dataset 4</td>
<td align="center" valign="middle">The identifying-depression datasets</td>
<td align="center" valign="middle">Reddit</td>
<td align="char" valign="middle" char=",">1,841</td>
<td align="char" valign="middle" char="/">1,293/548</td>
<td align="center" valign="middle">1,123</td>
</tr>
<tr>
<td align="left" valign="middle">Dataset 5</td>
<td align="center" valign="middle">The identifying-depression datasets</td>
<td align="center" valign="middle">Reddit &#x0026; Web Forums</td>
<td align="char" valign="middle" char=",">2,821</td>
<td align="char" valign="middle" char="/">1,340/1,481</td>
<td align="center" valign="middle">1,133</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p>Class distribution for each dataset.</p>
</caption>
<graphic xlink:href="fpsyg-16-1612769-g002.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Bar chart comparing percentages of depression and non-depression across five datasets. Dataset 1: depression 26.34%, non-depression 73.66%. Dataset 2: both 50%. Dataset 3: depression 29.48%, non-depression 70.52%. Dataset 4: depression 29.77%, non-depression 70.23%. Dataset 5: depression 47.5%, non-depression 52.5%.</alt-text>
</graphic>
</fig>
<p>The Twitter depression dataset (<xref ref-type="bibr" rid="ref37">MacAvaney et al., 2021</xref>), which served as the basis for the practice dataset for the CLPsych 2021 competition, was constructed by collecting tweets related to depression, removing hashtags, and annotating them using binary classes (depression, D, 26.34%, and non-depression, ND, 73.66%). We denoted this dataset as &#x201C;Dataset 1&#x201D; and the majority of the dataset consists of brief, emotive posts with emojis.</p>
<p>In addition to the Twitter depression dataset, we also utilized the multiple languages Twitter datasets proposed by <xref ref-type="bibr" rid="ref11">Cha et al. (2022)</xref>. These datasets comprise 921&#x202F;k tweets from Korean users, 10&#x202F;M tweets from English users, and 15&#x202F;M tweets from Japanese users. Each language-specific dataset was labeled using a depression lexicon collected from previous studies on detecting depression on social media, with depression and non-depression categories assigned a binary classification (1 for depression, 0 for non-depression). In this study, we focused on the English-language dataset, which primarily consists of brief posts with emojis. This was denoted as &#x201C;Dataset 2&#x201D;.</p>
<p>Furthermore, we drew upon the Identifying-Depression Datasets collected by <xref ref-type="bibr" rid="ref44">Pirina and &#x00C7;&#x00F6;ltekin (2024)</xref>, which aggregated social media data from Reddit and web forums to identify depression. We annotated each post according to the labeling provided by the authors. The datasets were divided into three sub-datasets, denoted as &#x201C;Dataset 3,&#x201D; &#x201C;Dataset 4,&#x201D; and &#x201C;Dataset 5&#x201D;. &#x201C;Dataset 3&#x201D; and &#x201C;Dataset 4&#x201D; consisted of long posts with emojis, sourced from web forums and Reddit, respectively. &#x201C;Dataset 5&#x201D; combined content from both sources, providing a comprehensive dataset for depression detection.</p>
</sec>
<sec id="sec21">
<label>4.2</label>
<title>Evaluation metrics</title>
<p>To assess the overall performance of our depression detection models, we employ four widely used evaluation metrics: Accuracy, Precision, Recall, and class <italic>F</italic><sub>1</sub>-score. These metrics provide a comprehensive evaluation of the models&#x2019; ability to correctly identify depressive cases.</p>
<p>Accuracy is a fundamental metric, quantified as the proportion of correctly predicted instances out of the total number of instances. It is defined as follows (<xref ref-type="disp-formula" rid="EQ5">Equation 5</xref>):</p><disp-formula id="EQ5">
<label>(5)</label>
<mml:math id="M52">
<mml:mtext>Accuracy</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>TN</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>TN</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>FP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>FN</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:math>
</disp-formula><p>where TP (true positive) represents the count of depressed users who are accurately identified. TN (true negative) signifies the count of non-depressed users who are accurately identified. FP (false positive) indicates the count of non-depressed users who are inaccurately identified. FN (false negative) denotes the count of depressed users who are inaccurately identified.</p>
<p>Precision is calculated as the ratio of true positives among all positive predictions. It is defined as follows (<xref ref-type="disp-formula" rid="EQ6">Equation 6</xref>):</p><disp-formula id="EQ6">
<label>(6)</label>
<mml:math id="M53">
<mml:mtext>Precision</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mi>TP</mml:mi>
<mml:mrow>
<mml:mi>TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>FP</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:math>
</disp-formula>
<p>A high precision indicates that the model produces fewer false positives, which is crucial in clinical applications where wrong detection could lead to unnecessary interventions.</p>
<p>Recall is defined as the proportion of actual positive cases that are correctly identified. It measures the ability of the model to correctly identify all actual depressive cases. It is defined as follows (<xref ref-type="disp-formula" rid="EQ7">Equation 7</xref>):</p><disp-formula id="EQ7">
<label>(7)</label>
<mml:math id="M54">
<mml:mtext>Recall</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mi>TP</mml:mi>
<mml:mrow>
<mml:mi>TP</mml:mi>
<mml:mo>+</mml:mo>
<mml:mi>FN</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:math>
</disp-formula>
<p>A high recall value ensures that most depressive cases are correctly identified, ensuring that individuals in need receive appropriate attention.</p>
<p>The <italic>F</italic><sub>1</sub>-score, representing the harmonic mean of Precision and Recall, serves as a balanced measure of both, providing a holistic assessment of a model&#x2019;s performance. It is defined as follows (<xref ref-type="disp-formula" rid="EQ8">Equation 8</xref>):</p><disp-formula id="EQ8">
<label>(8)</label>
<mml:math id="M55">
<mml:msub>
<mml:mi>F</mml:mi>
<mml:mn>1</mml:mn>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mtext>Score</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mo>&#x00D7;</mml:mo>
<mml:mtext>Precision</mml:mtext>
<mml:mo>&#x00D7;</mml:mo>
<mml:mtext>Recall</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mtext>Precision</mml:mtext>
<mml:mo>+</mml:mo>
<mml:mtext>Recall</mml:mtext>
</mml:mrow>
</mml:mfrac>
</mml:math>
</disp-formula>
<p>A higher <italic>F</italic><sub>1</sub>-score reflects a better trade-off between Precision and Recall, making it a valuable metric for evaluating depression detection models.</p>
</sec>
<sec id="sec22">
<label>4.3</label>
<title>Experiment setup</title>
<p>We propose an advanced TextGCN framework that leverages diverse emotion representations, derived from pre-trained language models, as input. To substantiate its efficacy, we conduct a comprehensive evaluation of its performance across five publicly available datasets in the depression domain, in comparison to the basic TextGCN with one-hot encoding. <xref ref-type="fig" rid="fig3">Figure 3</xref> shows the diagram of this experiment.</p>
<fig position="float" id="fig3">
<label>Figure 3</label>
<caption>
<p>The diagram of this experiment.</p>
</caption>
<graphic xlink:href="fpsyg-16-1612769-g003.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Flowchart depicting a machine learning process for depression detection. It consists of four sections: Data Collection, Data Preprocessing, Model Training, and Experimental Operation. Data Collection includes three GitHub sources for datasets. Data Preprocessing details selecting and merging datasets with labels. Model Training outlines steps like building a text graph, generating embeddings, and configuring training parameters. Experimental Operation covers evaluation metrics, baseline comparison, cross-dataset analysis, and result visualization. Arrows indicate the process flow between sections.</alt-text>
</graphic>
</fig>
<p>In our experiments, we assess the performance of our proposed system in relation to five pre-trained language models (PLMs) that utilize transformer-based approaches. The BERT model (<xref ref-type="bibr" rid="ref14">Devlin et al., 2018</xref>) is a transformer-based pre-trained language representation model that has achieved state-of-the-art results on numerous NLP tasks through its next sentence prediction and masked language modeling capabilities. RoBERTa (<xref ref-type="bibr" rid="ref36">Liu et al., 2019</xref>), a variant of BERT, builds upon the same training setup but incorporates dynamic masking, expands the training dataset, and increases the batch size to enhance its performance. In contrast, MentalBERT, MentalRoBERTa, and RoBERTaDepressionDetection are domain-specific pre-trained language models, tailored to the depression detection task, and are built upon either BERT or RoBERTa. These models have demonstrated improved performance in mental health detection tasks and depression detection tasks through fine-tuning on mental health-related data.</p>
<p>We employed an 80:20 train/test split to train our model, with a training duration of 200 epochs and an early stopping criterion of 10 epochs, utilizing the Adam optimizer for training. To further ensure the robustness and generalizability of the proposed method, we also conducted K-fold cross-validation, where the dataset was partitioned into K subsets and each subset was used as the test set exactly once. We specified the following hyperparameters: a hidden dimension size of 200, a dropout rate of 0.5, a learning rate of 0.02, and a configuration of two GCN layers. Hyperparameter tuning was conducted using Optuna, a hyperparameter optimization framework. The optimized hyperparameters included the number of hidden layers (L&#x202F;=&#x202F;{2, 3, 4, 5}), hidden layer dimensions (H&#x202F;=&#x202F;{100, 200, 300, 400, 500}), dropout rates (dr&#x202F;=&#x202F;{0.01, 0.05, 0.1, 0.5}), learning rates (lr&#x202F;=&#x202F;{0.01, 0.02, 0.03, 0.04, 0.05}), and weight decay values (wd&#x202F;=&#x202F;{0, 0.005, 0.05}). We acknowledge the potential for information leakage resulting from the inclusion of test nodes in the graph during training. This issue may inadvertently introduce indirect supervision signals and affect the validity of performance evaluation. In future work, we plan to explore alternative graph construction strategies or transductive-to-inductive adaptations to mitigate such leakage and ensure a more rigorous experimental setup.</p>
</sec>
<sec id="sec23">
<label>4.4</label>
<title>Experiment results</title>
<p>Our model evaluation protocol involves a depression detection task, which serves as a benchmark for assessing the performance of our proposed approach. <xref ref-type="table" rid="tab2">Table 2</xref> provides a comparative analysis of our model&#x2019;s results with the initial TextGCN model, which relies solely on a one-hot vector input without incorporating pre-trained emotion representations or external knowledge. For each evaluation metric (Accuracy, Precision, Recall, <italic>F</italic><sub>1</sub>-Score), <italic>p</italic>-values were calculated by paired <italic>t</italic>-tests comparing each optimized model to the baseline (One Hot). Significant improvements are highlighted where <italic>p</italic>&#x202F;&#x003C;&#x202F;0.05.</p>
<table-wrap position="float" id="tab2">
<label>Table 2</label>
<caption>
<p>The results of the enhanced TextGCN with emotion representation learned from pre-trained language models.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Dataset No.</th>
<th align="center" valign="top">Models</th>
<th align="center" valign="top">Accuracy</th>
<th align="center" valign="top">Precision</th>
<th align="center" valign="top">Recall</th>
<th align="center" valign="top"><italic>F</italic><sub>1</sub>-score</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle" rowspan="6">Dataset 1</td>
<td align="center" valign="middle">One Hot</td>
<td align="char" valign="middle" char=".">0.7878</td>
<td align="char" valign="middle" char=".">0.6947</td>
<td align="char" valign="middle" char=".">0.3704</td>
<td align="char" valign="middle" char=".">0.4805</td>
</tr>
<tr>
<td align="center" valign="middle">BERT</td>
<td align="char" valign="middle" char=".">0.7929</td>
<td align="char" valign="middle" char=".">0.6485</td>
<td align="char" valign="middle" char=".">0.4913<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.5590<sup>&#x002A;</sup></td>
</tr>
<tr>
<td align="center" valign="middle">RoBERTa</td>
<td align="char" valign="middle" char=".">0.7932</td>
<td align="char" valign="middle" char=".">0.6254</td>
<td align="char" valign="middle" char=".">0.5653<sup>&#x002A;&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.5932<sup>&#x002A;&#x002A;</sup></td>
</tr>
<tr>
<td align="center" valign="middle">MentalBERT</td>
<td align="char" valign="middle" char=".">0.7834</td>
<td align="char" valign="middle" char=".">0.6115</td>
<td align="char" valign="middle" char=".">0.5195<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.5616<sup>&#x002A;</sup></td>
</tr>
<tr>
<td align="center" valign="middle">MentalRoBERTa</td>
<td align="char" valign="middle" char=".">0.7939</td>
<td align="char" valign="middle" char=".">0.6252</td>
<td align="char" valign="middle" char=".">0.5714<sup>&#x002A;&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.5965<sup>&#x002A;&#x002A;</sup></td>
</tr>
<tr>
<td align="center" valign="middle">RoBERTaDepressionDetection</td>
<td align="char" valign="middle" char=".">0.7940</td>
<td align="char" valign="middle" char=".">0.6255</td>
<td align="char" valign="middle" char=".">0.5712<sup>&#x002A;&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.5966<sup>&#x002A;&#x002A;</sup></td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="6">Dataset 2</td>
<td align="center" valign="middle">One Hot</td>
<td align="char" valign="middle" char=".">0.7727</td>
<td align="char" valign="middle" char=".">0.7755</td>
<td align="char" valign="middle" char=".">0.7747</td>
<td align="char" valign="middle" char=".">0.7747</td>
</tr>
<tr>
<td align="center" valign="middle">BERT</td>
<td align="char" valign="middle" char=".">0.7835</td>
<td align="char" valign="middle" char=".">0.7973</td>
<td align="char" valign="middle" char=".">0.7684</td>
<td align="char" valign="middle" char=".">0.7811</td>
</tr>
<tr>
<td align="center" valign="middle">RoBERTa</td>
<td align="char" valign="middle" char=".">0.7876<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.7929</td>
<td align="char" valign="middle" char=".">0.7884<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.7891<sup>&#x002A;</sup></td>
</tr>
<tr>
<td align="center" valign="middle">MentalBERT</td>
<td align="char" valign="middle" char=".">0.7872<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.7987</td>
<td align="char" valign="middle" char=".">0.7763</td>
<td align="char" valign="middle" char=".">0.7857</td>
</tr>
<tr>
<td align="center" valign="middle">MentalRoBERTa</td>
<td align="char" valign="middle" char=".">0.7896<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.7910</td>
<td align="char" valign="middle" char=".">0.7960<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.7922<sup>&#x002A;</sup></td>
</tr>
<tr>
<td align="center" valign="middle">RoBERTaDepressionDetection</td>
<td align="char" valign="middle" char=".">0.7869<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.7791</td>
<td align="char" valign="middle" char=".">0.8074<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.7927<sup>&#x002A;</sup></td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="6">Dataset 3</td>
<td align="center" valign="middle">One Hot</td>
<td align="char" valign="middle" char=".">0.8429</td>
<td align="char" valign="middle" char=".">0.6725</td>
<td align="char" valign="middle" char=".">0.8845</td>
<td align="char" valign="middle" char=".">0.7638</td>
</tr>
<tr>
<td align="center" valign="middle">BERT</td>
<td align="char" valign="middle" char=".">0.8119</td>
<td align="char" valign="middle" char=".">0.8205<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.9229<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.8685<sup>&#x002A;</sup></td>
</tr>
<tr>
<td align="center" valign="middle">RoBERTa</td>
<td align="char" valign="middle" char=".">0.8596<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.8836<sup>&#x002A;&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.9118<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.8974<sup>&#x002A;&#x002A;</sup></td>
</tr>
<tr>
<td align="center" valign="middle">MentalBERT</td>
<td align="char" valign="middle" char=".">0.8279</td>
<td align="char" valign="middle" char=".">0.8369<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.9258<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.8788<sup>&#x002A;</sup></td>
</tr>
<tr>
<td align="center" valign="middle">MentalRoBERTa</td>
<td align="char" valign="middle" char=".">0.9020<sup>&#x002A;&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.9162<sup>&#x002A;&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.9407<sup>&#x002A;&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.9281<sup>&#x002A;&#x002A;</sup></td>
</tr>
<tr>
<td align="center" valign="middle">RoBERTaDepressionDetection</td>
<td align="char" valign="middle" char=".">0.8958<sup>&#x002A;&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.9037<sup>&#x002A;&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.9465<sup>&#x002A;&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.9245<sup>&#x002A;&#x002A;</sup></td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="6">Dataset 4</td>
<td align="center" valign="middle">One Hot</td>
<td align="char" valign="middle" char=".">0.7957</td>
<td align="char" valign="middle" char=".">0.9223</td>
<td align="char" valign="middle" char=".">0.7851</td>
<td align="char" valign="middle" char=".">0.8479</td>
</tr>
<tr>
<td align="center" valign="middle">BERT</td>
<td align="char" valign="middle" char=".">0.8328<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.9449</td>
<td align="char" valign="middle" char=".">0.8177<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.8762<sup>&#x002A;</sup></td>
</tr>
<tr>
<td align="center" valign="middle">RoBERTa</td>
<td align="char" valign="middle" char=".">0.8402<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.9528</td>
<td align="char" valign="middle" char=".">0.8210<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.8813<sup>&#x002A;</sup></td>
</tr>
<tr>
<td align="center" valign="middle">MentalBERT</td>
<td align="char" valign="middle" char=".">0.8434<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.8832</td>
<td align="char" valign="middle" char=".">0.9041<sup>&#x002A;&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.8935<sup>&#x002A;&#x002A;</sup></td>
</tr>
<tr>
<td align="center" valign="middle">MentalRoBERTa</td>
<td align="char" valign="middle" char=".">0.8676<sup>&#x002A;&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.9496</td>
<td align="char" valign="middle" char=".">0.8638<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.9043<sup>&#x002A;&#x002A;</sup></td>
</tr>
<tr>
<td align="center" valign="middle">RoBERTaDepressionDetection</td>
<td align="char" valign="middle" char=".">0.8654<sup>&#x002A;&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.9437</td>
<td align="char" valign="middle" char=".">0.8646<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.9024<sup>&#x002A;&#x002A;</sup></td>
</tr>
<tr>
<td align="left" valign="middle" rowspan="6">Dataset 5</td>
<td align="center" valign="middle">One Hot</td>
<td align="char" valign="middle" char=".">0.8986</td>
<td align="char" valign="middle" char=".">0.9462</td>
<td align="char" valign="middle" char=".">0.9172</td>
<td align="char" valign="middle" char=".">0.9314</td>
</tr>
<tr>
<td align="center" valign="middle">BERT</td>
<td align="char" valign="middle" char=".">0.7854</td>
<td align="char" valign="middle" char=".">0.8743</td>
<td align="char" valign="middle" char=".">0.8329</td>
<td align="char" valign="middle" char=".">0.8530</td>
</tr>
<tr>
<td align="center" valign="middle">RoBERTa</td>
<td align="char" valign="middle" char=".">0.8532</td>
<td align="char" valign="middle" char=".">0.9233</td>
<td align="char" valign="middle" char=".">0.8766</td>
<td align="char" valign="middle" char=".">0.8992</td>
</tr>
<tr>
<td align="center" valign="middle">MentalBERT</td>
<td align="char" valign="middle" char=".">0.8252</td>
<td align="char" valign="middle" char=".">0.9238</td>
<td align="char" valign="middle" char=".">0.8355</td>
<td align="char" valign="middle" char=".">0.8772</td>
</tr>
<tr>
<td align="center" valign="middle">MentalRoBERTa</td>
<td align="char" valign="middle" char=".">0.9104<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.9593<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.9193</td>
<td align="char" valign="middle" char=".">0.9388<sup>&#x002A;</sup></td>
</tr>
<tr>
<td align="center" valign="middle">RoBERTaDepressionDetection</td>
<td align="char" valign="middle" char=".">0.9096<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.9578<sup>&#x002A;</sup></td>
<td align="char" valign="middle" char=".">0.9197</td>
<td align="char" valign="middle" char=".">0.9382<sup>&#x002A;</sup></td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>Statistical tests were conducted using paired <italic>t</italic>-tests between each model and the baseline. Statistically significant improvements were denoted by <sup>&#x002A;</sup><italic>p</italic>&#x202F;&#x003C;&#x202F;0.05, <sup>&#x002A;&#x002A;</sup><italic>p</italic>&#x202F;&#x003C;&#x202F;0.01, and <sup>&#x002A;&#x002A;&#x002A;</sup><italic>p</italic>&#x202F;&#x003C;&#x202F;0.001.</p>
</table-wrap-foot>
</table-wrap>
<p>Firstly, pre-trained models have demonstrated superior performance compared to One-Hot encoding across various tasks, particularly in terms of recall and <italic>F</italic><sub>1</sub>-score metrics. In dataset 1, the RoBERTaDepressionDetection model achieved a statistically significant improvement of 24.162% in <italic>F</italic><sub>1</sub>-score and 0.787% in accuracy over One-Hot encoding, showcasing its enhanced semantic capture capabilities. Similarly, in dataset 3, the MentalRoBERTa model outperformed One-Hot encoding by 21.511% in <italic>F</italic><sub>1</sub>-score and 7.012% in accuracy, further substantiating its superiority in domain adaptation. In dataset 5, pre-trained models experienced some decline in performance, but MentalRoBERTa retained marginal gains, indicating its robustness in handling diverse datasets.</p>
<p>Secondly, RoBERTa has been found to outperform BERT in most tasks, with notable improvements in <italic>F</italic><sub>1</sub>-score and accuracy. In dataset 3, RoBERTa surpassed BERT by 3.328% in <italic>F</italic><sub>1</sub>-score and 5.875% in accuracy, highlighting its enhanced performance in specific tasks. Furthermore, in dataset 5, RoBERTa achieved an 8.6325% higher accuracy and a 5.416% higher <italic>F</italic><sub>1</sub>-score compared to BERT, validating its superiority across various datasets.</p>
<p>Thirdly, the domain-adaptive pre-trained models (MentalBERT and MentalRoBERTa) and the depression detection fine-tuned model (RoBERTaDepressionDetection) have demonstrated enhanced performance compared to their base counterparts (BERT/RoBERTa) in specific tasks.</p>
<p>MentalBERT has generally outperformed the basic BERT model in mental health-related tasks, as evident in dataset 5, where it achieved a notable 5.068% accuracy boost and a 2.837% <italic>F</italic><sub>1</sub>-score enhancement. However, in dataset 1, MentalBERT&#x2019;s accuracy was lower than that of BERT, indicating a trade-off between domain specialization and generalization. Conversely, MentalRoBERTa has consistently outperformed RoBERTa, as seen in dataset 3, where it achieved a 4.933% accuracy increase and a 3.421% <italic>F</italic><sub>1</sub>-score improvement. Additionally, RoBERTaDepressionDetection outperformed RoBERTa in dataset 3, with a 3.020% higher <italic>F</italic><sub>1</sub>-score, but achieved only a 0.456% higher <italic>F</italic><sub>1</sub>-score in dataset 2, highlighting the dependence on data quality.</p>
<p>Additionally, as shown in <xref ref-type="table" rid="tab2">Table 2</xref>, the optimized models consistently outperformed the baseline across all metrics. Statistically significant improvements were denoted by <sup>&#x002A;</sup><italic>p</italic>&#x202F;&#x003C;&#x202F;0.05, <sup>&#x002A;&#x002A;</sup><italic>p</italic>&#x202F;&#x003C;&#x202F;0.01, and <sup>&#x002A;&#x002A;&#x002A;</sup><italic>p</italic>&#x202F;&#x003C;&#x202F;0.001.</p>
</sec>
</sec>
<sec sec-type="discussion" id="sec24">
<label>5</label>
<title>Discussion</title>
<p>The results of our experiments provide comprehensive insights into the performance of various fine-tuned pre-trained language models across different datasets, highlighting the importance of dataset quality and emotion representation. Our research results underscore the need for a multifaceted approach to optimization. From a dataset perspective, it is essential to consider platform-specific linguistic conventions, emoji semantics, class balance, and other relevant factors in the design phase. In model development, we recommend incorporating cutting-edge architectural innovations, domain adaptation techniques, and hybrid training strategies to optimize emotion representation for enhancing model performance. This analysis synthesizes empirical evidence to inform both technical implementation and data strategy decisions in the context of depression detection research.</p>
<sec id="sec25">
<label>5.1</label>
<title>Impact of text length and contextual richness</title>
<p><xref ref-type="table" rid="tab3">Table 3</xref> provides the text length statistics for the five datasets, whereas <xref ref-type="fig" rid="fig4">Figure 4</xref> visualizes the distribution of text lengths within these datasets. The average text length varies considerably across the datasets. Dataset 1 has a mean length of 96 words with a standard deviation of 53, ranging from 6 to 374 words. Dataset 2 shows a slightly longer average text length of 133 words (SD&#x202F;=&#x202F;85), with lengths ranging from 4 to 327 words. In contrast, Datasets 3 to 5 contain substantially longer texts. Dataset 3 has a mean length of 1,101 words (SD&#x202F;=&#x202F;817), with a minimum of 52 words and a maximum of 5,717. Dataset 4 exhibits a higher variability, with a mean of 1,123 words and a standard deviation of 1,328, spanning from 11 to 17,601 words. Dataset 5 is similar to Dataset 4, with a mean length of 1,133 words (SD&#x202F;=&#x202F;1,148) and the same minimum and maximum text lengths (11 and 17,601 words, respectively).</p>
<table-wrap position="float" id="tab3">
<label>Table 3</label>
<caption>
<p>Text length statistics for the five datasets.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Dataset No.</th>
<th align="center" valign="top" colspan="4">Text length statistics</th>
</tr>
<tr>
<th align="center" valign="top">Mean</th>
<th align="center" valign="top">Std.</th>
<th align="center" valign="top">Min.</th>
<th align="center" valign="top">Max.</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="middle">Dataset 1</td>
<td align="center" valign="middle">96</td>
<td align="center" valign="top">53</td>
<td align="center" valign="top">6</td>
<td align="center" valign="top">374</td>
</tr>
<tr>
<td align="left" valign="middle">Dataset 2</td>
<td align="center" valign="middle">133</td>
<td align="center" valign="top">85</td>
<td align="center" valign="top">4</td>
<td align="center" valign="top">327</td>
</tr>
<tr>
<td align="left" valign="middle">Dataset 3</td>
<td align="center" valign="middle">1,101</td>
<td align="center" valign="top">817</td>
<td align="center" valign="top">52</td>
<td align="center" valign="top">5,717</td>
</tr>
<tr>
<td align="left" valign="middle">Dataset 4</td>
<td align="center" valign="middle">1,123</td>
<td align="center" valign="top">1,328</td>
<td align="center" valign="top">11</td>
<td align="center" valign="top">17,601</td>
</tr>
<tr>
<td align="left" valign="middle">Dataset 5</td>
<td align="center" valign="middle">1,133</td>
<td align="center" valign="top">1,148</td>
<td align="center" valign="top">11</td>
<td align="center" valign="top">17,601</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig position="float" id="fig4">
<label>Figure 4</label>
<caption>
<p>Distributions of text length across the five datasets. Subplots correspond to: <bold>(a)</bold> Dataset 1; <bold>(b)</bold> Dataset 2; <bold>(c)</bold> Dataset 3; <bold>(d)</bold> Dataset 4; and <bold>(e)</bold> Dataset 5.</p>
</caption>
<graphic xlink:href="fpsyg-16-1612769-g004.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Bar charts display text length distributions for five datasets. Each chart shows frequency versus text length in characters. Average lengths are marked with red dashed lines. Charts vary in scale, with Dataset 1 peaking around one hundred, and Datasets 4 and 5 demonstrating more extended but less frequent distributions.</alt-text>
</graphic>
</fig>
<p>These statistics indicate that the datasets differ significantly in text length, from short texts in Datasets 1 and 2 to much longer and more variable texts in Datasets 3 to 5. Consistent with the findings of <xref ref-type="bibr" rid="ref53">Wongkoblap et al. (2021)</xref>, models trained on longer posts (e.g., Dataset 3, Dataset 4, and Dataset 5) exhibited superior performance, likely attributable to richer contextual signals. Social media texts often convey nuanced expressions spanning multiple sentences, requiring the preservation of complete contextual integrity. Future research should explore hybrid methods to balance context preservation with computing efficiency.</p>
</sec>
<sec id="sec26">
<label>5.2</label>
<title>Impact of emoticons</title>
<p>In previous studies, emoticons were often regarded as noise and removed during mental health detection or emotion analysis (<xref ref-type="bibr" rid="ref38">Maghraby and Ali, 2022</xref>; <xref ref-type="bibr" rid="ref47">Samee et al., 2023</xref>). In this study, however, we preserve emoticons and emojis by treating them as individual tokens for contextualization. Meanwhile, emoticon recognition technology is emerging. For example, mapping emojis to affect-aware embeddings has shown significant potential for leveraging their affective value (<xref ref-type="bibr" rid="ref12">Chen et al., 2018</xref>). Although this study did not further utilize emojis beyond tokenization, future research could incorporate recognition techniques to better exploit the affective information carried by emoticons in social media data, without introducing additional noise.</p>
</sec>
<sec id="sec27">
<label>5.3</label>
<title>Impact of imbalanced data</title>
<p>Balanced datasets (e.g., <italic>F</italic><sub>1</sub>-score in Dataset 2 and Dataset 5) outperformed skewed distributions, corroborating class imbalance as a critical challenge in mental health detection (<xref ref-type="bibr" rid="ref1">Adel et al., 2024</xref>). Notably, domain-specific models like MentalRoBERTa partially mitigated imbalance effects (Dataset 4 <italic>F</italic><sub>1</sub>-score&#x202F;+&#x202F;2.61%), suggesting that domain-aware feature learning compensates for distributional skewness. <xref ref-type="fig" rid="fig5">Figure 5</xref> illustrates the performance comparison of TextGCN models enhanced with emotion representation learned from five different pre-trained language models.</p>
<fig position="float" id="fig5">
<label>Figure 5</label>
<caption>
<p>Performance comparison of TextGCN models enhanced with five different emotion representations.</p>
</caption>
<graphic xlink:href="fpsyg-16-1612769-g005.tif" mimetype="image" mime-subtype="tiff">
<alt-text content-type="machine-generated">Bar chart comparing F1 scores of different models across five datasets. Models are BERT, Roberta, MentalBERT, MentalRoberta, and RobertaDepressionDetection, with varying shades of blue. Dataset 1 scores range from 0.559 to 0.597, Dataset 2 from 0.789 to 0.793, Dataset 3 from 0.869 to 0.924, Dataset 4 from 0.876 to 0.904, and Dataset 5 from 0.853 to 0.939. Highest scores are seen in Dataset 5, particularly for RobertaDepressionDetection.</alt-text>
</graphic>
</fig>
</sec>
<sec id="sec28">
<label>5.4</label>
<title>Architectural advantages of RoBERTa</title>
<p>RoBERTa and RoBERTa-based models outperform BERT and BERT-based models in most tasks. This suggests that RoBERTa&#x2019;s unique features, such as dynamic masking and large-batch training, make it more adaptable to multitasking requirements. This aligns with clinical NLP studies where RoBERTa variants excel at detecting subtle symptom patterns (<xref ref-type="bibr" rid="ref31">Lewis et al., 2020</xref>).</p>
</sec>
<sec id="sec29">
<label>5.5</label>
<title>Domain adaptation vs. task-specific fine-tuning</title>
<p>MentalRoBERTa&#x2019;s success highlights the effect of domain adaptation, whereas RoBERTaDepressionDetection&#x2019;s data-dependent performance reveals the limitations of task-specific fine-tuning in low-resource settings. A promising direction involves hybrid strategies like staged training, which has achieved state-of-the-art results in biomedical NLP (<xref ref-type="bibr" rid="ref6">Beltagy et al., 2019</xref>).</p>
</sec>
<sec id="sec30">
<label>5.6</label>
<title>Domain specificity and data homology</title>
<p>MentalBERT and MentalRoBERTa exhibit superior performance on datasets comprising Reddit posts, given their pre-training data originates from Reddit, while RoBERTaDepressionDetection excels on Twitter datasets. The results reflect cross-platform generalization challenges arising from platform-specific linguistic norms, user language and demographic characteristics (<xref ref-type="bibr" rid="ref36">Liu et al., 2019</xref>). Adversarial domain adaptation, as explored in prior work, may mitigate such discrepancies (<xref ref-type="bibr" rid="ref51">Sun et al., 2019</xref>). Nevertheless, all pre-trained models outperform the foundational ones. This emphasizes the pioneering exploratory value of the proposed framework in the field of early depression detection.</p>
</sec>
</sec>
<sec sec-type="conclusions" id="sec31">
<label>6</label>
<title>Conclusion</title>
<p>Due to the importance of protection of individual privacy, detecting depression through social media posts is a challenging task. Recent advancements in computer technology have enabled the development of novel algorithms for early depression detection, with some research focusing on enhancing contextual representations to improve model performance. Our work introduces a novel approach to text-based depression detection by incorporating pre-trained emotion representations into a graph-based classification model, TextGCN. This model effectively captures the semantic and syntactic relationships between words and documents, and we evaluate its performance on a depression detection task. Our results demonstrate that TextGCN with emotion representations outperforms baseline models across five datasets, with pre-trained models in specific domains showing superior performance compared to their baseline counterparts. This study proposes a novel framework for early depression detection, which has the potential to improve individual well-being and reduce the societal and economic burdens associated with untreated mental disorders.</p>
<p>Although emoticons and emojis were preserved as individual tokens to maintain contextual integrity, they were not incorporated into the embedding-level representations. Future work could explore mapping these elements to affect-aware embeddings to better leverage their emotional signals without introducing noise. Furthermore, our current approach is limited by the inability to dynamically generate input for TextGCN, relying solely on the pre-trained model&#x2019;s embeddings. This limitation may hinder achieving enhanced prediction performance, which could be realized through concurrent training with both the pre-trained language models and TextGCN. To address this limitation, we intend to develop better pre-trained language models with more balanced and targeted data in the future, with the goal of achieving improved prediction performance.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="sec32">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/Supplementary material; further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="sec33">
<title>Author contributions</title>
<p>HM: Project administration, Validation, Formal analysis, Writing &#x2013; original draft, Data curation, Supervision, Conceptualization, Investigation, Visualization, Methodology. QH: Funding acquisition, Conceptualization, Resources, Methodology, Investigation, Visualization, Validation, Project administration, Writing &#x2013; original draft, Supervision, Formal analysis, Writing &#x2013; review &#x0026; editing, Software, Data curation.</p>
</sec>
<sec sec-type="funding-information" id="sec34">
<title>Funding</title>
<p>The author(s) declare that financial support was received for the research and/or publication of this article. This work was supported by Project of Zhejiang Federation of Humanities and Social Sciences (No. 2024B014) and Zhejiang Provincial Traditional Chinese Medicine Science and Technology Plan Project (No. 2024ZF059).</p>
</sec>
<sec sec-type="COI-statement" id="sec35">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="sec36">
<title>Generative AI statement</title>
<p>The authors declare that no Gen AI was used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="sec37">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="ref1"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Adel</surname><given-names>S.</given-names></name> <name><surname>Elmadany</surname><given-names>N.</given-names></name> <name><surname>Sharkas</surname><given-names>M.</given-names></name></person-group> (<year>2024</year>). <article-title>AI-driven mental disorders categorization from social media: a deep learning pre-screening framework</article-title>. <conf-name>2024 International Conference on Machine Intelligence and Smart Innovation (ICMISI)</conf-name> (IEEE), <fpage>238</fpage>&#x2013;<lpage>243</lpage>.</citation></ref>
<ref id="ref2"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Altamura</surname><given-names>A. C.</given-names></name> <name><surname>Dell&#x2019;osso</surname><given-names>B.</given-names></name> <name><surname>Vismara</surname><given-names>S.</given-names></name> <name><surname>Mundo</surname><given-names>E.</given-names></name></person-group> (<year>2008</year>). <article-title>May duration of untreated illness influence the long-term course of major depressive disorder?</article-title> <source>Eur. Psychiatry</source> <volume>23</volume>, <fpage>92</fpage>&#x2013;<lpage>96</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.eurpsy.2007.11.004</pub-id>, PMID: <pub-id pub-id-type="pmid">18248964</pub-id></citation></ref>
<ref id="ref3"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Aydo&#x011F;an</surname><given-names>M.</given-names></name> <name><surname>Karci</surname><given-names>A.</given-names></name></person-group> (<year>2020</year>). <article-title>Improving the accuracy using pre-trained word embeddings on deep neural networks for Turkish text classification</article-title>. <source>Phys. A: Stat. Mech. Appl.</source> <volume>541</volume>:<fpage>123288</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.physa.2019.123288</pub-id></citation></ref>
<ref id="ref4"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Ayyalasomayajula</surname><given-names>M. M. T.</given-names></name></person-group> (<year>2024</year>). <article-title>Analyzing Language Patterns for Depression Detection on Social Media: Insights from Reddit Data and Machine Learning Techniques</article-title>. TechRxiv. Available online at: <ext-link xlink:href="https://doi.org/10.36227/techrxiv.171340759.91176164/v1" ext-link-type="uri">https://doi.org/10.36227/techrxiv.171340759.91176164/v1</ext-link>. [Epub ahead of preprint]</citation></ref>
<ref id="ref5"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Balani</surname><given-names>S.</given-names></name> <name><surname>De Choudhury</surname><given-names>M.</given-names></name></person-group> (<year>2024</year>). <article-title>Detecting and characterizing mental health related self-disclosure in social media</article-title>. <conf-name>Proceedings of the 33rd Annual ACM Conference Extended Abstracts on Human Factors in Computing Systems</conf-name>. <fpage>1373</fpage>&#x2013;<lpage>1378</lpage>.</citation></ref>
<ref id="ref6"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Beltagy</surname><given-names>I.</given-names></name> <name><surname>Lo</surname><given-names>K.</given-names></name> <name><surname>Cohan</surname><given-names>A.</given-names></name></person-group> (<year>2019</year>). <article-title>SciBERT: a pretrained language model for scientific text</article-title>. <conf-name>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</conf-name></citation></ref>
<ref id="ref7"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Bucur</surname><given-names>A.-M.</given-names></name></person-group> (<year>2024</year>). &#x201C;<article-title>Leveraging LLM-generated data for detecting depression symptoms on social media</article-title>&#x201D; in <source>Experimental IR meets multilinguality, multimodality, and interaction</source> (<publisher-loc>Cham</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>193</fpage>&#x2013;<lpage>204</lpage>.</citation></ref>
<ref id="ref8"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bukh</surname><given-names>J. D.</given-names></name> <name><surname>Bock</surname><given-names>C.</given-names></name> <name><surname>Vinberg</surname><given-names>M.</given-names></name> <name><surname>Kessing</surname><given-names>L. V.</given-names></name></person-group> (<year>2013</year>). <article-title>The effect of prolonged duration of untreated depression on antidepressant treatment outcome</article-title>. <source>J. Affect. Disord.</source> <volume>145</volume>, <fpage>42</fpage>&#x2013;<lpage>48</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jad.2012.07.008</pub-id>, PMID: <pub-id pub-id-type="pmid">22854096</pub-id></citation></ref>
<ref id="ref9"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cabral</surname><given-names>R. C.</given-names></name> <name><surname>Han</surname><given-names>S. C.</given-names></name> <name><surname>Poon</surname><given-names>J.</given-names></name> <name><surname>Nenadic</surname><given-names>G.</given-names></name></person-group> (<year>2024</year>). <article-title>MM-EMOG: multi-label emotion graph representation for mental health classification on social media</article-title>. <source>Robotics</source> <volume>13</volume>:<fpage>53</fpage>. doi: <pub-id pub-id-type="doi">10.3390/robotics13030053</pub-id></citation></ref>
<ref id="ref10"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Carey</surname><given-names>M.</given-names></name> <name><surname>Jones</surname><given-names>K.</given-names></name> <name><surname>Meadows</surname><given-names>G.</given-names></name> <name><surname>Sanson-Fisher</surname><given-names>R.</given-names></name> <name><surname>D'Este</surname><given-names>C.</given-names></name> <name><surname>Inder</surname><given-names>K.</given-names></name> <etal/></person-group>. (<year>2014</year>). <article-title>Accuracy of general practitioner unassisted detection of depression</article-title>. <source>Aust. N. Z. J. Psychiatry</source> <volume>48</volume>, <fpage>571</fpage>&#x2013;<lpage>578</lpage>. doi: <pub-id pub-id-type="doi">10.1177/0004867413520047</pub-id>, PMID: <pub-id pub-id-type="pmid">24413807</pub-id></citation></ref>
<ref id="ref11"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cha</surname><given-names>J.</given-names></name> <name><surname>Kim</surname><given-names>S.</given-names></name> <name><surname>Park</surname><given-names>E.</given-names></name></person-group> (<year>2022</year>). <article-title>A lexicon-based approach to examine depression detection in social media: the case of Twitter and university community</article-title>. <source>Humanit. Soc. Sci. Commun.</source> <volume>9</volume>:<fpage>325</fpage>. doi: <pub-id pub-id-type="doi">10.1057/s41599-022-01313-2</pub-id>, PMID: <pub-id pub-id-type="pmid">36159708</pub-id></citation></ref>
<ref id="ref12"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Chen</surname><given-names>Y.</given-names></name> <name><surname>Yuan</surname><given-names>J.</given-names></name> <name><surname>You</surname><given-names>Q.</given-names></name> <name><surname>Luo</surname><given-names>J.</given-names></name></person-group> (<year>2018</year>). <article-title>Twitter sentiment analysis via bi-sense emoji embedding and attention-based LSTM</article-title>. <conf-name>Proceedings of the 26th ACM International Conference on Multimedia</conf-name>. <fpage>117</fpage>&#x2013;<lpage>125</lpage>.</citation></ref>
<ref id="ref13"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Couto</surname><given-names>M.</given-names></name> <name><surname>Perez</surname><given-names>A.</given-names></name> <name><surname>Parapar</surname><given-names>J.</given-names></name></person-group> (<year>2022</year>). <article-title>Temporal word embeddings for early detection of signs of depression</article-title>. <conf-name>CIRCLE (Joint Conference of The Information Retrieval Communities in Europe)</conf-name></citation></ref>
<ref id="ref14"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Devlin</surname><given-names>J.</given-names></name> <name><surname>Chang</surname><given-names>M.-W.</given-names></name> <name><surname>Lee</surname><given-names>K.</given-names></name> <name><surname>Toutanova</surname><given-names>K.</given-names></name></person-group> (<year>2018</year>). <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>. <italic>arXiv</italic>. Available online at: <ext-link xlink:href="https://doi.org/10.48550/arXiv.1810.04805" ext-link-type="uri">https://doi.org/10.48550/arXiv.1810.04805</ext-link>. [Epub ahead of preprint]</citation></ref>
<ref id="ref15"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Dinkel</surname><given-names>H.</given-names></name> <name><surname>Wu</surname><given-names>M.</given-names></name> <name><surname>Yu</surname><given-names>K.</given-names></name></person-group> (<year>2019</year>). <article-title>Text-based depression detection on sparse data</article-title>. <italic>arXiv</italic>. Available online at: <ext-link xlink:href="https://doi.org/10.48550/arXiv.1904.05154" ext-link-type="uri">https://doi.org/10.48550/arXiv.1904.05154</ext-link>. [Epub ahead of preprint]</citation></ref>
<ref id="ref16"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Doraiswamy</surname><given-names>P. M.</given-names></name> <name><surname>Blease</surname><given-names>C.</given-names></name> <name><surname>Bodner</surname><given-names>K.</given-names></name></person-group> (<year>2020</year>). <article-title>Artificial intelligence and the future of psychiatry: insights from a global physician survey</article-title>. <source>Artif. Intell. Med.</source> <volume>102</volume>:<fpage>101753</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.artmed.2019.101753</pub-id>, PMID: <pub-id pub-id-type="pmid">31980092</pub-id></citation></ref>
<ref id="ref17"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fekadu</surname><given-names>A.</given-names></name> <name><surname>Demissie</surname><given-names>M.</given-names></name> <name><surname>Birhane</surname><given-names>R.</given-names></name> <name><surname>Medhin</surname><given-names>G.</given-names></name> <name><surname>Bitew</surname><given-names>T.</given-names></name> <name><surname>Hailemariam</surname><given-names>M.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Under detection of depression in primary care settings in low and middle-income countries: a systematic review and meta-analysis</article-title>. <source>Syst. Rev.</source> <volume>11</volume>:<fpage>21</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s13643-022-01893-9</pub-id>, PMID: <pub-id pub-id-type="pmid">35123556</pub-id></citation></ref>
<ref id="ref18"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Goswami</surname><given-names>C.</given-names></name> <name><surname>Goswami</surname><given-names>N.</given-names></name> <name><surname>Israni</surname><given-names>D.</given-names></name></person-group> (<year>2024</year>). <article-title>An amalgamation of RNN and transformers for identifying depression at the preliminary stage</article-title>. <conf-name>2024 IEEE International Conference on Communication, Computing and Signal Processing (IICCCS)</conf-name>. <fpage>1</fpage>&#x2013;<lpage>6</lpage>.</citation></ref>
<ref id="ref19"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Govindasamy</surname><given-names>K. A. L.</given-names></name> <name><surname>Palanichamy</surname><given-names>N.</given-names></name></person-group> (<year>2021</year>). <article-title>Depression detection using machine learning techniques on Twitter data</article-title>. <conf-name>2021 5th International Conference on Intelligent Computing and Control Systems (ICICCS)</conf-name>. <fpage>960</fpage>&#x2013;<lpage>966</lpage>.</citation></ref>
<ref id="ref20"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Guntuku</surname><given-names>S. C.</given-names></name> <name><surname>Yaden</surname><given-names>D. B.</given-names></name> <name><surname>Kern</surname><given-names>M. L.</given-names></name> <name><surname>Ungar</surname><given-names>L. H.</given-names></name> <name><surname>Eichstaedt</surname><given-names>J. C.</given-names></name></person-group> (<year>2017</year>). <article-title>Detecting depression and mental illness on social media: an integrative review</article-title>. <source>Curr. Opin. Behav. Sci.</source> <volume>18</volume>, <fpage>43</fpage>&#x2013;<lpage>49</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cobeha.2017.07.005</pub-id></citation></ref>
<ref id="ref21"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Had&#x017E;i&#x0107;</surname><given-names>B.</given-names></name> <name><surname>Ohse</surname><given-names>J.</given-names></name> <name><surname>Danner</surname><given-names>M.</given-names></name> <name><surname>Peperkorn</surname><given-names>N.</given-names></name> <name><surname>Mohammed</surname><given-names>P.</given-names></name> <name><surname>Shiban</surname><given-names>Y.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>AI-supported diagnostic of depression using clinical interviews: a pilot study</article-title>. <conf-name>Proceedings of the 19th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications (VISIGRAPP 2024)</conf-name>. <fpage>500</fpage>&#x2013;<lpage>507</lpage>.</citation></ref>
<ref id="ref22"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Han</surname><given-names>S. C.</given-names></name> <name><surname>Yuan</surname><given-names>Z.</given-names></name> <name><surname>Wang</surname><given-names>K.</given-names></name> <name><surname>Long</surname><given-names>S.</given-names></name> <name><surname>Poon</surname><given-names>J.</given-names></name></person-group> (<year>2022</year>). <article-title>Understanding graph convolutional networks for text classification</article-title>. <italic>arXiv</italic>. Available online at: <ext-link xlink:href="https://doi.org/10.48550/arXiv.2203.16060" ext-link-type="uri">https://doi.org/10.48550/arXiv.2203.16060</ext-link>. [Epub ahead of preprint]</citation></ref>
<ref id="ref23"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Han</surname><given-names>Q.</given-names></name> <name><surname>Zhao</surname><given-names>C.</given-names></name></person-group> (<year>2025</year>). <article-title>Unleashing the potential of chatbots in mental health: bibliometric analysis</article-title>. <source>Front. Psychiatry</source> <volume>16</volume>:<fpage>1494355</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fpsyt.2025.1494355</pub-id>, PMID: <pub-id pub-id-type="pmid">39967582</pub-id></citation></ref>
<ref id="ref24"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Hong</surname><given-names>S.</given-names></name> <name><surname>Cohn</surname><given-names>A.</given-names></name> <name><surname>Hogg</surname><given-names>D.</given-names></name></person-group> (<year>2022</year>). <article-title>Using graph representation learning with schema encoders to measure the severity of depressive symptoms</article-title>. <conf-name>The Tenth International Conference on Learning Representations</conf-name></citation></ref>
<ref id="ref25"><citation citation-type="other"><person-group person-group-type="author"><collab id="coll1">Hugging Face</collab></person-group> (<year>2025</year>). <article-title>paulagarciaserrano/roberta-depression-detection</article-title>. <ext-link xlink:href="https://huggingface.co/paulagarciaserrano/roberta-depression-detection" ext-link-type="uri">https://huggingface.co/paulagarciaserrano/roberta-depression-detection</ext-link>. (Accessed March 27, 2025)</citation></ref>
<ref id="ref26"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Islam</surname><given-names>M. R.</given-names></name> <name><surname>Kabir</surname><given-names>M. A.</given-names></name> <name><surname>Ahmed</surname><given-names>A.</given-names></name> <name><surname>Kamal</surname><given-names>A. R. M.</given-names></name> <name><surname>Wang</surname><given-names>H.</given-names></name> <name><surname>Ulhaq</surname><given-names>A.</given-names></name></person-group> (<year>2018</year>). <article-title>Depression detection from social network data using machine learning techniques</article-title>. <source>Health Inf. Sci. Syst.</source> <volume>6</volume>:<fpage>8</fpage>. doi: <pub-id pub-id-type="doi">10.1007/s13755-018-0046-0</pub-id>, PMID: <pub-id pub-id-type="pmid">30186594</pub-id></citation></ref>
<ref id="ref27"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Ji</surname><given-names>S.</given-names></name> <name><surname>Zhang</surname><given-names>T.</given-names></name> <name><surname>Ansari</surname><given-names>L.</given-names></name> <name><surname>Fu</surname><given-names>J.</given-names></name> <name><surname>Tiwari</surname><given-names>P.</given-names></name> <name><surname>Cambria</surname><given-names>E.</given-names></name></person-group> (<year>2021</year>). <article-title>MentalBERT: publicly available pretrained language models for mental healthcare</article-title>. <italic>arXiv</italic>. Available online at: <ext-link xlink:href="https://doi.org/10.48550/arXiv.2110.15621" ext-link-type="uri">https://doi.org/10.48550/arXiv.2110.15621</ext-link>. [Epub ahead of preprint]</citation></ref>
<ref id="ref28"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>John</surname><given-names>A.</given-names></name> <name><surname>Glendenning</surname><given-names>A. C.</given-names></name> <name><surname>Marchant</surname><given-names>A.</given-names></name> <name><surname>Montgomery</surname><given-names>P.</given-names></name> <name><surname>Stewart</surname><given-names>A.</given-names></name> <name><surname>Wood</surname><given-names>S.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Self-harm, suicidal behaviours, and cyberbullying in children and young people: systematic review</article-title>. <source>J. Med. Internet Res.</source> <volume>20</volume>:<fpage>e129</fpage>. doi: <pub-id pub-id-type="doi">10.2196/jmir.9044</pub-id>, PMID: <pub-id pub-id-type="pmid">29674305</pub-id></citation></ref>
<ref id="ref29"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Lara</surname><given-names>J. S.</given-names></name> <name><surname>Aragon</surname><given-names>M. E.</given-names></name> <name><surname>Gonzalez</surname><given-names>F. A.</given-names></name> <name><surname>Montes-y-Gomez</surname><given-names>M.</given-names></name></person-group> (<year>2021</year>). <article-title>Deep bag-of-sub-emotions for depression detection in social media</article-title>. <italic>arXiv</italic>. Available online at: <ext-link xlink:href="https://doi.org/10.48550/arXiv.2103.01334" ext-link-type="uri">https://doi.org/10.48550/arXiv.2103.01334</ext-link>. [Epub ahead of preprint]</citation></ref>
<ref id="ref30"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lestandy</surname><given-names>M.</given-names></name> <name><surname>Abdurrahim</surname></name></person-group>. (<year>2023</year>). <article-title>Exploring the impact of word embedding dimensions on depression data classification using BiLSTM model</article-title>. <source>Procedia Comput. Sci.</source> <volume>227</volume>, <fpage>298</fpage>&#x2013;<lpage>306</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.procs.2023.10.528</pub-id></citation></ref>
<ref id="ref31"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Lewis</surname><given-names>P.</given-names></name> <name><surname>Ott</surname><given-names>M.</given-names></name> <name><surname>Du</surname><given-names>J.</given-names></name> <name><surname>Stoyanov</surname><given-names>V.</given-names></name></person-group> (<year>2020</year>). <article-title>Pretrained language models for biomedical and clinical tasks: understanding and extending the state-of-the-art</article-title>. <conf-name>Proceedings of the 3rd Clinical Natural Language Processing Workshop</conf-name></citation></ref>
<ref id="ref32"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Li</surname><given-names>H.</given-names></name> <name><surname>Yan</surname><given-names>Y.</given-names></name> <name><surname>Wang</surname><given-names>S.</given-names></name> <name><surname>Liu</surname><given-names>J.</given-names></name> <name><surname>Cui</surname><given-names>Y.</given-names></name></person-group> (<year>2023</year>). <article-title>Text classification on heterogeneous information network via enhanced GCN and knowledge</article-title>. <source>Neural Comput. Appl.</source> <volume>35</volume>, <fpage>14911</fpage>&#x2013;<lpage>14927</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s00521-023-08494-0</pub-id></citation></ref>
<ref id="ref33"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Liang</surname><given-names>Y.</given-names></name> <name><surname>Mao</surname><given-names>C.</given-names></name> <name><surname>Luo</surname><given-names>Y.</given-names></name></person-group> (<year>2018</year>). <article-title>Graph convolutional networks for text classification</article-title>. <italic>arXiv</italic>. Available online at: <ext-link xlink:href="https://doi.org/10.48550/arXiv.1809.05679" ext-link-type="uri">https://doi.org/10.48550/arXiv.1809.05679</ext-link>. [Epub ahead of preprint]</citation></ref>
<ref id="ref34"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Lin</surname><given-names>Y.</given-names></name> <name><surname>Meng</surname><given-names>Y.</given-names></name> <name><surname>Sun</surname><given-names>X.</given-names></name> <name><surname>Han</surname><given-names>Q.</given-names></name> <name><surname>Kuang</surname><given-names>K.</given-names></name> <name><surname>Li</surname><given-names>J.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>BertGCN: transductive text classification by combining GCN and BERT</article-title>. <italic>arXiv</italic>. Available online at: <ext-link xlink:href="https://doi.org/10.48550/arXiv.2105.05727" ext-link-type="uri">https://doi.org/10.48550/arXiv.2105.05727</ext-link>. [Epub ahead of preprint]</citation></ref>
<ref id="ref35"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname><given-names>Y.</given-names></name></person-group> (<year>2024</year>). <article-title>Depression clinical detection model based on social media: a federated deep learning approach</article-title>. <source>J. Supercomput.</source> <volume>80</volume>, <fpage>7931</fpage>&#x2013;<lpage>7954</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11227-023-05754-7</pub-id></citation></ref>
<ref id="ref36"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Liu</surname><given-names>Y.</given-names></name> <name><surname>Ott</surname><given-names>M.</given-names></name> <name><surname>Goyal</surname><given-names>N.</given-names></name> <name><surname>Du</surname><given-names>J.</given-names></name> <name><surname>Joshi</surname><given-names>M.</given-names></name> <name><surname>Chen</surname><given-names>D.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>RoBERTa: a robustly optimized BERT pretraining approach</article-title>. <italic>arXiv</italic>. Available online at: <ext-link xlink:href="https://doi.org/10.48550/arXiv.1907.11692" ext-link-type="uri">https://doi.org/10.48550/arXiv.1907.11692</ext-link>. [Epub ahead of preprint]</citation></ref>
<ref id="ref37"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>MacAvaney</surname><given-names>S.</given-names></name> <name><surname>Mittu</surname><given-names>A.</given-names></name> <name><surname>Coppersmith</surname><given-names>G.</given-names></name> <name><surname>Leintz</surname><given-names>J.</given-names></name> <name><surname>Resnik</surname><given-names>P.</given-names></name></person-group> (<year>2021</year>). <article-title>Community-level research on suicidality prediction in a secure environment: overview of the CLPsych 2021 shared task</article-title>. <conf-name>Proceedings of the Seventh Workshop on Computational Linguistics and Clinical Psychology: Improving Access</conf-name>. <fpage>70</fpage>&#x2013;<lpage>80</lpage>.</citation></ref>
<ref id="ref38"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Maghraby</surname><given-names>A.</given-names></name> <name><surname>Ali</surname><given-names>H.</given-names></name></person-group> (<year>2022</year>). <article-title>Modern standard Arabic mood changing and depression dataset</article-title>. <source>Data Brief</source> <volume>41</volume>:<fpage>107999</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.dib.2022.107999</pub-id>, PMID: <pub-id pub-id-type="pmid">35274028</pub-id></citation></ref>
<ref id="ref39"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Malhotra</surname><given-names>A.</given-names></name> <name><surname>Jindal</surname><given-names>R.</given-names></name></person-group> (<year>2022</year>). <article-title>Deep learning techniques for suicide and depression detection from online social media: a scoping review</article-title>. <source>Appl. Soft Comput.</source> <volume>130</volume>:<fpage>109713</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.asoc.2022.109713</pub-id></citation></ref>
<ref id="ref40"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Manna</surname><given-names>S.</given-names></name> <name><surname>Nakai</surname><given-names>H.</given-names></name></person-group> (<year>2019</year>). <article-title>Effectiveness of word embeddings on classifiers: a case study with tweets</article-title>. <conf-name>2019 IEEE 13th International Conference on Semantic Computing (ICSC)</conf-name></citation></ref>
<ref id="ref41"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Mihov</surname><given-names>I.</given-names></name> <name><surname>Chen</surname><given-names>H.</given-names></name> <name><surname>Qin</surname><given-names>X.</given-names></name> <name><surname>Ku</surname><given-names>W.-S.</given-names></name> <name><surname>Yan</surname><given-names>D.</given-names></name> <name><surname>Liu</surname><given-names>Y.</given-names></name></person-group> (<year>2022</year>). <article-title>MentalNet: heterogeneous graph representation for early depression detection</article-title>. <conf-name>2022 IEEE International Conference on Data Mining (ICDM)</conf-name>. <fpage>1113</fpage>&#x2013;<lpage>1118</lpage>.</citation></ref>
<ref id="ref42"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Orabi</surname><given-names>A. H.</given-names></name> <name><surname>Buddhitha</surname><given-names>P.</given-names></name> <name><surname>Orabi</surname><given-names>M. H.</given-names></name> <name><surname>Inkpen</surname><given-names>D.</given-names></name></person-group> (<year>2018</year>). <article-title>Deep learning for depression detection of Twitter users</article-title>. <conf-name>Proceedings of the Fifth Workshop on Computational Linguistics and Clinical Psychology: From Keyboard to Clinic</conf-name></citation></ref>
<ref id="ref43"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Philip Thekkekara</surname><given-names>J.</given-names></name> <name><surname>Yongchareon</surname><given-names>S.</given-names></name> <name><surname>Liesaputra</surname><given-names>V.</given-names></name></person-group> (<year>2024</year>). <article-title>An attention-based CNN-BiLSTM model for depression detection on social media text</article-title>. <source>Expert Syst. Appl.</source> <volume>249</volume>:<fpage>123834</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.eswa.2024.123834</pub-id></citation></ref>
<ref id="ref44"><citation citation-type="confproc"><person-group person-group-type="author"><name><surname>Pirina</surname><given-names>I.</given-names></name> <name><surname>&#x00C7;&#x00F6;ltekin</surname><given-names>&#x00C7;.</given-names></name></person-group> (<year>2018</year>). <article-title>Identifying depression on Reddit: the effect of training data</article-title>. <conf-name>Proceedings of the 2018 EMNLP Workshop SMM4H: The 3rd Social Media Mining for Health Applications Workshop &#x0026; Shared Task</conf-name>. <fpage>9</fpage>&#x2013;<lpage>12</lpage>.</citation></ref>
<ref id="ref45"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rezaeinia</surname><given-names>S. M.</given-names></name> <name><surname>Rahmani</surname><given-names>R.</given-names></name> <name><surname>Ghodsi</surname><given-names>A.</given-names></name> <name><surname>Veisi</surname><given-names>H.</given-names></name></person-group> (<year>2019</year>). <article-title>Sentiment analysis based on improved pre-trained word embeddings</article-title>. <source>Expert Syst. Appl.</source> <volume>117</volume>, <fpage>139</fpage>&#x2013;<lpage>147</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.eswa.2018.08.044</pub-id></citation></ref>
<ref id="ref46"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rubin</surname><given-names>J.</given-names></name></person-group> (<year>2018</year>). <article-title>The classification and statistical manual of mental health concerns: a proposed practical scientific alternative to the DSM and ICD</article-title>. <source>J. Humanist. Psychol.</source> <volume>58</volume>, <fpage>93</fpage>&#x2013;<lpage>114</lpage>. doi: <pub-id pub-id-type="doi">10.1177/0022167817718079</pub-id></citation></ref>
<ref id="ref47"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Samee</surname><given-names>N. A.</given-names></name> <name><surname>Khan</surname><given-names>U.</given-names></name> <name><surname>Khan</surname><given-names>S.</given-names></name> <name><surname>Jamjoom</surname><given-names>M. M.</given-names></name> <name><surname>Sharif</surname><given-names>M.</given-names></name> <name><surname>Kim</surname><given-names>D. H.</given-names></name></person-group> (<year>2023</year>). <article-title>Safeguarding online spaces: a powerful fusion of federated learning, word embeddings, and emotional features for cyberbullying detection</article-title>. <source>IEEE Access</source> <volume>11</volume>, <fpage>124524</fpage>&#x2013;<lpage>124541</lpage>. doi: <pub-id pub-id-type="doi">10.1109/ACCESS.2023.3329347</pub-id></citation></ref>
<ref id="ref48"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shin</surname><given-names>D.</given-names></name> <name><surname>Kim</surname><given-names>K.</given-names></name> <name><surname>Lee</surname><given-names>S.-B.</given-names></name> <name><surname>Lee</surname><given-names>C.</given-names></name> <name><surname>Bae</surname><given-names>Y. S.</given-names></name> <name><surname>Cho</surname><given-names>W. I.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Detection of depression and suicide risk based on text from clinical interviews using machine learning: possibility of a new objective diagnostic marker</article-title>. <source>Front. Psychiatry</source> <volume>13</volume>:<fpage>801301</fpage>. doi: <pub-id pub-id-type="doi">10.3389/fpsyt.2022.801301</pub-id>, PMID: <pub-id pub-id-type="pmid">35686182</pub-id></citation></ref>
<ref id="ref49"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Smith</surname><given-names>K. M.</given-names></name> <name><surname>Renshaw</surname><given-names>P. F.</given-names></name> <name><surname>Bilello</surname><given-names>J.</given-names></name></person-group> (<year>2013</year>). <article-title>The diagnosis of depression: current and emerging methods</article-title>. <source>Compr. Psychiatry</source> <volume>54</volume>, <fpage>1</fpage>&#x2013;<lpage>6</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.comppsych.2012.06.006</pub-id>, PMID: <pub-id pub-id-type="pmid">22901834</pub-id></citation></ref>
<ref id="ref50"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Squires</surname><given-names>M.</given-names></name> <name><surname>Tao</surname><given-names>X.</given-names></name> <name><surname>Elangovan</surname><given-names>S.</given-names></name> <name><surname>Gururajan</surname><given-names>R.</given-names></name> <name><surname>Zhou</surname><given-names>X.</given-names></name> <name><surname>Acharya</surname><given-names>U. R.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Deep learning and machine learning in psychiatry: a survey of current progress in depression detection, diagnosis and treatment</article-title>. <source>Brain Inform.</source> <volume>10</volume>:<fpage>10</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s40708-023-00188-6</pub-id>, PMID: <pub-id pub-id-type="pmid">37093301</pub-id></citation></ref>
<ref id="ref51"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sun</surname><given-names>B.</given-names></name> <name><surname>Zhang</surname><given-names>Y.</given-names></name> <name><surname>He</surname><given-names>J.</given-names></name> <name><surname>Xiao</surname><given-names>Y.</given-names></name> <name><surname>Xiao</surname><given-names>R.</given-names></name></person-group> (<year>2019</year>). <article-title>An automatic diagnostic network using skew-robust adversarial discriminative domain adaptation to evaluate the severity of depression</article-title>. <source>Comput. Methods Prog. Biomed.</source> <volume>173</volume>, <fpage>185</fpage>&#x2013;<lpage>195</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cmpb.2019.01.006</pub-id>, PMID: <pub-id pub-id-type="pmid">30683543</pub-id></citation></ref>
<ref id="ref52"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Uddin</surname><given-names>M. Z.</given-names></name> <name><surname>Dysthe</surname><given-names>K. K.</given-names></name> <name><surname>F&#x00F8;lstad</surname><given-names>A.</given-names></name> <name><surname>Brandtzaeg</surname><given-names>P. B.</given-names></name></person-group> (<year>2022</year>). <article-title>Deep learning for prediction of depressive symptoms in a large textual dataset</article-title>. <source>Neural Comput. Appl.</source> <volume>34</volume>, <fpage>721</fpage>&#x2013;<lpage>744</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s00521-021-06426-4</pub-id></citation></ref>
<ref id="ref53"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wongkoblap</surname><given-names>A.</given-names></name> <name><surname>Vadillo</surname><given-names>M. A.</given-names></name> <name><surname>Curcin</surname><given-names>V.</given-names></name></person-group> (<year>2021</year>). <article-title>Deep learning with anaphora resolution for the detection of tweeters with depression: algorithm development and validation study</article-title>. <source>JMIR Ment. Health</source> <volume>8</volume>:<fpage>e19824</fpage>. doi: <pub-id pub-id-type="doi">10.2196/19824</pub-id>, PMID: <pub-id pub-id-type="pmid">34383688</pub-id></citation></ref>
<ref id="ref54"><citation citation-type="other"><person-group person-group-type="author"><collab id="coll2">World Health Organization</collab></person-group>. (<year>2025</year>). <article-title>Depressive disorder (depression)</article-title>. Available online at: <ext-link xlink:href="https://www.who.int/news-room/fact-sheets/detail/depression" ext-link-type="uri">https://www.who.int/news-room/fact-sheets/detail/depression</ext-link>. (Accessed March 25, 2025)</citation></ref>
<ref id="ref55"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Xin</surname><given-names>A. W.</given-names></name> <name><surname>Nielson</surname><given-names>D. M.</given-names></name> <name><surname>Krause</surname><given-names>K. R.</given-names></name> <name><surname>Fiorini</surname><given-names>G.</given-names></name> <name><surname>Midgley</surname><given-names>N.</given-names></name> <name><surname>Pereira</surname><given-names>F.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Using large language models to detect outcomes in qualitative studies of adolescent depression</article-title>. <source>J. Am. Med. Inform. Assoc.</source> <volume>2024</volume>:<fpage>ocae298</fpage>. doi: <pub-id pub-id-type="doi">10.1093/jamia/ocae298</pub-id>, PMID: <pub-id pub-id-type="pmid">39661754</pub-id></citation></ref>
<ref id="ref56"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname><given-names>K.</given-names></name> <name><surname>Zhang</surname><given-names>T.</given-names></name> <name><surname>Ananiadou</surname><given-names>S.</given-names></name></person-group> (<year>2022</year>). <article-title>A mental state knowledge&#x2013;aware and contrastive network for early stress and depression detection on social media</article-title>. <source>Inf. Process. Manag.</source> <volume>59</volume>:<fpage>102961</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ipm.2022.102961</pub-id></citation></ref>
<ref id="ref9001"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname><given-names>Y.</given-names></name> <name><surname>Han</surname><given-names>Q.</given-names></name></person-group> (<year>2025</year>). <article-title>Enhancing pre-trained language model by answering natural questions for event extraction</article-title>. <source>Front. Artif. Intell.</source> <volume>8</volume>:<fpage>1520290</fpage>. doi: <pub-id pub-id-type="doi">10.3389/frai.2025.1520290</pub-id></citation></ref>
</ref-list>
</back>
</article>