<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Neuroinform.</journal-id>
<journal-title>Frontiers in Neuroinformatics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Neuroinform.</abbrev-journal-title>
<issn pub-type="epub">1662-5196</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fninf.2025.1527582</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Neuroscience</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Contrastive self-supervised learning for neurodegenerative disorder classification</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes">
<name><surname>Gryshchuk</surname> <given-names>Vadym</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2926481/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name><surname>Singh</surname> <given-names>Devesh</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn001"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/2601712/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Teipel</surname> <given-names>Stefan</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/122840/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Dyrba</surname> <given-names>Martin</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/379671/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/supervision/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<collab>the ADNI, AIBL, FTLDNI study groups</collab>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>German Center for Neurodegenerative Diseases (DZNE)</institution>, <addr-line>Rostock</addr-line>, <country>Germany</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of Psychosomatic Medicine, Rostock University Medical Center</institution>, <addr-line>Rostock</addr-line>, <country>Germany</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Hamed Honari, Stanford University, United States</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Tingwei Quan, Huazhong University of Science and Technology, China</p>
<p>Mailyn Moreno, Centro de Investigaci&#x000F3;n en Computaci&#x000F3;n, Instituto Polit&#x000E9;cnico Nacional (CIC-IPN), Mexico</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Martin Dyrba <email>martin.dyrba&#x00040;dzne.de</email></corresp>
<fn fn-type="equal" id="fn001"><p>&#x02020;These authors have contributed equally to this work and share first authorship</p></fn></author-notes>
<pub-date pub-type="epub">
<day>17</day>
<month>02</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>19</volume>
<elocation-id>1527582</elocation-id>
<history>
<date date-type="received">
<day>13</day>
<month>11</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>17</day>
<month>01</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2025 Gryshchuk, Singh, Teipel and Dyrba.</copyright-statement>
<copyright-year>2025</copyright-year>
<copyright-holder>Gryshchuk, Singh, Teipel and Dyrba</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license>
</permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>Neurodegenerative diseases such as Alzheimer&#x00027;s disease (AD) or frontotemporal lobar degeneration (FTLD) involve specific loss of brain volume, detectable <italic>in vivo</italic> using T1-weighted MRI scans. Supervised machine learning approaches classifying neurodegenerative diseases require diagnostic labels for each sample. However, it can be difficult to obtain expert labels for a large amount of data. Self-supervised learning (SSL) offers an alternative for training machine learning models without data labels.</p>
</sec>
<sec>
<title>Methods</title>
<p>We investigated if the SSL models can be applied to distinguish between different neurodegenerative disorders in an interpretable manner. Our method comprises a feature extractor and a downstream classification head. A deep convolutional neural network, trained with a contrastive loss, serves as the feature extractor that learns latent representations. The classification head is a single-layer perceptron that is trained to perform diagnostic group separation. We used <italic>N</italic> = 2,694 T1-weighted MRI scans from four data cohorts: two ADNI datasets, AIBL and FTLDNI, including cognitively normal controls (CN), cases with prodromal and clinical AD, as well as FTLD cases differentiated into its phenotypes.</p>
</sec>
<sec>
<title>Results</title>
<p>Our results showed that the feature extractor trained in a self-supervised way provides generalizable and robust representations for the downstream classification. For AD vs. CN, our model achieves 82% balanced accuracy on the test subset and 80% on an independent holdout dataset. Similarly, the behavioral variant of frontotemporal dementia (BV) vs. CN model attains an 88% balanced accuracy on the test subset. The average feature attribution heatmaps obtained by the Integrated Gradients method highlighted hallmark regions, i.e., temporal gray matter atrophy for AD, and insular atrophy for BV.</p>
</sec>
<sec>
<title>Conclusion</title>
<p>Our models perform comparably to state-of-the-art supervised deep learning approaches. This suggests that the SSL methodology can successfully make use of unannotated neuroimaging datasets as training data while remaining robust and interpretable.</p>
</sec></abstract>
<kwd-group>
<kwd>contrastive learning</kwd>
<kwd>self-supervised learning</kwd>
<kwd>neurodegenerative disorders</kwd>
<kwd>deep learning</kwd>
<kwd>structural magnetic resonance imaging</kwd>
<kwd>Alzheimer&#x00027;s disease</kwd>
<kwd>frontotemporal lobar degeneration</kwd>
</kwd-group>
<counts>
<fig-count count="3"/>
<table-count count="3"/>
<equation-count count="6"/>
<ref-count count="87"/>
<page-count count="13"/>
<word-count count="10292"/>
</counts>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>1 Introduction</title>
<p>Neurodegenerative diseases such as Alzheimer&#x00027;s disease (AD) and frontotemporal dementia (FTD) are characterized by specific brain volume loss, which can be assessed <italic>in vivo</italic> using structural magnetic resonance imaging (MRI). The usual radiological evaluation of MRI scans is performed mainly by visual examination, which is often time-consuming. Assistance systems for the automated detection of disease-specific patterns could be useful for better clinical diagnosis, as they can significantly decrease the evaluation time for radiologists and neurologists, and help them focus on relevant brain regions. Convolutional neural network (CNN) models can automatically identify neurodegenerative diseases from MRI scans and achieve state-of-the-art results in medical imaging tasks. Recent developments in the CNN architectures have in turn shaped the neuroimaging community, which is interested in automatic discovery of image features pertinent to neurological illnesses. Various tasks, such as disease diagnosis, pathology localization, anatomical region segmentation, etc., now rely on the use of CNNs (Dyrba et al., <xref ref-type="bibr" rid="B19">2021</xref>; Qiu et al., <xref ref-type="bibr" rid="B60">2020</xref>; Eitel et al., <xref ref-type="bibr" rid="B20">2021</xref>; Wen et al., <xref ref-type="bibr" rid="B82">2020</xref>; Han et al., <xref ref-type="bibr" rid="B27">2022</xref>). CNN models are primarily trained in a <italic>supervised</italic> manner by using an external ground-truth label. Generating such labels for data samples is often burdensome and costly. Furthermore, CNN models require a large amount of training data to achieve competitive results. Such large datasets are not easily available within the medical domain due to the high cost of data collection and the rarity of experts for annotations.</p>
<p>These constraints led us to reconsider the training of CNN models in a <italic>supervised</italic> manner, and to explore <italic>self-supervised learning (SSL)</italic> approaches. The SSL methods learn without any sample labels by utilizing the internal structure of the data to generate representative features. Architectures trained in a self-supervised manner are biologically plausible, provide extensive feature space, and can compete with supervised approaches (Orhan et al., <xref ref-type="bibr" rid="B55">2020</xref>).</p>
<p>Moreover, post hoc explanation methods have been developed within the field of eXplainable Artificial Intelligence (XAI) to interpret how deep neural networks make decisions. The XAI methods for explaining CNN models rely on local feature attribution methods, which assign a relevance score to input regions for a given input, model, and resulting output. However, only a handful of studies have explored attribution-based XAI methods within the field of self-supervised learning (SSL) applications, e.g., in the medical imaging domain (Chen et al., <xref ref-type="bibr" rid="B7">2023</xref>).</p>
<p>The main goal of our study was to explore, in a proof-of-concept study, the ability of SSL methods to learn generalizable features for dementia stage and type detection from structural MRI data. We hypothesized that SSL methods could learn meaningful structural representations, and resulting models could have comparable performances to supervised models. In this paper, we trained a CNN model with the SSL setup and then evaluated it on downstream classification tasks, binary and multi-class. We also explored a saliency mapping technique for highlighting relevant input regions. The main research questions were defined as: <italic>How does the contrastive SSL paradigm compare to the supervised learning paradigm in terms of predictive power? Are the models trained in a contrastive self-supervised way on neuroimaging data interpretable?</italic></p>
</sec>
<sec id="s2">
<title>2 Background</title>
<sec>
<title>2.1 Self-supervised learning</title>
<p>Self-supervised learning (SSL) methods learn generalizable features without any data labels or ground truth information by solving an initial auxiliary task. The pretrained SSL models are then used for specific downstream tasks, e.g., identification of neurodegenerative disorders. Models trained under the SSL approach have found application in different domains, that is, image processing (Jing and Tian, <xref ref-type="bibr" rid="B37">2020</xref>), video processing (Schiappa et al., <xref ref-type="bibr" rid="B66">2023</xref>), and audio processing (Liu et al., <xref ref-type="bibr" rid="B46">2022a</xref>). Within the imaging domain, multiple auxiliary or so-called &#x0201C;pretext&#x0201D; tasks have been suggested previously: identifying data augmentations (Reed et al., <xref ref-type="bibr" rid="B61">2021</xref>; Chen et al., <xref ref-type="bibr" rid="B5">2020</xref>), rotation prediction (Chen et al., <xref ref-type="bibr" rid="B6">2019</xref>), patch position prediction (Doersch et al., <xref ref-type="bibr" rid="B13">2015</xref>; Noroozi and Favaro, <xref ref-type="bibr" rid="B53">2016</xref>; Wei et al., <xref ref-type="bibr" rid="B81">2019</xref>), image colorization (Larsson et al., <xref ref-type="bibr" rid="B43">2017</xref>, <xref ref-type="bibr" rid="B42">2016</xref>), and contrastive learning (Jaiswal et al., <xref ref-type="bibr" rid="B35">2020</xref>).</p>
<p>SSL methods could be thought of as an alternative to a pre-training or automated feature learning step and are related to the way young children learn (Orhan et al., <xref ref-type="bibr" rid="B55">2020</xref>). Particularly, contrastive SSL methods try to learn the general structure present within the data, by using <italic>supervisory signals</italic> extracted from the data itself independently of the ground truth for any specific use-case. In our study, we used contrastive learning due to its widespread application as a pretext task (Shurrab and Duwairi, <xref ref-type="bibr" rid="B71">2022</xref>; VanBerlo et al., <xref ref-type="bibr" rid="B79">2024</xref>).</p>
<sec>
<title>2.1.1 Formal definition of contrastive SSL</title>
<p>Contrastive learning tasks have received considerable attention within the SSL methods. Contrastive learning tasks aim to learn a latent space in which embeddings of similar data samples are pulled together, and embeddings of dissimilar data samples are pushed apart (Gutmann and Hyv&#x000E4;rinen, <xref ref-type="bibr" rid="B26">2010</xref>; Weng, <xref ref-type="bibr" rid="B83">2021</xref>; Chopra et al., <xref ref-type="bibr" rid="B9">2005</xref>). Various loss functions have been suggested to increase the quality of learned embeddings, and expedite the training. These include contrastive loss (Gutmann and Hyv&#x000E4;rinen, <xref ref-type="bibr" rid="B26">2010</xref>), triplet loss (Chechik et al., <xref ref-type="bibr" rid="B4">2010</xref>; Schroff et al., <xref ref-type="bibr" rid="B67">2015</xref>), N-pair loss (Sohn, <xref ref-type="bibr" rid="B73">2016</xref>), InfoNCE loss (Oord et al., <xref ref-type="bibr" rid="B54">2019</xref>), and Neighborhood-based loss (Sabokrou et al., <xref ref-type="bibr" rid="B63">2019</xref>) etc. Contrastive learning is based on the use of positive and negative data pairs (Grill et al., <xref ref-type="bibr" rid="B25">2020</xref>; Chen et al., <xref ref-type="bibr" rid="B5">2020</xref>), where a <italic>positive pair</italic> (<italic>i, j</italic>) consists of two similar data instances or views. In many studies, a data sample is paired with its own augmented variations to create such positive pairs. A <italic>negative pair</italic> generally contains two different data samples. The contrastive loss &#x02113; for a positive pair is formally defined as follows.</p>
<disp-formula id="E1"><label>(1)</label><mml:math id="M1"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtable style="text-align:axis;" equalrows="false" columnlines="none none none none none none none" equalcolumns="false" class="array"><mml:mtr><mml:mtd><mml:mi>&#x02113;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mo>-</mml:mo><mml:mi>l</mml:mi><mml:mi>o</mml:mi><mml:mi>g</mml:mi><mml:mfrac><mml:mrow><mml:mi>e</mml:mi><mml:mi>x</mml:mi><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>s</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>z</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>z</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>/</mml:mo><mml:mi>&#x003C4;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mstyle displaystyle="true"><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:mi>N</mml:mi></mml:mrow></mml:msubsup></mml:mstyle><mml:msub><mml:mrow><mml:mo>&#x1D7D9;</mml:mo></mml:mrow><mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>&#x02260;</mml:mo><mml:mi>i</mml:mi></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow></mml:msub><mml:mi>e</mml:mi><mml:mi>x</mml:mi><mml:mi>p</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>s</mml:mi><mml:mrow><mml:mo 
stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>z</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mstyle mathvariant='bold'><mml:mtext>z</mml:mtext></mml:mstyle></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>/</mml:mo><mml:mi>&#x003C4;</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mfrac><mml:mo>,</mml:mo></mml:mtd></mml:mtr></mml:mtable></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>where &#x003C4; is a scaling factor called temperature, &#x1D7D9; is an indicator function with output values 0 or 1, <italic>N</italic> is the number of training samples, exp(&#x000B7;) is the exponential function, and cos(&#x000B7;, &#x000B7;) is the cosine similarity function applied to the latent representations <bold>z</bold> of the inputs.</p>
<p>The Nearest-Neighbor Contrastive Learning (NNCLR) method (Dwibedi et al., <xref ref-type="bibr" rid="B18">2021</xref>) extends the common contrastive loss by keeping a record of recent embeddings of augmented views in a queue <italic>Q</italic>. Thus, the pairs are not directly compared, rather a projection embedding that is most similar to a view is selected from <italic>Q</italic> for the comparison with another view. The NNCLR contrastive loss &#x02113;<sub><italic>n</italic></sub> is defined as:</p>
<disp-formula id="E2"><label>(2)</label><mml:math id="M2"><mml:mrow><mml:msub><mml:mi>&#x02113;</mml:mi><mml:mi>n</mml:mi></mml:msub><mml:mo stretchy='false'>(</mml:mo><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi><mml:mo stretchy='false'>)</mml:mo><mml:mo>=</mml:mo><mml:mo>&#x02212;</mml:mo><mml:mi>l</mml:mi><mml:mi>o</mml:mi><mml:mi>g</mml:mi><mml:mfrac><mml:mrow><mml:mi>e</mml:mi><mml:mi>x</mml:mi><mml:mi>p</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>s</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi mathvariant='script'>S</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>z</mml:mi></mml:mstyle><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi>Q</mml:mi><mml:mo stretchy='false'>)</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>z</mml:mi></mml:mstyle><mml:mi>j</mml:mi></mml:msub><mml:mo stretchy='false'>)</mml:mo><mml:mo>/</mml:mo><mml:mi>&#x003C4;</mml:mi><mml:mo stretchy='false'>)</mml:mo></mml:mrow><mml:mrow><mml:mstyle displaystyle='true'><mml:msubsup><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:mi>N</mml:mi></mml:mrow></mml:msubsup><mml:mrow><mml:msub><mml:mo>&#x1D7D9;</mml:mo><mml:mrow><mml:mo stretchy='false'>[</mml:mo><mml:mi>k</mml:mi><mml:mo>&#x02260;</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy='false'>]</mml:mo></mml:mrow></mml:msub><mml:mi>e</mml:mi><mml:mi>x</mml:mi><mml:mi>p</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>s</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mi mathvariant='script'>S</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:msub><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>z</mml:mi></mml:mstyle><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi>Q</mml:mi><mml:mo stretchy='false'>)</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mstyle 
mathvariant='bold' mathsize='normal'><mml:mi>z</mml:mi></mml:mstyle><mml:mi>k</mml:mi></mml:msub><mml:mo stretchy='false'>)</mml:mo><mml:mo>/</mml:mo><mml:mi>&#x003C4;</mml:mi><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mfrac><mml:mo>,</mml:mo></mml:mrow></mml:math></disp-formula>
<p>where <italic>S</italic>(<bold>z</bold>, <italic>Q</italic>) is the nearest neighbor function:</p>
<disp-formula id="E3"><label>(3)</label><mml:math id="M3"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mrow><mml:mi mathvariant='script'>S</mml:mi><mml:mo stretchy='false'>(</mml:mo><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>z</mml:mi></mml:mstyle><mml:mo>,</mml:mo><mml:mi>Q</mml:mi><mml:mo stretchy='false'>)</mml:mo><mml:mo>=</mml:mo><mml:munder><mml:mrow><mml:mi>arg</mml:mi><mml:mtext>&#x000A0;</mml:mtext><mml:mi>min</mml:mi></mml:mrow><mml:mrow><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>q</mml:mi></mml:mstyle><mml:mo>&#x02208;</mml:mo><mml:mi>Q</mml:mi></mml:mrow></mml:munder><mml:msub><mml:mrow><mml:mrow><mml:mo>&#x02016;</mml:mo><mml:mrow><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>z</mml:mi></mml:mstyle><mml:mo>&#x02212;</mml:mo><mml:mstyle mathvariant='bold' mathsize='normal'><mml:mi>q</mml:mi></mml:mstyle></mml:mrow><mml:mo>&#x02016;</mml:mo></mml:mrow></mml:mrow><mml:mn>2</mml:mn></mml:msub><mml:mo>.</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
</sec>
<sec>
<title>2.1.2 Self-supervised learning in medical imaging</title>
<p>Recent advancements in self-supervised learning (SSL) facilitate the training of models capable of effectively acquiring feature representations relevant to downstream tasks (Thomas et al., <xref ref-type="bibr" rid="B77">2024</xref>; VanBerlo et al., <xref ref-type="bibr" rid="B79">2024</xref>). When applied to imaging data, SSL methodologies primarily focus on image reconstruction (Hu et al., <xref ref-type="bibr" rid="B31">2021a</xref>; Zhou et al., <xref ref-type="bibr" rid="B87">2023</xref>), segmentation (Taleb et al., <xref ref-type="bibr" rid="B76">2020</xref>; Sun et al., <xref ref-type="bibr" rid="B74">2023</xref>), denoising (Pfaff et al., <xref ref-type="bibr" rid="B58">2024</xref>), and disease classification (Dufumier et al., <xref ref-type="bibr" rid="B17">2021</xref>; Jiang and Miao, <xref ref-type="bibr" rid="B36">2022</xref>; Gorade et al., <xref ref-type="bibr" rid="B24">2023</xref>). For example, the study by Taleb et al. (<xref ref-type="bibr" rid="B76">2020</xref>) introduces SSL pretext tasks, including patch-based prediction of latent representations and the augmentation prediction. In contrast, Hu et al. (<xref ref-type="bibr" rid="B31">2021a</xref>) suggests an alternative pretext task leveraging two parallel networks to minimize reconstruction loss. Additional research has used SSL on longitudinal Alzheimer&#x00027;s Disease (AD) MRI datasets to explore methods to integrate information from multiple imaging modalities (Fedorov et al., <xref ref-type="bibr" rid="B22">2021</xref>) or to predict the trajectory of cognitive performance and/or cognitive decline (Ouyang et al., <xref ref-type="bibr" rid="B56">2022</xref>; Zhao et al., <xref ref-type="bibr" rid="B86">2021</xref>).</p>
<p>Contrary to the aforementioned studies, which aimed at applying SSL techniques for the learning of feature representations within broader application areas, our work assesses the effectiveness of these representations acquired through SSL in differentiating neurodegenerative disorders with an emphasis on the interpretability of the models.</p>
</sec>
</sec>
<sec>
<title>2.2 Convolutional neural network backbones</title>
<p>Convolutional neural networks (CNN) have been the state-of-the-art solutions for computer vision tasks for almost a decade. In the last few years, numerous approaches on the advancement of CNNs were proposed: introduction of skip connections (He et al., <xref ref-type="bibr" rid="B28">2016</xref>; Huang et al., <xref ref-type="bibr" rid="B33">2017</xref>), experimentation with model hyper-parameters such as kernel size (Ganjdanesh et al., <xref ref-type="bibr" rid="B23">2023</xref>), normalization strategies (Ioffe and Szegedy, <xref ref-type="bibr" rid="B34">2015</xref>) and activation functions (Dubey et al., <xref ref-type="bibr" rid="B15">2022</xref>; Apicella et al., <xref ref-type="bibr" rid="B2">2021</xref>), depthwise convolutions (Howard et al., <xref ref-type="bibr" rid="B30">2017</xref>), and model&#x00027;s block architecture (Sandler et al., <xref ref-type="bibr" rid="B64">2018</xref>).</p>
<p>With the introduction of attention priors, vision transformers (ViT) (Dosovitskiy et al., <xref ref-type="bibr" rid="B14">2020</xref>) soon became a viable alternative to purely convolutional models, and currently represent the state-of-the-art model architecture as generic vision backbones. ViTs were inspired by the transformer models applied to language processing tasks. To the best of our knowledge, there have been no attempts to systematically compare attention priors with convolutional priors. However, in their study Liu et al. (<xref ref-type="bibr" rid="B47">2022b</xref>) combined many of the CNN advancements proposed over the years, and compared the resulting ConvNeXt model with comparable vision transformers. ConvNeXt (Liu et al., <xref ref-type="bibr" rid="B47">2022b</xref>) was proposed as a purely convolutional model, which achieved favorable results on common vision benchmarks such as the ImageNet (Deng et al., <xref ref-type="bibr" rid="B12">2009</xref>) and the COCO (Lin et al., <xref ref-type="bibr" rid="B45">2014</xref>) datasets, sometimes even providing higher accuracy than competing ViT models. Notably, ConvNeXt achieved these results while maintaining the computational simplicity and efficiency of standard CNN models, highlighting the importance of convolutional priors for vision tasks.</p>
</sec>
<sec>
<title>2.3 Feature attribution</title>
<p>With the growing popularity of CNN models and these models becoming the off-the-shelf baselines, there has also been a growing need to understand them. Multiple studies have attempted to explain and interpret black-box CNN models. Within the domain of explainable AI (XAI), there are various methods to derive the importance of input features, i.e., the importance scores with respect to each prediction. These importance scores can be visualized by superimposing them on the input scans (Van der Velden et al., <xref ref-type="bibr" rid="B78">2022</xref>). Certain preferred methods of importance scoring are Layer-wise Relevance Propagation (LRP) (Montavon et al., <xref ref-type="bibr" rid="B52">2019</xref>; Kohlbrenner et al., <xref ref-type="bibr" rid="B40">2020</xref>), Gradient-weighted Class Activation Mapping (Grad-CAM) (Selvaraju et al., <xref ref-type="bibr" rid="B69">2020</xref>), and Integrated Gradients (IG) (Sundararajan et al., <xref ref-type="bibr" rid="B75">2017</xref>). Multiple studies have been conducted mapping importance scores to input regions, particularly within the neuroscience application of dementia detection (Dyrba et al., <xref ref-type="bibr" rid="B19">2021</xref>; Singh and Dyrba, <xref ref-type="bibr" rid="B72">2023</xref>; B&#x000F6;hle et al., <xref ref-type="bibr" rid="B3">2019</xref>; Leonardsen et al., <xref ref-type="bibr" rid="B44">2024</xref>; Wang et al., <xref ref-type="bibr" rid="B80">2023</xref>).</p>
</sec>
</sec>
<sec sec-type="methods" id="s3">
<title>3 Methods</title>
<sec>
<title>3.1 Neuroimaging datasets</title>
<p>We used T1-weighted brain MRI scans from publicly available neuroimaging repositories. The data scans in our study were pooled from the following data repositories: (i) the Alzheimer&#x00027;s Disease Neuroimaging Initiative (ADNI),<xref ref-type="fn" rid="fn0001"><sup>1</sup></xref> study phases ADNI2 and ADNI3, (ii) the Australian Imaging, Biomarker &#x00026; Lifestyle Flagship Study of Aging (AIBL),<xref ref-type="fn" rid="fn0002"><sup>2</sup></xref> collected by the AIBL study group, and (iii) the Frontotemporal Lobar Degeneration Neuroimaging Initiative (FTLDNI).<xref ref-type="fn" rid="fn0003"><sup>3</sup></xref> In our study, the cognitively normal (CN) scan samples were consolidated from all three data cohorts. The ADNI and AIBL data cohorts provided samples with dementia due to Alzheimer&#x00027;s disease (AD) and mild cognitive impairment (MCI). In contrast, FTLDNI was the only data cohort with samples categorized into different frontotemporal lobar degeneration (FTLD) phenotypes, i.e., the behavioral variant of frontotemporal dementia (BV), the semantic variant of frontotemporal dementia (SV), and the progressive non-fluent aphasia (PNFA). Notably, the data from ADNI3, ADNI2 and FTLDNI was used for training all models, and AIBL was used as an independent test dataset.</p>
<p>We applied the &#x0201C;t1-linear pipeline&#x0201D; of the Clinica Python library (Routier et al., <xref ref-type="bibr" rid="B62">2021</xref>; Wen et al., <xref ref-type="bibr" rid="B82">2020</xref>) to preprocess the raw MRI scans. The pipeline uses the N4ITK method for bias field correction and the SyN algorithm from ANTs to perform an affine registration for alignment of each scan with the Montreal Neurological Institute (MNI) reference space. However, more advanced steps such as brain extraction, tissue segmentation, and non-linear warping were not performed. Some MRI scans were excluded due to severe quality issues, i.e., the presence of imaging artifacts such as blurring or ghosting, or missing diagnostic information.</p>
<p>Additionally, each scan was cropped to the size of 169 &#x000D7; 208 &#x000D7; 179 voxels with 1 mm isotropic resolution. After applying preprocessing methods, our study includes 841 scans from the ADNI2, 968 scans from the ADNI3, 612 scans from AIBL and 273 scans from FTLDNI. <xref ref-type="table" rid="T1">Table 1</xref> summarizes the sample statistics of the different data sources.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Sample statistics of study data per diagnosis state.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th/>
<th valign="top" align="center"><bold>CN</bold></th>
<th valign="top" align="center"><bold>AD</bold></th>
<th valign="top" align="center"><bold>MCI</bold></th>
<th/>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" colspan="5" style="background-color:#dee1e1"><bold>ADNI3</bold></td>
</tr>
<tr>
<td valign="top" align="left">Age: &#x003BC;(&#x003C3;)</td>
<td valign="top" align="center">74 (7)</td>
<td valign="top" align="center">77 (8.3)</td>
<td valign="top" align="center">74.6 (8)</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">MMSE: &#x003BC;(&#x003C3;)</td>
<td valign="top" align="center">29.4 (0.7)</td>
<td valign="top" align="center">20.8 (4.5)</td>
<td valign="top" align="center">27.9 (1.1)</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Sex: F/M</td>
<td valign="top" align="center">312/221</td>
<td valign="top" align="center">52/70</td>
<td valign="top" align="center">140/173</td>
<td/>
</tr>
<tr>
<td valign="top" align="left" colspan="5" style="background-color:#dee1e1"><bold>ADNI2</bold></td>
</tr>
<tr>
<td valign="top" align="left">Age: &#x003BC;(&#x003C3;)</td>
<td valign="top" align="center">75.8 (7)</td>
<td valign="top" align="center">76.2 (7.6)</td>
<td valign="top" align="center">74.6 (7.9)</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">MMSE: &#x003BC;(&#x003C3;)</td>
<td valign="top" align="center">29.3 (0.7)</td>
<td valign="top" align="center">21.1 (4.3)</td>
<td valign="top" align="center">27.8 (1.1)</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Sex: F/M</td>
<td valign="top" align="center">110/94</td>
<td valign="top" align="center">120/163</td>
<td valign="top" align="center">151/203</td>
<td/>
</tr>
<tr>
<td valign="top" align="left" colspan="5" style="background-color:#dee1e1"><bold>AIBL</bold></td>
</tr>
<tr>
<td valign="top" align="left">Age: &#x003BC;(&#x003C3;)</td>
<td valign="top" align="center">73.5 (6.4)</td>
<td valign="top" align="center">75.4 (7.9)</td>
<td valign="top" align="center">76.6 (6.5)</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">MMSE: &#x003BC;(&#x003C3;)</td>
<td valign="top" align="center">29.2 (0.8)</td>
<td valign="top" align="center">19.5 (5.8)</td>
<td valign="top" align="center">27.2 (1.3)</td>
<td/>
</tr>
<tr>
<td valign="top" align="left">Sex: F/M</td>
<td valign="top" align="center">239/182</td>
<td valign="top" align="center">51/37</td>
<td valign="top" align="center">41/62</td>
<td/>
</tr>
<tr style="background-color:#919498;color:#ffffff">
<td/>
<td valign="top" align="center"><bold>CN</bold></td>
<td valign="top" align="center"><bold>BV</bold></td>
<td valign="top" align="center"><bold>SV</bold></td>
<td valign="top" align="center"><bold>PNFA</bold></td>
</tr>
<tr>
<td valign="top" align="left" colspan="5" style="background-color:#dee1e1"><bold>FTLDNI</bold></td>
</tr>
<tr>
<td valign="top" align="left">Age: &#x003BC;(&#x003C3;)</td>
<td valign="top" align="center">64.3 (7.1)</td>
<td valign="top" align="center">62.1 (5.8)</td>
<td valign="top" align="center">62.7 (6.8)</td>
<td valign="top" align="center">68.9 (7.7)</td>
</tr>
<tr>
<td valign="top" align="left">MMSE: &#x003BC;(&#x003C3;)</td>
<td valign="top" align="center">29.7 (0.5)</td>
<td valign="top" align="center">22.6 (6.2)</td>
<td valign="top" align="center">22.5 (5.7)</td>
<td valign="top" align="center">24.9 (5.5)</td>
</tr>
<tr>
<td valign="top" align="left">Sex: F/M</td>
<td valign="top" align="center">72/58</td>
<td valign="top" align="center">23/48</td>
<td valign="top" align="center">14/23</td>
<td valign="top" align="center">19/16</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>CN, a cognitively normal state; AD, dementia due to Alzheimer&#x00027;s disease; MCI, mild cognitive impairment; BV, behavioral variant of frontotemporal dementia; SV, semantic variant of frontotemporal dementia; PNFA, progressive non-fluent aphasia; &#x003BC;, mean; &#x003C3;, standard deviation; MMSE, mini-mental state examination; F, female; M, male.</p>
</table-wrap-foot>
</table-wrap>
</sec>
<sec>
<title>3.2 Proposed self-supervised learning pipeline</title>
<p>Our proposed method consists of two modules: a feature extractor and a classification head. The feature extractor is a convolutional neural network trained without any sample labels in a self-supervised manner. The classification head is a simple neural network subsequently trained in a supervised way. The proposed architecture is shown in <xref ref-type="fig" rid="F1">Figure 1</xref>.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>Illustration of the proposed architecture. <bold>(Top)</bold> ConvNeXt, a CNN model, trained under a self-supervised learning paradigm, extracts features from coronal brain slices. <bold>(Bottom center)</bold> The classification head learns to classify neurodegenerative disorders from the extracted features. CNN, convolutional neural network; LN, layer normalization; Conv, convolutional operation.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fninf-19-1527582-g0001.tif"/>
</fig>
<p>After executing the t1-linear pipeline of the Clinica library, we obtained a 3D image for the brain of each participant. However, we only used 2D convolutional operations, as they reduce the CNN parameter space and model complexity. We selected only the coronal plane for the present study. In each MRI sample, there were in total 208 coronal slices; however, we considered only 120 coronal slices in the middle. The slices from the middle contain the relevant regions, such as the hippocampus and the temporal lobe, which are reported to be affected already in the earliest stages of Alzheimer&#x00027;s disease (Whitwell et al., <xref ref-type="bibr" rid="B84">2008</xref>).</p>
<p><italic>Feature extractor:</italic> We used the ConvNeXt model (Liu et al., <xref ref-type="bibr" rid="B47">2022b</xref>) as the backbone for the SSL framework. It was trained with the NNCLR loss &#x02113;<sub><italic>n</italic></sub> to learn visual representations of input data (see <xref ref-type="disp-formula" rid="E2">Equation 2</xref>). We chose the NNCLR method as it provides a more generalizable learning paradigm by sampling semantic variations in the latent space and being less reliant on transformation from specific pretext tasks (Dwibedi et al., <xref ref-type="bibr" rid="B18">2021</xref>). We applied a series of random augmentations to a randomly selected coronal slice for the creation of positive pairs, as exemplified in <xref ref-type="fig" rid="F2">Figure 2</xref>.</p>
<fig id="F2" position="float">
<label>Figure 2</label>
<caption><p>Randomly applied data augmentations to the input during training. <bold>(A)</bold> Original. <bold>(B)</bold> Horizontal flip. <bold>(C)</bold> Crop and resize. <bold>(D)</bold> Occlusion.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fninf-19-1527582-g0002.tif"/>
</fig>
<p>The loss optimized for a data batch was:</p>
<disp-formula id="E4"><label>(4)</label><mml:math id="M4"><mml:mrow><mml:mi>&#x02112;</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:mn>2</mml:mn><mml:mi>N</mml:mi></mml:mrow></mml:mfrac><mml:mstyle displaystyle='true'><mml:munderover><mml:mo>&#x02211;</mml:mo><mml:mrow><mml:mi>k</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mi>N</mml:mi></mml:munderover><mml:mrow><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mi>&#x02113;</mml:mi><mml:mi>n</mml:mi></mml:msub><mml:mo stretchy='false'>(</mml:mo><mml:mn>2</mml:mn><mml:mi>k</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>2</mml:mn><mml:mi>k</mml:mi><mml:mo stretchy='false'>)</mml:mo><mml:mo>+</mml:mo><mml:msub><mml:mi>&#x02113;</mml:mi><mml:mi>n</mml:mi></mml:msub><mml:mo stretchy='false'>(</mml:mo><mml:mn>2</mml:mn><mml:mi>k</mml:mi><mml:mo>,</mml:mo><mml:mn>2</mml:mn><mml:mi>k</mml:mi><mml:mo>&#x02212;</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy='false'>)</mml:mo></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mrow></mml:mstyle></mml:mrow></mml:math></disp-formula>
<p>where &#x02113;<sub><italic>n</italic></sub> is the NNCLR loss from <xref ref-type="disp-formula" rid="E2">Equation 2</xref>, 2<italic>k</italic> &#x02212; 1 and 2<italic>k</italic> represent the indices of the same augmented slice, and <italic>N</italic> is the total number of training samples.</p>
<p>Specifically, we used the &#x0201C;tiny&#x0201D; variant of the ConvNeXt model (Liu et al., <xref ref-type="bibr" rid="B47">2022b</xref>) as our backbone model. It has a configuration with sequential blocks set to (3, 3, 9, 3) and the number of output channels equal to (96, 192, 384, 768). ConvNeXt incorporates many architectural advancements such as larger 7 &#x000D7; 7 kernel sizes, skip connections, inverted bottleneck, Gaussian error linear units (GELU) as activation function, layer-wise normalization (LN) strategy instead of batch normalization (BN), etc. The ConvNeXt model and pretrained model weights can be downloaded from the publicly available PyTorch library (Paszke et al., <xref ref-type="bibr" rid="B57">2019</xref>).</p>
<p><italic>Classification head:</italic> While using the ConvNeXt model as a feature extractor, we considered the output produced by a 2<italic>D</italic> adaptive average pooling layer after the last convolutional block as input for the subsequent &#x0201C;classification head&#x0201D; (<xref ref-type="fig" rid="F1">Figure 1</xref>). That means the classification head takes as input the latent feature representations of the MRI scans that were processed by the backbone CNN model. The dimension of the extracted feature vector per MRI slice is 768. Our classification head is a simple neural network consisting of a single fully-connected layer preceded by a layer normalization operation (<xref ref-type="fig" rid="F1">Figure 1</xref> bottom). A single-layer perceptron was chosen as the classification head to leverage the features extracted from the ConvNeXt feature extractor directly, rather than transforming the features by applying multiple levels of nonlinearities. This design choice aims to preserve the integrity of the extracted features. Employing a single-layer perceptron is a widely recognized methodology, commonly referred to as <italic>linear evaluation</italic> or <italic>linear probing</italic> (Dubois et al., <xref ref-type="bibr" rid="B16">2023</xref>; Scheibenreif et al., <xref ref-type="bibr" rid="B65">2024</xref>; Kalibhat et al., <xref ref-type="bibr" rid="B39">2024</xref>).</p>
</sec>
<sec>
<title>3.3 Feature attribution</title>
<p>Integrated gradients (IG) can be applied to various data modalities, such as text, images, or structured data (Sundararajan et al., <xref ref-type="bibr" rid="B75">2017</xref>). IG was chosen over other feature-attribution methods because of its strong theoretical justifications, such as the completeness property of the integrated gradients. IG considers a straight path from some baseline to the input, and computes the gradients along that path. These accumulated gradients are called integrated gradients. However, this accumulation is an approximation of the actual integration of the gradients, and the number of steps taken between the baseline and the input determines the quality of this approximation. In our study, we set <italic>N</italic> = 50 as the number of integration steps taken between the baseline image and the input image. To calculate IG importance scores, a mean CN image was used as a baseline for the IG attribution method. We used the IG implementation provided by the Captum library (Kokhlikyan et al., <xref ref-type="bibr" rid="B41">2020</xref>) to calculate importance maps for MRI scans with respect to the classification task.</p>
</sec>
<sec>
<title>3.4 Experimental setup</title>
<p><italic>Training the feature extractor:</italic> We trained a feature extraction model (ConvNeXt) using the NNCLR method on ADNI3, ADNI2 and FTLDNI data for three learning trials. For each trial, we created random training and test sets. These sets were held constant for all experiments. If more than one MRI recording was available per participant, then we assigned all of that participant&#x00027;s MRI scans to only one set, thus avoiding data leakage. This resulted in 10% of data belonging to the test set.</p>
<p>The model was trained for 1,000 epochs using a batch size of 180 samples. The size of the NNCLR queue <italic>Q</italic> was set to 8,192. We applied three different data augmentation techniques with a probability of 0.5 to produce views visualized in <xref ref-type="fig" rid="F2">Figures 2B</xref>&#x02013;<xref ref-type="fig" rid="F2">D</xref>: horizontal flip, cropping and resizing, and occlusion. We experimented with different data sources to train the feature extractor, i.e., utilizing in-domain medical images vs. training with out-of-domain natural images. More details about model training and results can be found in the Supplementary material.</p>
<p><italic>Training the classification head:</italic> To determine if a 3D MRI scan belongs to a specific diagnostic group, we first derive the latent representation vectors for 2D coronal slices using the ConvNeXt feature extractor and then make a prediction for each slice using the classification head. For evaluation with the test data, we applied a majority voting procedure in which the group label that occurs the most frequently determined the final group assignment. We trained the classification head for 100 epochs, on the same three training trials that were used to train the feature extractors. We used a batch size of 64 samples and decayed the learning rate with cosine annealing after every 20 epochs.</p>
<p>We experimented with various setups for training a classification head while keeping the weights of the feature extractor frozen vs. unfrozen, i.e., letting the weights change during the classification head training. For the downstream task, we compared different multi-class classification heads, i.e., predicting four (CN, MCI, AD, BV) or three classes&#x02014;(CN, MCI, AD) and (CN, AD, BV), and binary classification heads&#x02014;(CN, AD), (CN, BV), and (AD, BV). Furthermore, we evaluated our models on the independent AIBL dataset, which was not used during training. The independent test dataset enabled us to assess the generalizability of our approach.</p>
<p>We used balanced accuracy, sensitivity (true positive rate), specificity (true negative rate), and the Matthews correlation coefficient (MCC) as evaluation metrics. Due to the class imbalance in our dataset, we have chosen balanced accuracy over simple accuracy in our study. Balanced accuracy is the average of the true positive rate and the true negative rate, and thus avoids the overestimation of model quality that (simple) accuracy generally shows in class imbalance scenarios. With the true positives <italic>TP</italic>, true negatives <italic>TN</italic>, false positives <italic>FP</italic>, and false negatives <italic>FN</italic>, the balanced accuracy is defined as:</p>
<disp-formula id="E5"><label>(5)</label><mml:math id="M5"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mtext class="textrm" mathvariant="normal">Balanced Accuracy</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mfrac><mml:mrow><mml:mtext class="textrm" mathvariant="normal">TP</mml:mtext></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">TP</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mtext class="textrm" mathvariant="normal">FN</mml:mtext></mml:mrow></mml:mfrac><mml:mo>&#x0002B;</mml:mo><mml:mfrac><mml:mrow><mml:mtext class="textrm" mathvariant="normal">TN</mml:mtext></mml:mrow><mml:mrow><mml:mtext class="textrm" mathvariant="normal">TN</mml:mtext><mml:mo>&#x0002B;</mml:mo><mml:mtext class="textrm" mathvariant="normal">FP</mml:mtext></mml:mrow></mml:mfrac></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
<p>As shown in Chicco and Jurman (<xref ref-type="bibr" rid="B8">2020</xref>), the MCC should be preferred over the (simple) accuracy and the F1 score, as they could generate misleading results in unbalanced data sets. The MCC ranges between [&#x02212;1, 1]. To achieve a high MCC score, the classifier would have to make correct predictions on both the majority and minority classes. The MCC is formally defined as:</p>
<disp-formula id="E6"><label>(6)</label><mml:math id="M6"><mml:mtable class="eqnarray" columnalign="left"><mml:mtr><mml:mtd><mml:mrow><mml:mtext>MCC</mml:mtext><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mtext>TP</mml:mtext><mml:mo>&#x022C5;</mml:mo><mml:mtext>TN</mml:mtext><mml:mo>&#x02212;</mml:mo><mml:mtext>FP</mml:mtext><mml:mo>&#x022C5;</mml:mo><mml:mtext>FN</mml:mtext></mml:mrow><mml:mrow><mml:msqrt><mml:mrow><mml:mo stretchy='false'>(</mml:mo><mml:mtext>TP</mml:mtext><mml:mo>+</mml:mo><mml:mtext>FP</mml:mtext><mml:mo stretchy='false'>)</mml:mo><mml:mo>&#x022C5;</mml:mo><mml:mo stretchy='false'>(</mml:mo><mml:mtext>TP</mml:mtext><mml:mo>+</mml:mo><mml:mtext>FN</mml:mtext><mml:mo stretchy='false'>)</mml:mo><mml:mo>&#x022C5;</mml:mo><mml:mo stretchy='false'>(</mml:mo><mml:mtext>TN</mml:mtext><mml:mo>+</mml:mo><mml:mtext>FP</mml:mtext><mml:mo stretchy='false'>)</mml:mo><mml:mo>&#x022C5;</mml:mo><mml:mo stretchy='false'>(</mml:mo><mml:mtext>TN</mml:mtext><mml:mo>+</mml:mo><mml:mtext>FN</mml:mtext><mml:mo stretchy='false'>)</mml:mo></mml:mrow></mml:msqrt></mml:mrow></mml:mfrac></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></disp-formula>
</sec>
</sec>
<sec sec-type="results" id="s4">
<title>4 Results</title>
<sec>
<title>4.1 Diagnostic group separation</title>
<p>We evaluated the manner in which the classification head could be configured. We compared multi-class vs. binary classification heads. <xref ref-type="table" rid="T2">Table 2</xref> shows the results achieved with our proposed architecture for the identification of neurodegenerative disorders, using a frozen ConvNeXt feature extractor trained under the NNCLR SSL paradigm on brain images. The reported numbers were averaged over three learning trials. For the binary (AD vs. CN) classification model, the balanced accuracy reached 82% for the cross-validation test sets and 80% for the independent AIBL data cohort.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Classification results of our proposed architecture, consisting of a frozen feature extractor trained under an SSL paradigm, and a single-layer neural network as the downstream classification head.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th/>
<th valign="top" align="center"><bold>Balanced accuracy</bold></th>
<th valign="top" align="center"><bold>MCC</bold></th>
<th valign="top" align="center"><bold>Sensitivity</bold></th>
<th valign="top" align="center"><bold>Specificity</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" colspan="5" style="background-color:#dee1e1"><bold>Cross-validation test set (ADNI2/3 and FTLDNI)</bold></td>
</tr>
<tr>
<td valign="top" align="left">AD vs. MCI vs. CN vs. BV:</td>
<td valign="top" align="center">0.60 &#x000B1; 0.03</td>
<td valign="top" align="center">0.32 &#x000B1; 0.02</td>
<td valign="top" align="center">0.51 &#x000B1; 0.01</td>
<td valign="top" align="center">0.84 &#x000B1; 0.00</td>
</tr>
<tr>
<td valign="top" align="left">AD vs. MCI vs. CN:</td>
<td valign="top" align="center">0.56 &#x000B1; 0.02</td>
<td valign="top" align="center">0.32 &#x000B1; 0.03</td>
<td valign="top" align="center">0.55 &#x000B1; 0.02</td>
<td valign="top" align="center">0.78 &#x000B1; 0.01</td>
</tr>
<tr>
<td valign="top" align="left">AD vs. CN vs. BV:</td>
<td valign="top" align="center">0.78 &#x000B1; 0.03</td>
<td valign="top" align="center">0.55 &#x000B1; 0.05</td>
<td valign="top" align="center">0.73 &#x000B1; 0.02</td>
<td valign="top" align="center">0.87 &#x000B1; 0.01</td>
</tr>
<tr>
<td valign="top" align="left">BV vs. CN:</td>
<td valign="top" align="center">0.88 &#x000B1; 0.03</td>
<td valign="top" align="center">0.57 &#x000B1; 0.03</td>
<td valign="top" align="center">0.90 &#x000B1; 0.08</td>
<td valign="top" align="center">0.86 &#x000B1; 0.02</td>
</tr>
<tr>
<td valign="top" align="left">AD vs. CN:</td>
<td valign="top" align="center">0.82 &#x000B1; 0.04</td>
<td valign="top" align="center">0.61 &#x000B1; 0.08</td>
<td valign="top" align="center">0.82 &#x000B1; 0.05</td>
<td valign="top" align="center">0.82 &#x000B1; 0.03</td>
</tr>
<tr>
<td valign="top" align="left">AD vs. BV:</td>
<td valign="top" align="center">0.93 &#x000B1; 0.01</td>
<td valign="top" align="center">0.73 &#x000B1; 0.04</td>
<td valign="top" align="center">0.85 &#x000B1; 0.02</td>
<td valign="top" align="center">1.00 &#x000B1; 0.00</td>
</tr>
<tr>
<td valign="top" align="left" colspan="5" style="background-color:#dee1e1"><bold>Independent test set (AIBL)</bold></td>
</tr>
<tr>
<td valign="top" align="left">AD vs. MCI vs. CN:</td>
<td valign="top" align="center">0.53 &#x000B1; 0.01</td>
<td valign="top" align="center">0.30 &#x000B1; 0.03</td>
<td valign="top" align="center">0.69 &#x000B1; 0.01</td>
<td valign="top" align="center">0.84 &#x000B1; 0.01</td>
</tr>
<tr>
<td valign="top" align="left">AD vs. CN:</td>
<td valign="top" align="center">0.80 &#x000B1; 0.01</td>
<td valign="top" align="center">0.59 &#x000B1; 0.01</td>
<td valign="top" align="center">0.66 &#x000B1; 0.02</td>
<td valign="top" align="center">0.94 &#x000B1; 0.01</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>In a multi-class setup, micro averages are reported for the sensitivity and specificity metrics. CN, cognitively normal; AD, dementia due to Alzheimer&#x00027;s disease; MCI, mild cognitive impairment; BV, behavioral variant of frontotemporal dementia; MCC, Matthews correlation coefficient.</p>
</table-wrap-foot>
</table-wrap>
<p>Upon comparing results from various settings of classification heads trained over a frozen feature extractor, we can observe a general trend, i.e., the binary classification for separating cognitively normal (CN) and Alzheimer&#x00027;s disease (AD) samples is a much simpler task than the 4-way multi-class classification of CN, mild cognitive impairment (MCI), AD and behavioral variant of frontotemporal dementia (BV) samples. This finding has often been reported in other studies in the field.</p>
<p>In the multi-class classification setting, the AD vs. MCI vs. CN model often confuses MCI samples with CN or AD samples. This reflects the progressive nature of Alzheimer&#x00027;s dementia, with MCI being an intermediate stage between CN and AD. Interestingly, we found that the AD vs. MCI vs. CN vs. BV model is substantially better at separating BV samples from the other CN, MCI and AD samples, with the recall (=sensitivity) of the BV class being 0.89, compared to the average micro recall of the same model being 0.51. This finding points toward the model being sensitive to different underlying pathologies of different dementia diseases&#x02014;frontotemporal dementia and AD. The same fact is also corroborated by the high performance metrics of the binary AD vs. BV model. In Section 5.1 below, we discuss the achieved results and compare them with the state of the art.</p>
</sec>
<sec>
<title>4.2 Model interpretability</title>
<p>To highlight the input regions that were found to be useful by the SSL model, we used the Integrated Gradients (IG) attribution method. IG calculates the importance scores for the input regions for a specified prediction label. The IG importance scores were calculated for every sample of the test data set (from ADNI2/3 and FTLDNI), on which our multi-class model (AD vs. CN vs. BV) makes a correct classification. <xref ref-type="fig" rid="F3">Figure 3</xref> presents mean IG importance scores for the disease types AD and BV, visualized over the brain scan of a healthy sample chosen from the ADNI cohort. While making a prediction toward the diseased classes, the red regions in the image highlight input regions representing the evidence for the diseased class, while the green regions in the image highlight input regions representing the evidence against the diseased class. The mean importance scores were thresholded to visualize the most relevant findings.</p>
<fig id="F3" position="float">
<label>Figure 3</label>
<caption><p>Mean attribution maps derived from the Integrated Gradients method for correctly identified AD and BV samples. Green and red color highlight pixel contributions to the model&#x00027;s prediction. Here, red highlights evidence for the respective disease classification and green indicates evidence against it. The attribution map overlay image was smoothed and thresholded to highlight relevant findings and improve visualization. AD, dementia due to Alzheimer&#x00027;s disease; BV, behavioral variant of frontotemporal dementia. <bold>(A)</bold> Slice: 0, Diagnosis: AD. <bold>(B)</bold> Slice: 60, Diagnosis: AD. <bold>(C)</bold> Slice: 0, Diagnosis: BV. <bold>(D)</bold> Slice: 60, Diagnosis: BV.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fninf-19-1527582-g0003.tif"/>
</fig>
</sec>
</sec>
<sec sec-type="discussion" id="s5">
<title>5 Discussion</title>
<sec>
<title>5.1 Feature learning</title>
<p>In our proposed SSL framework, we rely on signals that are derived from the data itself rather than on external classification target labels to train a feature extractor. We trained our SSL model while restricting input to a subset of 2D coronal slices. It should be noted that other SSL studies also avoided training 3D CNN with high input resolution and followed similar 2D approaches as our study (Couronn&#x000E9; et al., <xref ref-type="bibr" rid="B10">2021</xref>) or alternatively needed to drastically downscale the 3D images to a very low 64 &#x000D7; 64 &#x000D7; 64 resolution to reduce computing time (Ouyang et al., <xref ref-type="bibr" rid="B56">2022</xref>; Fedorov et al., <xref ref-type="bibr" rid="B22">2021</xref>).</p>
<p>Our AD vs. CN vs. BV multi-class model achieves a balanced accuracy of 78%. Certain fully supervised methods solve the same task with the following performance metrics: Ma et al. (<xref ref-type="bibr" rid="B49">2020</xref>) report a (simple) accuracy of 86.0% from a model comparable to ours and 88.3% from a model with multimodal information sources and generative data augmentation, and Hu et al. (<xref ref-type="bibr" rid="B32">2021b</xref>) report a (simple) accuracy of 66.8% on a larger diverse dataset, and 91.8% on a smaller cleaner dataset. Our BV vs. CN binary model achieves a balanced accuracy of 88.2%. For the same task, Moguilner et al. (<xref ref-type="bibr" rid="B51">2023</xref>) report a (simple) accuracy of 80% and 95% on MRI scans with 1.5T and 3T strength, respectively.</p>
<p>There are other SSL studies that report AD vs. CN group separation results on the ADNI dataset. Dufumier et al. (<xref ref-type="bibr" rid="B17">2021</xref>) reported an AUC score around 0.96. Ouyang et al. (<xref ref-type="bibr" rid="B56">2022</xref>) achieved a balanced accuracy between 81.9% and 83.6%, pre and post model finetuning. Seyfio&#x0011F;lu et al. (<xref ref-type="bibr" rid="B70">2022</xref>) using a vision transformer reported a mean simple accuracy of 83.4%. There are also other SSL applications that reported sub-optimal results: Chen et al. (<xref ref-type="bibr" rid="B7">2023</xref>) reported a balanced accuracy between 68.23% and 77.5% depending on model architecture used, while Jiang and Miao (<xref ref-type="bibr" rid="B36">2022</xref>) reported a balanced accuracy between 73.1% and 74% depending on the pretext task used. For the same task reported in these studies, our model with a frozen feature extractor achieves a balanced accuracy of 82% on the ADNI dataset, which is competitive with metrics reported in other studies. On a holdout independent test set (AIBL), our model achieves a balanced accuracy of 80%, which is only a two-percent drop from the cross-validation testing of the model, highlighting the robustness of the model. It should be noted that many studies do not evaluate their models on a holdout independent test set, which makes it difficult to assess their generalizability.</p>
<p>In <xref ref-type="table" rid="T3">Table 3</xref>, we compare our model evaluation results with the state-of-the-art studies that also used AIBL as an independent test dataset. Here, we compare our SSL model with other models trained in a supervised manner. Qiu et al. (<xref ref-type="bibr" rid="B60">2020</xref>) report manual expert rating scores, with a simple accuracy metric of 82.3%. This performance level is comparable to that of our SSL models, which achieved the simple accuracy measure of 89.9% on the AIBL independent test set. It should be noted that some papers did not report the <italic>balanced accuracy</italic> measure; thus, their &#x0201C;simple&#x0201D; accuracy results might be biased toward the majority class of cognitively normal people who comprise 80% in the AIBL dataset for the group comparison AD vs. CN.</p>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Comparison of our proposed method with the state-of-the-art.</p></caption>
<table frame="box" rules="all">
<thead>
<tr style="background-color:#919498;color:#ffffff">
<th valign="top" align="left"><bold>Study training on the ADNI dataset</bold></th>
<th valign="top" align="left"><bold>Method details</bold></th>
<th valign="top" align="center"><bold>Balanced accuracy on the AIBL dataset</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Our method</td>
<td valign="top" align="left">SSL, 2D slice-level CNN</td>
<td valign="top" align="center">0.797 &#x000B1; 0.009</td>
</tr>
<tr>
<td valign="top" align="left">Wen et al. (<xref ref-type="bibr" rid="B82">2020</xref>)</td>
<td valign="top" align="left">SL, 2D slice-level CNN</td>
<td valign="top" align="center">0.756 &#x000B1; 0.015</td>
</tr>
<tr>
<td valign="top" align="left">Wen et al. (<xref ref-type="bibr" rid="B82">2020</xref>)</td>
<td valign="top" align="left">SL, 3D patch-level CNN</td>
<td valign="top" align="center">0.802 &#x000B1; 0.016</td>
</tr>
<tr>
<td valign="top" align="left">Wen et al. (<xref ref-type="bibr" rid="B82">2020</xref>)</td>
<td valign="top" align="left">SL, 3D subject-level CNN</td>
<td valign="top" align="center">0.862 &#x000B1; 0.016</td>
</tr>
<tr>
<td valign="top" align="left">Dyrba et al. (<xref ref-type="bibr" rid="B19">2021</xref>)</td>
<td valign="top" align="left">SL, 3D subject-level CNN</td>
<td valign="top" align="center">0.832 &#x000B1; 0.030</td>
</tr>
<tr style="background-color:#919498;color:#ffffff">
<td/>
<td/>
<td valign="top" align="left"><bold>Simple accuracy on the AIBL dataset</bold></td>
</tr>
<tr>
<td valign="top" align="left">Our method</td>
<td valign="top" align="left">SSL, 2D slice-level CNN</td>
<td valign="top" align="center">0.899 &#x000B1; 0.003</td>
</tr>
<tr>
<td valign="top" align="left">Qiu et al. (<xref ref-type="bibr" rid="B60">2020</xref>)</td>
<td valign="top" align="left">SL, 3D patch-level CNN</td>
<td valign="top" align="center">0.870 &#x000B1; 0.022</td>
</tr>
<tr>
<td valign="top" align="left">Han et al. (<xref ref-type="bibr" rid="B27">2022</xref>)</td>
<td valign="top" align="left">SL, 3D subject-level CNN</td>
<td valign="top" align="center">0.865</td>
</tr>
<tr>
<td valign="top" align="left">Han et al. (<xref ref-type="bibr" rid="B27">2022</xref>)</td>
<td valign="top" align="left">SL, 3D patch-level CNN</td>
<td valign="top" align="center">0.875</td>
</tr>
<tr>
<td valign="top" align="left">Qiu et al. (<xref ref-type="bibr" rid="B60">2020</xref>)</td>
<td valign="top" align="left">Expert Neurologists</td>
<td valign="top" align="center">0.823 &#x000B1; 0.094</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>The results are provided for studies that used the AIBL dataset for independent evaluation and the group comparison AD vs. CN. In some studies balanced accuracy was not reported; in these cases, &#x0201C;simple&#x0201D; accuracy is provided instead, which might be biased toward the majority class (=CN). AD, dementia due to Alzheimer&#x00027;s disease; CN, cognitively normal; SSL, Self-supervised learning; SL, Supervised learning; CNN, Convolutional neural network.</p>
</table-wrap-foot>
</table-wrap>
<p>With regard to our achieved level of performance, we can conclude that the ConvNeXt model trained under an SSL paradigm learns generalizable features for the subsequent downstream classification tasks without requiring data sampling techniques or sophisticated data augmentations, and consequently achieves competitive results in comparison to supervised approaches. The reported results show that our model learned meaningful feature representations in a self-supervised manner, which can be used successfully to separate different stages and types of dementia.</p>
</sec>
<sec>
<title>5.2 Neural network interpretability</title>
<p>We chose the SSL paradigm to extract more generalizable image features independently of a downstream task. However, the SSL paradigm also allows the backbone model to learn features of the brain that may correlate with a specific neurodegenerative disorder. We applied the Integrated Gradients (IG) method to interpret the models and provide insights into the significance of input regions for the predictions. The IG importance scores were calculated for samples from the test dataset for which our AD vs. CN vs. BV multi-class model makes correct classifications. <xref ref-type="fig" rid="F3">Figure 3</xref> illustrates the mean IG importance scores for classifying samples into the AD or BV group. In <xref ref-type="fig" rid="F3">Figures 3A, B</xref>, we see the hippocampus region highlighted in red for AD classification. Temporal lobe atrophy, specifically hippocampus atrophy, is a hallmark sign of Alzheimer&#x00027;s disease. In <xref ref-type="fig" rid="F3">Figures 3C, D</xref>, we see the insula and frontal lobe regions being highlighted in red. Insular atrophy is associated with the behavioral variant of frontotemporal dementia (Moguilner et al., <xref ref-type="bibr" rid="B51">2023</xref>; Seeley, <xref ref-type="bibr" rid="B68">2010</xref>; Luo et al., <xref ref-type="bibr" rid="B48">2020</xref>; Mandelli et al., <xref ref-type="bibr" rid="B50">2016</xref>). It is of great interest to see the IG maps separately highlighting regions, which in the literature are often associated with AD and BV pathology.</p>
<p>Furthermore, to our knowledge, only one previous study, Dadsetan et al. (<xref ref-type="bibr" rid="B11">2022</xref>), has systematically compared different pretext methods for training SSL models for AD progression prediction, while also employing an XAI method, i.e., GradCAM, to generate relevance maps to evaluate the learned features. However, the reported relevance maps were particularly diffuse and widespread, offering limited interpretability. In addition, as an ablation study, we investigated different XAI methods beyond IG, but the results of these experiments also produced diffuse, spiky and unspecific relevance maps. This highlights that the application of XAI methods to SSL methods remains an open area of research.</p>
<p>Notably, our model successfully learned to not consider tissue outside of the brain or regions outside of the skull. However, the derived attributions provide a rather general indication of important input regions throughout the brain, including primarily gray matter and white matter tissue. A few studies have pointed out the complex nature of IG importance scores that highlight multiple image features, both for and against a class instance, making their comprehension non-trivial (Adebayo et al., <xref ref-type="bibr" rid="B1">2018</xref>; Kakogeorgiou and Karantzalos, <xref ref-type="bibr" rid="B38">2021</xref>; Hiller et al., <xref ref-type="bibr" rid="B29">2025</xref>).</p>
</sec>
<sec>
<title>5.3 Limitations and future work</title>
<p>Our study uses only a subset of coronal slices to make sample-level classifications. We acknowledge that the selection of the full slice set along the coronal axis or selection of the full 3D MRI data could have a positive effect on classification performance; however, the main goal of the study was to investigate the application of SSL and to compare it with traditional supervised approaches; thus only a subset of slices along the coronal axis was chosen as input. Learning a 3D CNN is a computationally expensive problem for self-supervised learning, as it relies on (a) a very large data corpus, (b) data augmentation algorithms which are markedly more computationally expensive in 3D due to the cubic time-complexity of the algorithms, and (c) many learning iterations as training typically converges much slower than in supervised learning. More specifically, training our models for 1,000 epochs on a single NVIDIA Quadro RTX 6000 GPU took on average 27 h. In the future, to train better feature extractors, we will incorporate more spatial neuroanatomical information, by combining three CNNs, i.e., one trained along each orthogonal plane&#x02014;axial, coronal, and sagittal, and hence learning feature representations for the full 3D MRI data, as was proposed for supervised models (Qiao et al., <xref ref-type="bibr" rid="B59">2021</xref>). Alternatively, a vision transformer model could also be explored to efficiently process smaller 3D patches of the brain (Qiu et al., <xref ref-type="bibr" rid="B60">2020</xref>; Wen et al., <xref ref-type="bibr" rid="B82">2020</xref>; Han et al., <xref ref-type="bibr" rid="B27">2022</xref>; Wolf et al., <xref ref-type="bibr" rid="B85">2023</xref>).</p>
<p>With regard to neural network interpretability and feature attribution, a comprehensive analysis of the salient features and feature attribution methods lies outside the scope of our current work. However, it remains to be seen whether the somewhat dispersed attribution maps we see in the current study are due to a difference in the training paradigm, i.e., SSL vs. supervised learning. To the best of our knowledge, no systematic efforts have been undertaken to compare the effects of training paradigm and attribution methods in highlighting disease-specific brain structures known in the clinical literature for different types of dementia. Additional experiments are required to holistically understand our SSL model and the informative importance of the generated maps. In our future work, we will explore other methods for feature attribution and methods to summarize attributions per brain region to assess if specific disease patterns emerge.</p>
<p>We also intend to include additional datasets in our future studies to learn more robust models. Specifically, we intend to add FTLD data cohorts.</p>
</sec>
<sec>
<title>5.4 Conclusion</title>
<p>We presented an architecture for the identification of neurodegenerative diseases from MRI data, consisting of a feature extractor and a classification head. The feature extractor used the ConvNeXt architecture as a backbone, which was trained under a self-supervised learning paradigm with nearest-neighbor contrastive learning (NNCLR) loss on brain MRI scans. The feature extractor model was used for subsequent downstream tasks by training only an additional single-layer neural network component which performs the classification. From our experiments, we show that CNN models trained under the SSL paradigm have comparable performance to state-of-the-art CNN models trained in a supervised manner. With this presented approach, we provide a practical application of self-supervised learning on MRI data, and also demonstrate the application of attribution mapping methods for such systems to improve interpretability of the model&#x00027;s decision.</p>
</sec>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found here: Alzheimer&#x00027;s Disease Neuroimaging Initiative (ADNI) (<ext-link ext-link-type="uri" xlink:href="http://adni.loni.usc.edu/data-samples/access-data">http://adni.loni.usc.edu/data-samples/access-data</ext-link>), Australian Imaging Biomarkers and Lifestyle flagship study of aging (AIBL) (<ext-link ext-link-type="uri" xlink:href="https://aibl.csiro.au">https://aibl.csiro.au</ext-link>), and Frontotemporal Lobar Degeneration Neuroimaging Initiative (FTLDNI) (<ext-link ext-link-type="uri" xlink:href="https://memory.ucsf.edu/research-trials/research/allftd">https://memory.ucsf.edu/research-trials/research/allftd</ext-link>). Our source code for data processing, model training and evaluation, and creating attribution maps will be made publicly available at: (<ext-link ext-link-type="uri" xlink:href="https://github.com/VadymV/clinic-net">https://github.com/VadymV/clinic-net</ext-link>).</p>
</sec>
<sec sec-type="ethics-statement" id="s7">
<title>Ethics statement</title>
<p>The studies involving humans were approved by the respective neuroimaging initiatives internal review boards of each of the participating study sites. See <ext-link ext-link-type="uri" xlink:href="https://adni.loni.usc.edu">https://adni.loni.usc.edu</ext-link> and <ext-link ext-link-type="uri" xlink:href="https://aibl.csiro.au">https://aibl.csiro.au</ext-link> for details. All initiatives met common ethical standards in the collection of the data such as the Declaration of Helsinki. Analysis of the data was approved by the internal review board of the Rostock University Medical Center, reference number A 2020-0182. The studies were conducted in accordance with the local legislation and institutional requirements. Written informed consent for participation was not required from the participants or the participants&#x00027; legal guardians/next of kin in accordance with the national legislation and institutional requirements.</p>
</sec>
<sec sec-type="author-contributions" id="s8">
<title>Author contributions</title>
<p>VG: Conceptualization, Data curation, Methodology, Software, Visualization, Writing &#x02013; original draft. DS: Conceptualization, Software, Visualization, Writing &#x02013; original draft. ST: Conceptualization, Methodology, Supervision, Validation, Writing &#x02013; review &#x00026; editing. MD: Conceptualization, Methodology, Supervision, Visualization, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing.</p>
</sec>
<sec id="s9">
<title>For the ADNI, AIBL, FTLDNI study groups</title>
<p>A complete listing of ADNI investigators can be found at <ext-link ext-link-type="uri" xlink:href="https://adni.loni.usc.edu/wp-content/uploads/how_to_apply/ADNI_Acknowledgement_List.pdf">https://adni.loni.usc.edu/wp-content/uploads/how_to_apply/ADNI_Acknowledgement_List.pdf</ext-link>. AIBL researchers are listed at <ext-link ext-link-type="uri" xlink:href="https://www.aibl.csiro.au">https://www.aibl.csiro.au</ext-link>.</p>
</sec>
<sec sec-type="funding-information" id="s10">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. This study was supported by the German Research Foundation (Deutsche Forschungsgemeinschaft, DFG) under grant DY151/2-1, project ID 454834942. Data collection and sharing for this project was funded by the Alzheimer&#x00027;s Disease Neuroimaging Initiative (ADNI) (National Institutes of Health Grant U01 AG024904) and DOD ADNI (Department of Defense award number W81XWH-12-2-0012). ADNI is funded by the National Institute on Aging, the National Institute of Biomedical Imaging and Bioengineering, and through generous contributions from the following: AbbVie, Alzheimer&#x00027;s Association; Alzheimer&#x00027;s Drug Discovery Foundation; Araclon Biotech; BioClinica, Inc.; Biogen; Bristol-Myers Squibb Company; CereSpir, Inc.; Cogstate; Eisai Inc.; Elan Pharmaceuticals, Inc.; Eli Lilly and Company; EuroImmun; F. Hoffmann-La Roche Ltd and its affiliated company Genentech, Inc.; Fujirebio; GE Healthcare; IXICO Ltd.; Janssen Alzheimer Immunotherapy Research &#x00026; Development, LLC.; Johnson &#x00026; Johnson Pharmaceutical Research &#x00026; Development LLC.; Lumosity; Lundbeck; Merck &#x00026; Co., Inc.; Meso Scale Diagnostics, LLC.; NeuroRx Research; Neurotrack Technologies; Novartis Pharmaceuticals Corporation; Pfizer Inc.; Piramal Imaging; Servier; Takeda Pharmaceutical Company; and Transition Therapeutics. The Canadian Institutes of Health Research is providing funds to support ADNI clinical sites in Canada. Private sector contributions are facilitated by the Foundation for the National Institutes of Health (<ext-link ext-link-type="uri" xlink:href="https://www.fnih.org">https://www.fnih.org</ext-link>). The grantee organization is the Northern California Institute for Research and Education, and the study is coordinated by the Alzheimer&#x00027;s Therapeutic Research Institute at the University of Southern California. 
ADNI data are disseminated by the Laboratory for Neuro Imaging at the University of Southern California. Data collection and sharing for this project was funded by the Frontotemporal Lobar Degeneration Neuroimaging Initiative (National Institutes of Health Grant R01 AG032306). The study is coordinated through the University of California, San Francisco, Memory and Aging Center. FTLDNI data are disseminated by the Laboratory for Neuro Imaging at the University of Southern California. Data used in the preparation of this article was also obtained from the Australian Imaging Biomarkers and Lifestyle flagship study of aging (AIBL) funded by the Commonwealth Scientific and Industrial Research Organization (CSIRO). The AIBL researchers contributed data but did not participate in the analysis or writing of this report. AIBL study methodology has been reported previously (Ellis et al., <xref ref-type="bibr" rid="B21">2009</xref>).</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>ST served as member of advisory boards of Lilly, Eisai, and Biogen, and is member of the independent data safety and monitoring board of the study ENVISION (Biogen). The remaining authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest. The author(s) declared that they were an editorial board member of Frontiers, at the time of submission. This had no impact on the peer review process and the final decision.</p>
</sec>
<sec sec-type="ai-statement" id="s11">
<title>Generative AI statement</title>
<p>The author(s) declare that no Gen AI was used in the creation of this manuscript.</p>
</sec>
<sec sec-type="disclaimer" id="s12">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="s13">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fninf.2025.1527582/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fninf.2025.1527582/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Data_Sheet_1.pdf" id="SM1" mimetype="application/pdf" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<fn-group>
<title>Abbreviations</title>
<fn fn-type="abbr"><p>AD, Alzheimer&#x00027;s disease; ADNI, Alzheimer&#x00027;s Disease Neuroimaging Initiative; AIBL, Australian Imaging, Biomarker &#x00026; Lifestyle Flagship Study of Aging; BN, Batch normalization; BV, behavioral variant of frontotemporal dementia; CN, Cognitively normal participants; CNN, Convolutional neural network; ConvNeXt, A highly optimized CNN model architecture recently introduced by Liu et al. (<xref ref-type="bibr" rid="B47">2022b</xref>); DZNE, Deutsches Zentrum f&#x000FC;r Neurodegenerative Erkrankungen (German Center for Neurodegenerative Diseases); FTLD, Frontotemporal lobar degeneration; FTLDNI, Frontotemporal Lobar Degeneration Neuroimaging Initiative; GELU, Gaussian error linear units; Grad-CAM, Gradient-weighted class activation mapping; IG, Integrated gradients; InfoNCE, A form of contrastive loss metric, where NCE stands for Noise-Contrastive Estimation; LN, Layer-wise normalization; LRP, Layer-wise relevance propagation; MCC, Matthews correlation coefficient; MCI, Mild cognitive impairment; MNI, Montreal Neurological Institute; MRI, Magnetic resonance imaging; NNCLR, Nearest-Neighbor Contrastive Learning; PNFA, Progressive non-fluent aphasia; SL, Supervised learning; SSL, Self-supervised learning; SV, semantic variant of frontotemporal dementia; ViT, Vision Transformers; XAI, Explainable artificial intelligence.</p></fn></fn-group>
<fn-group>
<fn id="fn0001"><p><sup>1</sup>ADNI: <ext-link ext-link-type="uri" xlink:href="https://adni.loni.usc.edu/">https://adni.loni.usc.edu/</ext-link>.</p></fn>
<fn id="fn0002"><p><sup>2</sup>AIBL: <ext-link ext-link-type="uri" xlink:href="https://aibl.csiro.au/">https://aibl.csiro.au/</ext-link>.</p></fn>
<fn id="fn0003"><p><sup>3</sup>FTLDNI: <ext-link ext-link-type="uri" xlink:href="https://memory.ucsf.edu/research-trials/research/allftd">https://memory.ucsf.edu/research-trials/research/allftd</ext-link>.</p></fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Adebayo</surname> <given-names>J.</given-names></name> <name><surname>Gilmer</surname> <given-names>J.</given-names></name> <name><surname>Muelly</surname> <given-names>M.</given-names></name> <name><surname>Goodfellow</surname> <given-names>I.</given-names></name> <name><surname>Hardt</surname> <given-names>M.</given-names></name> <name><surname>Kim</surname> <given-names>B.</given-names></name></person-group> (<year>2018</year>). <article-title>&#x0201C;Sanity checks for saliency maps,&#x0201D;</article-title> in <source>Proceedings of the 32nd International Conference on Neural Information Processing Systems</source>, <fpage>9525</fpage>&#x02013;<lpage>9536</lpage>.</citation>
</ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Apicella</surname> <given-names>A.</given-names></name> <name><surname>Donnarumma</surname> <given-names>F.</given-names></name> <name><surname>Isgr&#x000F3;</surname> <given-names>F.</given-names></name> <name><surname>Prevete</surname> <given-names>R.</given-names></name></person-group> (<year>2021</year>). <article-title>A survey on modern trainable activation functions</article-title>. <source>Neural Netw</source>. <volume>138</volume>, <fpage>14</fpage>&#x02013;<lpage>32</lpage>. <pub-id pub-id-type="doi">10.1016/j.neunet.2021.01.026</pub-id><pub-id pub-id-type="pmid">33611065</pub-id></citation></ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>B&#x000F6;hle</surname> <given-names>M.</given-names></name> <name><surname>Eitel</surname> <given-names>F.</given-names></name> <name><surname>Weygandt</surname> <given-names>M.</given-names></name> <name><surname>Ritter</surname> <given-names>K.</given-names></name></person-group> (<year>2019</year>). <article-title>Layer-wise relevance propagation for explaining deep neural network decisions in MRI-based Alzheimer&#x00027;s disease classification</article-title>. <source>Front. Aging Neurosci</source>. <volume>11</volume>:<fpage>194</fpage>. <pub-id pub-id-type="doi">10.3389/fnagi.2019.00194</pub-id><pub-id pub-id-type="pmid">31417397</pub-id></citation></ref>
<ref id="B4">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chechik</surname> <given-names>G.</given-names></name> <name><surname>Sharma</surname> <given-names>V.</given-names></name> <name><surname>Shalit</surname> <given-names>U.</given-names></name> <name><surname>Bengio</surname> <given-names>S.</given-names></name></person-group> (<year>2010</year>). <article-title>Large scale online learning of image similarity through ranking</article-title>. <source>J. Mach. Learn. Res</source>. <volume>11</volume>, <fpage>1109</fpage>&#x02013;<lpage>1135</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-642-02172-5_2</pub-id><pub-id pub-id-type="pmid">30617800</pub-id></citation></ref>
<ref id="B5">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>T.</given-names></name> <name><surname>Kornblith</surname> <given-names>S.</given-names></name> <name><surname>Norouzi</surname> <given-names>M.</given-names></name> <name><surname>Hinton</surname> <given-names>G.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;A simple framework for contrastive learning of visual representations,&#x0201D;</article-title> in <source>International Conference on Machine Learning</source> (<publisher-loc>PMLR</publisher-loc>), <fpage>1597</fpage>&#x02013;<lpage>1607</lpage>.</citation>
</ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>T.</given-names></name> <name><surname>Zhai</surname> <given-names>X.</given-names></name> <name><surname>Ritter</surname> <given-names>M.</given-names></name> <name><surname>Lucic</surname> <given-names>M.</given-names></name> <name><surname>Houlsby</surname> <given-names>N.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Self-supervised GANs via auxiliary rotation loss,&#x0201D;</article-title> in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</source>, <fpage>12154</fpage>&#x02013;<lpage>12163</lpage>. <pub-id pub-id-type="doi">10.1109/CVPR.2019.01243</pub-id></citation>
</ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chen</surname> <given-names>Y.</given-names></name> <name><surname>Lou</surname> <given-names>S.</given-names></name> <name><surname>Shuai</surname> <given-names>M.</given-names></name> <name><surname>He</surname> <given-names>K.</given-names></name> <name><surname>An</surname> <given-names>Z.</given-names></name></person-group> (<year>2023</year>). <article-title>&#x0201C;CLCA: contrastive learning using combined additional information for Alzheimer&#x00027;s diagnosis,&#x0201D;</article-title> in <source>2023 3rd International Conference on Neural Networks, Information and Communication Engineering (NNICE)</source>, <fpage>316</fpage>&#x02013;<lpage>323</lpage>. <pub-id pub-id-type="doi">10.1109/NNICE58320.2023.10105726</pub-id></citation>
</ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Chicco</surname> <given-names>D.</given-names></name> <name><surname>Jurman</surname> <given-names>G.</given-names></name></person-group> (<year>2020</year>). <article-title>The advantages of the Matthews correlation coefficient (MCC) over F1 score and accuracy in binary classification evaluation</article-title>. <source>BMC Genomics</source> <volume>21</volume>, <fpage>1</fpage>&#x02013;<lpage>13</lpage>. <pub-id pub-id-type="doi">10.1186/s12864-019-6413-7</pub-id><pub-id pub-id-type="pmid">31898477</pub-id></citation></ref>
<ref id="B9">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Chopra</surname> <given-names>S.</given-names></name> <name><surname>Hadsell</surname> <given-names>R.</given-names></name> <name><surname>LeCun</surname> <given-names>Y.</given-names></name></person-group> (<year>2005</year>). <article-title>&#x0201C;Learning a similarity metric discriminatively, with application to face verification,&#x0201D;</article-title> in <source>2005 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR&#x00027;05)</source> (<publisher-loc>IEEE</publisher-loc>), <fpage>539</fpage>&#x02013;<lpage>546</lpage>. <pub-id pub-id-type="doi">10.1109/CVPR.2005.202</pub-id></citation>
</ref>
<ref id="B10">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Couronn&#x000E9;</surname> <given-names>R.</given-names></name> <name><surname>Vernhet</surname> <given-names>P.</given-names></name> <name><surname>Durrleman</surname> <given-names>S.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;Longitudinal self-supervision to disentangle inter-patient variability from disease progression,&#x0201D;</article-title> in <source>Medical Image Computing and Computer Assisted Intervention-MICCAI 2021: 24th International Conference, Strasbourg, France, September 27-October 1, 2021, Proceedings, Part II 24</source> (<publisher-loc>Springer</publisher-loc>), <fpage>231</fpage>&#x02013;<lpage>241</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-030-87196-3_22</pub-id></citation>
</ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dadsetan</surname> <given-names>S.</given-names></name> <name><surname>Hejrati</surname> <given-names>M.</given-names></name> <name><surname>Wu</surname> <given-names>S.</given-names></name> <name><surname>Hashemifar</surname> <given-names>S.</given-names></name></person-group> (<year>2022</year>). <article-title>Cross-domain self-supervised deep learning for robust Alzheimer&#x00027;s disease progression modeling</article-title>. <source>arXiv preprint arXiv:2211.08559</source>.</citation>
</ref>
<ref id="B12">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Deng</surname> <given-names>J.</given-names></name> <name><surname>Dong</surname> <given-names>W.</given-names></name> <name><surname>Socher</surname> <given-names>R.</given-names></name> <name><surname>Li</surname> <given-names>L.-J.</given-names></name> <name><surname>Li</surname> <given-names>K.</given-names></name> <name><surname>Fei-Fei</surname> <given-names>L.</given-names></name></person-group> (<year>2009</year>). <article-title>&#x0201C;ImageNet: a large-scale hierarchical image database,&#x0201D;</article-title> in <source>2009 IEEE Conference on Computer Vision and Pattern Recognition</source> (<publisher-loc>IEEE</publisher-loc>), <fpage>248</fpage>&#x02013;<lpage>255</lpage>. <pub-id pub-id-type="doi">10.1109/CVPR.2009.5206848</pub-id><pub-id pub-id-type="pmid">26886976</pub-id></citation></ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Doersch</surname> <given-names>C.</given-names></name> <name><surname>Gupta</surname> <given-names>A.</given-names></name> <name><surname>Efros</surname> <given-names>A. A.</given-names></name></person-group> (<year>2015</year>). <article-title>&#x0201C;Unsupervised visual representation learning by context prediction,&#x0201D;</article-title> in <source>Proceedings of the IEEE International Conference on Computer Vision</source>, <fpage>1422</fpage>&#x02013;<lpage>1430</lpage>. <pub-id pub-id-type="doi">10.1109/ICCV.2015.167</pub-id></citation>
</ref>
<ref id="B14">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dosovitskiy</surname> <given-names>A.</given-names></name> <name><surname>Beyer</surname> <given-names>L.</given-names></name> <name><surname>Kolesnikov</surname> <given-names>A.</given-names></name> <name><surname>Weissenborn</surname> <given-names>D.</given-names></name> <name><surname>Zhai</surname> <given-names>X.</given-names></name> <name><surname>Unterthiner</surname> <given-names>T.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>An image is worth 16x16 words: transformers for image recognition at scale</article-title>. <source>arXiv preprint arXiv:2010.11929</source>.</citation>
</ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dubey</surname> <given-names>S. R.</given-names></name> <name><surname>Singh</surname> <given-names>S. K.</given-names></name> <name><surname>Chaudhuri</surname> <given-names>B. B.</given-names></name></person-group> (<year>2022</year>). <article-title>Activation functions in deep learning: a comprehensive survey and benchmark</article-title>. <source>Neurocomputing</source>. <volume>503</volume>, <fpage>92</fpage>&#x02013;<lpage>108</lpage>. <pub-id pub-id-type="doi">10.1016/j.neucom.2022.06.111</pub-id><pub-id pub-id-type="pmid">37369638</pub-id></citation></ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dubois</surname> <given-names>Y.</given-names></name> <name><surname>Hashimoto</surname> <given-names>T.</given-names></name> <name><surname>Liang</surname> <given-names>P.</given-names></name></person-group> (<year>2023</year>). <article-title>&#x0201C;Evaluating self-supervised learning via risk decomposition,&#x0201D;</article-title> in <source>Proceedings of the 40th International Conference on Machine Learning, ICML&#x00027;23</source>.</citation>
</ref>
<ref id="B17">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Dufumier</surname> <given-names>B.</given-names></name> <name><surname>Gori</surname> <given-names>P.</given-names></name> <name><surname>Victor</surname> <given-names>J.</given-names></name> <name><surname>Grigis</surname> <given-names>A.</given-names></name> <name><surname>Wessa</surname> <given-names>M.</given-names></name> <name><surname>Brambilla</surname> <given-names>P.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>&#x0201C;Contrastive learning with continuous proxy meta-data for 3D MRI classification,&#x0201D;</article-title> in <source>Medical Image Computing and Computer Assisted Intervention-MICCAI 2021: 24th International Conference, Strasbourg, France, September 27-October 1, 2021, Proceedings, Part II 24</source> (<publisher-loc>Springer</publisher-loc>), <fpage>58</fpage>&#x02013;<lpage>68</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-030-87196-3_6</pub-id></citation>
</ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dwibedi</surname> <given-names>D.</given-names></name> <name><surname>Aytar</surname> <given-names>Y.</given-names></name> <name><surname>Tompson</surname> <given-names>J.</given-names></name> <name><surname>Sermanet</surname> <given-names>P.</given-names></name> <name><surname>Zisserman</surname> <given-names>A.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;With a little help from my friends: nearest-neighbor contrastive learning of visual representations,&#x0201D;</article-title> in <source>2021 IEEE/CVF International Conference on Computer Vision (ICCV)</source>, <fpage>9588</fpage>&#x02013;<lpage>9597</lpage>. <pub-id pub-id-type="doi">10.1109/ICCV48922.2021.00945</pub-id></citation>
</ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Dyrba</surname> <given-names>M.</given-names></name> <name><surname>Hanzig</surname> <given-names>M.</given-names></name> <name><surname>Altenstein</surname> <given-names>S.</given-names></name> <name><surname>Bader</surname> <given-names>S.</given-names></name> <name><surname>Ballarini</surname> <given-names>T.</given-names></name> <name><surname>Brosseron</surname> <given-names>F.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Improving 3D convolutional neural network comprehensibility via interactive visualization of relevance maps: evaluation in Alzheimer&#x00027;s disease</article-title>. <source>Alzheimer&#x00027;s Res. Ther</source>. <volume>13</volume>, <fpage>1</fpage>&#x02013;<lpage>18</lpage>. <pub-id pub-id-type="doi">10.1186/s13195-021-00924-2</pub-id><pub-id pub-id-type="pmid">34814936</pub-id></citation></ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Eitel</surname> <given-names>F.</given-names></name> <name><surname>Schulz</surname> <given-names>M.-A.</given-names></name> <name><surname>Seiler</surname> <given-names>M.</given-names></name> <name><surname>Walter</surname> <given-names>H.</given-names></name> <name><surname>Ritter</surname> <given-names>K.</given-names></name></person-group> (<year>2021</year>). <article-title>Promises and pitfalls of deep neural networks in neuroimaging-based psychiatric research</article-title>. <source>Exp. Neurol</source>. <volume>339</volume>:<fpage>113608</fpage>. <pub-id pub-id-type="doi">10.1016/j.expneurol.2021.113608</pub-id><pub-id pub-id-type="pmid">33513353</pub-id></citation></ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ellis</surname> <given-names>K. A.</given-names></name> <name><surname>Bush</surname> <given-names>A. I.</given-names></name> <name><surname>Darby</surname> <given-names>D.</given-names></name> <name><surname>De Fazio</surname> <given-names>D.</given-names></name> <name><surname>Foster</surname> <given-names>J.</given-names></name> <name><surname>Hudson</surname> <given-names>P.</given-names></name> <etal/></person-group>. (<year>2009</year>). <article-title>The Australian Imaging, Biomarkers and Lifestyle (AIBL) study of aging: methodology and baseline characteristics of 1112 individuals recruited for a longitudinal study of Alzheimer&#x00027;s disease</article-title>. <source>Int. Psychoger</source>. <volume>21</volume>, <fpage>672</fpage>&#x02013;<lpage>687</lpage>. <pub-id pub-id-type="doi">10.1017/S1041610209009405</pub-id><pub-id pub-id-type="pmid">19470201</pub-id></citation></ref>
<ref id="B22">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Fedorov</surname> <given-names>A.</given-names></name> <name><surname>Wu</surname> <given-names>L.</given-names></name> <name><surname>Sylvain</surname> <given-names>T.</given-names></name> <name><surname>Luck</surname> <given-names>M.</given-names></name> <name><surname>DeRamus</surname> <given-names>T. P.</given-names></name> <name><surname>Bleklov</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>&#x0201C;On self-supervised multimodal representation learning: an application to Alzheimer&#x00027;s disease,&#x0201D;</article-title> in <source>2021 IEEE 18th International Symposium on Biomedical Imaging (ISBI)</source> (<publisher-name>IEEE</publisher-name>), <fpage>1548</fpage>&#x02013;<lpage>1552</lpage>. <pub-id pub-id-type="doi">10.1109/ISBI48211.2021.9434103</pub-id><pub-id pub-id-type="pmid">38110045</pub-id></citation></ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ganjdanesh</surname> <given-names>A.</given-names></name> <name><surname>Gao</surname> <given-names>S.</given-names></name> <name><surname>Huang</surname> <given-names>H.</given-names></name></person-group> (<year>2023</year>). <article-title>&#x0201C;EffConv: efficient learning of kernel sizes for convolution layers of CNNs,&#x0201D;</article-title> in <source>Thirty Seventh AAAI Conference on Artificial Intelligence (AAAI 2023)</source>, <fpage>7604</fpage>&#x02013;<lpage>7612</lpage>. <pub-id pub-id-type="doi">10.1609/aaai.v37i6.25923</pub-id></citation>
</ref>
<ref id="B24">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gorade</surname> <given-names>V.</given-names></name> <name><surname>Mittal</surname> <given-names>S.</given-names></name> <name><surname>Singhal</surname> <given-names>R.</given-names></name></person-group> (<year>2023</year>). <article-title>PaCL: patient-aware contrastive learning through metadata refinement for generalized early disease diagnosis</article-title>. <source>Comput. Biol. Med</source>. <volume>167</volume>:<fpage>107569</fpage>. <pub-id pub-id-type="doi">10.1016/j.compbiomed.2023.107569</pub-id><pub-id pub-id-type="pmid">37865984</pub-id></citation></ref>
<ref id="B25">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Grill</surname> <given-names>J.-B.</given-names></name> <name><surname>Strub</surname> <given-names>F.</given-names></name> <name><surname>Altch&#x000E9;</surname> <given-names>F.</given-names></name> <name><surname>Tallec</surname> <given-names>C.</given-names></name> <name><surname>Richemond</surname> <given-names>P.</given-names></name> <name><surname>Buchatskaya</surname> <given-names>E.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>&#x0201C;Bootstrap your own latent &#x02013; a new approach to self-supervised learning,&#x0201D;</article-title> in <source>Proceedings of the 34th International Conference on Neural Information Processing Systems</source> (<publisher-name>Curran Associates Inc.</publisher-name>), <fpage>21271</fpage>&#x02013;<lpage>21284</lpage>.</citation>
</ref>
<ref id="B26">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Gutmann</surname> <given-names>M.</given-names></name> <name><surname>Hyv&#x000E4;rinen</surname> <given-names>A.</given-names></name></person-group> (<year>2010</year>). <article-title>&#x0201C;Noise-contrastive estimation: a new estimation principle for unnormalized statistical models,&#x0201D;</article-title> in <source>Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics</source> (<publisher-name>JMLR Workshop and Conference Proceedings</publisher-name>), <fpage>297</fpage>&#x02013;<lpage>304</lpage>.</citation>
</ref>
<ref id="B27">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Han</surname> <given-names>K.</given-names></name> <name><surname>He</surname> <given-names>M.</given-names></name> <name><surname>Yang</surname> <given-names>F.</given-names></name> <name><surname>Zhang</surname> <given-names>Y.</given-names></name></person-group> (<year>2022</year>). <article-title>Multi-task multi-level feature adversarial network for joint Alzheimer&#x00027;s disease diagnosis and atrophy localization using sMRI</article-title>. <source>Phys. Med. Biol</source>. <volume>67</volume>:<fpage>085002</fpage>. <pub-id pub-id-type="doi">10.1088/1361-6560/ac5ed5</pub-id><pub-id pub-id-type="pmid">35299163</pub-id></citation></ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>He</surname> <given-names>K.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Ren</surname> <given-names>S.</given-names></name> <name><surname>Sun</surname> <given-names>J.</given-names></name></person-group> (<year>2016</year>). <article-title>&#x0201C;Deep residual learning for image recognition,&#x0201D;</article-title> in <source>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</source>, <fpage>770</fpage>&#x02013;<lpage>778</lpage>. <pub-id pub-id-type="doi">10.1109/CVPR.2016.90</pub-id></citation>
</ref>
<ref id="B29">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Hiller</surname> <given-names>B. C.</given-names></name> <name><surname>Bader</surname> <given-names>S.</given-names></name> <name><surname>Singh</surname> <given-names>D.</given-names></name> <name><surname>Kirste</surname> <given-names>T.</given-names></name> <name><surname>Becker</surname> <given-names>M.</given-names></name> <name><surname>Dyrba</surname> <given-names>M.</given-names></name></person-group> (<year>2025</year>). <article-title>&#x0201C;Evaluating the fidelity of explanations for convolutional neural networks in Alzheimer&#x00027;s disease detection,&#x0201D;</article-title> in <source>Bildverarbeitung f&#x000FC;r die Medizin 2025, Informatik aktuell</source>, eds. <person-group person-group-type="editor"><name><surname>Maier</surname> <given-names>A.</given-names></name> <name><surname>Deserno</surname> <given-names>T. M.</given-names></name> <name><surname>Handels</surname> <given-names>H.</given-names></name> <name><surname>Maier-Hein</surname> <given-names>K.</given-names></name> <name><surname>Palm</surname> <given-names>C.</given-names></name> <name><surname>Tolxdorff</surname> <given-names>T.</given-names></name></person-group> (<publisher-loc>Wiesbaden</publisher-loc>: <publisher-name>Springer Fachmedien Wiesbaden</publisher-name>).</citation>
</ref>
<ref id="B30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Howard</surname> <given-names>A. G.</given-names></name> <name><surname>Zhu</surname> <given-names>M.</given-names></name> <name><surname>Chen</surname> <given-names>B.</given-names></name> <name><surname>Kalenichenko</surname> <given-names>D.</given-names></name> <name><surname>Wang</surname> <given-names>W.</given-names></name> <name><surname>Weyand</surname> <given-names>T.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>MobileNets: efficient convolutional neural networks for mobile vision applications</article-title>. <source>arXiv preprint arXiv:1704.04861</source>.</citation>
</ref>
<ref id="B31">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Hu</surname> <given-names>C.</given-names></name> <name><surname>Li</surname> <given-names>C.</given-names></name> <name><surname>Wang</surname> <given-names>H.</given-names></name> <name><surname>Liu</surname> <given-names>Q.</given-names></name> <name><surname>Zheng</surname> <given-names>H.</given-names></name> <name><surname>Wang</surname> <given-names>S.</given-names></name></person-group> (<year>2021a</year>). <article-title>&#x0201C;Self-supervised learning for MRI reconstruction with a parallel network training framework,&#x0201D;</article-title> in <source>Medical Image Computing and Computer Assisted Intervention-MICCAI 2021: 24th International Conference, Strasbourg, France, September 27&#x02013;October 1, 2021, Proceedings, Part VI 24</source> (<publisher-name>Springer</publisher-name>), <fpage>382</fpage>&#x02013;<lpage>391</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-030-87231-1_37</pub-id></citation>
</ref>
<ref id="B32">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hu</surname> <given-names>J.</given-names></name> <name><surname>Qing</surname> <given-names>Z.</given-names></name> <name><surname>Liu</surname> <given-names>R.</given-names></name> <name><surname>Zhang</surname> <given-names>X.</given-names></name> <name><surname>Lv</surname> <given-names>P.</given-names></name> <name><surname>Wang</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2021b</year>). <article-title>Deep learning-based classification and voxel-based visualization of frontotemporal dementia and Alzheimer&#x00027;s disease</article-title>. <source>Front. Neurosci</source>. <volume>14</volume>:<fpage>626154</fpage>. <pub-id pub-id-type="doi">10.3389/fnins.2020.626154</pub-id><pub-id pub-id-type="pmid">33551735</pub-id></citation></ref>
<ref id="B33">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>G.</given-names></name> <name><surname>Liu</surname> <given-names>Z.</given-names></name> <name><surname>Van Der Maaten</surname> <given-names>L.</given-names></name> <name><surname>Weinberger</surname> <given-names>K. Q.</given-names></name></person-group> (<year>2017</year>). <article-title>&#x0201C;Densely connected convolutional networks,&#x0201D;</article-title> in <source>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</source> (<publisher-name>IEEE Computer Society</publisher-name>), <fpage>2261</fpage>&#x02013;<lpage>2269</lpage>. <pub-id pub-id-type="doi">10.1109/CVPR.2017.243</pub-id></citation>
</ref>
<ref id="B34">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Ioffe</surname> <given-names>S.</given-names></name> <name><surname>Szegedy</surname> <given-names>C.</given-names></name></person-group> (<year>2015</year>). <article-title>&#x0201C;Batch normalization: accelerating deep network training by reducing internal covariate shift,&#x0201D;</article-title> in <source>International Conference on Machine Learning</source> (<publisher-name>PMLR</publisher-name>), <fpage>448</fpage>&#x02013;<lpage>456</lpage>.<pub-id pub-id-type="pmid">35496726</pub-id></citation></ref>
<ref id="B35">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jaiswal</surname> <given-names>A.</given-names></name> <name><surname>Babu</surname> <given-names>A. R.</given-names></name> <name><surname>Zadeh</surname> <given-names>M. Z.</given-names></name> <name><surname>Banerjee</surname> <given-names>D.</given-names></name> <name><surname>Makedon</surname> <given-names>F.</given-names></name></person-group> (<year>2020</year>). <article-title>A survey on contrastive self-supervised learning</article-title>. <source>Technologies</source> <volume>9</volume>:<fpage>2</fpage>. <pub-id pub-id-type="doi">10.3390/technologies9010002</pub-id></citation>
</ref>
<ref id="B36">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jiang</surname> <given-names>H.</given-names></name> <name><surname>Miao</surname> <given-names>C.</given-names></name></person-group> (<year>2022</year>). <article-title>&#x0201C;Pre-training 3D convolutional neural networks for prodromal Alzheimer&#x00027;s disease classification,&#x0201D;</article-title> in <source>2022 International Joint Conference on Neural Networks (IJCNN)</source> (<publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x02013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1109/IJCNN55064.2022.9891966</pub-id></citation>
</ref>
<ref id="B37">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jing</surname> <given-names>L.</given-names></name> <name><surname>Tian</surname> <given-names>Y.</given-names></name></person-group> (<year>2020</year>). <article-title>Self-supervised visual feature learning with deep neural networks: a survey</article-title>. <source>IEEE Trans. Pattern Anal. Mach. Intell</source>. <volume>43</volume>, <fpage>4037</fpage>&#x02013;<lpage>4058</lpage>. <pub-id pub-id-type="doi">10.1109/TPAMI.2020.2992393</pub-id><pub-id pub-id-type="pmid">32386141</pub-id></citation></ref>
<ref id="B38">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kakogeorgiou</surname> <given-names>I.</given-names></name> <name><surname>Karantzalos</surname> <given-names>K.</given-names></name></person-group> (<year>2021</year>). <article-title>Evaluating explainable artificial intelligence methods for multi-label deep learning classification tasks in remote sensing</article-title>. <source>Int. J. Appl. Earth Observ. Geoinform</source>. <volume>103</volume>:<fpage>102520</fpage>. <pub-id pub-id-type="doi">10.1016/j.jag.2021.102520</pub-id></citation>
</ref>
<ref id="B39">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kalibhat</surname> <given-names>N.</given-names></name> <name><surname>Narang</surname> <given-names>K.</given-names></name> <name><surname>Firooz</surname> <given-names>H.</given-names></name> <name><surname>Sanjabi</surname> <given-names>M.</given-names></name> <name><surname>Feizi</surname> <given-names>S.</given-names></name></person-group> (<year>2024</year>). <article-title>&#x0201C;Measuring self-supervised representation quality for downstream classification using discriminative features,&#x0201D;</article-title> in <source>Proceedings of the AAAI Conference on Artificial Intelligence</source>, <fpage>13031</fpage>&#x02013;<lpage>13039</lpage>. <pub-id pub-id-type="doi">10.1609/aaai.v38i12.29201</pub-id></citation>
</ref>
<ref id="B40">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Kohlbrenner</surname> <given-names>M.</given-names></name> <name><surname>Bauer</surname> <given-names>A.</given-names></name> <name><surname>Nakajima</surname> <given-names>S.</given-names></name> <name><surname>Binder</surname> <given-names>A.</given-names></name> <name><surname>Samek</surname> <given-names>W.</given-names></name> <name><surname>Lapuschkin</surname> <given-names>S.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;Toward best practice in explaining neural network decisions with LRP,&#x0201D;</article-title> in <source>2020 International Joint Conference on Neural Networks (IJCNN)</source> (<publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x02013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1109/IJCNN48605.2020.9206975</pub-id></citation>
</ref>
<ref id="B41">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kokhlikyan</surname> <given-names>N.</given-names></name> <name><surname>Miglani</surname> <given-names>V.</given-names></name> <name><surname>Martin</surname> <given-names>M.</given-names></name> <name><surname>Wang</surname> <given-names>E.</given-names></name> <name><surname>Alsallakh</surname> <given-names>B.</given-names></name> <name><surname>Reynolds</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Captum: a unified and generic model interpretability library for PyTorch</article-title>. <source>arXiv preprint arXiv:2009.07896</source>.</citation>
</ref>
<ref id="B42">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Larsson</surname> <given-names>G.</given-names></name> <name><surname>Maire</surname> <given-names>M.</given-names></name> <name><surname>Shakhnarovich</surname> <given-names>G.</given-names></name></person-group> (<year>2016</year>). <article-title>&#x0201C;Learning representations for automatic colorization,&#x0201D;</article-title> in <source>Computer Vision-ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11&#x02013;14, 2016, Proceedings, Part IV 14</source> (<publisher-name>Springer</publisher-name>), <fpage>577</fpage>&#x02013;<lpage>593</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-319-46493-0_35</pub-id></citation>
</ref>
<ref id="B43">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Larsson</surname> <given-names>G.</given-names></name> <name><surname>Maire</surname> <given-names>M.</given-names></name> <name><surname>Shakhnarovich</surname> <given-names>G.</given-names></name></person-group> (<year>2017</year>). <article-title>&#x0201C;Colorization as a proxy task for visual understanding,&#x0201D;</article-title> in <source>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</source>, <fpage>840</fpage>&#x02013;<lpage>849</lpage>. <pub-id pub-id-type="doi">10.1109/CVPR.2017.96</pub-id></citation>
</ref>
<ref id="B44">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Leonardsen</surname> <given-names>E. H.</given-names></name> <name><surname>Persson</surname> <given-names>K.</given-names></name> <name><surname>Gr&#x000F8;dem</surname> <given-names>E.</given-names></name> <name><surname>Dinsdale</surname> <given-names>N.</given-names></name> <name><surname>Schellhorn</surname> <given-names>T.</given-names></name> <name><surname>Roe</surname> <given-names>J. M.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Constructing personalized characterizations of structural brain aberrations in patients with dementia using explainable artificial intelligence</article-title>. <source>NPJ Dig. Med</source>. <volume>7</volume>:<fpage>110</fpage>. <pub-id pub-id-type="doi">10.1038/s41746-024-01123-7</pub-id><pub-id pub-id-type="pmid">38698139</pub-id></citation></ref>
<ref id="B45">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Lin</surname> <given-names>T.-Y.</given-names></name> <name><surname>Maire</surname> <given-names>M.</given-names></name> <name><surname>Belongie</surname> <given-names>S.</given-names></name> <name><surname>Hays</surname> <given-names>J.</given-names></name> <name><surname>Perona</surname> <given-names>P.</given-names></name> <name><surname>Ramanan</surname> <given-names>D.</given-names></name> <etal/></person-group>. (<year>2014</year>). <article-title>&#x0201C;Microsoft COCO: common objects in context,&#x0201D;</article-title> in <source>Computer Vision-ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6&#x02013;12, 2014, Proceedings, Part V 13</source> (<publisher-name>Springer</publisher-name>), <fpage>740</fpage>&#x02013;<lpage>755</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-319-10602-1_48</pub-id></citation>
</ref>
<ref id="B46">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>S.</given-names></name> <name><surname>Mallol-Ragolta</surname> <given-names>A.</given-names></name> <name><surname>Parada-Cabaleiro</surname> <given-names>E.</given-names></name> <name><surname>Qian</surname> <given-names>K.</given-names></name> <name><surname>Jing</surname> <given-names>X.</given-names></name> <name><surname>Kathan</surname> <given-names>A.</given-names></name> <etal/></person-group>. (<year>2022a</year>). <article-title>Audio self-supervised learning: a survey</article-title>. <source>Patterns</source> <volume>3</volume>:<fpage>100616</fpage>. <pub-id pub-id-type="doi">10.1016/j.patter.2022.100616</pub-id><pub-id pub-id-type="pmid">36569546</pub-id></citation></ref>
<ref id="B47">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname> <given-names>Z.</given-names></name> <name><surname>Mao</surname> <given-names>H.</given-names></name> <name><surname>Wu</surname> <given-names>C.-Y.</given-names></name> <name><surname>Feichtenhofer</surname> <given-names>C.</given-names></name> <name><surname>Darrell</surname> <given-names>T.</given-names></name> <name><surname>Xie</surname> <given-names>S.</given-names></name></person-group> (<year>2022b</year>). <article-title>&#x0201C;A convnet for the 2020s,&#x0201D;</article-title> in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</source>, <fpage>11976</fpage>&#x02013;<lpage>11986</lpage>. <pub-id pub-id-type="doi">10.1109/CVPR52688.2022.01167</pub-id></citation>
</ref>
<ref id="B48">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Luo</surname> <given-names>C.</given-names></name> <name><surname>Hu</surname> <given-names>N.</given-names></name> <name><surname>Xiao</surname> <given-names>Y.</given-names></name> <name><surname>Zhang</surname> <given-names>W.</given-names></name> <name><surname>Gong</surname> <given-names>Q.</given-names></name> <name><surname>Lui</surname> <given-names>S.</given-names></name></person-group> (<year>2020</year>). <article-title>Comparison of gray matter atrophy in behavioral variant frontal temporal dementia and amyotrophic lateral sclerosis: a coordinate-based meta-analysis</article-title>. <source>Front. Aging Neurosci</source>. <volume>12</volume>:<fpage>14</fpage>. <pub-id pub-id-type="doi">10.3389/fnagi.2020.00014</pub-id><pub-id pub-id-type="pmid">32116647</pub-id></citation></ref>
<ref id="B49">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ma</surname> <given-names>D.</given-names></name> <name><surname>Lu</surname> <given-names>D.</given-names></name> <name><surname>Popuri</surname> <given-names>K.</given-names></name> <name><surname>Wang</surname> <given-names>L.</given-names></name> <name><surname>Beg</surname> <given-names>M. F.</given-names></name> <collab>Alzheimer&#x00027;s Disease Neuroimaging Initiative</collab> <etal/></person-group>. (<year>2020</year>). <article-title>Differential diagnosis of frontotemporal dementia, Alzheimer&#x00027;s disease, and normal aging using a multi-scale multi-type feature generative adversarial deep neural network on structural magnetic resonance images</article-title>. <source>Front. Neurosci</source>. <volume>14</volume>:<fpage>853</fpage>. <pub-id pub-id-type="doi">10.3389/fnins.2020.00853</pub-id><pub-id pub-id-type="pmid">33192235</pub-id></citation></ref>
<ref id="B50">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mandelli</surname> <given-names>M. L.</given-names></name> <name><surname>Vitali</surname> <given-names>P.</given-names></name> <name><surname>Santos</surname> <given-names>M.</given-names></name> <name><surname>Henry</surname> <given-names>M.</given-names></name> <name><surname>Gola</surname> <given-names>K.</given-names></name> <name><surname>Rosenberg</surname> <given-names>L.</given-names></name> <etal/></person-group>. (<year>2016</year>). <article-title>Two insular regions are differentially involved in behavioral variant FTD and nonfluent/agrammatic variant PPA</article-title>. <source>Cortex</source> <volume>74</volume>, <fpage>149</fpage>&#x02013;<lpage>157</lpage>. <pub-id pub-id-type="doi">10.1016/j.cortex.2015.10.012</pub-id><pub-id pub-id-type="pmid">26673947</pub-id></citation></ref>
<ref id="B51">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Moguilner</surname> <given-names>S.</given-names></name> <name><surname>Whelan</surname> <given-names>R.</given-names></name> <name><surname>Adams</surname> <given-names>H.</given-names></name> <name><surname>Valcour</surname> <given-names>V.</given-names></name> <name><surname>Tagliazucchi</surname> <given-names>E.</given-names></name> <name><surname>Ib&#x000E1;&#x000F1;ez</surname> <given-names>A.</given-names></name></person-group> (<year>2023</year>). <article-title>Visual deep learning of unprocessed neuroimaging characterises dementia subtypes and generalises across non-stereotypic samples</article-title>. <source>EBioMedicine</source> <volume>90</volume>:<fpage>104540</fpage>. <pub-id pub-id-type="doi">10.1016/j.ebiom.2023.104540</pub-id><pub-id pub-id-type="pmid">36972630</pub-id></citation></ref>
<ref id="B52">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Montavon</surname> <given-names>G.</given-names></name> <name><surname>Binder</surname> <given-names>A.</given-names></name> <name><surname>Lapuschkin</surname> <given-names>S.</given-names></name> <name><surname>Samek</surname> <given-names>W.</given-names></name> <name><surname>M&#x000FC;ller</surname> <given-names>K.-R.</given-names></name></person-group> (<year>2019</year>). <source>Layer-Wise Relevance Propagation: An Overview</source>. <publisher-loc>Cham</publisher-loc>: <publisher-name>Springer International Publishing</publisher-name>, <fpage>193</fpage>&#x02013;<lpage>209</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-030-28954-6_10</pub-id></citation>
</ref>
<ref id="B53">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Noroozi</surname> <given-names>M.</given-names></name> <name><surname>Favaro</surname> <given-names>P.</given-names></name></person-group> (<year>2016</year>). <article-title>&#x0201C;Unsupervised learning of visual representations by solving Jigsaw puzzles,&#x0201D;</article-title> in <source>European Conference on Computer Vision</source> (<publisher-name>Springer</publisher-name>), <fpage>69</fpage>&#x02013;<lpage>84</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-319-46466-4_5</pub-id></citation>
</ref>
<ref id="B54">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Oord</surname> <given-names>A. V. D.</given-names></name> <name><surname>Li</surname> <given-names>Y.</given-names></name> <name><surname>Vinyals</surname> <given-names>O.</given-names></name></person-group> (<year>2019</year>). <article-title>Representation learning with contrastive predictive coding</article-title>. <source>arXiv preprint arXiv:1807.03748</source>.</citation>
</ref>
<ref id="B55">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Orhan</surname> <given-names>E.</given-names></name> <name><surname>Gupta</surname> <given-names>V.</given-names></name> <name><surname>Lake</surname> <given-names>B. M.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;Self-supervised learning through the eyes of a child,&#x0201D;</article-title> in <source>NIPS&#x00027;20: Proceedings of the 34th International Conference on Neural Information Processing Systems</source>, <fpage>9960</fpage>&#x02013;<lpage>9971</lpage>.</citation>
</ref>
<ref id="B56">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ouyang</surname> <given-names>J.</given-names></name> <name><surname>Zhao</surname> <given-names>Q.</given-names></name> <name><surname>Adeli</surname> <given-names>E.</given-names></name> <name><surname>Zaharchuk</surname> <given-names>G.</given-names></name> <name><surname>Pohl</surname> <given-names>K. M.</given-names></name></person-group> (<year>2022</year>). <article-title>Self-supervised learning of neighborhood embedding for longitudinal MRI</article-title>. <source>Med. Image Anal</source>. <volume>82</volume>:<fpage>102571</fpage>. <pub-id pub-id-type="doi">10.1016/j.media.2022.102571</pub-id><pub-id pub-id-type="pmid">36115098</pub-id></citation></ref>
<ref id="B57">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Paszke</surname> <given-names>A.</given-names></name> <name><surname>Gross</surname> <given-names>S.</given-names></name> <name><surname>Massa</surname> <given-names>F.</given-names></name> <name><surname>Lerer</surname> <given-names>A.</given-names></name> <name><surname>Bradbury</surname> <given-names>J.</given-names></name> <name><surname>Chanan</surname> <given-names>G.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>&#x0201C;PyTorch: an imperative style, high-performance deep learning library,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems</source> (<publisher-name>Curran Associates, Inc.</publisher-name>), <fpage>8024</fpage>&#x02013;<lpage>8035</lpage>.</citation>
</ref>
<ref id="B58">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pfaff</surname> <given-names>L.</given-names></name> <name><surname>Darwish</surname> <given-names>O.</given-names></name> <name><surname>Wagner</surname> <given-names>F.</given-names></name> <name><surname>Thies</surname> <given-names>M.</given-names></name> <name><surname>Vysotskaya</surname> <given-names>N.</given-names></name> <name><surname>Hossbach</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>Enhancing diffusion-weighted prostate MRI through self-supervised denoising and evaluation</article-title>. <source>Sci. Rep</source>. <volume>14</volume>:<fpage>24292</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-024-75007-x</pub-id><pub-id pub-id-type="pmid">39414914</pub-id></citation></ref>
<ref id="B59">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Qiao</surname> <given-names>H.</given-names></name> <name><surname>Chen</surname> <given-names>L.</given-names></name> <name><surname>Zhu</surname> <given-names>F.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;A fusion of multi-view 2D and 3D convolution neural network based MRI for Alzheimer&#x00027;s disease diagnosis,&#x0201D;</article-title> in <source>2021 43rd Annual International Conference of the IEEE Engineering in Medicine &#x00026; Biology Society (EMBC)</source> (<publisher-name>IEEE</publisher-name>), <fpage>3317</fpage>&#x02013;<lpage>3321</lpage>. <pub-id pub-id-type="doi">10.1109/EMBC46164.2021.9629923</pub-id><pub-id pub-id-type="pmid">34891950</pub-id></citation></ref>
<ref id="B60">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Qiu</surname> <given-names>S.</given-names></name> <name><surname>Joshi</surname> <given-names>P. S.</given-names></name> <name><surname>Miller</surname> <given-names>M. I.</given-names></name> <name><surname>Xue</surname> <given-names>C.</given-names></name> <name><surname>Zhou</surname> <given-names>X.</given-names></name> <name><surname>Karjadi</surname> <given-names>C.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Development and validation of an interpretable deep learning framework for Alzheimer&#x00027;s disease classification</article-title>. <source>Brain</source> <volume>143</volume>, <fpage>1920</fpage>&#x02013;<lpage>1933</lpage>. <pub-id pub-id-type="doi">10.1093/brain/awaa137</pub-id><pub-id pub-id-type="pmid">32357201</pub-id></citation></ref>
<ref id="B61">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Reed</surname> <given-names>C. J.</given-names></name> <name><surname>Metzger</surname> <given-names>S.</given-names></name> <name><surname>Srinivas</surname> <given-names>A.</given-names></name> <name><surname>Darrell</surname> <given-names>T.</given-names></name> <name><surname>Keutzer</surname> <given-names>K.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;SelfAugment: automatic augmentation policies for self-supervised learning,&#x0201D;</article-title> in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</source>, <fpage>2674</fpage>&#x02013;<lpage>2683</lpage>. <pub-id pub-id-type="doi">10.1109/CVPR46437.2021.00270</pub-id></citation>
</ref>
<ref id="B62">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Routier</surname> <given-names>A.</given-names></name> <name><surname>Burgos</surname> <given-names>N.</given-names></name> <name><surname>D&#x000ED;az</surname> <given-names>M.</given-names></name> <name><surname>Bacci</surname> <given-names>M.</given-names></name> <name><surname>Bottani</surname> <given-names>S.</given-names></name> <name><surname>El-Rifai</surname> <given-names>O.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>Clinica: an open-source software platform for reproducible clinical neuroscience studies</article-title>. <source>Front. Neuroinform</source>. <volume>15</volume>:<fpage>689675</fpage>. <pub-id pub-id-type="doi">10.3389/fninf.2021.689675</pub-id><pub-id pub-id-type="pmid">34483871</pub-id></citation></ref>
<ref id="B63">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Sabokrou</surname> <given-names>M.</given-names></name> <name><surname>Khalooei</surname> <given-names>M.</given-names></name> <name><surname>Adeli</surname> <given-names>E.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Self-supervised representation learning via neighborhood-relational encoding,&#x0201D;</article-title> in <source>Proceedings of the IEEE/CVF International Conference on Computer Vision</source>, <fpage>8009</fpage>&#x02013;<lpage>8018</lpage>. <pub-id pub-id-type="doi">10.1109/ICCV.2019.00810</pub-id></citation>
</ref>
<ref id="B64">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Sandler</surname> <given-names>M.</given-names></name> <name><surname>Howard</surname> <given-names>A.</given-names></name> <name><surname>Zhu</surname> <given-names>M.</given-names></name> <name><surname>Zhmoginov</surname> <given-names>A.</given-names></name> <name><surname>Chen</surname> <given-names>L.-C.</given-names></name></person-group> (<year>2018</year>). <article-title>&#x0201C;MobileNetV2: inverted residuals and linear bottlenecks,&#x0201D;</article-title> in <source>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</source>, <fpage>4510</fpage>&#x02013;<lpage>4520</lpage>. <pub-id pub-id-type="doi">10.1109/CVPR.2018.00474</pub-id><pub-id pub-id-type="pmid">39300076</pub-id></citation></ref>
<ref id="B65">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Scheibenreif</surname> <given-names>L.</given-names></name> <name><surname>Mommert</surname> <given-names>M.</given-names></name> <name><surname>Borth</surname> <given-names>D.</given-names></name></person-group> (<year>2024</year>). <article-title>&#x0201C;Parameter efficient self-supervised geospatial domain adaptation,&#x0201D;</article-title> in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</source>, <fpage>27841</fpage>&#x02013;<lpage>27851</lpage>. <pub-id pub-id-type="doi">10.1109/CVPR52733.2024.02630</pub-id></citation>
</ref>
<ref id="B66">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Schiappa</surname> <given-names>M. C.</given-names></name> <name><surname>Rawat</surname> <given-names>Y. S.</given-names></name> <name><surname>Shah</surname> <given-names>M.</given-names></name></person-group> (<year>2023</year>). <article-title>Self-supervised learning for videos: a survey</article-title>. <source>ACM Comput. Surv</source>. <volume>55</volume>, <fpage>1</fpage>&#x02013;<lpage>37</lpage>. <pub-id pub-id-type="doi">10.1145/3577925</pub-id></citation>
</ref>
<ref id="B67">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Schroff</surname> <given-names>F.</given-names></name> <name><surname>Kalenichenko</surname> <given-names>D.</given-names></name> <name><surname>Philbin</surname> <given-names>J.</given-names></name></person-group> (<year>2015</year>). <article-title>&#x0201C;FaceNet: a unified embedding for face recognition and clustering,&#x0201D;</article-title> in <source>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</source>, <fpage>815</fpage>&#x02013;<lpage>823</lpage>. <pub-id pub-id-type="doi">10.1109/CVPR.2015.7298682</pub-id></citation>
</ref>
<ref id="B68">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Seeley</surname> <given-names>W. W.</given-names></name></person-group> (<year>2010</year>). <article-title>Anterior insula degeneration in frontotemporal dementia</article-title>. <source>Brain Struct. Funct</source>. <volume>214</volume>, <fpage>465</fpage>&#x02013;<lpage>475</lpage>. <pub-id pub-id-type="doi">10.1007/s00429-010-0263-z</pub-id><pub-id pub-id-type="pmid">20512369</pub-id></citation></ref>
<ref id="B69">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Selvaraju</surname> <given-names>R. R.</given-names></name> <name><surname>Cogswell</surname> <given-names>M.</given-names></name> <name><surname>Das</surname> <given-names>A.</given-names></name> <name><surname>Vedantam</surname> <given-names>R.</given-names></name> <name><surname>Parikh</surname> <given-names>D.</given-names></name> <name><surname>Batra</surname> <given-names>D.</given-names></name></person-group> (<year>2020</year>). <article-title>Grad-CAM: visual explanations from deep networks via gradient-based localization</article-title>. <source>Int. J. Comput. Vis</source>. <volume>128</volume>, <fpage>336</fpage>&#x02013;<lpage>359</lpage>. <pub-id pub-id-type="doi">10.1007/s11263-019-01228-7</pub-id></citation>
</ref>
<ref id="B70">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Seyfio&#x0011F;lu</surname> <given-names>M. S.</given-names></name> <name><surname>Liu</surname> <given-names>Z.</given-names></name> <name><surname>Kamath</surname> <given-names>P.</given-names></name> <name><surname>Gangolli</surname> <given-names>S.</given-names></name> <name><surname>Wang</surname> <given-names>S.</given-names></name> <name><surname>Grabowski</surname> <given-names>T.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>&#x0201C;Brain-aware replacements for supervised contrastive learning in detection of Alzheimer&#x00027;s disease,&#x0201D;</article-title> in <source>International Conference on Medical Image Computing and Computer-Assisted Intervention</source> (<publisher-name>Springer</publisher-name>), <fpage>461</fpage>&#x02013;<lpage>470</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-031-16431-6_44</pub-id><pub-id pub-id-type="pmid">38680538</pub-id></citation></ref>
<ref id="B71">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Shurrab</surname> <given-names>S.</given-names></name> <name><surname>Duwairi</surname> <given-names>R.</given-names></name></person-group> (<year>2022</year>). <article-title>Self-supervised learning methods and applications in medical imaging analysis: a survey</article-title>. <source>PeerJ Comput. Sci</source>. <volume>8</volume>:<fpage>e1045</fpage>. <pub-id pub-id-type="doi">10.7717/peerj-cs.1045</pub-id><pub-id pub-id-type="pmid">36091989</pub-id></citation></ref>
<ref id="B72">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Singh</surname> <given-names>D.</given-names></name> <name><surname>Dyrba</surname> <given-names>M.</given-names></name></person-group> (<year>2023</year>). <article-title>&#x0201C;Comparison of CNN architectures for detecting Alzheimer&#x00027;s disease using relevance maps,&#x0201D;</article-title> in <source>Bildverarbeitung f&#x000FC;r die Medizin 2023</source> (<publisher-name>Springer</publisher-name>), <fpage>238</fpage>&#x02013;<lpage>243</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-658-41657-7_51</pub-id></citation>
</ref>
<ref id="B73">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Sohn</surname> <given-names>K.</given-names></name></person-group> (<year>2016</year>). <article-title>&#x0201C;Improved deep metric learning with multi-class n-pair loss objective,&#x0201D;</article-title> in <source>Proceedings of the 30th International Conference on Neural Information Processing Systems</source> (<publisher-name>Curran Associates Inc.</publisher-name>), <fpage>1857</fpage>&#x02013;<lpage>1865</lpage>.<pub-id pub-id-type="pmid">33828896</pub-id></citation></ref>
<ref id="B74">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sun</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>L.</given-names></name> <name><surname>Gao</surname> <given-names>K.</given-names></name> <name><surname>Ying</surname> <given-names>S.</given-names></name> <name><surname>Lin</surname> <given-names>W.</given-names></name> <name><surname>Humphreys</surname> <given-names>K. L.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Self-supervised learning with application for infant cerebellum segmentation and analysis</article-title>. <source>Nat. Commun</source>. <volume>14</volume>:<fpage>4717</fpage>. <pub-id pub-id-type="doi">10.1038/s41467-023-40446-z</pub-id><pub-id pub-id-type="pmid">37543620</pub-id></citation></ref>
<ref id="B75">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Sundararajan</surname> <given-names>M.</given-names></name> <name><surname>Taly</surname> <given-names>A.</given-names></name> <name><surname>Yan</surname> <given-names>Q.</given-names></name></person-group> (<year>2017</year>). <article-title>&#x0201C;Axiomatic attribution for deep networks,&#x0201D;</article-title> in <source>Proceedings of the 34th International Conference on Machine Learning</source> (<publisher-name>PMLR</publisher-name>), <fpage>3319</fpage>&#x02013;<lpage>3328</lpage>.</citation>
</ref>
<ref id="B76">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Taleb</surname> <given-names>A.</given-names></name> <name><surname>Loetzsch</surname> <given-names>W.</given-names></name> <name><surname>Danz</surname> <given-names>N.</given-names></name> <name><surname>Severin</surname> <given-names>J.</given-names></name> <name><surname>Gaertner</surname> <given-names>T.</given-names></name> <name><surname>Bergner</surname> <given-names>B.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>&#x0201C;3D self-supervised methods for medical imaging,&#x0201D;</article-title> in <source>Proceedings of the 34th International Conference on Neural Information Processing Systems</source> (<publisher-name>Curran Associates Inc.</publisher-name>), <fpage>18158</fpage>&#x02013;<lpage>18172</lpage>.</citation>
</ref>
<ref id="B77">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Thomas</surname> <given-names>A. W.</given-names></name> <name><surname>R&#x000E9;</surname> <given-names>C.</given-names></name> <name><surname>Poldrack</surname> <given-names>R. A.</given-names></name></person-group> (<year>2024</year>). <article-title>&#x0201C;Self-supervised learning of brain dynamics from broad neuroimaging data,&#x0201D;</article-title> in <source>Proceedings of the 36th International Conference on Neural Information Processing Systems, NIPS &#x00027;22</source> (<publisher-loc>Red Hook, NY, USA</publisher-loc>: <publisher-name>Curran Associates Inc.</publisher-name>).</citation>
</ref>
<ref id="B78">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Van der Velden</surname> <given-names>B. H.</given-names></name> <name><surname>Kuijf</surname> <given-names>H. J.</given-names></name> <name><surname>Gilhuijs</surname> <given-names>K. G.</given-names></name> <name><surname>Viergever</surname> <given-names>M. A.</given-names></name></person-group> (<year>2022</year>). <article-title>Explainable artificial intelligence (XAI) in deep learning-based medical image analysis</article-title>. <source>Med. Image Anal</source>. <volume>79</volume>:<fpage>102470</fpage>. <pub-id pub-id-type="doi">10.1016/j.media.2022.102470</pub-id><pub-id pub-id-type="pmid">35576821</pub-id></citation></ref>
<ref id="B79">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>VanBerlo</surname> <given-names>B.</given-names></name> <name><surname>Hoey</surname> <given-names>J.</given-names></name> <name><surname>Wong</surname> <given-names>A.</given-names></name></person-group> (<year>2024</year>). <article-title>A survey of the impact of self-supervised pretraining for diagnostic tasks in medical X-ray, CT, MRI, and ultrasound</article-title>. <source>BMC Med. Imaging</source> <volume>24</volume>:<fpage>79</fpage>. <pub-id pub-id-type="doi">10.1186/s12880-024-01253-0</pub-id><pub-id pub-id-type="pmid">38580932</pub-id></citation></ref>
<ref id="B80">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>D.</given-names></name> <name><surname>Honnorat</surname> <given-names>N.</given-names></name> <name><surname>Fox</surname> <given-names>P. T.</given-names></name> <name><surname>Ritter</surname> <given-names>K.</given-names></name> <name><surname>Eickhoff</surname> <given-names>S. B.</given-names></name> <name><surname>Seshadri</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Deep neural network heatmaps capture Alzheimer&#x00027;s disease patterns reported in a large meta-analysis of neuroimaging studies</article-title>. <source>Neuroimage</source> <volume>269</volume>:<fpage>119929</fpage>. <pub-id pub-id-type="doi">10.1016/j.neuroimage.2023.119929</pub-id><pub-id pub-id-type="pmid">36740029</pub-id></citation></ref>
<ref id="B81">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Wei</surname> <given-names>C.</given-names></name> <name><surname>Xie</surname> <given-names>L.</given-names></name> <name><surname>Ren</surname> <given-names>X.</given-names></name> <name><surname>Xia</surname> <given-names>Y.</given-names></name> <name><surname>Su</surname> <given-names>C.</given-names></name> <name><surname>Liu</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2019</year>). <article-title>&#x0201C;Iterative reorganization with weak spatial constraints: solving arbitrary Jigsaw puzzles for unsupervised representation learning,&#x0201D;</article-title> in <source>Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition</source>, <fpage>1910</fpage>&#x02013;<lpage>1919</lpage>. <pub-id pub-id-type="doi">10.1109/CVPR.2019.00201</pub-id></citation>
</ref>
<ref id="B82">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wen</surname> <given-names>J.</given-names></name> <name><surname>Thibeau-Sutre</surname> <given-names>E.</given-names></name> <name><surname>Diaz-Melo</surname> <given-names>M.</given-names></name> <name><surname>Samper-Gonz&#x000E1;lez</surname> <given-names>J.</given-names></name> <name><surname>Routier</surname> <given-names>A.</given-names></name> <name><surname>Bottani</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Convolutional neural networks for classification of Alzheimer&#x00027;s disease: overview and reproducible evaluation</article-title>. <source>Med. Image Anal</source>. <volume>63</volume>:<fpage>101694</fpage>. <pub-id pub-id-type="doi">10.1016/j.media.2020.101694</pub-id><pub-id pub-id-type="pmid">32417716</pub-id></citation></ref>
<ref id="B83">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Weng</surname> <given-names>L.</given-names></name></person-group> (<year>2021</year>). <source>Contrastive representation learning</source>. Available at: <ext-link ext-link-type="uri" xlink:href="https://lilianweng.github.io/posts/2021-05-31-contrastive/">https://lilianweng.github.io/posts/2021-05-31-contrastive/</ext-link> (accessed January 9, 2024).</citation>
</ref>
<ref id="B84">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Whitwell</surname> <given-names>J. L.</given-names></name> <name><surname>Shiung</surname> <given-names>M. M.</given-names></name> <name><surname>Przybelski</surname> <given-names>S. A.</given-names></name> <name><surname>Weigand</surname> <given-names>S. D.</given-names></name> <name><surname>Knopman</surname> <given-names>D. S.</given-names></name> <name><surname>Boeve</surname> <given-names>B. F.</given-names></name> <etal/></person-group>. (<year>2008</year>). <article-title>MRI patterns of atrophy associated with progression to AD in amnestic mild cognitive impairment</article-title>. <source>Neurology</source> <volume>70</volume>, <fpage>512</fpage>&#x02013;<lpage>520</lpage>. <pub-id pub-id-type="doi">10.1212/01.wnl.0000280575.77437.a2</pub-id><pub-id pub-id-type="pmid">17898323</pub-id></citation></ref>
<ref id="B85">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Wolf</surname> <given-names>D.</given-names></name> <name><surname>Payer</surname> <given-names>T.</given-names></name> <name><surname>Lisson</surname> <given-names>C. S.</given-names></name> <name><surname>Lisson</surname> <given-names>C. G.</given-names></name> <name><surname>Beer</surname> <given-names>M.</given-names></name> <name><surname>G&#x000F6;tz</surname> <given-names>M.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>Self-supervised pre-training with contrastive and masked autoencoder methods for dealing with small datasets in deep learning for medical imaging</article-title>. <source>Sci. Rep</source>. <volume>13</volume>:<fpage>20260</fpage>. <pub-id pub-id-type="doi">10.1038/s41598-023-46433-0</pub-id><pub-id pub-id-type="pmid">37985685</pub-id></citation></ref>
<ref id="B86">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname> <given-names>Q.</given-names></name> <name><surname>Liu</surname> <given-names>Z.</given-names></name> <name><surname>Adeli</surname> <given-names>E.</given-names></name> <name><surname>Pohl</surname> <given-names>K. M.</given-names></name></person-group> (<year>2021</year>). <article-title>Longitudinal self-supervised learning</article-title>. <source>Med. Image Anal</source>. <volume>71</volume>:<fpage>102051</fpage>. <pub-id pub-id-type="doi">10.1016/j.media.2021.102051</pub-id><pub-id pub-id-type="pmid">33882336</pub-id></citation></ref>
<ref id="B87">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Zhou</surname> <given-names>B.</given-names></name> <name><surname>Dey</surname> <given-names>N.</given-names></name> <name><surname>Schlemper</surname> <given-names>J.</given-names></name> <name><surname>Mohseni Salehi</surname> <given-names>S. S.</given-names></name> <name><surname>Liu</surname> <given-names>C.</given-names></name> <name><surname>Duncan</surname> <given-names>J. S.</given-names></name> <etal/></person-group>. (<year>2023</year>). <article-title>&#x0201C;DSFormer: a dual-domain self-supervised transformer for accelerated multi-contrast MRI reconstruction,&#x0201D;</article-title> in <source>2023 IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)</source> (<publisher-loc>Los Alamitos, CA, USA</publisher-loc>: <publisher-name>IEEE Computer Society</publisher-name>), <fpage>4955</fpage>&#x02013;<lpage>4964</lpage>. <pub-id pub-id-type="doi">10.1109/WACV56688.2023.00494</pub-id></citation>
</ref>
</ref-list>
</back>
</article>