<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Archiving and Interchange DTD v2.3 20070202//EN" "archivearticle.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="systematic-review" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Oncol.</journal-id>
<journal-title>Frontiers in Oncology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Oncol.</abbrev-journal-title>
<issn pub-type="epub">2234-943X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fonc.2021.788819</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Oncology</subject>
<subj-group>
<subject>Systematic Review</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Trends in Development of Novel Machine Learning Methods for the Identification of Gliomas in Datasets That Include Non-Glioma Images: A Systematic Review</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Subramanian</surname>
<given-names>Harry</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1452486"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Dey</surname>
<given-names>Rahul</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Brim</surname>
<given-names>Waverly Rose</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1565743"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Tillmanns</surname>
<given-names>Niklas</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1503253"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Cassinelli Petersen</surname>
<given-names>Gabriel</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1503298"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Brackett</surname>
<given-names>Alexandria</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Mahajan</surname>
<given-names>Amit</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1411188"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Johnson</surname>
<given-names>Michele</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Malhotra</surname>
<given-names>Ajay</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Aboian</surname>
<given-names>Mariam</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Department of Radiology and Biomedical Imaging, Yale School of Medicine</institution>, <addr-line>New Haven, CT</addr-line>, <country>United States</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Harvey Cushing/John Hay Whitney Medical Library, Yale School of Medicine</institution>, <addr-line>New Haven, CT</addr-line>, <country>United States</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Alireza Mansouri, The Pennsylvania State University (PSU), United States</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: Bahram Mohajer, Johns Hopkins University, United States; Mohammad Hamghalam, Shenzhen University, China</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Mariam Aboian, <email xlink:href="mailto:mariam.aboian@yale.edu">mariam.aboian@yale.edu</email>
</p>
</fn>
<fn fn-type="other" id="fn002">
<p>This article was submitted to Neuro-Oncology and Neurosurgical Oncology, a section of the journal Frontiers in Oncology</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>23</day>
<month>12</month>
<year>2021</year>
</pub-date>
<pub-date pub-type="collection">
<year>2021</year>
</pub-date>
<volume>11</volume>
<elocation-id>788819</elocation-id>
<history>
<date date-type="received">
<day>03</day>
<month>10</month>
<year>2021</year>
</date>
<date date-type="accepted">
<day>07</day>
<month>12</month>
<year>2021</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2021 Subramanian, Dey, Brim, Tillmanns, Cassinelli Petersen, Brackett, Mahajan, Johnson, Malhotra and Aboian</copyright-statement>
<copyright-year>2021</copyright-year>
<copyright-holder>Subramanian, Dey, Brim, Tillmanns, Cassinelli Petersen, Brackett, Mahajan, Johnson, Malhotra and Aboian</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<sec>
<title>Purpose</title>
<p>Machine learning has been applied to the diagnostic imaging of gliomas to augment classification, prognostication, segmentation, and treatment planning. A systematic literature review was performed to identify how machine learning has been applied to identify gliomas in datasets which include non-glioma images, thereby simulating normal clinical practice.</p>
</sec>
<sec>
<title>Materials and Methods</title>
<p>Four databases were searched by a medical librarian and confirmed by a second librarian for all articles published prior to February 1, 2021: Ovid Embase, Ovid MEDLINE, Cochrane trials (CENTRAL), and Web of Science-Core Collection. The search strategy included both keywords and controlled vocabulary combining the terms for: artificial intelligence, machine learning, deep learning, radiomics, magnetic resonance imaging, glioma, as well as related terms. The review was conducted in stepwise fashion with abstract screening, full text screening, and data extraction. Quality of reporting was assessed using TRIPOD criteria.</p>
</sec>
<sec>
<title>Results</title>
<p>A total of 11,727 candidate articles were identified, of which 12 articles were included in the final analysis. Studies investigated the differentiation of normal from abnormal images in datasets which include gliomas (7 articles) and the differentiation of glioma images from non-glioma or normal images (5 articles). Single institution datasets were most common (5 articles) followed by BRATS (3 articles). The median sample size was 280 patients. Algorithm testing strategies consisted of five-fold cross validation (5 articles), and the use of exclusive sets of images within the same dataset for training and for testing (7 articles). Neural networks were the most common type of algorithm (10 articles). The accuracy of algorithms ranged from 0.75 to 1.00 (median 0.96, 10 articles). Quality of reporting assessment utilizing TRIPOD criteria yielded a mean individual TRIPOD ratio of 0.50 (standard deviation 0.14, range 0.37 to 0.85).</p>
</sec>
<sec>
<title>Conclusion</title>
<p>Systematic review investigating the identification of gliomas in datasets which include non-glioma images demonstrated multiple limitations hindering the application of these algorithms to clinical practice. These included limited datasets, a lack of generalizable algorithm training and testing strategies, and poor quality of reporting. The development of more robust and heterogeneous datasets is needed for algorithm development. Future studies would benefit from using external datasets for algorithm testing as well as placing increased attention on quality of reporting standards.</p>
</sec>
<sec>
<title>Systematic Review Registration</title>
<p><uri xlink:href="https://www.crd.york.ac.uk/prospero/display_record.php?ID=CRD42020209938">www.crd.york.ac.uk/prospero/display_record.php?ID=CRD42020209938</uri>, International Prospective Register of Systematic Reviews (PROSPERO 2020 CRD42020209938).</p>
</sec>
</abstract>
<kwd-group>
<kwd>artificial intelligence</kwd>
<kwd>bias</kwd>
<kwd>brain tumor</kwd>
<kwd>diagnostic imaging</kwd>
<kwd>glioma</kwd>
<kwd>machine learning</kwd>
<kwd>Magnetic Resonance Imaging</kwd>
<kwd>segmentation</kwd>
</kwd-group>
<counts>
<fig-count count="7"/>
<table-count count="3"/>
<equation-count count="0"/>
<ref-count count="26"/>
<page-count count="10"/>
<word-count count="3932"/>
</counts>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<title>Introduction</title>
<p>As the healthcare needs of the population increase and the volume of imaging grows, there is a critical need for computer assisted models to provide support to radiologists in routine clinical practice. Brain tumors, and specifically gliomas, are of particular interest to neuro-oncologists and radiologists. Machine learning research in neuro-oncology has become increasingly popular as sufficient computing power and large datasets have come to be more available to researchers. Machine learning refers to a subset of artificial intelligence consisting of algorithms that analyze data without explicit programming (<xref ref-type="bibr" rid="B1">1</xref>, <xref ref-type="bibr" rid="B2">2</xref>). Deep learning is a subtype of machine learning that utilizes neural networks, which refer to algorithm models composed of neurons represented by nodes and interconnections between nodes (<xref ref-type="bibr" rid="B3">3</xref>). Machine learning has been applied to the diagnostic imaging of gliomas to augment classification, prognostication, segmentation, and treatment planning (<xref ref-type="bibr" rid="B4">4</xref>). Algorithms which can differentiate gliomas from other entities such as normal examinations, stroke, or demyelinating disease remain in the early stages of development. Until now, most studies have focused on brain tumor segmentation accuracy, and provide segmentation algorithms which are developed on datasets containing only glioma images. The identification of gliomas in a heterogeneous group of images is a critical function but less well studied. In clinical practice, most studies contain normal images or other non-oncologic pathology. Algorithms developed on datasets containing only glioma images are unlikely to be generalizable to clinical practice. Therefore, in this study we investigate how machine learning has been applied to the identification of gliomas in datasets which contain non-glioma images. 
A systematic review was performed to assess the existing body of literature and identify optimal targets for future research.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<title>Materials and Methods</title>
<p>A systematic literature review was performed to identify how machine learning has been applied to identify gliomas in datasets which include non-glioma images, thereby simulating normal clinical practice. The study was registered with the International Prospective Register of Systematic Reviews (PROSPERO, CRD42020209938) and conducted in concordance with preferred reporting items for systematic review and meta-analysis protocols (PRISMA-P) guidelines (<xref ref-type="bibr" rid="B5">5</xref>). The primary literature search is summarized in the PRISMA flow diagram in <xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1</bold>
</xref>, and involved a query of four databases to identify all published articles investigating machine learning and gliomas. The queried databases were Ovid Embase, Ovid MEDLINE, Cochrane trials (CENTRAL), and Web of Science-Core Collection. The initial search included articles published prior to September 1, 2020, and a second search was performed to identify articles published between September 1, 2020 and February 1, 2021. The search strategy included both keywords and controlled vocabulary combining the terms for: artificial intelligence, machine learning, deep learning, radiomics, magnetic resonance imaging, glioma, as well as related terms. The search strategy and syntax are demonstrated in <xref ref-type="supplementary-material" rid="SF1">
<bold>Supplementary Figure S1</bold>
</xref>. The search was executed by a medical librarian and reviewed by a second institutional librarian.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>PRISMA flow diagram depicting the systematic review search strategy (MRI, magnetic resonance imaging; MRS, magnetic resonance spectroscopy; PET, positron emission tomography).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-11-788819-g001.tif"/>
</fig>
<p>Screening of the articles was performed by two independent reviewers (H.S. and M.A.), including one board-certified neuroradiologist (M.A.), utilizing Covidence (Covidence systematic review software, Veritas Health Innovation, Melbourne, Australia. Available at <uri xlink:href="http://www.covidence.org">www.covidence.org</uri>). Articles were initially screened by title and abstract, after which the remaining articles were screened by full text. To meet inclusion criteria, the articles were required to be original research, investigate machine learning, investigate gliomas in human subjects, be published in the English language, and utilize imaging with either MRI, MRS, or PET. Further screening was then performed to identify articles which investigated the identification of gliomas in datasets including non-glioma images. Each reviewer screened each article independently and disagreement was resolved by discussion.</p>
<p>Data extraction was performed by two independent reviewers (H.S. and R.D.). Each reviewer extracted all of the data independently and disagreement was resolved by discussion. Major data points included the study objective, dataset, number of patients and images, machine learning algorithm training and testing strategy, and magnetic resonance imaging (MRI) sequences. Quantitative data was also collected where available, including accuracy, sensitivity, specificity, area under the receiver operating characteristic curve (AUC) and Dice coefficient. When multiple algorithms were evaluated in a study, the best performing algorithm was reported.</p>
<p>Risk of bias assessment was performed using Transparent Reporting of a Multivariable Prediction Model for Individual Prognosis or Diagnosis (TRIPOD) guidelines (<xref ref-type="bibr" rid="B6">6</xref>). The TRIPOD checklist contains 22 primary features as well as multiple subitems, resulting in a total of 37 features. A TRIPOD score was created using 1 possible point for each subitem. Adherence to a subitem was given 1 point, while non-adherence was scored as 0 points. Features not assessed in an article due to the nature of the study were deemed as not applicable and excluded from analysis. The primary features are title (1 point), abstract (1 point), introduction - background and objectives (2 points), methods - source of data (2 points), methods - participants (3 points), methods - outcome (2 points), methods - predictors (2 points), methods - sample size (1 point), methods - missing data (1 point), methods - statistical analysis (5 points), methods - risk groups (1 point), methods - development and validation (1 point), results - participants (3 points), results - model development (2 points), results - model specification (2 points), results - model performance (1 point), results - model updating (1 point), discussion - limitations (1 point), discussion - interpretation (2 points), discussion - implications (1 point), supplementary information (1 point) and funding (1 point). The individual TRIPOD ratio was calculated for each article as the ratio of the TRIPOD score to the maximum possible points calculated from the included features. The TRIPOD adherence ratio for each feature was calculated as the ratio of the total points for a specific feature to the total possible points from all of the articles assessing that feature.</p>
<p>Descriptive statistics were calculated and visualized using GraphPad Prism version 9.1.2 for Windows, GraphPad Software, San Diego, California USA, <uri xlink:href="http://www.graphpad.com">www.graphpad.com</uri>.</p>
</sec>
<sec id="s3" sec-type="results">
<title>Results</title>
<p>The primary literature search returned 11,727 candidate articles, of which 90 duplicates were removed. The remaining 11,637 articles were screened using title and abstract, of which 10,502 articles that did not involve neuro-oncology were excluded. The full text of the remaining 1,135 articles was reviewed, of which 438 articles were excluded. The 438 excluded articles consisted of 172 conference abstracts, 140 articles not utilizing machine learning, 62 not representing original research, 22 not published in the English language, 15 not investigating gliomas, 11 not utilizing MRI, magnetic resonance spectroscopy (MRS), or positron emission tomography (PET) imaging, 9 not utilizing human subjects, and 7 duplicate articles. The remaining 697 articles underwent further review, of which 685 articles were excluded and 12 articles (<xref ref-type="bibr" rid="B7">7</xref>&#x2013;<xref ref-type="bibr" rid="B18">18</xref>) investigating the use of machine learning to identify gliomas in datasets which include non-glioma images were identified for inclusion in the final analysis.</p>
<p>The main data points extracted from the 12 articles are summarized in <xref ref-type="table" rid="T1">
<bold>Table&#xa0;1</bold>
</xref>. The distribution of the objective of the articles is depicted in <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2</bold>
</xref>, the distribution of datasets utilized is depicted in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>, and algorithm testing strategies are depicted in <xref ref-type="fig" rid="f4">
<bold>Figure&#xa0;4</bold>
</xref>. Seven articles investigated the differentiation of normal from abnormal images in datasets which include gliomas, and five articles investigated the differentiation of glioma images from non-glioma or normal images. The most frequent dataset used was a single institution dataset (5 articles, of which 4 used the Harvard Medical School dataset), followed by the Multimodal Brain Tumor Image Segmentation Benchmark (BRATS; 3 articles), multicenter datasets (2 articles), and The Cancer Imaging Archive (TCIA; 2 articles). BRATS (<xref ref-type="bibr" rid="B19">19</xref>&#x2013;<xref ref-type="bibr" rid="B21">21</xref>) and TCIA (<xref ref-type="bibr" rid="B22">22</xref>) are publicly available databases of annotated MR images of gliomas. The ground truth in the BRATS and TCIA datasets is defined by pathology. Additionally, there was pathologic ground truth in the single institution dataset used by Dube et&#xa0;al. In the Harvard Medical School (<xref ref-type="bibr" rid="B23">23</xref>) dataset used by four studies, the method of ground truth establishment is unknown. Additionally, in the two studies using other multicenter datasets, the method to establish ground truth is unknown for at least part of the data.</p>
<table-wrap id="T1" position="float">
<label>Table&#xa0;1</label>
<caption>
<p>Summary of articles (n=12).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Author</th>
<th valign="top" align="center">Year of Publication</th>
<th valign="top" align="center">Purpose</th>
<th valign="top" align="center">Dataset</th>
<th valign="top" align="center">Ground Truth</th>
<th valign="top" align="center">Number of Patients</th>
<th valign="top" align="center">Training Strategy</th>
<th valign="top" align="center">Validation Strategy</th>
<th valign="top" align="center">Testing Strategy</th>
<th valign="top" align="center">MRI Sequences</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Al-Saffar et&#xa0;al. (<xref ref-type="bibr" rid="B7">7</xref>)</td>
<td valign="top" align="center">2020</td>
<td valign="top" align="left">Glioma <italic>vs</italic>. Normal</td>
<td valign="top" align="left">TCIA 2013</td>
<td valign="top" align="left">Pathology</td>
<td valign="top" align="left">130</td>
<td valign="top" align="left">5-Fold Cross Validation</td>
<td valign="top" align="left">5-Fold Cross Validation</td>
<td valign="top" align="left">Separate images within same dataset</td>
<td valign="top" align="left">FLAIR</td>
</tr>
<tr>
<td valign="top" align="left">Kaur et&#xa0;al. (<xref ref-type="bibr" rid="B10">10</xref>)</td>
<td valign="top" align="center">2020</td>
<td valign="top" align="left">Normal <italic>vs</italic>. Abnormal</td>
<td valign="top" align="left">Multicenter</td>
<td valign="top" align="left">Unknown</td>
<td valign="top" align="left">717</td>
<td valign="top" align="left">Separate images within same dataset</td>
<td valign="top" align="left">None</td>
<td valign="top" align="left">Separate images within same dataset</td>
<td valign="top" align="left">T1, T1c, T2, FLAIR</td>
</tr>
<tr>
<td valign="top" align="left">Kharrat et&#xa0;al. (<xref ref-type="bibr" rid="B11">11</xref>)</td>
<td valign="top" align="center">2020</td>
<td valign="top" align="left">Glioma <italic>vs</italic>. Normal</td>
<td valign="top" align="left">BRATS 2013 and 2015</td>
<td valign="top" align="left">Pathology</td>
<td valign="top" align="left">304</td>
<td valign="top" align="left">5-Fold Cross Validation</td>
<td valign="top" align="left">None</td>
<td valign="top" align="left">5-Fold Cross Validation</td>
<td valign="top" align="left">T1, T1c, T2, FLAIR</td>
</tr>
<tr>
<td valign="top" align="left">Reddy et al. (<xref ref-type="bibr" rid="B12">12</xref>)</td>
<td valign="top" align="center">2020</td>
<td valign="top" align="left">Normal <italic>vs</italic>. Abnormal</td>
<td valign="top" align="left">Harvard Medical School</td>
<td valign="top" align="left">Unknown</td>
<td valign="top" align="left">Not specified (298 images)</td>
<td valign="top" align="left">5-Fold Cross Validation</td>
<td valign="top" align="left">None</td>
<td valign="top" align="left">5-Fold Cross Validation</td>
<td valign="top" align="left">T2</td>
</tr>
<tr>
<td valign="top" align="left">Samikannu et al. (<xref ref-type="bibr" rid="B14">14</xref>)</td>
<td valign="top" align="center">2020</td>
<td valign="top" align="left">Glioma <italic>vs</italic>. Normal</td>
<td valign="top" align="left">BRATS 2015</td>
<td valign="top" align="left">Pathology</td>
<td valign="top" align="left">176</td>
<td valign="top" align="left">Separate images within same dataset</td>
<td valign="top" align="left">None</td>
<td valign="top" align="left">Separate images within same dataset</td>
<td valign="top" align="left">Not specified</td>
</tr>
<tr>
<td valign="top" align="left">Ural et al. (<xref ref-type="bibr" rid="B16">16</xref>)</td>
<td valign="top" align="center">2020</td>
<td valign="top" align="left">Normal <italic>vs</italic>. Abnormal</td>
<td valign="top" align="left">Multicenter</td>
<td valign="top" align="left">Unknown</td>
<td valign="top" align="left">300</td>
<td valign="top" align="left">Separate images within same dataset</td>
<td valign="top" align="left">None</td>
<td valign="top" align="left">Separate images within same dataset</td>
<td valign="top" align="left">T1, T1c, T2, FLAIR, DWI</td>
</tr>
<tr>
<td valign="top" align="left">Kale et al. (<xref ref-type="bibr" rid="B9">9</xref>)</td>
<td valign="top" align="center">2019</td>
<td valign="top" align="left">Normal <italic>vs</italic>. Abnormal</td>
<td valign="top" align="left">Harvard Medical School</td>
<td valign="top" align="left">Unknown</td>
<td valign="top" align="left">Not specified (400 images)</td>
<td valign="top" align="left">5-Fold Cross Validation</td>
<td valign="top" align="left">None</td>
<td valign="top" align="left">5-Fold Cross Validation</td>
<td valign="top" align="left">T2</td>
</tr>
<tr>
<td valign="top" align="left">Rudie et al. (<xref ref-type="bibr" rid="B13">13</xref>)</td>
<td valign="top" align="center">2019</td>
<td valign="top" align="left">Glioma <italic>vs</italic>. Non-glioma</td>
<td valign="top" align="left">BRATS 2018</td>
<td valign="top" align="left">Pathology</td>
<td valign="top" align="left">351</td>
<td valign="top" align="left">10-Fold Cross Validation</td>
<td valign="top" align="left">10-Fold Cross Validation</td>
<td valign="top" align="left">Separate images within same dataset</td>
<td valign="top" align="left">T1, T1c, T2, FLAIR</td>
</tr>
<tr>
<td valign="top" align="left">Talo et al. (<xref ref-type="bibr" rid="B15">15</xref>)</td>
<td valign="top" align="center">2019</td>
<td valign="top" align="left">Normal <italic>vs</italic>. Abnormal</td>
<td valign="top" align="left">Harvard Medical School</td>
<td valign="top" align="left">Unknown</td>
<td valign="top" align="left">42</td>
<td valign="top" align="left">5-Fold Cross Validation</td>
<td valign="top" align="left">None</td>
<td valign="top" align="left">5-Fold Cross Validation</td>
<td valign="top" align="left">T2</td>
</tr>
<tr>
<td valign="top" align="left">Wong et al. (<xref ref-type="bibr" rid="B17">17</xref>)</td>
<td valign="top" align="center">2018</td>
<td valign="top" align="left">Glioma <italic>vs</italic>. Normal</td>
<td valign="top" align="left">TCIA 2017</td>
<td valign="top" align="left">Pathology</td>
<td valign="top" align="left">280</td>
<td valign="top" align="left">Separate images within same dataset</td>
<td valign="top" align="left">None</td>
<td valign="top" align="left">Separate images within same dataset</td>
<td valign="top" align="left">T1c</td>
</tr>
<tr>
<td valign="top" align="left">Zhang et al. (<xref ref-type="bibr" rid="B18">18</xref>)</td>
<td valign="top" align="center">2013</td>
<td valign="top" align="left">Normal <italic>vs</italic>. Abnormal</td>
<td valign="top" align="left">Harvard Medical School</td>
<td valign="top" align="left">Unknown</td>
<td valign="top" align="left">Not specified (90 images)</td>
<td valign="top" align="left">5-Fold Cross Validation</td>
<td valign="top" align="left">None</td>
<td valign="top" align="left">5-Fold Cross Validation</td>
<td valign="top" align="left">T2</td>
</tr>
<tr>
<td valign="top" align="left">Dube et al. (<xref ref-type="bibr" rid="B8">8</xref>)</td>
<td valign="top" align="center">2006</td>
<td valign="top" align="left">Normal <italic>vs</italic>. Abnormal</td>
<td valign="top" align="left">UCLA Brain Tumor Database</td>
<td valign="top" align="left">Pathology</td>
<td valign="top" align="left">60</td>
<td valign="top" align="left">Separate images within same dataset</td>
<td valign="top" align="left">None</td>
<td valign="top" align="left">Separate images within same dataset</td>
<td valign="top" align="left">T2</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Distribution of article objectives.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-11-788819-g002.tif"/>
</fig>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>Distribution of datasets.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-11-788819-g003.tif"/>
</fig>
<fig id="f4" position="float">
<label>Figure&#xa0;4</label>
<caption>
<p>Distribution of machine learning algorithm testing strategies.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-11-788819-g004.tif"/>
</fig>
<p>Algorithm training and testing strategies consisted of five-fold cross validation (5 articles), and use of exclusive sets of images within the same dataset for training and for testing (7 articles). The range of sample sizes is shown in <xref ref-type="fig" rid="f5">
<bold>Figure&#xa0;5</bold>
</xref>. The median sample size was 280 patients (reported in 9 articles, range 42 to 717). The three articles not reporting the number of patients did report the number of images, with a median of 298 images (range 90 to 400). The sequences of magnetic resonance images used in each study were variable, consisting of some combination of T1-weighted, T2-weighted, contrast enhanced T1-weighted, T2 fluid attenuated inversion recovery (FLAIR), and diffusion weighted (DWI) images.</p>
<fig id="f5" position="float">
<label>Figure&#xa0;5</label>
<caption>
<p>Scatterplot demonstrating the number of patients used in each article (n = 9, 3 articles did not report the number of patients).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-11-788819-g005.tif"/>
</fig>
<p>A description of the machine learning algorithms is presented in <xref ref-type="table" rid="T2">
<bold>Table&#xa0;2</bold>
</xref>. The most common algorithm was a neural network used in 10 articles, while two articles used support vector machine algorithms. A wide variety of neural networks were used, including five articles which developed novel algorithms. The quantitative results are demonstrated in <xref ref-type="table" rid="T3">
<bold>Table&#xa0;3</bold>
</xref>, which summarizes the testing performance of each algorithm, and includes accuracy, sensitivity, specificity, AUC, and Dice coefficient. When multiple algorithms were evaluated in a study, the best performing algorithm was reported. The most commonly reported metric was accuracy, which ranged from 0.75 to 1.00 (median 0.96, 10 articles). When segmentation was investigated, the Dice coefficient was reported, which ranged from 0.92 to 0.98 (2 articles). A random effects meta-analysis was attempted; however, it could not be performed due to the lack of available data (<xref ref-type="bibr" rid="B24">24</xref>). The AUC was reported in only one of 12 articles and was therefore not suitable for meta-analysis. Furthermore, for algorithm accuracy the standard deviation or confidence interval was only reported in three articles and was therefore also not sufficient to perform an unbiased and generalizable meta-analysis (<xref ref-type="bibr" rid="B25">25</xref>).</p>
<table-wrap id="T2" position="float">
<label>Table&#xa0;2</label>
<caption>
<p>Summary of machine learning algorithms (n=12).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Author</th>
<th valign="top" align="center">Year of Publication</th>
<th valign="top" align="center">Purpose</th>
<th valign="top" align="center">Machine Learning Algorithm</th>
<th valign="top" align="center">Neural Network Type</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Al-Saffar et&#xa0;al. (<xref ref-type="bibr" rid="B7">7</xref>)</td>
<td valign="top" align="center">2020</td>
<td valign="top" align="left">Glioma <italic>vs</italic>. Normal</td>
<td valign="top" align="left">Neural network</td>
<td valign="top" align="left">Novel (residual neural network)</td>
</tr>
<tr>
<td valign="top" align="left">Kaur et&#xa0;al. (<xref ref-type="bibr" rid="B10">10</xref>)</td>
<td valign="top" align="center">2020</td>
<td valign="top" align="left">Normal <italic>vs</italic>. Abnormal</td>
<td valign="top" align="left">Neural network</td>
<td valign="top" align="left">AlexNet, GoogleNet, ResNet50, ResNet101, VGG, VGG-19, InceptionV3, and InceptionResNetV2</td>
</tr>
<tr>
<td valign="top" align="left">Kharrat et&#xa0;al. (<xref ref-type="bibr" rid="B11">11</xref>)</td>
<td valign="top" align="center">2020</td>
<td valign="top" align="left">Glioma <italic>vs</italic>. Normal</td>
<td valign="top" align="left">Neural network</td>
<td valign="top" align="left">Novel (3D neural network)</td>
</tr>
<tr>
<td valign="top" align="left">Reddy et al. (<xref ref-type="bibr" rid="B12">12</xref>)</td>
<td valign="top" align="center">2020</td>
<td valign="top" align="left">Normal <italic>vs</italic>. Abnormal</td>
<td valign="top" align="left">Neural network</td>
<td valign="top" align="left">Novel (extreme learning machine)</td>
</tr>
<tr>
<td valign="top" align="left">Samikannu et al. (<xref ref-type="bibr" rid="B14">14</xref>)</td>
<td valign="top" align="center">2020</td>
<td valign="top" align="left">Glioma <italic>vs</italic>. Normal</td>
<td valign="top" align="left">Neural network</td>
<td valign="top" align="left">Novel (convolutional neural network)</td>
</tr>
<tr>
<td valign="top" align="left">Ural et al. (<xref ref-type="bibr" rid="B16">16</xref>)</td>
<td valign="top" align="center">2020</td>
<td valign="top" align="left">Normal <italic>vs</italic>. Abnormal</td>
<td valign="top" align="left">Neural network</td>
<td valign="top" align="left">Modified AlexNet and VGG</td>
</tr>
<tr>
<td valign="top" align="left">Kale et al. (<xref ref-type="bibr" rid="B9">9</xref>)</td>
<td valign="top" align="center">2019</td>
<td valign="top" align="left">Normal <italic>vs</italic>. Abnormal</td>
<td valign="top" align="left">Neural network</td>
<td valign="top" align="left">Novel (back propagation neural network)</td>
</tr>
<tr>
<td valign="top" align="left">Rudie et al. (<xref ref-type="bibr" rid="B13">13</xref>)</td>
<td valign="top" align="center">2019</td>
<td valign="top" align="left">Glioma <italic>vs</italic>. Non-glioma</td>
<td valign="top" align="left">Neural network</td>
<td valign="top" align="left">3D U-Net</td>
</tr>
<tr>
<td valign="top" align="left">Talo et al. (<xref ref-type="bibr" rid="B15">15</xref>)</td>
<td valign="top" align="center">2019</td>
<td valign="top" align="left">Normal <italic>vs</italic>. Abnormal</td>
<td valign="top" align="left">Neural network</td>
<td valign="top" align="left">ResNet34</td>
</tr>
<tr>
<td valign="top" align="left">Wong et al. (<xref ref-type="bibr" rid="B17">17</xref>)</td>
<td valign="top" align="center">2018</td>
<td valign="top" align="left">Glioma <italic>vs</italic>. Normal</td>
<td valign="top" align="left">Neural network</td>
<td valign="top" align="left">Modified VGG</td>
</tr>
<tr>
<td valign="top" align="left">Zhang et al. (<xref ref-type="bibr" rid="B18">18</xref>)</td>
<td valign="top" align="center">2013</td>
<td valign="top" align="left">Normal <italic>vs</italic>. Abnormal</td>
<td valign="top" align="left">Support vector machine</td>
<td valign="top" align="left">N/A</td>
</tr>
<tr>
<td valign="top" align="left">Dube et al. (<xref ref-type="bibr" rid="B8">8</xref>)</td>
<td valign="top" align="center">2006</td>
<td valign="top" align="left">Normal <italic>vs</italic>. Abnormal</td>
<td valign="top" align="left">Support vector machine</td>
<td valign="top" align="left">N/A</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>N/A, Not applicable.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T3" position="float">
<label>Table&#xa0;3</label>
<caption>
<p>Summary of algorithm testing performance (n=12).</p>
</caption>
<table frame="hsides">
<thead>
<tr>
<th valign="top" align="left">Author</th>
<th valign="top" align="center">Year of Publication</th>
<th valign="top" align="center">Purpose</th>
<th valign="top" align="center">Machine Learning Algorithm</th>
<th valign="top" align="center">Accuracy (Standard Deviation)</th>
<th valign="top" align="center">Sensitivity</th>
<th valign="top" align="center">Specificity</th>
<th valign="top" align="center">AUC</th>
<th valign="top" align="center">Dice coefficient</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Al-Saffar et&#xa0;al. (<xref ref-type="bibr" rid="B7">7</xref>)</td>
<td valign="top" align="center">2020</td>
<td valign="top" align="left">Glioma <italic>vs</italic>. Normal</td>
<td valign="top" align="left">Novel (residual neural network)</td>
<td valign="top" align="center">0.9491 (NR)</td>
<td valign="top" align="center">0.9689</td>
<td valign="top" align="center">0.9637</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">NR</td>
</tr>
<tr>
<td valign="top" align="left">Kaur et&#xa0;al. (<xref ref-type="bibr" rid="B10">10</xref>)</td>
<td valign="top" align="center">2020</td>
<td valign="top" align="left">Normal <italic>vs</italic>. Abnormal</td>
<td valign="top" align="left">AlexNet</td>
<td valign="top" align="center">1 (0)</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">1</td>
<td valign="top" align="center">NR</td>
</tr>
<tr>
<td valign="top" align="left">Kharrat et&#xa0;al. (<xref ref-type="bibr" rid="B11">11</xref>)</td>
<td valign="top" align="center">2020</td>
<td valign="top" align="left">Glioma <italic>vs</italic>. Normal</td>
<td valign="top" align="left">Novel (3D neural network)</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">0.98</td>
</tr>
<tr>
<td valign="top" align="left">Reddy et al. (<xref ref-type="bibr" rid="B12">12</xref>)</td>
<td valign="top" align="center">2020</td>
<td valign="top" align="left">Normal <italic>vs</italic>. Abnormal</td>
<td valign="top" align="left">Novel (extreme learning machine)</td>
<td valign="top" align="center">0.94 (0.23)</td>
<td valign="top" align="center">0.95</td>
<td valign="top" align="center">0.95</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">NR</td>
</tr>
<tr>
<td valign="top" align="left">Samikannu et al. (<xref ref-type="bibr" rid="B14">14</xref>)</td>
<td valign="top" align="center">2020</td>
<td valign="top" align="left">Glioma <italic>vs</italic>. Normal</td>
<td valign="top" align="left">Novel (convolutional neural network)</td>
<td valign="top" align="center">0.991 (NR)</td>
<td valign="top" align="center">0.971</td>
<td valign="top" align="center">0.987</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">NR</td>
</tr>
<tr>
<td valign="top" align="left">Ural et al. (<xref ref-type="bibr" rid="B16">16</xref>)</td>
<td valign="top" align="center">2020</td>
<td valign="top" align="left">Normal <italic>vs</italic>. Abnormal</td>
<td valign="top" align="left">Modified AlexNet and VGG</td>
<td valign="top" align="center">0.927 (NR)</td>
<td valign="top" align="center">0.968</td>
<td valign="top" align="center">0.98</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">NR</td>
</tr>
<tr>
<td valign="top" align="left">Kale et al. (<xref ref-type="bibr" rid="B9">9</xref>)</td>
<td valign="top" align="center">2019</td>
<td valign="top" align="left">Normal <italic>vs</italic>. Abnormal</td>
<td valign="top" align="left">Novel (back propagation neural network)</td>
<td valign="top" align="center">1.0 (0.0002)</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">NR</td>
</tr>
<tr>
<td valign="top" align="left">Rudie et al. (<xref ref-type="bibr" rid="B13">13</xref>)</td>
<td valign="top" align="center">2019</td>
<td valign="top" align="left">Glioma <italic>vs</italic>. Non-glioma</td>
<td valign="top" align="left">3D U-Net</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">0.92</td>
</tr>
<tr>
<td valign="top" align="left">Talo et al. (<xref ref-type="bibr" rid="B15">15</xref>)</td>
<td valign="top" align="center">2019</td>
<td valign="top" align="left">Normal <italic>vs</italic>. Abnormal</td>
<td valign="top" align="left">ResNet34</td>
<td valign="top" align="center">0.9787 (NR)</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">NR</td>
</tr>
<tr>
<td valign="top" align="left">Wong et al. (<xref ref-type="bibr" rid="B17">17</xref>)</td>
<td valign="top" align="center">2018</td>
<td valign="top" align="left">Glioma <italic>vs</italic>. Normal</td>
<td valign="top" align="left">Modified VGG</td>
<td valign="top" align="center">0.82 (NR)</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">NR</td>
</tr>
<tr>
<td valign="top" align="left">Zhang et al. (<xref ref-type="bibr" rid="B18">18</xref>)</td>
<td valign="top" align="center">2013</td>
<td valign="top" align="left">Normal <italic>vs</italic>. Abnormal</td>
<td valign="top" align="left">Support vector machine</td>
<td valign="top" align="center">0.9778 (NR)</td>
<td valign="top" align="center">0.9812</td>
<td valign="top" align="center">0.92</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">NR</td>
</tr>
<tr>
<td valign="top" align="left">Dube et al. (<xref ref-type="bibr" rid="B8">8</xref>)</td>
<td valign="top" align="center">2006</td>
<td valign="top" align="left">Normal <italic>vs</italic>. Abnormal</td>
<td valign="top" align="left">Support vector machine</td>
<td valign="top" align="center">0.75 (NR)</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">NR</td>
<td valign="top" align="center">NR</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>NR, Not reported.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>Assessment of the quality of reporting using TRIPOD criteria yielded a mean individual TRIPOD ratio of 0.50 (standard deviation 0.14, range 0.37 to 0.85). Individual TRIPOD scores are depicted in <xref ref-type="fig" rid="f6">
<bold>Figure&#xa0;6</bold>
</xref> and feature TRIPOD adherence scores are depicted in <xref ref-type="fig" rid="f7">
<bold>Figure&#xa0;7</bold>
</xref>. Due to the inherent nature of the articles, no study created risk groups or discussed model updating. Both subitems of model specification were also not fully discussed in any article. In addition, both subitems of model development were fully included in only two articles. The maximum possible points for an individual article ranged from 26 to 29 when accounting for non-applicable features (the theoretical maximum points with all features included would be 37). Of the eligible features, the poorest adherence was seen with the title (0 adherent articles), abstract (1 adherent article), missing data (1 adherent article), results - participants (0 adherent articles) and model performance (2 adherent articles).</p>
<fig id="f6" position="float">
<label>Figure&#xa0;6</label>
<caption>
<p>Individual TRIPOD Ratio, calculated for each article as the ratio of the TRIPOD score to the maximum possible score.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-11-788819-g006.tif"/>
</fig>
<fig id="f7" position="float">
<label>Figure&#xa0;7</label>
<caption>
<p>TRIPOD Adherence Ratio, calculated for each feature as the ratio of the total points scored to the total possible points for that feature. Notably, two features (risk groups and model updating) were not assessed in any article and therefore not included in the analysis.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fonc-11-788819-g007.tif"/>
</fig>
<p>Additional linear regression analysis was performed to identify any predictor of algorithm accuracy. A comparison of algorithm accuracy and the sample size is demonstrated in <xref ref-type="supplementary-material" rid="SF2">
<bold>Supplementary Figure S2</bold>
</xref>, and shows no significant relationship, with an R<sup>2</sup> of 0.1204 (P = 0.45). A comparison of algorithm accuracy and the individual TRIPOD ratio is demonstrated in <xref ref-type="supplementary-material" rid="SF3">
<bold>Supplementary Figure S3</bold>
</xref>, and shows no significant relationship, with an R<sup>2</sup> of 0.01578 (P = 0.73).</p>
</sec>
<sec id="s4" sec-type="discussion">
<title>Discussion</title>
<p>A systematic review of the literature identified 12 studies which investigate the use of machine learning to identify gliomas in datasets which include non-glioma images. This scenario most closely simulates routine clinical practice, where gliomas are intermixed with normal examinations and non-oncologic pathologies. Moreover, these algorithms may have the potential to support a screening program for gliomas in the future. The studies were all published between 2006 and 2020, with nine published from 2019 to 2020, reflecting the increasing popularity of machine learning research in recent years. The five studies using BRATS or TCIA datasets included only normal images or glioma images. These datasets are more generalizable to clinical practice than those containing only glioma images, however still lack other routinely encountered pathologies. The remaining seven studies utilizing other single institution or multicenter datasets included a mix of normal, glioma, and other pathologic images. The other pathologies included stroke, Alzheimer&#x2019;s disease, multiple sclerosis, and meningioma, among others. This data is more representative of routine clinical practice, however still comes with limitations. There are a wide variety of healthcare settings, such as a tertiary or academic medical center, small hospital, or outpatient practice, each with different patient populations and pathologies. Additionally, datasets from different locations around the world will demonstrate different heterogeneity based on regional variations.</p>
<p>There are major limitations with the algorithm training and testing strategies. The description of algorithm training, validation, and testing strategies is heterogeneous across studies. Often in machine learning research, validation and testing are used interchangeably, however this leads to confusion in the evaluation of algorithm performance. Validation should be reserved for the description of algorithm finetuning using data separate from the training data. Testing should be used to describe the unbiased evaluation of an algorithm using data separate from the training and validation sets. Each study reported training and testing data, however many studies used the term validation for what should actually be described as testing. Only two studies performed a true validation in addition to training and testing: Al-Saffar et&#xa0;al. used 5-fold cross validation for training and validation followed by a separate set of images within the same dataset for testing, and Rudie et&#xa0;al. used 10-fold cross validation for training and validation followed by a separate set of images within the same dataset for testing. None of the 12 studies tested their algorithms on external data. This poses a major limitation to the generalizability of these algorithms. In the United States, this also hinders the ability for approval by the Food and Drug Administration, which recommends algorithms be tested on external datasets.</p>
<p>Overall, there appears to be limited availability of high-quality data to train these machine learning algorithms. The number of patients in the datasets was low, with no study reaching 1,000 patients, and one study dropping as low as 42 patients. As a result of low sample sizes, the k-fold cross validation technique was commonly used for algorithm training, and five studies even used k-fold cross validation to test their algorithms. This technique is optimal for providing more data with a small sample size, but comes with the drawback of increased overtraining and decreased generalizability when applying the algorithm to an outside dataset. Additionally, nine studies used the same three datasets: BRATS, TCIA, and Harvard Medical School. Only two studies used datasets compiled from multiple institutions. This highlights a need to develop larger and more clinically applicable datasets to perform more robust machine learning research. Moreover, it will be critical to develop datasets that closely represent the mix of pathology encountered in each individual hospital, because this will vary between different institutions and practice settings. This will potentially necessitate hospital-specific dataset creation for the translation of algorithms.</p>
<p>Risk of bias analysis using TRIPOD criteria revealed that the quality of reporting was insufficient to draw any conclusion about algorithm generalizability. On average, there was adherence to only half of the reporting standards, with a large variation between studies. The poorest adherence was noted with the title and abstract, the method for handling missing data, the description of study participants within the results section, and the reporting of model performance. Specifically for model performance, the confidence interval of the discrimination measure was reported in only two studies. It is important to note that the TRIPOD criteria were primarily developed for studies that used conventional multivariate regression prediction models rather than machine learning models, and TRIPOD-AI criteria are currently in development to specifically address the reporting of artificial intelligence and machine learning models (<xref ref-type="bibr" rid="B26">26</xref>). Poor quality of reporting also limited the ability to perform a meta-analysis, as AUC was reported in only one study, and the standard deviation for accuracy was reported in only three studies. Overall, the current analysis demonstrates that a substantial portion of information needed for translating algorithms to clinical practice is not available.</p>
</sec>
<sec id="s5">
<title>Conclusion</title>
<p>Systematic review of the literature identified machine learning algorithms which can identify gliomas in datasets containing non-glioma images, which are the most suitable algorithms for integration into general clinical workflow. Such algorithms may also serve as the basis for a potential brain tumor screening program. Severe limitations hindering the application of these algorithms to clinical practice were identified, including limited datasets, the lack of generalizable algorithm training and testing strategies, and poor quality of reporting. There is a need to develop more robust and heterogeneous datasets, which can be applied to individual clinical practice settings. Future studies would benefit from using external datasets for algorithm testing as well as placing increased attention on quality of reporting standards.</p>
</sec>
<sec id="s6" sec-type="data-availability">
<title>Data Availability Statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="supplementary-material" rid="SF1">
<bold>Supplementary Material</bold>
</xref>. Further inquiries can be directed to the corresponding author.</p>
</sec>
<sec id="s7" sec-type="author-contributions">
<title>Author Contributions</title>
<p>All authors listed have made a substantial, direct, and intellectual contribution to the work and approved it for publication.</p>
</sec>
<sec id="s8" sec-type="funding-information">
<title>Funding</title>
<p>This publication was made possible by KL2 TR001862 (MA) from the National Center for Advancing Translational Science (NCATS), components of the National Institutes of Health (NIH), and NIH roadmap for Medical Research. This publication also received support from the American Society of Neuroradiology Fellow Award 2018 (MA).</p>
</sec>
<sec id="s9">
<title>Author Disclaimer</title>
<p>Its contents are solely the responsibility of the authors and do not necessarily represent the official view of NIH.</p>
</sec>
<sec id="s10" sec-type="COI-statement">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s11" sec-type="disclaimer">
<title>Publisher&#x2019;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
</body>
<back>
<sec id="s12" sec-type="supplementary-material">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fonc.2021.788819/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fonc.2021.788819/full#supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Image_1.jpeg" id="SF1" mimetype="image/jpeg">
<label>Supplementary Figure&#xa0;1</label>
<caption>
<p>Search strategy and syntax. A total of four databases were searched: Embase, Ovid MEDLINE(R), Cochrane CENTRAL (trials), and Web of Science.</p>
</caption>
</supplementary-material>
<supplementary-material xlink:href="Image_2.jpeg" id="SF2" mimetype="image/jpeg">
<label>Supplementary Figure&#xa0;2</label>
<caption>
<p>Linear regression analysis demonstrates no significant relationship between algorithm accuracy and sample size (R<sup>2</sup> of 0.1204, P = 0.45).</p>
</caption>
</supplementary-material>
<supplementary-material xlink:href="Image_3.jpeg" id="SF3" mimetype="image/jpeg">
<label>Supplementary Figure&#xa0;3</label>
<caption>
<p>Linear regression analysis demonstrates no significant relationship between algorithm accuracy and individual TRIPOD ratio (R<sup>2</sup> of 0.01578, P = 0.73).</p>
</caption>
</supplementary-material>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>S</given-names>
</name>
<name>
<surname>Summers</surname> <given-names>RM</given-names>
</name>
</person-group>. <article-title>Machine Learning and Radiology</article-title>. <source>Med Image Anal</source> (<year>2012</year>) <volume>16</volume>:<page-range>933&#x2013;51</page-range>. doi: <pub-id pub-id-type="doi">10.1016/j.media.2012.02.005</pub-id>
</citation>
</ref>
<ref id="B2">
<label>2</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Badillo</surname> <given-names>S</given-names>
</name>
<name>
<surname>Banfai</surname> <given-names>B</given-names>
</name>
<name>
<surname>Birzele</surname> <given-names>F</given-names>
</name>
<name>
<surname>Davydov</surname> <given-names>II</given-names>
</name>
<name>
<surname>Hutchinson</surname> <given-names>L</given-names>
</name>
<name>
<surname>Kam-Thong</surname> <given-names>T</given-names>
</name>
<etal/>
</person-group>. <article-title>An Introduction to Machine Learning</article-title>. <source>Clin Pharmacol Ther</source> (<year>2020</year>) <volume>107</volume>(<issue>4</issue>):<page-range>871&#x2013;85</page-range>. doi: <pub-id pub-id-type="doi">10.1002/cpt.1796</pub-id>
</citation>
</ref>
<ref id="B3">
<label>3</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zaharchuk</surname> <given-names>G</given-names>
</name>
<name>
<surname>Gong</surname> <given-names>E</given-names>
</name>
<name>
<surname>Wintermark</surname> <given-names>M</given-names>
</name>
<name>
<surname>Rubin</surname> <given-names>D</given-names>
</name>
<name>
<surname>Langlotz</surname> <given-names>CP</given-names>
</name>
</person-group>. <article-title>Deep Learning in Neuroradiology</article-title>. <source>AJNR Am J Neuroradiol</source> (<year>2018</year>) <volume>39</volume>:<page-range>1776&#x2013;84</page-range>. doi: <pub-id pub-id-type="doi">10.3174/ajnr.A5543</pub-id>
</citation>
</ref>
<ref id="B4">
<label>4</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lotan</surname> <given-names>E</given-names>
</name>
<name>
<surname>Jain</surname> <given-names>R</given-names>
</name>
<name>
<surname>Razavian</surname> <given-names>N</given-names>
</name>
<name>
<surname>Fatterpekar</surname> <given-names>GM</given-names>
</name>
<name>
<surname>Lui</surname> <given-names>YW</given-names>
</name>
</person-group>. <article-title>State of the Art: Machine Learning Applications in Glioma Imaging</article-title>. <source>AJR Am J Roentgenol</source> (<year>2019</year>) <volume>212</volume>:<fpage>26</fpage>&#x2013;<lpage>37</lpage>. doi: <pub-id pub-id-type="doi">10.2214/AJR.18.20218</pub-id>
</citation>
</ref>
<ref id="B5">
<label>5</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Moher</surname> <given-names>D</given-names>
</name>
<name>
<surname>Shamseer</surname> <given-names>L</given-names>
</name>
<name>
<surname>Clarke</surname> <given-names>M</given-names>
</name>
<name>
<surname>Ghersi</surname> <given-names>D</given-names>
</name>
<name>
<surname>Liberati</surname> <given-names>A</given-names>
</name>
<name>
<surname>Petticrew</surname> <given-names>M</given-names>
</name>
<etal/>
</person-group>. <article-title>Preferred Reporting Items for Systematic Review and Meta-Analysis Protocols (PRISMA-P) 2015 Statement</article-title>. <source>Syst Rev</source> (<year>2015</year>) <volume>4</volume>(<issue>1</issue>):<page-range>1</page-range>. doi: <pub-id pub-id-type="doi">10.1186/2046-4053-4-1</pub-id>
</citation>
</ref>
<ref id="B6">
<label>6</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Collins</surname> <given-names>GS</given-names>
</name>
<name>
<surname>Reitsma</surname> <given-names>JB</given-names>
</name>
<name>
<surname>Altman</surname> <given-names>DG</given-names>
</name>
<name>
<surname>Moons</surname> <given-names>KG</given-names>
</name>
</person-group>. <article-title>Transparent Reporting of a Multivariable Prediction Model for Individual Prognosis or Diagnosis (TRIPOD): The TRIPOD Statement</article-title>. <source>BMJ</source> (<year>2015</year>) <volume>350</volume>:<fpage>g7594</fpage>. doi: <pub-id pub-id-type="doi">10.1136/bmj.g7594</pub-id>
</citation>
</ref>
<ref id="B7">
<label>7</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Al-Saffar</surname> <given-names>ZA</given-names>
</name>
<name>
<surname>Yildirim</surname> <given-names>T</given-names>
</name>
</person-group>. <article-title>A Novel Approach to Improving Brain Image Classification Using Mutual Information-Accelerated Singular Value Decomposition</article-title>. <source>IEEE Access</source> (<year>2020</year>) <volume>8</volume>:<page-range>52575&#x2013;87</page-range>. doi: <pub-id pub-id-type="doi">10.1109/ACCESS.2020.2980728</pub-id>
</citation>
</ref>
<ref id="B8">
<label>8</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dube</surname> <given-names>S</given-names>
</name>
<name>
<surname>El-Saden</surname> <given-names>S</given-names>
</name>
<name>
<surname>Cloughesy</surname> <given-names>TF</given-names>
</name>
<name>
<surname>Sinha</surname> <given-names>U</given-names>
</name>
</person-group>. <article-title>Content Based Image Retrieval for MR Image Studies of Brain Tumors</article-title>. <source>Conf Proc IEEE Eng Med Biol Soc</source> (<year>2006</year>) <volume>2006</volume>:<page-range>3337&#x2013;40</page-range>. doi: <pub-id pub-id-type="doi">10.1109/IEMBS.2006.260262</pub-id>
</citation>
</ref>
<ref id="B9">
<label>9</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kale</surname> <given-names>VV</given-names>
</name>
<name>
<surname>Hamde</surname> <given-names>ST</given-names>
</name>
<name>
<surname>Holambe</surname> <given-names>RS</given-names>
</name>
</person-group>. <article-title>Multi Class Disorder Detection of Magnetic Resonance Brain Images Using Composite Features and Neural Network</article-title>. <source>BioMed Eng Lett</source> (<year>2019</year>) <volume>9</volume>:<page-range>221&#x2013;31</page-range>. doi: <pub-id pub-id-type="doi">10.1007/s13534-019-00103-1</pub-id>
</citation>
</ref>
<ref id="B10">
<label>10</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kaur</surname> <given-names>T</given-names>
</name>
<name>
<surname>Gandhi</surname> <given-names>TK</given-names>
</name>
</person-group>. <article-title>Deep Convolutional Neural Networks With Transfer Learning for Automated Brain Image Classification</article-title>. <source>Mach Vision Appl Vol</source> (<year>2020</year>) <volume>31</volume>:<fpage>20</fpage>. doi: <pub-id pub-id-type="doi">10.1007/s00138-020-01069-2</pub-id>
</citation>
</ref>
<ref id="B11">
<label>11</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kharrat</surname> <given-names>A</given-names>
</name>
<name>
<surname>Neji</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>A System for Brain Image Segmentation and Classification Based on Three-Dimensional Convolutional Neural Network</article-title>. <source>Computacion Y Sistemas</source> (<year>2020</year>) <volume>24</volume>:<page-range>1617&#x2013;26</page-range>. doi: <pub-id pub-id-type="doi">10.13053/cys-24-4-3058</pub-id>
</citation>
</ref>
<ref id="B12">
<label>12</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reddy</surname> <given-names>AVN</given-names>
</name>
<name>
<surname>Krishna</surname> <given-names>CP</given-names>
</name>
<name>
<surname>Mallick</surname> <given-names>PK</given-names>
</name>
</person-group>. <article-title>An Image Classification Framework Exploring the Capabilities of Extreme Learning Machines and Artificial Bee Colony</article-title>. <source>Neural Computing Appl</source> (<year>2020</year>) <volume>32</volume>:<page-range>3079&#x2013;99</page-range>. doi: <pub-id pub-id-type="doi">10.1007/s00521-019-04385-5</pub-id>
</citation>
</ref>
<ref id="B13">
<label>13</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rudie</surname> <given-names>JD</given-names>
</name>
<name>
<surname>Weiss</surname> <given-names>DA</given-names>
</name>
<name>
<surname>Saluja</surname> <given-names>R</given-names>
</name>
<name>
<surname>Rauschecker</surname> <given-names>AM</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>J</given-names>
</name>
<name>
<surname>Sugrue</surname> <given-names>L</given-names>
</name>
<etal/>
</person-group>. <article-title>Multi-Disease Segmentation of Gliomas and White Matter Hyperintensities in the BraTS Data Using a 3D Convolutional Neural Network</article-title>. <source>Front Comput Neurosci</source> (<year>2019</year>) <volume>13</volume>:<page-range>84</page-range>. doi: <pub-id pub-id-type="doi">10.3389/fncom.2019.00084</pub-id>
</citation>
</ref>
<ref id="B14">
<label>14</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Samikannu</surname> <given-names>R</given-names>
</name>
<name>
<surname>Ravi</surname> <given-names>R</given-names>
</name>
<name>
<surname>Murugan</surname> <given-names>S</given-names>
</name>
<name>
<surname>Diarra</surname> <given-names>B</given-names>
</name>
</person-group>. <article-title>An Efficient Image Analysis Framework for the Classification of Glioma Brain Images Using CNN Approach</article-title>. <source>Computers Mater &amp; Continua</source> (<year>2020</year>) <volume>63</volume>:<page-range>1133&#x2013;42</page-range>. doi: <pub-id pub-id-type="doi">10.32604/cmc.2020.08578</pub-id>
</citation>
</ref>
<ref id="B15">
<label>15</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Talo</surname> <given-names>M</given-names>
</name>
<name>
<surname>Baloglu</surname> <given-names>UB</given-names>
</name>
<name>
<surname>Y&#x131;ld&#x131;r&#x131;m</surname> <given-names>&#xd6;</given-names>
</name>
<name>
<surname>Acharya</surname> <given-names>UR</given-names>
</name>
</person-group>. <article-title>Application of Deep Transfer Learning for Automated Brain Abnormality Classification Using MR Images</article-title>. <source>Cogn Syst Res</source> (<year>2019</year>) <volume>54</volume>:<page-range>176&#x2013;88</page-range>. doi: <pub-id pub-id-type="doi">10.1016/j.cogsys.2018.12.007</pub-id>
</citation>
</ref>
<ref id="B16">
<label>16</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ural</surname> <given-names>B</given-names>
</name>
<name>
<surname>&#xd6;z&#x131;&#x15f;&#x131;k</surname> <given-names>P</given-names>
</name>
<name>
<surname>Hardala&#xe7;</surname> <given-names>F</given-names>
</name>
</person-group>. <article-title>An Improved Computer Based Diagnosis System for Early Detection of Abnormal Lesions in the Brain Tissues With Using Magnetic Resonance and Computerized Tomography Images</article-title>. <source>Multimedia Tools Appl</source> (<year>2020</year>) <volume>79</volume>:<page-range>15613&#x2013;34</page-range>. doi: <pub-id pub-id-type="doi">10.1007/s11042-019-07823-7</pub-id>
</citation>
</ref>
<ref id="B17">
<label>17</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wong</surname> <given-names>KCL</given-names>
</name>
<name>
<surname>Syeda-Mahmood</surname> <given-names>T</given-names>
</name>
<name>
<surname>Moradi</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>Building Medical Image Classifiers With Very Limited Data Using Segmentation Networks</article-title>. <source>Med Image Anal</source> (<year>2018</year>) <volume>49</volume>:<page-range>105&#x2013;16</page-range>. doi: <pub-id pub-id-type="doi">10.1016/j.media.2018.07.010</pub-id>
</citation>
</ref>
<ref id="B18">
<label>18</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>S</given-names>
</name>
<name>
<surname>Ji</surname> <given-names>G</given-names>
</name>
<name>
<surname>Dong</surname> <given-names>Z</given-names>
</name>
</person-group>. <article-title>An MR Brain Images Classifier System <italic>via</italic> Particle Swarm Optimization and Kernel Support Vector Machine</article-title>. <source>ScientificWorldJournal</source> (<year>2013</year>) <volume>2013</volume>:<fpage>130134</fpage>. doi: <pub-id pub-id-type="doi">10.1155/2013/130134</pub-id>
</citation>
</ref>
<ref id="B19">
<label>19</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Menze</surname> <given-names>BH</given-names>
</name>
<name>
<surname>Jakab</surname> <given-names>A</given-names>
</name>
<name>
<surname>Bauer</surname> <given-names>S</given-names>
</name>
<name>
<surname>Kalpathy-Cramer</surname> <given-names>J</given-names>
</name>
<name>
<surname>Farahani</surname> <given-names>K</given-names>
</name>
<name>
<surname>Kirby</surname> <given-names>J</given-names>
</name>
<etal/>
</person-group>. <article-title>The Multimodal Brain Tumor Image Segmentation Benchmark (BRATS)</article-title>. <source>IEEE Trans Med Imaging</source> (<year>2015</year>) <volume>34</volume>(<issue>10</issue>):<fpage>1993</fpage>&#x2013;<lpage>2024</lpage>. doi: <pub-id pub-id-type="doi">10.1109/TMI.2014.2377694</pub-id>
</citation>
</ref>
<ref id="B20">
<label>20</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bakas</surname> <given-names>S</given-names>
</name>
<name>
<surname>Akbari</surname> <given-names>H</given-names>
</name>
<name>
<surname>Sotiras</surname> <given-names>A</given-names>
</name>
<name>
<surname>Bilello</surname> <given-names>M</given-names>
</name>
<name>
<surname>Rozycki</surname> <given-names>M</given-names>
</name>
<name>
<surname>Kirby</surname> <given-names>JS</given-names>
</name>
<etal/>
</person-group>. <article-title>Advancing The Cancer Genome Atlas Glioma MRI Collections With Expert Segmentation Labels and Radiomic Features</article-title>. <source>Sci Data</source> (<year>2017</year>) <volume>4</volume>:<page-range>170117</page-range>. doi: <pub-id pub-id-type="doi">10.1038/sdata.2017.117</pub-id>
</citation>
</ref>
<ref id="B21">
<label>21</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bakas</surname> <given-names>S</given-names>
</name>
<name>
<surname>Reyes</surname> <given-names>M</given-names>
</name>
<name>
<surname>Jakab</surname> <given-names>A</given-names>
</name>
<name>
<surname>Bauer</surname> <given-names>S</given-names>
</name>
<name>
<surname>Rempfler</surname> <given-names>M</given-names>
</name>
<name>
<surname>Crimi</surname> <given-names>A</given-names>
</name>
<etal/>
</person-group>. <article-title>Identifying the Best Machine Learning Algorithms for Brain Tumor Segmentation, Progression Assessment, and Overall Survival Prediction in the BRATS Challenge</article-title>. <source>arXiv</source> (<year>2018</year>). preprint arXiv:1811.02629</citation>
</ref>
<ref id="B22">
<label>22</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Clark</surname> <given-names>K</given-names>
</name>
<name>
<surname>Vendt</surname> <given-names>B</given-names>
</name>
<name>
<surname>Smith</surname> <given-names>K</given-names>
</name>
<name>
<surname>Freymann</surname> <given-names>J</given-names>
</name>
<name>
<surname>Kirby</surname> <given-names>J</given-names>
</name>
<name>
<surname>Koppel</surname> <given-names>P</given-names>
</name>
<etal/>
</person-group>. <article-title>The Cancer Imaging Archive (TCIA): Maintaining and Operating a Public Information Repository</article-title>. <source>J Digit Imaging</source> (<year>2013</year>) <volume>26</volume>(<issue>6</issue>):<page-range>1045&#x2013;57</page-range>. doi: <pub-id pub-id-type="doi">10.1007/s10278-013-9622-7</pub-id>
</citation>
</ref>
<ref id="B23">
<label>23</label>
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Johnson</surname> <given-names>KA</given-names>
</name>
<name>
<surname>Becker</surname> <given-names>JA</given-names>
</name>
</person-group>. <source>The Whole Brain Atlas</source> (<year>1999</year>). Available at: <uri xlink:href="https://www.med.harvard.edu/aanlib/home.html">https://www.med.harvard.edu/aanlib/home.html</uri>.</citation>
</ref>
<ref id="B24">
<label>24</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Riley</surname> <given-names>RD</given-names>
</name>
<name>
<surname>Higgins</surname> <given-names>JP</given-names>
</name>
<name>
<surname>Deeks</surname> <given-names>JJ</given-names>
</name>
</person-group>. <article-title>Interpretation of Random Effects Meta-Analyses</article-title>. <source>BMJ</source> (<year>2011</year>) <volume>342</volume>:<fpage>d549</fpage>. doi: <pub-id pub-id-type="doi">10.1136/bmj.d549</pub-id>
</citation>
</ref>
<ref id="B25">
<label>25</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Yao</surname> <given-names>Z</given-names>
</name>
<name>
<surname>Li</surname> <given-names>C</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>X</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>H</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>C</given-names>
</name>
</person-group>. <article-title>A Step-by-Step Guide to the Systematic Review and Meta-Analysis of Diagnostic and Prognostic Test Accuracy Evaluations</article-title>. <source>Br J Cancer</source> (<year>2013</year>) <volume>108</volume>:<page-range>2299&#x2013;303</page-range>. doi: <pub-id pub-id-type="doi">10.1038/bjc.2013.185</pub-id>
</citation>
</ref>
<ref id="B26">
<label>26</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Collins</surname> <given-names>GS</given-names>
</name>
<name>
<surname>Moons</surname> <given-names>KGM</given-names>
</name>
</person-group>. <article-title>Reporting of Artificial Intelligence Prediction Models</article-title>. <source>Lancet</source> (<year>2019</year>) <volume>393</volume>:<page-range>1577&#x2013;9</page-range>. doi: <pub-id pub-id-type="doi">10.1016/S0140-6736(19)30037-6</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>