<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="brief-report" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Endocrinol.</journal-id>
<journal-title>Frontiers in Endocrinology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Endocrinol.</abbrev-journal-title>
<issn pub-type="epub">1664-2392</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fendo.2022.853863</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Endocrinology</subject>
<subj-group>
<subject>Perspective</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Analysis of Half a Billion Datapoints Across Ten Machine-Learning Algorithms Identifies Key Elements Associated With Insulin Transcription in Human Pancreatic Islet Cells</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Wong</surname>
<given-names>Wilson K. M.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1631154"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Thorat</surname>
<given-names>Vinod</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1634572"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Joglekar</surname>
<given-names>Mugdha V.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn003">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/236359"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Dong</surname>
<given-names>Charlotte X.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Lee</surname>
<given-names>Hugo</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Chew</surname>
<given-names>Yi Vee</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Bhave</surname>
<given-names>Adwait</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Hawthorne</surname>
<given-names>Wayne J.</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/996324"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Engin</surname>
<given-names>Feyza</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/628958"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Pant</surname>
<given-names>Aniruddha</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Dalgaard</surname>
<given-names>Louise T.</given-names>
</name>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/360044"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Bapat</surname>
<given-names>Sharda</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<xref ref-type="author-notes" rid="fn004">
<sup>&#x2021;</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Hardikar</surname>
<given-names>Anandwardhan A.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff6">
<sup>6</sup>
</xref>
<xref ref-type="author-notes" rid="fn001">
<sup>*</sup>
</xref>
<xref ref-type="author-notes" rid="fn004">
<sup>&#x2021;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/1518003"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Diabetes and Islet Biology Group, School of Medicine, Western Sydney University</institution>, <addr-line>Campbelltown, NSW</addr-line>, <country>Australia</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Healthcare Analytics, AlgoAnalytics</institution>, <addr-line>Pune</addr-line>, <country>India</country>
</aff>
<aff id="aff3">
<sup>3</sup>
<institution>Department of Biomolecular Chemistry, School of Medicine and Public Health, University of Wisconsin-Madison</institution>, <addr-line>Madison, WI</addr-line>, <country>United States</country>
</aff>
<aff id="aff4">
<sup>4</sup>
<institution>Centre for Transplant and Renal Research, Westmead Institute for Medical Research, University of Sydney</institution>, <addr-line>Westmead, NSW</addr-line>, <country>Australia</country>
</aff>
<aff id="aff5">
<sup>5</sup>
<institution>Division of Endocrinology, Diabetes &amp; Metabolism, Department of Medicine, School of Medicine and Public Health, University of Wisconsin-Madison</institution>, <addr-line>Madison, WI</addr-line>, <country>United States</country>
</aff>
<aff id="aff6">
<sup>6</sup>
<institution>Department of Science and Environment, Roskilde University</institution>, <addr-line>Roskilde</addr-line>, <country>Denmark</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>Edited by: Vikash Chandra, University of Helsinki, Finland</p>
</fn>
<fn fn-type="edited-by">
<p>Reviewed by: OM Prakash Dwivedi, University of Helsinki, Finland</p>
</fn>
<fn fn-type="corresp" id="fn001">
<p>*Correspondence: Sharda Bapat, <email xlink:href="mailto:sbapat@algoanalytics.com">sbapat@algoanalytics.com</email>; Anandwardhan A. Hardikar, <email xlink:href="mailto:A.Hardikar@westernsydney.edu.au">A.Hardikar@westernsydney.edu.au</email>
</p>
</fn>
<fn fn-type="equal" id="fn003">
<p>&#x2020;These authors have contributed equally to this work and share first authorship</p>
</fn>
<fn fn-type="equal" id="fn004">
<p>&#x2021;These authors have contributed equally to this work and share last authorship</p>
</fn>
<fn fn-type="other" id="fn002">
<p>This article was submitted to Diabetes: Molecular Mechanisms, a section of the journal Frontiers in Endocrinology</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>23</day>
<month>03</month>
<year>2022</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>13</volume>
<elocation-id>853863</elocation-id>
<history>
<date date-type="received">
<day>13</day>
<month>01</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>22</day>
<month>02</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2022 Wong, Thorat, Joglekar, Dong, Lee, Chew, Bhave, Hawthorne, Engin, Pant, Dalgaard, Bapat and Hardikar</copyright-statement>
<copyright-year>2022</copyright-year>
<copyright-holder>Wong, Thorat, Joglekar, Dong, Lee, Chew, Bhave, Hawthorne, Engin, Pant, Dalgaard, Bapat and Hardikar</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Machine learning (ML)-workflows enable unprejudiced/robust evaluation of complex datasets. Here, we analyzed over 490,000,000 data points to compare 10 different ML-workflows in a large (N=11,652) training dataset of human pancreatic single-cell (sc-)transcriptomes to identify genes associated with the presence or absence of insulin transcript(s). Prediction accuracy/sensitivity of each ML-workflow was tested in a separate validation dataset (N=2,913). Ensemble ML-workflows, in particular the Random Forest ML-algorithm, delivered high predictive power (AUC=0.83) and sensitivity (0.98), compared to other algorithms. The transcripts identified through these analyses also demonstrated significant correlation with insulin in bulk RNA-seq data from human islets. The top-10 features (including <italic>IAPP, ADCYAP1, LDHA</italic> and <italic>SST</italic>) common to the three Ensemble ML-workflows were significantly dysregulated in scRNA-seq datasets from Ire1&#x3b1;<sup>&#x3b2;-/-</sup> mice that demonstrate dedifferentiation of pancreatic &#x3b2;-cells in a model of type 1 diabetes (T1D) and in pancreatic single cells from individuals with type 2 diabetes (T2D). Our findings provide a direct comparison of ML-workflows in big data analyses, identify key elements associated with insulin transcription and provide workflows for future analyses.</p>
</abstract>
<kwd-group>
<kwd>machine-learning (ML) algorithms</kwd>
<kwd>insulin</kwd>
<kwd>diabetes</kwd>
<kwd>beta-cell</kwd>
<kwd>single-cell RNA-sequencing (scRNAseq)</kwd>
<kwd>human islet</kwd>
</kwd-group>
<counts>
<fig-count count="3"/>
<table-count count="0"/>
<equation-count count="0"/>
<ref-count count="45"/>
<page-count count="10"/>
<word-count count="4757"/>
</counts>
</article-meta>
</front>
<body>
<sec id="s1" sec-type="intro">
<title>Introduction</title>
<p>Recent years have witnessed a surge in single-cell transcriptomic technologies; many already generating newer data and insights to address specific biological questions. Machine learning (ML) algorithms offer an unbiased mathematical workflow that facilitates the identification of complex relationships across variables. ML workflows involve an orderly set of instructions using automated, unbiased &#x2018;learning&#x2019; processes usually targeted towards developing (training) a model that can be validated in a separate (test) dataset (<xref ref-type="bibr" rid="B1">1</xref>). One goal of ML algorithms is to analyze big data to identify variables that cannot be recognized through conventional biostatistical techniques, and enhance development of predictive algorithms (<xref ref-type="bibr" rid="B2">2</xref>, <xref ref-type="bibr" rid="B3">3</xref>).</p>
<p>Currently, several ML algorithms are available to researchers handling big data in omics-based high content analyses. These can be broadly divided into two categories: supervised and unsupervised algorithms (<xref ref-type="bibr" rid="B4">4</xref>). Supervised methods (such as decision tree) derive relationships between one dependent and multiple independent variables using a training set and then apply that knowledge in the testing set for predictive/efficacy analysis. Unsupervised methods derive patterns/data clusters amongst all available variables. ML algorithms have been used to unravel patterns/clustering in high-density transcriptome analyses (<xref ref-type="bibr" rid="B5">5</xref>) or to build associations (<xref ref-type="bibr" rid="B6">6</xref>) or for predictions in several biological processes such as determining DNA methylation states in single cells (<xref ref-type="bibr" rid="B7">7</xref>), identifying signatures of lipid or metabolite species (<xref ref-type="bibr" rid="B8">8</xref>) or microRNAs (<xref ref-type="bibr" rid="B9">9</xref>) in predicting transition from gestational diabetes to type 2 diabetes as well as in genetic studies (<xref ref-type="bibr" rid="B10">10</xref>). There are multiple ML algorithms available and it may present a challenge to select the most appropriate method for a particular dataset to answer a specific question. We, therefore, decided to compare different ML methodologies to (i) rank different ML methods for their performance on a large dataset (of 490,855,065 scRNA-sequencing data points) and (ii) understand the most important variables associated with insulin transcription.</p>
<p>Previous studies (<xref ref-type="bibr" rid="B11">11</xref>&#x2013;<xref ref-type="bibr" rid="B14">14</xref>) from several laboratories have identified master regulatory transcription factors that regulate the embryonic development of insulin-producing islet &#x3b2;-cells. Although transcription factor-mediated insulin transcription regulation is a well-known mechanism during the development of insulin-producing cells, it is also recognized that active genes localized on different chromosomal regions can dynamically regulate gene transcription in post-natal life (<xref ref-type="bibr" rid="B15">15</xref>). One approach to identify genes associated with insulin gene transcription is through single-cell (sc)RNA-seq-based big data analysis.</p>
<p>Here, we examined the performance of 10 different ML algorithms in a curated human pancreatic single-cell sequencing dataset of 490,855,065 data points (N=14,565 single cells and 33,701 expressed gene features). The aims of this study were (i) to provide a comparative account of the predictive potential of 10 different commonly used ML workflows (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;1</bold>
</xref>), and (ii) to use existing scRNA-seq datasets in identifying genes (variables) associated with or important for determining insulin transcript-containing cells.</p>
</sec>
<sec id="s2" sec-type="materials|methods">
<title>Materials and Methods</title>
<sec id="s2_1">
<title>Pancreatic Single-Cell (sc)RNA Sequencing Datasets and Analyses</title>
<sec id="s2_1_1">
<title>Human Pancreatic Single-Cell Sequencing Datasets</title>
<p>The pancreatic single-cell sequencing dataset (N=14,890) was extracted using the Panc8 data (<xref ref-type="bibr" rid="B16">16</xref>) containing multiple publicly available scRNA-seq transcriptomes (GSE84133, GSE85241, E-MTAB-5061, GSE81076, GSE86469). The original publications citing the listed GEO datasets (GSE84133, GSE85241, E-MTAB-5061, GSE81076, GSE86469) add up to a total of 31 pancreas samples across all the studies. Clinical and/or donor details are available for 26 of these samples; seven of which were indicated to be from donors with type 2 diabetes. The number of cell types in the combined single cell dataset (Panc8) can be found in the metadata of the SeuratData (version 0.2.1), using the command &#x201c;panc8@meta.data&#x201d; (in R studio version 1.2.5033). This panc8 dataset contains scRNA-seq data from acinar (n=1864), activated stellate (n=474), alpha-(n=4615), beta-(n=3679), delta-(n=1013), ductal (n=1954), endothelial (n=296), epsilon (n=30), gamma (n=625), macrophage (n=79), mast (n=56), quiescent stellate (n=180) and schwann (n=25) cell transcriptomes. Analysis was carried out by using R studio version 1.2.5033 as detailed in SOM.</p>
</sec>
<sec id="s2_1_2">
<title>Ire1&#x3b1;<sup>&#x3b2;-/-</sup> Mouse Pancreatic Single-Cell Dataset</title>
<p>Single-cell RNA-seq datasets from pancreatic islets of Ire1&#x3b1;<sup>fl/fl</sup> (N=1,163 single-cell transcriptomes from one mouse) and Ire1&#x3b1;<sup>&#x3b2;&#x2013;/&#x2013;</sup> (N=1,683 single-cell transcriptomes from two mice) were obtained through GSE144471 (<xref ref-type="bibr" rid="B17">17</xref>). The &#x3b2;-cells (Ire1&#x3b1;<sup>fl/fl</sup>: 830 cells; Ire1&#x3b1;<sup>&#x3b2;&#x2013;/&#x2013;</sup>: 816 cells) were separated from the dataset and the expression values of selected genes were evaluated in the &#x3b2;-cell population.</p>
</sec>
<sec id="s2_1_3">
<title>T2D Pancreatic Single-Cell (sc)RNA Sequencing Dataset</title>
<p>Pancreatic single-cell normalized read datasets of adult ND (with no diabetes; N=4) and T2D (N=10) donors were obtained from GSE154126 (<xref ref-type="bibr" rid="B18">18</xref>). The adult ND (N=296) and T2D (N=505) insulin-transcribing cells were compared and used for validation. In this dataset, insulin-transcribing cells were identified and defined as any single cell that contained (non-zero) <italic>INS</italic> transcript.</p>
</sec>
<sec id="s2_1_4">
<title>Human Pancreatic Single-Cell Sequencing Classification and Analyses</title>
<p>Deidentified datasets were shared with data scientists. A random number generator function was used to allocate 80% of samples to a training set. Analyses were carried out using Python (Ver:3.4), wherein the data was imported, transposed, edited to delete INS and INS-IGF2 columns from the data frame and labeled (label=0 where INS=0 and label=1 where INS&gt;0). Classifiers were initialized and models were trained using the discovery (80%) data set. Predictive analyses were then carried out on the validation (20%) set and the resulting accuracy metrics were saved to compare the feature importance. Selected classifiers (Random Forest, Gradient Boosting, Decision Tree Classifier, Logistic Regression, Multinomial Naive Bayes Classifier, ADA Boost Classifier, Linear Discriminant Analysis, Ridge Classifier, KNeighbors Classifier and Linear Support Vector Classifier) were used on the same set.</p>
</sec>
</sec>
<sec id="s2_2">
<title>Pancreatic Islet RNA Sequencing Dataset and Analysis</title>
<sec id="s2_2_1">
<title>Human Pancreatic Islet Bulk RNA Sequencing Dataset</title>
<p>The human pancreatic islet RNA-seq dataset was obtained from GSE152111 (<xref ref-type="bibr" rid="B6">6</xref>). This RNA-seq dataset contains n=66 human islet samples from 65 different donors with no diabetes. Two of the 66 RNA-seq samples were duplicates from the same donor, and their RNA-seq profiles were highly correlated with each other (Pearson r=0.99). The average of the duplicates of this donor was calculated prior to analysis. Data were analyzed as DEseq values, which are normalized RNA-seq counts. DEseq accounts for the different read depths between samples by estimating the effective library size (using the estimate size factors function). The size factor for each sample is the median, across genes, of the ratio of a gene&#x2019;s raw count in that sample to that gene&#x2019;s geometric mean across all samples. DEseq normalization involves dividing the raw count of a gene in a sample by that sample&#x2019;s size factor. The implementation of DEseq has been described previously (<xref ref-type="bibr" rid="B19">19</xref>).</p>
</sec>
</sec>
<sec id="s2_3">
<title>Pathway Analysis</title>
<p>To analyze enrichment for &#x3b2;-cell pathways, lists of pancreatic single-cell features generated/predicted by ML algorithms (Random forest, Gradient boosting, Decision tree classifier and ADA Boost classifier) were compared with &#x3b2; cell-expressed genes (E-GEOD-20966) using Gene Ontology (GO) over-representation analysis on Pantherdb.org (<xref ref-type="bibr" rid="B20">20</xref>). Preanalytical workflows included cleaning up entries not mapping to protein-coding gene symbols in E-GEOD-20966. Gene lists for each ML algorithm consisted of up to the top 100 genes as predictors of insulin expression, which were compared against the data set of &#x3b2;-cell expressed genes (N=13,165 from E-GEOD-20966). Overrepresentation analysis using GO categories for biological processes (GO: BP) was performed using binomial testing, with the false discovery rate used to correct for multiple testing. Lists of significantly enriched pathways associated with each ML algorithm were compared using Venn diagrams (<xref ref-type="bibr" rid="B21">21</xref>).</p>
</sec>
<sec id="s2_4">
<title>Statistical Analysis</title>
<p>The R software (ver. 3.6.1; R Foundation for Statistical Computing, Vienna, Austria) was used to create the categorical bubble plot using the packages ggplot2 (3.3.3), ggpubr (0.4.0) and proto (1.0.0). Spearman correlation matrix analysis was generated using the R packages corrplot (0.90), Hmisc (4.6.0), dplyr (1.0.7) and readxl (1.3.1) in R and Rstudio software. Microsoft Excel (ver. 2016; Microsoft, Redmond, WA, USA), the R software and/or GraphPad Prism (ver. 8.4.1; GraphPad Software, San Diego, CA, USA) were used for univariate test comparisons, and the Benjamini-Hochberg method was used to correct for multiple testing.</p>
</sec>
</sec>
<sec id="s3" sec-type="results">
<title>Results</title>
<sec id="s3_1">
<title>Machine Learning (ML) Algorithms Yield Varying Performance Outputs</title>
<p>The scRNA-seq data were obtained from public databanks (GSE84133, GSE85241, E-MTAB-5061, GSE81076, GSE86469) of human pancreatic single-cell transcriptomes. We first randomized this available pancreatic scRNA-seq transcriptomic data and allocated 80% of samples to a discovery/training set (Training; N=11,652 samples) and the remaining 20% to a validation/testing set (Test; N=2,913 samples). With the availability of several ML algorithms (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;1</bold>
</xref>), we probed the discovery dataset using 10 different ML workflows (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1A</bold>
</xref>) to identify features highly associated with the presence of insulin transcripts in a single cell. Genes (features) identified as the most important/predictive variables for each of these ML workflows were used to identify insulin transcript-containing cells from the validation set (remainder 20% of the samples). Validation results of the identified gene features from each of the 10 ML workflows are presented in the form of receiver operator characteristic (ROC) curves (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1B</bold>
</xref>). The top three ML algorithms (Gradient boosting, Random Forest and ADA boost; all Ensemble workflows) demonstrated similar performance, returning an Area Under Curve (AUC) of between 0.83 and 0.86. A confusion matrix is presented below each ROC curve dataset (<xref ref-type="fig" rid="f1">
<bold>Figure&#xa0;1B</bold>
</xref>) to demonstrate the false-positive and false-negative predictions within every workflow. These analyses show that although Ensemble machine learning workflows are the best in predicting insulin-transcribing cells, other workflows, such as logistic regression, also perform closely to the Ensemble methods.</p>
<fig id="f1" position="float">
<label>Figure&#xa0;1</label>
<caption>
<p>Study design and performance of different ML workflows. A flowchart of our analytical plan is presented in <bold>(A)</bold>. Previously published datasets of single-cell RNA-sequencing analyses from pancreatic islet cell preparations were randomly divided into a training (N = 11,652) and a validation (N = 2,913) set. The learning phase (Training) involved identifying features (genes) and their associated weights/coefficients in each of the 10 machine learning (ML) methods (listed 1-10). Weighted features were used in the prediction of insulin transcription (across 10 ML algorithms) to test the performance of these models in an independent validation set of samples (N = 2,913). ROC curve plots for each ML algorithm using validation set data are presented in <bold>(B)</bold>. The area under the curve (AUC) for the tested workflows are presented along with a confusion matrix below the plot. Percent values are rounded off to the nearest integer (and hence may not sum up to an absolute 100%) and represent true negative (red), true positive (green), false positive (yellow) and false negative (blue) samples identified in the validation set.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fendo-13-853863-g001.tif"/>
</fig>
</sec>
<sec id="s3_2">
<title>Ensemble ML Workflows to Identify Genes Associated With Insulin Transcription</title>
<p>The scRNA-seq datasets obtained from public databanks of human pancreatic single-cell transcriptomes were classified as insulin-transcribing (1) or those with no insulin (0) (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2A</bold>
</xref>). As described earlier, all the three Ensemble ML workflows presented with an AUC that was better than any of the other ML workflows tested in our ROC curve analysis. Ensemble workflows also presented with high accuracy (&#x2265;87%), precision (&#x2265;0.89), and sensitivity (&#x2265;0.95), which was comparable to other popular workflows such as logistic regression (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2B</bold>
</xref>). As Ensemble ML workflows such as Random Forest use a collection of decision trees (forest), we decided to compare the performance of the top three (Ensemble) workflows to a single (Decision tree) algorithm. The relative contribution of the top 10 features (genes) from each of these ML workflows are presented as radar plots (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2C</bold>
</xref>), whilst the longer list of genes ranked by their importance is presented in <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;2</bold>
</xref>. <italic>IAPP</italic>, <italic>ADCYAP1</italic>, <italic>LDHA</italic> and <italic>SST</italic> were common to all three Ensemble workflows. We then examined the pathways targeted by these features (genes) identified through each of the Ensemble and Decision Tree classifier by comparing them to a separate islet &#x3b2;-cell dataset (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2D</bold>
</xref>). Number of GO terms enriched across all four ML workflows (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2D</bold>
</xref>) suggests several common pathways (including insulin secretion) targeted by the features identified through these analyses (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2D</bold>
</xref> and <xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;3</bold>
</xref>). These genes were also validated in a bulk RNA-seq dataset (GSE152111, n=66) of human islet samples (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure&#xa0;1</bold>
</xref>). In this analysis (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure&#xa0;1</bold>
</xref>), most of these gene transcripts had a significant positive correlation with the insulin transcript, while some gene transcripts, such as <italic>LDHA</italic>, <italic>CRP</italic>, <italic>RPS15</italic> and <italic>RPL35</italic>, negatively correlated with the insulin transcript in human islets.</p>
<fig id="f2" position="float">
<label>Figure&#xa0;2</label>
<caption>
<p>Performance and application of learned features in understanding insulin gene transcription. <bold>(A)</bold> A 2D clustering of pancreatic single cells assessed in this study using UMAP (Uniform Manifold Approximation and Projection plot). Cellular subtypes based on the UMAP clustering algorithm are labeled and graded (scale, inset) as per the level of insulin gene transcripts. <bold>(B)</bold> The performance of learning models on accurately identifying insulin-positive (1) and insulin-negative (0) single cells from the validation dataset are presented. <bold>(C)</bold> Relative weighted rank contributions of the top 10 genes in each of the four listed ML algorithms are presented as spider plots plotted in the order of importance (starting clockwise at 12-O&#x2019;clock position). Percent representation of each of the genes indicates their relative contribution in the set on the spider plot with a logarithmic scale (center=1% and outer circle=100%). A comparison of the gene features identified by the top three ensemble workflows is presented along with those identified by the Decision Tree classifier. <bold>(D)</bold> Pathways targeted by up to the top 100 features (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;2</bold>
</xref>) from each of the four selected ML methods (RF, Random Forest; GB, Gradient Boosting; ADAB, ADA Boost; DT, Decision Tree) identified using gene ontology (GO) function analysis are presented in the Venn diagram. Number of GO terms enriched and common for top features (genes) in each ML method are plotted. <bold>(E)</bold> All significantly dysregulated genes identified from and common to the four ML algorithms <bold>(C)</bold> presented herein were assessed in the scRNA-seq dataset from Ire1&#x3b1;<sup>&#x3b2;-/-</sup> mice. Bubble plot presenting fold-change and statistical significance (q-value) for each of the genes in Ire1&#x3b1;<sup>fl/fl</sup> and Ire1&#x3b1;<sup>&#x3b2;&#x2013;/&#x2013;</sup> mice are shown. Blue color represents downregulation while red color indicates increased abundance of transcripts in Ire1&#x3b1;<sup>&#x3b2;-/-</sup> mice compared to control.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fendo-13-853863-g002.tif"/>
</fig>
</sec>
<sec id="s3_3">
<title>Insulin-Associated Genes Are Dysregulated During &#x3b2;-Cell Dedifferentiation</title>
<p>Dedifferentiation of &#x3b2;-cells, characterized by the loss of expression of key &#x3b2;-cell maturation marker genes with an accompanying reduction in insulin secretion, has been observed in mouse models of type 1 (T1D) and type 2 (T2D) diabetes, as well as in individuals with diabetes (<xref ref-type="bibr" rid="B22">22</xref>&#x2013;<xref ref-type="bibr" rid="B25">25</xref>). We questioned if the expression of gene variables identified and validated (<italic>in silico</italic>) as being associated with insulin gene transcription (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2C</bold>
</xref>) is dysregulated in a mouse model of T1D with evidence of islet dedifferentiation. Transient dedifferentiation of islet &#x3b2;-cells was recently reported in an established T1D preclinical mouse model upon &#x3b2;-cell-specific deletion of a key stress response gene, <italic>Ire1&#x3b1;</italic> (Ire1&#x3b1;<sup>&#x3b2;-/-</sup>) (<xref ref-type="bibr" rid="B17">17</xref>). These mice also demonstrated reduced &#x3b2;-cell number as well as diminished expression of insulin transcripts in &#x3b2;-cells compared to control (Ire1&#x3b1;<sup>fl/fl</sup>) mice. Therefore, we evaluated the expression of the total 25 gene transcripts that made up the top 10 features across the four different ML workflows (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2C</bold>
</xref>) in the single cell datasets generated from these (Ire1&#x3b1;<sup>&#x3b2;-/-</sup> and Ire1&#x3b1;<sup>fl/fl</sup>) islets. Twelve of these features were not significantly different between Ire1&#x3b1;<sup>&#x3b2;-/-</sup> and Ire1&#x3b1;<sup>fl/fl</sup> islets. However, the remaining thirteen features were significantly dysregulated in &#x3b2;-cells of Ire1&#x3b1;<sup>&#x3b2;-/-</sup> mice that were undergoing dedifferentiation (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2E</bold>
</xref>). Dedifferentiating &#x3b2;-cells showed significant downregulation of five key genes: <italic>Iapp</italic>, <italic>MafA</italic>, <italic>Pcsk1n</italic>, <italic>Atp5e</italic> and <italic>Ldha</italic>, whilst all other insulin-associated gene transcripts showed significantly higher levels (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2E</bold>
</xref>).</p>
<p>In type 2 diabetes (T2D), it is known that <italic>INS</italic> transcript expression is reduced. Therefore we validated the top, common gene features (<italic>IAPP</italic>, <italic>SST</italic>, <italic>MAFA</italic>, <italic>ADCYAP1</italic> and <italic>LDHA</italic>) from the three ML workflows using a separate publicly available single-cell RNA-seq dataset from non-diabetic (ND) vs T2D adult human pancreas (GSE154126 (<xref ref-type="bibr" rid="B18">18</xref>)). Four of the five genes (<italic>IAPP</italic>, <italic>SST</italic>, <italic>MAFA</italic>, <italic>ADCYAP1</italic>), were significantly lower in T2D insulin-transcribing cells compared to ND insulin-transcribing cells (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;4</bold>
</xref>).</p>
</sec>
</sec>
<sec id="s4" sec-type="discussion">
<title>Discussion</title>
<p>In this study, we compared the performance characteristics of 10 different ML algorithms (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;1</bold>
</xref>) that are currently used in big data analyses. We analyzed a scRNA-seq dataset that was randomly split into a larger (80%; 392,684,052 data points) training set involving model learning, and then a smaller (20%; 98,171,013 data points) validation set. All algorithms identified a set of genes (features) that associate with insulin-production (1) defined as the presence of one or more transcripts of insulin in a sample, or no insulin production (0) from the 11,652 single cells analyzed in the training set. We validated the predictive features identified through each ML workflow in the validation/test set of 2,913 single cell transcriptomes. ML workflows that returned high performance (based on AUC, sensitivity/specificity) were selected and the top 10 genes (ranked by their importance) in each of those ML methods were re-validated in discrete mouse and human datasets that model beta cell dedifferentiation (summarized in <xref ref-type="fig" rid="f3">
<bold>Figure&#xa0;3</bold>
</xref>).</p>
<fig id="f3" position="float">
<label>Figure&#xa0;3</label>
<caption>
<p>A summary of study design and results. Workflow and findings of our study are presented in this schematic, which illustrates the steps in discovery and validation of the important gene features associated with insulin transcript in pancreatic single cell transcriptomes. We further confirm these features to be dysregulated during &#x3b2;-cell dedifferentiation in a T1D mouse model and in individuals with T2D.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fendo-13-853863-g003.tif"/>
</fig>
<p>Our analysis provides two major outcomes that are of interest to a broad range of data analysts and biologists. First, a comparison of the ML algorithms identified Ensemble-based ML methods as the best performing algorithms in our analyses. Logistic regression performed closest to Ensemble methods, in line with previous reports in clinical datasets (<xref ref-type="bibr" rid="B26">26</xref>). We then compared Ensemble methodologies to the Decision tree algorithm. Decision tree offers the often-desired simplistic model generation method as compared to Ensemble methods such as Random Forest. The latter builds multiple decision trees independently and offers an overall learning model that is closest to the best possible prediction. Indeed, Decision tree was determined to be a weaker predictor than the Random Forest as the latter reduces variance using different sample sets (bootstrap) in training, randomizing feature subsets, and combining the predictive learning by building multiple decision trees. Random Forest prediction outcomes were similar to gradient boosting, which also builds a set of decision trees, but one tree at a time. The bagging and boosting approach used in ADA/Gradient boosting methods seems to have offered better accuracy and performance in insulin prediction analysis than those observed using Random Forest, whereas the Random Forest algorithm offered the highest sensitivity (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2B</bold>
</xref>) amongst all methodologies tested.</p>
<p>The other outcome from this analysis is the identification of genes that are associated with and predictive of insulin gene transcription in single cells. Since bulk RNA-sequencing studies do not offer the desired single-cell resolution to identify transcriptional regulation at a cellular level, our analyses provide a firsthand view of insulin gene transcriptional determinants identified through an unbiased, big data machine learning approach. The top three methodologies (based on high AUC values) belonged to the Ensemble machine learning workflows. Weighted relative importance of the top-10 most important features is compared (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2C</bold>
</xref>). Interestingly, five genes were common to the top 10 features from all the algorithms compared &#x2013; <italic>IAPP, ADCYAP1, MAFA, SST</italic> and <italic>LDHA</italic>. The top-ranked gene associated with insulin gene transcription across all the Ensemble workflows was <italic>IAPP</italic>. Islet amyloid polypeptide (<italic>IAPP</italic>) and insulin are known to be expressed in pancreatic islet &#x3b2;-cells and co-secreted in response to changes in glucose concentration (<xref ref-type="bibr" rid="B27">27</xref>, <xref ref-type="bibr" rid="B28">28</xref>). Their mRNA levels are also regulated by glucose. The promoters of both these genes share similar cis-acting sequence elements, and both bind the master regulatory transcription factor <italic>PDX1</italic> (
<xref ref-type="bibr" rid="B27">27</xref>). <italic>FoxA2</italic> (<italic>HNF-3&#x3b2;</italic>) negatively regulates <italic>IAPP</italic> promoter activity (<xref ref-type="bibr" rid="B29">29</xref>) and has also been shown to suppress insulin gene expression (<xref ref-type="bibr" rid="B30">30</xref>). Although insulin gene expression is known to be regulated by several islet-enriched transcription factors, <italic>MafA</italic> is the most well recognized &#x3b2;-cell-specific activator of insulin gene expression (<xref ref-type="bibr" rid="B31">31</xref>). The selection of <italic>MAFA</italic> as a key feature by three of the compared ML approaches tested through this analysis is therefore not surprising. The inclusion of <italic>SST</italic> in the top three gene features is intriguing. Somatostatin expression is known to be important in control of insulin release and ablation of somatostatin-expressing delta cells impairs pancreatic islet function and causes neonatal death in rodents (<xref ref-type="bibr" rid="B32">32</xref>). SST analogs were shown to inhibit the release of insulin <italic>via</italic> the activation of both ATP sensitive K<sup>+</sup> channels and G protein-coupled inward rectifier K<sup>+</sup> channels (<xref ref-type="bibr" rid="B33">33</xref>). Another candidate that was identified through these analyses is <italic>MTRNR2L8</italic>, a neuroprotective and antiapoptotic peptide derived from a portion of the mitochondrial <italic>MT-RNR2</italic> gene and reported in fetal as well as adult beta cells (<xref ref-type="bibr" rid="B34">34</xref>). <italic>ADCYAP1</italic> stimulates insulin secretion in a glucose-dependent manner (<xref ref-type="bibr" rid="B35">35</xref>) and genetic screening in T2D Caucasians indicated the presence of two SNPs in exons 3 and 5 of this gene to be associated with T2D (<xref ref-type="bibr" rid="B36">36</xref>). 
Finally, <italic>LDHA</italic>, which was also selected through these unbiased analyses across the top-three ML workflows, is a pancreatic &#x3b2;-cell disallowed gene (<xref ref-type="bibr" rid="B37">37</xref>&#x2013;<xref ref-type="bibr" rid="B39">39</xref>) and human <italic>LDHA</italic> levels are predictive of insulin transcription (<xref ref-type="bibr" rid="B40">40</xref>). Consistent with these previous reports, our validation analysis in human islet RNA-seq data demonstrated negative correlation of <italic>LDHA</italic> and positive correlation of <italic>ADCYAP1</italic>, <italic>MAFA</italic> and <italic>SST</italic> transcripts with insulin (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Figure&#xa0;1</bold>
</xref>). Together, these algorithms help in identifying a set of genes expressed in or disallowed from insulin-producing pancreatic &#x3b2;-cells.</p>
<p>Mouse models often provide the validation to understand mechanisms that cannot be tested in human studies. The Ire1&#x3b1;<sup>&#x3b2;-/-</sup> mouse offers a unique model, wherein pancreatic &#x3b2;-cells transiently dedifferentiate during early post-natal life, allowing these knockout mice to escape immune-mediated &#x3b2;-cell destruction and T1D in later life (<xref ref-type="bibr" rid="B17">17</xref>). Analysis of islet single cell sequencing data from this model identified genes that were significantly dysregulated in &#x3b2;-cells of Ire1&#x3b1;<sup>&#x3b2;-/-</sup> mice when compared to control (Ire1&#x3b1;<sup>fl/fl</sup>) mice. Eight of thirteen features (from the top 10 features in each of the four ML workflows, <xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2C</bold>
</xref>), which showed significant dysregulation between Ire1&#x3b1;<sup>&#x3b2;-/-</sup> and Ire1&#x3b1;<sup>fl/fl</sup> mice are upregulated in &#x3b2;-cells of Ire1&#x3b1;<sup>&#x3b2;-/-</sup> mice (<xref ref-type="fig" rid="f2">
<bold>Figure&#xa0;2E</bold>
</xref>). Analysis of T2D islet single cell data also revealed down-regulation of four common gene features (<italic>IAPP, SST, MAFA</italic> and <italic>ADCYAP1</italic> identified across our three top ML workflows) in T2D compared to ND insulin transcribing cells (<xref ref-type="supplementary-material" rid="SM1">
<bold>Supplementary Table&#xa0;4</bold>
</xref>). Interestingly, Delta Like Non-Canonical Notch Ligand 1 (<italic>DLK1</italic>) was also significantly downregulated in T2D compared to ND insulin transcribing cells (Mann-Whitney test P-value=0.0017). The imprinted region of chromosome 14q32.2 contains the microRNA cluster of <italic>DLK1</italic>-<italic>MEG3</italic>, which is highly expressed and more specific in human &#x3b2;-cells compared to &#x3b1;-cells. A previous study has also shown that in T2D human islets, the <italic>MEG3</italic>-microRNA locus expression levels are significantly lower (<xref ref-type="bibr" rid="B41">41</xref>). MicroRNAs of the 14q32 locus (such as co-expression of miR-376a and miR-432) have also been shown to target and suppress the expression of <italic>IAPP</italic> (
<xref ref-type="bibr" rid="B41">41</xref>), which was one of the top features in our analyses.</p>
<sec id="s4_1">
<title>Strength and Limitation</title>
<p>This is a first demonstration comparing multiple ML algorithms to identify key genes associated with insulin transcription using a big dataset of over 490 million data points. As anticipated, Ensemble methods perform better than most other workflows and identified a set of genes that corroborate with previous reports of transcriptional regulation of insulin in mouse and human &#x3b2;-cells. These findings indicate that unbiased ML workflows for big data analyses can generate biologically meaningful results, when applied to large training datasets. Our study provides the codes/scripts for other researchers to use in existing as well as emerging datasets for identification of gene candidates associated with other genetic pathways (e.g., related to <italic>GCG</italic> or <italic>GCK</italic>) in future or to genes recognized to be associated with T2D GWAS datasets. We recognize that there are several limitations: we are unsure as to why some other well known candidates (such as <italic>PDX1</italic>, and <italic>NEUROD</italic>) were not selected by our top predictive models. An explanation is that we used a whole pancreatic single cell dataset and that the predictive models generated through filtering out &#x3b2;-cells may be more enriched for known pro-endocrine gene regulators such as <italic>PDX1</italic>. The other explanation is that although <italic>PDX1</italic> is a key regulator, the transcript levels in these datasets using multiple scRNA-seq technologies may not be sufficient considering the sequencing depth offered by some of these scRNA-seq workflows. It would be of interest to explore &#x3b2;-cell factors associated with insulin transcript levels through subset analyses in &#x3b2;-cell types. 
This is becoming increasingly important to the islet community as differences in insulin transcripts across islet &#x3b2;-cells [i.e., &#x3b2;-cell heterogeneity (<xref ref-type="bibr" rid="B42">42</xref>)] may drive optimal &#x3b2;-cell function (<xref ref-type="bibr" rid="B43">43</xref>) as well as diabetes progression.</p>
<p>We recognize that exhaustive [e.g., LOOCV (<xref ref-type="bibr" rid="B44">44</xref>)] as well as non-exhaustive cross-validation approaches [such as K-fold cross-validation (<xref ref-type="bibr" rid="B45">45</xref>)] were not performed here. Such cross-validation approaches, although useful in assessing how results will generalize to an independent dataset, are mostly used in the validation of much smaller datasets. In big data analyses, the use of such cross-validation methodologies would limit the analyses to only those with an access to high-end cluster computing. The 10 different ML scripts used in these analyses are designed to work on a high-end personal computing device (i7 processor with 4 cores and 32GB RAM or better). We believe that the application of such ML algorithms to the expanding scRNA-seq datasets would lead to the confirmation/validation of current as well as identification of determinants of gene transcription, thereby accelerating innovation in discovery of gene targets in biology and medicine.</p>
</sec>
</sec>
<sec id="s5" sec-type="data-availability">
<title>Data Availability Statement</title>
<p>Publicly available datasets were analyzed in this study. This data can be found here: NCBI, GEO, GSE152111, GSE144471, Panc8 (GSE84133, GSE85241, E-MTAB-5061, GSE81076, GSE86469). The data codes/scripts are available through <uri xlink:href="https://github.com/Isletbiology/ML">https://github.com/Isletbiology/ML</uri>.</p>
</sec>
<sec id="s6" sec-type="author-contributions">
<title>Author Contributions</title>
<p>Conceptualization: AH. Methodology: AH, AP, SB, and LD. Software: VT, MJ, WW, CD, HL, FE, and LD. Validation: WW, MJ, VT, CD, and AB. Data curation: WW, MJ, VT, CD, HL, YC, WH, FE, LD, and AH. Writing&#x2014;original draft: AH, MJ, WW, and LD. Review and editing: all authors. Visualization: AH. Supervision: AH. Project administration: AH. Funding acquisition: AH. All authors contributed to the article and approved the submitted version.</p>
</sec>
<sec id="s7" sec-type="funding-information">
<title>Funding</title>
<p>The research presented herein has been funded through grants from the Australian Research Council Future Fellowship (FT110100254), the Juvenile Diabetes Research Foundation (JDRF) Australia T1D Clinical Research Network (JDRF/4-CDA2016-228-MB) and the Visiting Professorships (2016-18 and 2019-22) from the Danish Diabetes Academy, funded by the Novo Nordisk Foundation, grant number NNF17SA0031406 to AH. WW is supported through the Leona M. and Harry B. Helmsley Charitable Trust (Grant 2018PG-T1D009) in collaboration with the JDRF Australian Type 1 Diabetes Clinical Research Network funding (Grant 3-SRA-2019-694-M-B) to AH. MJ was supported through a JDRF International Advanced Post-doctoral award (3-APF-2016-178-A-N) and currently a transition award from JDRFI (1-FAC-2021-1063-A-N). AH acknowledges the support through the Ainsworth Medical Research Fund, Western Sydney University School of Medicine, Australia. FE was supported by a grant from the JDRF-5-CDA-2014-184-A-N and currently through grants from NIH NIDDK (1R01DK130919-01 and 1R56DK128136-01). HL is supported by NIH National Research Service Award T32 GM007215.</p>
</sec>
<sec id="s8" sec-type="COI-statement">
<title>Conflict of Interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="s9" sec-type="disclaimer">
<title>Publisher&#x2019;s Note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
</body>
<back>
<ack>
<title>Acknowledgments</title>
<p>AH acknowledges the initial interactions with Tune Pers, University of Copenhagen, Denmark.</p>
</ack>
<sec id="s10" sec-type="supplementary-material">
<title>Supplementary Material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fendo.2022.853863/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fendo.2022.853863/full#supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet_1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zou</surname> <given-names>J</given-names>
</name>
<name>
<surname>Huss</surname> <given-names>M</given-names>
</name>
<name>
<surname>Abid</surname> <given-names>A</given-names>
</name>
<name>
<surname>Mohammadi</surname> <given-names>P</given-names>
</name>
<name>
<surname>Torkamani</surname> <given-names>A</given-names>
</name>
<name>
<surname>Telenti</surname> <given-names>A</given-names>
</name>
</person-group>. <article-title>A Primer on Deep Learning in Genomics</article-title>. <source>Nat Genet</source> (<year>2019</year>) <volume>51</volume>:<page-range>12&#x2013;8</page-range>. doi: <pub-id pub-id-type="doi">10.1038/s41588-018-0295-5</pub-id>
</citation>
</ref>
<ref id="B2">
<label>2</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Culos</surname> <given-names>A</given-names>
</name>
<name>
<surname>Tsai</surname> <given-names>AS</given-names>
</name>
<name>
<surname>Stanley</surname> <given-names>N</given-names>
</name>
<name>
<surname>Becker</surname> <given-names>M</given-names>
</name>
<name>
<surname>Ghaemi</surname> <given-names>MS</given-names>
</name>
<name>
<surname>McIlwain</surname> <given-names>DR</given-names>
</name>
<etal/>
</person-group>. <article-title>Integration of Mechanistic Immunological Knowledge Into a Machine Learning Pipeline Improves Predictions</article-title>. <source>Nat Mach Intell</source> (<year>2020</year>) <volume>2</volume>:<page-range>619&#x2013;28</page-range>. doi: <pub-id pub-id-type="doi">10.1038/s42256-020-00232-8</pub-id>
</citation>
</ref>
<ref id="B3">
<label>3</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zheng</surname> <given-names>A</given-names>
</name>
<name>
<surname>Lamkin</surname> <given-names>M</given-names>
</name>
<name>
<surname>Zhao</surname> <given-names>H</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>C</given-names>
</name>
<name>
<surname>Su</surname> <given-names>H</given-names>
</name>
<name>
<surname>Gymrek</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>Deep Neural Networks Identify Sequence Context Features Predictive of Transcription Factor Binding</article-title>. <source>Nat Mach Intell</source> (<year>2021</year>) <volume>3</volume>:<page-range>172&#x2013;80</page-range>. doi: <pub-id pub-id-type="doi">10.1038/s42256-020-00282-y</pub-id>
</citation>
</ref>
<ref id="B4">
<label>4</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xu</surname> <given-names>C</given-names>
</name>
<name>
<surname>Jackson</surname> <given-names>SA</given-names>
</name>
</person-group>. <article-title>Machine Learning and Complex Biological Data</article-title>. <source>Genome Biol</source> (<year>2019</year>) <volume>20</volume>:<fpage>76</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s13059-019-1689-0</pub-id>
</citation>
</ref>
<ref id="B5">
<label>5</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hu</surname> <given-names>J</given-names>
</name>
<name>
<surname>Li</surname> <given-names>X</given-names>
</name>
<name>
<surname>Hu</surname> <given-names>G</given-names>
</name>
<name>
<surname>Lyu</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Susztak</surname> <given-names>K</given-names>
</name>
<name>
<surname>Li</surname> <given-names>M</given-names>
</name>
</person-group>. <article-title>Iterative Transfer Learning With Neural Network for Clustering and Cell Type Classification in Single-Cell RNA-Seq Analysis</article-title>. <source>Nat Mach Intell</source> (<year>2020</year>) <volume>2</volume>:<page-range>607&#x2013;18</page-range>. doi: <pub-id pub-id-type="doi">10.1038/s42256-020-00233-7</pub-id>
</citation>
</ref>
<ref id="B6">
<label>6</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wong</surname> <given-names>WKM</given-names>
</name>
<name>
<surname>Joglekar</surname> <given-names>MV</given-names>
</name>
<name>
<surname>Saini</surname> <given-names>V</given-names>
</name>
<name>
<surname>Jiang</surname> <given-names>G</given-names>
</name>
<name>
<surname>Dong</surname> <given-names>CX</given-names>
</name>
<name>
<surname>Chaitarvornkit</surname> <given-names>A</given-names>
</name>
<etal/>
</person-group>. <article-title>Machine Learning Workflows Identify a microRNA Signature of Insulin Transcription in Human Tissues</article-title>. <source>iScience</source> (<year>2021</year>) <volume>24</volume>:<fpage>102379</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.isci.2021.102379</pub-id>
</citation>
</ref>
<ref id="B7">
<label>7</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Angermueller</surname> <given-names>C</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>HJ</given-names>
</name>
<name>
<surname>Reik</surname> <given-names>W</given-names>
</name>
<name>
<surname>Stegle</surname> <given-names>O</given-names>
</name>
</person-group>. <article-title>DeepCpG: Accurate Prediction of Single-Cell DNA Methylation States Using Deep Learning</article-title>. <source>Genome Biol</source> (<year>2017</year>) <volume>18</volume>:<fpage>67</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s13059-017-1189-z</pub-id>
</citation>
</ref>
<ref id="B8">
<label>8</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lai</surname> <given-names>M</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Ronnett</surname> <given-names>GV</given-names>
</name>
<name>
<surname>Wu</surname> <given-names>A</given-names>
</name>
<name>
<surname>Cox</surname> <given-names>BJ</given-names>
</name>
<name>
<surname>Dai</surname> <given-names>FF</given-names>
</name>
<etal/>
</person-group>. <article-title>Amino Acid and Lipid Metabolism in Post-Gestational Diabetes and Progression to Type 2 Diabetes: A Metabolic Profiling Study</article-title>. <source>PloS Med</source> (<year>2020</year>) <volume>17</volume>:<fpage>e1003112</fpage>. doi: <pub-id pub-id-type="doi">10.1371/journal.pmed.1003112</pub-id>
</citation>
</ref>
<ref id="B9">
<label>9</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Joglekar</surname> <given-names>MV</given-names>
</name>
<name>
<surname>Wong</surname> <given-names>WKM</given-names>
</name>
<name>
<surname>Ema</surname> <given-names>FK</given-names>
</name>
<name>
<surname>Georgiou</surname> <given-names>HM</given-names>
</name>
<name>
<surname>Shub</surname> <given-names>A</given-names>
</name>
<name>
<surname>Hardikar</surname> <given-names>AA</given-names>
</name>
<etal/>
</person-group>. <article-title>Postpartum Circulating microRNA Enhances Prediction of Future Type 2 Diabetes in Women With Previous Gestational Diabetes</article-title>. <source>Diabetologia</source> (<year>2021</year>) <volume>64</volume>:<page-range>1516&#x2013;26</page-range>. doi: <pub-id pub-id-type="doi">10.1007/s00125-021-05429-z</pub-id>
</citation>
</ref>
<ref id="B10">
<label>10</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schrider</surname> <given-names>DR</given-names>
</name>
<name>
<surname>Ayroles</surname> <given-names>J</given-names>
</name>
<name>
<surname>Matute</surname> <given-names>DR</given-names>
</name>
<name>
<surname>Kern</surname> <given-names>AD</given-names>
</name>
</person-group>. <article-title>Supervised Machine Learning Reveals Introgressed Loci in the Genomes of Drosophila Simulans and D. Sechellia</article-title>. <source>PloS Genet</source> (<year>2018</year>) <volume>14</volume>:<fpage>e1007341</fpage>. doi: <pub-id pub-id-type="doi">10.1371/journal.pgen.1007341</pub-id>
</citation>
</ref>
<ref id="B11">
<label>11</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stoffers</surname> <given-names>DA</given-names>
</name>
<name>
<surname>Zinkin</surname> <given-names>NT</given-names>
</name>
<name>
<surname>Stanojevic</surname> <given-names>V</given-names>
</name>
<name>
<surname>Clarke</surname> <given-names>WL</given-names>
</name>
<name>
<surname>Habener</surname> <given-names>JF</given-names>
</name>
</person-group>. <article-title>Pancreatic Agenesis Attributable to a Single Nucleotide Deletion in the Human IPF1 Gene Coding Sequence</article-title>. <source>Nat Genet</source> (<year>1997</year>) <volume>15</volume>:<page-range>106&#x2013;10</page-range>. doi: <pub-id pub-id-type="doi">10.1038/ng0197-106</pub-id>
</citation>
</ref>
<ref id="B12">
<label>12</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Harrison</surname> <given-names>KA</given-names>
</name>
<name>
<surname>Thaler</surname> <given-names>J</given-names>
</name>
<name>
<surname>Pfaff</surname> <given-names>SL</given-names>
</name>
<name>
<surname>Gu</surname> <given-names>H</given-names>
</name>
<name>
<surname>Kehrl</surname> <given-names>JH</given-names>
</name>
</person-group>. <article-title>Pancreas Dorsal Lobe Agenesis and Abnormal Islets of Langerhans in Hlxb9-Deficient Mice</article-title>. <source>Nat Genet</source> (<year>1999</year>) <volume>23</volume>:<page-range>71&#x2013;5</page-range>. doi: <pub-id pub-id-type="doi">10.1038/12674</pub-id>
</citation>
</ref>
<ref id="B13">
<label>13</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Oliver-Krasinski</surname> <given-names>JM</given-names>
</name>
<name>
<surname>Stoffers</surname> <given-names>DA</given-names>
</name>
</person-group>. <article-title>On the Origin of the Beta Cell</article-title>. <source>Genes Dev</source> (<year>2008</year>) <volume>22</volume>:<fpage>1998</fpage>&#x2013;<lpage>2021</lpage>. doi: <pub-id pub-id-type="doi">10.1101/gad.1670808</pub-id>
</citation>
</ref>
<ref id="B14">
<label>14</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Doyle</surname> <given-names>MJ</given-names>
</name>
<name>
<surname>Sussel</surname> <given-names>L</given-names>
</name>
</person-group>. <article-title>Nkx2.2 Regulates Beta-Cell Function in the Mature Islet</article-title>. <source>Diabetes</source> (<year>2007</year>) <volume>56</volume>:<fpage>1999</fpage>&#x2013;<lpage>2007</lpage>. doi: <pub-id pub-id-type="doi">10.2337/db06-1766</pub-id>
</citation>
</ref>
<ref id="B15">
<label>15</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Osborne</surname> <given-names>CS</given-names>
</name>
<name>
<surname>Chakalova</surname> <given-names>L</given-names>
</name>
<name>
<surname>Brown</surname> <given-names>KE</given-names>
</name>
<name>
<surname>Carter</surname> <given-names>D</given-names>
</name>
<name>
<surname>Horton</surname> <given-names>A</given-names>
</name>
<name>
<surname>Debrand</surname> <given-names>E</given-names>
</name>
<etal/>
</person-group>. <article-title>Active Genes Dynamically Colocalize to Shared Sites of Ongoing Transcription</article-title>. <source>Nat Genet</source> (<year>2004</year>) <volume>36</volume>:<page-range>1065&#x2013;71</page-range>. doi: <pub-id pub-id-type="doi">10.1038/ng1423</pub-id>
</citation>
</ref>
<ref id="B16">
<label>16</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Stuart</surname> <given-names>T</given-names>
</name>
<name>
<surname>Butler</surname> <given-names>A</given-names>
</name>
<name>
<surname>Hoffman</surname> <given-names>P</given-names>
</name>
<name>
<surname>Hafemeister</surname> <given-names>C</given-names>
</name>
<name>
<surname>Papalexi</surname> <given-names>E</given-names>
</name>
<name>
<surname>Mauck</surname> <given-names>WM</given-names>
</name>
<etal/>
</person-group>. <article-title>Comprehensive Integration of Single-Cell Data</article-title>. <source>Cell</source> (<year>2019</year>) <volume>177</volume>:<fpage>1888</fpage>&#x2013;<lpage>902.e21</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cell.2019.05.031</pub-id>
</citation>
</ref>
<ref id="B17">
<label>17</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname> <given-names>H</given-names>
</name>
<name>
<surname>Lee</surname> <given-names>YS</given-names>
</name>
<name>
<surname>Harenda</surname> <given-names>Q</given-names>
</name>
<name>
<surname>Pietrzak</surname> <given-names>S</given-names>
</name>
<name>
<surname>Oktay</surname> <given-names>HZ</given-names>
</name>
<name>
<surname>Schreiber</surname> <given-names>S</given-names>
</name>
<etal/>
</person-group>. <article-title>Beta Cell Dedifferentiation Induced by IRE1alpha Deletion Prevents Type 1 Diabetes</article-title>. <source>Cell Metab</source> (<year>2020</year>) <volume>31</volume>:<fpage>822</fpage>&#x2013;<lpage>36.e5</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.cmet.2020.03.002</pub-id>
</citation>
</ref>
<ref id="B18">
<label>18</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Avrahami</surname> <given-names>D</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>YJ</given-names>
</name>
<name>
<surname>Schug</surname> <given-names>J</given-names>
</name>
<name>
<surname>Feleke</surname> <given-names>E</given-names>
</name>
<name>
<surname>Gao</surname> <given-names>L</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>C</given-names>
</name>
<etal/>
</person-group>. <article-title>Single-Cell Transcriptomics of Human Islet Ontogeny Defines the Molecular Basis of Beta-Cell Dedifferentiation in T2D</article-title>. <source>Mol Metab</source> (<year>2020</year>) <volume>42</volume>:<fpage>101057</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.molmet.2020.101057</pub-id>
</citation>
</ref>
<ref id="B19">
<label>19</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Anders</surname> <given-names>S</given-names>
</name>
<name>
<surname>Huber</surname> <given-names>W</given-names>
</name>
</person-group>. <article-title>Differential Expression Analysis for Sequence Count Data</article-title>. <source>Genome Biol</source> (<year>2010</year>) <volume>11</volume>:<fpage>R106</fpage>. doi: <pub-id pub-id-type="doi">10.1186/gb-2010-11-10-r106</pub-id>
</citation>
</ref>
<ref id="B20">
<label>20</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mi</surname> <given-names>H</given-names>
</name>
<name>
<surname>Muruganujan</surname> <given-names>A</given-names>
</name>
<name>
<surname>Huang</surname> <given-names>X</given-names>
</name>
<name>
<surname>Ebert</surname> <given-names>D</given-names>
</name>
<name>
<surname>Mills</surname> <given-names>C</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>X</given-names>
</name>
<etal/>
</person-group>. <article-title>Protocol Update for Large-Scale Genome and Gene Function Analysis With the PANTHER Classification System (V.14.0)</article-title>. <source>Nat Protoc</source> (<year>2019</year>) <volume>14</volume>:<page-range>703&#x2013;21</page-range>. doi: <pub-id pub-id-type="doi">10.1038/s41596-019-0128-8</pub-id>
</citation>
</ref>
<ref id="B21">
<label>21</label>
<citation citation-type="web">
<person-group person-group-type="author">
<name>
<surname>Oliveros</surname> <given-names>JC</given-names>
</name>
</person-group>. <source>Venny: An Interactive Tool for Comparing Lists With Venn&#x2019;s Diagrams</source>. Available at: <uri xlink:href="https://bioinfogp.cnb.csic.es/tools/venny/index.html">https://bioinfogp.cnb.csic.es/tools/venny/index.html</uri>.</citation>
</ref>
<ref id="B22">
<label>22</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>YJ</given-names>
</name>
<name>
<surname>Schug</surname> <given-names>J</given-names>
</name>
<name>
<surname>Won</surname> <given-names>KJ</given-names>
</name>
<name>
<surname>Liu</surname> <given-names>C</given-names>
</name>
<name>
<surname>Naji</surname> <given-names>A</given-names>
</name>
<name>
<surname>Avrahami</surname> <given-names>D</given-names>
</name>
<etal/>
</person-group>. <article-title>Single-Cell Transcriptomics of the Human Endocrine Pancreas</article-title>. <source>Diabetes</source> (<year>2016</year>) <volume>65</volume>:<page-range>3028&#x2013;38</page-range>. doi: <pub-id pub-id-type="doi">10.2337/db16-0405</pub-id>
</citation>
</ref>
<ref id="B23">
<label>23</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cinti</surname> <given-names>F</given-names>
</name>
<name>
<surname>Bouchi</surname> <given-names>R</given-names>
</name>
<name>
<surname>Kim-Muller</surname> <given-names>JY</given-names>
</name>
<name>
<surname>Ohmura</surname> <given-names>Y</given-names>
</name>
<name>
<surname>Sandoval</surname> <given-names>PR</given-names>
</name>
<name>
<surname>Masini</surname> <given-names>M</given-names>
</name>
<etal/>
</person-group>. <article-title>Evidence of Beta-Cell Dedifferentiation in Human Type 2 Diabetes</article-title>. <source>J Clin Endocrinol Metab</source> (<year>2016</year>) <volume>101</volume>:<page-range>1044&#x2013;54</page-range>. doi: <pub-id pub-id-type="doi">10.1210/jc.2015-2860</pub-id>
</citation>
</ref>
<ref id="B24">
<label>24</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guo</surname> <given-names>S</given-names>
</name>
<name>
<surname>Dai</surname> <given-names>C</given-names>
</name>
<name>
<surname>Guo</surname> <given-names>M</given-names>
</name>
<name>
<surname>Taylor</surname> <given-names>B</given-names>
</name>
<name>
<surname>Harmon</surname> <given-names>JS</given-names>
</name>
<name>
<surname>Sander</surname> <given-names>M</given-names>
</name>
<etal/>
</person-group>. <article-title>Inactivation of Specific Beta Cell Transcription Factors in Type 2 Diabetes</article-title>. <source>J Clin Invest</source> (<year>2013</year>) <volume>123</volume>:<page-range>3305&#x2013;16</page-range>. doi: <pub-id pub-id-type="doi">10.1172/JCI65390</pub-id>
</citation>
</ref>
<ref id="B25">
<label>25</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Weir</surname> <given-names>GC</given-names>
</name>
<name>
<surname>Bonner-Weir</surname> <given-names>S</given-names>
</name>
</person-group>. <article-title>Five Stages of Evolving Beta-Cell Dysfunction During Progression to Diabetes</article-title>. <source>Diabetes</source> (<year>2004</year>) <volume>53 Suppl 3</volume>:<page-range>S16&#x2013;21</page-range>. doi: <pub-id pub-id-type="doi">10.2337/diabetes.53.suppl_3.S16</pub-id>
</citation>
</ref>
<ref id="B26">
<label>26</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lynam</surname> <given-names>AL</given-names>
</name>
<name>
<surname>Dennis</surname> <given-names>JM</given-names>
</name>
<name>
<surname>Owen</surname> <given-names>KR</given-names>
</name>
<name>
<surname>Oram</surname> <given-names>RA</given-names>
</name>
<name>
<surname>Jones</surname> <given-names>AG</given-names>
</name>
<name>
<surname>Shields</surname> <given-names>BM</given-names>
</name>
<etal/>
</person-group>. <article-title>Logistic Regression has Similar Performance to Optimised Machine Learning Algorithms in a Clinical Setting: Application to the Discrimination Between Type 1 and Type 2 Diabetes in Young Adults</article-title>. <source>Diagn Progn Res</source> (<year>2020</year>) <volume>4</volume>:<fpage>6</fpage>. doi: <pub-id pub-id-type="doi">10.1186/s41512-020-00075-2</pub-id>
</citation>
</ref>
<ref id="B27">
<label>27</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Macfarlane</surname> <given-names>WM</given-names>
</name>
<name>
<surname>Campbell</surname> <given-names>SC</given-names>
</name>
<name>
<surname>Elrick</surname> <given-names>LJ</given-names>
</name>
<name>
<surname>Oates</surname> <given-names>V</given-names>
</name>
<name>
<surname>Bermano</surname> <given-names>G</given-names>
</name>
<name>
<surname>Lindley</surname> <given-names>KJ</given-names>
</name>
<etal/>
</person-group>. <article-title>Glucose Regulates Islet Amyloid Polypeptide Gene Transcription in a PDX1- and Calcium-Dependent Manner</article-title>. <source>J Biol Chem</source> (<year>2000</year>) <volume>275</volume>:<page-range>15330&#x2013;5</page-range>. doi: <pub-id pub-id-type="doi">10.1074/jbc.M908045199</pub-id>
</citation>
</ref>
<ref id="B28">
<label>28</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mulder</surname> <given-names>H</given-names>
</name>
<name>
<surname>Ahren</surname> <given-names>B</given-names>
</name>
<name>
<surname>Sundler</surname> <given-names>F</given-names>
</name>
</person-group>. <article-title>Islet Amyloid Polypeptide and Insulin Gene Expression Are Regulated in Parallel by Glucose <italic>In Vivo</italic> in Rats</article-title>. <source>Am J Physiol</source> (<year>1996</year>) <volume>271</volume>:<page-range>E1008&#x2013;14</page-range>. doi: <pub-id pub-id-type="doi">10.1152/ajpendo.1996.271.6.E1008</pub-id>
</citation>
</ref>
<ref id="B29">
<label>29</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shepherd</surname> <given-names>LM</given-names>
</name>
<name>
<surname>Campbell</surname> <given-names>SC</given-names>
</name>
<name>
<surname>Macfarlane</surname> <given-names>WM</given-names>
</name>
</person-group>. <article-title>Transcriptional Regulation of the IAPP Gene in Pancreatic Beta-Cells</article-title>. <source>Biochim Biophys Acta</source> (<year>2004</year>) <volume>1681</volume>:<fpage>28</fpage>&#x2013;<lpage>37</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.bbaexp.2004.09.009</pub-id>
</citation>
</ref>
<ref id="B30">
<label>30</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname> <given-names>H</given-names>
</name>
<name>
<surname>Gauthier</surname> <given-names>BR</given-names>
</name>
<name>
<surname>Hagenfeldt-Johansson</surname> <given-names>KA</given-names>
</name>
<name>
<surname>Iezzi</surname> <given-names>M</given-names>
</name>
<name>
<surname>Wollheim</surname> <given-names>CB</given-names>
</name>
</person-group>. <article-title>Foxa2 (HNF3beta) Controls Multiple Genes Implicated in Metabolism-Secretion Coupling of Glucose-Induced Insulin Release</article-title>. <source>J Biol Chem</source> (<year>2002</year>) <volume>277</volume>:<page-range>17564&#x2013;70</page-range>. doi: <pub-id pub-id-type="doi">10.1074/jbc.M111037200</pub-id>
</citation>
</ref>
<ref id="B31">
<label>31</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Matsuoka</surname> <given-names>TA</given-names>
</name>
<name>
<surname>Artner</surname> <given-names>I</given-names>
</name>
<name>
<surname>Henderson</surname> <given-names>E</given-names>
</name>
<name>
<surname>Means</surname> <given-names>A</given-names>
</name>
<name>
<surname>Sander</surname> <given-names>M</given-names>
</name>
<name>
<surname>Stein</surname> <given-names>R</given-names>
</name>
</person-group>. <article-title>The MafA Transcription Factor Appears to be Responsible for Tissue-Specific Expression of Insulin</article-title>. <source>Proc Natl Acad Sci USA</source> (<year>2004</year>) <volume>101</volume>:<page-range>2930&#x2013;3</page-range>. doi: <pub-id pub-id-type="doi">10.1073/pnas.0306233101</pub-id>
</citation>
</ref>
<ref id="B32">
<label>32</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Badi</surname> <given-names>I</given-names>
</name>
<name>
<surname>Mancinelli</surname> <given-names>L</given-names>
</name>
<name>
<surname>Polizzotto</surname> <given-names>A</given-names>
</name>
<name>
<surname>Ferri</surname> <given-names>D</given-names>
</name>
<name>
<surname>Zeni</surname> <given-names>F</given-names>
</name>
<name>
<surname>Burba</surname> <given-names>I</given-names>
</name>
<etal/>
</person-group>. <article-title>miR-34a Promotes Vascular Smooth Muscle Cell Calcification by Downregulating SIRT1 (Sirtuin 1) and Axl (AXL Receptor Tyrosine Kinase)</article-title>. <source>Arterioscler Thromb Vasc Biol</source> (<year>2018</year>) <volume>38</volume>:<page-range>2079&#x2013;90</page-range>. doi: <pub-id pub-id-type="doi">10.1161/ATVBAHA.118.311298</pub-id>
</citation>
</ref>
<ref id="B33">
<label>33</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Smith</surname> <given-names>PA</given-names>
</name>
<name>
<surname>Sellers</surname> <given-names>LA</given-names>
</name>
<name>
<surname>Humphrey</surname> <given-names>PP</given-names>
</name>
</person-group>. <article-title>Somatostatin Activates Two Types of Inwardly Rectifying K+ Channels in MIN-6 Cells</article-title>. <source>J Physiol</source> (<year>2001</year>) <volume>532</volume>:<page-range>127&#x2013;42</page-range>. doi: <pub-id pub-id-type="doi">10.1111/j.1469-7793.2001.0127g.x</pub-id>
</citation>
</ref>
<ref id="B34">
<label>34</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Blodgett</surname> <given-names>DM</given-names>
</name>
<name>
<surname>Nowosielska</surname> <given-names>A</given-names>
</name>
<name>
<surname>Afik</surname> <given-names>S</given-names>
</name>
<name>
<surname>Pechhold</surname> <given-names>S</given-names>
</name>
<name>
<surname>Cura</surname> <given-names>AJ</given-names>
</name>
<name>
<surname>Kennedy</surname> <given-names>NJ</given-names>
</name>
<etal/>
</person-group>. <article-title>Novel Observations From Next-Generation RNA Sequencing of Highly Purified Human Adult and Fetal Islet Cell Subsets</article-title>. <source>Diabetes</source> (<year>2015</year>) <volume>64</volume>:<page-range>3172&#x2013;81</page-range>. doi: <pub-id pub-id-type="doi">10.2337/db15-0039</pub-id>
</citation>
</ref>
<ref id="B35">
<label>35</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Filipsson</surname> <given-names>K</given-names>
</name>
<name>
<surname>Kvist-Reimer</surname> <given-names>M</given-names>
</name>
<name>
<surname>Ahren</surname> <given-names>B</given-names>
</name>
</person-group>. <article-title>The Neuropeptide Pituitary Adenylate Cyclase-Activating Polypeptide and Islet Function</article-title>. <source>Diabetes</source> (<year>2001</year>) <volume>50</volume>:<page-range>1959&#x2013;69</page-range>. doi: <pub-id pub-id-type="doi">10.2337/diabetes.50.9.1959</pub-id>
</citation>
</ref>
<ref id="B36">
<label>36</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gu</surname> <given-names>HF</given-names>
</name>
</person-group>. <article-title>Genetic Variation Screening and Association Studies of the Adenylate Cyclase Activating Polypeptide 1 (ADCYAP1) Gene in Patients With Type 2 Diabetes</article-title>. <source>Hum Mutat</source> (<year>2002</year>) <volume>19</volume>:<page-range>572&#x2013;3</page-range>. doi: <pub-id pub-id-type="doi">10.1002/humu.9034</pub-id>
</citation>
</ref>
<ref id="B37">
<label>37</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rutter</surname> <given-names>GA</given-names>
</name>
<name>
<surname>Pullen</surname> <given-names>TJ</given-names>
</name>
<name>
<surname>Hodson</surname> <given-names>DJ</given-names>
</name>
<name>
<surname>Martinez-Sanchez</surname> <given-names>A</given-names>
</name>
</person-group>. <article-title>Pancreatic Beta-Cell Identity, Glucose Sensing and the Control of Insulin Secretion</article-title>. <source>Biochem J</source> (<year>2015</year>) <volume>466</volume>:<page-range>203&#x2013;18</page-range>. doi: <pub-id pub-id-type="doi">10.1042/BJ20141384</pub-id>
</citation>
</ref>
<ref id="B38">
<label>38</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rutter</surname> <given-names>GA</given-names>
</name>
<name>
<surname>Pullen</surname> <given-names>TJ</given-names>
</name>
</person-group>. <article-title>Comment on: Schuit et al. Beta-Cell-Specific Gene Repression: A Mechanism to Protect Against Inappropriate or Maladjusted Insulin Secretion?</article-title> <source>Diabetes</source> (<year>2012</year>) <volume>61</volume>:<page-range>969&#x2013;75</page-range>. doi: <pub-id pub-id-type="doi">10.2337/db12-0775</pub-id>
</citation>
</ref>
<ref id="B39">
<label>39</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Schuit</surname> <given-names>F</given-names>
</name>
<name>
<surname>Van Lommel</surname> <given-names>L</given-names>
</name>
<name>
<surname>Granvik</surname> <given-names>M</given-names>
</name>
<name>
<surname>Goyvaerts</surname> <given-names>L</given-names>
</name>
<name>
<surname>de Faudeur</surname> <given-names>G</given-names>
</name>
<name>
<surname>Schraenen</surname> <given-names>A</given-names>
</name>
<etal/>
</person-group>. <article-title>Beta-Cell-Specific Gene Repression: A Mechanism to Protect Against Inappropriate or Maladjusted Insulin Secretion?</article-title> <source>Diabetes</source> (<year>2012</year>) <volume>61</volume>:<page-range>969&#x2013;75</page-range>. doi: <pub-id pub-id-type="doi">10.2337/db11-1564</pub-id>
</citation>
</ref>
<ref id="B40">
<label>40</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cantley</surname> <given-names>J</given-names>
</name>
<name>
<surname>Walters</surname> <given-names>SN</given-names>
</name>
<name>
<surname>Jung</surname> <given-names>MH</given-names>
</name>
<name>
<surname>Weinberg</surname> <given-names>A</given-names>
</name>
<name>
<surname>Cowley</surname> <given-names>MJ</given-names>
</name>
<name>
<surname>Whitworth</surname> <given-names>TP</given-names>
</name>
<etal/>
</person-group>. <article-title>A Preexistent Hypoxic Gene Signature Predicts Impaired Islet Graft Function and Glucose Homeostasis</article-title>. <source>Cell Transplant</source> (<year>2013</year>) <volume>22</volume>:<page-range>2147&#x2013;59</page-range>. doi: <pub-id pub-id-type="doi">10.3727/096368912X658728</pub-id>
</citation>
</ref>
<ref id="B41">
<label>41</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kameswaran</surname> <given-names>V</given-names>
</name>
<name>
<surname>Bramswig</surname> <given-names>NC</given-names>
</name>
<name>
<surname>McKenna</surname> <given-names>LB</given-names>
</name>
<name>
<surname>Penn</surname> <given-names>M</given-names>
</name>
<name>
<surname>Schug</surname> <given-names>J</given-names>
</name>
<name>
<surname>Hand</surname> <given-names>NJ</given-names>
</name>
<etal/>
</person-group>. <article-title>Epigenetic Regulation of the DLK1-MEG3 microRNA Cluster in Human Type 2 Diabetic Islets</article-title>. <source>Cell Metab</source> (<year>2014</year>) <volume>19</volume>:<page-range>135&#x2013;45</page-range>. doi: <pub-id pub-id-type="doi">10.1016/j.cmet.2013.11.016</pub-id>
</citation>
</ref>
<ref id="B42">
<label>42</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Joglekar</surname> <given-names>MV</given-names>
</name>
<name>
<surname>Dong</surname> <given-names>CX</given-names>
</name>
<name>
<surname>Wong</surname> <given-names>WKM</given-names>
</name>
<name>
<surname>Dalgaard</surname> <given-names>LT</given-names>
</name>
<name>
<surname>Hardikar</surname> <given-names>AA</given-names>
</name>
</person-group>. <article-title>A Bird&#x2019;s Eye View of the Dynamics of Pancreatic Beta-Cell Heterogeneity</article-title>. <source>Acta Physiol (Oxf)</source> (<year>2021</year>) <volume>233</volume>:<fpage>e13664</fpage>. doi: <pub-id pub-id-type="doi">10.1111/apha.13664</pub-id>
</citation>
</ref>
<ref id="B43">
<label>43</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Benninger</surname> <given-names>RKP</given-names>
</name>
<name>
<surname>Hodson</surname> <given-names>DJ</given-names>
</name>
</person-group>. <article-title>New Understanding of Beta-Cell Heterogeneity and <italic>In Situ</italic> Islet Function</article-title>. <source>Diabetes</source> (<year>2018</year>) <volume>67</volume>:<page-range>537&#x2013;47</page-range>. doi: <pub-id pub-id-type="doi">10.2337/dbi17-0040</pub-id>
</citation>
</ref>
<ref id="B44">
<label>44</label>
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zou</surname> <given-names>M</given-names>
</name>
<name>
<surname>Zhang</surname> <given-names>PJ</given-names>
</name>
<name>
<surname>Wen</surname> <given-names>XY</given-names>
</name>
<name>
<surname>Chen</surname> <given-names>L</given-names>
</name>
<name>
<surname>Tian</surname> <given-names>YP</given-names>
</name>
<name>
<surname>Wang</surname> <given-names>Y</given-names>
</name>
</person-group>. <article-title>A Novel Mixed Integer Programming for Multi-Biomarker Panel Identification by Distinguishing Malignant From Benign Colorectal Tumors</article-title>. <source>Methods</source> (<year>2015</year>) <volume>83</volume>:<fpage>3</fpage>&#x2013;<lpage>17</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.ymeth.2015.05.011</pub-id>
</citation>
</ref>
<ref id="B45">
<label>45</label>
<citation citation-type="book">
<person-group person-group-type="author">
<name>
<surname>Dankers</surname> <given-names>F</given-names>
</name>
<name>
<surname>Traverso</surname> <given-names>A</given-names>
</name>
<name>
<surname>Wee</surname> <given-names>L</given-names>
</name>
<name>
<surname>van Kuijk</surname> <given-names>SMJ</given-names>
</name>
</person-group>. <article-title>Prediction Modeling Methodology</article-title>. In: <person-group person-group-type="editor">
<name>
<surname>Kubben</surname> <given-names>P</given-names>
</name>
<name>
<surname>Dumontier</surname> <given-names>M</given-names>
</name>
<name>
<surname>Dekker</surname> <given-names>A</given-names>
</name>
</person-group>, editors. <source>Fundamentals of Clinical Data Science</source>. <publisher-loc>Cham (CH)</publisher-loc> (<year>2019</year>). p. <page-range>101&#x2013;20</page-range>.</citation>
</ref>
</ref-list>
</back>
</article>