<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.3" xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Genet.</journal-id>
<journal-title>Frontiers in Genetics</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Genet.</abbrev-journal-title>
<issn pub-type="epub">1664-8021</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1135260</article-id>
<article-id pub-id-type="doi">10.3389/fgene.2023.1135260</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Genetics</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>MSC-CSMC: A multi-objective semi-supervised clustering algorithm based on constraints selection and multi-source constraints for gene expression data</article-title>
<alt-title alt-title-type="left-running-head">Wang et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fgene.2023.1135260">10.3389/fgene.2023.1135260</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name>
<surname>Wang</surname>
<given-names>Zeyuan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2091377/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Gu</surname>
<given-names>Hong</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/703448/overview"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Zhao</surname>
<given-names>Minghui</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Li</surname>
<given-names>Dan</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2217423/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Wang</surname>
<given-names>Jia</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/703145/overview"/>
</contrib>
</contrib-group>
<aff id="aff1">
<sup>1</sup>
<institution>Faculty of Electronic Information and Electrical Engineering</institution>, <institution>Dalian University of Technology</institution>, <addr-line>Dalian</addr-line>, <addr-line>Liaoning</addr-line>, <country>China</country>
</aff>
<aff id="aff2">
<sup>2</sup>
<institution>Department of Breast Surgery</institution>, <institution>Second Hospital of Dalian Medical University</institution>, <addr-line>Dalian</addr-line>, <addr-line>Liaoning</addr-line>, <country>China</country>
</aff>
<author-notes>
<fn fn-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1805195/overview">Suyan Tian</ext-link>, Jilin University, China</p>
</fn>
<fn fn-type="edited-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/995307/overview">Guojun Liu</ext-link>, Xi&#x2019;an University of Finance and Economics, China</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1058883/overview">Changjing Zhuge</ext-link>, Beijing University of Technology, China</p>
</fn>
<corresp id="c001">&#x2a;Correspondence: Dan Li, <email>ldan@dlut.edu.cn</email>; Jia Wang, <email>wangjia77@hotmail.com</email>
</corresp>
<fn fn-type="other">
<p>This article was submitted to Computational Genomics, a section of the journal Frontiers in Genetics</p>
</fn>
</author-notes>
<pub-date pub-type="epub">
<day>27</day>
<month>02</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>14</volume>
<elocation-id>1135260</elocation-id>
<history>
<date date-type="received">
<day>31</day>
<month>12</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>16</day>
<month>02</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2023 Wang, Gu, Zhao, Li and Wang.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Wang, Gu, Zhao, Li and Wang</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Many clustering techniques have been proposed to group genes based on gene expression data. Among these methods, semi-supervised clustering techniques aim to improve clustering performance by incorporating supervisory information in the form of pairwise constraints. However, noisy constraints inevitably exist in the constraint set obtained on the practical unlabeled dataset, which degrades the performance of semi-supervised clustering. Moreover, multiple information sources are not integrated into multi-source constraints to improve clustering quality. To this end, the research proposes a new multi-objective semi-supervised clustering algorithm based on constraints selection and multi-source constraints (MSC-CSMC) for unlabeled gene expression data. The proposed method first uses the gene expression data and the gene ontology (GO) that describes gene annotation information to form multi-source constraints. Then, the multi-source constraints are applied to the clustering by improving the constraint violation penalty weight in the semi-supervised clustering objective function. Furthermore, the constraints selection and cluster prototypes are put into the multi-objective evolutionary framework by adopting a mixed chromosome encoding strategy, which can select pairwise constraints suitable for clustering tasks through synergistic optimization to reduce the negative influence of noisy constraints. The proposed MSC-CSMC algorithm is tested on five benchmark gene expression datasets, and the results show that the proposed algorithm achieves superior performance.</p>
</abstract>
<kwd-group>
<kwd>semi-supervised clustering</kwd>
<kwd>constraint selection</kwd>
<kwd>multi-source constraints</kwd>
<kwd>gene expression data</kwd>
<kwd>multi-objective optimization</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="s1">
<title>1 Introduction</title>
<p>The rapid development of microarray technology has generated a large amount of gene expression data and mining the inherent patterns in the massive gene expression data is a major challenge in the current bioinformatics field (<xref ref-type="bibr" rid="B5">Bandyopadhyay et al., 2007</xref>; <xref ref-type="bibr" rid="B29">Pirooznia et al., 2008</xref>). As an important unsupervised data mining method, clustering has become a powerful tool for gene expression data analysis. One of the main tasks of gene expression data clustering is to identify co-expressed gene groups, which is a useful tool for further research on gene function (<xref ref-type="bibr" rid="B5">Bandyopadhyay et al., 2007</xref>; <xref ref-type="bibr" rid="B9">Chen et al., 2019</xref>). Compared with the unsupervised clustering methods, the semi-supervised clustering methods use prior information to guide the clustering process through data labels or pairwise constraints, which can effectively improve the performance of clustering (<xref ref-type="bibr" rid="B37">Wagstaff et al., 2001</xref>; <xref ref-type="bibr" rid="B8">Bilenko et al., 2004</xref>; <xref ref-type="bibr" rid="B40">Yin et al., 2010</xref>).</p>
<p>For semi-supervised clustering algorithms, the pairwise constraints are usually used to describe whether two data points belong to the same cluster. Specifically, the must-link constraint (ML) means that two data must be divided into the same cluster, and the cannot-link constraint (CL) means that two data must be divided into different clusters. The quality of the selected pairwise constraints is of vital importance, which significantly affects the performance of semi-supervised clustering algorithms (<xref ref-type="bibr" rid="B17">Grira et al., 2008</xref>; <xref ref-type="bibr" rid="B36">Vu et al., 2012</xref>; <xref ref-type="bibr" rid="B25">Masud et al., 2019</xref>; <xref ref-type="bibr" rid="B2">Abin and Vu, 2020</xref>). The pairwise constraints can be generated by directly using part of the known data labels (<xref ref-type="bibr" rid="B20">Lai et al., 2021</xref>) or by using an active learning method (<xref ref-type="bibr" rid="B25">Masud et al., 2019</xref>). In practice, most gene expression data are unlabeled, so it is impossible to obtain pairwise constraints based on labels. <xref ref-type="bibr" rid="B36">Vu et al. (2012)</xref> indicated that the generation of the pairwise constraints should mainly focus on the data samples on the cluster boundaries, which are more likely to be misclassified. To this end, <xref ref-type="bibr" rid="B6">Basu et al. (2004)</xref> developed a farthest-first traversal scheme-based active learning method to obtain pairwise constraints. However, this method has been reported to be sensitive to noise (<xref ref-type="bibr" rid="B12">Davidson and Qi, 2008</xref>). <xref ref-type="bibr" rid="B17">Grira et al. (2008)</xref> proposed an active learning method to generate pairwise constraints by determining cluster boundary data using membership obtained by fuzzy clustering. <xref ref-type="bibr" rid="B36">Vu et al. (2012)</xref> identified data in sparse regions based on <italic>k</italic>-nearest neighbor graphs and constructed pairwise constraints. However, it was claimed that some pairwise constraints might not be generated by this method (<xref ref-type="bibr" rid="B2">Abin and Vu, 2020</xref>). <xref ref-type="bibr" rid="B23">Liu et al. (2018)</xref> proposed an entropy-based query strategy to select the most uncertain pairwise constraints. <xref ref-type="bibr" rid="B1">Abin (2018)</xref> proposed a random walk approach on the adjacency graph of data for querying informative constraints. <xref ref-type="bibr" rid="B25">Masud et al. (2019)</xref> used local density estimation to identify the most informative objects as pairwise constraints. <xref ref-type="bibr" rid="B2">Abin and Vu (2020)</xref> proposed a density tracking method which takes into account the density relationship between data, and uses the information about boundaries and skeleton of clusters to generate the pairwise constraints.</p>
<p>Although the above methods can automatically mine and learn the pairwise constraints of unlabeled datasets through different approaches, there are inevitably noisy constraints, i.e., constraints inconsistent with the ground-truth clusters, in the obtained pairwise constraints (<xref ref-type="bibr" rid="B40">Yin et al., 2010</xref>; <xref ref-type="bibr" rid="B20">Lai et al., 2021</xref>). However, the existing semi-supervised clustering algorithms are mostly based on the assumption that pairwise constraints conform to real cluster information, and are usually susceptible to noisy constraints. Therefore, it is necessary to implement constraints selection, where noisy constraints are filtered out, and only pairwise constraints that are beneficial for semi-supervised clustering are retained. In addition, most of the pairwise-constraints-based semi-supervised clustering algorithms were developed for single-source constraints, i.e., the pairwise constraints are obtained only from the data itself. In real-world applications, many data also possess related domain information. For example, Gene Ontology (GO) (<xref ref-type="bibr" rid="B4">Ashburner et al., 2000</xref>), which describes gene products in terms of their associated biological processes, cellular components and molecular functions, can further provide gene annotation information for gene expression data. In this paper, the multi-source constraints are the pairwise constraints formed by the data itself and domain information. Apparently, compared with the single-source pairwise constraints based solely on gene expression data, the multi-source constraints formed by the fusion of gene ontology can provide more comprehensive information about the structure of gene clusters and help to guide semi-supervised clustering to obtain more accurate clustering results.</p>
<p>Aiming at the unlabeled gene expression data and from the perspective of reducing the negative impact of noisy constraints and integrating multi-source constraints, a method called multi-objective semi-supervised clustering algorithm based on constraints selection and multi-source constraints (MSC-CSMC) is proposed in this research. At first, the proposed algorithm uses gene expression data and GO information to generate multi-source pairwise constraints. Then, under the multi-objective optimization framework of Non-dominated Sorting Genetic Algorithm-II (NSGA-II), the constraints selection and the cluster prototypes are collaboratively optimized to realize the selection of pairwise constraints suitable for clustering with respect to the multi-source constraints and to improve the accuracy of semi-supervised clustering of gene expression data by reducing the negative impact of noisy constraints.</p>
</sec>
<sec sec-type="methods" id="s2">
<title>2 Methods</title>
<p>In this section, the details of our proposed MSC-CSMC algorithm are described. Our proposed method consists of two parts. Firstly, multi-source pairwise constraints are generated by integrating gene expression and gene ontology (GO) information. Then, by using the improved penalty weights as well as mixed chromosome encoding strategy of cluster prototype and constraints selection, multi-objective semi-supervised clustering based on constraints selection and multi-source constraints is performed to identify co-expressed gene groups. The workflow of MSC-CSMC is shown in <xref ref-type="fig" rid="F1">Figure 1</xref>.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Workflow of MSC-CSMC. <bold>(A)</bold> Generation of multi-source pairwise constraints. <bold>(B)</bold> Multi-objective semi-supervised clustering.</p>
</caption>
<graphic xlink:href="fgene-14-1135260-g001.tif"/>
</fig>
<sec id="s2-1">
<title>2.1 Generation of multi-source pairwise constraints</title>
<p>Gene expression data and gene ontology (GO) describe genes from two complementary perspectives: the mRNA abundance of genes and gene annotation, respectively. Compared with the method only using gene expression data, the combination of these two aspects of information can help to further improve the clustering accuracy of gene expression data (<xref ref-type="bibr" rid="B16">Giri and Saha, 2020</xref>; <xref ref-type="bibr" rid="B21">Li et al., 2022</xref>). In this paper, we use gene expression data and gene ontology information to generate multi-source pairwise constraints for semi-supervised clustering.</p>
<p>In view of the superior performance of the density tracking method (<xref ref-type="bibr" rid="B2">Abin and Vu, 2020</xref>), we use this method to generate the initial gene expression constraint set. The method consists of three steps: density estimation, density following, and constraints generation. Let <inline-formula id="inf1">
<mml:math id="m1">
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula> denote a <italic>d</italic>-dimensional gene expression dataset with <italic>n</italic> genes. Gene <bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>&#x2019;s density is obtained by<disp-formula id="e1">
<mml:math id="m2">
<mml:mi>D</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>y</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:munder>
<mml:mrow>
<mml:mi>max</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>b</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:munder>
<mml:msub>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
</mml:math>
<label>(1)</label>
</disp-formula>where <italic>N</italic>
<sub>
<italic>b</italic>
</sub>(<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>) is the set of <italic>b</italic> nearest genes of gene <bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>; <inline-formula id="inf2">
<mml:math id="m3">
<mml:msub>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;">
<mml:mrow>
<mml:mo>&#x22c5;</mml:mo>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:math>
</inline-formula> is the Euclidean distance. Based on the density in <xref ref-type="disp-formula" rid="e1">Formula 1</xref>, the density tracking method constructs density chains according to the density relationship between data. Specifically, starting from each gene <bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>, the closest gene <bold>
<italic>x</italic>
</bold>
<sub>
<italic>j</italic>
</sub> &#x2208; <italic>N</italic>
<sub>
<italic>b</italic>
</sub>(<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>) whose density is greater than that of <bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub> is selected, and the relation between them is recorded as density chain <bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub> &#x2192; <bold>
<italic>x</italic>
</bold>
<sub>
<italic>j</italic>
</sub>. Then start from gene <bold>
<italic>x</italic>
</bold>
<sub>
<italic>j</italic>
</sub> and continue the above density tracking until there exists no gene whose density is greater than that of the gene at the end of the chain. Consequently, the density chain <italic>Chains</italic> (<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>) can be denoted as <bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub> &#x2192; <bold>
<italic>x</italic>
</bold>
<sub>
<italic>j</italic>
</sub> &#x2192; &#x22ef; &#x2192; <bold>
<italic>x</italic>
</bold>
<sub>
<italic>e</italic>
</sub>. After constructing all the density chains, the total number of times that gene <bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub> appears in all the chains is referred to as its centrality and denoted by <italic>Centrality</italic> (<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>). The sum of centrality with respect to all genes in a density chain is used as the centrality of the density chain. All density chains with a common endpoint are considered connected density chains and the points belonging to them are considered to be in the same density group. Besides, the impurity of gene <bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub> is defined as follows:<disp-formula id="e2">
<mml:math id="m4">
<mml:mi>I</mml:mi>
<mml:mi>m</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>y</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>g</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi>G</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>p</mml:mi>
<mml:mi>s</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:mrow>
</mml:munderover>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:munder>
<mml:mi mathvariant="double-struck">I</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>r</mml:mi>
<mml:mi>o</mml:mi>
<mml:mi>u</mml:mi>
<mml:mi>p</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>g</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>b</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#xd7;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>y</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>D</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>t</mml:mi>
<mml:mi>y</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>e</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:math>
<label>(2)</label>
</disp-formula>with &#x7c;<italic>Groups</italic>&#x7c; being the total number of groups, <italic>S</italic> &#x3d; {<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>} &#x222a; <italic>N</italic>
<sub>
<italic>b</italic>
</sub>(<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>), <italic>Group</italic>(<bold>
<italic>x</italic>
</bold>
<sub>
<italic>j</italic>
</sub>) being the group index of <bold>
<italic>x</italic>
</bold>
<sub>
<italic>j</italic>
</sub>, <inline-formula id="inf3">
<mml:math id="m5">
<mml:mi mathvariant="double-struck">I</mml:mi>
</mml:math>
</inline-formula> being the indicator function.</p>
<p>According to the density, impurity, density chain, and density group of the data, the density tracking method proposes three assumptions for mining informative pairwise constraints. Let &#x3a9; denote the pairwise constraint set, whose elements satisfy the following key assumptions: (1) providing feasible information about the boundary data of clusters; (2) providing feasible information about the boundary between various clusters; (3) providing feasible information about the skeleton of clusters. Among them, assumptions (1) and (3) are used to generate the must-link constraint set &#x3a9;<sub>
<italic>ML</italic>
</sub>, and assumption (2) is used to generate the cannot-link constraint set &#x3a9;<sub>
<italic>CL</italic>
</sub>. With the subsets &#x3a9;<sub>
<italic>ML</italic>
</sub> and &#x3a9;<sub>
<italic>CL</italic>
</sub>, the penalization can be constructed for the cost function of the clustering. The workflow of the density tracking method is given in <xref ref-type="fig" rid="F2">Figure 2</xref>. The initial gene expression constraint set &#x3a9; &#x3d; &#x3a9;<sub>
<italic>ML</italic>
</sub> &#x222a; &#x3a9;<sub>
<italic>CL</italic>
</sub> is generated as follows.<list list-type="simple">
<list-item>
<p>1. For each gene <bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>, calculate its <italic>Density</italic>(<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>) and <italic>Impurity</italic>(<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>). Construct density chain <italic>Chains</italic>(<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>) and density group <italic>Group</italic>(<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>), get the centrality of density chain. Initialize &#x3a9;<sub>
<italic>ML</italic>
</sub> &#x3d; &#x2205;, &#x3a9;<sub>
<italic>CL</italic>
</sub> &#x3d; &#x2205;;</p>
</list-item>
<list-item>
<p>2. Select gene <bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub> in descending order of <italic>Impurity</italic> (<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>), query the nearest neighbor gene <bold>
<italic>x</italic>
</bold>
<sub>
<italic>j</italic>
</sub> that is not in its density group <italic>Group</italic> (<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>), and add the pairwise constraint (<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>, <bold>
<italic>x</italic>
</bold>
<sub>
<italic>j</italic>
</sub>) into the cannot-link constraint set, i.e., &#x3a9;<sub>
<italic>CL</italic>
</sub> &#x3d; &#x3a9;<sub>
<italic>CL</italic>
</sub> &#x222a; {(<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>, <bold>
<italic>x</italic>
</bold>
<sub>
<italic>j</italic>
</sub>)};</p>
</list-item>
<list-item>
<p>3. Select gene <bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub> in descending order of <italic>Impurity</italic>(<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>), and find the next gene <bold>
<italic>x</italic>
</bold>
<sub>
<italic>j</italic>
</sub> along its density chain <italic>Chains</italic>(<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>). Let <italic>&#x25b;</italic> &#x3e; 0 denote the density drop rate. If <italic>Density</italic>(<bold>
<italic>x</italic>
</bold>
<sub>
<italic>j</italic>
</sub>) &#x2265; <italic>&#x25b;</italic> &#xd7; <italic>Density</italic>(<bold>
<italic>x</italic>
</bold>
<sub>
<italic>e</italic>
</sub>), then add the pairwise constraint (<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>, <bold>
<italic>x</italic>
</bold>
<sub>
<italic>j</italic>
</sub>) to the must-link constraint set, i.e., &#x3a9;<sub>
<italic>ML</italic>
</sub> &#x3d; &#x3a9;<sub>
<italic>ML</italic>
</sub> &#x222a; {(<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>, <bold>
<italic>x</italic>
</bold>
<sub>
<italic>j</italic>
</sub>)};</p>
</list-item>
<list-item>
<p>4. Select the density chain <italic>Chains</italic>(<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>) in descending order of the centrality of the density chain, start from the starting gene <bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>, select the gene <bold>
<italic>x</italic>
</bold>
<sub>
<italic>j</italic>
</sub> with an interval, and add the pairwise constraint (<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>, <bold>
<italic>x</italic>
</bold>
<sub>
<italic>j</italic>
</sub>) to the must-link constraint set, i.e., &#x3a9;<sub>
<italic>ML</italic>
</sub> &#x3d; &#x3a9;<sub>
<italic>ML</italic>
</sub> &#x222a; {(<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>, <bold>
<italic>x</italic>
</bold>
<sub>
<italic>j</italic>
</sub>)}.</p>
</list-item>
</list>
</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Workflow of the density tracking method.</p>
</caption>
<graphic xlink:href="fgene-14-1135260-g002.tif"/>
</fig>
<p>For a set of genes to be analyzed, each gene can be annotated with several GO terms. Thus, the functional similarity between genes can be deduced based on the term similarity. In the proposed MSC-CSMC algorithm, we adopt the aggregate information content (AIC) (<xref ref-type="bibr" rid="B35">Song et al., 2014</xref>) to measure the semantic similarity of GO terms <italic>t</italic>
<sub>1</sub> and <italic>t</italic>
<sub>2</sub>:<disp-formula id="e3">
<mml:math id="m6">
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">AIC</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2229;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mn>2</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mi>S</mml:mi>
<mml:mi>W</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mi>V</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>S</mml:mi>
<mml:mi>V</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
</mml:math>
<label>(3)</label>
</disp-formula>with<disp-formula id="equ1">
<mml:math id="m7">
<mml:mi>S</mml:mi>
<mml:mi>W</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>exp</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>/</mml:mo>
<mml:mi>I</mml:mi>
<mml:mi>C</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:mi>S</mml:mi>
<mml:mi>V</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:munder>
<mml:mi>S</mml:mi>
<mml:mi>W</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:math>
</disp-formula>Here, <italic>T</italic>
<sub>
<italic>t</italic>
</sub> is the set of ancestors of term <italic>t</italic> in the GO graph, <italic>p</italic>(<italic>t</italic>) is the frequency with which term <italic>t</italic> appears in the GO database, and <italic>IC</italic>(<italic>t</italic>) &#x3d; &#x2212;log&#x2009;<italic>p</italic>(<italic>t</italic>) is the information content of term <italic>t</italic>. The higher the annotation frequency of a term, the more general the information it contains and the smaller its <italic>IC</italic> value. <italic>SW</italic>(<italic>t</italic>) normalizes the knowledge reflected by 1/<italic>IC</italic>(<italic>t</italic>) through a logistic function, describing the semantic weight of term <italic>t</italic>. Consequently, the functional similarity of genes <bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub> and <bold>
<italic>x</italic>
</bold>
<sub>
<italic>j</italic>
</sub> can be obtained as follows:<disp-formula id="e4">
<mml:math id="m8">
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>O</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:munder>
<mml:mrow>
<mml:mo movablelimits="false" form="prefix">&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>n</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:munder>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>m</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:munder>
<mml:mrow>
<mml:mo movablelimits="false" form="prefix">&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>n</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:munder>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>m</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="|" close="|">
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>n</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:mfenced open="|" close="|">
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>n</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
</mml:math>
<label>(4)</label>
</disp-formula>where<disp-formula id="equ2">
<mml:math id="m9">
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>m</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:munder>
<mml:mrow>
<mml:mi>max</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>n</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:munder>
<mml:mspace width="0.3333em"/>
<mml:msub>
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>i</mml:mi>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>A</mml:mi>
<mml:mi>I</mml:mi>
<mml:mi>C</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
</disp-formula>is the similarity of gene <bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub> and term <italic>t</italic>
<sub>2</sub>. <italic>ann</italic>(<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>) and <italic>ann</italic>(<bold>
<italic>x</italic>
</bold>
<sub>
<italic>j</italic>
</sub>) represent the sets of GO terms that annotate the two genes, respectively. The cardinalities of <italic>ann</italic>(<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>) and <italic>ann</italic>(<bold>
<italic>x</italic>
</bold>
<sub>
<italic>j</italic>
</sub>) are denoted by &#x7c;<italic>ann</italic>(<bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>)&#x7c; and &#x7c;<italic>ann</italic>(<bold>
<italic>x</italic>
</bold>
<sub>
<italic>j</italic>
</sub>)&#x7c;, respectively.</p>
<p>The gene function similarity obtained through GO can also reflect the pairwise constraint relationship between genes to a certain extent. In the proposed MSC-CSMC algorithm, gene pairs with a similarity of more than 0.9 constitute the GO must-link constraint set <inline-formula id="inf4">
<mml:math id="m10">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>, gene pairs with a similarity less than 0.1 constitute the GO cannot-link constraint set <inline-formula id="inf5">
<mml:math id="m11">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>, and the two sets together form the GO pairwise constraint set <inline-formula id="inf6">
<mml:math id="m12">
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x222a;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>. Finally, the gene expression pairwise constraint set &#x3a9; and the gene ontology pairwise constraint set &#x3a9;&#x2a; together constitute multi-source constraints for gene clustering.</p>
</sec>
<sec id="s2-2">
<title>2.2 Semi-supervised clustering objective functions based on multi-source constraints</title>
<p>At present, multi-objective optimization has gradually become a mainstream method for solving gene expression data clustering problems, which can achieve better clustering results on gene expression data compared with single-objective optimization methods. In the unsupervised multi-objective clustering problem of gene expression data, the cluster validity indices <italic>J</italic>
<sub>
<italic>FCM</italic>
</sub> (<xref ref-type="bibr" rid="B7">Bezdek et al., 1981</xref>) and <italic>XB</italic> (<xref ref-type="bibr" rid="B39">Xie and Beni, 1991</xref>), which measure the intra-cluster compactness and inter-cluster separation respectively, are commonly used as objective functions to realize the evolution of decision variables based on two conflicting objectives (<xref ref-type="bibr" rid="B5">Bandyopadhyay et al., 2007</xref>; <xref ref-type="bibr" rid="B26">Maulik et al., 2009</xref>; <xref ref-type="bibr" rid="B28">Mukhopadhyay et al., 2013</xref>; <xref ref-type="bibr" rid="B21">Li et al., 2022</xref>). In this paper, the proposed MSC-CSMC algorithm uses <italic>XB</italic> and the function based on quadratic-regularized fuzzy c-means with constraint violation penalty, namely, <italic>J</italic>
<sub>
<italic>P</italic>
</sub> (<xref ref-type="bibr" rid="B27">Mei, 2019</xref>), as the objective functions. Furthermore, the constraint violation penalty weights in <italic>J</italic>
<sub>
<italic>P</italic>
</sub> are improved to achieve semi-supervised clustering of gene expression data based on the multi-source constraints in the NSGA-II framework. The objective functions of <italic>XB</italic> and <italic>J</italic>
<sub>
<italic>P</italic>
</sub> are as follows:<disp-formula id="e5">
<mml:math id="m13">
<mml:mi>X</mml:mi>
<mml:mi>B</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:munderover>
<mml:mrow>
<mml:mo movablelimits="false" form="prefix">&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msubsup>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:msubsup>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:munder>
<mml:mrow>
<mml:mi>min</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mo>&#x2260;</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:munder>
<mml:msubsup>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfrac>
</mml:math>
<label>(5)</label>
</disp-formula>
<disp-formula id="e6">
<mml:math id="m14">
<mml:msub>
<mml:mrow>
<mml:mi>J</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x3b7;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msubsup>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x22a4;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:math>
<label>(6)</label>
</disp-formula>Here,<disp-formula id="equ3">
<mml:math id="m15">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mo movablelimits="false" form="prefix">&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mo movablelimits="false" form="prefix">&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:math>
</disp-formula>is the <italic>c</italic>th cluster prototype. <italic>k</italic> is the number of clusters, and the parameters <italic>&#x3b7;</italic> and <italic>&#x3b2;</italic> control the level of fuzziness and the contribution of the penalty term during clustering, respectively. <italic>u</italic>
<sub>
<italic>ic</italic>
</sub> is the membership degree of the datum <bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub> belonging to the <italic>c</italic>th cluster, obtained by<disp-formula id="e7">
<mml:math id="m16">
<mml:msub>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2b;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b7;</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>C</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b2;</mml:mi>
<mml:msubsup>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:math>
<label>(7)</label>
</disp-formula>
<disp-formula id="e8">
<mml:math id="m17">
<mml:msubsup>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>F</mml:mi>
<mml:mi>C</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>M</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msubsup>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mfenced open="&#x2016;" close="&#x2016;">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:math>
<label>(8)</label>
</disp-formula>
<disp-formula id="e9">
<mml:math id="m18">
<mml:msubsup>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>P</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:munderover accentunder="false" accent="true">
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:math>
<label>(9)</label>
</disp-formula>where <italic>w</italic>
<sub>
<italic>ij</italic>
</sub> &#x2208; <bold>
<italic>W</italic>
</bold> is the penalty weight for violating pairwise constraint <inline-formula id="inf7">
<mml:math id="m19">
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula>. In order to simultaneously consider both the gene expression constraint set &#x3a9; &#x3d; &#x3a9;<sub>
<italic>ML</italic>
</sub> &#x222a; &#x3a9;<sub>
<italic>CL</italic>
</sub> and gene ontology constraint set <inline-formula id="inf8">
<mml:math id="m20">
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x222a;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>, that is, the multi-source constraints proposed in this paper, we improve the constraint violation penalty weights through the following analysis: (1) if pairwise constraint <inline-formula id="inf9">
<mml:math id="m21">
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> exists in both &#x3a9;<sub>
<italic>ML</italic>
</sub> and <inline-formula id="inf10">
<mml:math id="m22">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>, or in both &#x3a9;<sub>
<italic>CL</italic>
</sub> and <inline-formula id="inf11">
<mml:math id="m23">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>, it means that the same category information of gene pair <inline-formula id="inf12">
<mml:math id="m24">
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> can be obtained from both gene expression and gene annotation, so the penalty weight for violating this constraint should be increased; (2) if pairwise constraint <inline-formula id="inf13">
<mml:math id="m25">
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> exists in &#x3a9;<sub>
<italic>ML</italic>
</sub> but not in <inline-formula id="inf14">
<mml:math id="m26">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>, or exists in &#x3a9;<sub>
<italic>CL</italic>
</sub> but not in <inline-formula id="inf15">
<mml:math id="m27">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>, it indicates that the category information of gene pair <inline-formula id="inf16">
<mml:math id="m28">
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> is not clear enough, thus the penalty weight <italic>w</italic>
<sub>
<italic>ij</italic>
</sub> should be decreased; (3) if pairwise constraint <inline-formula id="inf17">
<mml:math id="m29">
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> exists in both &#x3a9;<sub>
<italic>ML</italic>
</sub> and <inline-formula id="inf18">
<mml:math id="m30">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>, or in both &#x3a9;<sub>
<italic>CL</italic>
</sub> and <inline-formula id="inf19">
<mml:math id="m31">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:math>
</inline-formula>, it should be regarded as a contradictory constraint and removed from the constraint sets &#x3a9; and &#x3a9;&#x2a;. Based on the above idea, the MSC-CSMC algorithm proposed in this paper improves the constraint violation penalty weight as follows:<disp-formula id="e10">
<mml:math id="m32">
<mml:msub>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="{" close="">
<mml:mrow>
<mml:mtable class="aligned">
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mtext>&#x2009;and&#x2009;</mml:mtext>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2209;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mtext>&#x2009;and&#x2009;</mml:mtext>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2209;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mn>1</mml:mn>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mtext>&#x2009;and&#x2009;</mml:mtext>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2208;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>M</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2208;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mtext>&#x2009;and&#x2009;</mml:mtext>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2208;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold">&#x3a9;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>C</mml:mi>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mtext>&#x2009;otherwise&#x2009;</mml:mtext>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:math>
<label>(10)</label>
</disp-formula>with <italic>&#x3b8;</italic> &#x3e; 0 being the GO action parameter. It can be seen that the improved penalty weights effectively integrate the gene expression and Gene Ontology information and provide reasonable violation penalties for pairwise constraints in semi-supervised clustering.</p>
</sec>
<sec id="s2-3">
<title>2.3 Mixed chromosome encoding strategy used in MSC-CSMC</title>
<p>For the purpose of co-optimizing the constraints selection and clustering in the process of multi-objective evolution, a mixed encoding strategy combining the constraints selection and cluster prototype is adopted, as shown in <xref ref-type="fig" rid="F3">Figure 3</xref>. Let <bold>
<italic>P</italic>
</bold> denote the genetic population, <italic>N</italic> be the population size, and <italic>s</italic> be the number of pairwise constraints to be selected. To account for noisy constraints in the initial pairwise constraint set and to improve the search efficiency of the algorithm, 2<italic>s</italic> constraints are randomly selected from the initial pairwise constraint set to generate the candidate constraint set &#x3a9;<sub>
<italic>p</italic>
</sub>, and a serial number is assigned for each pairwise constraint. For a gene expression dataset with <italic>k</italic> clusters <inline-formula id="inf20">
<mml:math id="m33">
<mml:mi mathvariant="bold-italic">X</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msup>
</mml:math>
</inline-formula>, the <italic>r</italic>th individual in the <italic>l</italic>th generation <inline-formula id="inf21">
<mml:math id="m34">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> consists of two parts: the cluster prototype <inline-formula id="inf22">
<mml:math id="m35">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> and the constraints selection <inline-formula id="inf23">
<mml:math id="m36">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula>. Among them, <inline-formula id="inf24">
<mml:math id="m37">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> encodes <italic>k</italic> cluster prototypes <inline-formula id="inf25">
<mml:math id="m38">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:msub>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:msub>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>c</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> with real numbers, and <inline-formula id="inf26">
<mml:math id="m39">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:msub>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:msub>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> encodes the serial numbers of <italic>s</italic> pairwise constraints <inline-formula id="inf27">
<mml:math id="m40">
<mml:msub>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2264;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2264;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mi>s</mml:mi>
<mml:mo>,</mml:mo>
<mml:mspace width="0.3333em"/>
<mml:mn>1</mml:mn>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>j</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> selected from &#x3a9;<sub>
<italic>p</italic>
</sub> with integers.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>The mixed chromosome encoding strategy used in MSC-CSMC.</p>
</caption>
<graphic xlink:href="fgene-14-1135260-g003.tif"/>
</fig>
<p>In the proposed algorithm, the two parts of the chromosomes are initialized separately. For the cluster prototype part, in order to ensure initialization quality and population diversity, half of the individuals are encoded as the <italic>k</italic> cluster prototypes obtained by the density peak method (<xref ref-type="bibr" rid="B31">Rodriguez and Laio, 2014</xref>), and the other half are encoded from randomly generated cluster prototypes. For the constraints selection part of each individual, the components are initialized with non-repeated random integers in <inline-formula id="inf28">
<mml:math id="m41">
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula>.</p>
</sec>
<sec id="s2-4">
<title>2.4 Genetic operations</title>
<p>In the genetic evolution process of the MSC-CSMC algorithm, the roulette wheel strategy is first used to implement the selection. Since the NSGA-II algorithm tends to select individuals with lower non-domination ranks, for the <italic>r</italic>th individual <inline-formula id="inf29">
<mml:math id="m42">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> of the <italic>l</italic>th generation, the selection probability (<xref ref-type="bibr" rid="B46">Zhou and Zhu, 2018</xref>) is calculated as follows:<disp-formula id="e11">
<mml:math id="m43">
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">rank</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
</mml:math>
<label>(11)</label>
</disp-formula>Here, <inline-formula id="inf30">
<mml:math id="m44">
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> is the selection parameter, <italic>f</italic>
<sub>
<italic>rank</italic>
</sub> is the non-domination rank of individual <inline-formula id="inf31">
<mml:math id="m45">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula>.</p>
<p>For the parent individuals <inline-formula id="inf32">
<mml:math id="m46">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> and <inline-formula id="inf33">
<mml:math id="m47">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula>, let the crossover probability be <italic>p</italic>
<sub>
<italic>c</italic>
</sub>; different crossover operators are used for the cluster prototypes and the constraints selection. Among them, <inline-formula id="inf34">
<mml:math id="m48">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> and <inline-formula id="inf35">
<mml:math id="m49">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> generate offspring through the normal distribution crossover operator (<xref ref-type="bibr" rid="B43">Zhang and Luo, 2009</xref>), and the offspring cluster prototypes are:<disp-formula id="e12">
<mml:math id="m50">
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1.481</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#xd7;</mml:mo>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:math>
<label>(12)</label>
</disp-formula>
<disp-formula id="e13">
<mml:math id="m51">
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1.481</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#xd7;</mml:mo>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi>N</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:math>
<label>(13)</label>
</disp-formula>where <inline-formula id="inf36">
<mml:math id="m52">
<mml:mi>N</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> is a random variable following the standard normal distribution. The constraints selection <inline-formula id="inf37">
<mml:math id="m53">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> and <inline-formula id="inf38">
<mml:math id="m54">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> adopt the single-point crossover operator: for a random integer <italic>rand</italic>
<sub>
<italic>c</italic>
</sub> in <inline-formula id="inf39">
<mml:math id="m55">
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula>, the offspring constraints selections are:<disp-formula id="e14">
<mml:math id="m56">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
<label>(14)</label>
</disp-formula>
<disp-formula id="e15">
<mml:math id="m57">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">o</mml:mi>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mi mathvariant="bold-italic">f</mml:mi>
<mml:mi mathvariant="bold-italic">s</mml:mi>
<mml:mi mathvariant="bold-italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>g</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
<label>(15)</label>
</disp-formula>If repeated pairwise constraints appear after crossover, non-repeated pairwise constraints are randomly selected from the candidate constraint set &#x3a9;<sub>
<italic>p</italic>
</sub> as replacements. For individual <bold>
<italic>P</italic>
</bold>
<sub>
<italic>r</italic>
</sub>(<italic>l</italic>), different mutation operators are adopted for the two parts. The polynomial mutation operator (<xref ref-type="bibr" rid="B33">Rousseeuw, 1987</xref>) is applied for <inline-formula id="inf40">
<mml:math id="m58">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:msubsup>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, where site <italic>v</italic>
<sub>
<italic>r</italic>,<italic>ci</italic>
</sub> mutates with probability <italic>p</italic>
<sub>
<italic>m</italic>
</sub>:<disp-formula id="e16">
<mml:math id="m59">
<mml:msubsup>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b4;</mml:mi>
<mml:mo>&#xd7;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>c</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>d</mml:mi>
</mml:math>
<label>(16)</label>
</disp-formula>where <italic>v</italic>
<sub>
<italic>u</italic>
</sub> and <italic>v</italic>
<sub>
<italic>l</italic>
</sub> are the upper and lower bounds of the cluster prototype, respectively. For normalized gene expression data, the bounds are set to 1 and 0. <italic>&#x3b4;</italic> is determined as follows (<xref ref-type="bibr" rid="B13">Deb and Tiwari, 2008</xref>):<disp-formula id="e17">
<mml:math id="m60">
<mml:mi>&#x3b4;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="{" close="">
<mml:mrow>
<mml:mtable class="array">
<mml:mtr>
<mml:mtd columnalign="center">
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:msup>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b7;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b7;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3c;</mml:mo>
<mml:mn>0.5</mml:mn>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="center">
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>0.5</mml:mn>
</mml:mrow>
</mml:mfenced>
<mml:msubsup>
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>c</mml:mi>
<mml:mi>i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b7;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>&#x3b7;</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:msup>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
<mml:mi>a</mml:mi>
<mml:mi>n</mml:mi>
<mml:mi>d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x2265;</mml:mo>
<mml:mn>0.5</mml:mn>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:math>
<label>(17)</label>
</disp-formula>Here, <italic>&#x3b7;</italic>
<sub>
<italic>m</italic>
</sub> is the distribution index, <italic>rand</italic>
<sub>
<italic>m</italic>
</sub> is a random number in <inline-formula id="inf41">
<mml:math id="m61">
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula>. For <inline-formula id="inf42">
<mml:math id="m62">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula>, random mutation is used, that is, first randomly select a position in <inline-formula id="inf43">
<mml:math id="m63">
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>s</mml:mi>
<mml:mi>e</mml:mi>
<mml:mi>t</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:msubsup>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula>, and then replace its value with a random integer in <inline-formula id="inf44">
<mml:math id="m64">
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> that is not repeated with others. In summary, the procedure of the MSC-CSMC algorithm is shown as follows:</p>
<p>Input: Gene expression dataset <bold>
<italic>X</italic>
</bold>, number of neighbors <italic>b</italic>, density drop rate <italic>&#x25b;</italic>, population size <italic>N</italic>, maximal number of generations <italic>L</italic>
<italic>
<sub>max</sub>
</italic>, number of clusters <italic>k</italic>, fuzzy parameter <italic>&#x3b7;</italic>, penalty parameter <italic>&#x3b2;</italic>, constraint number <italic>s</italic>, GO action parameter <italic>&#x3b8;</italic>, selection parameter <italic>&#x3b1;</italic>, crossover probability <italic>p</italic>
<sub>
<italic>c</italic>
</sub>, mutation probability <italic>p</italic>
<sub>
<italic>m</italic>
</sub>, and distribution index <italic>&#x3b7;</italic>
<sub>
<italic>m</italic>
</sub>.<list list-type="simple">
<list-item>
<p>Step 1: Generate the gene expression pairwise constraint set &#x3a9; based on the density tracking method.</p>
</list-item>
<list-item>
<p>Step 2: Calculate the functional similarity of genes based on AIC, and generate the gene ontology pairwise constraint set <italic>&#x3a9;</italic>&#x2a;. Then delete the contradictory constraints, and determine the penalty weight matrix <bold>
<italic>W</italic>
</bold> corresponding to the multi-source constraints based on <xref ref-type="disp-formula" rid="e10">Formula 10</xref>.</p>
</list-item>
<list-item>
<p>Step 3: Randomly select 2<italic>s</italic> pairwise constraints from the initial constraint set to construct the candidate constraint set &#x3a9;<sub>
<italic>p</italic>
</sub>, and initialize the population.</p>
</list-item>
<list-item>
<p>Step 4: When the genetic generation index is <inline-formula id="inf45">
<mml:math id="m65">
<mml:mi>l</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">max</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula>, for each individual <inline-formula id="inf46">
<mml:math id="m66">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">P</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>l</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mspace width="0.3333em"/>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>r</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula>, decode to obtain the cluster prototypes and the selected pairwise constraints. Update the membership degree according to <xref ref-type="disp-formula" rid="e7">Formulas 7</xref>-<xref ref-type="disp-formula" rid="e9">9</xref>, and calculate the individual fitness values based on <xref ref-type="disp-formula" rid="e5">Formulas 5</xref>-<xref ref-type="disp-formula" rid="e6">6</xref>.</p>
</list-item>
<list-item>
<p>Step 5: According to the individual fitness values, calculate the non-domination rank and crowding distance of each individual.</p>
</list-item>
<list-item>
<p>Step 6: Apply selection, crossover, and mutation based on <xref ref-type="disp-formula" rid="e11">Formulas 11</xref>-<xref ref-type="disp-formula" rid="e17">17</xref>, and update the individual fitness values according to <xref ref-type="disp-formula" rid="e5">Formulas 5</xref>-<xref ref-type="disp-formula" rid="e6">6</xref>.</p>
</list-item>
<list-item>
<p>Step 7: Merge the parent and offspring populations, and select the next-generation according to the elite retention strategy.</p>
</list-item>
<list-item>
<p>Step 8: If <inline-formula id="inf47">
<mml:math id="m67">
<mml:mi>l</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="&#x230a;" close="&#x230b;">
<mml:mrow>
<mml:mn>0.5</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">max</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula> or <inline-formula id="inf48">
<mml:math id="m68">
<mml:mi>l</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="&#x230a;" close="&#x230b;">
<mml:mrow>
<mml:mn>0.8</mml:mn>
<mml:mo>&#xd7;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>L</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">max</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula>, update the penalty parameter <italic>&#x3b2;</italic> &#x3d; 2 &#xd7; <italic>&#x3b2;</italic> to increase the penalty for violating the currently selected constraints.</p>
</list-item>
<list-item>
<p>Step 9: Set <italic>l</italic> &#x3d; <italic>l</italic> &#x2b; 1, repeat Steps 4-8 until the maximal number of generations <italic>L</italic>
<italic>
<sub>max</sub>
</italic> is reached.</p>
</list-item>
</list>
</p>
<p>Output: The Pareto optimal solutions.</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>3 Results</title>
<sec id="s3-1">
<title>3.1 Datasets</title>
<p>In this study, five benchmark gene expression datasets, namely, Yeast Galactose Metabolism, Yeast Cell Cycle, Yeast Sporulation, Serum, and Arabidopsis are used for the experiment.</p>
<p>The Yeast Galactose Metabolism dataset (<xref ref-type="bibr" rid="B18">Ideker et al., 2001</xref>) is composed of 205 genes whose expression patterns reflect four functional categories. The gene expression profiles were measured with four replicate assays across 20 time points. The Yeast Cell Cycle dataset (<xref ref-type="bibr" rid="B10">Cho et al., 1998</xref>) contains the expression levels of 384 genes involved in yeast cell cycle regulation at 17 time points, and these data are related to the five phases of the cell cycle. The Yeast Sporulation dataset (<xref ref-type="bibr" rid="B11">Chu et al., 1998</xref>) contains the expression levels of more than 6,000 genes measured during the sporulation process of budding yeast across seven time points. The genes that showed no significant changes in expression during the harvesting were excluded, and the resulting set consists of 474 genes. The Serum dataset (<xref ref-type="bibr" rid="B19">Iyer et al., 1999</xref>) contains the expression levels of 517 human genes. The dataset has 13 dimensions corresponding to 12 time points and 1 unsynchronized sample. The Arabidopsis dataset (<xref ref-type="bibr" rid="B30">Reymond et al., 2000</xref>) consists of 138 <italic>Arabidopsis thaliana</italic> genes. Each gene has eight expression values that correspond to eight time points. The details of the datasets are shown in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Description of datasets.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Dataset</th>
<th align="center">Number of genes</th>
<th align="center">Number of features</th>
<th align="center">Number of clusters</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">Yeast Galactose Metabolism</td>
<td align="center">205</td>
<td align="center">80</td>
<td align="center">4</td>
</tr>
<tr>
<td align="center">Yeast Cell Cycle</td>
<td align="center">384</td>
<td align="center">17</td>
<td align="center">5</td>
</tr>
<tr>
<td align="center">Yeast Sporulation</td>
<td align="center">474</td>
<td align="center">7</td>
<td align="center">6</td>
</tr>
<tr>
<td align="center">Serum</td>
<td align="center">517</td>
<td align="center">13</td>
<td align="center">6</td>
</tr>
<tr>
<td align="center">Arabidopsis</td>
<td align="center">138</td>
<td align="center">8</td>
<td align="center">4</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="s3-2">
<title>3.2 Model evaluation criteria and parameter assignment</title>
<p>In order to evaluate the effectiveness of the model, the silhouette index (<xref ref-type="bibr" rid="B33">Rousseeuw, 1987</xref>) is chosen as the evaluation criterion for the clustering results. For gene <bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub>, the silhouette width is calculated as follows:<disp-formula id="e18">
<mml:math id="m69">
<mml:mi>S</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mi>b</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>&#x2212;</mml:mo>
<mml:mi>a</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>max</mml:mi>
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:mi>a</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mo>,</mml:mo>
<mml:mi>b</mml:mi>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:mfrac>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>i</mml:mi>
<mml:mo>&#x2264;</mml:mo>
<mml:mi>n</mml:mi>
</mml:math>
<label>(18)</label>
</disp-formula>Here, <italic>a</italic>(<italic>i</italic>) is the average distance from gene <bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub> to other genes in the same cluster, <italic>b</italic>(<italic>i</italic>) is the minimum average distance between gene <bold>
<italic>x</italic>
</bold>
<sub>
<italic>i</italic>
</sub> and genes in the other clusters. The silhouette index <italic>SI</italic> of dataset <bold>
<italic>X</italic>
</bold> is the mean value of the silhouette widths of all genes, with <inline-formula id="inf49">
<mml:math id="m70">
<mml:mi>S</mml:mi>
<mml:mi>I</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mfenced open="[" close="]">
<mml:mrow>
<mml:mo>&#x2212;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:math>
</inline-formula>. A greater <italic>SI</italic> value indicates better clustering quality. In addition, as suggested by <xref ref-type="bibr" rid="B34">Saha and Bandyopadhyay (2013)</xref>, the final solution of MSC-CSMC is selected from the Pareto optimal solutions by using the silhouette index.</p>
<p>According to <xref ref-type="bibr" rid="B27">Mei (2019)</xref> and <xref ref-type="bibr" rid="B2">Abin and Vu (2020)</xref>, the parameters of MSC-CSMC are assigned as follows: <italic>&#x25b;</italic> &#x3d; 0.8, <italic>b</italic> &#x3d; 10, <italic>&#x3b7;</italic> &#x3d; 0.001, <italic>&#x3b2;</italic> &#x3d; 0.1, <italic>N</italic> &#x3d; 100, <italic>L</italic>
<italic>
<sub>max</sub>
</italic> &#x3d; 300, <italic>&#x3b1;</italic> &#x3d; 0.3, <italic>&#x3b7;</italic>
<sub>
<italic>m</italic>
</sub> &#x3d; 5, <italic>p</italic>
<sub>
<italic>c</italic>
</sub> &#x3d; 0.8, <italic>p</italic>
<sub>
<italic>m</italic>
</sub> &#x3d; 0.1. The number of pairwise constraints <italic>s</italic> is chosen as 0, 5, 10, 15, 20, and 25. In gene expression data analysis, the determination of the number of clusters <italic>k</italic> is an open problem. Generally, there are two approaches to determine the value of <italic>k</italic>; one is to directly set it as the true number of clusters (<xref ref-type="bibr" rid="B41">Yu et al., 2018</xref>; <xref ref-type="bibr" rid="B45">Zhao et al., 2021</xref>; <xref ref-type="bibr" rid="B21">Li et al., 2022</xref>; <xref ref-type="bibr" rid="B22">Liu et al., 2022</xref>; <xref ref-type="bibr" rid="B38">Wu and Ma, 2022</xref>); the other approach is applicable to the case where the true number of clusters is unknown, in which the variation range of <italic>k</italic> is determined first, and the <italic>k</italic> corresponding to the optimal value of an index (Silhouette index, Dunn index, Davies&#x2013;Bouldin index, <italic>etc.</italic>) can be chosen as the optimal number of clusters (<xref ref-type="bibr" rid="B14">Gao et al., 2019</xref>; <xref ref-type="bibr" rid="B3">Acharya et al., 2020</xref>; <xref ref-type="bibr" rid="B24">L&#xf3;pez-Cort&#xe9;s et al., 2020</xref>; <xref ref-type="bibr" rid="B42">Zhang et al., 2022</xref>). In this paper, we adopt the first approach, and the number of clusters <italic>k</italic> is selected according to <xref ref-type="table" rid="T1">Table 1</xref>. In order to analyze the impact of the GO action parameter <italic>&#x3b8;</italic>, we set <italic>&#x3b8;</italic> from 0.1 to 0.9 at intervals of 0.1 under the condition that the number of pairwise constraints is 15. The results are shown in <xref ref-type="fig" rid="F4">Figure 4</xref>. It can be seen that the value of <italic>SI</italic> barely changes as <italic>&#x3b8;</italic> increases, which means that the algorithm is not very sensitive to the value of <italic>&#x3b8;</italic>.
For Yeast Galactose Metabolism, Yeast Cell Cycle, Yeast Sporulation, Serum, and Arabidopsis, the <italic>&#x3b8;</italic> values are respectively set to 0.4, 0.7, 0.6, 0.5, and 0.4, which lead to the optimal clustering performances.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>The impact of parameter <italic>&#x3b8;</italic> on <italic>SI</italic> tested on different datasets. <bold>(A)</bold> Yeast Galactose Metabolism <bold>(B)</bold> Yeast Cell Cycle <bold>(C)</bold> Yeast Sporulation <bold>(D)</bold> Serum <bold>(E)</bold> Arabidopsis.</p>
</caption>
<graphic xlink:href="fgene-14-1135260-g004.tif"/>
</fig>
</sec>
<sec id="s3-3">
<title>3.3 Result analysis and model comparison</title>
<p>For the purpose of inspecting the performance of the proposed MSC-CSMC algorithm, several advanced semi-supervised clustering algorithms based on single-source constraints, including COP-Kmeans (<xref ref-type="bibr" rid="B37">Wagstaff et al., 2001</xref>), PCKMeans (<xref ref-type="bibr" rid="B6">Basu et al., 2004</xref>), MPCKMeans (<xref ref-type="bibr" rid="B8">Bilenko et al., 2004</xref>), PCCA (<xref ref-type="bibr" rid="B17">Grira et al., 2008</xref>), PCFCMq (<xref ref-type="bibr" rid="B27">Mei, 2019</xref>) and MSC-CS (<xref ref-type="bibr" rid="B44">Zhao and Li, 2022</xref>), are used for comparison. Among them, the MSC-CS algorithm is the single-source constrained version of MSC-CSMC, which does not consider the annotation information provided by GO. In the above algorithms, the pairwise constraints are randomly selected from the initial gene expression constraint set &#x3a9;. To avoid the influence of randomness, each method is run ten times under the same number of pairwise constraints, and the mean value of the clustering results is taken as the final result. The <italic>SI</italic> values of all seven algorithms applied to the five datasets are shown in <xref ref-type="table" rid="T2">Tables 2</xref>&#x2013;<xref ref-type="table" rid="T6">6</xref>; the optimal solutions in each row are highlighted in bold.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>
<italic>SI</italic> values on Yeast Galactose Metabolism with different number of constraints.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">
<italic>s</italic>
</th>
<th align="center">COP-Kmeans</th>
<th align="center">PCKMeans</th>
<th align="center">MPCKMeans</th>
<th align="center">PCCA</th>
<th align="center">PCFCMq</th>
<th align="center">MSC-CS</th>
<th align="center">MSC-CSMC</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">0</td>
<td align="center">0.384</td>
<td align="center">0.254</td>
<td align="center">0.305</td>
<td align="center">0.525</td>
<td align="center">0.465</td>
<td align="center">
<bold>0.566</bold>
</td>
<td align="center">
<bold>0.566</bold>
</td>
</tr>
<tr>
<td align="center">5</td>
<td align="center">0.423</td>
<td align="center">0.479</td>
<td align="center">0.258</td>
<td align="center">0.348</td>
<td align="center">0.254</td>
<td align="center">0.583</td>
<td align="center">
<bold>0.628</bold>
</td>
</tr>
<tr>
<td align="center">10</td>
<td align="center">0.460</td>
<td align="center">0.484</td>
<td align="center">0.471</td>
<td align="center">0.144</td>
<td align="center">0.274</td>
<td align="center">0.592</td>
<td align="center">
<bold>0.631</bold>
</td>
</tr>
<tr>
<td align="center">15</td>
<td align="center">0.458</td>
<td align="center">0.484</td>
<td align="center">0.463</td>
<td align="center">0.198</td>
<td align="center">0.402</td>
<td align="center">0.645</td>
<td align="center">
<bold>0.668</bold>
</td>
</tr>
<tr>
<td align="center">20</td>
<td align="center">0.459</td>
<td align="center">0.457</td>
<td align="center">0.370</td>
<td align="center">0.383</td>
<td align="center">0.351</td>
<td align="center">0.645</td>
<td align="center">
<bold>0.668</bold>
</td>
</tr>
<tr>
<td align="center">25</td>
<td align="center">0.445</td>
<td align="center">0.433</td>
<td align="center">0.413</td>
<td align="center">0.351</td>
<td align="center">0.290</td>
<td align="center">0.645</td>
<td align="center">
<bold>0.668</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The bold values indicate the optimal solutions in each row.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T3" position="float">
<label>TABLE 3</label>
<caption>
<p>
<italic>SI</italic> values on Yeast Cell Cycle with different number of constraints.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">
<italic>s</italic>
</th>
<th align="center">COP-Kmeans</th>
<th align="center">PCKMeans</th>
<th align="center">MPCKMeans</th>
<th align="center">PCCA</th>
<th align="center">PCFCMq</th>
<th align="center">MSC-CS</th>
<th align="center">MSC-CSMC</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">0</td>
<td align="center">0.256</td>
<td align="center">0.252</td>
<td align="center">0.281</td>
<td align="center">0.350</td>
<td align="center">0.408</td>
<td align="center">
<bold>0.436</bold>
</td>
<td align="center">
<bold>0.436</bold>
</td>
</tr>
<tr>
<td align="center">5</td>
<td align="center">0.264</td>
<td align="center">0.250</td>
<td align="center">0.251</td>
<td align="center">0.115</td>
<td align="center">0.385</td>
<td align="center">0.456</td>
<td align="center">
<bold>0.497</bold>
</td>
</tr>
<tr>
<td align="center">10</td>
<td align="center">0.273</td>
<td align="center">0.227</td>
<td align="center">0.203</td>
<td align="center">0.208</td>
<td align="center">0.409</td>
<td align="center">0.519</td>
<td align="center">
<bold>0.542</bold>
</td>
</tr>
<tr>
<td align="center">15</td>
<td align="center">0.258</td>
<td align="center">0.275</td>
<td align="center">0.202</td>
<td align="center">0.133</td>
<td align="center">0.408</td>
<td align="center">0.528</td>
<td align="center">
<bold>0.594</bold>
</td>
</tr>
<tr>
<td align="center">20</td>
<td align="center">0.282</td>
<td align="center">0.263</td>
<td align="center">0.322</td>
<td align="center">0.229</td>
<td align="center">0.408</td>
<td align="center">0.530</td>
<td align="center">
<bold>0.606</bold>
</td>
</tr>
<tr>
<td align="center">25</td>
<td align="center">0.264</td>
<td align="center">0.261</td>
<td align="center">0.318</td>
<td align="center">0.267</td>
<td align="center">0.409</td>
<td align="center">0.584</td>
<td align="center">
<bold>0.607</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The bold values indicate the optimal solutions in each row.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T4" position="float">
<label>TABLE 4</label>
<caption>
<p>
<italic>SI</italic> values on Yeast Sporulation with different number of constraints.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">
<italic>s</italic>
</th>
<th align="center">COP-Kmeans</th>
<th align="center">PCKMeans</th>
<th align="center">MPCKMeans</th>
<th align="center">PCCA</th>
<th align="center">PCFCMq</th>
<th align="center">MSC-CS</th>
<th align="center">MSC-CSMC</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">0</td>
<td align="center">0.329</td>
<td align="center">0.328</td>
<td align="center">0.345</td>
<td align="center">0.400</td>
<td align="center">0.364</td>
<td align="center">
<bold>0.491</bold>
</td>
<td align="center">
<bold>0.491</bold>
</td>
</tr>
<tr>
<td align="center">5</td>
<td align="center">0.331</td>
<td align="center">0.354</td>
<td align="center">0.411</td>
<td align="center">0.067</td>
<td align="center">0.463</td>
<td align="center">0.520</td>
<td align="center">
<bold>0.528</bold>
</td>
</tr>
<tr>
<td align="center">10</td>
<td align="center">0.324</td>
<td align="center">0.429</td>
<td align="center">0.404</td>
<td align="center">0.164</td>
<td align="center">0.420</td>
<td align="center">0.525</td>
<td align="center">
<bold>0.531</bold>
</td>
</tr>
<tr>
<td align="center">15</td>
<td align="center">0.300</td>
<td align="center">0.404</td>
<td align="center">0.409</td>
<td align="center">0.325</td>
<td align="center">0.434</td>
<td align="center">
<bold>0.565</bold>
</td>
<td align="center">0.556</td>
</tr>
<tr>
<td align="center">20</td>
<td align="center">0.324</td>
<td align="center">0.403</td>
<td align="center">0.405</td>
<td align="center">0.235</td>
<td align="center">0.416</td>
<td align="center">0.571</td>
<td align="center">
<bold>0.592</bold>
</td>
</tr>
<tr>
<td align="center">25</td>
<td align="center">0.346</td>
<td align="center">0.396</td>
<td align="center">0.394</td>
<td align="center">0.286</td>
<td align="center">0.413</td>
<td align="center">0.592</td>
<td align="center">
<bold>0.594</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The bold values indicate the optimal solutions in each row.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T5" position="float">
<label>TABLE 5</label>
<caption>
<p>
<italic>SI</italic> values on Serum with different number of constraints.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">
<italic>s</italic>
</th>
<th align="center">COP-Kmeans</th>
<th align="center">PCKMeans</th>
<th align="center">MPCKMeans</th>
<th align="center">PCCA</th>
<th align="center">PCFCMq</th>
<th align="center">MSC-CS</th>
<th align="center">MSC-CSMC</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">0</td>
<td align="center">0.212</td>
<td align="center">0.208</td>
<td align="center">0.186</td>
<td align="center">0.290</td>
<td align="center">0.270</td>
<td align="center">
<bold>0.312</bold>
</td>
<td align="center">
<bold>0.312</bold>
</td>
</tr>
<tr>
<td align="center">5</td>
<td align="center">0.210</td>
<td align="center">0.205</td>
<td align="center">0.211</td>
<td align="center">0.080</td>
<td align="center">0.264</td>
<td align="center">
<bold>0.327</bold>
</td>
<td align="center">0.325</td>
</tr>
<tr>
<td align="center">10</td>
<td align="center">0.200</td>
<td align="center">0.202</td>
<td align="center">0.197</td>
<td align="center">0.146</td>
<td align="center">0.271</td>
<td align="center">
<bold>0.341</bold>
</td>
<td align="center">0.340</td>
</tr>
<tr>
<td align="center">15</td>
<td align="center">0.200</td>
<td align="center">0.181</td>
<td align="center">0.184</td>
<td align="center">0.235</td>
<td align="center">0.264</td>
<td align="center">0.354</td>
<td align="center">
<bold>0.362</bold>
</td>
</tr>
<tr>
<td align="center">20</td>
<td align="center">0.198</td>
<td align="center">0.206</td>
<td align="center">0.217</td>
<td align="center">0.144</td>
<td align="center">0.262</td>
<td align="center">0.368</td>
<td align="center">
<bold>0.385</bold>
</td>
</tr>
<tr>
<td align="center">25</td>
<td align="center">0.193</td>
<td align="center">0.202</td>
<td align="center">0.185</td>
<td align="center">0.238</td>
<td align="center">0.269</td>
<td align="center">0.379</td>
<td align="center">
<bold>0.403</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The bold values indicate the optimal solutions in each row.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T6" position="float">
<label>TABLE 6</label>
<caption>
<p>
<italic>SI</italic> values on Arabidopsis with different number of constraints.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">
<italic>s</italic>
</th>
<th align="center">COP-Kmeans</th>
<th align="center">PCKMeans</th>
<th align="center">MPCKMeans</th>
<th align="center">PCCA</th>
<th align="center">PCFCMq</th>
<th align="center">MSC-CS</th>
<th align="center">MSC-CSMC</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">0</td>
<td align="center">0.220</td>
<td align="center">0.223</td>
<td align="center">0.197</td>
<td align="center">0.314</td>
<td align="center">0.353</td>
<td align="center">
<bold>0.358</bold>
</td>
<td align="center">
<bold>0.358</bold>
</td>
</tr>
<tr>
<td align="center">5</td>
<td align="center">0.207</td>
<td align="center">0.216</td>
<td align="center">0.192</td>
<td align="center">-0.151</td>
<td align="center">0.353</td>
<td align="center">0.368</td>
<td align="center">
<bold>0.373</bold>
</td>
</tr>
<tr>
<td align="center">10</td>
<td align="center">0.212</td>
<td align="center">0.210</td>
<td align="center">0.206</td>
<td align="center">0.046</td>
<td align="center">0.353</td>
<td align="center">0.373</td>
<td align="center">
<bold>0.387</bold>
</td>
</tr>
<tr>
<td align="center">15</td>
<td align="center">0.200</td>
<td align="center">0.201</td>
<td align="center">0.185</td>
<td align="center">0.106</td>
<td align="center">0.354</td>
<td align="center">0.375</td>
<td align="center">
<bold>0.394</bold>
</td>
</tr>
<tr>
<td align="center">20</td>
<td align="center">0.197</td>
<td align="center">0.189</td>
<td align="center">0.185</td>
<td align="center">0.308</td>
<td align="center">0.344</td>
<td align="center">0.381</td>
<td align="center">
<bold>0.396</bold>
</td>
</tr>
<tr>
<td align="center">25</td>
<td align="center">0.187</td>
<td align="center">0.187</td>
<td align="center">0.181</td>
<td align="center">0.335</td>
<td align="center">0.352</td>
<td align="center">0.389</td>
<td align="center">
<bold>0.397</bold>
</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>The bold values indicate the optimal solutions in each row.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>According to <xref ref-type="table" rid="T2">Tables 2</xref>&#x2013;<xref ref-type="table" rid="T6">6</xref>, it can be seen that the proposed MSC-CSMC algorithm and its single-source constraint version MSC-CS can always achieve optimal and suboptimal clustering results on the five gene expression datasets, demonstrating the effectiveness of the constraints selection. The mixed chromosome encoding strategy combining the constraint selection and cluster prototype can find the pairwise constraints suitable for clustering in the co-evolution process and improve clustering accuracy, and the highly accurate clustering results can further improve the constraint selection ability of the algorithm in turn. Conversely, the algorithms for comparison are based on the assumption that the pairwise constraints conform to the real cluster information and are easily affected by noisy constraints. This is consistent with the analysis of the negative effects of noisy constraints by <xref ref-type="bibr" rid="B40">Yin et al. (2010)</xref> and <xref ref-type="bibr" rid="B20">Lai et al. (2021)</xref>. In addition, the MSC-CSMC algorithm is better than MSC-CS in most cases, indicating that using multi-source constraints can improve the performance of semi-supervised clustering. The gene ontology used to generate multi-source pairwise constraints in our MSC-CSMC algorithm can explain gene expression profiles from the perspective of gene function. By effectively integrating the gene expression and Gene Ontology information, the proposed penalty weights can provide reasonable violation penalty for pairwise constraints.</p>
<p>In the case of <italic>s</italic> &#x3d; 0, that is, there is no pairwise constraint, both MSC-CSMC and MSC-CS degenerate into unsupervised multi-objective clustering methods, turning out the same result. Compared with PCFCMq, which uses <italic>J</italic>
<sub>
<italic>P</italic>
</sub> as the single objective function, the better performance of MSC-CSMC and MSC-CS shows the advantages of using multi-objective optimization in clustering gene expression data.</p>
<p>Among the comparison algorithms, the performance of the PCFCMq algorithm, which is based on fuzzy clustering, is generally better than the hard clustering-based COP-Kmeans, PCKMeans, and MPCKMeans algorithms. According to <xref ref-type="bibr" rid="B15">Gasch and Eisen (2002)</xref>, genes may be co-expressed with different genomes under different measurement conditions, and there is usually overlap between gene clusters. Therefore, compared with hard clustering algorithms, fuzzy clustering algorithms are more suitable for analyzing gene expression data. Furthermore, due to the proposed constraints selection and multi-source constraint fusion strategy, the MSC-CSMC algorithm achieves better clustering results than the PCFCMq algorithm. In terms of the robustness of the clustering results, the performances of semi-supervised clustering algorithms for comparison fluctuate with the increase of pairwise constraints, which is mainly due to the quality of randomly selected pairwise constraints. As stated by <xref ref-type="bibr" rid="B20">Lai et al. (2021)</xref>, even non-noisy constraints that conform to the real cluster information may have a negative impact on the clustering results, which further illustrates the necessity of constraints selection in semi-supervised clustering algorithms. The proposed MSC-CSMC algorithm can select pairwise constraints suitable for clustering based on the co-evolution of the cluster prototype and constraints selection, which guarantees both accuracy and stability of the clustering results.</p>
<p>To illustrate the consistency of the gene clusters obtained by the MSC-CSMC algorithm, the Eisen plots and cluster profile plots corresponding to the clustering results of five datasets are shown in <xref ref-type="fig" rid="F5">Figure 5</xref> and <xref ref-type="fig" rid="F6">Figure 6</xref>. In the Eisen plots, each row corresponds to a gene, each column to a time point (sample), and each entry of the plot represents the expression level of a gene at a specific time point by coloring the corresponding cell. To illustrate more clearly the gene clusters obtained by MSC-CSMC, the genes partitioned into the same cluster are placed together. In the cluster profile plots, the <italic>X</italic>- and <italic>Y</italic>-axes represent the time points and gene expression values, respectively. The expression values of genes partitioned into the same cluster are plotted in the same subplot. In the subplots, each green line indicates the normalized expression values of a gene over all time points, and the black line represents the mean expression level of the genes in the corresponding cluster. It can be seen in the Eisen plots that the color patterns (expression levels) of genes in the same cluster are similar to each other, while genes in different clusters show different color patterns. According to <xref ref-type="fig" rid="F6">Figure 6</xref>, the cluster profiles of different clusters are different from each other, and the cluster profiles within a cluster reveal consistency.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Eisen plots of the gene clusters obtained by MSC-CSMC. <bold>(A)</bold> Yeast Galactose Metabolism with the number of constraints <italic>s</italic> &#x3d; 5. <bold>(B)</bold> Yeast Cell Cycle with the number of constraints <italic>s</italic> &#x3d; 10. <bold>(C)</bold> Yeast Sporulation with the number of constraints <italic>s</italic> &#x3d; 15. <bold>(D)</bold> Serum with the number of constraints <italic>s</italic> &#x3d; 20. <bold>(E)</bold> Arabidopsis with the number of constraints <italic>s</italic> &#x3d; 25.</p>
</caption>
<graphic xlink:href="fgene-14-1135260-g005.tif"/>
</fig>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Cluster profile plots of the gene clusters obtained by MSC-CSMC. <bold>(A)</bold> Yeast Galactose Metabolism with the number of constraints <italic>s</italic> &#x3d; 5. <bold>(B)</bold> Yeast Cell Cycle with the number of constraints <italic>s</italic> &#x3d; 10. <bold>(C)</bold> Yeast Sporulation with the number of constraints <italic>s</italic> &#x3d; 15. <bold>(D)</bold> Serum with the number of constraints <italic>s</italic> &#x3d; 20. <bold>(E)</bold> Arabidopsis with the number of constraints <italic>s</italic> &#x3d; 25.</p>
</caption>
<graphic xlink:href="fgene-14-1135260-g006.tif"/>
</fig>
<p>In order to inspect the biological significance of the gene clusters obtained by the MSC-CSMC algorithm, enrichment analysis is carried out using the GO annotation database, which results in the significant GO terms shared by genes in each cluster and their corresponding <italic>p</italic>-values. Taking the case where the number of pairwise constraints in the Yeast Sporulation dataset is 15 as an example, we focus on the three most significant GO terms (corresponding to the three lowest <italic>p</italic>-values) in each of the six clusters obtained by each algorithm. <xref ref-type="fig" rid="F7">Figure 7</xref> shows the plot of the average <italic>p</italic>-values. To make the differences easier to see, the <italic>p</italic>-values are negative log-transformed and the clusters are sorted in descending order according to the transformed values. <xref ref-type="table" rid="T7">Table 7</xref> reports the three most significant GO terms and the corresponding <italic>p</italic>-values in each cluster obtained by MSC-CSMC.</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Average negative logarithmic <italic>p</italic>-values of the three most significant GO terms for each of the six clusters on Yeast Sporulation with the number of constraints <italic>s</italic> &#x3d;15.</p>
</caption>
<graphic xlink:href="fgene-14-1135260-g007.tif"/>
</fig>
<table-wrap id="T7" position="float">
<label>TABLE 7</label>
<caption>
<p>The three most significant GO terms and the corresponding <italic>p</italic>-values for each of the six clusters obtained by MSC-CSMC on Yeast Sporulation.</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="center">Gene cluster</th>
<th align="center">GO term</th>
<th align="center">
<italic>p</italic>-value</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="center">1</td>
<td align="center">meiotic cell cycle (GO:0051321)</td>
<td align="center">1.42E-53</td>
</tr>
<tr>
<td align="left"/>
<td align="center">meiotic cell cycle process (GO:1903046)</td>
<td align="center">4.33E-51</td>
</tr>
<tr>
<td align="left"/>
<td align="center">peptide biosynthetic process (GO:0043043)</td>
<td align="center">2.07E-48</td>
</tr>
<tr>
<td align="center">2</td>
<td align="center">sporulation (GO:0043934)</td>
<td align="center">2.17E-45</td>
</tr>
<tr>
<td align="left"/>
<td align="center">translation (GO:0006412)</td>
<td align="center">4.08E-44</td>
</tr>
<tr>
<td align="left"/>
<td align="center">sporulation resulting in formation of a cellular spore (GO:0030435)</td>
<td align="center">1.02E-40</td>
</tr>
<tr>
<td align="center">3</td>
<td align="center">meiotic cell cycle (GO:0051321)</td>
<td align="center">2.50E-30</td>
</tr>
<tr>
<td align="left"/>
<td align="center">meiotic nuclear division (GO:0140013)</td>
<td align="center">3.85E-28</td>
</tr>
<tr>
<td align="left"/>
<td align="center">nuclear division (GO:0000280)</td>
<td align="center">1.16E-26</td>
</tr>
<tr>
<td align="center">4</td>
<td align="center">cell cycle process (GO: 0022402)</td>
<td align="center">7.37E-23</td>
</tr>
<tr>
<td align="left"/>
<td align="center">cell cycle (GO: 0007049)</td>
<td align="center">3.67E-22</td>
</tr>
<tr>
<td align="left"/>
<td align="center">cell wall organization (GO: 0071555)</td>
<td align="center">3.46E-22</td>
</tr>
<tr>
<td align="center">5</td>
<td align="center">cell development (GO: 0048468)</td>
<td align="center">6.15E-20</td>
</tr>
<tr>
<td align="left"/>
<td align="center">ascospore formation (GO: 0030437)</td>
<td align="center">1.45E-19</td>
</tr>
<tr>
<td align="left"/>
<td align="center">anatomical structure development (GO: 0048856)</td>
<td align="center">3.53E-19</td>
</tr>
<tr>
<td align="center">6</td>
<td align="center">small molecule metabolic process (GO: 0044281)</td>
<td align="center">2.51E-11</td>
</tr>
<tr>
<td align="left"/>
<td align="center">amino-acid betaine metabolic process (GO: 0006577)</td>
<td align="center">3.16E-09</td>
</tr>
<tr>
<td align="left"/>
<td align="center">carnitine metabolic process (GO: 0009437)</td>
<td align="center">3.15E-09</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>From <xref ref-type="fig" rid="F7">Figure 7</xref>, it can be seen that the curve corresponding to MSC-CSMC is higher than those of the other algorithms, indicating that MSC-CSMC gains the result with the highest biological significance. Moreover, all the <italic>p</italic>-values of the significant GO terms listed in <xref ref-type="table" rid="T7">Table 7</xref> are far less than 0.01, indicating that the MSC-CSMC algorithm can identify biologically relevant gene clusters.</p>
</sec>
</sec>
<sec sec-type="conclusion" id="s4">
<title>4 Conclusion</title>
<p>Aiming at the problem that current semi-supervised clustering methods based on pairwise constraints are easily affected by noisy constraints and do not take the fusion of multi-source constraints into account, in this paper, we propose a multi-objective semi-supervised clustering algorithm based on constraints selection and multi-source constraints (MSC-CSMC). The proposed algorithm uses gene expression data and GO information to generate multi-source pairwise constraints and applies the multi-source constraints to the semi-supervised clustering process through improved constraint violation penalty weights. On this basis, a collaborative multi-objective optimization framework for constraints selection and cluster prototypes is constructed, and the negative impact of the noisy constraints is reduced by selecting pairwise constraints suitable for clustering. Experimental results on multiple gene expression datasets show that the MSC-CSMC algorithm effectively improves the performance of semi-supervised clustering. The validity of the proposed method is not limited to the cluster analysis of gene expression data. Other semi-supervised clustering studies with multi-source information or constraint selection requirements can also benefit from it.</p>
<p>The effectiveness of the algorithm in this paper has been verified in small and medium-sized gene expression datasets. With the increase in the data size, the growth in the number of decision variables in the process of multi-objective evolution will lead to a decrease in algorithm efficiency and optimization performance. Therefore, the next step is to use decision variable analysis and other methods to design a multi-objective evolution strategy of the algorithm so as to further improve the applicability of the algorithm in practical clustering problems. In addition, we will also try to use various evaluation indices and design a multi-objective optimization framework with variable coding length (<xref ref-type="bibr" rid="B32">Rodr&#xed;guez-M&#xe9;ndez et al., 2019</xref>) to optimize the number of clusters for gene expression data.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article; further inquiries can be directed to the corresponding authors.</p>
</sec>
<sec id="s6">
<title>Author contributions</title>
<p>DL proposed the idea. ZW and MZ did the experiment. ZW, JW, and DL summarized the results and finished the manuscript. All authors proofread the manuscript.</p>
</sec>
<sec sec-type="COI-statement" id="s7">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s8">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abin</surname>
<given-names>A. A.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>A random walk approach to query informative constraints for clustering</article-title>. <source>IEEE Trans. Cybern.</source> <volume>48</volume>, <fpage>2272</fpage>&#x2013;<lpage>2283</lpage>. <pub-id pub-id-type="doi">10.1109/TCYB.2017.2731868</pub-id>
</citation>
</ref>
<ref id="B2">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Abin</surname>
<given-names>A. A.</given-names>
</name>
<name>
<surname>Vu</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A density-based approach for querying informative constraints for clustering</article-title>. <source>Expert Syst. Appl.</source> <volume>161</volume>, <fpage>113690</fpage>. <pub-id pub-id-type="doi">10.1016/j.eswa.2020.113690</pub-id>
</citation>
</ref>
<ref id="B3">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Acharya</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Saha</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Pradhan</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Multi-factored gene-gene proximity measures exploiting biological knowledge extracted from gene ontology: Application in gene clustering</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinforma.</source> <volume>17</volume>, <fpage>207</fpage>&#x2013;<lpage>219</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2018.2849362</pub-id>
</citation>
</ref>
<ref id="B4">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ashburner</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Ball</surname>
<given-names>C. A.</given-names>
</name>
<name>
<surname>Blake</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Botstein</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Butler</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Cherry</surname>
<given-names>J. M.</given-names>
</name>
<etal/>
</person-group> (<year>2000</year>). <article-title>Gene ontology: Tool for the unification of biology. The gene ontology consortium</article-title>. <source>Nat. Genet.</source> <volume>25</volume>, <fpage>25</fpage>&#x2013;<lpage>29</lpage>. <pub-id pub-id-type="doi">10.1038/75556</pub-id>
</citation>
</ref>
<ref id="B5">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bandyopadhyay</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Mukhopadhyay</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Maulik</surname>
<given-names>U.</given-names>
</name>
</person-group> (<year>2007</year>). <article-title>An improved algorithm for clustering gene expression data</article-title>. <source>Bioinformatics</source> <volume>23</volume>, <fpage>2859</fpage>&#x2013;<lpage>2865</lpage>. <pub-id pub-id-type="doi">10.1093/bioinformatics/btm418</pub-id>
</citation>
</ref>
<ref id="B6">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Basu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Banerjee</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Mooney</surname>
<given-names>R. J.</given-names>
</name>
</person-group> (<year>2004</year>). &#x201c;<article-title>Active semi-supervision for pairwise constrained clustering</article-title>,&#x201d; in <conf-name>Proceedings of the 2004 SIAM International Conference on Data Mining</conf-name> (<publisher-loc>Philadelphia, Pennsylvania</publisher-loc>: <publisher-name>SIAM</publisher-name>), <fpage>333</fpage>&#x2013;<lpage>344</lpage>. <pub-id pub-id-type="doi">10.1137/1.9781611972740.31</pub-id>
</citation>
</ref>
<ref id="B7">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bezdek</surname>
<given-names>J. C.</given-names>
</name>
<name>
<surname>Coray</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Gunderson</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Watson</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>1981</year>). <article-title>Detection and characterization of cluster substructure i. linear structure: Fuzzy c-lines</article-title>. <source>SIAM J. Appl. Math.</source> <volume>40</volume>, <fpage>339</fpage>&#x2013;<lpage>357</lpage>. <pub-id pub-id-type="doi">10.1137/0140029</pub-id>
</citation>
</ref>
<ref id="B8">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Bilenko</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Basu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Mooney</surname>
<given-names>R. J.</given-names>
</name>
</person-group> (<year>2004</year>). &#x201c;<article-title>Integrating constraints and metric learning in semi-supervised clustering</article-title>,&#x201d; in <conf-name>Proceedings of the Twenty-First International Conference on Machine Learning</conf-name> (<publisher-loc>New York, NY, USA</publisher-loc>: <publisher-name>Association for Computing Machinery</publisher-name>), <fpage>11</fpage>. <pub-id pub-id-type="doi">10.1145/1015330.1015360</pub-id>
</citation>
</ref>
<ref id="B9">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>J. Z.</given-names>
</name>
<name>
<surname>Wu</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Subspace weighting co-clustering of gene expression data</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinforma.</source> <volume>16</volume>, <fpage>352</fpage>&#x2013;<lpage>364</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2017.2705686</pub-id>
</citation>
</ref>
<ref id="B10">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Cho</surname>
<given-names>R. J.</given-names>
</name>
<name>
<surname>Campbell</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Winzeler</surname>
<given-names>E. A.</given-names>
</name>
<name>
<surname>Steinmetz</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Conway</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Wodicka</surname>
<given-names>L.</given-names>
</name>
<etal/>
</person-group> (<year>1998</year>). <article-title>A genome-wide transcriptional analysis of the mitotic cell cycle</article-title>. <source>Mol. Cell</source> <volume>2</volume>, <fpage>65</fpage>&#x2013;<lpage>73</lpage>. <pub-id pub-id-type="doi">10.1016/s1097-2765(00)80114-8</pub-id>
</citation>
</ref>
<ref id="B11">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chu</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>DeRisi</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Eisen</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Mulholland</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Botstein</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Brown</surname>
<given-names>P. O.</given-names>
</name>
<etal/>
</person-group> (<year>1998</year>). <article-title>The transcriptional program of sporulation in budding yeast</article-title>. <source>Science</source> <volume>282</volume>, <fpage>699</fpage>&#x2013;<lpage>705</lpage>. <pub-id pub-id-type="doi">10.1126/science.282.5389.699</pub-id>
</citation>
</ref>
<ref id="B12">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Davidson</surname>
<given-names>I.</given-names>
</name>
<name>
<surname>Qi</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2008</year>). &#x201c;<article-title>Finding alternative clusterings using constraints</article-title>,&#x201d; in <conf-name>2008 Eighth IEEE International Conference on Data Mining</conf-name> (<publisher-loc>Pisa, Italy</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>773</fpage>&#x2013;<lpage>778</lpage>. <pub-id pub-id-type="doi">10.1109/ICDM.2008.141</pub-id>
</citation>
</ref>
<ref id="B13">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Deb</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Tiwari</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Omni-optimizer: A generic evolutionary algorithm for single and multi-objective optimization</article-title>. <source>Eur. J. Operational Res.</source> <volume>185</volume>, <fpage>1062</fpage>&#x2013;<lpage>1087</lpage>. <pub-id pub-id-type="doi">10.1016/j.ejor.2006.06.042</pub-id>
</citation>
</ref>
<ref id="B14">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>W.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>An ensemble strategy to predict prognosis in ovarian cancer based on gene modules</article-title>. <source>Front. Genet.</source> <volume>10</volume>, <fpage>366</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2019.00366</pub-id>
</citation>
</ref>
<ref id="B15">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gasch</surname>
<given-names>A. P.</given-names>
</name>
<name>
<surname>Eisen</surname>
<given-names>M. B.</given-names>
</name>
</person-group> (<year>2002</year>). <article-title>Exploring the conditional coregulation of yeast gene expression through fuzzy k-means clustering</article-title>. <source>Genome Biol.</source> <volume>3</volume>, <fpage>RESEARCH0059</fpage>&#x2013;<lpage>22</lpage>. <pub-id pub-id-type="doi">10.1186/gb-2002-3-11-research0059</pub-id>
</citation>
</ref>
<ref id="B16">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Giri</surname>
<given-names>S. J.</given-names>
</name>
<name>
<surname>Saha</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). &#x201c;<article-title>Multi-view gene clustering using gene ontology and expression-based similarities</article-title>,&#x201d; in <conf-name>2020 IEEE Congress on Evolutionary Computation (CEC)</conf-name> (<publisher-loc>Glasgow, UK</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1109/CEC48606.2020.9185885</pub-id>
</citation>
</ref>
<ref id="B17">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Grira</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Crucianu</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Boujemaa</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Active semi-supervised fuzzy clustering</article-title>. <source>Pattern Recognit.</source> <volume>41</volume>, <fpage>1834</fpage>&#x2013;<lpage>1844</lpage>. <pub-id pub-id-type="doi">10.1016/j.patcog.2007.10.004</pub-id>
</citation>
</ref>
<ref id="B18">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ideker</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Thorsson</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Ranish</surname>
<given-names>J. A.</given-names>
</name>
<name>
<surname>Christmas</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Buhler</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Eng</surname>
<given-names>J. K.</given-names>
</name>
<etal/>
</person-group> (<year>2001</year>). <article-title>Integrated genomic and proteomic analyses of a systematically perturbed metabolic network</article-title>. <source>Science</source> <volume>292</volume>, <fpage>929</fpage>&#x2013;<lpage>934</lpage>. <pub-id pub-id-type="doi">10.1126/science.292.5518.929</pub-id>
</citation>
</ref>
<ref id="B19">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Iyer</surname>
<given-names>V. R.</given-names>
</name>
<name>
<surname>Eisen</surname>
<given-names>M. B.</given-names>
</name>
<name>
<surname>Ross</surname>
<given-names>D. T.</given-names>
</name>
<name>
<surname>Schuler</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Moore</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>J. C.</given-names>
</name>
<etal/>
</person-group> (<year>1999</year>). <article-title>The transcriptional program in the response of human fibroblasts to serum</article-title>. <source>Science</source> <volume>283</volume>, <fpage>83</fpage>&#x2013;<lpage>87</lpage>. <pub-id pub-id-type="doi">10.1126/science.283.5398.83</pub-id>
</citation>
</ref>
<ref id="B20">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lai</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>He</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>An adaptive robust semi-supervised clustering framework using weighted consensus of random <italic>k</italic>-means ensemble</article-title>. <source>IEEE Trans. Knowl. Data Eng.</source> <volume>33</volume>, <fpage>1877</fpage>&#x2013;<lpage>1890</lpage>.</citation>
</ref>
<ref id="B21">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Li</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Gu</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Chang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Qin</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>A joint optimization framework integrated with biological knowledge for clustering incomplete gene expression data</article-title>. <source>Soft Comput.</source> <volume>2022</volume>, <fpage>1</fpage>&#x2013;<lpage>18</lpage>. <pub-id pub-id-type="doi">10.1007/s00500-022-07180-y</pub-id>
</citation>
</ref>
<ref id="B22">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Isocell: An approach to enhance single cell clustering by integrating isoform-level expression through orthogonal projection</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinforma.</source> <volume>20</volume>, <fpage>1</fpage>&#x2013;<lpage>475</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2022.3147193</pub-id>
</citation>
</ref>
<ref id="B23">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Liu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Xiao</surname>
<given-names>Z.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Entropy-based active sparse subspace clustering</article-title>. <source>Multimedia Tools Appl.</source> <volume>77</volume>, <fpage>22281</fpage>&#x2013;<lpage>22297</lpage>. <pub-id pub-id-type="doi">10.1007/s11042-018-5945-1</pub-id>
</citation>
</ref>
<ref id="B24">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>L&#xf3;pez-Cort&#xe9;s</surname>
<given-names>X. A.</given-names>
</name>
<name>
<surname>Matamala</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Maldonado</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Mora-Poblete</surname>
<given-names>F.</given-names>
</name>
<name>
<surname>Scapim</surname>
<given-names>C. A.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>A deep learning approach to population structure inference in inbred lines of maize</article-title>. <source>Front. Genet.</source> <volume>11</volume>, <fpage>543459</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2020.543459</pub-id>
</citation>
</ref>
<ref id="B25">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Masud</surname>
<given-names>M. A.</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>J. Z.</given-names>
</name>
<name>
<surname>Zhong</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Fu</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Generate pairwise constraints from unlabeled data for semi-supervised clustering</article-title>. <source>Data and Knowl. Eng.</source> <volume>123</volume>, <fpage>101715</fpage>. <pub-id pub-id-type="doi">10.1016/j.datak.2019.101715</pub-id>
</citation>
</ref>
<ref id="B26">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Maulik</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Mukhopadhyay</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Bandyopadhyay</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>Combining pareto-optimal clusters using supervised learning for identifying co-expressed genes</article-title>. <source>BMC Bioinforma.</source> <volume>10</volume>, <fpage>27</fpage>&#x2013;<lpage>16</lpage>. <pub-id pub-id-type="doi">10.1186/1471-2105-10-27</pub-id>
</citation>
</ref>
<ref id="B27">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mei</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Semisupervised fuzzy clustering with partition information of subsets</article-title>. <source>IEEE Trans. Fuzzy Syst.</source> <volume>27</volume>, <fpage>1726</fpage>&#x2013;<lpage>1737</lpage>. <pub-id pub-id-type="doi">10.1109/tfuzz.2018.2889010</pub-id>
</citation>
</ref>
<ref id="B28">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Mukhopadhyay</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Maulik</surname>
<given-names>U.</given-names>
</name>
<name>
<surname>Bandyopadhyay</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>An interactive approach to multiobjective clustering of gene expression patterns</article-title>. <source>IEEE Trans. Biomed. Eng.</source> <volume>60</volume>, <fpage>35</fpage>&#x2013;<lpage>41</lpage>. <pub-id pub-id-type="doi">10.1109/TBME.2012.2220765</pub-id>
</citation>
</ref>
<ref id="B29">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Pirooznia</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>J. Y.</given-names>
</name>
<name>
<surname>Yang</surname>
<given-names>M. Q.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>A comparative study of different machine learning methods on microarray gene expression data</article-title>. <source>BMC Genomics</source> <volume>9</volume>, <fpage>S13</fpage>&#x2013;<lpage>S13</lpage>. <pub-id pub-id-type="doi">10.1186/1471-2164-9-S1-S13</pub-id>
</citation>
</ref>
<ref id="B30">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Reymond</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Weber</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Damond</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Farmer</surname>
<given-names>E. E.</given-names>
</name>
</person-group> (<year>2000</year>). <article-title>Differential gene expression in response to mechanical wounding and insect feeding in Arabidopsis</article-title>. <source>Plant Cell</source> <volume>12</volume>, <fpage>707</fpage>&#x2013;<lpage>720</lpage>. <pub-id pub-id-type="doi">10.1105/tpc.12.5.707</pub-id>
</citation>
</ref>
<ref id="B31">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rodriguez</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Laio</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Clustering by fast search and find of density peaks</article-title>. <source>Science</source> <volume>344</volume>, <fpage>1492</fpage>&#x2013;<lpage>1496</lpage>. <pub-id pub-id-type="doi">10.1126/science.1242072</pub-id>
</citation>
</ref>
<ref id="B32">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rodr&#xed;guez-M&#xe9;ndez</surname>
<given-names>I. A.</given-names>
</name>
<name>
<surname>Ure&#xf1;a</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Herrera-Viedma</surname>
<given-names>E.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Fuzzy clustering approach for brain tumor tissue segmentation in magnetic resonance images</article-title>. <source>Soft Comput.</source> <volume>23</volume>, <fpage>10105</fpage>&#x2013;<lpage>10117</lpage>. <pub-id pub-id-type="doi">10.1007/s00500-018-3565-3</pub-id>
</citation>
</ref>
<ref id="B33">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rousseeuw</surname>
<given-names>P. J.</given-names>
</name>
</person-group> (<year>1987</year>). <article-title>Silhouettes: A graphical aid to the interpretation and validation of cluster analysis</article-title>. <source>J. Comput. Appl. Math.</source> <volume>20</volume>, <fpage>53</fpage>&#x2013;<lpage>65</lpage>. <pub-id pub-id-type="doi">10.1016/0377-0427(87)90125-7</pub-id>
</citation>
</ref>
<ref id="B34">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Saha</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bandyopadhyay</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>A generalized automatic clustering algorithm in a multiobjective framework</article-title>. <source>Appl. Soft Comput.</source> <volume>13</volume>, <fpage>89</fpage>&#x2013;<lpage>108</lpage>. <pub-id pub-id-type="doi">10.1016/j.asoc.2012.08.005</pub-id>
</citation>
</ref>
<ref id="B35">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Song</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Srimani</surname>
<given-names>P. K.</given-names>
</name>
<name>
<surname>Yu</surname>
<given-names>P. S.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J. Z.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>Measure the semantic similarity of GO terms using aggregate information content</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinforma.</source> <volume>11</volume>, <fpage>468</fpage>&#x2013;<lpage>476</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2013.176</pub-id>
</citation>
</ref>
<ref id="B36">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vu</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Labroche</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Bouchon-Meunier</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2012</year>). <article-title>Improving constrained clustering with active query selection</article-title>. <source>Pattern Recognit.</source> <volume>45</volume>, <fpage>1749</fpage>&#x2013;<lpage>1758</lpage>. <pub-id pub-id-type="doi">10.1016/j.patcog.2011.10.016</pub-id>
</citation>
</ref>
<ref id="B37">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Wagstaff</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Cardie</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Rogers</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Schr&#xf6;dl</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2001</year>). &#x201c;<article-title>Constrained k-means clustering with background knowledge</article-title>,&#x201d; in <conf-name>Proceedings of the Eighteenth International Conference on Machine Learning</conf-name> (<publisher-loc>Burlington, MA, USA</publisher-loc>: <publisher-name>Morgan Kaufmann Publishers Inc.</publisher-name>), <fpage>577</fpage>&#x2013;<lpage>584</lpage>. <pub-id pub-id-type="doi">10.5555/645530.655669</pub-id>
</citation>
</ref>
<ref id="B38">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wu</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Ma</surname>
<given-names>X.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Network-based structural learning nonnegative matrix factorization algorithm for clustering of scRNA-seq data</article-title>. <source>IEEE/ACM Trans. Comput. Biol. Bioinforma.</source> <volume>20</volume>, <fpage>566</fpage>&#x2013;<lpage>575</lpage>. <pub-id pub-id-type="doi">10.1109/TCBB.2022.3161131</pub-id>
</citation>
</ref>
<ref id="B39">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Xie</surname>
<given-names>X. L.</given-names>
</name>
<name>
<surname>Beni</surname>
<given-names>G.</given-names>
</name>
</person-group> (<year>1991</year>). <article-title>A validity measure for fuzzy clustering</article-title>. <source>IEEE Trans. Pattern Analysis Mach. Intell.</source> <volume>13</volume>, <fpage>841</fpage>&#x2013;<lpage>847</lpage>. <pub-id pub-id-type="doi">10.1109/34.85677</pub-id>
</citation>
</ref>
<ref id="B40">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yin</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Hu</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Semi-supervised clustering with metric learning: An adaptive kernel method</article-title>. <source>Pattern Recognit.</source> <volume>43</volume>, <fpage>1320</fpage>&#x2013;<lpage>1333</lpage>. <pub-id pub-id-type="doi">10.1016/j.patcog.2009.11.005</pub-id>
</citation>
</ref>
<ref id="B41">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Yu</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wong</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>You</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Han</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<year>2018</year>). <article-title>Semi-supervised ensemble clustering based on selected constraint projection</article-title>. <source>IEEE Trans. Knowl. Data Eng.</source> <volume>30</volume>, <fpage>2394</fpage>&#x2013;<lpage>2407</lpage>. <pub-id pub-id-type="doi">10.1109/tkde.2018.2818729</pub-id>
</citation>
</ref>
<ref id="B42">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Peng</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Yan</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>MultiGATAE: A novel cancer subtype identification method based on multi-omics and attention mechanism</article-title>. <source>Front. Genet.</source> <volume>13</volume>, <fpage>855629</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2022.855629</pub-id>
</citation>
</ref>
<ref id="B43">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhang</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Luo</surname>
<given-names>W. J.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>X. F.</given-names>
</name>
</person-group> (<year>2009</year>). <article-title>A normal distribution crossover for &#x3b5;-MOEA</article-title>. <source>J. Softw.</source> <volume>20</volume>, <fpage>305</fpage>&#x2013;<lpage>314</lpage>. <pub-id pub-id-type="doi">10.3724/sp.j.1001.2009.00305</pub-id>
</citation>
</ref>
<ref id="B44">
<citation citation-type="confproc">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2022</year>). &#x201c;<article-title>Multi-objective semi-supervised clustering algorithm based on constraint set optimization for gene expression data</article-title>,&#x201d; in <conf-name>2022 41st Chinese Control Conference (CCC)</conf-name> (<publisher-loc>Hefei, China</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>6570</fpage>&#x2013;<lpage>6575</lpage>. <pub-id pub-id-type="doi">10.23919/CCC55666.2022.9902131</pub-id>
</citation>
</ref>
<ref id="B45">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhao</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Fang</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Deng</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>RFCell: A gene selection approach for scRNA-seq clustering based on permutation and random forest</article-title>. <source>Front. Genet.</source> <volume>12</volume>, <fpage>665843</fpage>. <pub-id pub-id-type="doi">10.3389/fgene.2021.665843</pub-id>
</citation>
</ref>
<ref id="B46">
<citation citation-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zhou</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Zhu</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Kernel-based multiobjective clustering algorithm with automatic attribute weighting</article-title>. <source>Soft Comput.</source> <volume>22</volume>, <fpage>3685</fpage>&#x2013;<lpage>3709</lpage>. <pub-id pub-id-type="doi">10.1007/s00500-017-2590-y</pub-id>
</citation>
</ref>
</ref-list>
</back>
</article>