<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="editorial" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Plant Sci.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Plant Science</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Plant Sci.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1664-462X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpls.2026.1795967</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Editorial</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Editorial: Machine learning for mining plant functional genes</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Sun</surname><given-names>Shanwen</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/882776/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Zou</surname><given-names>Quan</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/531759/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Dou</surname><given-names>Lijun</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>*</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/963027/overview"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &amp; editing</role>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>College of Life Science, Northeast Forestry University</institution>, <city>Harbin</city>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff2"><label>2</label><institution>Institute of Fundamental and Frontier Sciences, University of Electronic Science and Technology of China</institution>, <city>Chengdu</city>,&#xa0;<country country="cn">China</country></aff>
<aff id="aff3"><label>3</label><institution>Cleveland Clinic Research, Cleveland Clinic</institution>, <city>Cleveland</city>, <state>OH</state>,&#xa0;<country country="us">United States</country></aff>
<author-notes>
<corresp id="c001"><label>*</label>Correspondence: Lijun Dou, <email xlink:href="mailto:doulijun777@163.com">doulijun777@163.com</email>; Quan Zou, <email xlink:href="mailto:zouquan@nclab.net">zouquan@nclab.net</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-25">
<day>25</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>17</volume>
<elocation-id>1795967</elocation-id>
<history>
<date date-type="received">
<day>26</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>16</day>
<month>02</month>
<year>2026</year>
</date>
<date date-type="rev-recd">
<day>11</day>
<month>02</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Sun, Zou and Dou.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Sun, Zou and Dou</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-25">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<kwd-group>
<kwd>large language models</kwd>
<kwd>machine learning</kwd>
<kwd>multi-omics integration</kwd>
<kwd>plant functional genomics</kwd>
<kwd>regulatory network inference</kwd>
</kwd-group>
<funding-group>
<award-group id="gs1">
<funding-source id="sp1">
<institution-wrap>
<institution>Natural Science Foundation of Heilongjiang Province</institution>
<institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open_funder_registry">10.13039/501100005046</institution-id>
</institution-wrap>
</funding-source>
</award-group>
<award-group id="gs2">
<funding-source id="sp2">
<institution-wrap>
<institution>National Natural Science Foundation of China</institution>
<institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open_funder_registry">10.13039/501100001809</institution-id>
</institution-wrap>
</funding-source>
</award-group>
<award-group id="gs3">
<funding-source id="sp3">
<institution-wrap>
<institution>China Postdoctoral Science Foundation</institution>
<institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open_funder_registry">10.13039/501100002858</institution-id>
</institution-wrap>
</funding-source>
</award-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This work was supported by the Natural Science Foundation of Heilongjiang Province (YQ2024F002); the National Natural Science Foundation of China (62273086); and the China Postdoctoral Science Foundation (2024M760379).</funding-statement>
</funding-group>
<counts>
<fig-count count="0"/>
<table-count count="0"/>
<equation-count count="0"/>
<ref-count count="4"/>
<page-count count="3"/>
<word-count count="924"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Functional and Applied Plant Genomics</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
<notes notes-type="frontiers-research-topic">
<p>Editorial on the Research Topic <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/research-topics/68930/machine-learning-for-mining-plant-functional-genes/articles">Machine learning for mining plant functional genes</ext-link>
</p>
</notes>
</front>
<body>
<p>Plants in natural and agricultural environments are continuously challenged by diverse biotic and abiotic stresses, placing sustained pressure on crop productivity and global food security. The identification and functional characterization of plant genes underlying stress adaptation, development, and agronomic traits are therefore central to modern plant biology and precision breeding (<xref ref-type="bibr" rid="B1">Gaccione et&#xa0;al., 2025</xref>; <xref ref-type="bibr" rid="B4">Zhang et&#xa0;al., 2025</xref>). Although advances in high-throughput sequencing have generated vast genomic resources, experimentally validated functional annotations remain limited, leaving a substantial fraction of plant genes poorly characterized. A central challenge in contemporary plant genomics is thus to bridge the gap between rapidly expanding data volumes and biologically meaningful functional insights (<xref ref-type="bibr" rid="B3">Zhang et&#xa0;al., 2025</xref>). Recent progress in machine learning (ML) has created unprecedented opportunities to address this challenge. By integrating heterogeneous data types, spanning genome sequences, epigenomic marks, transcriptomic profiles, protein features, metabolite measurements, and regulatory interactions, ML-based approaches can model complex, nonlinear relationships that are difficult to resolve using conventional analytical frameworks (<xref ref-type="bibr" rid="B2">Sasse et&#xa0;al., 2024</xref>). Within this context, the Research Topic <italic>Machine Learning for Mining Plant Functional Genes</italic> brings together eight studies that collectively demonstrate how data-driven computational strategies are reshaping functional gene discovery, regulatory analysis, and trait dissection in plants.</p>
<p>A critical methodological shift in recent years is the emergence of foundation models (FMs) and large language models (LLMs) for biological sequence analysis. By conceptualizing DNA as a structured language, these models leverage large-scale pretraining to generate transferable representations that capture latent regulatory and functional features. Compared with task-specific models, FMs offer enhanced generalization, cross-species transferability, and scalability, making them particularly attractive for plant systems characterized by genomic complexity and limited functional annotations. Providing a systematic overview of this paradigm, <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fpls.2025.1611992">Xu et&#xa0;al.</ext-link> present a mini-review synthesizing recent advances in foundation models for plant molecular biology. The review traces the evolution from general DNA language models to plant-specific tools and highlights key challenges unique to plant systems, including polyploidy, repetitive genomes, and sparse experimental annotations. By outlining future directions such as multimodal integration and computational efficiency, this work establishes a conceptual framework for understanding how FMs are redefining computational plant biology and guiding next-generation model development.</p>
<p>Building on this FM paradigm, several contributions demonstrate how representation learning can be applied to concrete biological problems. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fpls.2025.1626539">Zhang et&#xa0;al.</ext-link> applied a DNABERT-2&#x2013;based framework combined with gradient boosting to identify DNA N6-methyladenine modifications in rice, illustrating how foundation models can enhance epigenetic marker detection while mitigating data sparsity. This work exemplifies a broader shift toward pretraining-based strategies in plant genomics, with implications for cross-species prediction and regulatory annotation.</p>
<p>Extending LLM-based approaches to cis-regulatory element discovery, <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fpls.2025.1618174">Pu et&#xa0;al.</ext-link> developed an enhancer identification framework that couples DNABERT-2 feature extraction with a support vector machine classifier. Beyond predictive performance, this study addresses the interpretability challenge inherent to deep learning by introducing a differential entropy&#x2013;based analysis to monitor class separation during fine-tuning. The results provide theoretical insight into training dynamics and offer a principled strategy for model optimization, highlighting the growing emphasis on transparency and interpretability in genomic ML.</p>
<p>In parallel with advances in representation learning, architectural innovation is driving progress in modeling gene expression and regulatory complexity. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fpls.2025.1718258">Guo et&#xa0;al.</ext-link> proposed a hybrid framework that integrates Transformer-based global attention with state space models to efficiently capture both long-range dependencies and local regulatory motifs. Validated across multiple crop species, this approach demonstrates improved accuracy and generalization over conventional convolutional architectures, underscoring the importance of model design choices in decoding plant regulatory syntax.</p>
<p>Beyond sequence-centric modeling, graph-based learning emerges as a powerful strategy for uncovering regulatory interactions mediated by non-coding RNAs. Addressing the sparsity and noise characteristic of experimental interaction data, <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fpls.2025.1617495">Liao et&#xa0;al.</ext-link> introduced an interpretable graph representation learning framework for predicting plant RNA&#x2013;RNA interactions. By combining robustness-enhancing masking strategies with biologically interpretable decoding, this work advances the analysis of post-transcriptional regulatory networks, particularly in the context of stress responses.</p>
<p>Several studies focus directly on agriculturally relevant traits and stress adaptation. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fpls.2025.1629794">Qiao et&#xa0;al.</ext-link> developed a machine learning framework for identifying saline&#x2013;alkali tolerance genes, explicitly addressing data imbalance and sequence divergence through cost-sensitive learning and evolutionary feature extraction. Importantly, their analysis links predictive features to known physiological mechanisms, illustrating how ML can generate biologically interpretable insights rather than purely statistical predictions. Complementing this gene-centric perspective, <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fpls.2025.1659345">Saavedra et&#xa0;al.</ext-link> applied interpretable ML to whole-genome methylation data to resolve dormancy stages in sweet cherry. By integrating ensemble learning with feature attribution analysis, the study identifies epigenetic markers associated with developmental transitions and agronomic traits, demonstrating the potential of ML-derived biomarkers for crop management.</p>
<p>Finally, <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fpls.2025.1601899">Ju et&#xa0;al.</ext-link> exemplify the value of multi-omics integration by combining genome-wide association analysis with transcriptomic and metabolomic data to dissect seed germination mechanisms in sorghum. By linking genetic variation to hormone signaling and metabolic flux, this systems-level approach moves beyond association toward mechanistic understanding, reinforcing the importance of integrative frameworks in functional gene discovery.</p>
<p>Taken together, the contributions in this Research Topic highlight the transformative role of machine learning and foundation models in plant functional genomics. By advancing representation learning, model architecture, interpretability, and multi-omics integration, these studies move the field beyond traditional sequence-based annotation toward predictive, mechanism-aware, and application-oriented frameworks. Continued synergy between computational innovation and experimental validation will be essential for translating these advances into resilient, high-yield crops capable of meeting future agricultural and environmental challenges.</p>
</body>
<back>
<sec id="s1" sec-type="author-contributions">
<title>Author contributions</title>
<p>SS: Writing &#x2013; original draft, Writing &#x2013; review &amp; editing. QZ: Writing &#x2013; review &amp; editing. LD: Writing &#x2013; review &amp; editing.</p></sec>
<sec id="s3" sec-type="COI-statement">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p></sec>
<sec id="s4" sec-type="ai-statement">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec id="s5" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p></sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Gaccione</surname> <given-names>L.</given-names></name>
<name><surname>Toppino</surname> <given-names>L.</given-names></name>
<name><surname>Bolger</surname> <given-names>M.</given-names></name>
<name><surname>Schmidt</surname> <given-names>M.</given-names></name>
<name><surname>Tassone</surname> <given-names>M. R.</given-names></name>
<name><surname>Sulli</surname> <given-names>M.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>Graph-based pangenomes and pan-phenome provide a cornerstone for eggplant biology and breeding</article-title>. <source>Nat. Commun.</source> <volume>16</volume>, <fpage>9919</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41467-025-64866-1</pub-id>, PMID: <pub-id pub-id-type="pmid">41219210</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Sasse</surname> <given-names>A.</given-names></name>
<name><surname>Chikina</surname> <given-names>M.</given-names></name>
<name><surname>Mostafavi</surname> <given-names>S.</given-names></name>
</person-group> (<year>2024</year>). 
<article-title>Unlocking gene regulation with sequence-to-function models</article-title>. <source>Nat. Methods</source> <volume>21</volume>, <fpage>1374</fpage>&#x2013;<lpage>1377</lpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1038/s41592-024-02331-5</pub-id>, PMID: <pub-id pub-id-type="pmid">39122947</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>S.</given-names></name>
<name><surname>Chen</surname> <given-names>S.</given-names></name>
<name><surname>Fu</surname> <given-names>Z.</given-names></name>
<name><surname>Li</surname> <given-names>F.</given-names></name>
<name><surname>Chen</surname> <given-names>Q.</given-names></name>
<name><surname>Ma</surname> <given-names>J.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>Integration of digital phenotyping, GWAS, and transcriptomic analysis revealed a key gene for bud size in tea plant (<italic>Camellia sinensis</italic>)</article-title>. <source>Horticulture Res.</source> <volume>12</volume>, <fpage>uhaf051</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1093/hr/uhaf051</pub-id>, PMID: <pub-id pub-id-type="pmid">40271457</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name><surname>Zhang</surname> <given-names>H.</given-names></name>
<name><surname>Liu</surname> <given-names>X.</given-names></name>
<name><surname>Liu</surname> <given-names>W.</given-names></name>
<name><surname>Wang</surname> <given-names>S.</given-names></name>
<name><surname>Li</surname> <given-names>Y.</given-names></name>
<name><surname>Xiang</surname> <given-names>W.</given-names></name>
<etal/>
</person-group>. (<year>2025</year>). 
<article-title>Deep learning-based annotation of plant abiotic stress resistance genes for crops</article-title>. <source>Plant J.</source> <volume>124</volume>, <fpage>e70556</fpage>. doi:&#xa0;<pub-id pub-id-type="doi">10.1111/tpj.70556</pub-id>, PMID: <pub-id pub-id-type="pmid">41194493</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn id="n1" fn-type="custom" custom-type="edited-by">
<p>Edited and reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1875967">Jiedan Chen</ext-link>, Chinese Academy of Agricultural Sciences, China</p></fn>
</fn-group>
</back>
</article>