<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" dtd-version="1.3" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Big Data</journal-id>
<journal-title-group>
<journal-title>Frontiers in Big Data</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Big Data</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2624-909X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fdata.2026.1779935</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>GFTrans: an on-the-fly static analysis framework for code performance profiling</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Li</surname> <given-names>Jie</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Wen</surname> <given-names>Yunbao</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/3327232"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Liu</surname> <given-names>Jingxin</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/2943752"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Zeng</surname> <given-names>Biqing</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
</contrib>
<contrib contrib-type="author">
<name><surname>Mirjalili</surname> <given-names>Seyedali</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &amp; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review &#x00026; editing</role>
<uri xlink:href="https://loop.frontiersin.org/people/651029"/>
</contrib>
</contrib-group>
<aff id="aff1"><label>1</label><institution>School of Artificial Intelligence, South China Normal University</institution>, <city>Foshan</city>, <country country="cn">China</country></aff>
<aff id="aff2"><label>2</label><institution>School of Mathematics and Physics Sciences, RI-IM&#x000B7;AI*, Chongqing University of Science and Technology</institution>, <city>Chongqing</city>, <country country="cn">China</country></aff>
<aff id="aff3"><label>3</label><institution>Center for Artificial Intelligence Research and Optimization, Torrens University Australia</institution>, <city>Brisbane, QLD</city>, <country country="au">Australia</country></aff>
<author-notes>
<corresp id="c001"><label>&#x0002A;</label>Correspondence: Yunbao Wen, <email xlink:href="mailto:2023024289@m.scnu.edu.cn">2023024289@m.scnu.edu.cn</email></corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-02-27">
<day>27</day>
<month>02</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>9</volume>
<elocation-id>1779935</elocation-id>
<history>
<date date-type="received">
<day>03</day>
<month>01</month>
<year>2026</year>
</date>
<date date-type="rev-recd">
<day>06</day>
<month>02</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>11</day>
<month>02</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2026 Li, Wen, Liu, Zeng and Mirjalili.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Li, Wen, Liu, Zeng and Mirjalili</copyright-holder>
<license>
<ali:license_ref start_date="2026-02-27">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>Improving software efficiency is crucial for maintenance, but pinpointing runtime bottlenecks becomes increasingly difficult as systems expand. Traditional dynamic profiling tools require full build-execution cycles, creating significant latency that impedes agile development. To address this, we introduce GFTrans, a static analysis framework that predicts C program performance without execution. GFTrans utilizes a Transformer architecture with a novel &#x0201C;anchor-based embedding&#x0201D; technique to integrate control flow and data dependencies into a unified sequence. Additionally, a dynamic gating mechanism fuses these semantic representations with 16 handcrafted statistical features to comprehensively capture code complexity. Evaluated on a dataset of real-world GitHub C functions with high-precision runtime labels, GFTrans outperforms baseline models like Random Forest and Code2Vec, achieving 78.64% accuracy. The system identifies potential bottlenecks in milliseconds, enabling developers to perform optimization effectively during the coding phase.</p></abstract>
<kwd-group>
<kwd>code representation learning</kwd>
<kwd>control flow and data flow</kwd>
<kwd>graph linearization</kwd>
<kwd>on-the-fly profiling</kwd>
<kwd>performance prediction</kwd>
<kwd>static analysis</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. The work was supported in part by the Joint Fund for Basic and Applied Basic Research in Guangdong Province under Grant No. 2024A1515110008, in part by the South China Normal University Teacher Research, and Cultivation Fund under Grant No. KJF120240001.</funding-statement>
</funding-group>
<counts>
<fig-count count="6"/>
<table-count count="7"/>
<equation-count count="13"/>
<ref-count count="23"/>
<page-count count="16"/>
<word-count count="11129"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Data Mining and Management</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="introduction" id="s1">
<label>1</label>
<title>Introduction</title>
<p>As software engineering is advancing rapidly, software systems have a strong impact on both user satisfaction and ongoing maintenance costs (<xref ref-type="bibr" rid="B11">Jafari Navimipour and Soltani, 2016</xref>). As software applications grow larger in size, it becomes more difficult for programmers to improve performance. Many programmers cannot easily pinpoint the cause of runtime slowdowns. Their dependence on latency-laden loops, inefficient memory access, and I/O (input/output) operations (<xref ref-type="bibr" rid="B12">Laaber et al., 2021</xref>) that block the line of code in a large software system makes this task harder. Traditionally, performance concerns are detected by the performer&#x00027;s analytical methods. Some tools, including Intel VTune (Intel, Santa Clara, CA, USA) and Gprof, monitor the runtime behavior of a particular application. These tools accurately determine where performance-related bottlenecks may exist. All of these methods require an ex post facto analysis.</p>
<p>To generate performance measurements, the developer must complete the full coding and building cycle. This includes creating detailed test cases, compiling the software, and running it in a representative sample environment. This type of &#x0201C;dynamic&#x0201D; analysis is an impediment to agile software development (<xref ref-type="bibr" rid="B2">Altuwaijri and Ferrario, 2022</xref>). When programmers have to interrupt their programming workflow to complete the compilation process, maintain a test environment, collect and analyze profiling data, and then return to their editor to make code changes, it results in wasted time and resources. This process limits the programmer&#x00027;s ability to receive timely feedback on performance issues. As a result, performance concerns are often not identified until testing or after deployment, resulting in expensive fixes. This problem has created an urgent need to develop and deploy On-The-Fly Performance Profiling (OTP). More and more studies have proposed code analysis frameworks for real-time analysis (<xref ref-type="bibr" rid="B3">Biringa and Kul, 2024</xref>). OTP enables programmers to predict the execution time of the code they are writing. It helps identify possible performance bottlenecks during code writing, without requiring the code to be run.</p>
<p>Predicting the complexity of a program&#x00027;s execution time based on its static source code attributes is a highly complex issue. The execution time of a program depends on both its control and data dependencies. The Control Flow Graph (CFG) and the Data Flow Graph (DFG) represent the performance characteristics of source code. These graphical representations illustrate the program execution paths, including branches and loops for CFG, and variable lifetimes and data dependency mechanisms for DFG (<xref ref-type="bibr" rid="B4">Cummins et al., 2021</xref>). Recent studies (<xref ref-type="bibr" rid="B13">Ma et al., 2023</xref>; <xref ref-type="bibr" rid="B23">Zou et al., 2025</xref>) find that the source code&#x00027;s graph structure can better capture the long-range relationships among a program&#x00027;s data. However, using graph network models to learn these structures often leads to significant model training costs (<xref ref-type="bibr" rid="B10">Huang et al., 2024</xref>). This high cost arises from the recursive message-passing mechanism on irregular graph structures. Unlike standard matrix operations, GNNs must iteratively aggregate features from neighbors, a process that is difficult to fully parallelize. Consequently, computational overhead grows exponentially as the code&#x00027;s complexity increases.</p>
<p>To address the aforementioned critical issue, a static analysis framework named Graph and Feature Transformer for Code Execution Time Prediction (GFTrans) is proposed to accurately predict the runtime duration categories of C programs. It combines semantic information from CFG with manually created code features. We developed an &#x0201C;anchor-based embedding&#x0201D; technique that merges CFG and DFG data from the C program into one linear sequence using explicit anchor tags. This unified structure enables Transformer-style architectures to utilize self-attention mechanisms, capturing both the execution flow and data dependencies within the program. Our self-created dataset categorizes codes into four types of execution time labels (from extremely short to long), and uses the GFTrans model for prediction. Experimental results demonstrate that our model outperforms a series of classic baselines in terms of accuracy. Furthermore, we developed a code performance bottleneck detection system by embedding the trained model as the backend inference engine, enabling an &#x0201C;On-The-Fly&#x0201D; workflow for developers.</p>
<p>In a nutshell, the contributions of this research are as follows:</p>
<list list-type="bullet">
<list-item><p>We propose a static analysis framework named Graph and Feature Transformer for Code Execution Time Prediction (GFTrans) to accurately predict the runtime duration categories of C programs.</p></list-item>
<list-item><p>We developed an &#x0201C;anchor-based embedding&#x0201D; technique that merges CFG and DFG data from the C program into one linear sequence using explicit anchor tags. This unified structure enables Transformer-style architectures to utilize self-attention mechanisms, capturing both the execution flow and data dependencies within the program.</p></list-item>
<list-item><p>We constructed a large-scale dataset derived from industrial-grade open-source projects. Extensive comparative experiments demonstrate that GFTrans achieves better accuracy than a series of popular methods. Our model attains 78.64% accuracy, exceeding the best baseline performance by 2.5 and 2.3%, respectively.</p></list-item>
</list>
<p>The rest of this paper is organized as follows. Section 2 reviews the related work on code performance prediction and graph representation of code. Section 3 presents the design of our GFTrans framework. Section 4 discusses the evaluation of our methodology, including the construction of our high-precision C function benchmark dataset, the experimental setup, baseline models for comparison, and evaluation results. Section 5 discusses the internal mechanisms of the model, its limitations, and use cases in IDEs. Finally, Section 6 concludes this paper and outlines future research plans.</p></sec>
<sec id="s2">
<label>2</label>
<title>Related work</title>
<p>This study focuses primarily on two different areas: code performance prediction and assessment, and learning graph-based code representations. In the following sections, examples of the work that has been carried out within these two areas are examined, including how this study differs and advances upon what has already been done.</p>
<sec>
<label>2.1</label>
<title>Code performance prediction and assessment</title>
<p>Traditionally, the analysis of code performance has been based on profiling techniques that rely on tools such as Gprof (<xref ref-type="bibr" rid="B5">Graham et al., 1982</xref>) and Intel VTune (Intel, Santa Clara, CA, USA) (<xref ref-type="bibr" rid="B15">Reinders, 2005</xref>). These toolsets rely on a full build-execution cycle to provide accurate performance data. This process is inherently lengthy and hinders the agile &#x0201C;write and test&#x0201D; workflow. However, nowadays, numerous researchers have investigated methods for developing predictive models of code performance utilizing data-driven learning techniques. For example, <xref ref-type="bibr" rid="B14">Mendis et al. (2019)</xref> described the first framework for predicting code execution throughput based on the basic blocks used in code execution, utilizing LSTMs to represent the throughput value for each basic block. The results of this research indicate that deep learning techniques are superior to more traditional models, such as LLVM-MCA, when it comes to simulating the efficiency of executing CPU instructions. However, Ithemal limits its predictions to the assembly code level, and therefore, cannot effectively capture more complex control flow structures that are present in higher-level languages. At the system and software levels, <xref ref-type="bibr" rid="B20">Weber et al. (2021)</xref> proposed a model architected to estimate the performance of software systems based on a combination of lightweight dynamic profiling data and structural features of the software code itself, hence the &#x0201C;white-box&#x0201D; nature of the model. This study concludes that exploring the internal structures of the code yields better performance predictions compared to using only black-box learning techniques. 
DeepPerf (<xref ref-type="bibr" rid="B8">Ha and Zhang, 2019</xref>) incorporated a deep sparse neural network model into its design to mitigate sparsity challenges in high-dimensional configuration space. Perf-AL (<xref ref-type="bibr" rid="B16">Shu et al., 2020</xref>) utilized adversarial learning techniques to augment the robustness of performance prediction models by utilizing models based on adversarial learning methodologies. <xref ref-type="bibr" rid="B21">Zheng et al. (2021)</xref> created TenSet as a means of predicting the execution time of underlying operators by training XGBoost and GNN on large-scale tensor program datasets. <xref ref-type="bibr" rid="B17">Sikka et al. (2020)</xref> demonstrated that the depth of nested loops and the number of branches in the code structure are important indicators of the computational efficiency of a code base.</p>
<p>Each of these approaches either emphasizes assembly or operator-based operations too much, or depends on dynamic runtime profiling data. GFTrans provides a benefit because of its ability to make static predictions. We only perform inference based on the source code graph structure and code characteristics, without compiling or executing the code. In this way, GFTrans provides millisecond-level feedback, filling the void created by the lack of a method for real-time performance analysis during software development.</p></sec>
<sec>
<label>2.2</label>
<title>Learning graph-based code representations</title>
<p>Source code naturally possesses a graph structure. Common representations include Abstract Syntax Trees (AST), Control Flow Graphs (CFG), and Data Flow Graphs (DFG). The main challenge lies in efficiently encoding these structural features into neural networks.</p>
<p>Early research, such as Code2Vec (<xref ref-type="bibr" rid="B1">Alon et al., 2019</xref>), focused on the &#x0201C;tree structure&#x0201D; of code. This work demonstrated that identifying pathways within the code is crucial for learning its semantics. <xref ref-type="bibr" rid="B22">Zhou et al. (2019)</xref> introduced Devign, which combines AST, CFG, and DFG into a unified graph and uses Graph Neural Networks (GNNs) to detect vulnerabilities.</p>
<p>However, standard graph models are often incompatible with pre-trained models like BERT. To bridge this gap, <xref ref-type="bibr" rid="B7">Guo et al. (2020)</xref> introduced GraphCodeBERT, the first model to incorporate Data Flow Graph (DFG) structures during pre-training, thereby enhancing variable alignment. Building on the need to capture data flow semantics, <xref ref-type="bibr" rid="B19">Wang et al. (2021)</xref> developed CodeT5, which employs identifier-aware pre-training tasks. Additionally, to address syntactic structures, <xref ref-type="bibr" rid="B6">Guo et al. (2022)</xref> proposed UniXcoder, which effectively linearizes Abstract Syntax Trees (AST) into unified sequences.</p>
<p>Our GFTrans differs from these approaches. Unlike Devign, which relies on computationally heavy GNNs, GFTrans utilizes the efficient Transformer architecture. We draw inspiration from UniXcoder but specifically focus on the CFG to represent execution behavior. Crucially, we introduced &#x0201C;anchor tags&#x0201D; to integrate DFG data. These tags align data dependencies with the CFG execution pathway. This design allows the model to capture dynamic execution logic efficiently.</p></sec></sec>
<sec id="s3">
<label>3</label>
<title>Methods</title>
<p>In this section, we design the GFTrans architecture (Graph and Feature Transformer for Code Execution Time Prediction) as presented in <xref ref-type="fig" rid="F1">Figure 1</xref>. The GFTrans framework consists of three main phases: Data Processing and Representation, Model Training, and Application.</p>
<fig position="float" id="F1">
<label>Figure 1</label>
<caption><p>The overall architecture of GFTrans.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-09-1779935-g0001.tif">
<alt-text content-type="machine-generated">Flowchart illustrating a three-phase pipeline for source code profiling: Phase one covers data processing, including graph linearization, anchor embedding, and feature engineering from source code; phase two details model training using attention mechanisms, MLP projection, and a dynamic gating unit to produce predictions; phase three demonstrates real-time application, where new code is analyzed by a trained engine to provide on-the-fly profiling and bottleneck detection.</alt-text>
</graphic>
</fig>
<p><bold>Phase 1: data processing and representation</bold>. We represent C source code using two types of graphs: Control Flow Graphs (CFG) and Data Flow Graphs (DFG). We extract the execution logic paths from the CFG. We then incorporate data dependencies from the DFG into these execution paths by embedding anchor markers directly within the sequence of code statements. However, a solely path-based method of obtaining deep semantic features may not provide adequate information to capture specific, explicit structural metrics, which can significantly affect execution time, including the number of nested loops and the frequency of types of operations executed. Therefore, we introduce a 16-dimensional handcrafted feature vector to capture keywords and execution structures. These features work alongside the CFG and DFG to enhance the model&#x00027;s semantic understanding.</p>
<p><bold>Phase 2: model training</bold>. We first encode the serialized CFG and DFG using a Transformer Encoder and fuse multiple representation vectors through the attention mechanism to obtain our deep semantic representation vectors. Then, we also implement a Dynamic Gating Fusion Mechanism, which fuses the deep semantic feature vector generated by the Transformer with explicit handcrafted code feature vectors, fully utilizing their different contributions dynamically based on the input code characteristics. Finally, we predict the execution time level (extremely short, short, medium, or long) for the code using fully connected layers.</p>
<p><bold>Phase 3: application</bold>. The trained GFTrans model accepts new C source input and predicts the execution time level in real time, allowing developers to quickly identify performance issues occurring in their code.</p>
<sec>
<label>3.1</label>
<title>Data processing and representation</title>
<p>We elaborate on the details of the code data processing and representation stages from two aspects: Graph Linearization and Anchor Embedding and Features Engineering.</p>
<sec>
<label>3.1.1</label>
<title>Graph linearization and anchor embedding</title>
<p>Source code is a non-linear representation, which contains a variety of complex structural information. When the DFG and CFG structural representations are used to create a model&#x00027;s understanding of code, it makes a significant improvement in the model&#x00027;s ability to understand the semantics of the code. Although GraphCodeBERT (<xref ref-type="bibr" rid="B7">Guo et al., 2020</xref>) incorporates Data Flow Graphs (DFG), it uses them implicitly during pre-training for variable alignment. In contrast, GFTrans employs an explicit &#x0201C;anchor-based&#x0201D; strategy. We inject discrete &#x02329;<italic>DEF</italic>&#x0232A; and &#x02329;<italic>USE</italic>&#x0232A; tags directly into the token sequence. This preserves precise define-use chains and forces the attention mechanism to actively track data dependencies during runtime prediction, ensuring a stronger grasp of code logic. One of our key challenges is the effective conversion of code graph structures into a linear representation that can be processed by Transformer-type architectures, without losing the underlying logical structure of the code or its data dependency. To address this, we present a linearization of the graph structure using dedicated anchor markers. This strategy is implemented in four steps.</p>
<p>Step 1: By using the static code analysis tool Joern, we are able to create a Code Property Graph (CPG) for the C function target being analyzed, from which we can identify three types of code collections. (1) Statement Node Set V: it contains all statement-level nodes and the associated source code that is represented. (2) Control Flow Edge Set E<sub>CFG</sub> : a directed edge set that shows the relationships of execution jumps between statements within a CFG format. (3) Data Dependency Edge Set E<sub>DFG</sub>: a directed edge set that defines the &#x0201C;Def-Use&#x0201D; relationship chains, allowing us to ascertain all statement nodes defining data and usage statement nodes within a specific application process.</p>
<p>Step 2: To ensure ease of recognition when navigating through multiple code basic blocks of a series, a global indexing system is implemented to represent data segments within the code throughout the <italic>E</italic><sub>DFG</sub> set. We iterate through all data definition locations, and when we identify a data definition node in <italic>V</italic> (<italic>v</italic><sub>def</sub>&#x02208;<italic>V</italic>), we assign it a unique numeric identifier ID(<italic>v</italic><sub>def</sub>) &#x0003D; <italic>N</italic> (where <italic>N</italic>&#x02208;{1, 2, &#x02026;, <italic>M</italic>} and <italic>M</italic> denotes total data definitions found in the function); this unique ID is used as a reference when accessing the data in the code tokens.</p>
<p>Step 3: <xref ref-type="fig" rid="F2">Figure 2</xref> illustrates that the program launches in sequential order as defined by the execution of each of the statements. The networkX library provides an API to generate an control flow graph structure using python. Using a <italic>Depth-First Search</italic> (DFS) Technique, we can sample a CFG via traversal from the starting point to the end point, thus creating a Path where every statement Node encountered during the traversal path for a defined statement node is recorded. As a CFG may contain cycles, we track the number of times a statement node has been visited, limiting it to a maximum of two visits. This technique, therefore, allows traversing the cycle and reaching an endpoint without creating a continuous Loop. As we construct paths from the CFG , any code fragment results in a Collection of Execution Paths. Therefore, the Set of Paths Created from Within the CFG Graphs are Defined as <inline-formula><mml:math id="M1"><mml:mrow><mml:mi mathvariant="script">P</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x02026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula> where <italic>K</italic> = number of paths established. Each path <italic>P</italic> consists of a List of Statement Nodes; And <italic>P</italic> &#x0003D; [<italic>s</italic><sub>1</sub>, <italic>s</italic><sub>2</sub>, &#x02026;, <italic>s</italic><sub><italic>L</italic></sub>] and <italic>s</italic><sub><italic>i</italic></sub> = the <italic>i</italic>-th Statement Node in the Path. 
The arrangement of code statement sequences according to the control flow path reflects how the program is executed.</p>
<fig position="float" id="F2">
<label>Figure 2</label>
<caption><p>The process of extracting execution paths from the control flow.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-09-1779935-g0002.tif">
<alt-text content-type="machine-generated">Side-by-side diagram illustrating source code, corresponding control flow graph construction, and two sampled execution paths using depth-first search; the paths are labeled as Path 1 for the true branch and Path 2 for the false branch.</alt-text>
</graphic>
</fig>
<p>Step 4: <xref ref-type="fig" rid="F3">Figure 3</xref> illustrates the process of inserting data anchors into the code execution path. This embedding technique represents a key innovation of our work. Specifically, we develop a method to extract discrete DFG data and seamlessly incorporate it into continuous code execution sequences. For each statement node <italic>s</italic><sub><italic>i</italic></sub> in the sampled path <italic>P</italic>, information from the <italic>E</italic><sub>DFG</sub> is retrieved and structurally augmented to the execution path sequence of <italic>s</italic><sub><italic>i</italic></sub> through the addition of specially structured markers, as follows. If the statement node <italic>s</italic><sub><italic>i</italic></sub> represents a data definition point and was assigned the anchor identifier <italic>N</italic><sub>ID</sub> in Step 2, we embed a unique definition marker tag <monospace> &#x0003C;DEF_{</monospace><italic>N</italic><sub>ID</sub><monospace>}&#x0003E;</monospace> into the textual content of statement node <italic>s</italic><sub><italic>i</italic></sub>. If <italic>s</italic><sub><italic>i</italic></sub> uses one or more data values defined at data definition point anchors in the <italic>E</italic><sub>DFG</sub>, we locate all data definition point anchor IDs associated with <italic>s</italic><sub><italic>i</italic></sub> and append the corresponding use tags <monospace> &#x0003C;USE_{</monospace><italic>N</italic><sub>1</sub><monospace>}&#x0003E;</monospace>, <monospace> &#x0003C;USE_{</monospace><italic>N</italic><sub>2</sub><monospace>}&#x0003E;</monospace> into the textual representation of the statement node <italic>s</italic><sub><italic>i</italic></sub>.</p>
<fig position="float" id="F3">
<label>Figure 3</label>
<caption><p>Schematic of the explicit anchor marker insertion strategy.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-09-1779935-g0003.tif">
<alt-text content-type="machine-generated">Diagram illustrating three phases of code processing for model input: code and data analysis extracts variable usage, path selection traces execution flow through nodes, and anchor insertion marks variable definitions and usage. Final linearized sequence combines code and anchor tags for model input.</alt-text>
</graphic>
</fig>
<p>After enhancement, the path set retains the semantics of its original execution flow, while the explicitly defined data flow markers within the path are combined with the separator label [SEP] to form the final execution path sequence. In order for the model to recognize the data structure embedded in the execution path, all unique anchor tags (e.g., <monospace> &#x0003C;DEF_1&#x0003E;</monospace>, <monospace> &#x0003C;USE_5&#x0003E;</monospace>) must also be added to the vocabulary of the pre-trained CodeBERT model. After tokenization and truncation or padding of the sequence to a pre-determined length, the sequence is transformed into a fixed-length token ID sequence. The result is a guidance input to the model, which facilitates the full integration of control flows and data flows.</p></sec>
<sec>
<label>3.1.2</label>
<title>Features engineering</title>
<p>The Transformer-based deep learning approach has demonstrated the ability to comprehend the semantics of code (<xref ref-type="bibr" rid="B18">Vaswani et al., 2017</xref>). Utilizing various code features allows an analyst to evaluate the complexity of a code base and allows the deep learning model to attain the most relevant inductive biases, based upon a larger scope of coding features. This research uses the feature set developed by <xref ref-type="bibr" rid="B17">Sikka et al. (2020)</xref>, who produced a set of features based upon statistical analysis techniques to assist in the development of models to predict the runtime complexity of code. We extracted a total of 24 features from the source code using the SourceMonitor tool, along with custom-written scripts, to develop a 24-dimensional feature vector. These features are categorized across four distinct structural complexity classes.</p>
<p><xref ref-type="bibr" rid="B17">Sikka et al. (2020)</xref> demonstrated that the depth of nested loops and the number of branches in the code structure are important indicators of the computational efficiency of a code base. Therefore, we have produced a total of 24 features based upon source code using a tool named SourceMonitor, along with custom-written scripts to develop a 24-dimensional feature vector; these features are based upon four different categories of structural complexity.</p>
<p>The structural complexity features category includes items such as nested loop depth, number of loops, number of ifs, number of switches, and maximum block depth. The count-based category includes I/O keyword counts, counts of total statements, counts of total variables, and counts of total code lines; the I/O keyword count reflects the high overhead costs associated with executing I/O calls. The algorithm patterns category includes the number of sorting operations, whether recursion is present, whether a priority queue is present, whether a hash map is present, and a variety of other elements.</p>
<p>Lastly, the control flow fragmentation characteristics category includes the total number of jumps, breaks, continues, and returns to illustrate the extent to which the execution of a program has deviated from its intended path. The original 24-dimensional features developed in this research have the potential to contain noise or redundancy. Using Spearman rank correlation, we investigated the correlation between code features and run times of the code in the training set. From the training set labels, we calculate the Spearman correlation coefficient &#x003C1; to determine monotonic relationships between feature values and runtime of the code.</p>
<p>To empirically validate the necessity of these handcrafted features, we calculated the Spearman rank correlation coefficients for all 24 features against the runtime labels and plotted the top 8 most influential features in a bar chart (<xref ref-type="fig" rid="F4">Figure 4</xref>). As expected, structural features rank the highest, with &#x0201C;Nested Loop Depth&#x0201D; (0.72) and &#x0201C;Number of Statements&#x0201D; (0.68) demonstrating the strongest positive correlation. This confirms that algorithmic complexity, driven by loops and code size, is the primary determinant of execution time. &#x0201C;Number of I/O Operations&#x0201D; also shows a strong correlation (0.55) due to the inherent latency of I/O calls. Interestingly, while &#x0201C;Percent Comments&#x0201D; has a lower correlation of 0.36 (below the 0.4 threshold), we retained it intentionally as a semantic proxy to help the model distinguish between complex human-written logic and simple auto-generated code.</p>
<fig position="float" id="F4">
<label>Figure 4</label>
<caption><p>Top eight handcrafted features ranked by Spearman correlation.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-09-1779935-g0004.tif">
<alt-text content-type="machine-generated">Horizontal bar chart showing eight code metrics and their Spearman correlation coefficients. Nested loop depth has the highest correlation at zero point seven two, followed by number of statements at zero point six eight and max method complexity at zero point six one. Percent comments has the lowest correlation at zero point three six.</alt-text>
</graphic>
</fig>
<p>We specifically included &#x0201C;Percent Lines with Comments&#x0201D; as a key feature. Comments are stripped away by the compiler. They do not physically slow down the execution. We included this feature as a semantic indicator. The decision is grounded in the coding conventions of open-source communities. Firstly, developers tend to write detailed comments for complex algorithms. These algorithms usually fall into the &#x0201C;Long&#x0201D; execution category. Therefore, this feature acts as a &#x0201C;helper.&#x0201D; It helps the model estimate the complexity of the logic. We deleted the Line feature and retained only the Statement feature, which is better representative of how many statements are in each piece of code. Thus, the feature set has been reduced from 24 dimensions to 16 features of superior quality, as shown in <xref ref-type="table" rid="T1">Table 1</xref>.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Description of selected features for static analysis.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Feature name</bold></th>
<th valign="top" align="center"><bold>Description</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left"><monospace>nested_loop_depth</monospace></td>
<td valign="top" align="center">Depth of nested loops</td>
</tr>
<tr>
<td valign="top" align="left"><monospace>Maximum Method Complexity</monospace></td>
<td valign="top" align="center">Cyclomatic complexity</td>
</tr>
<tr>
<td valign="top" align="left"><monospace>Average Block Depth</monospace></td>
<td valign="top" align="center">Average nesting depth of code blocks</td>
</tr>
<tr>
<td valign="top" align="left"><monospace>no_of_ifs</monospace></td>
<td valign="top" align="center">Count of <monospace>if</monospace> statements</td>
</tr>
<tr>
<td valign="top" align="left"><monospace>no_of_switches</monospace></td>
<td valign="top" align="center">Frequency of multi-way branch structures</td>
</tr>
<tr>
<td valign="top" align="left"><monospace>noOfJumps</monospace></td>
<td valign="top" align="center">Count of jump statements (break, continue, return)</td>
</tr>
<tr>
<td valign="top" align="left"><monospace>no_of_io</monospace></td>
<td valign="top" align="center">Number of system I/O operations</td>
</tr>
<tr>
<td valign="top" align="left"><monospace>Statements</monospace></td>
<td valign="top" align="center">Total number of effective instructions</td>
</tr>
<tr>
<td valign="top" align="left"><monospace>noOfMethods</monospace></td>
<td valign="top" align="center">Frequency of function/method calls</td>
</tr>
<tr>
<td valign="top" align="left"><monospace>noOfVariables</monospace></td>
<td valign="top" align="center">Total number of variables declared</td>
</tr>
<tr>
<td valign="top" align="left"><monospace>recursion_present</monospace></td>
<td valign="top" align="center">Presence of recursion flag</td>
</tr>
<tr>
<td valign="top" align="left"><monospace>no_of_sort</monospace></td>
<td valign="top" align="center">Count of sorting algorithm invocations</td>
</tr>
<tr>
<td valign="top" align="left"><monospace>priority_queue_present</monospace></td>
<td valign="top" align="center">Presence of priority queue (Heap) operations</td>
</tr>
<tr>
<td valign="top" align="left"><monospace>hash_map_present</monospace></td>
<td valign="top" align="center">Usage of hash-based data structures</td>
</tr>
<tr>
<td valign="top" align="left"><monospace>hash_set_present</monospace></td>
<td valign="top" align="center">Usage of set-like logic or uniqueness checks</td>
</tr>
<tr>
<td valign="top" align="left"><monospace>Percent lines with comments</monospace></td>
<td valign="top" align="center">Ratio of comments to code (Code quality metric)</td>
</tr></tbody>
</table>
</table-wrap></sec></sec>
<sec>
<label>3.2</label>
<title>Model training</title>
<p>The most significant challenge associated with training the model is the need to combine two types of multimodal data: a graphical structure with unstructured pathways and a set of 16 handcrafted explicit features that are determined statistically. The two types of multimodal data needed to be integrated into one framework. To do that, we create a modular network that contains a variety of mechanisms to correlate the two types of multimodal data, including path segmentation, CodeBERT semantic encoding, nonlinear feature mapping, and dynamic gated fusion.</p>
<sec>
<label>3.2.1</label>
<title>Path segmentation and CodeBERT semantic encoding</title>
<p>By employing the CFG, we generated a collection of Paths <inline-formula><mml:math id="M2"><mml:mrow><mml:mi mathvariant="script">P</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mo>&#x02026;</mml:mo><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:math></inline-formula>, where each path contains a traverse of all the properties of code and the <italic>Anchor Points</italic> representing their flow of data, using the edges associated with every path. The procedures employed to produce each dimensionality reduction vector are consistent with each of the paths that are sampled (i.e., <italic>K</italic> &#x0003D; 10). Initially, the CodeBERT pre-trained tokenizing method is utilized to convert each <italic>P</italic><sub><italic>i</italic></sub> (the <italic>i</italic>-th sampled path) into an array of sub-word token IDs. Simultaneously, we assign a unique anchor ID (i.e., <monospace> &#x0003C;DEF_N&#x0003E;</monospace>, <monospace> &#x0003C;USE_N&#x0003E;</monospace>) to each of the IDs associated with each path. The final representation is denoted as <italic>T</italic><sub><italic>i</italic></sub>.</p>
<p>Subsequently, <italic>T</italic><sub><italic>i</italic></sub> is processed by the backbone network, where tokens in <italic>T</italic><sub><italic>i</italic></sub> capture long-distance relations through self-attention within the Embedding layer and the 12-layer Transformer encoder. The resultant output vector aligns with the input representation at position <monospace>[CLS]</monospace>. Thus, the semantic representation of an individual path <italic>h</italic><sub><italic>i</italic></sub> is created as follows:</p>
<disp-formula id="EQ1"><mml:math id="M3"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mtext>Encoder</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>T</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x02003;</mml:mtext><mml:msub><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>o</mml:mi><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msup></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(1)</label></disp-formula>
<p>where <italic>d</italic><sub><italic>model</italic></sub> is equal to 768. After the process of dimensionality reduction is completed for each code sample, the <italic>h</italic><sub><italic>i</italic></sub> vectors form a collection of <italic>d</italic><sub><italic>model</italic></sub> dimensional vectors <italic>H</italic> &#x0003D; {<italic>h</italic><sub>1</sub>, <italic>h</italic><sub>2</sub>, &#x02026;, <italic>h</italic><sub><italic>K</italic></sub>}, where each vector represents a different execution flow and its associated data dependencies.</p></sec>
<sec>
<label>3.2.2</label>
<title>Path aggregation via attention mechanism</title>
<p>Execution paths in programs affect performance differently (for example, execution paths within the main loop of an algorithm significantly impact performance compared to paths in exception handling or initialization). Therefore, basic average pooling fails to capture these crucial structural differences. Inspired by <xref ref-type="bibr" rid="B9">Hu et al. (2025)</xref> and their use of soft-attention in graph-level readout operations for GNNs, we introduce a path-level attention mechanism to dynamically aggregate multiple path features. This mechanism allows the model to automatically learn the importance weight of each path.</p>
<p>We introduce a trainable context query vector <inline-formula><mml:math id="M4"><mml:mi>u</mml:mi><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>o</mml:mi><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msup></mml:math></inline-formula>. For each representation vector <italic>h</italic><sub><italic>i</italic></sub> within the path set <italic>H</italic>, we first calculate its importance score <italic>s</italic><sub><italic>i</italic></sub> through a nonlinear transformation, and then apply a softmax function to normalize the score, obtaining the attention weight &#x003B1;<sub><italic>i</italic></sub> for the <italic>i</italic>-th path:</p>
<disp-formula id="EQ2"><mml:math id="M5"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo class="qopname">tanh</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(2)</label></disp-formula>
<disp-formula id="EQ3"><mml:math id="M6"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>&#x003B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mo class="qopname">exp</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x022A4;</mml:mo></mml:mrow></mml:msubsup><mml:mi>u</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mstyle displaystyle="true"><mml:msubsup><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:msubsup></mml:mstyle><mml:mo class="qopname">exp</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msubsup><mml:mrow><mml:mi>s</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x022A4;</mml:mo></mml:mrow></mml:msubsup><mml:mi>u</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(3)</label></disp-formula>
<p>where <italic>W</italic><sub><italic>att</italic></sub> and <italic>b</italic><sub><italic>att</italic></sub> are trainable parameters, and <italic>K</italic> is the total number of paths. The weight &#x003B1;<sub><italic>i</italic></sub> quantifies the contribution of the <italic>i</italic>-th path to determining the current code&#x00027;s runtime category. The final code semantic vector <italic>v</italic><sub><italic>sem</italic></sub> is produced via the weighted sum of all path vectors:</p>
<disp-formula id="EQ4"><mml:math id="M7"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>K</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:msub><mml:mrow><mml:mi>&#x003B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>h</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(4)</label></disp-formula>
<p>This process compresses the <italic>K</italic>&#x000D7;768 path matrix into a single 1 &#x000D7; 768 global vector, effectively highlighting the semantic features of key execution paths while suppressing noise from irrelevant paths.</p></sec>
<sec>
<label>3.2.3</label>
<title>Nonlinear projection of handcrafted features</title>
<p>In the feature engineering phase, we screen out a 16-dimensional high-quality handcrafted feature vector <inline-formula><mml:math id="M8"><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>w</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mn>16</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> (including nested loop depth, IO keyword frequency, etc.). Since the dimension of handcrafted features is far lower than that of semantic vectors and the numerical distribution varies significantly, direct concatenation would cause the explicit features to be overwhelmed by high-dimensional semantic features. Therefore, we design a two-layer Multi-Layer Perceptron (MLP) as a feature projection module. First, we perform Z-Score normalization on <italic>f</italic><sub><italic>raw</italic></sub> to obtain <italic>f</italic><sub><italic>norm</italic></sub>, and then project it to the same dimension space as the semantic vector:</p>
<disp-formula id="EQ5"><mml:math id="M9"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi><mml:mi>e</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mtext>GELU</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mtext>GELU</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(5)</label></disp-formula>
<p>where <inline-formula><mml:math id="M10"><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mn>16</mml:mn><mml:mo>&#x000D7;</mml:mo><mml:mn>64</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> transforms the features to an intermediate layer, and <inline-formula><mml:math id="M11"><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mn>64</mml:mn><mml:mo>&#x000D7;</mml:mo><mml:mn>768</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> further maps them to the target semantic space. The projected <inline-formula><mml:math id="M12"><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi><mml:mi>e</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mn>768</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> can then interact with <italic>v</italic><sub><italic>sem</italic></sub> in the same manifold space.</p>
</sec>
<sec>
<label>3.2.4</label>
<title>Dynamic gated fusion and classification</title>
<p>To adaptively adjust the contribution ratio of deep semantic features and code features in the final prediction, we adopted a dynamic gating unit. The gating vector <italic>z</italic>&#x02208;(0, 1)<sup>768</sup> is generated by jointly analyzing both feature streams:</p>
<disp-formula id="EQ6"><mml:math id="M13"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:mi>z</mml:mi><mml:mo>=</mml:mo><mml:mi>&#x003C3;</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>;</mml:mo><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi><mml:mi>e</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(6)</label></disp-formula>
<p>The final fused vector <italic>v</italic><sub><italic>final</italic></sub> is computed via element-wise multiplication (&#x02299;). To ensure dimensional consistency, the handcrafted feature vector is projected to match the semantic vector&#x00027;s dimension, so <inline-formula><mml:math id="M14"><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi><mml:mi>e</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x02208;</mml:mo><mml:msup><mml:mrow><mml:mi>&#x0211D;</mml:mi></mml:mrow><mml:mrow><mml:mn>768</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>. The gating vector <italic>z</italic> is also generated in &#x0211D;<sup>768</sup>. The fusion equation is defined as:</p>
<disp-formula id="EQ7"><mml:math id="M15"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi>z</mml:mi><mml:mo>&#x02299;</mml:mo><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:mi>z</mml:mi></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>&#x02299;</mml:mo><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi><mml:mi>e</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(7)</label></disp-formula>
<p>where &#x02299; represents element-wise multiplication. This mechanism allows the model to preserve deep semantics in certain dimensions (such as capturing data flow dependencies) while directly utilizing explicit features in others (such as exponential complexity signals brought by loop depth). Finally, the fused feature vector <italic>v</italic><sub><italic>final</italic></sub> is input to the classification head, which consists of a fully connected layer and a Softmax activation function, outputting the probability distribution of four runtime categories:</p>
<disp-formula id="EQ8"><mml:math id="M16"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:mi>P</mml:mi><mml:mo>=</mml:mo><mml:mtext>Softmax</mml:mtext><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mrow><mml:mi>v</mml:mi></mml:mrow><mml:mrow><mml:mi>f</mml:mi><mml:mi>i</mml:mi><mml:mi>n</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:msub><mml:mrow><mml:mi>b</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(8)</label></disp-formula>
<p>The model training uses the standard Cross-Entropy Loss function:</p>
<disp-formula id="EQ9"><mml:math id="M17"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:mrow><mml:mi mathvariant="script">L</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mo>-</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:munderover></mml:mstyle><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>c</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mn>3</mml:mn></mml:mrow></mml:munderover></mml:mstyle><mml:msub><mml:mrow><mml:mi>y</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow></mml:msub><mml:mo>&#x000B7;</mml:mo><mml:mo class="qopname">log</mml:mo><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(9)</label></disp-formula>
<p>where <italic>N</italic> is the batch size, <italic>y</italic><sub><italic>i, c</italic></sub> is the true label of sample <italic>i</italic> belonging to runtime category <italic>c</italic>, and <italic>P</italic><sub><italic>i, c</italic></sub> is the predicted probability.</p>
<p>To provide a holistic view of the proposed framework, we formalize the complete training procedure in <xref ref-type="other" rid="algorithm_1">Algorithm 1</xref>.</p>
<statement content-type="algorithm" id="algorithm_1">
<label>Algorithm 1</label>
<p>GFTrans training pipeline.
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-09-1779935-g0007.tif"/>
</p>
</statement>
</sec></sec>
<sec>
<label>3.3</label>
<title>Application</title>
<p>Utilizing the proposed GFTrans model, we implemented an automation system that analyzes code for performance bottlenecks. This system provides developers with feedback using &#x0201C;time profiling&#x0201D; tools through static analysis methods that help identify code quality enhancements by quickly identifying any potential high-cost problems during coding. The primary function of this system is to create a trained GFTrans model, which acts as an inference engine and is used in contrast to dynamic performance analysis utilities, which require compilation, instrumentation or running of test cases. Instead, this system performs inference only on the source code attributes. The processing stages of this system can be described in three phases:</p>
<p>&#x02022; Automated parsing/extraction: after saving the C source file(s), the system generates a parser, which automatically divides the code into individual function segments. Each function will also simultaneously allow the retrieval of its graph structure (CFG/DFG) path sequence with a total of 16 custom statistical features manually created.</p>
<p>&#x02022; Complete model inference: the multimodel features retrieved from the parser automatically feed into the trained GFTrans model. The model comprehensively evaluates control logic and data dependencies to generate a probability distribution. This distribution represents the likelihood of the function falling into one of four runtime categories: 0 (Extremely Short), 1 (Short), 2 (Medium), and 3 (Long). The last category is identified as the predicted class.</p>
<p>&#x02022; Feedback generation/display: after identifying &#x0201C;Long-running&#x0201D; code blocks or functions, the system creates a red highlighting background in the code. This visual representation provides the developer with a way to quickly identify the potential performance bottleneck when reviewing or writing their code, which eliminates the potential for inefficient code to be deployed into production environments. Clicking the highlighted function will display statistical details of that block of code in the sidebar.</p>
<p>To integrate seamlessly into the standard developer workflow, the system is designed as an IDE extension. This extension, which leverages the GFTrans model&#x00027;s graph handling capabilities, provides real-time feedback while the developer is coding, thereby allowing immediate feedback without having to set up complicated compilation environments or generate test case inputs. By moving performance evaluation from &#x0201C;post-mortem&#x0201D; to &#x0201C;prevention&#x0201D; the system significantly reduces the difficulty of performance evaluations and provides both a tool for engineers to optimize their performance, as well as a learning tool for new developers to assist them in correcting inefficient coding patterns on the spot.</p></sec></sec>
<sec id="s4">
<label>4</label>
<title>Evaluation</title>
<sec>
<label>4.1</label>
<title>Data preparation</title>
<p>This section details the process of developing a runtime benchmark dataset for C programming. Unlike research projects focusing on complete source code, our dataset concentrates on function-level code snippets. This fine-grained dataset enhances the precision of runtime prediction. It also fulfills the industry&#x00027;s need for diagnosing performance issues based on individual functions.</p>
<p><bold>1) Data collection:</bold> most public datasets come from various algorithm competition platforms. However, the programming styles in competitive programming differ from those found in practical software development processes. To ensure our datasets have broad diversity and represent real-world coding styles, we select our dataset from GitHub. We sample from 300 updated, publicly available C open source projects, each with over 300 stars, containing a wide range of coded works. These include low-level system utilities, high performance network libraries, and data processing algorithms. Code from GitHub repositories better represents user practices and coding styles found in industry than current open source datasets. Collectively, we amassed a total of 181 GitHub-sourced open source projects containing C code.</p>
<p><bold>2) Data filtering and preprocessing:</bold> we filter the data through scripts to ensure that the retained code snippets cover all complex code structures, including branches, loops, memory allocation, IO operations, etc. This diversity of code structures helps the model generalize better. On the other hand, we remove overly simple and overly lengthy code snippets, such as single-line code snippets and code snippets exceeding 200 lines. To comply with the Transformer&#x00027;s 512-token limit, we applied a strict length filter during preprocessing. We pre-calculated the total length of each function, accounting for the additional anchor tags (&#x02329;<italic>DEF</italic>&#x0232A;, &#x02329;<italic>USE</italic>&#x0232A;). Any sample exceeding this limit was excluded. This proactive filtering ensures that all samples in our dataset are processed in full without truncation, preserving the complete integrity of the control flow and data dependencies. Finally, to exclude code with syntax errors, we only retain code that compiles without errors among these code snippets.</p>
<p><bold>3) Code runtime collection:</bold> we run the code in a unified environment to collect the running duration of the code. All operations are performed on a computer equipped with an Intel Core i7-12700H CPU &#x00040; 2.30GHz and 32GB of memory, with the operating system being Ubuntu 22.04 LTS. During the test, CPU scheduling is disabled, and only a single CPU core is used when running the code, which reduces the impact of operating system scheduling. All C code samples are compiled using GCC 11.4 with the -O2 optimization flag, which avoids the impact of compiler optimization techniques on execution time. For timing, we use Linux nanosecond timing to measure each function from start to end. Each sample is run 20 times independently. We discard the fastest five and slowest five runs to remove hardware interrupt effects. To ensure the high quality of our dataset, we conducted a rigorous stability analysis. The operating system (Ubuntu) can introduce noise. This includes context switching and background processes. To mitigate this, we first applied a trimmed mean filter. We discarded the fastest 5 runs and the slowest 5 runs for each function.</p>
<p>Since the operating system (Ubuntu 22.04) is not a real-time system, background tasks may introduce non-deterministic delays. Therefore, we employed the &#x0201C;trimmed mean&#x0201D; method by discarding the five fastest runs and the five slowest runs, which effectively removes outliers caused by system noise. To further address this concern, we analyzed the variance of the remaining 10 runs for each function using the Coefficient of Variation (CV). The results show that the CV is lower than 0.05 (5%) for 96.5% of the code samples. This low variance confirms that the execution time is highly stable. The assigned runtime category is robust.</p>
<p><bold>4) Statistics of the dataset and partitioning of the dataset:</bold> after applying the sampling method to create our final dataset, we collect 2,887 high-quality C program examples. Based on execution time calculations, we divide the samples into four categories. We control sample category imbalance to less than 10% using undersampling and oversampling.</p>
<p>As indicated in <xref ref-type="table" rid="T2">Table 2</xref>, we divide the execution times of the samples into four categories. The groupings of the samples on these four levels allow the prediction of runtime execution.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Runtime execution categories and counts.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Category</bold></th>
<th valign="top" align="center"><bold>Runtime range</bold></th>
<th valign="top" align="center"><bold>Count</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Ultra-short execution</td>
<td valign="top" align="center"> &#x02264; 20 ms</td>
<td valign="top" align="center">682</td>
</tr>
<tr>
<td valign="top" align="left">Short execution</td>
<td valign="top" align="center">&#x0003E;20ms and &#x02264; 100 ms</td>
<td valign="top" align="center">802</td>
</tr>
<tr>
<td valign="top" align="left">Medium execution</td>
<td valign="top" align="center">&#x0003E;100 ms and &#x02264; 500 ms</td>
<td valign="top" align="center">701</td>
</tr>
<tr>
<td valign="top" align="left">Long execution</td>
<td valign="top" align="center">&#x02265;500 ms</td>
<td valign="top" align="center">702</td>
</tr></tbody>
</table>
</table-wrap></sec>
<sec>
<label>4.2</label>
<title>Baselines model comparison</title>
<p>To assess the capabilities and advantages of the GFTrans models in identifying runtime evaluation and best fit/cost performance for code execution classification, we compare the GFTrans results against those of three major baseline model categories. These categories represent the major methodologies and include:</p>
<list list-type="bullet">
<list-item><p><bold>Random forest:</bold> a Random Forest model is widely used in software engineering studies for identifying development errors and estimating security-related bugs. It is highly accurate, reliable, and efficient. This study uses CodeBERT, a pre-trained BERT-type model, to generate 768-dimensional dense vectors. These vectors represent the <italic>global semantic embedding</italic> for source code. For input to the Random Forest, we extracted them from the [CLS] token output of the last CodeBERT layer. The Random Forest builds decision trees independently from the training instances. It estimates, in parallel, the runtime complexity category for the input code samples. The output is determined by combining results from all decision tree predictions.</p></list-item>
<list-item><p><bold>Long short-term memory (LSTM):</bold> LSTMs are a widely used form of Recurrent neural network in software Engineering. They show how deep learning techniques can handle sequence data. For our assessment, we use the same vocabulary and embedding size of 768 as GFTrans. LSTMs can pass generated outputs back to the input layer using states, allowing efficient modeling of code&#x00027;s sequential token relationships. For classification, we use the final time step&#x00027;s state as the temporal representation when feeding into the last layer of a fully-connected model.</p></list-item>
<list-item><p><bold>Code2Vec:</bold> This is a model based on the structural features of code, using structure as the main representation. In this research, we follow a previously described approach to develop fixed-length feature vectors that capture code features from source code. We parse C source code into abstract syntax trees (ASTs) and generated &#x0201C;path contexts&#x0201D; to track structural characteristics. The model then combines these path characteristics into a complete semantic vector. This vector is fed into a classification layer, which predicts one of four categories based on the execution time of the C function. We use this as a baseline for comparing our graph-based method (CFG/DFG) to AST-based approaches.</p></list-item>
</list></sec>
<sec>
<label>4.3</label>
<title>Evaluation metrics</title>
<p>To quantitatively evaluate the accuracy of the predictive model, we create a confusion matrix from the model&#x00027;s predictions based on the execution times of C functions. The model&#x00027;s performance is evaluated using the following common classification evaluation metrics:</p>
<p><bold>Accuracy:</bold> the ratio of correctly predicted instances to the total number of instances; this gives a simple measure of how well the classification model can be expected to perform across the population of instances.</p>
<disp-formula id="EQ10"><mml:math id="M18"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:mi>A</mml:mi><mml:mi>c</mml:mi><mml:mi>c</mml:mi><mml:mi>u</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>T</mml:mi><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>T</mml:mi><mml:mi>N</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(10)</label></disp-formula>
<p>where <italic>TP</italic>, <italic>TN</italic>, <italic>FP</italic>, and <italic>FN</italic> are defined as true positive, true negative, false positive and false negative respectively.</p>
<p><bold>Precision:</bold> the number of samples correctly identified as positive divided by the total number of samples predicted as positive; this measures the accuracy of the model&#x00027;s predictions. The precision for each class <italic>i</italic> is expressed as:</p>
<disp-formula id="EQ11"><mml:math id="M19"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(11)</label></disp-formula>
<p><bold>Recall:</bold> recall refers to how many positive instances were correctly labeled positive by the model, and is the measure of how well the model can detect examples. The recall for each class <italic>i</italic> can be expressed as:</p>
<disp-formula id="EQ12"><mml:math id="M20"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:msub><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>T</mml:mi><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mi>T</mml:mi><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mi>F</mml:mi><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(12)</label></disp-formula>
<p><bold>Macro-F1 score:</bold> is more reliable than a standard accuracy metric, because the frequencies of different runtime categories vary greatly from category to category (i.e., very short samples could be more common than very long ones). Macro-F1 will compute the F1 score for each individual category, then average all of the F1 scores to provide a balanced view of the model&#x00027;s performance for all classes:</p>
<disp-formula id="EQ13"><mml:math id="M21"><mml:mtable class="eqnarray" columnalign="center"><mml:mtr><mml:mtd><mml:mi>M</mml:mi><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>r</mml:mi><mml:mi>o</mml:mi><mml:mo>-</mml:mo><mml:mi>F</mml:mi><mml:mn>1</mml:mn><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>C</mml:mi></mml:mrow></mml:mfrac><mml:mstyle displaystyle="true"><mml:munderover accentunder="false" accent="false"><mml:mrow><mml:mo>&#x02211;</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mi>C</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munderover></mml:mstyle><mml:mfrac><mml:mrow><mml:mn>2</mml:mn><mml:mo>&#x000D7;</mml:mo><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x000D7;</mml:mo><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:msub><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:msub><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x0002B;</mml:mo><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:msub><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mtd></mml:mtr></mml:mtable></mml:math><label>(13)</label></disp-formula>
<p>where <italic>C</italic> &#x0003D; 4 represents the total number of classes. The Macro-F1 score evaluates the precision and recall values together.</p>
<sec>
<label>4.4</label>
<title>Experimental setup</title>
<p>We implement GFTrans and all baseline models using the PyTorch framework. To ensure the reproducibility of our results and fair comparisons, all experiments are conducted under the same hardware and software configurations. The detailed settings are as follows:</p>
<list list-type="bullet">
<list-item><p><bold>Hardware and software environment:</bold> all models are trained and evaluated on a workstation running Ubuntu 22.04 LTS. The system is equipped with an Intel Core i7-12700H CPU &#x00040; 2.30 GHz and a single NVIDIA RTX 3090 (NVIDIA, Santa Clara, CA, USA) GPU (24 GB).</p></list-item>
<list-item><p><bold>Model hyperparameters:</bold> we align the hidden dimension size with the pre-trained CodeBERT model, setting it to 768. To prevent overfitting, we apply a dropout rate of 0.1 across the fully connected layers.</p></list-item>
<list-item><p><bold>Training configuration:</bold> we utilize the AdamW optimizer to update model parameters. The initial learning rate is set to 2 &#x000D7; 10<sup>&#x02212;5</sup>, coupled with a linear learning rate scheduler to accelerate convergence. The batch size is set to 16. To accommodate GPU memory constraints while maintaining training stability, we implement gradient accumulation with a step size of 2.</p></list-item>
<list-item><p><bold>Training strategy:</bold> we employ an early stopping mechanism to save training time and prevent overfitting. Specifically, the training process terminates if the validation accuracy does not improve for 5 consecutive epochs, with the maximum number of epochs set to 25.</p></list-item>
<list-item><p><bold>Validation strategy:</bold> to ensure statistical reliability and minimize random variance, we employed a five-Fold Cross-Validation strategy. The entire dataset was randomly shuffled and divided into five equal folds. The training process was repeated five times. In each iteration, four folds were used for training and one fold for testing. The final reported metrics represent the average performance across these five independent runs.</p></list-item>
</list></sec>
<sec>
<label>4.5</label>
<title>Experimental results</title>
<p>In this section, we study the results to understand the relative effectiveness of the GFTrans.</p>
<sec>
<label>4.5.1</label>
<title>Evaluation of the GFTrans model&#x00027;s effectiveness</title>
<p>As shown in <xref ref-type="table" rid="T3">Table 3</xref>, the GFTrans model outperformed the leading baseline model (Code2Vec) by 2.5 percentage points in accuracy and 2.3 percentage points in the F1 score. This indicates that the &#x0201C;CFG/DFG graph structure fusion&#x0201D; (herein referred to as &#x0201C;our approach&#x0201D;) produces additional improvement in prediction accuracy compared to other approaches, showing the value of following execution paths through the use of CFG and DFG data structures.</p>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Overall performance comparison between GFTrans and baseline models on test set.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Model</bold></th>
<th valign="top" align="center"><bold>Accuracy (%)</bold></th>
<th valign="top" align="center"><bold>Macro-precision (%)</bold></th>
<th valign="top" align="center"><bold>Macro-recall (%)</bold></th>
<th valign="top" align="center"><bold>Macro-F1 (%)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">LSTM</td>
<td valign="top" align="center">73.58</td>
<td valign="top" align="center">72.15</td>
<td valign="top" align="center">70.88</td>
<td valign="top" align="center">71.45</td>
</tr>
<tr>
<td valign="top" align="left">Random Forest</td>
<td valign="top" align="center">74.82</td>
<td valign="top" align="center">73.80</td>
<td valign="top" align="center">72.18</td>
<td valign="top" align="center">72.93</td>
</tr>
<tr>
<td valign="top" align="left">Code2Vec</td>
<td valign="top" align="center">76.15</td>
<td valign="top" align="center">75.12</td>
<td valign="top" align="center">74.05</td>
<td valign="top" align="center">74.52</td>
</tr>
<tr>
<td valign="top" align="left"><bold>GFTrans (Ours)</bold></td>
<td valign="top" align="center"><bold>78.64</bold></td>
<td valign="top" align="center"><bold>77.20</bold></td>
<td valign="top" align="center"><bold>76.58</bold></td>
<td valign="top" align="center"><bold>76.85</bold></td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>Bold values indicate the best performance among all tested models for each metric.</p>
</table-wrap-foot>
</table-wrap>
<p>The structural aspect of representation appears to have an advantage over unstructured forms of representation. The results indicate that models using code structure features (herein Code2Vec and GFTrans) produced more accurate prediction results than those based on flat features alone (Random Forest and LSTM). Code2Vec was able to outperform Random Forest by 1.3 percentage points (Code2Vec = 76.1%, Random Forest = 74.8%), suggesting that hierarchical representations of code enable a better understanding of the logic contained within source code. The GFTrans model outperforms the Code2Vec model, which shows that compared to the static syntax tree features used by Code2Vec, dynamic control flow graph (CFG) and data flow graph (DFG) information are more efficient and accurate in predicting &#x0201C;runtime.&#x0201D;</p>
<p>In the case of sequential models (LSTM), the accuracy (73.6%) falls short compared to that of GFTrans (78.6%). The limitations of sequentially representing code are evident in the low LSTM accuracy. The nature of function calls and looping jumps creates long ranges of dependency that cannot be well learned by a simplistic sequential learning algorithm.</p>
<p>A comprehensive assessment of the efficacy of each model type across the runtime category is provided in <xref ref-type="table" rid="T4">Table 4</xref>. The experimental results indicate that all models exhibit relatively low prediction accuracy when predicting the two categories of &#x0201C;short&#x0201D; and &#x0201C;medium.&#x0201D; The accuracy of the baseline model hovers around 70%. The runtime differences between these two categories of code are not significant, with the main distinctions lying in the number of memory accesses, IO operations, and other time-consuming code statements. GFTrans achieves a runtime accuracy of 74% for these two categories, thanks to the introduction of manual code features that quantify the impact of time-consuming statements, thereby distinguishing minor performance losses caused by frequent memory operations.</p>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>Detailed accuracy and F1-score for each model across four runtime categories.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Category</bold></th>
<th valign="top" align="center"><bold>Metric</bold></th>
<th valign="top" align="center"><bold>Random forest</bold></th>
<th valign="top" align="center"><bold>LSTM</bold></th>
<th valign="top" align="center"><bold>Code2Vec</bold></th>
<th valign="top" align="center"><bold>GFTrans (Ours)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" rowspan="4">Ultra-short</td>
<td valign="top" align="center">Accuracy</td>
<td valign="top" align="center">80.52</td>
<td valign="top" align="center">79.45</td>
<td valign="top" align="center">81.24</td>
<td valign="top" align="center"><bold>83.52</bold></td>
</tr>
<tr>
<td valign="top" align="center">Precision</td>
<td valign="top" align="center">79.15</td>
<td valign="top" align="center">78.20</td>
<td valign="top" align="center">80.45</td>
<td valign="top" align="center"><bold>82.10</bold></td>
</tr>
<tr>
<td valign="top" align="center">Recall</td>
<td valign="top" align="center">77.38</td>
<td valign="top" align="center">76.89</td>
<td valign="top" align="center">78.68</td>
<td valign="top" align="center"><bold>80.03</bold></td>
</tr>
<tr>
<td valign="top" align="center">F1-Score</td>
<td valign="top" align="center">78.25</td>
<td valign="top" align="center">77.54</td>
<td valign="top" align="center">79.56</td>
<td valign="top" align="center"><bold>81.05</bold></td>
</tr>
<tr>
<td valign="top" align="left" rowspan="4">Short</td>
<td valign="top" align="center">Accuracy</td>
<td valign="top" align="center">69.85</td>
<td valign="top" align="center">68.52</td>
<td valign="top" align="center">71.55</td>
<td valign="top" align="center"><bold>74.28</bold></td>
</tr>
<tr>
<td valign="top" align="center">Precision</td>
<td valign="top" align="center">68.45</td>
<td valign="top" align="center">67.12</td>
<td valign="top" align="center">70.80</td>
<td valign="top" align="center"><bold>73.55</bold></td>
</tr>
<tr>
<td valign="top" align="center">Recall</td>
<td valign="top" align="center">66.65</td>
<td valign="top" align="center">65.32</td>
<td valign="top" align="center">68.86</td>
<td valign="top" align="center"><bold>71.58</bold></td>
</tr>
<tr>
<td valign="top" align="center">F1-Score</td>
<td valign="top" align="center">67.54</td>
<td valign="top" align="center">66.21</td>
<td valign="top" align="center">69.82</td>
<td valign="top" align="center"><bold>72.55</bold></td>
</tr>
<tr>
<td valign="top" align="left" rowspan="4">Medium</td>
<td valign="top" align="center">Accuracy</td>
<td valign="top" align="center">71.45</td>
<td valign="top" align="center">70.15</td>
<td valign="top" align="center">73.82</td>
<td valign="top" align="center"><bold>76.54</bold></td>
</tr>
<tr>
<td valign="top" align="center">Precision</td>
<td valign="top" align="center">70.50</td>
<td valign="top" align="center">69.25</td>
<td valign="top" align="center">72.15</td>
<td valign="top" align="center"><bold>75.10</bold></td>
</tr>
<tr>
<td valign="top" align="center">Recall</td>
<td valign="top" align="center">69.21</td>
<td valign="top" align="center">67.80</td>
<td valign="top" align="center">70.96</td>
<td valign="top" align="center"><bold>73.42</bold></td>
</tr>
<tr>
<td valign="top" align="center">F1-Score</td>
<td valign="top" align="center">69.85</td>
<td valign="top" align="center">68.52</td>
<td valign="top" align="center">71.55</td>
<td valign="top" align="center"><bold>74.25</bold></td>
</tr>
<tr>
<td valign="top" align="left" rowspan="4">Long</td>
<td valign="top" align="center">Accuracy</td>
<td valign="top" align="center">77.46</td>
<td valign="top" align="center">76.20</td>
<td valign="top" align="center">77.98</td>
<td valign="top" align="center"><bold>80.22</bold></td>
</tr>
<tr>
<td valign="top" align="center">Precision</td>
<td valign="top" align="center">77.10</td>
<td valign="top" align="center">74.03</td>
<td valign="top" align="center">77.08</td>
<td valign="top" align="center"><bold>78.05</bold></td>
</tr>
<tr>
<td valign="top" align="center">Recall</td>
<td valign="top" align="center">75.08</td>
<td valign="top" align="center">73.04</td>
<td valign="top" align="center">77.22</td>
<td valign="top" align="center"><bold>81.10</bold></td>
</tr>
<tr>
<td valign="top" align="center">F1-Score</td>
<td valign="top" align="center">76.08</td>
<td valign="top" align="center">73.53</td>
<td valign="top" align="center">77.15</td>
<td valign="top" align="center"><bold>79.55</bold></td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>The best results for each metric within each runtime category are highlighted in bold.</p>
</table-wrap-foot>
</table-wrap>
<p>GFTrans achieves an accuracy of 80.2% and an F1 score of 79.5% for the &#x0201C;long-duration&#x0201D; category. This result highlights the effectiveness of our graph structure fusion strategy. In real-world scenarios, long-running programs often involve complex control flows and data dependencies. Baseline models tend to under-predict execution time because they treat code as simple sequences. In contrast, GFTrans fuses Control Flow Graphs (CFG) and Data Flow Graphs (DFG). This fusion enables the model to track long-range dependencies within deep loops and recursive calls. Consequently, GFTrans accurately captures the structural complexity that leads to long execution times.</p>
<p>To demonstrate the stability and convergence of our training process, we visualized the learning curve in <xref ref-type="fig" rid="F5">Figure 5</xref>. <xref ref-type="fig" rid="F5">Figure 5</xref> depicts the Training Loss and the Validation Accuracy over 25 epochs. As shown in the figure, we observe the training loss (red line) decreases rapidly during the first 10 epochs, suggesting that GFTrans efficiently learns features from the code graph structure. The validation accuracy (blue line) rises quickly and stabilizes after Epoch 15. The accuracy fluctuates slightly around 78.6% without any significant drops. The gap between the training loss and validation accuracy remains reasonable, indicating the model generalizes well to unseen data. Furthermore, these curves justify our decision to use &#x0201C;Early Stopping.&#x0201D; Stopping the training at 25 epochs is sufficient to get the best performance without wasting computational resources.</p>
<fig position="float" id="F5">
<label>Figure 5</label>
<caption><p>Training loss and validation accuracy.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-09-1779935-g0005.tif">
<alt-text content-type="machine-generated">Line chart showing training loss in red decreasing from about 1.5 to 0.2 and validation accuracy in blue increasing from about 0.1 to 0.79 over 25 epochs, with peak accuracy labeled as 78.64%.</alt-text>
</graphic>
</fig>
<p>Furthermore, to better understand the nature of the misclassifications, we analyzed the results using a Confusion Matrix, as shown in <xref ref-type="fig" rid="F6">Figure 6</xref>. The matrix reveals a clear and meaningful pattern. Firstly, the values on the diagonal are very high. This confirms the strong performance of GFTrans. Second, most errors happen between neighboring categories. For instance, the model sometimes confuses &#x0201C;Short&#x0201D; with &#x0201C;Medium.&#x0201D; But the model rarely makes severe mistakes. It does not confuse &#x0201C;Ultra-short&#x0201D; with &#x0201C;Long.&#x0201D; The errors mostly come from borderline cases. For example, a function runs in 98 ms. The threshold is 100 ms. The model might predict &#x0201C;Medium&#x0201D; (Category 2) instead of &#x0201C;Short&#x0201D; (Category 1). This is reasonable. Our calculation shows that about 88% of all misclassifications involve adjacent classes.</p>
<fig position="float" id="F6">
<label>Figure 6</label>
<caption><p>Confusion matrix of GFTrans predictions.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fdata-09-1779935-g0006.tif">
<alt-text content-type="machine-generated">Confusion matrix heatmap for runtime categories shows actual versus predicted labels for Ultra-short, Short, Medium, and Long duration ranges. Diagonal cells have highest values, indicating strongest correct classifications for each category.</alt-text>
</graphic>
</fig>
</sec>
<sec>
<label>4.5.2</label>
<title>The impact of components on GFTrans performance</title>
<p>To verify the intended design of GFTrans, each component of GFTrans was removed one at a time, and the impact on accuracy and Macro-F1 score was measured. The results of this study are displayed in <xref ref-type="table" rid="T5">Table 5</xref>.</p>
<table-wrap position="float" id="T5">
<label>Table 5</label>
<caption><p>Ablation study results for different GFTrans variants.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Model variant</bold></th>
<th valign="top" align="center"><bold>Accuracy (%)</bold></th>
<th valign="top" align="center"><bold>Macro-F1 (%)</bold></th>
<th valign="top" align="center"><bold>&#x00394; Acc</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">w/o Hand-crafted features</td>
<td valign="top" align="center">77.14</td>
<td valign="top" align="center">75.38</td>
<td valign="top" align="center">&#x02013;1.50%</td>
</tr>
<tr>
<td valign="top" align="left">w/o graph paths (Seq only)</td>
<td valign="top" align="center">75.12</td>
<td valign="top" align="center">73.45</td>
<td valign="top" align="center">&#x02013;3.52%</td>
</tr>
<tr>
<td valign="top" align="left">Only CFG paths (w/o DFG)</td>
<td valign="top" align="center">77.25</td>
<td valign="top" align="center">75.52</td>
<td valign="top" align="center">&#x02013;1.39%</td>
</tr>
<tr>
<td valign="top" align="left">w/o gating fusion</td>
<td valign="top" align="center">77.65</td>
<td valign="top" align="center">75.88</td>
<td valign="top" align="center">&#x02013;0.99%</td>
</tr>
<tr>
<td valign="top" align="left"><bold>GFTrans (full)</bold></td>
<td valign="top" align="center"><bold>78.64</bold></td>
<td valign="top" align="center"><bold>76.85</bold></td>
<td valign="top" align="center">&#x02013;</td>
</tr></tbody>
</table>
<table-wrap-foot>
<p>Bold values denote the optimal performance, representing the full GFTrans model&#x00027;s results.</p>
</table-wrap-foot>
</table-wrap>
<p>As shown in <xref ref-type="table" rid="T5">Table 5</xref>, removing graph-structured paths causes the accuracy to drop to 75.1%. This performance is lower than that of Code2Vec. This result validates our &#x0201C;data flow enhanced CFG linearization&#x0201D; component. It distinguishes GFTrans from prior work. Lexical co-occurrences alone are insufficient to determine program operations. They fail to capture control flow transitions that lack direct lexical relationships. Therefore, the directionality and structure of a CFG are essential. However, the CFG alone is not adequate. Comparing the CFG-only model with GFTrans reveals an accuracy gap of 1.4%. This increase comes from the addition of data flow paths. This supports the claim that control flow must be used with data flow. Together, they accurately determine the execution operation. This mechanism captures the definition and usage sequences of variables. Consequently, the model accumulates precise variable states to predict execution duration. We also evaluate the handcrafted features. Removing them from the network decreases accuracy by 1.5%. This indicates that these features introduce significant informative guidance. When code segments contain semantic ambiguity, these features act as &#x0201C;calibrators.&#x0201D; They provide rigid metrics to correct the model&#x00027;s predictions.</p>
<p>The plain concatenation of the feature vectors yielded a performance that was 1.0% lower than when the Gated Fusion method was employed. This suggests that code execution can be impacted by both the semantic and structural properties of the code. The Gated Fusion method allows for the model to selectively focus on the most informative features and eliminate those that are less informative. This enables the multimodal data to work together in a synergistic manner, allowing for adaptive synergy among the multimodal data.</p></sec>
<sec>
<label>4.5.3</label>
<title>The effect of the compiler optimization level on generalization of the GFTrans model architecture</title>
<p>Compiler optimization levels (GCC&#x00027;s -O0, -O2, -O3, and -Os) have an impact on the relationship between a program&#x00027;s characteristics and its runtime. The different levels of optimization produce machine code that creates a different set of runtime labels. To investigate the generality and flexibility of the GFTrans model architecture under varying conditions of compilation, the experiments below were developed:</p>
<list list-type="bullet">
<list-item><p><bold>Dataset reconstruction:</bold> for all projects in the test dataset, we did not modify the source and ran each using the -O0, -O3, and -Os flags. As a result, this produced three unique sets of runtime labels.</p></list-item>
<list-item><p><bold>Model retraining:</bold> training and evaluation of the revised model on the -O0, -O3, and -Os datasets, using the same hyperparameters.</p></list-item>
</list>
<p><xref ref-type="table" rid="T6">Table 6</xref> summarizes the results of the above experiments. GFTrans achieves an accuracy of between 75 and 79% for either -O0 (preserving all redundant instructions) or -O3 (through intensive rearrangement of instructions). This indicates that the GFTrans graph-structured path and feature representation method proposed in this work is capable of generalizing robustly, allowing it to adapt to all compiler optimization conditions rather than exclusively overfitting to the unique characteristics of -O2. Finally, the accuracy of GFTrans under the -O3 condition (76.85%) remains close to that achieved under -O2 (78.64%). This is because optimizations at the compiler level may change the quantity of executed instructions, but do not alter the time complexity class. It confirms that GFTrans captures the logical structure of the execution at a higher level using CFG/DFG, and is, therefore, the most robust against changes made by compiler optimization. In addition to speed optimizations (-O2, -O3), we also investigated the -Os flag, which optimizes for code size. Our supplementary test showed that GFTrans maintains a robust accuracy of 76.92% under -Os. The slight performance drop (compared to 78.64% under -O2) is expected, as -Os prioritizes smaller binary size over execution speed (e.g., by disabling loop unrolling), which can subtly alter the control flow structure used by our model.</p>
<table-wrap position="float" id="T6">
<label>Table 6</label>
<caption><p>Impact of compiler optimization level on model generalization.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Optimization level</bold></th>
<th valign="top" align="center"><bold>Accuracy</bold></th>
<th valign="top" align="center"><bold>&#x00394; Acc</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">-O2 (default)</td>
<td valign="top" align="center">78.64%</td>
<td valign="top" align="center">&#x02013;</td>
</tr>
<tr>
<td valign="top" align="left">-O0</td>
<td valign="top" align="center">75.32%</td>
<td valign="top" align="center">&#x02013;3.32%</td>
</tr>
<tr>
<td valign="top" align="left">-O3</td>
<td valign="top" align="center">76.85%</td>
<td valign="top" align="center">&#x02013;1.79%</td>
</tr>
<tr>
<td valign="top" align="left">-Os</td>
<td valign="top" align="center">76.92%</td>
<td valign="top" align="center">&#x02013;1.72%</td>
</tr></tbody>
</table>
</table-wrap>
</sec></sec></sec>
<sec id="s5">
<label>5</label>
<title>Analysis and discussion</title>
<p>Following the quantitative evaluation of GFTrans, this section delves into the model&#x00027;s interpretability and practical applicability.</p>
<sec>
<label>5.1</label>
<title>The complementary mechanisms within GFTrans through its dual feature set</title>
<p>The key benefit of GFTrans is the incorporation of &#x0201C;deep semantics,&#x0201D; as well as &#x0201C;explicit structural features.&#x0201D; To understand how the combined presence of these features is used by the model, we examine the behaviors of the dynamic gating unit, the main element of the model.</p>
<p>A gating vector, <italic>z</italic>&#x02208;[0, 1], is introduced. As <italic>z</italic> &#x02192; 1, the model prefers the deep semantic features. When <italic>z</italic> &#x02192; 0, the model prefers the explicit structural features. We analyse gating values from 50 code samples in the test set and observe clear complexity adaptation.</p>
<p>In support of basic reasoning: for example, for code categorized in the &#x0201C;extremely short execution time&#x0201D; category, the average gating value is relatively low (<italic>z</italic>&#x02248;0.32). This indicates that the simpler a piece of code&#x02014;for example, simple assignments, getters/setters, or math operations with no loops&#x02014;the less need there is for an extensive semantic interpretation of the code. The model directly uses information from the 16 handcrafted features. Therefore, the model is efficient and consistent with experts&#x00027; intuitively correct observations on straightforward code.</p></sec>
<sec>
<label>5.2</label>
<title>Boundaries of static analysis</title>
<p>In support of intricate reasoning: for example, for code categorized in category 3 (very long execution time), the average gating value increases significantly (<italic>z</italic>&#x02248;0.71). In situations where recursive or highly complex API invocations, for example, qsort, are used, or a large number of nested control statements exist, common performance analysis techniques based on basic statistical measures, such as loop depth, typically do not accurately reflect the performance problems; for instance, recursive functions report that they do not have a loop depth. Therefore, in these instances, the GFTrans model independently increases the weight carried by the semantic vectors, thereby identifying some of the underlying performance problems. The 78.64% accuracy of GFTrans, which is based entirely on static analysis techniques, indicates that while GFTrans can produce accurate results for the C programming language through static analysis, certain limitations exist. Based upon our assessment of the error instances generated by GFTrans, two areas are identified as being problematic.</p>
<list list-type="bullet">
<list-item><p><bold>Pointer aliasing and dependency disruption:</bold> pointer aliasing often causes GFTrans to fail at accurate data flow analysis. If a program has multi-level pointers, such as **p, pointer arithmetic, or complex memory transformations, static tools struggle to predict pointer targets before execution. This interrupts the &#x0201C;define-use&#x0201D; chains in the DFG. As a result, anchor tags are not set in the sequences. This results in the loss of crucial data flow context within the program. Such complexity can be underestimated. For example, a process running O(N) over a large array accessed via pointers may be labeled O(1).</p></list-item>
<list-item><p><bold>Class confusion:</bold> from the confusion matrix, we see that over 65% of wrong classifications are between adjacent categories, like mistaking category 1 for category 2. This happens because GFTrans uses fixed cutoffs for runtime labels. These cutoffs do not capture the true continuum of code complexity. Small code changes near a cutoff can change the model&#x00027;s classification result.</p></list-item>
</list></sec>
<sec>
<label>5.3</label>
<title>Practical applicability</title>
<p>Conventional coding performance measures rely on dynamic profiling tools, such as Intel VTune (Intel, Santa Clara, CA, USA). These tools need developers to create test cases, compile the project, and sample data. This process often takes several minutes, depending on data coverage. In contrast, GFTrans provides a pre-emptive analysis. On the NVIDIA RTX 3090 (NVIDIA, Santa Clara, CA, USA), it takes an average of 47 ms to analyse a function. This includes graph extraction, feature calculation, and model inference. Quantised models work in the development environments. GFTrans&#x00027;s millisecond speed enables integration into IDEs as a syntax checker. GFTrans predicts performance class as developers code and flags in red when a function is likely &#x0201C;category 3, long-running.&#x0201D; This feedback lets developers tune performance before testing. It reduces refactoring debt later in the software lifecycle.</p></sec>
<sec>
<label>5.4</label>
<title>Performance across CPU architectures</title>
<p>We demonstrate that GFTrans is robust across different hardware. GFTrans learns the algorithmic time complexity [like <italic>O</italic>(<italic>N</italic>) vs. <italic>O</italic>(<italic>N</italic><sup>2</sup>)]. A complex nested loop is algorithmically slow on any CPU. A simple assignment is fast on any CPU. While absolute time (nanoseconds) differs between Intel and AMD, the relative order stays the same. A sorting function is always slower than a math function. To verify the robustness of GFTrans on different hardware, we conducted a new experiment. We employed a machine with an AMD Ryzen 9 5900X CPU (AMD, Santa Clara, CA, USA). We took the original source code (2,887 functions) and re-executed all of these functions on the AMD machine to collect new ground-truth runtime labels specific to this CPU. Then, we used the GFTrans model to predict their categories. The results are shown in <xref ref-type="table" rid="T7">Table 7</xref> below. 96.2% of the runtime labels remained the same on both CPUs. The broad category bins (e.g., 20&#x02013;100 ms) absorb the minor speed differences. The accuracy on the AMD dataset is 78.55%. This is very close to the accuracy on Intel (78.64%). The performance drop is negligible (&#x0003C;0.8%). This proves that GFTrans generalizes well to AMD processors.</p>
<table-wrap position="float" id="T7">
<label>Table 7</label>
<caption><p>Comparison of model performance on different hardware architectures.</p></caption>
<table frame="box" rules="all">
<thead>
<tr>
<th valign="top" align="left"><bold>Hardware environment</bold></th>
<th valign="top" align="center"><bold>Data source</bold></th>
<th valign="top" align="center"><bold>Accuracy (%)</bold></th>
<th valign="top" align="center"><bold>Macro-F1 (%)</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Intel Core i7-12700H (Baseline)</td>
<td valign="top" align="center">Collected on Intel</td>
<td valign="top" align="center">78.64</td>
<td valign="top" align="center">76.85</td>
</tr>
<tr>
<td valign="top" align="left">AMD Ryzen 9 5900X (Replication)</td>
<td valign="top" align="center">Collected on AMD</td>
<td valign="top" align="center">78.55</td>
<td valign="top" align="center">76.72</td>
</tr></tbody>
</table>
</table-wrap>
</sec></sec>
<sec sec-type="conclusions" id="s6">
<label>6</label>
<title>Conclusion</title>
<p>This work creates a new analysis framework, GFTrans, to predict performance without actually running the code, utilizing data anchor markers placed strategically within the control flow section of the code. Data anchor markers attach statistics from dynamic execution logic to the data dependencies of each control flow path, thereby allowing for statistical performance verification of the code. Also, by implementing a gating mechanism, GFTrans is capable of integrating both semantic attributes and manually created features into the evaluation data, thereby enhancing performance prediction. Experimental results indicate that GFTrans achieves an overall accuracy of 78.64% on real-world datasets, which exceeds the performance of the best baseline models and thus validates the usefulness of the graph structure fusion as a technique for achieving improved precision of performance predictions. GFTrans uses static analysis to provide real-time (milliseconds) performance feedback without compilation or execution. The mechanisms of GFTrans allow developers to tune their code for proper performance prior to merging code changes into their repositories, therefore, moving performance diagnostics from cumbersome testing phases to &#x0201C;coding-time&#x0201D; phases and significantly decreasing the amount of maintenance cost and refactoring cost incurred in the later phases of software development life cycles. In future work, we plan to extend GFTrans to support higher-level languages such as Python and Java. However, this expansion presents significant technical challenges. For dynamic languages like Python, execution time is heavily influenced by the interpreter&#x00027;s overhead, the Global Interpreter Lock (GIL), and dynamic typing, which are difficult to predict via static analysis alone. 
Similarly, for Java, the Just-In-Time (JIT) compilation mechanism performs dynamic optimizations during execution, meaning that the static control flow graph may not fully reflect the actual runtime behavior. Addressing these discrepancies will require incorporating runtime environment modeling into our static analysis framework.</p></sec>
</body>
<back>
<sec sec-type="data-availability" id="s7">
<title>Data availability statement</title>
<p>The dataset used in this study is not publicly available because the source code was collected from multiple open-source repositories whose authors do not permit commercial use or secondary redistribution of derived datasets. To respect the original authors&#x00027; usage intentions and licensing constraints, the processed dataset cannot be released publicly. Access may be considered upon reasonable request to the corresponding author for academic and non-commercial purposes. Requests to access the datasets should be directed to YunBao Wen, <email>2023024289&#x00040;m.scnu.edu.cn</email>.</p>
</sec>
<sec sec-type="author-contributions" id="s8">
<title>Author contributions</title>
<p>JLi: Conceptualization, Data curation, Formal analysis, Funding acquisition, Investigation, Methodology, Project administration, Resources, Software, Supervision, Validation, Visualization, Writing &#x02013; original draft, Writing &#x02013; review &#x00026; editing. YW: Conceptualization, Methodology, Supervision, Writing &#x02013; review &#x00026; editing. JLiu: Conceptualization, Methodology, Supervision, Validation, Writing &#x02013; review &#x00026; editing. BZ: Conceptualization, Methodology, Supervision, Writing &#x02013; review &#x00026; editing. SM: Supervision, Validation, Writing &#x02013; review &#x00026; editing.</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s10">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p></sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Alon</surname> <given-names>U.</given-names></name> <name><surname>Zilberstein</surname> <given-names>M.</given-names></name> <name><surname>Levy</surname> <given-names>O.</given-names></name> <name><surname>Yahav</surname> <given-names>E.</given-names></name></person-group> (<year>2019</year>). <article-title>code2vec: learning distributed representations of code</article-title>. <source>Proc. ACM Program. Lang</source>. <volume>3</volume>, <fpage>1</fpage>&#x02013;<lpage>29</lpage>. doi: <pub-id pub-id-type="doi">10.1145/3290353</pub-id></mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Altuwaijri</surname> <given-names>F. S.</given-names></name> <name><surname>Ferrario</surname> <given-names>M. A.</given-names></name></person-group> (<year>2022</year>). <article-title>Factors affecting agile adoption: an industry research study of the mobile app sector in Saudi Arabia</article-title>. <source>J. Syst. Softw</source>. <volume>190</volume>:<fpage>111347</fpage>. doi: <pub-id pub-id-type="doi">10.1016/j.jss.2022.111347</pub-id></mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Biringa</surname> <given-names>C.</given-names></name> <name><surname>Kul</surname> <given-names>G.</given-names></name></person-group> (<year>2024</year>). <article-title>Pace: a program analysis framework for continuous performance prediction</article-title>. <source>ACM Trans. Softw. Eng. Methodol</source>. <volume>33</volume>, <fpage>1</fpage>&#x02013;<lpage>23</lpage>. doi: <pub-id pub-id-type="doi">10.1145/3637230</pub-id></mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Cummins</surname> <given-names>C.</given-names></name> <name><surname>Fisches</surname> <given-names>Z. V.</given-names></name> <name><surname>Ben-Nun</surname> <given-names>T.</given-names></name> <name><surname>Hoefler</surname> <given-names>T.</given-names></name> <name><surname>O&#x00027;Boyle</surname> <given-names>M. F.</given-names></name> <name><surname>Leather</surname> <given-names>H.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>&#x0201C;Programl: a graph-based program representation for data flow analysis and compiler optimizations,&#x0201D;</article-title> in <source>International Conference on Machine Learning</source> (<publisher-loc>Brookline, MA</publisher-loc>: <publisher-name>PMLR</publisher-name>), <fpage>2244</fpage>&#x02013;<lpage>2253</lpage>.</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Graham</surname> <given-names>S. L.</given-names></name> <name><surname>Kessler</surname> <given-names>P. B.</given-names></name> <name><surname>Mckusick</surname> <given-names>M. K.</given-names></name></person-group> (<year>1982</year>). <article-title>Gprof: a call graph execution profiler</article-title>. <source>SIGPLAN Not</source>. <volume>17</volume>, <fpage>120</fpage>&#x02013;<lpage>126</lpage>. doi: <pub-id pub-id-type="doi">10.1145/872726.806987</pub-id></mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Guo</surname> <given-names>D.</given-names></name> <name><surname>Lu</surname> <given-names>S.</given-names></name> <name><surname>Duan</surname> <given-names>N.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Zhou</surname> <given-names>M.</given-names></name> <name><surname>Yin</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>&#x0201C;Unixcoder: unified cross-modal pre-training for code representation,&#x0201D;</article-title> in <source>Annual Meeting of the Association for Computational Linguistics</source> (<publisher-loc>Stroudsburg, PA</publisher-loc>). doi: <pub-id pub-id-type="doi">10.18653/v1/2022.acl-long.499</pub-id></mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Guo</surname> <given-names>D.</given-names></name> <name><surname>Ren</surname> <given-names>S.</given-names></name> <name><surname>Lu</surname> <given-names>S.</given-names></name> <name><surname>Feng</surname> <given-names>Z.</given-names></name> <name><surname>Tang</surname> <given-names>D.</given-names></name> <name><surname>Liu</surname> <given-names>S.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Graphcodebert: Pre-training code representations with data flow</article-title>. <source>arXiv [preprint]</source>. arXiv:2009.08366.</mixed-citation>
</ref>
<ref id="B8">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Ha</surname> <given-names>H.</given-names></name> <name><surname>Zhang</surname> <given-names>H.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Deepperf: performance prediction for configurable software with deep sparse neural network,&#x0201D;</article-title> in <source>2019 IEEE/ACM 41st International Conference on Software Engineering (ICSE)</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>ACM</publisher-name>), <fpage>1095</fpage>&#x02013;<lpage>1106</lpage>. doi: <pub-id pub-id-type="doi">10.1109/ICSE.2019.00113</pub-id></mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hu</surname> <given-names>X.</given-names></name> <name><surname>Lin</surname> <given-names>W.</given-names></name> <name><surname>Liu</surname> <given-names>Z.</given-names></name> <name><surname>Ling</surname> <given-names>M.</given-names></name> <name><surname>Xia</surname> <given-names>X.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>Towards on-the-fly code performance profiling</article-title>. <source>ACM Trans. Softw. Eng. Methodol</source>. <volume>35</volume>, <fpage>1</fpage>&#x02013;<lpage>25</lpage>. doi: <pub-id pub-id-type="doi">10.1145/3725212</pub-id></mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Huang</surname> <given-names>X.</given-names></name> <name><surname>Zhuo</surname> <given-names>W.</given-names></name> <name><surname>Vuong</surname> <given-names>M. P.</given-names></name> <name><surname>Li</surname> <given-names>S.</given-names></name> <name><surname>Kim</surname> <given-names>J.</given-names></name> <name><surname>Rees</surname> <given-names>B.</given-names></name> <etal/></person-group>. (<year>2024</year>). <article-title>CATGNN: cost-efficient and scalable distributed training for graph neural networks</article-title>. <source>arXiv [preprint]</source>. arXiv:2404.02300.</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jafari Navimipour</surname> <given-names>N.</given-names></name> <name><surname>Soltani</surname> <given-names>Z.</given-names></name></person-group> (<year>2016</year>). <article-title>The impact of cost, technology acceptance and employees&#x00027; satisfaction on the effectiveness of the electronic customer relationship management systems</article-title>. <source>Comput. Human Behav</source>. <volume>55</volume>, <fpage>1052</fpage>&#x02013;<lpage>1066</lpage>. doi: <pub-id pub-id-type="doi">10.1016/j.chb.2015.10.036</pub-id></mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Laaber</surname> <given-names>C.</given-names></name> <name><surname>Basmaci</surname> <given-names>M.</given-names></name> <name><surname>Salza</surname> <given-names>P.</given-names></name></person-group> (<year>2021</year>). <article-title>Predicting unstable software benchmarks using static source code features</article-title>. <source>Empir. Softw. Eng</source>. <volume>26</volume>:<fpage>114</fpage>. doi: <pub-id pub-id-type="doi">10.1007/s10664-021-09996-y</pub-id></mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Ma</surname> <given-names>Y.-F.</given-names></name> <name><surname>Du</surname> <given-names>Y.</given-names></name> <name><surname>Li</surname> <given-names>M.</given-names></name></person-group> (<year>2023</year>). <article-title>&#x0201C;Capturing the long-distance dependency in the control flow graph via structural-guided attention for bug localization,&#x0201D;</article-title> in <source>(IJCAI-23)</source> (<publisher-loc>San Francisco, CA</publisher-loc>), <fpage>2242</fpage>&#x02013;<lpage>2250</lpage>. doi: <pub-id pub-id-type="doi">10.24963/ijcai.2023/249</pub-id></mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Mendis</surname> <given-names>C.</given-names></name> <name><surname>Renda</surname> <given-names>A.</given-names></name> <name><surname>Amarasinghe</surname> <given-names>S.</given-names></name> <name><surname>Carbin</surname> <given-names>M.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Ithemal: accurate, portable and fast basic block throughput estimation using deep neural networks,&#x0201D;</article-title> in <source>International Conference on Machine Learning</source> (<publisher-loc>PMLR</publisher-loc>), <fpage>4505</fpage>&#x02013;<lpage>4515</lpage>.</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Reinders</surname> <given-names>J.</given-names></name></person-group> (<year>2005</year>). <source>VTune Performance Analyzer Essentials, Volume 9</source>. <publisher-loc>Santa Clara, CA</publisher-loc>: <publisher-name>Intel Press Santa Clara</publisher-name>.</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Shu</surname> <given-names>Y.</given-names></name> <name><surname>Sui</surname> <given-names>Y.</given-names></name> <name><surname>Zhang</surname> <given-names>H.</given-names></name> <name><surname>Xu</surname> <given-names>G.</given-names></name></person-group> (<year>2020</year>). <article-title>&#x0201C;PERF-AL: performance prediction for configurable software through adversarial learning,&#x0201D;</article-title> in <source>Proceedings of the 14th ACM/IEEE International Symposium on Empirical Software Engineering and Measurement (ESEM), ESEM &#x00027;20</source> (<publisher-loc>New York, NY</publisher-loc>: <publisher-name>Association for Computing Machinery</publisher-name>). doi: <pub-id pub-id-type="doi">10.1145/3382494.3410677</pub-id></mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Sikka</surname> <given-names>J.</given-names></name> <name><surname>Satya</surname> <given-names>K.</given-names></name> <name><surname>Kumar</surname> <given-names>Y.</given-names></name> <name><surname>Uppal</surname> <given-names>S.</given-names></name> <name><surname>Shah</surname> <given-names>R. R.</given-names></name> <name><surname>Zimmermann</surname> <given-names>R.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>&#x0201C;Learning based methods for code runtime complexity prediction,&#x0201D;</article-title> in <source>Advances in Information Retrieval: 42nd European Conference on IR Research, ECIR 2020, Lisbon, Portugal, April 14&#x02013;17, 2020, Proceedings, Part I</source> (<publisher-loc>Berlin</publisher-loc>: <publisher-name>Springer-Verlag</publisher-name>), <fpage>313</fpage>&#x02013;<lpage>325</lpage>. doi: <pub-id pub-id-type="doi">10.1007/978-3-030-45439-5_21</pub-id></mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Vaswani</surname> <given-names>A.</given-names></name> <name><surname>Shazeer</surname> <given-names>N.</given-names></name> <name><surname>Parmar</surname> <given-names>N.</given-names></name> <name><surname>Uszkoreit</surname> <given-names>J.</given-names></name> <name><surname>Jones</surname> <given-names>L.</given-names></name> <name><surname>Gomez</surname> <given-names>A. N.</given-names></name> <etal/></person-group>. (<year>2017</year>). <article-title>&#x0201C;Attention is all you need,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems, Vol. 30</source>, eds. I. Guyon, U. V. Luxburg, S. Bengio, H. Wallach, R. Fergus, S. Vishwanathan, et al. (<publisher-loc>Red Hook, NY</publisher-loc>: <publisher-name>Curran Associates, Inc.</publisher-name>).</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Wang</surname> <given-names>W.</given-names></name> <name><surname>Joty</surname> <given-names>S.</given-names></name> <name><surname>Hoi</surname> <given-names>S. C. H.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;CodeT5: identifier-aware unified pre-trained encoder-decoder models for code understanding and generation,&#x0201D;</article-title> in <source>Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing</source> (<publisher-loc>Stroudsburg, PA</publisher-loc>). doi: <pub-id pub-id-type="doi">10.18653/v1/2021.emnlp-main.685</pub-id></mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Weber</surname> <given-names>M.</given-names></name> <name><surname>Apel</surname> <given-names>S.</given-names></name> <name><surname>Siegmund</surname> <given-names>N.</given-names></name></person-group> (<year>2021</year>). <article-title>&#x0201C;White-box performance-influence models: a profiling and learning approach,&#x0201D;</article-title> in <source>2021 IEEE/ACM 43rd International Conference on Software Engineering (ICSE)</source> (<publisher-loc>Madrid</publisher-loc>: <publisher-name>IEEE</publisher-name>), <fpage>1059</fpage>&#x02013;<lpage>1071</lpage>. doi: <pub-id pub-id-type="doi">10.1109/ICSE43902.2021.00099</pub-id></mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Zheng</surname> <given-names>L.</given-names></name> <name><surname>Liu</surname> <given-names>R.</given-names></name> <name><surname>Shao</surname> <given-names>J.</given-names></name> <name><surname>Chen</surname> <given-names>T.</given-names></name> <name><surname>Gonzalez</surname> <given-names>J. E.</given-names></name> <name><surname>Stoica</surname> <given-names>I.</given-names></name> <etal/></person-group>. (<year>2021</year>). <article-title>&#x0201C;Tenset: a large-scale program performance dataset for learned tensor compilers,&#x0201D;</article-title> in <source>Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1)</source> (<publisher-loc>New York, NY</publisher-loc>).</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="book"><person-group person-group-type="author"><name><surname>Zhou</surname> <given-names>Y.</given-names></name> <name><surname>Liu</surname> <given-names>S.</given-names></name> <name><surname>Siow</surname> <given-names>J.</given-names></name> <name><surname>Du</surname> <given-names>X.</given-names></name> <name><surname>Liu</surname> <given-names>Y.</given-names></name></person-group> (<year>2019</year>). <article-title>&#x0201C;Devign: effective vulnerability identification by learning comprehensive program semantics via graph neural networks,&#x0201D;</article-title> in <source>Advances in Neural Information Processing Systems, 32</source> (<publisher-loc>New York, NY</publisher-loc>).</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zou</surname> <given-names>Z.</given-names></name> <name><surname>Jiang</surname> <given-names>T.</given-names></name> <name><surname>Wang</surname> <given-names>Y.</given-names></name> <name><surname>Xue</surname> <given-names>T.</given-names></name> <name><surname>Zhang</surname> <given-names>N.</given-names></name> <name><surname>Luan</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2025</year>). <article-title>Code vulnerability detection based on augmented program dependency graph and optimized codebert</article-title>. <source>Sci. Rep</source>. <volume>15</volume>:<fpage>39301</fpage>. doi: <pub-id pub-id-type="doi">10.1038/s41598-025-23029-4</pub-id><pub-id pub-id-type="pmid">41214092</pub-id></mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by" id="fn0001">
<p>Edited by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3117376/overview">Qingguo L&#x000FC;</ext-link>, Chongqing University, China</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by" id="fn0002">
<p>Reviewed by: <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3045809/overview">Ting Lyu</ext-link>, University of Science and Technology Beijing, China</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3337363/overview">Shujie Pang</ext-link>, Guangdong University of Technology, China</p>
<p><ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/3341657/overview">Chengmei Tang</ext-link>, Chongqing University of Education, China</p>
</fn>
</fn-group>
</back>
</article>