<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Manuf. Technol.</journal-id>
<journal-title-group>
<journal-title>Frontiers in Manufacturing Technology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Manuf. Technol.</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">2813-0359</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1614335</article-id>
<article-id pub-id-type="doi">10.3389/fmtec.2025.1614335</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Research</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Bayesian experimental design in production engineering: a comprehensive performance and robustness study</article-title>
<alt-title alt-title-type="left-running-head">Leyendecker et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fmtec.2025.1614335">10.3389/fmtec.2025.1614335</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Leyendecker</surname>
<given-names>Lars</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2775658"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal Analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Project administration" vocab-term-identifier="https://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Gonzalez Degetau</surname>
<given-names>Ana Maria</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Formal analysis" vocab-term-identifier="https://credit.niso.org/contributor-roles/formal-analysis/">Formal Analysis</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; original draft" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing &#x2013; original draft</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Visualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Validation" vocab-term-identifier="https://credit.niso.org/contributor-roles/validation/">Validation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Investigation" vocab-term-identifier="https://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Software" vocab-term-identifier="https://credit.niso.org/contributor-roles/software/">Software</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Bata</surname>
<given-names>Katharina</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Emonts</surname>
<given-names>Jessica</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Conceptualization" vocab-term-identifier="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Data curation" vocab-term-identifier="https://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Methodology" vocab-term-identifier="https://credit.niso.org/contributor-roles/methodology/">Methodology</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Schmitz</surname>
<given-names>Angela</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3244244"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Supervision" vocab-term-identifier="https://credit.niso.org/contributor-roles/supervision/">Supervision</role>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Schmitt</surname>
<given-names>Robert H.</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2781385"/>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Writing &#x2013; review &#x26; editing" vocab-term-identifier="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing &#x2013; review and editing</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Funding acquisition" vocab-term-identifier="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role vocab="credit" vocab-identifier="https://credit.niso.org/" vocab-term="Resources" vocab-term-identifier="https://credit.niso.org/contributor-roles/resources/">Resources</role>
</contrib>
</contrib-group>
<aff id="aff1">
<label>1</label>
<institution>Production Quality, Fraunhofer Institute for Production Technology IPT</institution>, <city>Aachen</city>, <country country="DE">Germany</country>
</aff>
<aff id="aff2">
<label>2</label>
<institution>Karlsruhe Institute of Technology KIT, Scientific Computing Center (SCC)</institution>, <city>Karlsruhe</city>, <country country="DE">Germany</country>
</aff>
<aff id="aff3">
<label>3</label>
<institution>Department of Mechanical Engineering, University of Applied Sciences Aachen</institution>, <city>Aachen</city>, <country country="DE">Germany</country>
</aff>
<aff id="aff4">
<label>4</label>
<institution>Institute of Product Development and Engineering Design, Faculty of Process Engineering, Energy and Mechanical Systems, TH K&#xf6;ln&#x2014;University of Applied Sciences</institution>, <city>K&#xf6;ln</city>, <country country="DE">Germany</country>
</aff>
<aff id="aff5">
<label>5</label>
<institution>Laboratory for Machine Tools and Production Engineering (WZL) of RWTH Aachen University</institution>, <city>Aachen</city>, <country country="DE">Germany</country>
</aff>
<author-notes>
<corresp id="c001">
<label>&#x2a;</label>Correspondence: Lars Leyendecker, <email xlink:href="mailto:lars.leyendecker@ipt.fraunhofer.de">lars.leyendecker@ipt.fraunhofer.de</email>
</corresp>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-01-08">
<day>08</day>
<month>01</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2025</year>
</pub-date>
<volume>5</volume>
<elocation-id>1614335</elocation-id>
<history>
<date date-type="received">
<day>18</day>
<month>04</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>28</day>
<month>10</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>28</day>
<month>11</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Leyendecker, Gonzalez Degetau, Bata, Emonts, Schmitz and Schmitt.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Leyendecker, Gonzalez Degetau, Bata, Emonts, Schmitz and Schmitt</copyright-holder>
<license>
<ali:license_ref start_date="2026-01-08">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<p>In production engineering, the identification of optimal process parameters is essential to advance product quality and overall equipment effectiveness. Optimizing and adapting process parameters through experimental design is relevant for different phases of the life cycle of a production process: (i) design and development of new processes, (ii) failure analysis and optimization, and (iii) adaptation and calibration in series production. Existing experimental design approaches tend to be inefficient because they comprise static, non-adaptive methodologies that separate experiment design from execution and analysis. Instead, Bayesian Optimization (BO) offers an adaptive and data-efficient methodology for experimental design termed Bayesian experimental design (BED). In BED, the selection of an experiment is re-evaluated in each iteration based on previous experiment results according to an acquisition function that aims to maximize the informational content of each experiment. However, the configuration of BO algorithms for specific optimization problems requires extensive knowledge of both BO and process characteristics. The mean and covariance functions of the surrogate model, the acquisition function, and initial data sampling must be individually configured and significantly influence overall optimization performance, preventing widespread adoption in production engineering practice. To guide the configuration of BO algorithms for optimizing production processes, in this paper, we perform an extensive benchmark study with a total of 15,360 experiments. We evaluate the performance of a variety of BO algorithm configurations (including kernels, acquisition functions, and initial sampling sizes) on a total of eight optimization problems with a noiseless and a noisy variant each. The performance and robustness analysis reveals significant performance differences between individual BO algorithm configurations. 
The results of our benchmarking serve as empirical references based on which we derive actionable guidelines for the application of BED in production engineering.</p>
</abstract>
<kwd-group>
<kwd>Bayesian optimization</kwd>
<kwd>Bayesian experimental design</kwd>
<kwd>process optimization</kwd>
<kwd>production</kwd>
<kwd>manufacturing</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. Funded by &#x201c;ICNAP - International Center for Networked, Adaptive Production&#x201d;. A Fraunhofer Initiative.</funding-statement>
</funding-group>
<counts>
<fig-count count="10"/>
<table-count count="2"/>
<equation-count count="7"/>
<ref-count count="44"/>
<page-count count="19"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Digital Manufacturing</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<label>1</label>
<title>Introduction</title>
<p>In production engineering, the technical, ecological, and economic performance of production processes depends on the parameter settings that configure the behavior of the process. To investigate the relationship (response surface) between process inputs and outputs, and therefore to find parameters that are optimal with respect to an arbitrary objective function, experimental parameter studies are performed. The goal of experimental design is to identify the set of process parameters (also called factors) that are most relevant to the performance of the process and to determine performance-optimal factor levels <xref ref-type="bibr" rid="B18">Freiesleben et al. (2020)</xref>; <xref ref-type="bibr" rid="B41">Rainforth et al. (2023)</xref>. Production processes are typically considered black-box systems and involve highly complex, high-dimensional design and objective spaces, while physical experimentation is time-, cost-, and resource-intensive. According to the so-called polylemma of production, process optimization relies on human intuition, trial-and-error, and slow optimization cycles <xref ref-type="bibr" rid="B43">Schmitt and Pfeifer (2015)</xref>. Traditional statistical experimental design methodologies and metaheuristics comprise full and fractional factorial Design of Experiments (DoE) <xref ref-type="bibr" rid="B36">Montgomery (2020)</xref>; <xref ref-type="bibr" rid="B12">Durakovic (2017)</xref>, one-factor-at-a-time (OFAT), Taguchi Method <xref ref-type="bibr" rid="B31">Logothetis and Wynn (1989)</xref>, Response Surface Modeling <xref ref-type="bibr" rid="B42">Sarabia and Ortiz (2009)</xref>, Latin Hypercube Sampling <xref ref-type="bibr" rid="B48">Tang (1993)</xref>, or optimal designs <xref ref-type="bibr" rid="B44">Smucker et al. (2018)</xref>.</p>
<p>Alternatively, Bayesian Optimization (BO) provides a model-based framework for adaptive experimental design using information-theoretic principles <xref ref-type="bibr" rid="B41">Rainforth et al. (2023)</xref>. More specifically, BO is a sequential decision-making strategy for the optimization of arbitrary objective functions. In particular, BO is especially suited for optimizing expensive-to-evaluate black-box functions that i) do not have a closed-form representation, ii) do not provide function derivatives, and iii) only allow for point-wise evaluation <xref ref-type="bibr" rid="B20">Garnett (2023)</xref>. BO consists of two core components: a surrogate model used for modeling the to-be-optimized objective function and an acquisition function that is sampled for guiding the selection of to-be-evaluated parameter sets. During optimization, the surrogate model is continuously updated from a prior to a posterior belief by applying Bayes&#x2019; theorem after new observations have been collected. The acquisition function utilizes the uncertainty quantification of the surrogate model to maximize the information gain of each experiment while balancing the exploration-exploitation trade-off. The process optimization using BO is performed until a pre-defined termination criterion (e.g., maximum number of experiments, pre-defined quality-level) is fulfilled. This concept of BO stems from the 1970s&#x2013;1980s <xref ref-type="bibr" rid="B34">Mo&#x10d;kus (1975)</xref>; <xref ref-type="bibr" rid="B35">Mockus (1989)</xref> and, until recently, suffered from computational bottlenecks that hindered widespread application <xref ref-type="bibr" rid="B41">Rainforth et al. (2023)</xref>. 
However, given the advancements of recent years and the success of BO for hyperparameter optimization and neural architecture search, BO has regained popularity and made rapid progress over the past 10&#xa0;years <xref ref-type="bibr" rid="B20">Garnett (2023)</xref>; <xref ref-type="bibr" rid="B41">Rainforth et al. (2023)</xref>. The utilization of BO algorithms for sequential experimental design in scientific and engineering experimentation is termed Bayesian experimental design (BED). To date, BED has been applied in material science <xref ref-type="bibr" rid="B10">Dieb and Tsuda (2018)</xref>, manufacturing <xref ref-type="bibr" rid="B32">Maurya (2016)</xref>, additive manufacturing <xref ref-type="bibr" rid="B9">Deneault et al. (2021)</xref>; <xref ref-type="bibr" rid="B22">Guidetti et al. (2022)</xref>, laser processing <xref ref-type="bibr" rid="B13">Duris et al. (2020)</xref>, fluid dynamics <xref ref-type="bibr" rid="B11">Diessner et al. (2022)</xref>, biotechnology <xref ref-type="bibr" rid="B29">Leyendecker et al. (2025)</xref>; <xref ref-type="bibr" rid="B30">Liang and Lai (2021)</xref>, plasma coating <xref ref-type="bibr" rid="B22">Guidetti et al. (2022)</xref> and information technology <xref ref-type="bibr" rid="B23">Haghanifar et al. (2020)</xref>. The challenges in applying BO in manufacturing technology are, in particular, the high costs of experimentation and machine downtime, mixed variable types, collaboration with and acceptance by process experts, measurability of quality characteristics and measurement noise, and safe exploration. Additionally, a key challenge in successfully utilizing BO in production engineering is to find an optimal configuration of the BED algorithm comprising the configuration of the surrogate model and its mean and kernel functions, the acquisition function, and the initial design (number of data points to initialize the optimization). 
The BED configuration must be chosen depending on the characteristics of the optimization problem, i.e., the production process to be optimized, and precisely tuned to achieve optimal results.</p>
<sec id="s1-1">
<label>1.1</label>
<title>Literature review</title>
<p>Previous studies have explored the impact of BED configuration, typically focusing on two components, most often the surrogate model and the acquisition function, in combination. In the field of materials science, <xref ref-type="bibr" rid="B30">Liang and Lai (2021)</xref> conducted a benchmark study evaluating different Gaussian Process-based surrogates alongside three acquisition functions, highlighting the critical role of proper initialization and exploration strategies. <xref ref-type="bibr" rid="B11">Diessner et al. (2022)</xref> applied BED in the context of computational fluid dynamics, performing a benchmark that examined the effects of acquisition functions and initial sampling sizes. Similarly, <xref ref-type="bibr" rid="B28">Le Riche and Picheny (2021)</xref> investigated various surrogate models and initial design sizes on a standardized test set <xref ref-type="bibr" rid="B15">Finck et al. (2010)</xref>. While these contributions offer valuable insights into individual components of BED configuration, none of them addresses the combined interdependencies of acquisition function, surrogate model, and initial design. This study aims to close that gap by systematically examining the interactions among these three key components in the context of production processes.</p>
</sec>
<sec id="s1-2">
<label>1.2</label>
<title>Approach and contribution of this paper</title>
<p>To investigate the optimization performance of different BO-configurations, we design and select a total of eight engineering-focused synthetic test functions (optimization problems) with different characteristics, complexities, and known optimal solutions. We perform a benchmark study by applying different BO-configurations to all optimization problems and compare the individual performances. As proposed by <xref ref-type="bibr" rid="B6">Bossek et al. (2020a)</xref>, we utilize the Dominated Hypervolume (HV) performance metric to consider both the success rate and efficiency of optimization. It is important to note that this study focuses on production processes, which influences certain methodological choices. In particular, since the number of adjustable parameters in such processes typically does not exceed six <xref ref-type="bibr" rid="B25">Ilzarbe et al. (2008)</xref>; <xref ref-type="bibr" rid="B2">Arboretti et al. (2022)</xref>, high-dimensional optimization problems are not considered. The general goal of this investigation is to derive practical guidelines for configuring BED algorithms tailored to the optimization of production processes. The key contributions of this paper are:<list list-type="simple">
<list-item>
<p>1. We highlight the importance and challenges of experimental design in production engineering and propose BED as a promising data-driven methodology</p>
</list-item>
<list-item>
<p>2. We provide a methodology for benchmarking BO algorithms</p>
</list-item>
<list-item>
<p>3. By applying this methodology, we perform an extensive benchmark study across eight physically motivated test functions comprising a total of 15,360 experiments</p>
</list-item>
<list-item>
<p>4. The benchmarking results provide empirical references and we derive actionable guidelines for the configuration of BO and the application of BED in manufacturing process optimization</p>
</list-item>
<list-item>
<p>5. We outline the remaining challenges and derive further research needs to promote the adoption of BED in production engineering</p>
</list-item>
</list>
</p>
<p>The paper is structured as follows: After a brief introduction to process optimization in production and experimental design in <xref ref-type="sec" rid="s1">Section 1</xref>, the Methodology (<xref ref-type="sec" rid="s2">Section 2</xref>) outlines the fundamentals of BED and presents details of the benchmark study. In <xref ref-type="sec" rid="s3">Section 3</xref>, we present the results of our study, which we further interpret and discuss in <xref ref-type="sec" rid="s4">Section 4</xref>. The paper closes with a final conclusion and outlook in <xref ref-type="sec" rid="s5">Section 5</xref>. For supplementary material, please refer to the <xref ref-type="sec" rid="s12">Supplementary Appendix 1</xref>.</p>
</sec>
</sec>
<sec sec-type="methods" id="s2">
<label>2</label>
<title>Methodology</title>
<p>This section provides the theoretical concepts of BED by first describing the fundamentals of experimental design in the engineering domain (see <xref ref-type="sec" rid="s2-1">Section 2.1</xref>). The fundamentals of BO are given in <xref ref-type="sec" rid="s2-2">Section 2.2</xref> and BED is outlined in <xref ref-type="sec" rid="s2-3">Section 2.3</xref>. In <xref ref-type="sec" rid="s2-4">Section 2.4</xref>, we explain the scope, research aspects, and methodological approach of our benchmark study.</p>
<sec id="s2-1">
<label>2.1</label>
<title>Fundamentals of experimental design</title>
<p>In production engineering, experimental design is the process of identifying key influential parameters (called factors), determining the effects and interactions of these factors on the output of the process, and modeling the corresponding response surface. Accordingly, engineers and process operators utilize experimental design methodologies to identify the most influential process parameters and subsequently determine the optimal factor values using statistical analysis. Inherent in every optimization is the exploration-exploitation dilemma <xref ref-type="bibr" rid="B5">Berger-Tal et al. (2014)</xref>. Accordingly, a decision must be made for each candidate selection as to whether to explore the design space or search in the vicinity of the already known best solutions. Therefore, in engineering practice, a distinction can be made between four levels of precision during execution. First, screening aims to rapidly localize the important factors in the initial design space. Second, in characterization, a narrowed search is conducted to identify the most influential factors. Third, optimization aims to determine the optimal factor levels. Finally, validation serves to ensure that the process is capable of consistently producing products that meet the predetermined quality specifications. Whereas good experimental designs ensure the validity of the optimal factor values found, excellent designs retain a high ratio between the extracted information and the invested resources <xref ref-type="bibr" rid="B26">Jankovic et al. (2021)</xref>. <xref ref-type="fig" rid="F1">Figure 1</xref> schematically visualizes the framework for process optimization under the assumption of unknown system behavior. The behavior of the system can be described as a function <inline-formula id="inf1">
<mml:math id="m1">
<mml:mrow>
<mml:mi mathvariant="bold">y</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold">o</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
<mml:mo>;</mml:mo>
<mml:mi mathvariant="bold">z</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>&#x3b5;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> that transforms the input vector <inline-formula id="inf2">
<mml:math id="m2">
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">X</mml:mi>
<mml:mo>&#x2286;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> into an <inline-formula id="inf3">
<mml:math id="m3">
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-dimensional target vector <inline-formula id="inf4">
<mml:math id="m4">
<mml:mrow>
<mml:mi mathvariant="bold">y</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">Y</mml:mi>
<mml:mo>&#x2286;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> under the potential influence of uncontrollable parameters and disturbance values <inline-formula id="inf5">
<mml:math id="m5">
<mml:mrow>
<mml:mi mathvariant="bold">z</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and a noise term <inline-formula id="inf6">
<mml:math id="m6">
<mml:mrow>
<mml:mi>&#x3b5;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. In addition to the actual target variable vector <inline-formula id="inf7">
<mml:math id="m7">
<mml:mrow>
<mml:mi mathvariant="bold">y</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, the process model can provide additional data in the form of process monitoring observations <inline-formula id="inf8">
<mml:math id="m8">
<mml:mrow>
<mml:mi mathvariant="bold">o</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>o</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2208;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>. We assume that we do not have analytical knowledge about the process and that the system does not possess a closed-form representation, does not provide functional derivatives, and only allows for point-wise evaluation <xref ref-type="bibr" rid="B20">Garnett (2023)</xref>. Furthermore, the optimization problem of finding <inline-formula id="inf9">
<mml:math id="m9">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>arg</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>max</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi mathvariant="script">X</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> can include multiple controllable and uncontrollable influencing factors and &#x2013; depending on the number of target variables &#x2013; can be either single- or multi-objective. Besides the complexity of both design and objective spaces, the complexity and difficulty of the optimization problem are inherently determined by the complexity of the underlying system behavior. Depending on whether experiment design and experimentation along with experiment validation are performed in an iterative manner, a distinction can be made between sequential and non-sequential experimental design approaches.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Schematic black-box production process model for experimental design.</p>
</caption>
<graphic xlink:href="fmtec-05-1614335-g001.tif">
<alt-text content-type="machine-generated">Diagram showing a production process represented as a blue box labeled &#x22;Production Process (objective function) y, o = f(x, z).&#x22; Inputs include controllable parameters (x1 to xn) and external influences (z1 to zr), labeled as independent variables. Outputs include quality features (y1 to ym) labeled as dependent variables and process monitoring observations (o1 to os). Arrows indicate input to output flow.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s2-2">
<label>2.2</label>
<title>Fundamentals of Bayesian Optimization (BO)</title>
<p>Optimization is an innate human behavior <xref ref-type="bibr" rid="B20">Garnett (2023)</xref> and optimization problems are pervasive in scientific and industrial fields that require optimization algorithms to be as efficient as possible <xref ref-type="bibr" rid="B49">Wang et al. (2022)</xref>. In contrast to well-known metaheuristics that require large numbers of experiments and function evaluations, BO &#x2013; with its model-based, adaptive, and active optimization policies &#x2013; promises to be much more data-efficient in finding a global optimum (minimum or maximum) of an unknown objective function <xref ref-type="bibr" rid="B30">Liang and Lai (2021)</xref>. In general, the model structure of a BO algorithm comprises two core components: 1) a surrogate model (see <xref ref-type="sec" rid="s2-2-1">Section 2.2.1</xref>) and 2) an acquisition function (see <xref ref-type="sec" rid="s2-2-2">Section 2.2.2</xref>). The surrogate model aims to faithfully approximate the input-output behavior of the system to be optimized. The acquisition function indirectly defines the optimization policy by assessing the value of future observations and therefore guiding the parameter selection process <xref ref-type="bibr" rid="B20">Garnett (2023)</xref>. For starting the optimization, BO requires an initial dataset <inline-formula id="inf10">
<mml:math id="m10">
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> that is a collection of observations of the input-output behavior of the system. In BO, Bayes&#x2019; theorem is applied to incorporate a prior belief to maximize the informational content and therefore the value of each new experiment <xref ref-type="bibr" rid="B13">Duris et al. (2020)</xref>. BO utilizes Bayes&#x2019; theorem to iteratively update its prior distribution (prior) after the dataset <inline-formula id="inf11">
<mml:math id="m11">
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> has been extended with new observations. The prior is updated to form the posterior distribution (posterior). The prior represents a belief about the behavior of the objective function <inline-formula id="inf12">
<mml:math id="m12">
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. The posterior distribution is used to compute and optimize the acquisition function in order to sample parameter combinations with high informational content for conducting new experiments.</p>
<sec id="s2-2-1">
<label>2.2.1</label>
<title>Surrogate model</title>
<p>BO requires a probabilistic surrogate model that provides estimates and uncertainties of the objective function <inline-formula id="inf13">
<mml:math id="m13">
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> <xref ref-type="bibr" rid="B13">Duris et al. (2020)</xref>. In this work, for surrogate models, we solely consider non-parametric Gaussian processes <inline-formula id="inf14">
<mml:math id="m14">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>G</mml:mi>
<mml:mi>P</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, the most widely adopted surrogate model. We define the GP surrogate model as <inline-formula id="inf15">
<mml:math id="m15">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>&#x223c;</mml:mo>
<mml:mi>G</mml:mi>
<mml:mi>P</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>,</mml:mo>
<mml:mi>k</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. To perform both prediction and uncertainty quantification, a GP utilizes a mean function <inline-formula id="inf16">
<mml:math id="m16">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> to specify the expected value of <inline-formula id="inf17">
<mml:math id="m17">
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and a covariance function <inline-formula id="inf18">
<mml:math id="m18">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> <xref ref-type="bibr" rid="B13">Duris et al. (2020)</xref>; <xref ref-type="bibr" rid="B21">Greenhill et al. (2020)</xref>. The covariance function determines the covariance between the function values <inline-formula id="inf19">
<mml:math id="m19">
<mml:mrow>
<mml:mi mathvariant="bold">y</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf20">
<mml:math id="m20">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold">y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> corresponding to a pair of input parameters <inline-formula id="inf21">
<mml:math id="m21">
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf22">
<mml:math id="m22">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> <xref ref-type="bibr" rid="B20">Garnett (2023)</xref>. In comparison with the mean function, careful design of the covariance function is of higher criticality for the fidelity of the model and the experimentation&#x2019;s sample path behavior <xref ref-type="bibr" rid="B20">Garnett (2023)</xref>.</p>
</sec>
<sec id="s2-2-2">
<label>2.2.2</label>
<title>Acquisition function</title>
<p>For the optimization of an expensive-to-evaluate function with black-box behavior, BO defines an optimization policy by introducing a substitute optimization problem utilizing a so-called acquisition function. In contrast to regular objective functions, the acquisition function is differentiable, inexpensive to evaluate and is derived from <inline-formula id="inf23">
<mml:math id="m23">
<mml:mrow>
<mml:mi>m</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf24">
<mml:math id="m24">
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
<mml:mo>,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2032;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> <xref ref-type="bibr" rid="B21">Greenhill et al. (2020)</xref>. Therefore, well-established numerical optimization algorithms can be utilized to iteratively optimize the acquisition function in order to propose parameter sets to be evaluated next. The acquisition function <inline-formula id="inf25">
<mml:math id="m25">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mo>:</mml:mo>
<mml:mi mathvariant="script">X</mml:mi>
<mml:mo>&#x2192;</mml:mo>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> assigns a score to each parameter combination within the design space reflecting the value of each experiment for solving the optimization problem <xref ref-type="bibr" rid="B20">Garnett (2023)</xref>. The acquisition function performs the trade-off between exploration and exploitation and therefore strongly influences the sample path behavior and optimization efficiency. Besides knowledge gradient, entropy search, and predictive entropy search, the most widely adopted single-objective acquisition function is expected improvement <xref ref-type="bibr" rid="B17">Frazier (2018)</xref>.</p>
</sec>
</sec>
<sec id="s2-3">
<label>2.3</label>
<title>Bayesian experimental design (BED)</title>
<p>
<xref ref-type="fig" rid="F2">Figure 2</xref> describes the iterative operating principle of BED: Starting with the design space <inline-formula id="inf26">
<mml:math id="m26">
<mml:mrow>
<mml:mi mathvariant="script">X</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> consisting of factors and associated factor limits, which are typically predefined by process experts, as well as initial data <inline-formula id="inf27">
<mml:math id="m27">
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold">y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>N</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula>, BED iteratively proposes new experiments (Step 1). In each iteration, experiments are conducted using the proposed factor value vector <inline-formula id="inf28">
<mml:math id="m28">
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (Step 2). Formally, this step can be viewed as a query of the true objective function <inline-formula id="inf29">
<mml:math id="m29">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold">z</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> to measure the response vector. As a result of the evaluation of the experiment, the objective vector <inline-formula id="inf30">
<mml:math id="m30">
<mml:mrow>
<mml:mi mathvariant="bold">y</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is obtained. An iteration can comprise a single experiment or an arbitrary number of experiments. In these cases, one refers to single-point or batch experimentation. The results of the experiment <inline-formula id="inf31">
<mml:math id="m31">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi mathvariant="bold">x</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi mathvariant="bold">y</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> are added to the dataset <inline-formula id="inf32">
<mml:math id="m32">
<mml:mrow>
<mml:mi mathvariant="script">D</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (Step 3). In this way, BED receives feedback on the result of the experiment to update the Gaussian process surrogate model (Step 4). Based on the quality of the solution, the fulfillment of the acceptance criterion is checked (Step 5). The acceptance criterion can be arbitrarily defined and, for example, take into account quality feature requirements, a maximum number of experiments, or time, cost, or resource limitations. If the acceptance criterion is satisfied, the optimization is terminated. Otherwise, the optimization continues, and the next iteration is entered by sampling a new set of factor values through optimization of the acquisition function.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Bayesian experimental design procedure and Bayesian optimization components (based on <xref ref-type="bibr" rid="B1">Al-Hafez (2021)</xref>).</p>
</caption>
<graphic xlink:href="fmtec-05-1614335-g002.tif">
<alt-text content-type="machine-generated">Flowchart illustrating the process of Bayesian Optimization Fine Tuning. It includes elements such as design and objective spaces, an initial dataset, and a series of updates and evaluations involving Gaussian processes and parameter vectors. Observations are marked on a graph showing the GP Posterior Mean, Posterior Uncertainty, and Acquisition Function. The cycle involves updating datasets, conducting experiments, and evaluating results. Key terms like &#x22;Maximum of Acquisition Function&#x22; and &#x22;Fulfillment Check of the Acceptance Criterion&#x22; are highlighted in blue boxes.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s2-4">
<label>2.4</label>
<title>Composition of the benchmark study</title>
<p>In this section, we describe the scope and characteristics of our benchmark study to investigate the suitability of different BED algorithms on different types of optimization problems. The scope of this study is limited to single-objective functions, single-point evaluations, numerical, continuous and unconstrained problems and controllable parameters. The study design comprises three components: First, the BED algorithm configuration options (configuration space) (see <xref ref-type="sec" rid="s2-4-1">Section 2.4.1</xref>), second, the optimization problems (problem space) (see <xref ref-type="sec" rid="s2-4-2">Section 2.4.2</xref>), and third, the performance metrics (performance space) (see <xref ref-type="sec" rid="s2-4-3">Section 2.4.3</xref>).</p>
<sec id="s2-4-1">
<label>2.4.1</label>
<title>Definition of the configuration space</title>
<p>The components of BED algorithms investigated in this study comprise the kernel (also known as covariance function), the acquisition function, and the initial sampling design. Building upon the findings from the studies of <xref ref-type="bibr" rid="B38">Palar and Shimoyama (2019)</xref>; <xref ref-type="bibr" rid="B39">Picheny et al. (2013)</xref>; <xref ref-type="bibr" rid="B28">Le Riche and Picheny (2021)</xref>; <xref ref-type="bibr" rid="B30">Liang and Lai (2021)</xref>, this work considers four kernels: RBF, Matern05, Matern15, Matern25, as they belong to the standard portfolio of BO and can be applied to different processes. Each of these isotropic kernels <inline-formula id="inf33">
<mml:math id="m33">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is also investigated with its anisotropic counterpart utilizing automatic relevance determination (ARD) <xref ref-type="bibr" rid="B14">Duvenaud (2014)</xref>. ARD implicitly determines the relevance of the input parameters and aims to enhance the modeling accuracy and optimization efficiency.</p>
<p>Regarding the acquisition function <inline-formula id="inf34">
<mml:math id="m34">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>, expected improvement (EI), probability of improvement (PI), and upper confidence bound (UCB) (with exploration scale of <inline-formula id="inf35">
<mml:math id="m35">
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.2</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>) are examined, along with a variation of EI known as Noisy Expected Improvement or NEI, which is specifically designed to handle noisy problems more effectively. For a detailed study of the presented acquisition functions, please refer to <xref ref-type="bibr" rid="B20">Garnett (2023)</xref>.</p>
<p>To define the initial design, three choices must be made: the initial sampling size, the initial sampling strategy, and the number of independent runs. The initial sampling size <inline-formula id="inf36">
<mml:math id="m36">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is a key component for determining the exploration phase of BED before fitting the GP model. Following the methodology outlined in <xref ref-type="bibr" rid="B28">Le Riche and Picheny (2021)</xref>, this study considers three initial sampling sizes: Small (S, five trials), Medium (M, 10 trials), and Large (L, 30 trials), while maintaining a fixed total budget of 35 trials. The decision to set the initial sampling size independently of the dimensionality of the problem aligns with the study&#x2019;s focus on experimental design for production processes, which typically involve no more than six parameters and a maximum of 30 trials <xref ref-type="bibr" rid="B25">Ilzarbe et al. (2008)</xref>. This restriction confines the study to low-dimensional spaces, whereas for high-dimensional problems, adjustments to the initial sampling size would be necessary.</p>
<p>Among the various sampling strategies, quasi-random Sobol sampling is chosen due to its ability to effectively cover the parameter space under the specific conditions encountered. Due to the stochastic nature of the Sobol algorithm, it is essential to perform multiple independent runs for each BO configuration on each test function. Following the recommendations of <xref ref-type="bibr" rid="B33">Mersmann et al. (2010)</xref>, a total of ten independent runs are conducted, using ten different random seeds. Each random seed is applied to each BO configuration, ensuring that all configurations start with the same initial data and that no configuration benefits from random fluctuations. A detailed description of the Sobol algorithm can be found in <xref ref-type="sec" rid="s1-1">Section 1.1</xref> of the <xref ref-type="sec" rid="s12">Supplementary Material</xref>.</p>
</sec>
<sec id="s2-4-2">
<label>2.4.2</label>
<title>Definition of the problem space</title>
<p>To examine the performance of different BO algorithm configurations on different optimization problems, we create artificial datasets utilizing a total of eight analytic test functions (see <xref ref-type="fig" rid="F4">Figure 4</xref>). Four of these eight test functions, namely, <italic>F1, F2, F3, F4</italic>, are mathematical problems. The remaining four (<italic>AdaptedBranin, Borehole, OTLCircuit, WingWeight</italic>) originate from <xref ref-type="bibr" rid="B16">Forrester et al. (2008)</xref>; <xref ref-type="bibr" rid="B47">Surjanovic and Bingham (2021)</xref> and comprise physically motivated optimization problems. Please refer to the <xref ref-type="sec" rid="s12">Supplementary Material Section 1.2</xref> for a detailed description of the optimization problems.</p>
<p>Consistent with previous benchmark studies <xref ref-type="bibr" rid="B40">Qin et al. (2021)</xref>; <xref ref-type="bibr" rid="B39">Picheny et al. (2013)</xref>; <xref ref-type="bibr" rid="B38">Palar and Shimoyama (2019)</xref>; <xref ref-type="bibr" rid="B19">Gan et al. (2021)</xref>, this research investigates both noiseless and noisy versions of objective functions. To introduce random noise to the output of the noisy functions, a noise level of 0.1 is employed, following the approach outlined in <xref ref-type="bibr" rid="B40">Qin et al. (2021)</xref> and <xref ref-type="bibr" rid="B19">Gan et al. (2021)</xref>. In each trial, a pseudorandom number is drawn from a uniform distribution between 0 and 0.1. This number is then multiplied by the standard deviation (SD) of the objective of the respective function and added to the output value. The SD of the function&#x2019;s objective is calculated on the basis of a random uniform sampling of size 10,000. This methodology allows for a controlled examination and comparison of the effects of noise. It is essential to emphasize that the noise in this work is homoscedastic, meaning it does not depend on the sequential course of the experiment. Heteroscedastic noise is not considered in this study.</p>
</sec>
<sec id="s2-4-3">
<label>2.4.3</label>
<title>Definition of the performance space</title>
<p>In this study, we focus on three key metrics to evaluate the performance of individual BED configurations on the optimization problems: solution quality, robustness, and efficiency. We employ the multi-objective dominated hypervolume (HV) metric, as proposed by <xref ref-type="bibr" rid="B7">Bossek et al. (2020b)</xref>. According to <xref ref-type="disp-formula" rid="e1">Equation 1</xref>, the HV metric integrates robustness (measured by the probability of failure <inline-formula id="inf37">
<mml:math id="m37">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) and efficiency (measured by the running time of successful experiments <inline-formula id="inf38">
<mml:math id="m38">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>). Lower values of <inline-formula id="inf39">
<mml:math id="m39">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf40">
<mml:math id="m40">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> yield higher HV values, indicating superior overall performance. It should be noted that the HV metric was chosen because of its efficiency-robustness trade-off, since the number of required experiments constitutes the key cost driver in the optimization of production engineering systems. The HV metric is illustrated in <xref ref-type="fig" rid="F3">Figure 3</xref> (left) and calculated according to <xref ref-type="statement" rid="Algorithm_1">Algorithm 1</xref>.<disp-formula id="e1">
<mml:math id="m41">
<mml:mrow>
<mml:mtext>HV</mml:mtext>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(1)</label>
</disp-formula>
</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Dominated Hypervolume (HV) adapted from <xref ref-type="bibr" rid="B7">Bossek et al. (2020b)</xref> (left) and HV span plot with classification areas (green: qualified, yellow: undetermined, red: non-qualified) (right) (<inline-formula id="inf41">
<mml:math id="m42">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>: probability of failure, <inline-formula id="inf42">
<mml:math id="m43">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>: running time of successful runs, <inline-formula id="inf43">
<mml:math id="m44">
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>: optimization budget, <inline-formula id="inf44">
<mml:math id="m45">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>: qualified threshold, <inline-formula id="inf45">
<mml:math id="m46">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>: non-qualified threshold, <inline-formula id="inf46">
<mml:math id="m47">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>: width of undetermined classification area).</p>
</caption>
<graphic xlink:href="fmtec-05-1614335-g003.tif">
<alt-text content-type="machine-generated">Two scatter plots illustrate different data levels. The left plot features points of Levels A, B, and C marked as green circles, orange triangles, and blue stars, respectively. Contour lines indicate HV values with a shaded region labeled \( (1-p)(T-r_s) \). The right plot shows black points on a background divided into red, yellow, and green areas, with contour lines for HV values \( t_{nq} \) and \( t_q \). Both plots have axes labeled \( r_s \) and \( p_f \) on the horizontal and vertical axes, respectively.</alt-text>
</graphic>
</fig>
<p>
<statement content-type="algorithm" id="Algorithm_1">
<label>Algorithm 1</label>
<title>Calculate Dominated Hypervolume (HV)</title>
<p>
<list list-type="simple">
<list-item>
<p>1: Let <inline-formula id="inf47">
<mml:math id="m48">
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> be a test function</p>
</list-item>
<list-item>
<p>2: Let <inline-formula id="inf48">
<mml:math id="m49">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>k</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> be a BO configuration</p>
</list-item>
<list-item>
<p>3: Let <inline-formula id="inf49">
<mml:math id="m50">
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> be a single trial</p>
</list-item>
<list-item>
<p>4: Let <inline-formula id="inf50">
<mml:math id="m51">
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> be the number of Sobol trials, <inline-formula id="inf51">
<mml:math id="m52">
<mml:mrow>
<mml:mi>U</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>5: Let <inline-formula id="inf52">
<mml:math id="m53">
<mml:mrow>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> be the number of Bayesian trials, <inline-formula id="inf53">
<mml:math id="m54">
<mml:mrow>
<mml:mi>V</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>w</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>6: Let <inline-formula id="inf54">
<mml:math id="m55">
<mml:mrow>
<mml:mi>w</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>u</mml:mi>
<mml:mo>&#x2b;</mml:mo>
<mml:mi>v</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> be the total budget, <inline-formula id="inf55">
<mml:math id="m56">
<mml:mrow>
<mml:mi>W</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>U</mml:mi>
<mml:mo>&#x222a;</mml:mo>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>7: Let <inline-formula id="inf56">
<mml:math id="m57">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">{</mml:mo>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">}</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> be a single run of total runs <inline-formula id="inf57">
<mml:math id="m58">
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>8: <bold>for</bold> each combination <inline-formula id="inf58">
<mml:math id="m59">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> <bold>do</bold>
</p>
</list-item>
<list-item>
<p>9: &#x2003;&#x2003;<bold>for</bold> each trial <inline-formula id="inf59">
<mml:math id="m60">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> <bold>do</bold>
</p>
</list-item>
<list-item>
<p>10: &#x2003;&#x2003;&#x2003;Calculate relative deviation <inline-formula id="inf60">
<mml:math id="m61">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="normal">&#x394;</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> according to (2)</p>
</list-item>
<list-item>
<p>11: &#x2003;&#x2003;<bold>end for</bold>
</p>
</list-item>
<list-item>
<p>12: &#x2003;&#x2003;<bold>for</bold> each run <inline-formula id="inf61">
<mml:math id="m62">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> <bold>do</bold>
</p>
</list-item>
<list-item>
<p>13: &#x2003;&#x2003;&#x2003;<bold>if</bold> the algorithm finds an optimal solution within <inline-formula id="inf62">
<mml:math id="m63">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
<mml:mi mathvariant="normal">&#x394;</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> according to (3) <bold>then</bold>
</p>
</list-item>
<list-item>
<p>14: &#x2003;&#x2003;&#x2003;&#x2003;Set <inline-formula id="inf63">
<mml:math id="m64">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="normal">s</mml:mi>
<mml:mi mathvariant="normal">u</mml:mi>
<mml:mi mathvariant="normal">c</mml:mi>
<mml:mi mathvariant="normal">c</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mi mathvariant="normal">s</mml:mi>
<mml:mi mathvariant="normal">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>15: &#x2003;&#x2003;&#x2003;<bold>else</bold>
</p>
</list-item>
<list-item>
<p>16: &#x2003;&#x2003;&#x2003;&#x2003;Set <inline-formula id="inf64">
<mml:math id="m65">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="normal">s</mml:mi>
<mml:mi mathvariant="normal">u</mml:mi>
<mml:mi mathvariant="normal">c</mml:mi>
<mml:mi mathvariant="normal">c</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mi mathvariant="normal">s</mml:mi>
<mml:mi mathvariant="normal">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>17: &#x2003;&#x2003;&#x2003;<bold>end if</bold>
</p>
</list-item>
<list-item>
<p>18: &#x2003;&#x2003;<bold>end for</bold>
</p>
</list-item>
<list-item>
<p>19: &#x2003;&#x2003;Define successful runs set <inline-formula id="inf65">
<mml:math id="m66">
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> according to (4)</p>
</list-item>
<list-item>
<p>20: &#x2003;&#x2003;Calculate probability of failure <inline-formula id="inf66">
<mml:math id="m67">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> according to (5)</p>
</list-item>
<list-item>
<p>21: &#x2003;&#x2003;Calculate running time <inline-formula id="inf67">
<mml:math id="m68">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> according to (6)</p>
</list-item>
<list-item>
<p>22: &#x2003;&#x2003;Calculate dominated Hypervolume <inline-formula id="inf68">
<mml:math id="m69">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mtext>HV</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> according to (7)</p>
</list-item>
<list-item>
<p>23: <bold>end for</bold>
</p>
</list-item>
</list>
</p>
</statement>
</p>
<p>
<list list-type="bullet">
<list-item>
<p>The relative deviation is calculated as the normalized difference between the known optimal solution <inline-formula id="inf69">
<mml:math id="m70">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> and the observed solution <inline-formula id="inf70">
<mml:math id="m71">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> at each trial <inline-formula id="inf71">
<mml:math id="m72">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> for each run <inline-formula id="inf72">
<mml:math id="m73">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (<xref ref-type="disp-formula" rid="e2">Equation 2</xref>). This normalization is performed to ensure comparability between different test functions.</p>
</list-item>
</list>
<disp-formula id="e2">
<mml:math id="m74">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="normal">&#x394;</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="|" close="|">
<mml:mrow>
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x2212;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(2)</label>
</disp-formula>
<list list-type="bullet">
<list-item>
<p>A run <inline-formula id="inf73">
<mml:math id="m75">
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is considered successful if the algorithm has found an optimal solution within the tolerance range of <inline-formula id="inf74">
<mml:math id="m76">
<mml:mrow>
<mml:mo>&#xb1;</mml:mo>
<mml:mi mathvariant="normal">&#x394;</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> relative to the known optimal solution for each test function (<xref ref-type="disp-formula" rid="e3">Equation 3</xref>). In this work a value of <inline-formula id="inf75">
<mml:math id="m77">
<mml:mrow>
<mml:mi mathvariant="normal">&#x394;</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.05</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> is utilized for all test functions, corresponding to an optimization of <inline-formula id="inf76">
<mml:math id="m78">
<mml:mrow>
<mml:mn>95</mml:mn>
<mml:mi>%</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
</list-item>
</list>
<disp-formula id="e3">
<mml:math id="m79">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="normal">s</mml:mi>
<mml:mi mathvariant="normal">u</mml:mi>
<mml:mi mathvariant="normal">c</mml:mi>
<mml:mi mathvariant="normal">c</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mi mathvariant="normal">s</mml:mi>
<mml:mi mathvariant="normal">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="{" close="">
<mml:mrow>
<mml:mtable class="cases">
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mn>1</mml:mn>
<mml:mspace width="1em"/>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mi mathvariant="normal">i</mml:mi>
<mml:mi mathvariant="normal">f</mml:mi>
<mml:mspace width="1em"/>
<mml:mo>&#x2203;</mml:mo>
<mml:mi mathvariant="normal">&#x394;</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3c;</mml:mo>
<mml:mi mathvariant="normal">&#x394;</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>&#x2a;</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mtext>&#x2003;</mml:mtext>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mtext>&#x2003;</mml:mtext>
<mml:mi>i</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>V</mml:mi>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd columnalign="left">
<mml:mn>0</mml:mn>
<mml:mspace width="1em"/>
</mml:mtd>
<mml:mtd columnalign="left">
<mml:mi mathvariant="normal">o</mml:mi>
<mml:mi mathvariant="normal">t</mml:mi>
<mml:mi mathvariant="normal">h</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mi mathvariant="normal">r</mml:mi>
<mml:mi mathvariant="normal">w</mml:mi>
<mml:mi mathvariant="normal">i</mml:mi>
<mml:mi mathvariant="normal">s</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(3)</label>
</disp-formula>
<list list-type="bullet">
<list-item>
<p>Set of successful runs (<xref ref-type="disp-formula" rid="e4">Equation 4</xref>):</p>
</list-item>
</list>
<disp-formula id="e4">
<mml:math id="m80">
<mml:mrow>
<mml:mi>S</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mfenced open="{" close="}">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>,</mml:mo>
<mml:mo>&#x2026;</mml:mo>
<mml:mo>,</mml:mo>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:mfenced>
<mml:mtext>&#x2003;</mml:mtext>
<mml:mtext>&#x2009;with</mml:mtext>
<mml:mtext>&#x2003;</mml:mtext>
<mml:msub>
<mml:mrow>
<mml:mtext>success</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(4)</label>
</disp-formula>
<list list-type="bullet">
<list-item>
<p>The probability of failure <inline-formula id="inf77">
<mml:math id="m81">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> for one BO configuration <inline-formula id="inf78">
<mml:math id="m82">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and a test function <inline-formula id="inf79">
<mml:math id="m83">
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> over all <inline-formula id="inf80">
<mml:math id="m84">
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> runs can be defined as <xref ref-type="disp-formula" rid="e5">Equation 5</xref>:</p>
</list-item>
</list>
<disp-formula id="e5">
<mml:math id="m85">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munderover>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>m</mml:mi>
</mml:mrow>
</mml:munderover>
</mml:mstyle>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="normal">s</mml:mi>
<mml:mi mathvariant="normal">u</mml:mi>
<mml:mi mathvariant="normal">c</mml:mi>
<mml:mi mathvariant="normal">c</mml:mi>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mi mathvariant="normal">s</mml:mi>
<mml:mi mathvariant="normal">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(5)</label>
</disp-formula>
<list list-type="bullet">
<list-item>
<p>The running time <inline-formula id="inf81">
<mml:math id="m86">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:math>
</inline-formula> for one BO configuration <inline-formula id="inf82">
<mml:math id="m87">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and a test function <inline-formula id="inf83">
<mml:math id="m88">
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is the first successful trial <inline-formula id="inf84">
<mml:math id="m89">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> within the Bayesian trials where the set tolerance was achieved (<xref ref-type="disp-formula" rid="e6">Equation 6</xref>). It is aggregated through all successful runs <inline-formula id="inf85">
<mml:math id="m90">
<mml:mrow>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and normalized to the total budget <inline-formula id="inf86">
<mml:math id="m91">
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>. This last step sets the reference time <inline-formula id="inf87">
<mml:math id="m92">
<mml:mrow>
<mml:mi>T</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> defined in the <inline-formula id="inf88">
<mml:math id="m93">
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> equation of <xref ref-type="bibr" rid="B7">Bossek et al. (2020b)</xref> to 1.</p>
</list-item>
</list>
<disp-formula id="e6">
<mml:math id="m94">
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi>w</mml:mi>
</mml:mrow>
</mml:mfrac>
<mml:mo>&#x22c5;</mml:mo>
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mo stretchy="false">&#x7c;</mml:mo>
<mml:mi>S</mml:mi>
<mml:mo stretchy="false">&#x7c;</mml:mo>
</mml:mrow>
</mml:mfrac>
<mml:mstyle displaystyle="true">
<mml:munder>
<mml:mrow>
<mml:mo>&#x2211;</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>S</mml:mi>
</mml:mrow>
</mml:munder>
</mml:mstyle>
<mml:munder>
<mml:mrow>
<mml:mi>arg min</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
<mml:mo>&#x2208;</mml:mo>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:munder>
<mml:msub>
<mml:mrow>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mi mathvariant="normal">&#x394;</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi>y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
<mml:mrow>
<mml:mi>j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
<label>(6)</label>
</disp-formula>
<list list-type="bullet">
<list-item>
<p>Finally, the dominated Hypervolume <inline-formula id="inf89">
<mml:math id="m95">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mtext>HV</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> of a BO configuration <inline-formula id="inf90">
<mml:math id="m96">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> on a test function <inline-formula id="inf91">
<mml:math id="m97">
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> can be calculated as follows (<xref ref-type="disp-formula" rid="e7">Equation 7</xref>).</p>
</list-item>
</list>
<disp-formula id="e7">
<mml:math id="m98">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mtext>HV</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>&#x3d;</mml:mo>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
<mml:mfenced open="(" close=")">
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo>&#x2212;</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfenced>
</mml:mrow>
</mml:math>
<label>(7)</label>
</disp-formula>
</p>
<p>Higher values of HV indicate better performance of the BO configuration, while lower values suggest inefficiency, lack of robustness, or a combination of both. The minimal HV-value is 0, with <inline-formula id="inf92">
<mml:math id="m99">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1.0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf93">
<mml:math id="m100">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>1.0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, while the maximum HV-value is given at <inline-formula id="inf94">
<mml:math id="m101">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> and a minimal running time of <inline-formula id="inf95">
<mml:math id="m102">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.14</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>, resulting in a maximum of HV<inline-formula id="inf96">
<mml:math id="m103">
<mml:mrow>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.86</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>. The minimal running time of <inline-formula id="inf97">
<mml:math id="m104">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.14</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> is the result of dividing the minimum number of initial samples by the fixed budget <inline-formula id="inf98">
<mml:math id="m105">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mn>5</mml:mn>
<mml:mo>/</mml:mo>
<mml:mn>35</mml:mn>
<mml:mo>&#x2248;</mml:mo>
<mml:mn>0.143</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>.</p>
<p>In order to compare different BO configurations and assess their suitability on the various test functions, we utilize a classification approach based on the resulting HV and taking into account the interrelationships between kernels, acquisition functions and initial dataset size. Each configuration (i.e., each kernel, acquisition function, and initial dataset size) is classified as qualified if it results in a good performance for optimizing the test function, non-qualified if it leads to poor performance, or undetermined if there is no clear outcome regarding its performance. This classification approach is depicted in <xref ref-type="statement" rid="Algorithm_2">Algorithm 2</xref>.</p>
<p>Since each individual configuration appears in multiple combinations with other configuration parameters, simple aggregation techniques, such as computing the median HV across all runs, may distort the actual performance. For example, if the RBF kernel performs well when paired with EI, NEI, and UCB, but poorly with PI, its overall median HV may be skewed downward, thus underrepresenting its true capability. While a variance analysis could reveal the degree to which a configuration influences outcomes, it does not provide insight into the quality of the performance itself, which is essential for this study. Therefore, a more nuanced classification method is applied that considers both performance level and variability across combinations.</p>
<p>
<statement content-type="algorithm" id="Algorithm_2">
<label>Algorithm 2</label>
<title>Classify configurations</title>
<p>
<list list-type="simple">
<list-item>
<p>1: <bold>for</bold> each test function <inline-formula id="inf99">
<mml:math id="m106">
<mml:mrow>
<mml:mi>i</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> in the problem space <inline-formula id="inf100">
<mml:math id="m107">
<mml:mrow>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>&#xa0;<bold>do</bold>
</p>
</list-item>
<list-item>
<p>2: &#x2003;Initialize lists: <inline-formula id="inf101">
<mml:math id="m108">
<mml:mrow>
<mml:mi>Q</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> // Qualified configurations</p>
</list-item>
<list-item>
<p>3: &#x2003;Initialize lists: <inline-formula id="inf102">
<mml:math id="m109">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>Q</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> // Non-qualified configurations</p>
</list-item>
<list-item>
<p>4: &#x2003;Initialize lists: <inline-formula id="inf103">
<mml:math id="m110">
<mml:mrow>
<mml:mi>U</mml:mi>
<mml:mi>D</mml:mi>
<mml:mo>&#x2190;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> // Undetermined configurations</p>
</list-item>
<list-item>
<p>5: &#x2003;Let <inline-formula id="inf104">
<mml:math id="m111">
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> be a component of a BO configuration (<inline-formula id="inf105">
<mml:math id="m112">
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> being a kernel, acquisition function or initial sampling size)</p>
</list-item>
<list-item>
<p>6: &#x2003;<bold>for</bold> all trials where the configuration contains <inline-formula id="inf106">
<mml:math id="m113">
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> in <inline-formula id="inf107">
<mml:math id="m114">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> <bold>do</bold>
</p>
</list-item>
<list-item>
<p>7: &#x2003;&#x2003;Compute median of <inline-formula id="inf108">
<mml:math id="m115">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mtext>HV</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> for the trials</p>
</list-item>
<list-item>
<p>8: &#x2003;&#x2003;<bold>if</bold> median of <inline-formula id="inf109">
<mml:math id="m116">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mtext>HV</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is in non-qualified area <bold>then</bold>
</p>
</list-item>
<list-item>
<p>9: &#x2003;&#x2003;&#x2003;Add <inline-formula id="inf110">
<mml:math id="m117">
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> to <inline-formula id="inf111">
<mml:math id="m118">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>Q</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>10: &#x2003;&#x2003;<bold>end if</bold>
</p>
</list-item>
<list-item>
<p>11: &#x2003;<bold>end for</bold>
</p>
</list-item>
<list-item>
<p>12: &#x2003;<bold>for</bold> all configurations where <inline-formula id="inf112">
<mml:math id="m119">
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> in <inline-formula id="inf113">
<mml:math id="m120">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> and not in <inline-formula id="inf114">
<mml:math id="m121">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>Q</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> <bold>do</bold>
</p>
</list-item>
<list-item>
<p>13: &#x2003;&#x2003;Compute median of <inline-formula id="inf115">
<mml:math id="m122">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mtext>HV</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>14: &#x2003;&#x2003;<bold>if</bold> median of <inline-formula id="inf116">
<mml:math id="m123">
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mtext>HV</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>I</mml:mi>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:math>
</inline-formula> is in qualified area <bold>then</bold>
</p>
</list-item>
<list-item>
<p>15: &#x2003;&#x2003;&#x2003;Add <inline-formula id="inf117">
<mml:math id="m124">
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> to <inline-formula id="inf118">
<mml:math id="m125">
<mml:mrow>
<mml:mi>Q</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>16: &#x2003;&#x2003;<bold>else</bold>
</p>
</list-item>
<list-item>
<p>17: &#x2003;&#x2003;&#x2003;Add <inline-formula id="inf119">
<mml:math id="m126">
<mml:mrow>
<mml:mi>c</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> to <inline-formula id="inf120">
<mml:math id="m127">
<mml:mrow>
<mml:mi>U</mml:mi>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</p>
</list-item>
<list-item>
<p>18: &#x2003;&#x2003;<bold>end if</bold>
</p>
</list-item>
<list-item>
<p>19: &#x2003;<bold>end for</bold>
</p>
</list-item>
<list-item>
<p>20: <bold>end for</bold>
</p>
</list-item>
</list>
</p>
</statement>
</p>
<p>To classify the BO configurations effectively, the HV values ranging from 0 to 0.86 are divided into three distinct areas: non-qualified <inline-formula id="inf121">
<mml:math id="m128">
<mml:mrow>
<mml:mi>N</mml:mi>
<mml:mi>Q</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (red), qualified <inline-formula id="inf122">
<mml:math id="m129">
<mml:mrow>
<mml:mi>Q</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (green), and undetermined <inline-formula id="inf123">
<mml:math id="m130">
<mml:mrow>
<mml:mi>U</mml:mi>
<mml:mi>D</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> (yellow) (<xref ref-type="fig" rid="F3">Figure 3</xref> (right)). Each black point represents an experiment with a specific BO configuration <inline-formula id="inf124">
<mml:math id="m131">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula>. The limits of the areas, represented by the thresholds <inline-formula id="inf125">
<mml:math id="m132">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf126">
<mml:math id="m133">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, are determined based on the distribution of HV values obtained from the experiments for each test function. The width of the undetermined area, denoted by <inline-formula id="inf127">
<mml:math id="m134">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>u</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, also varies according to the characteristics of the test function. To determine the non-qualified threshold <inline-formula id="inf128">
<mml:math id="m135">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and the qualified threshold <inline-formula id="inf129">
<mml:math id="m136">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, a comparative and adaptive approach is followed based on the obtained benchmark results in noiseless and noisy cases, separately. For <inline-formula id="inf130">
<mml:math id="m137">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, the configuration variable with the lowest median HV value is identified, and the threshold is placed just above the 50th percentile of the median of the single configurations. This approach ensures that configurations with poor performance are eliminated from consideration. For <inline-formula id="inf131">
<mml:math id="m138">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, the variables with the best distribution of HV are taken into account, and the threshold is set below the 25th percentile of the best configurations. By adopting this approach, configurations that show superior performance are identified.</p>
<p>It is important to note that the thresholds can be adjusted according to the specific requirements of the user, such as a demand for higher efficiency or robustness. However, in this study, the comparative approach is chosen to provide practical and general recommendations for the selected BO configurations. At the conclusion of the evaluation, each individual configuration is classified as qualified, non-qualified, or undetermined for each test function. This analysis allows for a more informed and nuanced assessment of the performance of each configuration in optimizing the test functions. By classifying the configurations in this manner, practical recommendations can be made regarding the suitability of different BO configurations for specific test functions in terms of robustness and efficiency.</p>
</sec>
<sec id="s2-4-4">
<label>2.4.4</label>
<title>Summary of the benchmark study</title>
<p>As a summary, <xref ref-type="fig" rid="F4">Figure 4</xref> provides a final overview of the key characteristics of our study. It encompasses the problem space characteristics, BO algorithm configurations and performance metrics. The evaluation process involves 16 optimization problems, consisting of eight functions with two noise levels. In the BO configuration space, a total of 96 configurations are tested, which is obtained by combining eight kernels, four acquisition functions, and three initial sampling sizes in a full-factorial manner. This results in a thorough evaluation of 1,536 configurations. In order to achieve statistical significance, ten experiments with different random seeds are carried out and evaluated for each configuration. In total, the study comprises 15,360 experiments. The performance metrics include both the metrics for each individual experiment and the classification metrics used to compare the BO configurations with each other. This comprehensive approach allows for a thorough investigation of BO algorithms and provides valuable insights for making informed decisions when selecting suitable configurations for different optimization tasks.</p>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Overview of the benchmark study.</p>
</caption>
<graphic xlink:href="fmtec-05-1614335-g004.tif">
<alt-text content-type="machine-generated">Problem space characteristics include eight test functions and two noise levels, leading to sixteen configurations. BO algorithm configurations consist of eight kernels, four acquisition functions, and three initial sampling sizes, resulting in ninety-six configurations. A total of fifteen thousand three hundred sixty experiments are conducted with ten runs per configuration. Performance space evaluation metrics include dominated hypervolume, robustness, and efficiency with assessments of BO configurations as qualified, undetermined, or non-qualified.</alt-text>
</graphic>
</fig>
<p>The study is conducted in the following way: First, the BO configuration <inline-formula id="inf132">
<mml:math id="m139">
<mml:mrow>
<mml:mi>&#x3b8;</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:mi>K</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>&#x3b1;</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>u</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:mrow>
</mml:math>
</inline-formula> is defined, and a random seed is set to ensure stochastic robustness. Using the Sobol sampling strategy, the test function is evaluated with an initial sampling size of <inline-formula id="inf133">
<mml:math id="m140">
<mml:mrow>
<mml:mi>u</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mi>S</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>M</mml:mi>
<mml:mo>,</mml:mo>
<mml:mi>L</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> to collect the initial dataset. Subsequently, the GP model with the kernel <inline-formula id="inf134">
<mml:math id="m141">
<mml:mrow>
<mml:mi>k</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is fitted. The acquisition function <inline-formula id="inf135">
<mml:math id="m142">
<mml:mrow>
<mml:mi>&#x3b1;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> is called to select candidates for the next evaluation of the test function, yielding the corresponding objective value. We define a fixed budget of trials for each experiment run across all problems, enabling a comparison of the problems and their complexity. The choice of the fixed budget is based on the study of <xref ref-type="bibr" rid="B25">Ilzarbe et al. (2008)</xref>, who investigated the use of DOE in different engineering applications. Of the 77 reviewed articles, 77% ran a number of trials less than or equal to 30 per experiment. Based on this review, a fixed budget of 35 trials per experiment is set for all configurations and test functions. If the number of trials has not yet reached the total budget of 35 trials, an additional trial is performed. This process is repeated until a total of 10 runs with different random seeds have been executed.</p>
</sec>
</sec>
</sec>
<sec sec-type="results" id="s3">
<label>3</label>
<title>Results</title>
<p>In this section, the results of the benchmark study (<xref ref-type="sec" rid="s2-4">Section 2.4</xref>) are outlined according to the following structure: In <xref ref-type="sec" rid="s3-1">Section 3.1</xref>, preliminary results provide an overview of the analysis and narrow subsequent examination. Subsequently, in <xref ref-type="sec" rid="s3-2">Section 3.2</xref>, emphasis is placed on evaluating the responsiveness of the test functions. This analysis offers insight into the overall optimization level achievable for each test function, irrespective of specific BO configurations. The overarching goal is to uncover the importance of selecting appropriate BO configurations for specific test functions, while underscoring variations in their optimization capabilities. In <xref ref-type="sec" rid="s3-3">Section 3.3</xref>, a detailed examination of individual BO configurations is conducted with a focus on specific test functions. This analysis leads to the qualification of individual kernels, acquisition functions, and initial sampling sizes.</p>
<sec id="s3-1">
<label>3.1</label>
<title>Preliminaries</title>
<p>In this Section, an initial overview of the performance of the BO configurations is provided. The aim of these preliminary observations is to gain a first impression of the results and identify any emerging trends in the data, regardless of the specific test function being examined. This analysis helps to narrow down the focus of the study and identify areas of interest for further investigation.</p>
<p>
<xref ref-type="fig" rid="F5">Figure 5</xref> displays the resulting HV of all experiments conducted on each test function, considering both noisy and noiseless scenarios. The HV values are plotted based on two key components: the probability of failure <inline-formula id="inf136">
<mml:math id="m143">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and the running time of successful experiments <inline-formula id="inf137">
<mml:math id="m144">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, as depicted in <xref ref-type="fig" rid="F3">Figure 3</xref>. Each test function has the same number of points (8 kernels x 4 acquisition functions x 3 sizes x 2 noise levels &#x3d; 192), and in some functions, they overlap. It is important to note that the points in all three subfigures represent the same data, but are differentiated by color labels based on the specific configuration. The upper subfigure provides insights into the performance of different kernels, the middle subfigure examines the impact of various acquisition functions, and the lower subfigure explores the influence of the initial sampling size. This evaluation yields three main findings:<list list-type="simple">
<list-item>
<p>&#x2022; The test functions F1, F2, and OTLCircuit consistently exhibit higher HV values (lower values of <inline-formula id="inf138">
<mml:math id="m145">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf139">
<mml:math id="m146">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>) in all combinations of BO configurations. This suggests that these functions are simpler than the other test functions.</p>
</list-item>
<list-item>
<p>&#x2022; Upon closer examination of the initial sampling sizes, it is evident that all experiments conducted with a large initial dataset exhibit higher running times and consequently lower values of HV. Across all test functions, no experiment with this configuration surpasses an HV value of 0.15, regardless of the choice of acquisition function or kernel. This observation can be mathematically explained by the fact that none of the experiment points have values of <inline-formula id="inf140">
<mml:math id="m147">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> below 0.85. This is due to the minimum running time achievable by the (L)-experiments occurring at trial 30, which when divided by the total budget of 35 trials (as explained in <xref ref-type="disp-formula" rid="e6">Equation 6</xref>), results in a value of 0.85.</p>
</list-item>
<list-item>
<p>&#x2022; No other obvious trend can be observed with respect to single kernels or acquisition functions across the different test functions. While there is a tendency of underperformance of Matern05 for F1 or F2, this trend is not consistently observed in all other test functions. The absence of obvious trends, apart from the ones mentioned earlier, highlights the need for further investigation and analysis to gain a deeper understanding of the performance of different configurations and their interrelationships in various test functions.</p>
</list-item>
</list>
</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>HV over all test functions for different kernel functions, acquisition functions, and initial dataset sizes. <bold>(a)</bold> HV over all test functions for different kernel functions. <bold>(b)</bold> HV over all test functions for different acquisition functions. <bold>(c)</bold> HV over all test functions for different initial dataset sizes.</p>
</caption>
<graphic xlink:href="fmtec-05-1614335-g005.tif">
<alt-text content-type="machine-generated">Nine scatter plots arranged in three groups labeled a, b, and c. Each group includes plots for functions F1 to F4 and applications AdaptedBranin, Borehole, OTLCircuit, and WingWeight. Group a uses colors for kernel types, group b uses colors for acquisition functions, and group c uses colors for different input data sizes. Each plot maps \( r_s \) on the x-axis against \( P_f \) on the y-axis, with circles representing different values.</alt-text>
</graphic>
</fig>
<p>Based on these findings, the subsequent evaluation will only include small (S) and medium (M) initial sampling sizes, excluding large ones (L). It has been demonstrated that larger initial sampling sizes result in lower efficiency without offering significant advantages in robustness.</p>
</sec>
<sec id="s3-2">
<label>3.2</label>
<title>Analysis of the responsiveness of the test functions</title>
<p>In this Section, the results of the responsiveness of the test functions under optimization with the selected BO configurations are shown. The evaluation considers the overall performance metric HV, the robustness measured by the probability of failure <inline-formula id="inf141">
<mml:math id="m148">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and the efficiency measured by the running time <inline-formula id="inf142">
<mml:math id="m149">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. Additionally, the analysis distinguishes between noiseless and noisy data, providing insights into the noise sensitivity of the optimization for each test function. <xref ref-type="sec" rid="s3-2-1">Section 3.2.1</xref> addresses the average optimization level that can be achieved for each test function, elucidating the similarities or differences in the optimization capability of the BO configurations. <xref ref-type="sec" rid="s3-2-2">Section 3.2.2</xref> zooms in on individual aspects of robustness and efficiency.</p>
<sec id="s3-2-1">
<label>3.2.1</label>
<title>Optimization level</title>
<p>The overall optimization level for each test function is depicted in <xref ref-type="fig" rid="F6">Figure 6</xref>. It presents boxplot distributions of the HV values for all experiments, categorized by the eight test functions and distinguishing between noiseless and noisy data. The position of the box, indicated by the median HV value, represents the level of optimization that can be achieved for each function. The length of the box, represented by the interquartile range (IQR), provides insights into the range of HV values covered by the BO configurations. Functions with lower IQRs indicate that a wide range of BO configurations can achieve results close to the median HV value, indicating simplicity of the function. On the other hand, functions with higher IQRs suggest that not all configurations are equally effective in optimizing them, indicating a greater variability in performance and the need for more specific configurations. A rough grouping can be made according to the comparison between HV median and IQR:<list list-type="alpha-upper">
<list-item>
<p>F1, F2, OTLCircuit show higher HV and smaller IQR values in both noiseless and noisy cases.</p>
</list-item>
<list-item>
<p>F4, WingWeight show medium HV and higher IQR values, similar in noiseless and noisy cases.</p>
</list-item>
<list-item>
<p>F3, AdaptedBranin, Borehole show lower HV and higher IQR values and differences between noiseless and noisy cases.</p>
</list-item>
</list>
</p>
<fig id="F6" position="float">
<label>FIGURE 6</label>
<caption>
<p>Distribution of HV values for test functions, given noiseless and noisy data. Higher values are better.</p>
</caption>
<graphic xlink:href="fmtec-05-1614335-g006.tif">
<alt-text content-type="machine-generated">Horizontal box plot comparing noiseless and noisy datasets across different scenarios named F1, F2, OTLCircuit, F4, WingWeight, F3, AdaptedBranin, and Borehole. Each pair consists of green (noiseless) and orange (noisy) boxes, indicating variability and central tendency. The x-axis represents &#x22;HV &#x2192; max!&#x22; ranging from 0.0 to 0.8, with several outliers marked as diamonds.</alt-text>
</graphic>
</fig>
<p>We observe the following results for each of these groups:<list list-type="alpha-upper">
<list-item>
<p>The test functions F1, F2, and OTLCircuit exhibit median HV values of approximately 0.7, with OTLCircuit having more outliers towards lower HV values, suggesting there are some configurations that are clearly less qualified than the others. The IQR of all three functions is less than 0.12, indicating a similar optimization potential with most of the BO configurations. Notably, for these three functions, the optimization results on noisy data appear to be similar to those on noiseless data, suggesting a low sensitivity to noise in these particular functions.</p>
</list-item>
<list-item>
<p>The test functions F4 and WingWeight show median HV values around 0.5. For F4, the IQR is 0.19 in noiseless cases and 0.11 in noisy cases. In the case of WingWeight, the IQR is 0.28 for both noiseless and noisy cases, with the noiseless case slightly skewed towards higher HV values. The long whiskers of both functions towards lower HV values indicate the lower performance of some BO configurations.</p>
</list-item>
<list-item>
<p>The functions F3, AdaptedBranin, and Borehole exhibit a more heterogeneous group in terms of optimization results. For F3, the median HV values are around 0.2, and the IQR is approximately 0.2 in both noiseless and noisy cases, making it the test function with the poorest overall performance. In contrast, AdaptedBranin and Borehole exhibit median HV values of 0.56 and 0.45, respectively, with larger IQRs of nearly 0.4 in noiseless cases. When considering noisy data, the median HV values decrease to 0.2 for both functions, and the IQRs reduce to approximately 0.14. Among all the functions, the difference in performance between noiseless and noisy cases is most pronounced for these two functions. These observations highlight the complexity and sensitivity of the optimization process for these particular test functions.</p>
</list-item>
</list>
</p>
<p>In summary, the test functions exhibit varying levels of optimization difficulty across three groups. Group A functions are relatively easier to optimize, Group B functions pose moderate challenges with no significant noise-related differences, while Group C functions, especially noisy cases, present the highest complexity and varied optimization success. These findings emphasize the influence of function complexity and noise on BO performance. This underlines the relevance of a precise algorithm configuration when dealing with complex and noisy optimization problems.</p>
</sec>
<sec id="s3-2-2">
<label>3.2.2</label>
<title>Robustness and efficiency</title>
<p>To gain insights into the robustness and efficiency in all BO configurations, a closer look at the two components of HV is taken: the probability of failure <inline-formula id="inf143">
<mml:math id="m150">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and the running time <inline-formula id="inf144">
<mml:math id="m151">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>. <xref ref-type="fig" rid="F7">Figure 7</xref> provides visual representations of the distribution of <inline-formula id="inf145">
<mml:math id="m152">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf146">
<mml:math id="m153">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> for each test function over all BO configurations. Keeping the grouping defined above, the following observations can be made:<list list-type="alpha-upper">
<list-item>
<p>F1 and F2 consistently achieve a 100% success rate <inline-formula id="inf147">
<mml:math id="m154">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.0</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> across almost all configurations, indicating their high robustness in optimization, independent of the BO configuration used. Similarly, the OTLCircuit function demonstrates a 90% success rate with 75% of the configurations, while the outliers should be considered as non-robust configurations. In terms of efficiency, these three functions can be effectively optimized with 75% of the configurations, as they are able to reach the predefined tolerance level within 0.3 of the total budget. These observations hold true for both noiseless and noisy cases, highlighting the resilience of these functions to noise interference.</p>
</list-item>
<list-item>
<p>F4 exhibits an overall efficiency of around <inline-formula id="inf148">
<mml:math id="m155">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.46</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> across all configurations. While there are some configurations that are not qualified in terms of robustness, most of them perform well in this regard. On the other hand, the WingWeight function shows a narrow IQR for the running time, suggesting that the majority of configurations achieve a high efficiency around <inline-formula id="inf149">
<mml:math id="m156">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.4</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>. However, there are some outliers indicating that certain configurations may struggle to reach optimal efficiency. In terms of the probability of failure, WingWeight displays a wider IQR, indicating a greater variation in robustness across different BO configurations. This highlights the importance of carefully selecting the appropriate BO configuration for this particular test function, as there can be significant differences in performance and robustness among the various configurations.</p>
</list-item>
<list-item>
<p>In terms of efficiency, both AdaptedBranin and Borehole exhibit similar behavior. The median values of the running time <inline-formula id="inf150">
<mml:math id="m157">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> approximate to 0.4 for noiseless cases and shift towards 0.6 for noisy cases. Furthermore, all configurations achieve these efficiency levels with a relatively narrow interquartile range (IQR) of less than 0.17. On the other hand, the F3 function appears to be less affected by noise in terms of efficiency, with median <inline-formula id="inf151">
<mml:math id="m158">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> values of around 0.6 for both noiseless and noisy cases.</p>
</list-item>
</list>
</p>
<fig id="F7" position="float">
<label>FIGURE 7</label>
<caption>
<p>Distribution of probability of failure and running time for test functions, given noiseless and noisy data. Lower values are superior.</p>
</caption>
<graphic xlink:href="fmtec-05-1614335-g007.tif">
<alt-text content-type="machine-generated">Box plots compare the performance of various functions (F1, F2, OTL Circuit, F4, Wing Weight, F3, Adapted Branin, Borehole) across two parameters: minimum \( p_f \) and \( r_s \). Each function has two box plots for noiseless (blue) and noisy (yellow) data, with additional outliers represented as diamonds. The left panel focuses on \( p_f \), while the right focuses on \( r_s \).</alt-text>
</graphic>
</fig>
<p>When it comes to robustness, noiseless AdaptedBranin and Borehole show a probability of failure <inline-formula id="inf152">
<mml:math id="m159">
<mml:mrow>
<mml:mo stretchy="false">(</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">)</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula> of 0.1 with 50% of the BO configurations, indicating relatively high robustness. However, the remaining configurations exhibit lower levels of robustness, as reflected in the wider IQR values. In the noisy cases, AdaptedBranin and Borehole have <inline-formula id="inf153">
<mml:math id="m160">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> median values of 0.5, with IQR values of 0.2 and 0.4, respectively. For the F3 function, the probability of failure is consistently 0.5 in both noiseless and noisy cases, with wider IQR values of 0.6 and 0.4, respectively.</p>
<p>From these observations, it can be concluded that the selection of qualified BO configurations is decisive to achieve a high level of robustness for these test functions. Specifically, for AdaptedBranin and Borehole, certain configurations demonstrate good performance in terms of efficiency and robustness, while others may lead to suboptimal results. Therefore, careful consideration of the specific BO configuration is essential to ensure effective optimization for these test functions.</p>
<p>
<xref ref-type="table" rid="T1">Table 1</xref> provides the numerical values of the results discussed earlier, including the median values and IQRs (in parentheses) of HV, <inline-formula id="inf154">
<mml:math id="m161">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf155">
<mml:math id="m162">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> for each test function, keeping the established groups. These values offer a quantitative representation of the overall performance of the BO configurations on the selected test functions. It can be observed that F1 shows the best optimization results with the highest HV values, while F3 exhibits the worst performance among all test functions. Additionally, a notable difference can be noticed between the noiseless and noisy cases for AdaptedBranin and Borehole, particularly in terms of robustness. The values for probability of failure in the noisy cases are substantially higher compared to the noiseless cases, indicating that the presence of noise has a significant impact on the performance of BO configurations on these functions. This highlights the need to carefully consider the influence of noise when optimizing these test functions using BO.</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Overview of HV, <inline-formula id="inf156">
<mml:math id="m163">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>, and <inline-formula id="inf157">
<mml:math id="m164">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula> median values for each test function and BO configurations (IQR values in parentheses).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th rowspan="2" align="center">Group</th>
<th rowspan="2" align="center">Test function</th>
<th colspan="2" align="center">HV <inline-formula id="inf158">
<mml:math id="m165">
<mml:mrow>
<mml:mi>&#x2191;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th colspan="2" align="center">
<inline-formula id="inf159">
<mml:math id="m166">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mi>&#x2193;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
<th colspan="2" align="center">
<inline-formula id="inf160">
<mml:math id="m167">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>s</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mi>&#x2193;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>
</th>
</tr>
<tr>
<th align="center">Noiseless</th>
<th align="center">Noisy</th>
<th align="center">Noiseless</th>
<th align="center">Noisy</th>
<th align="center">Noiseless</th>
<th align="center">Noisy</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td rowspan="6" align="center">A</td>
<td align="center">F1</td>
<td align="center">0.71</td>
<td align="center">0.71</td>
<td align="center">0.00</td>
<td align="center">0.00</td>
<td align="center">0.29</td>
<td align="center">0.29</td>
</tr>
<tr>
<td align="center">F1 (IQR)</td>
<td align="center">(0.11)</td>
<td align="center">(0.10)</td>
<td align="center">(0.00)</td>
<td align="center">(0.00)</td>
<td align="center">(0.11)</td>
<td align="center">(0.10)</td>
</tr>
<tr>
<td align="center">F2</td>
<td align="center">0.70</td>
<td align="center">0.70</td>
<td align="center">0.00</td>
<td align="center">0.00</td>
<td align="center">0.30</td>
<td align="center">0.30</td>
</tr>
<tr>
<td align="center">F2 (IQR)</td>
<td align="center">(0.09)</td>
<td align="center">(0.07)</td>
<td align="center">(0.00)</td>
<td align="center">(0.00)</td>
<td align="center">(0.09)</td>
<td align="center">(0.07)</td>
</tr>
<tr>
<td align="center">OTLCircuit</td>
<td align="center">0.70</td>
<td align="center">0.70</td>
<td align="center">0.00</td>
<td align="center">0.00</td>
<td align="center">0.29</td>
<td align="center">0.29</td>
</tr>
<tr>
<td align="center">OTLCircuit (IQR)</td>
<td align="center">(0.10)</td>
<td align="center">(0.06)</td>
<td align="center">(0.10)</td>
<td align="center">(0.10)</td>
<td align="center">(0.05)</td>
<td align="center">(0.08)</td>
</tr>
<tr>
<td rowspan="4" align="center">B</td>
<td align="center">F4</td>
<td align="center">0.53</td>
<td align="center">0.46</td>
<td align="center">0.00</td>
<td align="center">0.10</td>
<td align="center">0.46</td>
<td align="center">0.48</td>
</tr>
<tr>
<td align="center">F4 (IQR)</td>
<td align="center">(0.19)</td>
<td align="center">(0.11)</td>
<td align="center">(0.13)</td>
<td align="center">(0.20)</td>
<td align="center">(0.09)</td>
<td align="center">(0.07)</td>
</tr>
<tr>
<td align="center">WingWeight</td>
<td align="center">0.50</td>
<td align="center">0.48</td>
<td align="center">0.10</td>
<td align="center">0.10</td>
<td align="center">0.38</td>
<td align="center">0.43</td>
</tr>
<tr>
<td align="center">WingWeight (IQR)</td>
<td align="center">(0.28)</td>
<td align="center">(0.27)</td>
<td align="center">(0.30)</td>
<td align="center">(0.50)</td>
<td align="center">(0.11)</td>
<td align="center">(0.11)</td>
</tr>
<tr>
<td rowspan="6" align="center">C</td>
<td align="center">F3</td>
<td align="center">0.20</td>
<td align="center">0.18</td>
<td align="center">0.50</td>
<td align="center">0.50</td>
<td align="center">0.61</td>
<td align="center">0.60</td>
</tr>
<tr>
<td align="center">F3 (IQR)</td>
<td align="center">(0.23)</td>
<td align="center">(0.19)</td>
<td align="center">(0.60)</td>
<td align="center">(0.40)</td>
<td align="center">(0.15)</td>
<td align="center">(0.16)</td>
</tr>
<tr>
<td align="center">AdaptedBranin</td>
<td align="center">0.56</td>
<td align="center">0.20</td>
<td align="center">0.10</td>
<td align="center">0.50</td>
<td align="center">0.40</td>
<td align="center">0.60</td>
</tr>
<tr>
<td align="center">AdaptedBranin (IQR)</td>
<td align="center">(0.41)</td>
<td align="center">(0.12)</td>
<td align="center">(0.60)</td>
<td align="center">(0.20)</td>
<td align="center">(0.14)</td>
<td align="center">(0.15)</td>
</tr>
<tr>
<td align="center">Borehole</td>
<td align="center">0.45</td>
<td align="center">0.20</td>
<td align="center">0.05</td>
<td align="center">0.50</td>
<td align="center">0.46</td>
<td align="center">0.63</td>
</tr>
<tr>
<td align="center">Borehole (IQR)</td>
<td align="center">(0.36)</td>
<td align="center">(0.16)</td>
<td align="center">(0.63)</td>
<td align="center">(0.40)</td>
<td align="center">(0.17)</td>
<td align="center">(0.17)</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The following conclusions can be drawn from this analysis:<list list-type="alpha-upper">
<list-item>
<p>F1, F2, and OTLCircuit can be considered relatively simple functions in both noiseless and noisy cases. Regardless of the specific BO configurations used (excluding outliers), these functions exhibit robustness and efficiency.</p>
</list-item>
<list-item>
<p>F4 and WingWeight represent the next level of complexity. There are no significant differences between noiseless and noisy cases. While most configurations achieve the desired efficiency, there are some configurations that lack the desired robustness.</p>
</list-item>
<list-item>
<p>F3, AdaptedBranin, and Borehole are the most complex functions in the problem space. They exhibit lower efficiency levels compared to other functions. Moreover, the behavior differs significantly between noiseless and noisy cases, and certain BO configurations demonstrate higher levels of robustness and efficiency. This underscores the importance of examining the configurations in greater detail.</p>
</list-item>
</list>
</p>
</sec>
</sec>
<sec id="s3-3">
<label>3.3</label>
<title>Analysis of optimization performance of BO configurations</title>
<p>After examining the general responsiveness of the test functions in <xref ref-type="sec" rid="s3-2">Section 3.2</xref>, a more detailed analysis is conducted on the individual BO configurations. The objective is to determine the appropriateness of each configuration for optimizing specific test functions, following the systematic approach described in <xref ref-type="sec" rid="s2-4-3">Section 2.4.3</xref>. Streamlining this process, the analysis seeks to identify whether certain single kernels, acquisition functions, or initial sampling sizes can be clearly classified as qualified or non-qualified, irrespective of their combination. The thresholds for each test function are shown in <xref ref-type="fig" rid="F8">Figure 8</xref>. Configurations with HV values in the red area are classified as non-qualified. Configurations with HV values in the yellow area are classified as undetermined. Configurations with HV values in the green area are classified as qualified for optimization. For each test function, the end of the green area marks the maximal optimization achieved by the best BO configuration on that function. The test functions are grouped according to <xref ref-type="sec" rid="s3-2">Section 3.2</xref>, which provides insights into the complexity of optimizing each test function.</p>
<fig id="F8" position="float">
<label>FIGURE 8</label>
<caption>
<p>HV thresholds for all test function classifications.</p>
</caption>
<graphic xlink:href="fmtec-05-1614335-g008.tif">
<alt-text content-type="machine-generated">Bar chart comparing various algorithms, labeled F1, F2, OTLCircuit, F4, WingWeight, F3, AdaptedBranin, and Borehole, with groups A, B, and C. Bars are color-coded in red, yellow, and green, indicating performance across different metrics on a scale from 0.0 to 1.0.</alt-text>
</graphic>
</fig>
<p>Group A (F1, F2, OTLCircuit) has a wide non-qualified area, indicating that optimizations under <inline-formula id="inf161">
<mml:math id="m168">
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mi>V</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.7</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> are not acceptable for this kind of function and that most of the BO configurations achieve an optimization level higher than <inline-formula id="inf162">
<mml:math id="m169">
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mi>V</mml:mi>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.75</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>. The thresholds of the second group, B (F4, WingWeight), shift toward the middle, with wider undetermined areas. This suggests that the best optimization achievable with appropriate BO configurations lies in the qualified area with values of <inline-formula id="inf163">
<mml:math id="m170">
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> between 0.6 and 0.7, while choosing an inappropriate BO configuration results in <inline-formula id="inf164">
<mml:math id="m171">
<mml:mrow>
<mml:mi>H</mml:mi>
<mml:mi>V</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> values between 0 and 0.46. Group C (F3, AdaptedBranin, Borehole) presents the most complex scenario. With significant differences between non-qualified and qualified areas, the optimization of these functions is expected to be lower, with varying ranges between 0.55 and 0.7 depending on the function. This means that choosing the right combination of BO configurations is essential for achieving the highest possible optimization. To address these inquiries, a detailed analysis of F3 and Borehole in <xref ref-type="sec" rid="s3-3-1">Sections 3.3.1</xref> and <xref ref-type="sec" rid="s3-3-2">3.3.2</xref> is conducted. Further results for the other test functions can be found in <xref ref-type="sec" rid="s12">Supplementary Appendix 1.3</xref>. By analyzing the BO configurations in their different combinations and classifying them into the qualification areas, an assessment on the single kernels, acquisition functions and initial sampling sizes can be made. This is based on the statistical procedure described in <xref ref-type="sec" rid="s2-4-3">Section 2.4.3</xref>, which reduces the non-qualified configurations when they present non-optimal results irrespective of their combination.</p>
<sec id="s3-3-1">
<label>3.3.1</label>
<title>F3 - Analysis and classification</title>
<p>In this section, the results for F3 are presented. The classification ranges are as follows: non-qualified, <inline-formula id="inf165">
<mml:math id="m172">
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:mn>0.0</mml:mn>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>; undetermined, ]<inline-formula id="inf166">
<mml:math id="m173">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:math>
</inline-formula>[ and qualified, <inline-formula id="inf167">
<mml:math id="m174">
<mml:mrow>
<mml:mo stretchy="false">[</mml:mo>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>,</mml:mo>
<mml:mn>0.86</mml:mn>
</mml:mrow>
<mml:mo stretchy="false">]</mml:mo>
</mml:mrow>
</mml:math>
</inline-formula>, with the thresholds <inline-formula id="inf168">
<mml:math id="m175">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.15</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf169">
<mml:math id="m176">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.31</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula>. The configurations falling within each area are identified and analyzed to provide insights into their performance on the F3 test function. In <xref ref-type="fig" rid="F9">Figure 9a</xref>, the results of the classification approach for the noiseless F3 function are depicted. Among the kernels, RBF, Matern05, and Matern15 are classified as non-qualified, as are UCB and PI among the acquisition functions, and medium (M) initial sampling size. After excluding these configurations, the following configurations can be classified as qualified: small (S) initial sampling size, EI and NEI for the acquisition functions, and all kernels with ARD. The classification for the Matern25 kernel remains undetermined.</p>
<fig id="F9" position="float">
<label>FIGURE 9</label>
<caption>
<p>Classification of BO configurations for the noiseless and the noisy F3 test functions. <bold>(a)</bold> Noiseless F3. <bold>(b)</bold> Noisy F3.</p>
</caption>
<graphic xlink:href="fmtec-05-1614335-g009.tif">
<alt-text content-type="machine-generated">Comparison of box plots for various algorithms across two configuration groups in sections &#x22;a&#x22; and &#x22;b&#x22;. Each section shows &#x22;All configurations&#x22; and &#x22;Reduced configurations,&#x22; with hypervolume (HV) on the x-axis. Algorithms include RBF, Matern kernels, ARD variations, EI, UCB, PI, NEI, S, and M. Graphs illustrate data spread and median values, highlighting differences in performance for the configurations.</alt-text>
</graphic>
</fig>
<p>In <xref ref-type="fig" rid="F9">Figure 9b</xref>, the results of the classification approach for the noisy F3 function are presented. In this case, all isotropic kernels are excluded, as well as UCB, PI, and the M initial sampling size. The overall optimization performance is lower than in noiseless cases. Among the remaining configurations, RBF-ARD, Matern15-ARD and Matern25-ARD among the kernels, NEI among the acquisition functions and small initial sampling size can be classified as qualified. The classification for the rest of the configurations remains undetermined. Notably, the same trend can be observed in both the noiseless and noisy cases for the F3 function. The noisy case shifts all values into lower optimization levels, indicating the impact of noise on the performance of the BO configurations. However, despite this shift, the clear recommendations for qualified configurations remain consistent. For both noiseless and noisy cases, kernels with ARD, the EI or NEI acquisition function, and a small (S) initial sampling size are recommended for optimizing the F3 function. This highlights the robustness and effectiveness of these configurations, making them reliable choices for practical applications.</p>
</sec>
<sec id="s3-3-2">
<label>3.3.2</label>
<title>Borehole - Analysis and classification</title>
<p>In this section, the results for Borehole are presented. The qualification thresholds are set to <inline-formula id="inf170">
<mml:math id="m177">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>n</mml:mi>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.19</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> and <inline-formula id="inf171">
<mml:math id="m178">
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi>t</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>q</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>&#x3d;</mml:mo>
<mml:mn>0.54</mml:mn>
</mml:mrow>
</mml:math>
</inline-formula> with minimal and maximal HV values of zero and 0.86. The configurations falling within each area are identified and analyzed to provide insights into their performance on the Borehole test function. <xref ref-type="fig" rid="F10">Figure 10a</xref> illustrates the results of the classification approach for the noiseless Borehole function. Based on the analysis, the Matern05 kernel and the PI acquisition function are classified as non-qualified. Among the initial sampling sizes, both small (S) and medium (M) sizes present similar results. After excluding the non-qualified configurations, RBF-ARD, Matern15-ARD, and Matern25-ARD are classified as qualified. All other configurations fall into the undetermined area, indicating that their performance on the Borehole function is lower and requires a dedicated combination of the other configuration variables.</p>
<fig id="F10" position="float">
<label>FIGURE 10</label>
<caption>
<p>Classification of BO configurations for the noiseless and the noisy Borehole test functions. <bold>(a)</bold> Noiseless Borehole. <bold>(b)</bold> Noisy Borehole.</p>
</caption>
<graphic xlink:href="fmtec-05-1614335-g010.tif">
<alt-text content-type="machine-generated">Box plots depict hypervolume (\(HV\)) analysis for different kernel and acquisition functions across two configurations: all and reduced. The plots, labeled &#x27;a&#x27; and &#x27;b&#x27;, compare parameters like RBF, Mat&#xE9;rn, EI, UCB, and others. Each plot shows variations in \(HV\) with different color-coded sections, highlighting performance differences. Plots on the left represent all configurations, while those on the right focus on reduced configurations, illustrating variations and similarities in performance metrics through their spread and distribution.</alt-text>
</graphic>
</fig>
<p>In <xref ref-type="fig" rid="F10">Figure 10b</xref>, the results of the classification approach for the noisy Borehole function are presented. According to the analysis, the isotropic RBF and Matern05 kernels, as well as the PI acquisition function, are classified as non-qualified for the noisy Borehole function. All other configurations fall into the undetermined area, with no single configuration being qualified for optimization with higher values than HV &#x3d; 0.56. However, it is worth noting that there is one outlier at HV &#x3d; 0.48, indicating that the combination of Matern15-ARD, NEI, and small initial sampling size achieves a comparatively higher performance, although none of these configurations can be individually classified as qualified. There is a clear difference in the reduction of performance between the noiseless and noisy cases. The noisy Borehole function shows a significant decrease in optimization performance compared to its noiseless counterpart. However, the differences between the single configurations in the noisy case are lower than in the noiseless one.</p>
</sec>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<label>4</label>
<title>Discussion</title>
<p>In this section, the results for all test functions are discussed. We evaluate the performance of the BO configurations on all test functions and provide actionable guidelines. <xref ref-type="table" rid="T2">Table 2</xref> presents the classification results of all single BO configurations on the noiseless and noisy test functions. While the classification of BO configurations is performed for each individual test function, some general observations can be made that apply across different test functions. These general observations provide valuable insights into the overall performance of certain configurations, enabling users to make informed decisions and tailor their BO algorithms more effectively to specific optimization tasks. In the following, the results are discussed individually for kernels, acquisition functions, and initial sampling sizes.</p>
<table-wrap id="T2" position="float">
<label>TABLE 2</label>
<caption>
<p>Classification for noiseless (0) and noisy (1) test functions. Green: qualified, yellow: undetermined, red: non-qualified. Kernels with ARD are grayed out because, in one-dimensional functions like F2, there is no difference between isotropic and anisotropic kernels.</p>
</caption>
<table>
<tbody valign="top">
<tr>
<td align="left">
<inline-graphic xlink:href="fmtec-05-1614335-fx1.tif">
<alt-text content-type="machine-generated">Matrix chart comparing different kernels, acquisition functions, and initial sizes with configurations labeled F1, F2&#x2a;, OTL Circuit, F4, Wing Weight, F3, Adapted Branin, and Borehole. Colors indicate data values under groupings A, B, and C, with shades of green, yellow, and red used to represent various performance outcomes. Each configuration includes noise levels of zero and one.</alt-text>
</inline-graphic>
</td>
</tr>
</tbody>
</table>
</table-wrap>
<sec id="s4-1">
<label>4.1</label>
<title>Kernels</title>
<p>Across the test functions, the Matern05 kernel is consistently classified as non-qualified, indicating its poor performance in optimizing the selected problems. However, in the case of AdaptedBranin and F4, Matern05 is categorized as undetermined, suggesting that its performance is not clearly better or worse than other configurations. For AdaptedBranin, it is undetermined for both noiseless and noisy cases, and for F4, it is undetermined only for noisy cases. This indicates that the performance of Matern05 on these two functions is not as straightforward as in other cases, and it may require further investigation to understand its behavior. Overall, Matern05 shows inferior performance across most test functions, making it a less preferable choice for optimizing these problems. Matern05-ARD performs poorly in all test functions and is never classified as the best option in either noiseless or noisy cases. For group C, it seems to be a plausible option in noiseless cases, but not a good option in noisy ones. Its overall performance is consistently inferior compared to other configurations, reinforcing the observation that Matern05-ARD is not a generally recommended choice for optimizing the test functions. Given its consistently poor performance, it is advisable to avoid using Matern05-ARD as a kernel configuration when applying BO to these test functions. RBF performs satisfactorily in optimizing the simple functions of group A. In group B, it is classified as qualified for noiseless functions and undetermined for noisy ones. However, for the complex group C, RBF encounters challenges in optimizing F3, remains undetermined for AdaptedBranin, and is undetermined for noiseless Borehole and non-qualified for its noisy case. Overall, RBF shows decent performance for simple functions but struggles in more complex and noisy scenarios. The RBF-ARD and Matern25-ARD kernels do indeed exhibit similar behaviors in many cases. 
For group A, they both perform well on both noiseless and noisy functions. However, for groups B and C, they show the trend of performing worse on noisy functions compared to noiseless ones. Matern25-ARD struggles particularly on the noisy functions F4, WingWeight, and AdaptedBranin, being consistently classified as non-qualified in these cases. On F3 and Borehole, Matern25-ARD is classified as qualified in noiseless cases and undetermined in noisy cases. On the other hand, RBF-ARD achieves the highest performance on F3 in both noisy and noiseless cases. It generally presents a qualified-undetermined classification in noiseless-noisy cases and is only classified as non-qualified in the noisy case of F4. Overall, both RBF-ARD and Matern25-ARD kernels show potential for optimizing noiseless functions, while presenting an acceptable performance on noisy functions.</p>
<p>Matern25 performs well in group A (simple functions) but encounters difficulties in handling noisy cases for the more complex groups B and C. It is classified as undetermined for the Borehole function in both noiseless and noisy cases. Similar to RBF-ARD and Matern25-ARD, the performance of Matern25 is mixed, with drops in optimization observed in noisy conditions. While Matern25 shows satisfactory performance for simple functions, its ability to handle noise and complexity diminishes for more challenging optimization problems. These findings go hand in hand with the ones of <xref ref-type="bibr" rid="B38">Palar and Shimoyama (2019)</xref> and <xref ref-type="bibr" rid="B28">Le Riche and Picheny (2021)</xref>. Matern15 and Matern15-ARD configurations show promising performance. They are classified as qualified for most test functions, offering good results. The isotropic Matern15 kernel outperforms the anisotropic one in group B (F4 and WingWeight), while the anisotropic version performs better in group C (F3, AdaptedBranin, and Borehole). Indeed, and as a general conclusion, group B seems to be optimized better by isotropic kernels and group C by anisotropic ones. Further investigation of the problems&#x2019; landscape is needed to adequately recommend a certain kernel for a given problem. However, as a general recommendation, Matern15 and Matern15-ARD seem suitable for optimizing a wide range of problems.</p>
</sec>
<sec id="s4-2">
<label>4.2</label>
<title>Acquisition functions</title>
<p>Among all acquisition functions, PI consistently performs worse than all the other options, with the exception of F4 where it is classified in noiseless and noisy cases as undetermined. In the noisy case, UCB and EI are worse, considered non-qualified. These observations highlight the general recommendation of not using PI as a default choice for an unknown process without further investigation. This finding is consistent with previous works <xref ref-type="bibr" rid="B4">Benjamins et al. (2022)</xref>; <xref ref-type="bibr" rid="B3">Ath et al. (2021)</xref>, which have also explained the poor performance of PI due to its greedy nature.</p>
<p>UCB performs better than PI but still encounters difficulties in several functions. UCB is classified as non-qualified for both noiseless and noisy F1 and F3, as well as for noisy F4. It remains undetermined for AdaptedBranin and Borehole and further simpler noisy functions. The performance of UCB could be due to its fixed <inline-formula id="inf172">
<mml:math id="m179">
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula>-parameter, which may prioritize exploitation over exploration, limiting its ability to effectively explore the search space and find the global optimum. As a result, UCB is generally not recommended as a default choice for an unknown process without investigating the influence of the <inline-formula id="inf173">
<mml:math id="m180">
<mml:mrow>
<mml:mi>&#x3b2;</mml:mi>
</mml:mrow>
</mml:math>
</inline-formula> parameter and carefully tuning it for specific optimization tasks. This finding contradicts the outcomes of studies by <xref ref-type="bibr" rid="B40">Qin et al. (2021)</xref> and <xref ref-type="bibr" rid="B11">Diessner et al. (2022)</xref>, who reported better results with UCB than with EI. Further investigation must be made regarding this acquisition function in production fields. Among all acquisition functions, EI and NEI stand out as the best-performing options. EI performs consistently well in all test functions, but it encounters difficulties in optimizing noisy F4 and is undetermined in noisy WingWeight, F3 and both noiseless AdaptedBranin and Borehole. On the other hand, NEI shows the best performance across all test functions, both in noiseless and noisy cases, and although it is undetermined in some functions, it is not outperformed by any other acquisition function. As a result, NEI appears to be the better default choice for new, unknown processes, providing robust and efficient optimization capabilities.</p>
</sec>
<sec id="s4-3">
<label>4.3</label>
<title>Initial sampling sizes</title>
<p>In group A, the small initial sampling size (S) clearly outperforms the medium one (M) in both noiseless and noisy cases. Similarly, in group C, the smaller sampling size tends to perform better than the medium one. Only in the noisy cases of group B, medium initial sampling seems to represent a better option than small initial sampling sizes. This could be due to the lack of exploration at the beginning of the experiment. In general, based on the performance across six of eight test functions (groups A and C), the best option would be to begin with a small (S) initial dataset and prioritize the efficiency of the algorithm, underlining the state of the art presented in the introduction. If, during the course of the experiment, it is observed that the optimization is not sufficient, a couple of exploratory trials could be implemented in the mid-time to compensate for the possible lack of exploration at the initial steps. Such adaptive approaches to enhance a better balance between exploration and exploitation are under investigation <xref ref-type="bibr" rid="B4">Benjamins et al. (2022)</xref>; <xref ref-type="bibr" rid="B24">Hoffman et al. (2010)</xref>.</p>
</sec>
<sec id="s4-4">
<label>4.4</label>
<title>Summary of actionable guidelines for applying BED in manufacturing</title>
<p>In summary, we deduce the following findings and guidelines regarding the configuration of the kernel, acquisition function, and initial sampling size. In general, it appears that there is no one-size-fits-all solution for the different optimization problems, but rather that the different characteristics of the optimization problems place different demands on BO configuration. For kernels, RBF presents a reasonable choice for simple test functions, while we recommend Matern15-ARD as a reasonable default option for complex optimization problems. In principle, it is advisable to use anisotropic (ARD) kernels for more complex problems, such as those typically encountered in manufacturing. Since noise negatively impacts optimization performance, process and measurement noise should be minimized by precisely calibrating both actuators and sensors of the manufacturing process. With regard to acquisition functions, it appears that the exploration behavior has a significant influence on optimization performance, especially in the case of complex problems. Based on their exploratory behavior, we recommend EI and NEI as qualified default options, while PI and the investigated UCB configuration are not suggested. In terms of initial sampling size, we recommend keeping the additionally randomly generated experiment data small and instead leaving the search for the optimum to the BO algorithm with a sufficiently exploratory acquisition function. Already existing datasets for which no further experiments need to be conducted should nevertheless be utilized to initialize the BO algorithms. In addition, it is decisive to perform screening and characterization trials prior to optimization to determine the most critical parameters and associated parameter ranges, thus keeping the dimensionality of the optimization problem as small but also as influential as possible.</p>
</sec>
</sec>
<sec sec-type="conclusion" id="s5">
<label>5</label>
<title>Conclusion</title>
<p>Optimization of production processes is an ongoing challenge for manufacturing companies in order to continuously improve product quality and process productivity, increase the overall equipment effectiveness, and thus remain economically competitive. Process optimization is becoming more complex given that a rising number of process parameters and objectives must be precisely adjusted to each other (e.g., due to the growing efficiency concerns, tighter quality specifications, and shorter product life cycles). Traditional experimental design methods are no longer able to cope with the increasing complexity of process optimization. With Bayesian Optimization (BO), Bayesian Experimental Design (BED) has evolved as an adaptive, data-driven approach to efficiently find optimal parameters in black-box optimization problems in the engineering domain. However, to successfully utilize BO in engineering use-cases, BO algorithms have to be precisely configured to the given problem. To investigate the performance of individual configurations of the BO algorithm for different optimization problems and to unravel insights that allow the derivation of practical guidelines, we designed and conducted a BED benchmark study comprising a total of 15,360 experiments.</p>
<p>As a result of our study, we present an extensive performance and robustness analysis that unveils significant performance differences between individual BO algorithms on different optimization problems. The results of the benchmark study provide empirical references and actionable guidelines for the configuration of BED. The study advocates BED as an adaptive, data-efficient tool for optimizing process parameters, achieving 95% precision within a budget of 35 iterations using the best-qualified configurations at various levels of complexity. We show that there is no universally optimal BO configuration. For complex optimization problems, particularly in manufacturing, anisotropic kernels such as Matern15-ARD are recommended, while exploration-oriented acquisition functions like EI or NEI offer robust default choices. Randomly generated initial experiments should be kept small and instead leave the search for the optimum to a sufficiently exploratory BO algorithm.</p>
<p>Furthermore, the results underscore the significant role of benchmark studies in not only identifying optimal BO configurations but also highlighting an existing research gap in terms of understanding the interplay between the characteristics of production processes and BED performance. The performance of the BO configuration unveils distinct intrinsic patterns in various test functions, indicating shared responses among certain test functions to the optimization process. Importantly, the results of our study fail to unravel a clear relationship between the characteristics of optimization problems and the performance of BO configurations. This suggests that current characteristics do not adequately capture the inherent patterns of the response of test functions to optimization. A focus of applied research must therefore lie on the investigation and identification of production process and data characteristics that correlate strongly with the performance of different BO algorithms, allowing one to make a profound configuration decision for successfully applying BED to new optimization problems. The limitations of our study include its focus on single-objective optimization and that it does not include a further examination of different hyperparameter sets for both kernels and acquisition functions. To further establish BED in production engineering practice and to accelerate process optimization and reduce development costs, our further research focuses on the collaboration between BED and domain experts comprising the integration of expert knowledge into BED. We examine the extension to multi-objective optimization cases and investigate the communication between domain experts to increase comprehensibility, and thus facilitate user acceptance and widespread adoption in industry applications.</p>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The raw data supporting the conclusions of this article will be made available by the authors, without undue reservation.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>LL: Visualization, Writing &#x2013; original draft, Formal Analysis, Project administration, Methodology, Validation, Conceptualization, Investigation, Data curation, Supervision, Writing &#x2013; review and editing, Software. AG: Writing &#x2013; review and editing, Formal Analysis, Writing &#x2013; original draft, Methodology, Visualization, Conceptualization, Validation, Investigation, Software, Data curation. KB: Writing &#x2013; review and editing, Methodology, Conceptualization, Supervision. JE: Writing &#x2013; review and editing, Conceptualization, Data curation, Methodology. AS: Writing &#x2013; review and editing, Supervision. RS: Writing &#x2013; review and editing, Funding acquisition, Resources.</p>
</sec>
<ack>
<title>Acknowledgements</title>
<p>This work is based on the master&#x2019;s thesis of Ana Maria Gonzalez Degetau entitled &#x201c;Bayesian Machine Learning for Data-driven Optimization in Production Processes: A Benchmark Study&#x201d; as part of the joint ICNAP research project &#x201c;evolve&#x201d; between Fraunhofer Institute for Production Technology IPT, Fraunhofer Institute for Laser Technology ILT and Fraunhofer Institute for Microbiology and Applied Ecology IME, and in cooperation with the Institute of Product Development and Engineering Design, Faculty of Process Engineering, Energy and Mechanical Systems of TH K&#xf6;ln - University of Applied Sciences.</p>
</ack>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>The author(s) declared that this work was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s10">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="disclaimer" id="s11">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="s12">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fmtec.2025.1614335/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fmtec.2025.1614335/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="DataSheet1.pdf" id="SM1" mimetype="application/pdf"/>
</sec>
<fn-group>
<fn fn-type="custom" custom-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1933026/overview">Kanak Kalita</ext-link>, Vel Tech Dr. RR &#x26; Dr. SR Technical University, India</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by">
<p>
<bold>Reviewed by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1457346/overview">Otilia Manta</ext-link>, Romanian Academy, Romania</p>
<p>
<ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1207017/overview">Carlo Graziani</ext-link>, Argonne National Laboratory (DOE), United States</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Al-Hafez</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Finding the optimal learning rate using Bayesian optimization</article-title>.</mixed-citation>
</ref>
<ref id="B2">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Arboretti</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Ceccato</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Pegoraro</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Salmaso</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Design of experiments and machine learning for product innovation: a systematic literature review</article-title>. <source>Qual. Reliab. Eng. Int.</source> <volume>38</volume>, <fpage>1131</fpage>&#x2013;<lpage>1156</lpage>. <pub-id pub-id-type="doi">10.1002/qre.3025</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>De Ath</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Everson</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Rahat</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Fieldsend</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Greed is good: exploration and exploitation trade-offs in Bayesian optimisation</article-title>. <source>ACM Trans. Evol. Learn. Optim.</source> <volume>1</volume>, <fpage>1</fpage>&#x2013;<lpage>22</lpage>. <pub-id pub-id-type="doi">10.1145/3425501</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Benjamins</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Raponi</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Jankovic</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>van der Blom</surname>
<given-names>K.</given-names>
</name>
<name>
<surname>Santoni</surname>
<given-names>M. L.</given-names>
</name>
<name>
<surname>Lindauer</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>PI is back! Switching acquisition functions in Bayesian optimization</article-title>.
</mixed-citation>
</ref>
<ref id="B5">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Berger-Tal</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Nathan</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Meron</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Saltz</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2014</year>). <article-title>The exploration-exploitation dilemma: a multidisciplinary framework</article-title>. <source>PLOS ONE</source> <volume>9</volume>, <fpage>1</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0095693</pub-id>
<pub-id pub-id-type="pmid">24756026</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Bossek</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Doerr</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Kerschke</surname>
<given-names>P.</given-names>
</name>
</person-group> (<year>2020a</year>). &#x201c;<article-title>Initial design strategies and their effects on sequential model-based optimization: an exploratory case study based on BBOB</article-title>,&#x201d; in <source>Proceedings of the 2020 genetic and evolutionary computation conference</source>, <fpage>778</fpage>&#x2013;<lpage>786</lpage>. <pub-id pub-id-type="doi">10.1145/3377930.3390155</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Bossek</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Kerschke</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Trautmann</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2020b</year>). <article-title>A multi-objective perspective on performance assessment and automated selection of single-objective optimization algorithms</article-title>. <source>Appl. Soft Comput.</source> <volume>88</volume>, <fpage>105901</fpage>. <pub-id pub-id-type="doi">10.1016/j.asoc.2019.105901</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Deneault</surname>
<given-names>J. R.</given-names>
</name>
<name>
<surname>Chang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Myung</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Hooper</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Armstrong</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Pitt</surname>
<given-names>M.</given-names>
</name>
<etal/>
</person-group> (<year>2021</year>). <article-title>Toward autonomous additive manufacturing: Bayesian optimization on a 3D printer</article-title>. <source>MRS Bull.</source> <volume>46</volume>, <fpage>566</fpage>&#x2013;<lpage>575</lpage>. <pub-id pub-id-type="doi">10.1557/s43577-021-00051-1</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Dieb</surname>
<given-names>T. M.</given-names>
</name>
<name>
<surname>Tsuda</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2018</year>). in <source>Machine learning-based experimental design in materials science</source>. Editor <person-group person-group-type="editor">
<name>
<surname>Tanaka</surname>
<given-names>I.</given-names>
</name>
</person-group> (<publisher-loc>Singapore</publisher-loc>: <publisher-name>Springer Singapore</publisher-name>), <fpage>65</fpage>&#x2013;<lpage>74</lpage>. <pub-id pub-id-type="doi">10.1007/978-981-10-7617-6_4</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Diessner</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>O&#x2019;Connor</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Wynn</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Laizet</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Guan</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Wilson</surname>
<given-names>K.</given-names>
</name>
<etal/>
</person-group> (<year>2022</year>). <article-title>Investigating bayesian optimization for expensive-to-evaluate black box functions: application in fluid dynamics</article-title>. <source>Front. Appl. Math. Statistics</source> <volume>8</volume>, <fpage>1076296</fpage>. <pub-id pub-id-type="doi">10.3389/fams.2022.1076296</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Durakovic</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>2017</year>). &#x201c;<source>Design of experiments application, concepts, examples: state of the art</source>,&#x201d; <volume>5</volume>. <publisher-name>Periodicals of Engineering and Natural Sciences PEN. International University of Sarajevo</publisher-name>. <pub-id pub-id-type="doi">10.21533/pen.v5i3.145</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Duris</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Kennedy</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Hanuka</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Shtalenkova</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Edelen</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Egger</surname>
<given-names>A.</given-names>
</name>
<etal/>
</person-group> (<year>2020</year>). <article-title>Bayesian optimization of a free-electron laser</article-title>. <source>Phys. Rev. Lett.</source> <volume>124</volume>, <fpage>124801</fpage>. <pub-id pub-id-type="doi">10.1103/PhysRevLett.124.124801</pub-id>
<pub-id pub-id-type="pmid">32281869</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Duvenaud</surname>
<given-names>D. K.</given-names>
</name>
</person-group> (<year>2014</year>). <source>Automatic model construction with Gaussian processes</source>. <publisher-name>Doctoral dissertation, University of Cambridge</publisher-name>. <comment>Available online at: <ext-link ext-link-type="uri" xlink:href="https://api.semanticscholar.org/CorpusID:107112403">https://api.semanticscholar.org/CorpusID:107112403</ext-link>
</comment>
</mixed-citation>
</ref>
<ref id="B15">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Finck</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Hansen</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Ros</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Auger</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2010</year>). <article-title>Real-parameter black-box optimization benchmarking 2010: presentation of the noiseless functions</article-title>.
</mixed-citation>
</ref>
<ref id="B16">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Forrester</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>S&#xf3;bester</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Keane</surname>
<given-names>A. J.</given-names>
</name>
</person-group> (<year>2008</year>). <source>Engineering design via surrogate modelling</source>. <publisher-loc>Chichester, UK</publisher-loc>: <publisher-name>John Wiley and Sons</publisher-name>. <pub-id pub-id-type="doi">10.1002/9780470770801</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Frazier</surname>
<given-names>P. I.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>A tutorial on Bayesian optimization</article-title>.
</mixed-citation>
</ref>
<ref id="B18">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Freiesleben</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Keim</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Grutsch</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Machine learning and design of experiments: alternative approaches or complementary methodologies for quality improvement?</article-title> <source>Qual. Reliab. Eng. Int.</source> <volume>36</volume>, <fpage>1837</fpage>&#x2013;<lpage>1848</lpage>. <pub-id pub-id-type="doi">10.1002/qre.2579</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gan</surname>
<given-names>W.</given-names>
</name>
<name>
<surname>Ji</surname>
<given-names>Z.</given-names>
</name>
<name>
<surname>Liang</surname>
<given-names>Y.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Acquisition functions in Bayesian optimization</article-title>. <source>Int. Conf. Big Data &#x26; Artif. Intell. and Software Eng.</source> <volume>2</volume>, <fpage>129</fpage>&#x2013;<lpage>135</lpage>. <pub-id pub-id-type="doi">10.1109/icbase53849.2021.00032</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Garnett</surname>
<given-names>R.</given-names>
</name>
</person-group> (<year>2023</year>). <source>Bayesian optimization</source>. <publisher-name>United Kingdom: TJ Books Limited, Padstow Cornwall</publisher-name>.</mixed-citation>
</ref>
<ref id="B21">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Greenhill</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Rana</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Gupta</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Vellanki</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Venkatesh</surname>
<given-names>S.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Bayesian optimization for adaptive experimental design: a review</article-title>. <source>IEEE Access</source> <volume>8</volume>, <fpage>13937</fpage>&#x2013;<lpage>13948</lpage>. <pub-id pub-id-type="doi">10.1109/ACCESS.2020.2966228</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guidetti</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Rupenyan</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Fassl</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Nabavi</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Lygeros</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Advanced manufacturing configuration by sample-efficient batch bayesian optimization</article-title>. <source>IEEE Robotics Automation Lett.</source> <volume>7</volume>, <fpage>11886</fpage>&#x2013;<lpage>11893</lpage>. <pub-id pub-id-type="doi">10.1109/LRA.2022.3208370</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Haghanifar</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>McCourt</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Cheng</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Wuenschell</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Ohodnicki</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Leu</surname>
<given-names>P. W.</given-names>
</name>
</person-group> (<year>2020</year>). <article-title>Discovering high-performance broadband and broad angle antireflection surfaces by machine learning</article-title>. <source>Optica</source> <volume>7</volume>, <fpage>784</fpage>. <pub-id pub-id-type="doi">10.1364/OPTICA.387938</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Hoffman</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Brochu</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>de Freitas</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2010</year>). <source>Portfolio allocation for bayesian optimization</source>. <publisher-name>UAI</publisher-name>. <pub-id pub-id-type="doi">10.48550/arXiv.1009.5419</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ilzarbe</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>&#xc1;lvarez</surname>
<given-names>M. J.</given-names>
</name>
<name>
<surname>Viles</surname>
<given-names>E.</given-names>
</name>
<name>
<surname>Tanco</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2008</year>). <article-title>Practical applications of design of experiments in the field of engineering: a bibliographical review</article-title>. <source>Qual. Reliab. Eng. Int.</source> <volume>24</volume>, <fpage>417</fpage>&#x2013;<lpage>428</lpage>. <pub-id pub-id-type="doi">10.1002/qre.909</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jankovic</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Chaudhary</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Goia</surname>
<given-names>F.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Designing the design of experiments (doe) &#x2013; an investigation on the influence of different factorial designs on the characterization of complex systems</article-title>. <source>Energy Build.</source> <volume>250</volume>, <fpage>111298</fpage>. <pub-id pub-id-type="doi">10.1016/j.enbuild.2021.111298</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Le Riche</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Picheny</surname>
<given-names>V.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Revisiting bayesian optimization in the light of the coco benchmark</article-title>. <source>Struct. Multidiscip. Optim.</source> <volume>64</volume>, <fpage>3063</fpage>&#x2013;<lpage>3087</lpage>. <pub-id pub-id-type="doi">10.1007/s00158-021-02977-1</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Leyendecker</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Nausch</surname>
<given-names>H.</given-names>
</name>
<name>
<surname>Wergers</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Scheffler</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Schmitt</surname>
<given-names>R. H.</given-names>
</name>
</person-group> (<year>2025</year>). <article-title>Bayesian experimental design for optimizing medium composition and biomass formation of tobacco by-2 cell suspension cultures in stirred-tank bioreactors</article-title>. <source>Front. Bioeng. Biotechnol.</source> <volume>13</volume>, <fpage>1617319</fpage>. <pub-id pub-id-type="doi">10.3389/fbioe.2025.1617319</pub-id>
<pub-id pub-id-type="pmid">41050446</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Liang</surname>
<given-names>Q.</given-names>
</name>
<name>
<surname>Lai</surname>
<given-names>L.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Scalable bayesian optimization accelerates process optimization of penicillin production</article-title>,&#x201d; in <source>NeurIPS 2021 AI for science workshop</source>.</mixed-citation>
</ref>
<ref id="B31">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Logothetis</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Wynn</surname>
<given-names>H. P.</given-names>
</name>
</person-group> (<year>1989</year>). <source>Quality through design: experimental design, off-line quality control and Taguchi&#x2019;s contributions</source>, <volume>7</volume>. <publisher-loc>Oxford</publisher-loc>: <publisher-name>Clarendon Press</publisher-name>.</mixed-citation>
</ref>
<ref id="B32">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Maurya</surname>
<given-names>A.</given-names>
</name>
</person-group> (<year>2016</year>). &#x201c;<article-title>Bayesian optimization for predicting rare internal failures in manufacturing processes</article-title>,&#x201d; in <source>2016 IEEE international conference on big data (big data)</source> (<publisher-name>IEEE</publisher-name>), <fpage>2036</fpage>&#x2013;<lpage>2045</lpage>. <pub-id pub-id-type="doi">10.1109/BigData.2016.7840827</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Mersmann</surname>
<given-names>O.</given-names>
</name>
<name>
<surname>Preuss</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Trautmann</surname>
<given-names>H.</given-names>
</name>
</person-group> (<year>2010</year>). &#x201c;<article-title>Benchmarking evolutionary algorithms: towards exploratory landscape analysis</article-title>,&#x201d; in <source>Parallel problem solving from Nature, PPSN XI</source>. Editors <person-group person-group-type="editor">
<name>
<surname>Schaefer</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Cotta</surname>
<given-names>C.</given-names>
</name>
<etal/>
</person-group> (<publisher-loc>Berlin, Heidelberg</publisher-loc>: <publisher-name>Springer Berlin Heidelberg</publisher-name>), <fpage>73</fpage>&#x2013;<lpage>82</lpage>.</mixed-citation>
</ref>
<ref id="B34">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Mo&#x10d;kus</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>1975</year>). &#x201c;<article-title>On bayesian methods for seeking the extremum</article-title>,&#x201d; in <source>Optimization techniques IFIP technical conference Novosibirsk, July 1&#x2013;7, 1974</source>. Editors <person-group person-group-type="editor">
<name>
<surname>Goos</surname>
<given-names>G.</given-names>
</name>
<name>
<surname>Hartmanis</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Brinch Hansen</surname>
<given-names>P.</given-names>
</name>
<name>
<surname>Gries</surname>
<given-names>D.</given-names>
</name>
<name>
<surname>Moler</surname>
<given-names>C.</given-names>
</name>
<name>
<surname>Seegm&#xfc;ller</surname>
<given-names>G.</given-names>
</name>
<etal/>
</person-group> (<publisher-loc>Berlin, Heidelberg</publisher-loc>: <publisher-name>Springer Berlin Heidelberg</publisher-name>), <fpage>400</fpage>&#x2013;<lpage>404</lpage>. <pub-id pub-id-type="doi">10.1007/3-540-07165-2_55</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Mockus</surname>
<given-names>J.</given-names>
</name>
</person-group> (<year>1989</year>). <source>Bayesian approach to global optimization: theory and applications, vol. 37 of mathematics and its applications Soviet series</source>. <publisher-loc>Dordrecht</publisher-loc>: <publisher-name>Kluwer Acad. Publ</publisher-name>.</mixed-citation>
</ref>
<ref id="B36">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Montgomery</surname>
<given-names>D. C.</given-names>
</name>
</person-group> (<year>2020</year>). <source>Design and analysis of experiments</source>. <edition>10th Edn</edition>. <publisher-loc>Hoboken, NJ</publisher-loc>: <publisher-name>Wiley</publisher-name>.</mixed-citation>
</ref>
<ref id="B38">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Palar</surname>
<given-names>P. S.</given-names>
</name>
<name>
<surname>Shimoyama</surname>
<given-names>K.</given-names>
</name>
</person-group> (<year>2019</year>). <article-title>Efficient global optimization with ensemble and selection of kernel functions for engineering design</article-title>. <source>Struct. Multidiscip. Optim.</source> <volume>59</volume>, <fpage>93</fpage>&#x2013;<lpage>116</lpage>. <pub-id pub-id-type="doi">10.1007/s00158-018-2053-9</pub-id>
</mixed-citation>
</ref>
<ref id="B39">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Picheny</surname>
<given-names>V.</given-names>
</name>
<name>
<surname>Wagner</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Ginsbourger</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2013</year>). <article-title>A benchmark of kriging-based infill criteria for noisy optimization</article-title>. <source>Struct. Multidiscip. Optim.</source> <volume>48</volume>, <fpage>607</fpage>&#x2013;<lpage>626</lpage>. <pub-id pub-id-type="doi">10.1007/s00158-013-0919-4</pub-id>
</mixed-citation>
</ref>
<ref id="B40">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Qin</surname>
<given-names>N.</given-names>
</name>
<name>
<surname>Zhou</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>J.</given-names>
</name>
<name>
<surname>Shen</surname>
<given-names>C.</given-names>
</name>
</person-group> (<year>2021</year>). &#x201c;<article-title>Bayesian optimization: model comparison with different benchmark functions</article-title>,&#x201d; in <source>2021 international conference on signal processing and machine learning (CONF-SPML)</source> (<publisher-name>IEEE</publisher-name>), <fpage>329</fpage>&#x2013;<lpage>333</lpage>. <pub-id pub-id-type="doi">10.1109/CONF-SPML54095.2021.00071</pub-id>
</mixed-citation>
</ref>
<ref id="B41">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Rainforth</surname>
<given-names>T.</given-names>
</name>
<name>
<surname>Foster</surname>
<given-names>A.</given-names>
</name>
<name>
<surname>Ivanova</surname>
<given-names>D. R.</given-names>
</name>
<name>
<surname>Smith</surname>
<given-names>F. B.</given-names>
</name>
</person-group> (<year>2023</year>). <article-title>Modern Bayesian experimental design</article-title>.
</mixed-citation>
</ref>
<ref id="B42">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Sarabia</surname>
<given-names>L.</given-names>
</name>
<name>
<surname>Ortiz</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2009</year>). &#x201c;<article-title>1.12 - response surface methodology</article-title>,&#x201d; in <source>Comprehensive chemometrics</source>. Editors <person-group person-group-type="editor">
<name>
<surname>Brown</surname>
<given-names>S. D.</given-names>
</name>
<name>
<surname>Tauler</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Walczak</surname>
<given-names>B.</given-names>
</name>
</person-group> (<publisher-loc>Oxford</publisher-loc>: <publisher-name>Elsevier</publisher-name>), <fpage>345</fpage>&#x2013;<lpage>390</lpage>. <pub-id pub-id-type="doi">10.1016/B978-044452701-1.00083-1</pub-id>
</mixed-citation>
</ref>
<ref id="B43">
<mixed-citation publication-type="book">
<person-group person-group-type="author">
<name>
<surname>Schmitt</surname>
<given-names>R.</given-names>
</name>
<name>
<surname>Pfeifer</surname>
<given-names>T.</given-names>
</name>
</person-group> (<year>2015</year>). <source>Qualit&#xe4;tsmanagement: Strategien - Methoden - Techniken</source>, <volume>5</volume>. <publisher-loc>M&#x00FC;nchen</publisher-loc>: <publisher-name>Hanser eLibrary</publisher-name>. <pub-id pub-id-type="doi">10.3139/9783446440821</pub-id>
</mixed-citation>
</ref>
<ref id="B44">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Smucker</surname>
<given-names>B.</given-names>
</name>
<name>
<surname>Krzywinski</surname>
<given-names>M.</given-names>
</name>
<name>
<surname>Altman</surname>
<given-names>N.</given-names>
</name>
</person-group> (<year>2018</year>). <article-title>Optimal experimental design</article-title>. <source>Nat. Methods</source> <volume>15</volume>, <fpage>559</fpage>&#x2013;<lpage>560</lpage>. <pub-id pub-id-type="doi">10.1038/s41592-018-0083-2</pub-id>
<pub-id pub-id-type="pmid">30065369</pub-id>
</mixed-citation>
</ref>
<ref id="B47">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Surjanovic</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Bingham</surname>
<given-names>D.</given-names>
</name>
</person-group> (<year>2021</year>). <article-title>Virtual library of simulation experiments: test functions and datasets</article-title>.
</mixed-citation>
</ref>
<ref id="B48">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Tang</surname>
<given-names>B.</given-names>
</name>
</person-group> (<year>1993</year>). <article-title>Orthogonal array-based Latin hypercubes</article-title>. <source>J. Am. Stat. Assoc.</source> <volume>88</volume>, <fpage>1392</fpage>&#x2013;<lpage>1397</lpage>. <pub-id pub-id-type="doi">10.1080/01621459.1993.10476423</pub-id>
</mixed-citation>
</ref>
<ref id="B49">
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Wang</surname>
<given-names>X.</given-names>
</name>
<name>
<surname>Jin</surname>
<given-names>Y.</given-names>
</name>
<name>
<surname>Schmitt</surname>
<given-names>S.</given-names>
</name>
<name>
<surname>Olhofer</surname>
<given-names>M.</given-names>
</name>
</person-group> (<year>2022</year>). <article-title>Recent advances in Bayesian optimization</article-title>.
</mixed-citation>
</ref>
</ref-list>
</back>
</article>