<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Archiving and Interchange DTD v2.3 20070202//EN" "archivearticle.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="systematic-review">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Psychol.</journal-id>
<journal-title>Frontiers in Psychology</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Psychol.</abbrev-journal-title>
<issn pub-type="epub">1664-1078</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fpsyg.2022.1063607</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Psychology</subject>
<subj-group>
<subject>Systematic Review</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>To kill or not to kill: A systematic literature review of high-stakes moral decision-making measures and their psychometric properties</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Ni</surname> <given-names>Benjamin Kai</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<xref ref-type="author-notes" rid="fn002"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1976926/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Burns</surname> <given-names>Bruce D.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Mak</surname> <given-names>Karina K. L.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/645977/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Lah</surname> <given-names>Suncica</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Silva</surname> <given-names>Diego S.</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author">
<name><surname>Goldwater</surname> <given-names>Micah B.</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="author-notes" rid="fn003"><sup>&#x02020;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/155590/overview"/>
</contrib>
<contrib contrib-type="author">
<name><surname>Kleitman</surname> <given-names>Sabina</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/30945/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>School of Psychology, University of Sydney</institution>, <addr-line>Sydney, NSW</addr-line>, <country>Australia</country></aff>
<aff id="aff2"><sup>2</sup><institution>School of Public Health, University of Sydney</institution>, <addr-line>Sydney, NSW</addr-line>, <country>Australia</country></aff>
<author-notes>
<fn fn-type="edited-by"><p>Edited by: Cesar Merino-Soto, Universidad de San Martin de Porres, Peru</p></fn>
<fn fn-type="edited-by"><p>Reviewed by: Christine Linda Cook, National Chengchi University, Taiwan; Franca Crippa, University of Milano-Bicocca, Italy</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Benjamin Kai Ni &#x02709; <email>beni6908&#x00040;uni.sydney.edu.au</email></corresp>
<fn fn-type="other" id="fn001"><p>This article was submitted to Quantitative Psychology and Measurement, a section of the journal Frontiers in Psychology</p></fn>
<fn fn-type="other" id="fn002"><p>&#x02020;ORCID: Benjamin Kai Ni <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0002-0197-3854">orcid.org/0000-0002-0197-3854</ext-link></p></fn>
<fn fn-type="other" id="fn003"><p>Micah B. Goldwater <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0001-8052-9497">orcid.org/0000-0001-8052-9497</ext-link></p></fn></author-notes>
<pub-date pub-type="epub">
<day>09</day>
<month>01</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2022</year>
</pub-date>
<volume>13</volume>
<elocation-id>1063607</elocation-id>
<history>
<date date-type="received">
<day>07</day>
<month>10</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>05</day>
<month>12</month>
<year>2022</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2023 Ni, Burns, Mak, Lah, Silva, Goldwater and Kleitman.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Ni, Burns, Mak, Lah, Silva, Goldwater and Kleitman</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license> </permissions>
<abstract>
<sec>
<title>Introduction</title>
<p>The present systematic review investigates the psychological tools available for capturing high-stakes decisions involving life-death content and their psychometric properties. Valid measurement of these individual differences will provide crucial information in the personnel selection and training in fields where high-stakes moral issues exist (e.g., military, medicine). To our knowledge, this is the first systematic examination of such instruments.</p>
</sec>
<sec>
<title>Methods</title>
<p>Systematic searches of 6 electronic databases were conducted according to the PRISMA guidelines. An appraisal tool evaluated the quality of identified measures. Twenty studies met pre-determined inclusion criteria. Moral decision-making was assessed with either a self-report scale (<italic>n</italic> = 3) or moral dilemmas (<italic>n</italic> = 17).</p>
</sec>
<sec>
<title>Results</title>
<p>The findings identified two measures, the Defining Issues Test and the Oxford Utilitarianism Scale, as psychometrically sound measures of moral decision-making. However, they are unlikely to be considered &#x0201C;gold standard&#x0201D; measures due to their theoretically specific, but limited, scope. Overall, the findings suggest that research in the area has been scattered. There is a lack of consensus on the definition of moral decision-making, and a lack of cross-validation on how different measures of moral decision-making relate to each other. This presents a gap between theory and empirical measurement in moral decision-making. Further work is needed for a unified conceptualization of moral decision-making to pave the way to both theory development and the development of well-validated measurement tools, and this review provides a critical foundation for both.</p>
</sec></abstract>
<kwd-group>
<kwd>moral decision-making</kwd>
<kwd>moral reasoning</kwd>
<kwd>moral dilemma</kwd>
<kwd>individual difference</kwd>
<kwd>psychometrics</kwd>
<kwd>measurement</kwd>
</kwd-group>
<counts>
<fig-count count="1"/>
<table-count count="5"/>
<equation-count count="0"/>
<ref-count count="53"/>
<page-count count="27"/>
<word-count count="15738"/>
</counts>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>Introduction</title>
<p>Our world is full of volatile situations, including the current pandemic and recent wars, in which individuals must make high-stakes moral decisions. For example, in the first wave of the COVID-19 pandemic where medical staff and resources were overwhelmed, doctors and nurses were faced with moral decisions about whether to prioritize younger patients (who have a greater chance of survival) and whether to prohibit family visits to patients in ICU (Kuylen et al., <xref ref-type="bibr" rid="B33">2021</xref>).</p>
<p>Given how important these consequences can be, it is important to ask if there are individual differences in moral decision-making in high-stakes situations which involve life-death decisions, and can we measure such differences? The existence of individual differences would imply that there are distinct and stable patterns in how people think, feel, and, importantly, act morally. Measuring individual differences would allow us to better understand, capture, and predict moral decision-making. There are diverse government and private institutions that need to have established protocols of screening and selecting people who face high-stakes moral dilemmas (e.g., dealing with the sick, prisoners, victims of war) often in high-pressure situations. Having standardized and systematic information on measuring not moral reasoning, but moral decision-making is essential to generate and sustain trust in such organizations and to set the ground rules for their personnel. Thus, a systematic assessment of moral decision-making measures is vital as it will provide a much-needed foundation for screening and selection of personnel in fields where encounters with contentious high-stakes moral issues are likely, such as military, medical, and legal professions.</p>
<sec>
<title>Moral decision-making: Concept and definition</title>
<p>Moral decision-making refers to any decisions made within the &#x0201C;moral domain&#x0201D;, including judgments, evaluations, and response choices (Smetana, <xref ref-type="bibr" rid="B48">2006</xref>). However, the term &#x0201C;moral decision-making&#x0201D; has not always been used in past research, and instead terms such as &#x0201C;moral reasoning,&#x0201D; &#x0201C;moral judgment,&#x0201D; and &#x0201C;moral cognition&#x0201D; (Garrigan et al., <xref ref-type="bibr" rid="B19">2018</xref>) have been used, sometimes interchangeably. Moral reasoning has also been defined as decision-making that includes moral and ethical components (Bucciarelli et al., <xref ref-type="bibr" rid="B5">2008</xref>; Mart&#x000ED;-Vilar et al., <xref ref-type="bibr" rid="B40">2021</xref>). However, moral decision-making may not be dependent on reasoning and cognition alone (Richardson, <xref ref-type="bibr" rid="B47">2018</xref>), rather emotion and intuition may also play key roles (Greene et al., <xref ref-type="bibr" rid="B22">2001</xref>; Haidt, <xref ref-type="bibr" rid="B23">2001</xref>). Therefore, we consider moral decision-making an umbrella term that encompasses reasoning, emotions, and intuitions regarding ethical and moral questions.</p>
</sec>
<sec>
<title>Which is the focus, action, or actor?</title>
<p>Most contemporary research in moral decision-making has employed an act-based approach (Uhlmann et al., <xref ref-type="bibr" rid="B50">2015</xref>). Specifically, researchers are interested in how individuals come to believe whether an <italic>action</italic> is morally right or wrong. A classic example is the Trolley Dilemma (Foot, <xref ref-type="bibr" rid="B18">1967</xref>) where one must consider whether a trolley, which is on track to kill several people, should be actively diverted to another track where it will kill one person instead. Such sacrificial dilemmas have been adopted from philosophy and used to empirically probe what factors are taken into account in moral decision-making (see Christensen and Gomila, <xref ref-type="bibr" rid="B10">2012</xref> for a review). In the act-based approach, it is the characteristics of the situation that are the focus of moral decision-making.</p>
<p>In contrast to the act-based approach, recent research suggests that a <italic>person</italic>-centered approach may yield a better understanding of people&#x00027;s moral judgment. This approach focuses on individuals as the unit of analysis for moral evaluations rather than on acts (Uhlmann et al., <xref ref-type="bibr" rid="B50">2015</xref>). This approach proposes that people are fundamentally motivated to acquire information about the moral character of others. Therefore, the features of an act that seem most informative of character often hold more weight than either the consequences of the act or whether a moral rule has been broken. Indeed, there is growing evidence to suggest that when faced with moral judgements people are focused on making inferences about moral character (Pizarro and Tannenbaum, <xref ref-type="bibr" rid="B44">2012</xref>; Goodwin et al., <xref ref-type="bibr" rid="B21">2014</xref>). Such moral character inferences are unlikely to be only a product of the features of the situation but also the traits of the maker of the inferences. This suggests that there could be robust individual differences in moral decision-making. That is, people make <italic>systematic</italic> choices about what is morally right or wrong despite varying situational factors. This paper systematically reviews evidence for individual differences in act-based moral research.</p>
</sec>
<sec>
<title>Sources of individual difference in moral decision-making</title>
<p>There are two influential theories in moral psychology that may elucidate how individuals may differ in moral decision-making&#x02014;Kohlberg&#x00027;s (<xref ref-type="bibr" rid="B31">1984</xref>) Moral Development Theory and Haidt&#x00027;s (<xref ref-type="bibr" rid="B23">2001</xref>) Social Intuitionist Theory. Given that there could be measures of individual differences in moral decision-making that are based on each of these theories, we should first describe them.</p>
<sec>
<title>Moral development theory</title>
<p>Kohlberg&#x00027;s Moral Development Theory posits that moral development entails employing increasingly complex <italic>cognitive</italic> rationales for moral decision-making (Lapsley, <xref ref-type="bibr" rid="B34">1992</xref>). The increasing complexity in <italic>cognitive</italic> processes is detailed in six qualitatively different stages, where moral development entails progression from the first to the last stage (Mathes, <xref ref-type="bibr" rid="B41">2021</xref>).</p>
<p>In the first two stages, known as the pre-conventional stages, moral decision-making is concerned with instrumental purposes. In other words, individuals&#x00027; moral behaviors in these stages are acted out for the purpose of avoiding punishment and obtaining pleasure (Mathes, <xref ref-type="bibr" rid="B41">2021</xref>). In Stages 3 and 4, known as the conventional stages, the moral behaviors of individuals are concerned with social norms and conventions, as well as interpersonal (e.g., family and friends) and social (e.g., authority) approval (Blasi, <xref ref-type="bibr" rid="B3">1990</xref>). In the last two stages, known as the post-conventional stages, moral decision-making is driven by clearly defined moral principles that are independent of the authority of groups holding these principles and one&#x00027;s identification with these groups (&#x000CD;saksson, <xref ref-type="bibr" rid="B26">1979</xref>).</p>
<p>According to Moral Development Theory, individual differences in moral decision-making may arise from differences in moral development and maturity. However, there are several unresolved issues with this approach. First, the Moral Development Theory argues for a universal sequential trajectory of development. Attributing individual differences in moral decision-making merely to levels of moral maturity is almost certainly an oversimplification. Second, the theory&#x00027;s narrow focus on complex cognitive processes potentially excludes other factors (e.g., emotion, intuition) important to moral decision-making. Research on &#x0201C;moral dumbfounding&#x0201D; found that people can judge offensive yet harmless acts (e.g., incest with birth control) to be wrong but are unable to explain their reasoning or provide a justification (Haidt et al.).<xref ref-type="fn" rid="fn0001"><sup>1</sup></xref> Therefore, Haidt et al. (see text footnote 1) argued that judgment in moral dumbfounding tasks is based on automatic and intuitive processes (e.g., feelings of rightness or wrongness). Thus, differences in moral decision-making between individuals cannot be explained by <italic>cognitive</italic> processes alone.</p>
<p>Despite these criticisms, Kohlberg&#x00027;s theory has been influential on psychological research into moral decision-making. As a result, there are measures of individual differences based on this theory that we expect to be part of this systematic review.</p>
</sec>
<sec>
<title>The social intuitionist model</title>
<p>The second influential theory in moral psychology is Haidt&#x00027;s (<xref ref-type="bibr" rid="B23">2001</xref>) Social Intuitionist Model. The Social Intuitionist Model argues against a rationalist model where moral judgments and decisions are reached through complex <italic>cognitive</italic> processes. Instead, Haidt (<xref ref-type="bibr" rid="B23">2001</xref>) argues that moral judgments and decisions are dependent primarily on one&#x00027;s moral <italic>emotions</italic> and <italic>intuitions</italic>. Moral reasoning, in his view, mostly serves as a <italic>post-hoc</italic> process to justify the established moral judgment.</p>
<p>The Social Intuitionist Model argues that moral intuition, much like language, evolved as a major adaptation for a social species while also requiring shaping from social and cultural institutions (Haidt, <xref ref-type="bibr" rid="B23">2001</xref>). Therefore, it is both innate and enculturated. One&#x00027;s moral intuition can be considered a mixed product of innate predispositions (Fiske, <xref ref-type="bibr" rid="B14">1991</xref>, <xref ref-type="bibr" rid="B15">1992</xref>) and a unique developmental environment consisting of family, peers, and culture (Whiting and Child, <xref ref-type="bibr" rid="B52">1953</xref>; Harris, <xref ref-type="bibr" rid="B25">1995</xref>). Variation in people&#x00027;s moral intuitions may provide a basis for individual differences in moral decision-making that are not dependent on reasoning alone.</p>
<p>Emotions also play an important role for individuals in moral decision-making. Haidt (<xref ref-type="bibr" rid="B24">2003</xref>) discusses several families of emotions that are of relevance: other-condemning (contempt, anger, disgust), self-conscious (shame, embarrassment, guilt), other-suffering (sympathy, compassion), other-praising (gratitude, awe, elevation). Haidt (<xref ref-type="bibr" rid="B24">2003</xref>) argues that emotions place the person in a motivational and cognitive state in which there is an increased tendency to engage in actions that fulfill the emotion-related goals (e.g., revenge, comforting). Malti and Krettenauer (<xref ref-type="bibr" rid="B38">2013</xref>) conducted a meta-analysis and found that the ability to attribute emotion to moral actions (e.g., guilt over moral transgression, pride over prosocial actions) is linked to prosocial and antisocial behaviors among children and adolescents. Therefore, variability in the ability and tendencies for a broad range of moral emotions certainly has implications for moral decision-making. Thus, a comprehensive theory of the psychology of moral decision-making should bring these theories together by positing that moral decision-making is a broad construct that encompasses <italic>reasoning, emotions</italic>, and <italic>intuitions</italic>.</p>
<p>A comprehensive systematic literature review, using a standardized quality appraisal tool, is needed to apprehend and evaluate psychometric properties of the different high-stakes moral decision measures that are rooted in the different theories, aiming to clarify and possibly integrate them for future research. The findings will inform theories of moral decision-making, including their key models and definitions. We will first outline the findings from existing literature reviews, including their shortcomings. Second, we will define the key aims of this review. Third, we will situate different measures within their relevant frameworks while evaluating their psychometric properties, providing a key foundation for an informed assessment of their usefulness to capture high-stakes moral decision-making. We will then determine a gold standard measure of moral decision-making using the focus and definition proposed in this review.</p>
</sec>
</sec>
<sec>
<title>The present systematic review</title>
<p>Two influential psychological theories outlined the possibility that people can differ meaningfully in moral decision-making. The next step is to ask how empirical research has tried to measure these differences in moral decision-making.</p>
<sec>
<title>Existing systematic reviews and meta-analyses</title>
<p>To date, two studies have systematically reviewed existing measures of moral decision-making. Villegas de Posada and Vargas-Trujillo&#x00027;s (<xref ref-type="bibr" rid="B51">2015</xref>) meta-analysis found that the development of moral reasoning positively correlated with domain-specific actions (real life, honesty, altruism, and resistance to conformity) and domain-general actions. Mart&#x000ED;-Vilar et al. (<xref ref-type="bibr" rid="B40">2021</xref>) conducted a systematic review of existing moral reasoning measures and their reported psychometric properties. They identified 21 measures that could fall under one of four categories: (1) Kohlbergian Models, (2) Prosocial Moral Reasoning Models, (3) Moral Dilemmas, and (4) Other or Unspecified Models. While 21 measures were identified, only a few measures were represented in most of the studies examined and the rest received limited testing. The Defining Issues Test (Rest, <xref ref-type="bibr" rid="B45">1974</xref>), based on the Kohlbergian Model, was one of the most commonly used measures. While these systematic reviews are informative of the current state of empirical research in moral decision-making, they have two shortcomings that the present review seeks to address. First, the quality of the evidence for the moral decision-making instruments&#x00027; psychometric properties was not evaluated against pre-determined criteria. A standardized criteria framework for measuring the quality of evidence allows for a <italic>systematic</italic> examination of each measure&#x00027;s psychometric properties, as well as a comparison of their relative strengths. One measure may reliably measure a narrow aspect of moral decision-making, whereas another measure captures broader aspects but less reliably. These differences across measures can inform our selection of measures for different purposes.</p>
<p>Second, the studies considered were not only focused on high-stakes (life and death) situations. Instead, they allowed substantial variability in the context in which moral decisions were made (e.g., business, education, medicine, engineering, and science). However, this contextual variability may be problematic. The context often included domain-specific moral issues that are already addressed by guidelines and policies (e.g., there is a &#x0201C;correct&#x0201D; answer determined by an authority) and thus cannot capture meaningful differences in individual choices that do not follow prescribed rules. In contrast, measures of moral decision-making that involve life and death result in dilemmas where there is less consensus on what the right decision or judgment is. Importantly, determining whether systematic individual differences exist here can help us to understand, and predict, moral decision-making and behaviors, and thus aid development of theories of moral decision-making. From the applied perspective, this information provides a key platform for the screening and selection of personnel in various fields where people have to face high-stakes decisions, such as military, medical, and legal professions.</p>
</sec>
<sec>
<title>Systematic review: Aims</title>
<p>Therefore, the present systematic review aims to: (1) identify and examine existing measures of moral decision-making that involve life/death content when no clear and agreed rules exist; (2) evaluate the psychometric properties presented in construction and validation studies against a standardized quality appraisal tool (Terwee et al., <xref ref-type="bibr" rid="B49">2007</xref>); (3) discuss the conceptualization of the construct and assess the usefulness of the identified measures; and (4) ascertain whether a gold standard measure of moral decision-making using the broad definition adopted in this review exists, and if not whether promising measures exist. The present review will follow the PRISMA Statement and guidelines for conducting and reporting systematic reviews (Liberati et al., <xref ref-type="bibr" rid="B35">2009</xref>).</p>
</sec>
</sec>
</sec>
<sec sec-type="methods" id="s2">
<title>Methods</title>
<sec>
<title>Search strategy</title>
<p>Electronic searches were conducted in six databases (see <xref ref-type="fig" rid="F1">Figure 1</xref>): PsycINFO, Web of Science, Scopus, Medline, Embase, and the ProQuest Military Database. These databases were selected based on the focus of this systematic literature review on life/death content; thus, we included medical and military databases in addition to the three more general scientific databases. The final search was conducted in all databases on 13th May 2021. Relevant studies were identified using a combination of keywords. PsycINFO, Medline, and Embase also allow searching by subject headings, which were subsequently used to obtain additional papers not captured by keyword searches. Generally, the search strategy aimed to identify an intersection of studies that focused on (1) moral decision-making, and (2) measurement.</p>
<fig id="F1" position="float">
<label>Figure 1</label>
<caption><p>Flow diagram of the study selection process from systematic searches.</p></caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="fpsyg-13-1063607-g0001.tif"/>
</fig>
<p>Reference lists of all included studies were also manually screened for potentially relevant publications. Additionally, potential validation studies were searched by manually screening studies that cited the original measure construction studies on Google Scholar. The search for additional validation studies through Google Scholar was conducted between 14th July 2021 and 29th July 2021.</p>
</sec>
<sec>
<title>Inclusion criteria</title>
<p>Peer-reviewed journal articles, book chapters, and unpublished dissertations were included in the review if they were an original quantitative research study that developed and/or validated a measure of moral decision-making. Studies were included if their aims were to develop or validate a measure of moral decision-making that: (1) contains life/death content, or (2) includes sacrificial moral dilemma(s). Studies were included if the sample consisted of at least 50% adults (i.e., over 18 years of age). Thus, some studies that tested high school students were also included, but this characteristic was recorded. Studies published in the English language were included, regardless of whether the study used a non-English speaking sample, but this characteristic was also recorded.</p>
</sec>
<sec>
<title>Exclusion criteria</title>
<p>Studies were excluded if more than 50% of the sample were not adults, if they were from a non-peer-reviewed journal or conference proceedings, if they were non-empirical studies, or if they were not written in the English language. Studies were excluded if the measures of moral decision-making: (1) did not contain life/death measures, or (2) did not include sacrificial moral dilemma(s).</p>
</sec>
<sec>
<title>Selection process</title>
<p>The entire selection process was conducted by authors BN and KM. Search results were initially screened by title and abstract to exclude studies that did not meet the inclusion criteria. For the remaining papers, full-text papers were obtained and evaluated in accordance with the inclusion/exclusion criteria.</p>
</sec>
<sec>
<title>Data extraction and quality assessment</title>
<p>The psychometric properties of all measures in the included studies were assessed using a published quality appraisal tool (Terwee et al., <xref ref-type="bibr" rid="B49">2007</xref>) developed to assess the quality of health status questionnaires&#x00027; validity, reliability, and responsiveness. Although moral decision-making is not in the domain of health status, the quality appraisal tool has been used in reviews that assessed the psychometric properties of individual difference measures (e.g., imposter phenomenon; Mak et al., <xref ref-type="bibr" rid="B37">2019</xref>). Therefore, this measurement framework was considered an appropriate tool for evaluating studies that examined the psychometric properties of moral decision-making measures.</p>
<p>The appraisal framework evaluates nine properties: (1) content validity, (2) internal consistency, (3) criterion validity, (4) construct validity, (5) reproducibility-agreement, (6) reproducibility-reliability, (7) responsiveness, (8) floor or ceiling effects, and (9) interpretability. The definitions and criteria of quality for each psychometric property are displayed in <xref ref-type="table" rid="T1">Table 1</xref>. Similar to Mak et al.&#x00027;s (<xref ref-type="bibr" rid="B37">2019</xref>) study, certain criteria from the original framework were amended due to the nature of the moral decision-making measures. These amendments were noted in <xref ref-type="table" rid="T1">Table 1</xref>. For example, item selection, a criterion of content validity, should only be applied to the original test construction studies and not follow-up validation studies.</p>
<table-wrap position="float" id="T1">
<label>Table 1</label>
<caption><p>Summary of search terms from six databases.</p></caption>
<table frame="box" rules="all">
<thead><tr>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Database</bold></th>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Search terms</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">PsycINFO</td>
<td valign="top" align="left">((Decision Making <bold>AND</bold> (Morality <bold>OR</bold> Ethics)) <bold>OR</bold> &#x0201C;moral decision making&#x0201D; <bold>OR</bold> &#x0201C;ethical decision making&#x0201D; <bold>OR</bold> &#x0201C;moral reasoning&#x0201D;) <bold>AND</bold> (<italic>Measurement</italic><sup>a</sup> <bold>OR</bold> &#x0201C;moral dilemma&#x0201D;)</td>
</tr> <tr>
<td valign="top" align="left">Embase<sup>b</sup></td>
<td valign="top" align="left">(<italic>Decision-Making</italic> <bold>AND</bold> (<italic>Morals</italic> <bold>OR</bold> <italic>Ethics</italic>)) <bold>AND</bold> (&#x0201C;moral decision making&#x0201D; <bold>OR</bold> &#x0201C;ethical decision making&#x0201D; <bold>OR</bold> &#x0201C;moral reasoning&#x0201D;)</td>
</tr> <tr>
<td valign="top" align="left">Medline<sup>c</sup></td>
<td valign="top" align="left">(<italic>Decision-Making</italic> <bold>AND</bold> (<italic>Morals</italic> <bold>OR</bold> <italic>Ethics</italic>)) <bold>AND</bold> (&#x0201C;moral decision making&#x0201D; <bold>OR</bold> &#x0201C;ethical decision making&#x0201D; <bold>OR</bold> &#x0201C;moral reasoning&#x0201D;)</td>
</tr> <tr>
<td valign="top" align="left">Web of Science</td>
<td valign="top" align="left">(&#x0201C;ethical decision making&#x0201D; <bold>OR</bold> &#x0201C;moral decision making&#x0201D; <bold>OR</bold> &#x0201C;moral reasoning&#x0201D;) <bold>AND</bold> (measurement <bold>OR</bold> psychometr&#x0002A; <bold>OR</bold> &#x0201C;moral dilemma&#x0201D;)</td>
</tr> <tr>
<td valign="top" align="left">Scopus</td>
<td valign="top" align="left">(&#x0201C;ethical decision making&#x0201D; <bold>OR</bold> &#x0201C;moral decision making&#x0201D; <bold>OR</bold> &#x0201C;moral reasoning&#x0201D;) <bold>AND</bold> (measurement <bold>OR</bold> psychometr&#x0002A; <bold>OR</bold> &#x0201C;moral dilemma&#x0201D;)</td>
</tr>
<tr>
<td valign="top" align="left">ProQuest Military Database</td>
<td valign="top" align="left">(&#x0201C;ethical decision making&#x0201D; <bold>OR</bold> &#x0201C;moral decision making&#x0201D; <bold>OR</bold> &#x0201C;moral reasoning&#x0201D;) <bold>AND</bold> (measurement <bold>OR</bold> psychometr&#x0002A; <bold>OR</bold> &#x0201C;moral dilemma&#x0201D;)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>Subject headings are in <italic>italics</italic>. Keywords are in quotation marks (&#x0201C;&#x0201D;).</p>
<p><sup>a</sup>Related subheadings for measurement were also selected to include more relevant results. For the full syntax of search terms, see <xref ref-type="supplementary-material" rid="SM1">Appendix A</xref>.</p>
<p><sup>b, c</sup>Embase and Medline do not have &#x0201C;Measurement&#x0201D; as a subject heading. Therefore, searches of subject headings and keywords were solely focused on moral decision-making. Relevant papers were screened and selected manually.</p>
</table-wrap-foot>
</table-wrap>
<p>Each assessed criterion received a rating score of &#x0201C;&#x0002B;&#x0201D; as good, &#x0201C;?&#x0201D; as intermediately rated, &#x0201C;&#x02013;&#x0201D; as negatively rated, or a &#x0201C;0&#x0201D; if no information was provided on that criterion for a specific study. An &#x0201C;N/A&#x0201D; (not applicable) rating was assigned for a particular criterion if it is impossible to evaluate the criterion due to the research design used in the study. For example, responsiveness is a criterion assessing how well the measure detects clinically important changes over time, which is not applicable to studies that are non-longitudinal.</p>
<p>The two researchers (BN and KM) independently evaluated each included study and evaluated their psychometric properties against the amended quality framework. Discrepancies in scoring were discussed at calibration meetings to arrive at a consensus.</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>Results</title>
<p>The initial search returned 2,187 results (including 342 duplicates), but after screening, most were excluded because (1) the study was not a validation study, (2) the measure used was qualitative or not life-or-death related, or (3) the study was not published in the English language. The flow diagram in <xref ref-type="fig" rid="F1">Figure 1</xref> documents the review process.</p>
<p>Overall, we were left with 20 studies to fully evaluate. The identified measures in these studies generally adopted one of two formats: moral dilemmas or self-report scales. Moral dilemma measures were used in 16 included studies. The main moral dilemma measures identified included: (1) the Defining Issues Test (Rest, <xref ref-type="bibr" rid="B45">1974</xref>) and its revised versions, (2) measures using the Process Dissociation (PD) Model (Conway and Gawronski, <xref ref-type="bibr" rid="B12">2013</xref>), and (3) measures using the Consequences, Norms, Inaction (CNI) Model (Gawronski et al., <xref ref-type="bibr" rid="B20">2017</xref>). Three self-report measures were identified in the remaining three studies. <xref ref-type="table" rid="T3">Table 3</xref> describes the included studies organized by the type of measurement and ascending year of publication within the same group.</p>
<table-wrap position="float" id="T2">
<label>Table 2</label>
<caption><p>Adapted criteria for quality of psychometric properties and scoring system (Terwee et al., <xref ref-type="bibr" rid="B49">2007</xref>).</p></caption>
<table frame="box" rules="all">
<thead><tr>
<th style="background-color:#919497"/>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Property</bold></th>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Definition</bold></th>
<th style="background-color:#919497"/>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Quality criteria</bold></th>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Criteria amendment</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">1</td>
<td valign="top" align="left">Content validity</td>
<td valign="top" align="left">The extent to which the domain of interest is comprehensively sampled by the items in the questionnaire</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">A clear description is provided of the measurement aim, the target population, the concepts that are being measured, and the item selection <bold>AND</bold> target population and (investigators OR experts) were involved in item selection;</td>
<td valign="top" align="left">(1) Target population - clear description of the <bold>sample characteristics</bold> (e.g., undergraduate students, M and SD and/or range of age, gender).<break/> (2) Item selection should be theoretically driven - only relevant to test construction papers.</td>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">?</td>
<td valign="top" align="left">A clear description of above-mentioned aspects is lacking <bold>OR</bold> only target population involved <bold>OR</bold> doubtful design or method;</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">&#x02212;</td>
<td valign="top" align="left">No target population involvement;</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">0</td>
<td valign="top" align="left">No information found on target population involvement.</td>
<td/>
</tr> <tr>
<td valign="top" align="left">2</td>
<td valign="top" align="left">Internal consistency</td>
<td valign="top" align="left">The extent to which items in a (sub)scale are intercorrelated, thus measuring the same construct</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">Factor analyses performed on adequate sample size (7 &#x0002A; &#x00023; items and &#x02265;100) AND Cronbach&#x00027;s alpha(s) calculated per dimension AND Cronbach&#x00027;s alpha(s) between 0.70 and 0.95;</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">?</td>
<td valign="top" align="left">No factor analysis OR doubtful design or method;</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">&#x02212;</td>
<td valign="top" align="left">Cronbach&#x00027;s alpha(s) &#x0003C; 0.70 or &#x0003E;0.95, despite adequate design and method;</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">0</td>
<td valign="top" align="left">No information found on internal consistency.</td>
<td/>
</tr> <tr>
<td valign="top" align="left">3</td>
<td valign="top" align="left">Criterion validity</td>
<td valign="top" align="left">The extent to which scores on a particular questionnaire relate to a gold standard</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">Convincing arguments that gold standard is &#x0201C;gold&#x0201D; AND correlation with gold standard &#x02265; 0.70;</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">?</td>
<td valign="top" align="left">No convincing arguments that gold standard is &#x0201C;gold&#x0201D; OR doubtful design or method;</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">&#x02212;</td>
<td valign="top" align="left">Correlation with gold standard &#x0003C; 0.70, despite adequate design and method;</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">0</td>
<td valign="top" align="left">No information found on criterion validity.</td>
<td valign="top" align="left">There is a gold standard that the researchers haven&#x00027;t referred to</td>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">N/A</td>
<td/>
<td valign="top" align="left">No gold standard mentioned</td>
</tr>
<tr>
<td valign="top" align="left">4</td>
<td valign="top" align="left">Construct validity</td>
<td valign="top" align="left">The extent to which scores on a particular questionnaire relate to other measures in a manner that is consistent with theoretically derived hypotheses concerning the concepts that are being measured</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">Specific hypotheses were formulated AND at least 75% of the results are in accordance with these hypotheses;</td>
<td valign="top" align="left">(1) Main hypothesis confirmed.<break/> (2) 50% instead of 75% are in accordance with these hypotheses.<break/> (3) Hypotheses should be about proposed relationships between the measure and other theoretically related constructs or about proposed group differences as opposed to hypothesized factor structure.<break/> (4) Statements of examining convergent and divergent validity are sufficient to be considered as hypotheses when assessing construct validity.</td>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">?</td>
<td valign="top" align="left">Doubtful design or method (e.g., no hypotheses);</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">&#x02212;</td>
<td valign="top" align="left">Less than 75% of hypotheses were confirmed, despite adequate design and methods;</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">0</td>
<td valign="top" align="left">No information found on construct validity.</td>
<td valign="top" align="left">No hypotheses or exploratory hypotheses only.</td>
</tr> <tr>
<td valign="top" align="left">5</td>
<td valign="top" align="left">Reproducibility: agreement</td>
<td valign="top" align="left">The extent to which the scores on repeated measures are close to each other (absolute measurement error)</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">MIC &#x0003C; SDC OR MIC outside the LOA OR convincing arguments that agreement is acceptable;</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">?</td>
<td valign="top" align="left">Doubtful design or method OR (MIC not defined AND no convincing arguments that agreement is acceptable);</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">&#x02212;</td>
<td valign="top" align="left">MIC &#x02265; SDC OR MIC equals or inside LOA, despite adequate design and method;</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">0</td>
<td valign="top" align="left">No information found on agreement.</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">N/A</td>
<td/>
<td valign="top" align="left">Study is non-longitudinal.</td>
</tr> <tr>
<td valign="top" align="left">6</td>
<td valign="top" align="left">Reproducibility: reliability</td>
<td valign="top" align="left">The extent to which patients<xref ref-type="table-fn" rid="TN1"><sup>a</sup></xref> can be distinguished from each other, despite measurement errors (relative measurement error)</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">ICC or weighted Kappa &#x02265; 0.70;</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">?</td>
<td valign="top" align="left">Doubtful design or method (e.g., time interval not mentioned);</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">&#x02212;</td>
<td valign="top" align="left">ICC or weighted Kappa &#x0003C; 0.70, despite adequate design and method;</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">0</td>
<td valign="top" align="left">No information found on reliability.</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">N/A</td>
<td/>
<td valign="top" align="left">Study is non-longitudinal.</td>
</tr> <tr>
<td valign="top" align="left">7</td>
<td valign="top" align="left">Responsiveness</td>
<td valign="top" align="left">The ability of a questionnaire to detect clinically<xref ref-type="table-fn" rid="TN1"><sup>a</sup></xref> important changes over time</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">SDC or SDC &#x0003C; MIC OR MIC outside the LOA OR RR &#x0003E; 1.96 OR AUC &#x02265; 0.70;</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">?</td>
<td valign="top" align="left">Doubtful design or method;</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">&#x02212;</td>
<td valign="top" align="left">SDC or SDC &#x02265; MIC OR MIC equals or inside LOA OR RR &#x02264; 1.96 OR AUC &#x0003C; 0.70, despite adequate design and methods;</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">0</td>
<td valign="top" align="left">No information found on responsiveness.</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">N/A</td>
<td/>
<td valign="top" align="left">Study is non-longitudinal.</td>
</tr> <tr>
<td valign="top" align="left">8</td>
<td valign="top" align="left">Floor and ceiling effects</td>
<td valign="top" align="left">The number of respondents who achieved the lowest or highest possible score</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left"> &#x02264; 15% of the respondents achieved the highest or lowest possible scores;</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">?</td>
<td valign="top" align="left">Doubtful design or method;</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">&#x02212;</td>
<td valign="top" align="left">&#x0003E;15% of the respondents achieved the highest or lowest possible scores, despite adequate design and methods;</td>
<td/>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">0</td>
<td valign="top" align="left">No information found on interpretation.</td>
<td/>
</tr> <tr>
<td valign="top" align="left">9</td>
<td valign="top" align="left">Interpretability</td>
<td valign="top" align="left">The degree to which one can assign qualitative meaning to quantitative scores</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">Mean and SD scores presented of at least four relevant subgroups of patients and MIC defined;</td>
<td valign="top" align="left">Relevant subgroups = groups that differ in meaningful ways (e.g., demographics, between-subjects experimental manipulation).</td>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">?</td>
<td valign="top" align="left">Doubtful design or method OR less than four subgroups OR no MIC defined;</td>
<td valign="top" align="left">Incomplete presentation of means and SD scores</td>
</tr>
<tr>
<td/>
<td/>
<td/>
<td valign="top" align="left">0</td>
<td valign="top" align="left">No information found on interpretation.</td>
<td/>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>Adapted with permission from &#x0201C;Impostor phenomenon measurement scales: a systematic review&#x0201D;, by Mak et al. (<xref ref-type="bibr" rid="B37">2019</xref>).</p>
<fn id="TN1"><p><sup>a</sup>These terms are used in Terwee et al. (<xref ref-type="bibr" rid="B49">2007</xref>) but were interpreted more broadly here.</p></fn>
<p><italic>M</italic>, mean; SD, standard deviation; MIC, minimal important change (smallest difference in score in the domain of interest which patients perceive as beneficial and would agree to, in the absence of side effects and excessive costs); SDC, smallest detectable change (smallest within person change, above measurement error. A positive is given when SDC or the limits of agreement are smaller than the MIC); LOA, limits of agreement; ICC, intraclass correlation; RR, responsiveness ratio; AUC, area under the curve; &#x0002B;, Positive rating; ?, Intermediate rating; &#x02212;, Negative rating; 0, No information provided; N/A, Not applicable.</p>
</table-wrap-foot>
</table-wrap>
<p>It is important to notice that there is substantial variability in what the moral decision-making measures aim to measure and their theoretical basis. This implies that there is a lack of consensus regarding the construct of moral decision-making, and therefore each measure may only capture some of its dimensions. The next section gives a brief description of the identified measures.</p>
<sec>
<title>Measures based on moral dilemmas</title>
<sec>
<title>The Defining Issues Test and revised versions</title>
<p>The Defining Issues Test (DIT; Rest, <xref ref-type="bibr" rid="B45">1974</xref>) and its revised versions, the DIT-2 (Rest et al., <xref ref-type="bibr" rid="B46">1999</xref>), and the behavioral Defining Issues Test (bDIT; Choi et al., <xref ref-type="bibr" rid="B8">2019</xref>), were used in seven studies. The Defining Issues Test was based on Kohlberg&#x00027;s (<xref ref-type="bibr" rid="B31">1984</xref>) Moral Development Theory. The DIT consists of six sacrificial dilemma stories. After each story, the participant is given a list of reasons for (e.g., sacrificing a life to save more lives) or against an action (e.g., not killing anyone even if it saves others) and asked to rank and rate the importance of each reason. These reasons stem from Kohlberg&#x00027;s stages (2&#x02013;6) of moral reasoning and can be grouped into three categories: personal interests (Stage 2), maintaining social norms (Stages 3 and 4), or post-conventional perspectives (Stages 5A, 5B, and 6). The DIT quantifies a person&#x00027;s moral development by their likelihood of endorsing post-conventional reasons (i.e., the P score). While the DIT produces several scores representing reliance on each stage of moral development (i.e., stage scores from stages 2&#x02013;6), the P score is the most widely used index (Rest et al., <xref ref-type="bibr" rid="B46">1999</xref>).</p>
<p>The DIT-2 contains five of the six dilemmas from the original DIT with updated language and generates the N2 instead of P score. The N2 score, like the P score, considers the preference for post-conventional reasoning. In addition, the N2 score takes into account the disagreement with less sophisticated schemas (Rest et al., <xref ref-type="bibr" rid="B46">1999</xref>).</p>
<p>The bDIT contains three dilemmas and measures behavioral responses, such as reaction time (Choi et al., <xref ref-type="bibr" rid="B8">2019</xref>). Instead of rating the importance of reasons for action/inaction, participants were given a limited amount of time to select one of the three presented behavioral responses. These three responses represent the three moral schemas: Personal Interest, Maintaining Norms, and Post-Conventional Reasoning.</p>
</sec>
<sec>
<title>Measures using the Process Dissociation Model</title>
<p>A set of moral dilemmas that evaluates a person&#x00027;s inclinations for utilitarianism and deontology separately was created by Conway and Gawronski (<xref ref-type="bibr" rid="B12">2013</xref>). This set of moral dilemmas was based on the Process Dissociation (PD) Model (Jacoby, <xref ref-type="bibr" rid="B27">1991</xref>). Psychologists typically define utilitarianism as the principle whereby the morality of an action is determined by its consequences (Conway and Gawronski, <xref ref-type="bibr" rid="B12">2013</xref>). On the other hand, deontology is defined as the principle that the morality of an action is determined by its intrinsic nature (e.g., causing harm is wrong regardless of the consequences). While earlier sacrificial dilemmas pit utilitarianism against deontology (e.g., Greene et al., <xref ref-type="bibr" rid="B22">2001</xref>), Conway and Gawronski (<xref ref-type="bibr" rid="B12">2013</xref>) argued that the endorsement of one does not necessarily imply a rejection of the other. Therefore, participants&#x00027; ratings of the appropriateness of action in 20 dilemmas were analyzed using process dissociation to extract inclinations toward both utilitarianism and deontology. Jang (<xref ref-type="bibr" rid="B28">2020</xref>) translated the PD into Korean and conducted a study to validate the measure.</p>
</sec>
<sec>
<title>Measures using the CNI model</title>
<p>The CNI model (Gawronski et al., <xref ref-type="bibr" rid="B20">2017</xref>) further developed the Process Dissociation Model by addressing another problem with the traditional approach. In addition to the two inclinations underlined by the Process Dissociation Model, Gawronski et al. (<xref ref-type="bibr" rid="B20">2017</xref>) argued that there is a third component, a general tendency for inaction, that may play a role in moral decision-making. In a morally ambiguous situation, a person may prefer to not act because they do not want to inject themselves into events, rather than due to a strong inclination toward deontology or utilitarianism. In traditional moral dilemmas, the action always leads to sacrificial killing, which conflates with a preference for inaction. Using the multinomial processing tree method, Gawronski et al. (<xref ref-type="bibr" rid="B20">2017</xref>) developed the CNI model, which is a new set of 24 dilemmas that measured participants&#x00027; sensitivity to <bold>C</bold>onsequences (inclination for utilitarianism in the Process Dissociation Model), sensitivity to <bold>N</bold>orms (inclination for deontology in the Process Dissociation Model), and a general tendency for <bold>I</bold>naction. K&#x000F6;rner et al. (<xref ref-type="bibr" rid="B32">2020</xref>) expanded the battery from 24 to 48 dilemmas to improve its suitability in individual difference research.</p>
</sec>
<sec>
<title>Other moral dilemmas</title>
<p>The remaining moral dilemma studies each identified one measure. Bore&#x00027;s (<xref ref-type="bibr" rid="B4">2001</xref>) Morality of Justice and Care (MOJAC) scale conceptualized moral dilemmas as the conflict between the rights of the individual (e.g., stealing a drug to save one&#x00027;s sick wife) and the rights of the collective (e.g., stealing is wrong).</p>
<p>Christensen et al. (<xref ref-type="bibr" rid="B9">2014</xref>) systematically developed a battery of moral dilemmas based on four conceptually meaningful factors: personal force, benefit recipient, evitability, and intentionality. Additionally, contextual factors such as the word count, framing, situational antecedents, number of individuals involved, types of trade-off (e.g., killing vs. stealing, lying), and whether your action will be known to others, were controlled for. Christensen et al. (<xref ref-type="bibr" rid="B9">2014</xref>) were interested in whether their conceptual factors influenced participants&#x00027; decisions, arousal, valence, and reaction times.</p>
<p>Fleischhut et al. (<xref ref-type="bibr" rid="B17">2017</xref>) investigated the effect of hindsight in moral decision-making. They were interested in how moral decisions are influenced if participants had information on their actions&#x00027; consequences. Fleischhut et al. (<xref ref-type="bibr" rid="B17">2017</xref>) generated dilemmas in which actions to avert negative outcomes had probable side effects, and then created three information conditions. In the <italic>foresight</italic> condition, participants were provided with no further information and asked for a decision. In the <italic>hindsight-good</italic> and <italic>hindsight-bad</italic> conditions, participants were given additional information stating that the negative side effects either occurred (<italic>bad</italic> condition) or did not occur (<italic>good</italic> condition). Participants were asked to judge the permissibility of the action and the probability of the negative side effect occurring in the future.</p>
<p>Kimhi (<xref ref-type="bibr" rid="B30">2014</xref>) developed moral dilemmas in war-related scenarios (e.g., whether to open fire on the enemy at the risk of harming civilians). Participants&#x00027; decisions, their perceived appropriateness, confidence, the difficulty of their decisions, and the estimated probability of specific outcomes (e.g., civilians being killed) were measured.</p>
<p>Lotto et al. (<xref ref-type="bibr" rid="B36">2014</xref>) investigated the effects of intention and self-involvement in moral decision-making. They constructed 75 moral dilemmas consisting of 30 &#x0201C;instrumental dilemmas,&#x0201D; 30 &#x0201C;incidental dilemmas,&#x0201D; and 15 fillers. Instrumental dilemmas described killing an individual as a means to save others (e.g., killing and taking an innocent person&#x00027;s organs to treat five patients in need of transplants). On the other hand, incidental dilemmas described killing an individual as a foreseen but unintended consequence (e.g., switching the trolley onto another track where there is another worker). Additionally, approximately half of the dilemmas in each condition were self-involved (i.e., killing saves one&#x00027;s own life and others), and half were other-involved (i.e., killing saves others only). Lotto et al. (<xref ref-type="bibr" rid="B36">2014</xref>) were interested in the effects of intention and self-involvement on participants&#x00027; decisions, their rating of an action&#x00027;s moral acceptability, and their affective reactions.</p>
<p>Carmona-Perera et al. (<xref ref-type="bibr" rid="B6">2013</xref>) translated and adapted the moral dilemmas from Greene et al. (<xref ref-type="bibr" rid="B22">2001</xref>). The moral dilemmas were adapted to investigate brain activities when participants were dealing with morally conflicting situations. The battery of dilemmas consisted of three groups: non-moral stories, moral-impersonal stories (e.g., flipping a switch to divert the trolley from killing five workers), and moral-personal stories (e.g., pushing a man off the bridge to stop the trolley from killing five workers). More personal moral dilemmas were expected to be more conflicting and associated with both less willingness to take action and heightened brain activity. The personal dilemmas were further divided into high-conflict (dilemmas that had low consensus on the appropriate decision in previous studies) and low-conflict (dilemmas that had high consensus). Carmona-Perera et al. (<xref ref-type="bibr" rid="B6">2013</xref>) were interested in the decisions participants would make, the difficulty they felt when making the decision, and the proportion of congruent decisions as an index of rationality (e.g., saying no to risky investment decisions).</p>
</sec>
</sec>
<sec>
<title>Description of self-report scales</title>
<sec>
<title>ABB scale</title>
<p>The ABB scale, named after the initials of the authors&#x02014;Abdellaoui et al. (<xref ref-type="bibr" rid="B1">2016</xref>)&#x02014;was created to measure people&#x00027;s judgments on personal, conventional, and moral transgressions. For each type of transgression, four scenarios were given and participants rated how serious and how defensible the action is, and whether the transgressor should be rejected.</p>
</sec>
<sec>
<title>Oxford Utilitarianism Scale</title>
<p>Kahane et al.&#x00027;s (<xref ref-type="bibr" rid="B29">2018</xref>) Oxford Utilitarianism Scale aimed to measure two aspects of utilitarianism. The first aspect, instrumental harm, measures whether individuals find causing harm permissible if it leads to more moral good overall. The second aspect, impartial beneficence, assesses whether individuals maximize overall moral goodness even if it conflicts with self-interest (e.g., donating one&#x00027;s majority of income to charity). Kahane et al. (<xref ref-type="bibr" rid="B29">2018</xref>) argued that existing moral dilemma measures predominantly focused on <italic>instrumental harm</italic> but overlooked <italic>impartial beneficence</italic>. To address this gap, the Oxford Utilitarianism Scale measures both of these factors.</p>
</sec>
<sec>
<title>Punishment Orientation Questionnaire</title>
<p>The Punishment Orientation Questionnaire (POQ; Yamamoto and Maeder, <xref ref-type="bibr" rid="B53">2019</xref>) aimed to measure what principles people engage with when thinking about punishment. The POQ captures two general principles that underlie the motivations behind punishment&#x02014;utilitarianism (i.e., deterrence of future transgression) and retributivism (i.e., an eye for an eye). Furthermore, each principle is divided into a Prohibitive dimension and a Permissive dimension, resulting in four subscales: (1) Prohibitive Utilitarianism (limiting punishment based on utility), (2) Prohibitive Retributivism (aversion to punishing if it means hurting innocent people), (3) Permissive Utilitarianism (willingness to give harsh punishment based on the benefits thereof), and (4) Permissive Retributivism (desire for just deserts).</p>
</sec>
</sec>
<sec>
<title>Assessment of psychometric properties</title>
<p>The assessment of psychometric properties was conducted in accordance with the amended version of the quality appraisal framework defined by Terwee et al. (<xref ref-type="bibr" rid="B49">2007</xref>). Two reviewers independently rated each included study against the nine psychometric properties of the appraisal framework (Terwee et al., <xref ref-type="bibr" rid="B49">2007</xref>). Agreement between the two reviewers on the criteria of adequacy was 87.78% and this equates to a Kappa of <italic>k</italic> = 0.87. Kappa is an inter-rater agreement statistic that controls for the agreement expected based on chance alone and a Kappa of 0.87 represents a substantial degree of agreement between raters (Cohen, <xref ref-type="bibr" rid="B11">1960</xref>). <xref ref-type="table" rid="T3">Table 3</xref> presents detailed information on the measures&#x00027; factorial structure, reliability estimates, and findings in relation to other variables or group differences.</p>
<table-wrap position="float" id="T3">
<label>Table 3</label>
<caption><p>Included study descriptions.</p></caption>
<table frame="box" rules="all">
<thead><tr>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>References</bold></th>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Measures</bold></th>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Study type</bold></th>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Statistical analysis</bold></th>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Number of scenarios/items</bold></th>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Questions asked</bold></th>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Study population</bold></th>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Age (mean)</bold></th>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Sex ratio<xref ref-type="table-fn" rid="TN2"><sup>a</sup></xref></bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" colspan="9" style="background-color:#e0e1e3"><bold>Moral dilemmas</bold></td>
</tr> <tr>
<td valign="top" align="left" colspan="9" style="background-color:#e0e1e3"><bold>DIT</bold></td>
</tr> <tr>
<td valign="top" align="left">Martin et al. (<xref ref-type="bibr" rid="B39">1977</xref>)</td>
<td/>
<td valign="top" align="left">Validation</td>
<td valign="top" align="left">ANOVA</td>
<td valign="top" align="left">Six scenarios</td>
<td valign="top" align="left">Importance on 12 items for each scenario</td>
<td valign="top" align="left">Sample 1: 60 junior high school, sample 2: 200 high school students, sample 3: 105 college students</td>
<td valign="top" align="left">Sample 1: 13.9 years, sample 2: 17.3 years, sample 3: 20.2 years</td>
<td valign="top" align="left">Sample 1: 33 males, 27 females, sample 2: 93 males, 107 females, sample 3: 34 males, 71 females</td>
</tr> <tr>
<td valign="top" align="left">Davison and Robbins (<xref ref-type="bibr" rid="B13">1978</xref>)</td>
<td/>
<td valign="top" align="left">Validation</td>
<td valign="top" align="left">Cronbach&#x00027;s &#x003B1;, test-retest reliability, <italic>t</italic>-test, correlation</td>
<td valign="top" align="left">Six scenarios</td>
<td valign="top" align="left">Importance on 12 items for each scenario</td>
<td valign="top" align="left">1,703 from six samples including high school students, undergraduate and graduate students, and adults</td>
<td valign="top" align="left">Ranged 15&#x02013;82 years</td>
<td valign="top" align="left">Most samples reported to have approximately even split between males and females.</td>
</tr> <tr>
<td valign="top" align="left" colspan="9" style="background-color:#e0e1e3"><bold>DIT-2</bold></td>
</tr> <tr>
<td valign="top" align="left">Rest et al. (<xref ref-type="bibr" rid="B46">1999</xref>)</td>
<td/>
<td valign="top" align="left">Adaptation</td>
<td valign="top" align="left">ANOVA, correlation, Cronbach&#x00027;s &#x003B1;, regression, <italic>t</italic>-test</td>
<td valign="top" align="left">Five scenarios</td>
<td valign="top" align="left">Importance on 12 items for each scenario</td>
<td valign="top" align="left">Sample 1: 47 ninth-grade students, sample 2: 35 senior high graduates, new freshmen, sample 3: 65 college seniors, sample 4: 53 graduate school and professional school students</td>
<td valign="top" align="left">Sample 1: 14.64 years, SD = 0.53, sample 2: 18.51 years, SD = 2.03, sample 3: 21.55 years, SD = 3.11, sample 4: 29.06 years, SD = 5.90</td>
<td valign="top" align="left">Sample 1: 34% female, sample 2: 77% female, sample 3: 77% female, sample 4: 45% female</td>
</tr> <tr>
<td valign="top" align="left">Mitchell (<xref ref-type="bibr" rid="B43">2000</xref>)</td>
<td/>
<td valign="top" align="left">Validation</td>
<td valign="top" align="left">ANOVA, factor analysis, reliability, ANCOVA, correlation</td>
<td valign="top" align="left">Five scenarios</td>
<td valign="top" align="left">Importance on 12 items for each scenario</td>
<td valign="top" align="left">1,534 consisted of 26 samples collected by a third-party research center from 1998 to 1999.</td>
<td/>
<td valign="top" align="left">606 males, 904 females</td>
</tr> <tr>
<td valign="top" align="left">Mayhew et al. (<xref ref-type="bibr" rid="B42">2015</xref>)</td>
<td/>
<td valign="top" align="left">Validation</td>
<td valign="top" align="left">Regression</td>
<td valign="top" align="left">Five scenarios</td>
<td valign="top" align="left">Action, 12-item scale</td>
<td valign="top" align="left">923 (first-year undergraduates in the US)</td>
<td valign="top" align="left">Not reported</td>
<td valign="top" align="left">38.4% male, 61.6% female</td>
</tr> <tr>
<td valign="top" align="left">Choi et al. (<xref ref-type="bibr" rid="B7">2020</xref>)</td>
<td/>
<td valign="top" align="left">Validation</td>
<td valign="top" align="left">CFA</td>
<td valign="top" align="left">Five scenarios</td>
<td valign="top" align="left">Importance on 12 items for each scenario</td>
<td valign="top" align="left">39,409 (US citizens in university, collected by a third-party research center between 2000 and 2009)</td>
<td valign="top" align="left">Ranged 17&#x02013;26 years</td>
<td valign="top" align="left">21,139 males (47.2%), 23,272 females (52%)</td>
</tr>
<tr>
<td valign="top" align="left" colspan="9" style="background-color:#e0e1e3"><bold>bDIT</bold></td>
</tr> <tr>
<td valign="top" align="left">Choi et al. (<xref ref-type="bibr" rid="B8">2019</xref>)</td>
<td/>
<td valign="top" align="left">Adaptation</td>
<td valign="top" align="left">Reliability (tetrachoric correlation), differential item functioning analysis, logistic regression, ANOVA</td>
<td valign="top" align="left">Three scenarios</td>
<td valign="top" align="left">Behavioral decision and eight questions asking the rationale behind decision for each story</td>
<td valign="top" align="left">353 (introductory psychology students in the US)</td>
<td valign="top" align="left">18.64 years, SD = 1.20</td>
<td valign="top" align="left">81 males, 271 females</td>
</tr> <tr>
<td valign="top" align="left" colspan="9" style="background-color:#e0e1e3"><bold>Process Dissociation Model</bold></td>
</tr> <tr>
<td valign="top" align="left">Conway and Gawronski (<xref ref-type="bibr" rid="B12">2013</xref>)</td>
<td/>
<td valign="top" align="left">Construction</td>
<td valign="top" align="left">S1: <italic>t</italic>-test, correlation, regression, S2: <italic>t</italic>-test, correlation, ANOVA, S3: <italic>t</italic>-test, ANOVA</td>
<td valign="top" align="left">20 scenarios</td>
<td valign="top" align="left">S1: appropriateness, S2: appropriateness, S3: appropriateness</td>
<td valign="top" align="left">S1: 112 (undergraduate students), S2: 57 (undergraduate students), S3: 275 (MTurk)</td>
<td valign="top" align="left">S1: 19.23 years, SD = 5.20, S2: 18.37 years, SD = 0.96, S3: 34.08 years, SD = 11.73</td>
<td valign="top" align="left">S1: 30 males, 82 females, S2: 28 males, 29 females, S3: 118 males, 156 females</td>
</tr> <tr>
<td valign="top" align="left">Jang (<xref ref-type="bibr" rid="B28">2020</xref>)</td>
<td/>
<td valign="top" align="left">Validation</td>
<td valign="top" align="left">Correlation, Mann-Whitney test</td>
<td valign="top" align="left">20 scenarios</td>
<td valign="top" align="left">Appropriateness, probability of taking action, how happy</td>
<td valign="top" align="left">465 (Korean adults)</td>
<td valign="top" align="left">31.37 years, SD = 14.20</td>
<td valign="top" align="left">163 males, 300 females</td>
</tr> <tr>
<td valign="top" align="left" colspan="9" style="background-color:#e0e1e3"><bold>CNI model</bold></td>
</tr> <tr>
<td valign="top" align="left">Gawronski et al. (<xref ref-type="bibr" rid="B20">2017</xref>)</td>
<td/>
<td valign="top" align="left">Construction</td>
<td valign="top" align="left">S1: <italic>t</italic>-test, <italic>g</italic>-test, S2: <italic>t</italic>-test, <italic>g</italic>-test, S3: <italic>t</italic>-test, <italic>g</italic>-test, S4: <italic>t</italic>-test, <italic>g</italic>-test</td>
<td valign="top" align="left">24 scenarios</td>
<td valign="top" align="left">S1: acceptability, S2: acceptability, S3: acceptability, action, S4: acceptability</td>
<td valign="top" align="left">S1a: 201 (MTurk), S1b: 197 (MTurk), S2a: 194 (MTurk), S2b: 194 (MTurk), S3a: 186 (MTurk), S3b: 189 (MTurk), S4a: 184 (MTurk), S4b: 198 (MTurk)</td>
<td valign="top" align="left">S1a: 32.20 years, SD = 10.96, S1b: 35.77 years, SD = 11.47, S2a: 34.26 years, SD = 11.90, S2b: 36.36 years, SD = 12.40, S3a: 35.77 years, SD = 12.79, S3b: 34.72 years, SD = 10.69, S4a: not reported, S4b: not reported</td>
<td valign="top" align="left">S1a: 106 males, 95 females, S1b: 95 males, 102 females, S2a: 96 males, 97 females, S2b: 103 males, 91 females, S3a: 86 males, 100 females, S3b: 91 males, 98 females, S4a: not reported, S4b: not reported</td>
</tr> <tr>
<td valign="top" align="left">K&#x000F6;rner et al. (<xref ref-type="bibr" rid="B32">2020</xref>)</td>
<td/>
<td valign="top" align="left">Adaptation, validation</td>
<td valign="top" align="left">S1: correlation, S2: correlation</td>
<td valign="top" align="left">48 scenarios</td>
<td valign="top" align="left">S1: acceptability, S2: action</td>
<td valign="top" align="left">S1a: 161 (MTurk), S1b: 177 (MTurk), S2a: 196 (MTurk), S2b: 189 (MTurk)</td>
<td valign="top" align="left">S1a: 37 years, SD = 11, S1b: 33 years, SD = 9, S2a: 35 years, SD = 10, S2b: 34 years, SD = 9</td>
<td valign="top" align="left">S1a: 84 males, 72 females, S1b: 105 males, 65 females, S2a: 93 males, 102 females, S2b: 96 males, 90 females</td>
</tr>
<tr>
<td valign="top" align="left" colspan="9" style="background-color:#e0e1e3"><bold>Other moral dilemmas</bold></td>
</tr> <tr>
<td valign="top" align="left">Bore (<xref ref-type="bibr" rid="B4">2001</xref>)</td>
<td/>
<td valign="top" align="left">Construction, adaptation, validation</td>
<td valign="top" align="left">S1: Cronbach&#x00027;s &#x003B1;, PCA, correlation, S2: Cronbach&#x00027;s &#x003B1;, PCA, correlation, S3: Cronbach&#x00027;s &#x003B1;, PCA, correlation, S4: Cronbach&#x00027;s &#x003B1;, <italic>t</italic>-test, correlation, second-order factor analysis (varimax rotation), S5: Cronbach&#x00027;s &#x003B1;, <italic>t</italic>-test, ANOVA, correlation, test-retest reliability, PCA, S6: <italic>t</italic>-test, S7: Cronbach&#x00027;s &#x003B1;, ANOVA, S8a: Cronbach&#x00027;s &#x003B1;, <italic>t</italic>-test, regression, ANOVA, S8b: Interview (non-quantitative), S9: correlation, regression (stepwise), PCA</td>
<td valign="top" align="left">S1: 35 items (three scenarios), S2: 45 items (four scenarios), S3-6: 24 items (three scenarios), S7: 24 items, except for the New Zealand samples, which completed the 45-item version, S8a: 45 items, S8b: 45 items</td>
<td valign="top" align="left">Decision items (e.g., action should be taken/is prohibited/is not important)</td>
<td valign="top" align="left">S1: 882 (Medical school applicants), S2: 2,906 (Medical school applicants), S3: 2,862 (Medical school applicants), S4: 84 (first year psychology students), 82 (Medical school applicants), S5: 232 (Bachelor of Medicine students), S6: 16 (ethical clinicians), S7: 2,862 (sample from S3), 113 (New Zealand medical students, sample A), 123 (New Zealand medical students, sample B), 360 (Israel medical school applicants, sample A), 626 (Israel medical school applicants, sample B), 67 (Fiji medical students), S8a: 58 (Medical school applicants), S8b: 45 (sample from S8a), S9: 113 (Medical students, sample A), 123 (Medical students, sample B)</td>
<td valign="top" align="left">S1: 19.3 years, SD = 3.9, S2: 18.6 years, SD = 3.3, S3: 19.9 years, SD = 4.8, S4: 22.5 years, SD = 8.2, S5: 23.6 years, SD = 5.0, S6: Not reported, S7: 19.9 years, SD = 4.8 (sample from S3), 20.2 years, SD = 3.0 (New Zealand sample A), 19.9 years, SD = 2.4 (New Zealand sample B), 22.7 years, SD = 3.7 (Israel sample A), 22.5 years, SD = 2.5 (Israel sample B), 19.0 years, SD = 2.3 (Fiji sample), S8a: not reported, S8b: not reported, S9: 20.2 years, SD = 3.0 (sample A), 19.9 years, SD = 2.4 (sample B)</td>
<td valign="top" align="left">S1: 368 males, 510 females, S2: 1,267 males, 1,634 females, S3: 1,334 males, 1,525 females, S4: 42 males, 121 females, S5: 104 males, 128 females, S6: Not reported, S7: 1,334 males, 1,525 females (sample from S3), 50 males, 62 females (New Zealand sample A), 44 males, 79 females (New Zealand sample B), 185 males, 174 females (Israel sample A), 294 males, 323 females (Israel sample B), 29 males, 33 females (Fiji sample), S8a: 31 males, 27 females, S8b: 25 males, 20 females, S9: 50 males, 62 females (sample A), 44 males, 78 females (sample B)</td>
</tr> <tr>
<td valign="top" align="left">Carmona-Perera et al. (<xref ref-type="bibr" rid="B6">2013</xref>)</td>
<td/>
<td valign="top" align="left">Translation, adaptation, validation</td>
<td valign="top" align="left">ANOVA</td>
<td valign="top" align="left">60 scenarios</td>
<td valign="top" align="left">Action, difficulty, congruency.</td>
<td valign="top" align="left">154 (Spanish undergraduates)</td>
<td valign="top" align="left">21.51 years, ranged 18&#x02013;54 years</td>
<td valign="top" align="left">29 males, 120 females</td>
</tr>
<tr>
<td valign="top" align="left">Christensen et al. (<xref ref-type="bibr" rid="B9">2014</xref>)</td>
<td/>
<td valign="top" align="left">Adaptation, validation</td>
<td valign="top" align="left">S1: ANOVA, <italic>t</italic>-test, regression, S2: ANOVA, <italic>t</italic>-test</td>
<td valign="top" align="left">46 scenarios</td>
<td valign="top" align="left">Rate level of arousal, rate perceived valence of the dilemma (S1), additional question regarding action (S2)</td>
<td valign="top" align="left">S1: 62 (undergraduate psychology students), S2: 43 (undergraduate psychology students)</td>
<td valign="top" align="left">S1: 21.0 years, SD = 5.35, S2: 20.65 years, SD = 5.52</td>
<td valign="top" align="left">S1: 19 males, 43 females, S2: 13 males, 30 females</td>
</tr> <tr>
<td valign="top" align="left">Kimhi (<xref ref-type="bibr" rid="B30">2014</xref>)</td>
<td/>
<td valign="top" align="left">Construction</td>
<td valign="top" align="left">Correlation, path analysis</td>
<td valign="top" align="left">One scenario</td>
<td valign="top" align="left">Reaction (Y/N) to seven possibilities of action, rate level of confidence on 5-point Likert scale to each decision</td>
<td valign="top" align="left">346 Israeli Defense Force soldiers (202 regular and 144 active reserve)</td>
<td valign="top" align="left">22.50 years</td>
<td valign="top" align="left">Not reported</td>
</tr> <tr>
<td valign="top" align="left">Lotto et al. (<xref ref-type="bibr" rid="B36">2014</xref>)</td>
<td/>
<td valign="top" align="left">Construction</td>
<td valign="top" align="left">ANOVA</td>
<td valign="top" align="left">75 scenarios</td>
<td valign="top" align="left">Action</td>
<td valign="top" align="left">120 (University students)</td>
<td valign="top" align="left">19.96 years, SD = 2.70</td>
<td valign="top" align="left">55 males, 65 females</td>
</tr> <tr>
<td valign="top" align="left">Fleischhut et al. (<xref ref-type="bibr" rid="B17">2017</xref>)</td>
<td/>
<td valign="top" align="left">Construction</td>
<td valign="top" align="left">ANOVA, correlations</td>
<td valign="top" align="left">Six scenarios</td>
<td valign="top" align="left">Appropriateness, moral permissibility, probability of outcomes, probability estimate, rank importance</td>
<td valign="top" align="left">731 (MTurk)</td>
<td valign="top" align="left">32.6 years, SD = 12.1</td>
<td valign="top" align="left">405 males, 326 females</td>
</tr> <tr>
<td valign="top" align="left" colspan="9" style="background-color:#e0e1e3"><bold>Self-report scales</bold></td>
</tr> <tr>
<td valign="top" align="left">Abdellaoui et al. (<xref ref-type="bibr" rid="B1">2016</xref>)</td>
<td valign="top" align="left">ABB</td>
<td valign="top" align="left">Construction</td>
<td valign="top" align="left">S1: ANOVA, PCA, correlation, S2: ANOVA</td>
<td valign="top" align="left">12-item scale</td>
<td/>
<td valign="top" align="left">S1: 521, S2: 193 (prison inmates)</td>
<td valign="top" align="left">S1: 28.1 years, SD = 7.4, S2: 23.72 years</td>
<td valign="top" align="left">S1: 58.93% male, 41.97% female, S2: 193 males (100%)</td>
</tr> <tr>
<td valign="top" align="left">Kahane et al. (<xref ref-type="bibr" rid="B29">2018</xref>)</td>
<td valign="top" align="left">OUS</td>
<td valign="top" align="left">Construction, validation</td>
<td valign="top" align="left">S1: EFA and CFA, S2: CFA, correlation, S3: <italic>t</italic>-test, correlation</td>
<td valign="top" align="left">Nine-item scale</td>
<td/>
<td valign="top" align="left">S1: 960 (MTurk), S2: 282 (MTurk), S3: 81 (experts in Moral Philosophy)</td>
<td valign="top" align="left">S1: 35 years, SD = 12.11, S2: 39 years, SD = 12.66, S3: 32 years, SD = 9.72</td>
<td valign="top" align="left">S1: 489 females, S2: 178 females, S3: 23 females</td>
</tr>
<tr>
<td valign="top" align="left">Yamamoto and Maeder (<xref ref-type="bibr" rid="B53">2019</xref>)</td>
<td valign="top" align="left">POQ</td>
<td valign="top" align="left">Construction, validation</td>
<td valign="top" align="left">S1: EFA, IRT, S2: CFA, S3: correlations, regression</td>
<td valign="top" align="left">17-item scale</td>
<td/>
<td valign="top" align="left">S1: 199 (MTurk), S2: 188 (MTurk), S3: 179 (MTurk)</td>
<td valign="top" align="left">S1: 33.9 years, SD = 10.7, S2: 32.8 years, SD = 10.6, S3: 36 years, SD = 10.3</td>
<td valign="top" align="left">S1: 69 males, 120 females, S2: Not reported, S3: 95 males, 84 females</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TN2"><p><sup>a</sup>There are missing values of sex/gender in some studies. Therefore, numbers reported in Sex Ratio do not necessarily add up to total sample size.</p></fn>
<p>bDIT, behavioral Defining Issues Test; DIT, Defining Issues Test; DIT-2, Defining Issues Test Version 2; MD, moral dilemma; MOJAC, Moral Orientation of Justice and Care Scale; PMC, Professional Moral Courage Scale; POQ, punishment orientation questionnaire; OUS, The Oxford Utilitarianism Scale; ABB, the scale of social and moral judgments (named after the initials of the authors); URPO, utilitarian and retributive punishment orientation; ANOVA, analysis of variance; DIF, differential item functioning analysis; ANCOVA, analysis of covariance; CFA, confirmatory factor analysis; EFA, exploratory factor analysis; IRT, item response theory; PAF, principal axis factoring.</p>
</table-wrap-foot>
</table-wrap>
<p>The ratings of psychometric properties for each study are reported in <xref ref-type="table" rid="T4">Table 4</xref>. None of the studies reported information on floor and ceiling effects. Therefore, all studies were assigned a score of &#x0201C;0&#x0201D; for no information reported on floor and ceiling effects. The ratings of psychometric properties in Terwee et al.&#x00027;s (<xref ref-type="bibr" rid="B49">2007</xref>) framework are reported in <xref ref-type="table" rid="T5">Table 5</xref>.</p>
<table-wrap position="float" id="T4">
<label>Table 4</label>
<caption><p>Summary of factorial structure, reliability estimates, and relationships of the measure with other variables/group differences.</p></caption>
<table frame="box" rules="all">
<thead><tr>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Study</bold></th>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Factorial structure and reliability</bold></th>
<th valign="top" align="left" colspan="4" style="background-color:#919497; color:#ffffff"><bold>Relationship with other variables/group differences</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" colspan="6" style="background-color:#e0e1e3"><bold>DIT</bold></td>
</tr> <tr>
<td valign="top" align="left">Martin et al. (<xref ref-type="bibr" rid="B39">1977</xref>)</td>
<td valign="top" align="left">Kristof&#x00027;s reliability (0.70)</td>
<td valign="top" align="left" colspan="4">College &#x0003E; high school &#x0003E; junior high</td>
</tr> <tr>
<td valign="top" align="left">Davison and Robbins (<xref ref-type="bibr" rid="B13">1978</xref>)</td>
<td valign="top" align="left">Cronbach&#x00027;s &#x003B1; (0.77&#x02013;0.82), test-retest (0.71&#x02013;0.81; 2&#x02013;4 years)</td>
<td valign="top" align="left" colspan="4">Cognitive ability (<italic>r</italic> = 0.43<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> comprehension of moral issues (<italic>r</italic> = 0.65<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> law and order orientation (<italic>r</italic> = &#x02212;0.50<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> political tolerance (<italic>r</italic> = 0.50<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>)</td>
</tr> <tr>
<td valign="top" align="left" colspan="6" style="background-color:#e0e1e3"><bold>DIT-2</bold></td>
</tr> <tr>
<td valign="top" align="left">Rest et al. (<xref ref-type="bibr" rid="B46">1999</xref>)</td>
<td valign="top" align="left">Cronbach&#x00027;s &#x003B1; (0.81)</td>
<td valign="top" align="left" colspan="4">DIT P score (<italic>r</italic> = 0.71<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> education level (<italic>r</italic> = 0.69<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> age (<italic>r</italic> = 0.56<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> attitudes toward human rights (higher scores indicate greater advocacy for civil liberties, <italic>r</italic> = 0.50<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>)</td>
</tr> <tr>
<td valign="top" align="left">Mitchell (<xref ref-type="bibr" rid="B43">2000</xref>)</td>
<td valign="top" align="left">Three-factor solution (personal interest, maintaining norms, post-conventional), Cronbach&#x00027;s &#x003B1;&#x00027;s (0.727 for N2 score, 0.619 for <italic>P</italic> score)</td>
<td valign="top" align="left" colspan="4">Higher education &#x0003E; lower education, liberal &#x0003E; conservative, women &#x0003E; men</td>
</tr> <tr>
<td valign="top" align="left">Mayhew et al. (<xref ref-type="bibr" rid="B42">2015</xref>)</td>
<td/>
<td valign="top" align="left" colspan="4">Took DIT-2 three times &#x0003E; took DIT-2 two times</td>
</tr> <tr>
<td valign="top" align="left">Choi et al. (<xref ref-type="bibr" rid="B7">2020</xref>)</td>
<td valign="top" align="left">Bi-factor model with a general factor G and 3 lower-order factors (personal interest, maintaining norms, post-conventional), Cronbach&#x00027;s &#x003B1; (0.840)</td>
<td valign="top" align="left" colspan="4"></td>
</tr> <tr>
<td valign="top" align="left" colspan="6" style="background-color:#e0e1e3"><bold>bDIT</bold></td>
</tr> <tr>
<td valign="top" align="left">Choi et al. (<xref ref-type="bibr" rid="B8">2019</xref>)</td>
<td valign="top" align="left">Tetrachoric correlation (0.74)</td>
<td valign="top" align="left">DIT (<italic>r</italic> = 0.71)</td>
<td/>
<td/>
<td/>
</tr> <tr>
<td valign="top" align="left" colspan="6" style="background-color:#e0e1e3"><bold>Process Dissociation Model</bold></td>
</tr> <tr>
<td valign="top" align="left">Conway and Gawronski (<xref ref-type="bibr" rid="B12">2013</xref>)</td>
<td/>
<td valign="top" align="left" colspan="2"><bold>Deontological</bold> inclination:</td>
<td valign="top" align="left" colspan="2"><bold>Utilitarian</bold> inclination:</td>
</tr>
<tr>
<td/>
<td/>
<td valign="top" align="left" colspan="2">Utilitarian inclination (<italic>r</italic> = 0.09), empathic concern (<italic>r</italic> = 0.28<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> perspective-taking (<italic>r</italic> = 0.32<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> religiosity (<italic>r</italic> = 0.26<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> moral identity internalization (<italic>r</italic> = 0.22<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref>)</td>
<td valign="top" align="left" colspan="2">Need for cognition (<italic>r</italic> = 0.18), moral identity internalization (<italic>r</italic> = 0.23<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> high cognitive load &#x0003C; low cognitive load</td>
</tr> <tr>
<td valign="top" align="left">Jang (<xref ref-type="bibr" rid="B28">2020</xref>)</td>
<td/>
<td valign="top" align="left" colspan="2"><bold>Deontological</bold> inclination</td>
<td valign="top" align="left" colspan="2"><bold>Utilitarian</bold> inclination</td>
</tr>
<tr>
<td/>
<td/>
<td valign="top" align="left" colspan="2">Utilitarian inclination (<italic>r</italic> = &#x02212;0.23<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> Oxford Utilitarianism Scale (<italic>r</italic> = &#x02212;0.27<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> antisocial personality disorder (<italic>r</italic> = &#x02212;0.13<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> antisocial tendencies (<italic>r</italic> = &#x02212;0.17<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> older &#x0003C; younger</td>
<td valign="top" align="left" colspan="2">Oxford Utilitarianism Scale (<italic>r</italic> = 0.18<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> women &#x0003C; men, higher education &#x0003C; lower education</td>
</tr> <tr>
<td valign="top" align="left" colspan="6" style="background-color:#e0e1e3"><bold>CNI model</bold></td>
</tr> <tr>
<td valign="top" align="left">Gawronski et al. (<xref ref-type="bibr" rid="B20">2017</xref>)</td>
<td/>
<td valign="top" align="left">Sensitivity to <bold>consequences</bold></td>
<td valign="top" align="left">Sensitivity to <bold>norms</bold></td>
<td valign="top" align="left" colspan="2">Tendency for <bold>inaction</bold></td>
</tr> <tr>
<td/>
<td/>
<td valign="top" align="left">High psychopathy &#x0003C; low psychopathy</td>
<td valign="top" align="left">Women &#x0003E; men, high psychopathy &#x0003C; low psychopathy</td>
<td valign="top" align="left" colspan="2">Women &#x0003E; men, high cognitive load &#x0003E; low cognitive load, action &#x0003E; judgment, high psychopathy &#x0003C; low psychopathy</td>
</tr> <tr>
<td valign="top" align="left">K&#x000F6;rner et al. (<xref ref-type="bibr" rid="B32">2020</xref>)</td>
<td/>
<td/>
<td valign="top" align="left">Sensitivity to <bold>consequences</bold></td>
<td valign="top" align="left">Sensitivity to <bold>norms</bold></td>
<td valign="top" align="left">Tendency for <bold>inaction</bold></td>
</tr> <tr>
<td/>
<td/>
<td valign="top" align="left">Psychopathy</td>
<td valign="top" align="left"><italic>r</italic> = &#x02212;0.194<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref> to &#x02212;0.357<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref></td>
<td valign="top" align="left"><italic>r</italic> = &#x02212;0.494<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref> to &#x02212;0.613<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref></td>
<td valign="top" align="left"><italic>r</italic> = &#x02212;0.143 to &#x02212;0.299<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref></td>
</tr> <tr>
<td/>
<td/>
<td valign="top" align="left">Empathic concern</td>
<td valign="top" align="left"><italic>r</italic> = &#x02212;0.051 to 0.144</td>
<td valign="top" align="left"><italic>r</italic> = 0.175<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref> to 0.384<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref></td>
<td valign="top" align="left"><italic>r</italic> = &#x02212;0.023 to 0.164<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref></td>
</tr> <tr>
<td/>
<td/>
<td valign="top" align="left">Need for cognition</td>
<td valign="top" align="left"><italic>r</italic> = 0.022 to 0.166<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref></td>
<td valign="top" align="left"><italic>r</italic> = 0.077 to 0.270<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref></td>
<td valign="top" align="left"><italic>r</italic> = 0.027 to 0.112</td>
</tr> <tr>
<td/>
<td/>
<td valign="top" align="left">Impartial beneficence</td>
<td valign="top" align="left"><italic>r</italic> = &#x02212;0.078 to &#x02212;0.202<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref></td>
<td valign="top" align="left"><italic>r</italic> = &#x02212;0.172<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref> to &#x02212;0.348<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref></td>
<td valign="top" align="left"><italic>r</italic> = &#x02212;0.010 to &#x02212;0.287<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref></td>
</tr> <tr>
<td/>
<td/>
<td valign="top" align="left">Instrumental harm</td>
<td valign="top" align="left"><italic>r</italic> = &#x02212;0.029 to &#x02212;0.142</td>
<td valign="top" align="left"><italic>r</italic> = &#x02212;0.411<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref> to &#x02212;0.561<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref></td>
<td valign="top" align="left"><italic>r</italic> = &#x02212;0.145<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref> to &#x02212;0.239<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref></td>
</tr> <tr>
<td/>
<td/>
<td valign="top" align="left">Behavioral inhibition</td>
<td valign="top" align="left"><italic>r</italic> = 0.032 to 0.157</td>
<td valign="top" align="left"><italic>r</italic> = 0.053 to 0.167<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref></td>
<td valign="top" align="left"><italic>r</italic> = &#x02212;0.038 to 0.098</td>
</tr> <tr>
<td/>
<td/>
<td valign="top" align="left">Behavioral activation</td>
<td valign="top" align="left"><italic>r</italic> = 0.004 to &#x02212;0.279<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref></td>
<td valign="top" align="left"><italic>r</italic> = &#x02212;0.050 to &#x02212;0.149<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref></td>
<td valign="top" align="left"><italic>r</italic> = &#x02212;0.040 to &#x02212;0.180<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref></td>
</tr> <tr>
<td/>
<td/>
<td valign="top" align="left">Moral identity internalization</td>
<td valign="top" align="left"><italic>r</italic> = 0.107 to 0.199<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref></td>
<td valign="top" align="left"><italic>r</italic> = 0.347<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref> to 0.466<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref></td>
<td valign="top" align="left"><italic>r</italic> = 0.087 to 0.238<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref></td>
</tr> <tr>
<td/>
<td/>
<td valign="top" align="left">Religiosity</td>
<td valign="top" align="left"><italic>r</italic> = &#x02212;0.146<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref> to &#x02212;0.350<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref></td>
<td valign="top" align="left"><italic>r</italic> = 0.101 to &#x02212;0.235<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref></td>
<td valign="top" align="left"><italic>r</italic> = &#x02212;0.181<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref> to 0.011</td>
</tr> <tr>
<td valign="top" align="left" colspan="6" style="background-color:#e0e1e3"><bold>Other moral dilemmas</bold></td>
</tr> <tr>
<td valign="top" align="left">Bore (<xref ref-type="bibr" rid="B4">2001</xref>)</td>
<td valign="top" align="left">One-factor solution from PCA, Cronbach&#x00027;s &#x003B1;: 0.88 (35-item), 0.90 (45-item), 0.83 (24-item), Test-retest (0.77, 1 year)</td>
<td valign="top" align="left" colspan="2">Age (<italic>r</italic> = 0.2<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref> to 0.23<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>), gender (<italic>r</italic> = 0.08<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref> to 0.14<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> DIT P score (&#x02212;0.10<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref>), DIT decision (0.51<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> narcissism (<italic>r</italic> = &#x02212;0.15<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref> to &#x02212;0.16<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> aloofness (<italic>r</italic> = &#x02212;0.04 to &#x02212;0.15<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/></td>
<td valign="top" align="left" colspan="2">Confidence (<italic>r</italic> = 0.10<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref> to 0.19<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> empathy (<italic>r</italic> = &#x02212;0.01 to 0.00), power (<italic>r</italic> = &#x02212;0.19<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref>), hedonism (<italic>r</italic> = &#x02212;0.27<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> benevolence (<italic>r</italic> = 0.20<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref>), conformity (<italic>r</italic> = 0.30<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>)</td>
</tr> <tr>
<td valign="top" align="left">Carmona-Perera et al. (<xref ref-type="bibr" rid="B6">2013</xref>)</td>
<td valign="top" align="left">Cronbach&#x00027;s &#x003B1; (0.705)</td>
<td valign="top" align="left" colspan="4">Affirmative decisions: non-moral &#x0003E; moral impersonal &#x0003E; moral personal, high-conflict &#x0003E; low-conflict, perceived difficulty: moral personal &#x0003E; moral impersonal &#x00026; non-moral, high-conflict &#x0003E; low-conflict, congruent answers: non-moral &#x0003E; moral impersonal &#x00026; moral personal</td>
</tr> <tr>
<td valign="top" align="left">Christensen et al. (<xref ref-type="bibr" rid="B9">2014</xref>)</td>
<td/>
<td valign="top" align="left" colspan="2">Arousal: personal force &#x0003E; impersonal force, self-beneficial &#x0003E; other-beneficial, empathy (<italic>r</italic> = 0.289<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref>), valence: personal force (more negative), self-beneficial (more negative), personal force &#x000D7; intentionality (accidental harm was rated more negative than instrumental harm in impersonal force condition), benefit recipient &#x000D7; intentionality (accidental harm was rated as more negative than instrumental harm in self-beneficial condition)</td>
<td valign="top" align="left" colspan="2">Reaction time: personal &#x0003C; impersonal, self-beneficial &#x0003C; other-beneficial, arousal (<italic>r</italic> = &#x02212;0.434<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> empathy (<italic>r</italic> = &#x02212;0.325<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Kimhi (<xref ref-type="bibr" rid="B30">2014</xref>)</td>
<td valign="top" align="left">Cronbach&#x00027;s &#x003B1;: 0.70 (decision), 0.74 (confidence), 0.80 (difficulty)</td>
<td valign="top" align="left" colspan="4">Compared to regular soldiers, reserve soldiers are more likely to take action (<italic>r</italic> = &#x02212;0.141<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> feel more confident (<italic>r</italic> = 0.107<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref>) and less difficult (<italic>r</italic> = &#x02212;0.102) about the decision, and more likely to be left-wing oriented (<italic>r</italic> = &#x02212;0.262<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>). Left-wing political orientation was associated with less likelihood of action (<italic>r</italic> = &#x02212;0.264<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>) and higher perceived difficulty about the decision (<italic>r</italic> = 0.211<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>).</td>
</tr> <tr>
<td valign="top" align="left">Lotto et al. (<xref ref-type="bibr" rid="B36">2014</xref>)</td>
<td/>
<td valign="top" align="left" colspan="2">Affirmative decisions: incidental killing &#x0003E; instrumental killing, self-beneficial &#x0003E; other-beneficial, decision times: incidental killing &#x0003E; instrumental killing, acceptability ratings: incidental killing &#x0003E; instrumental killing, self-beneficial &#x0003C; other-beneficial, valence ratings: self-beneficial (more unpleasant), women (more unpleasant)</td>
<td valign="top" align="left" colspan="2">Arousal ratings: incidental killing &#x0003E; instrumental killing, self-beneficial &#x0003E; other-beneficial, women &#x0003E; men</td>
</tr> <tr>
<td valign="top" align="left">Fleischhut et al. (<xref ref-type="bibr" rid="B17">2017</xref>)</td>
<td/>
<td valign="top" align="left" colspan="4">Impermissibility rating: foresight (no info on consequence) &#x00026; hindsight-bad (side effects of action occurred) &#x0003E; hindsight-good (side effects of action did not occur), estimated likelihood of side effects occurring: foresight &#x00026; hindsight-bad &#x0003E; hindsight-good, participants who judged action as impermissible &#x0003E; participants who judged action as permissible (<italic>r</italic> = &#x02212;0.39)</td>
</tr> <tr>
<td valign="top" align="left" colspan="6" style="background-color:#e0e1e3"><bold>Self-report scales</bold></td>
</tr> <tr>
<td valign="top" align="left">Abdellaoui et al. (<xref ref-type="bibr" rid="B1">2016</xref>)</td>
<td valign="top" align="left">Three-factor solution, Cronbach&#x00027;s &#x003B1;: 0.88 (overall), 0.78 (moral values), 0.82 (conventional values), 0.80 (personal values)</td>
<td valign="top" align="left" colspan="4">Seriousness: inmates &#x0003C; control, defensibility/excusability: inmates &#x0003C; control, tolerance (seeing transgression as serious but more defensible and not rejecting the transgressor): inmates &#x0003C; control</td>
</tr> <tr>
<td valign="top" align="left">Kahane et al. (<xref ref-type="bibr" rid="B29">2018</xref>)</td>
<td valign="top" align="left">Two-factor solution, Cronbach&#x00027;s &#x003B1;: 0.81 (impartial beneficence), 0.79 (instrumental harm)</td>
<td valign="top" align="left" colspan="2">Impartial beneficence: instrumental harm (<italic>r</italic> = 0.14<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref>), explicit utilitarianism (<italic>r</italic> = 0.37<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref>), classic sacrificial dilemmas (<italic>r</italic> = &#x02212;0.21<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> greater good dilemmas (<italic>r</italic> = 0.50<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> empathic concern (<italic>r</italic> = 0.33<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> identification with all of humanity (<italic>r</italic> = 0.33<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> hypothetical donation (<italic>r</italic> = 0.40<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> environmental protection (<italic>r</italic> = 0.14<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> religiosity (<italic>r</italic> = 0.15<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>)</td>
<td valign="top" align="left" colspan="2">Instrumental harm: explicit utilitarianism (<italic>r</italic> = 0.13<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref>), classic sacrificial dilemmas (<italic>r</italic> = &#x02212;0.32<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> greater good dilemmas (<italic>r</italic> = 0.07<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> empathic concern (<italic>r</italic> = &#x02212;0.16<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> identification with all of humanity (<italic>r</italic> = &#x02212;0.19<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> environmental protection (<italic>r</italic> = &#x02212;0.21<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> economic conservatism (<italic>r</italic> = 0.18<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> social conservatism (<italic>r</italic> = 0.18<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>)</td>
</tr>
<tr>
<td valign="top" align="left">Yamamoto and Maeder (<xref ref-type="bibr" rid="B53">2019</xref>)</td>
<td valign="top" align="left">Four-factor solution, Cronbach&#x00027;s &#x003B1;: 0.84 (permissive retributive), 0.85 (permissive utilitarian), 0.79 (prohibitive retributive), 0.80 (prohibitive utilitarian)</td>
<td valign="top" align="left">Permissive retributive: future-time orientation (<italic>r</italic> = 0.23<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref>), positive affect (<italic>r</italic> = 0.36<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> death penalty for retribution (<italic>r</italic> = 0.55<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> death penalty for deterrence (<italic>r</italic> = 0.49<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>)</td>
<td valign="top" align="left">Permissive utilitarian: past-time orientation (<italic>r</italic> = 0.21<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> positive affect (<italic>r</italic> = 0.30<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> death penalty for retribution (<italic>r</italic> = 0.57<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> death penalty for deterrence (<italic>r</italic> = 0.64<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> death qualification<xref ref-type="table-fn" rid="TN3"><sup>a</sup></xref> (<italic>r</italic> = &#x02212;0.39<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>)</td>
<td valign="top" align="left">Prohibitive retributive: positive affect (<italic>r</italic> = &#x02212;0.28<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> negative affect (<italic>r</italic> = &#x02212;0.15<xref ref-type="table-fn" rid="TN4"><sup>&#x0002A;</sup></xref>), death penalty for retribution (<italic>r</italic> = &#x02212;0.39<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> death penalty for deterrence (<italic>r</italic> = &#x02212;0.52<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>)</td>
<td valign="top" align="left">Prohibitive utilitarian: positive affect (<italic>r</italic> = &#x02212;0.16<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> death penalty for retribution (<italic>r</italic> = &#x02212;0.27<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> death penalty for deterrence (<italic>r</italic> = &#x02212;0.38<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>),<break/> death qualification (<italic>r</italic> = 0.30<xref ref-type="table-fn" rid="TN5"><sup>&#x0002A;&#x0002A;</sup></xref>)</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn id="TN3"><p><sup>a</sup>Death qualification is a question asking participants whether their beliefs about the death penalty will impair their performance of their duties as jurors; higher scores indicate a response that prior beliefs about the death penalty do not interfere with performance of duty as a juror.</p></fn>
<fn id="TN4"><p><sup>&#x0002A;</sup><italic>p</italic> &#x02264; 0.05,</p></fn> 
<fn id="TN5"><p><sup>&#x0002A;&#x0002A;</sup><italic>p</italic> &#x02264; 0.01.</p></fn>
</table-wrap-foot>
</table-wrap>
<table-wrap position="float" id="T5">
<label>Table 5</label>
<caption><p>Overview of scoring of psychometric properties in the included studies.</p></caption>
<table frame="box" rules="all">
<thead><tr>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Measures</bold></th>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Content validity</bold></th>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Internal consistency</bold></th>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Criterion validity</bold></th>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Construct validity</bold></th>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Reproducibility: agreement</bold></th>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Reproducibility: reliability</bold></th>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Responsiveness</bold></th>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Floor and ceiling effects</bold></th>
<th valign="top" align="left" style="background-color:#919497; color:#ffffff"><bold>Interpretability</bold></th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" colspan="10" style="background-color:#e0e1e3"><bold>Moral dilemmas</bold></td>
</tr> <tr>
<td valign="top" align="left" colspan="10" style="background-color:#e0e1e3"><bold>DIT</bold></td>
</tr> <tr>
<td valign="top" align="left">Martin et al. (<xref ref-type="bibr" rid="B39">1977</xref>)</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">?</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">?</td>
</tr> <tr>
<td valign="top" align="left">Davison and Robbins (<xref ref-type="bibr" rid="B13">1978</xref>)</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">?</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">0</td>
</tr> <tr>
<td valign="top" align="left" colspan="10" style="background-color:#e0e1e3"><bold>DIT-2</bold></td>
</tr> <tr>
<td valign="top" align="left">Rest et al. (<xref ref-type="bibr" rid="B46">1999</xref>)</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">?</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">&#x0002B;</td>
</tr> <tr>
<td valign="top" align="left">Mitchell (<xref ref-type="bibr" rid="B43">2000</xref>)</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">?</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">?</td>
</tr> <tr>
<td valign="top" align="left">Mayhew et al. (<xref ref-type="bibr" rid="B42">2015</xref>)</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">0</td>
</tr> <tr>
<td valign="top" align="left">Choi et al. (<xref ref-type="bibr" rid="B7">2020</xref>)</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">0</td>
</tr> <tr>
<td valign="top" align="left" colspan="10" style="background-color:#e0e1e3"><bold>bDIT</bold></td>
</tr> <tr>
<td valign="top" align="left">Choi et al. (<xref ref-type="bibr" rid="B8">2019</xref>)</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">?</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">0</td>
</tr> <tr>
<td valign="top" align="left" colspan="10" style="background-color:#e0e1e3"><bold>Process Dissociation Model</bold></td>
</tr> <tr>
<td valign="top" align="left">Conway and Gawronski (<xref ref-type="bibr" rid="B12">2013</xref>)</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">0</td>
</tr> <tr>
<td valign="top" align="left">Jang (<xref ref-type="bibr" rid="B28">2020</xref>)</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">?</td>
</tr> <tr>
<td valign="top" align="left" colspan="10" style="background-color:#e0e1e3"><bold>CNI model</bold></td>
</tr> <tr>
<td valign="top" align="left">Gawronski et al. (<xref ref-type="bibr" rid="B20">2017</xref>)</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">0</td>
</tr> <tr>
<td valign="top" align="left">K&#x000F6;rner et al. (<xref ref-type="bibr" rid="B32">2020</xref>)</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">0</td>
</tr> <tr>
<td valign="top" align="left" colspan="10" style="background-color:#e0e1e3"><bold>Other moral dilemmas</bold></td>
</tr> <tr>
<td valign="top" align="left">Bore (<xref ref-type="bibr" rid="B4">2001</xref>)</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">?</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">&#x0002B;</td>
</tr>
<tr>
<td valign="top" align="left">Carmona-Perera et al. (<xref ref-type="bibr" rid="B6">2013</xref>)</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">?</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">0</td>
</tr> <tr>
<td valign="top" align="left">Christensen et al. (<xref ref-type="bibr" rid="B9">2014</xref>)</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">?</td>
</tr> <tr>
<td valign="top" align="left">Kimhi (<xref ref-type="bibr" rid="B30">2014</xref>)</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">?</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">0</td>
</tr> <tr>
<td valign="top" align="left">Lotto et al. (<xref ref-type="bibr" rid="B36">2014</xref>)</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">?</td>
</tr> <tr>
<td valign="top" align="left">Fleischhut et al. (<xref ref-type="bibr" rid="B17">2017</xref>)</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">?</td>
</tr> <tr>
<td valign="top" align="left" colspan="10" style="background-color:#e0e1e3"><bold>Self-report scales</bold></td>
</tr> <tr>
<td valign="top" align="left">Abdellaoui et al. (<xref ref-type="bibr" rid="B1">2016</xref>)</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">?</td>
</tr> <tr>
<td valign="top" align="left">Kahane et al. (<xref ref-type="bibr" rid="B29">2018</xref>)</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">?</td>
</tr>
<tr>
<td valign="top" align="left">Yamamoto and Maeder (<xref ref-type="bibr" rid="B53">2019</xref>)</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">&#x0002B;</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">N/A</td>
<td valign="top" align="left">0</td>
<td valign="top" align="left">0</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p>DIT, Defining Issues Test; DIT-2, Defining Issues Test Version 2; bDIT, behavioral Defining Issues Test; N/A, not applicable; &#x0002B;, Positive rating; ?, Intermediate rating; &#x02212;, Negative rating; 0, No information provided.</p>
</table-wrap-foot>
</table-wrap>
</sec>
<sec>
<title>DIT and revised versions</title>
<p>Seven studies used the Defining Issues Test and its revised versions. For <bold><italic>content validity</italic></bold>, all seven studies received positive ratings for providing adequate evidence on measurement aim, target population, and concepts being measured.</p>
<p>For <bold><italic>internal consistency</italic></bold>, one study received a positive rating for internal consistency. Choi et al. (<xref ref-type="bibr" rid="B7">2020</xref>) conducted factor analyses on the DIT-2 (Cronbach&#x00027;s &#x003B1; = 0.84). Five studies reported Cronbach&#x00027;s &#x003B1; ranging between 0.70 and 0.82 but did not perform factor analyses. These studies were assigned intermediate ratings for internal consistency. Mayhew et al. (<xref ref-type="bibr" rid="B42">2015</xref>) was assigned &#x0201C;0&#x0201D; for internal consistency as no information on internal consistency was reported.</p>
<p>For <bold><italic>criterion validity</italic></bold>, two studies developed revised versions of existing measures and used the original version as a benchmark for validation. Rest et al.&#x00027;s (<xref ref-type="bibr" rid="B46">1999</xref>) DIT-2 (<italic>r</italic> = 0.71) and Choi et al.&#x00027;s (<xref ref-type="bibr" rid="B8">2019</xref>) bDIT (<italic>r</italic> = 0.71) were validated against the DIT and were thus assigned positive ratings for criterion validity. The remaining studies did not mention a &#x0201C;gold standard&#x0201D; of moral decision-making measure. Therefore, they were assigned &#x0201C;N/A&#x0201D; for criterion validity.</p>
<p>For <bold><italic>construct validity</italic></bold>, of the seven studies that investigated various versions of the DIT, two studies were assigned positive ratings for construct validity. Rest et al. (<xref ref-type="bibr" rid="B46">1999</xref>) hypothesized positive correlations between DIT-2 scores and age, education level, and attitudes toward human rights. Mitchell (<xref ref-type="bibr" rid="B43">2000</xref>) hypothesized positive correlations between DIT-2 scores and age and political liberalism. The remaining five studies were each assigned a score of &#x0201C;0&#x0201D; because they did not propose theoretically-driven hypotheses. Choi et al. (<xref ref-type="bibr" rid="B7">2020</xref>) investigated the factorial structure of the DIT-2. While specific hypotheses were proposed, they were neither about relations to other measures nor expected group differences. Therefore, a &#x0201C;0&#x0201D; was assigned for construct validity for no information on appropriate hypotheses.</p>
<p><bold><italic>Reproducibility (agreement and reliability)</italic> </bold>and <bold><italic>responsiveness</italic> </bold>are criteria that apply to repeated measures designs only. Agreement is defined as the extent to which scores on repeated measures are close to each other (absolute measurement error) (Terwee et al., <xref ref-type="bibr" rid="B49">2007</xref>). Reliability (test-retest) is defined as the extent to which participants can be distinguished from each other, despite measurement error (Terwee et al., <xref ref-type="bibr" rid="B49">2007</xref>). Responsiveness is the measure&#x00027;s ability to detect clinically important changes over time, however small the changes are.</p>
<p>Five studies did not examine repeated measures of moral decision-making; therefore, these criteria were not applicable to these studies. These studies were assigned an &#x0201C;N/A&#x0201D; rating on Agreement, Reliability, and Responsiveness.</p>
<p>Of the remaining two studies that used repeated measures designs, neither referred to the required indices of Agreement (e.g., Minimal Important Change). Therefore, these studies were assigned a &#x0201C;0&#x0201D; score for no information provided on Agreement. For Reliability, neither study referred to the required indices of Reliability (e.g., Intraclass Correlation Coefficient, weighted Cohen&#x00027;s Kappa). Therefore, these studies were assigned a &#x0201C;0&#x0201D; score for no information provided. Nonetheless, these studies reported test-retest reliabilities, which are presented in <xref ref-type="table" rid="T3">Table 3</xref>.</p>
<p>Neither study referred to the required indices of Responsiveness (e.g., Smallest Detectable Change, Minimal Important Change, Guyatt&#x00027;s Responsiveness Ratio). Therefore, these studies were assigned a &#x0201C;0&#x0201D; score for no information provided.</p>
<p><bold><italic>Interpretability</italic> </bold>is defined as the ability to assign qualitative meaning to quantitative scores (Terwee et al., <xref ref-type="bibr" rid="B49">2007</xref>). Interpretability is important for health measures because it is crucial that the scores from the instrument reflect meaningful differences between groups (e.g., patient vs. control, gender, age). A positive rating for interpretability was given only if the study reported means and standard deviations of the measure for at least four subgroups. An intermediate score was given if there was incomplete reporting of statistics and/or fewer than four subgroups.</p>
<p>Rest et al. (<xref ref-type="bibr" rid="B46">1999</xref>) received a positive rating for interpretability for reporting means and standard deviations of the DIT-2 N2 scores for participants in four education levels (from ninth grade students to graduate and professional school students).</p>
<p>Two studies received intermediate ratings for interpretability. Martin et al. (<xref ref-type="bibr" rid="B39">1977</xref>) reported means (but not standard deviations) of DIT P scores among junior high school, senior high school, and college students. Mitchell (<xref ref-type="bibr" rid="B43">2000</xref>) reported means (but not standard deviations) of DIT-2 P and N2 scores among five groups of political identities (from very liberal to very conservative).</p>
<p>The remaining four studies were assigned a score of &#x0201C;0&#x0201D; indicating no information on interpretability was reported. This was either due to not having subgroups or not reporting descriptive statistics of the measure across the subgroups.</p>
</sec>
<sec>
<title>Measures using the Process Dissociation Model</title>
<p>Two studies used PD measures. For <bold><italic>content validity</italic></bold>, both studies were positively rated for providing adequate evidence on measurement aim, target population, and concepts being measured. As the construction study of the PD measure, Conway and Gawronski (<xref ref-type="bibr" rid="B12">2013</xref>) were theoretically driven in their item selection and thus received a positive rating for content validity.</p>
<p>For <bold><italic>internal consistency</italic></bold>, both studies received &#x0201C;N/A&#x0201D; due to how the construct scores were calculated. The PD model takes responses from all dilemmas to produce a single score for each factor (i.e., propensities for utilitarian/deontological principles). This is contrary to self-report scales where multiple items measure the same construct such that analysis of internal consistency can be performed. Therefore, an &#x0201C;N/A&#x0201D; was assigned to these studies as the criterion for internal consistency was not applicable.</p>
<p>For <bold><italic>criterion validity</italic>, </bold>neither study mentioned a &#x0201C;gold standard&#x0201D; of moral decision-making measure. Therefore, they were assigned &#x0201C;N/A&#x0201D; for criterion validity.</p>
<p>For <bold><italic>construct validity</italic></bold>, the PD measures received one positive rating and one &#x0201C;0&#x0201D; score. Conway and Gawronski (<xref ref-type="bibr" rid="B12">2013</xref>) proposed specific hypotheses and found that deontological inclinations were positively correlated with empathic concern, perspective-taking, religiosity, and moral identity internalization. Jang (<xref ref-type="bibr" rid="B28">2020</xref>) translated and validated the Korean version of the PD measure. A score of &#x0201C;0&#x0201D; was assigned because the analyses were exploratory, and no hypotheses were proposed a priori.</p>
<p>For <bold><italic>reproducibility (agreement and reliability) and responsiveness</italic></bold>, neither study used a repeated measures design. Therefore, both studies were assigned an &#x0201C;N/A&#x0201D; rating.</p>
<p>For <bold><italic>interpretability</italic></bold>, the PD measure received one intermediate rating and one &#x0201C;0&#x0201D; score. Jang (<xref ref-type="bibr" rid="B28">2020</xref>) reported means (but not standard deviation) of utilitarian and deontological inclinations of males and females. Conway and Gawronski (<xref ref-type="bibr" rid="B12">2013</xref>) did not report descriptive statistics for any subgroups.</p>
</sec>
<sec>
<title>Measures using the CNI model</title>
<p>Two studies used measures adopting the CNI model. For <bold><italic>content validity</italic></bold>, both studies were positively rated for providing adequate evidence on measurement aim, target population, and concepts being measured. As the construction study of the CNI measure, Gawronski et al. (<xref ref-type="bibr" rid="B20">2017</xref>) were theoretically driven in their item selection and thus received a positive rating for content validity.</p>
<p>For <bold><italic>internal consistency</italic></bold>, similar to the PD measures, both studies received &#x0201C;N/A&#x0201D; ratings. The CNI model takes responses from all dilemmas to produce a single score for each factor (i.e., sensitivity to consequences, sensitivity to norms, general tendency for inaction). Therefore, the criteria for internal consistency were not applicable.</p>
<p>For <bold><italic>criterion validity</italic></bold>, neither study mentioned a &#x0201C;gold standard&#x0201D; measure of moral decision-making. Therefore, they were assigned &#x0201C;N/A&#x0201D; for criterion validity.</p>
<p>For <bold><italic>construct validity</italic></bold>, both studies received positive ratings. Although Gawronski et al. (<xref ref-type="bibr" rid="B20">2017</xref>) and K&#x000F6;rner et al. (<xref ref-type="bibr" rid="B32">2020</xref>) did not propose specific hypotheses regarding the model, each exploratory study (e.g., Studies 1a, 2a, 3a, 4a) was accompanied by a replication study (e.g., Studies 1b, 2b, 3b, 4b) that found supporting evidence for exploratory findings. We deemed this method to be appropriate for minimizing the risk of bias from retrospective explanations and evaluated the percentage of hypotheses supported based on the proportion of relationships replicated in the second study compared to the first study.</p>
<p>For <bold><italic>reproducibility (agreement and reliability) and responsiveness</italic></bold>, neither study used a repeated measures design. Therefore, both studies were assigned an &#x0201C;N/A&#x0201D; rating.</p>
<p>For <bold><italic>interpretability</italic></bold>, the CNI measure received two &#x0201C;0&#x0201D; scores. Neither study reported descriptive statistics for any subgroups.</p>
</sec>
<sec>
<title>Other moral dilemmas</title>
<p>There are six studies that each identified one unique measure of moral decision-making. For <bold><italic>content validity</italic></bold>, all studies received positive ratings for providing adequate evidence on measurement aim, target population, and concepts being measured. For construction studies, Bore (<xref ref-type="bibr" rid="B4">2001</xref>), Kimhi (<xref ref-type="bibr" rid="B30">2014</xref>), Lotto et al. (<xref ref-type="bibr" rid="B36">2014</xref>), and Fleischhut et al. (<xref ref-type="bibr" rid="B17">2017</xref>) were theoretically driven in their item selection and thus received a positive rating for content validity.</p>
<p>For <bold><italic>internal consistency</italic></bold>, Bore (<xref ref-type="bibr" rid="B4">2001</xref>) received a positive rating for conducting factor analyses and reporting internal consistency estimates (Cronbach&#x00027;s &#x003B1; = 0.83&#x02013;0.90). Carmona-Perera et al. (<xref ref-type="bibr" rid="B6">2013</xref>) and Kimhi (<xref ref-type="bibr" rid="B30">2014</xref>) were intermediately rated for reporting adequate internal consistency estimates (Cronbach&#x00027;s &#x003B1; &#x0003E; 0.70). Christensen et al. (<xref ref-type="bibr" rid="B9">2014</xref>), Lotto et al. (<xref ref-type="bibr" rid="B36">2014</xref>), and Fleischhut et al. (<xref ref-type="bibr" rid="B17">2017</xref>) did not report any reliability measures and were thus rated &#x0201C;0&#x0201D; for internal consistency.</p>
<p>For <bold><italic>criterion validity</italic></bold>, none of the studies mentioned a &#x0201C;gold standard&#x0201D; measure of moral decision-making. Therefore, they were assigned &#x0201C;N/A&#x0201D; for criterion validity.</p>
<p>For <bold><italic>construct validity</italic></bold>, Kimhi (<xref ref-type="bibr" rid="B30">2014</xref>) received a positive rating for formulating theoretically driven hypotheses and obtaining supporting evidence. Bore&#x00027;s (<xref ref-type="bibr" rid="B4">2001</xref>) MOJAC scale was intermediately rated for construct validity. This was because a number of hypothesized relationships (e.g., Right Wing Authoritarianism, emotional intelligence) were not supported. Christensen et al. (<xref ref-type="bibr" rid="B9">2014</xref>), Lotto et al. (<xref ref-type="bibr" rid="B36">2014</xref>), and Abdellaoui et al. (<xref ref-type="bibr" rid="B1">2016</xref>) did not propose specific hypotheses and were thus each assigned a score of &#x0201C;0&#x0201D;. Fleischhut et al. (<xref ref-type="bibr" rid="B17">2017</xref>) proposed specific hypotheses. However, a score of &#x0201C;0&#x0201D; was assigned because the hypothesized relationships were about group differences from experimental manipulations rather than theoretical relationships of the measure with other constructs.</p>
<p>For <bold><italic>reproducibility (agreement and reliability) and responsiveness</italic></bold>, only Bore (<xref ref-type="bibr" rid="B4">2001</xref>) used a repeated measures design. However, the study did not refer to indices of agreement, reliability, or responsiveness (e.g., Minimal Important Change). Therefore, it was assigned a &#x0201C;0&#x0201D; score because no information was provided on these criteria. The remaining five studies were rated &#x0201C;N/A&#x0201D; as the criteria are not applicable.</p>
<p>For <bold><italic>interpretability</italic></bold>, Bore (<xref ref-type="bibr" rid="B4">2001</xref>) received a positive rating for reporting means and standard deviations of the MOJAC scale by groups based on the language spoken at home, years enrolled into medical school and cultural backgrounds. Christensen et al. (<xref ref-type="bibr" rid="B9">2014</xref>), Lotto et al. (<xref ref-type="bibr" rid="B36">2014</xref>), and Fleischhut et al. (<xref ref-type="bibr" rid="B17">2017</xref>) were intermediately rated either for reporting means but not standard deviations of the subgroups&#x00027; scores, or not systematically reporting descriptive statistics of all subgroups. Carmona-Perera et al. (<xref ref-type="bibr" rid="B6">2013</xref>) and Kimhi (<xref ref-type="bibr" rid="B30">2014</xref>) were given &#x0201C;0&#x0201D; as there was no information on descriptive statistics of subgroups.</p>
</sec>
<sec>
<title>Self-report scales</title>
<p>Three self-report scales were identified in three construction studies&#x02014;the ABB Scale (Abdellaoui et al., <xref ref-type="bibr" rid="B1">2016</xref>), the Oxford Utilitarianism Scale (Kahane et al., <xref ref-type="bibr" rid="B29">2018</xref>), and the Punishment Orientation Questionnaire (Yamamoto and Maeder, <xref ref-type="bibr" rid="B53">2019</xref>).</p>
<p>All three studies received positive ratings for <bold><italic>content validity</italic></bold> for providing adequate evidence on measurement aim, target population, and concepts being measured. Additionally, they were all theoretically driven in the construction and selection of scale items.</p>
<p>For <bold><italic>internal consistency</italic></bold>, all three studies were positively rated for conducting factor analyses and reporting internal consistency estimates (Cronbach&#x00027;s &#x003B1; = 0.79&#x02013;0.88).</p>
<p>For <bold><italic>criterion validity</italic></bold>, none of the studies mentioned a &#x0201C;gold standard&#x0201D; measure of moral decision-making. Therefore, they were assigned &#x0201C;N/A&#x0201D; for criterion validity.</p>
<p>For <bold><italic>construct validity</italic></bold>, Kahane et al. (<xref ref-type="bibr" rid="B29">2018</xref>) and Yamamoto and Maeder (<xref ref-type="bibr" rid="B53">2019</xref>) received positive ratings for formulating theoretically driven hypotheses and obtaining supporting evidence. Abdellaoui et al. (<xref ref-type="bibr" rid="B1">2016</xref>) did not propose specific hypotheses and was thus assigned a score of &#x0201C;0&#x0201D;.</p>
<p>For <bold><italic>reproducibility (agreement and reliability) and responsiveness</italic></bold>, none of the studies used a repeated measures design. Therefore, all three studies were assigned an &#x0201C;N/A&#x0201D; rating.</p>
<p>For <bold><italic>interpretability</italic></bold>, two studies were intermediately rated. Abdellaoui et al. (<xref ref-type="bibr" rid="B1">2016</xref>) reported means (but not standard deviations) of seriousness and defensibility ratings of violations between prison inmates and the general population, between sex offenders and other offenders, and between recidivists and first-time offenders. Kahane et al. (<xref ref-type="bibr" rid="B29">2018</xref>) reported means and standard deviations of Instrumental Harm and Impartial Beneficence between self-identified Republicans and Democrats. Additionally, means and standard deviations of Instrumental Harm (but not Impartial Beneficence) were reported between men and women. Yamamoto and Maeder (<xref ref-type="bibr" rid="B53">2019</xref>) were assigned a score of &#x0201C;0&#x0201D; due to not having subgroups.</p>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>Discussion</title>
<p>The present systematic review had four aims: (1) identify existing measures of moral decision-making with life/death content, (2) evaluate the psychometric properties of these measures against a quality appraisal tool, (3) discuss the conceptualization of the construct and assess the usefulness of the identified measures, and (4) ascertain whether a gold standard measure of moral decision-making using the broad definition adopted in this review exists, and if not, whether promising measures exist. Below we will assess our degree of success in achieving each aim.</p>
<sec>
<title>Aims 1 and 2: Identifying and evaluating measures of moral decision-making</title>
<p>This review was successful in identifying twelve unique measures of moral decision-making in high-stakes situations. Nine of these were moral dilemma sets, of which only three were reported in more than one study (Defining Issues Test, Process Dissociation Model, and CNI Model), and the other three were self-report scales (ABB Scale, Oxford Utilitarianism Scale, Punishment Orientation Questionnaire).</p>
<sec>
<title>Defining Issues Test and revised versions</title>
<p>Consistent with previous findings (Villegas de Posada and Vargas-Trujillo, <xref ref-type="bibr" rid="B51">2015</xref>; Mart&#x000ED;-Vilar et al., <xref ref-type="bibr" rid="B40">2021</xref>), the Defining Issues Test (Rest, <xref ref-type="bibr" rid="B45">1974</xref>) and its revised versions were the most commonly used measure in the identified studies. The DIT is one of the earliest measures developed to examine moral decision-making. The results of this systematic review found adequate evidence on factorial structure, internal consistency, and temporal stability. Moreover, the DIT has been validated against theoretically related constructs (e.g., cognitive ability, education, political orientation) and other measures of moral decision-making (e.g., Christensen et al., <xref ref-type="bibr" rid="B9">2014</xref>). Ample evidence of reliability and validity suggests that the DIT and its revised versions (i.e., DIT-2, bDIT) may be a candidate for a gold standard for measuring moral decision-making. However, a limitation of the DIT is that it is based on Kohlberg&#x00027;s theory of moral reasoning, which is developmental in nature and, thus, the scope of the measure may be theoretically limited. Specifically, the difference in scores on the DIT may be attributed to the levels of moral reasoning development, and thus limits its usefulness for examining differences that may arise among morally mature people (e.g., utilitarian vs. deontological inclinations).</p>
</sec>
<sec>
<title>Process Dissociation Model and CNI model</title>
<p>The Process Dissociation Model was proposed to challenge the dichotomy between utilitarian and deontological tendencies that underlies earlier measures of moral decision-making (Conway and Gawronski, <xref ref-type="bibr" rid="B12">2013</xref>). Rather than considering them as opposites, utilitarian and deontological tendencies are proposed as separate constructs. Given this theoretical framework, the PD measure was validated against theoretically related constructs (e.g., empathic concern, perspective-taking, religiosity, moral identity internalization) as well as the Oxford Utilitarianism Scale (Kahane et al., <xref ref-type="bibr" rid="B29">2018</xref>). However, there is limited evidence on the reliability, especially internal and temporal, of the measure. Given its different scoring methods, traditional measures of internal consistency (e.g., Cronbach&#x00027;s &#x003B1;) are not applicable. Additionally, a lack of longitudinal studies means that there is no information on the temporal stability of these constructs. Such psychometric properties need to be examined for the measure to be considered a gold standard for measuring moral decision-making.</p>
<p>The CNI Model extended the PD Model by introducing a third factor&#x02014;a general tendency for inaction. In morally ambiguous situations, people may prefer inaction over any choice of action. The CNI measure was validated against theoretically related constructs (e.g., psychopathy, empathic concern, need for cognition, behavioral activation/inhibition, moral identity internalization, and religiosity) and the Oxford Utilitarianism Scale (Kahane et al., <xref ref-type="bibr" rid="B29">2018</xref>). However, the CNI Model shares the same limitation as the PD Model in the lack of evidence on reliability. Future studies will need to examine the internal consistency and temporal stability of the measure.</p>
</sec>
<sec>
<title>Single studies using moral dilemmas</title>
<p>Six other individual studies using different sets of moral dilemmas involving life and death scenarios were identified in this review. Five of these were intended to measure the effect of experimental manipulations (e.g., amount of information participants had before making a decision). Such measures were not intended for capturing robust individual differences; instead, they focused on the state rather than trait aspects. Therefore, the evidence of their psychometric properties will not be discussed.</p>
<p>In the sixth, Bore (<xref ref-type="bibr" rid="B4">2001</xref>) developed the MOJAC scale to measure an individual&#x00027;s inclination toward the rights of the individual vs. the rights of the collective. The MOJAC scale demonstrated good internal consistency and test-retest reliability. Although some hypotheses were not supported, the MOJAC scale was related to some important theoretically relevant constructs (e.g., Power, Hedonism, Benevolence, and Conformity) and the DIT (Rest, <xref ref-type="bibr" rid="B45">1974</xref>). Overall, the MOJAC scale appears to be a good individual differences measure of certain facets of moral decision-making. Its theoretical scope may need to be extended as moral decision-making in high-stakes situations goes beyond consideration of the rights of the individual versus those of the collective.</p>
</sec>
<sec>
<title>Self-report measures</title>
<p>The ABB scale (Abdellaoui et al., <xref ref-type="bibr" rid="B1">2016</xref>) was intended to measure an individual&#x00027;s judgment of seriousness, defensibility, and tolerability of three types of transgressions&#x02014;personal, conventional, and moral. While the scale has good internal consistency, it was not validated against any theoretically related constructs. The incomplete evidence of psychometric properties needs to be addressed for the measure to be used in research.</p>
<p>The Oxford Utilitarianism Scale (Kahane et al., <xref ref-type="bibr" rid="B29">2018</xref>) is a high-resolution self-report measure of utilitarianism that focuses on its two underlying factors&#x02014;impartial beneficence and instrumental harm. The authors provided adequate evidence on the factorial structure, internal consistency, and construct validity of the OUS. Furthermore, it has been evaluated against the Process Dissociation Model (Jang, <xref ref-type="bibr" rid="B28">2020</xref>) and the CNI Model (K&#x000F6;rner et al., <xref ref-type="bibr" rid="B32">2020</xref>). However, in return for depth, the OUS has sacrificed its breadth in measurement. By focusing on utilitarianism, it overlooks other important factors such as the intentions of actions, motivations to conform to norms, and tendencies to avoid moral issues. Additionally, there is no information on the temporal stability of the constructs measured by the OUS.</p>
<p>The Punishment Orientation Questionnaire (Yamamoto and Maeder, <xref ref-type="bibr" rid="B53">2019</xref>) looked at moral decision-making in the form of punishments. The scale measures two motivations behind punishment&#x02014;utilitarianism and retributivism, each of which was further divided into prohibitive motivation and permissive motivation. There is good evidence of the factorial structure, internal consistency, and construct validity of the measure. However, the limited scope of POQ means that it is unlikely to be considered a gold standard measure, as moral decision-making entails more than just punishment. Moreover, the temporal stability of the punishment construct is yet to be examined.</p>
</sec>
</sec>
<sec>
<title>Aim 3: Conceptualization</title>
<p>The result of the present review indicates that a diverse set of theoretical frameworks has been used to conceptualize moral decision-making. Most notably, the DIT (Rest, <xref ref-type="bibr" rid="B45">1974</xref>) and its revised versions were based on Kohlberg&#x00027;s (<xref ref-type="bibr" rid="B31">1984</xref>) Moral Development Theory. Almost all measures involved measuring utilitarianism&#x02014;the OUS (Impartial Beneficence vs. Instrumental Harm), the POQ (Utilitarianism vs. Retributivism), the PD and CNI Models (Utilitarianism/Consequences, Deontology/Norms, and Inaction), and the MOJAC scale (Individual Rights vs. Collective Rights). However, none of the conceptualizations are broad enough to be considered moral decision-making. Instead, each conceptualization focuses on a sub-facet of moral decision-making. This creates a critical obstacle to the integration of the theory of moral decision-making in general, and limits the cross-validation of different measures against each other and other key measures in the nomological network. For instance, only two studies received a positive rating for criterion validity. The DIT-2 (Rest et al., <xref ref-type="bibr" rid="B46">1999</xref>) and the bDIT (Choi et al., <xref ref-type="bibr" rid="B8">2019</xref>) were both strongly correlated with the original DIT (Rest, <xref ref-type="bibr" rid="B45">1974</xref>). All other studies were rated &#x0201C;N/A&#x0201D; indicating that it was impossible to evaluate criterion validity. Therefore, there is a lack of consensus on what models and theories should be the basis of understanding moral decision-making.</p>
<p>The remaining measures were traditional sacrificial dilemmas that pitted utilitarian decisions against deontological decisions (Carmona-Perera et al., <xref ref-type="bibr" rid="B6">2013</xref>; Christensen et al., <xref ref-type="bibr" rid="B9">2014</xref>; Kimhi, <xref ref-type="bibr" rid="B30">2014</xref>; Lotto et al., <xref ref-type="bibr" rid="B36">2014</xref>; Fleischhut et al., <xref ref-type="bibr" rid="B17">2017</xref>) and were not intended to measure systematic individual differences in the way people approach and process moral decisions. Moreover, the breadth of the moral decision-making construct has not been captured by traditional sacrificial dilemmas. Although emotion and intuition play important roles in theoretical approaches to moral decision-making, such as Greene et al. (<xref ref-type="bibr" rid="B22">2001</xref>) and Haidt (<xref ref-type="bibr" rid="B23">2001</xref>), emotions were not measured in the scales we reviewed, and only once did we find a measure that the authors tried to validate against any aspect of emotion. Recent theories and empirical evidence suggest that cognitive processes may not be the only (or even the most important) factor in moral decision-making (Haidt, <xref ref-type="bibr" rid="B23">2001</xref>). Moreover, the literature on how metacognitive processes (thinking about thinking; Flavell, <xref ref-type="bibr" rid="B16">1979</xref>) are involved in moral decision-making is scarce. However, the emerging metacognitive Meta-reasoning model (Ackerman and Thompson, <xref ref-type="bibr" rid="B2">2017</xref>) outlines processes that monitor the progress of our problem-solving and reasoning that foster an individual to take a particular action, and these constructs and processes are of direct relevance to moral decision-making. 
Therefore, the present review identifies a need for a more holistic approach that captures <italic>broad and systematic individual differences in terms of both the breadth of scope and the systematic tendencies (e.g., trait-like factors) underlying moral judgements and their respective nomological network</italic>. Studies that used sacrificial dilemmas were primarily interested in measuring the effects of experimentally manipulating contextual variables (e.g., number of lives saved, whether oneself benefits from sacrificial killing). The considerable influence of this experimental paradigm in the moral decision-making field may partly explain the lack of consensus and systematic conceptualization of moral decision-making, which affects investigations into this construct, its measurement models, and its relationship with other measures. Striving toward a consensus on models and theories is critical and necessary for advancing research in moral decision-making.</p>
</sec>
<sec>
<title>Aim 4: Toward a gold standard</title>
<p>The final aim of the present review was to identify a gold standard measure of moral decision-making. We do not believe we have identified such a measure, but our review highlights what is needed. Two identified measures&#x02014;the DIT (and its revised versions) and the OUS&#x02014;seem promising given their psychometric soundness; however, the DIT relies on a particular conceptualization of moral decision-making and the OUS only aims to measure one aspect. A gold standard would require agreement as to what a moral decision-making scale should measure, but at the moment there is a scattered conceptualization of moral decision-making across different measures. Therefore, the DIT and the OUS may serve as gold standards of what they aim to measure, as long as researchers are aware of their theoretically limited scope. If we are to have a gold standard then there is a critical need for a consensus on the conceptualization of moral decision-making in high-stakes situations and its nomological network, as well as cross-validation of existing measures and potentially development of new measures that capture the agreed-upon conceptualization of moral decision-making. This would pave the way for the development of psychometrically valid tools.</p>
<p>A gold standard measure would need evidence of predictive validity (i.e., predicting real-life outcomes). Predictive validity is not a criterion included in the quality appraisal framework because Terwee et al. (<xref ref-type="bibr" rid="B49">2007</xref>) developed the framework to assess the quality of health measures, which are themselves the outcome variable of interest. Therefore, an assessment of what the measures predict is not necessary. The application of a quality appraisal framework is beneficial as it allows for systematic evaluation of measures, and researchers need to be aware of differences in the contexts between the development and application of the framework. However, ultimately a measure of moral decision-making should predict what people actually do, and although predictive validity was not part of our framework we noted its lack in the studies we reviewed.</p>
<p>Ultimately, the ability to validly capture and train moral decision-making in situations where the consequences can involve the life and death of civilians and combatants is paramount. Our results (see <xref ref-type="table" rid="T2">Table 2</xref>) show that the included studies either treated moral decision-making as the dependent variable to be predicted (e.g., by age, gender, education level), or validated moral decision-making with measures of other theoretically related constructs (e.g., empathy, cognitive ability, psychopathy). The lack of theoretical and empirical connections between moral decision-making and real-life outcomes invites criticism of how practically meaningful the construct is. Therefore, the predictive validity of moral decision-making measures is an important issue that future studies should address, and it should be a criterion that future reviews consider.</p>
</sec>
<sec>
<title>Limitations</title>
<sec>
<title>Search strategy</title>
<p>The search strategy limited results to only measures of moral decision-making that included life-and-death scenarios and/or items. This decision was based on the goals of our research. We acknowledge that there may be psychometrically sound measures that do not involve life-and-death content. Nevertheless, these measures would contain contexts of great variability such that comparisons between these measures would be difficult. Future studies may conduct systematic reviews of the quality of moral decision-making measures within a specific field (e.g., business, education, sports, engineering).</p>
</sec>
<sec>
<title>Quality assessment framework, strengths and limitations</title>
<p>The quality appraisal tool used in the present review was originally developed to evaluate self-report measures of officially diagnosable health conditions (Terwee et al., <xref ref-type="bibr" rid="B49">2007</xref>). Our decision to adopt this framework was based on two reasons. First, previous research has successfully applied the framework to non-diagnosable constructs (e.g., Imposter Phenomenon; Mak et al., <xref ref-type="bibr" rid="B37">2019</xref>). Second, the criteria assessed by this framework served as a good guide to evaluating the psychometric properties of measures aimed to capture systematic responses. However, the adoption of the framework placed limitations on our review, such as the lack of focus on predictive validity.</p>
<p>In addition, despite amendments to the quality assessment framework, specific psychometric properties did not necessarily receive higher scores. In certain instances, it was not possible to evaluate certain psychometric properties. For instance, internal consistency estimates of the Process Dissociation Model (Conway and Gawronski, <xref ref-type="bibr" rid="B12">2013</xref>) and the CNI Model (Gawronski et al., <xref ref-type="bibr" rid="B20">2017</xref>) could not be computed because of these measures&#x00027; design. Rather than taking each dilemma as an item of the measurement, responses from all dilemmas are processed to produce a single score for each factor (e.g., utilitarian and deontological tendencies). Therefore, a rating of &#x0201C;N/A&#x0201D; would imply an inability to evaluate it rather than evidence of poor internal reliability. Another example is the criteria that apply to longitudinal studies. These criteria were designed to evaluate the ability of health measures to detect qualitative changes in health status across time. However, the criteria may be too rigorous for moral decision-making measures, as reporting test-retest reliability would not suffice for a positive rating. Therefore, a lack of a positive rating does not necessarily reflect the poor quality of the measure. For readers, the criteria framework may merely serve as a guide to analyzing the psychometric quality of the measures, whereas the specific findings and statistics may be more informative. Lastly, the criterion &#x0201C;Interpretability&#x0201D;, which refers to the measure&#x00027;s ability to produce qualitative meaning from quantitative scores, might make sense in a health/medical setting. However, it may not be applicable in the context of moral decision-making measures. Therefore, ratings on this criterion should bear little weight in evaluating the quality of the measure.</p>
</sec>
</sec>
</sec>
<sec sec-type="conclusions" id="s5">
<title>Conclusions</title>
<p>Overall, the present review extends previous systematic reviews. The results of our review confirm some findings of previous reviews and meta-analyses on moral reasoning (Villegas de Posada and Vargas-Trujillo, <xref ref-type="bibr" rid="B51">2015</xref>; Mart&#x000ED;-Vilar et al., <xref ref-type="bibr" rid="B40">2021</xref>) but also highlight novel key findings that are overlooked by past research. Consistent with previous studies, the DIT remains the most used tool to assess moral decision-making. Seven of the twenty included studies used some version of the DIT. However, while measures identified by Mart&#x000ED;-Vilar et al. (<xref ref-type="bibr" rid="B40">2021</xref>) predominantly relied on self-report responses, most measures identified in the present review used moral dilemmas. This suggests that a substantial amount of research in moral decision-making focuses on aspects of moral decision-making other than moral reasoning. Still, the scope of the moral decision-making construct captured by these measures is very limited. These omissions risk an incomplete and biased understanding of processes in moral decision-making that overestimates the role of cognition while ignoring other processes, such as emotion and metacognition. The present review contributes to the understanding of the current state of research by highlighting this omission and providing a critical foundation for future studies in this domain.</p>
<p>Future research that aims for a gold standard measure of moral decision-making needs to look toward unifying different theories and translating them empirically. This may require the development of new research tools that can be validated in real-world situations. A unifying theory is critical as it would provide a comprehensive taxonomy of different aspects of moral decision-making which are currently overlooked, helping us to develop state-of-the-art knowledge in this critically important area of research.</p>
</sec>
<sec sec-type="data-availability" id="s6">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="sec" rid="s10">Supplementary material</xref>, further inquiries can be directed to the corresponding author/s.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>BN and KM conducted systematic searches, screened for the inclusion of studies, and evaluated the quality of measures used in studies. BN wrote the manuscript. SK and BB met frequently with BN to provide feedback. All authors also met regularly to review updated drafts and provide feedback. All authors contributed to the article and approved the submitted version.</p>
</sec>
</body>
<back>
<sec sec-type="funding-information" id="s8">
<title>Funding</title>
<p>The study was funded by the University of Sydney Internal Booster Fund.</p>
</sec>
<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s9">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="s10">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.frontiersin.org/articles/10.3389/fpsyg.2022.1063607/full#supplementary-material">https://www.frontiersin.org/articles/10.3389/fpsyg.2022.1063607/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Data_Sheet_1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
</sec>
<fn-group>
<fn id="fn0001"><p><sup>1</sup>Haidt, J., Bjorklund, F., and Murphy, S. (2000). <italic>Moral Dumbfounding: When Intuition Finds No Reason</italic>. University of Virginia (unpublished).</p></fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Abdellaoui</surname> <given-names>S.</given-names></name> <name><surname>Lourel</surname> <given-names>M.</given-names></name> <name><surname>Blatier</surname> <given-names>C.</given-names></name> <name><surname>Beauvois</surname> <given-names>J. L.</given-names></name></person-group> (<year>2016</year>). <article-title>Development and validation of a scale of social and moral judgments (ABB scale) and its use in prison settings</article-title>. <source>Eur. Rev. Appl. Psychol.</source> <volume>66</volume>, <fpage>171</fpage>&#x02013;<lpage>180</lpage>. <pub-id pub-id-type="doi">10.1016/j.erap.2015.07.002</pub-id></citation>
</ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ackerman</surname> <given-names>R.</given-names></name> <name><surname>Thompson</surname> <given-names>V. A.</given-names></name></person-group> (<year>2017</year>). <article-title>Meta-reasoning: monitoring and control of thinking and reasoning</article-title>. <source>Trends Cogn. Sci.</source> <volume>21</volume>, <fpage>607</fpage>&#x02013;<lpage>617</lpage>. <pub-id pub-id-type="doi">10.1016/j.tics.2017.05.004</pub-id><pub-id pub-id-type="pmid">28625355</pub-id></citation></ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Blasi</surname> <given-names>A.</given-names></name></person-group> (<year>1990</year>). <article-title>Kohlberg&#x00027;s theory and moral motivation</article-title>. <source>New Dir. Child Dev.</source> <volume>47</volume>, <fpage>51</fpage>&#x02013;<lpage>57</lpage>. <pub-id pub-id-type="doi">10.1002/cd.23219904706</pub-id></citation>
</ref>
<ref id="B4">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Bore</surname> <given-names>M. R.</given-names></name></person-group> (<year>2001</year>). <source>The Psychology of Morality: A Libertarian-Communitarian Dimension and a Dissonance Model of Moral Decision Making</source> (Doctoral dissertation), <publisher-loc>Newcastle</publisher-loc>: <publisher-name>University of Newcastle</publisher-name>.</citation>
</ref>
<ref id="B5">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bucciarelli</surname> <given-names>M.</given-names></name> <name><surname>Khemlani</surname> <given-names>S.</given-names></name> <name><surname>Johnson-Laird</surname> <given-names>P. N.</given-names></name></person-group> (<year>2008</year>). <article-title>The psychology of moral reasoning</article-title>. <source>Judgm. Decis. Mak.</source> <volume>3</volume>, <fpage>121</fpage>&#x02013;<lpage>139</lpage>.</citation>
</ref>
<ref id="B6">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Carmona-Perera</surname> <given-names>M.</given-names></name> <name><surname>Vilar-Lopez</surname> <given-names>R.</given-names></name> <name><surname>Perez-Garcia</surname> <given-names>M.</given-names></name> <name><surname>Verdejo-Garcia</surname> <given-names>A.</given-names></name></person-group> (<year>2013</year>). <article-title>Using moral dilemmas to characterize social decision-making</article-title>. <source>Clin. Neuropsychiatry</source> <volume>10</volume>, <fpage>95</fpage>&#x02013;<lpage>101</lpage>.</citation>
</ref>
<ref id="B7">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Choi</surname> <given-names>Y. J.</given-names></name> <name><surname>Han</surname> <given-names>H.</given-names></name> <name><surname>Bankhead</surname> <given-names>M.</given-names></name> <name><surname>Thoma</surname> <given-names>S. J.</given-names></name></person-group> (<year>2020</year>). <article-title>Validity study using factor analyses on the defining issues test-2 in undergraduate populations</article-title>. <source>PLoS ONE</source> <volume>15</volume>, <fpage>e0238110</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pone.0238110</pub-id><pub-id pub-id-type="pmid">32866162</pub-id></citation></ref>
<ref id="B8">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Choi</surname> <given-names>Y. J.</given-names></name> <name><surname>Han</surname> <given-names>H.</given-names></name> <name><surname>Dawson</surname> <given-names>K. J.</given-names></name> <name><surname>Thoma</surname> <given-names>S. J.</given-names></name> <name><surname>Glenn</surname> <given-names>A. L.</given-names></name></person-group> (<year>2019</year>). <article-title>Measuring moral reasoning using moral dilemmas: evaluating reliability, validity, and differential item functioning of the behavioural defining issues test (bDIT)</article-title>. <source>Eur. J. Dev. Psychol.</source> <volume>16</volume>, <fpage>622</fpage>&#x02013;<lpage>631</lpage>. <pub-id pub-id-type="doi">10.1080/17405629.2019.1614907</pub-id></citation>
</ref>
<ref id="B9">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Christensen</surname> <given-names>J. F.</given-names></name> <name><surname>Flexas</surname> <given-names>A.</given-names></name> <name><surname>Calabrese</surname> <given-names>M.</given-names></name> <name><surname>Gut</surname> <given-names>N. K.</given-names></name> <name><surname>Gomila</surname> <given-names>A.</given-names></name></person-group> (<year>2014</year>). <article-title>Moral judgment reloaded: a moral dilemma validation study</article-title>. <source>Front. Psychol.</source> <volume>5</volume>, <fpage>607</fpage>. <pub-id pub-id-type="doi">10.3389/fpsyg.2014.00607</pub-id><pub-id pub-id-type="pmid">25071621</pub-id></citation></ref>
<ref id="B10">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Christensen</surname> <given-names>J. F.</given-names></name> <name><surname>Gomila</surname> <given-names>A.</given-names></name></person-group> (<year>2012</year>). <article-title>Moral dilemmas in cognitive neuroscience of moral decision-making: a principled review</article-title>. <source>Neurosci. Biobehav. Rev.</source> <volume>36</volume>, <fpage>1249</fpage>&#x02013;<lpage>1264</lpage>. <pub-id pub-id-type="doi">10.1016/j.neubiorev.2012.02.008</pub-id><pub-id pub-id-type="pmid">22353427</pub-id></citation></ref>
<ref id="B11">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Cohen</surname> <given-names>J.</given-names></name></person-group> (<year>1960</year>). <article-title>A coefficient of agreement for nominal scales</article-title>. <source>Educ. Psychol. Meas.</source> <volume>20</volume>, <fpage>37</fpage>&#x02013;<lpage>46</lpage>. <pub-id pub-id-type="doi">10.1177/001316446002000104</pub-id></citation>
</ref>
<ref id="B12">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Conway</surname> <given-names>P.</given-names></name> <name><surname>Gawronski</surname> <given-names>B.</given-names></name></person-group> (<year>2013</year>). <article-title>Deontological and utilitarian inclinations in moral decision making: a process dissociation approach</article-title>. <source>J. Pers. Soc. Psychol.</source> <volume>104</volume>, <fpage>216</fpage>. <pub-id pub-id-type="doi">10.1037/a0031021</pub-id><pub-id pub-id-type="pmid">23276267</pub-id></citation></ref>
<ref id="B13">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Davison</surname> <given-names>M. L.</given-names></name> <name><surname>Robbins</surname> <given-names>S.</given-names></name></person-group> (<year>1978</year>). <article-title>The reliability and validity of objective indices of moral development</article-title>. <source>Appl. Psychol. Meas.</source> <volume>2</volume>, <fpage>391</fpage>&#x02013;<lpage>403</lpage>. <pub-id pub-id-type="doi">10.1177/014662167800200314</pub-id></citation>
</ref>
<ref id="B14">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Fiske</surname> <given-names>A. P.</given-names></name></person-group> (<year>1991</year>). <source>Structures of Social Life: The Four Elementary Forms of Human Relations: Communal Sharing, Authority Ranking, Equality Matching, Market Pricing</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Free Press</publisher-name>.</citation>
</ref>
<ref id="B15">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fiske</surname> <given-names>A. P.</given-names></name></person-group> (<year>1992</year>). <article-title>The four elementary forms of sociality: framework for a unified theory of social relations</article-title>. <source>Psychol. Rev.</source> <volume>99</volume>, <fpage>689</fpage>. <pub-id pub-id-type="doi">10.1037/0033-295X.99.4.689</pub-id><pub-id pub-id-type="pmid">1454904</pub-id></citation></ref>
<ref id="B16">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Flavell</surname> <given-names>J. H.</given-names></name></person-group> (<year>1979</year>). <article-title>Metacognition and cognitive monitoring: a new area of cognitive&#x02013;developmental inquiry</article-title>. <source>Am. Psychol.</source> <volume>34</volume>, <fpage>906</fpage>. <pub-id pub-id-type="doi">10.1037/0003-066X.34.10.906</pub-id></citation>
</ref>
<ref id="B17">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fleischhut</surname> <given-names>N.</given-names></name> <name><surname>Meder</surname> <given-names>B.</given-names></name> <name><surname>Gigerenzer</surname> <given-names>G.</given-names></name></person-group> (<year>2017</year>). <article-title>Moral hindsight</article-title>. <source>Exp. Psychol.</source> <volume>64</volume>, <fpage>110</fpage>&#x02013;<lpage>123</lpage>. <pub-id pub-id-type="doi">10.1027/1618-3169/a000353</pub-id><pub-id pub-id-type="pmid">28497719</pub-id></citation></ref>
<ref id="B18">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Foot</surname> <given-names>P.</given-names></name></person-group> (<year>1967</year>). <article-title>The problem of abortion and the doctrine of the double effect</article-title>. <source>Oxf. Rev.</source> <volume>5</volume>, <fpage>5</fpage>&#x02013;<lpage>15</lpage>.</citation>
</ref>
<ref id="B19">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Garrigan</surname> <given-names>B.</given-names></name> <name><surname>Adlam</surname> <given-names>A. L.</given-names></name> <name><surname>Langdon</surname> <given-names>P. E.</given-names></name></person-group> (<year>2018</year>). <article-title>Moral decision-making and moral development: toward an integrative framework</article-title>. <source>Dev. Rev.</source> <volume>49</volume>, <fpage>80</fpage>&#x02013;<lpage>100</lpage>. <pub-id pub-id-type="doi">10.1016/j.dr.2018.06.001</pub-id></citation>
</ref>
<ref id="B20">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Gawronski</surname> <given-names>B.</given-names></name> <name><surname>Armstrong</surname> <given-names>J.</given-names></name> <name><surname>Conway</surname> <given-names>P.</given-names></name> <name><surname>Friesdorf</surname> <given-names>R.</given-names></name> <name><surname>Hutter</surname> <given-names>M.</given-names></name></person-group> (<year>2017</year>). <article-title>Consequences, norms, and generalized inaction in moral dilemmas: the CNI model of moral decision-making</article-title>. <source>J. Pers. Soc. Psychol.</source> <volume>113</volume>, <fpage>343</fpage>&#x02013;<lpage>376</lpage>. <pub-id pub-id-type="doi">10.1037/pspa0000086</pub-id><pub-id pub-id-type="pmid">28816493</pub-id></citation></ref>
<ref id="B21">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Goodwin</surname> <given-names>G. P.</given-names></name> <name><surname>Piazza</surname> <given-names>J.</given-names></name> <name><surname>Rozin</surname> <given-names>P.</given-names></name></person-group> (<year>2014</year>). <article-title>Moral character predominates in person perception and evaluation</article-title>. <source>J. Pers. Soc. Psychol.</source> <volume>106</volume>, <fpage>148</fpage>&#x02013;<lpage>168</lpage>. <pub-id pub-id-type="doi">10.1037/a0034726</pub-id><pub-id pub-id-type="pmid">24274087</pub-id></citation></ref>
<ref id="B22">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Greene</surname> <given-names>J. D.</given-names></name> <name><surname>Sommerville</surname> <given-names>R. B.</given-names></name> <name><surname>Nystrom</surname> <given-names>L. E.</given-names></name> <name><surname>Darley</surname> <given-names>J. M.</given-names></name> <name><surname>Cohen</surname> <given-names>J. D.</given-names></name></person-group> (<year>2001</year>). <article-title>An fMRI investigation of emotional engagement in moral judgment</article-title>. <source>Science</source> <volume>293</volume>, <fpage>2105</fpage>&#x02013;<lpage>2108</lpage>. <pub-id pub-id-type="doi">10.1126/science.1062872</pub-id><pub-id pub-id-type="pmid">11557895</pub-id></citation></ref>
<ref id="B23">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Haidt</surname> <given-names>J.</given-names></name></person-group> (<year>2001</year>). <article-title>The emotional dog and its rational tail: a social intuitionist approach to moral judgment</article-title>. <source>Psychol. Rev.</source> <volume>108</volume>, <fpage>814</fpage>&#x02013;<lpage>834</lpage>. <pub-id pub-id-type="doi">10.1037/0033-295X.108.4.814</pub-id><pub-id pub-id-type="pmid">11699120</pub-id></citation></ref>
<ref id="B24">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Haidt</surname> <given-names>J.</given-names></name></person-group> (<year>2003</year>). <article-title>&#x0201C;The moral emotions,&#x0201D;</article-title> in <source>Handbook of Affective Sciences</source>, eds R. J. Davidson, K. R. Scherer, and H. H. Goldsmith (<publisher-loc>Oxford</publisher-loc>: <publisher-name>Oxford University Press</publisher-name>), <fpage>852</fpage>&#x02013;<lpage>870</lpage>.</citation>
</ref>
<ref id="B25">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Harris</surname> <given-names>J. R.</given-names></name></person-group> (<year>1995</year>). <article-title>Where is the child&#x00027;s environment? A group socialization theory of development</article-title>. <source>Psychol. Rev.</source> <volume>102</volume>, <fpage>458</fpage>&#x02013;<lpage>489</lpage>. <pub-id pub-id-type="doi">10.1037/0033-295X.102.3.458</pub-id></citation>
</ref>
<ref id="B26">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>&#x000CD;saksson</surname> <given-names>A.</given-names></name></person-group> (<year>1979</year>). <article-title>Kohlberg&#x00027;s theory of moral development and its relevance to education</article-title>. <source>Scand. J. Educ. Res.</source> <volume>23</volume>, <fpage>47</fpage>&#x02013;<lpage>63</lpage>. <pub-id pub-id-type="doi">10.1080/0031383790230202</pub-id></citation>
</ref>
<ref id="B27">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jacoby</surname> <given-names>L. L.</given-names></name></person-group> (<year>1991</year>). <article-title>A process dissociation framework: separating automatic from intentional uses of memory</article-title>. <source>J. Mem. Lang.</source> <volume>30</volume>, <fpage>513</fpage>&#x02013;<lpage>541</lpage>. <pub-id pub-id-type="doi">10.1016/0749-596X(91)90025-F</pub-id></citation>
</ref>
<ref id="B28">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Jang</surname> <given-names>E.</given-names></name></person-group> (<year>2020</year>). <article-title>Validation of the Korean version of the moral judgment scale: a process dissociation approach to moral dilemmas</article-title>. <source>Heliyon</source> <volume>6</volume>, <fpage>e05518</fpage>. <pub-id pub-id-type="doi">10.1016/j.heliyon.2020.e05518</pub-id><pub-id pub-id-type="pmid">33294671</pub-id></citation></ref>
<ref id="B29">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kahane</surname> <given-names>G.</given-names></name> <name><surname>Everett</surname> <given-names>J. A.</given-names></name> <name><surname>Earp</surname> <given-names>B. D.</given-names></name> <name><surname>Caviola</surname> <given-names>L.</given-names></name> <name><surname>Faber</surname> <given-names>N. S.</given-names></name> <name><surname>Crockett</surname> <given-names>M. J.</given-names></name> <etal/></person-group>. (<year>2018</year>). <article-title>Beyond sacrificial harm: a two-dimensional model of utilitarian psychology</article-title>. <source>Psychol. Rev.</source> <volume>125</volume>, <fpage>131</fpage>&#x02013;<lpage>164</lpage>. <pub-id pub-id-type="doi">10.1037/rev0000093</pub-id><pub-id pub-id-type="pmid">29658728</pub-id></citation></ref>
<ref id="B30">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kimhi</surname> <given-names>S.</given-names></name></person-group> (<year>2014</year>). <article-title>Moral dilemma in the war against terror: political attitudes and regular versus reserve military service</article-title>. <source>Ethics Behav.</source> <volume>24</volume>, <fpage>1</fpage>&#x02013;<lpage>15</lpage>. <pub-id pub-id-type="doi">10.1080/10508422.2013.818919</pub-id></citation>
</ref>
<ref id="B31">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Kohlberg</surname> <given-names>L.</given-names></name></person-group> (<year>1984</year>). <source>The Psychology of Moral Development</source>. <publisher-loc>San Francisco, CA</publisher-loc>: <publisher-name>Harper and Row</publisher-name>.</citation>
</ref>
<ref id="B32">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>K&#x000F6;rner</surname> <given-names>A.</given-names></name> <name><surname>Deutsch</surname> <given-names>R.</given-names></name> <name><surname>Gawronski</surname> <given-names>B.</given-names></name></person-group> (<year>2020</year>). <article-title>Using the CNI model to investigate individual differences in moral dilemma judgments</article-title>. <source>Pers. Soc. Psychol. Bull.</source> <volume>46</volume>, <fpage>1392</fpage>&#x02013;<lpage>1407</lpage>. <pub-id pub-id-type="doi">10.1177/0146167220907203</pub-id><pub-id pub-id-type="pmid">32111135</pub-id></citation></ref>
<ref id="B33">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kuylen</surname> <given-names>M. N.</given-names></name> <name><surname>Kim</surname> <given-names>S. Y.</given-names></name> <name><surname>Keene</surname> <given-names>A. R.</given-names></name> <name><surname>Owen</surname> <given-names>G. S.</given-names></name></person-group> (<year>2021</year>). <article-title>Should age matter in COVID-19 triage? A deliberative study</article-title>. <source>J. Med. Ethics</source> <volume>47</volume>, <fpage>291</fpage>&#x02013;<lpage>295</lpage>. <pub-id pub-id-type="doi">10.1136/medethics-2020-107071</pub-id><pub-id pub-id-type="pmid">33687917</pub-id></citation></ref>
<ref id="B34">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Lapsley</surname> <given-names>D. K.</given-names></name></person-group> (<year>1992</year>). <article-title>&#x0201C;Moral psychology after Kohlberg,&#x0201D;</article-title> in <source>Invited Address Given at the Meeting of the Midwestern Psychological Association</source>. <publisher-loc>Chicago, IL</publisher-loc>.<pub-id pub-id-type="pmid">6705622</pub-id></citation></ref>
<ref id="B35">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Liberati</surname> <given-names>A.</given-names></name> <name><surname>Altman</surname> <given-names>D. G.</given-names></name> <name><surname>Tetzlaff</surname> <given-names>J.</given-names></name> <name><surname>Mulrow</surname> <given-names>C.</given-names></name> <name><surname>G&#x000F8;tzsche</surname> <given-names>P. C.</given-names></name> <name><surname>Ioannidis</surname> <given-names>J. P. A.</given-names></name> <etal/></person-group>. (<year>2009</year>). <article-title>The PRISMA statement for reporting systematic reviews and meta-analyses of studies that evaluate health care interventions: explanation and elaboration</article-title>. <source>PLoS Med.</source> <volume>6</volume>, <fpage>e1000100</fpage>. <pub-id pub-id-type="doi">10.1371/journal.pmed.1000100</pub-id><pub-id pub-id-type="pmid">19631507</pub-id></citation></ref>
<ref id="B36">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Lotto</surname> <given-names>L.</given-names></name> <name><surname>Manfrinati</surname> <given-names>A.</given-names></name> <name><surname>Sarlo</surname> <given-names>M.</given-names></name></person-group> (<year>2014</year>). <article-title>A new set of moral dilemmas: norms for moral acceptability, decision times, and emotional salience</article-title>. <source>J. Behav. Decis. Mak.</source> <volume>27</volume>, <fpage>57</fpage>&#x02013;<lpage>65</lpage>. <pub-id pub-id-type="doi">10.1002/bdm.1782</pub-id></citation>
</ref>
<ref id="B37">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mak</surname> <given-names>K. K. L.</given-names></name> <name><surname>Kleitman</surname> <given-names>S.</given-names></name> <name><surname>Abbott</surname> <given-names>M. J.</given-names></name></person-group> (<year>2019</year>). <article-title>Impostor phenomenon measurement scales: a systematic review</article-title>. <source>Front. Psychol.</source> <volume>10</volume>, <fpage>671</fpage>. <pub-id pub-id-type="doi">10.3389/fpsyg.2019.00671</pub-id><pub-id pub-id-type="pmid">31024375</pub-id></citation></ref>
<ref id="B38">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Malti</surname> <given-names>T.</given-names></name> <name><surname>Krettenauer</surname> <given-names>T.</given-names></name></person-group> (<year>2013</year>). <article-title>The relation of moral emotion attributions to prosocial and antisocial behavior: a meta-analysis</article-title>. <source>Child Dev.</source> <volume>84</volume>, <fpage>397</fpage>&#x02013;<lpage>412</lpage>. <pub-id pub-id-type="doi">10.1111/j.1467-8624.2012.01851.x</pub-id><pub-id pub-id-type="pmid">23005580</pub-id></citation></ref>
<ref id="B39">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Martin</surname> <given-names>R. M.</given-names></name> <name><surname>Shafto</surname> <given-names>M.</given-names></name> <name><surname>Vandeinse</surname> <given-names>W.</given-names></name></person-group> (<year>1977</year>). <article-title>The reliability, validity, and design of the defining issues test</article-title>. <source>Dev. Psychol.</source> <volume>13</volume>, <fpage>460</fpage>&#x02013;<lpage>468</lpage>. <pub-id pub-id-type="doi">10.1037/0012-1649.13.5.460</pub-id></citation>
</ref>
<ref id="B40">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mart&#x000ED;-Vilar</surname> <given-names>M.</given-names></name> <name><surname>Escrig-Espuig</surname> <given-names>J. M.</given-names></name> <name><surname>Merino-Soto</surname> <given-names>C.</given-names></name></person-group> (<year>2021</year>). <article-title>A systematic review of moral reasoning measures</article-title>. <source>Curr. Psychol.</source> <pub-id pub-id-type="doi">10.1007/s12144-021-01519-8</pub-id></citation>
</ref>
<ref id="B41">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mathes</surname> <given-names>E. W.</given-names></name></person-group> (<year>2021</year>). <article-title>An evolutionary perspective on Kohlberg&#x00027;s theory of moral development</article-title>. <source>Curr. Psychol.</source> <volume>40</volume>, <fpage>3908</fpage>&#x02013;<lpage>3921</lpage>. <pub-id pub-id-type="doi">10.1007/s12144-019-00348-0</pub-id><pub-id pub-id-type="pmid">10818625</pub-id></citation></ref>
<ref id="B42">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mayhew</surname> <given-names>M. J.</given-names></name> <name><surname>Pascarella</surname> <given-names>E. T.</given-names></name> <name><surname>Trolian</surname> <given-names>T.</given-names></name> <name><surname>Selznick</surname> <given-names>B.</given-names></name></person-group> (<year>2015</year>). <article-title>Measurements matter: taking the DIT-2 multiple times and college students&#x00027; moral reasoning development</article-title>. <source>Res. High. Educ.</source> <volume>56</volume>, <fpage>378</fpage>&#x02013;<lpage>396</lpage>. <pub-id pub-id-type="doi">10.1007/s11162-014-9348-5</pub-id></citation>
</ref>
<ref id="B43">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Mitchell</surname> <given-names>C. P.</given-names></name></person-group> (<year>2000</year>). <article-title>Investigating the nature and validity of DIT2 using four validity criteria</article-title>. <source>Dissertat. Abstr. Int. Sect. B Sci. Eng.</source> <volume>61</volume>, <fpage>2813</fpage>.</citation>
</ref>
<ref id="B44">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Pizarro</surname> <given-names>D. A.</given-names></name> <name><surname>Tannenbaum</surname> <given-names>D.</given-names></name></person-group> (<year>2012</year>). <article-title>&#x0201C;Bringing character back: how the motivation to evaluate character influences judgments of moral blame,&#x0201D;</article-title> in <source>The Social Psychology of Morality: Exploring the Causes of Good and Evil</source>, eds M. Mikulincer, and P. R. Shaver (<publisher-loc>Washington, DC</publisher-loc>: <publisher-name>American Psychological Association</publisher-name>), <fpage>91</fpage>&#x02013;<lpage>108</lpage>.</citation>
</ref>
<ref id="B45">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Rest</surname> <given-names>J.</given-names></name></person-group> (<year>1974</year>). <source>Manual for the Defining Issues Test: An Objective Test of Moral Judgment Development</source>. <publisher-loc>Minnesota</publisher-loc>: <publisher-name>University of Minnesota</publisher-name>.</citation>
</ref>
<ref id="B46">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Rest</surname> <given-names>J. R.</given-names></name> <name><surname>Narvaez</surname> <given-names>D.</given-names></name> <name><surname>Thoma</surname> <given-names>S. J.</given-names></name> <name><surname>Bebeau</surname> <given-names>M. J.</given-names></name></person-group> (<year>1999</year>). <article-title>DIT2: devising and testing a revised instrument of moral judgment</article-title>. <source>J. Educ. Psychol.</source> <volume>91</volume>, <fpage>644</fpage>&#x02013;<lpage>659</lpage>. <pub-id pub-id-type="doi">10.1037/0022-0663.91.4.644</pub-id></citation>
</ref>
<ref id="B47">
<citation citation-type="web"><person-group person-group-type="author"><name><surname>Richardson</surname> <given-names>H. S.</given-names></name></person-group> (<year>2018</year>). <article-title>Moral Reasoning. Stanford Encyclopedia of Philosophy</article-title>. Retrieved from: <ext-link ext-link-type="uri" xlink:href="https://plato.stanford.edu/entries/reasoning-moral/&#x00023;PhilImpoMoraReas">https://plato.stanford.edu/entries/reasoning-moral/&#x00023;PhilImpoMoraReas</ext-link> (accessed February 24, 2022).</citation>
</ref>
<ref id="B48">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Smetana</surname> <given-names>J. G.</given-names></name></person-group> (<year>2006</year>). <article-title>&#x0201C;Social-cognitive domain theory: consistencies and variations in children&#x00027;s moral and social judgments,&#x0201D;</article-title> in <source>Handbook of Moral Development</source>, eds M. Killen, and J. G. Smetana (<publisher-loc>Mahwah, NJ</publisher-loc>: <publisher-name>Lawrence Erlbaum Associates Publishers</publisher-name>), <fpage>119</fpage>&#x02013;<lpage>153</lpage>.</citation>
</ref>
<ref id="B49">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Terwee</surname> <given-names>C. B.</given-names></name> <name><surname>Bot</surname> <given-names>S. D.</given-names></name> <name><surname>de Boer</surname> <given-names>M. R.</given-names></name> <name><surname>van der Windt</surname> <given-names>D. A.</given-names></name> <name><surname>Knol</surname> <given-names>D. L.</given-names></name> <name><surname>Dekker</surname> <given-names>J.</given-names></name> <etal/></person-group>. (<year>2007</year>). <article-title>Quality criteria were proposed for measurement properties of health status questionnaires</article-title>. <source>J. Clin. Epidemiol.</source> <volume>60</volume>, <fpage>34</fpage>&#x02013;<lpage>42</lpage>. <pub-id pub-id-type="doi">10.1016/j.jclinepi.2006.03.012</pub-id><pub-id pub-id-type="pmid">17161752</pub-id></citation></ref>
<ref id="B50">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Uhlmann</surname> <given-names>E. L.</given-names></name> <name><surname>Pizarro</surname> <given-names>D. A.</given-names></name> <name><surname>Diermeier</surname> <given-names>D.</given-names></name></person-group> (<year>2015</year>). <article-title>A person-centered approach to moral judgment</article-title>. <source>Perspect. Psychol. Sci.</source> <volume>10</volume>, <fpage>72</fpage>&#x02013;<lpage>81</lpage>. <pub-id pub-id-type="doi">10.1177/1745691614556679</pub-id><pub-id pub-id-type="pmid">25910382</pub-id></citation></ref>
<ref id="B51">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Villegas de Posada</surname> <given-names>C.</given-names></name> <name><surname>Vargas-Trujillo</surname> <given-names>E.</given-names></name></person-group> (<year>2015</year>). <article-title>Moral reasoning and personal behavior: a meta-analytical review</article-title>. <source>Rev. Gen. Psychol.</source> <volume>19</volume>, <fpage>408</fpage>&#x02013;<lpage>424</lpage>. <pub-id pub-id-type="doi">10.1037/gpr0000053</pub-id></citation>
</ref>
<ref id="B52">
<citation citation-type="book"><person-group person-group-type="author"><name><surname>Whiting</surname> <given-names>J. W. M.</given-names></name> <name><surname>Child</surname> <given-names>I. L.</given-names></name></person-group> (<year>1953</year>). <source>Child Training and Personality: A Cross-Cultural Study</source>. <publisher-loc>New Haven, CT</publisher-loc>: <publisher-name>Yale University Press</publisher-name>.</citation>
</ref>
<ref id="B53">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yamamoto</surname> <given-names>S.</given-names></name> <name><surname>Maeder</surname> <given-names>E. M.</given-names></name></person-group> (<year>2019</year>). <article-title>Creating the punishment orientation questionnaire: an item response theory approach</article-title>. <source>Pers. Soc. Psychol. Bull.</source> <volume>45</volume>, <fpage>1283</fpage>&#x02013;<lpage>1294</lpage>. <pub-id pub-id-type="doi">10.1177/0146167218818485</pub-id><pub-id pub-id-type="pmid">30632453</pub-id></citation></ref>
</ref-list>
</back>
</article> 