<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="2.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Commun.</journal-id>
<journal-title>Frontiers in Communication</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Commun.</abbrev-journal-title>
<issn pub-type="epub">2297-900X</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/fcomm.2024.1266407</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Communication</subject>
<subj-group>
<subject>Original Research</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Revered and reviled: a sentiment analysis of female and male referents in three languages</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Levshina</surname> <given-names>Natalia</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/1174920/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/formal-analysis/"/>
<role content-type="https://credit.niso.org/contributor-roles/investigation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/visualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Koptjevskaja-Tamm</surname> <given-names>Maria</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x002A;</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/2388596/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/conceptualization/"/>
<role content-type="https://credit.niso.org/contributor-roles/project-administration/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
<contrib contrib-type="author">
<name><surname>&#x00D6;stling</surname> <given-names>Robert</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<uri xlink:href="https://loop.frontiersin.org/people/446059/overview"/>
<role content-type="https://credit.niso.org/contributor-roles/data-curation/"/>
<role content-type="https://credit.niso.org/contributor-roles/methodology/"/>
<role content-type="https://credit.niso.org/contributor-roles/resources/"/>
<role content-type="https://credit.niso.org/contributor-roles/software/"/>
<role content-type="https://credit.niso.org/contributor-roles/validation/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-original-draft/"/>
<role content-type="https://credit.niso.org/contributor-roles/writing-review-editing/"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>Neurobiology of Language Department, Max Planck Institute for Psycholinguistics</institution>, <addr-line>Nijmegen</addr-line>, <country>Netherlands</country></aff>
<aff id="aff2"><sup>2</sup><institution>Centre for Language Studies, Radboud University</institution>, <addr-line>Nijmegen</addr-line>, <country>Netherlands</country></aff>
<aff id="aff3"><sup>3</sup><institution>Department of Linguistics, Stockholm University</institution>, <addr-line>Stockholm</addr-line>, <country>Sweden</country></aff>
<author-notes>
<fn fn-type="edited-by" id="fn0004">
<p>Edited by: Gerd Carling, Goethe University Frankfurt, Germany</p>
</fn>
<fn fn-type="edited-by" id="fn0005">
<p>Reviewed by: Gisela Redeker, University of Groningen, Netherlands</p>
<p>Kate Bellamy, Leiden University, Netherlands</p>
</fn>
<corresp id="c001">&#x002A;Correspondence: Maria Koptjevskaja-Tamm, <email>tamm@ling.su.se</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>28</day>
<month>03</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>9</volume>
<elocation-id>1266407</elocation-id>
<history>
<date date-type="received">
<day>24</day>
<month>07</month>
<year>2023</year>
</date>
<date date-type="accepted">
<day>04</day>
<month>03</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x00A9; 2024 Levshina, Koptjevskaja-Tamm and &#x00D6;stling.</copyright-statement>
<copyright-year>2024</copyright-year>
<copyright-holder>Levshina, Koptjevskaja-Tamm and &#x00D6;stling</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p>
</license>
</permissions>
<abstract>
<p>Our study contributes to the less explored domain of lexical typology, focusing on semantic prosody and connotation. Semantic derogation, or pejoration of nouns referring to women, whereby such words acquire connotations and further denotations of social pejoration, immorality and/or loose sexuality, has been a very prominent question in studies on gender and language (change). It has been argued that pejoration emerges due to the general derogatory attitudes toward female referents. However, the evidence for systematic differences in connotations of female- vs. male-related words is fragmentary and often fairly impressionistic; moreover, many researchers argue that expressed sentiments toward women (as well as men) often are ambivalent. One should also expect gender differences in connotations to have decreased in the recent years, thanks to the advances of feminism and social progress. We test these ideas in a study of positive and negative connotations of feminine and masculine term pairs such as woman - man, girl - boy, wife - husband, etc. Sentences containing these words were sampled from diachronic corpora of English, Chinese and Russian, and sentiment scores for every word were obtained using two systems for Aspect-Based Sentiment Analysis: PyABSA, and OpenAI&#x2019;s large language model GPT-3.5. The Generalized Linear Mixed Models of our data provide no indications of significantly more negative sentiment toward female referents in comparison with their male counterparts. However, some of the models suggest that female referents are more infrequently associated with neutral sentiment than male ones. Neither do our data support the hypothesis of the diachronic convergence between the genders. In sum, results suggest that pejoration is unlikely to be explained simply by negative attitudes to female referents in general.</p>
</abstract>
<kwd-group>
<kwd>semantic derogation</kwd>
<kwd>pejoration</kwd>
<kwd>sentiment analysis</kwd>
<kwd>diachronic corpora</kwd>
<kwd>semantic change</kwd>
<kwd>semantic prosody</kwd>
<kwd>gender stereotypes</kwd>
<kwd>prejudice</kwd>
</kwd-group>
<counts>
<fig-count count="14"/>
<table-count count="15"/>
<equation-count count="0"/>
<ref-count count="52"/>
<page-count count="21"/>
<word-count count="13038"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Language Communication</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="sec1">
<label>1</label>
<title>Introduction</title>
<p>&#x201C;Semantic derogation&#x201D;, or pejoration of nouns applying to women, whereby such words acquire connotations and further denotations of social pejoration, immorality and/or loose sexuality, has been &#x201C;[p]erhaps the most prominent diachronic question&#x201D; (<xref ref-type="bibr" rid="ref9001">Salmons, 1990</xref>) in studies on gender and language (change). This process has been argued to be seldom observable in the corresponding male-related words, with examples such as <italic>lord</italic> vs. <italic>lady</italic>, <italic>bachelor</italic> vs. <italic>spinster</italic>, <italic>mister</italic> vs. <italic>mistress</italic>, etc. (<xref ref-type="bibr" rid="ref9002">Schulz, 1975</xref>; <xref ref-type="bibr" rid="ref9003">Bebout, 1984</xref>; <xref ref-type="bibr" rid="ref9004">Kleparski, 1997</xref>; <xref ref-type="bibr" rid="ref9005">Kim, 2008</xref>). In her seminal article <xref ref-type="bibr" rid="ref9002">Schulz (1975)</xref> considers numerous English terms referring to women that have undergone various kinds of semantic derogation, or pejoration, and points out that pejoration is especially prominent in certain semantic groups (e.g., female kinship terms, terms used as endearments, words for girls and young women). This is all the more striking given that their male equivalents have on the whole escaped pejoration. Notably, as pointed out in <xref ref-type="bibr" rid="ref3">Borkowska and Kleparski (2007)</xref>, even though examples of male words hit by pejoration are attested, as, e.g., Old English <italic>cnafa</italic> &#x2018;boy&#x2019; developing into the by now archaic <italic>knave</italic> &#x2018;a dishonest or unscrupulous man&#x2019;, words for males demonstrate many more examples of the opposite, ameliorative developments, such as <italic>page</italic>, originally meaning &#x2018;boy, lad&#x2019;, at some point acquiring the meaning of &#x2018;a youth employed as the personal attendant of a person of rank&#x2019;. 
For female words, ameliorative developments are reported much less often. This recurrent semantic derogation has been seen as a powerful factor behind repeated lexical replacement of words referring to (young) women (<xref ref-type="bibr" rid="ref9006">Grzega, 2004</xref>, pp. 32&#x2013;33), e.g., also Fr. <italic>gar&#x00E7;on</italic> &#x2018;boy&#x2019; vs. <italic>garce</italic> &#x2018;bitch&#x2019; (earlier &#x2018;girl&#x2019;), <italic>fille</italic> &#x2018;girl&#x2019;.</p>
<p>But why should it be so? Scholars working on semantic change basically agree that the roots of pejoration are to be sought in attitudes toward the referent (<xref ref-type="bibr" rid="ref39">Stern, 1931</xref>; <xref ref-type="bibr" rid="ref42">Ullmann, 1957</xref>; <xref ref-type="bibr" rid="ref3">Borkowska and Kleparski, 2007</xref>). To quote <xref ref-type="bibr" rid="ref9002">Schulz (1975)</xref>, p. 64, &#x201C;a language reflects the thought, attitudes, and culture of the people who make it and use it. A rich vocabulary on a given subject reveals an area of concern of the society whose language is being studied. The choice between positive and negative terms for any given concept (as, for example, in the choice between <italic>freedom fighter</italic> and <italic>terrorist</italic>) reveals the presence or absence of prejudicial feelings toward the subject&#x201D;. <xref ref-type="bibr" rid="ref9002">Schulz (1975)</xref>, p. 71 analyses her findings in light of the three different origins for pejoration as suggested by <xref ref-type="bibr" rid="ref42">Ullmann (1957)</xref>, pp. 231&#x2013;32 &#x2013; association with a contaminating concept, euphemism, and prejudice &#x2013; and finds evidence for all three. According to her, men tend to think of women in sexual terms, and, by association, this results in the male speakers attributing sexual suggestiveness to any female term. Euphemism underlies many terms for prostitutes. However, the major factor behind semantic derogation of words for women is, in Schulz&#x2019;s view, prejudice, which has two main ingredients &#x2013; denigration and gross generalization. <xref ref-type="bibr" rid="ref9002">Schulz (1975)</xref>, p. 73 concludes that the semantic change &#x201C;by which terms designating women routinely undergo pejoration, both reflects and perpetuates derogatory attitudes toward women. They should be abjured&#x201D;.</p>
<p>Schulz&#x2019;s conclusion about the prevalence of derogatory attitudes toward females in the society, to a large extent following the male norms, does not sound unfounded, given the accumulated and constantly growing knowledge of women&#x2019;s discrimination all over the world. However, as pointed out by <xref ref-type="bibr" rid="ref20">Glick and Fiske (1996)</xref>, p. 491, <xref ref-type="bibr" rid="ref9007">Allport&#x2019;s (1954)</xref>, p. 9 classical definition of prejudice as &#x201C;an antipathy based upon a faulty and inflexible generalization&#x201D; and primarily used for ethnic prejudice, is difficult to apply to the relations between women and men. First of all, no other two groups have been so interrelated as males and females. Moreover, while prejudice as antipathy is commonly indexed by such measures as negative stereotypes, &#x201C;cultural images of women from ancient to modern times are not uniformly negative; women have been revered as well as reviled&#x201D; (<xref ref-type="bibr" rid="ref20">Glick and Fiske, 1996</xref>, p. 491; see <xref ref-type="bibr" rid="ref36">Potts and Weare, 2018</xref> for a telling modern example of the ambivalence in the representation of women who kill as degraded victims or dehumanized monsters in English Crown Court sentencing remarks). The authors argue that sexism has always been marked by a deep ambivalence, in which the subjectively positive feelings toward women are closely associated with antipathy. They suggest further to distinguish between hostile sexism and benevolent sexism, the latter encompassing a set of attitudes based on viewing women stereotypically and in restricted roles, in which they trigger subjectively positive feelings and elicit pro-social behavior.</p>
<p>There has been massive research on prejudice (e.g., <xref ref-type="bibr" rid="ref11">Dovidio et al., 2005</xref>; <xref ref-type="bibr" rid="ref25">Jackson, 2011</xref>), including gender prejudice, and gender stereotypes in social psychology. In a nutshell, prejudices constitute the affective component of intergroup bias, whereas stereotypes account for its cognitive component and denote general beliefs about the characteristics of particular groups, e.g., different genders or sexes. Stereotypical beliefs may thus concern the general appropriateness of various roles and activities for men and women (gender/sex roles), or psychological or behavioral characteristics that are believed to characterize one of the genders/sexes with much greater frequency than the other(s) (gender/sex traits) (<xref ref-type="bibr" rid="ref44">Williams and Best, 1990</xref>, pp. 16&#x2013;17). Given that it is beyond the scope of this paper to do justice to the accumulated knowledge in the field, we have chosen here to mention the research that we find particularly relevant to our study.</p>
<p><xref ref-type="bibr" rid="ref44">Williams and Best (1990)</xref> utilize the 300-item Adjective Check List, which is normally employed in self-descriptive personality assessment procedures (<xref ref-type="bibr" rid="ref21">Gough and Heilbrun, 1965</xref>), for measuring sex-trait stereotypes by applying a relative judgment method. Males and females from 30 countries were asked to consider each of the 300 items on the list (translated into the relevant language) and assess whether it is equally applicable to both women and men or more frequently associated with either women or men. The resulting scores were used for defining the so-called &#x201C;focused stereotypes&#x201D; using a standard degree of association criterion: &#x201C;items were included in the stereotype set for a particular sex if they were associated with that sex at least twice as often as with the other sex&#x201D; (<xref ref-type="bibr" rid="ref44">Williams and Best, 1990</xref>, p. 59). The leading male-associated items across the countries included &#x201C;adventurous&#x201D;, &#x201C;dominant&#x201D;, &#x201C;forceful&#x201D;, &#x201C;independent&#x201D;, &#x201C;masculine&#x201D; and &#x201C;strong&#x201D; (and, slightly less frequent, &#x201C;aggressive&#x201D;, &#x201C;autocratic&#x201D;, &#x201C;daring&#x201D;, &#x201C;enterprising&#x201D;, &#x201C;robust&#x201D; and &#x201C;stern&#x201D;), while the leading recurrent female-associated items included &#x201C;sentimental&#x201D;, &#x201C;submissive&#x201D; and &#x201C;superstitious&#x201D; (and, slightly less frequent, &#x201C;affectionate&#x201D;, &#x201C;dreamy&#x201D;, &#x201C;feminine&#x201D; and &#x201C;sensitive&#x201D;) (<xref ref-type="bibr" rid="ref44">Williams and Best, 1990</xref>, pp. 75&#x2013;76).</p>
<p>The resulting stereotypes have been analyzed from different perspectives, of which the most relevant here concerns the affective or connotative meanings associated with them, much in the tradition of <xref ref-type="bibr" rid="ref32">Osgood et al.&#x2019;s (1975)</xref> Affective Meaning Theory. Each of the adjectives on the list was scaled along the dimensions of favorability (good vs. bad), strength (strong vs. weak) and activity (active vs. passive), resulting in the mean affective meaning score for each of the focused female and male stereotypes. Interestingly, while the male stereotypes in all countries were stronger and more active, there was no consistency across countries in the evaluation of favorability: in some countries (primarily in Peru, Italy and France), the female stereotype was evaluated more favorably than the male one, whereas others (primarily Nigeria, Japan and South Africa) showed the opposite trend (<xref ref-type="bibr" rid="ref44">Williams and Best, 1990</xref>, pp. 97&#x2013;99).</p>
<p><xref ref-type="bibr" rid="ref44">Williams and Best&#x2019;s (1990)</xref> finding that the stereotyping evaluation of females vs. males is not easily captured by the good &#x2013; bad dimension is much in line with both the Ambivalent Sexism idea in <xref ref-type="bibr" rid="ref20">Glick and Fiske (1996)</xref> and with the more general and highly influential Stereotype Content Model, related to it (<xref ref-type="bibr" rid="ref16">Fiske et al., 2002</xref>; <xref ref-type="bibr" rid="ref15">Fiske, 2018</xref>). The latter claims that stereotypes are captured by two dimensions, according to which people tend to categorize others (and themselves) on the basis of interpersonal and intergroup interactions &#x2013; warmth (trustworthiness, sociability) and competence (capability, agentivity). Moreover, stereotypes can be subjectively positive on one dimension, but negative (unflattering) on the other.</p>
<p>The availability of big digital corpora and development of corpus linguistic methods for extracting information from them has enabled large-scale research on collective representations of men and women, where the term &#x201C;collective representation&#x201D;, introduced by <xref ref-type="bibr" rid="ref12">Durkheim (1989/1953)</xref> and further developed, among others, by <xref ref-type="bibr" rid="ref9008">Moscovici (1988)</xref>, &#x201C;refer[s] to societal-level systems of meaning that pervade everyday social life&#x201D; (<xref ref-type="bibr" rid="ref6">Charlesworth et al., 2021</xref>, p. 218). As repeatedly argued (or at least assumed), &#x201C;[t]he spoken and written language of a society affords a unique way to measure the magnitude and prevalence of these widely shared collective representations&#x201D; (<xref ref-type="bibr" rid="ref6">Charlesworth et al., 2021</xref>, p. 218), also because it may provide access to implicit, hidden attitudes, much less visible in studies based on participants&#x2019; reports, where the participants tend to reply in a socially desirable manner and engage in self-deception (<xref ref-type="bibr" rid="ref31">Nosek et al., 2007</xref>; <xref ref-type="bibr" rid="ref9">DeFranza et al., 2020</xref>, p. 9; <xref ref-type="bibr" rid="ref6">Charlesworth et al., 2021</xref>). By studying language in use, researchers ask the questions of how often and in which ways females and males are spoken / written about in different contexts, primarily in texts of different genres and produced during different time periods. A particularly useful method for approaching these issues builds on a comparison of collocations, i.e., words and expressions that frequently occur in close proximity, for pairs of gendered nouns. 
There is a bulk of studies along these lines, predominantly on English (but see <xref ref-type="bibr" rid="ref46">Zasina, 2019</xref> on Czech), comparing the number of occurrences and collocations for such pairs as &#x201C;woman&#x201D; vs. &#x201C;man&#x201D; (<xref ref-type="bibr" rid="ref35">Pearce, 2008</xref>; <xref ref-type="bibr" rid="ref5">Caldas-Coulthard and Moon, 2010</xref>), &#x201C;boy&#x201D; and &#x201C;girl&#x201D; (<xref ref-type="bibr" rid="ref28">Macalister, 2011</xref>; <xref ref-type="bibr" rid="ref41">Taylor, 2013</xref>; <xref ref-type="bibr" rid="ref30">Norberg, 2016</xref>), &#x201C;bachelor&#x201D; vs. &#x201C;spinster&#x201D; (<xref ref-type="bibr" rid="ref38">Romaine, 2000</xref>, pp. 108&#x2013;109), the two pairs &#x201C;woman&#x201D; vs. &#x201C;man&#x201D; and &#x201C;girl&#x201D; vs. &#x201C;boy&#x201D; together (<xref ref-type="bibr" rid="ref38">Romaine, 2000</xref>, p. 110; <xref ref-type="bibr" rid="ref5">Caldas-Coulthard and Moon, 2010</xref>), or, more generally, for expressions referring to females vs. males (<xref ref-type="bibr" rid="ref22">Herda&#x011F;delen and Baroni, 2011</xref>; <italic>cf.</italic> <xref ref-type="bibr" rid="ref2">Baker, 2014</xref>: Chapter 6 for a useful overview).</p>
<p>All these studies unveil significant gender biases in the representation of females and males, with interesting differences among the genres and time periods. To give an example, <xref ref-type="bibr" rid="ref35">Pearce (2008)</xref> analyses collocations of &#x201C;man&#x201D; and &#x201C;woman&#x201D; with modifying adjectives and verbs that have them as their subject or object in the British National Corpus (BNC) across five different domains, commonly reflecting persistent gender differences in the representation of males and females &#x2013; power and deviance, social categories, personality and mental capacity (the &#x201C;Big Five&#x201D; of human personality), appearance, and sexuality. For instance, women are more often characterized by adjectives signaling marital/reproductive status (<italic>childless, married, separated</italic>) and sexual orientation (<italic>heterosexual</italic>) and are saliently or exclusively modified by adjectives of nationality (<italic>American, Bangladeshi</italic>), ethnicity (<italic>African-American, Asian, gipsy</italic>), and class (<italic>high-caste, lower-class</italic>) (<xref ref-type="bibr" rid="ref35">Pearce, 2008</xref>, p. 12). Men are strongly associated with attributive adjectives referring to physical strength, prowess, physical size, weight and bulk, while the corresponding adjectives for women show a more limited range of bodily types and shapes, with some referring to weight and size (<italic>pear-shaped, slender</italic>) and others to breasts (<italic>big-bosomed, large-breasted</italic>). Men&#x2019;s facial appearance and expression is likewise more variously described than women&#x2019;s (<xref ref-type="bibr" rid="ref35">Pearce, 2008</xref>, p. 17). 
Some of the differences within the domain of personality and mental capacities include the stronger association of such words as <italic>brilliant, clever, gifted</italic> and <italic>wise</italic> with men, while adjectives with negative associations in the domain of sexuality are more strongly associated with women (<italic>fallen, promiscuous, frigid, butch</italic>). Pearce concludes that the collocates of &#x201C;man&#x201D; and &#x201C;woman&#x201D; in the BNC seem often to represent gender in stereotypical ways, but also points out a number of important caveats stemming from the composition of the corpus and the limitations of the analytic tools.</p>
<p>The recent years have seen studies using more advanced Natural Language Processing techniques, such as word embedding (<xref ref-type="bibr" rid="ref17">Garg et al., 2018</xref>; <xref ref-type="bibr" rid="ref9">DeFranza et al., 2020</xref>; <xref ref-type="bibr" rid="ref6">Charlesworth et al., 2021</xref>) &#x2013; a machine-learning technique that captures the meaning of words by the context in which they occur. These studies use impressively big corpora representing different kinds of media, covering relatively long time periods and therefore allowing researchers to study trends in gender bias in society.</p>
<p>Notably, while the unequal representation of the genders <italic>per se</italic> is either explicitly acknowledged or at least assumed to be reprehensible in all research on gender in corpora, very few studies approach the issue of the overall sentiment of the language used to describe the different genders. <xref ref-type="bibr" rid="ref38">Romaine (2000)</xref>, pp. 109&#x2013;110 claims that &#x201C;words with negative overtones are still more frequently used together with <italic>girl/woman</italic> than with <italic>man/boy</italic>&#x201D;, supporting the claim with the frequencies of occurrences in the 3 mln (sub) corpus of BNC for such adjectives as <italic>hysterical</italic>, <italic>silly</italic>, <italic>loose</italic> and <italic>ugly</italic> vs. <italic>honest</italic> and <italic>intelligent</italic>. But this is a bit of cherry-picking: in <xref ref-type="bibr" rid="ref35">Pearce&#x2019;s (2008)</xref> study <italic>attractive</italic>, <italic>beautiful</italic>, <italic>glad</italic> are used predominantly about women, while <italic>ignorant</italic>, <italic>cruel</italic> and <italic>mad</italic> are more frequently applied to men. In other words, it is <italic>a priori</italic> unclear whether gender biases in the representation of females vs. males will go hand in hand with the overall prevalence of negative collocations or other overt linguistic markers of negativity in their descriptions.</p>
<p>We have found two recent studies aiming at quantifying the degree to which the language used to describe females and males differs in being more positive or negative. <xref ref-type="bibr" rid="ref9">DeFranza et al. (2020)</xref> use word embedding to test whether the male vs. female members of 218 gendered noun and pronoun pairs differed in their overall semantic similarities to 25 positively vs. 25 negatively valenced words in Wikipedia and in the Common Crawl corpus (containing snapshots of all the texts available to the general public on the Internet since 2013) in 45 languages. It turns out that a substantial portion of the corpora (Wikipedia in 21 languages and the Common Crawl corpus in 19 languages) manifest a higher degree of association of male words with positively valenced words.</p>
<p><xref ref-type="bibr" rid="ref24">Hoyle et al. (2019)</xref> have used a list of 22 gendered noun pairs and the pronouns <italic>he</italic> and <italic>she</italic> for pulling out collocations in a huge corpus (11 bln words) by means of a generative latent-variable model that jointly represents adjective or verb choice with its sentiment. While there are great differences in the exact semantics and semantic class of the positive and negative adjectives and verbs applying to females versus males, there is only one significant difference in the overall sentiment of these combinations: adjectives applying to men are more often neutral than those applying to women.</p>
<p>To summarize, previous research shows several main opinions. While some researchers highlight predominantly derogatory attitudes toward females in language and society, others find ambivalence; according to the latter, the stereotypical representations of both men and women include positive and negative features. Finally, some studies show that words representing males more often occur in neutral contexts in comparison with words representing females. In our study, we want to investigate which of the opinions is best supported by usage data. Another important question is whether the situation has changed over recent decades or not. Thanks to the effort of feminists and movements like #MeToo, many countries have witnessed progress in the political, economic and cultural role of women in the society. If there was indeed a bias for negative or less neutral sentiment toward women, it may have become weaker recently.</p>
<p>While most of the previous quantitative work has been on English and has used synchronic data, our study takes a cross-linguistic and diachronic approach. We use state-of-the-art NLP methods &#x2013; in particular, Aspect-Based Sentiment Analysis &#x2013; to look for evidence of derogation or non-neutral status of nouns referring to women in language use. More specifically, we want to find if there are differences between pairs of nouns denoting male and female humans, in terms of their sentiment &#x2013; positive, negative or neutral, and, in case there are gender differences, whether they have decreased with time. For this purpose, we use large diachronic corpora of Chinese, English and Russian. The choice of languages is motivated by their diversity: these languages represent two language families (Chinese: Sino-Tibetan, English and Russian: Indo-European) and are typologically very different &#x2013; from the isolating Chinese to the analytic English and finally to the synthetic Russian. They are also typologically different in their relation to grammatical gender: Russian has a three-gender distinction into masculine, feminine and neuter in its nouns and pronouns and an obligatory gender agreement in adjectives and several other groups of words (including verbs in their past forms); English has an obligatory three-gender distinction in personal pronouns, with <italic>he</italic> and <italic>she</italic> restricted to animate referents, while Chinese lacks any obligatory gender distinctions, but can optionally distinguish between &#x201C;he&#x201D;, &#x201C;she&#x201D; and &#x201C;it&#x201D; in writing (see also 2.1). This is relevant for the ongoing debate on whether &#x201C;gendered&#x201D; languages, i.e., languages with a differentiation between masculine and feminine genders, display more gender prejudice than genderless languages (<italic>cf.</italic> <xref ref-type="bibr" rid="ref9">DeFranza et al., 2020</xref>).</p>
<p>The second consideration for the choice of Chinese, English and Russian was availability of large diachronic corpora. Originally we wanted to study fiction in different languages. Fiction has important properties that make it attractive for a diachronic analysis of sentiment. First, it often contains fragments resembling everyday language use. Second, it is possible to obtain data from different historical periods and perform a diachronic analysis. But most importantly, fiction authors tend to express emotions and feelings of their characters toward other people. However, finding diachronic corpus data turned out to be more problematic than we had expected. For English and Russian, which we originally started with, we have found data representing fiction covering the time from 1950 to 2019. While the English and Russian data are comparable, we were not able to find a completely matching corpus of Chinese. The time span of the Chinese data is smaller &#x2013; only 20&#x2009;years, from 1991 to 2010. Unfortunately, it was difficult to access diachronic fiction data in Chinese, so we had to use data from newspapers. While this difference between the Russian and English data, on the one hand, and the Chinese data, on the other, is a limitation of our study, the similarity of the results based on the different sources is striking (<italic>cf.</italic> also fn. 3).</p>
<p>Our approach differs from the large-scale diachronic studies of biases and stereotypes, which employ word embeddings to compute average distributional vectors that represent social constructs of interest, such as gender or race (e.g., <xref ref-type="bibr" rid="ref17">Garg et al., 2018</xref>; <xref ref-type="bibr" rid="ref29">Morehouse et al., 2023</xref>). The results of such studies are often difficult to interpret because the dimensions of the embeddings are a &#x201C;black box&#x201D; without inherent meaning. In our study, we investigate the lexical categories representing female and male categories directly, obtaining their sentiment values in every context of use.</p>
<p>The remaining part of our paper is organized as follows. Section 2 discusses the corpora, the process of data extraction, the computational and statistical methods, and a few methodological caveats that may influence the interpretation of our data. In Section 3, we report the results of our analyses. Finally, Section 4 concludes the paper, discussing the main findings and providing a perspective.</p>
</sec>
<sec sec-type="materials|methods" id="sec2">
<label>2</label>
<title>Materials and methods</title>
<sec id="sec3">
<label>2.1</label>
<title>Materials</title>
<p><xref ref-type="table" rid="tab1">Table 1</xref> displays the words we analyzed in this study. We focused on nouns because pronouns would not be directly comparable across the languages. In English, the pronouns <italic>he</italic> and <italic>she</italic> are used only for animate referents (with a few exceptions). Spoken Chinese has no gender distinctions in the 3<sup>rd</sup> person singular, although one can differentiate between the equivalents of &#x201C;he&#x201D;, &#x201C;she&#x201D; and &#x201C;it&#x201D; in writing. In Russian, both animate and inanimate referents can be anaphorically referred to with gendered pronouns <italic>on</italic> &#x2018;he&#x2019;, <italic>ona</italic> &#x2018;she&#x2019; and <italic>ono</italic> &#x2018;it&#x2019; depending on the lexical gender.</p>
<table-wrap position="float" id="tab1">
<label>Table 1</label>
<caption>
<p>The words analyzed in the study.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Concept pair</th>
<th align="left" valign="top">English</th>
<th align="left" valign="top">Chinese</th>
<th align="left" valign="top">Russian</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">ADULT</td>
<td align="left" valign="top">F: woman<break/>M: man</td>
<td align="left" valign="top">F: &#x5973;&#x4EBA;<break/>M: &#x7537;&#x4EBA;</td>
<td align="left" valign="top">F: &#x017E;en&#x0161;&#x010D;ina<break/>M: mu&#x017E;&#x010D;ina</td>
</tr>
<tr>
<td align="left" valign="top">NOT ADULT</td>
<td align="left" valign="top">F: girl<break/>M: boy</td>
<td align="left" valign="top">F: &#x5973;&#x5B69;(&#x5B50;/&#x5152;/&#x2212;)<break/>M: &#x7537;&#x5B69;(&#x5B50;/&#x5152;/&#x2212;)</td>
<td align="left" valign="top">F: devo&#x010D;ka<break/>M: mal&#x2019;&#x010D;ik</td>
</tr>
<tr>
<td align="left" valign="top">PARENT</td>
<td align="left" valign="top">F: mother<break/>M: father</td>
<td align="left" valign="top">F: &#x6BCD;&#x89AA;/&#x5ABD;&#x5ABD;<break/>M: &#x7236;&#x89AA;/&#x7238;&#x7238;</td>
<td align="left" valign="top">F: mat&#x2019;<break/>M: otec</td>
</tr>
<tr>
<td align="left" valign="top">CHILD</td>
<td align="left" valign="top">F: daughter<break/>M: son</td>
<td align="left" valign="top">F: &#x5973;&#x5152;<break/>M: &#x5152;&#x5B50;</td>
<td align="left" valign="top">F: do&#x010D;<break/>M: syn</td>
</tr>
<tr>
<td align="left" valign="top">SIBLING</td>
<td align="left" valign="top">F: sister<break/>M: brother</td>
<td align="left" valign="top">F: &#x59D0;&#x59D0;/&#x59B9;&#x59B9;/&#x59D0;&#x59B9;<break/>M: &#x54E5;&#x54E5;/&#x5F1F;&#x5F1F;/&#x5144;&#x5F1F;</td>
<td align="left" valign="top">F: sestra<break/>M: brat</td>
</tr>
<tr>
<td align="left" valign="top">SPOUSE</td>
<td align="left" valign="top">F: wife<break/>M: husband</td>
<td align="left" valign="top">F: &#x59BB;&#x5B50;<break/>M: &#x4E08;&#x592B;</td>
<td align="left" valign="top">F: &#x017E;ena<break/>M: mu&#x017E;</td>
</tr>
<tr>
<td align="left" valign="top">PARENT-IN-LAW</td>
<td align="left" valign="top">F: mother-in-law<break/>M: father-in-law</td>
<td align="left" valign="top">F: &#x5A46;&#x5A46;/&#x5CB3;&#x6BCD;<break/>M: &#x526C;&#x526C;/&#x5CB3;&#x7236;</td>
<td align="left" valign="top">F: t&#x00EB;&#x0161;&#x010D;a<break/>M: test&#x2019;</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The choice of the nouns was motivated by the following reasons. First, these lexical categories exist in all three languages and can be easily found in corpora (although the semantic extensions may differ). Second, these nouns indicate the gender of the referent in all three languages (with a small number of exceptions discussed in Section 2.4). Finally, they occur frequently enough to allow for a comparison of their sentiment values and tracing their changes over time.</p>
<p>We extracted examples of these words in context from large corpora. The data and extraction procedure are described below.</p>
<p>To collect English data, we used the Corpus of Historical American English (COHA) (<xref ref-type="bibr" rid="ref7">Davies, 2010</xref>). We extracted contexts containing the word forms of interest from the fiction component of the corpus covering years from 1950 to 2019. For data extraction, we used the online version of the corpus at <ext-link xlink:href="https://www.english-corpora.org/coha/" ext-link-type="uri">https://www.english-corpora.org/coha/</ext-link>. We downloaded a random sample of 500 sentences per decade with the nouns in the singular or plural form, with the part-of-speech tag NOUN. If the form was infrequent, we included all available examples. The total number of examples was 75,736 sentences. We also saved information about the book in which every example appeared.</p>
<p>To find Chinese sentences, we used a local copy of the Chinese Gigaword Corpus Fifth Edition, Xinhua (XIN) and CNA sections. The data represented news from 1991 to 2010. We used a script to extract the examples and metadata. We took all sentences we could find in the corpus that contained the words representing &#x201C;mother-in-law&#x201D; and &#x201C;father-in-law&#x201D;, which were relatively infrequent. We sampled 7,000 observations representing each of the other lexical categories, which were more frequent. The total sample size was 91,095 sentences from the news agencies CNA and Xinhua.</p>
<p>As for Russian, we extracted sentences from the Russian National Corpus (RNC, ruscorpora.ru), using the online interface. We pre-selected the subcorpus of fiction (&#x201C;xudo&#x017E;estvennaja literatura&#x201D;) from 1950 to 2019. We searched separately for the lemmas in the RNC in texts written by female and male authors, and with grammatical features &#x201C;Singular&#x201D; and &#x201C;Plural&#x201D;. We had to perform a manual check of ambiguous word forms because not all forms were disambiguated in the corpus. For example, the form <italic>teste</italic> is the singular locative case of the words &#x2018;father-in-law&#x2019; and &#x2018;dough&#x2019;. The irrelevant forms were excluded, as well as ungrammatical and archaic forms, e.g., <italic>ot&#x010D;e</italic> &#x2018;father&#x2019; in the vocative case. We downloaded all available examples. Because the total size of the data was very large, we sampled 7,000 sentences for each of the concepts, with the exception of the words representing &#x201C;mother-in-law&#x201D; and &#x201C;father-in-law&#x201D;, which were infrequent in the corpus compared to the other nouns and of which we took all examples. The total number of sentences in the final dataset was 86,020.</p>
</sec>
<sec id="sec4">
<label>2.2</label>
<title>Methods</title>
<sec id="sec5">
<label>2.2.1</label>
<title>Aspect-based sentiment analysis</title>
<p>In order to obtain sentiment polarity values for the instances of the female and male terms in the corpora, we employed state-of-the-art Aspect-Based Sentiment Analysis (ABSA). It is a subtype of sentiment analysis and opinion mining, which allows companies to analyze customer opinions and sentiments expressed in reviews of products and services and helps improve marketing campaigns. Traditional sentiment analysis provides sentiment polarity values (usually positive, negative or neutral) to entire sentences or texts. In contrast, Aspect-Based Sentiment Analysis can deal with situations when a sentence expresses different sentiments to different entities. For example, in the sentence <italic>I love the pizza at this restaurant, but the service is terrible</italic>, there are different sentiments toward <italic>pizza</italic> and <italic>service</italic>: positive and negative, respectively. The words <italic>pizza</italic> and <italic>service</italic> are called aspect terms (for more detail, see <xref ref-type="bibr" rid="ref47">Zhang et al., 2022</xref>).</p>
<p>We obtained ABSA polarity values using two approaches. One of them was using a multilingual model from the PyABSA toolkit (<xref ref-type="bibr" rid="ref45">Yang and Li, 2023</xref>).<xref ref-type="fn" rid="fn0001"><sup>1</sup></xref> For illustration, consider several examples from the COHA. The aspect term is underlined.</p>
<list list-type="order">
<list-item>
<p>a. The <underline>woman</underline> extended her hand. [NEUTRAL].</p>
<p>b. I cannot think of anything more exciting than drinking champagne in a pretty <underline>woman&#x2019;s</underline> bedroom. [POSITIVE].</p>
<p>c. She looked socially prominent, but the type of society <underline>woman</underline> that could be easily induced to lend her name and face to a cold-cream advertisement. [NEGATIVE].</p>
</list-item>
<list-item>
<p>a. The <underline>man</underline> opened his eyes. [NEUTRAL].</p>
<p>b. He was a <underline>man</underline> in a million. [POSITIVE].</p>
<p>c. Wherever that <underline>man</underline> goes, there is trouble, he said. [NEGATIVE].</p>
</list-item>
</list>
<p>The model was trained on different datasets, which contain mostly customer reviews of different products and restaurants. This represents a limitation. There is a danger that human beings will be evaluated based on the same criteria as laptops or shampoos, or at best as service personnel. This is why we also used a second method, employing GPT-3.5-turbo (<xref ref-type="bibr" rid="ref4">Brown et al., 2020</xref>; <xref ref-type="bibr" rid="ref33">Ouyang et al., 2022</xref>) from OpenAI. GPT models have been used previously for ABSA and related tasks (<xref ref-type="bibr" rid="ref23">Hosseini-Asl et al., 2022</xref>), but to the best of our knowledge not involving human referents. Because of the costs involved in processing of large numbers of tokens, we only annotated a part of the datasets, drawing random samples of 400 examples of every lexeme. For English and Russian, the examples of the singular and plural forms were sampled separately, for a total of 800 samples per lexeme. Some of the forms had fewer than 400 instances (e.g., the forms representing PARENT-IN-LAW); in those cases, all instances were used. This resulted in 10,214 annotated examples for the English data, 10,070 for the Russian data and 5,600 for the Chinese data.</p>
<p>In order to obtain ABSA judgments from the GPT models, we used a few-shot approach. In few-shot learning, a small number of example questions with human-assigned answers are given as context, followed by a question. The language model is then queried for the most likely continuation to this context, with the expectation that the final question is answered. Such a context is referred to as a <italic>prompt</italic>. In our case, we used a prompt according to the following pattern:</p>
<p>&#x201C;You will guess the sentiment of the author toward a particular person. Answer only with a single word, one of the following: Positive, Negative, Neutral. When in doubt, answer Neutral.</p>
<p>Question 1: what is the attitude toward &#x201C;sister&#x201D; in the following text? &#x003C;&#x003C;&#x003C; His sister won an Olympic gold medal. &#x003E;&#x003E;&#x003E;</p>
<p>Question 2: what is the attitude toward &#x201C;mothers&#x201D; in the following text? &#x003C;&#x003C;&#x003C; Their mothers were all above the age of 80. &#x003E;&#x003E;&#x003E;</p>
<p>Question 3: what is the attitude toward &#x201C;mother-in-law&#x201D; in the following text? &#x003C;&#x003C;&#x003C; Her mother-in-law likes to watch her suffer. &#x003E;&#x003E;&#x003E;</p>
<p>Answer 1: Positive.</p>
<p>Answer 2: Neutral.</p>
<p>Answer 3: Negative.</p>
<p>Question 1: what is the attitude toward &#x201C;fathers&#x201D; in the following text? &#x003C;&#x003C;&#x003C; Their fathers were all very interested in chess. &#x003E;&#x003E;&#x003E;</p>
<p>Question 2: what is the attitude toward &#x201C;brother&#x201D; in the following text? &#x003C;&#x003C;&#x003C; Her brother won an Oscar. &#x003E;&#x003E;&#x003E;&#x201D;</p>
<p>After the above prompt, we would expect a good language model to generate the following text, from which we then extract the sentiment labels of &#x201C;fathers&#x201D; and &#x201C;brother&#x201D; in the last two sentences above:</p>
<p>&#x201C;Answer 1: Neutral.</p>
<p>Answer 2: Positive.&#x201D;</p>
<p>Our reasoning in this case is that a person winning an Olympic medal or an Oscar are clear contexts where most readers would be expected to gain positive impressions of the people behind these achievements, as opposed to for instance a person simply liking chess.</p>
<p>We used a fixed set of examples for each language (English: 10; Chinese: 13; Russian: 15), divided into two question/answer blocks with the examples approximately evenly split between them. These were sent through the OpenAI API, using the model <italic>gpt-3.5-turbo</italic>. To ensure maximal consistency, we used a very small temperature parameter of 10<sup>&#x2212;6</sup>. For efficiency, we sent queries in batches of 10. In other words, our prompts ended with 10 questions, and we expected a text containing the corresponding 10 answers in return.</p>
<p>To see how reliable the models are, we compared a small number of randomly selected and manually annotated sentences with the labels provided by PyABSA and GPT-3.5. The results are shown in <xref ref-type="table" rid="tab2">Table 2</xref>. The Accuracy score represents the proportion of correct labels in the total number of annotated examples. With three categories, the baseline accuracy obtained by random guessing is 33.3%. However, since most sentences in the data carry neutral sentiment, it is possible to obtain a much higher accuracy by assigning the label &#x201C;neutral&#x201D; to all sentences (<italic>cf.</italic> the prompt fragment &#x201C;When in doubt, answer Neutral&#x201D; for GPT-3.5). The macro-averaged F<sub>1</sub>-score is the mean of F<sub>1</sub> scores across the three sentiment labels, and each such F<sub>1</sub> score is the harmonic mean between precision and recall for that label. The F<sub>1</sub> score considers how good the model is at identifying each of the categories, and always predicting &#x201C;neutral&#x201D; would yield zero F<sub>1</sub>-scores for the positive and negative categories. The performance metrics show that the GPT-3.5 model strongly outperforms the PyABSA model. This is explained largely by different proportions of neutral sentiment assigned by each system, which is the sentiment observed in most of the test sentences. We have also evaluated the more recent GPT-4 model, and found it to be roughly equal in performance to GPT-3.5. The higher cost prevented us from applying GPT-4 to larger amounts of data.</p>
<table-wrap position="float" id="tab2">
<label>Table 2</label>
<caption>
<p>Performance metrics of the sentiment analysis models.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Language</th>
<th align="left" valign="top">Model</th>
<th align="center" valign="top">N data points</th>
<th align="center" valign="top">Accuracy</th>
<th align="center" valign="top">Macro F<sub>1</sub></th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top" rowspan="2">English</td>
<td align="left" valign="top">PyABSA</td>
<td align="center" valign="top">200</td>
<td align="char" valign="top" char=".">59.0%</td>
<td align="char" valign="top" char=".">0.510</td>
</tr>
<tr>
<td align="left" valign="top">GPT-3.5</td>
<td align="center" valign="top">200</td>
<td align="char" valign="top" char=".">72.0%</td>
<td align="char" valign="top" char=".">0.544</td>
</tr>
<tr>
<td align="left" valign="top" rowspan="2">Chinese</td>
<td align="left" valign="top">PyABSA</td>
<td align="center" valign="top">143</td>
<td align="char" valign="top" char=".">42.0%</td>
<td align="char" valign="top" char=".">0.407</td>
</tr>
<tr>
<td align="left" valign="top">GPT-3.5</td>
<td align="center" valign="top">193</td>
<td align="char" valign="top" char=".">67.4%</td>
<td align="char" valign="top" char=".">0.541</td>
</tr>
<tr>
<td align="left" valign="top" rowspan="2">Russian</td>
<td align="left" valign="top">PyABSA</td>
<td align="center" valign="top">200</td>
<td align="char" valign="top" char=".">26.5%</td>
<td align="char" valign="top" char=".">0.276</td>
</tr>
<tr>
<td align="left" valign="top">GPT-3.5</td>
<td align="center" valign="top">200</td>
<td align="char" valign="top" char=".">67.0%</td>
<td align="char" valign="top" char=".">0.462</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec6">
<label>2.2.2</label>
<title>Generalized linear mixed-effect models</title>
<p>To test the effect of gender on sentiment, we used Generalized Linear Mixed-effect Models with logit as the link function. For every dataset, we fitted two types of models to test the main expectations based on previous findings. The first one predicted if the sentiment was neutral or not, as a follow-up of the results reported by <xref ref-type="bibr" rid="ref24">Hoyle et al. (2019)</xref>. The second one, which was inspired by the claims about pejoration summarized in Section 1, predicted if the sentiment was positive or negative, excluding the examples with neutral sentiment. Because of the multiple comparisons performed on the same data, we used a Bonferroni correction for model selection.</p>
<p>The fixed effects in all models contained the gender of the referent (female or male) and a scaled and centered version of the year. These variables are directly relevant for our expectations about the gender differences in synchrony and diachrony. In addition, we tested several covariates, which could potentially influence the results. The English and Russian models contained the number of the referents (singular or plural) because one could not exclude that writers have different attitudes to a woman or man as an individual and as a group. The number of referents in the Chinese sentences was, unfortunately, too difficult to control for, because Chinese nouns are usually not marked for number. The Russian data and one Chinese dataset contained the author&#x2019;s gender (female or male). This was an important factor to consider because it is possible that female and male writers have different attitudes toward persons of their own and of the other gender(s). All pairwise interactions were tested, and the ones with the corrected <italic>p</italic>-value of the likelihood ratio test less than 0.05 were added to the model. In the Chinese dataset without the authors&#x2019; data, we tested the source (one of the two news agencies, XIN and CNA) as a fixed effect.<xref ref-type="fn" rid="fn0002"><sup>2</sup></xref></p>
<p>There were also variables that were treated as random effects. The concept pairs were treated as random intercepts in all models. The individual books were random intercepts in English and Russian, and individual authors were random intercepts in the Chinese model that included author&#x2019;s data. This was necessary because the assumption of independence of observations was violated, with more than one sentence coming from one and the same book or individual author. Different authors could have their individual biases toward male and female referents they wrote about. All potential random slopes were tested using the likelihood ratio test.</p>
<p>The variables are summarized in <xref ref-type="table" rid="tab3">Table 3</xref>.</p>
<table-wrap position="float" id="tab3">
<label>Table 3</label>
<caption>
<p>Variables tested in the GLMM.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top">Variable</th>
<th align="left" valign="top">English</th>
<th align="left" valign="top">Chinese</th>
<th align="left" valign="top">Russian</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Sentiment_positive, Sentiment_neutral</td>
<td align="left" valign="top">Response</td>
<td align="left" valign="top">Response</td>
<td align="left" valign="top">Response</td>
</tr>
<tr>
<td align="left" valign="top">Year (scaled and centered)</td>
<td align="left" valign="top">Fixed</td>
<td align="left" valign="top">Fixed</td>
<td align="left" valign="top">Fixed</td>
</tr>
<tr>
<td align="left" valign="top">Gender (referent)</td>
<td align="left" valign="top">Fixed</td>
<td align="left" valign="top">Fixed</td>
<td align="left" valign="top">Fixed</td>
</tr>
<tr>
<td align="left" valign="top">Number (referent)</td>
<td align="left" valign="top">Fixed</td>
<td align="left" valign="top">&#x2013;</td>
<td align="left" valign="top">Fixed</td>
</tr>
<tr>
<td align="left" valign="top">Author&#x2019;s Gender</td>
<td align="left" valign="top">&#x2013;</td>
<td align="left" valign="top">&#x2013;</td>
<td align="left" valign="top">Fixed</td>
</tr>
<tr>
<td align="left" valign="top">Conceptual Pair</td>
<td align="left" valign="top">Random</td>
<td align="left" valign="top">Random</td>
<td align="left" valign="top">Random</td>
</tr>
<tr>
<td align="left" valign="top">Source/Author</td>
<td align="left" valign="top">Random (individual books)</td>
<td align="left" valign="top">Fixed (CNA or XIN)</td>
<td align="left" valign="top">Random (individual books)</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="sec7">
<label>2.3</label>
<title>A caveat: replaceability of female and male terms</title>
<p>An important caveat is that the gender-specific words may have their own contribution to the sentiment scores. In order to check whether the sentiment classification depends on the target noun itself, we selected a random sample of 1,000 sentences from each corpus and obtained the sentiment values as described above. After this, we replaced the target words with their correspondences of the opposite gender, e.g., <italic>woman</italic> was replaced with <italic>man</italic>, <italic>boys</italic> was replaced with <italic>girls</italic>, and so on. The sentiment analysis was then run on the modified sentences with the help of PyABSA. Finally, we computed the proportions of the same classification of the sentences before and after the modification. The results showed that for the overwhelming majority of the sentences the gender did not matter. But some of the concepts were slightly more sensitive to this transformation than others, although we found no systematic patterns across the languages.</p>
<p>In the English sample, the same label was assigned in the same context in 87.1% of all cases. The greatest effect of changing the gender was in sentences with &#x201C;brother&#x201D; (overlap 82.1%) and &#x201C;sister&#x201D; (overlap 83.1%). The weakest effect was in sentences with &#x201C;daughter&#x201D; (overlap 92%) and &#x201C;son&#x201D; (overlap 91%). &#x201C;Mother-in-law&#x201D; and &#x201C;father-in-law&#x201D; had only a few occurrences in the random sample, so they were not considered.</p>
<p>As for Chinese, the same label was assigned in the same context in 89% of all cases. The concepts &#x201C;wife&#x201D; and &#x201C;woman&#x201D; had the greatest effect of replacement (77.8 and 82.1% overlap, respectively). The replacement had the weakest effect for the concepts &#x201C;boy&#x201D; (96% overlap) and &#x201C;daughter&#x201D; (93.4%).</p>
<p>In the Russian sample, the same label was assigned in the same context in 89.3% of all cases. The greatest effect of replacement was in sentences with &#x201C;daughter&#x201D;, &#x201C;mother&#x201D; and &#x201C;father&#x201D; (overlap less than 85%). The smallest effect was in sentences with &#x201C;man&#x201D; and &#x201C;woman&#x201D; (overlap more than 94%), as well as the low-frequency &#x201C;mother-in-law&#x201D; and &#x201C;father-in-law&#x201D; (overlap 100%).</p>
<p>It is very difficult to say whether these differences have to do with the inherent sentiment associated with the individual words, or with their interaction with the context. For example, <italic>a pretty woman</italic> can be perceived positively in a heteronormative culture, but <italic>a pretty man</italic> may not. Moreover, as <xref ref-type="bibr" rid="ref38">Romaine (2000)</xref>, p. 109 observes, even seemingly gender-neutral terms have different connotations when applied to men and women. For example, to call a man a professional is a compliment, but in some languages, such as English, Japanese or French, if a woman is called a professional, this may be a euphemism for a prostitute. All this means that the sentiment value depends on the complex interaction of the target word with its context, which requires further investigation.</p>
</sec>
<sec id="sec8">
<label>2.4</label>
<title>Another caveat: polysemy and male bias</title>
<p>In our large corpus study, we did not have tools to control for polysemy of the nouns. One widely spread type of polysemy is the use of male terms to represent male and female referents, which serves as evidence of the unmarked status of male forms in structuralist theories of semantic markedness (<xref ref-type="bibr" rid="ref9009">Jakobson, 1971 [1932]</xref>). This type of polysemy is common across languages and represents an example of the so-called male bias (<xref ref-type="bibr" rid="ref1">Aikhenvald, 2016</xref>). In English, it is observed in the semantics of the English word <italic>man</italic>. An example from COHA is below.</p>
<list list-type="simple">
<list-item><p>(3) The hydrogen bomb represented the ultimate refinement in man&#x2019;s search for the means of self-destruction. (<italic>Morgan&#x2019;s Passing</italic> by <xref ref-type="bibr" rid="ref9010">Tyler, 1980</xref>)</p></list-item>
</list>
<p>This type of polysemy may distort the results. In order to estimate the size of the problem, we performed a manual check of 500 occurrences of the form <italic>man</italic> in our dataset and found only 17 instances where the form could be interpreted as referring to a human being regardless of their gender. This accounts for only 3.4% of the data. We can conclude that this type of polysemy does not play an important role in English. In Russian, a similar polysemy is observed in the word <italic>brat</italic> &#x2018;brother&#x2019;, as in <italic>All people are brothers</italic>. A manual check of 500 randomly selected sentences revealed, again, only 17 cases (3.4%) where this word could be potentially interpreted in this sense. This means that this type of polysemy was unlikely to cause major distortion in our analysis.</p>
<p>Other types of polysemy include the use of the Russian nouns <italic>sestra</italic> &#x2018;sister&#x2019; and <italic>brat</italic> &#x2018;brother&#x2019; in the meaning &#x2018;nurse&#x2019;. In some cases, the words <italic>mat&#x2019;</italic> &#x2018;mother&#x2019;, <italic>otec</italic> &#x2018;father&#x2019; and <italic>brat</italic> &#x2018;brother&#x2019; are used as terms of address that do not imply any kinship, similar to <italic>bro</italic> in English. This extended use of kinship terms is also common in Chinese, where for instance &#x5144;&#x5F1F; &#x2018;brother(s)&#x2019; is frequently found in contexts such as &#x2018;brother peoples&#x2019;. In Chinese, we also find polysemy within the kinship domain, with &#x526C;&#x526C; &#x2018;father-in-law&#x2019; also sometimes being used for &#x2018;grandfather&#x2019;. We included all these uses, as well. Our statistical models allowed us to control for the potential biases associated with individual concepts with the help of random effects.</p>
</sec>
</sec>
<sec sec-type="results" id="sec9">
<label>3</label>
<title>Results</title>
<sec id="sec10">
<label>3.1</label>
<title>English</title>
<sec id="sec11">
<label>3.1.1</label>
<title>Descriptive statistics</title>
<sec id="sec12">
<label>3.1.1.1</label>
<title>PyABSA</title>
<p><xref ref-type="fig" rid="fig1">Figure 1</xref> displays the proportions of positive, neutral and negative scores in the English data across the genders and conceptual pairs, obtained with PyABSA. We can see that the proportions vary in a subtle way across the genders and more substantially across the pairs. For example, the pair ADULT (&#x201C;woman&#x201D; and &#x201C;man&#x201D;) has less often neutral sentiment than the pair CHILD (&#x201C;daughter&#x201D; and &#x201C;son&#x201D;), but the proportions within each pair are almost equal. In some of the pairs, however, female referents are less often evaluated neutrally than male ones, e.g., NOT ADULT (&#x201C;girl&#x201D; has fewer neutral scores than &#x201C;boy&#x201D;), PARENT (&#x201C;mother&#x201D; vs. &#x201C;father&#x201D;) and PARENT-IN-LAW (&#x201C;mother-in-law&#x201D; vs. &#x201C;father-in-law&#x201D;). These female nouns are also more often used both in positive and in negative contexts than their male counterparts.</p>
<fig position="float" id="fig1">
<label>Figure 1</label>
<caption>
<p>Proportions of different sentiment scores in the English data, based on PyABSA.</p>
</caption>
<graphic xlink:href="fcomm-09-1266407-g001.tif"/>
</fig>
</sec>
<sec id="sec13">
<label>3.1.1.2</label>
<title>GPT-3.5</title>
<p><xref ref-type="fig" rid="fig2">Figure 2</xref> shows the proportions of the positive, negative and neutral scores obtained with the help of GPT-3.5. The neutral scores are predominant for all lexemes. We observe no large differences between the female and male lexemes, with the exception of the pair PARENT-IN-LAW, where &#x201C;mother-in-law&#x201D; has visibly more negative scores than &#x201C;father-in-law&#x201D;. Unlike what we saw above, we find no indications of the male lexemes being more often used in neutral contexts than the female lexemes.</p>
<fig position="float" id="fig2">
<label>Figure 2</label>
<caption>
<p>Proportions of different sentiment scores in the English data, based on GPT-3.5.</p>
</caption>
<graphic xlink:href="fcomm-09-1266407-g002.tif"/>
</fig>
</sec>
</sec>
<sec id="sec14">
<label>3.1.2</label>
<title>Generalized linear mixed-effect models</title>
<sec id="sec15">
<label>3.1.2.1</label>
<title>PyABSA</title>
<p>The first model was fitted to predict whether a sentiment score was neutral or not. We included random intercepts for each Concept Pair and Source (the book). We also tested all possible random slopes and ended up having random slopes for Gender, Number and the interaction between Gender and Number. The coefficients of the fixed effects, as well as their 95% confidence intervals and Bonferroni-corrected <italic>p</italic>-values, are shown in <xref ref-type="table" rid="tab4">Table 4</xref>. Positive log-odds ratios of the coefficients (or simply log-odds, for the intercept term) show that the variable increases the chances of neutral sentiment. Negative log-odds ratios decrease the likelihood of neutral sentiment. A log-odds ratio very close to zero means that there is no effect. Log-odds ratios can be transformed into odds ratios, which represent the ratio of the odds of neutral sentiment in the presence of the specified value (or an increase of one unit, for numeric variables) and the odds in the absence of this value (or 0 for numeric variables). If a variable has no effect, the odds ratio will be 1.</p>
<table-wrap position="float" id="tab4">
<label>Table 4</label>
<caption>
<p>Coefficients and their Wald 95% confidence intervals (in parentheses) of the fixed effects in the generalized linear mixed-effect model with the response variable &#x201C;neutral or non-neutral&#x201D; based on the English data and PyABSA.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Regression term</th>
<th align="center" valign="top" colspan="2">Coefficient (&#x03B2;) and its 95% confidence interval</th>
<th align="center" valign="top" rowspan="2">P-value (Bonferroni-corrected)</th>
</tr>
<tr>
<th align="center" valign="top">Log-odds (ratio)</th>
<th align="center" valign="top">Odds (ratio)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Intercept</td>
<td align="char" valign="top" char="(">0.090 (&#x2212;0.131, 0.310)</td>
<td align="char" valign="top" char="(">1.094 (0.878, 1.363)</td>
<td align="char" valign="top" char=".">0.850</td>
</tr>
<tr>
<td align="left" valign="top">Year (scaled, centered)</td>
<td align="char" valign="top" char="(">0.024 (&#x2212;0.003, 0.051)</td>
<td align="char" valign="top" char="(">1.024 (0.997, 1.052)</td>
<td align="char" valign="top" char=".">0.167</td>
</tr>
<tr>
<td align="left" valign="top">Gender&#x2009;=&#x2009;Male</td>
<td align="char" valign="top" char="(">0.097 (&#x2212;0.109, 0.303)</td>
<td align="char" valign="top" char="(">1.102 (0.896, 1.354)</td>
<td align="char" valign="top" char=".">0.714</td>
</tr>
<tr>
<td align="left" valign="top">Number&#x2009;=&#x2009;Singular</td>
<td align="char" valign="top" char="(">&#x2212;0.500 (&#x2212;0.779, &#x2212;0.221)</td>
<td align="char" valign="top" char="(">0.606 (0.459, 0.802)</td>
<td align="char" valign="top" char=".">0.001</td>
</tr>
<tr>
<td align="left" valign="top">Interaction term Gender&#x2009;=&#x2009;Male: Number&#x2009;=&#x2009;Singular</td>
<td align="char" valign="top" char="(">0.250 (0.096, 0.403)</td>
<td align="char" valign="top" char="(">1.283 (1.100, 1.497)</td>
<td align="char" valign="top" char=".">0.003</td>
</tr>
<tr>
<td align="left" valign="top">Interaction term Year: Number&#x2009;=&#x2009;Singular</td>
<td align="char" valign="top" char="(">&#x2212;0.047 (&#x2212;0.078, &#x2212;0.017)</td>
<td align="char" valign="top" char="(">0.954 (0.925, 0.983)</td>
<td align="char" valign="top" char=".">0.005</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>We observed two significant interactions. One of them was between Year and Number. We found that singular nouns tend to become slightly less neutral with time, while plural nouns become slightly more neutral. The more important interaction for us, however, is the interaction between Gender and Number. This interaction is displayed in <xref ref-type="fig" rid="fig3">Figure 3</xref>. Our model revealed that the male nouns are more likely to get neutral scores than the female nouns in the singular. However, there was only a small difference in the plural.</p>
<fig position="float" id="fig3">
<label>Figure 3</label>
<caption>
<p>Interaction between Gender and Number in the English data: Neutral vs. non-neutral sentiment, based on PyABSA.</p>
</caption>
<graphic xlink:href="fcomm-09-1266407-g003.tif"/>
</fig>
<p>We fitted the second model to predict whether a gendered word has a positive or a negative sentiment score, excluding the neutral scores. The main statistics are provided in <xref ref-type="table" rid="tab5">Table 5</xref>. In this case, positive log-odds ratios or odds ratios above 1 show that the variable increases the chances of positive sentiment, and negative log-odds ratios or odds ratios below 1 indicate that the chances of negative sentiment are higher.</p>
<table-wrap position="float" id="tab5">
<label>Table 5</label>
<caption>
<p>Coefficients and their Wald 95% confidence intervals (in parentheses) of the fixed effects in the Generalized Linear Mixed-effect Model with the response variable &#x201C;Positive or Negative&#x201D; based on the English data and PyABSA.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Regression term</th>
<th align="center" valign="top" colspan="2">Coefficient (&#x03B2;)</th>
<th align="center" valign="top" rowspan="2"><italic>P</italic>-value</th>
</tr>
<tr>
<th align="center" valign="top">Log-odds (ratio)</th>
<th align="center" valign="top">Odds (ratio)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Intercept</td>
<td align="char" valign="top" char="(">&#x2212;0.340 (&#x2212;0.577, &#x2212;0.103)</td>
<td align="char" valign="top" char="(">0.712 (0.562, 0.902)</td>
<td align="char" valign="top" char=".">0.010</td>
</tr>
<tr>
<td align="left" valign="top">Year (scaled, centered)</td>
<td align="char" valign="top" char="(">&#x2212;0.013 (&#x2212;0.054, 0.027)</td>
<td align="char" valign="top" char="(">0.987 (0.948, 1.027)</td>
<td align="char" valign="top" char=".">1.000</td>
</tr>
<tr>
<td align="left" valign="top">Gender&#x2009;=&#x2009;Male</td>
<td align="char" valign="top" char="(">&#x2212;0.067 (&#x2212;0.110, &#x2212;0.024)</td>
<td align="char" valign="top" char="(">0.935 (0.896, 0.977)</td>
<td align="char" valign="top" char=".">0.004</td>
</tr>
<tr>
<td align="left" valign="top">Number&#x2009;=&#x2009;Singular</td>
<td align="char" valign="top" char="(">&#x2212;0.166 (&#x2212;0.335, 0.003)</td>
<td align="char" valign="top" char="(">0.847 (0.715, 1.003)</td>
<td align="char" valign="top" char=".">0.108</td>
</tr>
<tr>
<td align="left" valign="top">Interaction term Year: Number&#x2009;=&#x2009;Singular</td>
<td align="char" valign="top" char="(">&#x2212;0.070 (&#x2212;0.115, &#x2212;0.027)</td>
<td align="char" valign="top" char="(">0.932 (0.892, 0.974)</td>
<td align="char" valign="top" char=".">0.004</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>In this model we observed a significant main effect of Gender. As shown in <xref ref-type="fig" rid="fig4">Figure 4</xref>, female nouns are more likely to have positive sentiment scores than male nouns, and male nouns are more likely to get negative scores. The effect is significant (<italic>p</italic>&#x2009;=&#x2009;0.004), but very small: the odds of positive sentiment if the referent is male are 0.935 times as large as the odds of positive sentiment if the referent is female. To reformulate this, the odds of positive sentiment if the referent is female are only 1.069 times as large as, or about 7% higher than, the odds of positive sentiment if the referent is male.</p>
<fig position="float" id="fig4">
<label>Figure 4</label>
<caption>
<p>Main effect of Gender and Number in the English data, based on PyABSA: Positive vs. negative sentiment.</p>
</caption>
<graphic xlink:href="fcomm-09-1266407-g004.tif"/>
</fig>
<p>In addition, an examination of the interaction, which is not displayed here due to space limitations, reveals that singular nouns become more often negatively tagged with time for both genders, but this trend is much weaker in the plural.</p>
</sec>
<sec id="sec16">
<label>3.1.2.2</label>
<title>GPT-3.5</title>
<p>This subsection reports the regression modeling results based on GPT-3.5 with few-shot learning. The best model for neutral vs. non-neutral sentiment did not include Gender because it was not significant (corrected <italic>p</italic>&#x2009;=&#x2009;1). No random slopes or interactions improved the model. The only significant fixed effects were Year and Number, shown in <xref ref-type="table" rid="tab6">Table 6</xref>. We observe a decrease of neutral sentiment with time. We also find that singular nouns have lower chances of neutral sentiment.</p>
<table-wrap position="float" id="tab6">
<label>Table 6</label>
<caption>
<p>Coefficients and their Wald 95% confidence intervals (in parentheses) of the fixed effects in the Generalized Linear Mixed-effect Model with the response variable &#x201C;Neutral or Non-neutral&#x201D; based on the English data and GPT-3.5.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Regression term</th>
<th align="center" valign="top" colspan="2">Coefficient (&#x03B2;)</th>
<th align="center" valign="top" rowspan="2"><italic>P</italic>-value</th>
</tr>
<tr>
<th align="center" valign="top">Log-odds (ratio)</th>
<th align="center" valign="top">Odds (ratio)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Intercept</td>
<td align="char" valign="top" char="(">1.126 (0.963, 1.289)</td>
<td align="char" valign="top" char="(">3.083 (2.620, 3.629)</td>
<td align="char" valign="top" char=".">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">Year (scaled, centered)</td>
<td align="char" valign="top" char="(">&#x2212;0.061 (&#x2212;0.111, &#x2212;0.011)</td>
<td align="char" valign="top" char="(">0.941 (0.895, 0.989)</td>
<td align="char" valign="top" char=".">0.034</td>
</tr>
<tr>
<td align="left" valign="top">Number&#x2009;=&#x2009;Singular</td>
<td align="char" valign="top" char="(">&#x2212;0.271 (&#x2212;0.362, &#x2212;0.180)</td>
<td align="char" valign="top" char="(">0.763 (0.696, 0.835)</td>
<td align="char" valign="top" char=".">&#x003C;0.001</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As for positive vs. negative sentiment, the best model included only one fixed effect: that of Number. As shown in <xref ref-type="table" rid="tab7">Table 7</xref>, singular nouns have higher chances of being associated with positive sentiment than plural ones.</p>
<table-wrap position="float" id="tab7">
<label>Table 7</label>
<caption>
<p>Coefficients and their Wald 95% confidence intervals (in parentheses) of the fixed effects in the Generalized Linear Mixed-effect Model with the response variable &#x201C;Positive or Negative&#x201D; based on the English data and GPT-3.5.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Regression term</th>
<th align="center" valign="top" colspan="2">Coefficient (&#x03B2;)</th>
<th align="center" valign="top" rowspan="2"><italic>P</italic>-value</th>
</tr>
<tr>
<th align="center" valign="top">Log-odds (ratio)</th>
<th align="center" valign="top">Odds (ratio)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Intercept</td>
<td align="char" valign="top" char="(">&#x2212;0.408 (&#x2212;0.779, &#x2212;0.037)</td>
<td align="char" valign="top" char="(">0.665 (0.459, 0.964)</td>
<td align="char" valign="top" char=".">0.062</td>
</tr>
<tr>
<td align="left" valign="top">Number&#x2009;=&#x2009;Singular</td>
<td align="char" valign="top" char="(">0.242 (0.081, 0.403)</td>
<td align="char" valign="top" char="(">1.273 (1.084, 1.496)</td>
<td align="char" valign="top" char=".">0.006</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec17">
<label>3.1.2.3</label>
<title>Summary</title>
<p>The models of data annotated by PyABSA and GPT-3.5 agree in several important respects. First of all, we find no evidence that the female nouns are more associated with negative sentiment than the male nouns. Secondly, there is no predicted interaction between Gender and Year, which would display a gradual convergence between the genders during the time period examined.</p>
<p>There are also intriguing differences between the approaches. While the PyABSA approach reveals a tendency for singular male nouns to be more often associated with neutral sentiment, we find no such tendency in the GPT-3.5 data. Also, the PyABSA data show that male referents tend to occur in more negative contexts, whereas the GPT-3.5 data yield no significant gender differences at all.</p>
<p>Can these differences be explained by the smaller size of the data annotated by GPT-3.5? Refitting the models on the smaller sample reveals that the preference for singular male nouns to be associated with more neutral speeches, which was found with the help of PyABSA, is robust. Notably, the preference for male referents to be accompanied by more negative sentiment is no longer found in the smaller dataset. In the full PyABSA analysis, the effect was significant but small [odds ratio confidence interval (0.896, 0.977)]. Also, no effect of Year and Number is detected.</p>
</sec>
</sec>
</sec>
<sec id="sec18">
<label>3.2</label>
<title>Chinese</title>
<sec id="sec19">
<label>3.2.1</label>
<title>Descriptive statistics</title>
<p><xref ref-type="fig" rid="fig5">Figure 5</xref> shows the proportions of different sentiments in the Chinese data with PyABSA sentiment scores. We can see that the words are in general more often positively evaluated than in the English data. The proportion of neutral sentiment is smaller. Overall, the figure suggests that the female concepts are less frequently evaluated neutrally than the male ones in all concept pairs. They also have more often positive sentiment, with the exception of &#x201C;mother-in-law&#x201D;. The proportions of negative sentiment vary a lot across the pairs and genders.</p>
<fig position="float" id="fig5">
<label>Figure 5</label>
<caption>
<p>Proportions of different sentiment scores in the Chinese data, based on PyABSA.</p>
</caption>
<graphic xlink:href="fcomm-09-1266407-g005.tif"/>
</fig>
<p><xref ref-type="fig" rid="fig6">Figure 6</xref> displays the proportions of different sentiment values obtained for the smaller Chinese sample (see Section 2.2.1). In contrast with the PyABSA data, most of the sentiment values are neutral. There are also no systematic gender differences: for example, the concept &#x201C;man&#x201D; is used more often positively and less often negatively than &#x201C;woman&#x201D; in the conceptual pair ADULT, but &#x201C;mother&#x201D; is used more often positively than &#x201C;father&#x201D; in the pair PARENT. &#x201C;Girl&#x201D; appears less often in neutral contexts than &#x201C;boy&#x201D; (see NOT ADULT), but &#x201C;wife&#x201D; is used more frequently in neutral contexts than &#x201C;husband&#x201D; (see SPOUSE).</p>
<fig position="float" id="fig6">
<label>Figure 6</label>
<caption>
<p>Proportions of different sentiment scores in the Chinese data, based on GPT-3.5.</p>
</caption>
<graphic xlink:href="fcomm-09-1266407-g006.tif"/>
</fig>
</sec>
<sec id="sec20">
<label>3.2.2</label>
<title>Generalized linear mixed-effect models</title>
<sec id="sec21">
<label>3.2.2.1</label>
<title>PyABSA</title>
<p>The best model predicting neutral vs. non-neutral sentiment fitted to the PyABSA scores had random slopes for the conceptual pairs, modifying the effects of Gender and Source. The coefficients for the fixed effects are provided in <xref ref-type="table" rid="tab8">Table 8</xref>. The model displays a significant effect of the referent&#x2019;s Gender. Male referents are presented neutrally more often than female referents, as shown in <xref ref-type="fig" rid="fig7">Figure 7</xref>. The log-odds ratio coefficient is 0.472 (<italic>p</italic>&#x2009;&#x003C;&#x2009;0.001), which means in simple odds that male referents have 1.6 times higher chances of neutral sentiment than female ones. There was also a change in the sentiment scores in the XIN news. With time, they became more neutral. In the CNA news, we detected no differences.</p>
<table-wrap position="float" id="tab8">
<label>Table 8</label>
<caption>
<p>Coefficients and their Wald 95% confidence intervals (in parentheses) of the fixed effects in the Generalized Linear Mixed-effect Model with the response variable &#x201C;Neutral or Non-neutral&#x201D; based on the Chinese data and PyABSA.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Regression term</th>
<th align="center" valign="top" colspan="2">Coefficient (&#x03B2;)</th>
<th align="center" valign="top" rowspan="2"><italic>P</italic>-value</th>
</tr>
<tr>
<th align="center" valign="top">Log-odds (ratio)</th>
<th align="center" valign="top">Odds (ratio)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Intercept</td>
<td align="char" valign="top" char="(">&#x2212;1.447 (&#x2212;1.744, &#x2212;1.150)</td>
<td align="char" valign="top" char="(">0.235 (0.175, 0.317)</td>
<td align="char" valign="top" char=".">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">Year (scaled, centered)</td>
<td align="char" valign="top" char="(">0.004 (&#x2212;0.016, 0.023)</td>
<td align="char" valign="top" char="(">1.004 (0.984, 1.024)</td>
<td align="char" valign="top" char=".">1</td>
</tr>
<tr>
<td align="left" valign="top">Gender&#x2009;=&#x2009;Male</td>
<td align="char" valign="top" char="(">0.472 (0.299, 0.646)</td>
<td align="char" valign="top" char="(">1.603 (1.349, 1.907)</td>
<td align="char" valign="top" char=".">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">Source&#x2009;=&#x2009;XIN</td>
<td align="char" valign="top" char="(">&#x2212;0.025 (&#x2212;0.180, 0.129)</td>
<td align="char" valign="top" char="(">0.975 (0.835, 1.138)</td>
<td align="char" valign="top" char=".">1</td>
</tr>
<tr>
<td align="left" valign="top">Interaction term Year: Source&#x2009;=&#x2009;XIN</td>
<td align="char" valign="top" char="(">0.082 (0.050, 0.114)</td>
<td align="char" valign="top" char="(">1.086 (1.051, 1.121)</td>
<td align="char" valign="top" char=".">&#x003C;0.001</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig position="float" id="fig7">
<label>Figure 7</label>
<caption>
<p>Main effect of Gender in the Chinese data, based on PyABSA: Neutral vs. non-neutral sentiment.</p>
</caption>
<graphic xlink:href="fcomm-09-1266407-g007.tif"/>
</fig>
<p>We also performed regression on the choice between positive and negative sentiment labels produced by PyABSA. The model had random slopes for Year, Gender and Source. The coefficients are presented in <xref ref-type="table" rid="tab9">Table 9</xref>. The fixed effect of Gender is not statistically significant. As the interaction between Year and Source suggests (not shown due to space limitations), the sentiment values became more positive over time in XIN; in CNA, there is very little change.</p>
<table-wrap position="float" id="tab9">
<label>Table 9</label>
<caption>
<p>Coefficients and their Wald 95% confidence intervals (in parentheses) of the fixed effects in the Generalized Linear Mixed-effect Model with the response variable &#x201C;Positive or Negative&#x201D; based on the Chinese data and PyABSA.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Regression term</th>
<th align="center" valign="top" colspan="2">Coefficient (&#x03B2;)</th>
<th align="center" valign="top" rowspan="2"><italic>P</italic>-value</th>
</tr>
<tr>
<th align="center" valign="top">Log-odds (ratio)</th>
<th align="center" valign="top">Odds (ratio)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Intercept</td>
<td align="char" valign="top" char="(">0.398 (0.208, 0.588)</td>
<td align="char" valign="top" char="(">1.489 (1.232, 1.800)</td>
<td align="char" valign="top" char=".">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">Year (scaled, centered)</td>
<td align="char" valign="top" char="(">0.036 (&#x2212;0.012, 0.084)</td>
<td align="char" valign="top" char="(">1.037 (0.988, 1.088)</td>
<td align="char" valign="top" char=".">0.290</td>
</tr>
<tr>
<td align="left" valign="top">Gender&#x2009;=&#x2009;Male</td>
<td align="char" valign="top" char="(">&#x2212;0.058 (&#x2212;0.273, 0.156)</td>
<td align="char" valign="top" char="(">0.943 (0.761, 1.169)</td>
<td align="char" valign="top" char=".">1</td>
</tr>
<tr>
<td align="left" valign="top">Source&#x2009;=&#x2009;XIN</td>
<td align="char" valign="top" char="(">0.375 (0.155, 0.596)</td>
<td align="char" valign="top" char="(">1.456 (1.168, 1.815)</td>
<td align="char" valign="top" char=".">0.002</td>
</tr>
<tr>
<td align="left" valign="top">Interaction term Year: Source&#x2009;=&#x2009;XIN</td>
<td align="char" valign="top" char="(">0.054 (0.020, 0.087)</td>
<td align="char" valign="top" char="(">1.055 (1.020, 1.091)</td>
<td align="char" valign="top" char=".">0.002</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<p><xref ref-type="fig" rid="fig8">Figure 8</xref> displays the interaction between Gender and Source.</p>
</table-wrap-foot>
</table-wrap>
<fig position="float" id="fig8">
<label>Figure 8</label>
<caption>
<p>Interaction between Gender and Source in the Chinese data: Neutral vs. non-neutral sentiment, based on GPT-3.5.</p>
</caption>
<graphic xlink:href="fcomm-09-1266407-g008.tif"/>
</fig>
</sec>
<sec id="sec22">
<label>3.2.2.2</label>
<title>GPT-3.5</title>
<p>We also fitted models based on the smaller sample annotated by GPT-3.5. The best model that predicts neutral vs. non-neutral labels had random slopes for Gender. The coefficients are shown in <xref ref-type="table" rid="tab10">Table 10</xref>. The effect of Year was not significant, which is why this variable was excluded from the final model. There is a significant interaction between Gender and Source. As shown in <xref ref-type="fig" rid="fig8">Figure 8</xref>, we observe no consistent effect of Gender across the sources.</p>
<table-wrap position="float" id="tab10">
<label>Table 10</label>
<caption>
<p>Coefficients and their Wald 95% confidence intervals (in parentheses) of the fixed effects in the Generalized Linear Mixed-effect Model with the response variable &#x201C;Neutral or Non-neutral&#x201D; based on the Chinese data and GPT-3.5.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Regression term</th>
<th align="center" valign="top" colspan="2">Coefficient (&#x03B2;)</th>
<th align="center" valign="top" rowspan="2"><italic>P</italic>-value</th>
</tr>
<tr>
<th align="center" valign="top">Log-odds (ratio)</th>
<th align="center" valign="top">Odds (ratio)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Intercept</td>
<td align="char" valign="top" char="(">0.609 (0.379, 0.840)</td>
<td align="char" valign="top" char="(">1.839 (1.460, 2.315)</td>
<td align="char" valign="top" char=".">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">Gender&#x2009;=&#x2009;Male</td>
<td align="char" valign="top" char="(">0.183 (&#x2212;0.076, 0.442)</td>
<td align="char" valign="top" char="(">1.200 (0.926, 1.555)</td>
<td align="char" valign="top" char=".">0.324</td>
</tr>
<tr>
<td align="left" valign="top">Source&#x2009;=&#x2009;XIN</td>
<td align="char" valign="top" char="(">&#x2212;0.083 (&#x2212;0.250, 0.083)</td>
<td align="char" valign="top" char="(">0.920 (0.779, 1.086)</td>
<td align="char" valign="top" char=".">0.652</td>
</tr>
<tr>
<td align="left" valign="top">Interaction term Gender&#x2009;=&#x2009;Male: Source&#x2009;=&#x2009;XIN</td>
<td align="char" valign="top" char="(">&#x2212;0.354 (&#x2212;0.590, &#x2212;0.118)</td>
<td align="char" valign="top" char="(">0.702 (0.554, 0.889)</td>
<td align="char" valign="top" char=".">0.006</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Finally, the best model that predicted positive vs. negative sentiment contained only Source (see <xref ref-type="table" rid="tab11">Table 11</xref>). The odds of positive sentiment were higher in XIN than CNA. The effects of the other predictors were not significant.</p>
<table-wrap position="float" id="tab11">
<label>Table 11</label>
<caption>
<p>Coefficients and their Wald 95% confidence intervals (in parentheses) of the fixed effects in the Generalized Linear Mixed-effect Model with the response variable &#x201C;Positive or Negative&#x201D; based on the Chinese data and GPT-3.5.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Regression term</th>
<th align="center" valign="top" colspan="2">Coefficient (&#x03B2;)</th>
<th align="center" valign="top" rowspan="2"><italic>P</italic>-value</th>
</tr>
<tr>
<th align="center" valign="top">Log-odds (ratio)</th>
<th align="center" valign="top">Odds (ratio)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Intercept</td>
<td align="char" valign="top" char="(">0.436 (0.080, 0.792)</td>
<td align="char" valign="top" char="(">1.546 (1.083, 2.208)</td>
<td align="char" valign="top" char=".">0.033</td>
</tr>
<tr>
<td align="left" valign="top">Source&#x2009;=&#x2009;XIN</td>
<td align="char" valign="top" char="(">0.681 (0.471, 0.891)</td>
<td align="char" valign="top" char="(">1.976 (1.602, 2.437)</td>
<td align="char" valign="top" char=".">&#x003C;0.001</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec23">
<label>3.2.2.3</label>
<title>Summary</title>
<p>The models based on PyABSA show that male referents are presented neutrally more often than female referents. In the GPT-3.5 sentiment labels, we find no stable effect of Gender, however. The direction of the effect depends on Source (the news agency). Neither approach has detected any gender-related differences with regard to positive vs. negative sentiment.</p>
<p>One should ask again if these differences between the approaches can be explained by the different sizes of the datasets used for the PyABSA and GPT-3.5 annotations. When we fitted the PyABSA neutral vs. non-neutral model on the smaller sample used for the GPT annotation, we found that the effect of Gender persists. At the same time, it is possible that the difference between the approaches is due to the greater bias toward neutral sentiment labels in the GPT-3.5 data.</p>
</sec>
</sec>
</sec>
<sec id="sec24">
<label>3.3</label>
<title>Russian</title>
<sec id="sec25">
<label>3.3.1</label>
<title>Descriptive statistics</title>
<p><xref ref-type="fig" rid="fig9">Figure 9</xref> shows the proportions of each sentiment by Conceptual Pair and Gender in the Russian fiction data annotated by PyABSA. Notably, negative and positive sentiment prevail, whereas neutral sentiment is the least frequent. Still, one can discern that the male concepts tend to have neutral scores more often than the female ones. The female concepts are more often negative in most conceptual pairs, especially PARENT and PARENT-IN-LAW, but in some of them they are also more often positive (CHILD, NOT ADULT).</p>
<fig position="float" id="fig9">
<label>Figure 9</label>
<caption>
<p>Proportions of different sentiment scores in the Russian data, based on PyABSA.</p>
</caption>
<graphic xlink:href="fcomm-09-1266407-g009.tif"/>
</fig>
<p><xref ref-type="fig" rid="fig10">Figure 10</xref> displays the proportions based on GPT-3.5. Unlike in the PyABSA results, the neutral sentiment prevails. The male referents are also no longer universally associated with more neutral sentiment: there is no difference in the pair SIBLING, and the male referent in the pair SPOUSE (that is, &#x201C;husband&#x201D;) is actually less often neutral than its female counterpart (&#x201C;wife&#x201D;). The differences between the proportions of positive and negative labels also vary across the conceptual pairs. While &#x201C;mother-in-law&#x201D; has the largest proportion of negative labels, &#x201C;woman&#x201D; and &#x201C;mother&#x201D; have relatively high proportions of positive labels.</p>
<fig position="float" id="fig10">
<label>Figure 10</label>
<caption>
<p>Proportions of different sentiment scores in the Russian data, based on GPT-3.5.</p>
</caption>
<graphic xlink:href="fcomm-09-1266407-g010.tif"/>
</fig>
</sec>
<sec id="sec26">
<label>3.3.2</label>
<title>Generalized linear mixed-effect models</title>
<sec id="sec27">
<label>3.3.2.1</label>
<title>PyABSA</title>
<p>The model that predicted neutral vs. non-neutral sentiment included the individual books and Conceptual Pairs as random intercepts, as well as random slopes of Conceptual Pairs for the variables Gender and Number. The coefficients of the fixed effects are shown in <xref ref-type="table" rid="tab12">Table 12</xref>. There is an interaction of Gender with Year, which is displayed in <xref ref-type="fig" rid="fig11">Figure 11</xref>. The male nouns are always used more neutrally than the female nouns, but the sentiment labels of male referents become more neutral with time, whereas the labels of female referents become slightly less neutral with time. Contrary to our expectation, the gender gap in Russian literature increases.</p>
<table-wrap position="float" id="tab12">
<label>Table 12</label>
<caption>
<p>Coefficients and their Wald 95% confidence intervals (in parentheses) of the fixed effects in the Generalized Linear Mixed-effect Model with the response variable &#x201C;Neutral or Non-neutral&#x201D; based on the Russian data and PyABSA.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Regression term</th>
<th align="center" valign="top" colspan="2">Coefficient (&#x03B2;)</th>
<th align="center" valign="top" rowspan="2"><italic>P</italic>-value</th>
</tr>
<tr>
<th align="center" valign="top">Log-odds (ratio)</th>
<th align="center" valign="top">Odds (ratio)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Intercept</td>
<td align="char" valign="top" char="(">&#x2212;2.375 (&#x2212;2.755, &#x2212;1.996)</td>
<td align="char" valign="top" char="(">0.092 (0.064, 0.134)</td>
<td align="char" valign="top" char=".">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">Year (scaled, centered)</td>
<td align="char" valign="top" char="(">&#x2212;0.025 (&#x2212;0.062, 0.013)</td>
<td align="char" valign="top" char="(">0.976 (0.940, 1.013)</td>
<td align="char" valign="top" char=".">0.396</td>
</tr>
<tr>
<td align="left" valign="top">Gender&#x2009;=&#x2009;Male</td>
<td align="char" valign="top" char="(">0.431 (0.301, 0.560)</td>
<td align="char" valign="top" char="(">1.538 (1.351, 1.751)</td>
<td align="char" valign="top" char=".">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">Number&#x2009;=&#x2009;Singular</td>
<td align="char" valign="top" char="(">&#x2212;0.150 (&#x2212;0.511, 0.212)</td>
<td align="char" valign="top" char="(">0.861 (0.600, 1.236)</td>
<td align="char" valign="top" char=".">0.834</td>
</tr>
<tr>
<td align="left" valign="top">Interaction term Year: Gender&#x2009;=&#x2009;Male</td>
<td align="char" valign="top" char="(">0.056 (0.012, 0.100)</td>
<td align="char" valign="top" char="(">1.057 (1.012, 1.105)</td>
<td align="char" valign="top" char=".">0.026</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig position="float" id="fig11">
<label>Figure 11</label>
<caption>
<p>Interaction between Gender and Year in the Russian data, PyABSA: Neutral vs. non-neutral sentiment.</p>
</caption>
<graphic xlink:href="fcomm-09-1266407-g011.tif"/>
</fig>
<p>The second model, in which we predicted positive vs. negative sentiment, included the same random effects as the first model. The coefficients of the fixed effects are displayed in <xref ref-type="table" rid="tab13">Table 13</xref>. The year did not play any significant role, so it was excluded from the final model. All the other predictors interacted. We found that the male nouns had slightly more often positive labels than the female nouns in the singular, but not in the plural, as shown in <xref ref-type="fig" rid="fig12">Figure 12</xref>.</p>
<table-wrap position="float" id="tab13">
<label>Table 13</label>
<caption>
<p>Coefficients and their Wald 95% confidence intervals (in parentheses) of the fixed effects in the Generalized Linear Mixed-effect Model with the response variable &#x201C;Positive or Negative&#x201D; based on the Russian data and PyABSA.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Regression term</th>
<th align="center" valign="top" colspan="2">Coefficient (&#x03B2;)</th>
<th align="center" valign="top" rowspan="2"><italic>P</italic>-value</th>
</tr>
<tr>
<th align="center" valign="top">Log-odds (ratio)</th>
<th align="center" valign="top">Odds (ratio)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Intercept</td>
<td align="char" valign="top" char="(">0.163 (&#x2212;0.064, 0.390)</td>
<td align="char" valign="top" char="(">1.177 (0.938, 1.477)</td>
<td align="char" valign="top" char=".">0.320</td>
</tr>
<tr>
<td align="left" valign="top">Gender&#x2009;=&#x2009;Male</td>
<td align="char" valign="top" char="(">&#x2212;0.025 (&#x2212;0.132, 0.083)</td>
<td align="char" valign="top" char="(">0.975 (0.876, 1.086)</td>
<td align="char" valign="top" char=".">1.000</td>
</tr>
<tr>
<td align="left" valign="top">Number&#x2009;=&#x2009;Singular</td>
<td align="char" valign="top" char="(">&#x2212;0.322 (&#x2212;0.460, &#x2212;0.183)</td>
<td align="char" valign="top" char="(">0.725 (0.631, 0.833)</td>
<td align="char" valign="top" char=".">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">Author Gender&#x2009;=&#x2009;Male</td>
<td align="char" valign="top" char="(">0.060 (0.001, 0.119)</td>
<td align="char" valign="top" char="(">1.062 (1.001, 1.127)</td>
<td align="char" valign="top" char=".">0.090</td>
</tr>
<tr>
<td align="left" valign="top">Interaction term<break/>Gender&#x2009;=&#x2009;Male:<break/>Author Gender&#x2009;=&#x2009;Male</td>
<td align="char" valign="top" char="(">&#x2212;0.132 (&#x2212;0.195, &#x2212;0.070)</td>
<td align="char" valign="top" char="(">0.876 (0.823, 0.932)</td>
<td align="char" valign="top" char=".">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">Interaction term Gender&#x2009;=&#x2009;Male:<break/>Number&#x2009;=&#x2009;Singular</td>
<td align="char" valign="top" char="(">0.175 (0.098, 0.251)</td>
<td align="char" valign="top" char="(">1.191 (1.103, 1.285)</td>
<td align="char" valign="top" char=".">&#x003C;0.001</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig position="float" id="fig12">
<label>Figure 12</label>
<caption>
<p>Interaction between Gender and Number in the Russian data, PyABSA: Positive vs. negative sentiment.</p>
</caption>
<graphic xlink:href="fcomm-09-1266407-g012.tif"/>
</fig>
<p>We also found an interaction between the referent&#x2019;s gender and the author&#x2019;s gender, which is displayed in <xref ref-type="fig" rid="fig13">Figure 13</xref>. Surprisingly, the male nouns in sentences written by female authors are slightly more likely to have positive scores than the female nouns. In the texts of male authors there is a very weak bias for the female nouns to get more positive scores than the male nouns.</p>
<fig position="float" id="fig13">
<label>Figure 13</label>
<caption>
<p>Interaction between Gender of the referent and Author&#x2019;s Gender in the Russian data, PyABSA: Positive vs. negative sentiment.</p>
</caption>
<graphic xlink:href="fcomm-09-1266407-g013.tif"/>
</fig>
</sec>
<sec id="sec28">
<label>3.3.2.2</label>
<title>GPT-3.5</title>
<p>The best model predicting neutral vs. non-neutral scores provided by GPT-3.5 contained random slopes for individual concept pairs, which modified the effect of the referent&#x2019;s gender. The author&#x2019;s gender did not play a role and was excluded from the final model. The coefficients are provided in <xref ref-type="table" rid="tab14">Table 14</xref>. We observe an effect of the referent&#x2019;s gender: male referents are about 1.26 times more likely to get neutral labels than female referents. This effect is displayed in <xref ref-type="fig" rid="fig14">Figure 14</xref>. There is also an interaction between Year and Number (not shown): plural nouns get fewer neutral labels with time, whereas singular nouns remain stable.</p>
<table-wrap position="float" id="tab14">
<label>Table 14</label>
<caption>
<p>Coefficients and their Wald 95% confidence intervals (in parentheses) of the fixed effects in the Generalized Linear Mixed-effect Model with the response variable &#x201C;Neutral or Non-neutral&#x201D; based on the Russian data and GPT-3.5.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Regression term</th>
<th align="center" valign="top" colspan="2">Coefficient (&#x03B2;)</th>
<th align="center" valign="top" rowspan="2"><italic>P</italic>-value</th>
</tr>
<tr>
<th align="center" valign="top">Log-odds (ratio)</th>
<th align="center" valign="top">Odds (ratio)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Intercept</td>
<td align="char" valign="top" char="(">0.578 (0.374, 0.783)</td>
<td align="char" valign="top" char="(">1.783 (1.453, 2.187)</td>
<td align="char" valign="top" char=".">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">Year (scaled, centered)</td>
<td align="char" valign="top" char="(">&#x2212;0.151 (&#x2212;0.220, &#x2212;0.082)</td>
<td align="char" valign="top" char="(">0.860 (0.803, 0.922)</td>
<td align="char" valign="top" char=".">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">Gender&#x2009;=&#x2009;Male</td>
<td align="char" valign="top" char="(">0.231 (0.041, 0.420)</td>
<td align="char" valign="top" char="(">1.260 (1.042, 1.522)</td>
<td align="char" valign="top" char=".">0.034</td>
</tr>
<tr>
<td align="left" valign="top">Number&#x2009;=&#x2009;Singular</td>
<td align="char" valign="top" char="(">0.203 (0.110, 0.296)</td>
<td align="char" valign="top" char="(">1.225 (1.116, 1.345)</td>
<td align="char" valign="top" char=".">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">Interaction term Year: Number&#x2009;=&#x2009;Singular</td>
<td align="char" valign="top" char="(">0.153 (0.062, 0.243)</td>
<td align="char" valign="top" char="(">1.165 (1.064, 1.275)</td>
<td align="char" valign="top" char=".">0.002</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig position="float" id="fig14">
<label>Figure 14</label>
<caption>
<p>Main effect of the referent&#x2019;s Gender in the Russian data, GPT-3.5: Neutral vs. non-neutral sentiment.</p>
</caption>
<graphic xlink:href="fcomm-09-1266407-g014.tif"/>
</fig>
<p>As for the difference between positive and negative sentiment, the best model included neither the referent&#x2019;s gender, nor the author&#x2019;s. They did not play any role. The only significant factor, as shown in <xref ref-type="table" rid="tab15">Table 15</xref>, was Year. The negative coefficient means that the odds of positive sentiment decreased with time, i.e., negative sentiment became gradually more likely. The term Number is included, although it was not statistically significant, due to the random slopes for this variable depending on individual Conceptual Pairs.</p>
<table-wrap position="float" id="tab15">
<label>Table 15</label>
<caption>
<p>Coefficients and their Wald 95% confidence intervals (in parentheses) of the fixed effects in the Generalized Linear Mixed-effect Model with the response variable &#x201C;Positive or Negative&#x201D; based on the Russian data and GPT-3.5.</p>
</caption>
<table frame="hsides" rules="groups">
<thead>
<tr>
<th align="left" valign="top" rowspan="2">Regression term</th>
<th align="center" valign="top" colspan="2">Coefficient (&#x03B2;)</th>
<th align="center" valign="top" rowspan="2"><italic>P</italic>-value</th>
</tr>
<tr>
<th align="center" valign="top">Log-odds (ratio)</th>
<th align="center" valign="top">Odds (ratio)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" valign="top">Intercept</td>
<td align="char" valign="top" char="(">&#x2212;0.530 (&#x2212;0.934, &#x2212;0.125)</td>
<td align="char" valign="top" char="(">0.589 (0.393, 0.882)</td>
<td align="char" valign="top" char=".">0.020</td>
</tr>
<tr>
<td align="left" valign="top">Year (scaled, centered)</td>
<td align="char" valign="top" char="(">&#x2212;0.171 (&#x2212;0.258, &#x2212;0.085)</td>
<td align="char" valign="top" char="(">0.842 (0.773, 0.918)</td>
<td align="char" valign="top" char=".">&#x003C;0.001</td>
</tr>
<tr>
<td align="left" valign="top">Number&#x2009;=&#x2009;Singular</td>
<td align="char" valign="top" char="(">0.214 (&#x2212;0.192, 0.620)</td>
<td align="char" valign="top" char="(">1.239 (0.825, 1.859)</td>
<td align="char" valign="top" char=".">0.603</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec29">
<label>3.3.2.3</label>
<title>Summary</title>
<p>The Russian data reveal a tendency for male referents to be more often used neutrally than female referents, in both approaches. As for the PyABSA labels, this gender gap increases with time. This interaction is not observed in the GPT-3.5 data, however.</p>
<p>The pejoration hypothesis is supported only marginally: male referents tend to be used in more positive contexts than female referents in restricted situations (female authors and singular forms), and only for the labels provided by PyABSA. No effect of gender is found in the GPT-3.5 data.</p>
<p>Can these differences between the approaches be explained by the different sizes of the datasets used for PyABSA and GPT-3.5? The answer is positive for the contrast between neutral and non-neutral sentiment. When fitted on the smaller sample, the model predicting PyABSA labels did not support the interaction between Gender and Year anymore. Instead, we observe the same tendency for male referents to get neutral labels more often than female referents, which was observed in the model based on the GPT-3.5 labels. As for the positive vs. negative sentiment, it is interesting that the PyABSA results hold even on the small sample.</p>
</sec>
</sec>
</sec>
</sec>
<sec sec-type="discussion" id="sec30">
<label>4</label>
<title>Discussion</title>
<p>In our paper we used two methods of Aspect-Based Sentiment Analysis with the help of language models. One employed the software package PyABSA and was based on zero-shot learning, whereas the other used the large language model GPT-3.5 and few-shot learning. We found differences in the results produced by the two approaches, but also many similarities.</p>
<p>The main result of our Aspect-Based Sentiment Analysis is that we do not find a consistent preference for female referents to be associated with more negative sentiment than for male referents. This goes against the view that highlights predominantly derogatory attitudes toward females.</p>
<p>However, some of our models suggest that female terms are on average less often associated with neutral sentiment than their male counterparts, supporting the analysis in <xref ref-type="bibr" rid="ref24">Hoyle et al. (2019)</xref>. This difference is found in the data annotated by PyABSA representing English fiction (only for the singular nouns, though), Chinese news and Russian fiction, and in the Russian data annotated by GPT-3.5. It remains an open question whether the absence of this effect in the English and Chinese GPT-3.5 data has to do with the very high frequency of neutral sentiment in the GPT-3.5 annotations, which makes it more difficult to discover significant effects.</p>
<p>If this bias is real, one could conclude that female humans are provided with more emotionally charged descriptions, positive or negative. It is quite remarkable that the results based on corpora representing three very different cultures and two registers converge on this point. At the same time, contrary to our expectations, we find no diachronic convergence in the sentiment evoked by female and male referents. In contrast, in the Russian data, the gender gap seems to be increasing with time. This can mean several things, in principle. First, it is possible that sexism is so deeply rooted that the recent progress has only affected the surface of our behavior and cognition. Alternatively, we cannot exclude that the text types examined in our study have not caught up yet with the social changes. It would be worthwhile to investigate other text sources and look into older data.</p>
<p>To conclude, our data lend tentative and partial support to <xref ref-type="bibr" rid="ref20">Glick and Fiske&#x2019;s (1996</xref>, p. 491) claim that &#x201C;women have been revered as well as reviled&#x201D; throughout human history. The deep ambivalence leads to constant fluctuation between hostile sexism and benevolent sexism. This may also have to do with the society&#x2019;s constant scrutiny and evaluation of women. As <xref ref-type="bibr" rid="ref40">Tannen (1993)</xref> wrote in her eponymous 1993 essay, &#x201C;[t]here is no unmarked woman&#x201D;, in the sense that all choices that women make &#x2013; be that a hairstyle or choosing the name after marriage &#x2013; are perceived as marked, or carrying additional social meaning. In our case, we see that a woman is less likely to be described neutrally or unmarked in the emotional sense than a man. Although these findings can be still interpreted as evidence of prejudice, they do not support the idea that the roots of linguistic pejoration toward females have to do with a generally more negative attitude toward female referents, which is not found in our data. The causes of the diachronic processes leading to the semantic biases, which were outlined in the Introduction, should probably be searched for elsewhere.</p>
<p>It is necessary to mention some limitations of our study. First of all, our study is limited to seven pairs of gendered concepts in each of the three languages &#x2013; a limitation by necessity shared by most of the studies on gender in corpora mentioned in the Introduction. We are also aware that additional factors need to be controlled for. For example, in the English data we were not able to control for the author&#x2019;s gender. We hope that follow-up studies will address this issue.</p>
<p>Second, our conclusions are based on pre-trained models for sentiment analysis. In other words, we have not fine-tuned the existing model on fiction and media texts. The feelings expressed about consumer goods and services, which most PyABSA sentiment analysis models are trained on, should be different from the feelings expressed about humans. However, we find partial support of the PyABSA results in the data annotated by GPT-3.5. Our study also supports previous findings based on a different approach and set of words. PyABSA is a smaller and more specialized model than GPT-3.5, and is likely to pay more attention to shallow linguistic features such as vocabulary use, while GPT-3.5 has the capability of performing a deeper semantic analysis. Whether it actually does this remains to be seen, as analyzing this in detail is a difficult but interesting question that goes beyond the scope of our work.</p>
<p>Finally, one cannot exclude that the sentiment evaluation of different contexts could be different at the time when the text was written and now. In other words, when we classify the sentiment of a sentence published, let us say, in 1970, the model estimates the sentiment from a contemporary language user&#x2019;s perspective, and not from the perspective of someone who wrote or read this sentence in 1970. Unfortunately, it would be in principle impossible to estimate that sentiment with full certainty in all contexts.</p>
<p>Yet, our results dovetail with the conclusions reported in a recent study by <xref ref-type="bibr" rid="ref29">Morehouse et al. (2023)</xref>, who find that language representations from word embeddings based on different large corpora of English strongly correlate with people&#x2019;s implicit attitudes toward diverse topics measured experimentally. This correlation is remarkably stable, persisting across two centuries, and being found in different text registers.<xref ref-type="fn" rid="fn0003"><sup>3</sup></xref> Implicit attitudes are strongly anchored in our culture, and are less malleable than explicit attitudes, which depend on new norms and cultural demands (<italic>Ibid.</italic>). Their existence below the radar of consciousness can explain, at least partly, why social prejudice, such as the pro-White racial bias in the United States (<xref ref-type="bibr" rid="ref34">Payne et al., 2019</xref>), is so persistent and difficult to eradicate.</p>
<p>Despite all the above-mentioned limitations, we hope that our study opens a new direction for research in diachronic lexical typology and in lexical typology in general. Lexical typology, defined as the &#x201C;systematic study of cross-linguistic variation in words and vocabularies, i.e., the cross-linguistic and typological branch of lexicology&#x201D; (<xref ref-type="bibr" rid="ref27">Koptjevskaja-Tamm, 2012</xref>: 373), has to a large extent ignored the issue of semantic prosody and connotation. Also, while changes in connotations and pejoration/meliorization are frequently discussed in the context of semantic change and lexical replacement in particular languages and richly illustrated in textbooks on historical semantics, these are rarely taken into account in the more systematic comparative research in diachronic lexical typology. The latter instead often focuses on the more &#x201C;conceptual&#x201D; side of semantic shifts such as metonymy, metaphor, broadening, etc. (e.g., the contributions in <xref ref-type="bibr" rid="ref26">Juvonen and Koptjevskaja-Tamm, 2016</xref>, <xref ref-type="bibr" rid="ref19">Georgakopoulos and Polis, 2021</xref>, but see <xref ref-type="bibr" rid="ref43">Vejdemo and H&#x00F6;rberg, 2016</xref> for including arousal in the model predicting the rate of lexical replacement across languages). We look forward to other lexico-typological studies in which semantic prosody is taken as a noteworthy aspect of comparison.</p>
</sec>
<sec sec-type="data-availability" id="sec31">
<title>Data availability statement</title>
<p>The original contributions presented in the study are included in the article/<xref ref-type="supplementary-material" rid="SM1">Supplementary material</xref>, further inquiries can be directed to the corresponding author.</p>
</sec>
<sec sec-type="author-contributions" id="sec32">
<title>Author contributions</title>
<p>NL: Conceptualization, Formal analysis, Investigation, Methodology, Project administration, Resources, Validation, Visualization, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. MK: Conceptualization, Project administration, Validation, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing. R&#x00D6;: Data curation, Methodology, Resources, Software, Validation, Writing &#x2013; original draft, Writing &#x2013; review &#x0026; editing.</p>
</sec>
</body>
<back>
<sec sec-type="funding-information" id="sec33">
<title>Funding</title>
<p>The author(s) declare financial support was received for the research, authorship, and/or publication of this article. Natalia Levshina&#x2019;s research was partly funded by the Netherlands Organisation for Scientific Research (NWO) under Gravitation grant &#x201C;Language in Interaction&#x201D;, grant number 024.001.006. Robert &#x00D6;stling&#x2019;s research was partly funded by the Swedish Research Council (VR), grant number 2019-04129.</p>
</sec>
<sec sec-type="COI-statement" id="sec34">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec id="sec100" sec-type="disclaimer">
<title>Publisher&#x2019;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>
<sec sec-type="supplementary-material" id="sec35">
<title>Supplementary material</title>
<p>The Supplementary material for this article can be found online at: <ext-link xlink:href="https://www.frontiersin.org/articles/10.3389/fcomm.2024.1266407/full#supplementary-material" ext-link-type="uri">https://www.frontiersin.org/articles/10.3389/fcomm.2024.1266407/full#supplementary-material</ext-link></p>
<supplementary-material xlink:href="Data_Sheet_1.docx" id="SM1" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<fn-group>
<fn id="fn0001">
<p><sup>1</sup>Online demo: <ext-link xlink:href="https://huggingface.co/spaces/yangheng/PyABSA-APC" ext-link-type="uri">https://huggingface.co/spaces/yangheng/PyABSA-APC</ext-link>; Github repository: <ext-link xlink:href="https://github.com/yangheng95/PyABSA" ext-link-type="uri">https://github.com/yangheng95/PyABSA</ext-link></p>
</fn>
<fn id="fn0002">
<p><sup>2</sup>Conceptually, this variable should be treated as random effects, but with the low number of groups (only two), there is no practical difference between treating it as fixed or random effects (<xref ref-type="bibr" rid="ref18">Gelman and Hill, 2007</xref>, p. 247).</p>
</fn>
<fn id="fn0003">
<p><sup>3</sup>The conclusion that the same correlation is found across different text registers harmonizes well with our finding that the results for the three languages in our study are very similar, in spite of the fact that the Chinese corpus represents a different text genre than the Russian and the English ones.</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="ref1"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Aikhenvald</surname> <given-names>A. Y.</given-names></name></person-group> (<year>2016</year>). <source>How gender shapes the world</source>. <publisher-loc>Oxford</publisher-loc>: <publisher-name>Oxford University Press</publisher-name>.</citation></ref>
<ref id="ref9007"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Allport</surname> <given-names>G. W.</given-names></name></person-group> (<year>1954</year>). <source>The nature of prejudice</source>. <publisher-loc>Cambridge, Mass.</publisher-loc>: <publisher-name>Addison-Wesley</publisher-name>.</citation></ref>
<ref id="ref2"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Baker</surname> <given-names>P.</given-names></name></person-group> (<year>2014</year>). <source>Using corpora to analyze gender</source>. <publisher-loc>London, New York</publisher-loc>: <publisher-name>Bloomsbury Publishing</publisher-name>.</citation></ref>
<ref id="ref9003"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Bebout</surname> <given-names>L.</given-names></name></person-group> (<year>1984</year>). <article-title>Asymmetries in male-female word pairs</article-title>. <source>American Speech</source> <volume>59</volume>, <fpage>13</fpage>&#x2013;<lpage>30</lpage>.</citation></ref>
<ref id="ref3"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Borkowska</surname> <given-names>P.</given-names></name> <name><surname>Kleparski</surname> <given-names>G.</given-names></name></person-group> (<year>2007</year>). <article-title>It befalls words to fall down: pejoration as a type of semantic change</article-title>. <source>Stud Anglica Resoviensia</source> <volume>47</volume>, <fpage>33</fpage>&#x2013;<lpage>50</lpage>.</citation></ref>
<ref id="ref4"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Brown</surname> <given-names>T.</given-names></name> <name><surname>Mann</surname> <given-names>B.</given-names></name> <name><surname>Ryder</surname> <given-names>N.</given-names></name> <name><surname>Subbiah</surname> <given-names>M.</given-names></name> <name><surname>Kaplan</surname> <given-names>J. D.</given-names></name> <name><surname>Dhariwal</surname> <given-names>P.</given-names></name> <etal/></person-group>. (<year>2020</year>). <article-title>Language models are few-shot learners</article-title>. <source>Adv. Neural Inf. Proces. Syst.</source> <volume>33</volume>, <fpage>1877</fpage>&#x2013;<lpage>1901</lpage>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.2005.14165</pub-id></citation></ref>
<ref id="ref5"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Caldas-Coulthard</surname> <given-names>C. R.</given-names></name> <name><surname>Moon</surname> <given-names>R.</given-names></name></person-group> (<year>2010</year>). <article-title>&#x2018;Curvy, hunky, kinky&#x2019;: using corpora as tools for critical analysis</article-title>. <source>Discourse Soc.</source> <volume>21</volume>, <fpage>99</fpage>&#x2013;<lpage>133</lpage>. doi: <pub-id pub-id-type="doi">10.1177/0957926509353843</pub-id></citation></ref>
<ref id="ref6"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Charlesworth</surname> <given-names>T. E. S.</given-names></name> <name><surname>Yang</surname> <given-names>V.</given-names></name> <name><surname>Mann</surname> <given-names>T. C.</given-names></name> <name><surname>Kurdi</surname> <given-names>B.</given-names></name> <name><surname>Banaji</surname> <given-names>M. R.</given-names></name></person-group> (<year>2021</year>). <article-title>Gender stereotypes in natural language: word Embeddings show robust consistency across Child and Adult language corpora of more than 65 million words</article-title>. <source>Psychol. Sci.</source> <volume>32</volume>, <fpage>218</fpage>&#x2013;<lpage>240</lpage>. doi: <pub-id pub-id-type="doi">10.1177/0956797620963619</pub-id>, PMID: <pub-id pub-id-type="pmid">33400629</pub-id></citation></ref>
<ref id="ref7"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Davies</surname> <given-names>M.</given-names></name></person-group> (<year>2010</year>). <italic>The Corpus of Historical American English (COHA)</italic>. Available at: <ext-link xlink:href="https://www.english-corpora.org/coha/" ext-link-type="uri">https://www.english-corpora.org/coha/</ext-link>.</citation></ref>
<ref id="ref9"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>DeFranza</surname> <given-names>D.</given-names></name> <name><surname>Mishra</surname> <given-names>H.</given-names></name> <name><surname>Mishra</surname> <given-names>A.</given-names></name></person-group> (<year>2020</year>). <article-title>How language shapes prejudice against women: an examination across 45 world languages</article-title>. <source>J. Pers. Soc. Psychol.</source> <volume>119</volume>, <fpage>7</fpage>&#x2013;<lpage>22</lpage>. doi: <pub-id pub-id-type="doi">10.1037/pspa0000188</pub-id>, PMID: <pub-id pub-id-type="pmid">32077734</pub-id></citation></ref>
<ref id="ref11"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Dovidio</surname> <given-names>J. F.</given-names></name> <name><surname>Glick</surname> <given-names>P.</given-names></name> <name><surname>Rudman</surname> <given-names>L. A.</given-names></name></person-group> (<year>2005</year>). <source>On the nature of prejudice fifty years after Allport</source>. <publisher-loc>Malden, MA</publisher-loc>: <publisher-name>Blackwell Publishing</publisher-name>.</citation></ref>
<ref id="ref12"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Durkheim</surname> <given-names>&#x00C9;.</given-names></name></person-group> (<year>1989/1953</year>). <source>Sociology and philosophy</source>. <publisher-loc>Glencoe, Ill</publisher-loc>: <publisher-name>Free Press</publisher-name>.</citation></ref>
<ref id="ref15"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fiske</surname> <given-names>S. T.</given-names></name></person-group> (<year>2018</year>). <article-title>Stereotype content: warmth and competence endure</article-title>. <source>Curr. Dir. Psychol. Sci.</source> <volume>27</volume>, <fpage>67</fpage>&#x2013;<lpage>73</lpage>. doi: <pub-id pub-id-type="doi">10.1177/0963721417738825</pub-id>, PMID: <pub-id pub-id-type="pmid">29755213</pub-id></citation></ref>
<ref id="ref16"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Fiske</surname> <given-names>S. T.</given-names></name> <name><surname>Cuddy</surname> <given-names>A. J. C.</given-names></name> <name><surname>Glick</surname> <given-names>P.</given-names></name> <name><surname>Xu</surname> <given-names>J.</given-names></name></person-group> (<year>2002</year>). <article-title>A model of (often mixed) stereotype content: competence and warmth respectively follow from perceived status and competition</article-title>. <source>J. Pers. Soc. Psychol.</source> <volume>82</volume>, <fpage>878</fpage>&#x2013;<lpage>902</lpage>. doi: <pub-id pub-id-type="doi">10.1037/0022-3514.82.6.878</pub-id>, PMID: <pub-id pub-id-type="pmid">12051578</pub-id></citation></ref>
<ref id="ref17"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Garg</surname> <given-names>N.</given-names></name> <name><surname>Schiebinger</surname> <given-names>L.</given-names></name> <name><surname>Jurafsky</surname> <given-names>D.</given-names></name> <name><surname>Zou</surname> <given-names>J.</given-names></name></person-group> (<year>2018</year>). <article-title>Word Embeddings quantify 100 years of gender and ethnic stereotypes</article-title>. <source>Proc. Natl. Acad. Sci. USA</source> <volume>115</volume>, <fpage>E3635</fpage>&#x2013;<lpage>E3644</lpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.1720347115</pub-id>, PMID: <pub-id pub-id-type="pmid">29615513</pub-id></citation></ref>
<ref id="ref18"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Gelman</surname> <given-names>A.</given-names></name> <name><surname>Hill</surname> <given-names>J.</given-names></name></person-group> (<year>2007</year>). <source>Data analysis using regression and multilevel/hierarchical models</source>. <publisher-loc>Cambridge</publisher-loc>: <publisher-name>Cambridge University Press</publisher-name>.</citation></ref>
<ref id="ref19"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Georgakopoulos</surname> <given-names>T.</given-names></name> <name><surname>Polis</surname> <given-names>S.</given-names></name></person-group> (<year>2021</year>). <article-title>Lexical diachronic semantic maps: mapping the evolution of time-related lexemes</article-title>. <source>J. Hist. Linguist.</source> <volume>11</volume>, <fpage>367</fpage>&#x2013;<lpage>420</lpage>. doi: <pub-id pub-id-type="doi">10.1075/jhl.19018.geo</pub-id></citation></ref>
<ref id="ref20"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Glick</surname> <given-names>P.</given-names></name> <name><surname>Fiske</surname> <given-names>S. T.</given-names></name></person-group> (<year>1996</year>). <article-title>The ambivalent sexism inventory: differentiating hostile and benevolent sexism</article-title>. <source>J. Pers. Soc. Psychol.</source> <volume>70</volume>, <fpage>491</fpage>&#x2013;<lpage>512</lpage>. doi: <pub-id pub-id-type="doi">10.1037/0022-3514.70.3.491</pub-id></citation></ref>
<ref id="ref21"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Gough</surname> <given-names>H. G.</given-names></name> <name><surname>Heilbrun</surname> <given-names>A. B.</given-names></name></person-group> (<year>1965</year>). <source>The adjective check list manual</source>. <publisher-loc>Palo Alto, CA</publisher-loc>: <publisher-name>Consulting Psychologists Press</publisher-name>.</citation></ref>
<ref id="ref9006"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Grzega</surname> <given-names>J.</given-names></name></person-group> (<year>2004</year>). <article-title>A qualitative and quantitative presentation of the forces for lexemic change in the history of English</article-title>. <source>Onomasiology Online</source> <volume>51</volume>, <fpage>1</fpage>&#x2013;<lpage>55</lpage>.</citation></ref>
<ref id="ref22"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Herda&#x011F;delen</surname> <given-names>A.</given-names></name> <name><surname>Baroni</surname> <given-names>M.</given-names></name></person-group> (<year>2011</year>). <article-title>Stereotypical gender actions can be extracted from web text</article-title>. <source>J. Am. Soc. Inf. Sci. Technol.</source> <volume>62</volume>, <fpage>1741</fpage>&#x2013;<lpage>1749</lpage>. doi: <pub-id pub-id-type="doi">10.1002/asi.21579</pub-id></citation></ref>
<ref id="ref23"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Hosseini-Asl</surname> <given-names>E.</given-names></name> <name><surname>Liu</surname> <given-names>W.</given-names></name> <name><surname>Xiong</surname> <given-names>C.</given-names></name></person-group> (<year>2022</year>). <article-title>A generative language model for few-shot aspect-based sentiment analysis</article-title>. <source>arXiv</source> <volume>2022</volume>:<fpage>05356</fpage>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.2204.05356</pub-id></citation></ref>
<ref id="ref24"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Hoyle</surname> <given-names>A. M.</given-names></name> <name><surname>Wolf-Sonkin</surname> <given-names>L.</given-names></name> <name><surname>Wallach</surname> <given-names>H.</given-names></name> <name><surname>Augenstein</surname> <given-names>I.</given-names></name> <name><surname>Cotterell</surname> <given-names>R.</given-names></name></person-group> (<year>2019</year>). <italic>Unsupervised discovery of gendered language through latent-variable modeling</italic>. In: Proceedings of the 57th annual meeting of the Association for Computational Linguistics, Florence, Italy: Association for Computational Linguistics, pp. 1706&#x2013;1716. Available at: <ext-link xlink:href="https://www.aclweb.org/anthology/P19-1167" ext-link-type="uri">https://www.aclweb.org/anthology/P19-1167</ext-link>.</citation></ref>
<ref id="ref25"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Jackson</surname> <given-names>L. M.</given-names></name></person-group> (<year>2011</year>). <source>Defining prejudice. The psychology of prejudice: From attitudes to social action</source>. <publisher-loc>Washington, DC</publisher-loc>: <publisher-name>American Psychological Association</publisher-name>, pp. <fpage>7</fpage>&#x2013;<lpage>28</lpage>.</citation></ref>
<ref id="ref9009"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Jakobson</surname> <given-names>R.</given-names></name></person-group> (<year>1971</year>[1932]). <article-title>&#x201C;Zur Struktur des russischen Verbums,&#x201D;</article-title> in Selected writings. Vol. II. Word and Language. ed. R. Jakobson. (Berlin: De Gruyter Mouton), <fpage>3</fpage>&#x2013;<lpage>15</lpage>.</citation></ref>
<ref id="ref26"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Juvonen</surname> <given-names>P.</given-names></name> <name><surname>Koptjevskaja-Tamm</surname> <given-names>M.</given-names></name></person-group> (<year>2016</year>). <source>The lexical typology of semantic shifts. Cognitive linguistics research</source>. <volume>58</volume>. <publisher-loc>Berlin, New York</publisher-loc>: <publisher-name>De Gruyter Mouton</publisher-name>.</citation></ref>
<ref id="ref9005"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Kim</surname> <given-names>M.</given-names></name></person-group> (<year>2008</year>). <article-title>On the semantic derogation of terms for women in Korean, with parallel developments in Chinese and Japanese</article-title>. <source>Korean Stud.</source> <volume>32</volume>, <fpage>148</fpage>&#x2013;<lpage>176</lpage>.</citation></ref>
<ref id="ref9004"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Kleparski</surname> <given-names>G.</given-names></name></person-group> (<year>1997</year>). <source>Theory and practice of historical semantics: the case of Middle English and Early Modern English synonyms of girl/young woman</source>. <publisher-loc>Lublin</publisher-loc>: <publisher-name>University Press of the Catholic University of Lublin</publisher-name>.</citation></ref>
<ref id="ref27"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Koptjevskaja-Tamm</surname> <given-names>M.</given-names></name></person-group> (<year>2012</year>). <article-title>New directions in lexical typology</article-title>. <source>Linguistics</source> <volume>50</volume>, <fpage>373</fpage>&#x2013;<lpage>394</lpage>. doi: <pub-id pub-id-type="doi">10.1515/ling-2012-0013</pub-id></citation></ref>
<ref id="ref28"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Macalister</surname> <given-names>J.</given-names></name></person-group> (<year>2011</year>). <article-title>Flower-girl and bugler-boy no more: changing gender representation in writing for children</article-title>. <source>Corpora</source> <volume>6</volume>, <fpage>25</fpage>&#x2013;<lpage>44</lpage>. doi: <pub-id pub-id-type="doi">10.3366/cor.2011.0003</pub-id></citation></ref>
<ref id="ref29"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Morehouse</surname> <given-names>K.</given-names></name> <name><surname>Rouduri</surname> <given-names>V.</given-names></name> <name><surname>Cunningham</surname> <given-names>W.</given-names></name> <name><surname>Charlesworth</surname> <given-names>T.</given-names></name></person-group> (<year>2023</year>). <italic>Traces of human attitudes in contemporary and historical word embeddings (1800&#x2013;2000)</italic>. Preprint (Version 1) Research Square.</citation></ref>
<ref id="ref9008"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Moscovici</surname> <given-names>S.</given-names></name></person-group> (<year>1988</year>). <article-title>Notes towards a description of social representations</article-title>. <source>Eur. J. Soc. Psychol.</source> <volume>18</volume>, <fpage>211</fpage>&#x2013;<lpage>250</lpage>. doi: <pub-id pub-id-type="doi">10.1002/ejsp.2420180303</pub-id></citation></ref>
<ref id="ref30"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Norberg</surname> <given-names>C.</given-names></name></person-group> (<year>2016</year>). <article-title>Naughty boys and sexy girls: the representation of young individuals in a web-based Corpus of English</article-title>. <source>J. Engl. Linguist.</source> <volume>44</volume>, <fpage>291</fpage>&#x2013;<lpage>317</lpage>. doi: <pub-id pub-id-type="doi">10.1177/0075424216665672</pub-id></citation></ref>
<ref id="ref31"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Nosek</surname> <given-names>B. A.</given-names></name> <name><surname>Smyth</surname> <given-names>F. L.</given-names></name> <name><surname>Hansen</surname> <given-names>J. J.</given-names></name> <name><surname>Devos</surname> <given-names>T.</given-names></name> <name><surname>Lindner</surname> <given-names>N. M.</given-names></name> <name><surname>Ranganath</surname> <given-names>K. A.</given-names></name> <etal/></person-group>. (<year>2007</year>). <article-title>Pervasiveness and correlates of implicit attitudes and stereotypes</article-title>. <source>Eur. Rev. Soc. Psychol.</source> <volume>18</volume>, <fpage>36</fpage>&#x2013;<lpage>88</lpage>. doi: <pub-id pub-id-type="doi">10.1080/10463280701489053</pub-id></citation></ref>
<ref id="ref32"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Osgood</surname> <given-names>C. E.</given-names></name> <name><surname>May</surname> <given-names>W. H.</given-names></name> <name><surname>Miron</surname> <given-names>M. S.</given-names></name></person-group> (<year>1975</year>). <source>Cross-cultural universals of affective meaning</source>. <publisher-loc>Urbana, Illinois</publisher-loc>: <publisher-name>University of Illinois Press</publisher-name>.</citation></ref>
<ref id="ref33"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ouyang</surname> <given-names>L.</given-names></name> <name><surname>Wu</surname> <given-names>J.</given-names></name> <name><surname>Xu</surname> <given-names>J.</given-names></name> <name><surname>Almeida</surname> <given-names>D.</given-names></name> <name><surname>Wainwright</surname> <given-names>C.</given-names></name> <name><surname>Mishkin</surname> <given-names>P.</given-names></name> <etal/></person-group>. (<year>2022</year>). <article-title>Training language models to follow instructions with human feedback</article-title>. <source>Adv. Neural Inf. Proces. Syst.</source> <volume>35</volume>, <fpage>27730</fpage>&#x2013;<lpage>27744</lpage>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.2203.02155</pub-id></citation></ref>
<ref id="ref34"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Payne</surname> <given-names>B. K.</given-names></name> <name><surname>Vuletich</surname> <given-names>H. A.</given-names></name> <name><surname>Brown-Iannuzzi</surname> <given-names>J. L.</given-names></name></person-group> (<year>2019</year>). <article-title>Historical roots of implicit bias in slavery</article-title>. <source>PNAS</source> <volume>116</volume>, <fpage>11693</fpage>&#x2013;<lpage>11698</lpage>. doi: <pub-id pub-id-type="doi">10.1073/pnas.1818816116</pub-id></citation></ref>
<ref id="ref35"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Pearce</surname> <given-names>M.</given-names></name></person-group> (<year>2008</year>). <article-title>Investigating the collocational behaviour of man and woman in the BNC using sketch engine</article-title>. <source>Corpora</source> <volume>3</volume>, <fpage>1</fpage>&#x2013;<lpage>29</lpage>. doi: <pub-id pub-id-type="doi">10.3366/E174950320800004X</pub-id></citation></ref>
<ref id="ref36"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Potts</surname> <given-names>A.</given-names></name> <name><surname>Weare</surname> <given-names>S.</given-names></name></person-group> (<year>2018</year>). <article-title>Mother, monster, Mrs, I: a critical evaluation of gendered naming strategies in English sentencing remarks of women who kill</article-title>. <source>Int. J. Semiot. Law - Rev. Int. S&#x00E9;miot. Jurid.</source> <volume>31</volume>, <fpage>21</fpage>&#x2013;<lpage>52</lpage>. doi: <pub-id pub-id-type="doi">10.1007/s11196-017-9523-z</pub-id></citation></ref>
<ref id="ref38"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Romaine</surname> <given-names>S.</given-names></name></person-group> (<year>2000</year>). <source>Language in society: An introduction to sociolinguistics</source>. <edition>2nd Edn.</edition> <publisher-loc>Oxford</publisher-loc>: <publisher-name>Oxford University Press</publisher-name>.</citation></ref>
<ref id="ref9001"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Salmons</surname> <given-names>J.</given-names></name></person-group> (<year>1990</year>). &#x201C;<article-title>The context of language change</article-title>,&#x201D; in <source>Research guide on language change</source>. ed. <person-group person-group-type="editor"><name><surname>Polom&#x00E9;</surname> <given-names>E. C.</given-names></name></person-group> (<publisher-loc>Berlin, New York</publisher-loc>: <publisher-name>De Gruyter Mouton</publisher-name>), <fpage>71</fpage>&#x2013;<lpage>96</lpage>.</citation></ref>
<ref id="ref9002"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Schulz</surname> <given-names>M.</given-names></name></person-group> (<year>1975</year>). <article-title>&#x201C;The semantic derogation of women,&#x201D;</article-title> in <source><italic>Language and sex: difference and dominance</italic></source>. eds. <person-group person-group-type="editor"><name><surname>Thorne</surname> <given-names>B.</given-names></name> <name><surname>Henley</surname> <given-names>N.</given-names></name></person-group> (<publisher-loc>Rowley, MA</publisher-loc>: <publisher-name>Newbury House</publisher-name>).</citation></ref>
<ref id="ref39"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Stern</surname> <given-names>G.</given-names></name></person-group> (<year>1931</year>). <source>Meaning and change of meaning, with special reference to the English language</source>. <publisher-loc>Bloomington, London</publisher-loc>: <publisher-name>Indiana University Press</publisher-name>.</citation></ref>
<ref id="ref40"><citation citation-type="other"><person-group person-group-type="author"><name><surname>Tannen</surname> <given-names>D.</given-names></name></person-group> (<year>1993</year>). <italic>Wears jump suits. Sensible shoes. Uses husband&#x2019;s last name</italic>. New York Times Magazine. pp. 52&#x2013;54.</citation></ref>
<ref id="ref41"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Taylor</surname> <given-names>C.</given-names></name></person-group> (<year>2013</year>). <article-title>Searching for similarity using Corpus-assisted discourse studies</article-title>. <source>Corpora</source> <volume>8</volume>, <fpage>81</fpage>&#x2013;<lpage>113</lpage>. doi: <pub-id pub-id-type="doi">10.3366/cor.2013.0035</pub-id></citation></ref>
<ref id="ref9010"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Tyler</surname> <given-names>A.</given-names></name></person-group> (<year>1980</year>). <source>Morgan&#x2019;s passing</source>. <publisher-loc>New York</publisher-loc>: <publisher-name>Knopf</publisher-name>.</citation></ref>
<ref id="ref42"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Ullmann</surname> <given-names>S.</given-names></name></person-group> (<year>1957</year>). <source>The principles of semantics</source>. <edition>2nd Edn.</edition> <publisher-loc>Glasgow, Oxford</publisher-loc>: <publisher-name>Jackson, Son, and Co., Basil Blackwell</publisher-name>.</citation></ref>
<ref id="ref43"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Vejdemo</surname> <given-names>S.</given-names></name> <name><surname>H&#x00F6;rberg</surname> <given-names>T.</given-names></name></person-group> (<year>2016</year>). <article-title>Semantic factors predict the rate of lexical replacement of content words</article-title>. <source>PLoS One</source> <volume>11</volume>:<fpage>e0147924</fpage>. doi: <pub-id pub-id-type="doi">10.1371/journal.pone.0147924</pub-id>, PMID: <pub-id pub-id-type="pmid">26820737</pub-id></citation></ref>
<ref id="ref44"><citation citation-type="book"><person-group person-group-type="author"><name><surname>Williams</surname> <given-names>J. E.</given-names></name> <name><surname>Best</surname> <given-names>D. L.</given-names></name></person-group> (<year>1990</year>). <source>Measuring sex stereotypes: A multination study</source>. <publisher-loc>Newbury Park, London</publisher-loc>: <publisher-name>Sage Publications, Inc</publisher-name>.</citation></ref>
<ref id="ref45"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname> <given-names>H.</given-names></name> <name><surname>Li</surname> <given-names>K.</given-names></name></person-group> (<year>2023</year>). <article-title>PyABSA: A modularized framework for reproducible Aspect-Based Sentiment Analysis</article-title>. <source>arXiv</source> <volume>2023</volume>:<fpage>01368</fpage>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.2208.01368</pub-id></citation></ref>
<ref id="ref46"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zasina</surname> <given-names>A. J.</given-names></name></person-group> (<year>2019</year>). <article-title>Gender-specific adjectives in Czech newspapers and magazines</article-title>. <source>J. Linguist.</source> <volume>70</volume>, <fpage>299</fpage>&#x2013;<lpage>312</lpage>. doi: <pub-id pub-id-type="doi">10.2478/jazcas-2019-0060</pub-id></citation></ref>
<ref id="ref47"><citation citation-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname> <given-names>W.</given-names></name> <name><surname>Li</surname> <given-names>X.</given-names></name> <name><surname>Yang</surname> <given-names>D.</given-names></name> <name><surname>Bing</surname> <given-names>L.</given-names></name> <name><surname>Lam</surname> <given-names>W.</given-names></name></person-group> (<year>2022</year>). <article-title>A survey on Aspect-Based Sentiment Analysis: Tasks, methods, and challenges</article-title>. <source>arXiv</source> <volume>2022</volume>:<fpage>01054</fpage>. doi: <pub-id pub-id-type="doi">10.48550/arXiv.2203.01054</pub-id></citation></ref>
</ref-list>
</back>
</article>