<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.3 20070202//EN" "journalpublishing.dtd">
<article xml:lang="EN" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="editorial">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Front. Artif. Intell.</journal-id>
<journal-title>Frontiers in Artificial Intelligence</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Front. Artif. Intell.</abbrev-journal-title>
<issn pub-type="epub">2624-8212</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.3389/frai.2023.1128446</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Artificial Intelligence</subject>
<subj-group>
<subject>Editorial</subject>
</subj-group>
</subj-group>
</article-categories>
<title-group>
<article-title>Editorial: Text complexity and simplification</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<name><surname>Ermakova</surname> <given-names>Liana</given-names></name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
<xref ref-type="corresp" rid="c001"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/506387/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Solovyev</surname> <given-names>Valery</given-names></name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
<xref ref-type="corresp" rid="c002"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1659791/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Sidorov</surname> <given-names>Grigori</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="c003"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1682697/overview"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name><surname>Gelbukh</surname> <given-names>Alexander</given-names></name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
<xref ref-type="corresp" rid="c004"><sup>&#x0002A;</sup></xref>
<uri xlink:href="http://loop.frontiersin.org/people/1682717/overview"/>
</contrib>
</contrib-group>
<aff id="aff1"><sup>1</sup><institution>HCTI, Universit&#x000E9; de Bretagne Occidentale</institution>, <addr-line>Brest</addr-line>, <country>France</country></aff>
<aff id="aff2"><sup>2</sup><institution>Laboratory &#x0226A;Linguistics and Artificial Intelligence&#x0226B;, Kazan Federal University</institution>, <addr-line>Kazan</addr-line>, <country>Russia</country></aff>
<aff id="aff3"><sup>3</sup><institution>Centro de Investigaci&#x000F3;n en Computaci&#x000F3;n (Center for Computing Research, CIC), National Polytechnic Institute (IPN)</institution>, <addr-line>Mexico City</addr-line>, <country>Mexico</country></aff>
<author-notes>

<fn fn-type="edited-by"><p>Edited and reviewed by: Weifeng Shen, Chongqing University, China</p></fn>
<corresp id="c001">&#x0002A;Correspondence: Liana Ermakova <email>liana.ermakova&#x00040;univ-brest.fr</email></corresp>
<corresp id="c002">Valery Solovyev <email>maki.solovyev&#x00040;mail.ru</email></corresp>
<corresp id="c003">Grigori Sidorov <email>sidorov&#x00040;cic.ipn.mx</email></corresp>
<corresp id="c004">Alexander Gelbukh <email>gelbukh&#x00040;cic.ipn.mx</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>13</day>
<month>06</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>6</volume>
<elocation-id>1128446</elocation-id>
<history>
<date date-type="received">
<day>20</day>
<month>12</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>31</day>
<month>05</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#x000A9; 2023 Ermakova, Solovyev, Sidorov and Gelbukh.</copyright-statement>
<copyright-year>2023</copyright-year>
<copyright-holder>Ermakova, Solovyev, Sidorov and Gelbukh</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (CC BY). The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</p></license> </permissions>
<related-article id="RA1" related-article-type="commentary-article" xlink:href="https://www.frontiersin.org/research-topics/34050/text-complexity-and-simplification" ext-link-type="uri">Editorial on the Research Topic <article-title>Text complexity and simplification</article-title></related-article>
<kwd-group>
<kwd>text simplification</kwd>
<kwd>text complexity</kwd>
<kwd>lexical complexity</kwd>
<kwd>text clarity</kwd>
<kwd>deep learning</kwd>
<kwd>simplification benchmarks</kwd>
</kwd-group>
<counts>
<fig-count count="0"/>
<table-count count="0"/>
<equation-count count="0"/>
<ref-count count="3"/>
<page-count count="3"/>
<word-count count="1510"/>
</counts>
<custom-meta-wrap>
<custom-meta>
<meta-name>section-at-acceptance</meta-name>
<meta-value>Natural Language Processing</meta-value>
</custom-meta>
</custom-meta-wrap>
</article-meta>
</front>
<body>

<sec sec-type="intro" id="s1">
<title>Introduction</title>
<p>Recently, text simplification has attracted a lot of interest in the scientific community as numerous texts, including classroom books, scientific articles, legal and financial documents, prove to be too difficult and as such cannot cater to readers&#x00027; needs. Although the first methods of measuring text complexity were suggested over 70 years ago, the problem is far from being solved. The diversity of languages, text types and genres, as well as their audience, are major challenges for researchers. Despite the significant progress of recent neural models (Sharoff, <xref ref-type="bibr" rid="B3">2022</xref>), many challenges remain unaddressed, including the consistency of the long output provided by the models used in a generative context. This Research Topic covers the topic of text complexity and simplification, related notions, resources and methods for the English, Portuguese, Spanish, and Russian languages.</p></sec>
<sec id="s2">
<title>Overview of the Research Topic</title>
<p>Recently, plain and easy language has gained attention as a subject of standardization in many countries. However, even the notions of text clarity, text simplicity, plain language and easy language are problematic. The paper of <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/frai.2022.1042258">Vecchiato</ext-link> discusses these four notions and the formal processes of text simplification which should vary accordingly. She highlights that a clear text does not necessarily exclude any ambiguous expression. While much work on automatic text simplification aims to shorten the text, <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/frai.2022.1042258">Vecchiato</ext-link> states that a clear text &#x0201C;can be reasonably long if more words are needed to adequately explain a concept.&#x0201D; <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/frai.2022.1042258">Vecchiato</ext-link> distinguishes structural, cognitive and developmental complexity and suggests that text simplification should integrate the different levels of intelligibility, namely readability, coherence and representability.</p>
<p>The paper <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/frai.2022.1008530">(Blinova and Tarasov)</ext-link> develops a model for estimation of the complexity of legal texts in Russian. Several regression and classification methods are compared. Their inputs are the scores of a pre-trained, fine-tuned BERT model and the values of 130 linguistic features. BERT is configured on a tagged textbook corpus of about 10 million words. Such a complex hybrid model is exploited for estimation of the complexity of legal texts in Russian for the first time. It is shown that the XGBoost classification model trained on linguistic features and language model predictions achieves the best results. The proposed model shows high results in terms of accuracy of estimation on test data and efficiency in identifying complex legal documents.</p>
<p>The paper <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/frai.2022.1008411">(Ivanov)</ext-link> is devoted to the evaluation of the complexity of sentences in Russian. There are two datasets in the paper, which are used as the gold standard for the Russian language. One of them contains 75 thousand sentences, the other&#x02014;6 million pairs of sentences. The author considers several neural network models, evaluates the complexity of sentences, and shows that fine-tuning pre-trained language models, namely RuBERT, performs slightly better than training a Graph Neural Network. It is fundamentally important that the sentences are aligned in terms of the number of words and the length of words, and it turned out that very high indicators for the accuracy of complexity estimates can be obtained without using these obvious features.</p>
<p><ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/frai.2022.983008">Rosetti and Van Waes</ext-link> discuss stages of text preparation from the perspective of text simplification for second language writers. The authors consider various styles of text rewriting for simplification made by university students. The linear reviser prepares the whole text and then starts from the beginning to modify it. The intermittent reviser at each stage comes back to the previous text paragraphs and works with them. The recursive reviser performs revision on smaller portions of texts. The authors analyze the dynamics of text production and take into account the expertise of text production in a foreign language. They experimentally study how text simplification training influences text readability in a second language and what the relationship is between the pausing dynamics of writing phases and text readability. Forty-seven Master&#x00027;s students participated in their experiment, where they measured text complexity and text cohesion. They also used InputLog of text writing (number of characters typed, pauses, cursor position, etc.).</p>
<p>The paper of <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/frai.2022.984759">Dmitrieva et al.</ext-link> presents a word list obtained through word alignment from parallel Russian simplification data. This word list is a valuable resource for several practical applications related to text simplification and foreign language teaching. The authors state that not all words are equally easy for learners. The task of monolingual word alignment consists in aligning words or phrases with similar meanings in different sentences in the same language; for example, &#x0201C;to doze off&#x0201D; is more complex than &#x0201C;to fall asleep,&#x0201D; while their meanings are similar. The resulting word list was first scored automatically and then manually evaluated. The size of the list is about 1,400 source lemmas and 800 target lemmas.</p>
<p><ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/frai.2022.991242">&#x00160;tajner et al.</ext-link> introduced lexical simplification benchmarks for English, Portuguese and Spanish selected and annotated using comparable procedures allowing a fair comparison of lexical simplification systems across three languages. The authors propose simpler synonyms for complex words in 1,153 instances, split across three languages, annotated by 25 crowdsourced workers and validated by a linguist. The dataset is envisioned for evaluation of substitution generation and substitution selection methods. The dataset is limited to one-word terms. Extensive analysis of the dataset is given. The dataset was used for the shared tasks at Text Simplification, Accessibility, and Readability (TSAR).<xref ref-type="fn" rid="fn0001"><sup>1</sup></xref></p></sec>
<sec id="s3">
<title>Conclusions and perspectives</title>
<p>Despite the recent progress made by large pretrained models, text simplification remains a challenging problem. Even the terminology related to text complexity and simplification is still not established <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/frai.2022.1042258">(Vecchiato)</ext-link>. Models for English dominate the field, but applications and resources for other languages are emerging. The selection of information and simplification may lead to the alteration of information as well as to potential biases (Ermakova et al., <xref ref-type="bibr" rid="B1">2023</xref>).</p>
<p>Although the goal of text simplification is to make a text understandable, the question has multiple aspects:</p>
<list list-type="bullet">
<list-item><p>How much knowledge is required to understand a given text? Is it possible to remove complex terminology? Or should it be explained? (O&#x00027;Reilly et al., <xref ref-type="bibr" rid="B2">2019</xref>; Ermakova et al., <xref ref-type="bibr" rid="B1">2023</xref>).</p></list-item>
<list-item><p>Can we avoid language complexity (too complex words or syntax) with the acceptable level of information distortion? (Ermakova et al., <xref ref-type="bibr" rid="B1">2023</xref>).</p></list-item>
<list-item><p>How to reduce second language complexity?</p></list-item>
</list></sec>
<sec sec-type="author-contributions" id="s4">
<title>Author contributions</title>
<p>All authors listed have made a substantial, direct, and intellectual contribution to the work and approved it for publication.</p></sec>
</body>
<back>
<sec sec-type="funding-information" id="s5">
<title>Funding</title>
<p>This work was supported by the MaDICS research group (<xref ref-type="fn" rid="fn0002"><sup>2</sup></xref>). This research was funded, in whole or in part, by the French National Research Agency (ANR) under the project ANR-22-CE23-0019-01.</p>
</sec>

<sec sec-type="COI-statement" id="conf1">
<title>Conflict of interest</title>
<p>The authors declare that the research was conducted in the absence of any commercial or financial relationships that could be construed as a potential conflict of interest.</p>
</sec>
<sec sec-type="disclaimer" id="s6">
<title>Publisher&#x00027;s note</title>
<p>All claims expressed in this article are solely those of the authors and do not necessarily represent those of their affiliated organizations, or those of the publisher, the editors and the reviewers. Any product that may be evaluated in this article, or claim that may be made by its manufacturer, is not guaranteed or endorsed by the publisher.</p>
</sec>



<fn-group>
<fn id="fn0001"><p><sup>1</sup><ext-link ext-link-type="uri" xlink:href="https://taln.upf.edu/pages/tsar2022-st/">https://taln.upf.edu/pages/tsar2022-st/</ext-link></p></fn>
<fn id="fn0002"><p><sup>2</sup><ext-link ext-link-type="uri" xlink:href="https://www.madics.fr/ateliers/simpletext/">https://www.madics.fr/ateliers/simpletext/</ext-link></p></fn>
</fn-group>

<ref-list>
<title>References</title>
<ref id="B1">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Ermakova</surname> <given-names>L.</given-names></name> <name><surname>Sanjuan</surname> <given-names>E.</given-names></name> <name><surname>Huet</surname> <given-names>S.</given-names></name> <name><surname>Augereau</surname> <given-names>O.</given-names></name> <name><surname>Azarbonyad</surname> <given-names>H.</given-names></name> <name><surname>Kamps</surname> <given-names>J.</given-names></name></person-group> (<year>2023</year>). <article-title>&#x0201C;CLEF 2023 Simple text track: What happens if general users search scientific texts?&#x0201D;</article-title> in <source>Advances in Information Retrieval: 45th European Conference on Information Retrieval</source> (<publisher-loc>Berlin; Heidelberg</publisher-loc>: <publisher-name>Springer</publisher-name>), <fpage>536</fpage>&#x02013;<lpage>545</lpage>. <pub-id pub-id-type="doi">10.1007/978-3-031-28241-6_62</pub-id></citation>
</ref>
<ref id="B2">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>O&#x00027;Reilly</surname> <given-names>T.</given-names></name> <name><surname>Wang</surname> <given-names>Z.</given-names></name> <name><surname>Sabatini</surname> <given-names>J.</given-names></name></person-group> (<year>2019</year>). <article-title>How much knowledge is too little? When a lack of knowledge becomes a barrier to comprehension</article-title>. <source>Psychol. Sci.</source> <volume>30</volume>, <fpage>1344</fpage>&#x02013;<lpage>1351</lpage>. <pub-id pub-id-type="doi">10.1177/0956797619862276</pub-id><pub-id pub-id-type="pmid">31343951</pub-id></citation></ref>
<ref id="B3">
<citation citation-type="journal"><person-group person-group-type="author"><name><surname>Sharoff</surname> <given-names>S.</given-names></name></person-group> (<year>2022</year>). <article-title>What neural networks know about linguistic complexity?</article-title> <source>Russ. J. Linguist.</source> <volume>26</volume>, <fpage>371</fpage>&#x02013;<lpage>390</lpage>. <pub-id pub-id-type="doi">10.22363/2687-0088-30178</pub-id></citation>
</ref>
</ref-list>


</back>
</article> 