<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JSG</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Serious Games</journal-id>
      <journal-title>JMIR Serious Games</journal-title>
      <issn pub-type="epub">2291-9279</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i1e15349</article-id>
      <article-id pub-id-type="pmid">32130121</article-id>
      <article-id pub-id-type="doi">10.2196/15349</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Using String Metrics to Improve the Design of Virtual Conversational Characters: Behavior Simulator Development Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Stourac</surname>
            <given-names>Petr</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>de Gloria</surname>
            <given-names>Alessandro</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Sarbadhikari</surname>
            <given-names>Suptendra</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>García-Carbajal</surname>
            <given-names>Santiago</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Computer Science Department</institution>
            <institution>Universidad de Oviedo</institution>
            <addr-line>Campus de Viesques Office 1 b 15</addr-line>
            <addr-line>Gijón, 33203</addr-line>
            <country>Spain</country>
            <phone>34 985182487</phone>
            <email>sgarcia@uniovi.es</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1168-9865</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Pipa-Muniz</surname>
            <given-names>María</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2176-0423</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Múgica</surname>
            <given-names>Jose Luis</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4674-4198</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Computer Science Department</institution>
        <institution>Universidad de Oviedo</institution>
        <addr-line>Gijón</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Cabueñes Hospital</institution>
        <addr-line>Gijón</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Signal Software SL</institution>
        <institution>Parque Científico Tecnológico de Gijón</institution>
        <addr-line>Gijón, Asturias</addr-line>
        <country>Spain</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Santiago García-Carbajal <email>sgarcia@uniovi.es</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <season>Jan-Mar</season>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>27</day>
        <month>2</month>
        <year>2020</year>
      </pub-date>
      <volume>8</volume>
      <issue>1</issue>
      <elocation-id>e15349</elocation-id>
      <history>
        <date date-type="received">
          <day>3</day>
          <month>7</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>3</day>
          <month>10</month>
          <year>2019</year>
        </date>
        <date date-type="rev-recd">
          <day>13</day>
          <month>10</month>
          <year>2019</year>
        </date>
        <date date-type="accepted">
          <day>16</day>
          <month>12</month>
          <year>2019</year>
        </date>
      </history>
      <copyright-statement>©Santiago García-Carbajal, María Pipa-Muniz, Jose Luis Múgica. Originally published in JMIR Serious Games (http://games.jmir.org), 27.02.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Serious Games, is properly cited. The complete bibliographic information, a link to the original publication on http://games.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://games.jmir.org/2020/1/e15349/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>An emergency waiting room is a place where conflicts often arise. Nervous relatives in a hostile, unknown environment force security and medical staff to be ready to deal with some awkward situations. Additionally, it has been said that the medical interview is the first diagnostic and therapeutic tool, involving both intellectual and emotional skills on the part of the doctor. At the same time, it seems that there is something mysterious about interviewing that cannot be formalized or taught. In this context, virtual conversational characters (VCCs) are progressively present in most e-learning environments.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>In this study, we propose and develop a modular architecture for a VCC-based behavior simulator to be used as a tool for conflict avoidance training. Our behavior simulators are now being used in hospital environments, where training exercises must be easily designed and tested.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We define training exercises as labeled, directed graphs that help an instructor in the design of complex training situations. In order to increase the perception of talking to a real person, the simulator must deal with a huge number of sentences that a VCC must understand and react to. These sentences are grouped into sets identified with a common label. Labels are then used to trigger changes in the active node of the graph that encodes the current state of the training exercise. As a consequence, we need to be able to map every sentence said by the human user into the set it belongs to, in a fast and robust way. In this work, we discuss two different existing string metrics, and compare them to one that we use to assess a designed exercise.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Based on the similarities found between different sets, the proposed metric provided valuable information about ill-defined exercises. We also described the environment in which our programs are being used and illustrated it with an example.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Initially designed as a tool for training emergency room staff, our software could be of use in many other areas within the same environment. We are currently exploring the possibility of using it in speech therapy situations.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>spoken interaction</kwd>
        <kwd>string metrics</kwd>
        <kwd>virtual conversational characters</kwd>
        <kwd>serious games</kwd>
        <kwd>e-learning</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Virtual Conversational Characters</title>
        <p>The field of virtual conversational characters (VCCs) is an emerging research field that is growing in importance, both in industrial and academic applications. Our company started to include VCCs as a component of our simulators in 2014, mainly oriented to military and police environments, and recently it was proposed to migrate this type of simulator to hospital environments.</p>
        <p>VCCs, also known as embodied conversational agents by Poggi et al [<xref ref-type="bibr" rid="ref1">1</xref>], are 2D and 3D models of real persons that must be capable of human-like behavior. Apart from high-quality graphics, the most important characteristics that define VCCs are as follows:</p>
        <list list-type="order">
          <list-item>
            <p>Degree of embodiment: a full embodiment implies rendering a complete body. A talking head is an example of partial embodiment.</p>
          </list-item>
          <list-item>
            <p>Believable talking: the VCC must be able to maintain a conversation with the human user. The most difficult problem to solve is to manage communication in a way that the human user does not perceive his or her dialogue partner as an emotionally numb agent.</p>
          </list-item>
          <list-item>
            <p>Gesturing: nonverbal behavior is key when trying to solve the traditional lack of naturalness of VCCs. Nonverbal behavior can be introduced in one or both of the following ways:</p>
            <list list-type="alpha-lower">
              <list-item>
                <p>Facial gesture: different models and taxonomies for facial movement have been proposed by Ekman and Friesen [<xref ref-type="bibr" rid="ref2">2</xref>] and Martinez and Shichuan [<xref ref-type="bibr" rid="ref3">3</xref>]. An excellent state-of-the-art work on facial expressions for VCCs is that of Ochs et al [<xref ref-type="bibr" rid="ref4">4</xref>].</p>
              </list-item>
              <list-item>
                <p>Body gesture: this usually involves hand and arm movements while talking, as included by Hartholt et al [<xref ref-type="bibr" rid="ref5">5</xref>] in the virtual human toolkit.</p>
              </list-item>
            </list>
          </list-item>
          <list-item>
            <p>Emotional behavior of the character: for a VCC, it is desirable not only to be able to maintain a conversation, but also to do so while showing some kind of personality, mood, or attitude.</p>
          </list-item>
        </list>
        <p>In this work, we focus on the dialogue management problem. Involving a VCC in a meaningful conversation often implies huge knowledge databases, syntactic and semantic analysis, and the use of artificial intelligence techniques to achieve a convincing result. Designing conversational situations as graphs in the way described in García et al [<xref ref-type="bibr" rid="ref6">6</xref>], we restrict the possible states of the dialogue, the sentences to be said by the VCC, and the sets of sentences that it will understand. This method does not decrease the applicability of our behavior simulator, as it is intended to be used in strictly constrained situations. Unfortunately, two problems arise when using such an approach: (1) the need for a huge number of similar but slightly different sentences to be said by the VCC if we want the agent not to appear too repetitive and (2) on the other hand, we want the VCC to be able to understand an order, question, or command expressed in as many ways as possible.</p>
        <p>The first problem can be solved merely by including a high number of different ways to express what the VCC is going to say and by randomly picking one of them at execution time. The second problem requires the mapping of the expressions said by the human user to any of those the VCC can accept, converting it into the associated label, and delivering it to the situation manager, all within the execution time. This is where string metrics come into play, as a way of measuring the similarities between sentences said by the human user and the sets of expressions the VCC is expecting.</p>
      </sec>
      <sec>
        <title>Related Work</title>
        <p>Related works include Rosmalen et al [<xref ref-type="bibr" rid="ref7">7</xref>], where an existing serious game is extended to include a chatbot, as well as those related to the formalization and use of behavior trees by Johansson and Dell’Acqua [<xref ref-type="bibr" rid="ref8">8</xref>], Isla [<xref ref-type="bibr" rid="ref9">9</xref>], or Imbert and de Antonio [<xref ref-type="bibr" rid="ref10">10</xref>], where COGNITIVA is proposed as an emotional architecture for VCCs. The most closely related works to that reported here are those of Hartholt et al [<xref ref-type="bibr" rid="ref5">5</xref>] and Morie et al [<xref ref-type="bibr" rid="ref11">11</xref>], where the virtual human toolkit is described. More recently, a framework for the rapid development of Spanish-speaking characters has been presented in Herrera et al [<xref ref-type="bibr" rid="ref12">12</xref>].</p>
        <p>In this context, our system's characteristics are as follows:</p>
        <list list-type="order">
          <list-item>
            <p>Full embodiment: our VCCs are complete human models rendered inside a realistic 3D scene.</p>
          </list-item>
          <list-item>
            <p>We solve the dialogue management problem by defining our training situations as graphs and by introducing a statistical process of strings returned by the speech recognition library as a way of directing the evolution of the exercise.</p>
          </list-item>
          <list-item>
            <p>Inclusion of a facial action code system, as described by Ekman and Friesen [<xref ref-type="bibr" rid="ref2">2</xref>], as a way of manipulating the VCC's facial gesture.</p>
          </list-item>
          <list-item>
            <p>Emotional behavior is based on an emotional engine that permits the design and testing of the underlying personality of the VCC, described in García et al [<xref ref-type="bibr" rid="ref6">6</xref>], and is mainly oriented to the simulation of violent behaviors, as this has been the main application field of our software.</p>
          </list-item>
        </list>
        <p>The rest of the paper is structured as follows:</p>
        <list list-type="order">
          <list-item>
            <p>The Environment section describes the context where string metrics are being used.</p>
          </list-item>
          <list-item>
            <p>In the Situation Graphs section, we describe the component of the behavior simulator to be analyzed using string metrics.</p>
          </list-item>
          <list-item>
            <p>The String Metrics section is devoted to the explanation of some string metrics and their comparison to the one we are using.</p>
          </list-item>
          <list-item>
            <p>In the Graph Validation section, three different string metrics are applied to an example graph using our graph validation tool.</p>
          </list-item>
          <list-item>
            <p>Finally, in the Conclusions and Future Work sections, we present the main achievements of our work and some possible future lines of research.</p>
          </list-item>
        </list>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Environment</title>
        <p>In this section, we describe the context in which we are using string metrics. Our simulators are designed to be used in conflict avoidance training contexts, including situations where a member of the security staff must ask a suspect for his or her identity card, begin the initial evaluation process of a patient, or deal with an annoyed relative. Such situations are characterized by the fact that there is a clear policy the trainee must follow in order to fulfill the exercise. Conversely, the VCC will have a small set of expected behaviors. Therefore, we need to build a believable VCC that is able to communicate in a clearly constrained scene.</p>
        <p>Our tool lets the user give a formal description of the training exercise. The output of the tool is a directed graph in Graphviz format, following the description of Emden and North [<xref ref-type="bibr" rid="ref13">13</xref>], that represents the current and possible states of the exercise and defines transitions from one state to another, in terms of the labels associated with each arc. The main components of our behavior simulator are as follows:</p>
        <list list-type="order">
          <list-item>
            <p>A situation graph, defining the exercise.</p>
          </list-item>
          <list-item>
            <p>A set of sentences associated with each node of the graph. Whenever the situation enters a state, the system will randomly pick one of the sentences associated with that node. The higher the number of sentences, the lower the probability of repeating a sentence, while increasing the perception of talking to a real human.</p>
          </list-item>
          <list-item>
            <p>One or various sets of sentences that the VCC must recognize when the graph is in a valid state.</p>
          </list-item>
        </list>
        <p>Each node in the graph will be connected to one or more other nodes. The arcs representing these connections will be labeled with names like <italic>Ask_For_ID</italic>, which require an action from the human user. Each label will be associated with a set of sentences that the human user can say in order to trigger that transition. We keep this kind of information stored in files sharing the <italic>.lang</italic> extension. The other elements of the system are as follows:</p>
        <list list-type="order">
          <list-item>
            <p>An emotional engine that drives the emotional state and behavior of the VCC, as described in García et al [<xref ref-type="bibr" rid="ref6">6</xref>].</p>
          </list-item>
          <list-item>
            <p>A body language interpreter that is developed using the Microsoft Kinect sensor, which analyzes the body gesture of the human interacting with the simulator, in order to give advice about good or bad practices while interacting with real humans.</p>
          </list-item>
        </list>
      </sec>
      <sec>
        <title>Situation Graphs</title>
        <p>Any one of our situation graphs will contain, at least, the following states:</p>
        <list list-type="order">
          <list-item>
            <p>Init: in this state, the system performs some basic tasks, such as graph file parsing, audio and graphical setup, and some initial calculations that increase performance, which will be explained in the Histogram Matching section.</p>
          </list-item>
          <list-item>
            <p>Success: this state will be reached when the human performing the training exercise completes it in a satisfactory manner.</p>
          </list-item>
          <list-item>
            <p>Failure: the opposite of the Success state.</p>
          </list-item>
        </list>
        <p>In order to clearly state the role of the situation graph, we will describe an unreal, simple training situation with its associated states and sets of sentences. Describing a medical interview in terms of one of our situation graphs generates a huge image, too big for the illustrative purposes of this section.</p>
        <p>We have a situation where the behavior simulator, once initialized, will present the user with a VCC. The goal is to obtain their identification card, to avoid the start of a fight, or to prevent the individual from running away from the scene. The situation graph is shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>.</p>
        <p>Associated with each state in the graph are a stored set of sentences that the VCC will keep saying until the answer received from the user triggers a transition to another state. The system stores these sets in files named after the state they are associated with; all of them share the <italic>.talk</italic> extension in their file names. Based on the contents of the <italic>.talk</italic> files, the behavior simulator will keep the VCC saying a sentence picked at random from those associated with the current active node of the situation graph. The behavior simulator will also try to map what the human user says into the labels that can trigger a transition from the current state to any other.</p>
        <p>At the moment of writing this paper, we have designed four different training situations, or exercises, each with their own learning goals:</p>
        <list list-type="order">
          <list-item>
            <p>A lost child in the waiting room: the goals are to discover where the child came from and what she is doing at the hospital.</p>
          </list-item>
          <list-item>
            <p>An aggressive young man under the influence of drugs: the goal is to gain time while security personnel arrive.</p>
          </list-item>
          <list-item>
            <p>An elderly woman with cognitive impairment: the goals are to make an initial assessment of the woman's condition and to reassure her.</p>
          </list-item>
          <list-item>
            <p>A nervous young lady asking for information about one of her relatives: the goal is to convince her to leave the area and retire to the waiting room.</p>
          </list-item>
        </list>
        <p><xref rid="figure2" ref-type="fig">Figure 2</xref> shows the system a moment after starting one of these training exercises. The system can be run in silent mode, showing sentences said by the VCC only on the screen in text, and the microphone can be disconnected to allow input of sentences using the keyboard.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Simple situation graph. Init: the state in which the system performs some basic tasks. Success: this state will be reached when the human performing the training exercise completes it in a satisfactory manner. Failure: the opposite of the Success state. <italic>Regular_Chat</italic>: as soon as the exercise starts, the scene enters this state, with the virtual conversational character (VCC) engaging in small talk. <italic>Asked_ID</italic>: the situation enters this state if the user says one of the sentences associated with the <italic>Ask_For_Identification</italic> label; when in this state, the VCC will probabilistically decide to collaborate or not, showing ID, or returning to <italic>Regular_Chat</italic>. The former means reaching the Success state. The latter means that the VCC refused to obey and show their ID card. In practice, this implies remaining in the same state, <italic>Regular_Chat</italic>. <italic>Buying_Time</italic>: if the user does not ask for identification, the scene enters a dumb state, with the VCC trying to escape. If the user continues asking for ID, the situation reaches an impasse. To return to <italic>Regular_Chat</italic>, the security guard must warn the VCC about trying to escape. Any other kind of conversation triggers a Failure.</p>
          </caption>
          <graphic xlink:href="games_v8i1e15349_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>A user facing the system a moment after starting a training exercise.</p>
          </caption>
          <graphic xlink:href="games_v8i1e15349_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>String Metrics</title>
        <sec>
          <title>Overview</title>
          <p>In order to trigger transitions from one state of the graph to another, we need some kind of metric to evaluate the distance between the string returned by the speech recognition library and all the strings that are acceptable for the current state. There is a large number of metrics that can be used to measure the difference between pairs of strings. In this section, we compare two different well-known metrics—that proposed by Levenshtein [<xref ref-type="bibr" rid="ref14">14</xref>] and the Gestalt pattern-matching algorithm proposed by Ratcliff and Metzener [<xref ref-type="bibr" rid="ref15">15</xref>]—when used for our purposes and justify the use of our own metric, which we will call histogram matching.</p>
          <p>String distance functions or string similarity metrics are defined between two strings, for example, s and t. Distance functions map a pair of strings s and t to a real number r, where a smaller value of r indicates greater similarity between s and t. Similarity functions are analogous to distance functions, except that larger values indicate greater similarity.</p>
        </sec>
        <sec>
          <title>Levenshtein Distance</title>
          <p>One important class of distance functions are edit distances, in which distance is the cost of the best sequence of edit operations that converts s to t. Typical edit operations are character insertion, deletion, and substitution, and each operation must be assigned a cost. Levenshtein distance is defined in Levenshtein [<xref ref-type="bibr" rid="ref14">14</xref>]. However, even in its normalized version proposed by Yujian and Bo [<xref ref-type="bibr" rid="ref16">16</xref>], it is not useful for us, as it gives high values to pairs of strings that are a word-by-word permutation of the original, for example, “Don't resist, please” and “Please, don't resist.” See <xref ref-type="table" rid="table1">Table 1</xref> for results.</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>Levenshtein distances for the strings s1, s2, and s3.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="250"/>
              <col width="250"/>
              <col width="250"/>
              <col width="250"/>
              <thead>
                <tr valign="top">
                  <td>String</td>
                  <td colspan="3">String, Levenshtein distance</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>s1<sup>a</sup></td>
                  <td>s2<sup>b</sup></td>
                  <td>s3<sup>c</sup></td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>s1</td>
                  <td>0</td>
                  <td>14</td>
                  <td>14</td>
                </tr>
                <tr valign="top">
                  <td>s2</td>
                  <td>14</td>
                  <td>0</td>
                  <td>16</td>
                </tr>
                <tr valign="top">
                  <td>s3</td>
                  <td>14</td>
                  <td>16</td>
                  <td>0</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table1fn1">
                <p><sup>a</sup>s1: “Please show me your ID.”</p>
              </fn>
              <fn id="table1fn2">
                <p><sup>b</sup>s2: “Show me your ID please.”</p>
              </fn>
              <fn id="table1fn3">
                <p><sup>c</sup>s3: “Your ID. Show it to me.”</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>Gestalt Pattern Matching</title>
          <p>Ratcliff and Metzener’s pattern-matching algorithm [<xref ref-type="bibr" rid="ref15">15</xref>] has been described as a wild-card search process without wild cards. The algorithm builds its own wild cards, based on the matches found between two strings, s and t. First, the algorithm examines s and t and locates the largest common subsequence between them. It then uses this group of characters as an anchor between s and t. Any group of characters found to the left or the right of this anchor is placed on a stack for further examination. The procedure is repeated for all substrings on the stack until it is empty.</p>
          <p>The returned value is twice the number of characters found in common, divided by the total number of characters in the two strings; the score is returned as an integer, reflecting a percentage match. We are currently using the SequenceMatcher version of Ratcliff's algorithm, included in the difflib package from Python, version 3.7 (Python Software Foundation), that returns a real number instead.</p>
        </sec>
        <sec>
          <title>Histogram Matching</title>
          <p>We will now describe the numerical procedure that lets us assign a label to any string returned from speech recognition libraries, such as Microsoft Speech Application Programming Interface (API). When the exercise starts, we take each <italic>.lang</italic> file, and for each sentence we perform the following procedure (see <xref rid="figure3" ref-type="fig">Figure 3</xref>, Equation 1):</p>
          <list list-type="order">
            <list-item>
              <p>Convert the sentence to lowercase letters, discarding any punctuation marks.</p>
            </list-item>
            <list-item>
              <p>Calculate the number of letter “a”s, “b”s, etc, that the sentence contains. This array is what we call a letter histogram. Letter histograms for every single possible sentence that the human user can potentially say are calculated and stored before the exercise starts.</p>
            </list-item>
            <list-item>
              <p>Let h(c)<sub>s</sub> be the number of occurrences of character c inside string s.</p>
            </list-item>
            <list-item>
              <p>Let T(s) be the sum of h(c)<sub>s</sub> for each possible value of c inside string s.</p>
            </list-item>
          </list>
          <p>When the exercise starts, we need to know the distance between the words said by the human user as well as all the sentences stored inside the <italic>.lang</italic> files. We define the histogram-matching function between strings s and t as expressed in Equation 2 (see <xref rid="figure3" ref-type="fig">Figure 3</xref>). In the histogram-matching formula (see <xref rid="figure3" ref-type="fig">Figure 3</xref>, Equation 2), s represents the sentence said by the human user, and t is each one of the sentences included in the <italic>.lang</italic> files that is associated with outgoing arcs from the current active node in the situation graph. The maximum of these values determines the label we assign to the sentence that was said and, eventually, a transition to another node inside the graph.</p>
          <p>In Equation 3 (see <xref rid="figure3" ref-type="fig">Figure 3</xref>), set(t) is a function returning the set that t belongs to, and set(t) is an outgoing arc.</p>
          <fig id="figure3" position="float">
            <label>Figure 3</label>
            <caption>
              <p>Equations for our histogram-matching metric.</p>
            </caption>
            <graphic xlink:href="games_v8i1e15349_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Data Evaluation</title>
        <p>In this section we describe the results obtained when applying the three string metrics described in the Methods section to a set of sentences, and the process that we follow to validate a graph.</p>
      </sec>
      <sec>
        <title>Levenshtein Distance</title>
        <p><xref ref-type="table" rid="table1">Table 1</xref> shows the value of Levenshtein distance for three different strings—s1, s2, and s3—with s2 being a word-by-word permutation of s1, and being very similar to s3, at least semantically. The strings s1, s2, and s3 stand for “Please show me your ID,” “Show me your ID please,” and “Your ID. Show it to me,” respectively.</p>
        <p>As we do not process commas, nor any other punctuation marks, s1 and s2 should be equivalent sentences for our system, and the distance between s3 and the others should be minimal. We are showing not-normalized values here, but it can be seen that the distance between s1 and s2 is not equal to zero, forcing us to include all the valid permutations of a sentence in the respective <italic>.lang</italic> file if we want the VCC to understand all of them. This renders the Levenshtein distance metric inappropriate for our labeling needs.</p>
      </sec>
      <sec>
        <title>Gestalt Pattern Matching</title>
        <p><xref ref-type="table" rid="table2">Table 2</xref> shows the values of the Gestalt pattern-matching algorithm by Ratcliff and Metzener [<xref ref-type="bibr" rid="ref15">15</xref>] when applied to s1, s2, and s3. It returns a 100% similarity value over the main diagonal, as expected, but the reported value is not symmetric for s2 and s3. Additionally, it gives a similarity value of 68% between s1 and s2, too low for a pair of sentences that must be considered equivalent for our system.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Python SequenceMatcher similarities for the strings s1, s2, and s3, based on the Gestalt pattern-matching algorithm.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td>String</td>
                <td colspan="3">String, SequenceMatcher similarity</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>s1<sup>a</sup></td>
                <td>s2<sup>b</sup></td>
                <td>s3<sup>c</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>s1</td>
                <td>100.0</td>
                <td>68.18</td>
                <td>32.55</td>
              </tr>
              <tr valign="top">
                <td>s2</td>
                <td>68.18</td>
                <td>100.0</td>
                <td>41.86</td>
              </tr>
              <tr valign="top">
                <td>s3</td>
                <td>32.55</td>
                <td>46.51</td>
                <td>100.0</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>s1: “Please show me your ID.”</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>s2: “Show me your ID please.”</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>s3: “Your ID. Show it to me.”</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Histogram Matching</title>
        <p><xref ref-type="table" rid="table3">Table 3</xref> shows histogram-matching values between the strings s1, s2, and s3. The main diagonal values are 100%, as expected, but we also see total similarity between s1 and s2. Reported similarity between s1 and s3, and between s2 and s3, is higher than 70%, which is far from being an almost complete match, but significantly higher than the value reported by the Gestalt pattern algorithm (32%).</p>
        <p><xref rid="figure4" ref-type="fig">Figures 4</xref> and <xref rid="figure5" ref-type="fig">5</xref> show the letter histograms associated with every sentence included inside the <italic>Stop_Playing.lang</italic> and <italic>Ask_For_Identification.lang</italic> files.</p>
        <p>The example has been intentionally kept simple but practical, in order to explain how histogram matching works. Similarities between the different sentences associated with the same label (ie, inside the same set) present no practical problem. Problems arise when, for two different labels associated with arcs that come out of the same graph node, any of the sentences included in the corresponding files can be misunderstood as any of those defined for a different transition. In other words, the distance between each of the sentences included in any <italic>.lang</italic> file and those included in a <italic>.lang</italic> file whose initial graph node is the same, should be as large as possible, in order to have the least confusing exercise definition. Additionally, as a reinforcement factor, we also count the number of blank spaces included in the sentence, to avoid strange coincidences that would confuse the system.</p>
        <p>In the exercise we are using as an example, there is only one situation to analyze—there are three different arcs coming out from the <italic>Buying_Time</italic> state, and we need the triggering sentences for these arcs to be as different as possible: (1) <italic>Stop_Playing</italic>, (2) <italic>Ask_For_Identification</italic>, and (3) <italic>Anything_Else</italic>.</p>
        <p>Of these three transitions (ie, labels), only the first two are interesting, as <italic>Anything_Else</italic> is a special case that we will assign if it is not possible to assign any of the others, up to a defined tolerance. We mark this kind of label, leaving the corresponding <italic>.lang</italic> file almost empty, containing only a # symbol. Therefore, it is clear that the exercise would be ill defined if any sentence inside <italic>Stop_Playing.lang</italic> is too similar to any of the sentences included in <italic>Ask_For_Identification.lang</italic>. Our exercise validation tool analyzes this kind of situation and highlights potentially conflicting labels, sentences, and states. The example is analyzed in the Graph Validation section.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Histogram-matching similarities for the strings s1, s2, and s3.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td>String</td>
                <td colspan="3">String, histogram-matching similarity</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>s1<sup>a</sup></td>
                <td>s2<sup>b</sup></td>
                <td>s3<sup>c</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>s1</td>
                <td>100.0</td>
                <td>100.0</td>
                <td>70.5</td>
              </tr>
              <tr valign="top">
                <td>s2</td>
                <td>100.0</td>
                <td>100.0</td>
                <td>70.5</td>
              </tr>
              <tr valign="top">
                <td>s3</td>
                <td>70.5</td>
                <td>70.5</td>
                <td>100.0</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>s1: “Please show me your ID.”</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>s2: “Show me your ID please.”</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>s3: “Your ID. Show it to me.”</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Letter histogram for the <italic>Stop_Playing.lang</italic> file.</p>
          </caption>
          <graphic xlink:href="games_v8i1e15349_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Letter histogram for the <italic>Ask_For_Identification.lang</italic> file.</p>
          </caption>
          <graphic xlink:href="games_v8i1e15349_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Graph Validation</title>
        <p>For the purpose of explaining how graph validation works, we intentionally added the following sentence to the <italic>Ask_For_Identification</italic> set file: “Don't resist, please.”</p>
        <p>Besides the lack of utility of such a sentence inside the file that stores different ways of asking a person for his or her hospital identity card, this sentence causes problems: when it is said by the human user, it would lead to ambiguity. In <xref rid="figure6" ref-type="fig">Figures 6</xref> and <xref rid="figure7" ref-type="fig">7</xref>, letter histograms for “Don't resist, please” and “Please, don't resist” are highlighted in red; this is not because of the absolute similarity between them, but because of their similarity to this extra sentence included in another <italic>.lang</italic> file, which would render the system unable to decide which transition is the correct one to be triggered.</p>
        <p>The system informs us that sentences highlighted in red can confuse the situation manager when pronounced by the trainee. In this case, the solution is straightforward, as we have artificially generated the problem. The problem is solved simply by removing the extra sentence from the <italic>Ask_For_Identification.lang</italic> file. However, in more complex situations, the person in charge of the exercise design should look for alternatives.</p>
        <p>After determining all the conflicting labels, our graph validation tool also marks in red each graph node with ill-defined outgoing arcs, helping in the identification and fixing of such problems. The output is a file in the Graphviz format: see <xref rid="figure8" ref-type="fig">Figure 8</xref>, where the graph for the working exercise is colored to highlight problematic nodes. An arc whose arrow is highlighted in red means that triggering transitions from the source state is not possible. Nonproblematic arcs are highlighted in green. The goal is to rewrite the sentences associated with each arc or to modify the graph definition of the exercise until no ambiguity is detected by the tool.</p>
        <p>The use of the string metric defined in this paper is not mandatory. In fact, the user can choose one of the following string metrics and select the one that guarantees a better exercise definition to be used by the behavior simulator: (1) Levenshtein distance, as defined by Levenshtein [<xref ref-type="bibr" rid="ref14">14</xref>], (2) Gestalt pattern matching [<xref ref-type="bibr" rid="ref15">15</xref>], (3) histogram matching, as proposed in this paper, (4) Damerau-Levenshtein distance [<xref ref-type="bibr" rid="ref17">17</xref>], or (5) Jaro-Winkler distance [<xref ref-type="bibr" rid="ref18">18</xref>].</p>
        <p>The explanation of each of these string metrics is outside the scope of this paper. In practice, we use the graph validation tool to choose a string metric that guarantees the absence of ambiguities when the simulator is running. If none of them can guarantee such a condition, the <italic>.lang</italic> files must be modified. The main window of the graph validation tool is pictured in <xref rid="figure9" ref-type="fig">Figure 9</xref>.</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Processed histogram for the <italic>Stop_Playing.lang</italic> file.</p>
          </caption>
          <graphic xlink:href="games_v8i1e15349_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>Processed histogram for the <italic>Ask_For_Identification.lang</italic> file.</p>
          </caption>
          <graphic xlink:href="games_v8i1e15349_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure8" position="float">
          <label>Figure 8</label>
          <caption>
            <p>Validated graph. Init: the state in which the system performs some basic tasks. Success: this state will be reached when the human performing the training exercise completes it in a satisfactory manner. Failure: the opposite of the Success state. <italic>Regular_Chat</italic>: as soon as the exercise starts, the scene enters this state, with the virtual conversational character (VCC) engaging in small talk. Asked_ID: the situation enters this state if the user says one of the sentences associated with the <italic>Ask_For_Identification</italic> label; when in this state, the VCC will probabilistically decide to collaborate or not, showing ID, or returning to <italic>Regular_Chat</italic>. The former means reaching the Success state. The latter means that the VCC refused to obey and show their ID card. In practice, this implies remaining in the same state, <italic>Regular_Chat</italic>. <italic>Buying_Time</italic>: if the user does not ask for identification, the scene enters a dumb state, with the VCC trying to escape. If the user continues asking for ID, the situation reaches an impasse. To return to <italic>Regular_Chat</italic>, the security guard must warn the VCC about trying to escape. Any other kind of conversation triggers a Failure.</p>
          </caption>
          <graphic xlink:href="games_v8i1e15349_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure9" position="float">
          <label>Figure 9</label>
          <caption>
            <p>The main window of the graph validation tool. From this window, the user can (1) choose a Graphviz (.dot file) example to be analyzed, (2) obtain a graphic representation of the training exercise that it encodes, (3) select one of the available string metrics, (4) view letter histograms for each label, and (5) generate a .pdf file summarizing all the problems encountered during the analysis of the graph.</p>
          </caption>
          <graphic xlink:href="games_v8i1e15349_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>We have developed a system for the fast design and testing of conflict avoidance situations, involving interactions between humans and VCCs. VCC-enhanced simulators present many advantages for multimodal communication, but also have the disadvantage of dealing with complex processes in order to provide effective verbal communication between VCCs and the human user. Speech recognition software is available and working, but we needed a way to assign labels to the outputs produced by these APIs. As the number of possible sentences to be recognized is potentially huge, even for a simple training exercise, we decided to use string metrics as a way of labeling. We have developed a tool that, after designing a training exercise, analyzes the sets of sentences associated with each transition inside the situation graph, highlighting potential signs of ill-defined exercises. The tool is also used to check the existence of all the files needed for the system to work properly before the exercise starts and to dynamically change some settings, such as the minimal matching confidence level required for a positive match, once the simulation has begun. This is useful for cases when the speech recognition library is not working properly, due to suboptimal acoustic conditions of the environment or incorrect vocalization by the human user.</p>
      <p>After trying several existing string metrics, we decided to design one of our own: histogram matching. Histogram matching does this work for us at a reasonable speed, as half of the needed calculations are performed as soon as the training exercise is defined and before the whole system is running. The method is working correctly for the exercises we have defined to date. As a result, we can anticipate and solve design problems in the training exercise definition process and improve collaborative work between instructors and our development team.</p>
      <p>For the future, we are planning the development of a module that automatically assigns violence levels to the sentences included inside <italic>.lang</italic> files, as a function of the kind of vocabulary employed. There is another feature that has not yet been implemented, which would be very useful in the exercise definition process. That is, being able to check the exercise without the whole graphics system working, running only the speech recognition and language synthesis modules, and allowing the interactive visualization of the active node of the situation graph, histogram-matching level, emotional state of the VCC, etc.</p>
      <p>Our software was initially designed to help in the training of staff working for emergency services within a hospital. We think that it could also prove useful in speech therapy as a way of visually representing the differences between any goal sentence and what a human user actually says. We have identified some works using serious games in this field, such as Grossinho et al [<xref ref-type="bibr" rid="ref19">19</xref>] and Cagatay et al [<xref ref-type="bibr" rid="ref20">20</xref>]. In this sense, no structural modifications should be needed on our software, just a different philosophy in the design of the training exercises. That is to say, we have a VCC that the human must interact with. The goal would be to speak as correctly as possible in order to, for example, make the VCC do some work for us.</p>
      <p>We also think that VCCs, in general, and our system, in particular, can be useful in helping patients make informed decisions when asked about the treatment plan they prefer, as discussed in Sherwin et al [<xref ref-type="bibr" rid="ref21">21</xref>].</p>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">API</term>
          <def>
            <p>application programming interface</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">VCC</term>
          <def>
            <p>virtual conversational character</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Poggi</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Pelachaud</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>de Rosis</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Carofiglio</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>De Carolis</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Stock</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Zancanaro</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Greta. A believable embodied conversational agent</article-title>
          <source>Multimodal Intelligent Information Presentation. Text, Speech and Language Technology. Volume 27</source>
          <year>2005</year>
          <publisher-loc>Dordrecht, the Netherlands</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>3</fpage>
          <lpage>25</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ekman</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Friesen</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <source>Facial Action Coding System</source>
          <year>1978</year>
          <publisher-loc>Palo Alto, CA</publisher-loc>
          <publisher-name>Consulting Psychologists Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Martínez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shichuan</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>A model of the perception of facial expressions of emotion by humans: Research overview and perspectives</article-title>
          <source>J Mach Learn Res</source>
          <year>2012</year>
          <volume>1</volume>
          <fpage>1589</fpage>
          <lpage>1608</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-319-57021-1_6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ochs</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Niewiadomski</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Pelachaud</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Calvo</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>D'Mello</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gratch</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kappas</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Facial expressions of emotions for virtual characters</article-title>
          <source>The Oxford Handbook of Affective Computing</source>
          <year>2015</year>
          <publisher-loc>Oxford, UK</publisher-loc>
          <publisher-name>Oxford University Press</publisher-name>
          <fpage>261</fpage>
          <lpage>272</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hartholt</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Traum</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Marsella</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shapiro</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stratou</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Leuski</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Morency</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gratch</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>All together now: Introducing the virtual human toolkit</article-title>
          <source>Proceedings of the 13th International Conference on Intelligent Virtual Agents, IVA 2013</source>
          <year>2013</year>
          <conf-name>13th International Conference on Intelligent Virtual Agents, IVA 2013</conf-name>
          <conf-date>August 29-31, 2013</conf-date>
          <conf-loc>Edinburgh, UK</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/profile/Jonathan_Gratch/publication/285590059_All_Together_Now_Introducing_the_Virtual_Human_Toolkit/links/58191c8408ae1f34d24aba23/All-Together-Now-Introducing-the-Virtual-Human-Toolkit.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/978-3-642-40415-3_33</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Carbajal</surname>
              <given-names>SG</given-names>
            </name>
            <name name-style="western">
              <surname>Polimeni</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Múgica</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>An emotional engine for behavior simulators</article-title>
          <source>Int J Serious Games</source>
          <year>2015</year>
          <month>06</month>
          <day>17</day>
          <volume>2</volume>
          <issue>2</issue>
          <fpage>57</fpage>
          <lpage>67</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/profile/Santiago_Carbajal/publication/278667126_An_Emotional_Engine_for_Behavior_Simulators/links/5581ab1508ae12bde6e4aa0b/An-Emotional-Engine-for-Behavior-Simulators.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.17083/ijsg.v2i2.76</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eikelboom</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bloemers</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Winzum</surname>
              <given-names>KV</given-names>
            </name>
            <name name-style="western">
              <surname>Spronk</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Towards a game-chatbot: Extending the interaction in serious games</article-title>
          <source>Proceedings of the 6th European Conference on Games-Based Learning</source>
          <year>2012</year>
          <conf-name>6th European Conference on Games-Based Learning</conf-name>
          <conf-date>October 4-5, 2012</conf-date>
          <conf-loc>Cork, Ireland</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://eprints.teachingandlearning.ie/2469/1/Proceedings%20of%206th%20European%20Conference%20on%20Games%20Based%20Learning%202012.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johansson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dell Acqua</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Emotional behavior trees</article-title>
          <source>Proceedings of the IEEE Conference on Computational Intelligence and Games</source>
          <year>2012</year>
          <conf-name>IEEE Conference on Computational Intelligence and Games</conf-name>
          <conf-date>September 11-14, 2012</conf-date>
          <conf-loc>Granada, Spain</conf-loc>
          <pub-id pub-id-type="doi">10.1109/cig.2012.6374177</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Isla</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Handling complexity in the Halo 2 AI</article-title>
          <source>Proceedings of the Game Developers Conference</source>
          <year>2005</year>
          <month>03</month>
          <day>07</day>
          <conf-name>Game Developers Conference</conf-name>
          <conf-date>March 7-11, 2005</conf-date>
          <conf-loc>San Francisco, CA</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.gamasutra.com/view/feature/130663/gdc_2005_proceeding_handling_.php"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Imbert</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>de Antonio</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>An emotional architecture for virtual characters</article-title>
          <source>Proceedings of the 3rd International Conference on Virtual Storytelling</source>
          <year>2005</year>
          <conf-name>3rd International Conference on Virtual Storytelling</conf-name>
          <conf-date>November 30-December 2, 2005</conf-date>
          <conf-loc>Strasbourg, France</conf-loc>
          <publisher-loc>Berlin, Germany</publisher-loc>
          <publisher-name>Springer Verlag</publisher-name>
          <fpage>63</fpage>
          <lpage>72</lpage>
          <pub-id pub-id-type="doi">10.1007/11590361_7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Morie</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Chance</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Haynes</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Rajpurohit</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Hingston</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Embodied conversational agent avatars in virtual worlds: Making today's immersive environments more responsive to participants</article-title>
          <source>Believable Bots: Can Computers Play Like People?</source>
          <year>2012</year>
          <publisher-loc>Berlin, Germany</publisher-loc>
          <publisher-name>Springer-Verlag</publisher-name>
          <fpage>99</fpage>
          <lpage>118</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Herrera</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Herrera</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Velandia</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>HVUAN: A rapid-development framework for Spanish-speaking virtual humans</article-title>
          <source>Proceedings of the International Conference on Practical Applications of Agents and Multi-Agent Systems</source>
          <year>2018</year>
          <conf-name>International Conference on Practical Applications of Agents and Multi-Agent Systems</conf-name>
          <conf-date>June 20-22, 2018</conf-date>
          <conf-loc>Toledo, Spain</conf-loc>
          <fpage>318</fpage>
          <lpage>321</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-319-94580-4_29</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gansner</surname>
              <given-names>ER</given-names>
            </name>
            <name name-style="western">
              <surname>North</surname>
              <given-names>SC</given-names>
            </name>
          </person-group>
          <article-title>An open graph visualization system and its applications to software engineering</article-title>
          <source>Softw Pract Exp</source>
          <year>2000</year>
          <month>09</month>
          <volume>30</volume>
          <issue>11</issue>
          <fpage>1203</fpage>
          <lpage>1233</lpage>
          <pub-id pub-id-type="doi">10.1002/1097-024x(200009)30:11&#60;1203::aid-spe338&#62;3.0.co;2-n</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Levenshtein</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Binary codes capable of correcting deletions, insertions and reversals</article-title>
          <source>Soviet Physics Doklady</source>
          <year>1966</year>
          <volume>10</volume>
          <issue>8</issue>
          <fpage>707</fpage>
          <lpage>710</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Metzener</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ratcliff</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Pattern matching: The gestalt approach</article-title>
          <source>Dr. Dobb's Journal of Software Tools</source>
          <year>1988</year>
          <month>01</month>
          <volume>13</volume>
          <issue>7</issue>
          <fpage>46</fpage>
          <lpage>47, 59-51, 68-72</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yujian</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Bo</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>A normalized Levenshtein distance metric</article-title>
          <source>IEEE Trans Pattern Anal Mach Intell</source>
          <year>2007</year>
          <month>06</month>
          <volume>29</volume>
          <issue>6</issue>
          <fpage>1091</fpage>
          <lpage>1095</lpage>
          <pub-id pub-id-type="doi">10.1109/tpami.2007.1078</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Damerau</surname>
              <given-names>FJ</given-names>
            </name>
          </person-group>
          <article-title>A technique for computer detection and correction of spelling errors</article-title>
          <source>Commun ACM</source>
          <year>1964</year>
          <volume>7</volume>
          <issue>3</issue>
          <fpage>171</fpage>
          <lpage>176</lpage>
          <pub-id pub-id-type="doi">10.1145/363958.363994</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Winkler</surname>
              <given-names>WE</given-names>
            </name>
          </person-group>
          <source>Proceedings of the Survey Research Methods Section</source>
          <year>1990</year>
          <access-date>2020-01-17</access-date>
          <publisher-loc>Washington, DC</publisher-loc>
          <publisher-name>American Statistical Association</publisher-name>
          <comment>String comparator metrics and enhanced decision rules in the Fellegi-Sunter model of record linkage<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/profile/William_Winkler/publication/243772975_String_Comparator_Metrics_and_Enhanced_Decision_Rules_in_the_Fellegi-Sunter_Model_of_Record_Linkage/links/5522cd090cf2f9c1305447d9/String-Comparator-Metrics-and-Enhanced-Decision-Rules-in-the-Fellegi-Sunter-Model-of-Record-Linkage.pdf">https://www.researchgate.net/profile/William_Winkler/publication/243772975_String_Comparator_Metrics_and_Enhanced_Decision_Rules_in_the_Fellegi-Sunter_Model_of_Record_Linkage/links/5522cd090cf2f9c1305447d9/String-Comparator-Metrics-and-Enhanced-Decision-Rules-in-the-Fellegi-Sunter-Model-of-Record-Linkage.pdf</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guimarães</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Grossinho</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Magalhães</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cavaco</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Robust phoneme recognition for a speech therapy environment</article-title>
          <source>Proceedings of the IEEE International Conference on Serious Games and Applications for Health</source>
          <year>2016</year>
          <conf-name>IEEE International Conference on Serious Games and Applications for Health</conf-name>
          <conf-date>May 11-13, 2016</conf-date>
          <conf-loc>Orlando, FL</conf-loc>
          <pub-id pub-id-type="doi">10.1109/segah.2016.7586268</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ege</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Cagatay</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tokdemir</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Cagiltay</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>A serious game for speech disorder children therapy</article-title>
          <source>Proceedings of the 7th International Symposium on Health Informatics and Bioinformatics, HIBIT</source>
          <year>2012</year>
          <conf-name>7th International Symposium on Health Informatics and Bioinformatics, HIBIT</conf-name>
          <conf-date>April 19-22, 2012</conf-date>
          <conf-loc>Kapadokya, Nevşehir, Turkey</conf-loc>
          <fpage>18</fpage>
          <lpage>23</lpage>
          <pub-id pub-id-type="doi">10.1109/hibit.2012.6209036</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sherwin</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>McKeown</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Evans</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bhattacharyya</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>The waiting room "wait": From annoyance to opportunity</article-title>
          <source>Can Fam Physician</source>
          <year>2013</year>
          <month>05</month>
          <volume>59</volume>
          <issue>5</issue>
          <fpage>479</fpage>
          <lpage>481</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.cfp.ca/cgi/pmidlookup?view=long&#38;pmid=23673581"/>
          </comment>
          <pub-id pub-id-type="medline">23673581</pub-id>
          <pub-id pub-id-type="pii">59/5/479</pub-id>
          <pub-id pub-id-type="pmcid">PMC3653648</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
