<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JSG</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Serious Games</journal-id>
      <journal-title>JMIR Serious Games</journal-title>
      <issn pub-type="epub">2291-9279</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v12i1e50315</article-id>
      <article-id pub-id-type="pmid">38598265</article-id>
      <article-id pub-id-type="doi">10.2196/50315</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Measuring the Reliability of a Gamified Stroop Task: Quantitative Experiment</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Leung</surname>
            <given-names>Tiffany</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Cheng</surname>
            <given-names>Xiangyi</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Aghaei</surname>
            <given-names>Zahra</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Wiley</surname>
            <given-names>Katelyn</given-names>
          </name>
          <degrees>MEDes</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Computer Science</institution>
            <institution>University of Saskatchewan</institution>
            <addr-line>110 Science Place</addr-line>
            <addr-line>University of Saskatchewan</addr-line>
            <addr-line>Saskatoon, SK, S7N 5C9</addr-line>
            <country>Canada</country>
            <phone>1 5878884567</phone>
            <email>katelyn.wiley@usask.ca</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0854-346X</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Berger</surname>
            <given-names>Phaedra</given-names>
          </name>
          <degrees>BSc, BA&amp;Sc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0009-4379-2830</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Friehs</surname>
            <given-names>Maximilian Achim</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9362-4140</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Mandryk</surname>
            <given-names>Regan Lee</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0772-6616</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Computer Science</institution>
        <institution>University of Saskatchewan</institution>
        <addr-line>Saskatoon, SK</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Faculty of Behavioural, Management and Social Sciences</institution>
        <institution>University of Twente</institution>
        <addr-line>Enschede</addr-line>
        <country>Netherlands</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>School of Psychology</institution>
        <institution>University College Dublin</institution>
        <addr-line>Dublin</addr-line>
        <country>Ireland</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Max-Planck-Institute for Human Cognitive and Brain Sciences</institution>
        <addr-line>Leipzig</addr-line>
        <country>Germany</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Department of Computer Science</institution>
        <institution>University of Victoria</institution>
        <addr-line>Victoria, BC</addr-line>
        <country>Canada</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Katelyn Wiley <email>katelyn.wiley@usask.ca</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>10</day>
        <month>4</month>
        <year>2024</year>
      </pub-date>
      <volume>12</volume>
      <elocation-id>e50315</elocation-id>
      <history>
        <date date-type="received">
          <day>29</day>
          <month>6</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>4</day>
          <month>9</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>29</day>
          <month>11</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>31</day>
          <month>1</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Katelyn Wiley, Phaedra Berger, Maximilian Achim Friehs, Regan Lee Mandryk. Originally published in JMIR Serious Games (https://games.jmir.org), 10.04.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Serious Games, is properly cited. The complete bibliographic information, a link to the original publication on https://games.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://games.jmir.org/2024/1/e50315" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Few gamified cognitive tasks are subjected to rigorous examination of psychometric properties, despite their use in experimental and clinical settings. Even small manipulations to cognitive tasks require extensive research to understand their effects.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to investigate how game elements can affect the reliability of scores on a Stroop task. We specifically investigated performance consistency within and across sessions.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We created 2 versions of the Stroop task, with and without game elements, and then tested each task with participants at 2 time points. The gamified task used points and feedback as game elements. In this paper, we report on the reliability of the gamified Stroop task in terms of internal consistency and test-retest reliability, compared with the control task. We used a permutation approach to evaluate internal consistency. For test-retest reliability, we calculated the Pearson correlation and intraclass correlation coefficients between each time point. We also descriptively compared the reliability of scores on a trial-by-trial basis, considering the different trial types.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>At the first time point, the Stroop effect was reduced in the game condition, indicating an increase in performance. Participants in the game condition had faster reaction times (<italic>P</italic>=.005) and lower error rates (<italic>P</italic>=.04) than those in the basic task condition. Furthermore, the game condition led to higher measures of internal consistency at both time points for reaction times and error rates, which indicates a more consistent response pattern. For reaction time in the basic task condition, at time 1, <italic>r</italic><sub>Spearman-Brown</sub>=0.78, 95% CI 0.64-0.89. At time 2, <italic>r</italic><sub>Spearman-Brown</sub>=0.64, 95% CI 0.40-0.81. For reaction time, in the game condition, at time 1, <italic>r</italic><sub>Spearman-Brown</sub>=0.83, 95% CI 0.71-0.91. At time 2, <italic>r</italic><sub>Spearman-Brown</sub>=0.76, 95% CI 0.60-0.88. Similarly, for error rates in the basic task condition, at time 1, <italic>r</italic><sub>Spearman-Brown</sub>=0.76, 95% CI 0.62-0.87. At time 2, <italic>r</italic><sub>Spearman-Brown</sub>=0.74, 95% CI 0.58-0.86. For error rates in the game condition, at time 1, <italic>r</italic><sub>Spearman-Brown</sub>=0.76, 95% CI 0.62-0.87. At time 2, <italic>r</italic><sub>Spearman-Brown</sub>=0.74, 95% CI 0.58-0.86. Test-retest reliability analysis revealed a distinctive performance pattern depending on the trial type, which may be reflective of motivational differences between task versions. In short, especially in the incongruent trials where cognitive conflict occurs, performance in the game condition reaches peak consistency after 100 trials, whereas performance consistency drops after 50 trials for the basic version and only catches up to the game after 250 trials.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Even subtle gamification can impact task performance, albeit not only in terms of a direct difference in performance between conditions. People playing the game reach peak performance sooner, and their performance is more consistent within and across sessions. We advocate for a closer examination of the impact of game elements on performance.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>cognitive assessment</kwd>
        <kwd>gamification</kwd>
        <kwd>serious games</kwd>
        <kwd>Stroop task</kwd>
        <kwd>reliability</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>In 1886, James Cattell observed that it takes people longer to name the colors and pictures of objects than it does for them to read the corresponding word [<xref ref-type="bibr" rid="ref1">1</xref>]. This experiment, along with others, paved the way for the development of what Cattell would call <italic>mental tests</italic> and what we now call <italic>cognitive tasks</italic>. On the basis of these and other results, JR Stroop developed a test of cognitive ability in which study participants read the color but not the meaning of a color word aloud [<xref ref-type="bibr" rid="ref2">2</xref>]. The results revealed an interference effect if the word color and word meaning did not match. Typical cognitive tasks require people to respond to such visual or auditory cues, and data about their responses, often reaction time and accuracy, are collected. These data can then be used to study human cognition, create population norms, and inform medical decisions, such as dementia diagnoses [<xref ref-type="bibr" rid="ref3">3</xref>].</p>
        <p>Cognitive tasks are most useful when collecting high-quality, high-quantity data. However, this is a challenging process. Traditionally, capturing large data sets has been time consuming and expensive, requiring highly trained professionals to administer and score tasks with individual participants. With technological advancements, tasks can now be administered via computers, deployed remotely, and automatically scored [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. This automation makes it easier to collect large quantities of data but raises new concerns about data quality. Many factors influence cognitive test performance beyond cognitive capacity, such as motivation, stereotype threat, and fatigue [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. Cognitive tasks are often repetitive and boring, leading to high attrition rates [<xref ref-type="bibr" rid="ref8">8</xref>] and suboptimal effort from participants [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>].</p>
        <p>In attempts to improve the quality of data collected by such tasks, researchers have increasingly turned to gamification, with the hope that tasks can be made more engaging through the addition of game elements, such as points and graphics.</p>
      </sec>
      <sec>
        <title>Cognitive Task Gamification</title>
        <sec>
          <title>Overview</title>
          <p>Deterding et al [<xref ref-type="bibr" rid="ref11">11</xref>] defined gamification as “the use of game design elements in nongame contexts.” In the context of cognitive tasks, this process typically involves layering game elements over an already existing task. For example, the Go No-Go task has commonly been gamified by adding points [<xref ref-type="bibr" rid="ref12">12</xref>], narrative elements [<xref ref-type="bibr" rid="ref13">13</xref>], and fun graphics [<xref ref-type="bibr" rid="ref14">14</xref>] to the basic task.</p>
        </sec>
        <sec>
          <title>Enjoyment and Motivation</title>
          <p>Typically, tasks are gamified with the intent of increasing participant enjoyment and motivation. Nicholson [<xref ref-type="bibr" rid="ref15">15</xref>] noted that gamification can target both extrinsic and intrinsic motivations depending on the game elements used. Reward-based elements, such as points, achievements, and badges, target extrinsic motivation, whereas elements such as play, exposition, and choice target intrinsic motivation. By targeting motivation, researchers aim to combat attrition and encourage repeated, prolonged play [<xref ref-type="bibr" rid="ref16">16</xref>-<xref ref-type="bibr" rid="ref18">18</xref>].</p>
          <p>However, there is little examination of whether participants experience increased enjoyment when tasks are gamified. In a systematic review of gamified attention tasks, only 25 of the 74 studies reported results from an evaluation of gameplay [<xref ref-type="bibr" rid="ref16">16</xref>]. When enjoyment is measured, the research shows mixed results. Some studies have found that gamification increases motivation; for example, participants in a stop signal task study experienced higher enjoyment and more flow-like experiences in the gamified condition (as opposed to the basic task) [<xref ref-type="bibr" rid="ref19">19</xref>].</p>
          <p>Other studies have found that certain game elements, especially thematic or narrative elements, can have a negative effect on self-reported enjoyment of cognitive tasks [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>], possibly due to the “chocolate-covered broccoli” effect [<xref ref-type="bibr" rid="ref22">22</xref>]. Tasks can only be gamified to a limited extent while still retaining the important elements of the task. When participants expect a fun game and must still complete a repetitive cognitive task, they may experience even lower enjoyment than if they expected a boring task [<xref ref-type="bibr" rid="ref20">20</xref>]. Game elements can also be used to introduce other emotions. For example, Levy et al [<xref ref-type="bibr" rid="ref23">23</xref>] found that some older Jewish participants were uncomfortable with their cooking-themed game because it required making recipes containing pork products.</p>
          <p>Do these mixed findings imply that researchers should move away from gamifying tasks? Not necessarily. Participants might not <italic>enjoy</italic> assessment games more than a control task, but the data they produce may still be of higher quality.</p>
        </sec>
        <sec>
          <title>Performance</title>
          <p>Groening and Binnewies [<xref ref-type="bibr" rid="ref24">24</xref>] note that enjoyment is only one way to operationalize motivation, one closely linked to intrinsic motivation. They found that adding achievement-based game elements to a series of simple tasks did not improve self-reported motivation but did improve persistence—when participants could earn achievements, they engaged with a Stroop task for longer before voluntarily switching tasks, compared with when no achievements were available. Similarly, Mekler et al [<xref ref-type="bibr" rid="ref25">25</xref>] found that when they gamified an image annotation task, participants generated significantly more annotations, despite no reported differences in intrinsic motivation or competence need satisfaction when compared with the basic task.</p>
          <p>Adding game elements to a task may improve performance (without affecting enjoyment) in various ways. For example, Jung et al [<xref ref-type="bibr" rid="ref26">26</xref>] compared the performance of participants who were given a numeric goal (ie, generating 22 ideas) with those who were asked to “do their best.” Participants who were given a specific goal generated higher quantity and higher quality responses. When completing cognitive tasks, participants are often instructed to respond “as quickly and accurately as possible.” This nebulous goal can be clarified and reinforced through game elements that provide immediate feedback such as scoring points for fast reactions or losing points for incorrect responses.</p>
          <p>When designing gamified tasks for research and assessment purposes, it may be beneficial to focus on influencing performance rather than on enjoyment. Levy et al [<xref ref-type="bibr" rid="ref23">23</xref>] noted that changes in emotions can influence cognitive abilities, which may interfere with the collection of valid and reliable data when using games as scientific tools. When Vanden Abeele et al [<xref ref-type="bibr" rid="ref27">27</xref>] compared 2 games designed to measure psychoacoustic thresholds in preschoolers, they found that the more fully developed and motivating game was able to detect lower thresholds. As another example, Delisle and Braun [<xref ref-type="bibr" rid="ref28">28</xref>] found that changing a task to resemble a fast-paced videogame normalized the performance of participants with attention-deficit/hyperactivity disorder (ADHD), meaning that participants with and without ADHD performed similarly on a gamified task (but differently on a standard task). In some cases, such an effect may be desired, but it depends on why the task is used and gamified.</p>
        </sec>
      </sec>
      <sec>
        <title>Psychometric Properties of Gamified Tasks</title>
        <p>Tasks may also be gamified with the goal of improving the psychometric properties of a task, such as validity (how well a task measures what it claims to measure) and reliability (how consistent the measurement obtained by the task is) [<xref ref-type="bibr" rid="ref29">29</xref>]. There are also different types of evidence for reliability that must be considered when gamifying cognitive tasks. Internal consistency refers to the stability of the task data within an assessment; for example, the similarity of a participant’s reaction time at the beginning of a task to their reaction time at the end of the task. Test-retest reliability refers to the stability of the task data over time; for example, how similar a participant’s score on a task is at one time point compared with their score on the task a month later.</p>
        <p>Typical cognitive tasks are boring, repetitive, and long partly because of the issue of reliability. From one trial to the next, people will perform quite differently, so multiple trials are needed to decrease measurement noise [<xref ref-type="bibr" rid="ref30">30</xref>]. Adding game elements to a task may change the reliability of its measurement. Participants may be sufficiently engaged that their performance is more stable over time; for example, perhaps only 20 trials are needed for a reliable measure, instead of 200. Friehs et al [<xref ref-type="bibr" rid="ref19">19</xref>] found that response variability in a gamified stop signal task was lower than that in the nongame version. Shorter tasks would require fewer resources to administer and would reduce the burden on participants, which would be particularly beneficial for clinical and pediatric populations.</p>
        <p>Game elements also offer the ability to guide participants’ performance. Most cognitive tasks use measures of reaction time and accuracy, which leads to classic speed-accuracy trade-offs—the faster a participant responds, the less accurate they will be, and vice versa. Individual participants also favor speed or accuracy differently than one another [<xref ref-type="bibr" rid="ref30">30</xref>]. These behaviors can be manipulated through instructions (eg, asking participants to respond as quickly as possible). Game elements can also indirectly encourage participants to emphasize speed or accuracy, for example, by awarding points or feedback for faster or more accurate responses, generating more consistency across participants [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>].</p>
      </sec>
      <sec>
        <title>This Study</title>
        <sec>
          <title>Overview</title>
          <p>Few gamified cognitive tasks are subjected to rigorous examination of psychometric properties [<xref ref-type="bibr" rid="ref16">16</xref>], despite their use in experimental and clinical settings. Parsons et al [<xref ref-type="bibr" rid="ref32">32</xref>] noted that psychology lacks a standard practice of reporting the reliability of cognitive task measurements. This problem is exacerbated when tasks are adapted, for example, through gamification. Even small manipulations of cognitive tasks require extensive research to understand their effects [<xref ref-type="bibr" rid="ref33">33</xref>].</p>
          <p>In this study, we sought to research how game elements can affect the reliability of scores on a cognitive task, specifically the Stroop task. As a typical cognitive task that demonstrates robust experimental effects in the general population [<xref ref-type="bibr" rid="ref34">34</xref>], the Stroop task is well suited for this research.</p>
        </sec>
        <sec>
          <title>The Stroop Task</title>
          <p>Building on the 1886 work by Cattell [<xref ref-type="bibr" rid="ref1">1</xref>] with cognitive tasks, in 1935, Stroop [<xref ref-type="bibr" rid="ref2">2</xref>] conducted an experiment in which he asked participants to either name the colors of colored rectangles or name the colors of mismatched words (eg, the word “blue” printed in red ink). Participants responded much more slowly when naming incongruent colored words, a paradigm we now call the Stroop effect [<xref ref-type="bibr" rid="ref2">2</xref>].</p>
          <p>Since Stroop’s first experiment and subsequent development of the experimental protocol [<xref ref-type="bibr" rid="ref35">35</xref>-<xref ref-type="bibr" rid="ref37">37</xref>], the Stroop task has become one of the most widely used tasks in both cognitive and clinical psychology [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref38">38</xref>]. Recently, the Stroop task has been gamified for experimental and clinical applications. For example, Groening and Binnewies [<xref ref-type="bibr" rid="ref39">39</xref>] used the Stroop task to investigate the effects of game elements on participants’ motivation and performance. They found that when points and story elements were added to the task, participants were more persistent (they engaged with the task for longer before switching to a new task) and reported higher motivation. Gomez-Tello et al [<xref ref-type="bibr" rid="ref40">40</xref>] used gamified tasks as part of a battery of tests for neuropsychological screening of children and found evidence of the Stroop effect in a gamified version of the task. However, previous studies have not considered the reliability of the Stroop effect in a gamified task, either in terms of internal consistency or test-retest reliability. Thus, we have little guidance on when gamified tasks can or should not be used in assessments.</p>
          <p>We created 2 versions of the Stroop task, with and without game elements, and tested each task with participants at 2 time points. In this paper, we report on the reliability of the gamified Stroop task in terms of internal consistency and test-retest reliability, compared with the control task. We also compared the reliability of these scores on a trial-by-trial basis. Our objective was to demonstrate how game elements can affect the reliability of scores on a Stroop task.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Ethical Considerations</title>
        <p>This research project was approved on ethical grounds by the University of Saskatchewan Research Ethics Board (BEH 17-418). The participants were given GBP £6 (USD $8.3 at time of study) compensation at each time point.</p>
      </sec>
      <sec>
        <title>Tasks</title>
        <p>The control task was designed using the basic computerized Stroop task described by MacLeod [<xref ref-type="bibr" rid="ref34">34</xref>] and Hedge et al [<xref ref-type="bibr" rid="ref41">41</xref>] as models. Participants were shown words in the middle of their screen in various colors (red, blue, green, or yellow). The word could be the same as the font color (congruent condition), a noncolor word (lot, ship, cross, or advice; neutral condition), or a nonmatching color word (eg, the word “blue” shown in green; incongruent condition). After each word, participants were asked to press a key corresponding to the font color (z-key for red, x-key for blue, n-key for green, and m-key for yellow). The participants first completed a training exercise to learn each keymap. The task consisted of 240 trials in each condition (congruent, neutral, and incongruent) for a total of 720 trials.</p>
        <p>The gamified version was designed to increase reliability by manipulating the speed-accuracy trade-off [<xref ref-type="bibr" rid="ref30">30</xref>] and improving engagement through game elements. On the basis of prior research, which demonstrated increased enjoyment from points and decreased enjoyment from themes added to a gamified task [<xref ref-type="bibr" rid="ref20">20</xref>], we focused on adding points-based game elements to the Stroop task. Points-based elements also target extrinsic motivation (rather than intrinsic motivation), which may be more effective in influencing participant performance [<xref ref-type="bibr" rid="ref24">24</xref>]. We followed the feedback category of the Gameful Design Heuristics from Tondello et al [<xref ref-type="bibr" rid="ref42">42</xref>], which states that the system should offer users clear and immediate feedback, actionable feedback, and graspable progress.</p>
        <p>Using feedback also allowed us to manipulate the speed-accuracy trade-off by preferentially awarding points for faster (but still correct) answers. In the game version of our task, participants saw their response time for each trial and whether they answered correctly. A record of the fastest response time was also displayed at the corner of the screen. They lost 5 points for any incorrect answer, gained 5 points for any correct answer, and were rewarded with a bonus of 25 points for responses that broke their previous “fastest time” record. A progress bar at the bottom of the screen tracked the points (<xref rid="figure1" ref-type="fig">Figure 1</xref>).</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Game version of the task after a correct response was entered.</p>
          </caption>
          <graphic xlink:href="games_v12i1e50315_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Participants</title>
        <p>Participants were recruited through Prolific, a web-based platform for recruiting research participants. Web-based platforms are commonly used in human-computer interaction research to conduct studies [<xref ref-type="bibr" rid="ref43">43</xref>] and have been shown to yield reliable data when precautionary methods for data gathering and analysis are used [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>]. Each participant completed either the control task or the gamified task at 2 time points, 3 weeks apart (time 1 and time 2). The participants signed a consent form, were given instructions and training for the task, and then completed the task. After completion, they answered questionnaires collecting demographic information, including information about their experience with the task (Intrinsic Motivation Inventory [<xref ref-type="bibr" rid="ref46">46</xref>]), their general gaming behavior, and self-reported attentional control (Attentional Control Scale [<xref ref-type="bibr" rid="ref47">47</xref>]).</p>
        <p>The study design was between-subjects, with half the participants completing the control version of the task and the other half completing the points version. The participants were randomly assigned to a condition. The study took approximately 40 minutes to complete.</p>
        <p>Our analyses were based on the methods of Parsons et al [<xref ref-type="bibr" rid="ref32">32</xref>] and Hedge et al [<xref ref-type="bibr" rid="ref41">41</xref>]. Both studies used the same data sets, which had data from 47 (study 1) and 56 (study 2) participants for the Stroop task. In these studies, this sample size was sufficient to observe effects with medium effect sizes. Thus, based on these prior studies, we aimed to obtain approximately 50 participants for each condition [<xref ref-type="bibr" rid="ref48">48</xref>].</p>
        <p>We only analyzed data from participants who had completed both sessions. We also set quality thresholds and removed participants who did not meet them at either time point. Finally, we also removed outlying data points, such as individual trials that were much slower than the average for each participant, to reduce noise in the data, as the study was web-based, and we could not otherwise account for participant distraction from the tasks.</p>
      </sec>
      <sec>
        <title>Statistical Analysis</title>
        <sec>
          <title>Reaction Time and Error Rate Data</title>
          <p>We conducted 2-way ANOVAs with task type (basic or game) and trial condition (congruent, neutral, or incongruent) for reaction time and error rate data. We used 1-way ANOVAs to compare the effect of task type on the skewness and kurtosis of the distribution of reaction time data for each participant. In addition, we conducted 2-way repeated measures ANOVAs (task type × time) for reaction time cost and error rate cost data and 3-way repeated measures ANOVAs (task type × trial condition × time) for reaction time and error rate data. We also created groups representing low and high attentional control based on a median split of our participants’ Attentional Control Scale scores (median 51.0) and then conducted 3-way repeated measures ANOVAs (task type × attention × time) for reaction time cost and error rate cost data.</p>
        </sec>
        <sec>
          <title>Internal Consistency and Test-Retest Reliability</title>
          <p>For measuring and reporting reliability, our analysis followed the recommendations from Parsons et al [<xref ref-type="bibr" rid="ref32">32</xref>]. To evaluate internal consistency, we used a permutation approach, which involves repeatedly randomly splitting the data, calculating the reliability estimate, and then averaging all estimates. This approach provides a more stable estimate, independent of how trial stimuli and conditions are presented [<xref ref-type="bibr" rid="ref32">32</xref>]. To evaluate test-retest reliability, we calculated the Pearson correlation between each time point. We also used intraclass correlation coefficients (ICCs) to indicate the degree of consistency and agreement between each time point. On the basis of the recommendations of Parsons et al, we used ICCs labelled ICC(3,1) and ICC(2,1), as described by Shrout and Fleiss [<xref ref-type="bibr" rid="ref49">49</xref>]. Finally, we plotted the test-retest reliability as the number of trials increased. To achieve this, we followed the method used by Hedge et al [<xref ref-type="bibr" rid="ref41">41</xref>].</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Participants</title>
        <p>For the first round of data collection (time 1), we received 135 responses, followed by 78 responses for time 2.</p>
        <p>All participants met the criteria for questionnaire speed of completion (participants needed to spend an average of 1.5 seconds per item) and variance (participants needed to show some variance across items). In total, 13 participants were excluded because they too frequently provided an incorrect response on the Stroop task (total incorrect responses&gt;1 SD above the mean number of incorrect responses) and because they responded to trials too slowly (mean reaction time&gt;3 SD above the group mean reaction time). Before calculating the group mean reaction time, we also removed any individual trials that were slower than the average for each participant (reaction time&gt;3 SD above the individual mean reaction time), as well as any remaining outlier trials that were slower than 2000 milliseconds. At time 1, we removed 1667 trials (out of 50,400). At time 2, we removed 1976 trials (out of 49,680). Notably, both at time 1 and time 2, significantly fewer trials needed to be removed from the game condition compared with the basic version; 38.6% of the removed trials were in the game condition at time 1, and 32.9% were in the game condition at time 2.</p>
        <p>After exclusions, 65 participants remained (50 female, 13 male, 1 nonbinary, and 1 prefer not to disclose; mean age 23.91, SD 4.64 years), with 31 participants in the basic task condition and 34 participants in the game condition. Our sample had a high proportion of women because of the web-based platform we used [<xref ref-type="bibr" rid="ref50">50</xref>]. The participants had a mean score of 51.8 (SD 7.54) on the Attentional Control Scale.</p>
      </sec>
      <sec>
        <title>Intrinsic Motivation Inventory</title>
        <p>At both time points, the basic task and game conditions showed no significant differences for any of the Intrinsic Motivation Inventory subscales (interest, competence, effort, and pressure).</p>
      </sec>
      <sec>
        <title>Reaction Time and Error Rate Data</title>
        <p>We averaged the reaction times and error rates across participants and then analyzed each measure by task type and trial condition at each time point. We also calculated reaction time and error rate costs (mean incongruent trials minus mean congruent trials). <xref ref-type="table" rid="table1">Table 1</xref> presents the descriptive statistics for each measure.</p>
        <p>Histograms of reaction time for all participants are presented in <xref rid="figure2" ref-type="fig">Figure 2</xref> by task type and time point. One-way ANOVAs revealed no significant effects of task type on the skewness and kurtosis of the distribution of reaction time data for each participant (<xref ref-type="table" rid="table2">Table 2</xref>).</p>
        <p>The 2-way ANOVAs for reaction time and error rate demonstrated evidence of the Stroop effect at both time points (significant differences between incongruent trials and both congruent and neutral trials). Furthermore, congruence sequence effect analysis revealed the expected adaptive control effect but no effect of task condition, time, or an interaction between the 2 emerged. There were also significant differences between task conditions at time 1: participants in the game condition had faster reaction times and lower error rates than those in the basic task condition. There were no significant differences at time 2 (<xref ref-type="table" rid="table3">Tables 3</xref> and <xref ref-type="table" rid="table4">4</xref>).</p>
        <p>Two-way repeated measures ANOVAs (task type × time) for reaction time cost and error rate cost data showed no significant interaction effects (<xref ref-type="table" rid="table5">Table 5</xref>). The 3-way repeated measures ANOVAs (task type × trial condition × time) for reaction time and error rate data showed no significant interaction effects (<xref ref-type="table" rid="table5">Table 5</xref>). On the basis of grouping our participants into low and high attentional control categories, we found a significant 3-way interaction between time, task type, and attention category for the error rate (<xref ref-type="table" rid="table5">Table 5</xref>). Participants who scored low in attentional control and were in the basic task condition had a lower error rate cost at time 1 than at time 2. In the game condition, participants who scored low on attentional control had a higher error rate cost at time 1 than at time 2. The error rate cost for participants who scored high on attentional control showed an opposite pattern. There were no significant simple 2-way interactions between task type and attention category at either time point.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Descriptive statistics for reaction time and error rates, at times 1 and 2 for each task type.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="440"/>
            <col width="260"/>
            <col width="270"/>
            <thead>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>Time 1, mean (SD)</td>
                <td>Time 2, mean (SD)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="4">
                  <bold>Basic task</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Congruent reaction time (milliseconds)</td>
                <td>678 (103)</td>
                <td>659 (104)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Neutral reaction time (milliseconds)</td>
                <td>671 (94.0)</td>
                <td>656 (94.7)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Incongruent reaction time (milliseconds)</td>
                <td>796 (124)</td>
                <td>758 (118)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Reaction time cost (milliseconds)</td>
                <td>118 (50.9)</td>
                <td>98.8 (39.8)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Congruent correct (%)</td>
                <td>96.0 (2.86)</td>
                <td>96.1 (2.52)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Neutral correct (%)</td>
                <td>96.7 (2.33)</td>
                <td>96.8 (2.43)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Incongruent correct (%)</td>
                <td>93.1 (5.46)</td>
                <td>93.6 (4.36)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Error rate cost (%)</td>
                <td>2.86 (4.53)</td>
                <td>2.55 (3.23)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Game task</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Congruent reaction time (milliseconds)</td>
                <td>638 (94.5)</td>
                <td>645 (95.3)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Neutral reaction time (milliseconds)</td>
                <td>628 (84.1)</td>
                <td>631 (79.1)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Incongruent reaction time (milliseconds)</td>
                <td>753 (112)</td>
                <td>730 (103)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Reaction time cost (milliseconds)</td>
                <td>115 (48.8)</td>
                <td>85.3 (42.3)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Congruent correct (%)</td>
                <td>94.6 (3.70)</td>
                <td>95.5 (2.50)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Neutral correct (%)</td>
                <td>96.0 (2.53)</td>
                <td>96.0 (2.79)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Incongruent correct (%)</td>
                <td>92.1 (3.90)</td>
                <td>93.0 (4.80)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Error rate cost (%)</td>
                <td>2.52 (4.71)</td>
                <td>2.53 (4.18)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Histograms of reaction time by time point and task type for each type of trial condition.</p>
          </caption>
          <graphic xlink:href="games_v12i1e50315_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>ANOVA summary table for reaction time distribution.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="250"/>
            <col width="0"/>
            <col width="180"/>
            <col width="0"/>
            <col width="100"/>
            <col width="0"/>
            <col width="70"/>
            <col width="0"/>
            <col width="370"/>
            <thead>
              <tr valign="top">
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="2">Mean squares</td>
                <td colspan="2"><italic>F</italic> test (<italic>df</italic>)</td>
                <td colspan="2"><italic>P</italic> value</td>
                <td>Effect size</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="10">
                  <bold>Time 1</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Skewness</td>
                <td colspan="2">0.316</td>
                <td colspan="2">1.863 (1)</td>
                <td colspan="2">.18</td>
                <td colspan="2">0.029</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Kurtosis</td>
                <td colspan="2">0.003</td>
                <td colspan="2">0.001 (1)</td>
                <td colspan="2">.98</td>
                <td colspan="2">0.000</td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Time 2</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Skewness</td>
                <td colspan="2">0.317</td>
                <td colspan="2">1.852 (1)</td>
                <td colspan="2">.18</td>
                <td colspan="2">0.029</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Kurtosis</td>
                <td colspan="2">1.159</td>
                <td colspan="2">0.358 (1)</td>
                <td colspan="2">.55</td>
                <td colspan="2">0.006</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>ANOVA summary table for reaction time.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="360"/>
            <col width="160"/>
            <col width="0"/>
            <col width="100"/>
            <col width="70"/>
            <col width="0"/>
            <col width="280"/>
            <thead>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="2">Mean squares</td>
                <td><italic>F</italic> test (<italic>df</italic>)</td>
                <td colspan="2"><italic>P</italic> value</td>
                <td>Effect size</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="8">
                  <bold>Time 1</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Task type</td>
                <td>85,185.015</td>
                <td colspan="2">8.107 (1)</td>
                <td>.005</td>
                <td colspan="2">0.041</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Condition</td>
                <td>317,396.780</td>
                <td colspan="2">30.205 (2)</td>
                <td>&lt;.001</td>
                <td colspan="2">0.242</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Task type × condition</td>
                <td>49.167</td>
                <td colspan="2">0.005 (2)</td>
                <td>&gt;.99</td>
                <td colspan="2">0.000</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Time 2</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Task type</td>
                <td>23,700.032</td>
                <td colspan="2">2.402 (1)</td>
                <td>.12</td>
                <td colspan="2">0.013</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Condition</td>
                <td>201,201.515</td>
                <td colspan="2">20.394 (2)</td>
                <td>&lt;.001</td>
                <td colspan="2">0.178</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Task type × condition</td>
                <td>788.555</td>
                <td colspan="2">0.080 (2)</td>
                <td>.92</td>
                <td colspan="2">0.001</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>ANOVA summary table for error rate.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="360"/>
            <col width="160"/>
            <col width="0"/>
            <col width="100"/>
            <col width="70"/>
            <col width="0"/>
            <col width="280"/>
            <thead>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="2">Mean squares</td>
                <td><italic>F</italic> test (<italic>df</italic>)</td>
                <td colspan="2"><italic>P</italic> value</td>
                <td>Effect size</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="8">
                  <bold>Time 1</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Task type</td>
                <td>0.005</td>
                <td colspan="2">4.012 (1)</td>
                <td>.05</td>
                <td colspan="2">0.021</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Condition</td>
                <td>0.024</td>
                <td colspan="2">18.301 (2)</td>
                <td>&lt;.001</td>
                <td colspan="2">0.162</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Task type × condition</td>
                <td>0.000</td>
                <td colspan="2">0.148 (2)</td>
                <td>.86</td>
                <td colspan="2">0.002</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Time 2</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Task type</td>
                <td>0.002</td>
                <td colspan="2">1.945 (1)</td>
                <td>.17</td>
                <td colspan="2">0.010</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Condition</td>
                <td>0.018</td>
                <td colspan="2">15.402 (2)</td>
                <td>&lt;.001</td>
                <td colspan="2">0.140</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Task type × condition</td>
                <td>0.010</td>
                <td colspan="2">0.022 (2)</td>
                <td>.98</td>
                <td colspan="2">0.000</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Repeated measures ANOVA summary table for reaction time and error rate.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="310"/>
            <col width="0"/>
            <col width="200"/>
            <col width="0"/>
            <col width="120"/>
            <col width="0"/>
            <col width="70"/>
            <col width="0"/>
            <col width="270"/>
            <thead>
              <tr valign="top">
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="2">Mean squares</td>
                <td colspan="2"><italic>F</italic> test (<italic>df</italic>)</td>
                <td colspan="2"><italic>P</italic> value</td>
                <td>Effect size</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="10">
                  <bold>Reaction time cost</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Task type × time</td>
                <td colspan="2">880.934</td>
                <td colspan="2">1.105 (1)</td>
                <td colspan="2">.30</td>
                <td colspan="2">0.017</td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Reaction time</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Trial type × task type × time<sup>a</sup></td>
                <td colspan="2">317.106</td>
                <td colspan="2">0.616 (2)</td>
                <td colspan="2">.49</td>
                <td colspan="2">0.010</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Attention × task type × time</td>
                <td colspan="2">1325.711</td>
                <td colspan="2">1.665 (1)</td>
                <td colspan="2">.20</td>
                <td colspan="2">0.012</td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Error rate cost</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Task type × time</td>
                <td colspan="2">&lt;0.001</td>
                <td colspan="2">0.106 (1)</td>
                <td colspan="2">.75</td>
                <td colspan="2">0.002</td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Error rate</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Trial type × task type × time</td>
                <td colspan="2">0.000</td>
                <td colspan="2">0.615 (2)</td>
                <td colspan="2">.54</td>
                <td colspan="2">0.010</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Attention × task type × time</td>
                <td colspan="2">39.218</td>
                <td colspan="2">5.493 (1)</td>
                <td colspan="2">.02</td>
                <td colspan="2">0.083</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>Because the interaction violates the assumption of sphericity (<italic>P</italic>&lt;.001), <italic>P</italic> values are derived using the Greenhouse-Geisser correction.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Internal Consistency</title>
        <sec>
          <title>Overview</title>
          <p>We estimated the internal consistency of the basic task by using a permutation-based split-half approach [<xref ref-type="bibr" rid="ref32">32</xref>] with 5000 random splits. Internal consistency can range from 0 to 1, with higher values representing more consistency across an individual’s complete set of trials.</p>
        </sec>
        <sec>
          <title>Reaction Time</title>
          <p>When using the reaction time cost, the (Spearman-Brown corrected) split-half internal consistency for the basic task at time 1 was <italic>r</italic><sub>Spearman-Brown</sub>=0.78, 95% CI 0.64-0.89. At time 2, <italic>r</italic><sub>Spearman-Brown</sub>=0.64, 95% CI 0.40-0.81.</p>
          <p>For the game condition at time 1, the split-half internal consistency was <italic>r</italic><sub>Spearman-Brown</sub>=0.83, 95% CI 0.71-0.91. At time 2, <italic>r</italic><sub>Spearman-Brown</sub>=0.76, 95% CI 0.60-0.88.</p>
          <p>The internal consistency values were higher at both time 1 and time 2 for the game condition (<xref rid="figure3" ref-type="fig">Figure 3</xref>); however, converting the correlations to Fisher <italic>z</italic> scores indicated no significant differences between groups at each time point.</p>
          <fig id="figure3" position="float">
            <label>Figure 3</label>
            <caption>
              <p>Internal consistency of reaction time cost for each time point and task type.</p>
            </caption>
            <graphic xlink:href="games_v12i1e50315_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Error Rate</title>
          <p>When using error rate cost, the (Spearman-Brown corrected) split-half internal consistency for the basic task at time 1 was <italic>r</italic><sub>Spearman-Brown</sub>=0.79, 95% CI 0.66-0.89. At time 2, <italic>r</italic><sub>Spearman-Brown</sub>=0.60, 95% CI 0.34-0.79.</p>
          <p>For the game condition at time 1, the split-half internal consistency was <italic>r</italic><sub>Spearman-Brown</sub>=0.76, 95% CI 0.62-0.87. At time 2, <italic>r</italic><sub>Spearman-Brown</sub>=0.74, 95% CI 0.58-0.86.</p>
          <p>The internal consistency values were higher at time 2 for the game condition (<xref rid="figure4" ref-type="fig">Figure 4</xref>); however, similar to the reaction time data, converting the correlations to Fisher <italic>z</italic> scores indicated no significant differences between groups at each time point.</p>
          <fig id="figure4" position="float">
            <label>Figure 4</label>
            <caption>
              <p>Internal consistency of error rate cost for each time point and task type.</p>
            </caption>
            <graphic xlink:href="games_v12i1e50315_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
      <sec>
        <title>Test-Retest Reliability</title>
        <sec>
          <title>Reaction Time</title>
          <p>Using reaction time cost data, for the basic task, the Pearson correlation between each time point indicated a test-retest reliability of 0.68, 95% CI 0.43-0.84. This correlation was significant (<italic>t</italic><sub>29</sub>=5.04; <italic>P</italic>&lt;.001). For the game condition, we found a test-retest reliability of 0.58, 95% CI 0.31-0.77. This correlation was also significant (<italic>t</italic><sub>32</sub>=4.07; <italic>P</italic>&lt;.001).</p>
          <p>We also estimated the test-retest reliability between time 1 and time 2 with ICCs using the <italic>psych</italic> package in R (R Foundation for Statistical Computing) [<xref ref-type="bibr" rid="ref51">51</xref>]. ICCs were used to measure the reliability of a measure between 2 time points. The ICC value can range from 0 to 1, with higher values indicating higher reliability. We report the results of 2-way mixed-effects models for absolute agreement, ICC(2,1), and consistency, ICC(3,1).</p>
          <p>Using reaction time cost data, for the basic task, the estimated agreement was 0.61, 95% CI 0.36-0.78, and the estimated consistency was 0.66, 95% CI 0.46-0.80. For the game condition, the estimated agreement was 0.48, 95% CI 0.16-0.69, and the estimated consistency was 0.58, 95% CI 0.35-0.74.</p>
          <p>Typically, cognitive tasks require many trials to reduce measurement noise. We plotted how ICC(3,1) changes as the number of trials increases, to see if a more stable estimate could be determined with fewer trials when using game elements. <xref rid="figure5" ref-type="fig">Figure 5</xref> shows how the reliability of the Stroop effect (reaction time cost) changes with an increasing number of trials.</p>
          <p>To investigate why the game condition shows lower test-retest reliability, we also plotted how the reliability of reaction time changes over time for each trial type (neutral, congruent, and incongruent trials; <xref rid="figure6" ref-type="fig">Figure 6</xref>). Comparing the plots suggests that the game condition reaches a higher level of consistency sooner for incongruent trials, compared with both neutral and congruent conditions. The basic task showed similar patterns of consistency across all trial types.</p>
          <fig id="figure5" position="float">
            <label>Figure 5</label>
            <caption>
              <p>Test-retest reliability of reaction time cost as the number of trials increases for each task type.</p>
            </caption>
            <graphic xlink:href="games_v12i1e50315_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <fig id="figure6" position="float">
            <label>Figure 6</label>
            <caption>
              <p>Test-retest reliability of reaction time as the number of trials increases for each trial type and task type.</p>
            </caption>
            <graphic xlink:href="games_v12i1e50315_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Error Rate</title>
          <p>Using error rate cost data, for the basic task, the Pearson correlation between each time point indicated a test-retest reliability of 0.55, 95% CI 0.24-0.76. This correlation was significant (<italic>t</italic><sub>29</sub>=3.56; <italic>P</italic>=.001). For the game condition, we found a test-retest reliability of 0.62, 95% CI 0.35-0.79. This correlation was also significant (<italic>t</italic><sub>32</sub>=4.45; <italic>P</italic>&lt;.001).</p>
          <p>Using error rate cost data, for the basic task, ICC(2,1) (estimated agreement) was 0.53, 95% CI 0.28-0.71, and ICC(3,1) (estimated consistency) was 0.53, 95% CI 0.28-0.71. For the game condition, ICC(2,1) was 0.62, 95% CI 0.42-0.77, and ICC(3,1) was 0.62, 95% CI 0.41-0.77.</p>
          <p>We plotted how ICC(3,1) changes as the number of trials increases, to determine whether a more stable estimate could be determined with fewer trials when using game elements. <xref rid="figure7" ref-type="fig">Figure 7</xref> shows how the reliability of the Stroop effect using the error rate cost changes with an increasing number of trials.</p>
          <p>As with reaction time, we plotted how the reliability of the number of errors changes over time for each trial type (neutral, congruent, and incongruent trials; <xref rid="figure8" ref-type="fig">Figure 8</xref>). The basic task showed similar patterns of consistency across all the trial types, whereas in the game condition, only the neutral and congruent conditions were similar—the reliability of the incongruent trials continued to increase over time.</p>
          <fig id="figure7" position="float">
            <label>Figure 7</label>
            <caption>
              <p>Test-retest reliability of error rate cost as the number of trials increases, for each task type.</p>
            </caption>
            <graphic xlink:href="games_v12i1e50315_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <fig id="figure8" position="float">
            <label>Figure 8</label>
            <caption>
              <p>Test-retest reliability of error rate as the number of trials increases for each trial type and task type.</p>
            </caption>
            <graphic xlink:href="games_v12i1e50315_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Summary and Explanation of Findings</title>
        <sec>
          <title>Performance</title>
          <p>Both versions of the task demonstrated the Stroop effect, meaning that the effect is robust to the addition of certain game elements. Gamification can affect the validity of cognitive tasks; for example, adding graphics (especially those that change the stimuli participants respond to) can worsen performance compared with a control task [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. In this study, in the game condition, reaction times and a progress bar were perpetually displayed on the screen. Graphics indicating gained or lost points also appeared between stimuli. These elements did not interfere with the validity of the Stroop task.</p>
          <p>There were no significant differences in performance-based measures between the basic task and game conditions, with one exception: Participants in the game condition had significantly faster reaction times and lower error rates than those in the basic task condition but only at time 1. There may be several reasons for these results.</p>
          <p>Points that function as extrinsic motivators have been shown to improve performance in cognitive tasks [<xref ref-type="bibr" rid="ref25">25</xref>]; however, this effect may be short lived. Nicholson [<xref ref-type="bibr" rid="ref15">15</xref>] noted that reward-based game elements can drive immediate spikes in engagement but only as long as continuous rewards are provided. In our game condition, participants were continually awarded points for accurate responses; however, for reaction time, they were only awarded bonus points for responses that broke their previous “fastest time” record. There is a physical limitation on how quickly participants can react to stimuli—once that threshold is met, it will be near impossible to improve further, and the motivating influence of the bonus points may be diminished.</p>
          <p>In the game condition, participants may also learn faster and reach their “peak performance” sooner. Participants were quickly incentivized to put forth their best effort. This effect may be particularly pronounced when the cognitive demands of the task are higher. When we plotted the reliability of reaction time and error rate as the number of trials increased, the incongruent trials showed an improved pattern of consistency only in the game condition. Specifically, after approximately 50 to 100 trials, the reaction time remained consistent in the game, whereas there was a significant variation in the basic version, with a noticeable drop after 50 trials. A similar pattern was observed for the error rates. For the basic task, the plots of all 3 trial types showed similar patterns across both performance measures. This is especially noteworthy because incongruent trials are arguably the most important trials in the Stroop task, as they are the trials wherein cognitive conflict needs to be resolved. Improved performance in the incongruent trials also explains why the reliability of the Stroop effect (reaction time cost) appeared lower in the game condition—participants in that condition performed better and more consistently in the incongruent trials.</p>
          <p>The differences between the basic task and game conditions may be emphasized by incongruent trials because they are more cognitively demanding than the congruent and neutral trials. Evidence suggests that game elements can differentially affect cognition depending on how participants experience the demands of the task. For example, gamification can normalize the performance of participants with ADHD [<xref ref-type="bibr" rid="ref28">28</xref>].</p>
          <p>Another indication of improved performance consistency comes in the form of a significantly smaller number of outlier trials that need to be removed from the game condition compared to the basic version. Approximately twice the number of far-out outlier trials were removed from the basic task. These trials were not considered valuable data and were essentially lost time for both the researcher and the participant. By reducing the number of trials that needed to be removed from the performance data, the time investment for participants was reduced. Furthermore, this means that the previous results are a conservative estimate of the game’s reliability advantage because the most egregious outliers were already removed from the analysis.</p>
        </sec>
        <sec>
          <title>Enjoyment</title>
          <p>There were also no differences in the self-reported measures of motivation between the basic task and game conditions. These results align with those of other studies, which found that achievement-based game elements are only effective in promoting performance and not motivation [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>].</p>
          <p>Levy et al [<xref ref-type="bibr" rid="ref23">23</xref>] note how carefully games must be designed to appropriately function as scientific tools and highlight the importance of using the research and data collection goals to inform the choice of game design. For this study, we specifically chose game elements that we thought would influence performance rather than enjoyment. Gamified tasks may be more successful if the game elements are just “good enough” to achieve the goals of the study without interfering with the validity of the task [<xref ref-type="bibr" rid="ref23">23</xref>]. Because we wanted to improve participant performance irrespective of enjoyment, we did not add extraneous game elements, even if those elements would have made the game more fun.</p>
        </sec>
      </sec>
      <sec>
        <title>Limitations and Future Work</title>
        <p>One limitation of our study is the small sample size. The 2 task conditions were designed with subtle differences in the form of points and feedback. While this design was intentional, we also had a relatively small sample size, which may not have been powerful enough to reveal the small effects of our slight manipulation. We recruited 135 participants for time 1 with the intent of having at least 50 participants per condition. However, only 78 participants returned at time 2. It was difficult to incentivize participants to return to a web-based study. Future studies may find significant effects with a larger sample size.</p>
        <p>Another limitation is that our sample was heavily skewed toward young adult female participants. We recruited participants through a web-based platform called Prolific. At the time of our study, a young woman made a video describing her hustle as a participant on the platform. Her video went viral on TikTok, resulting in an influx of new signups to Prolific, most of whom were, similar to the creator, female adults in their 20s [<xref ref-type="bibr" rid="ref50">50</xref>]. However, given the fundamental nature of this research, this sampling bias is unlikely to have influenced the results.</p>
        <p>The addition of points and feedback is one simple approach to gamification. Other game elements may produce different results. As discussed, we had theoretical and practical reasons for using points, but even within the category of points and achievement-based game elements, we could have made different design and mechanical choices. For example, adding a leaderboard system may have influenced participant behavior because of increased competition. Mekler et al [<xref ref-type="bibr" rid="ref25">25</xref>] found that for an image annotation task, participants in the points condition significantly outperformed those in a control condition, where no game elements were used. However, participants in the points condition were, in turn, significantly outperformed by those in conditions where leaderboards and levels were used.</p>
        <p>Future studies should investigate other game elements. Other cognitive tasks could also be investigated to determine how game elements affect reliability across task types that target different cognitive domains. Our same methods for investigating reliability could be applied to any gamified task.</p>
      </sec>
      <sec>
        <title>Implications</title>
        <p>In this study, we show that the Stroop effect is robust to the addition of simple points-based game elements. Adding points to a Stroop task does initially speed up participant reaction times, but this gamification may be most effective in the short term. Our results also suggest that game elements may differently influence <italic>parts</italic> of a cognitive task, such as the more cognitively demanding incongruent trials.</p>
        <p>We also provide an example of reporting psychometric data for a gamified task. Despite a long history of cognitive task gamification, the field lacks standard practices regarding how these tasks are made and measured [<xref ref-type="bibr" rid="ref16">16</xref>]. Any advancement in how these tasks are designed and used requires a stronger base of knowledge on how individual game elements affect cognitive behavioral measures [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. One of the most cited reasons for gamifying tasks is to address the limitations of standard neuropsychological testing [<xref ref-type="bibr" rid="ref16">16</xref>]; however, these games will never be acceptable replacements for traditional tests if they are not subjected to the same rigorous standards of reliability and validity.</p>
        <p>The results of this study suggest a potential advantage of using game-like tasks to assess cognitive functioning, especially for difficult-to-reach populations or individuals who cannot be subjected to prolonged testing. For example, gamified tasks have been shown to provide a more engaging and captivating environment, which may aid in collecting data from populations with a shorter attention span, such as children or groups of patients with concentration or attention deficits [<xref ref-type="bibr" rid="ref52">52</xref>].</p>
        <p>Our results suggest that the game condition may provide faster onboarding to true performance and improved consistency, as demonstrated descriptively through the lower proportion of outlier trials removed, the reaction time distributions, the split-half internal consistency values for reaction time and error rate, and the charts of reaction time cost by trial number. This faster onboarding is also supported by the significantly faster reaction times and lower error rates in the game condition at time 1. However, these trends do not result in significant performance differences between the basic task and game conditions in analyses of reaction time cost and also do not influence test-retest reliabilities, suggesting that the game elements we included neither significantly improved nor compromised performance in a gamified Stroop task.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ADHD</term>
          <def>
            <p>attention-deficit/hyperactivity disorder</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ICC</term>
          <def>
            <p>intraclass correlation coefficient</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors are grateful for the funding provided by the Natural Sciences and Engineering Research Council of Canada and the Saskatchewan-Waterloo Games User Research Training Program.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The data sets generated and analyzed in this study are available from the corresponding author upon reasonable request.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>KW and RLM conceptualized the study and developed the methodology. KW and PB developed the tasks and administered the projects. KW curated the data. KW, MAF, and RLM performed analyses and interpreted the study findings. KW wrote the original draft of the manuscript, and all authors reviewed and edited the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cattell</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>The time it takes to see and name objects</article-title>
          <source>Mind</source>
          <year>1886</year>
          <volume>os-XI</volume>
          <issue>41</issue>
          <fpage>63</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1093/mind/os-xi.41.63</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stroop</surname>
              <given-names>JR</given-names>
            </name>
          </person-group>
          <article-title>Studies of interference in serial verbal reactions</article-title>
          <source>J Exp Psychol</source>
          <year>1935</year>
          <month>12</month>
          <volume>18</volume>
          <issue>6</issue>
          <fpage>643</fpage>
          <lpage>62</lpage>
          <pub-id pub-id-type="doi">10.1037/h0054651</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wenzel</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rubin</surname>
              <given-names>DC</given-names>
            </name>
          </person-group>
          <source>Cognitive Methods and Their Application to Clinical Research</source>
          <year>2005</year>
          <publisher-loc>Washington, DC</publisher-loc>
          <publisher-name>American Psychological Association</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Luciana</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Practitioner review: computerized assessment of neuropsychological function in children: clinical and research applications of the Cambridge Neuropsychological Testing Automated Battery (CANTAB)</article-title>
          <source>J Child Psychol Psychiatry</source>
          <year>2003</year>
          <month>07</month>
          <volume>44</volume>
          <issue>5</issue>
          <fpage>649</fpage>
          <lpage>63</lpage>
          <pub-id pub-id-type="doi">10.1111/1469-7610.00152</pub-id>
          <pub-id pub-id-type="medline">12831110</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zygouris</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tsolaki</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Computerized cognitive testing for older adults: a review</article-title>
          <source>Am J Alzheimers Dis Other Demen</source>
          <year>2015</year>
          <month>02</month>
          <volume>30</volume>
          <issue>1</issue>
          <fpage>13</fpage>
          <lpage>28</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/10.1177/1533317514522852?url_ver=Z39.88-2003&amp;rfr_id=ori:rid:crossref.org&amp;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/1533317514522852</pub-id>
          <pub-id pub-id-type="medline">24526761</pub-id>
          <pub-id pub-id-type="pii">1533317514522852</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schmader</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Johns</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Forbes</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>An integrated process model of stereotype threat effects on performance</article-title>
          <source>Psychol Rev</source>
          <year>2008</year>
          <month>04</month>
          <volume>115</volume>
          <issue>2</issue>
          <fpage>336</fpage>
          <lpage>56</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/18426293"/>
          </comment>
          <pub-id pub-id-type="doi">10.1037/0033-295X.115.2.336</pub-id>
          <pub-id pub-id-type="medline">18426293</pub-id>
          <pub-id pub-id-type="pii">2008-04236-003</pub-id>
          <pub-id pub-id-type="pmcid">PMC2570773</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Boksem</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Tops</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Mental fatigue: costs and benefits</article-title>
          <source>Brain Res Rev</source>
          <year>2008</year>
          <month>11</month>
          <volume>59</volume>
          <issue>1</issue>
          <fpage>125</fpage>
          <lpage>39</lpage>
          <pub-id pub-id-type="doi">10.1016/j.brainresrev.2008.07.001</pub-id>
          <pub-id pub-id-type="medline">18652844</pub-id>
          <pub-id pub-id-type="pii">S0165-0173(08)00071-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lumsden</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Skinner</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Coyle</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lawrence</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Munafo</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Attrition from web-based cognitive testing: a repeated measures comparison of gamification techniques</article-title>
          <source>J Med Internet Res</source>
          <year>2017</year>
          <month>11</month>
          <day>22</day>
          <volume>19</volume>
          <issue>11</issue>
          <fpage>e395</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2017/11/e395/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.8473</pub-id>
          <pub-id pub-id-type="medline">29167090</pub-id>
          <pub-id pub-id-type="pii">v19i11e395</pub-id>
          <pub-id pub-id-type="pmcid">PMC5719230</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>DeRight</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jorgensen</surname>
              <given-names>RS</given-names>
            </name>
          </person-group>
          <article-title>I just want my research credit: frequency of suboptimal effort in a non-clinical healthy undergraduate sample</article-title>
          <source>Clin Neuropsychol</source>
          <year>2015</year>
          <volume>29</volume>
          <issue>1</issue>
          <fpage>101</fpage>
          <lpage>17</lpage>
          <pub-id pub-id-type="doi">10.1080/13854046.2014.989267</pub-id>
          <pub-id pub-id-type="medline">25494327</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kirkwood</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Kirk</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Blaha</surname>
              <given-names>RZ</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Noncredible effort during pediatric neuropsychological exam: a case series and literature review</article-title>
          <source>Child Neuropsychol</source>
          <year>2010</year>
          <volume>16</volume>
          <issue>6</issue>
          <fpage>604</fpage>
          <lpage>18</lpage>
          <pub-id pub-id-type="doi">10.1080/09297049.2010.495059</pub-id>
          <pub-id pub-id-type="medline">20628928</pub-id>
          <pub-id pub-id-type="pii">924303083</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deterding</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dixon</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Khaled</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Nacke</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>From game design elements to gamefulness: defining "gamification"</article-title>
          <source>Proceedings of the 15th International Academic MindTrek Conference: Envisioning Future Media Environments</source>
          <year>2011</year>
          <conf-name>MindTrek '11</conf-name>
          <conf-date>September 28-30, 2011</conf-date>
          <conf-loc>Tampere, Finland</conf-loc>
          <pub-id pub-id-type="doi">10.1145/2181037.2181040</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lumsden</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Skinner</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Woods</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lawrence</surname>
              <given-names>NS</given-names>
            </name>
            <name name-style="western">
              <surname>Munafò</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The effects of gamelike features and test location on cognitive test performance and participant enjoyment</article-title>
          <source>PeerJ</source>
          <year>2016</year>
          <month>07</month>
          <day>06</day>
          <volume>4</volume>
          <fpage>e2184</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/27441120"/>
          </comment>
          <pub-id pub-id-type="doi">10.7717/peerj.2184</pub-id>
          <pub-id pub-id-type="medline">27441120</pub-id>
          <pub-id pub-id-type="pii">2184</pub-id>
          <pub-id pub-id-type="pmcid">PMC4941792</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chicchi Giglioli</surname>
              <given-names>IA</given-names>
            </name>
            <name name-style="western">
              <surname>de Juan Ripoll</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Parra</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Alcañiz Raya</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>EXPANSE: a novel narrative serious game for the behavioral assessment of cognitive abilities</article-title>
          <source>PLoS One</source>
          <year>2018</year>
          <month>11</month>
          <day>9</day>
          <volume>13</volume>
          <issue>11</issue>
          <fpage>e0206925</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0206925"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0206925</pub-id>
          <pub-id pub-id-type="medline">30412614</pub-id>
          <pub-id pub-id-type="pii">PONE-D-18-14486</pub-id>
          <pub-id pub-id-type="pmcid">PMC6226175</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Berg</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Rogers</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>McMahon</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Garrett</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Manley</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>A novel approach to measure executive functions in students: an evaluation of two child-friendly apps</article-title>
          <source>Front Psychol</source>
          <year>2020</year>
          <month>7</month>
          <day>16</day>
          <volume>11</volume>
          <fpage>1702</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32765379"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpsyg.2020.01702</pub-id>
          <pub-id pub-id-type="medline">32765379</pub-id>
          <pub-id pub-id-type="pmcid">PMC7378958</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nicholson</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Reiners</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wood</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>A RECIPE for meaningful gamification</article-title>
          <source>Gamification in Education and Business</source>
          <year>2015</year>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wiley</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Mandryk</surname>
              <given-names>RL</given-names>
            </name>
          </person-group>
          <article-title>The making and evaluation of digital games used for the assessment of attention: systematic review</article-title>
          <source>JMIR Serious Games</source>
          <year>2021</year>
          <month>08</month>
          <day>09</day>
          <volume>9</volume>
          <issue>3</issue>
          <fpage>e26449</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://games.jmir.org/2021/3/e26449/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/26449</pub-id>
          <pub-id pub-id-type="medline">34383674</pub-id>
          <pub-id pub-id-type="pii">v9i3e26449</pub-id>
          <pub-id pub-id-type="pmcid">PMC8386381</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thirkettle</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Langdridge</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Pike</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>A mobile app delivering a gamified battery of cognitive tests designed for repeated play (OU Brainwave): app design and cohort study</article-title>
          <source>JMIR Serious Games</source>
          <year>2018</year>
          <month>10</month>
          <day>30</day>
          <volume>6</volume>
          <issue>4</issue>
          <fpage>e10519</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://games.jmir.org/2018/4/e10519/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/10519</pub-id>
          <pub-id pub-id-type="medline">30377140</pub-id>
          <pub-id pub-id-type="pii">v6i4e10519</pub-id>
          <pub-id pub-id-type="pmcid">PMC6234338</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Friehs</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Dechant</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Schäfer</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mandryk</surname>
              <given-names>RL</given-names>
            </name>
          </person-group>
          <article-title>More than skin deep: about the influence of self-relevant avatars on inhibitory control</article-title>
          <source>Cogn Res Princ Implic</source>
          <year>2022</year>
          <month>04</month>
          <day>08</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>31</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35394227"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s41235-022-00384-8</pub-id>
          <pub-id pub-id-type="medline">35394227</pub-id>
          <pub-id pub-id-type="pii">10.1186/s41235-022-00384-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC8993990</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Friehs</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Dechant</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Vedress</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Frings</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mandryk</surname>
              <given-names>RL</given-names>
            </name>
          </person-group>
          <article-title>Effective gamification of the stop-signal task: two controlled laboratory experiments</article-title>
          <source>JMIR Serious Games</source>
          <year>2020</year>
          <month>09</month>
          <day>08</day>
          <volume>8</volume>
          <issue>3</issue>
          <fpage>e17810</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://games.jmir.org/2020/3/e17810/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/17810</pub-id>
          <pub-id pub-id-type="medline">32897233</pub-id>
          <pub-id pub-id-type="pii">v8i3e17810</pub-id>
          <pub-id pub-id-type="pmcid">PMC7509611</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wiley</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Vedress</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mandryk</surname>
              <given-names>RL</given-names>
            </name>
          </person-group>
          <article-title>How points and theme affect performance and experience in a gamified cognitive task</article-title>
          <source>Proceedings of the 2020 CHI Conference on Human Factors in Computing Systems</source>
          <year>2020</year>
          <conf-name>CHI '20</conf-name>
          <conf-date>April 25-30, 2020</conf-date>
          <conf-loc>Honolulu, HI</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3313831.3376697</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Birk</surname>
              <given-names>MV</given-names>
            </name>
            <name name-style="western">
              <surname>Mandryk</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Bowey</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Buttlar</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>The effects of adding premise and backstory to psychological tasks</article-title>
          <source>Proceedings of the Conference on Human Factors in Computing Systems</source>
          <year>2015</year>
          <conf-name>CHI '15</conf-name>
          <conf-date>April 18-23, 2015</conf-date>
          <conf-loc>Seoul, Korea</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bruckman</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Can educational be fun?</article-title>
          <source>Proceedings of the Game Developers Conference</source>
          <year>1999</year>
          <conf-name>GDC 1999</conf-name>
          <conf-date>March 17, 1999</conf-date>
          <conf-loc>San Jose, CA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lambeth</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Solomon</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gandy</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Method in the madness: the design of games as valid and reliable scientific tools</article-title>
          <source>Proceedings of the 13th International Conference on the Foundations of Digital Games</source>
          <year>2018</year>
          <conf-name>FDG '18</conf-name>
          <conf-date>August 7-10, 2018</conf-date>
          <conf-loc>Malmö, Sweden</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Groening</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Binnewies</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>“Achievement unlocked!” - the impact of digital achievements as a gamification element on motivation and performance</article-title>
          <source>Comput Hum Behav</source>
          <year>2019</year>
          <month>08</month>
          <volume>97</volume>
          <fpage>151</fpage>
          <lpage>66</lpage>
          <pub-id pub-id-type="doi">10.1016/j.chb.2019.02.026</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mekler</surname>
              <given-names>ED</given-names>
            </name>
            <name name-style="western">
              <surname>Brühlmann</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Tuch</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Opwis</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Towards understanding the effects of individual gamification elements on intrinsic motivation and performance</article-title>
          <source>Comput Hum Behav</source>
          <year>2017</year>
          <month>06</month>
          <volume>71</volume>
          <fpage>525</fpage>
          <lpage>34</lpage>
          <pub-id pub-id-type="doi">10.1016/j.chb.2015.08.048</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jung</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Schneider</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Valacich</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Enhancing the motivational affordance of information systems: the effects of real-time performance feedback and goal setting in group collaboration environments</article-title>
          <source>Manag Sci</source>
          <year>2010</year>
          <month>04</month>
          <volume>56</volume>
          <issue>4</issue>
          <fpage>724</fpage>
          <lpage>42</lpage>
          <pub-id pub-id-type="doi">10.1287/mnsc.1090.1129</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vanden Abeele</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Wouters</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ghesquière</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Goeleven</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Geurts</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Game-based assessment of psycho-acoustic thresholds: not all games are equal!</article-title>
          <source>Proceedings of the 2015 Annual Symposium on Computer-Human Interaction in Play</source>
          <year>2015</year>
          <conf-name>CHI PLAY '15</conf-name>
          <conf-date>October 5-7, 2015</conf-date>
          <conf-loc>London, UK</conf-loc>
          <pub-id pub-id-type="doi">10.1145/2793107.2793132</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Delisle</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Braun</surname>
              <given-names>CM</given-names>
            </name>
          </person-group>
          <article-title>A context for normalizing impulsiveness at work for adults with attention deficit/hyperactivity disorder (combined type)</article-title>
          <source>Arch Clin Neuropsychol</source>
          <year>2011</year>
          <month>11</month>
          <volume>26</volume>
          <issue>7</issue>
          <fpage>602</fpage>
          <lpage>13</lpage>
          <pub-id pub-id-type="doi">10.1093/arclin/acr043</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <article-title>The standards for educational and psychological testing</article-title>
          <source>American Psychological Association</source>
          <access-date>2024-03-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.apa.org/science/programs/testing/standards">https://www.apa.org/science/programs/testing/standards</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hedge</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bompas</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sumner</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Task reliability considerations in computational psychiatry</article-title>
          <source>Biol Psychiatry Cogn Neurosci Neuroimaging</source>
          <year>2020</year>
          <month>09</month>
          <volume>5</volume>
          <issue>9</issue>
          <fpage>837</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1016/j.bpsc.2020.05.004</pub-id>
          <pub-id pub-id-type="medline">32605726</pub-id>
          <pub-id pub-id-type="pii">S2451-9022(20)30128-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miranda</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Palmer</surname>
              <given-names>EM</given-names>
            </name>
          </person-group>
          <article-title>Intrinsic motivation and attentional capture from gamelike features in a visual search task</article-title>
          <source>Behav Res Methods</source>
          <year>2014</year>
          <month>03</month>
          <volume>46</volume>
          <issue>1</issue>
          <fpage>159</fpage>
          <lpage>72</lpage>
          <pub-id pub-id-type="doi">10.3758/s13428-013-0357-7</pub-id>
          <pub-id pub-id-type="medline">23835649</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Parsons</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kruijt</surname>
              <given-names>AW</given-names>
            </name>
            <name name-style="western">
              <surname>Fox</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Psychological science needs a standard practice of reporting the reliability of cognitive-behavioral measurements</article-title>
          <source>Adv Methods Pract Psychol Sci</source>
          <year>2019</year>
          <volume>2</volume>
          <issue>4</issue>
          <fpage>378</fpage>
          <lpage>95</lpage>
          <pub-id pub-id-type="doi">10.1177/2515245919879695</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Price</surname>
              <given-names>RB</given-names>
            </name>
            <name name-style="western">
              <surname>Kuckertz</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Siegle</surname>
              <given-names>GJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ladouceur</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Silk</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>ND</given-names>
            </name>
            <name name-style="western">
              <surname>Dahl</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Amir</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Empirical recommendations for improving the stability of the dot-probe task in clinical research</article-title>
          <source>Psychol Assess</source>
          <year>2015</year>
          <month>06</month>
          <volume>27</volume>
          <issue>2</issue>
          <fpage>365</fpage>
          <lpage>76</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25419646"/>
          </comment>
          <pub-id pub-id-type="doi">10.1037/pas0000036</pub-id>
          <pub-id pub-id-type="medline">25419646</pub-id>
          <pub-id pub-id-type="pii">2014-49229-001</pub-id>
          <pub-id pub-id-type="pmcid">PMC4442069</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>MacLeod</surname>
              <given-names>CM</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Wenzel</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rubin</surname>
              <given-names>DC</given-names>
            </name>
          </person-group>
          <article-title>The Stroop task in cognitive research</article-title>
          <source>Cognitive Methods and Their Application to Clinical Research</source>
          <year>2005</year>
          <publisher-loc>Washington, DC</publisher-loc>
          <publisher-name>American Psychological Association</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>GS</given-names>
            </name>
          </person-group>
          <article-title>Semantic power measured through the interference of words with color-naming</article-title>
          <source>Am J Psychol</source>
          <year>1964</year>
          <month>12</month>
          <volume>77</volume>
          <issue>4</issue>
          <fpage>576</fpage>
          <lpage>88</lpage>
          <pub-id pub-id-type="doi">10.2307/1420768</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tecce</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Dimartino</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Effects of heightened drive (shock) on performance in a tachistoscopic color-word interference task</article-title>
          <source>Psychol Rep</source>
          <year>1965</year>
          <month>02</month>
          <volume>16</volume>
          <fpage>93</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="doi">10.2466/pr0.1965.16.1.93</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dalrymple-Alford</surname>
              <given-names>EC</given-names>
            </name>
            <name name-style="western">
              <surname>Budayer</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Examination of some aspects of the Stroop color-word test</article-title>
          <source>Percept Mot Skills</source>
          <year>1966</year>
          <month>12</month>
          <volume>23</volume>
          <issue>3</issue>
          <fpage>1211</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="doi">10.2466/pms.1966.23.3f.1211</pub-id>
          <pub-id pub-id-type="medline">5972923</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>MacLeod</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Wenzel</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rubin</surname>
              <given-names>DC</given-names>
            </name>
          </person-group>
          <article-title>The Stroop task in clinical research</article-title>
          <source>Cognitive Methods and Their Application to Clinical Research</source>
          <year>2005</year>
          <publisher-loc>Washington, DC</publisher-loc>
          <publisher-name>American Psychological Association</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Groening</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Binnewies</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>The more, the merrier? - how adding and removing game design elements impact motivation and performance in a gamification environment</article-title>
          <source>Int J Hum Comput Interact</source>
          <year>2021</year>
          <volume>37</volume>
          <issue>12</issue>
          <fpage>1130</fpage>
          <lpage>50</lpage>
          <pub-id pub-id-type="doi">10.1080/10447318.2020.1870828</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gómez-Tello</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Rosetti</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Galicia-Alvarado</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Maya</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Apiquian</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Neuropsychological screening with TOWI: performance in 6- to 12-year-old children</article-title>
          <source>Appl Neuropsychol Child</source>
          <year>2022</year>
          <volume>11</volume>
          <issue>2</issue>
          <fpage>115</fpage>
          <lpage>24</lpage>
          <pub-id pub-id-type="doi">10.1080/21622965.2020.1764357</pub-id>
          <pub-id pub-id-type="medline">32429822</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hedge</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Powell</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Sumner</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>The reliability paradox: why robust cognitive tasks do not produce reliable individual differences</article-title>
          <source>Behav Res Methods</source>
          <year>2018</year>
          <month>06</month>
          <volume>50</volume>
          <issue>3</issue>
          <fpage>1166</fpage>
          <lpage>86</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28726177"/>
          </comment>
          <pub-id pub-id-type="doi">10.3758/s13428-017-0935-1</pub-id>
          <pub-id pub-id-type="medline">28726177</pub-id>
          <pub-id pub-id-type="pii">10.3758/s13428-017-0935-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC5990556</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tondello</surname>
              <given-names>GF</given-names>
            </name>
            <name name-style="western">
              <surname>Kappen</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Ganaba</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nacke</surname>
              <given-names>LE</given-names>
            </name>
          </person-group>
          <article-title>Gameful design heuristics: a gamification inspection tool</article-title>
          <source>Proceedings of the 21st International Conference on Human-Computer Interaction</source>
          <year>2019</year>
          <conf-name>HCII 2019</conf-name>
          <conf-date>July 26-31, 2019</conf-date>
          <conf-loc>Orlando, FL</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-030-22646-6_16</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Paolacci</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Chandler</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Inside the Turk: understanding Mechanical Turk as a participant pool</article-title>
          <source>Curr Dir Psychol Sci</source>
          <year>2014</year>
          <volume>23</volume>
          <issue>3</issue>
          <fpage>184</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1177/0963721414531598</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Meade</surname>
              <given-names>AW</given-names>
            </name>
            <name name-style="western">
              <surname>Craig</surname>
              <given-names>SB</given-names>
            </name>
          </person-group>
          <article-title>Identifying careless responses in survey data</article-title>
          <source>Psychol Methods</source>
          <year>2012</year>
          <month>09</month>
          <volume>17</volume>
          <issue>3</issue>
          <fpage>437</fpage>
          <lpage>55</lpage>
          <pub-id pub-id-type="doi">10.1037/a0028085</pub-id>
          <pub-id pub-id-type="medline">22506584</pub-id>
          <pub-id pub-id-type="pii">2012-10015-001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Buchanan</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Scofield</surname>
              <given-names>JE</given-names>
            </name>
          </person-group>
          <article-title>Methods to detect low quality data and its implication for psychological research</article-title>
          <source>Behav Res Methods</source>
          <year>2018</year>
          <month>12</month>
          <volume>50</volume>
          <issue>6</issue>
          <fpage>2586</fpage>
          <lpage>96</lpage>
          <pub-id pub-id-type="doi">10.3758/s13428-018-1035-6</pub-id>
          <pub-id pub-id-type="medline">29542063</pub-id>
          <pub-id pub-id-type="pii">10.3758/s13428-018-1035-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>RM</given-names>
            </name>
          </person-group>
          <article-title>Control and information in the intrapersonal sphere: an extension of cognitive evaluation theory</article-title>
          <source>J Pers Soc Psychol</source>
          <year>1982</year>
          <volume>43</volume>
          <issue>3</issue>
          <fpage>450</fpage>
          <lpage>61</lpage>
          <pub-id pub-id-type="doi">10.1037//0022-3514.43.3.450</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Derryberry</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Reed</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Anxiety-related attentional biases and their regulation by attentional control</article-title>
          <source>J Abnorm Psychol</source>
          <year>2002</year>
          <month>05</month>
          <volume>111</volume>
          <issue>2</issue>
          <fpage>225</fpage>
          <lpage>36</lpage>
          <pub-id pub-id-type="doi">10.1037//0021-843x.111.2.225</pub-id>
          <pub-id pub-id-type="medline">12003445</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lakens</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Sample size justification</article-title>
          <source>Collabra Psychol</source>
          <year>2022</year>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>33267</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://online.ucpress.edu/collabra/article/8/1/33267/120491/Sample-Size-Justification"/>
          </comment>
          <pub-id pub-id-type="doi">10.1525/collabra.33267</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shrout</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Fleiss</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>Intraclass correlations: uses in assessing rater reliability</article-title>
          <source>Psychol Bull</source>
          <year>1979</year>
          <month>03</month>
          <volume>86</volume>
          <issue>2</issue>
          <fpage>420</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1037//0033-2909.86.2.420</pub-id>
          <pub-id pub-id-type="medline">18839484</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Charalambides</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>We recently went viral on TikTok - here's what we learned</article-title>
          <source>Prolific</source>
          <year>2021</year>
          <month>08</month>
          <day>24</day>
          <access-date>2024-03-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.prolific.com/resources/we-recently-went-viral-on-tiktok-heres-what-we-learned">https://www.prolific.com/resources/we-recently-went-viral-on-tiktok-heres-what-we-learned</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Revelle</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Psych: procedures for psychological, psychometric, and personality research</article-title>
          <source>The Comprehensive R Archive Network</source>
          <year>2024</year>
          <month>01</month>
          <day>18</day>
          <access-date>2024-03-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cran.r-project.org/web/packages/psych/index.html">https://cran.r-project.org/web/packages/psych/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gallagher</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kessler</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bramham</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dechant</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Friehs</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>A proof-of-concept study exploring the effects of impulsivity on a gamified version of the stop-signal task in children</article-title>
          <source>Front Psychol</source>
          <year>2023</year>
          <month>02</month>
          <day>9</day>
          <volume>14</volume>
          <fpage>1068229</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36844283"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpsyg.2023.1068229</pub-id>
          <pub-id pub-id-type="medline">36844283</pub-id>
          <pub-id pub-id-type="pmcid">PMC9946965</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
