<?xml version="1.0" encoding="UTF-8"?>
<?oxygen RNGSchema="../../common/schema/DHQauthor-TEI.rng" type="xml"?>
<?oxygen SCHSchema="../../common/schema/dhqTEI-ready.sch"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:dhq="http://www.digitalhumanities.org/ns/dhq"
   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:cc="http://web.resource.org/cc/">
   <teiHeader>
      <fileDesc>
         <titleStmt>
            <title>Designing Data Mining Droplets: New Interface Objects for the Humanities
               Scholar</title>
            <author>Stan Ruecker</author>
            <dhq:authorInfo>
               <dhq:author_name>Stan <dhq:family>Ruecker</dhq:family>
               </dhq:author_name>
               <dhq:affiliation>University of Alberta, Canada</dhq:affiliation>
               <email>sruecker@ualberta.ca</email>
               <dhq:bio>
                  <p>Stan Ruecker is an Assistant Professor of Humanities Computing in the
                     Department of English and Film Studies at the University of Alberta. Ruecker
                     holds advanced degrees in English, Humanities Computing, and Design. He has
                     expertise in the design of experimental interfaces to support online browsing
                     tasks, typically by combining some meaningful representation of every item in
                     the collection with emergent tools for manipulating the display. Ruecker also
                     has a growing body of experience in qualitative evaluation studies of the
                     various stages of prototypes. He has published on text encoding theory,
                     affective design, interaction histories, electronic books, information design,
                     issue crawling, and interface design for a variety of uses. He has presented in
                     the last few years at international conferences in design, computing science,
                     educational technology, English literature, communication technology, library
                     and information studies, and humanities computing.</p>
               </dhq:bio>
            </dhq:authorInfo>
            <author>Milena Radzikowska</author>
            <dhq:authorInfo>
               <dhq:author_name>Milena <dhq:family>Radzikowska</dhq:family>
               </dhq:author_name>
               <dhq:affiliation>Mount Royal College, Canada</dhq:affiliation>
               <email>mradzikowska@gmail.com</email>
               <dhq:bio>
                  <p>Milena Radzikowska is an associate professor in Information Design, Faculty of
                     Communication Studies, Mount Royal College. In 2000, she graduated from NSCAD
                     University (BDes Hon.) and, in 2003, from the University of Alberta, with a
                     master's (MDes) degree in visual communication design. Her active research
                     interests are in the areas of visual communication, interface and information
                     design, and text visualization. Over the last few years, Ms. Radzikowska
                     presented at international conferences in design (Edmonton, Cape Town, and Hong
                     Kong), educational technology (Lima), communication technology (Honolulu and
                     Helsinki), humanities computing (Victoria, Paris, Fredericton, Saskatoon,
                     Vancouver, and Chicago), and medieval studies (Prague). In January 2009, she
                     began working on an interdisciplinary PhD at the University of Alberta in
                     Computing Science &amp; Humanities Computing, under the supervision of Dr.
                     Walter Bischof and Dr. Stan Ruecker.</p>
               </dhq:bio>
            </dhq:authorInfo>
            <author>Stéfan Sinclair</author>
            <dhq:authorInfo>
               <dhq:author_name>Stéfan <dhq:family>Sinclair</dhq:family>
               </dhq:author_name>
               <dhq:affiliation>McMaster University, Canada</dhq:affiliation>
               <email>sgs@mcmaster.edu</email>
               <dhq:bio>
                  <p>Stéfan Sinclair is an Associate Professor in the department of Communication
                     Studies and Multimedia at McMaster University. Sinclair is involved in the
                     design, development and theorisation of tools for the digital humanities. His
                     research projects, funded by SSHRC, CFI, and the Mellon Foundation, include the
                     following: Voyeur, an environment for reading and analyzing digital texts <ref
                        target="http://voyeur.hermeneuti.ca">voyeur.hermeneuti.ca</ref>; TAPoR, a
                     portal for text analysis <ref target="http://portal.tapor.ca"
                        >portal.tapor.ca</ref>; BonPatron, a widely-used French grammar checker <ref
                        target="http://bonpatron.com">bonpatron.com</ref>; Monk, a data mining tool
                     for literary scholars <ref target="http://monkproject.org"
                        >monkproject.org</ref>; HumViz, a suite of visualization interfaces for the
                     humanities <ref target="http://humviz.org">humviz.org</ref>; and Digital Texts
                     2.0, a Facebook application for managing digital texts <ref
                        target="http://dtext2.org">dtext2.org</ref>. Sinclair's background is in
                     French literature, and especially the Oulipo, a group of writers and
                     mathematicians that creates constraint-based literature. Prior to arriving at
                     McMaster, Sinclair helped create and direct the M.A. in Humanities Computing at
                     the University of Alberta. His current teaching is focused on the history and
                     development of multimedia, as well as on practices of programming for the
                     humanities.</p>
               </dhq:bio>
            </dhq:authorInfo>
         </titleStmt>
         <publicationStmt>
            <publisher>Alliance of Digital Humanities Organizations</publisher>
            <publisher>Association for Computers and the Humanities</publisher>
            <idno type="DHQarticle-id">000067</idno>
            <idno type="volume">003</idno>
            <idno type="issue">3</idno>
            <dhq:articleType>article</dhq:articleType>
            <date when="2009-09-29">29 September 2009</date>
            <availability>
               <cc:License xmlns="http://digitalhumanities.org/DHQ/namespace"
                  rdf:about="https://creativecommons.org/licenses/by-nd/2.5/"/>
            </availability>
         </publicationStmt>
         <sourceDesc>
            <p>Authored for DHQ; migrated from original DHQauthor format</p>
         </sourceDesc>
      </fileDesc>
      <encodingDesc>
         <classDecl>
            <taxonomy xml:id="dhq_keywords">
               <bibl>DHQ classification scheme; full list available in the <ref
                     target="http://www.digitalhumanities.org/dhq/taxonomy.xml">DHQ keyword
                     taxonomy</ref>
               </bibl>
            </taxonomy>
            <taxonomy xml:id="authorial_keywords">
               <bibl>Keywords supplied by author; no controlled vocabulary</bibl>
            </taxonomy>
         </classDecl>
      </encodingDesc>
      <profileDesc>
         <langUsage>
            <language ident="en"/>
         </langUsage>
      </profileDesc>
      <revisionDesc>
         <change who="Stéfan" when="2009-02-17">Document converted by Voyeur Tools
            (voyeur.hermeneuti.ca).</change>
         <change who="Stéfan" when="2009-02-18">Exported document cleaned up.</change>
         <change when="2009-03-04" who="Alyssa">checked and corrected XML</change>
         <change when="2009-08-06" who="Alyssa">added bios</change>
         <change when="2013-06-20" who="Tassie Gniady">Changed "dhq:caption" to "head."</change>
      </revisionDesc>
   </teiHeader>
   <text xml:lang="en">
      <front>
         <dhq:abstract>
            <p>In this paper, we describe the design of a number of alternative interface
                  <q>droplets</q> that are intended for use by humanities scholars interested in
               applying data mining and information visualization tools to the task of hypothesis
               formulation. The trained droplets provide several functions. Their primary purpose is
               to encapsulate the results of the software training phase. They can be saved for
               future re-use against other collections or combinations of collections. They can be
               modified by having the user accept or reject features identified by the data mining
               software. Finally, they can also contain choices for how to display and organize
               items in the collection. The opportunity to develop a new interface object presents
               the designer with the challenge of effectively communicating what the tool is good
               for and how it is used. This paper outlines the design process we followed in
               creating the visual representations of these interface objects, describes the
               communicative strengths and weaknesses of a number of alternative designs, and
               discusses the importance of the study of new interface objects as the means of
               providing the user with new interface affordances.</p>
         </dhq:abstract>
         <dhq:teaser>
            <p>Wherein our valiant heroes describe experiments with visual metaphors intended to
               help them in their perilous data mining quests.</p>
         </dhq:teaser>
      </front>
      <body>
         <head>Designing Data Mining Droplets: New Interface Objects for the Humanities
            Scholar</head>
         <div>
            <head>Introduction</head>
            <p>The goal of this paper is to address some of the conceptual issues that arise in the
               design of a new kind of interface object for a specific domain — data mining for the
               humanities. In that context, we describe one component of our research: the design of
               a form of visual representation that would provide humanities scholars with some
               insight into the data mining process, while at the same time making the activity of
               data mining attractive and easy to carry out.</p>
            <figure xml:id="figure01">
               <head>An early sketch of the data mining environment (in this case for the NORA
                  project) shows someone using a droplet trained for identifying the erotic in a set
                  of poems from the Emily Dickinson collection in the Institute for Advanced
                  Technology in the Humanities (IATH) at the University of Virginia. Note that the
                  preliminary droplet design shown here (bottom right) has no specifically
                  communicative morphology.</head>
               <graphic url="resources/images/figure01.jpg"/>
               <figDesc>Screenshot of the data mining worksheet for step three.</figDesc>
            </figure>
            <p>Our strategy in this interface was to provide the user with a variety of empty
                  <q>droplets</q> which would be filled with the results of the software training
               phase <ptr target="#ruecker2006"/>. Each droplet would contain or encapsulate an
               entire working state of the system, including the algorithmic consequences of a
               particular training exercise, combined with some parameters for organizing and
               selecting the form of the display. The choice of the proper word to identify the
               droplets is in itself a subject of design. Other terms that have been suggested
               include <q>magnet,</q>
               <q>crystal,</q>
               <q>capsule,</q>
               <q>lens,</q>
               <q>charm,</q>
               <q>filter,</q>
               <q>system state,</q>
               <q>kernel,</q> and the very Canadian <q>hockey puck.</q> Whatever these objects are
               eventually called, for the time being we are using the term <q>droplet,</q> which
               suggests to us a densely compressed item that can unpack in an organic way to
               influence the entire surroundings. Once a droplet has been trained for data mining,
               it can be saved and applied to the entire collection, or to a different collection. A
               droplet is applied to a collection by dragging and dropping it onto a display
               representing each item, after which the display organizes itself in a series of
                  <q>oil and water</q> effects. </p>
            <p>Of vital significance to the success of this strategy is the design of the droplets.
               The droplets need to be able to represent the relevant information about the data
               mining process in a form that is readily interpretable by humanities scholars. The
               droplet serves in one sense like an icon — a person looking at it will hopefully
               remember what system state it contains. This iconic function should work at different
               scales, at least one of which is quite small. The droplets therefore need to be
               easily visually differentiable one from another, at every scale. Finally, the
               droplets need to be visually appealing. We describe here our initial attempts to
               design these interface objects, based on a set of metaphors to real-world items that
               combine complex visual appearance with a compact form.</p>
         </div>
         <div>
            <head>Background</head>
            <p>The online availability of a wide range of digital data has resulted in a
               corresponding increase in various kinds of tools for retrieving and manipulating the
               items in a collection <ptr target="#hockey2000"/>. Interface design researchers have
               worked on systems intended to help users access digital images, work with electronic
               text files, and apply data mining algorithms to a variety of problems, both in the
               sciences and in the humanities.</p>
            <p>In the area of digital images, <ptr target="#bederson2001"/> describes a zoomable
               browser, <ptr target="#bumgardner2005"/> provides an experimental search tool that
               uses a colour wheel as its interface, and <ptr target="#hascoet1998"/> discuss the
               use of maps in accessing a digital library. Other examples include <ptr
                  target="#rodden2001"/>, who studied the use of similarity clustering for browsing
               tasks, and <ptr target="#ruecker2005"/>, who developed a prototype image browser for
               pill identification.</p>
            <p>For tools related to text files, <ptr target="#pirolli1996"/> describe a system for
               visualizing documents which allows the user to form dynamic groups. <ptr
                  target="#small1996"/> developed a 3D prototype for text navigation, where the
               reader moved between columns of text from Shakespeare’s plays. A variety of
               researchers have worked in the area of data mining for text collections of various
               kinds. For example, <ptr target="#feldman1997"/> discuss early efforts in this area,
               and <ptr target="#weiss2005"/> provide a recent update on methods.</p>
            <p>Some researchers have pointed out that the potential for applying data mining tools
               to questions in the humanities lies largely in the capacity of such tools to
               contribute, not primarily to hypothesis testing, but instead to hypothesis
               formulation <ptr target="#shneiderman2001"/>; <ptr target="#ramsay2003"/>; <ptr
                  target="#unsworth2004"/>. The standard approach in humanities research is not to
               solve a problem by testing one hypothesis against another, but rather to enrich the
               object of study by repeated observation and reporting. Data mining tools and their
               accompanying visualizations, which facilitate pattern finding across a wide range of
               data, can definitely play a role in this process.</p>
            <p>With respect to the design of interfaces for data mining, it is important to remember
               that each new online tool represents a new opportunity for action, or affordance <ptr
                  target="#gibson1979"/>; <ptr target="#vicente2002"/>. For instance, in a more
               conventional approach to the interface for data mining, it would be possible to
               create a history palette that records previous states of the system. However, it is
               not necessarily straightforward to repurpose an item from that history to a new
               collection. By encapsulating the history states as droplets, we make the repurposing
               simpler.</p>
            <p>Another significant feature of the droplets is their role in interactivity. By
               providing the user with an item to drag and drop to trigger a series of dynamic
               responses from the system, the droplets help facilitate an instructional aspect: the
               user can see the steps carried out by the system, which correspond to the steps
               associated with the droplet. While visually dynamic responses are not reliant on the
               presence of droplets as objects, their existence as part of the user interaction
               helps to suggest to the designer these various new forms of feedback, which are a
               kind of affordance.</p>
            <p>Studying these new affordances presents a challenge, in that the researcher by
               definition does not always have an existing object with a similar affordance —
               otherwise it would be a case of a redesign rather than a new tool <ptr
                  target="#ruecker2003"/>. Though opinions vary, the current dominant perspective is
               that interface research requires a component of usability study <ptr
                  target="#nielsen2000"/>, but that usability study alone is probably not enough.
               Attention should also be paid to other factors, such as aesthetics <ptr
                  target="#karvonen2000"/>, affect <ptr target="#dillon2001"/>, and sustained use
               over time <ptr target="#plaisant2004"/>.</p>
         </div>
         <div>
            <head>Methodology</head>
            <p>We began by identifying the kinds of information the user might want to know while
               working with the system. These included an overview of the process, suggestions about
               the kinds of tasks that could be performed using the system, reassurance at each
               point that the right things were happening, and assistance in interpreting the
               results of each stage and moving successfully to the next stage. With the droplets,
               we hoped to be able to communicate what had been done to create them, in order to
               suggest how they might be successfully deployed once they were created.</p>
            <p>To construct the droplets, we generated a candidate list of real-world items that
               have a sufficiently complex physical shape to serve as possible metaphors for the
               complexities of the data mining process. We determined early in the process that it
               would be difficult and probably not helpful to attempt to communicate for this
               demographic the actual algorithms involved, as for example by superimposing an
               equation on a geometric shape. Instead, we hoped to be able to visually express the
               following information:</p>
            <list type="unordered">
               <item>Is this a trained droplet or an empty one?</item>
               <item>For trained droplets, has the user accepted the features recommended by the
                  system or has the list of features been modified?</item>
               <item>What kinds of features were included?</item>
               <item>How many features were included?</item>
               <item>What options for display have been associated with the droplet?</item>
               <item>What choices for organizing the display have been applied?</item>
            </list>
            <p>There are also other pieces of information that could be useful for understanding
               what has been happening. These items need to be communicated somehow but could be
               difficult to associate with the visual appearance of the droplets. These include:</p>
            <list type="unordered">
               <item>The name of the collection or collections used in training.</item>
               <item>The size of the collection.</item>
               <item>The size of the training set.</item>
               <item>The name and goals of the person responsible for training the droplet.</item>
            </list>
            <p>Some strategies involving droplet morphology might include using the size of the
               droplet to indicate the size of the training set or of the collection the set was
               drawn from. Internal and external lines can also be thickened or lightened as a way
               of suggesting robustness of the training set. Finally, depending on the visual kind
               of droplet, it may be possible to nest one droplet inside another, as a way of
               indicating their use in combination.</p>
            <p>It may also be possible to associate this information with the droplets using
               strategies that do not involve the droplet morphology <emph>per se</emph>, but
               instead rely on the combination of text and image. Combining these methods is seen by
               some theorists as an important approach to the design of technical communications
                  <ptr target="#horn1998"/>. We will provide this connection in the case of the
               prototype by refreshing an information panel about the droplet details whenever the
               user selects a droplet. This panel will also provide the opportunity to adjust some
               of the settings stored by the droplet.</p>
         </div>
         <div>
            <head>Results</head>


            <p>Working from our original map of over a dozen potential metaphors (<ref
                  target="#figure02">Figure 2</ref>), we selected the following short list for
               further investigation. We wanted to have a variety of items that were distinct from
               each other but were also visually complex in a way that could communicate the stages
               in droplet training. We thought we should include examples that covered points on a
               terrain that included the organic and the mechanical, with reference to several
               disciplines. Finally, we tried to choose examples that could be contained by a common
               perimeter. Our working list contained the following items:</p>
            <list type="unordered">
               <item>Ferns — configurations of individual organic pieces that form larger
                  items</item>
               <item>Snowflakes — a single solid unique configuration that relies on symmetry</item>
               <item>Solar system — individual items in relations suggested by a larger
                  structure</item>
               <item>Atoms — individual items connected in a more elaborate geometric
                  framework</item>
               <item>Cells — complex interiors composed of pieces that associate by
                  juxtaposition</item>
               <item>Clockwork — complex interiors consisting of structures that interconnect</item>
               <item>Lego™ — geometric shapes with complex surfaces that interconnect</item>
            </list>
            <p>For each of these metaphors, we developed sketches for four different states of the
               droplet: untrained, trained, trained with multiple display options chosen, and
               trained with multiple display and two different organization options. Our goal in
               each case was to make the different states visually distinct at every level of
               magnification, and to make the number of display and organization options obvious at
               the largest size.</p>
            <figure xml:id="figure02">
               <head>Our concept map of possible droplet metaphors shows a wide range of candidate
                  real-world objects that combine visual complexity with a compact form.</head>
               <graphic url="resources/images/figure02.jpg"/>
               <figDesc>Concept map with the word 'droplets' in the center.</figDesc>
            </figure>
            <p>We chose these various states because they represent significant choices made by the
               user. It would also be possible to consider visually representing choices the user
               makes about what collection to work with in the first place, which may be one of the
               most significant choices the user makes. However, visually representing collections
               is definitely a challenge, and it may be preferable to provide information about the
               collection in the form of text labels.</p>

            <div>
               <head>Ferns</head>
               <p>A fern is a fractal, which means it repeats its morphology at increasing scales
                     (<ref target="#figure03">Figure 3</ref>). We might adopt this strategy for two
                  scales, where in the unfolding fern leaf, the individual leaflets represent
                  functions and the entire leaf represents the complete, organized droplet.</p>
               <p>We can use the stem to represent the software training, and the leaflets to
                  represent the other functions. This strategy has the benefit of looking minimal
                  when no display or organization functions are chosen, which may prompt the user to
                  want to choose more sophisticated configurations of options.</p>
               <p>If we also assume that the two sides of the stem represent two kinds of
                  organization, then having all the display items on one side of the stem would
                  indicate only one kind of sorting, while dividing display items on both sides of
                  the stem would indicate two kinds of sorting.</p>
               <figure xml:id="figure03">
                  <head>The placement of leaflets along the stem of the fern leaf allows us to
                     express the user choices starting with an empty droplet (left), then
                     sequentially adding training data, display choices, then organization in one
                     way and in two ways.</head>
                  <graphic url="resources/images/figure03.jpg"/>
                  <figDesc>Five yellow droplets with various types of leaves inside.</figDesc>
               </figure>
               <p>Reading the sequence from left to right, we show first an untrained or empty
                  droplet. The next version shows a droplet that has been trained by the user.
                  Taking one of the demonstration projects as an example, this second droplet might
                  contain the results of training the system to recognize poems by Emily Dickinson
                  with an erotic charge, using a naïve Bayesian algorithm. The third version shows
                  this same trained droplet with seven items chosen for display. In the case of the
                  Dickinson collection, these items might include the poem’s title (often the first
                  line), the date of first publication, the place of publication, the name of the
                  publisher, the number of lines in the poem, the number of words in the poem, the
                  number of key features found in the poem related to eroticism, and the numeric
                  score assigned by the system for the poem in terms of its erotic charge. The
                  fourth version would represent the same information about each poem, but organize
                  the results in some way — perhaps by the numeric rating assigned by the system.
                  The fifth and final version would show the items arranged in two ways — first by
                  numeric rating, and chronologically within that.</p>
               <p>The organic nature of the fern droplet may lead to some difficulties for the user
                  in that a growth process for a fern is not the same as selection among various
                  options by a user defining a droplet. The use of this organic metaphor, however,
                  does suggest another possibility — would it be interesting to indicate how long it
                  has been since someone used a droplet? Do the droplets visibly age when they
                  aren’t used? Does new use refresh the appearance of the droplet? Would people be
                  encouraged to experiment with strange droplets because they are obviously drying
                  up or deteriorating?</p>
            </div>
            <div>
               <head>Snowflakes</head>
               <p>Ferns suggest quite a regular form of arrangement, which means there is little
                  meaningful variation possible between different droplets. Snowflakes also tend to
                  symmetry, but each is unique. They combine a complex silhouette with a compact
                  form (<ref target="#figure04">Figure 4</ref>). Variations in the details
                  comprising the silhouette could therefore be used to communicate a wide range of
                  functions. </p>
               <p>However, the strong visual language of the snowflake may prove to be difficult to
                  repurpose as a meaningful channel of communication. The fact that each snowflake
                  is supposed to be unique also means that there is no basic, restricted vocabulary
                  of shapes to draw on in their construction.</p>
               <figure xml:id="figure04">
                  <head>Each snowflake is a unique visual object, which allows us to differentiate
                     one droplet from another, but introduces a difficulty in that there is no
                     simple method of re-using recognizable components.</head>
                  <graphic url="resources/images/figure04.jpg"/>
                  <figDesc>Five blue snowflakes with varying decorations inside.</figDesc>
               </figure>
               <p>Our draft solution in this case is to treat the visual complexity of the interior
                  of the object as the measure of the state of the droplet. Unlike our other
                  designs, which involve composites of countable objects, the snowflake droplets
                  indicate each condition by filling in spaces that are otherwise unarticulated.
               </p>
            </div>
            <div>
               <head>Solar System</head>
               <p>Objects in the solar system create a composite object where the individual items
                  are in relation to one another but not in immediate contact (<ref
                     target="#figure05">Figure 5</ref>). The central position of the sun also serves
                  to imply the centrality of the software training. A solar system without a sun is
                  clearly incomplete.</p>
               <figure xml:id="figure05">
                  <head>The solar system, with its objects in orbit, provides a structure that can
                     be progressively filled with planetary dots that represent choices of
                     representation, while location on the orbits is used to indicate
                     organization.</head>
                  <graphic url="resources/images/figure05.jpg"/>
                  <figDesc>Five sample solar system droplets with orbiting objects in different
                     locations.</figDesc>
               </figure>
               <p>Another potential difficulty with several of the designs, including the solar
                  system, is that they may suggest a degree of order and regularity which may be
                  somewhat at odds with the experience of the scholar using data mining techniques.
                  Using a data mining system can actually involve an iterative and somewhat
                     <q>messy</q> experimentation with various options.</p>
            </div>
            <div>
               <head>Atomic</head>
               <p>Our starting point for the atomic droplets is the simple models that consist of
                  electrons in elliptical orbits around a nucleus (<ref target="#figure06">Figure
                     6</ref>). The nucleus is filled in during the training phase, while the
                  inclusion of electrons and their locations represent choices about item
                  representation and organization.</p>
               <figure xml:id="figure06">
                  <head>Atomic models provide a vocabulary for expressing the components of the
                     droplets, consisting of individual items connected to each other.</head>
                  <graphic url="resources/images/figure06.jpg"/>
                  <figDesc>Five sample atoms with particles in various locations.</figDesc>
               </figure>
            </div>
            <div>
               <head>Cells</head>
               <p>A cell has an interior that is populated with a number of distinct individual
                  items and structures (<ref target="#figure07">Figure 7</ref>). Cells therefore
                  provide a compact metaphor based on the complexities of the interior of the
                  droplet. We also have available for future exploration the single-celled
                  organisms, such as the paramecium, which combine this interior complexity with an
                  exterior with some communicative potential.</p>
               <p>Cells also suggest an organic form, which may help to counterbalance the highly
                  technical profile of data mining in the humanities.</p>
               <figure xml:id="figure07">
                  <head>A cell is neither an aggregate nor does it have a complex silhouette. Its
                     communicative potential consists instead of a rich interior of organic shapes,
                     including individual items and structures that divide, enclose, and support
                     them.</head>
                  <graphic url="resources/images/figure07.jpg"/>
                  <figDesc>Five purple cells with varying arrangements of inner parts.</figDesc>
               </figure>
            </div>
            <div>
               <head>Clockwork</head>
               <p>A clockwork has a complex interior like a cell, without the suggestion of the
                  organic (<ref target="#figure08">Figure 8</ref>). There is a high degree of
                  interconnection of the parts inside a clock, implying that all the parts are
                  necessary in order for it to work. This level of constraint on what is necessary
                  and what is optional might not be appropriate in the context of data mining, but
                  the operational nature of the clock and the implied association with the
                  mathematical operations underlying data mining may make it particularly
                  appropriate.</p>
               <p>The variety of interior components also provides a potentially rich visual
                  vocabulary for representing the different aspects of the droplets. Finally, we
                  have used an external outline suggestive of clock gears, in order to allow a
                  direct visual association to the mechanical, even for the untrained form of the
                  droplet.</p>
               <figure xml:id="figure08">
                  <head>Like a cell, a clockwork shows a rich internal landscape that can be used to
                     represent a variety of functions. Clockworks are mechanical rather than
                     organic, and therefore suggest interconnection, rather than isolation of the
                     functions.</head>
                  <graphic url="resources/images/figure08.jpg"/>
                  <figDesc>Five sample clock droplets with varying arrangements of gears.</figDesc>
               </figure>
            </div>
            <div>
               <head>Lego™</head>
               <p>With Lego, there are a set number of individual shapes that are aggregated. With
                  this metaphor, we can use the external contour of the composite droplet (<ref
                     target="#figure09">Figure 9</ref>). We can distinguish by size between more and
                  less important functions, so the central training can be indicated by a large Lego
                  piece, while the display functions are secondary and the organization functions
                  tertiary.</p>
               <p>Lego also comes with the affordance of assembling the separate pieces into
                  different configurations. The user could distinguish between similar droplets by
                  taking advantage of different kinds of arrangement.</p>
               <figure xml:id="figure09">
                  <head>Lego™ suggests a method of combining separate items to create a new whole.
                     For our purposes, each individual piece of Lego would stand either for the
                     result of software training or for a choice of representation or
                     organization.</head>
                  <graphic url="resources/images/figure09.jpg"/>
                  <figDesc>Five sample Lego™ pieces built in different designs.</figDesc>
               </figure>
            </div>
         </div>
         <div>
            <head>Conclusions and Future Research</head>
            <p>Having identified a range of possibilities, our next step will be to present them to
               potential users in order to collect measures of performance and preference. By
               placing them in the interactive context of a prototype environment, we will be able
               to examine how humanities scholars respond to the various affordances. The goals of
               this phase will be to determine whether participants are able to make the necessary
               intuitive leaps to understand the intended communicative aspects of each of the
               droplet designs. Once we’ve established a smaller subset of droplets, we will proceed
               by expanding the visual positioning or skinning of each droplet type, in order to
               determine how humanities scholars respond to various semantic differentials such as
               glossy/rough, technological/natural, geometric/organic, and colour/grey scale. By
               determining how potential users of the data mining system perceive the design
               dimensions of the droplets, we will be able to decide to what extent this strategy
               can prove beneficial in removing barriers to them adopting the system. One
               possibility may consist of the use of a hybrid form of droplets, where different
               visual components are assembled in a kind of toolkit. Our eventual decisions with
               respect to the design of the droplets may also be usefully repurposed to inform the
               visual aspects of the design of the entire system.</p>
         </div>
         <div>
            <head>Acknowledgements</head>
            <p>The authors wish to thank the many members of the NORA project research team for
               their contributions to this work. Their names can be found at <ref
                  target="http://www.noraproject.org/team.php"
                  >http://www.noraproject.org/team.php</ref>. We would also like to acknowledge the
               generous support of the Andrew W. Mellon Foundation, the Social Sciences and
               Humanities Research Council of Canada, the Natural Sciences and Engineering Research
               Council of Canada, and the Canada Foundation for Innovation.</p>
         </div>
      </body>
      <back>
         <listBibl>
            <bibl xml:id="bederson2001" label="Bederson 2001" key="bederson2001">
               <author>B. B. Bederson</author>. <title rend="quotes">PhotoMesa: a zoomable image
                  browser using quantum treemaps and bubblemaps.</title>
               <title>Proceedings of the 14th annual ACM symposium on User interface software and
                  technology.</title> pp. 71-80, <date>2001</date>.</bibl>
            <bibl xml:id="bumgardner2005" label="Bumgardner et al. 2005" key="bumgardner2005">
               <author>J. Bumgardner</author>. <title>Flickr Colour Fields Experimental Colr
                  Pickr</title>, <date>2005</date>.</bibl>
            <bibl xml:id="dillon2001" label="Dillon 2001" key="dillon2001a">
               <author>A. Dillon</author>. <title rend="quotes">Beyond usability: process, outcome
                  and affect in human-computer interactions.</title>
               <title>Canadian Journal of Library and Information Science</title>, 26(4), 57-69,
                  <date>2001</date>.</bibl>
            <bibl xml:id="feldman1997" label="Feldman et al. 1997" key="feldman1997">
               <author>R. Feldman</author> and <author>H. Hirsh</author>. <title rend="quotes"
                  >Finding Associations in Collections of Text.</title> In <editor>Michalski, R.S.,
                  Bratko, I. and Kubat, M.</editor>, <title rend="italic">Machine Learning and Data
                  Mining: Methods and Applications</title>. <pubPlace>NY</pubPlace>: <publisher>J.
                  Wiley</publisher>, 223-240. <date>1997</date>.</bibl>
            <bibl xml:id="gibson1979" label="Gibson 1979" key="gibson1979">
               <author>J. J. Gibson</author>. <title rend="italic">The Ecological Approach to Visual
                  Perception.</title>
               <pubPlace>Boston</pubPlace>: <publisher>Houghton-Mifflin</publisher>,
                  <date>1979</date>.</bibl>
            <bibl xml:id="hascoet1998" label="Hascoët et al. 1998" key="hascoet1998">
               <author>M. Hascoët</author> and <author>X. Soinard</author>. <title rend="quotes"
                  >Using maps as a user interface to a digital library.</title>
               <title>Proceedings of the 21st annual international ACM SIGIR conference on Research
                  and development in information retrieval</title>, 339-340. <ref
                  target="http://doi.acm.org/10.1145/290941.291028"
                  >http://doi.acm.org/10.1145/290941.291028</ref>, <date>1998</date>.</bibl>
            <bibl xml:id="hockey2000" label="Hockey 2000" key="hockey2000">
               <author>S. Hockey</author>.
               <title rend="italic">Electronic Texts in the Humanities</title>.
                  <pubPlace>Oxford</pubPlace>: <publisher>Oxford University Press</publisher>,
                  <date>2000</date>.</bibl>
            <bibl xml:id="horn1998" label="Horn 1998" key="horn1998">
               <author>R. E. Horn</author>. <title>Visual Language: Global Communication for the
                  21st Century</title>. MacroVU, Inc. Bainbridge Island, WA,
               <date>1998</date>.</bibl>
            <bibl xml:id="horton2006" label="Horton et al. 2006" key="horton2006">
               <author>T. Horton</author>, <author>K. Taylor</author>, <author>B. Yu</author>, and
                  <author>X. Xiang</author>. <title rend="quotes">
                  <emph>Quite right, dear, and interesting</emph>: Seeking the Sentimental in
                  Nineteenth Century American Fiction.</title>
               <title>Digital Humanities 2006</title>. Paris. Sorbonne, 5-9 July, <date>2006</date>,
               81-82.</bibl>
            <bibl xml:id="karvonen2000" label="Karvonen 2000" key="karvonen2000">
               <author>K. Karvonen</author>. <title rend="quotes">The beauty of simplicity.</title>
               <title>Proceedings of the 2000 conference on Universal Usability</title>. November,
                  <date>2000</date>.</bibl>
            <bibl xml:id="kirschenbaum" label="Kirschenbaum et al. 2006" key="kirschenbaum">
               <author>M. Kirschenbaum</author>, <author>C. Plaisant</author>, <author>M. N.
                  Smith</author>, <author>L. Auvil</author>, <author>J. Rose</author>, <author>B.
                  Yu</author>, and <author>T. Clement</author>. <title rend="quotes">
                  <emph>Undiscovered Public Knowledge</emph>: Mining for Patterns of Erotic Language
                  in Emily Dickinson's Correspondence with Susan Huntington (Gilbert)
                  Dickinson</title>. <title>Digital Humanities 2006</title>. Paris. Sorbonne, 5-9
                  July, <date>2006</date>, 252-255.</bibl>
            <bibl xml:id="nielsen2000" label="Nielsen 2000" key="nielsen2000">
               <author>J. Nielsen</author>. <title rend="italic">Designing web usability: The
                  practice of simplicity</title>. <pubPlace>Indianapolis, IN</pubPlace>:
                  <publisher>New Riders</publisher>, <date>2000</date>.</bibl>
            <bibl xml:id="pirolli1996" label="Pirolli et al. 1996" key="pirolli1996">
               <author>P. Pirolli</author>, <author>P. Schank</author>, <author>M. Hearst</author>,
               and <author>C. Diehl</author>. <title rend="quotes">Scatter/Gather Browsing
                  Communicates the Topic Structure of a Very Large Text Collection.</title>
               <title>Proceedings of the SIGCHI conference on Human factors in computing systems:
                  common ground</title>, 213-220. <date>1996</date>.</bibl>
            <bibl xml:id="plaisant2004" label="Plaisant 2004" key="plaisant2004">
               <author>C. Plaisant</author>. <title rend="quotes">The Challenge of Information
                  Visualization Evaluation.</title>
               <title>IEEE Proc. of AVI 2004</title>, <date>2004</date>.</bibl>
            <bibl xml:id="ramsay2006" label="Ramsay and Steger 2006" key="ramsay2006">
               <author>S. Ramsay</author> and <author>S. Steger</author>. <title rend="quotes"
                  >Distinguished Speakers: Keyword Extraction and Critical Analysis with Virginia
                  Woolf's <title rend="italic">The Waves</title>.</title>
               <title>Digital Humanities 2006.</title> Paris. Sorbonne, 5-9 July, <date>2006</date>,
               255-257.</bibl>
            <bibl xml:id="ramsay2003" label="Ramsay 2003" key="ramsay2003">
               <author>S. Ramsay</author>. <title rend="quotes">Toward an Algorithmic
                  Criticism.</title>
               <title rend="italic">Literary and Linguistic Computing</title>. 18(2), 167-174, <date>2003</date>.</bibl>
            <bibl xml:id="rodden2001" label="Rodden et al. 2001" key="rodden2001">
               <author>K. Rodden</author>, <author>W. Basalaj</author>, <author>D.
               Sinclair</author>, and <author>K. Wood</author>. <title rend="quotes">Does
                  Organisation by Similarity Assist Image Browsing?</title> In <title rend="italic"
                  >Proceedings of Human Factors in Computing Systems (CHI 2001) ACM Press</title>,
               pp. 190-197, <date>2001</date>.</bibl>
            <bibl xml:id="ruecker2006" label="Ruecker et al. 2006" key="ruecker2006">
               <author>S. Ruecker</author>, <author>M. Radzikowska</author>, and <author>S.
                  Sinclair</author>. <title rend="quotes">Communicating Process with Form: Designing
                  the Visual Morphology of the Nora Data Mining Kernels.</title>
               <title>Proceedings of CaSTA 2006. Breadth of Text: A Joint Computer Science and
                  Humanities Computing Conference.</title> Fredericton, New Brunswick. October
               11-14. 57-68, <date>2006</date>. </bibl>
            <bibl xml:id="ruecker2005" label="Ruecker et al. 2005" key="ruecker2005">
               <author>S. Ruecker</author>, <author>L. M. Given</author>, <author>B.
               Sadler</author>, and <author>A. Ruskin</author>. <title rend="quotes">Building
                  Accessible Web Interfaces for Seniors: Similarity Clustering of Pill
                  Images.</title>
               <title>Include 2005. London. Helen Hamlyn Institute. Royal College of Art.</title>
               April 5-8, <date>2005</date>.</bibl>
            <bibl xml:id="ruecker2003" label="Ruecker 2003" key="ruecker2003">
               <author>S. Ruecker</author>. <title rend="italic">Affordances of prospect for
                  academic users of interpretively-tagged text collections.</title> Unpublished
               doctoral dissertation. University of Alberta, Edmonton, Alberta, Canada,
                  <date>2003</date>.</bibl>
            <bibl xml:id="shneiderman2001" label="Shneiderman 2001" key="shneiderman2001">
               <author>B. Shneiderman</author>. <title rend="quotes">Inventing Discovery Tools:
                  Combining Information Visualization with Data Mining.</title>
               <title>Keynote for Discovery Science 2001 Conference</title>, November 25-28,
               Washington, DC, <date>2001</date>.</bibl>
            <bibl xml:id="small1996" label="Small 1996" key="small1996">
               <author>D. Small</author>. <title rend="quotes">Navigating large bodies of
                  text.</title>
               <title rend="italic">IBM Systems Journal</title>. 35:3–4, <date>1996</date>.</bibl>
            <bibl xml:id="unsworth2005" label="Unsworth 2005" key="unsworth2005">
               <author>J. Unsworth</author>. <title rend="quotes">New Methods for Humanities
                  Research.</title>
               <title>The 2005 Lyman Award Lecture. November 11. National Humanities Center.
                  Research Triangle Park, NC.</title>
               <ptr target="http://www3.isrl.uiuc.edu/~unsworth/lyman.htm"/>,
               <date>2005</date>.</bibl>
            <bibl xml:id="unsworth2004" label="Unsworth 2004" key="unsworth2004">
               <author>J. Unsworth</author>. <title rend="quotes">Forms of Attention: Digital
                  Humanities Beyond Representation.</title> A paper delivered at <title>The Face of
                  Text: Computer-Assisted Text Analysis in the Humanities</title>. The third
               conference of the Canadian Symposium on Text Analysis (CaSTA), McMaster University,
               November 19-21, <date>2004</date>.</bibl>
            <bibl xml:id="vicente2002" label="Vicente 2002" key="vicente2002">
               <author>K.J. Vicente</author>. <title rend="quotes">Ecological Interface Design:
                  Progress and Challenges.</title>
               <title rend="italic">Human Factors</title>. 44.1:62–78, <date>2002</date>.</bibl>
            <bibl xml:id="weiss2005" label="Weiss et al. 2005" key="weiss2005">
               <author>S. Weiss</author>, <author>N. Indurkhya</author>, <author>T. Zhang</author>,
               and <author>F. Damerau</author>. <title rend="italic">Text Mining: Predictive Methods
                  for Analyzing Unstructured Information</title>. <pubPlace>New York</pubPlace>:
                  <publisher>Springer</publisher>, <date>2005</date>.</bibl>
         </listBibl>
      </back>
   </text>
</TEI>
