<?xml version="1.0" encoding="utf-8"?>
<?oxygen RNGSchema="../../common/schema/DHQauthor-TEI.rng" type="xml"?>
<?oxygen SCHSchema="../../common/schema/dhqTEI-ready.sch"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0"
     xmlns:dhq="http://www.digitalhumanities.org/ns/dhq"
     xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
     xmlns:cc="http://web.resource.org/cc/">
   <teiHeader>
      <fileDesc>
         <titleStmt>
            <title>Designing Data Mining Droplets: New Interface Objects for the Humanities
            Scholar</title>
            <author>Stan Ruecker</author>
            <dhq:authorInfo>
               <dhq:author_name>Stan <dhq:family>Ruecker</dhq:family>
               </dhq:author_name>
               <dhq:affiliation>University of Alberta, Canada</dhq:affiliation>
               <email>sruecker@ualberta.ca</email>
               <dhq:bio>
                  <p>Stan Ruecker is an Assistant Professor of Humanities Computing in the 
                Department of English and Film Studies at the University of Alberta. 
                Ruecker holds advanced degrees in English, Humanities Computing, and 
                Design. He has expertise in the design of experimental interfaces to 
                support online browsing tasks, typically by combining some meaningful 
                representation of every item in the collection with emergent tools for 
                manipulating the display. Ruecker also has a growing body of 
                experience in qualitative evaluation studies of the various stages of 
                prototypes. He has published on text encoding theory, affective 
                design, interaction histories, electronic books, information design, 
                issue crawling, and interface design for a variety of uses. He has 
                presented in the last few years at international conferences in 
                design, computing science, educational technology, English literature, 
                communication technology, library and information studies, and 
                humanities computing.</p>
               </dhq:bio>
            </dhq:authorInfo>
            <author>Milena Radzikowska</author>
            <dhq:authorInfo>
               <dhq:author_name>Milena <dhq:family>Radzikowska</dhq:family>
               </dhq:author_name>
               <dhq:affiliation>Mount Royal College, Canada</dhq:affiliation>
               <email>mradzikowska@gmail.com</email>
               <dhq:bio>
                  <p>Milena Radzikowska is an associate professor in Information Design, 
                Faculty of Communication Studies, Mount Royal College. In 2000, she 
                graduated from NSCAD University (BDes Hon.) and, in 2003, from the 
                University of Alberta, with a master's (MDes) degree in visual 
                communication design. Her active research interests are in the areas 
                of visual communication, interface and information design, and text 
                visualization. Over the last few years, Ms. Radzikowska presented at 
                international conferences in design (Edmonton, Cape Town, and Hong 
                Kong), educational technology (Lima), communication technology 
                (Honolulu and Helsinki), humanities computing (Victoria, Paris, 
                Fredericton, Saskatoon, Vancouver, and Chicago), and medieval studies 
                (Prague). In January 2009, she began working on an interdisciplinary 
                PhD at the University of Alberta in Computing Science &amp; Humanities 
                Computing, under the supervision of Dr. Walter Bischof and Dr. Stan 
                Ruecker.</p>
               </dhq:bio>
            </dhq:authorInfo>
            <author>Stéfan Sinclair</author>
            <dhq:authorInfo>
               <dhq:author_name>Stéfan <dhq:family>Sinclair</dhq:family>
               </dhq:author_name>
               <dhq:affiliation>McMaster University, Canada</dhq:affiliation>
               <email>sgs@mcmaster.edu</email>
               <dhq:bio>
                  <p>Stéfan Sinclair is an Associate Professor in the Department of 
                Communication Studies and Multimedia at McMaster University. Sinclair 
                is involved in the design, development and  theorisation of tools for 
                the digital humanities. His research projects, funded by SSHRC, CFI, and 
                the Mellon Foundation, include the  following: Voyeur, an environment 
                for reading and analyzing digital  texts <ref target="http://voyeur.hermeneuti.ca">voyeur.hermeneuti.ca</ref>; 
                      TAPoR, a portal for text analysis <ref target="http://portal.tapor.ca">portal.tapor.ca</ref>; BonPatron, a 
                      widely-used French grammar checker <ref target="http://bonpatron.com">bonpatron.com</ref>; Monk, a data 
                      mining tool for literary scholars <ref target="http://monkproject.org">monkproject.org</ref>; HumViz, a suite 
                      of visualization interfaces for the humanities <ref target="http://humviz.org">humviz.org</ref>; and 
                      Digital Texts 2.0, a Facebook application for managing digital texts <ref target="http://dtext2.org">dtext2.org</ref>. Sinclair's background is in French literature, and 
                especially the  Oulipo, a group of writers and mathematicians that 
                creates constraint-based literature. Prior to arriving at McMaster, 
                Sinclair helped create and direct the M.A. in Humanities Computing at 
                the University of Alberta. His current teaching is focused on the 
                history and development of multimedia, as well as on practices of 
                programming for the humanities.</p>
               </dhq:bio>
            </dhq:authorInfo>
         </titleStmt>
         <publicationStmt>
            <idno type="DHQarticle-id">000067</idno>
            <idno type="volume">003</idno>
            <idno type="issue">3</idno>
            <dhq:articleType>article</dhq:articleType>
            <date when="2009-09-29">29 September 2009</date>
            <availability>
               <cc:License xmlns="http://digitalhumanities.org/DHQ/namespace"
                           rdf:about="http://creativecommons.org/licenses/by-nc-nd/2.5/"/>
            </availability>
         </publicationStmt>
         <sourceDesc>
            <p>Authored for DHQ; migrated from original DHQauthor format</p>
         </sourceDesc>
      </fileDesc>
      <encodingDesc>
         <classDecl>
            <taxonomy xml:id="dhq_keywords">
               <bibl>DHQ classification scheme; full list available in the <ref target="http://www.digitalhumanities.org/dhq/taxonomy.xml">DHQ keyword taxonomy</ref>
               </bibl>
            </taxonomy>
            <taxonomy xml:id="authorial_keywords">
               <bibl>Keywords supplied by author; no controlled vocabulary</bibl>
            </taxonomy>
         </classDecl>
      </encodingDesc>
      <profileDesc>
         <langUsage>
            <language ident="en"/>
         </langUsage>
      </profileDesc>
      <revisionDesc>
         <change who="Stéfan" when="2009-02-17">Document converted by Voyeur Tools
                    (voyeur.hermeneuti.ca).</change>
         <change who="Stéfan" when="2009-02-18">Exported document cleaned up.</change>
         <change when="2009-03-04" who="Alyssa">checked and corrected XML</change>
         <change when="2009-08-06" who="Alyssa">added bios</change>
      </revisionDesc>
   </teiHeader>
   <text>
      <front>
         <dhq:abstract>
            <p>In this paper, we describe the design of a number of alternative interface <q>droplets</q>
                that are intended for use by humanities scholars interested in applying data mining
                and information visualization tools to the task of hypothesis formulation. The
                trained droplets provide several functions. Their primary purpose is to encapsulate
                the results of the software training phase. They can be saved for future re-use
                against other collections or combinations of collections. They can be modified by
                having the user accept or reject features identified by the data mining software.
                Finally, they can also contain choices for how to display and organize items in the
                collection. The opportunity to develop a new interface object presents the designer
                with the challenge of effectively communicating what the tool is good for and how it
                is used. This paper outlines the design process we followed in creating the visual
                representations of these interface objects, describes the communicative strengths
                and weaknesses of a number of alternative designs, and discusses the importance of
                the study of new interface objects as the means of providing the user with new
                interface affordances.</p>
         </dhq:abstract>
         <dhq:teaser>
            <p>Wherein our valiant heroes describe experiments with visual metaphors intended to
            help them in their perilous data mining quests.</p>
         </dhq:teaser>
      </front>
      <body>
         <head>Designing Data Mining Droplets: New Interface Objects for the Humanities
            Scholar</head>
         <div>
            <head>Introduction</head>
            <p>The goal of this paper is to address some of the conceptual issues that arise in the
                design of a new kind of interface object for a specific domain — data mining for the
                humanities. In that context, we describe one component of our research: the design
                of a form of visual representation that would provide humanities scholars with some
                insight into the data mining process, while at the same time making the activity of
                data mining attractive and easy to carry out.</p>
            <figure xml:id="figure01">
               <graphic url="resources/images/figure01.jpg"/>
               <figDesc>Screenshot of the data mining worksheet for step three.</figDesc>
               <dhq:caption>An early sketch of the data mining environment (in this case for the
                        NORA project) shows someone using a droplet trained for identifying the
                        erotic in a set of poems from the Emily Dickinson collection in the
                        Institute for Advanced Technology in the Humanities (IATH) at the University
                        of Virginia. Note that the preliminary droplet design shown here (bottom
                        right) has no specifically communicative morphology.</dhq:caption>
            </figure>
            <p>Our strategy in this interface was to provide the user with a variety of empty
                <q>droplets</q> which would be filled with the results of the software training phase
                <ptr target="#ruecker2006"/>. Each droplet would
                contain or encapsulate an entire working state of the system, including the
                algorithmic consequences of a particular training exercise, combined with some
                parameters for organizing and selecting the form of the display. The choice of the
                proper word to identify the droplets is in itself a subject of design. Other terms
                that have been suggested include <q>magnet,</q> 
               <q>crystal,</q> 
               <q>capsule,</q> 
               <q>lens,</q> 
               <q>charm,</q>
                <q>filter,</q> 
               <q>system state,</q> 
               <q>kernel,</q> and the very Canadian <q>hockey puck.</q> Whatever
                these objects are eventually called, for the time being we are using the term
                <q>droplet,</q> which suggests to us a densely compressed item that can unpack in an
                organic way to influence the entire surroundings. Once a droplet has been trained
                for data mining, it can be saved and applied to the entire collection, or to a
                different collection. A droplet is applied to a collection by dragging and dropping
                it onto a display representing each item, after which the display organizes itself
                in a series of <q>oil and water</q> effects. </p>
            <p>Of vital significance to the success of this strategy is the design of the droplets.
                The droplets need to be able to represent the relevant information about the data
                mining process in a form that is readily interpretable by humanities scholars. The
                droplet serves in one sense like an icon — a person looking at it will hopefully
                remember what system state it contains. This iconic function should work at
                different scales, at least one of which is quite small. The droplets therefore need
                to be easily visually differentiable one from another, at every scale. Finally, the
                droplets need to be visually appealing. We describe here our initial attempts to
                design these interface objects, based on a set of metaphors to real-world items that
                combine complex visual appearance with a compact form.</p>
         </div>
         <div>
            <head>Background</head>
            <p>The online availability of a wide range of digital data has resulted in a
                corresponding increase in various kinds of tools for retrieving and manipulating the
                items in a collection <ptr target="#hockey2000"/>. Interface design researchers have
                worked on systems intended to help users access digital images, work with electronic
                text files, and apply data mining algorithms to a variety of problems, both in the
                sciences and in the humanities.</p>
            <p>In the area of digital images, <ptr target="#bederson2001"/> describes a
                zoomable browser, <ptr target="#bumgardner2005"/> provides an
                experimental search tool that uses a colour wheel as its interface, and <ptr target="#hascoët1998"/> discuss the use of maps in accessing a digital library.
                Other examples include <ptr target="#rodden2001"/>, who studied the
                use of similarity clustering for browsing tasks, and <ptr target="#ruecker2005"/>, who developed a prototype image browser for pill
                identification.</p>
            <p>For tools related to text files, <ptr target="#pirolli1996"/> describe
                a system for visualizing documents which allows the user to form dynamic groups.
                <ptr target="#small1996"/> developed a 3D prototype for text navigation, where
                the reader moved between columns of text from Shakespeare’s plays. A variety of
                researchers have worked in the area of data mining for text collections of various
                kinds. For example, <ptr target="#feldman1997"/> discuss early
                efforts in this area, and <ptr target="#weiss2005"/> provide a recent
                update on methods.</p>
            <p>Some researchers have pointed out that the potential for applying data mining tools
                to questions in the humanities lies largely in the capacity of such tools to
                contribute, not primarily to hypothesis testing, but instead to hypothesis
                formulation <ptr target="#shneiderman2001"/>; <ptr target="#ramsay2003"/>; <ptr target="#unsworth2004"/>. The standard approach in humanities research is not
                to solve a problem by testing one hypothesis against another, but rather to enrich
                the object of study by repeated observation and reporting. Data mining tools and
                their accompanying visualizations, which facilitate pattern finding across a wide
                range of data, can definitely play a role in this process.</p>
            <p>With respect to the design of interfaces for data mining, it is important to remember
                that each new online tool represents a new opportunity for action, or affordance
                <ptr target="#gibson1979"/>; <ptr target="#vicente2002"/>. For instance, in a
                more conventional approach to the interface for data mining, it would be possible to
                create a history palette that records previous states of the system. However, it is
                not necessarily straightforward to repurpose an item from that history to a new
                collection. By encapsulating the history states as droplets, we make the repurposing
                simpler.</p>
            <p>Another significant feature of the droplets is their role in interactivity. By
                providing the user with an item to drag and drop to trigger a series of dynamic
                responses from the system, the droplets help facilitate an instructional aspect: the
                user can see the steps carried out by the system, which correspond to the steps
                associated with the droplet. While visually dynamic responses are not reliant on the
                presence of droplets as objects, their existence as part of the user interaction
                helps to suggest to the designer these various new forms of feedback, which are a
                kind of affordance.</p>
            <p>Studying these new affordances presents a challenge, in that the researcher by
                definition does not always have an existing object with a similar
                affordance — otherwise it would be a case of a redesign rather than a new tool <ptr target="#ruecker2003"/>. Though opinions vary, the current dominant perspective
                is that interface research requires a component of usability study <ptr target="#nielsen2000"/>, but that usability study alone is probably not enough.
                Attention should also be paid to other factors, such as aesthetics <ptr target="#karvonen2000"/>, affect <ptr target="#dillon2001"/>, and sustained use
                over time <ptr target="#plaisant2004"/>.</p>
         </div>
         <div>
            <head>Methodology</head>
            <p>We began by identifying the kinds of information the user might want to know while
                working with the system. These included an overview of the process, suggestions
                about the kinds of tasks that could be performed using the system, reassurance at
                each point that the right things were happening, and assistance in interpreting the
                results of each stage and moving successfully to the next stage. With the droplets,
                we hoped to be able to communicate what had been done to create them, in order to
                suggest how they might be successfully deployed once they were created.</p>
            <p>To construct the droplets, we generated a candidate list of real-world items that
                have a sufficiently complex physical shape to serve as possible metaphors for the
                complexities of the data mining process. We determined early in the process that it
                would be difficult and probably not helpful to attempt to communicate for this
                demographic the actual algorithms involved, as for example by superimposing an
                equation on a geometric shape. Instead, we hoped to be able to visually express the
                following information:</p>
            <list type="unordered">
               <item>Is this a trained droplet or an empty one?</item>
               <item>For trained droplets, has the user accepted the features recommended by the
                    system or has the list of features been modified?</item>
               <item>What kinds of features were included?</item>
               <item>How many features were included?</item>
               <item>What options for display have been associated with the droplet?</item>
               <item>What choices for organizing the display have been applied?</item>
            </list>
            <p>There are also other pieces of information that could be useful for understanding
                what has been happening. These items need to be communicated somehow but could be
                difficult to associate with the visual appearance of the droplets. These
                include:</p>
            <list type="unordered">
               <item>The name of the collection or collections used in training.</item>
               <item>The size of the collection.</item>
               <item>The size of the training set.</item>
               <item>The name and goals of the person responsible for training the droplet.</item>
            </list>
            <p>Some strategies involving droplet morphology might include using the size of the
                droplet to indicate the size of the training set or of the collection the set was
                drawn from. Internal and external lines can also be thickened or lightened as a way
                of suggesting robustness of the training set. Finally, depending on the visual kind
                of droplet, it may be possible to nest one droplet inside another, as a way of
                indicating their use in combination.</p>
            <p>It may also be possible to associate this information with the droplets using
                strategies that do not involve the droplet morphology <emph>per se</emph>, but
                instead rely on the combination of text and image. Combining these methods is seen
                by some theorists as an important approach to the design of technical communications
                    <ptr target="#horn1998"/>. We will provide this connection in the case of the
                prototype by refreshing an information panel about the droplet details whenever the
                user selects a droplet. This panel will also provide the opportunity to adjust some
                of the settings stored by the droplet.</p>
         </div>
         <div>
             <head>Results</head>
           
               
               <p>Working from our original map of over a dozen potential metaphors (<ref  target="#figure02">Figure 2</ref>), we
                    selected the following short list for further investigation. We wanted to have a
                    variety of items that were distinct from each other but were also visually
                    complex in a way that could communicate the stages in droplet training. We
                    thought we should include examples that covered points on a terrain that
                    included the organic and the mechanical, with reference to several disciplines.
                    Finally, we tried to choose examples that could be contained by a common
                    perimeter. Our working list contained the following items:</p>
               <list type="unordered">
                  <item>Ferns — configurations of individual organic pieces that form larger
                        items</item>
                  <item>Snowflakes — a single solid unique configuration that relies on
                        symmetry</item>
                  <item>Solar system — individual items in relations suggested by a larger
                        structure</item>
                  <item>Atoms — individual items connected in a more elaborate geometric
                        framework</item>
                  <item>Cells — complex interiors composed of pieces that associate by
                        juxtaposition</item>
                  <item>Clockwork — complex interiors consisting of structures that
                        interconnect</item>
                  <item>Lego™ — geometric shapes with complex surfaces that interconnect</item>
               </list>
               <p>For each of these metaphors, we developed sketches for four different states of
                    the droplet: untrained, trained, trained with multiple display options chosen,
                    and trained with multiple display and two different organization options. Our
                    goal in each case was to make the different states visually distinct at every
                    level of magnification, and to make the number of display and organization
                    options obvious at the largest size.</p>
               <figure xml:id="figure02">
                  <graphic url="resources/images/figure02.jpg"/>
                  <figDesc>Concept map with the word 'droplets' in the center.</figDesc>
                  <dhq:caption>Our concept map of possible droplet metaphors shows a wide range of
                            candidate real-world objects that combine visual complexity with a
                            compact form.</dhq:caption>
               </figure>
               <p>We chose these various states because they represent significant choices made by
                    the user. It would also be possible to consider visually representing choices
                    the user makes about what collection to work with in the first place, which may
                    be one of the most significant choices the user makes. However, visually
                    representing collections is definitely a challenge, and it may be preferable to
                    provide information about the collection in the form of text labels.</p>
           
            <div>
               <head>Ferns</head>
               <p>A fern is a fractal, which means it repeats its morphology at increasing scales
                    (<ref  target="#figure03">Figure 3</ref>). We might adopt this strategy for two scales, where in the unfolding
                    fern leaf, the individual leaflets represent functions and the entire leaf
                    represents the complete, organized droplet.</p>
               <p>We can use the stem to represent the software training, and the leaflets to
                    represent the other functions. This strategy has the benefit of looking minimal
                    when no display or organization functions are chosen, which may prompt the user
                    to want to choose more sophisticated configurations of options.</p>
               <p>If we also assume that the two sides of the stem represent two kinds of
                    organization, then having all the display items on one side of the stem would
                    indicate only one kind of sorting, while dividing display items on both sides of
                    the stem would indicate two kinds of sorting.</p>
               <figure xml:id="figure03">
                  <graphic url="resources/images/figure03.jpg"/>
                  <figDesc>Five yellow droplets with various types of leaves inside.</figDesc>
                  <dhq:caption>The placement of leaflets along the stem of the fern leaf allows us
                            to express the user choices starting with an empty droplet (left), then
                            sequentially adding training data, display choices, then organization in
                            one way and in two ways.</dhq:caption>
               </figure>
               <p>Reading the sequence from left to right, we show first an untrained or empty
                    droplet. The next version shows a droplet that has been trained by the user.
                    Taking one of the demonstration projects as an example, this second droplet
                    might contain the results of training the system to recognize poems by Emily
                    Dickinson with an erotic charge, using a naïve Bayesian algorithm. The third
                    version shows this same trained droplet with seven items chosen for display. In
                    the case of the Dickinson collection, these items might include the poem’s title
                    (often the first line), the date of first publication, the place of publication,
                    the name of the publisher, the number of lines in the poem, the number of words
                    in the poem, the number of key features found in the poem related to eroticism,
                    and the numeric score assigned by the system for the poem in terms of its erotic
                    charge. The fourth version would represent the same information about each poem,
                    but organize the results in some way — perhaps by the numeric rating assigned by
                    the system. The fifth and final version would show the items arranged in two
                    ways — first by numeric rating, and chronologically within that.</p>
               <p>The organic nature of the fern droplet may lead to some difficulties for the user
                    in that a growth process for a fern is not the same as selection among various
                    options by a user defining a droplet. The use of this organic metaphor, however,
                    does suggest another possibility — would it be interesting to indicate how long it
                    has been since someone used a droplet? Do the droplets visibly age when they
                    aren’t used? Does new use refresh the appearance of the droplet? Would people be
                    encouraged to experiment with strange droplets because they are obviously drying
                    up or deteriorating?</p>
            </div>
            <div>
               <head>Snowflakes</head>
               <p>Ferns suggest quite a regular form of arrangement, which means there is little
                    meaningful variation possible between different droplets. Snowflakes also tend
                    to symmetry, but each is unique. They combine a complex silhouette with a
                    compact form (<ref  target="#figure04">Figure 4</ref>). Variations in the details comprising the silhouette
                    could therefore be used to communicate a wide range of functions. </p>
               <p>However, the strong visual language of the snowflake may prove to be difficult to
                    repurpose as a meaningful channel of communication. The fact that each snowflake
                    is supposed to be unique also means that there is no basic, restricted
                    vocabulary of shapes to draw on in their construction.</p>
               <figure xml:id="figure04">
                  <graphic url="resources/images/figure04.jpg"/>
                  <figDesc>Five blue snowflakes with varying decorations inside.</figDesc>
                  <dhq:caption>Each snowflake is a unique visual object, which allows us to
                            differentiate one droplet from another, but introduces a difficulty in
                            that there is no simple method of re-using recognizable
                            components.</dhq:caption>
               </figure>
               <p>Our draft solution in this case is to treat the visual complexity of the interior
                    of the object as the measure of the state of the droplet. Unlike our other
                    designs, which involve composites of countable objects, the snowflake droplets
                    indicate each condition by filling in spaces that are otherwise unarticulated.
                </p>
            </div>
            <div>
               <head>Solar System</head>
               <p>Objects in the solar system create a composite object where the individual items
                    are in relation to one another but not in immediate contact (<ref  target="#figure05">Figure 5</ref>). The
                    central position of the sun also serves to imply the centrality of the software
                    training. A solar system without a sun is clearly incomplete.</p>
               <figure xml:id="figure05">
                  <graphic url="resources/images/figure05.jpg"/>
                  <figDesc>Five sample solar system droplets with orbiting objects in different locations.</figDesc>
                  <dhq:caption>The solar system, with its objects in orbit, provides a structure
                            that can be progressively filled with planetary dots that represent
                            choices of representation, while location on the orbits is used to
                            indicate organization.</dhq:caption>
               </figure>
               <p>Another potential difficulty with several of the designs, including the solar
                    system, is that they may suggest a degree of order and regularity which may be
                    somewhat at odds with the experience of the scholar using data mining
                    techniques. Using a data mining system can actually involve an iterative and
                    somewhat <q>messy</q> experimentation with various options.</p>
            </div>
            <div>
               <head>Atomic</head>
               <p>Our starting point for the atomic droplets is the simple models that consist of
                    electrons in elliptical orbits around a nucleus (<ref  target="#figure06">Figure 6</ref>). The nucleus is
                    filled in during the training phase, while the inclusion of electrons and their
                    locations represent choices about item representation and organization.</p>
               <figure xml:id="figure06">
                  <graphic url="resources/images/figure06.jpg"/>
                  <figDesc>Five sample atoms with particles in various locations.</figDesc>
                  <dhq:caption>Atomic models provide a vocabulary for expressing the components of
                            the droplets, consisting of individual items connected to each
                            other.</dhq:caption>
               </figure>
            </div>
            <div>
               <head>Cells</head>
               <p>A cell has an interior that is populated with a number of distinct individual
                    items and structures (<ref  target="#figure07">Figure 7</ref>). Cells therefore provide a compact metaphor
                    based on the complexities of the interior of the droplet. We also have available
                    for future exploration the single-celled organisms, such as the paramecium,
                    which combine this interior complexity with an exterior with some communicative
                    potential.</p>
               <p>Cells also suggest an organic form, which may help to counterbalance the highly
                    technical profile of data mining in the humanities.</p>
               <figure xml:id="figure07">
                  <graphic url="resources/images/figure07.jpg"/>
                  <figDesc>Five purple cells with varying arrangements of inner parts.</figDesc>
                  <dhq:caption>A cell is neither an aggregate nor does it have a complex
                            silhouette. Its communicative potential consists instead of a rich
                            interior of organic shapes, including individual items and structures
                            that divide, enclose, and support them.</dhq:caption>
               </figure>
            </div>
            <div>
               <head>Clockwork</head>
               <p>A clockwork is a complex interior like a cell, without the suggestion of the
                    organic (<ref  target="#figure08">Figure 8</ref>). There is a high degree of interconnection of the parts
                    inside a clock, implying that all the parts are necessary in order for it to
                    work. This level of constraint on what is necessary and what is optional might
                    not be appropriate in the context of data mining, but the operational nature of
                    the clock and the implied association with the mathematical operations
                    underlying data mining may make it particularly appropriate.</p>
               <p>The variety of interior components also provides a potentially rich visual
                    vocabulary for representing the different aspects of the droplets. Finally, we
                    have used an external outline suggestive of clock gears, in order to allow a
                    direct visual association to the mechanical, even for the untrained form of the
                    droplet.</p>
               <figure xml:id="figure08">
                  <graphic url="resources/images/figure08.jpg"/>
                  <figDesc>Five sample clock droplets with varying arrangements of gears.</figDesc>
                  <dhq:caption>Like a cell, a clockwork shows a rich internal landscape that can
                            be used to represent a variety of functions. Clockworks are mechanical
                            rather than organic, and therefore suggest interconnection, rather than
                            isolation of the functions.</dhq:caption>
               </figure>
            </div>
            <div>
               <head>Lego™</head>
               <p>With Lego, there are a set number of individual shapes that are aggregated. With
                    this metaphor, we can use the external contour of the composite droplet (<ref  target="#figure09">Figure 9</ref>). We can distinguish by size between more and less important functions, so the
                    central training can be indicated by a large Lego piece, while the display
                    functions are secondary and the organization functions tertiary.</p>
               <p>Lego also comes with the affordance of assembling the separate pieces into
                    different configurations. The user could distinguish between similar droplets by
                    taking advantage of different kinds of arrangement.</p>
               <figure xml:id="figure09">
                  <graphic url="resources/images/figure09.jpg"/>
                  <figDesc>Five sample Lego™ pieces built in different designs.</figDesc>
                  <dhq:caption>Lego™ suggests a method of combining separate items to create a new
                            whole. For our purposes, each individual piece of Lego would stand
                            either for the result of software training or for a choice of
                            representation or organization.</dhq:caption>
               </figure>
            </div>
         </div>
         <div>
            <head>Conclusions and Future Research</head>
            <p>Having identified a range of possibilities, our next step will be to present them to
                potential users in order to collect measures of performance and preference. By
                placing them in the interactive context of a prototype environment, we will be able
                to examine how humanities scholars respond to the various affordances. The goals of
                this phase will be to determine whether participants are able to make the necessary
                intuitive leaps to understand the intended communicative aspects of each of the
                droplet designs. Once we’ve established a smaller subset of droplets, we will
                proceed by expanding the visual positioning or skinning of each droplet type, in
                order to determine how humanities scholars respond to various semantic differentials
                such as glossy/rough, technological/natural, geometric/organic, and colour/grey
                scale. By determining how potential users of the data mining system perceive the
                design dimensions of the droplets, we will be able to decide to what extent this
                strategy can prove beneficial in removing barriers to them adopting the system. One
                possibility may consist of the use of a hybrid form of droplets, where different
                visual components are assembled in a kind of toolkit. Our eventual decisions with
                respect to the design of the droplets may also be usefully repurposed to inform the
                visual aspects of the design of the entire system.</p>
         </div>
         <div>
            <head>Acknowledgements</head>
            <p>The authors wish to thank the many members of the NORA project research team for
                their contributions to this work. Their names can be found at
                <ref target="http://www.noraproject.org/team.php">http://www.noraproject.org/team.php</ref>. We would also like to acknowledge the generous support
                of the Andrew W. Mellon Foundation, the Social Sciences and Humanities Research
                Council of Canada, the Natural Sciences and Engineering Research Council of Canada, and the
                Canada Foundation for Innovation.</p>
         </div>
      </body>
      <back>
         <listBibl>
            <bibl xml:id="bederson2001" label="Bederson 2001"> 
               <author>B. B. Bederson</author>. <title rend="quotes">PhotoMesa: a zoomable image browser using quantum treemaps and
                bubblemaps.</title> 
               <title>Proceedings of the 14th annual ACM symposium on User interface software and technology.</title> pp. 71-80, <date>2001</date>.</bibl>
            <bibl xml:id="bumgardner2005" label="Bumgardner et al. 2005"> 
               <author>J.
                Bumgardner</author>. <title>Flickr Colour Fields Experimental Colr Pickr</title>, 
                <date>2005</date>.</bibl>
            <bibl xml:id="dillon2001" label="Dillon 2001">
               <author>A. Dillon</author>. <title rend="quotes">Beyond usability: process, outcome and affect in human-computer
                interactions.</title> 
               <title>Canadian Journal of Library and Information Science</title>, <biblScope type="vol">26</biblScope>(4), 57-69, <date>2001</date>.</bibl>
            <bibl xml:id="feldman1997" label="Feldman et al. 1997"> 
               <author>R. Feldman</author> and <author>H.
                Hirsh</author>. 
            <title rend="quotes">Finding Associations in Collections of Text.</title> In
                <editor>Michalski, R.S., Bratko, I. and Kubat, M.</editor>, 
            <title rend="italic">Machine Learning and Data Mining: Methods and Applications</title>.
                <pubPlace>NY</pubPlace>: <publisher>J. Wiley</publisher>, 223-240.
                <date>1997</date>.</bibl>
            <bibl xml:id="gibson1979" label="Gibson 1979">
               <author>J. J. Gibson</author>. <title rend="italic">The
                Ecological Approach to Visual Perception.</title> 
               <pubPlace>Boston</pubPlace>: <publisher>Houghton-Mifflin</publisher>,
            <date>1979</date>.</bibl>
            <bibl xml:id="hascoët1998" label="Hascoët et al. 1998"> 
               <author>M. Hascoët</author> and <author>X.
                Soinard</author>. <title rend="quotes">Using maps as a user interface to a digital
                library.</title>
               <title>Proceedings of the 21st annual international ACM SIGIR conference on Research and
                development in information retrieval</title>, 339-340. <ref target="http://doi.acm.org/10.1145/290941.291028">http://doi.acm.org/10.1145/290941.291028</ref>, <date>1998</date>.</bibl>
            <bibl xml:id="hockey2000" label="Hockey 2000">
               <author>S. Hockey</author>. 
               <title rend="italic">Electronic
                Texts in the Humanities</title>. <pubPlace>Oxford</pubPlace>: <publisher>Oxford
                University Press</publisher>, <date>2000</date>.</bibl>
            <bibl xml:id="horn1998" label="Horn 1998"> 
               <author>R. E. Horn</author>. <title>Visual
                Language: Global Communication for the 21st Century</title>. MacroVU, Inc.
            Bainbridge Island, WA, <date>1998</date>.</bibl>
            <bibl xml:id="horton" label="Horton et al. 2006"> 
               <author>T.
          Horton</author>, <author>K. Taylor</author>, <author>B. Yu</author>, and <author>X.
            Xiang</author>. <title rend="quotes">
                  <emph>Quite right, dear, and interesting</emph>:
              Seeking the Sentimental in Nineteenth Century American Fiction.</title>
               <title>Digital Humanities 2006</title>. Paris. Sorbonne, 5-9 July,
          <date>2006</date>, 81-82.</bibl>
            <bibl xml:id="karvonen2000" label="Karvonen 2000">
               <author>K. Karvonen</author>. <title rend="quotes">The beauty of simplicity.</title>
               <title>Proceedings of the 2000 conference on Universal Usability</title>. November,
                <date>2000</date>.</bibl>
            <bibl xml:id="kirschenbaum" label="Kirschenbaum et al. 2006"> 
               <author>M.
          Kirschenbaum</author>, 
          <author>C. Plaisant</author>, <author>M. N. Smith</author>, <author>L.
            Auvil</author>, <author>J. Rose</author>, <author>B. Yu</author>, and <author>T.
              Clement</author>. 
          <title rend="quotes">
                  <emph>Undiscovered Public Knowledge</emph>: Mining for Patterns of
            Erotic Language in Emily Dickinson's Correspondence with Susan Huntington (Gilbert)
            Dickinson</title>. <title>Digital Humanities 2006</title>. Paris. Sorbonne, 5-9 July,
          <date>2006</date>, 252-255.</bibl>
            <bibl xml:id="nielsen2000" label="Nielsen 2000">
               <author>J.
                Nielsen</author>. <title rend="italic">Designing web usability: The practice of simplicity.</title>
                <pubPlace>Indianapolis, IN</pubPlace>: <publisher>New Riders</publisher>,
                <date>2000</date>.</bibl>
            <bibl xml:id="pirolli1996" label="Pirolli et al. 1996">
               <author>P. Pirolli</author>, 
            <author>P. Schank</author>, 
            <author>M. Hearst</author>, and 
            <author>C. Diehl</author>. <title rend="quotes">Scatter/Gather Browsing Communicates the
                Topic Structure of a Very Large Text Collection.</title>
               <title>Proceedings of the SIGCHI conference on Human factors in computing systems:
                common ground</title>, 213-220. <date>1996</date>.</bibl>
            <bibl xml:id="plaisant2004" label="Plaisant 2004"> 
               <author>C. Plaisant</author>. <title rend="quotes">The Challenge of Information Visualization Evaluation.</title>
               <title>IEEE Proc. of AVI 2004</title>, <date>2004</date>.</bibl>
            <bibl xml:id="ramsay" label="Ramsay and Steger 2006"> 
               <author>S. Ramsay</author> and
          <author>S. Steger</author>. <title rend="quotes">Distinguished Speakers: Keyword
            Extraction and Critical Analysis with Virginia Woolf's <title rend="italic">The
              Waves</title>.</title>
               <title>Digital Humanities 2006.</title> Paris. Sorbonne, 5-9 July,
          <date>2006</date>, 255-257.</bibl>
            <bibl xml:id="ramsay2003" label="Ramsay 2003"> 
               <author>S. Ramsay</author>. <title rend="quotes">Toward an Algorithmic Criticism.</title>
               <title rend="italic">Literary and Linguistic Computing</title>. <biblScope type="vol">18</biblScope>(2), 167-174,
                <date>2003</date>.</bibl>
            <bibl xml:id="rodden2001" label="Rodden et al. 2001">
               <author>K. Rodden</author>, 
            <author>W. Basalaj</author>, 
            <author>D. Sinclair</author>, and
            <author>K. Wood</author>. <title rend="quotes">Does Organisation by Similarity Assist
                Image Browsing?</title> In <title rend="italic">Proceedings of Human Factors in Computing Systems
                (CHI 2001) ACM Press</title>, pp. 190-197, <date>2001</date>.</bibl>
            <bibl xml:id="ruecker2006" label="Ruecker et al. 2006"> 
               <author>S. Ruecker</author>, <author>M.
                Radzikowska</author>, and
            <author>S. Sinclair</author>. <title rend="quotes">Communicating Process with Form:
                Designing the Visual Morphology of the Nora Data Mining Kernels.</title>
               <title>Proceedings of CaSTA 2006. Breadth of Text: A Joint Computer Science and
                Humanities Computing Conference.</title> Fredericton, New Brunswick. October 11-14.
             57-68, <date>2006</date>. </bibl>
            <bibl xml:id="ruecker2005" label="Ruecker et al. 2005"> 
               <author>S. Ruecker</author>, 
            <author>L. M. Given</author>, 
            <author>B. Sadler</author>, and 
            <author>A. Ruskin</author>. <title rend="quotes">Building Accessible Web Interfaces for
                Seniors: Similarity Clustering of Pill Images.</title>
               <title>Include 2005. London. Helen Hamlyn Institute. Royal College of Art.</title> April
            5-8, <date>2005</date>.</bibl>
            <bibl xml:id="ruecker2003" label="Ruecker 2003"> 
               <author>S.
                Ruecker</author>. <title rend="italic">Affordances of prospect for academic users of
                interpretively-tagged text collections.</title> Unpublished doctoral dissertation.
            University of Alberta, Edmonton, Alberta, Canada, <date>2003</date>.</bibl>
            <bibl xml:id="shneiderman2001" label="Shneiderman 2001"> 
               <author>B.
                Shneiderman</author>. <title rend="quotes">Inventing Discovery Tools: Combining
                Information Visualization with Data Mining.</title>
               <title>Keynote for Discovery Science 2001 Conference</title>, November 25-28,
            Washington, DC, <date>2001</date>.</bibl>
            <bibl xml:id="small1996" label="Small 1996"> 
               <author>D. Small</author>. <title rend="quotes">Navigating large bodies of text.</title>
               <title rend="italic">IBM Systems Journal</title>. <biblScope type="vol">35</biblScope>:3–4, <date>1996</date>.</bibl>
            <bibl xml:id="unsworth2005" label="Unsworth 2005"> 
               <author>J. Unsworth</author>. <title rend="quotes">New Methods for Humanities Research.</title>
               <title>The 2005 Lyman Award Lecture. November 11. National Humanities Center. Research
                Triangle Park, NC.</title>
               <ptr target="http://www3.isrl.uiuc.edu/~unsworth/lyman.htm"/>, <date>2005</date>.</bibl>
            <bibl xml:id="unsworth2004" label="Unsworth 2004"> 
               <author>J. Unsworth</author>. <title rend="quotes">Forms of Attention: Digital Humanities Beyond Representation.</title>
            A paper delivered at <title>The Face of Text: Computer-Assisted Text Analysis in the
                Humanities</title>. The third conference of the Canadian Symposium on Text Analysis
            (CaSTA), McMaster University, November 19-21, <date>2004</date>.</bibl>
            <bibl xml:id="vicente2002" label="Vicente 2002"> 
               <author>K.J. Vicente</author>. <title rend="quotes">Ecological Interface Design: Progress and Challenges.</title>
               <title rend="italic">Human Factors</title>. <biblScope type="vol">44</biblScope>.1:62–78, <date>2002</date>.</bibl>
            <bibl xml:id="weiss2005" label="Weiss et al. 2005"> 
               <author>S. Weiss</author>, 
            <author>N. Indurkhya</author>, 
            <author>T. Zhang</author>, and 
            <author>F. Damerau</author>. <title rend="italic">Text Mining: Predictive Methods for Analyzing
                Unstructured Information</title>. <pubPlace>New York</pubPlace>:
                <publisher>Springer</publisher>, <date>2005</date>.</bibl>
         </listBibl>
      </back>
   </text>
</TEI>
