@article{hider_search_2018,
  author = {Hider, Philip},
  title = {The {Search} {Value} {Added} by {Professional} {Indexing} to a {Bibliographic} {Database}},
  journal = {Knowledge Organization},
  volume = {45},
  number = {1},
  pages = {23--32},
  year = {2018},
  issn = {0943-7444},
  language = {en},
  abstract = {Gross et al. (2015) have demonstrated that about a quarter of hits would typically be lost to keyword searchers if contemporary academic library catalogs dropped their controlled subject headings. This article reports on an investigation of the search value that subject descriptors and identifiers assigned by professional indexers add to a bibliographic database, namely the Australian Education Index (AEI). First, a similar methodology to that developed by Gross et al. (2015) was applied, with keyword searches representing a range of educational topics run on the AEI database with and without its subject indexing. The results indicated that AEI users would also lose, on average, about a quarter of hits per query. Second, an alternative research design was applied in which an experienced literature searcher was asked to find resources on a set of educational topics on an AEI database stripped of its subject indexing and then asked to search for additional resources on the same topics after the subject indexing had been reinserted. In this study, the proportion of additional resources that would have been lost had it not been for the subject indexing was again found to be about a quarter of the total resources found for each topic, on average.},
}

@article{hudon_introduction._2017,
  title = {Introduction. {La} classification à facettes revisitée.
{De} la théorie à la pratique},
  author = {Hudon, Michèle and Mustafa El Hadi, Widad},
  journal = {Les Cahiers du numérique},
  volume = {13},
  number = {1},
  pages = {9--24},
  month = apr,
  year = {2017},
  issn = {1622-1494},
  url = {http://www.cairn.info/resume.php?ID_ARTICLE=LCN_131_0009},
  urldate = {2017-04-13},
  language = {fr},
}

@article{godert_ontology-based_2016,
  author = {Gödert, Winfried},
  title = {An ontology-based model for indexing and retrieval},
  journal = {Journal of the Association for Information Science and Technology},
  volume = {67},
  number = {3},
  pages = {594--609},
  year = {2016},
  issn = {2330-1643},
  doi = {10.1002/asi.23420},
  url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/asi.23420},
  urldate = {2019-01-25},
  copyright = {© 2015 ASIS\&T},
  language = {en},
  abstract = {The presented ontology-based model for indexing and retrieval combines the methods and experiences of traditional indexing languages with their cognitively interpreted entities and relationships with the strengths and possibilities of formal knowledge representation. The core component of the model uses inferences along the paths of typed relations between the entities of a knowledge representation for enabling the determination of result sets in the context of retrieval processes. A proposal for a general, but condensed, inventory of typed relations is given. The entities are arranged in aspect-oriented facets to ensure a consistent hierarchical structure. The possible consequences for indexing and retrieval are discussed.},
}

@article{kelly_systematic_2013,
  author = {Kelly, Diane and Sugimoto, Cassidy R.},
  title = {A systematic review of interactive information retrieval evaluation studies, 1967–2006},
  journal = {Journal of the American Society for Information Science and Technology},
  volume = {64},
  number = {4},
  pages = {745--770},
  month = apr,
  year = {2013},
  issn = {1532-2890},
  doi = {10.1002/asi.22799},
  url = {http://dx.doi.org/10.1002/asi.22799},
  language = {en},
  abstract = {With the increasing number and diversity of search tools available, interest in the evaluation of search systems, particularly from a user perspective, has grown among researchers. More researchers are designing and evaluating interactive information retrieval (IIR) systems and beginning to innovate in evaluation methods. Maturation of a research specialty relies on the ability to replicate research, provide standards for measurement and analysis, and understand past endeavors. This article presents a historical overview of 40 years of IIR evaluation studies using the method of systematic review. A total of 2,791 journal and conference units were manually examined and 127 articles were selected for analysis in this study, based on predefined inclusion and exclusion criteria. These articles were systematically coded using features such as author, publication date, sources and references, and properties of the research method used in the articles, such as number of subjects, tasks, corpora, and measures. Results include data describing the growth of IIR studies over time, the most frequently occurring and cited authors and sources, and the most common types of corpora and measures used. An additional product of this research is a bibliography of IIR evaluation research that can be used by students, teachers, and those new to the area. To the authors' knowledge, this is the first historical, systematic characterization of the IIR evaluation literature, including the documentation of methods and measures used by researchers in this specialty.},
}

@article{hjorland_facet_2013,
  title = {Facet analysis: {The} logical approach to knowledge organization},
  volume = {49},
  issn = {0306-4573},
  url = {http://www.sciencedirect.com/science/article/pii/S0306457312001203},
  doi = {10.1016/j.ipm.2012.10.001},
  abstract = {The facet-analytic paradigm is probably the most distinct approach to knowledge organization within Library and Information Science, and in many ways it has dominated what has be termed “modern classification theory”. It was mainly developed by S.R. Ranganathan and the British Classification Research Group, but it is mostly based on principles of logical division developed more than two millennia ago. Colon Classification (CC) and Bliss 2 (BC2) are among the most important systems developed on this theoretical basis, but it has also influenced the development of other systems, such as the Dewey Decimal Classification (DDC) and is also applied in many websites. It still has a strong position in the field and it is the most explicit and “pure” theoretical approach to knowledge organization (KO) (but it is not by implication necessarily also the most important one). The strength of this approach is its logical principles and the way it provides structures in knowledge organization systems (KOS). The main weaknesses are (1) its lack of empirical basis and (2) its speculative ordering of knowledge without basis in the development or influence of theories and socio-historical studies.
It seems to be based on the problematic assumption that relations between concepts are a priori and not established by the development of models, theories and laws.},
  author = {Hjørland, Birger},
  journal = {Information Processing \& Management},
  number = {2},
  pages = {545--557},
  month = mar,
  year = {2013},
  language = {en},
}

@article{broughton_faceted_2013,
  author = {Broughton, Vanda},
  title = {Faceted classification as a general theory for knowledge organization},
  journal = {SRELS Journal of Information Management},
  volume = {50},
  number = {6},
  pages = {735--750},
  year = {2013},
  language = {en},
  abstract = {The Classification Research Group manifesto of 1955, 'Faceted classification as the basis of all information retrieval', has been at least in part achieved, and there is much evidence of faceted classification influencing a whole range of modern information retrieval tools. This paper examines the theory underlying faceted classification, how and why it has been taken up so widely, and what benefits it brings to the activity of knowledge organization. The role of facet analysis as a general research tool is also considered, and how it compares with other content analysis tools as a means of modelling subject domains.},
}

@article{kules_influence_2012,
  title = {Influence of training and stage of search on gaze behavior in a library catalog faceted search interface},
  volume = {63},
  copyright = {© 2011 ASIS\&T},
  issn = {1532-2890},
  url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/asi.21647},
  doi = {10.1002/asi.21647},
  abstract = {This study examined how searchers interact with a web-based, faceted library catalog when conducting exploratory searches.
It applied multiple methods, including eye tracking and stimulated recall interviews, to investigate important aspects of faceted search interface use, specifically: (a) searcher gaze behavior—what components of the interface searchers look at; (b) how gaze behavior differs when training is and is not provided; (c) how gaze behavior changes as searchers become familiar with the interface; and (d) how gaze behavior differs depending on the stage of the search process. The results confirm previous findings that facets account for approximately 10–30\% of interface use. They show that providing a 60-second video demonstration increased searcher use of facets. However, searcher use of the facets did not evolve during the study session, which suggests that searchers may not, on their own, rapidly apply the faceted interfaces. The findings also suggest that searcher use of interface elements varied by the stage of their search during the session, with higher use of facets during decision-making stages. These findings will be of interest to librarians and interface designers who wish to maximize the value of faceted searching for patrons, as well as to researchers who study search behavior.},
  author = {Kules, Bill and Capra, Robert},
  journal = {Journal of the American Society for Information Science and Technology},
  number = {1},
  pages = {114--138},
  month = jan,
  year = {2012},
  urldate = {2018-08-03},
  language = {en},
}

@inproceedings{clarke_comparative_2011,
  author = {Clarke, Charles L. A. and Craswell, Nick and Soboroff, Ian and Ashkan, Azin},
  title = {A {Comparative} {Analysis} of {Cascade} {Measures} for {Novelty} and {Diversity}},
  booktitle = {Proceedings of the {Fourth} {ACM} {International} {Conference} on {Web} {Search} and {Data} {Mining}},
  series = {{WSDM} '11},
  publisher = {ACM},
  address = {New York, NY, USA},
  pages = {75--84},
  year = {2011},
  isbn = {978-1-4503-0493-1},
  doi = {10.1145/1935826.1935847},
  url = {http://doi.acm.org/10.1145/1935826.1935847},
  urldate = {2019-01-27},
  language = {en},
  abstract = {Traditional editorial effectiveness measures, such as nDCG, remain standard for Web search evaluation. Unfortunately, these traditional measures can inappropriately reward redundant information and can fail to reflect the broad range of user needs that can underlie a Web query. To address these deficiencies, several researchers have recently proposed effectiveness measures for novelty and diversity. Many of these measures are based on simple cascade models of user behavior, which operate by considering the relationship between successive elements of a result list. The properties of these measures are still poorly understood, and it is not clear from prior research that they work as intended. In this paper we examine the properties and performance of cascade measures with the goal of validating them as tools for measuring effectiveness. We explore their commonalities and differences, placing them in a unified framework; we discuss their theoretical difficulties and limitations, and compare the measures experimentally, contrasting them against traditional measures and against other approaches to measuring novelty. Data collected by the TREC 2009 Web Track is used as the basis for our experimental comparison. Our results indicate that these measures reward systems that achieve an balance between novelty and overall precision in their result lists, as intended. Nonetheless, other measures provide insights not captured by the cascade measures, and we suggest that future evaluation efforts continue to report a variety of measures.},
}

@inproceedings{vieira_query_2011,
  address = {Washington, DC, USA},
  series = {{ICDE} '11},
  title = {On {Query} {Result} {Diversification}},
  isbn = {978-1-4244-8959-6},
  url = {http://dx.doi.org/10.1109/ICDE.2011.5767846},
  doi = {10.1109/ICDE.2011.5767846},
  abstract = {In this paper we describe a general framework for evaluation and optimization of methods for diversifying query results. In these methods, an initial ranking candidate set produced by a query is used to construct a result set, where elements are ranked with respect to relevance and diversity features, i.e., the retrieved elements should be as relevant as possible to the query, and, at the same time, the result set should be as diverse as possible. While addressing relevance is relatively simple and has been heavily studied, diversity is a harder problem to solve. One major contribution of this paper is that, using the above framework, we adapt, implement and evaluate several existing methods for diversifying query results. We also propose two new approaches, namely the Greedy with Marginal Contribution (GMC) and the Greedy Randomized with Neighborhood Expansion (GNE) methods. Another major contribution of this paper is that we present the first thorough experimental evaluation of the various diversification techniques implemented in a common framework. We examine the methods' performance with respect to precision, running time and quality of the result. Our experimental results show that while the proposed methods have higher running times, they achieve precision very close to the optimal, while also providing the best result quality.
While GMC is deterministic, the randomized approach (GNE) can achieve better result quality if the user is willing to tradeoff running time.},
  author = {Vieira, Marcos R. and Razente, Humberto L. and Barioni, Maria C. N. and Hadjieleftheriou, Marios and Srivastava, Divesh and Traina, Caetano and Tsotras, Vassilis J.},
  booktitle = {Proceedings of the 2011 {IEEE} 27th {International} {Conference} on {Data} {Engineering}},
  publisher = {IEEE Computer Society},
  pages = {1163--1174},
  year = {2011},
  urldate = {2019-01-27},
  language = {en},
}

@article{fagan_usability_2010,
  author = {Fagan, Jody Condit},
  title = {Usability studies of faceted browsing: a literature review},
  shorttitle = {Usability {Studies} of {Faceted} {Browsing}},
  journal = {Information Technology and Libraries},
  volume = {29},
  number = {2},
  pages = {58--66},
  month = jun,
  year = {2010},
  issn = {2163-5226},
  doi = {10.6017/ital.v29i2.3144},
  url = {http://ejournals.bc.edu/ojs/index.php/ital/article/view/3144},
  urldate = {2016-08-17},
  copyright = {Copyright (c) 2015 Information Technology and Libraries},
  language = {en},
  abstract = {Faceted browsing is a common feature of new library catalog interfaces. But to what extent does it improve user performance in searching within today’s library catalog systems? This article reviews the literature for user studies involving faceted browsing and user studies of “next-generation” library catalogs that incorporate faceted browsing. Both the results and the methods of these studies are analyzed by asking, What do we currently know about faceted browsing? How can we design better studies of faceted browsing in library catalogs? The article proposes methodological considerations for practicing librarians and provides examples of goals, tasks, and measurements for user studies of faceted browsing in library catalogs.},
}

@article{hjorland_foundation_2010,
  title = {The foundation of the concept of relevance},
  volume = {61},
  copyright = {© 2009 ASIS\&T},
  issn = {1532-2890},
  url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/asi.21261},
  doi = {10.1002/asi.21261},
  abstract = {In 1975 Tefko Saracevic declared “the subject knowledge view” to be the most fundamental perspective of relevance. This paper examines the assumptions in different views of relevance, including “the system's view” and “the user's view” and offers a reinterpretation of these views. The paper finds that what was regarded as the most fundamental view by Saracevic in 1975 has not since been considered (with very few exceptions). Other views, which are based on less fruitful assumptions, have dominated the discourse on relevance in information retrieval and information science. Many authors have reexamined the concept of relevance in information science, but have neglected the subject knowledge view, hence basic theoretical assumptions seem not to have been properly addressed. It is as urgent now as it was in 1975 seriously to consider “the subject knowledge view” of relevance (which may also be termed “the epistemological view”). The concept of relevance, like other basic concepts, is influenced by overall approaches to information science, such as the cognitive view and the domain-analytic view. There is today a trend toward a social paradigm for information science.
This paper offers an understanding of relevance from such a social point of view.},
  author = {Hjørland, Birger},
  journal = {Journal of the American Society for Information Science and Technology},
  number = {2},
  pages = {217--237},
  month = feb,
  year = {2010},
  urldate = {2018-04-25},
  language = {en},
}

@article{la_barre_facet_2010,
  author = {La Barre, Kathryn},
  title = {Facet analysis},
  journal = {Annual Review of Information Science and Technology},
  volume = {44},
  number = {1},
  pages = {243--284},
  month = jan,
  year = {2010},
  issn = {1550-8382},
  doi = {10.1002/aris.2010.1440440113},
  url = {http://onlinelibrary.wiley.com/doi/10.1002/aris.2010.1440440113/abstract},
  urldate = {2017-01-27},
  language = {en},
}

@book{buttcher_information_2010,
  title = {Information {Retrieval}: {Implementing} and {Evaluating} {Search} {Engines}},
  isbn = {978-0-262-52887-0},
  shorttitle = {Information {Retrieval}},
  abstract = {Information retrieval is the foundation for modern search engines. This textbook offers an introduction to the core topics underlying modern search technologies, including algorithms, data structures, indexing, retrieval, and evaluation. The emphasis is on implementation and experimentation; each chapter includes exercises and suggestions for student projects. Wumpus -- a multiuser open-source information retrieval system developed by one of the authors and available online -- provides model implementations and a basis for student work. The modular structure of the book allows instructors to use it in a variety of graduate-level courses, including courses taught from a database systems perspective, traditional information retrieval courses with a focus on IR theory, and courses covering the basics of Web retrieval. In addition to its classroom use, Information Retrieval will be a valuable reference for professionals in computer science, computer engineering, and software engineering.},
  language = {en},
  publisher = {MIT Press},
  author = {Büttcher, Stefan and Clarke, Charles L. A.
and Cormack, Gordon V.},
  year = {2010},
}

@article{robertson_probabilistic_2009,
  author = {Robertson, Stephen and Zaragoza, Hugo},
  title = {The {Probabilistic} {Relevance} {Framework}: {BM25} and {Beyond}},
  shorttitle = {The {Probabilistic} {Relevance} {Framework}},
  journal = {Foundations and Trends® in Information Retrieval},
  volume = {3},
  number = {4},
  pages = {333--389},
  month = dec,
  year = {2009},
  issn = {1554-0669, 1554-0677},
  doi = {10.1561/1500000019},
  url = {https://www.nowpublishers.com/article/Details/INR-019},
  urldate = {2019-01-18},
  language = {en},
  abstract = {The Probabilistic Relevance Framework (PRF) is a formal framework for document retrieval, grounded in work done in the 1970–1980s, which led to the development of one of the most successful text-retrieval algorithms, BM25. In recent years, research in the PRF has yielded new retrieval models capable of taking into account document meta-data (especially structure and link-graph information). Again, this has led to one of the most successful Web-search and corporate-search algorithms, BM25F. This work presents the PRF from a conceptual point of view, describing the probabilistic modelling assumptions behind the framework and the different ranking algorithms that result from its application: the binary independence model, relevance feedback models, BM25 and BM25F. It also discusses the relation between the PRF and other statistical models for IR, and covers some related topics, such as the use of non-textual features, and parameter optimisation for models with free parameters.},
}

@article{radlinski_redundancy_2009,
  title = {Redundancy, diversity and interdependent document relevance},
  volume = {43},
  issn = {0163-5840},
  url = {http://dl.acm.org/citation.cfm?id=1670564.1670572},
  doi = {10.1145/1670564.1670572},
  abstract = {The goal of the Redundancy, Diversity, and Interdependent Document Relevance workshop was to explore how ranking, performance assessment and learning to rank can move beyond the assumption that the relevance of a document is independent of other documents. In particular, the workshop focussed on three themes: the effect of redundancy on information retrieval utility (for example, minimizing the wasted effort of users who must skip redundant information), the role of diversity (for example, for mitigating the risk of misinterpreting ambiguous queries), and algorithms for set-level optimization (where the quality of a set of retrieved documents is not simply the sum of its parts). This workshop built directly upon the Beyond Binary Relevance: Preferences, Diversity and Set-Level Judgments workshop at SIGIR 2008 [3], shifting focus to address the questions left open by the discussions and results from that workshop. As such, it was the first workshop to explicitly focus on the related research challenges of redundancy, diversity, and interdependent relevance – all of which require novel performance measures, learning methods, and evaluation techniques.
The workshop program committee consisted of 15 researchers from academia and industry, with experience in IR evaluation, machine learning, and IR algorithmic design. Over 40 people attended the workshop. This report aims to summarize the workshop, and also to systematize common themes and key concepts so as to encourage research in the three workshop themes. It contains our attempt to summarize and organize the topics that came up in presentations as well as in discussions, pulling out common elements. Many audience members contributed, yet due to the free-flowing discussion, attributing all the observations to particular audience members is unfortunately impossible. Not all audience members would necessarily agree with the views presented, but we do attempt to present a consensus view as far as possible.},
  author = {Radlinski, Filip and Bennett, Paul N. and Carterette, Ben and Joachims, Thorsten},
  journal = {ACM SIGIR Forum},
  number = {2},
  pages = {46--52},
  month = dec,
  year = {2009},
  urldate = {2019-01-27},
  language = {en},
}

@article{liu_learning_2009,
  author = {Liu, Tie-Yan},
  title = {Learning to {Rank} for {Information} {Retrieval}},
  journal = {Foundations and Trends® in Information Retrieval},
  volume = {3},
  number = {3},
  pages = {225--331},
  month = jun,
  year = {2009},
  issn = {1554-0669, 1554-0677},
  doi = {10.1561/1500000016},
  url = {https://www.nowpublishers.com/article/Details/INR-016},
  urldate = {2019-01-18},
  language = {en},
  abstract = {Learning to rank for Information Retrieval (IR) is a task to automatically construct a ranking model using training data, such that the model can sort new objects according to their degrees of relevance, preference, or importance. Many IR problems are by nature ranking problems, and many IR technologies can be potentially enhanced by using learning-to-rank techniques. The objective of this tutorial is to give an introduction to this research direction. Specifically, the existing learning-to-rank algorithms are reviewed and categorized into three approaches: the pointwise, pairwise, and listwise approaches. The advantages and disadvantages with each approach are analyzed, and the relationships between the loss functions used in these approaches and IR evaluation measures are discussed. Then the empirical evaluations on typical learning-to-rank methods are shown, with the LETOR collection as a benchmark dataset, which seems to suggest that the listwise approach be the most effective one among all the approaches. After that, a statistical ranking theory is introduced, which can describe different learning-to-rank algorithms, and be used to analyze their query-level generalization abilities. At the end of the tutorial, we provide a summary and discuss potential future work on learning to rank.},
}

@article{kelly_methods_2009,
  title = {Methods for {Evaluating} {Interactive} {Information} {Retrieval} {Systems} with {Users}},
  volume = {3},
  issn = {1554-0669, 1554-0677},
  url = {https://www.nowpublishers.com/article/Details/INR-012},
  doi = {10.1561/1500000012},
  abstract = {This paper provides overview and instruction regarding the evaluation of interactive information retrieval systems with users. The primary goal of this article is to catalog and compile material related to this topic into a single source. This article (1) provides historical background on the development of user-centered approaches to the evaluation of interactive information retrieval systems; (2) describes the major components of interactive information retrieval system evaluation; (3) describes different experimental designs and sampling strategies; (4) presents core instruments and data collection techniques and measures; (5) explains basic data analysis techniques; and (4) reviews and discusses previous studies.
This article also discusses validity and reliability issues with respect to both measures and methods, presents background information on research ethics and discusses some ethical issues which are specific to studies of interactive information retrieval (IIR). Finally, this article concludes with a discussion of outstanding challenges and future research directions.},
  author = {Kelly, Diane},
  journal = {Foundations and Trends® in Information Retrieval},
  number = {1--2},
  pages = {1--224},
  month = apr,
  year = {2009},
  urldate = {2018-08-07},
  language = {en},
}

@article{tunkelang_faceted_2009,
  author = {Tunkelang, Daniel},
  title = {Faceted search},
  journal = {Synthesis Lectures on Information Concepts, Retrieval, and Services},
  volume = {1},
  number = {1},
  pages = {1--80},
  month = jan,
  year = {2009},
  issn = {1947-945X},
  doi = {10.2200/S00190ED1V01Y200904ICR005},
  url = {http://www.morganclaypool.com/doi/abs/10.2200/s00190ed1v01y200904icr005},
  urldate = {2017-04-09},
  language = {en},
  abstract = {We live in an information age that requires us, more than ever, to represent, access, and use information. Over the last several decades, we have developed a modern science and technology for information retrieval, relentlessly pursuing the vision of a "memex" that Vannevar Bush proposed in his seminal article, "As We May Think." Faceted search plays a key role in this program. Faceted search addresses weaknesses of conventional search approaches and has emerged as a foundation for interactive information retrieval. User studies demonstrate that faceted search provides more effective information-seeking support to users than best-first search. Indeed, faceted search has become increasingly prevalent in online information access systems, particularly for e-commerce and site search. In this lecture, we explore the history, theory, and practice of faceted search. Although we cannot hope to be exhaustive, our aim is to provide sufficient depth and breadth to offer a useful resource to both researchers and practitioners. Because faceted search is an area of interest to computer scientists, information scientists, interface designers, and usability researchers, we do not assume that the reader is a specialist in any of these fields. Rather, we offer a self-contained treatment of the topic, with an extensive bibliography for those who would like to pursue particular aspects in more depth.},
}

@article{white_exploratory_2009,
  title = {Exploratory {Search}: {Beyond} the {Query}-{Response} {Paradigm}},
  volume = {1},
  issn = {1947-945X},
  shorttitle = {Exploratory {Search}},
  url = {https://www.morganclaypool.com/doi/abs/10.2200/S00174ED1V01Y200901ICR003},
  doi = {10.2200/S00174ED1V01Y200901ICR003},
  abstract = {As information becomes more ubiquitous and the demands that searchers have on search systems grow, there is a need to support search behaviors beyond simple lookup. Information seeking is the process or activity of attempting to obtain information in both human and technological contexts. Exploratory search describes an information-seeking problem context that is open-ended, persistent, and multifaceted, and information-seeking processes that are opportunistic, iterative, and multitactical. Exploratory searchers aim to solve complex problems and develop enhanced mental capacities. Exploratory search systems support this through symbiotic human-machine relationships that provide guidance in exploring unfamiliar information landscapes. Exploratory search has gained prominence in recent years.
There is an increased interest from the information retrieval, information science, and human-computer interaction communities in moving beyond the traditional turn-taking interaction model supported by major Web search engines, and toward support for human intelligence amplification and information use. In this lecture, we introduce exploratory search, relate it to relevant extant research, outline the features of exploratory search systems, discuss the evaluation of these systems, and suggest some future directions for supporting exploratory search. Exploratory search is a new frontier in the search domain and is becoming increasingly important in shaping our future world.},
  author = {White, Ryen W. and Roth, Resa A.},
  journal = {Synthesis Lectures on Information Concepts, Retrieval, and Services},
  number = {1},
  pages = {1--98},
  month = jan,
  year = {2009},
  urldate = {2018-05-27},
  language = {en},
}

@inproceedings{agrawal_diversifying_2009,
  author = {Agrawal, Rakesh and Gollapudi, Sreenivas and Halverson, Alan and Ieong, Samuel},
  title = {Diversifying {Search} {Results}},
  booktitle = {Proceedings of the {Second} {ACM} {International} {Conference} on {Web} {Search} and {Data} {Mining}},
  series = {{WSDM} '09},
  publisher = {ACM},
  address = {New York, NY, USA},
  pages = {5--14},
  year = {2009},
  isbn = {978-1-60558-390-7},
  doi = {10.1145/1498759.1498766},
  url = {http://doi.acm.org/10.1145/1498759.1498766},
  urldate = {2019-01-27},
  language = {en},
  abstract = {We study the problem of answering ambiguous web queries in a setting where there exists a taxonomy of information, and that both queries and documents may belong to more than one category according to this taxonomy. We present a systematic approach to diversifying results that aims to minimize the risk of dissatisfaction of the average user. We propose an algorithm that well approximates this objective in general, and is provably optimal for a natural special case. Furthermore, we generalize several classical IR metrics, including NDCG, MRR, and MAP, to explicitly account for the value of diversification. We demonstrate empirically that our algorithm scores higher in these generalized metrics compared to results produced by commercial search engines.},
}

@inproceedings{gollapudi_axiomatic_2009,
  address = {New York, NY, USA},
  series = {{WWW} '09},
  title = {An {Axiomatic} {Approach} for {Result} {Diversification}},
  isbn = {978-1-60558-487-4},
  url = {http://doi.acm.org/10.1145/1526709.1526761},
  doi = {10.1145/1526709.1526761},
  abstract = {Understanding user intent is key to designing an effective ranking system in a search engine. In the absence of any explicit knowledge of user intent, search engines want to diversify results to improve user satisfaction. In such a setting, the probability ranking principle-based approach of presenting the most relevant results on top can be sub-optimal, and hence the search engine would like to trade-off relevance for diversity in the results. In analogy to prior work on ranking and clustering systems, we use the axiomatic approach to characterize and design diversification systems. We develop a set of natural axioms that a diversification system is expected to satisfy, and show that no diversification function can satisfy all the axioms simultaneously. We illustrate the use of the axiomatic framework by providing three example diversification objectives that satisfy different subsets of the axioms. We also uncover a rich link to the facility dispersion problem that results in algorithms for a number of diversification objectives. Finally, we propose an evaluation methodology to characterize the objectives and the underlying axioms.
We conduct a large scale evaluation of our objectives based on two data sets: a data set derived from the Wikipedia disambiguation pages and a product database.}, language = {en}, urldate = {2019-01-27}, booktitle = {Proceedings of the 18th {International} {Conference} on {World} {Wide} {Web}}, publisher = {ACM}, author = {Gollapudi, Sreenivas and Sharma, Aneesh}, year = {2009}, pages = {381--390}, } @book{hearst_search_2009, title = {Search user interfaces}, isbn = {0-521-11379-2}, language = {en}, publisher = {Cambridge University Press}, author = {Hearst, Marti}, year = {2009}, } @inproceedings{kules_what_2009, address = {New York, NY, USA}, series = {{JCDL} '09}, title = {What {Do} {Exploratory} {Searchers} {Look} at in a {Faceted} {Search} {Interface}?}, isbn = {978-1-60558-322-8}, url = {http://doi.acm.org/10.1145/1555400.1555452}, doi = {10.1145/1555400.1555452}, abstract = {This study examined how searchers interacted with a web-based, faceted library catalog when conducting exploratory searches. It applied eye tracking, stimulated recall interviews, and direct observation to investigate important aspects of gaze behavior in a faceted search interface: what components of the interface searchers looked at, for how long, and in what order. It yielded empirical data that will be useful for both practitioners (e.g., for improving search interface designs), and researchers (e.g., to inform models of search behavior). Results of the study show that participants spent about 50 seconds per task looking at (fixating on) the results, about 25 seconds looking at the facets, and only about 6 seconds looking at the query itself. 
These findings suggest that facets played an important role in the exploratory search process.}, language = {en}, urldate = {2018-08-07}, booktitle = {Proceedings of the 9th {ACM}/{IEEE}-{CS} {Joint} {Conference} on {Digital} {Libraries}}, publisher = {ACM}, author = {Kules, Bill and Capra, Robert and Banta, Matthew and Sierra, Tito}, year = {2009}, pages = {313--322}, } @book{manning_introduction_2009, address = {Cambridge, England}, title = {An introduction to information retrieval}, url = {http://www.informationretrieval.org/}, language = {en}, publisher = {Cambridge University Press}, author = {Manning, Christopher D. and Raghavan, Prabhakar and Schütze, Hinrich}, year = {2009}, } @article{kuhlthau_information_2008, title = {The 'information search process' revisited: is the model still useful?}, volume = {13}, issn = {1368-1613}, shorttitle = {The 'information search process' revisited}, abstract = {Introduction. This paper examines the continued usefulness of Kuhlthau's Information Search Process as a model of information behaviour in new, technologically rich information environments. Method. A comprehensive review of research that has explored the model in various settings and a study employing qualitative and quantitative methods undertaken in the context of an inquiry project among school students (n=574). Students were interviewed at three stages of the information search process, during which nine feelings were identified and tracked. Results. Findings show individual patterns, but confirm the Information Search Process as a valid model in the changing information environment for describing information behaviour in tasks that require knowledge construction. The findings support the progression of feelings, thoughts and actions as suggested by the search process model. Conclusions. The information search process model remains useful for explaining students' information behaviour. 
The model was found to have value as a research tool as well as for practical application.}, language = {en}, number = {4}, journal = {Information Research}, author = {Kuhlthau, Carol C. and Heinström, Jannica and Todd, Ross J.}, month = dec, year = {2008}, pages = {45--45}, } @article{jansen_determining_2008, title = {Determining the informational, navigational, and transactional intent of {Web} queries}, volume = {44}, issn = {0306-4573}, url = {http://www.sciencedirect.com/science/article/pii/S030645730700163X}, doi = {10.1016/j.ipm.2007.07.015}, abstract = {In this paper, we define and present a comprehensive classification of user intent for Web searching. The classification consists of three hierarchical levels of informational, navigational, and transactional intent. After deriving attributes of each, we then developed a software application that automatically classified queries using a Web search engine log of over a million and a half queries submitted by several hundred thousand users. Our findings show that more than 80\% of Web queries are informational in nature, with about 10\% each being navigational and transactional. In order to validate the accuracy of our algorithm, we manually coded 400 queries and compared the results from this manual classification to the results determined by the automated method. This comparison showed that the automatic classification has an accuracy of 74\%. Of the remaining 25\% of the queries, the user intent is vague or multi-faceted, pointing to the need for probabilistic classification. We discuss how search engines can use knowledge of user intent to provide more targeted and relevant results in Web searching.}, language = {en}, number = {3}, urldate = {2018-03-28}, journal = {Information Processing \& Management}, author = {Jansen, Bernard J. and Booth, Danielle L. 
and Spink, Amanda}, month = may, year = {2008}, pages = {1251--1266}, } @inproceedings{clarke_novelty_2008, address = {New York, NY, USA}, series = {{SIGIR} '08}, title = {Novelty and {Diversity} in {Information} {Retrieval} {Evaluation}}, isbn = {978-1-60558-164-4}, url = {http://doi.acm.org/10.1145/1390334.1390446}, doi = {10.1145/1390334.1390446}, abstract = {Evaluation measures act as objective functions to be optimized by information retrieval systems. Such objective functions must accurately reflect user requirements, particularly when tuning IR systems and learning ranking functions. Ambiguity in queries and redundancy in retrieved documents are poorly reflected by current evaluation measures. In this paper, we present a framework for evaluation that systematically rewards novelty and diversity. We develop this framework into a specific evaluation measure, based on cumulative gain. We demonstrate the feasibility of our approach using a test collection based on the TREC question answering track.}, language = {en}, urldate = {2019-01-27}, booktitle = {Proceedings of the 31st {Annual} {International} {ACM} {SIGIR} {Conference} on {Research} and {Development} in {Information} {Retrieval}}, publisher = {ACM}, author = {Clarke, Charles L. A. and Kolla, Maheedhar and Cormack, Gordon V. and Vechtomova, Olga and Ashkan, Azin and Büttcher, Stefan and MacKinnon, Ian}, year = {2008}, pages = {659--666}, } @article{vickery_faceted_2008, title = {Faceted {Classification} for the {Web}}, volume = {18}, issn = {1572-8390}, url = {http://dx.doi.org/10.1007/s10516-007-9025-9}, doi = {10.1007/s10516-007-9025-9}, abstract = {The article describes the nature of a faceted classification, and its application in document retrieval. The kinds of facet used are illustrated. 
Procedures are then discussed for identifying facets in a subject field, populating the facets with individual subject terms, arranging these in helpful sequences, using the scheme to classify documents, and searching the resultant classified index, with particular reference to Internet search.}, language = {en}, number = {2}, journal = {Axiomathes}, author = {Vickery, Brian}, year = {2008}, pages = {145--160}, } @article{saracevic_relevance:_2007, title = {Relevance: {A} review of the literature and a framework for thinking on the notion in information science. {Part} {III}: {Behavior} and effects of relevance}, volume = {58}, issn = {1532-2890}, shorttitle = {Relevance}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/asi.20681}, doi = {10.1002/asi.20681}, abstract = {All is flux. —Plato on Knowledge in the Theaetetus (about 369 BC) Relevance is a, if not even the, key notion in information science in general and information retrieval in particular. This two-part critical review traces and synthesizes the scholarship on relevance over the past 30 years or so and provides an updated framework within which the still widely dissonant ideas and works about relevance might be interpreted and related. It is a continuation and update of a similar review that appeared in 1975 under the same title, considered here as being Part I. The present review is organized in two parts: Part II addresses the questions related to nature and manifestations of relevance, and Part III addresses questions related to relevance behavior and effects. In Part II, the nature of relevance is discussed in terms of meaning ascribed to relevance, theories used or proposed, and models that have been developed. The manifestations of relevance are classified as to several kinds of relevance that form an interdependent system of relevancies. In Part III, relevance behavior and effects are synthesized using experimental and observational works that incorporated data. 
In both parts, each section concludes with a summary that in effect provides an interpretation and synthesis of contemporary thinking on the topic treated or suggests hypotheses for future research. Analyses of some of the major trends that shape relevance work are offered in conclusions.}, language = {en}, number = {13}, urldate = {2018-04-25}, journal = {Journal of the American Society for Information Science and Technology}, author = {Saracevic, Tefko}, month = nov, year = {2007}, pages = {2126--2144}, } @article{nasir_uddin_performance_2007, title = {Performance and usability testing of multidimensional taxonomy in web site search and navigation}, volume = {8}, issn = {1467-8047}, url = {https://www.emeraldinsight.com/doi/full/10.1108/14678040710748058}, doi = {10.1108/14678040710748058}, abstract = {Purpose – Development of an effective search system and interface largely depends on usability studies. The aim of this paper is to present the results of an empirical evaluation of a prototype web site search and browsing tool based on multidimensional taxonomies derived from the use of faceted classification. Design/methodology/approach – A prototype Faceted Classification System (FCS), which classifies and organizes web documents under different facets (orthogonal sets of categories), was implemented on the domain of an academic institute. Facet are created from content oriented metadata, and then assembled into multiple taxonomies that describe alternative classifications of the web site content, such as by subject and location. The search and browsing interfaces use these taxonomies to enable users to access information in multiple ways. The paper compares the FCS interfaces to the existing single‐classification system to evaluate the usability of the facets in typical navigation and searching tasks. 
Findings – The findings suggest that performance and usability are significantly better with the FCS in the areas of efficient access, search success, flexibility, understanding of content, relevant search result, and satisfaction. These results are especially promising since unfamiliarity often leads users to reject new search interfaces. Originality/value – The results of the study in this paper can significantly contribute to interface research in the IR community, emphasizing the advantages of multidimensional taxonomies in online information collections.}, language = {en}, number = {1}, urldate = {2018-08-03}, journal = {Performance Measurement and Metrics}, author = {Nasir Uddin, Mohammad and Janecek, Paul}, month = mar, year = {2007}, pages = {18--33}, } @article{broughton_building_2007, title = {Building a faceted classification for the humanities: principles and procedures}, volume = {63}, shorttitle = {Building a faceted classification for the humanities: principles and procedures}, abstract = {Purpose – This paper aims to provide an overview of principles and procedures involved in creating a faceted classification scheme for use in resource discovery in an online environment. Design/methodology/approach – Facet analysis provides an established rigorous methodology for the conceptual organization of a subject field, and the structuring of an associated classification or controlled vocabulary. This paper explains how that methodology was applied to the humanities in the FATKS project, where the objective was to explore the potential of facet analytical theory for creating a controlled vocabulary for the humanities, and to establish the requirements of a faceted classification appropriate to an online environment. A detailed faceted vocabulary was developed for two areas of the humanities within a broader facet framework for the whole of knowledge. 
Research issues included how to create a data model which made the faceted structure explicit and machine-readable and provided for its further development and use. Findings – In order to support easy facet combination in indexing, and facet searching and browsing on the interface, faceted classification requires a formalized data structure and an appropriate tool for its management. The conceptual framework of a faceted system proper can be applied satisfactorily to humanities, and fully integrated within a vocabulary management system. Research limitations/implications – The procedures described in this paper are concerned only with the structuring of the classification, and do not extend to indexing, retrieval and application issues. Practical implications – Many stakeholders in the domain of resource discovery consider developing their own classification system and supporting tools. The methods described in this paper may clarify the process of building a faceted classification and may provide some useful ideas with respect to the vocabulary maintenance tool. Originality/value – As far as the authors are aware there is no comparable research in this area.}, language = {en}, number = {5}, journal = {Journal of Documentation}, author = {Broughton, Vanda and Slavic, Aida}, year = {2007}, pages = {727--754}, } @article{jansen_search_2006, title = {Search log analysis: {What} it is, what's been done, how to do it}, volume = {28}, issn = {0740-8188}, shorttitle = {Search log analysis}, url = {http://www.sciencedirect.com/science/article/pii/S0740818806000673}, doi = {10.1016/j.lisr.2006.06.005}, abstract = {The use of data stored in transaction logs of Web search engines, Intranets, and Web sites can provide valuable insight into understanding the information-searching process of online searchers. This understanding can enlighten information system design, interface development, and devising the information architecture for content collections. 
This article presents a review and foundation for conducting Web search transaction log analysis. A methodology is outlined consisting of three stages, which are collection, preparation, and analysis. The three stages of the methodology are presented in detail with discussions of goals, metrics, and processes at each stage. Critical terms in transaction log analysis for Web searching are defined. The strengths and limitations of transaction log analysis as a research method are presented. An application to log client-side interactions that supplements transaction logs is reported on, and the application is made available for use by the research community. Suggestions are provided on ways to leverage the strengths of, while addressing the limitations of, transaction log analysis for Web-searching research. Finally, a complete flat text transaction log from a commercial search engine is available as supplementary material with this manuscript.}, language = {en}, number = {3}, urldate = {2018-03-20}, journal = {Library \& Information Science Research}, author = {Jansen, Bernard J.}, month = sep, year = {2006}, pages = {407--432}, } @inproceedings{hearst_design_2006, title = {Design recommendations for hierarchical faceted search interfaces}, abstract = {This paper presents interface design recommendations for faceted navigation systems, based on 13 years of experience in experimenting with and evaluating such designs.}, language = {en}, booktitle = {{ACM} {SIGIR} workshop on faceted search}, publisher = {Seattle, WA}, author = {Hearst, Marti}, month = aug, year = {2006}, pages = {1--5}, } @article{godbold_beyond_2006, title = {Beyond {Information} {Seeking}: {Towards} a {General} {Model} of {Information} {Behaviour}}, volume = {11}, issn = {1368-1613}, shorttitle = {Beyond {Information} {Seeking}}, url = {https://eric.ed.gov/?id=EJ1104640}, abstract = {Introduction: The aim of the paper is to propose new models of information behaviour that extend the concept beyond 
simply information seeking to consider other modes of behaviour. The models chiefly explored are those of Wilson and Dervin. Argument: A shortcoming of some models of information behaviour is that they present a sequence of stages where it is evident that actual behaviour is not always sequential. In addition, information behaviour models tend to confine themselves to depictions of information seeking. Development: A model of "multi-directionality" is explored, to overcome the notion of sequential stages. Inspired by authors such as Chatman, Krikelas, and Savolainen, modes of information behaviour such as creating, destroying and avoiding information are included. Conclusion: New models of information behaviour are presented that replace the notion of "barriers" with the concept of "gap", as a means of integrating the views of Wilson and Dervin. The proposed models incorporate the notion of multi-directionality and identify ways in which an individual may navigate "gap" using modes of information behaviour beyond information seeking.}, language = {en}, number = {4}, urldate = {2019-01-25}, journal = {Information Research: An International Electronic Journal}, author = {Godbold, Natalya}, month = jul, year = {2006}, } @article{broughton_need_2006, title = {The need for a faceted classification as the basis of all methods of information retrieval}, volume = {58}, issn = {0001-253X}, doi = {10.1108/00012530610648671}, abstract = {Purpose – The aim of this article is to estimate the impact of faceted classification and the faceted analytical method on the development of various information retrieval tools over the latter part of the twentieth and early twenty‐first centuries. Design/methodology/approach – The article presents an examination of various subject access tools intended for retrieval of both print and digital materials to determine whether they exhibit features of faceted systems. 
Some attention is paid to use of the faceted approach as a means of structuring information on commercial web sites. The secondary and research literature is also surveyed for commentary on and evaluation of facet analysis as a basis for the building of vocabulary and conceptual tools. Findings – The study finds that faceted systems are now very common, with a major increase in their use over the last 15 years. Most LIS subject indexing tools (classifications, subject heading lists and thesauri) now demonstrate features of facet analysis to a greater or lesser degree. A faceted approach is frequently taken to the presentation of product information on commercial web sites, and there is an independent strand of theory and documentation related to this application. There is some significant research on semi‐automatic indexing and retrieval (query expansion and query formulation) using facet analytical techniques. Originality/value – This article provides an overview of an important conceptual approach to information retrieval, and compares different understandings and applications of this methodology.}, language = {en}, number = {1/2}, journal = {Aslib Proceedings}, author = {Broughton, Vanda}, editor = {Dawson, Andy}, month = jan, year = {2006}, pages = {49--72}, } @inproceedings{agichtein_improving_2006, address = {New York, NY, USA}, series = {{SIGIR} '06}, title = {Improving {Web} {Search} {Ranking} by {Incorporating} {User} {Behavior} {Information}}, isbn = {978-1-59593-369-0}, url = {http://doi.acm.org/10.1145/1148170.1148177}, doi = {10.1145/1148170.1148177}, abstract = {We show that incorporating user behavior data can significantly improve ordering of top results in real web search setting. We examine alternatives for incorporating feedback into the ranking process and explore the contributions of user feedback compared to other common web search features. 
We report results of a large scale evaluation over 3,000 queries and 12 million user interactions with a popular web search engine. We show that incorporating implicit feedback can augment other features, improving the accuracy of a competitive web search ranking algorithms by as much as 31\% relative to the original performance.}, language = {en}, urldate = {2019-01-18}, booktitle = {Proceedings of the 29th {Annual} {International} {ACM} {SIGIR} {Conference} on {Research} and {Development} in {Information} {Retrieval}}, publisher = {ACM}, author = {Agichtein, Eugene and Brill, Eric and Dumais, Susan}, year = {2006}, pages = {19--26}, } @article{fox_evaluating_2005, title = {Evaluating {Implicit} {Measures} to {Improve} {Web} {Search}}, volume = {23}, issn = {1046-8188}, url = {http://doi.acm.org/10.1145/1059981.1059982}, doi = {10.1145/1059981.1059982}, abstract = {Of growing interest in the area of improving the search experience is the collection of implicit user behavior measures (implicit measures) as indications of user interest and user satisfaction. Rather than having to submit explicit user feedback, which can be costly in time and resources and alter the pattern of use within the search experience, some research has explored the collection of implicit measures as an efficient and useful alternative to collecting explicit measure of interest from users. This research article describes a recent study with two main objectives. The first was to test whether there is an association between explicit ratings of user satisfaction and implicit measures of user interest. The second was to understand what implicit measures were most strongly associated with user satisfaction. The domain of interest was Web search. We developed an instrumented browser to collect a variety of measures of user activity and also to ask for explicit judgments of the relevance of individual pages visited and entire search sessions. 
The data was collected in a workplace setting to improve the generalizability of the results. Results were analyzed using traditional methods (e.g., Bayesian modeling and decision trees) as well as a new usage behavior pattern analysis (“gene analysis”). We found that there was an association between implicit measures of user activity and the user's explicit satisfaction ratings. The best models for individual pages combined clickthrough, time spent on the search result page, and how a user exited a result or ended a search session (exit type/end action). Behavioral patterns (through the gene analysis) can also be used to predict user satisfaction for search sessions.}, language = {en}, number = {2}, urldate = {2019-01-18}, journal = {ACM Transactions on Information Systems}, author = {Fox, Steve and Karnawat, Kuldeep and Mydland, Mark and Dumais, Susan and White, Thomas}, month = apr, year = {2005}, pages = {147--168}, } @inproceedings{joachims_accurately_2005, title = {Accurately {Interpreting} {Clickthrough} {Data} {As} {Implicit} {Feedback}}, abstract = {This paper examines the reliability of implicit feedback generated from clickthrough data in WWW search. Analyzing the users' decision process using eyetracking and comparing implicit feedback against manual relevance judgments, we conclude that clicks are informative but biased. 
While this makes the interpretation of clicks as absolute relevance judgments difficult, we show that relative preferences derived from clicks are reasonably accurate on average.}, language = {en}, urldate = {2019-01-18}, booktitle = {Proceedings of the 28th annual international {ACM} {SIGIR} conference on {Research} and development in information retrieval, 2005}, author = {Joachims, Thorsten and Granka, Laura and Pan, Bing and Hembrooke, Helene and Gay, Geri}, year = {2005}, pages = {154--161}, } @inproceedings{rose_understanding_2004, address = {New York, NY, USA}, series = {{WWW} '04}, title = {Understanding {User} {Goals} in {Web} {Search}}, isbn = {978-1-58113-844-3}, url = {http://doi.acm.org/10.1145/988672.988675}, doi = {10.1145/988672.988675}, abstract = {Previous work on understanding user web search behavior has focused on how people search and what they are searching for, but not why they are searching. In this paper, we describe a framework for understanding the underlying goals of user searches, and our experience in using the framework to manually classify queries from a web search engine. Our analysis suggests that so-called "navigational" searches are less prevalent than generally believed while a previously unexplored "resource-seeking" goal may account for a large fraction of web searches. We also illustrate how this knowledge of user search goals might be used to improve future web search engines.}, language = {en}, urldate = {2018-03-28}, booktitle = {Proceedings of the 13th {International} {Conference} on {World} {Wide} {Web}}, publisher = {ACM}, author = {Rose, Daniel E. 
and Levinson, Danny}, year = {2004}, pages = {13--19}, } @misc{denton_how_2003, title = {How to {Make} a {Faceted} {Classification} and {Put} {It} {On} the {Web}}, url = {https://www.miskatonic.org/library/facet-web-howto.html}, language = {en}, urldate = {2016-08-16}, journal = {Miskatonic University Press}, author = {Denton, William}, year = {2003}, } @inproceedings{yee_faceted_2003, address = {New York, NY, USA}, series = {{CHI} '03}, title = {Faceted {Metadata} for {Image} {Search} and {Browsing}}, isbn = {978-1-58113-630-2}, url = {http://doi.acm.org/10.1145/642611.642681}, doi = {10.1145/642611.642681}, abstract = {There are currently two dominant interface types for searching and browsing large image collections: keyword-based search, and searching by overall similarity to sample images. We present an alternative based on enabling users to navigate along conceptual dimensions that describe the images. The interface makes use of hierarchical faceted metadata and dynamically generated query previews. A usability study, in which 32 art history students explored a collection of 35,000 fine arts images, compares this approach to a standard image search interface. Despite the unfamiliarity and power of the interface (attributes that often lead to rejection of new search interfaces), the study results show that 90\% of the participants preferred the metadata approach overall, 97\% said that it helped them learn more about the collection, 75\% found it more flexible, and 72\% found it easier to use than a standard baseline system. 
These results indicate that a category-based approach is a successful way to provide access to image collections.}, language = {en}, urldate = {2018-08-09}, booktitle = {Proceedings of the {SIGCHI} {Conference} on {Human} {Factors} in {Computing} {Systems}}, publisher = {ACM}, author = {Yee, Ka-Ping and Swearingen, Kirsten and Li, Kevin and Hearst, Marti}, year = {2003}, pages = {401--408}, } @article{broder_taxonomy_2002, title = {A {Taxonomy} of {Web} {Search}}, volume = {36}, issn = {0163-5840}, url = {http://doi.acm.org/10.1145/792550.792552}, doi = {10.1145/792550.792552}, abstract = {Classic IR (information retrieval) is inherently predicated on users searching for information, the so-called "information need". But the need behind a web search is often not informational -- it might be navigational (give me the url of the site I want to reach) or transactional (show me sites where I can perform a certain transaction, e.g. shop, download a file, or find a map). We explore this taxonomy of web searches and discuss how global search engines evolved to deal with web-specific needs.}, language = {en}, number = {2}, urldate = {2018-08-03}, journal = {SIGIR Forum}, author = {Broder, Andrei}, month = sep, year = {2002}, pages = {3--10}, } @article{hearst_finding_2002, title = {Finding the flow in web site search}, volume = {45}, issn = {0001-0782}, url = {http://doi.acm.org/10.1145/567498.567525}, doi = {10.1145/567498.567525}, abstract = {Designing a search system and interface may best be served (and executed) by scrutinizing usability studies.}, language = {en}, number = {9}, journal = {Communications of the ACM}, author = {Hearst, Marti and Elliott, Ame and English, Jennifer and Sinha, Rashmi and Swearingen, Kirsten and Yee, Ka-Ping}, month = sep, year = {2002}, pages = {42--49}, } @inproceedings{joachims_optimizing_2002, address = {Edmonton, Alberta, Canada}, title = {Optimizing search engines using clickthrough data}, isbn = {978-1-58113-567-1}, url = 
{http://dl.acm.org/citation.cfm?id=775047.775067}, doi = {10.1145/775047.775067}, abstract = {This paper presents an approach to automatically optimizing the retrieval quality of search engines using clickthrough data. Intuitively, a good information retrieval system should present relevant documents high in the ranking, with less relevant documents following below. While previous approaches to learning retrieval functions from examples exist, they typically require training data generated from relevance judgments by experts. This makes them difficult and expensive to apply. The goal of this paper is to develop a method that utilizes clickthrough data for training, namely the query-log of the search engine in connection with the log of links the users clicked on in the presented ranking. Such clickthrough data is available in abundance and can be recorded at very low cost. Taking a Support Vector Machine (SVM) approach, this paper presents a method for learning retrieval functions. From a theoretical perspective, this method is shown to be well-founded in a risk minimization framework. Furthermore, it is shown to be feasible even for large sets of queries and features. The theoretical results are verified in a controlled experiment. 
It shows that the method can effectively adapt the retrieval function of a meta-search engine to a particular group of users, outperforming Google in terms of retrieval quality after only a couple of hundred training examples.}, language = {en}, urldate = {2019-01-18}, booktitle = {Proceedings of the eighth {ACM} {SIGKDD} international conference on {Knowledge} discovery and data mining}, publisher = {ACM}, author = {Joachims, Thorsten}, month = jul, year = {2002}, pages = {133--142}, } @inproceedings{english_hierarchical_2002, address = {New York, NY, USA}, series = {{CHI} {EA} '02}, title = {Hierarchical {Faceted} {Metadata} in {Site} {Search} {Interfaces}}, isbn = {978-1-58113-454-4}, url = {http://doi.acm.org/10.1145/506443.506517}, doi = {10.1145/506443.506517}, abstract = {One of the most pressing usability issues in the design of large web sites is that of the organization of search results. A previous study on a moderate-sized web site indicated that users understood and preferred dynamically organized faceted metadata over standard search. We are now examining how to scale this approach to very large collections, since it is difficult to present hierarchical faceted metadata in a manner appealing and understandable to general users. 
We have iteratively designed and tested interfaces that address these design challenges; the most recent version is receiving enthusiastic responses in ongoing usability studies.}, language = {en}, urldate = {2018-07-06}, booktitle = {{CHI} '02 {Extended} {Abstracts} on {Human} {Factors} in {Computing} {Systems}}, publisher = {ACM}, author = {English, Jennifer and Hearst, Marti and Sinha, Rashmi and Swearingen, Kirsten and Yee, Ka-Ping}, year = {2002}, pages = {628--639}, } @article{spink_searching_2001, title = {Searching the web: {The} public and their queries}, volume = {52}, copyright = {Copyright © 2001 John Wiley \& Sons, Inc.}, issn = {1532-2890}, shorttitle = {Searching the web}, url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/1097-4571%282000%299999%3A9999%3C%3A%3AAID-ASI1591%3E3.0.CO%3B2-R}, doi = {10.1002/1097-4571(2000)9999:9999<::AID-ASI1591>3.0.CO;2-R}, abstract = {In studying actual Web searching by the public at large, we analyzed over one million Web queries by users of the Excite search engine. We found that most people use few search terms, few modified queries, view few Web pages, and rarely use advanced search features. A small number of search terms are used with high frequency, and a great many terms are unique; the language of Web queries is distinctive. Queries about recreation and entertainment rank highest. Findings are compared to data from two other large studies of Web queries. This study provides an insight into the public practices and choices in Web searching.}, language = {en}, number = {3}, urldate = {2019-01-21}, journal = {Journal of the American Society for Information Science and Technology}, author = {Spink, Amanda and Wolfram, Dietmar and Jansen, Bernard J. 
and Saracevic, Tefko}, year = {2001}, pages = {226--234}, } @article{jansen_real_2000, title = {Real life, real users, and real needs: a study and analysis of user queries on the web}, volume = {36}, issn = {0306-4573}, shorttitle = {Real life, real users, and real needs}, url = {http://www.sciencedirect.com/science/article/pii/S0306457399000564}, doi = {10.1016/S0306-4573(99)00056-4}, abstract = {We analyzed transaction logs containing 51,473 queries posed by 18,113 users of Excite, a major Internet search service. We provide data on: (i) sessions — changes in queries during a session, number of pages viewed, and use of relevance feedback; (ii) queries — the number of search terms, and the use of logic and modifiers; and (iii) terms — their rank/frequency distribution and the most highly used search terms. We then shift the focus of analysis from the query to the user to gain insight to the characteristics of the Web user. With these characteristics as a basis, we then conducted a failure analysis, identifying trends among user mistakes. We conclude with a summary of findings and a discussion of the implications of these findings.}, language = {en}, number = {2}, urldate = {2019-01-27}, journal = {Information Processing \& Management}, author = {Jansen, Bernard J. and Spink, Amanda and Saracevic, Tefko}, month = mar, year = {2000}, pages = {207--227}, }