@comment{
  Conference-paper references, ordered newest first.
  Layout: one field per line. Words in titles and booktitles that must keep
  their capitalisation are wrapped in braces so that styles applying sentence
  casing do not lowercase them.
}

@inproceedings{clarke_comparative_2011,
  address = {New York, NY, USA},
  series = {{WSDM} '11},
  title = {A {Comparative} {Analysis} of {Cascade} {Measures} for {Novelty} and {Diversity}},
  isbn = {978-1-4503-0493-1},
  url = {http://doi.acm.org/10.1145/1935826.1935847},
  doi = {10.1145/1935826.1935847},
  abstract = {Traditional editorial effectiveness measures, such as nDCG, remain standard for Web search evaluation. Unfortunately, these traditional measures can inappropriately reward redundant information and can fail to reflect the broad range of user needs that can underlie a Web query. To address these deficiencies, several researchers have recently proposed effectiveness measures for novelty and diversity. Many of these measures are based on simple cascade models of user behavior, which operate by considering the relationship between successive elements of a result list. The properties of these measures are still poorly understood, and it is not clear from prior research that they work as intended. In this paper we examine the properties and performance of cascade measures with the goal of validating them as tools for measuring effectiveness. We explore their commonalities and differences, placing them in a unified framework; we discuss their theoretical difficulties and limitations, and compare the measures experimentally, contrasting them against traditional measures and against other approaches to measuring novelty. Data collected by the TREC 2009 Web Track is used as the basis for our experimental comparison. Our results indicate that these measures reward systems that achieve a balance between novelty and overall precision in their result lists, as intended. Nonetheless, other measures provide insights not captured by the cascade measures, and we suggest that future evaluation efforts continue to report a variety of measures.},
  language = {en},
  urldate = {2019-01-27},
  booktitle = {Proceedings of the {Fourth} {ACM} {International} {Conference} on {Web} {Search} and {Data} {Mining}},
  publisher = {ACM},
  author = {Clarke, Charles L. A. and Craswell, Nick and Soboroff, Ian and Ashkan, Azin},
  year = {2011},
  pages = {75--84},
}

@inproceedings{vieira_query_2011,
  address = {Washington, DC, USA},
  series = {{ICDE} '11},
  title = {On {Query} {Result} {Diversification}},
  isbn = {978-1-4244-8959-6},
  url = {http://dx.doi.org/10.1109/ICDE.2011.5767846},
  doi = {10.1109/ICDE.2011.5767846},
  abstract = {In this paper we describe a general framework for evaluation and optimization of methods for diversifying query results. In these methods, an initial ranking candidate set produced by a query is used to construct a result set, where elements are ranked with respect to relevance and diversity features, i.e., the retrieved elements should be as relevant as possible to the query, and, at the same time, the result set should be as diverse as possible. While addressing relevance is relatively simple and has been heavily studied, diversity is a harder problem to solve. One major contribution of this paper is that, using the above framework, we adapt, implement and evaluate several existing methods for diversifying query results. We also propose two new approaches, namely the Greedy with Marginal Contribution (GMC) and the Greedy Randomized with Neighborhood Expansion (GNE) methods. Another major contribution of this paper is that we present the first thorough experimental evaluation of the various diversification techniques implemented in a common framework. We examine the methods' performance with respect to precision, running time and quality of the result. Our experimental results show that while the proposed methods have higher running times, they achieve precision very close to the optimal, while also providing the best result quality. While GMC is deterministic, the randomized approach (GNE) can achieve better result quality if the user is willing to tradeoff running time.},
  language = {en},
  urldate = {2019-01-27},
  booktitle = {Proceedings of the 2011 {IEEE} 27th {International} {Conference} on {Data} {Engineering}},
  publisher = {IEEE Computer Society},
  author = {Vieira, Marcos R. and Razente, Humberto L. and Barioni, Maria C. N. and Hadjieleftheriou, Marios and Srivastava, Divesh and Traina, Caetano and Tsotras, Vassilis J.},
  year = {2011},
  pages = {1163--1174},
}

@inproceedings{agrawal_diversifying_2009,
  address = {New York, NY, USA},
  series = {{WSDM} '09},
  title = {Diversifying {Search} {Results}},
  isbn = {978-1-60558-390-7},
  url = {http://doi.acm.org/10.1145/1498759.1498766},
  doi = {10.1145/1498759.1498766},
  abstract = {We study the problem of answering ambiguous web queries in a setting where there exists a taxonomy of information, and that both queries and documents may belong to more than one category according to this taxonomy. We present a systematic approach to diversifying results that aims to minimize the risk of dissatisfaction of the average user. We propose an algorithm that well approximates this objective in general, and is provably optimal for a natural special case. Furthermore, we generalize several classical IR metrics, including NDCG, MRR, and MAP, to explicitly account for the value of diversification. We demonstrate empirically that our algorithm scores higher in these generalized metrics compared to results produced by commercial search engines.},
  language = {en},
  urldate = {2019-01-27},
  booktitle = {Proceedings of the {Second} {ACM} {International} {Conference} on {Web} {Search} and {Data} {Mining}},
  publisher = {ACM},
  author = {Agrawal, Rakesh and Gollapudi, Sreenivas and Halverson, Alan and Ieong, Samuel},
  year = {2009},
  pages = {5--14},
}

@inproceedings{gollapudi_axiomatic_2009,
  address = {New York, NY, USA},
  series = {{WWW} '09},
  title = {An {Axiomatic} {Approach} for {Result} {Diversification}},
  isbn = {978-1-60558-487-4},
  url = {http://doi.acm.org/10.1145/1526709.1526761},
  doi = {10.1145/1526709.1526761},
  abstract = {Understanding user intent is key to designing an effective ranking system in a search engine. In the absence of any explicit knowledge of user intent, search engines want to diversify results to improve user satisfaction. In such a setting, the probability ranking principle-based approach of presenting the most relevant results on top can be sub-optimal, and hence the search engine would like to trade-off relevance for diversity in the results. In analogy to prior work on ranking and clustering systems, we use the axiomatic approach to characterize and design diversification systems. We develop a set of natural axioms that a diversification system is expected to satisfy, and show that no diversification function can satisfy all the axioms simultaneously. We illustrate the use of the axiomatic framework by providing three example diversification objectives that satisfy different subsets of the axioms. We also uncover a rich link to the facility dispersion problem that results in algorithms for a number of diversification objectives. Finally, we propose an evaluation methodology to characterize the objectives and the underlying axioms. We conduct a large scale evaluation of our objectives based on two data sets: a data set derived from the Wikipedia disambiguation pages and a product database.},
  language = {en},
  urldate = {2019-01-27},
  booktitle = {Proceedings of the 18th {International} {Conference} on {World} {Wide} {Web}},
  publisher = {ACM},
  author = {Gollapudi, Sreenivas and Sharma, Aneesh},
  year = {2009},
  pages = {381--390},
}

@inproceedings{kules_what_2009,
  address = {New York, NY, USA},
  series = {{JCDL} '09},
  title = {What {Do} {Exploratory} {Searchers} {Look} at in a {Faceted} {Search} {Interface}?},
  isbn = {978-1-60558-322-8},
  url = {http://doi.acm.org/10.1145/1555400.1555452},
  doi = {10.1145/1555400.1555452},
  abstract = {This study examined how searchers interacted with a web-based, faceted library catalog when conducting exploratory searches. It applied eye tracking, stimulated recall interviews, and direct observation to investigate important aspects of gaze behavior in a faceted search interface: what components of the interface searchers looked at, for how long, and in what order. It yielded empirical data that will be useful for both practitioners (e.g., for improving search interface designs), and researchers (e.g., to inform models of search behavior). Results of the study show that participants spent about 50 seconds per task looking at (fixating on) the results, about 25 seconds looking at the facets, and only about 6 seconds looking at the query itself. These findings suggest that facets played an important role in the exploratory search process.},
  language = {en},
  urldate = {2018-08-07},
  booktitle = {Proceedings of the 9th {ACM}/{IEEE}-{CS} {Joint} {Conference} on {Digital} {Libraries}},
  publisher = {ACM},
  author = {Kules, Bill and Capra, Robert and Banta, Matthew and Sierra, Tito},
  year = {2009},
  pages = {313--322},
}

@inproceedings{clarke_novelty_2008,
  address = {New York, NY, USA},
  series = {{SIGIR} '08},
  title = {Novelty and {Diversity} in {Information} {Retrieval} {Evaluation}},
  isbn = {978-1-60558-164-4},
  url = {http://doi.acm.org/10.1145/1390334.1390446},
  doi = {10.1145/1390334.1390446},
  abstract = {Evaluation measures act as objective functions to be optimized by information retrieval systems. Such objective functions must accurately reflect user requirements, particularly when tuning IR systems and learning ranking functions. Ambiguity in queries and redundancy in retrieved documents are poorly reflected by current evaluation measures. In this paper, we present a framework for evaluation that systematically rewards novelty and diversity. We develop this framework into a specific evaluation measure, based on cumulative gain. We demonstrate the feasibility of our approach using a test collection based on the TREC question answering track.},
  language = {en},
  urldate = {2019-01-27},
  booktitle = {Proceedings of the 31st {Annual} {International} {ACM} {SIGIR} {Conference} on {Research} and {Development} in {Information} {Retrieval}},
  publisher = {ACM},
  author = {Clarke, Charles L. A. and Kolla, Maheedhar and Cormack, Gordon V. and Vechtomova, Olga and Ashkan, Azin and B{\"u}ttcher, Stefan and MacKinnon, Ian},
  year = {2008},
  pages = {659--666},
}

@inproceedings{hearst_design_2006,
  address = {Seattle, WA},
  title = {Design {Recommendations} for {Hierarchical} {Faceted} {Search} {Interfaces}},
  abstract = {This paper presents interface design recommendations for faceted navigation systems, based on 13 years of experience in experimenting with and evaluating such designs.},
  language = {en},
  booktitle = {{ACM} {SIGIR} {Workshop} on {Faceted} {Search}},
  author = {Hearst, Marti},
  month = aug,
  year = {2006},
  pages = {1--5},
}

@inproceedings{agichtein_improving_2006,
  address = {New York, NY, USA},
  series = {{SIGIR} '06},
  title = {Improving {Web} {Search} {Ranking} by {Incorporating} {User} {Behavior} {Information}},
  isbn = {978-1-59593-369-0},
  url = {http://doi.acm.org/10.1145/1148170.1148177},
  doi = {10.1145/1148170.1148177},
  abstract = {We show that incorporating user behavior data can significantly improve ordering of top results in real web search setting. We examine alternatives for incorporating feedback into the ranking process and explore the contributions of user feedback compared to other common web search features. We report results of a large scale evaluation over 3,000 queries and 12 million user interactions with a popular web search engine. We show that incorporating implicit feedback can augment other features, improving the accuracy of a competitive web search ranking algorithms by as much as 31\% relative to the original performance.},
  language = {en},
  urldate = {2019-01-18},
  booktitle = {Proceedings of the 29th {Annual} {International} {ACM} {SIGIR} {Conference} on {Research} and {Development} in {Information} {Retrieval}},
  publisher = {ACM},
  author = {Agichtein, Eugene and Brill, Eric and Dumais, Susan},
  year = {2006},
  pages = {19--26},
}

@inproceedings{joachims_accurately_2005,
  address = {New York, NY, USA},
  series = {{SIGIR} '05},
  title = {Accurately {Interpreting} {Clickthrough} {Data} {As} {Implicit} {Feedback}},
  abstract = {This paper examines the reliability of implicit feedback generated from clickthrough data in WWW search. Analyzing the users' decision process using eyetracking and comparing implicit feedback against manual relevance judgments, we conclude that clicks are informative but biased. While this makes the interpretation of clicks as absolute relevance judgments difficult, we show that relative preferences derived from clicks are reasonably accurate on average.},
  language = {en},
  urldate = {2019-01-18},
  booktitle = {Proceedings of the 28th {Annual} {International} {ACM} {SIGIR} {Conference} on {Research} and {Development} in {Information} {Retrieval}},
  publisher = {ACM},
  author = {Joachims, Thorsten and Granka, Laura and Pan, Bing and Hembrooke, Helene and Gay, Geri},
  year = {2005},
  pages = {154--161},
}

@inproceedings{rose_understanding_2004,
  address = {New York, NY, USA},
  series = {{WWW} '04},
  title = {Understanding {User} {Goals} in {Web} {Search}},
  isbn = {978-1-58113-844-3},
  url = {http://doi.acm.org/10.1145/988672.988675},
  doi = {10.1145/988672.988675},
  abstract = {Previous work on understanding user web search behavior has focused on how people search and what they are searching for, but not why they are searching. In this paper, we describe a framework for understanding the underlying goals of user searches, and our experience in using the framework to manually classify queries from a web search engine. Our analysis suggests that so-called ``navigational'' searches are less prevalent than generally believed while a previously unexplored ``resource-seeking'' goal may account for a large fraction of web searches. We also illustrate how this knowledge of user search goals might be used to improve future web search engines.},
  language = {en},
  urldate = {2018-03-28},
  booktitle = {Proceedings of the 13th {International} {Conference} on {World} {Wide} {Web}},
  publisher = {ACM},
  author = {Rose, Daniel E. and Levinson, Danny},
  year = {2004},
  pages = {13--19},
}

@inproceedings{yee_faceted_2003,
  address = {New York, NY, USA},
  series = {{CHI} '03},
  title = {Faceted {Metadata} for {Image} {Search} and {Browsing}},
  isbn = {978-1-58113-630-2},
  url = {http://doi.acm.org/10.1145/642611.642681},
  doi = {10.1145/642611.642681},
  abstract = {There are currently two dominant interface types for searching and browsing large image collections: keyword-based search, and searching by overall similarity to sample images. We present an alternative based on enabling users to navigate along conceptual dimensions that describe the images. The interface makes use of hierarchical faceted metadata and dynamically generated query previews. A usability study, in which 32 art history students explored a collection of 35,000 fine arts images, compares this approach to a standard image search interface. Despite the unfamiliarity and power of the interface (attributes that often lead to rejection of new search interfaces), the study results show that 90\% of the participants preferred the metadata approach overall, 97\% said that it helped them learn more about the collection, 75\% found it more flexible, and 72\% found it easier to use than a standard baseline system. These results indicate that a category-based approach is a successful way to provide access to image collections.},
  language = {en},
  urldate = {2018-08-09},
  booktitle = {Proceedings of the {SIGCHI} {Conference} on {Human} {Factors} in {Computing} {Systems}},
  publisher = {ACM},
  author = {Yee, Ka-Ping and Swearingen, Kirsten and Li, Kevin and Hearst, Marti},
  year = {2003},
  pages = {401--408},
}

@inproceedings{joachims_optimizing_2002,
  address = {New York, NY, USA},
  series = {{KDD} '02},
  title = {Optimizing {Search} {Engines} {Using} {Clickthrough} {Data}},
  isbn = {978-1-58113-567-1},
  url = {http://dl.acm.org/citation.cfm?id=775047.775067},
  doi = {10.1145/775047.775067},
  abstract = {This paper presents an approach to automatically optimizing the retrieval quality of search engines using clickthrough data. Intuitively, a good information retrieval system should present relevant documents high in the ranking, with less relevant documents following below. While previous approaches to learning retrieval functions from examples exist, they typically require training data generated from relevance judgments by experts. This makes them difficult and expensive to apply. The goal of this paper is to develop a method that utilizes clickthrough data for training, namely the query-log of the search engine in connection with the log of links the users clicked on in the presented ranking. Such clickthrough data is available in abundance and can be recorded at very low cost. Taking a Support Vector Machine (SVM) approach, this paper presents a method for learning retrieval functions. From a theoretical perspective, this method is shown to be well-founded in a risk minimization framework. Furthermore, it is shown to be feasible even for large sets of queries and features. The theoretical results are verified in a controlled experiment. It shows that the method can effectively adapt the retrieval function of a meta-search engine to a particular group of users, outperforming Google in terms of retrieval quality after only a couple of hundred training examples.},
  language = {en},
  urldate = {2019-01-18},
  booktitle = {Proceedings of the {Eighth} {ACM} {SIGKDD} {International} {Conference} on {Knowledge} {Discovery} and {Data} {Mining}},
  venue = {Edmonton, Alberta, Canada},
  publisher = {ACM},
  author = {Joachims, Thorsten},
  month = jul,
  year = {2002},
  pages = {133--142},
}

@inproceedings{english_hierarchical_2002,
  address = {New York, NY, USA},
  series = {{CHI} {EA} '02},
  title = {Hierarchical {Faceted} {Metadata} in {Site} {Search} {Interfaces}},
  isbn = {978-1-58113-454-4},
  url = {http://doi.acm.org/10.1145/506443.506517},
  doi = {10.1145/506443.506517},
  abstract = {One of the most pressing usability issues in the design of large web sites is that of the organization of search results. A previous study on a moderate-sized web site indicated that users understood and preferred dynamically organized faceted metadata over standard search. We are now examining how to scale this approach to very large collections, since it is difficult to present hierarchical faceted metadata in a manner appealing and understandable to general users. We have iteratively designed and tested interfaces that address these design challenges; the most recent version is receiving enthusiastic responses in ongoing usability studies.},
  language = {en},
  urldate = {2018-07-06},
  booktitle = {{CHI} '02 {Extended} {Abstracts} on {Human} {Factors} in {Computing} {Systems}},
  publisher = {ACM},
  author = {English, Jennifer and Hearst, Marti and Sinha, Rashmi and Swearingen, Kirsten and Yee, Ka-Ping},
  year = {2002},
  pages = {628--639},
}