% Bibliography: information-retrieval references covering search-result
% diversification, novelty/diversity evaluation measures, learning to rank,
% and search-log / clickthrough analysis.
%
% Layout: one entry per block, one field per line, abstract last.
% Entries are kept in their original order (newest first).
% NOTE(review): several fields contain literal non-ASCII characters, so the
% file must be read as UTF-8 (e.g. by Biber); a classic 8-bit BibTeX run would
% need LaTeX accent escapes instead -- confirm the toolchain.

@inproceedings{clarke_comparative_2011,
  author     = {Clarke, Charles L. A. and Craswell, Nick and Soboroff, Ian and Ashkan, Azin},
  title      = {A {Comparative} {Analysis} of {Cascade} {Measures} for {Novelty} and {Diversity}},
  booktitle  = {Proceedings of the {Fourth} {ACM} {International} {Conference} on {Web} {Search} and {Data} {Mining}},
  series     = {{WSDM} '11},
  pages      = {75--84},
  year       = {2011},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  isbn       = {978-1-4503-0493-1},
  doi        = {10.1145/1935826.1935847},
  url        = {http://doi.acm.org/10.1145/1935826.1935847},
  urldate    = {2019-01-27},
  language   = {en},
  abstract   = {Traditional editorial effectiveness measures, such as nDCG, remain standard for Web search evaluation. Unfortunately, these traditional measures can inappropriately reward redundant information and can fail to reflect the broad range of user needs that can underlie a Web query. To address these deficiencies, several researchers have recently proposed effectiveness measures for novelty and diversity. Many of these measures are based on simple cascade models of user behavior, which operate by considering the relationship between successive elements of a result list. The properties of these measures are still poorly understood, and it is not clear from prior research that they work as intended. In this paper we examine the properties and performance of cascade measures with the goal of validating them as tools for measuring effectiveness. We explore their commonalities and differences, placing them in a unified framework; we discuss their theoretical difficulties and limitations, and compare the measures experimentally, contrasting them against traditional measures and against other approaches to measuring novelty. Data collected by the TREC 2009 Web Track is used as the basis for our experimental comparison. Our results indicate that these measures reward systems that achieve a balance between novelty and overall precision in their result lists, as intended. Nonetheless, other measures provide insights not captured by the cascade measures, and we suggest that future evaluation efforts continue to report a variety of measures.},
}

@inproceedings{vieira_query_2011,
  author     = {Vieira, Marcos R. and Razente, Humberto L. and Barioni, Maria C. N. and Hadjieleftheriou, Marios and Srivastava, Divesh and Traina, Caetano and Tsotras, Vassilis J.},
  title      = {On {Query} {Result} {Diversification}},
  booktitle  = {Proceedings of the 2011 {IEEE} 27th {International} {Conference} on {Data} {Engineering}},
  series     = {{ICDE} '11},
  pages      = {1163--1174},
  year       = {2011},
  publisher  = {IEEE Computer Society},
  address    = {Washington, DC, USA},
  isbn       = {978-1-4244-8959-6},
  doi        = {10.1109/ICDE.2011.5767846},
  url        = {http://dx.doi.org/10.1109/ICDE.2011.5767846},
  urldate    = {2019-01-27},
  language   = {en},
  abstract   = {In this paper we describe a general framework for evaluation and optimization of methods for diversifying query results. In these methods, an initial ranking candidate set produced by a query is used to construct a result set, where elements are ranked with respect to relevance and diversity features, i.e., the retrieved elements should be as relevant as possible to the query, and, at the same time, the result set should be as diverse as possible. While addressing relevance is relatively simple and has been heavily studied, diversity is a harder problem to solve. One major contribution of this paper is that, using the above framework, we adapt, implement and evaluate several existing methods for diversifying query results. We also propose two new approaches, namely the Greedy with Marginal Contribution (GMC) and the Greedy Randomized with Neighborhood Expansion (GNE) methods. Another major contribution of this paper is that we present the first thorough experimental evaluation of the various diversification techniques implemented in a common framework. We examine the methods' performance with respect to precision, running time and quality of the result. Our experimental results show that while the proposed methods have higher running times, they achieve precision very close to the optimal, while also providing the best result quality. While GMC is deterministic, the randomized approach (GNE) can achieve better result quality if the user is willing to tradeoff running time.},
}

@book{buttcher_information_2010,
  author     = {Büttcher, Stefan and Clarke, Charles L. A. and Cormack, Gordon V.},
  title      = {Information {Retrieval}: {Implementing} and {Evaluating} {Search} {Engines}},
  shorttitle = {Information {Retrieval}},
  year       = {2010},
  publisher  = {MIT Press},
  isbn       = {978-0-262-52887-0},
  language   = {en},
  abstract   = {Information retrieval is the foundation for modern search engines. This textbook offers an introduction to the core topics underlying modern search technologies, including algorithms, data structures, indexing, retrieval, and evaluation. The emphasis is on implementation and experimentation; each chapter includes exercises and suggestions for student projects. Wumpus -- a multiuser open-source information retrieval system developed by one of the authors and available online -- provides model implementations and a basis for student work. The modular structure of the book allows instructors to use it in a variety of graduate-level courses, including courses taught from a database systems perspective, traditional information retrieval courses with a focus on IR theory, and courses covering the basics of Web retrieval. In addition to its classroom use, Information Retrieval will be a valuable reference for professionals in computer science, computer engineering, and software engineering.},
}

@article{robertson_probabilistic_2009,
  author     = {Robertson, Stephen and Zaragoza, Hugo},
  title      = {The {Probabilistic} {Relevance} {Framework}: {BM25} and {Beyond}},
  shorttitle = {The {Probabilistic} {Relevance} {Framework}},
  journal    = {Foundations and Trends® in Information Retrieval},
  volume     = {3},
  number     = {4},
  pages      = {333--389},
  year       = {2009},
  month      = dec,
  issn       = {1554-0669, 1554-0677},
  doi        = {10.1561/1500000019},
  url        = {https://www.nowpublishers.com/article/Details/INR-019},
  urldate    = {2019-01-18},
  language   = {en},
  abstract   = {The Probabilistic Relevance Framework (PRF) is a formal framework for document retrieval, grounded in work done in the 1970–1980s, which led to the development of one of the most successful text-retrieval algorithms, BM25. In recent years, research in the PRF has yielded new retrieval models capable of taking into account document meta-data (especially structure and link-graph information). Again, this has led to one of the most successful Web-search and corporate-search algorithms, BM25F. This work presents the PRF from a conceptual point of view, describing the probabilistic modelling assumptions behind the framework and the different ranking algorithms that result from its application: the binary independence model, relevance feedback models, BM25 and BM25F. It also discusses the relation between the PRF and other statistical models for IR, and covers some related topics, such as the use of non-textual features, and parameter optimisation for models with free parameters.},
}

@article{radlinski_redundancy_2009,
  author     = {Radlinski, Filip and Bennett, Paul N. and Carterette, Ben and Joachims, Thorsten},
  title      = {Redundancy, diversity and interdependent document relevance},
  journal    = {ACM SIGIR Forum},
  volume     = {43},
  number     = {2},
  pages      = {46--52},
  year       = {2009},
  month      = dec,
  issn       = {0163-5840},
  doi        = {10.1145/1670564.1670572},
  url        = {http://dl.acm.org/citation.cfm?id=1670564.1670572},
  urldate    = {2019-01-27},
  language   = {en},
  abstract   = {The goal of the Redundancy, Diversity, and Interdependent Document Relevance workshop was to explore how ranking, performance assessment and learning to rank can move beyond the assumption that the relevance of a document is independent of other documents. In particular, the workshop focussed on three themes: the effect of redundancy on information retrieval utility (for example, minimizing the wasted effort of users who must skip redundant information), the role of diversity (for example, for mitigating the risk of misinterpreting ambiguous queries), and algorithms for set-level optimization (where the quality of a set of retrieved documents is not simply the sum of its parts). This workshop built directly upon the Beyond Binary Relevance: Preferences, Diversity and Set-Level Judgments workshop at SIGIR 2008 [3], shifting focus to address the questions left open by the discussions and results from that workshop. As such, it was the first workshop to explicitly focus on the related research challenges of redundancy, diversity, and interdependent relevance – all of which require novel performance measures, learning methods, and evaluation techniques. The workshop program committee consisted of 15 researchers from academia and industry, with experience in IR evaluation, machine learning, and IR algorithmic design. Over 40 people attended the workshop. This report aims to summarize the workshop, and also to systematize common themes and key concepts so as to encourage research in the three workshop themes. It contains our attempt to summarize and organize the topics that came up in presentations as well as in discussions, pulling out common elements. Many audience members contributed, yet due to the free-flowing discussion, attributing all the observations to particular audience members is unfortunately impossible. Not all audience members would necessarily agree with the views presented, but we do attempt to present a consensus view as far as possible.},
}

@article{liu_learning_2009,
  author     = {Liu, Tie-Yan},
  title      = {Learning to {Rank} for {Information} {Retrieval}},
  journal    = {Foundations and Trends® in Information Retrieval},
  volume     = {3},
  number     = {3},
  pages      = {225--331},
  year       = {2009},
  month      = jun,
  issn       = {1554-0669, 1554-0677},
  doi        = {10.1561/1500000016},
  url        = {https://www.nowpublishers.com/article/Details/INR-016},
  urldate    = {2019-01-18},
  language   = {en},
  abstract   = {Learning to rank for Information Retrieval (IR) is a task to automatically construct a ranking model using training data, such that the model can sort new objects according to their degrees of relevance, preference, or importance. Many IR problems are by nature ranking problems, and many IR technologies can be potentially enhanced by using learning-to-rank techniques. The objective of this tutorial is to give an introduction to this research direction. Specifically, the existing learning-to-rank algorithms are reviewed and categorized into three approaches: the pointwise, pairwise, and listwise approaches. The advantages and disadvantages with each approach are analyzed, and the relationships between the loss functions used in these approaches and IR evaluation measures are discussed. Then the empirical evaluations on typical learning-to-rank methods are shown, with the LETOR collection as a benchmark dataset, which seems to suggest that the listwise approach be the most effective one among all the approaches. After that, a statistical ranking theory is introduced, which can describe different learning-to-rank algorithms, and be used to analyze their query-level generalization abilities. At the end of the tutorial, we provide a summary and discuss potential future work on learning to rank.},
}

@inproceedings{agrawal_diversifying_2009,
  author     = {Agrawal, Rakesh and Gollapudi, Sreenivas and Halverson, Alan and Ieong, Samuel},
  title      = {Diversifying {Search} {Results}},
  booktitle  = {Proceedings of the {Second} {ACM} {International} {Conference} on {Web} {Search} and {Data} {Mining}},
  series     = {{WSDM} '09},
  pages      = {5--14},
  year       = {2009},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  isbn       = {978-1-60558-390-7},
  doi        = {10.1145/1498759.1498766},
  url        = {http://doi.acm.org/10.1145/1498759.1498766},
  urldate    = {2019-01-27},
  language   = {en},
  abstract   = {We study the problem of answering ambiguous web queries in a setting where there exists a taxonomy of information, and that both queries and documents may belong to more than one category according to this taxonomy. We present a systematic approach to diversifying results that aims to minimize the risk of dissatisfaction of the average user. We propose an algorithm that well approximates this objective in general, and is provably optimal for a natural special case. Furthermore, we generalize several classical IR metrics, including NDCG, MRR, and MAP, to explicitly account for the value of diversification. We demonstrate empirically that our algorithm scores higher in these generalized metrics compared to results produced by commercial search engines.},
}

@inproceedings{gollapudi_axiomatic_2009,
  author     = {Gollapudi, Sreenivas and Sharma, Aneesh},
  title      = {An {Axiomatic} {Approach} for {Result} {Diversification}},
  booktitle  = {Proceedings of the 18th {International} {Conference} on {World} {Wide} {Web}},
  series     = {{WWW} '09},
  pages      = {381--390},
  year       = {2009},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  isbn       = {978-1-60558-487-4},
  doi        = {10.1145/1526709.1526761},
  url        = {http://doi.acm.org/10.1145/1526709.1526761},
  urldate    = {2019-01-27},
  language   = {en},
  abstract   = {Understanding user intent is key to designing an effective ranking system in a search engine. In the absence of any explicit knowledge of user intent, search engines want to diversify results to improve user satisfaction. In such a setting, the probability ranking principle-based approach of presenting the most relevant results on top can be sub-optimal, and hence the search engine would like to trade-off relevance for diversity in the results. In analogy to prior work on ranking and clustering systems, we use the axiomatic approach to characterize and design diversification systems. We develop a set of natural axioms that a diversification system is expected to satisfy, and show that no diversification function can satisfy all the axioms simultaneously. We illustrate the use of the axiomatic framework by providing three example diversification objectives that satisfy different subsets of the axioms. We also uncover a rich link to the facility dispersion problem that results in algorithms for a number of diversification objectives. Finally, we propose an evaluation methodology to characterize the objectives and the underlying axioms. We conduct a large scale evaluation of our objectives based on two data sets: a data set derived from the Wikipedia disambiguation pages and a product database.},
}

@book{manning_introduction_2009,
  author     = {Manning, Christopher D. and Raghavan, Prabhakar and Schütze, Hinrich},
  title      = {An introduction to information retrieval},
  year       = {2009},
  publisher  = {Cambridge University Press},
  address    = {Cambridge, England},
  url        = {http://www.informationretrieval.org/},
  language   = {en},
}

@article{jansen_determining_2008,
  author     = {Jansen, Bernard J. and Booth, Danielle L. and Spink, Amanda},
  title      = {Determining the informational, navigational, and transactional intent of {Web} queries},
  journal    = {Information Processing \& Management},
  volume     = {44},
  number     = {3},
  pages      = {1251--1266},
  year       = {2008},
  month      = may,
  issn       = {0306-4573},
  doi        = {10.1016/j.ipm.2007.07.015},
  url        = {http://www.sciencedirect.com/science/article/pii/S030645730700163X},
  urldate    = {2018-03-28},
  language   = {en},
  abstract   = {In this paper, we define and present a comprehensive classification of user intent for Web searching. The classification consists of three hierarchical levels of informational, navigational, and transactional intent. After deriving attributes of each, we then developed a software application that automatically classified queries using a Web search engine log of over a million and a half queries submitted by several hundred thousand users. Our findings show that more than 80\% of Web queries are informational in nature, with about 10\% each being navigational and transactional. In order to validate the accuracy of our algorithm, we manually coded 400 queries and compared the results from this manual classification to the results determined by the automated method. This comparison showed that the automatic classification has an accuracy of 74\%. Of the remaining 25\% of the queries, the user intent is vague or multi-faceted, pointing to the need for probabilistic classification. We discuss how search engines can use knowledge of user intent to provide more targeted and relevant results in Web searching.},
}

@inproceedings{clarke_novelty_2008,
  author     = {Clarke, Charles L. A. and Kolla, Maheedhar and Cormack, Gordon V. and Vechtomova, Olga and Ashkan, Azin and Büttcher, Stefan and MacKinnon, Ian},
  title      = {Novelty and {Diversity} in {Information} {Retrieval} {Evaluation}},
  booktitle  = {Proceedings of the 31st {Annual} {International} {ACM} {SIGIR} {Conference} on {Research} and {Development} in {Information} {Retrieval}},
  series     = {{SIGIR} '08},
  pages      = {659--666},
  year       = {2008},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  isbn       = {978-1-60558-164-4},
  doi        = {10.1145/1390334.1390446},
  url        = {http://doi.acm.org/10.1145/1390334.1390446},
  urldate    = {2019-01-27},
  language   = {en},
  abstract   = {Evaluation measures act as objective functions to be optimized by information retrieval systems. Such objective functions must accurately reflect user requirements, particularly when tuning IR systems and learning ranking functions. Ambiguity in queries and redundancy in retrieved documents are poorly reflected by current evaluation measures. In this paper, we present a framework for evaluation that systematically rewards novelty and diversity. We develop this framework into a specific evaluation measure, based on cumulative gain. We demonstrate the feasibility of our approach using a test collection based on the TREC question answering track.},
}

@article{jansen_search_2006,
  author     = {Jansen, Bernard J.},
  title      = {Search log analysis: {What} it is, what's been done, how to do it},
  shorttitle = {Search log analysis},
  journal    = {Library \& Information Science Research},
  volume     = {28},
  number     = {3},
  pages      = {407--432},
  year       = {2006},
  month      = sep,
  issn       = {0740-8188},
  doi        = {10.1016/j.lisr.2006.06.005},
  url        = {http://www.sciencedirect.com/science/article/pii/S0740818806000673},
  urldate    = {2018-03-20},
  language   = {en},
  abstract   = {The use of data stored in transaction logs of Web search engines, Intranets, and Web sites can provide valuable insight into understanding the information-searching process of online searchers. This understanding can enlighten information system design, interface development, and devising the information architecture for content collections. This article presents a review and foundation for conducting Web search transaction log analysis. A methodology is outlined consisting of three stages, which are collection, preparation, and analysis. The three stages of the methodology are presented in detail with discussions of goals, metrics, and processes at each stage. Critical terms in transaction log analysis for Web searching are defined. The strengths and limitations of transaction log analysis as a research method are presented. An application to log client-side interactions that supplements transaction logs is reported on, and the application is made available for use by the research community. Suggestions are provided on ways to leverage the strengths of, while addressing the limitations of, transaction log analysis for Web-searching research. Finally, a complete flat text transaction log from a commercial search engine is available as supplementary material with this manuscript.},
}

@inproceedings{agichtein_improving_2006,
  author     = {Agichtein, Eugene and Brill, Eric and Dumais, Susan},
  title      = {Improving {Web} {Search} {Ranking} by {Incorporating} {User} {Behavior} {Information}},
  booktitle  = {Proceedings of the 29th {Annual} {International} {ACM} {SIGIR} {Conference} on {Research} and {Development} in {Information} {Retrieval}},
  series     = {{SIGIR} '06},
  pages      = {19--26},
  year       = {2006},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  isbn       = {978-1-59593-369-0},
  doi        = {10.1145/1148170.1148177},
  url        = {http://doi.acm.org/10.1145/1148170.1148177},
  urldate    = {2019-01-18},
  language   = {en},
  abstract   = {We show that incorporating user behavior data can significantly improve ordering of top results in real web search setting. We examine alternatives for incorporating feedback into the ranking process and explore the contributions of user feedback compared to other common web search features. We report results of a large scale evaluation over 3,000 queries and 12 million user interactions with a popular web search engine. We show that incorporating implicit feedback can augment other features, improving the accuracy of a competitive web search ranking algorithms by as much as 31\% relative to the original performance.},
}

@article{fox_evaluating_2005,
  author     = {Fox, Steve and Karnawat, Kuldeep and Mydland, Mark and Dumais, Susan and White, Thomas},
  title      = {Evaluating {Implicit} {Measures} to {Improve} {Web} {Search}},
  journal    = {{ACM} {Transactions} on {Information} {Systems}},
  volume     = {23},
  number     = {2},
  pages      = {147--168},
  year       = {2005},
  month      = apr,
  issn       = {1046-8188},
  doi        = {10.1145/1059981.1059982},
  url        = {http://doi.acm.org/10.1145/1059981.1059982},
  urldate    = {2019-01-18},
  language   = {en},
  abstract   = {Of growing interest in the area of improving the search experience is the collection of implicit user behavior measures (implicit measures) as indications of user interest and user satisfaction. Rather than having to submit explicit user feedback, which can be costly in time and resources and alter the pattern of use within the search experience, some research has explored the collection of implicit measures as an efficient and useful alternative to collecting explicit measure of interest from users. This research article describes a recent study with two main objectives. The first was to test whether there is an association between explicit ratings of user satisfaction and implicit measures of user interest. The second was to understand what implicit measures were most strongly associated with user satisfaction. The domain of interest was Web search. We developed an instrumented browser to collect a variety of measures of user activity and also to ask for explicit judgments of the relevance of individual pages visited and entire search sessions. The data was collected in a workplace setting to improve the generalizability of the results. Results were analyzed using traditional methods (e.g., Bayesian modeling and decision trees) as well as a new usage behavior pattern analysis (“gene analysis”). We found that there was an association between implicit measures of user activity and the user's explicit satisfaction ratings. The best models for individual pages combined clickthrough, time spent on the search result page, and how a user exited a result or ended a search session (exit type/end action). Behavioral patterns (through the gene analysis) can also be used to predict user satisfaction for search sessions.},
}

@inproceedings{joachims_accurately_2005,
  author     = {Joachims, Thorsten and Granka, Laura and Pan, Bing and Hembrooke, Helene and Gay, Geri},
  title      = {Accurately {Interpreting} {Clickthrough} {Data} as {Implicit} {Feedback}},
  booktitle  = {Proceedings of the 28th {Annual} {International} {ACM} {SIGIR} {Conference} on {Research} and {Development} in {Information} {Retrieval}},
  series     = {{SIGIR} '05},
  pages      = {154--161},
  year       = {2005},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  isbn       = {978-1-59593-034-7},
  doi        = {10.1145/1076034.1076063},
  url        = {https://doi.org/10.1145/1076034.1076063},
  urldate    = {2019-01-18},
  language   = {en},
  abstract   = {This paper examines the reliability of implicit feedback generated from clickthrough data in WWW search. Analyzing the users' decision process using eyetracking and comparing implicit feedback against manual relevance judgments, we conclude that clicks are informative but biased. While this makes the interpretation of clicks as absolute relevance judgments difficult, we show that relative preferences derived from clicks are reasonably accurate on average.},
}

@inproceedings{yee_faceted_2003,
  author     = {Yee, Ka-Ping and Swearingen, Kirsten and Li, Kevin and Hearst, Marti},
  title      = {Faceted {Metadata} for {Image} {Search} and {Browsing}},
  booktitle  = {Proceedings of the {SIGCHI} {Conference} on {Human} {Factors} in {Computing} {Systems}},
  series     = {{CHI} '03},
  pages      = {401--408},
  year       = {2003},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  isbn       = {978-1-58113-630-2},
  doi        = {10.1145/642611.642681},
  url        = {http://doi.acm.org/10.1145/642611.642681},
  urldate    = {2018-08-09},
  language   = {en},
  abstract   = {There are currently two dominant interface types for searching and browsing large image collections: keyword-based search, and searching by overall similarity to sample images. We present an alternative based on enabling users to navigate along conceptual dimensions that describe the images. The interface makes use of hierarchical faceted metadata and dynamically generated query previews. A usability study, in which 32 art history students explored a collection of 35,000 fine arts images, compares this approach to a standard image search interface. Despite the unfamiliarity and power of the interface (attributes that often lead to rejection of new search interfaces), the study results show that 90\% of the participants preferred the metadata approach overall, 97\% said that it helped them learn more about the collection, 75\% found it more flexible, and 72\% found it easier to use than a standard baseline system. These results indicate that a category-based approach is a successful way to provide access to image collections.},
}

@inproceedings{joachims_optimizing_2002,
  author     = {Joachims, Thorsten},
  title      = {Optimizing search engines using clickthrough data},
  booktitle  = {Proceedings of the {Eighth} {ACM} {SIGKDD} {International} {Conference} on {Knowledge} {Discovery} and {Data} {Mining}},
  series     = {{KDD} '02},
  pages      = {133--142},
  year       = {2002},
  month      = jul,
  publisher  = {ACM},
  address    = {New York, NY, USA},
  venue      = {Edmonton, Alberta, Canada},
  isbn       = {978-1-58113-567-1},
  doi        = {10.1145/775047.775067},
  url        = {http://dl.acm.org/citation.cfm?id=775047.775067},
  urldate    = {2019-01-18},
  language   = {en},
  abstract   = {This paper presents an approach to automatically optimizing the retrieval quality of search engines using clickthrough data. Intuitively, a good information retrieval system should present relevant documents high in the ranking, with less relevant documents following below. While previous approaches to learning retrieval functions from examples exist, they typically require training data generated from relevance judgments by experts. This makes them difficult and expensive to apply. The goal of this paper is to develop a method that utilizes clickthrough data for training, namely the query-log of the search engine in connection with the log of links the users clicked on in the presented ranking. Such clickthrough data is available in abundance and can be recorded at very low cost. Taking a Support Vector Machine (SVM) approach, this paper presents a method for learning retrieval functions. From a theoretical perspective, this method is shown to be well-founded in a risk minimization framework. Furthermore, it is shown to be feasible even for large sets of queries and features. The theoretical results are verified in a controlled experiment. It shows that the method can effectively adapt the retrieval function of a meta-search engine to a particular group of users, outperforming Google in terms of retrieval quality after only a couple of hundred training examples.},
}

@article{jansen_real_2000,
  author     = {Jansen, Bernard J. and Spink, Amanda and Saracevic, Tefko},
  title      = {Real life, real users, and real needs: a study and analysis of user queries on the web},
  shorttitle = {Real life, real users, and real needs},
  journal    = {Information Processing \& Management},
  volume     = {36},
  number     = {2},
  pages      = {207--227},
  year       = {2000},
  month      = mar,
  issn       = {0306-4573},
  doi        = {10.1016/S0306-4573(99)00056-4},
  url        = {http://www.sciencedirect.com/science/article/pii/S0306457399000564},
  urldate    = {2019-01-27},
  language   = {en},
  abstract   = {We analyzed transaction logs containing 51,473 queries posed by 18,113 users of Excite, a major Internet search service. We provide data on: (i) sessions — changes in queries during a session, number of pages viewed, and use of relevance feedback; (ii) queries — the number of search terms, and the use of logic and modifiers; and (iii) terms — their rank/frequency distribution and the most highly used search terms. We then shift the focus of analysis from the query to the user to gain insight to the characteristics of the Web user. With these characteristics as a basis, we then conducted a failure analysis, identifying trends among user mistakes. We conclude with a summary of findings and a discussion of the implications of these findings.},
}