% Bibliography: subject indexing, faceted search interfaces, search-result
% ranking and diversification, implicit feedback, and web query-log studies.
% Entries are listed newest first. One field per line; abstracts are kept on
% a single line each. Text outside entries (like this header) is ignored by
% BibTeX.

@article{hider_search_2018,
  title = {The {Search} {Value} {Added} by {Professional} {Indexing} to a {Bibliographic} {Database}},
  volume = {45},
  issn = {0943-7444},
  abstract = {Gross et al. (2015) have demonstrated that about a quarter of hits would typically be lost to keyword searchers if contemporary academic library catalogs dropped their controlled subject headings. This article reports on an investigation of the search value that subject descriptors and identifiers assigned by professional indexers add to a bibliographic database, namely the Australian Education Index (AEI). First, a similar methodology to that developed by Gross et al. (2015) was applied, with keyword searches representing a range of educational topics run on the AEI database with and without its subject indexing. The results indicated that AEI users would also lose, on average, about a quarter of hits per query. Second, an alternative research design was applied in which an experienced literature searcher was asked to find resources on a set of educational topics on an AEI database stripped of its subject indexing and then asked to search for additional resources on the same topics after the subject indexing had been reinserted. In this study, the proportion of additional resources that would have been lost had it not been for the subject indexing was again found to be about a quarter of the total resources found for each topic, on average.},
  language = {en},
  number = {1},
  journal = {Knowledge Organization},
  author = {Hider, Philip},
  year = {2018},
  pages = {23--32},
}

@article{kules_influence_2012,
  title = {Influence of training and stage of search on gaze behavior in a library catalog faceted search interface},
  volume = {63},
  copyright = {© 2011 ASIS\&T},
  issn = {1532-2890},
  url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/asi.21647},
  doi = {10.1002/asi.21647},
  abstract = {This study examined how searchers interact with a web-based, faceted library catalog when conducting exploratory searches. It applied multiple methods, including eye tracking and stimulated recall interviews, to investigate important aspects of faceted search interface use, specifically: (a) searcher gaze behavior—what components of the interface searchers look at; (b) how gaze behavior differs when training is and is not provided; (c) how gaze behavior changes as searchers become familiar with the interface; and (d) how gaze behavior differs depending on the stage of the search process. The results confirm previous findings that facets account for approximately 10–30\% of interface use. They show that providing a 60-second video demonstration increased searcher use of facets. However, searcher use of the facets did not evolve during the study session, which suggests that searchers may not, on their own, rapidly apply the faceted interfaces. The findings also suggest that searcher use of interface elements varied by the stage of their search during the session, with higher use of facets during decision-making stages. These findings will be of interest to librarians and interface designers who wish to maximize the value of faceted searching for patrons, as well as to researchers who study search behavior.},
  language = {en},
  number = {1},
  urldate = {2018-08-03},
  journal = {Journal of the American Society for Information Science and Technology},
  author = {Kules, Bill and Capra, Robert},
  month = jan,
  year = {2012},
  pages = {114--138},
}

@article{liu_learning_2009,
  title = {Learning to {Rank} for {Information} {Retrieval}},
  volume = {3},
  issn = {1554-0669, 1554-0677},
  url = {https://www.nowpublishers.com/article/Details/INR-016},
  doi = {10.1561/1500000016},
  abstract = {Learning to rank for Information Retrieval (IR) is a task to automatically construct a ranking model using training data, such that the model can sort new objects according to their degrees of relevance, preference, or importance. Many IR problems are by nature ranking problems, and many IR technologies can be potentially enhanced by using learning-to-rank techniques. The objective of this tutorial is to give an introduction to this research direction. Specifically, the existing learning-to-rank algorithms are reviewed and categorized into three approaches: the pointwise, pairwise, and listwise approaches. The advantages and disadvantages with each approach are analyzed, and the relationships between the loss functions used in these approaches and IR evaluation measures are discussed. Then the empirical evaluations on typical learning-to-rank methods are shown, with the LETOR collection as a benchmark dataset, which seems to suggest that the listwise approach be the most effective one among all the approaches. After that, a statistical ranking theory is introduced, which can describe different learning-to-rank algorithms, and be used to analyze their query-level generalization abilities. At the end of the tutorial, we provide a summary and discuss potential future work on learning to rank.},
  language = {en},
  number = {3},
  urldate = {2019-01-18},
  journal = {Foundations and Trends® in Information Retrieval},
  author = {Liu, Tie-Yan},
  month = jun,
  year = {2009},
  pages = {225--331},
}

@inproceedings{agrawal_diversifying_2009,
  address = {New York, NY, USA},
  series = {{WSDM} '09},
  title = {Diversifying {Search} {Results}},
  isbn = {978-1-60558-390-7},
  url = {http://doi.acm.org/10.1145/1498759.1498766},
  doi = {10.1145/1498759.1498766},
  abstract = {We study the problem of answering ambiguous web queries in a setting where there exists a taxonomy of information, and that both queries and documents may belong to more than one category according to this taxonomy. We present a systematic approach to diversifying results that aims to minimize the risk of dissatisfaction of the average user. We propose an algorithm that well approximates this objective in general, and is provably optimal for a natural special case. Furthermore, we generalize several classical IR metrics, including NDCG, MRR, and MAP, to explicitly account for the value of diversification. We demonstrate empirically that our algorithm scores higher in these generalized metrics compared to results produced by commercial search engines.},
  language = {en},
  urldate = {2019-01-27},
  booktitle = {Proceedings of the {Second} {ACM} {International} {Conference} on {Web} {Search} and {Data} {Mining}},
  publisher = {ACM},
  author = {Agrawal, Rakesh and Gollapudi, Sreenivas and Halverson, Alan and Ieong, Samuel},
  year = {2009},
  pages = {5--14},
}

@inproceedings{gollapudi_axiomatic_2009,
  address = {New York, NY, USA},
  series = {{WWW} '09},
  title = {An {Axiomatic} {Approach} for {Result} {Diversification}},
  isbn = {978-1-60558-487-4},
  url = {http://doi.acm.org/10.1145/1526709.1526761},
  doi = {10.1145/1526709.1526761},
  abstract = {Understanding user intent is key to designing an effective ranking system in a search engine. In the absence of any explicit knowledge of user intent, search engines want to diversify results to improve user satisfaction. In such a setting, the probability ranking principle-based approach of presenting the most relevant results on top can be sub-optimal, and hence the search engine would like to trade-off relevance for diversity in the results. In analogy to prior work on ranking and clustering systems, we use the axiomatic approach to characterize and design diversification systems. We develop a set of natural axioms that a diversification system is expected to satisfy, and show that no diversification function can satisfy all the axioms simultaneously. We illustrate the use of the axiomatic framework by providing three example diversification objectives that satisfy different subsets of the axioms. We also uncover a rich link to the facility dispersion problem that results in algorithms for a number of diversification objectives. Finally, we propose an evaluation methodology to characterize the objectives and the underlying axioms. We conduct a large scale evaluation of our objectives based on two data sets: a data set derived from the Wikipedia disambiguation pages and a product database.},
  language = {en},
  urldate = {2019-01-27},
  booktitle = {Proceedings of the 18th {International} {Conference} on {World} {Wide} {Web}},
  publisher = {ACM},
  author = {Gollapudi, Sreenivas and Sharma, Aneesh},
  year = {2009},
  pages = {381--390},
}

@inproceedings{kules_what_2009,
  address = {New York, NY, USA},
  series = {{JCDL} '09},
  title = {What {Do} {Exploratory} {Searchers} {Look} at in a {Faceted} {Search} {Interface}?},
  isbn = {978-1-60558-322-8},
  url = {http://doi.acm.org/10.1145/1555400.1555452},
  doi = {10.1145/1555400.1555452},
  abstract = {This study examined how searchers interacted with a web-based, faceted library catalog when conducting exploratory searches. It applied eye tracking, stimulated recall interviews, and direct observation to investigate important aspects of gaze behavior in a faceted search interface: what components of the interface searchers looked at, for how long, and in what order. It yielded empirical data that will be useful for both practitioners (e.g., for improving search interface designs), and researchers (e.g., to inform models of search behavior). Results of the study show that participants spent about 50 seconds per task looking at (fixating on) the results, about 25 seconds looking at the facets, and only about 6 seconds looking at the query itself. These findings suggest that facets played an important role in the exploratory search process.},
  language = {en},
  urldate = {2018-08-07},
  booktitle = {Proceedings of the 9th {ACM}/{IEEE}-{CS} {Joint} {Conference} on {Digital} {Libraries}},
  publisher = {ACM},
  author = {Kules, Bill and Capra, Robert and Banta, Matthew and Sierra, Tito},
  year = {2009},
  pages = {313--322},
}

@article{jansen_determining_2008,
  title = {Determining the informational, navigational, and transactional intent of {Web} queries},
  volume = {44},
  issn = {0306-4573},
  url = {http://www.sciencedirect.com/science/article/pii/S030645730700163X},
  doi = {10.1016/j.ipm.2007.07.015},
  abstract = {In this paper, we define and present a comprehensive classification of user intent for Web searching. The classification consists of three hierarchical levels of informational, navigational, and transactional intent. After deriving attributes of each, we then developed a software application that automatically classified queries using a Web search engine log of over a million and a half queries submitted by several hundred thousand users. Our findings show that more than 80\% of Web queries are informational in nature, with about 10\% each being navigational and transactional. In order to validate the accuracy of our algorithm, we manually coded 400 queries and compared the results from this manual classification to the results determined by the automated method. This comparison showed that the automatic classification has an accuracy of 74\%. Of the remaining 25\% of the queries, the user intent is vague or multi-faceted, pointing to the need for probabilistic classification. We discuss how search engines can use knowledge of user intent to provide more targeted and relevant results in Web searching.},
  language = {en},
  number = {3},
  urldate = {2018-03-28},
  journal = {Information Processing \& Management},
  author = {Jansen, Bernard J. and Booth, Danielle L. and Spink, Amanda},
  month = may,
  year = {2008},
  pages = {1251--1266},
}

@article{nasir_uddin_performance_2007,
  title = {Performance and usability testing of multidimensional taxonomy in web site search and navigation},
  volume = {8},
  issn = {1467-8047},
  url = {https://www.emeraldinsight.com/doi/full/10.1108/14678040710748058},
  doi = {10.1108/14678040710748058},
  abstract = {Purpose – Development of an effective search system and interface largely depends on usability studies. The aim of this paper is to present the results of an empirical evaluation of a prototype web site search and browsing tool based on multidimensional taxonomies derived from the use of faceted classification. Design/methodology/approach – A prototype Faceted Classification System (FCS), which classifies and organizes web documents under different facets (orthogonal sets of categories), was implemented on the domain of an academic institute. Facet are created from content oriented metadata, and then assembled into multiple taxonomies that describe alternative classifications of the web site content, such as by subject and location. The search and browsing interfaces use these taxonomies to enable users to access information in multiple ways. The paper compares the FCS interfaces to the existing single‐classification system to evaluate the usability of the facets in typical navigation and searching tasks. Findings – The findings suggest that performance and usability are significantly better with the FCS in the areas of efficient access, search success, flexibility, understanding of content, relevant search result, and satisfaction. These results are especially promising since unfamiliarity often leads users to reject new search interfaces. Originality/value – The results of the study in this paper can significantly contribute to interface research in the IR community, emphasizing the advantages of multidimensional taxonomies in online information collections.},
  language = {en},
  number = {1},
  urldate = {2018-08-03},
  journal = {Performance Measurement and Metrics},
  author = {Nasir Uddin, Mohammad and Janecek, Paul},
  month = mar,
  year = {2007},
  pages = {18--33},
}

@inproceedings{agichtein_improving_2006,
  address = {New York, NY, USA},
  series = {{SIGIR} '06},
  title = {Improving {Web} {Search} {Ranking} by {Incorporating} {User} {Behavior} {Information}},
  isbn = {978-1-59593-369-0},
  url = {http://doi.acm.org/10.1145/1148170.1148177},
  doi = {10.1145/1148170.1148177},
  abstract = {We show that incorporating user behavior data can significantly improve ordering of top results in real web search setting. We examine alternatives for incorporating feedback into the ranking process and explore the contributions of user feedback compared to other common web search features. We report results of a large scale evaluation over 3,000 queries and 12 million user interactions with a popular web search engine. We show that incorporating implicit feedback can augment other features, improving the accuracy of a competitive web search ranking algorithms by as much as 31\% relative to the original performance.},
  language = {en},
  urldate = {2019-01-18},
  booktitle = {Proceedings of the 29th {Annual} {International} {ACM} {SIGIR} {Conference} on {Research} and {Development} in {Information} {Retrieval}},
  publisher = {ACM},
  author = {Agichtein, Eugene and Brill, Eric and Dumais, Susan},
  year = {2006},
  pages = {19--26},
}

@article{fox_evaluating_2005,
  title = {Evaluating {Implicit} {Measures} to {Improve} {Web} {Search}},
  volume = {23},
  issn = {1046-8188},
  url = {http://doi.acm.org/10.1145/1059981.1059982},
  doi = {10.1145/1059981.1059982},
  abstract = {Of growing interest in the area of improving the search experience is the collection of implicit user behavior measures (implicit measures) as indications of user interest and user satisfaction. Rather than having to submit explicit user feedback, which can be costly in time and resources and alter the pattern of use within the search experience, some research has explored the collection of implicit measures as an efficient and useful alternative to collecting explicit measure of interest from users. This research article describes a recent study with two main objectives. The first was to test whether there is an association between explicit ratings of user satisfaction and implicit measures of user interest. The second was to understand what implicit measures were most strongly associated with user satisfaction. The domain of interest was Web search. We developed an instrumented browser to collect a variety of measures of user activity and also to ask for explicit judgments of the relevance of individual pages visited and entire search sessions. The data was collected in a workplace setting to improve the generalizability of the results. Results were analyzed using traditional methods (e.g., Bayesian modeling and decision trees) as well as a new usage behavior pattern analysis (“gene analysis”). We found that there was an association between implicit measures of user activity and the user's explicit satisfaction ratings. The best models for individual pages combined clickthrough, time spent on the search result page, and how a user exited a result or ended a search session (exit type/end action). Behavioral patterns (through the gene analysis) can also be used to predict user satisfaction for search sessions.},
  language = {en},
  number = {2},
  urldate = {2019-01-18},
  journal = {ACM Transactions on Information Systems},
  author = {Fox, Steve and Karnawat, Kuldeep and Mydland, Mark and Dumais, Susan and White, Thomas},
  month = apr,
  year = {2005},
  pages = {147--168},
}

@inproceedings{joachims_accurately_2005,
  address = {New York, NY, USA},
  series = {{SIGIR} '05},
  title = {Accurately {Interpreting} {Clickthrough} {Data} {As} {Implicit} {Feedback}},
  doi = {10.1145/1076034.1076063},
  abstract = {This paper examines the reliability of implicit feedback generated from clickthrough data in WWW search. Analyzing the users' decision process using eyetracking and comparing implicit feedback against manual relevance judgments, we conclude that clicks are informative but biased. While this makes the interpretation of clicks as absolute relevance judgments difficult, we show that relative preferences derived from clicks are reasonably accurate on average.},
  language = {en},
  urldate = {2019-01-18},
  booktitle = {Proceedings of the 28th {Annual} {International} {ACM} {SIGIR} {Conference} on {Research} and {Development} in {Information} {Retrieval}},
  publisher = {ACM},
  author = {Joachims, Thorsten and Granka, Laura and Pan, Bing and Hembrooke, Helene and Gay, Geri},
  year = {2005},
  pages = {154--161},
}

@inproceedings{yee_faceted_2003,
  address = {New York, NY, USA},
  series = {{CHI} '03},
  title = {Faceted {Metadata} for {Image} {Search} and {Browsing}},
  isbn = {978-1-58113-630-2},
  url = {http://doi.acm.org/10.1145/642611.642681},
  doi = {10.1145/642611.642681},
  abstract = {There are currently two dominant interface types for searching and browsing large image collections: keyword-based search, and searching by overall similarity to sample images. We present an alternative based on enabling users to navigate along conceptual dimensions that describe the images. The interface makes use of hierarchical faceted metadata and dynamically generated query previews. A usability study, in which 32 art history students explored a collection of 35,000 fine arts images, compares this approach to a standard image search interface. Despite the unfamiliarity and power of the interface (attributes that often lead to rejection of new search interfaces), the study results show that 90\% of the participants preferred the metadata approach overall, 97\% said that it helped them learn more about the collection, 75\% found it more flexible, and 72\% found it easier to use than a standard baseline system. These results indicate that a category-based approach is a successful way to provide access to image collections.},
  language = {en},
  urldate = {2018-08-09},
  booktitle = {Proceedings of the {SIGCHI} {Conference} on {Human} {Factors} in {Computing} {Systems}},
  publisher = {ACM},
  author = {Yee, Ka-Ping and Swearingen, Kirsten and Li, Kevin and Hearst, Marti},
  year = {2003},
  pages = {401--408},
}

@article{hearst_finding_2002,
  title = {Finding the flow in web site search},
  volume = {45},
  issn = {0001-0782},
  url = {http://doi.acm.org/10.1145/567498.567525},
  doi = {10.1145/567498.567525},
  abstract = {Designing a search system and interface may best be served (and executed) by scrutinizing usability studies.},
  language = {en},
  number = {9},
  journal = {Communications of the ACM},
  author = {Hearst, Marti and Elliott, Ame and English, Jennifer and Sinha, Rashmi and Swearingen, Kirsten and Yee, Ka-Ping},
  month = sep,
  year = {2002},
  pages = {42--49},
}

@inproceedings{joachims_optimizing_2002,
  address = {New York, NY, USA},
  series = {{KDD} '02},
  title = {Optimizing search engines using clickthrough data},
  isbn = {978-1-58113-567-1},
  url = {http://dl.acm.org/citation.cfm?id=775047.775067},
  doi = {10.1145/775047.775067},
  abstract = {This paper presents an approach to automatically optimizing the retrieval quality of search engines using clickthrough data. Intuitively, a good information retrieval system should present relevant documents high in the ranking, with less relevant documents following below. While previous approaches to learning retrieval functions from examples exist, they typically require training data generated from relevance judgments by experts. This makes them difficult and expensive to apply. The goal of this paper is to develop a method that utilizes clickthrough data for training, namely the query-log of the search engine in connection with the log of links the users clicked on in the presented ranking. Such clickthrough data is available in abundance and can be recorded at very low cost. Taking a Support Vector Machine (SVM) approach, this paper presents a method for learning retrieval functions. From a theoretical perspective, this method is shown to be well-founded in a risk minimization framework. Furthermore, it is shown to be feasible even for large sets of queries and features. The theoretical results are verified in a controlled experiment. It shows that the method can effectively adapt the retrieval function of a meta-search engine to a particular group of users, outperforming Google in terms of retrieval quality after only a couple of hundred training examples.},
  language = {en},
  urldate = {2019-01-18},
  booktitle = {Proceedings of the {Eighth} {ACM} {SIGKDD} {International} {Conference} on {Knowledge} {Discovery} and {Data} {Mining}},
  venue = {Edmonton, Alberta, Canada},
  publisher = {ACM},
  author = {Joachims, Thorsten},
  month = jul,
  year = {2002},
  pages = {133--142},
}

@inproceedings{english_hierarchical_2002,
  address = {New York, NY, USA},
  series = {{CHI} {EA} '02},
  title = {Hierarchical {Faceted} {Metadata} in {Site} {Search} {Interfaces}},
  isbn = {978-1-58113-454-4},
  url = {http://doi.acm.org/10.1145/506443.506517},
  doi = {10.1145/506443.506517},
  abstract = {One of the most pressing usability issues in the design of large web sites is that of the organization of search results. A previous study on a moderate-sized web site indicated that users understood and preferred dynamically organized faceted metadata over standard search. We are now examining how to scale this approach to very large collections, since it is difficult to present hierarchical faceted metadata in a manner appealing and understandable to general users. We have iteratively designed and tested interfaces that address these design challenges; the most recent version is receiving enthusiastic responses in ongoing usability studies.},
  language = {en},
  urldate = {2018-07-06},
  booktitle = {{CHI} '02 {Extended} {Abstracts} on {Human} {Factors} in {Computing} {Systems}},
  publisher = {ACM},
  author = {English, Jennifer and Hearst, Marti and Sinha, Rashmi and Swearingen, Kirsten and Yee, Ka-Ping},
  year = {2002},
  pages = {628--639},
}

@article{spink_searching_2001,
  title = {Searching the web: {The} public and their queries},
  volume = {52},
  copyright = {Copyright © 2001 John Wiley \& Sons, Inc.},
  issn = {1532-2890},
  shorttitle = {Searching the web},
  url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/1097-4571%282000%299999%3A9999%3C%3A%3AAID-ASI1591%3E3.0.CO%3B2-R},
  doi = {10.1002/1097-4571(2000)9999:9999<::AID-ASI1591>3.0.CO;2-R},
  abstract = {In studying actual Web searching by the public at large, we analyzed over one million Web queries by users of the Excite search engine. We found that most people use few search terms, few modified queries, view few Web pages, and rarely use advanced search features. A small number of search terms are used with high frequency, and a great many terms are unique; the language of Web queries is distinctive. Queries about recreation and entertainment rank highest. Findings are compared to data from two other large studies of Web queries. This study provides an insight into the public practices and choices in Web searching.},
  language = {en},
  number = {3},
  urldate = {2019-01-21},
  journal = {Journal of the American Society for Information Science and Technology},
  author = {Spink, Amanda and Wolfram, Dietmar and Jansen, Bernard J. and Saracevic, Tefko},
  year = {2001},
  pages = {226--234},
}

@article{jansen_real_2000,
  title = {Real life, real users, and real needs: a study and analysis of user queries on the web},
  volume = {36},
  issn = {0306-4573},
  shorttitle = {Real life, real users, and real needs},
  url = {http://www.sciencedirect.com/science/article/pii/S0306457399000564},
  doi = {10.1016/S0306-4573(99)00056-4},
  abstract = {We analyzed transaction logs containing 51,473 queries posed by 18,113 users of Excite, a major Internet search service. We provide data on: (i) sessions — changes in queries during a session, number of pages viewed, and use of relevance feedback; (ii) queries — the number of search terms, and the use of logic and modifiers; and (iii) terms — their rank/frequency distribution and the most highly used search terms. We then shift the focus of analysis from the query to the user to gain insight to the characteristics of the Web user. With these characteristics as a basis, we then conducted a failure analysis, identifying trends among user mistakes. We conclude with a summary of findings and a discussion of the implications of these findings.},
  language = {en},
  number = {2},
  urldate = {2019-01-27},
  journal = {Information Processing \& Management},
  author = {Jansen, Bernard J. and Spink, Amanda and Saracevic, Tefko},
  month = mar,
  year = {2000},
  pages = {207--227},
}

@article{silverstein_analysis_1999,
  title = {Analysis of a {Very} {Large} {Web} {Search} {Engine} {Query} {Log}},
  volume = {33},
  issn = {0163-5840},
  url = {http://doi.acm.org/10.1145/331403.331405},
  doi = {10.1145/331403.331405},
  abstract = {In this paper we present an analysis of an AltaVista Search Engine query log consisting of approximately 1 billion entries for search requests over a period of six weeks. This represents almost 285 million user sessions, each an attempt to fill a single information need. We present an analysis of individual queries, query duplication, and query sessions. We also present results of a correlation analysis of the log entries, studying the interaction of terms within queries. Our data supports the conjecture that web users differ significantly from the user assumed in the standard information retrieval literature. Specifically, we show that web users type in short queries, mostly look at the first 10 results only, and seldom modify the query. This suggests that traditional information retrieval techniques may not work well for answering web search requests. The correlation analysis showed that the most highly correlated items are constituents of phrases. This result indicates it may be useful for search engines to consider search terms as parts of phrases even if the user did not explicitly specify them as such.},
  language = {en},
  number = {1},
  urldate = {2018-03-29},
  journal = {SIGIR Forum},
  author = {Silverstein, Craig and Marais, Hannes and Henzinger, Monika and Moricz, Michael},
  month = sep,
  year = {1999},
  pages = {6--12},
}

@article{saracevic_study_1988,
  title = {A study of information seeking and retrieving. {III}. {Searchers}, searches, and overlap},
  volume = {39},
  copyright = {Copyright © 1988 John Wiley \& Sons, Inc.},
  issn = {1097-4571},
  url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/%28SICI%291097-4571%28198805%2939%3A3%3C197%3A%3AAID-ASI4%3E3.0.CO%3B2-A},
  doi = {10.1002/(SICI)1097-4571(198805)39:3<197::AID-ASI4>3.0.CO;2-A},
  abstract = {The objectives of the study were to conduct a series of observations and experiments under as real-life situation as possible related to: (1) user context of questions in information retrieval; (2) the structure and classification of questions; (3) cognitive traits and decision making of searchers; and (4) different searches of the same question. The study is presented in three parts: Part I presents the background of the study and describes the models, measures, methods, procedures and statistical analyses used. Part II is devoted to results related to users, questions and effectiveness measures, and Part III to results related to searchers, searches and overlap studies. A concluding summary of all results is presented in Part III. © 1988 John Wiley \& Sons, Inc.},
  language = {en},
  number = {3},
  urldate = {2019-01-21},
  journal = {Journal of the American Society for Information Science},
  author = {Saracevic, Tefko and Kantor, Paul},
  month = may,
  year = {1988},
  pages = {197--216},
}