% Robertson and Zaragoza (2009): survey of the Probabilistic Relevance
% Framework, BM25 and BM25F.
% Titles are stored in Title Case; only the acronym BM25 is brace-protected,
% so the bibliography style decides the final capitalisation.
@article{robertson_probabilistic_2009,
  author     = {Robertson, Stephen and Zaragoza, Hugo},
  title      = {The Probabilistic Relevance Framework: {BM25} and Beyond},
  shorttitle = {The Probabilistic Relevance Framework},
  journal    = {Foundations and Trends® in Information Retrieval},
  volume     = {3},
  number     = {4},
  pages      = {333--389},
  month      = dec,
  year       = {2009},
  issn       = {1554-0669, 1554-0677},
  doi        = {10.1561/1500000019},
  url        = {https://www.nowpublishers.com/article/Details/INR-019},
  urldate    = {2019-01-18},
  language   = {en},
  abstract   = {The Probabilistic Relevance Framework (PRF) is a formal framework for document retrieval, grounded in work done in the 1970–1980s, which led to the development of one of the most successful text-retrieval algorithms, BM25. In recent years, research in the PRF has yielded new retrieval models capable of taking into account document meta-data (especially structure and link-graph information). Again, this has led to one of the most successful Web-search and corporate-search algorithms, BM25F. This work presents the PRF from a conceptual point of view, describing the probabilistic modelling assumptions behind the framework and the different ranking algorithms that result from its application: the binary independence model, relevance feedback models, BM25 and BM25F. It also discusses the relation between the PRF and other statistical models for IR, and covers some related topics, such as the use of non-textual features, and parameter optimisation for models with free parameters.},
}

% Maron and Kuhns (1960): the original "Probabilistic Indexing" paper.
% The title contains no acronyms or proper nouns, so nothing in it is
% brace-protected.
@article{maron_relevance_1960,
  author   = {Maron, M. E. and Kuhns, J. L.},
  title    = {On Relevance, Probabilistic Indexing and Information Retrieval},
  journal  = {Journal of the ACM},
  volume   = {7},
  number   = {3},
  pages    = {216--244},
  month    = jul,
  year     = {1960},
  issn     = {0004-5411},
  doi      = {10.1145/321033.321035},
  url      = {http://doi.acm.org/10.1145/321033.321035},
  urldate  = {2019-01-27},
  language = {en},
  abstract = {This paper reports on a novel technique for literature indexing and searching in a mechanized library system. The notion of relevance is taken as the key concept in the theory of information retrieval and a comparative concept of relevance is explicated in terms of the theory of probability. The resulting technique called “Probabilistic Indexing,” allows a computing machine, given a request for information, to make a statistical inference and derive a number (called the “relevance number”) for each document, which is a measure of the probability that the document will satisfy the given request. The result of a search is an ordered list of those documents which satisfy the request ranked according to their probable relevance. The paper goes on to show that whereas in a conventional library system the cross-referencing (“see” and “see also”) is based solely on the “semantical closeness” between index terms, statistical measures of closeness between index terms can be defined and computed. Thus, given an arbitrary request consisting of one (or many) index term(s), a machine can elaborate on it to increase the probability of selecting relevant documents that would not otherwise have been selected. Finally, the paper suggests an interpretation of the whole library problem as one where the request is considered as a clue on the basis of which the library system makes a concatenated statistical inference in order to provide as an output an ordered list of those documents which most probably satisfy the information needs of the user.},
}