Document (#18154)

Author
Schütze, H.
Pedersen, J.O.
Title
A cooccurrence-based thesaurus and two applications to information retrieval
Source
Information processing and management. 33(1997) no.3, S.307-318
Year
1997
Abstract
Presents a new method for computing a thesaurus from a text corpus. Each word is represented as a vector in a multi-dimensional space that captures cooccurrence information. Words are defined to be similar if they have similar cooccurrence patterns. Two different methods for using these thesaurus vectors in information retrieval are shown to significantly improve performance over the Tipster reference corpus as compared to a vector space baseline.

Similar documents (content)

  1. Song, D.; Bruza, P.D.: Towards context sensitive information inference (2003) 0.25
    0.25350562 = sum of:
      0.25350562 = product of:
        0.633764 = sum of:
          0.027779283 = weight(abstract_txt:words in 1428) [ClassicSimilarity], result of:
            0.027779283 = score(doc=1428,freq=1.0), product of:
              0.083031565 = queryWeight, product of:
                1.1560521 = boost
                5.353007 = idf(docFreq=568, maxDocs=44218)
                0.013417389 = queryNorm
              0.33456293 = fieldWeight in 1428, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.353007 = idf(docFreq=568, maxDocs=44218)
                0.0625 = fieldNorm(doc=1428)
          0.030205576 = weight(abstract_txt:significantly in 1428) [ClassicSimilarity], result of:
            0.030205576 = score(doc=1428,freq=1.0), product of:
              0.087798536 = queryWeight, product of:
                1.1887743 = boost
                5.5045247 = idf(docFreq=488, maxDocs=44218)
                0.013417389 = queryNorm
              0.3440328 = fieldWeight in 1428, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.5045247 = idf(docFreq=488, maxDocs=44218)
                0.0625 = fieldNorm(doc=1428)
          0.03413056 = weight(abstract_txt:represented in 1428) [ClassicSimilarity], result of:
            0.03413056 = score(doc=1428,freq=1.0), product of:
              0.0952485 = queryWeight, product of:
                1.238183 = boost
                5.733308 = idf(docFreq=388, maxDocs=44218)
                0.013417389 = queryNorm
              0.35833174 = fieldWeight in 1428, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.733308 = idf(docFreq=388, maxDocs=44218)
                0.0625 = fieldNorm(doc=1428)
          0.080764666 = weight(abstract_txt:dimensional in 1428) [ClassicSimilarity], result of:
            0.080764666 = score(doc=1428,freq=2.0), product of:
              0.13424562 = queryWeight, product of:
                1.4699612 = boost
                6.806538 = idf(docFreq=132, maxDocs=44218)
                0.013417389 = queryNorm
              0.60161865 = fieldWeight in 1428, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                6.806538 = idf(docFreq=132, maxDocs=44218)
                0.0625 = fieldNorm(doc=1428)
          0.015201074 = weight(abstract_txt:retrieval in 1428) [ClassicSimilarity], result of:
            0.015201074 = score(doc=1428,freq=1.0), product of:
              0.06998775 = queryWeight, product of:
                1.5010039 = boost
                3.4751394 = idf(docFreq=3720, maxDocs=44218)
                0.013417389 = queryNorm
              0.21719621 = fieldWeight in 1428, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                3.4751394 = idf(docFreq=3720, maxDocs=44218)
                0.0625 = fieldNorm(doc=1428)
          0.021804664 = weight(abstract_txt:information in 1428) [ClassicSimilarity], result of:
            0.021804664 = score(doc=1428,freq=8.0), product of:
              0.050949417 = queryWeight, product of:
                1.5685054 = boost
                2.4209464 = idf(docFreq=10677, maxDocs=44218)
                0.013417389 = queryNorm
              0.4279669 = fieldWeight in 1428, product of:
                2.828427 = tf(freq=8.0), with freq of:
                  8.0 = termFreq=8.0
                2.4209464 = idf(docFreq=10677, maxDocs=44218)
                0.0625 = fieldNorm(doc=1428)
          0.086110786 = weight(abstract_txt:vectors in 1428) [ClassicSimilarity], result of:
            0.086110786 = score(doc=1428,freq=1.0), product of:
              0.17652284 = queryWeight, product of:
                1.6856066 = boost
                7.805067 = idf(docFreq=48, maxDocs=44218)
                0.013417389 = queryNorm
              0.4878167 = fieldWeight in 1428, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.805067 = idf(docFreq=48, maxDocs=44218)
                0.0625 = fieldNorm(doc=1428)
          0.113590814 = weight(abstract_txt:space in 1428) [ClassicSimilarity], result of:
            0.113590814 = score(doc=1428,freq=4.0), product of:
              0.16851866 = queryWeight, product of:
                2.3291357 = boost
                5.3924384 = idf(docFreq=546, maxDocs=44218)
                0.013417389 = queryNorm
              0.6740548 = fieldWeight in 1428, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                5.3924384 = idf(docFreq=546, maxDocs=44218)
                0.0625 = fieldNorm(doc=1428)
          0.082152225 = weight(abstract_txt:corpus in 1428) [ClassicSimilarity], result of:
            0.082152225 = score(doc=1428,freq=1.0), product of:
              0.21553548 = queryWeight, product of:
                2.6340873 = boost
                6.0984654 = idf(docFreq=269, maxDocs=44218)
                0.013417389 = queryNorm
              0.3811541 = fieldWeight in 1428, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.0984654 = idf(docFreq=269, maxDocs=44218)
                0.0625 = fieldNorm(doc=1428)
          0.14202437 = weight(abstract_txt:vector in 1428) [ClassicSimilarity], result of:
            0.14202437 = score(doc=1428,freq=2.0), product of:
              0.24641724 = queryWeight, product of:
                2.8164778 = boost
                6.5207376 = idf(docFreq=176, maxDocs=44218)
                0.013417389 = queryNorm
              0.57635725 = fieldWeight in 1428, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                6.5207376 = idf(docFreq=176, maxDocs=44218)
                0.0625 = fieldNorm(doc=1428)
        0.4 = coord(10/25)
    
  2. Bernier-Colborne, G.: Identifying semantic relations in a specialized corpus through distributional analysis of a cooccurrence tensor (2014) 0.25
    0.25047374 = sum of:
      0.25047374 = product of:
        1.2523687 = sum of:
          0.05816358 = weight(abstract_txt:word in 2153) [ClassicSimilarity], result of:
            0.05816358 = score(doc=2153,freq=1.0), product of:
              0.085607134 = queryWeight, product of:
                1.173845 = boost
                5.4353957 = idf(docFreq=523, maxDocs=44218)
                0.013417389 = queryNorm
              0.67942446 = fieldWeight in 2153, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.4353957 = idf(docFreq=523, maxDocs=44218)
                0.125 = fieldNorm(doc=2153)
          0.015418226 = weight(abstract_txt:information in 2153) [ClassicSimilarity], result of:
            0.015418226 = score(doc=2153,freq=1.0), product of:
              0.050949417 = queryWeight, product of:
                1.5685054 = boost
                2.4209464 = idf(docFreq=10677, maxDocs=44218)
                0.013417389 = queryNorm
              0.3026183 = fieldWeight in 2153, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                2.4209464 = idf(docFreq=10677, maxDocs=44218)
                0.125 = fieldNorm(doc=2153)
          0.113590814 = weight(abstract_txt:space in 2153) [ClassicSimilarity], result of:
            0.113590814 = score(doc=2153,freq=1.0), product of:
              0.16851866 = queryWeight, product of:
                2.3291357 = boost
                5.3924384 = idf(docFreq=546, maxDocs=44218)
                0.013417389 = queryNorm
              0.6740548 = fieldWeight in 2153, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.3924384 = idf(docFreq=546, maxDocs=44218)
                0.125 = fieldNorm(doc=2153)
          0.16430445 = weight(abstract_txt:corpus in 2153) [ClassicSimilarity], result of:
            0.16430445 = score(doc=2153,freq=1.0), product of:
              0.21553548 = queryWeight, product of:
                2.6340873 = boost
                6.0984654 = idf(docFreq=269, maxDocs=44218)
                0.013417389 = queryNorm
              0.7623082 = fieldWeight in 2153, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.0984654 = idf(docFreq=269, maxDocs=44218)
                0.125 = fieldNorm(doc=2153)
          0.90089154 = weight(abstract_txt:cooccurrence in 2153) [ClassicSimilarity], result of:
            0.90089154 = score(doc=2153,freq=1.0), product of:
              0.7671812 = queryWeight, product of:
                6.0864687 = boost
                9.394302 = idf(docFreq=9, maxDocs=44218)
                0.013417389 = queryNorm
              1.1742878 = fieldWeight in 2153, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                9.394302 = idf(docFreq=9, maxDocs=44218)
                0.125 = fieldNorm(doc=2153)
        0.2 = coord(5/25)
    
  3. Lund, K.; Burgess, C.: Producing high-dimensional semantic spaces from lexical co-occurrence (1996) 0.22
    0.21993409 = sum of:
      0.21993409 = product of:
        0.7854789 = sum of:
          0.062963925 = weight(abstract_txt:word in 1704) [ClassicSimilarity], result of:
            0.062963925 = score(doc=1704,freq=3.0), product of:
              0.085607134 = queryWeight, product of:
                1.173845 = boost
                5.4353957 = idf(docFreq=523, maxDocs=44218)
                0.013417389 = queryNorm
              0.73549855 = fieldWeight in 1704, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                5.4353957 = idf(docFreq=523, maxDocs=44218)
                0.078125 = fieldNorm(doc=1704)
          0.07138656 = weight(abstract_txt:dimensional in 1704) [ClassicSimilarity], result of:
            0.07138656 = score(doc=1704,freq=1.0), product of:
              0.13424562 = queryWeight, product of:
                1.4699612 = boost
                6.806538 = idf(docFreq=132, maxDocs=44218)
                0.013417389 = queryNorm
              0.5317608 = fieldWeight in 1704, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.806538 = idf(docFreq=132, maxDocs=44218)
                0.078125 = fieldNorm(doc=1704)
          0.01669072 = weight(abstract_txt:information in 1704) [ClassicSimilarity], result of:
            0.01669072 = score(doc=1704,freq=3.0), product of:
              0.050949417 = queryWeight, product of:
                1.5685054 = boost
                2.4209464 = idf(docFreq=10677, maxDocs=44218)
                0.013417389 = queryNorm
              0.32759392 = fieldWeight in 1704, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                2.4209464 = idf(docFreq=10677, maxDocs=44218)
                0.078125 = fieldNorm(doc=1704)
          0.24068697 = weight(abstract_txt:vectors in 1704) [ClassicSimilarity], result of:
            0.24068697 = score(doc=1704,freq=5.0), product of:
              0.17652284 = queryWeight, product of:
                1.6856066 = boost
                7.805067 = idf(docFreq=48, maxDocs=44218)
                0.013417389 = queryNorm
              1.3634892 = fieldWeight in 1704, product of:
                2.236068 = tf(freq=5.0), with freq of:
                  5.0 = termFreq=5.0
                7.805067 = idf(docFreq=48, maxDocs=44218)
                0.078125 = fieldNorm(doc=1704)
          0.07099426 = weight(abstract_txt:space in 1704) [ClassicSimilarity], result of:
            0.07099426 = score(doc=1704,freq=1.0), product of:
              0.16851866 = queryWeight, product of:
                2.3291357 = boost
                5.3924384 = idf(docFreq=546, maxDocs=44218)
                0.013417389 = queryNorm
              0.42128426 = fieldWeight in 1704, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.3924384 = idf(docFreq=546, maxDocs=44218)
                0.078125 = fieldNorm(doc=1704)
          0.145226 = weight(abstract_txt:corpus in 1704) [ClassicSimilarity], result of:
            0.145226 = score(doc=1704,freq=2.0), product of:
              0.21553548 = queryWeight, product of:
                2.6340873 = boost
                6.0984654 = idf(docFreq=269, maxDocs=44218)
                0.013417389 = queryNorm
              0.67379165 = fieldWeight in 1704, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                6.0984654 = idf(docFreq=269, maxDocs=44218)
                0.078125 = fieldNorm(doc=1704)
          0.17753045 = weight(abstract_txt:vector in 1704) [ClassicSimilarity], result of:
            0.17753045 = score(doc=1704,freq=2.0), product of:
              0.24641724 = queryWeight, product of:
                2.8164778 = boost
                6.5207376 = idf(docFreq=176, maxDocs=44218)
                0.013417389 = queryNorm
              0.7204466 = fieldWeight in 1704, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                6.5207376 = idf(docFreq=176, maxDocs=44218)
                0.078125 = fieldNorm(doc=1704)
        0.28 = coord(7/25)
    
  4. Lochbaum, K.E.; Streeter, A.R.: Comparing and combining the effectiveness of latent semantic indexing and the ordinary vector space model for information retrieval (1989) 0.20
    0.20156306 = sum of:
      0.20156306 = product of:
        0.55989736 = sum of:
          0.022474956 = weight(abstract_txt:performance in 3458) [ClassicSimilarity], result of:
            0.022474956 = score(doc=3458,freq=1.0), product of:
              0.06212815 = queryWeight, product of:
                4.63042 = idf(docFreq=1171, maxDocs=44218)
                0.013417389 = queryNorm
              0.3617516 = fieldWeight in 3458, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.63042 = idf(docFreq=1171, maxDocs=44218)
                0.078125 = fieldNorm(doc=3458)
          0.028570559 = weight(abstract_txt:compared in 3458) [ClassicSimilarity], result of:
            0.028570559 = score(doc=3458,freq=1.0), product of:
              0.07290685 = queryWeight, product of:
                1.0832781 = boost
                5.0160327 = idf(docFreq=796, maxDocs=44218)
                0.013417389 = queryNorm
              0.39187756 = fieldWeight in 3458, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.0160327 = idf(docFreq=796, maxDocs=44218)
                0.078125 = fieldNorm(doc=3458)
          0.03635224 = weight(abstract_txt:word in 3458) [ClassicSimilarity], result of:
            0.03635224 = score(doc=3458,freq=1.0), product of:
              0.085607134 = queryWeight, product of:
                1.173845 = boost
                5.4353957 = idf(docFreq=523, maxDocs=44218)
                0.013417389 = queryNorm
              0.4246403 = fieldWeight in 3458, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.4353957 = idf(docFreq=523, maxDocs=44218)
                0.078125 = fieldNorm(doc=3458)
          0.042663198 = weight(abstract_txt:represented in 3458) [ClassicSimilarity], result of:
            0.042663198 = score(doc=3458,freq=1.0), product of:
              0.0952485 = queryWeight, product of:
                1.238183 = boost
                5.733308 = idf(docFreq=388, maxDocs=44218)
                0.013417389 = queryNorm
              0.44791466 = fieldWeight in 3458, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.733308 = idf(docFreq=388, maxDocs=44218)
                0.078125 = fieldNorm(doc=3458)
          0.07138656 = weight(abstract_txt:dimensional in 3458) [ClassicSimilarity], result of:
            0.07138656 = score(doc=3458,freq=1.0), product of:
              0.13424562 = queryWeight, product of:
                1.4699612 = boost
                6.806538 = idf(docFreq=132, maxDocs=44218)
                0.013417389 = queryNorm
              0.5317608 = fieldWeight in 3458, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.806538 = idf(docFreq=132, maxDocs=44218)
                0.078125 = fieldNorm(doc=3458)
          0.026871955 = weight(abstract_txt:retrieval in 3458) [ClassicSimilarity], result of:
            0.026871955 = score(doc=3458,freq=2.0), product of:
              0.06998775 = queryWeight, product of:
                1.5010039 = boost
                3.4751394 = idf(docFreq=3720, maxDocs=44218)
                0.013417389 = queryNorm
              0.38395226 = fieldWeight in 3458, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                3.4751394 = idf(docFreq=3720, maxDocs=44218)
                0.078125 = fieldNorm(doc=3458)
          0.06405638 = weight(abstract_txt:similar in 3458) [ClassicSimilarity], result of:
            0.06405638 = score(doc=3458,freq=1.0), product of:
              0.15735267 = queryWeight, product of:
                2.2506495 = boost
                5.2107263 = idf(docFreq=655, maxDocs=44218)
                0.013417389 = queryNorm
              0.40708798 = fieldWeight in 3458, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.2107263 = idf(docFreq=655, maxDocs=44218)
                0.078125 = fieldNorm(doc=3458)
          0.14198852 = weight(abstract_txt:space in 3458) [ClassicSimilarity], result of:
            0.14198852 = score(doc=3458,freq=4.0), product of:
              0.16851866 = queryWeight, product of:
                2.3291357 = boost
                5.3924384 = idf(docFreq=546, maxDocs=44218)
                0.013417389 = queryNorm
              0.8425685 = fieldWeight in 3458, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                5.3924384 = idf(docFreq=546, maxDocs=44218)
                0.078125 = fieldNorm(doc=3458)
          0.12553298 = weight(abstract_txt:vector in 3458) [ClassicSimilarity], result of:
            0.12553298 = score(doc=3458,freq=1.0), product of:
              0.24641724 = queryWeight, product of:
                2.8164778 = boost
                6.5207376 = idf(docFreq=176, maxDocs=44218)
                0.013417389 = queryNorm
              0.5094326 = fieldWeight in 3458, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.5207376 = idf(docFreq=176, maxDocs=44218)
                0.078125 = fieldNorm(doc=3458)
        0.36 = coord(9/25)
    
  5. Duwairi, R.M.: Machine learning for Arabic text categorization (2006) 0.17
    0.16806026 = sum of:
      0.16806026 = product of:
        0.6002152 = sum of:
          0.034724105 = weight(abstract_txt:words in 5115) [ClassicSimilarity], result of:
            0.034724105 = score(doc=5115,freq=1.0), product of:
              0.083031565 = queryWeight, product of:
                1.1560521 = boost
                5.353007 = idf(docFreq=568, maxDocs=44218)
                0.013417389 = queryNorm
              0.41820365 = fieldWeight in 5115, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.353007 = idf(docFreq=568, maxDocs=44218)
                0.078125 = fieldNorm(doc=5115)
          0.042663198 = weight(abstract_txt:represented in 5115) [ClassicSimilarity], result of:
            0.042663198 = score(doc=5115,freq=1.0), product of:
              0.0952485 = queryWeight, product of:
                1.238183 = boost
                5.733308 = idf(docFreq=388, maxDocs=44218)
                0.013417389 = queryNorm
              0.44791466 = fieldWeight in 5115, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.733308 = idf(docFreq=388, maxDocs=44218)
                0.078125 = fieldNorm(doc=5115)
          0.07138656 = weight(abstract_txt:dimensional in 5115) [ClassicSimilarity], result of:
            0.07138656 = score(doc=5115,freq=1.0), product of:
              0.13424562 = queryWeight, product of:
                1.4699612 = boost
                6.806538 = idf(docFreq=132, maxDocs=44218)
                0.013417389 = queryNorm
              0.5317608 = fieldWeight in 5115, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.806538 = idf(docFreq=132, maxDocs=44218)
                0.078125 = fieldNorm(doc=5115)
          0.1522238 = weight(abstract_txt:vectors in 5115) [ClassicSimilarity], result of:
            0.1522238 = score(doc=5115,freq=2.0), product of:
              0.17652284 = queryWeight, product of:
                1.6856066 = boost
                7.805067 = idf(docFreq=48, maxDocs=44218)
                0.013417389 = queryNorm
              0.86234623 = fieldWeight in 5115, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                7.805067 = idf(docFreq=48, maxDocs=44218)
                0.078125 = fieldNorm(doc=5115)
          0.07099426 = weight(abstract_txt:space in 5115) [ClassicSimilarity], result of:
            0.07099426 = score(doc=5115,freq=1.0), product of:
              0.16851866 = queryWeight, product of:
                2.3291357 = boost
                5.3924384 = idf(docFreq=546, maxDocs=44218)
                0.013417389 = queryNorm
              0.42128426 = fieldWeight in 5115, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.3924384 = idf(docFreq=546, maxDocs=44218)
                0.078125 = fieldNorm(doc=5115)
          0.10269029 = weight(abstract_txt:corpus in 5115) [ClassicSimilarity], result of:
            0.10269029 = score(doc=5115,freq=1.0), product of:
              0.21553548 = queryWeight, product of:
                2.6340873 = boost
                6.0984654 = idf(docFreq=269, maxDocs=44218)
                0.013417389 = queryNorm
              0.4764426 = fieldWeight in 5115, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.0984654 = idf(docFreq=269, maxDocs=44218)
                0.078125 = fieldNorm(doc=5115)
          0.12553298 = weight(abstract_txt:vector in 5115) [ClassicSimilarity], result of:
            0.12553298 = score(doc=5115,freq=1.0), product of:
              0.24641724 = queryWeight, product of:
                2.8164778 = boost
                6.5207376 = idf(docFreq=176, maxDocs=44218)
                0.013417389 = queryNorm
              0.5094326 = fieldWeight in 5115, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.5207376 = idf(docFreq=176, maxDocs=44218)
                0.078125 = fieldNorm(doc=5115)
        0.28 = coord(7/25)