Document (#26428)

Author
Bartell, B.T.
Cottrell, G.W.
Belew, R.K.
Title
Representing documents using an explicit model of their similarities
Source
Journal of the American Society for Information Science. 46(1995) no.4, S.254-271
Year
1995
Abstract
Proposes a method for creating vector space representations of documents based on modelling target interdocument similarity values. The target similarity values are assumed to capture semantic relationships, or associations, between the documents. The vector representations are chosen so that the inner product similarities between document vector pairs closely match their target interdocument similarities. The method is closely related to the Latent Semantic Indexing approach.
Object
Latent Semantic Indexing

Similar documents (content)

  1. Martin, D.I.; Berry, M.W.: Latent Semantic Indexing (2009) 0.23
    0.2266178 = sum of:
      0.2266178 = product of:
        0.8093493 = sum of:
          0.020845613 = weight(abstract_txt:between in 835) [ClassicSimilarity], result of:
            0.020845613 = score(doc=835,freq=1.0), product of:
              0.07627496 = queryWeight, product of:
                1.2559756 = boost
                3.498184 = idf(docFreq=3478, maxDocs=42306)
                0.017360337 = queryNorm
              0.2732956 = fieldWeight in 835, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                3.498184 = idf(docFreq=3478, maxDocs=42306)
                0.078125 = fieldNorm(doc=835)
          0.12001589 = weight(abstract_txt:latent in 835) [ClassicSimilarity], result of:
            0.12001589 = score(doc=835,freq=2.0), product of:
              0.1543517 = queryWeight, product of:
                1.263371 = boost
                7.037564 = idf(docFreq=100, maxDocs=42306)
                0.017360337 = queryNorm
              0.77754825 = fieldWeight in 835, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                7.037564 = idf(docFreq=100, maxDocs=42306)
                0.078125 = fieldNorm(doc=835)
          0.08867912 = weight(abstract_txt:semantic in 835) [ClassicSimilarity], result of:
            0.08867912 = score(doc=835,freq=4.0), product of:
              0.12615365 = queryWeight, product of:
                1.6152513 = boost
                4.4988503 = idf(docFreq=1278, maxDocs=42306)
                0.017360337 = queryNorm
              0.70294535 = fieldWeight in 835, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                4.4988503 = idf(docFreq=1278, maxDocs=42306)
                0.078125 = fieldNorm(doc=835)
          0.06421525 = weight(abstract_txt:method in 835) [ClassicSimilarity], result of:
            0.06421525 = score(doc=835,freq=2.0), product of:
              0.12817039 = queryWeight, product of:
                1.6281111 = boost
                4.534668 = idf(docFreq=1233, maxDocs=42306)
                0.017360337 = queryNorm
              0.5010147 = fieldWeight in 835, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.534668 = idf(docFreq=1233, maxDocs=42306)
                0.078125 = fieldNorm(doc=835)
          0.10185133 = weight(abstract_txt:documents in 835) [ClassicSimilarity], result of:
            0.10185133 = score(doc=835,freq=4.0), product of:
              0.1583776 = queryWeight, product of:
                2.2165763 = boost
                4.115787 = idf(docFreq=1875, maxDocs=42306)
                0.017360337 = queryNorm
              0.64309174 = fieldWeight in 835, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                4.115787 = idf(docFreq=1875, maxDocs=42306)
                0.078125 = fieldNorm(doc=835)
          0.20161608 = weight(abstract_txt:vector in 835) [ClassicSimilarity], result of:
            0.20161608 = score(doc=835,freq=1.0), product of:
              0.39635658 = queryWeight, product of:
                3.5065405 = boost
                6.5110207 = idf(docFreq=170, maxDocs=42306)
                0.017360337 = queryNorm
              0.5086735 = fieldWeight in 835, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.5110207 = idf(docFreq=170, maxDocs=42306)
                0.078125 = fieldNorm(doc=835)
          0.21212603 = weight(abstract_txt:similarities in 835) [ClassicSimilarity], result of:
            0.21212603 = score(doc=835,freq=1.0), product of:
              0.4100139 = queryWeight, product of:
                3.5664418 = boost
                6.6222463 = idf(docFreq=152, maxDocs=42306)
                0.017360337 = queryNorm
              0.517363 = fieldWeight in 835, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.6222463 = idf(docFreq=152, maxDocs=42306)
                0.078125 = fieldNorm(doc=835)
        0.28 = coord(7/25)
    
  2. Bartell, B.T.; Cottrell, G.W.; Belew, R.K.: Optimizing similarity using multi-query relevance feedback (1998) 0.19
    0.18893765 = sum of:
      0.18893765 = product of:
        0.6747773 = sum of:
          0.07659767 = weight(abstract_txt:similarity in 2153) [ClassicSimilarity], result of:
            0.07659767 = score(doc=2153,freq=4.0), product of:
              0.10538026 = queryWeight, product of:
                1.0438902 = boost
                5.814954 = idf(docFreq=342, maxDocs=42306)
                0.017360337 = queryNorm
              0.7268692 = fieldWeight in 2153, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                5.814954 = idf(docFreq=342, maxDocs=42306)
                0.0625 = fieldNorm(doc=2153)
          0.050213747 = weight(abstract_txt:match in 2153) [ClassicSimilarity], result of:
            0.050213747 = score(doc=2153,freq=1.0), product of:
              0.12623622 = queryWeight, product of:
                1.1425289 = boost
                6.364417 = idf(docFreq=197, maxDocs=42306)
                0.017360337 = queryNorm
              0.39777607 = fieldWeight in 2153, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.364417 = idf(docFreq=197, maxDocs=42306)
                0.0625 = fieldNorm(doc=2153)
          0.016676491 = weight(abstract_txt:between in 2153) [ClassicSimilarity], result of:
            0.016676491 = score(doc=2153,freq=1.0), product of:
              0.07627496 = queryWeight, product of:
                1.2559756 = boost
                3.498184 = idf(docFreq=3478, maxDocs=42306)
                0.017360337 = queryNorm
              0.2186365 = fieldWeight in 2153, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                3.498184 = idf(docFreq=3478, maxDocs=42306)
                0.0625 = fieldNorm(doc=2153)
          0.06291784 = weight(abstract_txt:method in 2153) [ClassicSimilarity], result of:
            0.06291784 = score(doc=2153,freq=3.0), product of:
              0.12817039 = queryWeight, product of:
                1.6281111 = boost
                4.534668 = idf(docFreq=1233, maxDocs=42306)
                0.017360337 = queryNorm
              0.4908922 = fieldWeight in 2153, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                4.534668 = idf(docFreq=1233, maxDocs=42306)
                0.0625 = fieldNorm(doc=2153)
          0.070564665 = weight(abstract_txt:documents in 2153) [ClassicSimilarity], result of:
            0.070564665 = score(doc=2153,freq=3.0), product of:
              0.1583776 = queryWeight, product of:
                2.2165763 = boost
                4.115787 = idf(docFreq=1875, maxDocs=42306)
                0.017360337 = queryNorm
              0.445547 = fieldWeight in 2153, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                4.115787 = idf(docFreq=1875, maxDocs=42306)
                0.0625 = fieldNorm(doc=2153)
          0.16129287 = weight(abstract_txt:vector in 2153) [ClassicSimilarity], result of:
            0.16129287 = score(doc=2153,freq=1.0), product of:
              0.39635658 = queryWeight, product of:
                3.5065405 = boost
                6.5110207 = idf(docFreq=170, maxDocs=42306)
                0.017360337 = queryNorm
              0.4069388 = fieldWeight in 2153, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.5110207 = idf(docFreq=170, maxDocs=42306)
                0.0625 = fieldNorm(doc=2153)
          0.23651402 = weight(abstract_txt:target in 2153) [ClassicSimilarity], result of:
            0.23651402 = score(doc=2153,freq=2.0), product of:
              0.40604162 = queryWeight, product of:
                3.5491233 = boost
                6.5900893 = idf(docFreq=157, maxDocs=42306)
                0.017360337 = queryNorm
              0.5824871 = fieldWeight in 2153, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                6.5900893 = idf(docFreq=157, maxDocs=42306)
                0.0625 = fieldNorm(doc=2153)
        0.28 = coord(7/25)
    
  3. Shibata, N.; Kajikawa, Y.; Sakata, I.: Measuring relatedness between communities in a citation network (2011) 0.18
    0.18026295 = sum of:
      0.18026295 = product of:
        0.64379627 = sum of:
          0.04787354 = weight(abstract_txt:similarity in 1485) [ClassicSimilarity], result of:
            0.04787354 = score(doc=1485,freq=1.0), product of:
              0.10538026 = queryWeight, product of:
                1.0438902 = boost
                5.814954 = idf(docFreq=342, maxDocs=42306)
                0.017360337 = queryNorm
              0.45429325 = fieldWeight in 1485, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.814954 = idf(docFreq=342, maxDocs=42306)
                0.078125 = fieldNorm(doc=1485)
          0.06415364 = weight(abstract_txt:capture in 1485) [ClassicSimilarity], result of:
            0.06415364 = score(doc=1485,freq=1.0), product of:
              0.12808838 = queryWeight, product of:
                1.1508801 = boost
                6.4109373 = idf(docFreq=188, maxDocs=42306)
                0.017360337 = queryNorm
              0.5008545 = fieldWeight in 1485, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.4109373 = idf(docFreq=188, maxDocs=42306)
                0.078125 = fieldNorm(doc=1485)
          0.020845613 = weight(abstract_txt:between in 1485) [ClassicSimilarity], result of:
            0.020845613 = score(doc=1485,freq=1.0), product of:
              0.07627496 = queryWeight, product of:
                1.2559756 = boost
                3.498184 = idf(docFreq=3478, maxDocs=42306)
                0.017360337 = queryNorm
              0.2732956 = fieldWeight in 1485, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                3.498184 = idf(docFreq=3478, maxDocs=42306)
                0.078125 = fieldNorm(doc=1485)
          0.04433956 = weight(abstract_txt:semantic in 1485) [ClassicSimilarity], result of:
            0.04433956 = score(doc=1485,freq=1.0), product of:
              0.12615365 = queryWeight, product of:
                1.6152513 = boost
                4.4988503 = idf(docFreq=1278, maxDocs=42306)
                0.017360337 = queryNorm
              0.35147268 = fieldWeight in 1485, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.4988503 = idf(docFreq=1278, maxDocs=42306)
                0.078125 = fieldNorm(doc=1485)
          0.045407042 = weight(abstract_txt:method in 1485) [ClassicSimilarity], result of:
            0.045407042 = score(doc=1485,freq=1.0), product of:
              0.12817039 = queryWeight, product of:
                1.6281111 = boost
                4.534668 = idf(docFreq=1233, maxDocs=42306)
                0.017360337 = queryNorm
              0.35427094 = fieldWeight in 1485, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.534668 = idf(docFreq=1233, maxDocs=42306)
                0.078125 = fieldNorm(doc=1485)
          0.20905083 = weight(abstract_txt:target in 1485) [ClassicSimilarity], result of:
            0.20905083 = score(doc=1485,freq=1.0), product of:
              0.40604162 = queryWeight, product of:
                3.5491233 = boost
                6.5900893 = idf(docFreq=157, maxDocs=42306)
                0.017360337 = queryNorm
              0.51485074 = fieldWeight in 1485, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.5900893 = idf(docFreq=157, maxDocs=42306)
                0.078125 = fieldNorm(doc=1485)
          0.21212603 = weight(abstract_txt:similarities in 1485) [ClassicSimilarity], result of:
            0.21212603 = score(doc=1485,freq=1.0), product of:
              0.4100139 = queryWeight, product of:
                3.5664418 = boost
                6.6222463 = idf(docFreq=152, maxDocs=42306)
                0.017360337 = queryNorm
              0.517363 = fieldWeight in 1485, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.6222463 = idf(docFreq=152, maxDocs=42306)
                0.078125 = fieldNorm(doc=1485)
        0.28 = coord(7/25)
    
  4. Liddy, E.D.: ¬An alternative representation for documents and queries (1993) 0.18
    0.17836425 = sum of:
      0.17836425 = product of:
        0.63701516 = sum of:
          0.04787354 = weight(abstract_txt:similarity in 7813) [ClassicSimilarity], result of:
            0.04787354 = score(doc=7813,freq=1.0), product of:
              0.10538026 = queryWeight, product of:
                1.0438902 = boost
                5.814954 = idf(docFreq=342, maxDocs=42306)
                0.017360337 = queryNorm
              0.45429325 = fieldWeight in 7813, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.814954 = idf(docFreq=342, maxDocs=42306)
                0.078125 = fieldNorm(doc=7813)
          0.015968084 = weight(abstract_txt:their in 7813) [ClassicSimilarity], result of:
            0.015968084 = score(doc=7813,freq=1.0), product of:
              0.06385684 = queryWeight, product of:
                1.1491954 = boost
                3.2007766 = idf(docFreq=4683, maxDocs=42306)
                0.017360337 = queryNorm
              0.25006068 = fieldWeight in 7813, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                3.2007766 = idf(docFreq=4683, maxDocs=42306)
                0.078125 = fieldNorm(doc=7813)
          0.04433956 = weight(abstract_txt:semantic in 7813) [ClassicSimilarity], result of:
            0.04433956 = score(doc=7813,freq=1.0), product of:
              0.12615365 = queryWeight, product of:
                1.6152513 = boost
                4.4988503 = idf(docFreq=1278, maxDocs=42306)
                0.017360337 = queryNorm
              0.35147268 = fieldWeight in 7813, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.4988503 = idf(docFreq=1278, maxDocs=42306)
                0.078125 = fieldNorm(doc=7813)
          0.06421525 = weight(abstract_txt:method in 7813) [ClassicSimilarity], result of:
            0.06421525 = score(doc=7813,freq=2.0), product of:
              0.12817039 = queryWeight, product of:
                1.6281111 = boost
                4.534668 = idf(docFreq=1233, maxDocs=42306)
                0.017360337 = queryNorm
              0.5010147 = fieldWeight in 7813, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.534668 = idf(docFreq=1233, maxDocs=42306)
                0.078125 = fieldNorm(doc=7813)
          0.10747076 = weight(abstract_txt:representations in 7813) [ClassicSimilarity], result of:
            0.10747076 = score(doc=7813,freq=1.0), product of:
              0.22763152 = queryWeight, product of:
                2.1697335 = boost
                6.0432124 = idf(docFreq=272, maxDocs=42306)
                0.017360337 = queryNorm
              0.47212598 = fieldWeight in 7813, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.0432124 = idf(docFreq=272, maxDocs=42306)
                0.078125 = fieldNorm(doc=7813)
          0.07201976 = weight(abstract_txt:documents in 7813) [ClassicSimilarity], result of:
            0.07201976 = score(doc=7813,freq=2.0), product of:
              0.1583776 = queryWeight, product of:
                2.2165763 = boost
                4.115787 = idf(docFreq=1875, maxDocs=42306)
                0.017360337 = queryNorm
              0.45473453 = fieldWeight in 7813, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.115787 = idf(docFreq=1875, maxDocs=42306)
                0.078125 = fieldNorm(doc=7813)
          0.2851282 = weight(abstract_txt:vector in 7813) [ClassicSimilarity], result of:
            0.2851282 = score(doc=7813,freq=2.0), product of:
              0.39635658 = queryWeight, product of:
                3.5065405 = boost
                6.5110207 = idf(docFreq=170, maxDocs=42306)
                0.017360337 = queryNorm
              0.7193729 = fieldWeight in 7813, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                6.5110207 = idf(docFreq=170, maxDocs=42306)
                0.078125 = fieldNorm(doc=7813)
        0.28 = coord(7/25)
    
  5. Dominich, S.; Kiezer, T.: ¬A measure theoretic approach to information retrieval (2007) 0.16
    0.16255014 = sum of:
      0.16255014 = product of:
        0.6772922 = sum of:
          0.052158955 = weight(abstract_txt:product in 2446) [ClassicSimilarity], result of:
            0.052158955 = score(doc=2446,freq=2.0), product of:
              0.11233268 = queryWeight, product of:
                1.0777754 = boost
                6.00371 = idf(docFreq=283, maxDocs=42306)
                0.017360337 = queryNorm
              0.46432576 = fieldWeight in 2446, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                6.00371 = idf(docFreq=283, maxDocs=42306)
                0.0546875 = fieldNorm(doc=2446)
          0.014591929 = weight(abstract_txt:between in 2446) [ClassicSimilarity], result of:
            0.014591929 = score(doc=2446,freq=1.0), product of:
              0.07627496 = queryWeight, product of:
                1.2559756 = boost
                3.498184 = idf(docFreq=3478, maxDocs=42306)
                0.017360337 = queryNorm
              0.19130693 = fieldWeight in 2446, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                3.498184 = idf(docFreq=3478, maxDocs=42306)
                0.0546875 = fieldNorm(doc=2446)
          0.05940484 = weight(abstract_txt:latent in 2446) [ClassicSimilarity], result of:
            0.05940484 = score(doc=2446,freq=1.0), product of:
              0.1543517 = queryWeight, product of:
                1.263371 = boost
                7.037564 = idf(docFreq=100, maxDocs=42306)
                0.017360337 = queryNorm
              0.38486677 = fieldWeight in 2446, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.037564 = idf(docFreq=100, maxDocs=42306)
                0.0546875 = fieldNorm(doc=2446)
          0.14670065 = weight(abstract_txt:inner in 2446) [ClassicSimilarity], result of:
            0.14670065 = score(doc=2446,freq=2.0), product of:
              0.22382481 = queryWeight, product of:
                1.5213506 = boost
                8.47463 = idf(docFreq=23, maxDocs=42306)
                0.017360337 = queryNorm
              0.6554262 = fieldWeight in 2446, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                8.47463 = idf(docFreq=23, maxDocs=42306)
                0.0546875 = fieldNorm(doc=2446)
          0.031037694 = weight(abstract_txt:semantic in 2446) [ClassicSimilarity], result of:
            0.031037694 = score(doc=2446,freq=1.0), product of:
              0.12615365 = queryWeight, product of:
                1.6152513 = boost
                4.4988503 = idf(docFreq=1278, maxDocs=42306)
                0.017360337 = queryNorm
              0.24603088 = fieldWeight in 2446, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.4988503 = idf(docFreq=1278, maxDocs=42306)
                0.0546875 = fieldNorm(doc=2446)
          0.3733982 = weight(abstract_txt:vector in 2446) [ClassicSimilarity], result of:
            0.3733982 = score(doc=2446,freq=7.0), product of:
              0.39635658 = queryWeight, product of:
                3.5065405 = boost
                6.5110207 = idf(docFreq=170, maxDocs=42306)
                0.017360337 = queryNorm
              0.9420765 = fieldWeight in 2446, product of:
                2.6457512 = tf(freq=7.0), with freq of:
                  7.0 = termFreq=7.0
                6.5110207 = idf(docFreq=170, maxDocs=42306)
                0.0546875 = fieldNorm(doc=2446)
        0.24 = coord(6/25)