Document (#16514)

Author
Paijmans, H.
Title
Gravity wells of meaning : detecting information rich passages in scientific texts
Source
Journal of documentation. 53(1997) no.5, S.520-536
Year
1997
Abstract
Presents research in which 4 term weighting schemes were used to detect information rich passages in texts and the results compared. Demonstrates that word categories and frequency derived weights have a close correlation but that weighting according to the first mention theory or the cue method shows no correlation with frequency based weights.
Content
Vgl. auch unter: http://www.emeraldinsight.com/10.1108/EUM0000000007209.
Theme
Volltextretrieval

Similar documents (content)

  1. Li, X.; Zhang, A.; Li, C.; Ouyang, J.; Cai, Y.: Exploring coherent topics by topic modeling with term weighting (2018) 0.30
    0.29735982 = sum of:
      0.29735982 = product of:
        0.82599944 = sum of:
          0.010925028 = weight(abstract_txt:that in 5045) [ClassicSimilarity], result of:
            0.010925028 = score(doc=5045,freq=4.0), product of:
              0.0368859 = queryWeight, product of:
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.015567117 = queryNorm
              0.2961844 = fieldWeight in 5045, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.0625 = fieldNorm(doc=5045)
          0.03935658 = weight(abstract_txt:term in 5045) [ClassicSimilarity], result of:
            0.03935658 = score(doc=5045,freq=3.0), product of:
              0.07572277 = queryWeight, product of:
                1.0131367 = boost
                4.8012047 = idf(docFreq=987, maxDocs=44218)
                0.015567117 = queryNorm
              0.51974565 = fieldWeight in 5045, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                4.8012047 = idf(docFreq=987, maxDocs=44218)
                0.0625 = fieldNorm(doc=5045)
          0.025911178 = weight(abstract_txt:compared in 5045) [ClassicSimilarity], result of:
            0.025911178 = score(doc=5045,freq=1.0), product of:
              0.08265075 = queryWeight, product of:
                1.0584692 = boost
                5.0160327 = idf(docFreq=796, maxDocs=44218)
                0.015567117 = queryNorm
              0.31350204 = fieldWeight in 5045, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.0160327 = idf(docFreq=796, maxDocs=44218)
                0.0625 = fieldNorm(doc=5045)
          0.05710316 = weight(abstract_txt:word in 5045) [ClassicSimilarity], result of:
            0.05710316 = score(doc=5045,freq=3.0), product of:
              0.097048394 = queryWeight, product of:
                1.1469619 = boost
                5.4353957 = idf(docFreq=523, maxDocs=44218)
                0.015567117 = queryNorm
              0.5883988 = fieldWeight in 5045, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                5.4353957 = idf(docFreq=523, maxDocs=44218)
                0.0625 = fieldNorm(doc=5045)
          0.06025366 = weight(abstract_txt:schemes in 5045) [ClassicSimilarity], result of:
            0.06025366 = score(doc=5045,freq=3.0), product of:
              0.10058593 = queryWeight, product of:
                1.167679 = boost
                5.533572 = idf(docFreq=474, maxDocs=44218)
                0.015567117 = queryNorm
              0.59902674 = fieldWeight in 5045, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                5.533572 = idf(docFreq=474, maxDocs=44218)
                0.0625 = fieldNorm(doc=5045)
          0.17090198 = weight(abstract_txt:weighting in 5045) [ClassicSimilarity], result of:
            0.17090198 = score(doc=5045,freq=6.0), product of:
              0.15996918 = queryWeight, product of:
                1.4725599 = boost
                6.9783883 = idf(docFreq=111, maxDocs=44218)
                0.015567117 = queryNorm
              1.0683432 = fieldWeight in 5045, product of:
                2.4494898 = tf(freq=6.0), with freq of:
                  6.0 = termFreq=6.0
                6.9783883 = idf(docFreq=111, maxDocs=44218)
                0.0625 = fieldNorm(doc=5045)
          0.10497287 = weight(abstract_txt:texts in 5045) [ClassicSimilarity], result of:
            0.10497287 = score(doc=5045,freq=2.0), product of:
              0.21004228 = queryWeight, product of:
                2.3862898 = boost
                5.6542544 = idf(docFreq=420, maxDocs=44218)
                0.015567117 = queryNorm
              0.4997702 = fieldWeight in 5045, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.6542544 = idf(docFreq=420, maxDocs=44218)
                0.0625 = fieldNorm(doc=5045)
          0.08638495 = weight(abstract_txt:frequency in 5045) [ClassicSimilarity], result of:
            0.08638495 = score(doc=5045,freq=1.0), product of:
              0.23239355 = queryWeight, product of:
                2.510047 = boost
                5.947494 = idf(docFreq=313, maxDocs=44218)
                0.015567117 = queryNorm
              0.37171838 = fieldWeight in 5045, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.947494 = idf(docFreq=313, maxDocs=44218)
                0.0625 = fieldNorm(doc=5045)
          0.27019006 = weight(abstract_txt:weights in 5045) [ClassicSimilarity], result of:
            0.27019006 = score(doc=5045,freq=3.0), product of:
              0.3446179 = queryWeight, product of:
                3.056601 = boost
                7.24254 = idf(docFreq=85, maxDocs=44218)
                0.015567117 = queryNorm
              0.78402793 = fieldWeight in 5045, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                7.24254 = idf(docFreq=85, maxDocs=44218)
                0.0625 = fieldNorm(doc=5045)
        0.36 = coord(9/25)
    
  2. Dang, E.K.F.; Luk, R.W.P.; Allan, J.; Ho, K.S.; Chung, K.F.L.; Lee, D.L.: ¬A new context-dependent term weight computed by boost and discount using relevance information (2010) 0.21
    0.21198036 = sum of:
      0.21198036 = product of:
        0.7570727 = sum of:
          0.06011817 = weight(abstract_txt:term in 4120) [ClassicSimilarity], result of:
            0.06011817 = score(doc=4120,freq=7.0), product of:
              0.07572277 = queryWeight, product of:
                1.0131367 = boost
                4.8012047 = idf(docFreq=987, maxDocs=44218)
                0.015567117 = queryNorm
              0.79392457 = fieldWeight in 4120, product of:
                2.6457512 = tf(freq=7.0), with freq of:
                  7.0 = termFreq=7.0
                4.8012047 = idf(docFreq=987, maxDocs=44218)
                0.0625 = fieldNorm(doc=4120)
          0.008239608 = weight(abstract_txt:information in 4120) [ClassicSimilarity], result of:
            0.008239608 = score(doc=4120,freq=2.0), product of:
              0.038505822 = queryWeight, product of:
                1.0217227 = boost
                2.4209464 = idf(docFreq=10677, maxDocs=44218)
                0.015567117 = queryNorm
              0.21398345 = fieldWeight in 4120, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                2.4209464 = idf(docFreq=10677, maxDocs=44218)
                0.0625 = fieldNorm(doc=4120)
          0.025911178 = weight(abstract_txt:compared in 4120) [ClassicSimilarity], result of:
            0.025911178 = score(doc=4120,freq=1.0), product of:
              0.08265075 = queryWeight, product of:
                1.0584692 = boost
                5.0160327 = idf(docFreq=796, maxDocs=44218)
                0.015567117 = queryNorm
              0.31350204 = fieldWeight in 4120, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.0160327 = idf(docFreq=796, maxDocs=44218)
                0.0625 = fieldNorm(doc=4120)
          0.029647931 = weight(abstract_txt:according in 4120) [ClassicSimilarity], result of:
            0.029647931 = score(doc=4120,freq=1.0), product of:
              0.09041732 = queryWeight, product of:
                1.1070842 = boost
                5.2464166 = idf(docFreq=632, maxDocs=44218)
                0.015567117 = queryNorm
              0.32790104 = fieldWeight in 4120, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.2464166 = idf(docFreq=632, maxDocs=44218)
                0.0625 = fieldNorm(doc=4120)
          0.06977044 = weight(abstract_txt:weighting in 4120) [ClassicSimilarity], result of:
            0.06977044 = score(doc=4120,freq=1.0), product of:
              0.15996918 = queryWeight, product of:
                1.4725599 = boost
                6.9783883 = idf(docFreq=111, maxDocs=44218)
                0.015567117 = queryNorm
              0.43614927 = fieldWeight in 4120, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.9783883 = idf(docFreq=111, maxDocs=44218)
                0.0625 = fieldNorm(doc=4120)
          0.122166775 = weight(abstract_txt:frequency in 4120) [ClassicSimilarity], result of:
            0.122166775 = score(doc=4120,freq=2.0), product of:
              0.23239355 = queryWeight, product of:
                2.510047 = boost
                5.947494 = idf(docFreq=313, maxDocs=44218)
                0.015567117 = queryNorm
              0.5256892 = fieldWeight in 4120, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.947494 = idf(docFreq=313, maxDocs=44218)
                0.0625 = fieldNorm(doc=4120)
          0.44121855 = weight(abstract_txt:weights in 4120) [ClassicSimilarity], result of:
            0.44121855 = score(doc=4120,freq=8.0), product of:
              0.3446179 = queryWeight, product of:
                3.056601 = boost
                7.24254 = idf(docFreq=85, maxDocs=44218)
                0.015567117 = queryNorm
              1.2803123 = fieldWeight in 4120, product of:
                2.828427 = tf(freq=8.0), with freq of:
                  8.0 = termFreq=8.0
                7.24254 = idf(docFreq=85, maxDocs=44218)
                0.0625 = fieldNorm(doc=4120)
        0.28 = coord(7/25)
    
  3. Wong, S.K.M.; Yao, Y.Y.: ¬An information-theoretic measure of term specificity (1992) 0.19
    0.18893224 = sum of:
      0.18893224 = product of:
        0.5904133 = sum of:
          0.011587742 = weight(abstract_txt:that in 4807) [ClassicSimilarity], result of:
            0.011587742 = score(doc=4807,freq=2.0), product of:
              0.0368859 = queryWeight, product of:
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.015567117 = queryNorm
              0.314151 = fieldWeight in 4807, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.09375 = fieldNorm(doc=4807)
          0.08348791 = weight(abstract_txt:term in 4807) [ClassicSimilarity], result of:
            0.08348791 = score(doc=4807,freq=6.0), product of:
              0.07572277 = queryWeight, product of:
                1.0131367 = boost
                4.8012047 = idf(docFreq=987, maxDocs=44218)
                0.015567117 = queryNorm
              1.102547 = fieldWeight in 4807, product of:
                2.4494898 = tf(freq=6.0), with freq of:
                  6.0 = termFreq=6.0
                4.8012047 = idf(docFreq=987, maxDocs=44218)
                0.09375 = fieldNorm(doc=4807)
          0.012359413 = weight(abstract_txt:information in 4807) [ClassicSimilarity], result of:
            0.012359413 = score(doc=4807,freq=2.0), product of:
              0.038505822 = queryWeight, product of:
                1.0217227 = boost
                2.4209464 = idf(docFreq=10677, maxDocs=44218)
                0.015567117 = queryNorm
              0.32097518 = fieldWeight in 4807, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                2.4209464 = idf(docFreq=10677, maxDocs=44218)
                0.09375 = fieldNorm(doc=4807)
          0.04110952 = weight(abstract_txt:shows in 4807) [ClassicSimilarity], result of:
            0.04110952 = score(doc=4807,freq=1.0), product of:
              0.08580043 = queryWeight, product of:
                1.0784489 = boost
                5.1107154 = idf(docFreq=724, maxDocs=44218)
                0.015567117 = queryNorm
              0.47912955 = fieldWeight in 4807, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.1107154 = idf(docFreq=724, maxDocs=44218)
                0.09375 = fieldNorm(doc=4807)
          0.0521812 = weight(abstract_txt:schemes in 4807) [ClassicSimilarity], result of:
            0.0521812 = score(doc=4807,freq=1.0), product of:
              0.10058593 = queryWeight, product of:
                1.167679 = boost
                5.533572 = idf(docFreq=474, maxDocs=44218)
                0.015567117 = queryNorm
              0.51877236 = fieldWeight in 4807, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.533572 = idf(docFreq=474, maxDocs=44218)
                0.09375 = fieldNorm(doc=4807)
          0.058431864 = weight(abstract_txt:derived in 4807) [ClassicSimilarity], result of:
            0.058431864 = score(doc=4807,freq=1.0), product of:
              0.10846617 = queryWeight, product of:
                1.2125565 = boost
                5.746245 = idf(docFreq=383, maxDocs=44218)
                0.015567117 = queryNorm
              0.5387105 = fieldWeight in 4807, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.746245 = idf(docFreq=383, maxDocs=44218)
                0.09375 = fieldNorm(doc=4807)
          0.14800544 = weight(abstract_txt:weighting in 4807) [ClassicSimilarity], result of:
            0.14800544 = score(doc=4807,freq=2.0), product of:
              0.15996918 = queryWeight, product of:
                1.4725599 = boost
                6.9783883 = idf(docFreq=111, maxDocs=44218)
                0.015567117 = queryNorm
              0.92521226 = fieldWeight in 4807, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                6.9783883 = idf(docFreq=111, maxDocs=44218)
                0.09375 = fieldNorm(doc=4807)
          0.18325017 = weight(abstract_txt:frequency in 4807) [ClassicSimilarity], result of:
            0.18325017 = score(doc=4807,freq=2.0), product of:
              0.23239355 = queryWeight, product of:
                2.510047 = boost
                5.947494 = idf(docFreq=313, maxDocs=44218)
                0.015567117 = queryNorm
              0.7885338 = fieldWeight in 4807, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.947494 = idf(docFreq=313, maxDocs=44218)
                0.09375 = fieldNorm(doc=4807)
        0.32 = coord(8/25)
    
  4. Mengle, S.; Goharian, N.: Passage detection using text classification (2009) 0.18
    0.17868058 = sum of:
      0.17868058 = product of:
        0.89340293 = sum of:
          0.009559399 = weight(abstract_txt:that in 2765) [ClassicSimilarity], result of:
            0.009559399 = score(doc=2765,freq=4.0), product of:
              0.0368859 = queryWeight, product of:
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.015567117 = queryNorm
              0.25916135 = fieldWeight in 2765, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.0546875 = fieldNorm(doc=2765)
          0.00882999 = weight(abstract_txt:information in 2765) [ClassicSimilarity], result of:
            0.00882999 = score(doc=2765,freq=3.0), product of:
              0.038505822 = queryWeight, product of:
                1.0217227 = boost
                2.4209464 = idf(docFreq=10677, maxDocs=44218)
                0.015567117 = queryNorm
              0.22931573 = fieldWeight in 2765, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                2.4209464 = idf(docFreq=10677, maxDocs=44218)
                0.0546875 = fieldNorm(doc=2765)
          0.043191224 = weight(abstract_txt:categories in 2765) [ClassicSimilarity], result of:
            0.043191224 = score(doc=2765,freq=3.0), product of:
              0.088065654 = queryWeight, product of:
                1.0925922 = boost
                5.17774 = idf(docFreq=677, maxDocs=44218)
                0.015567117 = queryNorm
              0.49044347 = fieldWeight in 2765, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                5.17774 = idf(docFreq=677, maxDocs=44218)
                0.0546875 = fieldNorm(doc=2765)
          0.0643448 = weight(abstract_txt:detect in 2765) [ClassicSimilarity], result of:
            0.0643448 = score(doc=2765,freq=1.0), product of:
              0.16567576 = queryWeight, product of:
                1.4985951 = boost
                7.1017675 = idf(docFreq=98, maxDocs=44218)
                0.015567117 = queryNorm
              0.3883779 = fieldWeight in 2765, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.1017675 = idf(docFreq=98, maxDocs=44218)
                0.0546875 = fieldNorm(doc=2765)
          0.7674775 = weight(abstract_txt:passages in 2765) [ClassicSimilarity], result of:
            0.7674775 = score(doc=2765,freq=14.0), product of:
              0.45212764 = queryWeight, product of:
                3.5010664 = boost
                8.29569 = idf(docFreq=29, maxDocs=44218)
                0.015567117 = queryNorm
              1.6974797 = fieldWeight in 2765, product of:
                3.7416575 = tf(freq=14.0), with freq of:
                  14.0 = termFreq=14.0
                8.29569 = idf(docFreq=29, maxDocs=44218)
                0.0546875 = fieldNorm(doc=2765)
        0.2 = coord(5/25)
    
  5. Stamatatos, E.: Plagiarism detection using stopword n-grams (2011) 0.16
    0.16188113 = sum of:
      0.16188113 = product of:
        0.67450476 = sum of:
          0.011826688 = weight(abstract_txt:that in 4955) [ClassicSimilarity], result of:
            0.011826688 = score(doc=4955,freq=3.0), product of:
              0.0368859 = queryWeight, product of:
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.015567117 = queryNorm
              0.320629 = fieldWeight in 4955, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.078125 = fieldNorm(doc=4955)
          0.0072828536 = weight(abstract_txt:information in 4955) [ClassicSimilarity], result of:
            0.0072828536 = score(doc=4955,freq=1.0), product of:
              0.038505822 = queryWeight, product of:
                1.0217227 = boost
                2.4209464 = idf(docFreq=10677, maxDocs=44218)
                0.015567117 = queryNorm
              0.18913643 = fieldWeight in 4955, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                2.4209464 = idf(docFreq=10677, maxDocs=44218)
                0.078125 = fieldNorm(doc=4955)
          0.032388974 = weight(abstract_txt:compared in 4955) [ClassicSimilarity], result of:
            0.032388974 = score(doc=4955,freq=1.0), product of:
              0.08265075 = queryWeight, product of:
                1.0584692 = boost
                5.0160327 = idf(docFreq=796, maxDocs=44218)
                0.015567117 = queryNorm
              0.39187756 = fieldWeight in 4955, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.0160327 = idf(docFreq=796, maxDocs=44218)
                0.078125 = fieldNorm(doc=4955)
          0.09192115 = weight(abstract_txt:detect in 4955) [ClassicSimilarity], result of:
            0.09192115 = score(doc=4955,freq=1.0), product of:
              0.16567576 = queryWeight, product of:
                1.4985951 = boost
                7.1017675 = idf(docFreq=98, maxDocs=44218)
                0.015567117 = queryNorm
              0.5548256 = fieldWeight in 4955, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.1017675 = idf(docFreq=98, maxDocs=44218)
                0.078125 = fieldNorm(doc=4955)
          0.11668622 = weight(abstract_txt:detecting in 4955) [ClassicSimilarity], result of:
            0.11668622 = score(doc=4955,freq=1.0), product of:
              0.1942354 = queryWeight, product of:
                1.6226282 = boost
                7.689554 = idf(docFreq=54, maxDocs=44218)
                0.015567117 = queryNorm
              0.6007464 = fieldWeight in 4955, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.689554 = idf(docFreq=54, maxDocs=44218)
                0.078125 = fieldNorm(doc=4955)
          0.41439888 = weight(abstract_txt:passages in 4955) [ClassicSimilarity], result of:
            0.41439888 = score(doc=4955,freq=2.0), product of:
              0.45212764 = queryWeight, product of:
                3.5010664 = boost
                8.29569 = idf(docFreq=29, maxDocs=44218)
                0.015567117 = queryNorm
              0.91655284 = fieldWeight in 4955, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                8.29569 = idf(docFreq=29, maxDocs=44218)
                0.078125 = fieldNorm(doc=4955)
        0.24 = coord(6/25)