Document (#5815)

Author
Damerau, F.J.
Title
Generating and evaluating domain-oriented multi-word terms from texts
Source
Information processing and management. 29(1993) no.4, S.433-447
Year
1993
Abstract
Examines techniques for automatically generating domain vocabularies from large text collections. Focuses on the problem of generating multi-word vocabulary terms (specifically pairs). Discusses statistical issues associated with word co-occurrences likely to be of use in a natural language interface. Provides a more objective evaluation of the selection procedures. As substantial experimentation with subjects using a working query system is absent, all evaluation is necessarily subjective. Uses a surrogate for experimentation by relying on pre-existing dictionaries as indicators of domain relevance.
Theme
Automatisches Indexieren

Similar documents (content)

  1. Spiteri, L.F.: Word association testing and thesaurus construction : a pilot study (2005) 0.21
    0.21032625 = sum of:
      0.21032625 = product of:
        0.8763594 = sum of:
          0.06820323 = weight(abstract_txt:indicators in 5216) [ClassicSimilarity], result of:
            0.06820323 = score(doc=5216,freq=1.0), product of:
              0.120706886 = queryWeight, product of:
                1.0866745 = boost
                6.027006 = idf(docFreq=289, maxDocs=44218)
                0.018430239 = queryNorm
              0.5650318 = fieldWeight in 5216, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.027006 = idf(docFreq=289, maxDocs=44218)
                0.09375 = fieldNorm(doc=5216)
          0.09791388 = weight(abstract_txt:pairs in 5216) [ClassicSimilarity], result of:
            0.09791388 = score(doc=5216,freq=1.0), product of:
              0.15361193 = queryWeight, product of:
                1.2258742 = boost
                6.7990475 = idf(docFreq=133, maxDocs=44218)
                0.018430239 = queryNorm
              0.6374107 = fieldWeight in 5216, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.7990475 = idf(docFreq=133, maxDocs=44218)
                0.09375 = fieldNorm(doc=5216)
          0.058268726 = weight(abstract_txt:terms in 5216) [ClassicSimilarity], result of:
            0.058268726 = score(doc=5216,freq=2.0), product of:
              0.108680695 = queryWeight, product of:
                1.4582254 = boost
                4.0438666 = idf(docFreq=2106, maxDocs=44218)
                0.018430239 = queryNorm
              0.53614604 = fieldWeight in 5216, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.0438666 = idf(docFreq=2106, maxDocs=44218)
                0.09375 = fieldNorm(doc=5216)
          0.09925307 = weight(abstract_txt:domain in 5216) [ClassicSimilarity], result of:
            0.09925307 = score(doc=5216,freq=1.0), product of:
              0.22356226 = queryWeight, product of:
                2.5614939 = boost
                4.7355914 = idf(docFreq=1054, maxDocs=44218)
                0.018430239 = queryNorm
              0.44396168 = fieldWeight in 5216, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.7355914 = idf(docFreq=1054, maxDocs=44218)
                0.09375 = fieldNorm(doc=5216)
          0.2599414 = weight(abstract_txt:word in 5216) [ClassicSimilarity], result of:
            0.2599414 = score(doc=5216,freq=3.0), product of:
              0.29451838 = queryWeight, product of:
                2.9400198 = boost
                5.4353957 = idf(docFreq=523, maxDocs=44218)
                0.018430239 = queryNorm
              0.8825982 = fieldWeight in 5216, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                5.4353957 = idf(docFreq=523, maxDocs=44218)
                0.09375 = fieldNorm(doc=5216)
          0.2927791 = weight(abstract_txt:generating in 5216) [ClassicSimilarity], result of:
            0.2927791 = score(doc=5216,freq=1.0), product of:
              0.4598285 = queryWeight, product of:
                3.6736012 = boost
                6.7916126 = idf(docFreq=134, maxDocs=44218)
                0.018430239 = queryNorm
              0.6367137 = fieldWeight in 5216, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.7916126 = idf(docFreq=134, maxDocs=44218)
                0.09375 = fieldNorm(doc=5216)
        0.24 = coord(6/25)
    
  2. Yang, C.C.; Li, K.W.: Automatic construction of English/Chinese parallel corpora (2003) 0.12
    0.11992354 = sum of:
      0.11992354 = product of:
        0.4996814 = sum of:
          0.0310044 = weight(abstract_txt:statistical in 1683) [ClassicSimilarity], result of:
            0.0310044 = score(doc=1683,freq=1.0), product of:
              0.10221935 = queryWeight, product of:
                5.5462847 = idf(docFreq=468, maxDocs=44218)
                0.018430239 = queryNorm
              0.30331245 = fieldWeight in 1683, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.5462847 = idf(docFreq=468, maxDocs=44218)
                0.0546875 = fieldNorm(doc=1683)
          0.034812402 = weight(abstract_txt:objective in 1683) [ClassicSimilarity], result of:
            0.034812402 = score(doc=1683,freq=1.0), product of:
              0.11042656 = queryWeight, product of:
                1.0393701 = boost
                5.7646422 = idf(docFreq=376, maxDocs=44218)
                0.018430239 = queryNorm
              0.31525388 = fieldWeight in 1683, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.7646422 = idf(docFreq=376, maxDocs=44218)
                0.0546875 = fieldNorm(doc=1683)
          0.080774836 = weight(abstract_txt:pairs in 1683) [ClassicSimilarity], result of:
            0.080774836 = score(doc=1683,freq=2.0), product of:
              0.15361193 = queryWeight, product of:
                1.2258742 = boost
                6.7990475 = idf(docFreq=133, maxDocs=44218)
                0.018430239 = queryNorm
              0.525837 = fieldWeight in 1683, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                6.7990475 = idf(docFreq=133, maxDocs=44218)
                0.0546875 = fieldNorm(doc=1683)
          0.1195777 = weight(abstract_txt:dictionaries in 1683) [ClassicSimilarity], result of:
            0.1195777 = score(doc=1683,freq=3.0), product of:
              0.1743053 = queryWeight, product of:
                1.3058363 = boost
                7.24254 = idf(docFreq=85, maxDocs=44218)
                0.018430239 = queryNorm
              0.6860244 = fieldWeight in 1683, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                7.24254 = idf(docFreq=85, maxDocs=44218)
                0.0546875 = fieldNorm(doc=1683)
          0.08187961 = weight(abstract_txt:domain in 1683) [ClassicSimilarity], result of:
            0.08187961 = score(doc=1683,freq=2.0), product of:
              0.22356226 = queryWeight, product of:
                2.5614939 = boost
                4.7355914 = idf(docFreq=1054, maxDocs=44218)
                0.018430239 = queryNorm
              0.3662497 = fieldWeight in 1683, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.7355914 = idf(docFreq=1054, maxDocs=44218)
                0.0546875 = fieldNorm(doc=1683)
          0.15163247 = weight(abstract_txt:word in 1683) [ClassicSimilarity], result of:
            0.15163247 = score(doc=1683,freq=3.0), product of:
              0.29451838 = queryWeight, product of:
                2.9400198 = boost
                5.4353957 = idf(docFreq=523, maxDocs=44218)
                0.018430239 = queryNorm
              0.51484895 = fieldWeight in 1683, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                5.4353957 = idf(docFreq=523, maxDocs=44218)
                0.0546875 = fieldNorm(doc=1683)
        0.24 = coord(6/25)
    
  3. Huo, W.: Automatic multi-word term extraction and its application to Web-page summarization (2012) 0.12
    0.11990477 = sum of:
      0.11990477 = product of:
        0.74940485 = sum of:
          0.068670355 = weight(abstract_txt:terms in 563) [ClassicSimilarity], result of:
            0.068670355 = score(doc=563,freq=4.0), product of:
              0.108680695 = queryWeight, product of:
                1.4582254 = boost
                4.0438666 = idf(docFreq=2106, maxDocs=44218)
                0.018430239 = queryNorm
              0.6318542 = fieldWeight in 563, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                4.0438666 = idf(docFreq=2106, maxDocs=44218)
                0.078125 = fieldNorm(doc=563)
          0.26713437 = weight(abstract_txt:multi in 563) [ClassicSimilarity], result of:
            0.26713437 = score(doc=563,freq=6.0), product of:
              0.23483473 = queryWeight, product of:
                2.1435301 = boost
                5.9443145 = idf(docFreq=314, maxDocs=44218)
                0.018430239 = queryNorm
              1.137542 = fieldWeight in 563, product of:
                2.4494898 = tf(freq=6.0), with freq of:
                  6.0 = termFreq=6.0
                5.9443145 = idf(docFreq=314, maxDocs=44218)
                0.078125 = fieldNorm(doc=563)
          0.0827109 = weight(abstract_txt:domain in 563) [ClassicSimilarity], result of:
            0.0827109 = score(doc=563,freq=1.0), product of:
              0.22356226 = queryWeight, product of:
                2.5614939 = boost
                4.7355914 = idf(docFreq=1054, maxDocs=44218)
                0.018430239 = queryNorm
              0.3699681 = fieldWeight in 563, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.7355914 = idf(docFreq=1054, maxDocs=44218)
                0.078125 = fieldNorm(doc=563)
          0.33088923 = weight(abstract_txt:word in 563) [ClassicSimilarity], result of:
            0.33088923 = score(doc=563,freq=7.0), product of:
              0.29451838 = queryWeight, product of:
                2.9400198 = boost
                5.4353957 = idf(docFreq=523, maxDocs=44218)
                0.018430239 = queryNorm
              1.1234926 = fieldWeight in 563, product of:
                2.6457512 = tf(freq=7.0), with freq of:
                  7.0 = termFreq=7.0
                5.4353957 = idf(docFreq=523, maxDocs=44218)
                0.078125 = fieldNorm(doc=563)
        0.16 = coord(4/25)
    
  4. Stoykova, V.; Petkova, E.: Automatic extraction of mathematical terms for precalculus (2012) 0.11
    0.11494071 = sum of:
      0.11494071 = product of:
        0.47891963 = sum of:
          0.053150404 = weight(abstract_txt:statistical in 156) [ClassicSimilarity], result of:
            0.053150404 = score(doc=156,freq=1.0), product of:
              0.10221935 = queryWeight, product of:
                5.5462847 = idf(docFreq=468, maxDocs=44218)
                0.018430239 = queryNorm
              0.5199642 = fieldWeight in 156, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.5462847 = idf(docFreq=468, maxDocs=44218)
                0.09375 = fieldNorm(doc=156)
          0.05454362 = weight(abstract_txt:oriented in 156) [ClassicSimilarity], result of:
            0.05454362 = score(doc=156,freq=1.0), product of:
              0.10399794 = queryWeight, product of:
                1.0086623 = boost
                5.5943284 = idf(docFreq=446, maxDocs=44218)
                0.018430239 = queryNorm
              0.5244683 = fieldWeight in 156, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.5943284 = idf(docFreq=446, maxDocs=44218)
                0.09375 = fieldNorm(doc=156)
          0.062290903 = weight(abstract_txt:evaluating in 156) [ClassicSimilarity], result of:
            0.062290903 = score(doc=156,freq=1.0), product of:
              0.113626204 = queryWeight, product of:
                1.0543206 = boost
                5.8475623 = idf(docFreq=346, maxDocs=44218)
                0.018430239 = queryNorm
              0.54820895 = fieldWeight in 156, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.8475623 = idf(docFreq=346, maxDocs=44218)
                0.09375 = fieldNorm(doc=156)
          0.12727721 = weight(abstract_txt:occurrences in 156) [ClassicSimilarity], result of:
            0.12727721 = score(doc=156,freq=1.0), product of:
              0.18296267 = queryWeight, product of:
                1.3378724 = boost
                7.4202213 = idf(docFreq=71, maxDocs=44218)
                0.018430239 = queryNorm
              0.69564575 = fieldWeight in 156, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.4202213 = idf(docFreq=71, maxDocs=44218)
                0.09375 = fieldNorm(doc=156)
          0.08240442 = weight(abstract_txt:terms in 156) [ClassicSimilarity], result of:
            0.08240442 = score(doc=156,freq=4.0), product of:
              0.108680695 = queryWeight, product of:
                1.4582254 = boost
                4.0438666 = idf(docFreq=2106, maxDocs=44218)
                0.018430239 = queryNorm
              0.75822496 = fieldWeight in 156, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                4.0438666 = idf(docFreq=2106, maxDocs=44218)
                0.09375 = fieldNorm(doc=156)
          0.09925307 = weight(abstract_txt:domain in 156) [ClassicSimilarity], result of:
            0.09925307 = score(doc=156,freq=1.0), product of:
              0.22356226 = queryWeight, product of:
                2.5614939 = boost
                4.7355914 = idf(docFreq=1054, maxDocs=44218)
                0.018430239 = queryNorm
              0.44396168 = fieldWeight in 156, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.7355914 = idf(docFreq=1054, maxDocs=44218)
                0.09375 = fieldNorm(doc=156)
        0.24 = coord(6/25)
    
  5. He, Q.: ¬A study of the strength indexes in co-word analysis (2000) 0.11
    0.113129735 = sum of:
      0.113129735 = product of:
        0.5656487 = sum of:
          0.06730483 = weight(abstract_txt:likely in 111) [ClassicSimilarity], result of:
            0.06730483 = score(doc=111,freq=2.0), product of:
              0.10723513 = queryWeight, product of:
                1.0242406 = boost
                5.68073 = idf(docFreq=409, maxDocs=44218)
                0.018430239 = queryNorm
              0.62763786 = fieldWeight in 111, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.68073 = idf(docFreq=409, maxDocs=44218)
                0.078125 = fieldNorm(doc=111)
          0.14132652 = weight(abstract_txt:pairs in 111) [ClassicSimilarity], result of:
            0.14132652 = score(doc=111,freq=3.0), product of:
              0.15361193 = queryWeight, product of:
                1.2258742 = boost
                6.7990475 = idf(docFreq=133, maxDocs=44218)
                0.018430239 = queryNorm
              0.9200231 = fieldWeight in 111, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                6.7990475 = idf(docFreq=133, maxDocs=44218)
                0.078125 = fieldNorm(doc=111)
          0.106064335 = weight(abstract_txt:occurrences in 111) [ClassicSimilarity], result of:
            0.106064335 = score(doc=111,freq=1.0), product of:
              0.18296267 = queryWeight, product of:
                1.3378724 = boost
                7.4202213 = idf(docFreq=71, maxDocs=44218)
                0.018430239 = queryNorm
              0.57970476 = fieldWeight in 111, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.4202213 = idf(docFreq=71, maxDocs=44218)
                0.078125 = fieldNorm(doc=111)
          0.034335177 = weight(abstract_txt:terms in 111) [ClassicSimilarity], result of:
            0.034335177 = score(doc=111,freq=1.0), product of:
              0.108680695 = queryWeight, product of:
                1.4582254 = boost
                4.0438666 = idf(docFreq=2106, maxDocs=44218)
                0.018430239 = queryNorm
              0.3159271 = fieldWeight in 111, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.0438666 = idf(docFreq=2106, maxDocs=44218)
                0.078125 = fieldNorm(doc=111)
          0.21661784 = weight(abstract_txt:word in 111) [ClassicSimilarity], result of:
            0.21661784 = score(doc=111,freq=3.0), product of:
              0.29451838 = queryWeight, product of:
                2.9400198 = boost
                5.4353957 = idf(docFreq=523, maxDocs=44218)
                0.018430239 = queryNorm
              0.73549855 = fieldWeight in 111, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                5.4353957 = idf(docFreq=523, maxDocs=44218)
                0.078125 = fieldNorm(doc=111)
        0.2 = coord(5/25)