Document (#5815)

Author
Damerau, F.J.
Title
Generating and evaluating domain-oriented multi-word terms from texts
Source
Information processing and management. 29(1993) no.4, S.433-447
Year
1993
Abstract
Examines techniques for automatically generating domain vocabularies from large text collections. Focuses on the problem of generating multi-word vocabulary terms (specifically pairs). Discusses statistical issues associated with word co-occurrences likely to be of use in a natural language interface. Provides a more objective evaluation of the selection procedures. As substantial experimentation with subjects using a working query system is absent, all evaluation is necessarily subjective. Uses a surrogate for experimentation by relying on pre-existing dictionaries as indicators of domain relevance.
Theme
Automatisches Indexieren

Similar documents (content)

  1. Spiteri, L.F.: Word association testing and thesaurus construction : a pilot study (2005) 0.21
    0.2119151 = sum of:
      0.2119151 = product of:
        0.88297963 = sum of:
          0.068792924 = weight(abstract_txt:indicators in 217) [ClassicSimilarity], result of:
            0.068792924 = score(doc=217,freq=1.0), product of:
              0.121271215 = queryWeight, product of:
                1.0880489 = boost
                6.0508275 = idf(docFreq=276, maxDocs=43254)
                0.018420208 = queryNorm
              0.5672651 = fieldWeight in 217, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.0508275 = idf(docFreq=276, maxDocs=43254)
                0.09375 = fieldNorm(doc=217)
          0.099310234 = weight(abstract_txt:pairs in 217) [ClassicSimilarity], result of:
            0.099310234 = score(doc=217,freq=1.0), product of:
              0.1549023 = queryWeight, product of:
                1.2296981 = boost
                6.838563 = idf(docFreq=125, maxDocs=43254)
                0.018420208 = queryNorm
              0.6411153 = fieldWeight in 217, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.838563 = idf(docFreq=125, maxDocs=43254)
                0.09375 = fieldNorm(doc=217)
          0.05869523 = weight(abstract_txt:terms in 217) [ClassicSimilarity], result of:
            0.05869523 = score(doc=217,freq=2.0), product of:
              0.10909305 = queryWeight, product of:
                1.4594294 = boost
                4.058069 = idf(docFreq=2031, maxDocs=43254)
                0.018420208 = queryNorm
              0.5380291 = fieldWeight in 217, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.058069 = idf(docFreq=2031, maxDocs=43254)
                0.09375 = fieldNorm(doc=217)
          0.10047842 = weight(abstract_txt:domain in 217) [ClassicSimilarity], result of:
            0.10047842 = score(doc=217,freq=1.0), product of:
              0.22515632 = queryWeight, product of:
                2.5678654 = boost
                4.760114 = idf(docFreq=1006, maxDocs=43254)
                0.018420208 = queryNorm
              0.4462607 = fieldWeight in 217, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.760114 = idf(docFreq=1006, maxDocs=43254)
                0.09375 = fieldNorm(doc=217)
          0.25982568 = weight(abstract_txt:word in 217) [ClassicSimilarity], result of:
            0.25982568 = score(doc=217,freq=3.0), product of:
              0.2941146 = queryWeight, product of:
                2.9348674 = boost
                5.4404345 = idf(docFreq=509, maxDocs=43254)
                0.018420208 = queryNorm
              0.8834165 = fieldWeight in 217, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                5.4404345 = idf(docFreq=509, maxDocs=43254)
                0.09375 = fieldNorm(doc=217)
          0.29587716 = weight(abstract_txt:generating in 217) [ClassicSimilarity], result of:
            0.29587716 = score(doc=217,freq=1.0), product of:
              0.46256906 = queryWeight, product of:
                3.6805987 = boost
                6.822815 = idf(docFreq=127, maxDocs=43254)
                0.018420208 = queryNorm
              0.6396389 = fieldWeight in 217, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.822815 = idf(docFreq=127, maxDocs=43254)
                0.09375 = fieldNorm(doc=217)
        0.24 = coord(6/25)
    
  2. Huo, W.: Automatic multi-word term extraction and its application to Web-page summarization (2012) 0.15
    0.15465128 = sum of:
      0.15465128 = product of:
        0.7732564 = sum of:
          0.019271607 = weight(abstract_txt:from in 2028) [ClassicSimilarity], result of:
            0.019271607 = score(doc=2028,freq=3.0), product of:
              0.05121898 = queryWeight, product of:
                2.7805862 = idf(docFreq=7289, maxDocs=43254)
                0.018420208 = queryNorm
              0.3762591 = fieldWeight in 2028, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                2.7805862 = idf(docFreq=7289, maxDocs=43254)
                0.078125 = fieldNorm(doc=2028)
          0.06917299 = weight(abstract_txt:terms in 2028) [ClassicSimilarity], result of:
            0.06917299 = score(doc=2028,freq=4.0), product of:
              0.10909305 = queryWeight, product of:
                1.4594294 = boost
                4.058069 = idf(docFreq=2031, maxDocs=43254)
                0.018420208 = queryNorm
              0.6340733 = fieldWeight in 2028, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                4.058069 = idf(docFreq=2031, maxDocs=43254)
                0.078125 = fieldNorm(doc=2028)
          0.27033797 = weight(abstract_txt:multi in 2028) [ClassicSimilarity], result of:
            0.27033797 = score(doc=2028,freq=6.0), product of:
              0.23645417 = queryWeight, product of:
                2.1486123 = boost
                5.9744015 = idf(docFreq=298, maxDocs=43254)
                0.018420208 = queryNorm
              1.1432996 = fieldWeight in 2028, product of:
                2.4494898 = tf(freq=6.0), with freq of:
                  6.0 = termFreq=6.0
                5.9744015 = idf(docFreq=298, maxDocs=43254)
                0.078125 = fieldNorm(doc=2028)
          0.08373202 = weight(abstract_txt:domain in 2028) [ClassicSimilarity], result of:
            0.08373202 = score(doc=2028,freq=1.0), product of:
              0.22515632 = queryWeight, product of:
                2.5678654 = boost
                4.760114 = idf(docFreq=1006, maxDocs=43254)
                0.018420208 = queryNorm
              0.37188393 = fieldWeight in 2028, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.760114 = idf(docFreq=1006, maxDocs=43254)
                0.078125 = fieldNorm(doc=2028)
          0.33074188 = weight(abstract_txt:word in 2028) [ClassicSimilarity], result of:
            0.33074188 = score(doc=2028,freq=7.0), product of:
              0.2941146 = queryWeight, product of:
                2.9348674 = boost
                5.4404345 = idf(docFreq=509, maxDocs=43254)
                0.018420208 = queryNorm
              1.1245341 = fieldWeight in 2028, product of:
                2.6457512 = tf(freq=7.0), with freq of:
                  7.0 = termFreq=7.0
                5.4404345 = idf(docFreq=509, maxDocs=43254)
                0.078125 = fieldNorm(doc=2028)
        0.2 = coord(5/25)
    
  3. He, Q.: ¬A study of the strength indexes in co-word analysis (2000) 0.14
    0.13905805 = sum of:
      0.13905805 = product of:
        0.5794086 = sum of:
          0.011126468 = weight(abstract_txt:from in 2112) [ClassicSimilarity], result of:
            0.011126468 = score(doc=2112,freq=1.0), product of:
              0.05121898 = queryWeight, product of:
                2.7805862 = idf(docFreq=7289, maxDocs=43254)
                0.018420208 = queryNorm
              0.2172333 = fieldWeight in 2112, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                2.7805862 = idf(docFreq=7289, maxDocs=43254)
                0.078125 = fieldNorm(doc=2112)
          0.06845342 = weight(abstract_txt:likely in 2112) [ClassicSimilarity], result of:
            0.06845342 = score(doc=2112,freq=2.0), product of:
              0.108335175 = queryWeight, product of:
                1.0283816 = boost
                5.7190075 = idf(docFreq=385, maxDocs=43254)
                0.018420208 = queryNorm
              0.63186705 = fieldWeight in 2112, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.7190075 = idf(docFreq=385, maxDocs=43254)
                0.078125 = fieldNorm(doc=2112)
          0.14334196 = weight(abstract_txt:pairs in 2112) [ClassicSimilarity], result of:
            0.14334196 = score(doc=2112,freq=3.0), product of:
              0.1549023 = queryWeight, product of:
                1.2296981 = boost
                6.838563 = idf(docFreq=125, maxDocs=43254)
                0.018420208 = queryNorm
              0.92537016 = fieldWeight in 2112, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                6.838563 = idf(docFreq=125, maxDocs=43254)
                0.078125 = fieldNorm(doc=2112)
          0.105378844 = weight(abstract_txt:occurrences in 2112) [ClassicSimilarity], result of:
            0.105378844 = score(doc=2112,freq=1.0), product of:
              0.18197776 = queryWeight, product of:
                1.3328421 = boost
                7.412165 = idf(docFreq=70, maxDocs=43254)
                0.018420208 = queryNorm
              0.5790754 = fieldWeight in 2112, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.412165 = idf(docFreq=70, maxDocs=43254)
                0.078125 = fieldNorm(doc=2112)
          0.034586497 = weight(abstract_txt:terms in 2112) [ClassicSimilarity], result of:
            0.034586497 = score(doc=2112,freq=1.0), product of:
              0.10909305 = queryWeight, product of:
                1.4594294 = boost
                4.058069 = idf(docFreq=2031, maxDocs=43254)
                0.018420208 = queryNorm
              0.31703666 = fieldWeight in 2112, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.058069 = idf(docFreq=2031, maxDocs=43254)
                0.078125 = fieldNorm(doc=2112)
          0.2165214 = weight(abstract_txt:word in 2112) [ClassicSimilarity], result of:
            0.2165214 = score(doc=2112,freq=3.0), product of:
              0.2941146 = queryWeight, product of:
                2.9348674 = boost
                5.4404345 = idf(docFreq=509, maxDocs=43254)
                0.018420208 = queryNorm
              0.7361804 = fieldWeight in 2112, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                5.4404345 = idf(docFreq=509, maxDocs=43254)
                0.078125 = fieldNorm(doc=2112)
        0.24 = coord(6/25)
    
  4. Tomov, D.T.: Some critical remarks on the stop word lists of ISI publications (2001) 0.12
    0.11855456 = sum of:
      0.11855456 = product of:
        0.49397737 = sum of:
          0.012588161 = weight(abstract_txt:from in 479) [ClassicSimilarity], result of:
            0.012588161 = score(doc=479,freq=2.0), product of:
              0.05121898 = queryWeight, product of:
                2.7805862 = idf(docFreq=7289, maxDocs=43254)
                0.018420208 = queryNorm
              0.24577142 = fieldWeight in 479, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                2.7805862 = idf(docFreq=7289, maxDocs=43254)
                0.0625 = fieldNorm(doc=479)
          0.036167968 = weight(abstract_txt:oriented in 479) [ClassicSimilarity], result of:
            0.036167968 = score(doc=479,freq=1.0), product of:
              0.10351551 = queryWeight, product of:
                1.0052458 = boost
                5.5903454 = idf(docFreq=438, maxDocs=43254)
                0.018420208 = queryNorm
              0.3493966 = fieldWeight in 479, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.5903454 = idf(docFreq=438, maxDocs=43254)
                0.0625 = fieldNorm(doc=479)
          0.079086214 = weight(abstract_txt:dictionaries in 479) [ClassicSimilarity], result of:
            0.079086214 = score(doc=479,freq=1.0), product of:
              0.17439066 = queryWeight, product of:
                1.3047616 = boost
                7.2560043 = idf(docFreq=82, maxDocs=43254)
                0.018420208 = queryNorm
              0.45350027 = fieldWeight in 479, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.2560043 = idf(docFreq=82, maxDocs=43254)
                0.0625 = fieldNorm(doc=479)
          0.03913015 = weight(abstract_txt:terms in 479) [ClassicSimilarity], result of:
            0.03913015 = score(doc=479,freq=2.0), product of:
              0.10909305 = queryWeight, product of:
                1.4594294 = boost
                4.058069 = idf(docFreq=2031, maxDocs=43254)
                0.018420208 = queryNorm
              0.35868603 = fieldWeight in 479, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.058069 = idf(docFreq=2031, maxDocs=43254)
                0.0625 = fieldNorm(doc=479)
          0.126991 = weight(abstract_txt:absent in 479) [ClassicSimilarity], result of:
            0.126991 = score(doc=479,freq=1.0), product of:
              0.23913218 = queryWeight, product of:
                1.5278776 = boost
                8.496791 = idf(docFreq=23, maxDocs=43254)
                0.018420208 = queryNorm
              0.53104943 = fieldWeight in 479, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.496791 = idf(docFreq=23, maxDocs=43254)
                0.0625 = fieldNorm(doc=479)
          0.20001389 = weight(abstract_txt:word in 479) [ClassicSimilarity], result of:
            0.20001389 = score(doc=479,freq=4.0), product of:
              0.2941146 = queryWeight, product of:
                2.9348674 = boost
                5.4404345 = idf(docFreq=509, maxDocs=43254)
                0.018420208 = queryNorm
              0.6800543 = fieldWeight in 479, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                5.4404345 = idf(docFreq=509, maxDocs=43254)
                0.0625 = fieldNorm(doc=479)
        0.24 = coord(6/25)
    
  5. Justeson, J.S.; Katz, S.M.: Technical terminology : some linguistic properties and an algorithm for identification in text (1995) 0.12
    0.11780623 = sum of:
      0.11780623 = product of:
        0.49085933 = sum of:
          0.011126468 = weight(abstract_txt:from in 2220) [ClassicSimilarity], result of:
            0.011126468 = score(doc=2220,freq=1.0), product of:
              0.05121898 = queryWeight, product of:
                2.7805862 = idf(docFreq=7289, maxDocs=43254)
                0.018420208 = queryNorm
              0.2172333 = fieldWeight in 2220, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                2.7805862 = idf(docFreq=7289, maxDocs=43254)
                0.078125 = fieldNorm(doc=2220)
          0.0759079 = weight(abstract_txt:substantial in 2220) [ClassicSimilarity], result of:
            0.0759079 = score(doc=2220,freq=1.0), product of:
              0.14623141 = queryWeight, product of:
                1.1947854 = boost
                6.6444073 = idf(docFreq=152, maxDocs=43254)
                0.018420208 = queryNorm
              0.51909435 = fieldWeight in 2220, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.6444073 = idf(docFreq=152, maxDocs=43254)
                0.078125 = fieldNorm(doc=2220)
          0.08471927 = weight(abstract_txt:terms in 2220) [ClassicSimilarity], result of:
            0.08471927 = score(doc=2220,freq=6.0), product of:
              0.10909305 = queryWeight, product of:
                1.4594294 = boost
                4.058069 = idf(docFreq=2031, maxDocs=43254)
                0.018420208 = queryNorm
              0.77657807 = fieldWeight in 2220, product of:
                2.4494898 = tf(freq=6.0), with freq of:
                  6.0 = termFreq=6.0
                4.058069 = idf(docFreq=2031, maxDocs=43254)
                0.078125 = fieldNorm(doc=2220)
          0.11036501 = weight(abstract_txt:multi in 2220) [ClassicSimilarity], result of:
            0.11036501 = score(doc=2220,freq=1.0), product of:
              0.23645417 = queryWeight, product of:
                2.1486123 = boost
                5.9744015 = idf(docFreq=298, maxDocs=43254)
                0.018420208 = queryNorm
              0.46675012 = fieldWeight in 2220, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.9744015 = idf(docFreq=298, maxDocs=43254)
                0.078125 = fieldNorm(doc=2220)
          0.08373202 = weight(abstract_txt:domain in 2220) [ClassicSimilarity], result of:
            0.08373202 = score(doc=2220,freq=1.0), product of:
              0.22515632 = queryWeight, product of:
                2.5678654 = boost
                4.760114 = idf(docFreq=1006, maxDocs=43254)
                0.018420208 = queryNorm
              0.37188393 = fieldWeight in 2220, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.760114 = idf(docFreq=1006, maxDocs=43254)
                0.078125 = fieldNorm(doc=2220)
          0.12500867 = weight(abstract_txt:word in 2220) [ClassicSimilarity], result of:
            0.12500867 = score(doc=2220,freq=1.0), product of:
              0.2941146 = queryWeight, product of:
                2.9348674 = boost
                5.4404345 = idf(docFreq=509, maxDocs=43254)
                0.018420208 = queryNorm
              0.42503393 = fieldWeight in 2220, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.4404345 = idf(docFreq=509, maxDocs=43254)
                0.078125 = fieldNorm(doc=2220)
        0.24 = coord(6/25)