Document (#21568)

Author
Jianchao, X.
Ming, H.
Milin, S.
Title
On indexing descriptors for document archive
Source
Journal of the China Society for Scientific and Technical Information. 17(1998) no.4, S.263-265
Year
1998
Abstract
Describes a method of indexing the descriptors of the full text of document archives. Explains how the method organizes the thesaurus of descriptors, and mixes both keyword and index terms from the thesaurus. Presents a procedure for weighting descriptors and discusses the technical issues involved.
Footnote
[In Chinese]

Similar documents (content)

  1. Ferber, R.: Automated indexing with thesaurus descriptors : a co-occurrence based approach to multilingual retrieval (1997) 0.34
    0.3393886 = sum of:
      0.3393886 = product of:
        1.1636181 = sum of:
          0.0077869315 = weight(abstract_txt:from in 4144) [ClassicSimilarity], result of:
            0.0077869315 = score(doc=4144,freq=2.0), product of:
              0.031875115 = queryWeight, product of:
                2.7638826 = idf(docFreq=7577, maxDocs=44218)
                0.01153273 = queryNorm
              0.24429502 = fieldWeight in 4144, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                2.7638826 = idf(docFreq=7577, maxDocs=44218)
                0.0625 = fieldNorm(doc=4144)
          0.024389226 = weight(abstract_txt:text in 4144) [ClassicSimilarity], result of:
            0.024389226 = score(doc=4144,freq=2.0), product of:
              0.06823484 = queryWeight, product of:
                1.4631108 = boost
                4.0438666 = idf(docFreq=2106, maxDocs=44218)
                0.01153273 = queryNorm
              0.3574307 = fieldWeight in 4144, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.0438666 = idf(docFreq=2106, maxDocs=44218)
                0.0625 = fieldNorm(doc=4144)
          0.08862534 = weight(abstract_txt:weighting in 4144) [ClassicSimilarity], result of:
            0.08862534 = score(doc=4144,freq=1.0), product of:
              0.20319957 = queryWeight, product of:
                2.5248497 = boost
                6.9783883 = idf(docFreq=111, maxDocs=44218)
                0.01153273 = queryNorm
              0.43614927 = fieldWeight in 4144, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.9783883 = idf(docFreq=111, maxDocs=44218)
                0.0625 = fieldNorm(doc=4144)
          0.05834469 = weight(abstract_txt:document in 4144) [ClassicSimilarity], result of:
            0.05834469 = score(doc=4144,freq=2.0), product of:
              0.15377475 = queryWeight, product of:
                3.106216 = boost
                4.2926083 = idf(docFreq=1642, maxDocs=44218)
                0.01153273 = queryNorm
              0.37941656 = fieldWeight in 4144, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.2926083 = idf(docFreq=1642, maxDocs=44218)
                0.0625 = fieldNorm(doc=4144)
          0.085841395 = weight(abstract_txt:indexing in 4144) [ClassicSimilarity], result of:
            0.085841395 = score(doc=4144,freq=4.0), product of:
              0.15788421 = queryWeight, product of:
                3.1474473 = boost
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.01153273 = queryNorm
              0.54369843 = fieldWeight in 4144, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.0625 = fieldNorm(doc=4144)
          0.14381962 = weight(abstract_txt:thesaurus in 4144) [ClassicSimilarity], result of:
            0.14381962 = score(doc=4144,freq=4.0), product of:
              0.22271678 = queryWeight, product of:
                3.7382264 = boost
                5.1660094 = idf(docFreq=685, maxDocs=44218)
                0.01153273 = queryNorm
              0.6457512 = fieldWeight in 4144, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                5.1660094 = idf(docFreq=685, maxDocs=44218)
                0.0625 = fieldNorm(doc=4144)
          0.75481087 = weight(abstract_txt:descriptors in 4144) [ClassicSimilarity], result of:
            0.75481087 = score(doc=4144,freq=6.0), product of:
              0.7403081 = queryWeight, product of:
                9.63852 = boost
                6.6599345 = idf(docFreq=153, maxDocs=44218)
                0.01153273 = queryNorm
              1.0195901 = fieldWeight in 4144, product of:
                2.4494898 = tf(freq=6.0), with freq of:
                  6.0 = termFreq=6.0
                6.6599345 = idf(docFreq=153, maxDocs=44218)
                0.0625 = fieldNorm(doc=4144)
        0.29166666 = coord(7/24)
    
  2. Loosjes, T.P.; Tichelaar, P.A.; Goossens, J.; Stuurman, P.: Ontsluiting op onderwerp (1977) 0.34
    0.33654165 = sum of:
      0.33654165 = product of:
        1.009625 = sum of:
          0.00688274 = weight(abstract_txt:from in 910) [ClassicSimilarity], result of:
            0.00688274 = score(doc=910,freq=1.0), product of:
              0.031875115 = queryWeight, product of:
                2.7638826 = idf(docFreq=7577, maxDocs=44218)
                0.01153273 = queryNorm
              0.21592833 = fieldWeight in 910, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                2.7638826 = idf(docFreq=7577, maxDocs=44218)
                0.078125 = fieldNorm(doc=910)
          0.021557234 = weight(abstract_txt:text in 910) [ClassicSimilarity], result of:
            0.021557234 = score(doc=910,freq=1.0), product of:
              0.06823484 = queryWeight, product of:
                1.4631108 = boost
                4.0438666 = idf(docFreq=2106, maxDocs=44218)
                0.01153273 = queryNorm
              0.3159271 = fieldWeight in 910, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.0438666 = idf(docFreq=2106, maxDocs=44218)
                0.078125 = fieldNorm(doc=910)
          0.03491366 = weight(abstract_txt:index in 910) [ClassicSimilarity], result of:
            0.03491366 = score(doc=910,freq=1.0), product of:
              0.094103925 = queryWeight, product of:
                1.7182169 = boost
                4.74895 = idf(docFreq=1040, maxDocs=44218)
                0.01153273 = queryNorm
              0.37101173 = fieldWeight in 910, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.74895 = idf(docFreq=1040, maxDocs=44218)
                0.078125 = fieldNorm(doc=910)
          0.038886864 = weight(abstract_txt:full in 910) [ClassicSimilarity], result of:
            0.038886864 = score(doc=910,freq=1.0), product of:
              0.10111435 = queryWeight, product of:
                1.7810681 = boost
                4.922663 = idf(docFreq=874, maxDocs=44218)
                0.01153273 = queryNorm
              0.38458306 = fieldWeight in 910, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.922663 = idf(docFreq=874, maxDocs=44218)
                0.078125 = fieldNorm(doc=910)
          0.05365087 = weight(abstract_txt:indexing in 910) [ClassicSimilarity], result of:
            0.05365087 = score(doc=910,freq=1.0), product of:
              0.15788421 = queryWeight, product of:
                3.1474473 = boost
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.01153273 = queryNorm
              0.3398115 = fieldWeight in 910, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.078125 = fieldNorm(doc=910)
          0.059449073 = weight(abstract_txt:method in 910) [ClassicSimilarity], result of:
            0.059449073 = score(doc=910,freq=1.0), product of:
              0.1690639 = queryWeight, product of:
                3.2569761 = boost
                4.50095 = idf(docFreq=1333, maxDocs=44218)
                0.01153273 = queryNorm
              0.3516367 = fieldWeight in 910, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.50095 = idf(docFreq=1333, maxDocs=44218)
                0.078125 = fieldNorm(doc=910)
          0.12711978 = weight(abstract_txt:thesaurus in 910) [ClassicSimilarity], result of:
            0.12711978 = score(doc=910,freq=2.0), product of:
              0.22271678 = queryWeight, product of:
                3.7382264 = boost
                5.1660094 = idf(docFreq=685, maxDocs=44218)
                0.01153273 = queryNorm
              0.5707688 = fieldWeight in 910, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.1660094 = idf(docFreq=685, maxDocs=44218)
                0.078125 = fieldNorm(doc=910)
          0.6671648 = weight(abstract_txt:descriptors in 910) [ClassicSimilarity], result of:
            0.6671648 = score(doc=910,freq=3.0), product of:
              0.7403081 = queryWeight, product of:
                9.63852 = boost
                6.6599345 = idf(docFreq=153, maxDocs=44218)
                0.01153273 = queryNorm
              0.90119886 = fieldWeight in 910, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                6.6599345 = idf(docFreq=153, maxDocs=44218)
                0.078125 = fieldNorm(doc=910)
        0.33333334 = coord(8/24)
    
  3. Lu, K.; Mao, J.; Li, G.: Toward effective automated weighted subject indexing : a comparison of different approaches in different environments (2018) 0.30
    0.2988014 = sum of:
      0.2988014 = product of:
        0.89640415 = sum of:
          0.0055061923 = weight(abstract_txt:from in 4292) [ClassicSimilarity], result of:
            0.0055061923 = score(doc=4292,freq=1.0), product of:
              0.031875115 = queryWeight, product of:
                2.7638826 = idf(docFreq=7577, maxDocs=44218)
                0.01153273 = queryNorm
              0.17274266 = fieldWeight in 4292, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                2.7638826 = idf(docFreq=7577, maxDocs=44218)
                0.0625 = fieldNorm(doc=4292)
          0.024389226 = weight(abstract_txt:text in 4292) [ClassicSimilarity], result of:
            0.024389226 = score(doc=4292,freq=2.0), product of:
              0.06823484 = queryWeight, product of:
                1.4631108 = boost
                4.0438666 = idf(docFreq=2106, maxDocs=44218)
                0.01153273 = queryNorm
              0.3574307 = fieldWeight in 4292, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.0438666 = idf(docFreq=2106, maxDocs=44218)
                0.0625 = fieldNorm(doc=4292)
          0.043995466 = weight(abstract_txt:full in 4292) [ClassicSimilarity], result of:
            0.043995466 = score(doc=4292,freq=2.0), product of:
              0.10111435 = queryWeight, product of:
                1.7810681 = boost
                4.922663 = idf(docFreq=874, maxDocs=44218)
                0.01153273 = queryNorm
              0.43510607 = fieldWeight in 4292, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.922663 = idf(docFreq=874, maxDocs=44218)
                0.0625 = fieldNorm(doc=4292)
          0.17725068 = weight(abstract_txt:weighting in 4292) [ClassicSimilarity], result of:
            0.17725068 = score(doc=4292,freq=4.0), product of:
              0.20319957 = queryWeight, product of:
                2.5248497 = boost
                6.9783883 = idf(docFreq=111, maxDocs=44218)
                0.01153273 = queryNorm
              0.87229854 = fieldWeight in 4292, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                6.9783883 = idf(docFreq=111, maxDocs=44218)
                0.0625 = fieldNorm(doc=4292)
          0.041255925 = weight(abstract_txt:document in 4292) [ClassicSimilarity], result of:
            0.041255925 = score(doc=4292,freq=1.0), product of:
              0.15377475 = queryWeight, product of:
                3.106216 = boost
                4.2926083 = idf(docFreq=1642, maxDocs=44218)
                0.01153273 = queryNorm
              0.26828802 = fieldWeight in 4292, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.2926083 = idf(docFreq=1642, maxDocs=44218)
                0.0625 = fieldNorm(doc=4292)
          0.085841395 = weight(abstract_txt:indexing in 4292) [ClassicSimilarity], result of:
            0.085841395 = score(doc=4292,freq=4.0), product of:
              0.15788421 = queryWeight, product of:
                3.1474473 = boost
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.01153273 = queryNorm
              0.54369843 = fieldWeight in 4292, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.0625 = fieldNorm(doc=4292)
          0.08237505 = weight(abstract_txt:method in 4292) [ClassicSimilarity], result of:
            0.08237505 = score(doc=4292,freq=3.0), product of:
              0.1690639 = queryWeight, product of:
                3.2569761 = boost
                4.50095 = idf(docFreq=1333, maxDocs=44218)
                0.01153273 = queryNorm
              0.4872421 = fieldWeight in 4292, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                4.50095 = idf(docFreq=1333, maxDocs=44218)
                0.0625 = fieldNorm(doc=4292)
          0.4357902 = weight(abstract_txt:descriptors in 4292) [ClassicSimilarity], result of:
            0.4357902 = score(doc=4292,freq=2.0), product of:
              0.7403081 = queryWeight, product of:
                9.63852 = boost
                6.6599345 = idf(docFreq=153, maxDocs=44218)
                0.01153273 = queryNorm
              0.5886606 = fieldWeight in 4292, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                6.6599345 = idf(docFreq=153, maxDocs=44218)
                0.0625 = fieldNorm(doc=4292)
        0.33333334 = coord(8/24)
    
  4. Fagan, J.L.: ¬The effectiveness of a nonsyntactic approach to automatic phrase indexing for document retrieval (1989) 0.30
    0.2976519 = sum of:
      0.2976519 = product of:
        1.0205208 = sum of:
          0.0055061923 = weight(abstract_txt:from in 1845) [ClassicSimilarity], result of:
            0.0055061923 = score(doc=1845,freq=1.0), product of:
              0.031875115 = queryWeight, product of:
                2.7638826 = idf(docFreq=7577, maxDocs=44218)
                0.01153273 = queryNorm
              0.17274266 = fieldWeight in 1845, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                2.7638826 = idf(docFreq=7577, maxDocs=44218)
                0.0625 = fieldNorm(doc=1845)
          0.024389226 = weight(abstract_txt:text in 1845) [ClassicSimilarity], result of:
            0.024389226 = score(doc=1845,freq=2.0), product of:
              0.06823484 = queryWeight, product of:
                1.4631108 = boost
                4.0438666 = idf(docFreq=2106, maxDocs=44218)
                0.01153273 = queryNorm
              0.3574307 = fieldWeight in 1845, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.0438666 = idf(docFreq=2106, maxDocs=44218)
                0.0625 = fieldNorm(doc=1845)
          0.12897354 = weight(abstract_txt:procedure in 1845) [ClassicSimilarity], result of:
            0.12897354 = score(doc=1845,freq=3.0), product of:
              0.18093011 = queryWeight, product of:
                2.3824816 = boost
                6.5848994 = idf(docFreq=165, maxDocs=44218)
                0.01153273 = queryNorm
              0.71283627 = fieldWeight in 1845, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                6.5848994 = idf(docFreq=165, maxDocs=44218)
                0.0625 = fieldNorm(doc=1845)
          0.092251055 = weight(abstract_txt:document in 1845) [ClassicSimilarity], result of:
            0.092251055 = score(doc=1845,freq=5.0), product of:
              0.15377475 = queryWeight, product of:
                3.106216 = boost
                4.2926083 = idf(docFreq=1642, maxDocs=44218)
                0.01153273 = queryNorm
              0.59991026 = fieldWeight in 1845, product of:
                2.236068 = tf(freq=5.0), with freq of:
                  5.0 = termFreq=5.0
                4.2926083 = idf(docFreq=1642, maxDocs=44218)
                0.0625 = fieldNorm(doc=1845)
          0.085841395 = weight(abstract_txt:indexing in 1845) [ClassicSimilarity], result of:
            0.085841395 = score(doc=1845,freq=4.0), product of:
              0.15788421 = queryWeight, product of:
                3.1474473 = boost
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.01153273 = queryNorm
              0.54369843 = fieldWeight in 1845, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.0625 = fieldNorm(doc=1845)
          0.06725895 = weight(abstract_txt:method in 1845) [ClassicSimilarity], result of:
            0.06725895 = score(doc=1845,freq=2.0), product of:
              0.1690639 = queryWeight, product of:
                3.2569761 = boost
                4.50095 = idf(docFreq=1333, maxDocs=44218)
                0.01153273 = queryNorm
              0.3978315 = fieldWeight in 1845, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.50095 = idf(docFreq=1333, maxDocs=44218)
                0.0625 = fieldNorm(doc=1845)
          0.61630046 = weight(abstract_txt:descriptors in 1845) [ClassicSimilarity], result of:
            0.61630046 = score(doc=1845,freq=4.0), product of:
              0.7403081 = queryWeight, product of:
                9.63852 = boost
                6.6599345 = idf(docFreq=153, maxDocs=44218)
                0.01153273 = queryNorm
              0.8324918 = fieldWeight in 1845, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                6.6599345 = idf(docFreq=153, maxDocs=44218)
                0.0625 = fieldNorm(doc=1845)
        0.29166666 = coord(7/24)
    
  5. Gopinath, M.A.: Descriptors and their role in information retrieval (1993) 0.26
    0.2583227 = sum of:
      0.2583227 = product of:
        1.239949 = sum of:
          0.011012385 = weight(abstract_txt:from in 7802) [ClassicSimilarity], result of:
            0.011012385 = score(doc=7802,freq=1.0), product of:
              0.031875115 = queryWeight, product of:
                2.7638826 = idf(docFreq=7577, maxDocs=44218)
                0.01153273 = queryNorm
              0.34548533 = fieldWeight in 7802, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                2.7638826 = idf(docFreq=7577, maxDocs=44218)
                0.125 = fieldNorm(doc=7802)
          0.032096125 = weight(abstract_txt:discusses in 7802) [ClassicSimilarity], result of:
            0.032096125 = score(doc=7802,freq=1.0), product of:
              0.06503781 = queryWeight, product of:
                1.4284239 = boost
                3.947996 = idf(docFreq=2318, maxDocs=44218)
                0.01153273 = queryNorm
              0.4934995 = fieldWeight in 7802, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                3.947996 = idf(docFreq=2318, maxDocs=44218)
                0.125 = fieldNorm(doc=7802)
          0.034491573 = weight(abstract_txt:text in 7802) [ClassicSimilarity], result of:
            0.034491573 = score(doc=7802,freq=1.0), product of:
              0.06823484 = queryWeight, product of:
                1.4631108 = boost
                4.0438666 = idf(docFreq=2106, maxDocs=44218)
                0.01153273 = queryNorm
              0.5054833 = fieldWeight in 7802, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.0438666 = idf(docFreq=2106, maxDocs=44218)
                0.125 = fieldNorm(doc=7802)
          0.094885245 = weight(abstract_txt:explains in 7802) [ClassicSimilarity], result of:
            0.094885245 = score(doc=7802,freq=1.0), product of:
              0.13396662 = queryWeight, product of:
                2.0500877 = boost
                5.666202 = idf(docFreq=415, maxDocs=44218)
                0.01153273 = queryNorm
              0.70827526 = fieldWeight in 7802, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.666202 = idf(docFreq=415, maxDocs=44218)
                0.125 = fieldNorm(doc=7802)
          1.0674636 = weight(abstract_txt:descriptors in 7802) [ClassicSimilarity], result of:
            1.0674636 = score(doc=7802,freq=3.0), product of:
              0.7403081 = queryWeight, product of:
                9.63852 = boost
                6.6599345 = idf(docFreq=153, maxDocs=44218)
                0.01153273 = queryNorm
              1.4419181 = fieldWeight in 7802, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                6.6599345 = idf(docFreq=153, maxDocs=44218)
                0.125 = fieldNorm(doc=7802)
        0.20833333 = coord(5/24)