Document (#25051)

Author
Froissart, C.
Lallich-Boidin, G.
Title
Towards structuring of indexing vocabulary for large technical documents
Source
Structures and relations in knowledge organization: Proceedings of the 5th International ISKO-Conference, Lille, 25.-29.8.1998. Ed.: W. Mustafa el Hadi et al.
Imprint
Würzburg : Ergon
Year
1998
Pages
S.285-292
Series
Advances in knowledge organization; vol.6
Abstract
This paper deals with the indexing of large textual and structured documents. We limit our area to technical documents such as maintenance and user manuals. This implies, first, that each document describes a closed world and, second, that the documents are used by experts in this area. We suggest a methodology to extract the indexing vocabulary from the text with linguistic and numeric tools and then to structure that vocabulary as a thesaurus would. We aim to assist the user so that he can quickly retrieve only the text passages he needs.
Theme
Dokumentenmanagement

Similar documents (content)

  1. Theory of subject analysis : A sourcebook (1985) 0.16
    0.1623415 = sum of:
      0.1623415 = product of:
        0.4509486 = sum of:
          0.055682387 = weight(abstract_txt:structuring in 3622) [ClassicSimilarity], result of:
            0.055682387 = score(doc=3622,freq=2.0), product of:
              0.18213235 = queryWeight, product of:
                1.1587467 = boost
                6.9177637 = idf(docFreq=118, maxDocs=44218)
                0.022721283 = queryNorm
              0.30572486 = fieldWeight in 3622, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                6.9177637 = idf(docFreq=118, maxDocs=44218)
                0.03125 = fieldNorm(doc=3622)
          0.017760184 = weight(abstract_txt:that in 3622) [ClassicSimilarity], result of:
            0.017760184 = score(doc=3622,freq=14.0), product of:
              0.064103425 = queryWeight, product of:
                1.1906831 = boost
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.022721283 = queryNorm
              0.27705514 = fieldWeight in 3622, product of:
                3.7416575 = tf(freq=14.0), with freq of:
                  14.0 = termFreq=14.0
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.03125 = fieldNorm(doc=3622)
          0.012279593 = weight(abstract_txt:this in 3622) [ClassicSimilarity], result of:
            0.012279593 = score(doc=3622,freq=6.0), product of:
              0.06648105 = queryWeight, product of:
                1.2125636 = boost
                2.4130175 = idf(docFreq=10762, maxDocs=44218)
                0.022721283 = queryNorm
              0.18470818 = fieldWeight in 3622, product of:
                2.4494898 = tf(freq=6.0), with freq of:
                  6.0 = termFreq=6.0
                2.4130175 = idf(docFreq=10762, maxDocs=44218)
                0.03125 = fieldNorm(doc=3622)
          0.06789896 = weight(abstract_txt:passages in 3622) [ClassicSimilarity], result of:
            0.06789896 = score(doc=3622,freq=1.0), product of:
              0.26191515 = queryWeight, product of:
                1.3895535 = boost
                8.29569 = idf(docFreq=29, maxDocs=44218)
                0.022721283 = queryNorm
              0.2592403 = fieldWeight in 3622, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.29569 = idf(docFreq=29, maxDocs=44218)
                0.03125 = fieldNorm(doc=3622)
          0.042139452 = weight(abstract_txt:technical in 3622) [ClassicSimilarity], result of:
            0.042139452 = score(doc=3622,freq=2.0), product of:
              0.19056559 = queryWeight, product of:
                1.6762246 = boost
                5.0035634 = idf(docFreq=806, maxDocs=44218)
                0.022721283 = queryNorm
              0.22112834 = fieldWeight in 3622, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.0035634 = idf(docFreq=806, maxDocs=44218)
                0.03125 = fieldNorm(doc=3622)
          0.030224781 = weight(abstract_txt:area in 3622) [ClassicSimilarity], result of:
            0.030224781 = score(doc=3622,freq=1.0), product of:
              0.19238475 = queryWeight, product of:
                1.6842064 = boost
                5.027389 = idf(docFreq=787, maxDocs=44218)
                0.022721283 = queryNorm
              0.15710591 = fieldWeight in 3622, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.027389 = idf(docFreq=787, maxDocs=44218)
                0.03125 = fieldNorm(doc=3622)
          0.04326045 = weight(abstract_txt:documents in 3622) [ClassicSimilarity], result of:
            0.04326045 = score(doc=3622,freq=3.0), product of:
              0.19393042 = queryWeight, product of:
                2.0709927 = boost
                4.1213026 = idf(docFreq=1949, maxDocs=44218)
                0.022721283 = queryNorm
              0.22307204 = fieldWeight in 3622, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                4.1213026 = idf(docFreq=1949, maxDocs=44218)
                0.03125 = fieldNorm(doc=3622)
          0.071919516 = weight(abstract_txt:indexing in 3622) [ClassicSimilarity], result of:
            0.071919516 = score(doc=3622,freq=6.0), product of:
              0.21600959 = queryWeight, product of:
                2.1857078 = boost
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.022721283 = queryNorm
              0.33294594 = fieldWeight in 3622, product of:
                2.4494898 = tf(freq=6.0), with freq of:
                  6.0 = termFreq=6.0
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.03125 = fieldNorm(doc=3622)
          0.1097833 = weight(abstract_txt:vocabulary in 3622) [ClassicSimilarity], result of:
            0.1097833 = score(doc=3622,freq=4.0), product of:
              0.32781574 = queryWeight, product of:
                2.6925917 = boost
                5.358293 = idf(docFreq=565, maxDocs=44218)
                0.022721283 = queryNorm
              0.33489332 = fieldWeight in 3622, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                5.358293 = idf(docFreq=565, maxDocs=44218)
                0.03125 = fieldNorm(doc=3622)
        0.36 = coord(9/25)
    
  2. Dumais, S.T.: Latent semantic analysis (2003) 0.16
    0.16185302 = sum of:
      0.16185302 = product of:
        0.4495917 = sum of:
          0.014239824 = weight(abstract_txt:that in 2462) [ClassicSimilarity], result of:
            0.014239824 = score(doc=2462,freq=9.0), product of:
              0.064103425 = queryWeight, product of:
                1.1906831 = boost
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.022721283 = queryNorm
              0.22213829 = fieldWeight in 2462, product of:
                3.0 = tf(freq=9.0), with freq of:
                  9.0 = termFreq=9.0
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.03125 = fieldNorm(doc=2462)
          0.044544633 = weight(abstract_txt:limit in 2462) [ClassicSimilarity], result of:
            0.044544633 = score(doc=2462,freq=1.0), product of:
              0.19774953 = queryWeight, product of:
                1.2074043 = boost
                7.208251 = idf(docFreq=88, maxDocs=44218)
                0.022721283 = queryNorm
              0.22525784 = fieldWeight in 2462, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.208251 = idf(docFreq=88, maxDocs=44218)
                0.03125 = fieldNorm(doc=2462)
          0.007089626 = weight(abstract_txt:this in 2462) [ClassicSimilarity], result of:
            0.007089626 = score(doc=2462,freq=2.0), product of:
              0.06648105 = queryWeight, product of:
                1.2125636 = boost
                2.4130175 = idf(docFreq=10762, maxDocs=44218)
                0.022721283 = queryNorm
              0.106641315 = fieldWeight in 2462, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                2.4130175 = idf(docFreq=10762, maxDocs=44218)
                0.03125 = fieldNorm(doc=2462)
          0.03853024 = weight(abstract_txt:text in 2462) [ClassicSimilarity], result of:
            0.03853024 = score(doc=2462,freq=6.0), product of:
              0.12447418 = queryWeight, product of:
                1.3547202 = boost
                4.0438666 = idf(docFreq=2106, maxDocs=44218)
                0.022721283 = queryNorm
              0.30954406 = fieldWeight in 2462, product of:
                2.4494898 = tf(freq=6.0), with freq of:
                  6.0 = termFreq=6.0
                4.0438666 = idf(docFreq=2106, maxDocs=44218)
                0.03125 = fieldNorm(doc=2462)
          0.06789896 = weight(abstract_txt:passages in 2462) [ClassicSimilarity], result of:
            0.06789896 = score(doc=2462,freq=1.0), product of:
              0.26191515 = queryWeight, product of:
                1.3895535 = boost
                8.29569 = idf(docFreq=29, maxDocs=44218)
                0.022721283 = queryNorm
              0.2592403 = fieldWeight in 2462, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.29569 = idf(docFreq=29, maxDocs=44218)
                0.03125 = fieldNorm(doc=2462)
          0.04699995 = weight(abstract_txt:large in 2462) [ClassicSimilarity], result of:
            0.04699995 = score(doc=2462,freq=5.0), product of:
              0.15100922 = queryWeight, product of:
                1.4921473 = boost
                4.454089 = idf(docFreq=1397, maxDocs=44218)
                0.022721283 = queryNorm
              0.31123894 = fieldWeight in 2462, product of:
                2.236068 = tf(freq=5.0), with freq of:
                  5.0 = termFreq=5.0
                4.454089 = idf(docFreq=1397, maxDocs=44218)
                0.03125 = fieldNorm(doc=2462)
          0.07898243 = weight(abstract_txt:documents in 2462) [ClassicSimilarity], result of:
            0.07898243 = score(doc=2462,freq=10.0), product of:
              0.19393042 = queryWeight, product of:
                2.0709927 = boost
                4.1213026 = idf(docFreq=1949, maxDocs=44218)
                0.022721283 = queryNorm
              0.40727198 = fieldWeight in 2462, product of:
                3.1622777 = tf(freq=10.0), with freq of:
                  10.0 = termFreq=10.0
                4.1213026 = idf(docFreq=1949, maxDocs=44218)
                0.03125 = fieldNorm(doc=2462)
          0.041522752 = weight(abstract_txt:indexing in 2462) [ClassicSimilarity], result of:
            0.041522752 = score(doc=2462,freq=2.0), product of:
              0.21600959 = queryWeight, product of:
                2.1857078 = boost
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.022721283 = queryNorm
              0.19222642 = fieldWeight in 2462, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.03125 = fieldNorm(doc=2462)
          0.1097833 = weight(abstract_txt:vocabulary in 2462) [ClassicSimilarity], result of:
            0.1097833 = score(doc=2462,freq=4.0), product of:
              0.32781574 = queryWeight, product of:
                2.6925917 = boost
                5.358293 = idf(docFreq=565, maxDocs=44218)
                0.022721283 = queryNorm
              0.33489332 = fieldWeight in 2462, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                5.358293 = idf(docFreq=565, maxDocs=44218)
                0.03125 = fieldNorm(doc=2462)
        0.36 = coord(9/25)
    
  3. Mounier, E.; Paganelli, C.: Text structure and information retrieval in large documents (1998) 0.15
    0.14855348 = sum of:
      0.14855348 = product of:
        0.61897284 = sum of:
          0.10122721 = weight(abstract_txt:textual in 66) [ClassicSimilarity], result of:
            0.10122721 = score(doc=66,freq=1.0), product of:
              0.13564695 = queryWeight, product of:
                5.9700394 = idf(docFreq=306, maxDocs=44218)
                0.022721283 = queryNorm
              0.7462549 = fieldWeight in 66, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.9700394 = idf(docFreq=306, maxDocs=44218)
                0.125 = fieldNorm(doc=66)
          0.13931432 = weight(abstract_txt:deals in 66) [ClassicSimilarity], result of:
            0.13931432 = score(doc=66,freq=1.0), product of:
              0.16783237 = queryWeight, product of:
                1.1123279 = boost
                6.640641 = idf(docFreq=156, maxDocs=44218)
                0.022721283 = queryNorm
              0.83008015 = fieldWeight in 66, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.640641 = idf(docFreq=156, maxDocs=44218)
                0.125 = fieldNorm(doc=66)
          0.028358504 = weight(abstract_txt:this in 66) [ClassicSimilarity], result of:
            0.028358504 = score(doc=66,freq=2.0), product of:
              0.06648105 = queryWeight, product of:
                1.2125636 = boost
                2.4130175 = idf(docFreq=10762, maxDocs=44218)
                0.022721283 = queryNorm
              0.42656526 = fieldWeight in 66, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                2.4130175 = idf(docFreq=10762, maxDocs=44218)
                0.125 = fieldNorm(doc=66)
          0.08407606 = weight(abstract_txt:large in 66) [ClassicSimilarity], result of:
            0.08407606 = score(doc=66,freq=1.0), product of:
              0.15100922 = queryWeight, product of:
                1.4921473 = boost
                4.454089 = idf(docFreq=1397, maxDocs=44218)
                0.022721283 = queryNorm
              0.55676115 = fieldWeight in 66, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.454089 = idf(docFreq=1397, maxDocs=44218)
                0.125 = fieldNorm(doc=66)
          0.099905744 = weight(abstract_txt:documents in 66) [ClassicSimilarity], result of:
            0.099905744 = score(doc=66,freq=1.0), product of:
              0.19393042 = queryWeight, product of:
                2.0709927 = boost
                4.1213026 = idf(docFreq=1949, maxDocs=44218)
                0.022721283 = queryNorm
              0.5151628 = fieldWeight in 66, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.1213026 = idf(docFreq=1949, maxDocs=44218)
                0.125 = fieldNorm(doc=66)
          0.16609101 = weight(abstract_txt:indexing in 66) [ClassicSimilarity], result of:
            0.16609101 = score(doc=66,freq=2.0), product of:
              0.21600959 = queryWeight, product of:
                2.1857078 = boost
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.022721283 = queryNorm
              0.7689057 = fieldWeight in 66, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.125 = fieldNorm(doc=66)
        0.24 = coord(6/25)
    
  4. Anderson, J.D.: Guidelines for indexes and related information retrieval devices (1997) 0.15
    0.14747187 = sum of:
      0.14747187 = product of:
        0.61446613 = sum of:
          0.08707145 = weight(abstract_txt:deals in 3807) [ClassicSimilarity], result of:
            0.08707145 = score(doc=3807,freq=1.0), product of:
              0.16783237 = queryWeight, product of:
                1.1123279 = boost
                6.640641 = idf(docFreq=156, maxDocs=44218)
                0.022721283 = queryNorm
              0.5188001 = fieldWeight in 3807, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.640641 = idf(docFreq=156, maxDocs=44218)
                0.078125 = fieldNorm(doc=3807)
          0.012532807 = weight(abstract_txt:this in 3807) [ClassicSimilarity], result of:
            0.012532807 = score(doc=3807,freq=1.0), product of:
              0.06648105 = queryWeight, product of:
                1.2125636 = boost
                2.4130175 = idf(docFreq=10762, maxDocs=44218)
                0.022721283 = queryNorm
              0.18851699 = fieldWeight in 3807, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                2.4130175 = idf(docFreq=10762, maxDocs=44218)
                0.078125 = fieldNorm(doc=3807)
          0.10534863 = weight(abstract_txt:technical in 3807) [ClassicSimilarity], result of:
            0.10534863 = score(doc=3807,freq=2.0), product of:
              0.19056559 = queryWeight, product of:
                1.6762246 = boost
                5.0035634 = idf(docFreq=806, maxDocs=44218)
                0.022721283 = queryNorm
              0.55282086 = fieldWeight in 3807, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.0035634 = idf(docFreq=806, maxDocs=44218)
                0.078125 = fieldNorm(doc=3807)
          0.088305034 = weight(abstract_txt:documents in 3807) [ClassicSimilarity], result of:
            0.088305034 = score(doc=3807,freq=2.0), product of:
              0.19393042 = queryWeight, product of:
                2.0709927 = boost
                4.1213026 = idf(docFreq=1949, maxDocs=44218)
                0.022721283 = queryNorm
              0.4553439 = fieldWeight in 3807, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.1213026 = idf(docFreq=1949, maxDocs=44218)
                0.078125 = fieldNorm(doc=3807)
          0.12713693 = weight(abstract_txt:indexing in 3807) [ClassicSimilarity], result of:
            0.12713693 = score(doc=3807,freq=3.0), product of:
              0.21600959 = queryWeight, product of:
                2.1857078 = boost
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.022721283 = queryNorm
              0.5885708 = fieldWeight in 3807, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.078125 = fieldNorm(doc=3807)
          0.1940713 = weight(abstract_txt:vocabulary in 3807) [ClassicSimilarity], result of:
            0.1940713 = score(doc=3807,freq=2.0), product of:
              0.32781574 = queryWeight, product of:
                2.6925917 = boost
                5.358293 = idf(docFreq=565, maxDocs=44218)
                0.022721283 = queryNorm
              0.59201336 = fieldWeight in 3807, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.358293 = idf(docFreq=565, maxDocs=44218)
                0.078125 = fieldNorm(doc=3807)
        0.24 = coord(6/25)
    
  5. Wolfram, D.; Olson, H.A.; Bloom, R.: Measuring consistency for multiple taggers using vector space modeling (2009) 0.15
    0.14627464 = sum of:
      0.14627464 = product of:
        0.52240944 = sum of:
          0.020553418 = weight(abstract_txt:that in 3113) [ClassicSimilarity], result of:
            0.020553418 = score(doc=3113,freq=3.0), product of:
              0.064103425 = queryWeight, product of:
                1.1906831 = boost
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.022721283 = queryNorm
              0.320629 = fieldWeight in 3113, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.078125 = fieldNorm(doc=3113)
          0.012532807 = weight(abstract_txt:this in 3113) [ClassicSimilarity], result of:
            0.012532807 = score(doc=3113,freq=1.0), product of:
              0.06648105 = queryWeight, product of:
                1.2125636 = boost
                2.4130175 = idf(docFreq=10762, maxDocs=44218)
                0.022721283 = queryNorm
              0.18851699 = fieldWeight in 3113, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                2.4130175 = idf(docFreq=10762, maxDocs=44218)
                0.078125 = fieldNorm(doc=3113)
          0.052547544 = weight(abstract_txt:large in 3113) [ClassicSimilarity], result of:
            0.052547544 = score(doc=3113,freq=1.0), product of:
              0.15100922 = queryWeight, product of:
                1.4921473 = boost
                4.454089 = idf(docFreq=1397, maxDocs=44218)
                0.022721283 = queryNorm
              0.34797573 = fieldWeight in 3113, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.454089 = idf(docFreq=1397, maxDocs=44218)
                0.078125 = fieldNorm(doc=3113)
          0.106860735 = weight(abstract_txt:area in 3113) [ClassicSimilarity], result of:
            0.106860735 = score(doc=3113,freq=2.0), product of:
              0.19238475 = queryWeight, product of:
                1.6842064 = boost
                5.027389 = idf(docFreq=787, maxDocs=44218)
                0.022721283 = queryNorm
              0.55545324 = fieldWeight in 3113, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.027389 = idf(docFreq=787, maxDocs=44218)
                0.078125 = fieldNorm(doc=3113)
          0.062441092 = weight(abstract_txt:documents in 3113) [ClassicSimilarity], result of:
            0.062441092 = score(doc=3113,freq=1.0), product of:
              0.19393042 = queryWeight, product of:
                2.0709927 = boost
                4.1213026 = idf(docFreq=1949, maxDocs=44218)
                0.022721283 = queryNorm
              0.32197678 = fieldWeight in 3113, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.1213026 = idf(docFreq=1949, maxDocs=44218)
                0.078125 = fieldNorm(doc=3113)
          0.07340255 = weight(abstract_txt:indexing in 3113) [ClassicSimilarity], result of:
            0.07340255 = score(doc=3113,freq=1.0), product of:
              0.21600959 = queryWeight, product of:
                2.1857078 = boost
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.022721283 = queryNorm
              0.3398115 = fieldWeight in 3113, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.3495874 = idf(docFreq=1551, maxDocs=44218)
                0.078125 = fieldNorm(doc=3113)
          0.1940713 = weight(abstract_txt:vocabulary in 3113) [ClassicSimilarity], result of:
            0.1940713 = score(doc=3113,freq=2.0), product of:
              0.32781574 = queryWeight, product of:
                2.6925917 = boost
                5.358293 = idf(docFreq=565, maxDocs=44218)
                0.022721283 = queryNorm
              0.59201336 = fieldWeight in 3113, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.358293 = idf(docFreq=565, maxDocs=44218)
                0.078125 = fieldNorm(doc=3113)
        0.28 = coord(7/25)