Document (#6628)

Author
Can, F.
Title
Incremental clustering for dynamic information processing
Source
ACM transactions on information systems. 11(1993) no.2, S.143-164
Year
1993
Abstract
Clustering of very large document databases is useful for both searching and browsing. The periodic updating of clusters is required due to the dynamic nature of databases. Introduces an algorithm for incremental clustering and discusses the complexity and cost of analysis of the algorithm together with an investigation of its expected behaviour. Shows through empirical testing that the algorithm achieves cost effectiveness and generates statistically valid clusters that are compatible with those of reclustering. The experimental evidence shows that the algorithm creates an effective and efficient retrieval environment.
Theme
Automatisches Indexieren
Retrievalalgorithmen

Similar documents (content)

  1. Cathey, R.J.; Jensen, E.C.; Beitzel, S.M.; Frieder, O.; Grossman, D.: Exploiting parallelism to support scalable hierarchical clustering (2007) 0.20
    0.1984057 = sum of:
      0.1984057 = product of:
        0.82669044 = sum of:
          0.042110056 = weight(abstract_txt:complexity in 448) [ClassicSimilarity], result of:
            0.042110056 = score(doc=448,freq=1.0), product of:
              0.11394449 = queryWeight, product of:
                1.0848101 = boost
                5.913062 = idf(docFreq=324, maxDocs=44218)
                0.017763443 = queryNorm
              0.36956638 = fieldWeight in 448, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.913062 = idf(docFreq=324, maxDocs=44218)
                0.0625 = fieldNorm(doc=448)
          0.04714831 = weight(abstract_txt:expected in 448) [ClassicSimilarity], result of:
            0.04714831 = score(doc=448,freq=1.0), product of:
              0.122860864 = queryWeight, product of:
                1.1264551 = boost
                6.140059 = idf(docFreq=258, maxDocs=44218)
                0.017763443 = queryNorm
              0.3837537 = fieldWeight in 448, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.140059 = idf(docFreq=258, maxDocs=44218)
                0.0625 = fieldNorm(doc=448)
          0.011495853 = weight(abstract_txt:that in 448) [ClassicSimilarity], result of:
            0.011495853 = score(doc=448,freq=2.0), product of:
              0.0548901 = queryWeight, product of:
                1.3041115 = boost
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.017763443 = queryNorm
              0.20943399 = fieldWeight in 448, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.0625 = fieldNorm(doc=448)
          0.11265294 = weight(abstract_txt:clusters in 448) [ClassicSimilarity], result of:
            0.11265294 = score(doc=448,freq=1.0), product of:
              0.2766567 = queryWeight, product of:
                2.3905215 = boost
                6.515104 = idf(docFreq=177, maxDocs=44218)
                0.017763443 = queryNorm
              0.407194 = fieldWeight in 448, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.515104 = idf(docFreq=177, maxDocs=44218)
                0.0625 = fieldNorm(doc=448)
          0.25375712 = weight(abstract_txt:algorithm in 448) [ClassicSimilarity], result of:
            0.25375712 = score(doc=448,freq=5.0), product of:
              0.3182478 = queryWeight, product of:
                3.14015 = boost
                5.705423 = idf(docFreq=399, maxDocs=44218)
                0.017763443 = queryNorm
              0.7973571 = fieldWeight in 448, product of:
                2.236068 = tf(freq=5.0), with freq of:
                  5.0 = termFreq=5.0
                5.705423 = idf(docFreq=399, maxDocs=44218)
                0.0625 = fieldNorm(doc=448)
          0.3595262 = weight(abstract_txt:clustering in 448) [ClassicSimilarity], result of:
            0.3595262 = score(doc=448,freq=6.0), product of:
              0.37778655 = queryWeight, product of:
                3.4212985 = boost
                6.2162485 = idf(docFreq=239, maxDocs=44218)
                0.017763443 = queryNorm
              0.95166487 = fieldWeight in 448, product of:
                2.4494898 = tf(freq=6.0), with freq of:
                  6.0 = termFreq=6.0
                6.2162485 = idf(docFreq=239, maxDocs=44218)
                0.0625 = fieldNorm(doc=448)
        0.24 = coord(6/25)
    
  2. Zamir, O.; Etzioni, O.: Grouper : a dynamic clustering interface to Web search results (1999) 0.16
    0.16141239 = sum of:
      0.16141239 = product of:
        0.8070619 = sum of:
          0.044205736 = weight(abstract_txt:browsing in 6207) [ClassicSimilarity], result of:
            0.044205736 = score(doc=6207,freq=1.0), product of:
              0.10142583 = queryWeight, product of:
                1.0234847 = boost
                5.57879 = idf(docFreq=453, maxDocs=44218)
                0.017763443 = queryNorm
              0.435843 = fieldWeight in 6207, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.57879 = idf(docFreq=453, maxDocs=44218)
                0.078125 = fieldNorm(doc=6207)
          0.010160995 = weight(abstract_txt:that in 6207) [ClassicSimilarity], result of:
            0.010160995 = score(doc=6207,freq=1.0), product of:
              0.0548901 = queryWeight, product of:
                1.3041115 = boost
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.017763443 = queryNorm
              0.18511525 = fieldWeight in 6207, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.078125 = fieldNorm(doc=6207)
          0.24390078 = weight(abstract_txt:clusters in 6207) [ClassicSimilarity], result of:
            0.24390078 = score(doc=6207,freq=3.0), product of:
              0.2766567 = queryWeight, product of:
                2.3905215 = boost
                6.515104 = idf(docFreq=177, maxDocs=44218)
                0.017763443 = queryNorm
              0.88160086 = fieldWeight in 6207, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                6.515104 = idf(docFreq=177, maxDocs=44218)
                0.078125 = fieldNorm(doc=6207)
          0.14185455 = weight(abstract_txt:algorithm in 6207) [ClassicSimilarity], result of:
            0.14185455 = score(doc=6207,freq=1.0), product of:
              0.3182478 = queryWeight, product of:
                3.14015 = boost
                5.705423 = idf(docFreq=399, maxDocs=44218)
                0.017763443 = queryNorm
              0.44573617 = fieldWeight in 6207, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.705423 = idf(docFreq=399, maxDocs=44218)
                0.078125 = fieldNorm(doc=6207)
          0.36693984 = weight(abstract_txt:clustering in 6207) [ClassicSimilarity], result of:
            0.36693984 = score(doc=6207,freq=4.0), product of:
              0.37778655 = queryWeight, product of:
                3.4212985 = boost
                6.2162485 = idf(docFreq=239, maxDocs=44218)
                0.017763443 = queryNorm
              0.9712888 = fieldWeight in 6207, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                6.2162485 = idf(docFreq=239, maxDocs=44218)
                0.078125 = fieldNorm(doc=6207)
        0.2 = coord(5/25)
    
  3. Kishida, K.: High-speed rough clustering for very large document collections (2010) 0.15
    0.15023018 = sum of:
      0.15023018 = product of:
        0.7511509 = sum of:
          0.042110056 = weight(abstract_txt:complexity in 3463) [ClassicSimilarity], result of:
            0.042110056 = score(doc=3463,freq=1.0), product of:
              0.11394449 = queryWeight, product of:
                1.0848101 = boost
                5.913062 = idf(docFreq=324, maxDocs=44218)
                0.017763443 = queryNorm
              0.36956638 = fieldWeight in 3463, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.913062 = idf(docFreq=324, maxDocs=44218)
                0.0625 = fieldNorm(doc=3463)
          0.011495853 = weight(abstract_txt:that in 3463) [ClassicSimilarity], result of:
            0.011495853 = score(doc=3463,freq=2.0), product of:
              0.0548901 = queryWeight, product of:
                1.3041115 = boost
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.017763443 = queryNorm
              0.20943399 = fieldWeight in 3463, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.0625 = fieldNorm(doc=3463)
          0.11265294 = weight(abstract_txt:clusters in 3463) [ClassicSimilarity], result of:
            0.11265294 = score(doc=3463,freq=1.0), product of:
              0.2766567 = queryWeight, product of:
                2.3905215 = boost
                6.515104 = idf(docFreq=177, maxDocs=44218)
                0.017763443 = queryNorm
              0.407194 = fieldWeight in 3463, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.515104 = idf(docFreq=177, maxDocs=44218)
                0.0625 = fieldNorm(doc=3463)
          0.19655943 = weight(abstract_txt:algorithm in 3463) [ClassicSimilarity], result of:
            0.19655943 = score(doc=3463,freq=3.0), product of:
              0.3182478 = queryWeight, product of:
                3.14015 = boost
                5.705423 = idf(docFreq=399, maxDocs=44218)
                0.017763443 = queryNorm
              0.6176301 = fieldWeight in 3463, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                5.705423 = idf(docFreq=399, maxDocs=44218)
                0.0625 = fieldNorm(doc=3463)
          0.38833264 = weight(abstract_txt:clustering in 3463) [ClassicSimilarity], result of:
            0.38833264 = score(doc=3463,freq=7.0), product of:
              0.37778655 = queryWeight, product of:
                3.4212985 = boost
                6.2162485 = idf(docFreq=239, maxDocs=44218)
                0.017763443 = queryNorm
              1.0279155 = fieldWeight in 3463, product of:
                2.6457512 = tf(freq=7.0), with freq of:
                  7.0 = termFreq=7.0
                6.2162485 = idf(docFreq=239, maxDocs=44218)
                0.0625 = fieldNorm(doc=3463)
        0.2 = coord(5/25)
    
  4. Kostoff, R.N.; Block, J.A.: Factor matrix text filtering and clustering (2005) 0.14
    0.14219277 = sum of:
      0.14219277 = product of:
        0.71096385 = sum of:
          0.017599357 = weight(abstract_txt:that in 3683) [ClassicSimilarity], result of:
            0.017599357 = score(doc=3683,freq=3.0), product of:
              0.0548901 = queryWeight, product of:
                1.3041115 = boost
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.017763443 = queryNorm
              0.320629 = fieldWeight in 3683, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.078125 = fieldNorm(doc=3683)
          0.043753944 = weight(abstract_txt:databases in 3683) [ClassicSimilarity], result of:
            0.043753944 = score(doc=3683,freq=1.0), product of:
              0.12691636 = queryWeight, product of:
                1.6191269 = boost
                4.4127526 = idf(docFreq=1456, maxDocs=44218)
                0.017763443 = queryNorm
              0.3447463 = fieldWeight in 3683, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.4127526 = idf(docFreq=1456, maxDocs=44218)
                0.078125 = fieldNorm(doc=3683)
          0.14081618 = weight(abstract_txt:clusters in 3683) [ClassicSimilarity], result of:
            0.14081618 = score(doc=3683,freq=1.0), product of:
              0.2766567 = queryWeight, product of:
                2.3905215 = boost
                6.515104 = idf(docFreq=177, maxDocs=44218)
                0.017763443 = queryNorm
              0.5089925 = fieldWeight in 3683, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.515104 = idf(docFreq=177, maxDocs=44218)
                0.078125 = fieldNorm(doc=3683)
          0.14185455 = weight(abstract_txt:algorithm in 3683) [ClassicSimilarity], result of:
            0.14185455 = score(doc=3683,freq=1.0), product of:
              0.3182478 = queryWeight, product of:
                3.14015 = boost
                5.705423 = idf(docFreq=399, maxDocs=44218)
                0.017763443 = queryNorm
              0.44573617 = fieldWeight in 3683, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.705423 = idf(docFreq=399, maxDocs=44218)
                0.078125 = fieldNorm(doc=3683)
          0.36693984 = weight(abstract_txt:clustering in 3683) [ClassicSimilarity], result of:
            0.36693984 = score(doc=3683,freq=4.0), product of:
              0.37778655 = queryWeight, product of:
                3.4212985 = boost
                6.2162485 = idf(docFreq=239, maxDocs=44218)
                0.017763443 = queryNorm
              0.9712888 = fieldWeight in 3683, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                6.2162485 = idf(docFreq=239, maxDocs=44218)
                0.078125 = fieldNorm(doc=3683)
        0.2 = coord(5/25)
    
  5. Frants, V.I.; Kamenoff, N.I.; Shapiro, J.: ¬One approach to classification of users and automatic clustering of documents (1993) 0.14
    0.13740629 = sum of:
      0.13740629 = product of:
        0.8587893 = sum of:
          0.016257592 = weight(abstract_txt:that in 4569) [ClassicSimilarity], result of:
            0.016257592 = score(doc=4569,freq=1.0), product of:
              0.0548901 = queryWeight, product of:
                1.3041115 = boost
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.017763443 = queryNorm
              0.2961844 = fieldWeight in 4569, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                2.3694751 = idf(docFreq=11241, maxDocs=44218)
                0.125 = fieldNorm(doc=4569)
          0.10875608 = weight(abstract_txt:shows in 4569) [ClassicSimilarity], result of:
            0.10875608 = score(doc=4569,freq=1.0), product of:
              0.17024009 = queryWeight, product of:
                1.8752234 = boost
                5.1107154 = idf(docFreq=724, maxDocs=44218)
                0.017763443 = queryNorm
              0.6388394 = fieldWeight in 4569, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.1107154 = idf(docFreq=724, maxDocs=44218)
                0.125 = fieldNorm(doc=4569)
          0.31863064 = weight(abstract_txt:clusters in 4569) [ClassicSimilarity], result of:
            0.31863064 = score(doc=4569,freq=2.0), product of:
              0.2766567 = queryWeight, product of:
                2.3905215 = boost
                6.515104 = idf(docFreq=177, maxDocs=44218)
                0.017763443 = queryNorm
              1.1517185 = fieldWeight in 4569, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                6.515104 = idf(docFreq=177, maxDocs=44218)
                0.125 = fieldNorm(doc=4569)
          0.41514504 = weight(abstract_txt:clustering in 4569) [ClassicSimilarity], result of:
            0.41514504 = score(doc=4569,freq=2.0), product of:
              0.37778655 = queryWeight, product of:
                3.4212985 = boost
                6.2162485 = idf(docFreq=239, maxDocs=44218)
                0.017763443 = queryNorm
              1.0988878 = fieldWeight in 4569, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                6.2162485 = idf(docFreq=239, maxDocs=44218)
                0.125 = fieldNorm(doc=4569)
        0.16 = coord(4/25)