Document (#27400)

Author
Brückner, T.
Dambeck, H.
Title
Sortierautomaten : Grundlagen der Textklassifizierung
Source
c't. 2003, H.19, S.192-197
Year
2003
Abstract
Rechnung, Kündigung oder Adressänderung? Eingehende Briefe und E-Mails werden immer häufiger von Software statt aufwändig von Menschenhand sortiert. Die Textklassifizierer arbeiten erstaunlich genau. Sie fahnden auch nach ähnlichen Texten und sorgen so für einen schnellen Überblick. Ihre Werkzeuge sind Linguistik, Statistik und Logik.
Content
Mehrere grafische Darstellungen
Theme
Automatisches Klassifizieren
Data Mining

Similar documents (content)

  1. Patzig, G.: Sprache und Logik (1981) 0.09
    0.09113541 = sum of:
      0.09113541 = product of:
        0.32548362 = sum of:
          0.010864423 = weight(abstract_txt:einen in 6345) [ClassicSimilarity], result of:
            0.010864423 = score(doc=6345,freq=1.0), product of:
              0.06474743 = queryWeight, product of:
                4.2956023 = idf(docFreq=1577, maxDocs=42596)
                0.015072958 = queryNorm
              0.16779697 = fieldWeight in 6345, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.2956023 = idf(docFreq=1577, maxDocs=42596)
                0.0390625 = fieldNorm(doc=6345)
          0.011464878 = weight(abstract_txt:nach in 6345) [ClassicSimilarity], result of:
            0.011464878 = score(doc=6345,freq=1.0), product of:
              0.067111626 = queryWeight, product of:
                1.0180933 = boost
                4.373324 = idf(docFreq=1459, maxDocs=42596)
                0.015072958 = queryNorm
              0.17083296 = fieldWeight in 6345, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.373324 = idf(docFreq=1459, maxDocs=42596)
                0.0390625 = fieldNorm(doc=6345)
          0.015749453 = weight(abstract_txt:ihre in 6345) [ClassicSimilarity], result of:
            0.015749453 = score(doc=6345,freq=1.0), product of:
              0.082933195 = queryWeight, product of:
                1.1317563 = boost
                4.861575 = idf(docFreq=895, maxDocs=42596)
                0.015072958 = queryNorm
              0.18990529 = fieldWeight in 6345, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.861575 = idf(docFreq=895, maxDocs=42596)
                0.0390625 = fieldNorm(doc=6345)
          0.017445276 = weight(abstract_txt:immer in 6345) [ClassicSimilarity], result of:
            0.017445276 = score(doc=6345,freq=1.0), product of:
              0.088784404 = queryWeight, product of:
                1.1710005 = boost
                5.0301523 = idf(docFreq=756, maxDocs=42596)
                0.015072958 = queryNorm
              0.19649032 = fieldWeight in 6345, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.0301523 = idf(docFreq=756, maxDocs=42596)
                0.0390625 = fieldNorm(doc=6345)
          0.033793196 = weight(abstract_txt:grundlagen in 6345) [ClassicSimilarity], result of:
            0.033793196 = score(doc=6345,freq=1.0), product of:
              0.13796565 = queryWeight, product of:
                1.4597356 = boost
                6.270444 = idf(docFreq=218, maxDocs=42596)
                0.015072958 = queryNorm
              0.24493921 = fieldWeight in 6345, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.270444 = idf(docFreq=218, maxDocs=42596)
                0.0390625 = fieldNorm(doc=6345)
          0.16609424 = weight(abstract_txt:logik in 6345) [ClassicSimilarity], result of:
            0.16609424 = score(doc=6345,freq=7.0), product of:
              0.20849156 = queryWeight, product of:
                1.7944566 = boost
                7.708272 = idf(docFreq=51, maxDocs=42596)
                0.015072958 = queryNorm
              0.7966473 = fieldWeight in 6345, product of:
                2.6457512 = tf(freq=7.0), with freq of:
                  7.0 = termFreq=7.0
                7.708272 = idf(docFreq=51, maxDocs=42596)
                0.0390625 = fieldNorm(doc=6345)
          0.07007213 = weight(abstract_txt:sorgen in 6345) [ClassicSimilarity], result of:
            0.07007213 = score(doc=6345,freq=1.0), product of:
              0.22434427 = queryWeight, product of:
                1.8614279 = boost
                7.995954 = idf(docFreq=38, maxDocs=42596)
                0.015072958 = queryNorm
              0.31234196 = fieldWeight in 6345, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.995954 = idf(docFreq=38, maxDocs=42596)
                0.0390625 = fieldNorm(doc=6345)
        0.28 = coord(7/25)
    
  2. Heyer, G.; Quasthoff, U.; Wittig, T.: Text Mining : Wissensrohstoff Text. Konzepte, Algorithmen, Ergebnisse (2006) 0.09
    0.08535895 = sum of:
      0.08535895 = product of:
        0.3556623 = sum of:
          0.015364612 = weight(abstract_txt:einen in 219) [ClassicSimilarity], result of:
            0.015364612 = score(doc=219,freq=2.0), product of:
              0.06474743 = queryWeight, product of:
                4.2956023 = idf(docFreq=1577, maxDocs=42596)
                0.015072958 = queryNorm
              0.23730072 = fieldWeight in 219, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.2956023 = idf(docFreq=1577, maxDocs=42596)
                0.0390625 = fieldNorm(doc=219)
          0.03091874 = weight(abstract_txt:arbeiten in 219) [ClassicSimilarity], result of:
            0.03091874 = score(doc=219,freq=1.0), product of:
              0.13002673 = queryWeight, product of:
                1.4171149 = boost
                6.087362 = idf(docFreq=262, maxDocs=42596)
                0.015072958 = queryNorm
              0.23778757 = fieldWeight in 219, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.087362 = idf(docFreq=262, maxDocs=42596)
                0.0390625 = fieldNorm(doc=219)
          0.06758639 = weight(abstract_txt:grundlagen in 219) [ClassicSimilarity], result of:
            0.06758639 = score(doc=219,freq=4.0), product of:
              0.13796565 = queryWeight, product of:
                1.4597356 = boost
                6.270444 = idf(docFreq=218, maxDocs=42596)
                0.015072958 = queryNorm
              0.48987842 = fieldWeight in 219, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                6.270444 = idf(docFreq=218, maxDocs=42596)
                0.0390625 = fieldNorm(doc=219)
          0.07471265 = weight(abstract_txt:texten in 219) [ClassicSimilarity], result of:
            0.07471265 = score(doc=219,freq=2.0), product of:
              0.1858393 = queryWeight, product of:
                1.694172 = boost
                7.277489 = idf(docFreq=79, maxDocs=42596)
                0.015072958 = queryNorm
              0.40202826 = fieldWeight in 219, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                7.277489 = idf(docFreq=79, maxDocs=42596)
                0.0390625 = fieldNorm(doc=219)
          0.068765536 = weight(abstract_txt:linguistik in 219) [ClassicSimilarity], result of:
            0.068765536 = score(doc=219,freq=1.0), product of:
              0.22154672 = queryWeight, product of:
                1.8497856 = boost
                7.9459434 = idf(docFreq=40, maxDocs=42596)
                0.015072958 = queryNorm
              0.31038842 = fieldWeight in 219, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.9459434 = idf(docFreq=40, maxDocs=42596)
                0.0390625 = fieldNorm(doc=219)
          0.09831434 = weight(abstract_txt:statistik in 219) [ClassicSimilarity], result of:
            0.09831434 = score(doc=219,freq=1.0), product of:
              0.28116593 = queryWeight, product of:
                2.0838673 = boost
                8.951466 = idf(docFreq=14, maxDocs=42596)
                0.015072958 = queryNorm
              0.34966663 = fieldWeight in 219, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.951466 = idf(docFreq=14, maxDocs=42596)
                0.0390625 = fieldNorm(doc=219)
        0.24 = coord(6/25)
    
  3. Lanvent, A.: Licht im Daten Chaos (2004) 0.08
    0.08097257 = sum of:
      0.08097257 = product of:
        0.6747714 = sum of:
          0.05503142 = weight(abstract_txt:nach in 3807) [ClassicSimilarity], result of:
            0.05503142 = score(doc=3807,freq=1.0), product of:
              0.067111626 = queryWeight, product of:
                1.0180933 = boost
                4.373324 = idf(docFreq=1459, maxDocs=42596)
                0.015072958 = queryNorm
              0.81999826 = fieldWeight in 3807, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.373324 = idf(docFreq=1459, maxDocs=42596)
                0.1875 = fieldNorm(doc=3807)
          0.25358316 = weight(abstract_txt:texten in 3807) [ClassicSimilarity], result of:
            0.25358316 = score(doc=3807,freq=1.0), product of:
              0.1858393 = queryWeight, product of:
                1.694172 = boost
                7.277489 = idf(docFreq=79, maxDocs=42596)
                0.015072958 = queryNorm
              1.3645293 = fieldWeight in 3807, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.277489 = idf(docFreq=79, maxDocs=42596)
                0.1875 = fieldNorm(doc=3807)
          0.36615685 = weight(abstract_txt:mails in 3807) [ClassicSimilarity], result of:
            0.36615685 = score(doc=3807,freq=1.0), product of:
              0.23741168 = queryWeight, product of:
                1.914872 = boost
                8.225529 = idf(docFreq=30, maxDocs=42596)
                0.015072958 = queryNorm
              1.5422866 = fieldWeight in 3807, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.225529 = idf(docFreq=30, maxDocs=42596)
                0.1875 = fieldNorm(doc=3807)
        0.12 = coord(3/25)
    
  4. Internet Adressen : die 'Gelben Seiten' für das Internet (1996) 0.08
    0.080961175 = sum of:
      0.080961175 = product of:
        0.6746765 = sum of:
          0.06299781 = weight(abstract_txt:ihre in 4538) [ClassicSimilarity], result of:
            0.06299781 = score(doc=4538,freq=1.0), product of:
              0.082933195 = queryWeight, product of:
                1.1317563 = boost
                4.861575 = idf(docFreq=895, maxDocs=42596)
                0.015072958 = queryNorm
              0.75962114 = fieldWeight in 4538, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.861575 = idf(docFreq=895, maxDocs=42596)
                0.15625 = fieldNorm(doc=4538)
          0.26112118 = weight(abstract_txt:schnellen in 4538) [ClassicSimilarity], result of:
            0.26112118 = score(doc=4538,freq=1.0), product of:
              0.21399626 = queryWeight, product of:
                1.8179914 = boost
                7.809368 = idf(docFreq=46, maxDocs=42596)
                0.015072958 = queryNorm
              1.2202138 = fieldWeight in 4538, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.809368 = idf(docFreq=46, maxDocs=42596)
                0.15625 = fieldNorm(doc=4538)
          0.35055748 = weight(abstract_txt:sortiert in 4538) [ClassicSimilarity], result of:
            0.35055748 = score(doc=4538,freq=1.0), product of:
              0.26042596 = queryWeight, product of:
                2.0055377 = boost
                8.614993 = idf(docFreq=20, maxDocs=42596)
                0.015072958 = queryNorm
              1.3460927 = fieldWeight in 4538, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.614993 = idf(docFreq=20, maxDocs=42596)
                0.15625 = fieldNorm(doc=4538)
        0.12 = coord(3/25)
    
  5. Meyer, R.: Allein, es wär' so schön gewesen : Der Copernic Summarizer kann Internettexte leider nicht befriedigend und sinnvoll zusammenfassen (2002) 0.08
    0.07755339 = sum of:
      0.07755339 = product of:
        0.2769764 = sum of:
          0.015364612 = weight(abstract_txt:einen in 1649) [ClassicSimilarity], result of:
            0.015364612 = score(doc=1649,freq=2.0), product of:
              0.06474743 = queryWeight, product of:
                4.2956023 = idf(docFreq=1577, maxDocs=42596)
                0.015072958 = queryNorm
              0.23730072 = fieldWeight in 1649, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.2956023 = idf(docFreq=1577, maxDocs=42596)
                0.0390625 = fieldNorm(doc=1649)
          0.02252797 = weight(abstract_txt:software in 1649) [ClassicSimilarity], result of:
            0.02252797 = score(doc=1649,freq=4.0), product of:
              0.066325344 = queryWeight, product of:
                1.0121118 = boost
                4.3476295 = idf(docFreq=1497, maxDocs=42596)
                0.015072958 = queryNorm
              0.33965856 = fieldWeight in 1649, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                4.3476295 = idf(docFreq=1497, maxDocs=42596)
                0.0390625 = fieldNorm(doc=1649)
          0.016213786 = weight(abstract_txt:nach in 1649) [ClassicSimilarity], result of:
            0.016213786 = score(doc=1649,freq=2.0), product of:
              0.067111626 = queryWeight, product of:
                1.0180933 = boost
                4.373324 = idf(docFreq=1459, maxDocs=42596)
                0.015072958 = queryNorm
              0.2415943 = fieldWeight in 1649, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.373324 = idf(docFreq=1459, maxDocs=42596)
                0.0390625 = fieldNorm(doc=1649)
          0.017445276 = weight(abstract_txt:immer in 1649) [ClassicSimilarity], result of:
            0.017445276 = score(doc=1649,freq=1.0), product of:
              0.088784404 = queryWeight, product of:
                1.1710005 = boost
                5.0301523 = idf(docFreq=756, maxDocs=42596)
                0.015072958 = queryNorm
              0.19649032 = fieldWeight in 1649, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.0301523 = idf(docFreq=756, maxDocs=42596)
                0.0390625 = fieldNorm(doc=1649)
          0.043848712 = weight(abstract_txt:genau in 1649) [ClassicSimilarity], result of:
            0.043848712 = score(doc=1649,freq=1.0), product of:
              0.16413051 = queryWeight, product of:
                1.5921478 = boost
                6.839234 = idf(docFreq=123, maxDocs=42596)
                0.015072958 = queryNorm
              0.26715758 = fieldWeight in 1649, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.839234 = idf(docFreq=123, maxDocs=42596)
                0.0390625 = fieldNorm(doc=1649)
          0.09150393 = weight(abstract_txt:texten in 1649) [ClassicSimilarity], result of:
            0.09150393 = score(doc=1649,freq=3.0), product of:
              0.1858393 = queryWeight, product of:
                1.694172 = boost
                7.277489 = idf(docFreq=79, maxDocs=42596)
                0.015072958 = queryNorm
              0.49238205 = fieldWeight in 1649, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                7.277489 = idf(docFreq=79, maxDocs=42596)
                0.0390625 = fieldNorm(doc=1649)
          0.07007213 = weight(abstract_txt:sorgen in 1649) [ClassicSimilarity], result of:
            0.07007213 = score(doc=1649,freq=1.0), product of:
              0.22434427 = queryWeight, product of:
                1.8614279 = boost
                7.995954 = idf(docFreq=38, maxDocs=42596)
                0.015072958 = queryNorm
              0.31234196 = fieldWeight in 1649, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.995954 = idf(docFreq=38, maxDocs=42596)
                0.0390625 = fieldNorm(doc=1649)
        0.28 = coord(7/25)