Document (#16600)

Author
Huffman, S.
Title
Acquaintance : language-independent document categorization by n-grams
Source
The Fourth Text Retrieval Conference (TREC-4). Ed.: K. Harman
Imprint
Gaithersburg, MD : National Institute of Standards and Technology
Year
1996
Pages
S.359-371
Series
NIST special publication; 500-236
Object
TREC

Similar documents (content)

  1. Khoo, C.S.G.; Dai, D.; Loh, T.E.: Using statistical and contextual information to identify two- and three-character words in Chinese text (2002) 0.62
    0.6182407 = sum of:
      0.6182407 = product of:
        1.0304011 = sum of:
          0.077221036 = weight(abstract_txt:document in 5206) [ClassicSimilarity], result of:
            0.077221036 = score(doc=5206,freq=2.0), product of:
              0.20352575 = queryWeight, product of:
                1.0264271 = boost
                4.2926083 = idf(docFreq=1642, maxDocs=44218)
                0.04619234 = queryNorm
              0.37941656 = fieldWeight in 5206, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.2926083 = idf(docFreq=1642, maxDocs=44218)
                0.0625 = fieldNorm(doc=5206)
          0.19182245 = weight(abstract_txt:independent in 5206) [ClassicSimilarity], result of:
            0.19182245 = score(doc=5206,freq=2.0), product of:
              0.37330335 = queryWeight, product of:
                1.3901107 = boost
                5.813565 = idf(docFreq=358, maxDocs=44218)
                0.04619234 = queryNorm
              0.5138514 = fieldWeight in 5206, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.813565 = idf(docFreq=358, maxDocs=44218)
                0.0625 = fieldNorm(doc=5206)
          0.7613576 = weight(abstract_txt:grams in 5206) [ClassicSimilarity], result of:
            0.7613576 = score(doc=5206,freq=4.0), product of:
              0.74275357 = queryWeight, product of:
                1.960834 = boost
                8.200379 = idf(docFreq=32, maxDocs=44218)
                0.04619234 = queryNorm
              1.0250474 = fieldWeight in 5206, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                8.200379 = idf(docFreq=32, maxDocs=44218)
                0.0625 = fieldNorm(doc=5206)
        0.6 = coord(3/5)
    
  2. Juola, P.; Mikros, G.K.; Vinsick, S.: ¬A comparative assessment of the difficulty of authorship attribution in Greek and in English (2019) 0.51
    0.513864 = sum of:
      0.513864 = product of:
        0.64233 = sum of:
          0.07140871 = weight(abstract_txt:language in 4676) [ClassicSimilarity], result of:
            0.07140871 = score(doc=4676,freq=2.0), product of:
              0.19318041 = queryWeight, product of:
                4.1820874 = idf(docFreq=1834, maxDocs=44218)
                0.04619234 = queryNorm
              0.3696478 = fieldWeight in 4676, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.1820874 = idf(docFreq=1834, maxDocs=44218)
                0.0625 = fieldNorm(doc=4676)
          0.05460352 = weight(abstract_txt:document in 4676) [ClassicSimilarity], result of:
            0.05460352 = score(doc=4676,freq=1.0), product of:
              0.20352575 = queryWeight, product of:
                1.0264271 = boost
                4.2926083 = idf(docFreq=1642, maxDocs=44218)
                0.04619234 = queryNorm
              0.26828802 = fieldWeight in 4676, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.2926083 = idf(docFreq=1642, maxDocs=44218)
                0.0625 = fieldNorm(doc=4676)
          0.13563895 = weight(abstract_txt:independent in 4676) [ClassicSimilarity], result of:
            0.13563895 = score(doc=4676,freq=1.0), product of:
              0.37330335 = queryWeight, product of:
                1.3901107 = boost
                5.813565 = idf(docFreq=358, maxDocs=44218)
                0.04619234 = queryNorm
              0.3633478 = fieldWeight in 4676, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.813565 = idf(docFreq=358, maxDocs=44218)
                0.0625 = fieldNorm(doc=4676)
          0.3806788 = weight(abstract_txt:grams in 4676) [ClassicSimilarity], result of:
            0.3806788 = score(doc=4676,freq=1.0), product of:
              0.74275357 = queryWeight, product of:
                1.960834 = boost
                8.200379 = idf(docFreq=32, maxDocs=44218)
                0.04619234 = queryNorm
              0.5125237 = fieldWeight in 4676, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.200379 = idf(docFreq=32, maxDocs=44218)
                0.0625 = fieldNorm(doc=4676)
        0.8 = coord(4/5)
    
  3. Robertson, A.M.; Willett, P.: Applications of n-grams in textual information systems (1998) 0.41
    0.4121983 = sum of:
      0.4121983 = product of:
        1.0304958 = sum of:
          0.088363774 = weight(abstract_txt:language in 4715) [ClassicSimilarity], result of:
            0.088363774 = score(doc=4715,freq=1.0), product of:
              0.19318041 = queryWeight, product of:
                4.1820874 = idf(docFreq=1834, maxDocs=44218)
                0.04619234 = queryNorm
              0.45741582 = fieldWeight in 4715, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.1820874 = idf(docFreq=1834, maxDocs=44218)
                0.109375 = fieldNorm(doc=4715)
          0.94213194 = weight(abstract_txt:grams in 4715) [ClassicSimilarity], result of:
            0.94213194 = score(doc=4715,freq=2.0), product of:
              0.74275357 = queryWeight, product of:
                1.960834 = boost
                8.200379 = idf(docFreq=32, maxDocs=44218)
                0.04619234 = queryNorm
              1.2684314 = fieldWeight in 4715, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                8.200379 = idf(docFreq=32, maxDocs=44218)
                0.109375 = fieldNorm(doc=4715)
        0.4 = coord(2/5)
    
  4. Pearce, C.; Nicholas, C.: TELLTALE: Experiments in a dynamic hypertext environment for degraded and multilingual data (1996) 0.36
    0.36433193 = sum of:
      0.36433193 = product of:
        0.6072199 = sum of:
          0.06311698 = weight(abstract_txt:language in 4071) [ClassicSimilarity], result of:
            0.06311698 = score(doc=4071,freq=1.0), product of:
              0.19318041 = queryWeight, product of:
                4.1820874 = idf(docFreq=1834, maxDocs=44218)
                0.04619234 = queryNorm
              0.32672557 = fieldWeight in 4071, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.1820874 = idf(docFreq=1834, maxDocs=44218)
                0.078125 = fieldNorm(doc=4071)
          0.068254404 = weight(abstract_txt:document in 4071) [ClassicSimilarity], result of:
            0.068254404 = score(doc=4071,freq=1.0), product of:
              0.20352575 = queryWeight, product of:
                1.0264271 = boost
                4.2926083 = idf(docFreq=1642, maxDocs=44218)
                0.04619234 = queryNorm
              0.33536002 = fieldWeight in 4071, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.2926083 = idf(docFreq=1642, maxDocs=44218)
                0.078125 = fieldNorm(doc=4071)
          0.4758485 = weight(abstract_txt:grams in 4071) [ClassicSimilarity], result of:
            0.4758485 = score(doc=4071,freq=1.0), product of:
              0.74275357 = queryWeight, product of:
                1.960834 = boost
                8.200379 = idf(docFreq=32, maxDocs=44218)
                0.04619234 = queryNorm
              0.6406546 = fieldWeight in 4071, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.200379 = idf(docFreq=32, maxDocs=44218)
                0.078125 = fieldNorm(doc=4071)
        0.6 = coord(3/5)
    
  5. Ahmed, F.; Nürnberger, A.: Evaluation of n-gram conflation approaches for Arabic text retrieval (2009) 0.36
    0.36226514 = sum of:
      0.36226514 = product of:
        0.6037752 = sum of:
          0.087457456 = weight(abstract_txt:language in 2941) [ClassicSimilarity], result of:
            0.087457456 = score(doc=2941,freq=3.0), product of:
              0.19318041 = queryWeight, product of:
                4.1820874 = idf(docFreq=1834, maxDocs=44218)
                0.04619234 = queryNorm
              0.45272425 = fieldWeight in 2941, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                4.1820874 = idf(docFreq=1834, maxDocs=44218)
                0.0625 = fieldNorm(doc=2941)
          0.13563895 = weight(abstract_txt:independent in 2941) [ClassicSimilarity], result of:
            0.13563895 = score(doc=2941,freq=1.0), product of:
              0.37330335 = queryWeight, product of:
                1.3901107 = boost
                5.813565 = idf(docFreq=358, maxDocs=44218)
                0.04619234 = queryNorm
              0.3633478 = fieldWeight in 2941, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.813565 = idf(docFreq=358, maxDocs=44218)
                0.0625 = fieldNorm(doc=2941)
          0.3806788 = weight(abstract_txt:grams in 2941) [ClassicSimilarity], result of:
            0.3806788 = score(doc=2941,freq=1.0), product of:
              0.74275357 = queryWeight, product of:
                1.960834 = boost
                8.200379 = idf(docFreq=32, maxDocs=44218)
                0.04619234 = queryNorm
              0.5125237 = fieldWeight in 2941, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.200379 = idf(docFreq=32, maxDocs=44218)
                0.0625 = fieldNorm(doc=2941)
        0.6 = coord(3/5)