Document (#37963)

Author
Ramisch, C.
Schreiner, P.
Idiart, M.
Villavicencio, A.
Title
An evaluation of methods for the extraction of multiword expressions
Source
http://www.inf.ufrgs.br/pln/nlp/papers/mwe-08-lrec.pdf
Year
2008
Abstract
This paper focuses on the evaluation of some methods for the automatic acquisition of Multiword Expressions (MWEs). First, we investigate the hypothesis that MWEs can be detected solely by the distinct statistical properties of their component words, regardless of their type, comparing 3 statistical measures: Mutual Information, χ² and Permutation Entropy. Moreover, we also look at the impact that the addition of type-specific linguistic information has on the performance of these methods.
Theme
Computerlinguistik

Similar documents (content)

  1. Nagy T., I.: Detecting multiword expressions and named entities in natural language texts (2014) 0.40
    0.39792904 = sum of:
      0.39792904 = product of:
        1.2435282 = sum of:
          0.023422467 = weight(abstract_txt:linguistic in 1536) [ClassicSimilarity], result of:
            0.023422467 = score(doc=1536,freq=1.0), product of:
              0.102942295 = queryWeight, product of:
                1.0805396 = boost
                5.8247695 = idf(docFreq=354, maxDocs=44218)
                0.0163559 = queryNorm
              0.22753006 = fieldWeight in 1536, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.8247695 = idf(docFreq=354, maxDocs=44218)
                0.0390625 = fieldNorm(doc=1536)
          0.04872551 = weight(abstract_txt:extraction in 1536) [ClassicSimilarity], result of:
            0.04872551 = score(doc=1536,freq=3.0), product of:
              0.11631505 = queryWeight, product of:
                1.1485811 = boost
                6.1915555 = idf(docFreq=245, maxDocs=44218)
                0.0163559 = queryNorm
              0.41890973 = fieldWeight in 1536, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                6.1915555 = idf(docFreq=245, maxDocs=44218)
                0.0390625 = fieldNorm(doc=1536)
          0.01057295 = weight(abstract_txt:their in 1536) [ClassicSimilarity], result of:
            0.01057295 = score(doc=1536,freq=2.0), product of:
              0.060576323 = queryWeight, product of:
                1.1722229 = boost
                3.1594994 = idf(docFreq=5101, maxDocs=44218)
                0.0163559 = queryNorm
              0.17453933 = fieldWeight in 1536, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                3.1594994 = idf(docFreq=5101, maxDocs=44218)
                0.0390625 = fieldNorm(doc=1536)
          0.040442023 = weight(abstract_txt:statistical in 1536) [ClassicSimilarity], result of:
            0.040442023 = score(doc=1536,freq=1.0), product of:
              0.18666835 = queryWeight, product of:
                2.057757 = boost
                5.5462847 = idf(docFreq=468, maxDocs=44218)
                0.0163559 = queryNorm
              0.21665174 = fieldWeight in 1536, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.5462847 = idf(docFreq=468, maxDocs=44218)
                0.0390625 = fieldNorm(doc=1536)
          0.035855588 = weight(abstract_txt:methods in 1536) [ClassicSimilarity], result of:
            0.035855588 = score(doc=1536,freq=2.0), product of:
              0.15652141 = queryWeight, product of:
                2.307763 = boost
                4.146752 = idf(docFreq=1900, maxDocs=44218)
                0.0163559 = queryNorm
              0.22907785 = fieldWeight in 1536, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.146752 = idf(docFreq=1900, maxDocs=44218)
                0.0390625 = fieldNorm(doc=1536)
          0.21003424 = weight(abstract_txt:expressions in 1536) [ClassicSimilarity], result of:
            0.21003424 = score(doc=1536,freq=8.0), product of:
              0.27990597 = queryWeight, product of:
                2.5197928 = boost
                6.7916126 = idf(docFreq=134, maxDocs=44218)
                0.0163559 = queryNorm
              0.75037426 = fieldWeight in 1536, product of:
                2.828427 = tf(freq=8.0), with freq of:
                  8.0 = termFreq=8.0
                6.7916126 = idf(docFreq=134, maxDocs=44218)
                0.0390625 = fieldNorm(doc=1536)
          0.5092456 = weight(abstract_txt:multiword in 1536) [ClassicSimilarity], result of:
            0.5092456 = score(doc=1536,freq=11.0), product of:
              0.45429298 = queryWeight, product of:
                3.2101605 = boost
                8.652365 = idf(docFreq=20, maxDocs=44218)
                0.0163559 = queryNorm
              1.1209629 = fieldWeight in 1536, product of:
                3.3166249 = tf(freq=11.0), with freq of:
                  11.0 = termFreq=11.0
                8.652365 = idf(docFreq=20, maxDocs=44218)
                0.0390625 = fieldNorm(doc=1536)
          0.36522987 = weight(abstract_txt:mwes in 1536) [ClassicSimilarity], result of:
            0.36522987 = score(doc=1536,freq=3.0), product of:
              0.5612881 = queryWeight, product of:
                3.5682204 = boost
                9.617446 = idf(docFreq=7, maxDocs=44218)
                0.0163559 = queryNorm
              0.65069944 = fieldWeight in 1536, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                9.617446 = idf(docFreq=7, maxDocs=44218)
                0.0390625 = fieldNorm(doc=1536)
        0.32 = coord(8/25)
    
  2. Cruys, T. van de; Moirón, B.V.: Semantics-based multiword expression extraction (2007) 0.36
    0.35798395 = sum of:
      0.35798395 = product of:
        1.4915998 = sum of:
          0.06459756 = weight(abstract_txt:measures in 2919) [ClassicSimilarity], result of:
            0.06459756 = score(doc=2919,freq=2.0), product of:
              0.08963935 = queryWeight, product of:
                1.0083077 = boost
                5.4353957 = idf(docFreq=523, maxDocs=44218)
                0.0163559 = queryNorm
              0.72063845 = fieldWeight in 2919, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.4353957 = idf(docFreq=523, maxDocs=44218)
                0.09375 = fieldNorm(doc=2919)
          0.067516044 = weight(abstract_txt:extraction in 2919) [ClassicSimilarity], result of:
            0.067516044 = score(doc=2919,freq=1.0), product of:
              0.11631505 = queryWeight, product of:
                1.1485811 = boost
                6.1915555 = idf(docFreq=245, maxDocs=44218)
                0.0163559 = queryNorm
              0.58045834 = fieldWeight in 2919, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.1915555 = idf(docFreq=245, maxDocs=44218)
                0.09375 = fieldNorm(doc=2919)
          0.09706087 = weight(abstract_txt:statistical in 2919) [ClassicSimilarity], result of:
            0.09706087 = score(doc=2919,freq=1.0), product of:
              0.18666835 = queryWeight, product of:
                2.057757 = boost
                5.5462847 = idf(docFreq=468, maxDocs=44218)
                0.0163559 = queryNorm
              0.5199642 = fieldWeight in 2919, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.5462847 = idf(docFreq=468, maxDocs=44218)
                0.09375 = fieldNorm(doc=2919)
          0.17821996 = weight(abstract_txt:expressions in 2919) [ClassicSimilarity], result of:
            0.17821996 = score(doc=2919,freq=1.0), product of:
              0.27990597 = queryWeight, product of:
                2.5197928 = boost
                6.7916126 = idf(docFreq=134, maxDocs=44218)
                0.0163559 = queryNorm
              0.6367137 = fieldWeight in 2919, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.7916126 = idf(docFreq=134, maxDocs=44218)
                0.09375 = fieldNorm(doc=2919)
          0.36850393 = weight(abstract_txt:multiword in 2919) [ClassicSimilarity], result of:
            0.36850393 = score(doc=2919,freq=1.0), product of:
              0.45429298 = queryWeight, product of:
                3.2101605 = boost
                8.652365 = idf(docFreq=20, maxDocs=44218)
                0.0163559 = queryNorm
              0.8111592 = fieldWeight in 2919, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.652365 = idf(docFreq=20, maxDocs=44218)
                0.09375 = fieldNorm(doc=2919)
          0.7157014 = weight(abstract_txt:mwes in 2919) [ClassicSimilarity], result of:
            0.7157014 = score(doc=2919,freq=2.0), product of:
              0.5612881 = queryWeight, product of:
                3.5682204 = boost
                9.617446 = idf(docFreq=7, maxDocs=44218)
                0.0163559 = queryNorm
              1.2751052 = fieldWeight in 2919, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                9.617446 = idf(docFreq=7, maxDocs=44218)
                0.09375 = fieldNorm(doc=2919)
        0.24 = coord(6/25)
    
  3. Ramisch, C.: Multiword expressions acquisition : a generic and open framework (2015) 0.30
    0.29848862 = sum of:
      0.29848862 = product of:
        1.0660307 = sum of:
          0.037475947 = weight(abstract_txt:linguistic in 1649) [ClassicSimilarity], result of:
            0.037475947 = score(doc=1649,freq=1.0), product of:
              0.102942295 = queryWeight, product of:
                1.0805396 = boost
                5.8247695 = idf(docFreq=354, maxDocs=44218)
                0.0163559 = queryNorm
              0.3640481 = fieldWeight in 1649, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.8247695 = idf(docFreq=354, maxDocs=44218)
                0.0625 = fieldNorm(doc=1649)
          0.06507612 = weight(abstract_txt:acquisition in 1649) [ClassicSimilarity], result of:
            0.06507612 = score(doc=1649,freq=2.0), product of:
              0.11804018 = queryWeight, product of:
                1.1570674 = boost
                6.237302 = idf(docFreq=234, maxDocs=44218)
                0.0163559 = queryNorm
              0.5513048 = fieldWeight in 1649, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                6.237302 = idf(docFreq=234, maxDocs=44218)
                0.0625 = fieldNorm(doc=1649)
          0.06706984 = weight(abstract_txt:regardless in 1649) [ClassicSimilarity], result of:
            0.06706984 = score(doc=1649,freq=1.0), product of:
              0.15174358 = queryWeight, product of:
                1.3118944 = boost
                7.071914 = idf(docFreq=101, maxDocs=44218)
                0.0163559 = queryNorm
              0.44199464 = fieldWeight in 1649, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.071914 = idf(docFreq=101, maxDocs=44218)
                0.0625 = fieldNorm(doc=1649)
          0.03424082 = weight(abstract_txt:evaluation in 1649) [ClassicSimilarity], result of:
            0.03424082 = score(doc=1649,freq=1.0), product of:
              0.122123204 = queryWeight, product of:
                1.6644005 = boost
                4.4860687 = idf(docFreq=1353, maxDocs=44218)
                0.0163559 = queryNorm
              0.2803793 = fieldWeight in 1649, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.4860687 = idf(docFreq=1353, maxDocs=44218)
                0.0625 = fieldNorm(doc=1649)
          0.047160003 = weight(abstract_txt:type in 1649) [ClassicSimilarity], result of:
            0.047160003 = score(doc=1649,freq=1.0), product of:
              0.15117663 = queryWeight, product of:
                1.8518298 = boost
                4.991248 = idf(docFreq=816, maxDocs=44218)
                0.0163559 = queryNorm
              0.311953 = fieldWeight in 1649, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.991248 = idf(docFreq=816, maxDocs=44218)
                0.0625 = fieldNorm(doc=1649)
          0.26567465 = weight(abstract_txt:expressions in 1649) [ClassicSimilarity], result of:
            0.26567465 = score(doc=1649,freq=5.0), product of:
              0.27990597 = queryWeight, product of:
                2.5197928 = boost
                6.7916126 = idf(docFreq=134, maxDocs=44218)
                0.0163559 = queryNorm
              0.94915676 = fieldWeight in 1649, product of:
                2.236068 = tf(freq=5.0), with freq of:
                  5.0 = termFreq=5.0
                6.7916126 = idf(docFreq=134, maxDocs=44218)
                0.0625 = fieldNorm(doc=1649)
          0.5493333 = weight(abstract_txt:multiword in 1649) [ClassicSimilarity], result of:
            0.5493333 = score(doc=1649,freq=5.0), product of:
              0.45429298 = queryWeight, product of:
                3.2101605 = boost
                8.652365 = idf(docFreq=20, maxDocs=44218)
                0.0163559 = queryNorm
              1.2092048 = fieldWeight in 1649, product of:
                2.236068 = tf(freq=5.0), with freq of:
                  5.0 = termFreq=5.0
                8.652365 = idf(docFreq=20, maxDocs=44218)
                0.0625 = fieldNorm(doc=1649)
        0.28 = coord(7/25)
    
  4. Nissim, M.; Zaninello, A.: Modeling the internal variability of multiword expressions through a pattern-based method (2013) 0.27
    0.27487695 = sum of:
      0.27487695 = product of:
        1.1453207 = sum of:
          0.030451585 = weight(abstract_txt:measures in 990) [ClassicSimilarity], result of:
            0.030451585 = score(doc=990,freq=1.0), product of:
              0.08963935 = queryWeight, product of:
                1.0083077 = boost
                5.4353957 = idf(docFreq=523, maxDocs=44218)
                0.0163559 = queryNorm
              0.33971223 = fieldWeight in 990, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.4353957 = idf(docFreq=523, maxDocs=44218)
                0.0625 = fieldNorm(doc=990)
          0.063654736 = weight(abstract_txt:extraction in 990) [ClassicSimilarity], result of:
            0.063654736 = score(doc=990,freq=2.0), product of:
              0.11631505 = queryWeight, product of:
                1.1485811 = boost
                6.1915555 = idf(docFreq=245, maxDocs=44218)
                0.0163559 = queryNorm
              0.54726136 = fieldWeight in 990, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                6.1915555 = idf(docFreq=245, maxDocs=44218)
                0.0625 = fieldNorm(doc=990)
          0.011961929 = weight(abstract_txt:their in 990) [ClassicSimilarity], result of:
            0.011961929 = score(doc=990,freq=1.0), product of:
              0.060576323 = queryWeight, product of:
                1.1722229 = boost
                3.1594994 = idf(docFreq=5101, maxDocs=44218)
                0.0163559 = queryNorm
              0.19746871 = fieldWeight in 990, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                3.1594994 = idf(docFreq=5101, maxDocs=44218)
                0.0625 = fieldNorm(doc=990)
          0.118813306 = weight(abstract_txt:expressions in 990) [ClassicSimilarity], result of:
            0.118813306 = score(doc=990,freq=1.0), product of:
              0.27990597 = queryWeight, product of:
                2.5197928 = boost
                6.7916126 = idf(docFreq=134, maxDocs=44218)
                0.0163559 = queryNorm
              0.4244758 = fieldWeight in 990, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.7916126 = idf(docFreq=134, maxDocs=44218)
                0.0625 = fieldNorm(doc=990)
          0.24566929 = weight(abstract_txt:multiword in 990) [ClassicSimilarity], result of:
            0.24566929 = score(doc=990,freq=1.0), product of:
              0.45429298 = queryWeight, product of:
                3.2101605 = boost
                8.652365 = idf(docFreq=20, maxDocs=44218)
                0.0163559 = queryNorm
              0.5407728 = fieldWeight in 990, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.652365 = idf(docFreq=20, maxDocs=44218)
                0.0625 = fieldNorm(doc=990)
          0.67476976 = weight(abstract_txt:mwes in 990) [ClassicSimilarity], result of:
            0.67476976 = score(doc=990,freq=4.0), product of:
              0.5612881 = queryWeight, product of:
                3.5682204 = boost
                9.617446 = idf(docFreq=7, maxDocs=44218)
                0.0163559 = queryNorm
              1.2021807 = fieldWeight in 990, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                9.617446 = idf(docFreq=7, maxDocs=44218)
                0.0625 = fieldNorm(doc=990)
        0.24 = coord(6/25)
    
  5. Snajder, J.; Almic, P.: Modeling semantic compositionality of Croatian multiword expressions (2015) 0.23
    0.23059493 = sum of:
      0.23059493 = product of:
        1.4412184 = sum of:
          0.017942894 = weight(abstract_txt:their in 2920) [ClassicSimilarity], result of:
            0.017942894 = score(doc=2920,freq=1.0), product of:
              0.060576323 = queryWeight, product of:
                1.1722229 = boost
                3.1594994 = idf(docFreq=5101, maxDocs=44218)
                0.0163559 = queryNorm
              0.29620308 = fieldWeight in 2920, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                3.1594994 = idf(docFreq=5101, maxDocs=44218)
                0.09375 = fieldNorm(doc=2920)
          0.17821996 = weight(abstract_txt:expressions in 2920) [ClassicSimilarity], result of:
            0.17821996 = score(doc=2920,freq=1.0), product of:
              0.27990597 = queryWeight, product of:
                2.5197928 = boost
                6.7916126 = idf(docFreq=134, maxDocs=44218)
                0.0163559 = queryNorm
              0.6367137 = fieldWeight in 2920, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.7916126 = idf(docFreq=134, maxDocs=44218)
                0.09375 = fieldNorm(doc=2920)
          0.36850393 = weight(abstract_txt:multiword in 2920) [ClassicSimilarity], result of:
            0.36850393 = score(doc=2920,freq=1.0), product of:
              0.45429298 = queryWeight, product of:
                3.2101605 = boost
                8.652365 = idf(docFreq=20, maxDocs=44218)
                0.0163559 = queryNorm
              0.8111592 = fieldWeight in 2920, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.652365 = idf(docFreq=20, maxDocs=44218)
                0.09375 = fieldNorm(doc=2920)
          0.8765517 = weight(abstract_txt:mwes in 2920) [ClassicSimilarity], result of:
            0.8765517 = score(doc=2920,freq=3.0), product of:
              0.5612881 = queryWeight, product of:
                3.5682204 = boost
                9.617446 = idf(docFreq=7, maxDocs=44218)
                0.0163559 = queryNorm
              1.5616786 = fieldWeight in 2920, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                9.617446 = idf(docFreq=7, maxDocs=44218)
                0.09375 = fieldNorm(doc=2920)
        0.16 = coord(4/25)