Document (#37964)

Author
Ramisch, C.
Schreiner, P.
Idiart, M.
Villavicencio, A.
Title
An evaluation of methods for the extraction of multiword expressions
Source
http://www.inf.ufrgs.br/pln/nlp/papers/mwe-08-lrec.pdf
Year
2008
Abstract
This paper focuses on the evaluation of some methods for the automatic acquisition of Multiword Expressions (MWEs). First, we investigate the hypothesis that MWEs can be detected solely by the distinct statistical properties of their component words, regardless of their type, comparing three statistical measures: Mutual Information, χ² and Permutation Entropy. Moreover, we also look at the impact that the addition of type-specific linguistic information has on the performance of these methods.
Theme
Computerlinguistik

Similar documents (content)

  1. Nagy T., I.: Detecting multiword expressions and named entities in natural language texts (2014) 0.40
    0.3965732 = sum of:
      0.3965732 = product of:
        1.2392912 = sum of:
          0.023765292 = weight(abstract_txt:linguistic in 3537) [ClassicSimilarity], result of:
            0.023765292 = score(doc=3537,freq=1.0), product of:
              0.1039142 = queryWeight, product of:
                1.0781347 = boost
                5.8547482 = idf(docFreq=332, maxDocs=42740)
                0.01646242 = queryNorm
              0.2287011 = fieldWeight in 3537, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.8547482 = idf(docFreq=332, maxDocs=42740)
                0.0390625 = fieldNorm(doc=3537)
          0.049265653 = weight(abstract_txt:extraction in 3537) [ClassicSimilarity], result of:
            0.049265653 = score(doc=3537,freq=3.0), product of:
              0.11713909 = queryWeight, product of:
                1.1446863 = boost
                6.216153 = idf(docFreq=231, maxDocs=42740)
                0.01646242 = queryNorm
              0.42057395 = fieldWeight in 3537, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                6.216153 = idf(docFreq=231, maxDocs=42740)
                0.0390625 = fieldNorm(doc=3537)
          0.010894756 = weight(abstract_txt:their in 3537) [ClassicSimilarity], result of:
            0.010894756 = score(doc=3537,freq=2.0), product of:
              0.061781406 = queryWeight, product of:
                1.1756538 = boost
                3.1921601 = idf(docFreq=4772, maxDocs=42740)
                0.01646242 = queryNorm
              0.1763436 = fieldWeight in 3537, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                3.1921601 = idf(docFreq=4772, maxDocs=42740)
                0.0390625 = fieldNorm(doc=3537)
          0.040374257 = weight(abstract_txt:statistical in 3537) [ClassicSimilarity], result of:
            0.040374257 = score(doc=3537,freq=1.0), product of:
              0.18640569 = queryWeight, product of:
                2.0421147 = boost
                5.544793 = idf(docFreq=453, maxDocs=42740)
                0.01646242 = queryNorm
              0.21659347 = fieldWeight in 3537, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.544793 = idf(docFreq=453, maxDocs=42740)
                0.0390625 = fieldNorm(doc=3537)
          0.036492225 = weight(abstract_txt:methods in 3537) [ClassicSimilarity], result of:
            0.036492225 = score(doc=3537,freq=2.0), product of:
              0.15832278 = queryWeight, product of:
                2.3049839 = boost
                4.172361 = idf(docFreq=1790, maxDocs=42740)
                0.01646242 = queryNorm
              0.23049256 = fieldWeight in 3537, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                4.172361 = idf(docFreq=1790, maxDocs=42740)
                0.0390625 = fieldNorm(doc=3537)
          0.2146143 = weight(abstract_txt:expressions in 3537) [ClassicSimilarity], result of:
            0.2146143 = score(doc=3537,freq=8.0), product of:
              0.28387812 = queryWeight, product of:
                2.5200927 = boost
                6.842609 = idf(docFreq=123, maxDocs=42740)
                0.01646242 = queryNorm
              0.7560086 = fieldWeight in 3537, product of:
                2.828427 = tf(freq=8.0), with freq of:
                  8.0 = termFreq=8.0
                6.842609 = idf(docFreq=123, maxDocs=42740)
                0.0390625 = fieldNorm(doc=3537)
          0.50282866 = weight(abstract_txt:multiword in 3537) [ClassicSimilarity], result of:
            0.50282866 = score(doc=3537,freq=11.0), product of:
              0.45033795 = queryWeight, product of:
                3.1740944 = boost
                8.618368 = idf(docFreq=20, maxDocs=42740)
                0.01646242 = queryNorm
              1.1165584 = fieldWeight in 3537, product of:
                3.3166249 = tf(freq=11.0), with freq of:
                  11.0 = termFreq=11.0
                8.618368 = idf(docFreq=20, maxDocs=42740)
                0.0390625 = fieldNorm(doc=3537)
          0.36105606 = weight(abstract_txt:mwes in 3537) [ClassicSimilarity], result of:
            0.36105606 = score(doc=3537,freq=3.0), product of:
              0.5568422 = queryWeight, product of:
                3.5295281 = boost
                9.583449 = idf(docFreq=7, maxDocs=42740)
                0.01646242 = queryNorm
              0.64839923 = fieldWeight in 3537, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                9.583449 = idf(docFreq=7, maxDocs=42740)
                0.0390625 = fieldNorm(doc=3537)
        0.32 = coord(8/25)
    
  2. Cruys, T. van de; Moirón, B.V.: Semantics-based multiword expression extraction (2007) 0.36
    0.3559241 = sum of:
      0.3559241 = product of:
        1.4830171 = sum of:
          0.06436509 = weight(abstract_txt:measures in 4920) [ClassicSimilarity], result of:
            0.06436509 = score(doc=4920,freq=2.0), product of:
              0.08939822 = queryWeight, product of:
                5.4304423 = idf(docFreq=508, maxDocs=42740)
                0.01646242 = queryNorm
              0.71998173 = fieldWeight in 4920, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                5.4304423 = idf(docFreq=508, maxDocs=42740)
                0.09375 = fieldNorm(doc=4920)
          0.06826449 = weight(abstract_txt:extraction in 4920) [ClassicSimilarity], result of:
            0.06826449 = score(doc=4920,freq=1.0), product of:
              0.11713909 = queryWeight, product of:
                1.1446863 = boost
                6.216153 = idf(docFreq=231, maxDocs=42740)
                0.01646242 = queryNorm
              0.5827644 = fieldWeight in 4920, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.216153 = idf(docFreq=231, maxDocs=42740)
                0.09375 = fieldNorm(doc=4920)
          0.09689822 = weight(abstract_txt:statistical in 4920) [ClassicSimilarity], result of:
            0.09689822 = score(doc=4920,freq=1.0), product of:
              0.18640569 = queryWeight, product of:
                2.0421147 = boost
                5.544793 = idf(docFreq=453, maxDocs=42740)
                0.01646242 = queryNorm
              0.5198244 = fieldWeight in 4920, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.544793 = idf(docFreq=453, maxDocs=42740)
                0.09375 = fieldNorm(doc=4920)
          0.18210627 = weight(abstract_txt:expressions in 4920) [ClassicSimilarity], result of:
            0.18210627 = score(doc=4920,freq=1.0), product of:
              0.28387812 = queryWeight, product of:
                2.5200927 = boost
                6.842609 = idf(docFreq=123, maxDocs=42740)
                0.01646242 = queryNorm
              0.6414946 = fieldWeight in 4920, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.842609 = idf(docFreq=123, maxDocs=42740)
                0.09375 = fieldNorm(doc=4920)
          0.36386046 = weight(abstract_txt:multiword in 4920) [ClassicSimilarity], result of:
            0.36386046 = score(doc=4920,freq=1.0), product of:
              0.45033795 = queryWeight, product of:
                3.1740944 = boost
                8.618368 = idf(docFreq=20, maxDocs=42740)
                0.01646242 = queryNorm
              0.807972 = fieldWeight in 4920, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.618368 = idf(docFreq=20, maxDocs=42740)
                0.09375 = fieldNorm(doc=4920)
          0.7075225 = weight(abstract_txt:mwes in 4920) [ClassicSimilarity], result of:
            0.7075225 = score(doc=4920,freq=2.0), product of:
              0.5568422 = queryWeight, product of:
                3.5295281 = boost
                9.583449 = idf(docFreq=7, maxDocs=42740)
                0.01646242 = queryNorm
              1.2705978 = fieldWeight in 4920, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                9.583449 = idf(docFreq=7, maxDocs=42740)
                0.09375 = fieldNorm(doc=4920)
        0.24 = coord(6/25)
    
  3. Ramisch, C.: Multiword expressions acquisition : a generic and open framework (2015) 0.30
    0.29916745 = sum of:
      0.29916745 = product of:
        1.0684552 = sum of:
          0.038024466 = weight(abstract_txt:linguistic in 3650) [ClassicSimilarity], result of:
            0.038024466 = score(doc=3650,freq=1.0), product of:
              0.1039142 = queryWeight, product of:
                1.0781347 = boost
                5.8547482 = idf(docFreq=332, maxDocs=42740)
                0.01646242 = queryNorm
              0.36592177 = fieldWeight in 3650, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.8547482 = idf(docFreq=332, maxDocs=42740)
                0.0625 = fieldNorm(doc=3650)
          0.065738656 = weight(abstract_txt:acquisition in 3650) [ClassicSimilarity], result of:
            0.065738656 = score(doc=3650,freq=2.0), product of:
              0.11880554 = queryWeight, product of:
                1.1527998 = boost
                6.2602134 = idf(docFreq=221, maxDocs=42740)
                0.01646242 = queryNorm
              0.5533299 = fieldWeight in 3650, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                6.2602134 = idf(docFreq=221, maxDocs=42740)
                0.0625 = fieldNorm(doc=3650)
          0.06868474 = weight(abstract_txt:regardless in 3650) [ClassicSimilarity], result of:
            0.06868474 = score(doc=3650,freq=1.0), product of:
              0.15412496 = queryWeight, product of:
                1.3130221 = boost
                7.130291 = idf(docFreq=92, maxDocs=42740)
                0.01646242 = queryNorm
              0.4456432 = fieldWeight in 3650, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                7.130291 = idf(docFreq=92, maxDocs=42740)
                0.0625 = fieldNorm(doc=3650)
          0.034364603 = weight(abstract_txt:evaluation in 3650) [ClassicSimilarity], result of:
            0.034364603 = score(doc=3650,freq=1.0), product of:
              0.12238185 = queryWeight, product of:
                1.6546612 = boost
                4.492771 = idf(docFreq=1299, maxDocs=42740)
                0.01646242 = queryNorm
              0.2807982 = fieldWeight in 3650, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                4.492771 = idf(docFreq=1299, maxDocs=42740)
                0.0625 = fieldNorm(doc=3650)
          0.047763597 = weight(abstract_txt:type in 3650) [ClassicSimilarity], result of:
            0.047763597 = score(doc=3650,freq=1.0), product of:
              0.1524196 = queryWeight, product of:
                1.8465921 = boost
                5.013906 = idf(docFreq=771, maxDocs=42740)
                0.01646242 = queryNorm
              0.31336913 = fieldWeight in 3650, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.013906 = idf(docFreq=771, maxDocs=42740)
                0.0625 = fieldNorm(doc=3650)
          0.271468 = weight(abstract_txt:expressions in 3650) [ClassicSimilarity], result of:
            0.271468 = score(doc=3650,freq=5.0), product of:
              0.28387812 = queryWeight, product of:
                2.5200927 = boost
                6.842609 = idf(docFreq=123, maxDocs=42740)
                0.01646242 = queryNorm
              0.9562837 = fieldWeight in 3650, product of:
                2.236068 = tf(freq=5.0), with freq of:
                  5.0 = termFreq=5.0
                6.842609 = idf(docFreq=123, maxDocs=42740)
                0.0625 = fieldNorm(doc=3650)
          0.54241115 = weight(abstract_txt:multiword in 3650) [ClassicSimilarity], result of:
            0.54241115 = score(doc=3650,freq=5.0), product of:
              0.45033795 = queryWeight, product of:
                3.1740944 = boost
                8.618368 = idf(docFreq=20, maxDocs=42740)
                0.01646242 = queryNorm
              1.2044536 = fieldWeight in 3650, product of:
                2.236068 = tf(freq=5.0), with freq of:
                  5.0 = termFreq=5.0
                8.618368 = idf(docFreq=20, maxDocs=42740)
                0.0625 = fieldNorm(doc=3650)
        0.28 = coord(7/25)
    
  4. Nissim, M.; Zaninello, A.: Modeling the internal variability of multiword expressions through a pattern-based method (2013) 0.27
    0.27313557 = sum of:
      0.27313557 = product of:
        1.1380649 = sum of:
          0.030341992 = weight(abstract_txt:measures in 2991) [ClassicSimilarity], result of:
            0.030341992 = score(doc=2991,freq=1.0), product of:
              0.08939822 = queryWeight, product of:
                5.4304423 = idf(docFreq=508, maxDocs=42740)
                0.01646242 = queryNorm
              0.33940265 = fieldWeight in 2991, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                5.4304423 = idf(docFreq=508, maxDocs=42740)
                0.0625 = fieldNorm(doc=2991)
          0.06436037 = weight(abstract_txt:extraction in 2991) [ClassicSimilarity], result of:
            0.06436037 = score(doc=2991,freq=2.0), product of:
              0.11713909 = queryWeight, product of:
                1.1446863 = boost
                6.216153 = idf(docFreq=231, maxDocs=42740)
                0.01646242 = queryNorm
              0.5494355 = fieldWeight in 2991, product of:
                1.4142135 = tf(freq=2.0), with freq of:
                  2.0 = termFreq=2.0
                6.216153 = idf(docFreq=231, maxDocs=42740)
                0.0625 = fieldNorm(doc=2991)
          0.012326009 = weight(abstract_txt:their in 2991) [ClassicSimilarity], result of:
            0.012326009 = score(doc=2991,freq=1.0), product of:
              0.061781406 = queryWeight, product of:
                1.1756538 = boost
                3.1921601 = idf(docFreq=4772, maxDocs=42740)
                0.01646242 = queryNorm
              0.19951001 = fieldWeight in 2991, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                3.1921601 = idf(docFreq=4772, maxDocs=42740)
                0.0625 = fieldNorm(doc=2991)
          0.121404186 = weight(abstract_txt:expressions in 2991) [ClassicSimilarity], result of:
            0.121404186 = score(doc=2991,freq=1.0), product of:
              0.28387812 = queryWeight, product of:
                2.5200927 = boost
                6.842609 = idf(docFreq=123, maxDocs=42740)
                0.01646242 = queryNorm
              0.42766306 = fieldWeight in 2991, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.842609 = idf(docFreq=123, maxDocs=42740)
                0.0625 = fieldNorm(doc=2991)
          0.24257363 = weight(abstract_txt:multiword in 2991) [ClassicSimilarity], result of:
            0.24257363 = score(doc=2991,freq=1.0), product of:
              0.45033795 = queryWeight, product of:
                3.1740944 = boost
                8.618368 = idf(docFreq=20, maxDocs=42740)
                0.01646242 = queryNorm
              0.538648 = fieldWeight in 2991, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.618368 = idf(docFreq=20, maxDocs=42740)
                0.0625 = fieldNorm(doc=2991)
          0.66705865 = weight(abstract_txt:mwes in 2991) [ClassicSimilarity], result of:
            0.66705865 = score(doc=2991,freq=4.0), product of:
              0.5568422 = queryWeight, product of:
                3.5295281 = boost
                9.583449 = idf(docFreq=7, maxDocs=42740)
                0.01646242 = queryNorm
              1.1979312 = fieldWeight in 2991, product of:
                2.0 = tf(freq=4.0), with freq of:
                  4.0 = termFreq=4.0
                9.583449 = idf(docFreq=7, maxDocs=42740)
                0.0625 = fieldNorm(doc=2991)
        0.24 = coord(6/25)
    
  5. Snajder, J.; Almic, P.: Modeling semantic compositionality of Croatian multiword expressions (2015) 0.23
    0.22895843 = sum of:
      0.22895843 = product of:
        1.4309902 = sum of:
          0.018489014 = weight(abstract_txt:their in 4921) [ClassicSimilarity], result of:
            0.018489014 = score(doc=4921,freq=1.0), product of:
              0.061781406 = queryWeight, product of:
                1.1756538 = boost
                3.1921601 = idf(docFreq=4772, maxDocs=42740)
                0.01646242 = queryNorm
              0.29926503 = fieldWeight in 4921, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                3.1921601 = idf(docFreq=4772, maxDocs=42740)
                0.09375 = fieldNorm(doc=4921)
          0.18210627 = weight(abstract_txt:expressions in 4921) [ClassicSimilarity], result of:
            0.18210627 = score(doc=4921,freq=1.0), product of:
              0.28387812 = queryWeight, product of:
                2.5200927 = boost
                6.842609 = idf(docFreq=123, maxDocs=42740)
                0.01646242 = queryNorm
              0.6414946 = fieldWeight in 4921, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                6.842609 = idf(docFreq=123, maxDocs=42740)
                0.09375 = fieldNorm(doc=4921)
          0.36386046 = weight(abstract_txt:multiword in 4921) [ClassicSimilarity], result of:
            0.36386046 = score(doc=4921,freq=1.0), product of:
              0.45033795 = queryWeight, product of:
                3.1740944 = boost
                8.618368 = idf(docFreq=20, maxDocs=42740)
                0.01646242 = queryNorm
              0.807972 = fieldWeight in 4921, product of:
                1.0 = tf(freq=1.0), with freq of:
                  1.0 = termFreq=1.0
                8.618368 = idf(docFreq=20, maxDocs=42740)
                0.09375 = fieldNorm(doc=4921)
          0.8665345 = weight(abstract_txt:mwes in 4921) [ClassicSimilarity], result of:
            0.8665345 = score(doc=4921,freq=3.0), product of:
              0.5568422 = queryWeight, product of:
                3.5295281 = boost
                9.583449 = idf(docFreq=7, maxDocs=42740)
                0.01646242 = queryNorm
              1.5561581 = fieldWeight in 4921, product of:
                1.7320508 = tf(freq=3.0), with freq of:
                  3.0 = termFreq=3.0
                9.583449 = idf(docFreq=7, maxDocs=42740)
                0.09375 = fieldNorm(doc=4921)
        0.16 = coord(4/25)