% Journal article: Derczynski et al., Information Processing and Management 51(2), 2015.
% Notes: names use "Last, First" form; {van Erp} stays braced so it is kept as one
% surname token; the DOI is stored bare (no resolver prefix); pages use an en-dash range.
@article{DERCZYNSKI201532,
  author   = {Derczynski, Leon and Maynard, Diana and Rizzo, Giuseppe and {van Erp}, Marieke and Gorrell, Genevieve and Troncy, Rapha{\"e}l and Petrak, Johann and Bontcheva, Kalina},
  title    = {Analysis of named entity recognition and linking for tweets},
  journal  = {Information Processing \& Management},
  volume   = {51},
  number   = {2},
  pages    = {32--49},
  year     = {2015},
  issn     = {0306-4573},
  doi      = {10.1016/j.ipm.2014.10.006},
  url      = {http://www.sciencedirect.com/science/article/pii/S0306457314001034},
  keywords = {Information extraction, Named entity recognition, Entity disambiguation, Microblogs, Twitter},
  abstract = {Applying natural language processing for mining and intelligent information access to tweets (a form of microblog) is a challenging, emerging research area. Unlike carefully authored news text and other longer content, tweets pose a number of new challenges, due to their short, noisy, context-dependent, and dynamic nature. Information extraction from tweets is typically performed in a pipeline, comprising consecutive stages of language identification, tokenisation, part-of-speech tagging, named entity recognition and entity disambiguation (e.g. with respect to DBpedia). In this work, we describe a new Twitter entity disambiguation dataset, and conduct an empirical analysis of named entity recognition and disambiguation, investigating how robust a number of state-of-the-art systems are on such noisy texts, what the main sources of error are, and which problems should be further investigated to improve the state of the art.},
}