% Journal article: analysis of named entity recognition and linking for tweets
% (Information Processing and Management, vol. 51, no. 2, 2015).
% The doi field stores the bare identifier; styles add the resolver prefix.
@article{DERCZYNSKI201532,
  author   = {Derczynski, Leon and Maynard, Diana and Rizzo, Giuseppe and van Erp, Marieke and Gorrell, Genevieve and Troncy, Rapha{\"e}l and Petrak, Johann and Bontcheva, Kalina},
  title    = {Analysis of named entity recognition and linking for tweets},
  journal  = {Information Processing \& Management},
  volume   = {51},
  number   = {2},
  pages    = {32--49},
  year     = {2015},
  issn     = {0306-4573},
  doi      = {10.1016/j.ipm.2014.10.006},
  url      = {http://www.sciencedirect.com/science/article/pii/S0306457314001034},
  keywords = {Information extraction, Named entity recognition, Entity disambiguation, Microblogs, Twitter},
  abstract = {Applying natural language processing for mining and intelligent information access to tweets (a form of microblog) is a challenging, emerging research area. Unlike carefully authored news text and other longer content, tweets pose a number of new challenges, due to their short, noisy, context-dependent, and dynamic nature. Information extraction from tweets is typically performed in a pipeline, comprising consecutive stages of language identification, tokenisation, part-of-speech tagging, named entity recognition and entity disambiguation (e.g. with respect to DBpedia). In this work, we describe a new Twitter entity disambiguation dataset, and conduct an empirical analysis of named entity recognition and disambiguation, investigating how robust a number of state-of-the-art systems are on such noisy texts, what the main sources of error are, and which problems should be further investigated to improve the state of the art.},
}