publications.bib

@inproceedings{Hurriyetoglu+22a,
    title = {Extended Multilingual Protest News Detection - Shared Task 1, {CASE} 2021 and 2022},
    author = {H{\"u}rriyeto{\u{g}}lu, Ali and
      Mutlu, Osman and
      Duru{\c{s}}an, F{\i}rat and
      Uca, Onur and
      G{\"u}rel, Alaeddin Sel{\c{c}}uk and
      Radford, Benjamin and
      Dai, Yaoyao and
      Hettiarachchi, Hansi and
      Stoehr, Niklas and
      Nomoto, Tadashi and
      Slavcheva, Milena and
      Vargas, Francielle and
      Javid, Aaqib and
      Beyhan, Fatih and
      Y{\"o}r{\"u}k, Erdem},
    booktitle = {Proceedings of the 5th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Text ({CASE} 2022)},
    month = dec,
    year = {2022},
    address = {online},
    publisher = {Association for Computational Linguistics (ACL)},
}

@inproceedings{Hurriyetoglu+22b,
    title = {Challenges and Applications of Automated Extraction of Socio-political Events from Text ({CASE} 2022): Workshop and Shared Task Report},
    author = {H{\"u}rriyeto{\u{g}}lu, Ali and
      Tanev, Hristo and
      Zavarella, Vanni and
      Yeniterzi, Reyyan and
      Mutlu, Osman and
      Y{\"o}r{\"u}k, Erdem},
    booktitle = {Proceedings of the 5th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Text ({CASE} 2022)},
    month = dec,
    year = {2022},
    address = {online},
    publisher = {Association for Computational Linguistics (ACL)},
}

@misc{Durusan+2022,
  author = {Duru{\c{s}}an, F{\i}rat and H{\"u}rriyeto{\u{g}}lu, Ali and Y{\"o}r{\"u}k, Erdem and Mutlu, Osman and Yoltar, {\c{C}}a{\u{g}}r{\i} and G{\"u}rel, Burak and Comin, Alvaro},
  title = {Global Contentious Politics Database ({GLOCON}) Annotation Manuals},
  publisher = {arXiv},
  year = {2022},
  doi = {10.48550/ARXIV.2206.10299},
  url = {https://arxiv.org/abs/2206.10299},
  eprint = {2206.10299},
  archiveprefix = {arXiv},
  keywords = {Computation and Language (cs.CL), Computers and Society (cs.CY), Machine Learning (cs.LG), FOS: Computer and information sciences},
  copyright = {Creative Commons Attribution 4.0 International},
}


@inproceedings{Hurriyetoglu+21a,
    title = {Multilingual Protest News Detection - Shared Task 1, {CASE} 2021},
    author = {H{\"u}rriyeto{\u{g}}lu, Ali and
      Mutlu, Osman and
      Liza, Farhana Ferdousi and
      Y{\"o}r{\"u}k, Erdem and
      Kumar, Ritesh and
      Ratan, Shyam},
    booktitle = {Proceedings of the 4th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Text ({CASE} 2021)},
    month = aug,
    year = {2021},
    address = {online},
    publisher = {Association for Computational Linguistics (ACL)},
}

@inproceedings{Hurriyetoglu+21b,
    title = {Challenges and Applications of Automated Extraction of Socio-political Events from Text ({CASE} 2021): Workshop and Shared Task Report},
    author = {H{\"u}rriyeto{\u{g}}lu, Ali and
      Tanev, Hristo and
      Zavarella, Vanni and
      Piskorski, Jakub and
      Yeniterzi, Reyyan and
      Y{\"o}r{\"u}k, Erdem and
      Mutlu, Osman and
      Y{\"u}ret, Deniz and
      Villavicencio, Aline},
    booktitle = {Proceedings of the 4th Workshop on Challenges and Applications of Automated Extraction of Socio-political Events from Text ({CASE} 2021)},
    month = aug,
    year = {2021},
    address = {online},
    publisher = {Association for Computational Linguistics (ACL)},
}

@article{10.1162/dint_a_00092,
    author = {H{\"u}rriyeto{\u{g}}lu, Ali and Y{\"o}r{\"u}k, Erdem and Mutlu, Osman and Duru{\c{s}}an, F{\i}rat and Yoltar, {\c{C}}a{\u{g}}r{\i} and Y{\"u}ret, Deniz and G{\"u}rel, Burak},
    title = {Cross-Context News Corpus for Protest Event-Related Knowledge Base Construction},
    journal = {Data Intelligence},
    pages = {1--28},
    year = {2021},
    month = apr,
    abstract = {We describe a gold standard corpus of protest events that comprise various local and international English language sources from various countries. The corpus contains document-, sentence-, and token-level annotations. This corpus facilitates creating machine learning models that automatically classify news articles and extract protest event-related information, constructing knowledge bases that enable comparative social and political science studies. For each news source, the annotation starts with random samples of news articles and continues with samples drawn using active learning. Each batch of samples is annotated by two social and political scientists, adjudicated by an annotation supervisor, and improved by identifying annotation errors semi-automatically. We found that the corpus possesses the variety and quality that are necessary to develop and benchmark text classification and event extraction systems in a cross-context setting, contributing to the generalizability and robustness of automated text processing systems. This corpus and the reported results will establish a common foundation in automated protest event collection studies, which is currently lacking in the literature.},
    issn = {2641-435X},
    doi = {10.1162/dint_a_00092},
    url = {https://doi.org/10.1162/dint_a_00092},
    eprint = {https://direct.mit.edu/dint/article-pdf/doi/10.1162/dint_a_00092/1912934/dint_a_00092.pdf},
}

@article{Yoruk+21,
    author = {Y{\"o}r{\"u}k, Erdem
      and H{\"u}rriyeto{\u{g}}lu, Ali
      and Yoltar, {\c{C}}a{\u{g}}r{\i}
      and Duru{\c{s}}an, F{\i}rat},
    title = {Random Sampling in Corpus Design: Cross-Context Generalizability in Automated Multicountry Protest Event Collection},
    journal = {American Behavioral Scientist},
    pages = {00027642211021630},
    year = {2021},
    doi = {10.1177/00027642211021630},
    url = {https://doi.org/10.1177/00027642211021630},
    eprint = {https://doi.org/10.1177/00027642211021630},
    abstract = {What is the most optimal way of creating a gold standard corpus for training a machine learning system that is designed for automatically collecting protest information in a cross-country context? We show that creating a gold standard corpus for training and testing machine learning models on the basis of randomly chosen news articles from news archives yields better performance than selecting news articles on the basis of keyword filtering, which is the most prevalent method currently used in automated event coding. We advance this new bottom-up approach to ensure generalizability and reliability in cross-country comparative protest event collection from international and local news in different countries, languages, sources and time periods, which entails a large variety of event types, actors, and targets. We present the results of comparing our random-sample approach with keyword filtering. We show that the machine learning algorithms, and particularly state-of-the-art deep learning tools, perform much better when they are trained with the gold standard corpus from a randomly selected set of news articles from China, India, and South Africa. Finally, we also present our approach to overcome the major ethical issues that are intrinsic to protest event coding.},
}

@inproceedings{Hurriyetoglu+20b,
    author = {H{\"u}rriyeto{\u{g}}lu, Ali and
      Zavarella, Vanni and
      Tanev, Hristo and
      Y{\"o}r{\"u}k, Erdem and
      Safaya, Ali and
      Mutlu, Osman},
    title = {Automated Extraction of Socio-political Events from News ({AESPEN}): Workshop and Shared Task Report},
    booktitle = {Proceedings of the Workshop on Automated Extraction of Socio-political Events from News 2020},
    pages = {1--6},
    month = may,
    year = {2020},
    publisher = {European Language Resources Association (ELRA)},
    address = {Marseille, France},
    language = {English},
    isbn = {979-10-95546-50-4},
    url = {https://www.aclweb.org/anthology/2020.aespen-1.1},
    abstract = {We describe our effort on automated extraction of socio-political events from news in the scope of a workshop and a shared task we organized at Language Resources and Evaluation Conference (LREC 2020). We believe the event extraction studies in computational linguistics and social and political sciences should further support each other in order to enable large scale socio-political event information collection across sources, countries, and languages. The event consists of regular research papers and a shared task, which is about event sentence coreference identification (ESCI), tracks. All submissions were reviewed by five members of the program committee. The workshop attracted research papers related to evaluation of machine learning methodologies, language resources, material conflict forecasting, and a shared task participation report in the scope of socio-political event information collection. It has shown us the volume and variety of both the data sources and event information collection approaches related to socio-political events and the need to fill the gap between automated text processing techniques and requirements of social and political sciences.},
}


@inproceedings{Hurriyetoglu+19b,
    author = {H{\"u}rriyeto{\u{g}}lu, Ali
      and Y{\"o}r{\"u}k, Erdem
      and Y{\"u}ret, Deniz
      and Yoltar, {\c{C}}a{\u{g}}r{\i}
      and G{\"u}rel, Burak
      and Duru{\c{s}}an, F{\i}rat
      and Mutlu, Osman
      and Akdemir, Arda},
    editor = {Crestani, Fabio
      and Braschler, Martin
      and Savoy, Jacques
      and Rauber, Andreas
      and M{\"u}ller, Henning
      and Losada, David E.
      and Heinatz B{\"u}rki, Gundula
      and Cappellato, Linda
      and Ferro, Nicola},
    title = {Overview of {CLEF} 2019 Lab {ProtestNews}: Extracting Protests from News in a Cross-Context Setting},
    booktitle = {Experimental IR Meets Multilinguality, Multimodality, and Interaction},
    year = {2019},
    publisher = {Springer International Publishing},
    address = {Cham},
    pages = {425--432},
    abstract = {We present an overview of the CLEF-2019 Lab ProtestNews on Extracting Protests from News in the context of generalizable natural language processing. The lab consists of document, sentence, and token level information classification and extraction tasks that were referred as task 1, task 2, and task 3 respectively in the scope of this lab. The tasks required the participants to identify protest relevant information from English local news at one or more aforementioned levels in a cross-context setting, which is cross-country in the scope of this lab. The training and development data were collected from India and test data was collected from India and China. The lab attracted 58 teams to participate in the lab. 12 and 9 of these teams submitted results and working notes respectively. We have observed neural networks yield the best results and the performance drops significantly for majority of the submissions in the cross-country setting, which is China.},
    isbn = {978-3-030-28577-7},
}

@inproceedings{Hurriyetoglu+19,
    author = {H{\"u}rriyeto{\u{g}}lu, Ali
      and Y{\"o}r{\"u}k, Erdem
      and Y{\"u}ret, Deniz
      and Yoltar, {\c{C}}a{\u{g}}r{\i}
      and G{\"u}rel, Burak
      and Duru{\c{s}}an, F{\i}rat
      and Mutlu, Osman},
    editor = {Azzopardi, Leif
      and Stein, Benno
      and Fuhr, Norbert
      and Mayr, Philipp
      and Hauff, Claudia
      and Hiemstra, Djoerd},
    title = {A Task Set Proposal for Automatic Protest Information Collection Across Multiple Countries},
    booktitle = {Advances in Information Retrieval},
    pages = {316--323},
    year = {2019},
    publisher = {Springer International Publishing},
    address = {Cham},
    isbn = {978-3-030-15719-7},
    abstract = {We propose a coherent set of tasks for protest information collection in the context of generalizable natural language processing. The tasks are news article classification, event sentence detection, and event extraction. Having tools for collecting event information from data produced in multiple countries enables comparative sociology and politics studies. We have annotated news articles in English from a source and a target country in order to be able to measure the performance of the tools developed using data from one country on data from a different country. Our preliminary experiments have shown that the performance of the tools developed using English texts from India drops to a level that are not usable when they are applied on English texts from China. We think our setting addresses the challenge of building generalizable NLP tools that perform well independent of the source of the text and will accelerate progress in line of developing generalizable NLP systems.},
}