% Conference paper in the MIDAS 2022 proceedings (research-portal export, cleaned up).
% Names use the unambiguous "Last, First" form; words in the title that must keep
% their case are brace-protected; the DOI is stored bare (no LaTeX escaping).
@inproceedings{a6225311a80845d98019d84ad6934020,
  author    = {Kalita, Nayan Jyoti and Deka, Pritam and Chennareddy, Vijay and Sarma, Shikhar Kumar},
  title     = {{BERT} Based Language Identification in Code-Mixed {English}-{Assamese} Social Media Text},
  booktitle = {Machine Intelligence and Data Science Applications ({MIDAS} 2022): Proceedings},
  editor    = {Ramdane-Cherif, Amar and Singh, T. P. and Tomar, Ravi and Choudhury, Tanupriya and Um, Jung-Sup},
  series    = {Algorithms for Intelligent Systems},
  publisher = {Springer Singapore},
  address   = {Singapore},
  year      = {2023},
  month     = sep,
  day       = {2},
  pages     = {173--181},
  doi       = {10.1007/978-981-99-1620-7_14},
  isbn      = {9789819916191},
  language  = {English},
  note      = {3rd International Conference on Machine Intelligence \& Data Science Applications ({MIDAS} 2022); conference dates: 28--29 October 2022},
  abstract  = {Language identification in code-mixed language pairs has progressively gained research interest in recent times. Due to the extensive use of social media, it has become necessary to identify languages in code-mixed text for dealing with tasks such as detection of hate speeches, misinformation, and disinformation. Recent transformer models such as BERT have shown very good results in many NLP tasks including language identification. This work uses a transfer learning approach by applying a BERT model for language identification at a word level in a code-mixed Assamese-English language pair. Experimental results performed with an available data set show that BERT performs better than using word-level features or semantic word embeddings with an accuracy of 94\%.},
}