% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/helper_functions.R
\encoding{UTF-8}
\name{preprocess_string}
\alias{preprocess_string}
\title{Preprocess a string, removing special characters and handling abbreviations}
\usage{
preprocess_string(verbatim, lang = "de")
}
\arguments{
\item{verbatim}{The character vector to process.}

\item{lang}{The language the text is in.
Currently only German is supported.
Defaults to "de" (German).}
}
\value{
The same character vector after processing.
}
\description{
Replaces some common characters / character sequences
(e.g., Ä, Ü, "DIPL.-ING.") with their uppercase equivalents and removes
punctuation, empty spaces and the word "Diplom".
}
\details{
\code{\link[=charToRaw]{charToRaw()}} and \code{\link[stringi:stri_escape_unicode]{stringi::stri_escape_unicode()}} can help to identify non-ASCII characters and to find their UTF-8 bytes or Unicode escape sequences.
}
\examples{
preprocess_string(c(
  "Verkauf von B\u00fcchern, Schreibwaren",
  "Fach\u00e4rztin f\u00fcr Kinder- und Jugendmedizin im \u00f6ffentlichen Gesundheitswesen",
  "Industriemechaniker",
  "Dipl.-Ing. - Agrarwirtschaft (Landwirtschaft)"
))
}
