
#Legend of Unicode escape sequences (special signs):

#\u011d  g with circumflex
#\u011c  G with circumflex
#\u0161  s with caron
#\u0160  S with caron
#\u1e2b  h with breve below
#\u1e2a  H with breve below

#\u00e1  a acute
#\u00e9  e acute
#\u00ed  i acute
#\u00fa  u acute

#\u00e0  a grave
#\u00e8  e grave
#\u00ec  i grave
#\u00f9  u grave

#\u00d7  operator: cross
#\u00b7  operator: cdot
#\u00a4  operator: of
#\u2218  operator: chaining

#\u2205  emptyset
#\u2192  arrow
#\u2612  checkbox


# Normalize the notation of sign strings (vectorized over `x`).
#
# Every element of `x` goes through these steps, in this order:
#   1. One pair of enclosing angle brackets ("<...>") is stripped.
#   2. Lowercase "c", "j", "h" become s-caron, g-circumflex and
#      h-breve-below.
#   3. Acute / grave vowels (a, e, i, u) are reduced to the plain vowel and
#      the index "2" (acute) or "3" (grave) is appended to the END of the
#      element. An element containing both accent kinds gets both suffixes.
#   4. Uppercase keywords (PLUS, TIMES, OVER, CROSSING, TENU, GUNU, the
#      SHESHIG spellings, OPPOSING, INVERTED, ONE ... FIVE) are rewritten to
#      operator symbols / lowercase modifiers.
#   5. All space characters are removed.
#
# x: character vector of sign strings.
# Returns the normalized strings, one per element of `x`.
standardize_signs <- function(x){
  # Apply regex substitutions one after another. The order is significant,
  # each rule sees the output of the previous one.
  substitute_in_order <- function(strings, patterns, replacements){
    for (k in seq_along(patterns)) {
      strings <- str_replace_all(strings, patterns[k], replacements[k])
    }
    strings
  }

  acute_vowels <- c("\u00e1", "\u00e9", "\u00ed", "\u00fa")
  grave_vowels <- c("\u00e0", "\u00e8", "\u00ec", "\u00f9")
  plain_vowels <- c("a", "e", "i", "u")

  # Drop the surrounding <...> when the whole element is bracketed
  signs <- str_replace(x, "^<(.*)>$", "\\1")

  ## Only relevant for signs of type 1

  # ASCII stand-ins -> consonants with diacritics
  signs <- substitute_in_order(
    signs,
    c("c", "j", "h"),
    c("\u0161", "\u011d", "\u1e2b")
  )

  # The accent kind has to be recorded before the accents are stripped
  has_acute <- str_detect(signs, paste(acute_vowels, collapse = "|"))
  has_grave <- str_detect(signs, paste(grave_vowels, collapse = "|"))

  signs <- substitute_in_order(
    signs,
    c(acute_vowels, grave_vowels),
    c(plain_vowels, plain_vowels)
  )

  # Acute accent -> index 2, grave accent -> index 3 (suffix on the element)
  signs <- ifelse(has_acute, paste0(signs, "2"), signs)
  signs <- ifelse(has_grave, paste0(signs, "3"), signs)

  ## Only relevant for signs of type 2

  # NOTE: the "." in the patterns below is a regex wildcard, not a literal
  # dot. It consumes the single character next to the keyword (presumably
  # the separating space) and that character is replaced by the cdot.
  keyword_patterns <- c(
    "PLUS", "TIMES", "OVER", "CROSSING",
    "TENU", "GUNU",
    "SHESHIG", "\u0160E\u0160IG", "SESIG",
    ".OPPOSING.", ".INVERTED",
    "ONE.", "TWO.", "THREE.", "FOUR.", "FIVE."
  )
  keyword_replacements <- c(
    "+", "\u00d7", "/", "%",
    "tenu", "gunu",
    "\u0161e\u0161ig", "\u0161e\u0161ig", "\u0161e\u0161ig",
    "\u00b7OPPOSING\u00b7", "\u00b7INVERTED",
    "ONE\u00b7", "TWO\u00b7", "THREE\u00b7", "FOUR\u00b7", "FIVE\u00b7"
  )
  signs <- substitute_in_order(signs, keyword_patterns, keyword_replacements)

  # Finally remove every remaining space
  str_replace_all(signs, " ", "")
}
