# Read the editors table.
# The file is declared as UTF-8 so that accented names are decoded the same way
# regardless of the platform's native encoding. file.path() builds the path
# with a separator that works on every platform (the previous literal used a
# Windows-only backslash).
editors <- read.csv(
  file.path("Output", "editors.csv"),
  header = TRUE,
  fileEncoding = "UTF-8"
)

# Remove the automatically created numeric column "X". write.csv() stores the
# row names as an unnamed first column, which read.csv() reads back as "X".
editors$X <- NULL
@@ -16,5 +16,47 @@ editors <- editors[!is.na(editors$publisher), ]
# Rows in which both the editor name and the affiliation are missing:
# nas_df keeps a copy of them for inspection, editors keeps everything else.
nas_df <- editors[is.na(editors$editor) & is.na(editors$affiliation), ] # 0
# Keep a row as soon as at least one of the two fields is present.
editors <- editors[!is.na(editors$editor) | !is.na(editors$affiliation), ]
# Clean encoding (fix wrongful hex-codes).
# Some text contains literal escape codes "<a0>" ... "<ff>" in place of the
# characters they stand for. These are the Latin-1 bytes 0xA0-0xFF, which map
# one-to-one onto the Unicode code points U+00A0-U+00FF, so the lookup table is
# generated rather than typed out by hand.
latin1_codes <- 0xA0:0xFF
ascii <- data.frame(
  Hex = sprintf("<%02x>", latin1_codes),
  Actual = intToUtf8(latin1_codes, multiple = TRUE),
  stringsAsFactors = FALSE
)
# <a0> is the no-break space; store it as an ordinary space so that values
# compare equal to ones typed with a regular space.
# NOTE(review): the hand-written table appears to have used a plain space here
# as well - confirm.
ascii$Actual[ascii$Hex == "<a0>"] <- " "
# <ad> is the invisible soft hyphen. The hand-written table replaced it with
# the literal text "SHY" (the character's label in code charts), which ended up
# inside the data; remove the character instead.
ascii$Actual[ascii$Hex == "<ad>"] <- ""

# Apply every Hex -> Actual replacement, in table order, to each text column.
# vectorize_all = FALSE makes stringi run all patterns over every element
# instead of pairing pattern i with element i.
for (column in c("editor", "affiliation", "journal")) {
  editors[[column]] <- stringi::stri_replace_all_fixed(
    editors[[column]],
    ascii$Hex,
    ascii$Actual,
    vectorize_all = FALSE
  )
}
# Save the cleaned data as UTF-8, overwriting the file that was read at the top
# of the script. file.path() keeps the path portable across platforms.
# write.csv() also writes the row names as an unnamed first column; the script
# drops that column again (as "X") right after reading the file.
write.csv(editors, file.path("Output", "editors.csv"), fileEncoding = "UTF-8")
0 commit comments