Skip to content

Commit c0ff978

Browse files
authored
Merge pull request #294 from iMMIQ/dev/non-utf8
feat: add support for file encoding detection
2 parents 4a444e9 + 4ad4420 commit c0ff978

3 files changed

Lines changed: 50 additions & 2 deletions

File tree

CONTRIBUTING.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -150,3 +150,4 @@ under the Developer Certificate of Origin <https://developercertificate.org/>.
150150
- Shantanu Sinha (@ShantanuPSinha)
151151
- Ronit Nallagatla (@ronitnallagatla)
152152
- Shreyas Chinnola (@ShreChinno)
153+
- An Yudong (@iMMIQ)

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,8 @@ sv-parser = "0.13.3"
4141
term = "1.0"
4242
toml = "0.8"
4343
sv-filelist-parser = "0.1.3"
44+
chardetng = "0.1.17"
45+
encoding_rs = "0.8.34"
4446

4547
[build-dependencies]
4648
regex = "1"

src/main.rs

Lines changed: 47 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,10 @@
11
use anyhow::{Context, Error};
2+
use chardetng::EncodingDetector;
23
use clap::{Parser, CommandFactory};
34
use clap_complete;
45
use enquote;
56
use std::collections::HashMap;
6-
use std::fs::{read_to_string, File, OpenOptions};
7+
use std::fs::{File, OpenOptions};
78
use std::io::{Read, Write};
89
use std::path::{Path, PathBuf};
910
use std::{env, process};
@@ -275,7 +276,15 @@ pub fn run_opt_config(printer: &mut Printer, opt: &Opt, config: Config) -> Resul
275276
// by textrules to reset their internal state.
276277
let _ = linter.textrules_check(TextRuleEvent::StartOfFile, &path, &0);
277278

278-
let text: String = read_to_string(&path)?;
279+
let mut file = File::open(&path)?;
280+
let mut buffer = Vec::new();
281+
282+
file.read_to_end(&mut buffer)?;
283+
let mut detector = EncodingDetector::new();
284+
detector.feed(&buffer, true);
285+
let encoding = detector.guess(None, true).decode(&buffer).0;
286+
287+
let text = encoding.into_owned();
279288
let mut beg: usize = 0;
280289

281290
// Iterate over lines in the file, applying each textrule to each
@@ -1215,4 +1224,40 @@ mod tests {
12151224
let stdout = printer.read_to_string().unwrap();
12161225
assert_eq!(stdout, expected_contents("dump_filelist_8"));
12171226
} // }}}
1227+
1228+
#[test]
1229+
fn lint_gbk_encoded_verilog() {
1230+
use std::fs::File;
1231+
use std::io::Write;
1232+
use std::path::Path;
1233+
1234+
// Create a temporary Verilog file
1235+
let temp_path = Path::new("temp_gbk_verilog.sv");
1236+
let mut file = File::create(&temp_path).expect("Failed to create test file");
1237+
1238+
// Write GBK-encoded Verilog code
1239+
let gbk_verilog = vec![
1240+
0x0a, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x20, 0x74, 0x6f, 0x70, 0x3b, 0x0a, 0x2f,
1241+
0x2f, 0x20, 0xd6, 0xd0, 0xce, 0xc4, 0xd7, 0xa2, 0xca, 0xcd, 0x0a, 0x65, 0x6e, 0x64,
1242+
0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x0a,
1243+
];
1244+
file.write_all(&gbk_verilog)
1245+
.expect("Failed to write test file");
1246+
drop(file); // Close the file
1247+
1248+
// Run `svlint` to analyze the file
1249+
let config: Config = toml::from_str("").unwrap();
1250+
let mut args = vec!["svlint"];
1251+
args.push(temp_path.to_str().unwrap());
1252+
let opt = Opt::parse_from(args.iter());
1253+
1254+
let mut printer = Printer::new(true);
1255+
let ret = run_opt_config(&mut printer, &opt, config.clone());
1256+
1257+
// Clean up the test file
1258+
std::fs::remove_file(&temp_path).expect("Failed to remove test file");
1259+
1260+
// Assert that `svlint` successfully processes the GBK-encoded Verilog file
1261+
assert!(ret.is_ok(), "svlint failed to process GBK-encoded Verilog");
1262+
}
12181263
}

0 commit comments

Comments
 (0)