|
@@ -5,7 +5,14 @@ import (
|
|
|
"context"
|
|
|
"encoding/csv"
|
|
|
"fmt"
|
|
|
+ "github.com/saintfish/chardet"
|
|
|
"github.com/suyuan32/simple-admin-common/msg/errormsg"
|
|
|
+ "golang.org/x/text/encoding"
|
|
|
+ "golang.org/x/text/encoding/charmap"
|
|
|
+ "golang.org/x/text/encoding/japanese"
|
|
|
+ "golang.org/x/text/encoding/korean"
|
|
|
+ "golang.org/x/text/encoding/traditionalchinese"
|
|
|
+ "golang.org/x/text/encoding/unicode"
|
|
|
"io"
|
|
|
"mime/multipart"
|
|
|
"strings"
|
|
@@ -115,11 +122,66 @@ func trim(s string) string {
|
|
|
return s
|
|
|
}
|
|
|
|
|
|
+// transCharset 自动检测编码并转换为 UTF-8
|
|
|
func transCharset(s string) string {
|
|
|
- s = trim(s)
|
|
|
- return s
|
|
|
- rd := transform.NewReader(bytes.NewReader([]byte(s)), simplifiedchinese.GBK.NewDecoder())
|
|
|
- bytes, err := io.ReadAll(rd)
|
|
|
- fmt.Printf("bytes=%s err=%v\n", bytes, err)
|
|
|
- return string(bytes)
|
|
|
+ // 1. 自动检测编码
|
|
|
+ detector := chardet.NewTextDetector()
|
|
|
+ result, err := detector.DetectBest([]byte(s))
|
|
|
+ if err != nil {
|
|
|
+ fmt.Println("Encoding detection failed:", err)
|
|
|
+ return s
|
|
|
+ }
|
|
|
+
|
|
|
+ // 2. 找到相应的编码
|
|
|
+ fmt.Println("result.Charset:", result.Charset)
|
|
|
+ enc := getEncoding(result.Charset)
|
|
|
+ fmt.Println("enc:", enc)
|
|
|
+ if enc == nil {
|
|
|
+ // 直接返回原始字符串
|
|
|
+ fmt.Println("Unsupported charset:", result.Charset)
|
|
|
+ return s
|
|
|
+ }
|
|
|
+
|
|
|
+ // 3. 转换为 UTF-8
|
|
|
+ rd := transform.NewReader(bytes.NewReader([]byte(s)), enc.NewDecoder())
|
|
|
+ utf8Bytes, err := io.ReadAll(rd)
|
|
|
+ if err != nil {
|
|
|
+ fmt.Println("Encoding conversion failed:", err)
|
|
|
+ return s
|
|
|
+ }
|
|
|
+
|
|
|
+ // 4. 返回转换后的 UTF-8 字符串
|
|
|
+ return string(utf8Bytes)
|
|
|
+}
|
|
|
+
|
|
|
+// 根据字符集名称获取 `encoding.Encoding`
|
|
|
+func getEncoding(charset string) encoding.Encoding {
|
|
|
+ switch charset {
|
|
|
+ case "UTF-8", "ASCII":
|
|
|
+ return encoding.Nop // 无需转换
|
|
|
+ case "ISO-8859-1":
|
|
|
+ return charmap.ISO8859_1
|
|
|
+ case "ISO-8859-2":
|
|
|
+ return charmap.ISO8859_2
|
|
|
+ case "ISO-8859-15":
|
|
|
+ return charmap.ISO8859_15
|
|
|
+ case "Windows-1252":
|
|
|
+ return charmap.Windows1252
|
|
|
+ case "Big5":
|
|
|
+ return traditionalchinese.Big5
|
|
|
+ case "GB-2312", "GBK", "GB-18030":
|
|
|
+ return simplifiedchinese.GBK
|
|
|
+ case "Shift_JIS":
|
|
|
+ return japanese.ShiftJIS
|
|
|
+ case "EUC-JP":
|
|
|
+ return japanese.EUCJP
|
|
|
+ case "EUC-KR":
|
|
|
+ return korean.EUCKR
|
|
|
+ case "UTF-16LE":
|
|
|
+ return unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)
|
|
|
+ case "UTF-16BE":
|
|
|
+ return unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)
|
|
|
+ default:
|
|
|
+ return nil
|
|
|
+ }
|
|
|
}
|