Kaynağa Gözat

自动转换上传文件编码

boweniac 6 gün önce
ebeveyn
işleme
0415bad511
1 değiştirilmiş dosya ile 68 ekleme ve 6 silme
  1. 68 6
      internal/logic/agent/upload_agent_data_logic.go

+ 68 - 6
internal/logic/agent/upload_agent_data_logic.go

@@ -5,7 +5,14 @@ import (
 	"context"
 	"encoding/csv"
 	"fmt"
+	"github.com/saintfish/chardet"
 	"github.com/suyuan32/simple-admin-common/msg/errormsg"
+	"golang.org/x/text/encoding"
+	"golang.org/x/text/encoding/charmap"
+	"golang.org/x/text/encoding/japanese"
+	"golang.org/x/text/encoding/korean"
+	"golang.org/x/text/encoding/traditionalchinese"
+	"golang.org/x/text/encoding/unicode"
 	"io"
 	"mime/multipart"
 	"strings"
@@ -115,11 +122,66 @@ func trim(s string) string {
 	return s
 }
 
+// transCharset 自动检测编码并转换为 UTF-8
 func transCharset(s string) string {
-	s = trim(s)
-	return s
-	rd := transform.NewReader(bytes.NewReader([]byte(s)), simplifiedchinese.GBK.NewDecoder())
-	bytes, err := io.ReadAll(rd)
-	fmt.Printf("bytes=%s err=%v\n", bytes, err)
-	return string(bytes)
+	// 1. 自动检测编码
+	detector := chardet.NewTextDetector()
+	result, err := detector.DetectBest([]byte(s))
+	if err != nil {
+		fmt.Println("Encoding detection failed:", err)
+		return s
+	}
+
+	// 2. 找到相应的编码
+	fmt.Println("result.Charset:", result.Charset)
+	enc := getEncoding(result.Charset)
+	fmt.Println("enc:", enc)
+	if enc == nil {
+		// 直接返回原始字符串
+		fmt.Println("Unsupported charset:", result.Charset)
+		return s
+	}
+
+	// 3. 转换为 UTF-8
+	rd := transform.NewReader(bytes.NewReader([]byte(s)), enc.NewDecoder())
+	utf8Bytes, err := io.ReadAll(rd)
+	if err != nil {
+		fmt.Println("Encoding conversion failed:", err)
+		return s
+	}
+
+	// 4. 返回转换后的 UTF-8 字符串
+	return string(utf8Bytes)
+}
+
+// 根据字符集名称获取 `encoding.Encoding`
+func getEncoding(charset string) encoding.Encoding {
+	switch charset {
+	case "UTF-8", "ASCII":
+		return encoding.Nop // 无需转换
+	case "ISO-8859-1":
+		return charmap.ISO8859_1
+	case "ISO-8859-2":
+		return charmap.ISO8859_2
+	case "ISO-8859-15":
+		return charmap.ISO8859_15
+	case "Windows-1252":
+		return charmap.Windows1252
+	case "Big5":
+		return traditionalchinese.Big5
+	case "GB-2312", "GBK", "GB-18030":
+		return simplifiedchinese.GBK
+	case "Shift_JIS":
+		return japanese.ShiftJIS
+	case "EUC-JP":
+		return japanese.EUCJP
+	case "EUC-KR":
+		return korean.EUCKR
+	case "UTF-16LE":
+		return unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM)
+	case "UTF-16BE":
+		return unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM)
+	default:
+		return nil
+	}
 }