Browse Source

fix:edit agent/data/upload

jimmyyem 3 months ago
parent
commit
f24d821b63

+ 4 - 3
go.mod

@@ -14,15 +14,19 @@ require (
 	github.com/go-resty/resty/v2 v2.14.0
 	github.com/gofrs/uuid/v5 v5.0.0
 	github.com/golang-jwt/jwt/v5 v5.2.1
+	github.com/golang/protobuf v1.5.4
 	github.com/imroc/req/v3 v3.43.1
 	github.com/redis/go-redis/v9 v9.6.1
 	github.com/robfig/cron/v3 v3.0.1
+	github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d
 	github.com/sashabaranov/go-openai v1.31.0
 	github.com/speps/go-hashids/v2 v2.0.1
 	github.com/spf13/cast v1.6.0
 	github.com/suyuan32/simple-admin-common v1.3.11
 	github.com/suyuan32/simple-admin-core v1.3.11
 	github.com/zeromicro/go-zero v1.6.3
+	golang.org/x/text v0.18.0
+	google.golang.org/protobuf v1.35.2
 )
 
 require (
@@ -68,7 +72,6 @@ require (
 	github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/golang/mock v1.6.0 // indirect
-	github.com/golang/protobuf v1.5.4 // indirect
 	github.com/google/gnostic-models v0.6.8 // indirect
 	github.com/google/go-cmp v0.6.0 // indirect
 	github.com/google/gofuzz v1.2.0 // indirect
@@ -141,14 +144,12 @@ require (
 	golang.org/x/sync v0.8.0 // indirect
 	golang.org/x/sys v0.25.0 // indirect
 	golang.org/x/term v0.24.0 // indirect
-	golang.org/x/text v0.18.0 // indirect
 	golang.org/x/time v0.6.0 // indirect
 	golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect
 	google.golang.org/appengine v1.6.8 // indirect
 	google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237 // indirect
 	google.golang.org/genproto/googleapis/rpc v0.0.0-20240318140521-94a12d6c2237 // indirect
 	google.golang.org/grpc v1.62.1 // indirect
-	google.golang.org/protobuf v1.35.2 // indirect
 	gopkg.in/inf.v0 v0.9.1 // indirect
 	gopkg.in/ini.v1 v1.67.0 // indirect
 	gopkg.in/yaml.v2 v2.4.0 // indirect

+ 2 - 6
go.sum

@@ -443,7 +443,6 @@ github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Ky
 github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
 github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
 github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
-github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0=
 github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
 github.com/mattn/go-sqlite3 v1.14.6/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU=
 github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=
@@ -477,7 +476,6 @@ github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLA
 github.com/nxadm/tail v1.4.4 h1:DQuhQpB1tVlglWS2hLQ5OV6B5r8aGxSrPc5Qo6uTN78=
 github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
 github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
-github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
 github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
 github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
 github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk=
@@ -545,6 +543,8 @@ github.com/rs/zerolog v1.13.0/go.mod h1:YbFCdg8HfsridGWAh22vktObvhZbQsZXe4/zB0OK
 github.com/rs/zerolog v1.15.0/go.mod h1:xYTKnLHcpfU2225ny5qZjxnj9NvkumZYjJHlAThCjNc=
 github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
+github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA=
+github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
 github.com/sashabaranov/go-openai v1.31.0 h1:rGe77x7zUeCjtS2IS7NCY6Tp4bQviXNMhkQM6hz/UC4=
 github.com/sashabaranov/go-openai v1.31.0/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
 github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
@@ -572,8 +572,6 @@ github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkU
 github.com/spf13/cast v1.6.0 h1:GEiTHELF+vaR5dhz3VqZfFSzZjYbgeKDpBxQVS4GYJ0=
 github.com/spf13/cast v1.6.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
 github.com/spf13/cobra v1.1.3/go.mod h1:pGADOWyqRD/YMrPZigI/zbliZ2wVD/23d+is3pSWzOo=
-github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I=
-github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0=
 github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
 github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
 github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
@@ -964,8 +962,6 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD
 google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
 google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
 google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
-google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg=
-google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw=
 google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io=
 google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
 gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=

+ 21 - 0
hook/charset/detector.go

@@ -0,0 +1,21 @@
+package charset
+
+import (
+	"fmt"
+	"github.com/saintfish/chardet"
+)
+
+// DetectCharset runs chardet's text detector over data and returns the charset name of the best match, after printing the detected charset and language to stdout; it returns "unknown" when detection reports an error.
+func DetectCharset(data []byte) string {
+	detector := chardet.NewTextDetector()
+	result, err := detector.DetectBest(data)
+	if err == nil {
+		fmt.Printf(
+			"Detected charset is %s, language is %s",
+			result.Charset,
+			result.Language)
+		return result.Charset
+	}
+
+	return "unknown"
+}

+ 6 - 0
internal/handler/agent/upload_agent_data_handler.go

@@ -49,7 +49,9 @@ func UploadAgentDataHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
 			httpx.ErrorCtx(r.Context(), w, errors.New("upload file cannot be null"))
 			return
 		}
+
 		//l.Logger.Infof("header=%+v size=%d filename=%v\n", header.Header, header.Size, header.Filename)
+
 		if header.Header.Get("Content-Type") != "text/csv" {
 			httpx.ErrorCtx(r.Context(), w, errors.New("file format must be csv"))
 			return
@@ -63,6 +65,10 @@ func UploadAgentDataHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
 		}
 
 		resp, err := l.UploadAgentData(&req, file, uint64(agentId))
+
+		// 删除临时文件
+		_ = r.MultipartForm.RemoveAll()
+
 		if err != nil {
 			httpx.ErrorCtx(r.Context(), w, err)
 		} else {

+ 20 - 15
internal/logic/agent/upload_agent_data_logic.go

@@ -1,10 +1,12 @@
 package agent
 
 import (
+	"bytes"
 	"context"
 	"encoding/csv"
 	"fmt"
 	"github.com/suyuan32/simple-admin-common/msg/errormsg"
+	"io"
 	"mime/multipart"
 	agentModel "wechat-api/ent/agent"
 	"wechat-api/hook/fastgpt"
@@ -14,6 +16,9 @@ import (
 	"wechat-api/internal/types"
 
 	"github.com/zeromicro/go-zero/core/logx"
+
+	"golang.org/x/text/encoding/simplifiedchinese"
+	"golang.org/x/text/transform"
 )
 
 type UploadAgentDataLogic struct {
@@ -31,14 +36,12 @@ func NewUploadAgentDataLogic(ctx context.Context, svcCtx *svc.ServiceContext) *U
 
 func (l *UploadAgentDataLogic) UploadAgentData(req *types.UploadDataReq, file multipart.File, agentId uint64) (*types.BaseDataInfo, error) {
 	var count uint64 = 0
-	//fmt.Printf("count=%d \n", count)
 
 	reader := csv.NewReader(file)
 	records, err := reader.ReadAll()
 	if err != nil {
 		return nil, err
 	}
-	//fmt.Printf("records=%v \n", records)
 
 	agent, err := l.svcCtx.DB.Agent.Query().Where(agentModel.ID(agentId)).Only(l.ctx)
 	if err != nil {
@@ -51,10 +54,6 @@ func (l *UploadAgentDataLogic) UploadAgentData(req *types.UploadDataReq, file mu
 
 	qas := make([]fastgpt.DataQuestion, 0, 100)
 	for idx, record := range records {
-		// 第一行标题内容过滤
-		//fmt.Printf("idx=%v, record=%v \n", idx, record)
-		//fmt.Println(idx == 0, record[0] == "问题", record[1] == "答案")
-		//fmt.Printf("record=%+v\n", record)
 		if idx == 0 && record[1] == "答案" {
 			continue
 		}
@@ -63,23 +62,32 @@ func (l *UploadAgentDataLogic) UploadAgentData(req *types.UploadDataReq, file mu
 			continue
 		}
 
+		fmt.Printf("转换前:question=%s, answer=%s \n", record[0], record[1])
+
+		var question, answer []byte
+		reader0 := transform.NewReader(bytes.NewReader([]byte(record[0])), simplifiedchinese.GBK.NewDecoder())
+		question, _ = io.ReadAll(reader0)
+
+		reader1 := transform.NewReader(bytes.NewReader([]byte(record[1])), simplifiedchinese.GBK.NewDecoder())
+		answer, _ = io.ReadAll(reader1)
+
+		fmt.Printf("转换后:question=%s, answer=%s \n", question, answer)
+
 		qas = append(qas, fastgpt.DataQuestion{
-			Q: record[0],
-			A: record[1],
+			Q: string(question),
+			A: string(answer),
 		})
 
 		length := len(qas)
 		if length > 0 && length%100 == 0 {
 			params.Data = qas
-			//fmt.Printf("11 qas=%+v \n", qas)
+			//fmt.Printf("params=%+v\n", params)
 			response, err := fastgpt.CreateBulkData(&params)
 			if err != nil {
 				l.Logger.Errorf("batch insert data to fastgpt failed. collection=%s error=%s", agent.CollectionID, err.Error())
 				return nil, err
 			}
-			//fmt.Printf("response=%+v \n", response)
 			count += response.Data.InsertLen
-			//fmt.Printf("count=%d insert=%d \n", count, response.Data.InsertLen)
 
 			qas = make([]fastgpt.DataQuestion, 0, 100)
 		}
@@ -87,16 +95,13 @@ func (l *UploadAgentDataLogic) UploadAgentData(req *types.UploadDataReq, file mu
 
 	if len(qas) > 0 {
 		params.Data = qas
-		//fmt.Printf("22 qas=%+v \n", qas)
 		response, err := fastgpt.CreateBulkData(&params)
 		if err != nil {
 			l.Logger.Errorf("batch insert data to fastgpt failed. collection=%s error=%s", agent.CollectionID, err.Error())
 			return nil, err
 		}
-		//fmt.Printf("response=%+v \n", response)
-		//fmt.Printf("count=%d insert=%d \n", count, response.Data.InsertLen)
 		count += response.Data.InsertLen
-		//fmt.Printf("count=%d insert=%d \n", count, response.Data.InsertLen)
+		qas = make([]fastgpt.DataQuestion, 0, 100)
 	}
 
 	resp := &types.BaseDataInfo{}