# call_rating.py
import json
import os
from typing import TypedDict, Optional

from openai import OpenAI
from pydantic import BaseModel

from utils.file_util import ExcelFile
# One rating rule: a rating label paired with the rule text for that label.
# Element type of ScoringRuleList.rules.
class ScoringRule(BaseModel):
    score: str  # rating label this rule belongs to
    rule: str  # rule text for that label
# Wrapper holding a list of ScoringRule entries under the single key "rules".
# Passed as `response_format` by the rule-inference calls in this file, which
# then read the "rules" key from the JSON reply.
class ScoringRuleList(BaseModel):
    rules: list[ScoringRule]  # the rule entries
  11. class HeaderIndex(TypedDict, total=False):
  12. human_intent: int
  13. human_rating: int
  14. robot_intent: int
  15. robot_rating: int
  16. llm_intent: int
  17. llm_rating: int
  18. chat_history: int
  19. chat_history_simple: int
  20. scoring_criteria: int
  21. error_reasons: int
# Structured reply requested from the model when rating one chat transcript.
# All three fields are typed as strings.
class RatingScore(BaseModel):
    score: str  # rating assigned to the transcript
    confidence_score: str  # confidence; the rating prompt asks for a value from 0 to 1
    scoring_criteria: str  # the model's justification for the rating
# Model with a single free-text rule field.
# NOTE(review): not referenced anywhere in the visible part of this file.
class InferenceScoringRuleAdvanced(BaseModel):
    rule: str  # rule text
  28. # 金融
  29. # no_intention = ['非目标人群', 'F非目标人群', '贷不了', 'E贷不了', '语音助手', 'F语音助手', '接通挂机', 'F接通挂机']
  30. # Intended = ['问贷款信息', 'C问贷款信息', '成功结束', 'A成功结束', '在忙待跟进', 'D在忙待跟进', '待跟进', 'B待跟进']
  31. # pos
  32. no_intention = ['无意向', '不需要', '非目标人群', '已经有']
  33. Intended = ['有意向', 'A邀约成功']
  34. class CallRating:
  35. def __init__(self, file_path):
  36. self._file = ExcelFile(file_path)
  37. api_key = "sk-TL1U0f39NF5xOFnm8bB39e9b73E2474b9753C6BfCe95B5Db" # 计价测试
  38. # api_key = "sk-uN8GZmYwwu8bKeZyF605D111AcCa48738cD777B1332348D1" # 调试
  39. base_url = "https://newapi.gkscrm.com/v1"
  40. self.model = "gpt-4o"
  41. # self.model = "qwen2.5:7b"
  42. # base_url = "http://localhost:11434/v1"
  43. self._openai_client = OpenAI(api_key=api_key,
  44. base_url=base_url)
  45. # 评分方法
  46. def rating_score(self, scoring_rule: list[ScoringRule], general_rule: Optional[int] = 0, start_row: Optional[int] = 0, end_row: Optional[int] = 0):
  47. # 获取表头索引
  48. header_index = self._get_header_index()
  49. if start_row == 0 and end_row == 0:
  50. df_subset = self._file.file
  51. else:
  52. df_subset = self._file.file.iloc[start_row:end_row]
  53. for index, row in df_subset.iterrows():
  54. robot_intent = row.iloc[header_index["robot_intent"]]
  55. chat_history = row.iloc[header_index["chat_history"]]
  56. print(f"chat_history:{chat_history}")
  57. chat_history = self._format_chat_history(chat_history)
  58. print(f"chat_history:{chat_history}")
  59. completion = self._openai_client.beta.chat.completions.parse(
  60. model=self.model,
  61. messages=[
  62. {"role": "system", "content": f"""# 任务
  63. 1. 首先,判断用户的第一句话是否说了:“你好,(任意内容)通话”,如果说了,则不用理会评级规则,直接强制分配为"语音助手"
  64. 2. 如果不属于“语音助手”,请根据评级规则,对聊天记录给出评级、置信度、评分依据(逐项分析不要遗漏)
  65. # 细节说明
  66. 置信度从0到1,0为置信度最低,1为置信度最高。"""},
  67. {
  68. "role": "user",
  69. "content": f"""# 评级规则:
  70. {scoring_rule}
  71. # 聊天记录
  72. {chat_history}
  73. """
  74. }],
  75. response_format=RatingScore
  76. # response_format={"type": "json_object"}
  77. )
  78. score = json.loads(completion.choices[0].message.content)["score"]
  79. confidence_score = json.loads(completion.choices[0].message.content)["confidence_score"]
  80. scoring_criteria = json.loads(completion.choices[0].message.content)["scoring_criteria"]
  81. if robot_intent in Intended:
  82. self._file.new_value(index, "机器人意向", "有意向")
  83. elif robot_intent in no_intention:
  84. self._file.new_value(index, "机器人意向", "无意向")
  85. else:
  86. self._file.new_value(index, "机器人意向", "不确定")
  87. self._file.new_value(index, "精简聊天记录", chat_history)
  88. self._file.new_value(index, "大模型评级", score)
  89. if score in Intended:
  90. self._file.new_value(index, "大模型意向", "有意向")
  91. elif score in no_intention:
  92. self._file.new_value(index, "大模型意向", "无意向")
  93. else:
  94. self._file.new_value(index, "大模型意向", "不确定")
  95. self._file.new_value(index, "置信度", confidence_score)
  96. self._file.new_value(index, "评分依据", scoring_criteria)
  97. def rating_score_test(self, scoring_rule: list[ScoringRule]):
  98. header_index = self._get_header_index()
  99. self._file.new_column("大模型意向")
  100. self._file.new_column("置信度")
  101. for index, row in self._file.file.iterrows():
  102. chat_history = row.iloc[header_index["chat_history"]]
  103. chat_history = self._format_chat_history(chat_history)
  104. print(f"chat_history:{chat_history}")
  105. completion = self._openai_client.chat.completions.create(
  106. model=self.model,
  107. messages=[
  108. {"role": "system", "content": f"""请告诉我,用户的第一句话是否说了:'你好,(任意内容)通话'"""},
  109. {
  110. "role": "user",
  111. "content": f"""
  112. # 聊天记录
  113. {chat_history}
  114. """
  115. }]
  116. )
  117. print(f"completion:{completion}")
  118. def inference_scoring_rule_advanced(self, scoring_rule: list[ScoringRule]):
  119. self.rating_score(scoring_rule)
  120. header_index = self._get_header_index()
  121. system_prompt = '''# 角色
  122. 我希望你扮演逆向规则工程师,根据多组聊天记录、评级错误结果、评级错误原因,完善现有评级规则。
  123. # 输出要求
  124. 1.聚类相同评级,不要一个评级输出多次规则
  125. 2.不要更改或合并评级'''
  126. user_prompt = f"""# 现有评级规则:
  127. {scoring_rule}\n"""
  128. for index, row in self._file.file.iterrows():
  129. human_intent = row.iloc[header_index["human_intent"]]
  130. chat_history = row.iloc[header_index["chat_history"]]
  131. chat_history = self._format_chat_history(chat_history)
  132. error_reasons = row.iloc[header_index["error_reasons"]]
  133. user_prompt += f"""# 第{index + 1}组
  134. ## 聊天记录
  135. {chat_history}
  136. ## 错误评级结果
  137. {human_intent}
  138. ## 评级错误原因
  139. {error_reasons}\n\n"""
  140. completion = self._openai_client.beta.chat.completions.parse(
  141. model=self.model,
  142. messages=[
  143. {"role": "system", "content": system_prompt},
  144. {
  145. "role": "user",
  146. "content": user_prompt
  147. }
  148. ],
  149. response_format=ScoringRuleList
  150. )
  151. return json.loads(completion.choices[0].message.content)["rules"]
  152. def iterate_scoring_rule(self, iteration_count: int, scoring_rule: Optional[list[ScoringRule]] = None):
  153. # if iteration_count < 1:
  154. # return
  155. if scoring_rule is None:
  156. scoring_rule = self.inference_scoring_rule()
  157. self.rating_score(scoring_rule)
  158. iterate = False
  159. header_index = self._get_header_index()
  160. system_prompt = '''# 角色
  161. 我希望你扮演逆向规则工程师,帮我完善评级现有评级规则。
  162. # 过程
  163. 1. 根据评级错误的组,完善旧的评级规则
  164. 2. 完善后的评级规则应当能够正确评级所有组
  165. # 输出要求
  166. 1.聚类相同评级,不要一个评级输出多次规则
  167. 2.不要更改或合并评级'''
  168. user_prompt = "# 现有评级规则:{scoring_rule}\n"
  169. i = 0
  170. for index, row in self._file.file.iterrows():
  171. llm_intent = row.iloc[header_index["llm_intent"]]
  172. human_intent = row.iloc[header_index["human_intent"]]
  173. chat_history = row.iloc[header_index["chat_history"]]
  174. chat_history = self._format_chat_history(chat_history)
  175. if llm_intent != human_intent:
  176. iterate = True
  177. i += 1
  178. print(f"{i}第{index + 2}行,大模型评级:{llm_intent},人工评级:{human_intent}")
  179. user_prompt += f"""# 第{index + 1}组:有错误
  180. ## 聊天记录
  181. {chat_history}
  182. ## 根据现有规则产生的错误评级结果
  183. {llm_intent}
  184. ## 正确的评级结果应当是
  185. {human_intent}\n"""
  186. else:
  187. user_prompt += f"""# 第{index + 1}组:无错误
  188. ## 聊天记录
  189. {chat_history}
  190. ## 根据现有规则产生了正确的评级结果
  191. {llm_intent}\n"""
  192. if iteration_count < 1:
  193. return scoring_rule
  194. if not iterate:
  195. return scoring_rule
  196. completion = self._openai_client.beta.chat.completions.parse(
  197. model=self.model,
  198. messages=[
  199. {"role": "system", "content": system_prompt},
  200. {
  201. "role": "user",
  202. "content": user_prompt
  203. }
  204. ],
  205. response_format=ScoringRuleList
  206. )
  207. print(iteration_count)
  208. print(json.loads(completion.choices[0].message.content)["rules"])
  209. return self.iterate_scoring_rule(iteration_count - 1,
  210. json.loads(completion.choices[0].message.content)["rules"])
  211. def inference_scoring_rule(self):
  212. header_index = self._get_header_index()
  213. system_prompt = '''# 角色
  214. 我希望你扮演推理机器,根据多组聊天记录和评级结果,推理出评级规则。
  215. # 输出要求
  216. 1.聚类相同评级,不要一个评级输出多次规则
  217. 2.不要更改或合并评级'''
  218. user_prompt = ""
  219. for index, row in self._file.file.iterrows():
  220. human_intent = row.iloc[header_index["human_intent"]]
  221. chat_history = row.iloc[header_index["chat_history"]]
  222. chat_history = self._format_chat_history(chat_history)
  223. user_prompt += f"""# 第{index + 1}组
  224. ## 聊天记录
  225. {chat_history}
  226. ## 评级结果
  227. {human_intent}\n"""
  228. completion = self._openai_client.beta.chat.completions.parse(
  229. model=self.model,
  230. messages=[
  231. {"role": "system", "content": system_prompt},
  232. {
  233. "role": "user",
  234. "content": user_prompt
  235. }
  236. ],
  237. response_format=ScoringRuleList
  238. )
  239. return json.loads(completion.choices[0].message.content)["rules"]
  240. def _get_header_index(self):
  241. header_index: HeaderIndex = {}
  242. for index, column in enumerate(self._file.file.columns):
  243. if "人工意向" == column:
  244. header_index["human_intent"] = index
  245. elif "人工评级" == column:
  246. header_index["human_rating"] = index
  247. elif "机器人意向" == column:
  248. header_index["robot_intent"] = index
  249. elif "机器人评级" == column:
  250. header_index["robot_rating"] = index
  251. elif "大模型意向" == column:
  252. header_index["llm_intent"] = index
  253. elif "大模型评级" == column:
  254. header_index["llm_rating"] = index
  255. elif "大模型意向(通用规则)" == column:
  256. header_index["general_llm_intent"] = index
  257. elif "大模型评级(通用规则)" == column:
  258. header_index["general_llm_rating"] = index
  259. elif "聊天记录" == column:
  260. header_index["chat_history"] = index
  261. elif "精简聊天记录" == column:
  262. header_index["chat_history_simple"] = index
  263. elif "评分依据" == column:
  264. header_index["scoring_criteria"] = index
  265. elif "错误原因" == column:
  266. header_index["error_reasons"] = index
  267. return header_index
  268. def _get_rating_type(self):
  269. column_values = self._file.file.loc[:, '人工意向']
  270. # 遍历列的值,获取评级类型
  271. rating_type = set()
  272. for value in column_values:
  273. rating_type.add(value)
  274. return rating_type
  275. @staticmethod
  276. def _format_chat_history(chat_history):
  277. chat_history_data = json.loads(chat_history)
  278. format_chat_history = ""
  279. for message in chat_history_data:
  280. if message["type"] == "ai":
  281. role = "机器人"
  282. elif message["type"] == "user":
  283. role = "用户"
  284. else:
  285. role = "系统"
  286. format_chat_history += f"{role}: {message['content']}\n"
  287. return format_chat_history