update_mismatch_record.py 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. import json
  2. import asyncio
  3. from pydantic import BaseModel
  4. from app.admin.schema.intent_org import CurrentIntentOrgIns
  5. from app.call_center.crud.crud_mismatch_records import mismatch_records_dao
  6. from app.call_center.schema.mismatch_records import GetMismatchRecordsDetails
  7. from common.log import log
  8. from common.oai import send_request_with_retry, generate_text, generate_json
  9. from database.db_mysql import async_db_session
  10. from model.mismatch_records import MismatchRecords
  11. from utils.serializers import select_as_dict
  12. # class Keyword(BaseModel):
  13. # user_intent: str
  14. # similar_reply: list[str]
  15. # keywords: list[str]
  16. # regular: list[str]
  17. async def update_mismatch_record(org_map: dict[int, CurrentIntentOrgIns], limit: int = 1):
  18. async with async_db_session.begin() as db:
  19. record = await mismatch_records_dao.get_earliest_record(db, limit)
  20. if not record:
  21. return 0
  22. await asyncio.gather(*(process_mismatch_record(org_map[r.org_id], r) for r in record))
  23. return len(record)
  24. async def process_mismatch_record(org_data: CurrentIntentOrgIns, record: MismatchRecords):
  25. record_data = GetMismatchRecordsDetails(**select_as_dict(record))
  26. log.info(f"[process_mismatch_record] record_data: {record_data}")
  27. # 开始过滤
  28. llm_ignore = record_data.llm_ignore # 如后续人工强制强制分析,则保留原始过滤结果
  29. ignore_response_data = None
  30. ignore_model = None
  31. ignore_prompt_tokens = None
  32. ignore_completion_tokens = None
  33. if record_data.ignore == 0:
  34. ignore_messages = [
  35. {"role": "system", "content": f"""# 任务介绍
  36. 公司在用语音识别系统巡检电话通话记录时,可能会因为通话时的背景噪音、方言等问题,导致用户的语音内容没有被正确识别,请根据以下通话记录,判断识别到的内容是否无意义或使上下文不通顺,非常感谢!
  37. # 注意事项
  38. 不用考虑机器人的回复,因为即便识别正确,机器人的回复也可能有误
  39. # 输出要求
  40. 1. 如放在上下文中无意义,请回复一个数字0
  41. 2. 如放在上下文中有意义,请回复一个数字1
  42. 3. 请不要回复 0 或 1 以外的文字内容"""
  43. },
  44. {
  45. "role": "user",
  46. "content": f"""# 通话记录
  47. {record_data.chat_history}
  48. # 可能识别有误的内容:{record_data.missed}
  49. """
  50. }
  51. ]
  52. ignore_model = "gpt-3.5-turbo"
  53. if org_data.model == "deepseek-v3" or org_data.model == "DeepSeek-V3":
  54. ignore_model = org_data.model
  55. ignore_content, ignore_response_data = await generate_text(org_data.openai_key, org_data.openai_base, ignore_model, ignore_messages)
  56. if ignore_content:
  57. ignore_prompt_tokens = ignore_response_data.usage.prompt_tokens
  58. ignore_completion_tokens = ignore_response_data.usage.completion_tokens
  59. if ignore_content == "0":
  60. status = 1
  61. llm_ignore = 1
  62. # 推送
  63. url = org_data.mismatch_callback
  64. if url:
  65. headers = {
  66. "Content-Type": "application/json"
  67. }
  68. data = {
  69. "internal_id": record_data.id,
  70. "external_id": record_data.external_id,
  71. "ignore": record_data.ignore,
  72. "llm_ignore": llm_ignore
  73. }
  74. is_success = await send_request_with_retry(url, data, headers, max_retries=3,
  75. delay_between_retries=2)
  76. if is_success:
  77. status = 2
  78. async with async_db_session.begin() as db:
  79. try:
  80. await mismatch_records_dao.update_llm_ignore(db, record_data.id, llm_ignore,
  81. ignore_response_data.to_dict(),
  82. status, ignore_model, ignore_prompt_tokens, ignore_completion_tokens)
  83. except Exception as e:
  84. log.error(f"更新意图记录时发生异常:{e}")
  85. finally:
  86. return None
  87. elif ignore_content == "1":
  88. llm_ignore = 2
  89. ignore_response_data = ignore_response_data.to_dict()
  90. else:
  91. return None
  92. # 开始分析
  93. if org_data.model == "deepseek-v3" or org_data.model == "DeepSeek-V3":
  94. keyword_schema = """{
  95. "user_intent": str, #用户意图
  96. "similar_reply": list[str], #类似回复
  97. "keywords": list[str], #关键词库
  98. "regular": list[str], #正则表达式
  99. }"""
  100. else:
  101. keyword_schema = {
  102. "name": "keyword_schema",
  103. "schema": { # 添加 schema 字段
  104. "type": "object",
  105. "description": "从通话记录中提取表单值",
  106. "properties": {
  107. "user_intent": {"type": "string", "description": "用户意图"},
  108. "similar_reply": {
  109. "type": "array",
  110. "description": "类似回复",
  111. "items": {"type": "string"}
  112. },
  113. "keywords": {
  114. "type": "array",
  115. "description": "关键词库",
  116. "items": {"type": "string"}
  117. },
  118. "regular": {
  119. "type": "array",
  120. "description": "正则表达式",
  121. "items": {"type": "string"}
  122. },
  123. },
  124. "required": ["user_intent", "similar_reply", "keywords", "regular"]
  125. }
  126. }
  127. keyword_messages = [
  128. {"role": "system", "content": f"""# 任务介绍
  129. 公司在用程序巡检电话通话记录时,有些用户的回复内容没有被关键词库识别到,请根据以下通话记录,给出一些类似的回复,以及对关键词库的具体建议,非常感谢!
  130. # 输出要求
  131. 1. 用户意图:结合上下文,首先考虑可能的语音识别错误并纠错,然后分析未识别内容的根本意图,用户说这句话的背后真实目的是什么,结合上下文挖掘用户最深的意图,请不要停留在表面的意思。
  132. 2. 类似回复:首先应包含未识别的内容原文:{record_data.missed}。其次生成尽可能多的类似意图的回复。
  133. 2. 关键词库:从类似回复中抽取特征词,要求有两点:一是特征词应尽可能的准确,即要覆盖住此类问题,又要尽可能避免和其他语境相冲突。二是需要注重通用性,一些专有名词、人名、地名、产品名、公司名等需要排除在外。
  134. 3. 正则表达式:对关键词词进行解耦,用正则表达式进行表示,专注于主要内容,排除次要内容
  135. # 注意事项
  136. 1. 应贴近真实的外呼场景,用户的素质参差不齐,请避免使用任何浮夸的用词,避免使用高级词汇,避免使用任何的礼貌用语或敬语,适当的低素质些,请至少给出20条结果
  137. 2. 通话记录为录音转译,因此可能有错别字或音同字不同的情况(例如:借和接),请根据上下文分析后就成可能的错字错误
  138. 3. 正则关键词库和正则表达式中也应该考虑到音同字不同的情况,且避免使用匹配次数相关的语法如{0,2}"""
  139. },
  140. {
  141. "role": "user",
  142. "content": f"""# 通话记录
  143. {record_data.chat_history}
  144. # 可能识别有误的内容:{record_data.missed}
  145. """
  146. }
  147. ]
  148. response_json, response_data = await generate_json(org_data.openai_key, org_data.openai_base, org_data.model, keyword_messages, keyword_schema)
  149. if response_json:
  150. user_intent = response_json.get('user_intent')
  151. similar_reply = response_json.get('similar_reply')
  152. keywords = response_json.get('keywords', [])
  153. regular = response_json.get('regular', [])
  154. if user_intent and similar_reply:
  155. status = 1
  156. # 推送
  157. url = org_data.mismatch_callback
  158. if url:
  159. headers = {
  160. "Content-Type": "application/json"
  161. }
  162. data = {
  163. "internal_id": record_data.id,
  164. "external_id": record_data.external_id,
  165. "ignore": record_data.ignore,
  166. "llm_ignore": llm_ignore,
  167. "user_intent": user_intent,
  168. "similar_reply": similar_reply,
  169. "keywords": keywords,
  170. "regular": regular
  171. }
  172. is_success = await send_request_with_retry(url, data, headers, max_retries=3, delay_between_retries=2)
  173. if is_success:
  174. status = 2
  175. async with async_db_session.begin() as db:
  176. try:
  177. await mismatch_records_dao.update(db, record_data.id, llm_ignore, user_intent, similar_reply, keywords, regular, {"messages": keyword_messages}, response_data.to_dict(), status, ignore_response_data, ignore_model, ignore_prompt_tokens, ignore_completion_tokens, org_data.model, response_data.usage.prompt_tokens, response_data.usage.completion_tokens)
  178. except Exception as e:
  179. log.error(f"更新意图记录时发生异常:{e}")