import json
import os
from typing import TypedDict, Optional

from openai import OpenAI
from pydantic import BaseModel

from utils.file_util import ExcelFile


class Keyword(BaseModel):
    # Structured-output schema passed as `response_format` to the chat
    # completion call in `CallKeyword.single_record`; the model's JSON reply is
    # expected to carry exactly these four fields.
    #
    # NOTE: documented with comments rather than a docstring on purpose.
    # pydantic copies a model's docstring into the generated JSON schema as its
    # "description", so adding one would change the schema sent with requests.

    # The customer's underlying intent behind the unrecognised reply.
    user_intent: str
    # Replies with a similar intent (the prompt asks for the original
    # unrecognised text to be included as well).
    similar_reply: list[str]
    # Feature words extracted from the similar replies for the keyword library.
    keywords: list[str]
    # Regular expressions expressing those keywords.
    regular: list[str]

class CallKeyword:
    """Ask a chat model for keyword-library suggestions on unrecognised replies.

    ``single_record`` sends one call transcript plus the customer utterance
    that the keyword library failed to match, and returns the model's
    suggested intent, similar replies, keywords and regular expressions.
    ``from_excel`` runs that over every row of a spreadsheet.

    NOTE: earlier revisions embedded API keys directly in this file. Those
    keys must be treated as leaked and rotated; the key is now supplied via
    the ``api_key`` argument or the ``CALL_KEYWORD_API_KEY`` environment
    variable.
    """

    # Environment variable read for the API key when none is passed explicitly.
    # Deliberately not OPENAI_API_KEY, so an unrelated key is never sent to
    # the proxy configured in DEFAULT_BASE_URL.
    API_KEY_ENV = "CALL_KEYWORD_API_KEY"
    DEFAULT_BASE_URL = "https://newapi.gkscrm.com/v1"
    DEFAULT_MODEL = "gpt-4o"

    # Spreadsheet header text -> key used in the dict built by
    # ``_get_header_index``.
    _HEADER_KEYS = {
        "通话记录": "chat_history",
        "客户侧未命中": "unmatched",
        "客户意图": "user_intent",
        "相似回复": "similar_reply",
        "关键词": "keywords",
        "正则表达式": "regular",
    }

    # Fields copied from the model's JSON reply into the result dict.
    _RESULT_KEYS = ("user_intent", "similar_reply", "keywords", "regular")

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: Optional[str] = None,
        model: Optional[str] = None,
    ):
        """Create the OpenAI-compatible client used for all requests.

        Args:
            api_key: Credential for the endpoint. When omitted, the value of
                the ``CALL_KEYWORD_API_KEY`` environment variable is used.
            base_url: Endpoint root; defaults to ``DEFAULT_BASE_URL``. Any
                OpenAI-compatible server can be targeted, e.g. a local one at
                ``http://localhost:11434/v1``.
            model: Model name; defaults to ``DEFAULT_MODEL`` (e.g. pass
                ``"qwen2.5:7b"`` for a local server).

        Raises:
            ValueError: If no API key is given and the environment variable
                is unset or empty.
        """
        resolved_key = api_key or os.environ.get(self.API_KEY_ENV)
        if not resolved_key:
            raise ValueError(
                f"No API key supplied: pass api_key= or set the "
                f"{self.API_KEY_ENV} environment variable."
            )

        self.model = model or self.DEFAULT_MODEL
        self._openai_client = OpenAI(
            api_key=resolved_key,
            base_url=base_url or self.DEFAULT_BASE_URL,
        )
        # Set by from_excel(); None until a workbook has been loaded.
        self._file: Optional[ExcelFile] = None

    def from_excel(self, file_path: str):
        """Analyse every row of the workbook at ``file_path``.

        For each row, the cells under the "通话记录" and "客户侧未命中" headers
        are passed to ``single_record``, and the four results are handed to
        ``ExcelFile.new_value`` under the "客户意图", "相似回复", "关键词" and
        "正则表达式" columns. List results are stored as JSON text.

        NOTE(review): whether ``new_value`` persists to disk is decided by
        ``ExcelFile`` and is not visible here - confirm whether a save step
        is needed.

        Raises:
            KeyError: If a required input header is missing from the sheet
                (raised when the first row is processed).
        """
        self._file = ExcelFile(file_path)
        positions = self._get_header_index()
        frame = self._file.file

        # Output column -> result key, for the list-valued results.
        json_columns = (
            ("相似回复", "similar_reply"),
            ("关键词", "keywords"),
            ("正则表达式", "regular"),
        )

        for row_label, record in frame.iterrows():
            chat_history = record.iloc[positions["chat_history"]]
            unmatched = record.iloc[positions["unmatched"]]
            analysis = self.single_record(chat_history, unmatched)

            self._file.new_value(row_label, "客户意图", analysis["user_intent"])
            for column, key in json_columns:
                # ensure_ascii=False keeps the Chinese text readable in the cell.
                self._file.new_value(
                    row_label,
                    column,
                    json.dumps(analysis[key], ensure_ascii=False),
                )

    def single_record(self, chat_history: str, unmatched: str):
        """Query the model about one unrecognised reply.

        Args:
            chat_history: Transcript of the call.
            unmatched: The customer utterance the keyword library missed.

        Returns:
            dict with keys ``user_intent`` (str), ``similar_reply``,
            ``keywords`` and ``regular`` (lists of str), taken from the
            model's JSON reply. The result is also printed.

        Raises:
            TypeError / json.JSONDecodeError: If the reply has no content or
                is not valid JSON.
            KeyError: If the reply lacks one of the expected fields.
        """
        completion = self._openai_client.beta.chat.completions.parse(
            model=self.model,
            messages=self._build_messages(chat_history, unmatched),
            response_format=Keyword,
        )
        result = self._extract_result(completion.choices[0].message.content)

        print(f"result: {result}")

        return result

    @staticmethod
    def _build_messages(chat_history: str, unmatched: str) -> list[dict[str, str]]:
        """Build the system and user messages for one record."""
        # The literal regex-quantifier example at the end of the system prompt
        # is written as {{0,2}}: inside an f-string a bare {0,2} is evaluated
        # as a tuple and rendered as "(0, 2)".
        # NOTE(review): the prompt numbers two output requirements "2." and
        # contains probable typos ("关键词词", "就成"); left untouched because
        # rewording the prompt can change model output.
        system_prompt = f"""# 任务介绍
公司在用程序巡检电话通话记录时，有些用户的回复内容没有被关键词库识别到，请根据以下通话记录，给出一些类似的回复，以及对关键词库的具体建议，非常感谢！

# 输出要求
1. 用户意图：结合上下文分析未识别内容的根本意图，用户说这句话的背后真实目的是什么，结合上下文挖掘用户最深的意图，请不要停留在表面的意思。
2. 类似回复：首先应包含未识别的内容原文：{unmatched}。其次生成尽可能多的类似意图的回复。
2. 关键词库：从类似回复中抽取特征词，要求有两点：一是特征词应尽可能的准确，即要覆盖住此类问题，又要尽可能避免和其他语境相冲突。二是需要注重通用性，一些专有名词、人名、地名、产品名、公司名等需要排除在外。
3. 正则表达式：对关键词词进行解耦，用正则表达式进行表示，专注于主要内容，排除次要内容

# 注意事项
1. 应贴近真实的外呼场景，用户的素质参差不齐，请避免使用任何浮夸的用词，避免使用高级词汇，避免使用任何的礼貌用语或敬语，适当的低素质些，请至少给出20条结果
2. 通话记录为录音转译，因此可能有错别字或音同字不同的情况（例如：借和接），请根据上下文分析后就成可能的错字错误
3. 正则关键词库和正则表达式中也应该考虑到音同字不同的情况，且避免使用匹配次数相关的语法如{{0,2}}"""
        user_prompt = f"""# 通话记录
{chat_history}

# 其中未识别到的内容为：{unmatched}
"""
        return [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]

    @classmethod
    def _extract_result(cls, content):
        """Parse the reply JSON once and keep only the expected fields."""
        payload = json.loads(content)
        return {key: payload[key] for key in cls._RESULT_KEYS}

    def _get_header_index(self) -> dict[str, int]:
        """Map known header names to their column positions.

        Scans the columns of the loaded sheet and returns a dict keyed by the
        internal names in ``_HEADER_KEYS``. Headers that are absent are simply
        left out; if a header occurs more than once, the last position wins.
        """
        header_index = {}
        for position, column in enumerate(self._file.file.columns):
            key = self._HEADER_KEYS.get(column)
            if key is not None:
                header_index[key] = position
        return header_index