package com.ejianc.foundation.ai.service.impl;

import com.alibaba.fastjson.JSONObject;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.ejianc.foundation.ai.bean.*;
import com.ejianc.foundation.ai.config.EjcAiEmbeding;
import com.ejianc.foundation.ai.mapper.KnowledgeItemMapper;
import com.ejianc.foundation.ai.service.*;
import com.ejianc.foundation.ai.utils.DocumentSplitUtil;
import com.ejianc.foundation.ai.vo.KnowledgeItemVO;
import com.ejianc.framework.core.context.InvocationInfoProxy;
import com.ejianc.framework.core.exception.BusinessException;
import com.ejianc.framework.core.kit.mapper.BeanMapper;
import com.ejianc.framework.skeleton.template.BaseServiceImpl;
import com.ejianc.support.idworker.util.IdWorker;
import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.DocumentSplitter;
import dev.langchain4j.data.document.splitter.DocumentByCharacterSplitter;
import dev.langchain4j.data.document.splitter.DocumentByParagraphSplitter;
import dev.langchain4j.data.document.splitter.DocumentByRegexSplitter;
import dev.langchain4j.data.document.splitter.DocumentSplitters;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.model.embedding.onnx.allminilml6v2.AllMiniLmL6V2EmbeddingModel;
import dev.langchain4j.store.embedding.EmbeddingStore;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Service for the files (items) that belong to an AI knowledge base.
 *
 * <p>Handles ingesting an uploaded file (splitting, embedding and persisting the
 * resulting segments, or recording the column titles of a table file), plain
 * saving of item metadata, and deletion of items together with their embeddings.
 *
 * @author generator
 */
@Service("knowledgeItemService")
public class KnowledgeItemServiceImpl extends BaseServiceImpl<KnowledgeItemMapper, KnowledgeItemEntity> implements IKnowledgeItemService{

    /** Prefix prepended to the ids returned under {@code "redisIds"} to form the persisted {@code uuid}. */
    private static final String EMBEDDING_UUID_PREFIX = "embedding:";

    @Autowired
    private DocumentSplitUtil documentSplitUtil;
    @Autowired
    private EjcAiEmbeding aiEmbedingStore;
    @Autowired
    private IKnowledgeEmbeddingService knowledgeEmbeddingService;
    @Autowired
    private IKnowledgeBaseService knowledgeBaseService;
    @Autowired
    private IKnowledgeEmbeddingPointsService knowledgeEmbeddingPointsService;
    @Autowired
    private IKnowledgeItemTableIndexService knowledgeItemTableIndexService;

    /**
     * Saves a knowledge item and ingests the content of its file.
     *
     * <p>When {@code knowledgeType == 1} the file is treated as text: it is split
     * into segments, embedded, written to the embedding store whose index name is
     * the knowledge base code, and one embedding row plus one points row is saved
     * per segment. Any other knowledge type is treated as a table file: the item
     * is saved and the tab-separated cells of the second text line are stored as
     * the column titles.
     *
     * @param saveOrUpdateVO the item to save; a {@code null} id means a new item
     * @return the saved item mapped back to a VO
     * @throws BusinessException if the knowledge base cannot be found, the slice
     *         configuration is invalid, or a table file has fewer than two lines
     */
    @Override
    public KnowledgeItemVO insert(KnowledgeItemVO saveOrUpdateVO) {
        KnowledgeBaseEntity knowledgeBase = knowledgeBaseService.selectById(saveOrUpdateVO.getKnowledgeBaseId());
        if (knowledgeBase == null) {
            // Fail with a business error instead of a NullPointerException on getCode().
            throw new BusinessException("知识库不存在");
        }
        String indexName = knowledgeBase.getCode();

        KnowledgeItemEntity entity = BeanMapper.map(saveOrUpdateVO, KnowledgeItemEntity.class);
        if (saveOrUpdateVO.getId() == null) {
            // New item: assign the id up front because the embedding rows reference it.
            entity.setId(IdWorker.getId());
            entity.setUploadUserId(InvocationInfoProxy.getUserid());
        }

        // Load the file content.
        Document document = documentSplitUtil.loadDocument(entity.getFileSuffix(), entity.getFilePath());

        if (entity.getKnowledgeType() == 1) {
            // Text knowledge base.
            ingestTextDocument(entity, document, indexName);
        } else {
            // Table knowledge base.
            ingestTableDocument(entity, document);
        }

        return BeanMapper.map(entity, KnowledgeItemVO.class);
    }

    /**
     * Chooses the document splitter described by the item's slice settings.
     *
     * @param entity the item carrying slice strategy, identifier, expression, max length and overlap
     * @return the splitter to use for the item's document
     * @throws BusinessException if a custom strategy has a blank or unsupported identifier
     */
    private DocumentSplitter buildSplitter(KnowledgeItemEntity entity) {
        if (entity.getSliceStrategy() == 0) {
            // Default slicing.
            return DocumentSplitters.recursive(entity.getSliceMaxLength(), entity.getSliceOverlap());
        }

        // Custom slicing strategy: the identifier selects the splitter.
        String identifier = entity.getSliceIdentifier();
        if (StringUtils.isBlank(identifier)) {
            throw new BusinessException("标识符不能为空");
        }
        if ("char".equals(identifier)) {
            // Split by character.
            return new DocumentByCharacterSplitter(entity.getSliceMaxLength(), entity.getSliceOverlap());
        }
        if ("paragraph".equals(identifier)) {
            // Split by paragraph.
            return new DocumentByParagraphSplitter(entity.getSliceMaxLength(), entity.getSliceOverlap());
        }
        if ("regex".equals(identifier)) {
            // Split by regular expression.
            return new DocumentByRegexSplitter(entity.getSliceExpression(), "|", entity.getSliceMaxLength(), entity.getSliceOverlap());
        }
        throw new BusinessException("暂不支持该切片策咯");
    }

    /**
     * Splits and embeds a text document, then persists the segments and the item.
     *
     * @param entity the item being ingested; its embedding count and status are updated and it is saved
     * @param document the loaded file content
     * @param indexName name of the embedding store index to write to
     */
    private void ingestTextDocument(KnowledgeItemEntity entity, Document document, String indexName) {
        DocumentSplitter documentSplitter = buildSplitter(entity);

        // all-MiniLM-L6-v2 is a sentence embedding model that maps sentences and short
        // paragraphs to 384-dimensional dense vectors.
        // NOTE(review): a new model instance is constructed on every call; consider
        // reusing a shared instance if construction turns out to be costly.
        EmbeddingModel embeddingModel = new AllMiniLmL6V2EmbeddingModel();
        // Resolve the embedding store for this knowledge base's index.
        EmbeddingStore<TextSegment> embeddingStore = aiEmbedingStore.getEmbeddingStore(indexName);

        JSONObject json = documentSplitUtil.documentSplitStore(document, documentSplitter, embeddingModel, embeddingStore);

        // The three lists are read in parallel: element i of each describes the same segment.
        List<?> segments = (List<?>) json.get("segments");
        List<?> embeddings = (List<?>) json.get("embeddings");
        List<?> redisIds = (List<?>) json.get("redisIds");

        List<KnowledgeEmbeddingEntity> embeddingRows = new ArrayList<>(segments.size());
        List<KnowledgeEmbeddingPointsEntity> pointRows = new ArrayList<>(segments.size());
        for (int i = 0; i < segments.size(); i++) {
            TextSegment segment = (TextSegment) segments.get(i);
            Embedding embedding = (Embedding) embeddings.get(i);
            String uuid = EMBEDDING_UUID_PREFIX + (String) redisIds.get(i);

            KnowledgeEmbeddingEntity embeddingRow = new KnowledgeEmbeddingEntity();
            embeddingRow.setId(IdWorker.getId());
            embeddingRow.setUuid(uuid);
            embeddingRow.setItemId(entity.getId());
            embeddingRow.setContent(segment.text());
            embeddingRow.setInitContent(segment.text());
            embeddingRow.setSliceState(1);
            embeddingRow.setType(1);
            embeddingRows.add(embeddingRow);

            KnowledgeEmbeddingPointsEntity pointRow = new KnowledgeEmbeddingPointsEntity();
            pointRow.setEmbeddingId(embeddingRow.getId());
            pointRow.setUuid(uuid);
            pointRow.setContent(segment.text());
            pointRow.setInitContent(segment.text());
            pointRow.setVector(Arrays.toString(embedding.vector()));
            pointRow.setType(1);
            pointRows.add(pointRow);
        }

        if (!embeddingRows.isEmpty()) {
            // Batch size equals the list size, so each list is written as one batch.
            knowledgeEmbeddingService.saveOrUpdateBatch(embeddingRows, embeddingRows.size(), false);
            knowledgeEmbeddingPointsService.saveOrUpdateBatch(pointRows, pointRows.size(), false);
        }

        entity.setEmbeddingCount(segments.size());
        // NOTE(review): status 3 presumably means "embedding finished" - confirm against the status enum.
        entity.setEmbeddingStatus(3);
        this.saveOrUpdate(entity, false);
    }

    /**
     * Saves a table item and records the column titles found in its file.
     *
     * @param entity the item being ingested; its embedding status is set and it is saved
     * @param document the loaded file content
     * @throws BusinessException if the document text has fewer than two lines
     */
    private void ingestTableDocument(KnowledgeItemEntity entity, Document document) {
        // NOTE(review): the item is saved before the content check below, so a failed
        // check leaves the item row in place unless a caller-level transaction rolls
        // it back - confirm this ordering is intended.
        entity.setEmbeddingStatus(1);
        this.saveOrUpdate(entity, false);

        String[] allText = document.text().split("\n");
        // Line 0 is the sheet (tab) name, so the titles are on line 1.
        if (allText.length < 2) {
            throw new BusinessException("未获取到文件数据，请确认数据是否正确");
        }
        String[] titles = allText[1].split("\t");

        // Save the index information.
        KnowledgeItemTableIndexEntity indexEntity = new KnowledgeItemTableIndexEntity();
        indexEntity.setItemId(entity.getId());
        indexEntity.setTitles(String.join(",", titles));
        knowledgeItemTableIndexService.saveOrUpdate(indexEntity, false);
    }

    /**
     * Saves the item's own data without touching file content or embeddings.
     *
     * @param saveOrUpdateVO the item to save; a {@code null} id means a new item,
     *        in which case the current user is recorded as the uploader
     * @return the saved item mapped back to a VO
     */
    @Override
    public KnowledgeItemVO saveData(KnowledgeItemVO saveOrUpdateVO) {
        KnowledgeItemEntity entity = BeanMapper.map(saveOrUpdateVO, KnowledgeItemEntity.class);
        if (saveOrUpdateVO.getId() == null) {
            entity.setUploadUserId(InvocationInfoProxy.getUserid());
        }
        this.saveOrUpdate(entity, false);
        return BeanMapper.map(entity, KnowledgeItemVO.class);
    }

    /**
     * Deletes the given items together with their embedding rows, points rows and
     * the matching entries in the embedding store.
     *
     * <p>The embedding store index is resolved from the first id only.
     * NOTE(review): this assumes all ids belong to the same knowledge base - confirm with callers.
     *
     * @param ids ids of the items to delete; {@code null} or empty is a no-op
     * @throws BusinessException if store entries must be removed but the knowledge base cannot be found
     */
    @Override
    public void delData(List<Long> ids) {
        if (ids == null || ids.isEmpty()) {
            // Nothing to delete; also avoids an empty IN clause and ids.get(0) on an empty list.
            return;
        }

        QueryWrapper<KnowledgeEmbeddingEntity> embeddingWrapper = new QueryWrapper<>();
        embeddingWrapper.in("item_id", ids);
        List<KnowledgeEmbeddingEntity> embeddingRows = knowledgeEmbeddingService.list(embeddingWrapper);

        if (embeddingRows != null && !embeddingRows.isEmpty()) {
            List<Long> embeddingIds = new ArrayList<>(embeddingRows.size());
            for (KnowledgeEmbeddingEntity embeddingRow : embeddingRows) {
                embeddingIds.add(embeddingRow.getId());
            }

            // The filter must go on the points wrapper: left unconditioned, it would
            // select and later delete the points of every item.
            QueryWrapper<KnowledgeEmbeddingPointsEntity> pointsWrapper = new QueryWrapper<>();
            pointsWrapper.in("embedding_id", embeddingIds);
            List<KnowledgeEmbeddingPointsEntity> points = knowledgeEmbeddingPointsService.list(pointsWrapper);

            if (points != null && !points.isEmpty()) {
                KnowledgeBaseEntity knowledgeBase = knowledgeBaseService.queryBaseDataByItemId(ids.get(0));
                if (knowledgeBase == null) {
                    throw new BusinessException("知识库不存在");
                }
                String indexName = knowledgeBase.getCode();

                List<String> storeIds = new ArrayList<>(points.size());
                for (KnowledgeEmbeddingPointsEntity point : points) {
                    storeIds.add(point.getUuid());
                }
                EmbeddingStore<TextSegment> embeddingStore = aiEmbedingStore.getEmbeddingStore(indexName);
                embeddingStore.removeAll(storeIds);
            }

            knowledgeEmbeddingService.remove(embeddingWrapper);
            knowledgeEmbeddingPointsService.remove(pointsWrapper);
        }

        this.removeByIds(ids, true);
    }

}
