package com.ejianc.foundation.ai.utils;

import com.alibaba.fastjson.JSONObject;
import com.ejianc.foundation.ai.config.EjcAiBeanConfig;
import com.ejianc.framework.core.exception.BusinessException;
import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.DocumentSplitter;
import dev.langchain4j.data.document.loader.UrlDocumentLoader;
import dev.langchain4j.data.document.parser.TextDocumentParser;
import dev.langchain4j.data.document.parser.apache.pdfbox.ApachePdfBoxDocumentParser;
import dev.langchain4j.data.document.parser.apache.poi.ApachePoiDocumentParser;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.model.embedding.onnx.allminilml6v2.AllMiniLmL6V2EmbeddingModel;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.redis.RedisEmbeddingStore;
import java.util.Collections;
import java.util.List;
import okhttp3.MediaType;
import org.apache.commons.lang3.ArrayUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import redis.clients.jedis.JedisPooled;

/**
 * Spring component that loads documents from a URL and writes their embeddings, together
 * with the embedded text, into an {@link EmbeddingStore}.
 *
 * <p>Every store method returns a {@link JSONObject} describing what was written and throws a
 * {@link BusinessException} when embedding or storing fails; the underlying exception is
 * logged at ERROR level.
 */
@Component
public class DocumentSplitUtil {
    private static final Logger LOGGER = LoggerFactory.getLogger(DocumentSplitUtil.class);

    /** File extensions that {@link #loadDocument(String, String)} routes to the Apache POI parser. */
    public static final String[] POI_DOC_TYPES = {"doc", "docx", "ppt", "pptx", "xls", "xlsx"};

    // Not referenced inside this class; kept so class initialization stays as it was.
    private static final MediaType mediaType = MediaType.parse("application/json");

    /** Message used both for the log entry and for the exception raised when storing fails. */
    private static final String STORE_FAILED_MESSAGE = "将文件存入向量数据库失败";

    // Not referenced inside this class; kept so the bean's wiring requirements do not change.
    @Autowired
    private EjcAiBeanConfig aiBeanConfig;

    /**
     * Loads a document from a URL, choosing the parser from the file type.
     *
     * <p>The file type is matched case-insensitively for every supported format, so
     * {@code "DOCX"} is handled the same way as {@code "docx"}.
     *
     * @param str  file type / extension without the dot, e.g. {@code "txt"}, {@code "pdf"}
     *             or one of {@link #POI_DOC_TYPES}
     * @param str2 URL the document is loaded from
     * @return the parsed document
     * @throws BusinessException if the file type is {@code null} or not supported
     */
    public Document loadDocument(String str, String str2) {
        // Literal-first comparisons keep a null type on the "unsupported" path instead of an NPE.
        if ("txt".equalsIgnoreCase(str)) {
            return UrlDocumentLoader.load(str2, new TextDocumentParser());
        }
        if ("pdf".equalsIgnoreCase(str)) {
            return UrlDocumentLoader.load(str2, new ApachePdfBoxDocumentParser());
        }
        if (isPoiType(str)) {
            return UrlDocumentLoader.load(str2, new ApachePoiDocumentParser());
        }
        throw new BusinessException("目前不支持" + str + "文件格式解析");
    }

    /**
     * Splits a document into segments, embeds them and stores embeddings plus segments.
     *
     * @param document         document to split
     * @param documentSplitter splitter that produces the text segments
     * @param embeddingModel   model used to embed the segments
     * @param embeddingStore   store that receives the embeddings and their segments
     * @return JSON object with the keys {@code segments}, {@code embeddings} and {@code redisIds}
     * @throws BusinessException if splitting, embedding or storing fails
     */
    public JSONObject documentSplitStore(Document document, DocumentSplitter documentSplitter, EmbeddingModel embeddingModel, EmbeddingStore<TextSegment> embeddingStore) {
        try {
            List<TextSegment> segments = documentSplitter.splitAll(Collections.singletonList(document));
            List<Embedding> embeddings = embeddingModel.embedAll(segments).content();
            List<String> ids = embeddingStore.addAll(embeddings, segments);

            JSONObject result = new JSONObject();
            result.put("segments", segments);
            result.put("embeddings", embeddings);
            result.put("redisIds", ids);
            return result;
        } catch (Exception e) {
            throw storeFailure(e);
        }
    }

    /**
     * Embeds a single text and stores the embedding together with the text as its segment.
     *
     * @param str            text to embed and store
     * @param embeddingModel model used to embed the text
     * @param embeddingStore store that receives the embedding and its segment
     * @return JSON object with the keys {@code embedding} and {@code redisId} (the id returned
     *         by the store)
     * @throws BusinessException if embedding or storing fails
     */
    public JSONObject documentStoreSingle(String str, EmbeddingModel embeddingModel, EmbeddingStore<TextSegment> embeddingStore) {
        try {
            Embedding embedding = embeddingModel.embed(str).content();
            // The second argument is the embedded payload (a TextSegment), not another Embedding.
            String id = embeddingStore.add(embedding, TextSegment.from(str));

            JSONObject result = new JSONObject();
            result.put("embedding", embedding);
            result.put("redisId", id);
            return result;
        } catch (Exception e) {
            throw storeFailure(e);
        }
    }

    /**
     * Embeds a single text and stores it under a caller-supplied id.
     *
     * @param str            id under which the entry is stored
     * @param str2           text to embed and store
     * @param embeddingModel model used to embed the text
     * @param embeddingStore store that receives the embedding and its segment
     * @return JSON object with the keys {@code embedding} and {@code redisId} (equal to
     *         {@code str})
     * @throws BusinessException if embedding or storing fails
     */
    public JSONObject documentStoreSingleSourceId(String str, String str2, EmbeddingModel embeddingModel, EmbeddingStore<TextSegment> embeddingStore) {
        try {
            Embedding embedding = embeddingModel.embed(str2).content();
            embeddingStore.add(str, embedding, TextSegment.from(str2));

            JSONObject result = new JSONObject();
            result.put("embedding", embedding);
            result.put("redisId", str);
            return result;
        } catch (Exception e) {
            throw storeFailure(e);
        }
    }

    /**
     * Embeds a list of segments and stores each embedding together with its segment.
     *
     * @param list           segments to embed and store
     * @param embeddingModel model used to embed the segments
     * @param embeddingStore store that receives the embeddings and their segments
     * @return JSON object with the keys {@code embeddings} and {@code redisIds}
     * @throws BusinessException if embedding or storing fails
     */
    public JSONObject documentStoreList(List<TextSegment> list, EmbeddingModel embeddingModel, EmbeddingStore<TextSegment> embeddingStore) {
        try {
            List<Embedding> embeddings = embeddingModel.embedAll(list).content();
            // Pass the segments along with the embeddings, as documentSplitStore does; storing
            // the embeddings alone would leave the entries without their text.
            List<String> ids = embeddingStore.addAll(embeddings, list);

            JSONObject result = new JSONObject();
            result.put("embeddings", embeddings);
            result.put("redisIds", ids);
            return result;
        } catch (Exception e) {
            throw storeFailure(e);
        }
    }

    /**
     * Manual smoke test: stores one embedding in a Redis embedding store and prints the result
     * of searching the index.
     *
     * <p>Connection settings come from the environment so that no credentials live in source:
     * {@code REDIS_HOST} and {@code REDIS_PASSWORD} are required; {@code REDIS_PORT}
     * (default {@code 6379}), {@code REDIS_USER} (default {@code default}) and
     * {@code REDIS_INDEX} (default {@code tttttt}) are optional.
     *
     * @param strArr ignored
     * @throws IllegalStateException if a required environment variable is missing
     */
    public static void main(String[] strArr) throws Exception {
        String host = requireEnv("REDIS_HOST");
        String password = requireEnv("REDIS_PASSWORD");
        int port = Integer.parseInt(System.getenv().getOrDefault("REDIS_PORT", "6379"));
        String user = System.getenv().getOrDefault("REDIS_USER", "default");
        String indexName = System.getenv().getOrDefault("REDIS_INDEX", "tttttt");

        RedisEmbeddingStore store = RedisEmbeddingStore.builder()
                .indexName(indexName)
                .host(host)
                .port(port)
                .user(user)
                .password(password)
                .dimension(384)
                .build();
        store.add(new AllMiniLmL6V2EmbeddingModel().embed("测试111111111").content());

        // Close the client so its connections are released when the check is done.
        try (JedisPooled jedis = new JedisPooled(host, port, user, password)) {
            System.out.println(jedis.ftSearch(indexName));
        }
    }

    /** Returns whether the file type is one of {@link #POI_DOC_TYPES}, ignoring case. */
    private static boolean isPoiType(String fileType) {
        for (String poiType : POI_DOC_TYPES) {
            if (poiType.equalsIgnoreCase(fileType)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Logs a store failure with its stack trace and builds the exception to throw.
     *
     * <p>The cause is logged here rather than chained because only the single-message
     * {@link BusinessException} constructor is used in this file.
     */
    private static BusinessException storeFailure(Exception cause) {
        LOGGER.error(STORE_FAILED_MESSAGE, cause);
        return new BusinessException(STORE_FAILED_MESSAGE);
    }

    /** Reads a required environment variable, failing fast when it is missing or empty. */
    private static String requireEnv(String name) {
        String value = System.getenv(name);
        if (value == null || value.isEmpty()) {
            throw new IllegalStateException("Missing required environment variable: " + name);
        }
        return value;
    }
}
