package com.ejianc.langchain.documentSplit;

import com.ejianc.support.idworker.util.IdWorker;
import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.DocumentSplitter;
import dev.langchain4j.data.document.loader.FileSystemDocumentLoader;
import dev.langchain4j.data.document.loader.UrlDocumentLoader;
import dev.langchain4j.data.document.parser.TextDocumentParser;
import dev.langchain4j.data.document.parser.apache.pdfbox.ApachePdfBoxDocumentParser;
import dev.langchain4j.data.document.splitter.DocumentSplitters;
import dev.langchain4j.data.message.UserMessage;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.onnx.allminilml6v2.AllMiniLmL6V2EmbeddingModel;
import dev.langchain4j.rag.AugmentationRequest;
import dev.langchain4j.rag.DefaultRetrievalAugmentor;
import dev.langchain4j.rag.content.injector.DefaultContentInjector;
import dev.langchain4j.rag.content.retriever.EmbeddingStoreContentRetriever;
import dev.langchain4j.rag.query.Metadata;
import dev.langchain4j.rag.query.transformer.DefaultQueryTransformer;
import dev.langchain4j.store.embedding.EmbeddingStoreIngestor;
import dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Paths;
import java.util.List;
import okhttp3.MediaType;

/* loaded from: input_file:com/ejianc/langchain/documentSplit/DocumentSplitUtil.class */
/**
 * Sample utility showing how to load a document, split it into text segments,
 * embed the segments into an in-memory store and augment a user query with the
 * retrieved content (a minimal RAG pipeline built on langchain4j).
 */
public class DocumentSplitUtil {
    /** JSON media type. Not referenced by any method in this class. */
    private static final MediaType JSON_MEDIA_TYPE = MediaType.parse("application/json");

    /** Local text file read by {@link #loadDocument()}. */
    private static final String LOCAL_TEXT_FILE = "C:/Users/Think/Desktop/test.txt";

    /** PDF fetched by {@link #main(String[])} when no URL argument is given. */
    private static final String DEFAULT_PDF_URL =
            "https://dev-ejc-attachment.oss-cn-beijing.aliyuncs.com/999999/202502/1892832659437510657.pdf";

    /** Query used by {@link #main(String[])} when no query argument is given ("create a new scheduled task"). */
    private static final String DEFAULT_QUERY = "新建调度任务";

    /** Maximum segment size, in characters, passed to the recursive splitter. */
    private static final int MAX_SEGMENT_SIZE = 300;

    /** Maximum overlap, in characters, between neighbouring segments. */
    private static final int MAX_OVERLAP_SIZE = 10;

    /**
     * Loads the sample text file from the local file system.
     *
     * @return the parsed document
     */
    public Document loadDocument() {
        // The location is a plain file-system path, not a URL (it has no usable
        // scheme), so it is read through the file-system loader rather than
        // UrlDocumentLoader.
        return FileSystemDocumentLoader.loadDocument(Paths.get(LOCAL_TEXT_FILE), new TextDocumentParser());
    }

    /**
     * Loads a document from the file-system location denoted by {@code url}.
     *
     * @param url URL pointing at a local file
     * @return the loaded document
     * @throws IllegalArgumentException if {@code url} cannot be converted to a URI
     */
    private static Document getDocument(URL url) {
        try {
            return FileSystemDocumentLoader.loadDocument(Paths.get(url.toURI()));
        } catch (URISyntaxException e) {
            // Surface the failure instead of silently handing back null.
            throw new IllegalArgumentException("Cannot convert URL to a file path: " + url, e);
        }
    }

    /**
     * Runs the demo pipeline: download a PDF, split it, ingest it into an
     * in-memory embedding store, augment a query, then print the augmented
     * message and the first segment of the document.
     *
     * @param args optional; {@code args[0]} overrides the PDF URL and
     *             {@code args[1]} overrides the query text
     * @throws Exception if any step of the pipeline fails
     */
    public static void main(String[] args) throws Exception {
        String pdfUrl = argOrDefault(args, 0, DEFAULT_PDF_URL);
        String query = argOrDefault(args, 1, DEFAULT_QUERY);

        Document document = UrlDocumentLoader.load(pdfUrl, new ApachePdfBoxDocumentParser());
        DocumentSplitter splitter = DocumentSplitters.recursive(MAX_SEGMENT_SIZE, MAX_OVERLAP_SIZE);
        List<TextSegment> segments = splitter.split(document);

        AllMiniLmL6V2EmbeddingModel embeddingModel = new AllMiniLmL6V2EmbeddingModel();
        InMemoryEmbeddingStore<TextSegment> embeddingStore = new InMemoryEmbeddingStore<>();
        EmbeddingStoreIngestor.builder()
                .documentSplitter(splitter)
                .embeddingModel(embeddingModel)
                .embeddingStore(embeddingStore)
                .build()
                .ingest(document);

        DefaultRetrievalAugmentor augmentor = DefaultRetrievalAugmentor.builder()
                .contentRetriever(EmbeddingStoreContentRetriever.builder()
                        .embeddingStore(embeddingStore)
                        .embeddingModel(embeddingModel)
                        .build())
                .contentInjector(DefaultContentInjector.builder().build())
                .queryTransformer(new DefaultQueryTransformer())
                .build();

        UserMessage userMessage = new UserMessage(query);
        // No chat memory is supplied; the raw cast on null is kept from the
        // original call so the argument's static type is unchanged.
        @SuppressWarnings({"rawtypes", "unchecked"})
        Metadata metadata = Metadata.from(userMessage, Long.valueOf(IdWorker.getId()), (List) null);

        // Print what the augmentor actually produced rather than a canned sentence.
        System.out.println("Retrieved Content: "
                + augmentor.augment(new AugmentationRequest(userMessage, metadata)).chatMessage());

        if (segments.isEmpty()) {
            System.out.println("Document produced no text segments.");
        } else {
            System.out.println(segments.get(0).text());
        }
    }

    /** Returns {@code args[index]} when it is present and non-empty, otherwise {@code fallback}. */
    private static String argOrDefault(String[] args, int index, String fallback) {
        if (args == null || index >= args.length) {
            return fallback;
        }
        String value = args[index];
        return (value == null || value.isEmpty()) ? fallback : value;
    }
}
