package com.hw.langchain.text.splitter;

import com.hw.langchain.schema.BaseDocumentTransformer;
import com.hw.langchain.schema.Document;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/hw/langchain/text/splitter/TextSplitter.class */
public abstract class TextSplitter implements BaseDocumentTransformer {
    /** Class-wide SLF4J logger; {@code mergeSplits} uses it to warn about oversized chunks. */
    private static final Logger LOG = LoggerFactory.getLogger(TextSplitter.class);
    /** Length limit that {@code mergeSplits} compares the accumulated chunk against; 4000 unless set on the builder. */
    protected int chunkSize;
    /** Accumulated length above which {@code mergeSplits} keeps dropping leading pieces after emitting a chunk; 200 unless set on the builder. */
    protected int chunkOverlap;
    /** Measures the length of a piece of text; {@code String.length()} unless set on the builder. */
    protected Function<String, Integer> lengthFunction;
    /** Copied from the builder; not read anywhere in this class (presumably consumed by subclasses - confirm). */
    protected boolean keepSeparator;
    /** When true, {@code createDocuments} stores a {@code "start_index"} entry in each chunk's metadata. */
    protected boolean addStartIndex;

    /* loaded from: input_file:com/hw/langchain/text/splitter/TextSplitter$TextSplitterBuilder.class */
    /**
     * Fluent, self-typed builder for {@link TextSplitter} subclasses.
     *
     * <p>The {@code xxx$set}/{@code xxx$value} pairs record whether an option was explicitly
     * supplied, so that the {@code TextSplitter} constructor can fall back to a default when it
     * was not. The naming looks like Lombok {@code @SuperBuilder}/{@code @Builder.Default}
     * output - NOTE(review): confirm against the original, non-decompiled source.
     *
     * @param <C> concrete splitter type produced by {@link #build()}
     * @param <B> concrete builder type returned by the fluent setters
     */
    public static abstract class TextSplitterBuilder<C extends TextSplitter, B extends TextSplitterBuilder<C, B>> {
        // The field names below are read directly by the enclosing class's constructor; do not rename.
        private boolean chunkSize$set;
        private int chunkSize$value;
        private boolean chunkOverlap$set;
        private int chunkOverlap$value;
        private boolean lengthFunction$set;
        private Function<String, Integer> lengthFunction$value;
        private boolean keepSeparator;
        private boolean addStartIndex;

        /**
         * Sets the chunk size and marks it as explicitly provided.
         *
         * @param i the chunk size to use
         * @return this builder
         */
        public B chunkSize(int i) {
            this.chunkSize$value = i;
            this.chunkSize$set = true;
            return self();
        }

        /**
         * Sets the chunk overlap and marks it as explicitly provided.
         *
         * @param i the chunk overlap to use
         * @return this builder
         */
        public B chunkOverlap(int i) {
            this.chunkOverlap$value = i;
            this.chunkOverlap$set = true;
            return self();
        }

        /**
         * Sets the function used to measure text length and marks it as explicitly provided.
         * Passing {@code null} still counts as "provided", so no default is substituted.
         *
         * @param function the length function to use
         * @return this builder
         */
        public B lengthFunction(Function<String, Integer> function) {
            this.lengthFunction$value = function;
            this.lengthFunction$set = true;
            return self();
        }

        /**
         * Sets the {@code keepSeparator} flag (defaults to {@code false} when never called).
         *
         * @param z the flag value
         * @return this builder
         */
        public B keepSeparator(boolean z) {
            this.keepSeparator = z;
            return self();
        }

        /**
         * Sets the {@code addStartIndex} flag (defaults to {@code false} when never called).
         *
         * @param z the flag value
         * @return this builder
         */
        public B addStartIndex(boolean z) {
            this.addStartIndex = z;
            return self();
        }

        /**
         * Returns this builder typed as the concrete builder class, enabling fluent chaining
         * from subclasses.
         *
         * @return this builder
         */
        protected abstract B self();

        /**
         * Creates the concrete splitter from the values collected so far.
         *
         * @return the configured splitter
         */
        public abstract C build();

        /**
         * Renders the collected values for debugging; the {@code $set} flags are not included.
         *
         * @return a human-readable description of this builder
         */
        @Override
        public String toString() {
            return "TextSplitter.TextSplitterBuilder(chunkSize$value=" + this.chunkSize$value + ", chunkOverlap$value=" + this.chunkOverlap$value + ", lengthFunction$value=" + this.lengthFunction$value + ", keepSeparator=" + this.keepSeparator + ", addStartIndex=" + this.addStartIndex + ")";
        }
    }

    /**
     * Splits a single text into chunks. Implemented by concrete splitters; within this class the
     * result is consumed by {@code createDocuments}, which wraps each returned string in a
     * {@code Document}.
     *
     * @param str the text to split
     * @return the chunks, in the order they should appear in the output
     */
    public abstract List<String> splitText(String str);

    /**
     * Splits every text with {@link #splitText(String)} and wraps each resulting chunk in a
     * {@link Document} that carries its own copy of the metadata at the same index.
     *
     * <p>A {@code null} metadata list, or a {@code null} entry inside it, is treated as empty
     * metadata instead of failing with a {@code NullPointerException}.
     *
     * <p>When {@code addStartIndex} is enabled, each chunk's metadata gets a
     * {@code "start_index"} entry holding the position reported by
     * {@link String#indexOf(String, int)}, searching from just after the previous chunk's
     * position. The value is {@code -1} when the chunk does not occur verbatim in the source text.
     *
     * @param list  the texts to split
     * @param list2 per-text metadata, index-aligned with {@code list}; may be {@code null}
     * @return one document per chunk, in input order
     * @throws IndexOutOfBoundsException if a text that produces chunks has no entry in a
     *                                   non-null {@code list2}
     */
    public List<Document> createDocuments(List<String> list, List<Map<String, Object>> list2) {
        List<Document> documents = new ArrayList<>();
        for (int i = 0; i < list.size(); i++) {
            String text = list.get(i);
            int startIndex = -1;
            for (String chunk : splitText(text)) {
                // Looked up per chunk (not per text) so a missing entry only matters when the
                // text actually yields chunks, exactly as before.
                Map<String, Object> source = (list2 == null) ? null : list2.get(i);
                // Every document gets an independent copy so later mutation cannot leak
                // between chunks or back into the caller's map.
                Map<String, Object> metadata = new HashMap<>();
                if (source != null) {
                    metadata.putAll(source);
                }
                if (this.addStartIndex) {
                    startIndex = text.indexOf(chunk, startIndex + 1);
                    metadata.put("start_index", Integer.valueOf(startIndex));
                }
                documents.add(new Document(chunk, metadata));
            }
        }
        return documents;
    }

    /**
     * Splits existing documents by feeding their page contents and metadata, as two
     * index-aligned lists, to {@link #createDocuments(List, List)}.
     *
     * @param list the documents to split
     * @return the documents produced by {@code createDocuments}
     */
    public List<Document> splitDocuments(List<Document> list) {
        final List<String> contents = new ArrayList<>();
        final List<Map<String, Object>> metadatas = new ArrayList<>();
        list.forEach(document -> {
            contents.add(document.getPageContent());
            metadatas.add(document.getMetadata());
        });
        return createDocuments(contents, metadatas);
    }

    /**
     * Joins the pieces with the separator and strips surrounding whitespace.
     *
     * @param list the pieces to join
     * @param str  the separator placed between pieces
     * @return the stripped result, or {@code null} when nothing but whitespace remains
     */
    private String joinDocs(List<String> list, String str) {
        final String joined = String.join(str, list);
        final String stripped = joined.strip();
        return stripped.isEmpty() ? null : stripped;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Greedily packs consecutive pieces into chunks joined by the separator.
     *
     * <p>Pieces accumulate in a sliding window. When adding the next piece would push the
     * window past {@code chunkSize}, the window is emitted as a chunk and pieces are dropped
     * from its front until its length is at most {@code chunkOverlap} and the next piece fits
     * (or the window length reaches zero). Whatever stays in the window becomes the start of
     * the next chunk. All lengths are measured with {@code lengthFunction}.
     *
     * @param list the pieces to merge, in order
     * @param str  the separator inserted between pieces of one chunk
     * @return the merged chunks; chunks that are blank after stripping are omitted
     */
    public List<String> mergeSplits(List<String> list, String str) {
        final int separatorLength = this.lengthFunction.apply(str).intValue();
        final List<String> chunks = new ArrayList<>();
        final List<String> window = new ArrayList<>();
        int windowLength = 0;

        for (String piece : list) {
            final int pieceLength = this.lengthFunction.apply(piece).intValue();
            final int projected = windowLength + pieceLength + (window.isEmpty() ? 0 : separatorLength);

            if (projected > this.chunkSize) {
                // Only possible when a single piece is itself longer than the limit.
                if (windowLength > this.chunkSize) {
                    LOG.warn("Created a chunk of size {}, which is longer than the specified {}", Integer.valueOf(windowLength), Integer.valueOf(this.chunkSize));
                }
                if (!window.isEmpty()) {
                    final String chunk = joinDocs(window, str);
                    if (chunk != null) {
                        chunks.add(chunk);
                    }
                    // Shrink from the front: keep going while the window exceeds the overlap
                    // budget, or while the incoming piece still would not fit and there is
                    // something left to drop.
                    while (windowLength > this.chunkOverlap
                            || (windowLength > 0
                                    && windowLength + pieceLength + (window.isEmpty() ? 0 : separatorLength) > this.chunkSize)) {
                        final int headLength = this.lengthFunction.apply(window.get(0)).intValue();
                        windowLength -= headLength + (window.size() > 1 ? separatorLength : 0);
                        window.remove(0);
                    }
                }
            }

            window.add(piece);
            // A separator is only counted once the window holds more than one piece.
            windowLength += pieceLength + (window.size() > 1 ? separatorLength : 0);
        }

        final String lastChunk = joinDocs(window, str);
        if (lastChunk != null) {
            chunks.add(lastChunk);
        }
        return chunks;
    }

    /**
     * {@code BaseDocumentTransformer} entry point; delegates to {@link #splitDocuments(List)}.
     *
     * @param list the documents to transform
     * @param map  extra arguments; not used by this implementation
     * @return the split documents
     */
    @Override // com.hw.langchain.schema.BaseDocumentTransformer
    public List<Document> transformDocuments(List<Document> list, Map<String, Object> map) {
        final List<Document> splitResult = this.splitDocuments(list);
        return splitResult;
    }

    /**
     * Supplies the chunk size used when the builder was never given one.
     *
     * @return the default chunk size, 4000
     */
    private static int $default$chunkSize() {
        final int defaultChunkSize = 4_000;
        return defaultChunkSize;
    }

    /**
     * Supplies the chunk overlap used when the builder was never given one.
     *
     * @return the default chunk overlap, 200
     */
    private static int $default$chunkOverlap() {
        final int defaultChunkOverlap = 200;
        return defaultChunkOverlap;
    }

    /**
     * Supplies the length function used when the builder was never given one.
     *
     * @return a function that measures text with {@link String#length()}
     */
    private static Function<String, Integer> $default$lengthFunction() {
        return String::length;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    public TextSplitter(TextSplitterBuilder<?, ?> textSplitterBuilder) {
        if (((TextSplitterBuilder) textSplitterBuilder).chunkSize$set) {
            this.chunkSize = ((TextSplitterBuilder) textSplitterBuilder).chunkSize$value;
        } else {
            this.chunkSize = $default$chunkSize();
        }
        if (((TextSplitterBuilder) textSplitterBuilder).chunkOverlap$set) {
            this.chunkOverlap = ((TextSplitterBuilder) textSplitterBuilder).chunkOverlap$value;
        } else {
            this.chunkOverlap = $default$chunkOverlap();
        }
        if (((TextSplitterBuilder) textSplitterBuilder).lengthFunction$set) {
            this.lengthFunction = ((TextSplitterBuilder) textSplitterBuilder).lengthFunction$value;
        } else {
            this.lengthFunction = $default$lengthFunction();
        }
        this.keepSeparator = ((TextSplitterBuilder) textSplitterBuilder).keepSeparator;
        this.addStartIndex = ((TextSplitterBuilder) textSplitterBuilder).addStartIndex;
    }
}
