/*
 * Decompiled with CFR 0.152.
 */
package dev.langchain4j.data.document.splitter;

import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.DocumentSplitter;
import dev.langchain4j.data.document.Metadata;
import dev.langchain4j.data.document.splitter.DocumentBySentenceSplitter;
import dev.langchain4j.data.document.splitter.SegmentBuilder;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.internal.ValidationUtils;
import dev.langchain4j.model.Tokenizer;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.lang.StringUtils;

/**
 * Base class for document splitters that cut a document's text into parts (via
 * {@link #split(String)}) and turn those parts into {@link TextSegment}s, each prefixed
 * with a trailing slice ("overlap") of the previously built segment.
 *
 * <p>Subclasses provide the actual cutting rule ({@link #split(String)}), the delimiter
 * used when parts are joined back together ({@link #joinDelimiter()}) and a fallback
 * sub-splitter ({@link #defaultSubSplitter()}).
 *
 * <p>Every produced segment carries a copy of the document metadata plus an
 * {@code "index"} entry holding the segment's zero-based position as a string.
 */
public abstract class HierarchicalDocumentSplitter implements DocumentSplitter {
    /** Metadata key under which the zero-based segment position is stored. */
    private static final String INDEX = "index";

    /** Join delimiter that identifies a paragraph-level splitter. */
    private static final String PARAGRAPH_DELIMITER = "\n\n";

    /**
     * For paragraph-level splitters, segments shorter than this many characters are held
     * back and merged into the following segment instead of being emitted on their own.
     */
    private static final int MIN_PARAGRAPH_SEGMENT_LENGTH = 50;

    /**
     * Lazily created sentence splitter.
     * NOTE(review): neither this field nor its accessor is referenced anywhere else in
     * this class; kept as-is in case it is still needed.
     */
    private HierarchicalDocumentSplitter overlapSentenceSplitter;

    /** Upper bound handed to the {@link SegmentBuilder}; validated to be greater than zero. */
    protected final int maxSegmentSize;

    /** Number of trailing characters of a segment repeated at the start of the next one. */
    protected final int maxOverlapSize;

    /** Optional tokenizer used by {@link #estimateSize(String)}; {@code null} means "count characters". */
    protected final Tokenizer tokenizer;

    /**
     * Splitter supplied by the caller or by {@link #defaultSubSplitter()}.
     * It is stored here but not consulted by {@link #split(Document)} in this class.
     */
    protected final DocumentSplitter subSplitter;

    /**
     * Returns the sentence splitter, creating it on first use.
     *
     * @return the cached {@link DocumentBySentenceSplitter} instance
     */
    private HierarchicalDocumentSplitter getOverlapSentenceSplitter() {
        if (this.overlapSentenceSplitter == null) {
            this.overlapSentenceSplitter = new DocumentBySentenceSplitter(1, 0, null, null);
        }
        return this.overlapSentenceSplitter;
    }

    /**
     * Creates a splitter without tokenizer, using the default sub-splitter.
     *
     * @param maxSegmentSizeInChars maximum segment size; must be greater than zero
     * @param maxOverlapSizeInChars overlap size; must lie between 0 and {@code maxSegmentSizeInChars}
     */
    protected HierarchicalDocumentSplitter(int maxSegmentSizeInChars, int maxOverlapSizeInChars) {
        this(maxSegmentSizeInChars, maxOverlapSizeInChars, null, null);
    }

    /**
     * Creates a splitter without tokenizer and with an explicit sub-splitter.
     *
     * @param maxSegmentSizeInChars maximum segment size; must be greater than zero
     * @param maxOverlapSizeInChars overlap size; must lie between 0 and {@code maxSegmentSizeInChars}
     * @param subSplitter           sub-splitter to store; {@code null} selects {@link #defaultSubSplitter()}
     */
    protected HierarchicalDocumentSplitter(int maxSegmentSizeInChars, int maxOverlapSizeInChars, HierarchicalDocumentSplitter subSplitter) {
        this(maxSegmentSizeInChars, maxOverlapSizeInChars, null, subSplitter);
    }

    /**
     * Creates a splitter with a tokenizer, using the default sub-splitter.
     *
     * @param maxSegmentSizeInTokens maximum segment size; must be greater than zero
     * @param maxOverlapSizeInTokens overlap size; must lie between 0 and {@code maxSegmentSizeInTokens}
     * @param tokenizer              tokenizer used for size estimation; may be {@code null}
     */
    protected HierarchicalDocumentSplitter(int maxSegmentSizeInTokens, int maxOverlapSizeInTokens, Tokenizer tokenizer) {
        this(maxSegmentSizeInTokens, maxOverlapSizeInTokens, tokenizer, null);
    }

    /**
     * Canonical constructor; all other constructors delegate here.
     *
     * <p>Note that {@link #defaultSubSplitter()} is invoked from this constructor when
     * {@code subSplitter} is {@code null}, so implementations of that method must not
     * rely on subclass fields being initialised.
     *
     * @param maxSegmentSizeInTokens maximum segment size; must be greater than zero
     * @param maxOverlapSizeInTokens overlap size; must lie between 0 and {@code maxSegmentSizeInTokens}
     * @param tokenizer              tokenizer used for size estimation; may be {@code null}
     * @param subSplitter            sub-splitter to store; {@code null} selects {@link #defaultSubSplitter()}
     */
    protected HierarchicalDocumentSplitter(int maxSegmentSizeInTokens, int maxOverlapSizeInTokens, Tokenizer tokenizer, DocumentSplitter subSplitter) {
        this.maxSegmentSize = ValidationUtils.ensureGreaterThanZero(maxSegmentSizeInTokens, "maxSegmentSize");
        this.maxOverlapSize = ValidationUtils.ensureBetween(maxOverlapSizeInTokens, 0, this.maxSegmentSize, "maxOverlapSize");
        this.tokenizer = tokenizer;
        this.subSplitter = subSplitter == null ? this.defaultSubSplitter() : subSplitter;
    }

    /**
     * Cuts raw text into the parts this splitter operates on.
     *
     * @param var1 the text to cut
     * @return the parts, in document order
     */
    protected abstract String[] split(String var1);

    /**
     * @return the delimiter handed to the {@link SegmentBuilder} for joining parts; the
     *         value {@code "\n\n"} additionally switches on the short-paragraph merging
     *         described in {@link #split(Document)}
     */
    protected abstract String joinDelimiter();

    /**
     * @return the sub-splitter to store when the caller did not supply one
     */
    protected abstract DocumentSplitter defaultSubSplitter();

    /**
     * Splits the document into segments, one candidate segment per part returned by
     * {@link #split(String)}.
     *
     * <p>For every part after the first non-blank one, the segment text is built from the
     * overlap of the previous segment text followed by the part. When the join delimiter
     * is {@code "\n\n"} (paragraph splitter), a segment text shorter than
     * {@value #MIN_PARAGRAPH_SEGMENT_LENGTH} characters is not emitted; it is carried over
     * in full and merged into the next segment. The final part is exempt from this rule so
     * that trailing short text is never lost.
     *
     * @param document the document to split; must not be {@code null}
     * @return the segments in document order, each tagged with its {@code "index"}
     */
    public List<TextSegment> split(Document document) {
        ValidationUtils.ensureNotNull(document, "document");

        List<TextSegment> segments = new ArrayList<>();
        String delimiter = this.joinDelimiter();
        SegmentBuilder segmentBuilder = new SegmentBuilder(this.maxSegmentSize, this::estimateSize, delimiter);
        boolean isParagraphSplitter = PARAGRAPH_DELIMITER.equals(delimiter);
        String[] parts = this.split(document.text());
        int nextIndex = 0;

        for (int i = 0; i < parts.length; ++i) {
            String part = parts[i];
            boolean isLastPart = i == parts.length - 1;

            if (segmentBuilder.isNotEmpty()) {
                String previousText = segmentBuilder.toString();
                if (StringUtils.isBlank(previousText)) {
                    // Builder holds only whitespace: leave it untouched and skip this part.
                    continue;
                }
                segmentBuilder.reset();
                // A held-back short paragraph is carried over whole; otherwise only the tail is repeated.
                String overlap = isParagraphSplitter && previousText.length() < MIN_PARAGRAPH_SEGMENT_LENGTH
                        ? previousText
                        : this.overlapFrom(previousText);
                segmentBuilder.append(overlap);
                segmentBuilder.append(part);
            } else {
                if (StringUtils.isBlank(part)) {
                    continue;
                }
                segmentBuilder.append(part);
            }

            String segmentText = segmentBuilder.toString();
            // Hold back short paragraphs so they merge into the next segment. The last part is
            // always emitted; without this exemption a document ending in (or consisting of) a
            // single short paragraph would silently produce no segment for that text.
            if (isParagraphSplitter && segmentText.length() < MIN_PARAGRAPH_SEGMENT_LENGTH && !isLastPart) {
                continue;
            }
            segments.add(HierarchicalDocumentSplitter.createSegment(segmentText, document, nextIndex++));
        }
        return segments;
    }

    /**
     * Returns the overlap to prepend to the next segment: the last
     * {@link #maxOverlapSize} characters of {@code segmentText}, or the whole text when
     * it is not longer than that.
     *
     * <p>NOTE(review): the overlap is always measured in characters, even when a
     * {@link #tokenizer} is configured and sizes are otherwise estimated in tokens.
     *
     * @param segmentText text of the previously built segment; may be {@code null}
     * @return the overlap, or {@code ""} when overlap is disabled or the text is blank
     */
    String overlapFrom(String segmentText) {
        if (this.maxOverlapSize == 0 || StringUtils.isBlank(segmentText)) {
            return "";
        }
        int start = Math.max(0, segmentText.length() - this.maxOverlapSize);
        return segmentText.substring(start);
    }

    /**
     * Estimates the size of {@code text}.
     *
     * @param text the text to measure
     * @return the tokenizer's token count estimate when a tokenizer is set, otherwise
     *         the character count
     */
    int estimateSize(String text) {
        if (this.tokenizer != null) {
            return this.tokenizer.estimateTokenCountInText(text);
        }
        return text.length();
    }

    /**
     * Builds a segment whose metadata is a copy of the document's metadata with the
     * {@code "index"} entry set to {@code index} rendered as a string.
     *
     * @param text     the segment text
     * @param document the source document whose metadata is copied
     * @param index    zero-based position of the segment within the document
     * @return the new segment
     */
    static TextSegment createSegment(String text, Document document, int index) {
        Metadata metadata = document.metadata().copy().put(INDEX, String.valueOf(index));
        return TextSegment.from(text, metadata);
    }
}

