package com.hw.langchain.text.splitter;

import com.google.common.collect.Maps;
import com.hw.langchain.schema.Document;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;

/* loaded from: input_file:com/hw/langchain/text/splitter/MarkdownHeaderTextSplitter.class */
/**
 * Splits Markdown text into {@link Document} chunks and tags every chunk with the headers it sits under.
 *
 * <p>The splitter is configured with (separator, metadata name) pairs, e.g. {@code ("#", "Header 1")}.
 * The text is processed line by line: a line that starts with a configured separator, followed by a
 * space or the end of the line, is treated as a header. Header lines are not emitted as content; the
 * header text is stored in the metadata of the content that follows, under the pair's metadata name.
 *
 * <p>Detection is purely line based. No fenced-code-block tracking is performed, so a line such as
 * {@code # comment} inside a code fence is treated as a header as well.
 */
public class MarkdownHeaderTextSplitter {
    /** Two spaces followed by a newline; placed between line groups merged into one chunk. */
    private static final String CHUNK_LINE_JOINER = "  \n";

    /** Configured (separator, metadata name) pairs, ordered by separator length, longest first. */
    private final List<Pair<String, String>> headersToSplitOn;

    /** When {@code true}, line groups are returned one by one instead of merged by equal metadata. */
    private final boolean returnEachLine;

    /**
     * Creates a splitter that merges consecutive line groups sharing the same metadata.
     *
     * @param headersToSplitOn (separator, metadata name) pairs describing the headers to split on
     * @throws NullPointerException if {@code headersToSplitOn} is {@code null}
     */
    public MarkdownHeaderTextSplitter(List<Pair<String, String>> headersToSplitOn) {
        this(headersToSplitOn, false);
    }

    /**
     * Creates a splitter.
     *
     * @param headersToSplitOn (separator, metadata name) pairs describing the headers to split on;
     *                         the list is copied and sorted, the argument itself is not modified
     * @param returnEachLine   {@code true} to return every line group as its own document,
     *                         {@code false} to merge consecutive groups with equal metadata
     * @throws NullPointerException if {@code headersToSplitOn} is {@code null}
     */
    public MarkdownHeaderTextSplitter(List<Pair<String, String>> headersToSplitOn, boolean returnEachLine) {
        Objects.requireNonNull(headersToSplitOn, "headersToSplitOn");
        this.returnEachLine = returnEachLine;
        // The lambda parameter needs an explicit type: without it the comparator is inferred as
        // Comparator<Object> once reversed() is chained, and getKey() does not resolve.
        this.headersToSplitOn = headersToSplitOn.stream()
                .sorted(Comparator.comparingInt((Pair<String, String> header) -> header.getKey().length())
                        .reversed())
                .toList();
    }

    /**
     * Merges consecutive lines whose metadata is equal into single documents.
     *
     * <p>The contents of merged lines are joined with two spaces and a newline. The metadata of a
     * merged document is the metadata of the first line of its run. The {@link LineType} elements
     * passed in are only read, never modified.
     *
     * @param lines line groups in document order
     * @return an unmodifiable list with one document per run of equal metadata
     */
    public List<Document> aggregateLinesToChunks(List<LineType> lines) {
        List<Document> chunks = new ArrayList<>();
        LineType runHead = null;   // first line of the run currently being merged
        String runContent = null;  // content accumulated for that run
        for (LineType line : lines) {
            if (runHead != null && runHead.getMetadata().equals(line.getMetadata())) {
                runContent = runContent + CHUNK_LINE_JOINER + line.getContent();
            } else {
                if (runHead != null) {
                    chunks.add(new Document(runContent, runHead.getMetadata()));
                }
                runHead = line;
                runContent = line.getContent();
            }
        }
        if (runHead != null) {
            chunks.add(new Document(runContent, runHead.getMetadata()));
        }
        return List.copyOf(chunks);
    }

    /**
     * Splits Markdown text into documents carrying header metadata.
     *
     * <p>Every line is stripped of surrounding whitespace. Header lines and blank lines close the
     * group of content lines collected so far; other lines are appended to the current group.
     *
     * @param text the Markdown text to split
     * @return the resulting documents, merged by equal metadata unless the splitter was created with
     *         {@code returnEachLine = true}
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public List<Document> splitText(String text) {
        Objects.requireNonNull(text, "text");
        List<LineType> linesWithMetadata = new ArrayList<>();
        List<String> currentContent = new ArrayList<>();
        Map<String, Object> currentMetadata = new HashMap<>();
        List<HeaderType> headerStack = new ArrayList<>();
        Map<String, String> initialMetadata = new HashMap<>();

        for (String rawLine : text.split("\n")) {
            String line = rawLine.strip();
            boolean isHeader = processLine(line, linesWithMetadata, currentContent, currentMetadata,
                    headerStack, initialMetadata);
            if (!isHeader) {
                if (line.isEmpty()) {
                    // A blank line ends the current group of content lines.
                    flushContent(linesWithMetadata, currentContent, currentMetadata);
                } else {
                    currentContent.add(line);
                }
            }
            // Content that follows is tagged with the header state as of this line.
            currentMetadata = new HashMap<>(initialMetadata);
        }
        return processOutput(linesWithMetadata, currentContent, currentMetadata);
    }

    /**
     * Handles {@code line} if it is one of the configured headers.
     *
     * <p>For a header with a non-null metadata name, headers of the same or a deeper level are popped
     * from {@code headerStack} (and their names removed from {@code initialMetadata}) before the new
     * header is pushed and recorded. In every header case the pending content is flushed using
     * {@code currentMetadata}, i.e. the metadata that was valid before this header.
     *
     * @param line              the stripped line to inspect
     * @param linesWithMetadata sink receiving the flushed line group
     * @param currentContent    pending content lines; cleared when flushed
     * @param currentMetadata   metadata to attach to the flushed content
     * @param headerStack       stack of the headers currently in effect, innermost last
     * @param initialMetadata   header name to header text map, updated in place
     * @return {@code true} if the line is a configured header, {@code false} otherwise
     */
    private boolean processLine(String line, List<LineType> linesWithMetadata, List<String> currentContent,
            Map<String, Object> currentMetadata, List<HeaderType> headerStack,
            Map<String, String> initialMetadata) {
        for (Pair<String, String> header : this.headersToSplitOn) {
            String separator = header.getKey();
            if (!isHeaderToSplitOn(line, separator)) {
                continue;
            }
            String name = header.getValue();
            if (name != null) {
                int level = StringUtils.countMatches(separator, "#");
                // Leaving a section: drop every header at the same or a deeper level.
                while (!headerStack.isEmpty() && headerStack.get(headerStack.size() - 1).getLevel() >= level) {
                    HeaderType popped = headerStack.remove(headerStack.size() - 1);
                    initialMetadata.remove(popped.getName());
                }
                HeaderType current = new HeaderType(level, name, line.substring(separator.length()).strip());
                headerStack.add(current);
                initialMetadata.put(name, current.getData());
            }
            flushContent(linesWithMetadata, currentContent, currentMetadata);
            return true;
        }
        return false;
    }

    /**
     * Tells whether {@code line} starts with {@code separator} followed by a space or the end of the line.
     */
    private boolean isHeaderToSplitOn(String line, String separator) {
        if (!line.startsWith(separator)) {
            return false;
        }
        return line.length() == separator.length() || line.charAt(separator.length()) == ' ';
    }

    /**
     * Flushes any remaining content and converts the collected line groups into documents.
     *
     * @param linesWithMetadata line groups collected so far
     * @param currentContent    content lines not yet flushed
     * @param currentMetadata   metadata for the remaining content
     * @return one document per line group, or merged documents when {@code returnEachLine} is off
     */
    private List<Document> processOutput(List<LineType> linesWithMetadata, List<String> currentContent,
            Map<String, Object> currentMetadata) {
        flushContent(linesWithMetadata, currentContent, currentMetadata);
        if (!this.returnEachLine) {
            return aggregateLinesToChunks(linesWithMetadata);
        }
        return linesWithMetadata.stream()
                .map(line -> new Document(line.getContent(), line.getMetadata()))
                .toList();
    }

    /**
     * Moves the pending content lines, joined with newlines, into {@code sink} together with a copy
     * of {@code metadata}, then clears the pending list. Does nothing when there is no pending content.
     */
    private void flushContent(List<LineType> sink, List<String> pendingContent, Map<String, Object> metadata) {
        if (pendingContent.isEmpty()) {
            return;
        }
        sink.add(new LineType(String.join("\n", pendingContent), new HashMap<>(metadata)));
        pendingContent.clear();
    }
}
