package com.hw.langchain.document.loaders;

import com.google.common.collect.Maps;
import com.hw.langchain.document.loaders.base.BaseLoader;
import com.hw.langchain.exception.LangChainException;
import com.hw.langchain.schema.Document;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;

/* loaded from: input_file:com/hw/langchain/document/loaders/WebBaseLoader.class */
/**
 * Loader that fetches a list of web pages with jsoup and converts each page into a
 * {@link Document} carrying the page text plus a small metadata map.
 *
 * <p>Metadata keys written per page: {@code source} (the URL), {@code title} (only when a
 * {@code <title>} element exists), {@code description} and {@code language}.
 */
public class WebBaseLoader extends BaseLoader {
    private final List<String> webUrls;

    /**
     * Creates a loader for the given URLs.
     *
     * @param list the URLs to fetch; copied defensively so later changes to the caller's list
     *     do not affect this loader
     * @throws NullPointerException if {@code list} is {@code null}
     */
    public WebBaseLoader(List<String> list) {
        // Fail fast here instead of with an anonymous NPE inside load().
        this.webUrls = new ArrayList<>(Objects.requireNonNull(list, "list must not be null"));
    }

    /**
     * Fetches every configured URL, in order, and builds one {@link Document} per page.
     *
     * @return one document per URL, in the same order as the configured URLs; empty when no
     *     URLs were configured
     * @throws LangChainException if fetching any URL fails with an {@link IOException}; the
     *     original exception is kept as the cause and the remaining URLs are not fetched
     */
    @Override // com.hw.langchain.document.loaders.base.BaseLoader
    public List<Document> load() {
        List<Document> documents = new ArrayList<>(this.webUrls.size());
        for (String url : this.webUrls) {
            try {
                org.jsoup.nodes.Document page = Jsoup.connect(url).get();
                documents.add(new Document(page.wholeText(), buildMetadata(page, url)));
            } catch (IOException e) {
                // errorMessage(...) is not declared in this class; presumably inherited from
                // BaseLoader.
                throw new LangChainException(errorMessage(url), e);
            }
        }
        return documents;
    }

    /**
     * Collects the metadata stored alongside a page's text.
     *
     * @param page the parsed page
     * @param url the URL the page was fetched from, stored under {@code source}
     * @return a mutable map with {@code source}, optionally {@code title}, and always
     *     {@code description} and {@code language}
     */
    private Map<String, Object> buildMetadata(org.jsoup.nodes.Document page, String url) {
        Map<String, Object> metadata = new HashMap<>();
        metadata.put("source", url);

        // The title key is simply omitted when the page has no <title> element.
        Element titleElement = page.select("title").first();
        if (titleElement != null) {
            metadata.put("title", titleElement.text());
        }

        Element descriptionElement = page.select("meta[name=description]").first();
        metadata.put(
                "description", attrOrDefault(descriptionElement, "content", "No description found."));

        Element htmlElement = page.select("html").first();
        metadata.put("language", attrOrDefault(htmlElement, "lang", "No language found."));
        return metadata;
    }

    /**
     * Reads an attribute, falling back when either the element or the attribute is missing.
     *
     * <p>jsoup's {@code attr(key)} returns an empty string for a missing attribute, so a null
     * check on the element alone would never select the fallback for an element that exists but
     * lacks the attribute.
     *
     * @param element the element to read from; may be {@code null}
     * @param attributeKey the attribute name
     * @param fallback the value to return when the attribute cannot be read
     * @return the attribute value if the attribute is present, otherwise {@code fallback}
     */
    private static String attrOrDefault(Element element, String attributeKey, String fallback) {
        if (element == null || !element.hasAttr(attributeKey)) {
            return fallback;
        }
        return element.attr(attributeKey);
    }
}
