/*
 * Decompiled with CFR 0.152.
 */
package com.alibaba.cloud.ai.transformer.splitter;

import com.knuddels.jtokkit.Encodings;
import com.knuddels.jtokkit.api.Encoding;
import com.knuddels.jtokkit.api.EncodingRegistry;
import com.knuddels.jtokkit.api.EncodingType;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
import org.springframework.ai.transformer.splitter.TextSplitter;
import org.springframework.util.Assert;

public class SentenceSplitter
extends TextSplitter {
    private final EncodingRegistry registry = Encodings.newLazyEncodingRegistry();
    private final Encoding encoding = this.registry.getEncoding(EncodingType.CL100K_BASE);
    private static final int DEFAULT_CHUNK_SIZE = 1024;
    private final SentenceModel sentenceModel;
    private final int chunkSize;

    public SentenceSplitter() {
        this(1024);
    }

    public SentenceSplitter(int chunkSize) {
        this.chunkSize = chunkSize;
        this.sentenceModel = this.getSentenceModel();
    }

    protected List<String> splitText(String text) {
        SentenceDetectorME sentenceDetector = new SentenceDetectorME(this.sentenceModel);
        String[] texts = sentenceDetector.sentDetect((CharSequence)text);
        if (texts == null || texts.length == 0) {
            return Collections.emptyList();
        }
        ArrayList<String> chunks = new ArrayList<String>();
        StringBuilder chunk = new StringBuilder();
        for (int i = 0; i < texts.length; ++i) {
            int textTokenSize;
            int currentChunkSize = this.getEncodedTokens(chunk.toString()).size();
            if (currentChunkSize + (textTokenSize = this.getEncodedTokens(texts[i]).size()) > this.chunkSize) {
                chunks.add(chunk.toString());
                chunk = new StringBuilder(texts[i]);
            } else {
                chunk.append(texts[i]);
            }
            if (i != texts.length - 1) continue;
            chunks.add(chunk.toString());
        }
        return chunks;
    }

    private SentenceModel getSentenceModel() {
        SentenceModel sentenceModel;
        block9: {
            InputStream is = ((Object)((Object)this)).getClass().getResourceAsStream("/opennlp/opennlp-en-ud-ewt-sentence-1.2-2.5.0.bin");
            try {
                if (is == null) {
                    throw new RuntimeException("sentence model is invalid");
                }
                sentenceModel = new SentenceModel(is);
                if (is == null) break block9;
            }
            catch (Throwable throwable) {
                try {
                    if (is != null) {
                        try {
                            is.close();
                        }
                        catch (Throwable throwable2) {
                            throwable.addSuppressed(throwable2);
                        }
                    }
                    throw throwable;
                }
                catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
            is.close();
        }
        return sentenceModel;
    }

    private List<Integer> getEncodedTokens(String text) {
        Assert.notNull((Object)text, (String)"Text must not be null");
        return this.encoding.encode(text).boxed();
    }
}

