import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

import java.io.File;
import java.util.List;
import java.util.regex.Pattern;

/**
 * Reads the text of a PDF file (optionally restricted to a page range) once, at
 * construction time, and exposes a cleaned version of it through {@link #getText()}.
 *
 * <p>Extraction is best-effort: if the file cannot be loaded or parsed, the stack trace
 * is printed and {@link #getText()} returns an empty string instead of throwing.
 */
public class GetRawTextFromPDF {
    /**
     * Matches the rest of a line, any following whitespace that includes at least one
     * newline, and then a line consisting only of digits (with optional surrounding
     * whitespace), plus that line's trailing newline when present.
     * Presumably this targets a running header followed by a page number - confirm
     * against real documents.
     */
    private static final Pattern LINE_BEFORE_NUMBER_LINE =
            Pattern.compile("(?im)^.*\\s*\\n\\s*\\d+\\s*$\\n?");

    /** Raw extracted text, or {@code null} when extraction failed. */
    private final String text;

    /** Caller-supplied list; may be {@code null}. Only its size is used, see {@link #getText()}. */
    private final List<String> ignoreWords;

    /**
     * Extracts text from the PDF at the given file-system path.
     *
     * @param path        path of the PDF file
     * @param pageFrom    first page to extract; applied only when non-null and greater than 0
     * @param pageTo      last page to extract; applied only when non-null
     * @param ignoreWords list controlling the cleaning passes in {@link #getText()}; may be null
     * @throws NullPointerException if {@code path} is null (raised by {@link File#File(String)})
     */
    public GetRawTextFromPDF(String path, Integer pageFrom, Integer pageTo, List<String> ignoreWords) {
        this(new File(path), pageFrom, pageTo, ignoreWords);
    }

    /**
     * Extracts text from the given PDF file.
     *
     * @param fileName    the PDF file to read
     * @param pageFrom    first page to extract; applied only when non-null and greater than 0
     * @param pageTo      last page to extract; applied only when non-null
     * @param ignoreWords list controlling the cleaning passes in {@link #getText()}; may be null
     */
    public GetRawTextFromPDF(File fileName, Integer pageFrom, Integer pageTo, List<String> ignoreWords) {
        this.ignoreWords = ignoreWords;
        this.text = extractText(fileName, pageFrom, pageTo);
    }

    /**
     * Loads the document and strips its text, returning {@code null} on failure.
     * Any failure is reported via a printed stack trace.
     */
    private static String extractText(File file, Integer pageFrom, Integer pageTo) {
        String extracted = null;
        // Load the document through Loader (PDFBox 3.x); try-with-resources guarantees
        // the document is closed even when stripping throws.
        try (PDDocument document = Loader.loadPDF(file)) {
            PDFTextStripper stripper = new PDFTextStripper();
            if (pageFrom != null && pageFrom > 0) {
                stripper.setStartPage(pageFrom);
            }
            if (pageTo != null) {
                stripper.setEndPage(pageTo);
            }
            extracted = stripper.getText(document);
        } catch (Exception e) {
            // Deliberately best-effort: callers receive "" from getText() on failure.
            e.printStackTrace();
        }
        return extracted;
    }

    /**
     * Returns the extracted text after the cleaning passes.
     *
     * <p>Each pass removes every match of {@link #LINE_BEFORE_NUMBER_LINE}. At most one pass
     * is run per entry of {@code ignoreWords}; a null or empty list leaves the text unchanged.
     *
     * <p>NOTE(review): the entries of {@code ignoreWords} are never used as words to remove -
     * only the list size matters. An earlier comment described whole-word, case-insensitive
     * removal; confirm which behaviour is intended before changing this.
     *
     * @return the cleaned text, or {@code ""} when extraction failed
     */
    public String getText() {
        if (text == null) {
            return "";
        }

        int passes = (ignoreWords == null) ? 0 : ignoreWords.size();
        String cleanedText = text;
        for (int i = 0; i < passes; i++) {
            String next = LINE_BEFORE_NUMBER_LINE.matcher(cleanedText).replaceAll("");
            if (next.equals(cleanedText)) {
                // Fixed point reached: further passes would not change anything.
                break;
            }
            cleanedText = next;
        }
        return cleanedText;
    }
}
