import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Command-line driver: reads {@code EN.pdf} from a fixed directory through
 * {@code GetRawTextFromPDF}, hands the resulting text to {@code TypeSplitter},
 * and writes five report files next to the PDF.
 */
public class test {
    /** Not referenced anywhere in this class; starts out as {@code null}. */
    static Map<String, Integer> wordCount = null;

    /**
     * Runs the extraction and writes every report.
     *
     * @param args ignored
     * @throws IOException if it is raised while reading the PDF or writing a report
     */
    public static void main(String[] args) throws IOException {
        final String baseDir = "literatura/emr/naZapadeNicNove/EN";

        // Phrases passed to the extractor as its "ignore" list
        // (presumably running headers/footers -- confirm in GetRawTextFromPDF).
        final List<String> skippedPhrases = new ArrayList<>();
        skippedPhrases.add("Erich Maria Remarque");
        skippedPhrases.add("ALL QUIET ON THE WESTERN FRONT");

        final File source = new File(baseDir + "/EN.pdf");
        final Integer firstPage = 9;

        // The third constructor argument is deliberately left null.
        final GetRawTextFromPDF extractor =
                new GetRawTextFromPDF(source, firstPage, null, skippedPhrases);
        final String rawText = extractor.getText();
        // Debug aid: print rawText here to inspect the extracted text.

        final TypeSplitter splitter = new TypeSplitter(rawText, baseDir);

        // Each report gets its own stream, closed as soon as it is written.
        try (OutputStream out = openReport(baseDir, "/EN_tokenCount.txt")) {
            splitter.printTokensCountSorted(out, 50);
        }
        try (OutputStream out = openReport(baseDir, "/EN_tokenCountWhite.txt")) {
            splitter.printWhiteTokensCountSorted(out);
        }
        try (OutputStream out = openReport(baseDir, "/EN_tokenTypesCount.txt")) {
            splitter.printTokenTypesCountSorted(out);
        }
        try (OutputStream out = openReport(baseDir, "/EN_tokenNeighbors.txt")) {
            splitter.printNeighborCountSortedByValue(out);
        }
        try (OutputStream out = openReport(baseDir, "/EN_sentences.txt")) {
            splitter.printSentenceStatistic(out);
        }
        // Debug aid: splitter.getTokens() and its size() can be printed here.
    }

    /**
     * Opens a file output stream for {@code directory + suffix}.
     *
     * @param directory directory part of the target path
     * @param suffix    remainder of the path, including the leading slash
     * @return a freshly opened stream that the caller must close
     * @throws IOException if the file cannot be opened for writing
     */
    private static OutputStream openReport(String directory, String suffix) throws IOException {
        return new FileOutputStream(directory + suffix);
    }
}