pdf_local

# Example: run the pipeline on a local PDF and print its global language.
from intelli3text import PipelineBuilder, Intelli3Config

# Configuration constructed with no arguments (library defaults).
cfg = Intelli3Config()
pipeline = PipelineBuilder(cfg).build()

# process() is given a local file path; the result is indexed by the
# "language_global" key and that value is printed.
print(pipeline.process("paper_bilingue.pdf")["language_global"])
cfg = Intelli3Config(cleaners=['ftfy', 'ocr_tilde_fix', 'pdf_breaks', 'pt_diacritics_repair', 'clean_text', 'strip_accents'], lid_primary='fasttext', lid_fallback=None, languages_supported={'es', 'pt', 'en'}, nlp_model_pref='lg', paragraph_min_chars=30, lid_min_chars=60, lid_threshold=0.65, lid_max_chars=2500, lid_use_cld3=True, lid_cld3_weight=0.4, export=None)
pipeline = <intelli3text.pipeline.Pipeline object>