batch_process
# Batch example: run a single intelli3text pipeline over several sources and
# print a one-line summary for each of them.
from intelli3text import PipelineBuilder, Intelli3Config

# Inputs are plain strings: a web URL, a text-file path and a PDF path.
sources = [
    "https://pt.wikipedia.org/wiki/Howard_Gardner",
    "docs/mixed_language.txt",
    "papers/sample.pdf",
]

# Only these two settings are passed explicitly; every other Intelli3Config
# field is left at its default.
# NOTE(review): "md" presumably selects a medium-sized NLP model and
# export=None presumably disables file export — confirm against the library.
cfg = Intelli3Config(
    nlp_model_pref="md",
    export=None,
)

# Build the pipeline once and reuse the same instance for every source.
pipeline = PipelineBuilder(cfg).build()

for src in sources:
    res = pipeline.process(src)
    # The result is indexed by key; this summary reads 'language_global' and
    # the length of 'paragraphs'.
    print(f"[{src}] → global language: {res['language_global']}; paragraphs: {len(res['paragraphs'])}")
sources =
['https://pt.wikipedia.org/wiki/Howard_Gardner', 'docs/mixed_language.txt', 'papers/sample.pdf']
cfg =
Intelli3Config(cleaners=['ftfy', 'ocr_tilde_fix', 'pdf_breaks', 'pt_diacritics_repair', 'clean_text', 'strip_accents'], lid_primary='fasttext', lid_fallback=None, languages_supported={'es', 'pt', 'en'}, nlp_model_pref='md', paragraph_min_chars=30, lid_min_chars=60, lid_threshold=0.65, lid_max_chars=2500, lid_use_cld3=True, lid_cld3_weight=0.4, export=None)
pipeline =
<intelli3text.pipeline.Pipeline object>