use_cld3_primary
# Requires: pip install intelli3text[cld3]
"""Example: run the intelli3text pipeline with CLD3 as the primary language identifier.

Builds a pipeline from an ``Intelli3Config`` that selects ``"cld3"`` as the
primary language-ID backend with no fallback and no export, processes one
document, and prints the language reported for each paragraph together with
the document-level language.
"""
from intelli3text import PipelineBuilder, Intelli3Config

# Only the options that differ from the library defaults are set here.
cfg = Intelli3Config(
    lid_primary="cld3",   # use CLD3 as the primary language identifier
    lid_fallback=None,    # no secondary identifier
    export=None,          # no export configured
)
pipeline = PipelineBuilder(cfg).build()

# NOTE(review): presumably the argument is interpreted as a path to the input
# document — confirm against the intelli3text documentation, and make sure the
# file exists relative to the working directory before running.
res = pipeline.process("docs/bilingual.txt")

# The result is indexed like a mapping: "paragraphs" is iterated as a sequence
# of per-paragraph records, each carrying a "language" entry.
langs = [p["language"] for p in res["paragraphs"]]
print("Per-paragraph languages:", langs)
print("Global language:", res["language_global"])
cfg =
Intelli3Config(cleaners=['ftfy', 'ocr_tilde_fix', 'pdf_breaks', 'pt_diacritics_repair', 'clean_text', 'strip_accents'], lid_primary='cld3', lid_fallback=None, languages_supported={'es', 'pt', 'en'}, nlp_model_pref='lg', paragraph_min_chars=30, lid_min_chars=60, lid_threshold=0.65, lid_max_chars=2500, lid_use_cld3=True, lid_cld3_weight=0.4, export=None)
pipeline =
<intelli3text.pipeline.Pipeline object>
res =
{'language_global': 'pt', 'language_mixed': False, 'language_distribution': {}, 'raw': '', 'cleaned': '', 'normalized': '', 'paragraphs': []}
langs =
[]