c-ddlZddlZddlZddlmZddlmZddlmZddl m Z ddl m Z m Z mZmZmZmZddlmZdd lmZmZmZmZmZmZee d ed efd Zee d ed efdZee d ed eefdZee d ed efdZ ee d ed efdZ!ee d ed efdZ"ee d ed efdZ#ee d ed efdZ$ee d ed efdZ%ee d ed efdZ&d ed efdZ'ee d ed efdZ(ee d ed efdZ)ee d ed efdZ*ee d ed efdZ+ee d ed efdZ,ee-e ded efdZ.ee d ed efdZ/dCd!e0d"e1d eefd#Z2ed$ d%ed efd&Z3d!e0d eeee0ffd'Z4d(ed efd)Z5dDd+ed,ed efd-Z6d.ed e efd/Z7d0ed1ed e8fd2Z9d0ed1ed efd3Z:d4ej;d5fd%ed6e1d7ed dfd8Z< dEd9e0d:ed;e=ded?e0d@edAeed e eddffdBZ>dS)FN)IncrementalDecoder)aliases) lru_cache)findall) GeneratorListOptionalSetTupleUnion)MultibyteIncrementalDecoder)ENCODING_MARKSIANA_SUPPORTED_SIMILARRE_POSSIBLE_ENCODING_INDICATIONUNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDUTF8_MAXIMAL_ALLOCATION)maxsize characterreturnc tj|}n#t$rYdSwxYwd|vpd|vpd|vp d|vpd|vpd|vS)NFz WITH GRAVEz WITH ACUTEz WITH CEDILLAzWITH DIAERESISzWITH CIRCUMFLEXz WITH TILDE unicodedataname ValueErrorr descriptions :/usr/lib/python3/dist-packages/charset_normalizer/utils.pyis_accentuatedr s&+I66 uu  # ' ; & ' [ ( ' { * '  +  ' ; &   %%ctj|}|s|S|d}tt |ddS)N r)r decompositionsplitchrint)r decomposedcodess r remove_accentr+&sO!/ ::J !'',,E s58R  ! !!cft|}tjD] \}}||vr|cSdS)zK Retrieve the Unicode range official name from a single character. N)ordritems)r character_ord range_name ord_ranges r unicode_ranger31sO YM!8!>!@!@ I I % %    & 4r,cX tj|}n#t$rYdSwxYwd|vS)NFLATINrrs ris_latinr6?sF&+I66 uu k !!r!cV |dn#t$rYdSwxYwdS)NasciiFT)encodeUnicodeEncodeErrorrs ris_asciir<HsE!!!! uu 4s  &&cdtj|}d|vrdSt|}|dSd|vS)NPTF Punctuationrcategoryr3rcharacter_categorycharacter_ranges ris_punctuationrEQsG)29==    t%29%=%=Ou O ++r,cltj|}d|vsd|vrdSt|}|dSd|vS)NSNTFFormsr@rBs r is_symbolrJ`sR)29==    C+=$=$=t%29%=%=Ou o %%r,c0t|}|dSd|vS)NF Emoticons)r3)rrDs r is_emoticonrMos%%29%=%=Ou / ))r,cf|s|dvrdStj|}d|vS)N>|+,;<>TZ)isspacerrArrCs r is_separatorrXysCi+KKKt)29== $ $$r,cV||kSN)islowerisupperr;s ris_case_variabler]s%     )"3"3"5"5 55r,c6tj|}|dkS)NCo)rrArWs ris_private_use_onlyr`s)29==  %%r,cX tj|}n#t$rYdSwxYwd|vS)NFCJKrrcharacter_names ris_cjkresH$))44 uu N ""r!cX tj|}n#t$rYdSwxYwd|vS)NFHIRAGANArrcs r is_hiraganarhH$))44 uu  ''r!cX tj|}n#t$rYdSwxYwd|vS)NFKATAKANArrcs r is_katakanarlrir!cX tj|}n#t$rYdSwxYwd|vS)NFHANGULrrcs r is_hangulrosH$))44 uu ~ %%r!cX tj|}n#t$rYdSwxYwd|vS)NFTHAIrrcs ris_thairrsH$))44 uu ^ ##r!r1cDtfdtDS)Nc3 K|]}|vV dSrZ).0keywordr1s r z-is_unicode_range_secondary..s(TTw*$TTTTTTr,)anyr)r1s`ris_unicode_range_secondaryrzs' TTTT4STTT T TTr,cr|duo!|duo |dko|dkS)NFu)rV isprintabler;s ris_unprintabler~sR u$ "  ! ! # #u , "   "  ! r,sequence search_zonec t|tstt|}t t |dt ||dd}t|dkrdS|D][}| dd}tj D]\}}||kr|ccS||kr|ccS\dS)zW Extract using ASCII-only decoder any specified encoding in the first n-bytes. Nr8ignoreerrorsr-_) isinstancebytes TypeErrorlenrrmindecodelowerreplacerr/)rrseq_lenresultsspecified_encodingencoding_alias encoding_ianas rany_specified_encodingrs h & &x==G ',3w ,,,-44WX4NNG  7||qt% % %/5577??SII .5]__ % % )NM!333$$$$$$ 222$$$$$$3 % 4r,rc|dvp>ttjd|jt S)zQ Verify is a specific encoding is a multi byte one based on it IANA name > utf_7utf_8utf_16utf_32 utf_16_be utf_16_le utf_32_be utf_32_le utf_8_sig encodings.{}) issubclass importlib import_moduleformatrr )rs ris_multi_byte_encodingrsL      5 5d ; ;<<O#   r,ctD]I}t|}t|tr|g}|D]}||r||fccS JdS)z9 Identify and extract SIG/BOM in given sequence. )Nr,)rrr startswith)r iana_encodingmarksmarks ridentify_sig_or_bomrs (++ +9-+H eU # # GE + +D""4(( +$d****** + + 9r,rc |dvS)N>rrru)rs rshould_strip_sig_or_bomrs  4 44r,Tcp_namestrictc|dd}tjD]\}}|||fvr|cS|r"t d||S)Nrrz Unable to retrieve IANA for '{}')rrrr/rr)rrrrs r iana_namersmmoo%%c3//G *1!!% ~}5 5 5  6M;BB7KKLLL Nr,decoded_sequencect}|D])}t|}|||*t|SrZ)setr3addlist)rrangesrrDs r range_scanr+sQuuF%$$ )6y)A)A  "  ?#### <<r, iana_name_a iana_name_bct|st|rdStjd|j}tjd|j}|d}|d}d}t dD]C}t |g}||||kr|dz }D|dz S) Ngrrrrr)rrrrrrangerr) rr decoder_a decoder_bid_aid_bcharacter_match_counti to_be_decodeds r cp_similarityr9sk**.D[.Q.Qs'k**'k** )y999D(y999D!" 3ZZ''$aSzz ;;} % %])C)C C C !Q & ! 3 &&r,c2|tvo|t|vS)z Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using the function cp_similarity. )r)rrs r is_cp_similarrRs% -- ? 1+> >r,charset_normalizerz)%(asctime)s | %(levelname)s | %(message)slevel format_stringctj|}||tj}|tj|||dSrZ)logging getLoggersetLevel StreamHandler setFormatter Formatter addHandler)rrrloggerhandlers rset_logging_handlerr]sm  t $ $F OOE#%%G *=99::: gr, sequencesroffsets chunk_sizebom_or_sig_availablestrip_sig_or_bom sig_payloadis_multi_byte_decoderdecoded_payloadc #K|r!|dur|D]} || | |z} | sdS| VdS|D]} | |z} | t|dzkr|| | |z} |r |dur|| z} | ||rdnd} |ru| dkrot|d} |r]| d| |vrQt| | dz d D]<}||| } |r |dur|| z} | |d} | d| |vrn=| VdS) NFrrrrr$)rrrr)rrrrrrrrrrchunk chunk_end cut_sequencechunk_partial_size_chkjs rcut_sequence_chunksrks+0E99  A#AJ$67E KKKK    $ $ AJI3y>>A---$QZ%78L# :(8E(A(A*\9  ''#8Fxxh(E% "Q.1*b.A.A&$ "5556oMM"1a!eR00 " "'09'= /F4D4M4M+6+EL , 3 3M( 3 S S !8"8!89_LL!EMKKKKI$ $ r,)r)TrZ)?rrrcodecsrencodings.aliasesr functoolsrrertypingrrr r r r _multibytecodecr constantrrrrrrstrboolr r+r3r6r<rErJrMrXr]r`rerhrlrorrrrzr~rr(rrrrrrfloatrrINFOrrrrur,rrs%%%%%%%%%%%%????????????????777777 *+++ c d   ,+  *+++"S"S""",+" *+++ S Xc]   ,+  *+++""""",+" *+++,+ *+++ ,c ,d , , ,,+ , *+++ & & & & &,+ & *+++*3*4***,+* *+++%C%D%%%,+% *+++66666,+6&3&4&&&&  *+++#c#d###,+# *+++(3(4(((,+( *+++(3(4(((,+( *+++&&&&&,+& *+++$s$t$$$,+$ 33.//000U3U4UUU10U *+++cd,+UQT @ 3(%E(3-2F,G$53545555  s D C      c    's''''''2s%D           .&*77777 7  7  77 7c]7sD$777777r,