
    ~ g                     *   S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKJr  S SK	J	r	  S SK
Jr  S SKrS SKJr  S SKJr  S SKJr  S SKJr  S S	KJr  S S
KJr  S SKJr  S SKJr  S SKJrJrJ r J!r!  S SK"J#r#J$r$  S SK%J&r&  S SK'J(r(J)r)J*r*J+r+   " S S\5      r,g)    N)ThreadPoolExecutor)datetime)getpwuid)__version__)SoSComponent)SoSIPParser)SoSMacParser)SoSHostnameParser)SoSKeywordParser)SoSUsernameParser)SoSIPv6Parser)SoSReportArchiveSoSReportDirectorySoSCollectorArchiveSoSCollectorDirectory)DataDirArchiveTarballArchive)InsightsArchive)get_human_readableimport_moduleImporterHelperfile_is_binaryc                   F  ^  \ rS rSrSrSrS/ / / S/ SSSSS	/ S
.r  S-U 4S jjrS.S jrS.S jr	S.S jr
S.S jr\S 5       rS rS r\S 5       rS rS rS rS rS rS rS rS rS rS/S jrS rS rS  rS! rS" rS# r S$ r!S% r"S0S& jr#S' r$S( r%S) r&S.S* jr'S+ r(S,r)U =r*$ )1
SoSCleaner*   a  
This function is designed to obfuscate potentially sensitive information
from an sos report archive in a consistent and reproducible manner.

It may either be invoked during the creation of a report by using the
--clean option in the report command, or may be used on an already existing
archive by way of 'sos clean'.

The target of obfuscation are items such as IP addresses, MAC addresses,
hostnames, usernames, and also keywords provided by users via the
--keywords and/or --keyword-file options.

For every collection made in a report the collection is parsed for such
items, and when items are found SoS will generate an obfuscated replacement
for it, and in all places that item is found replace the text with the
obfuscated replacement mapped to it. These mappings are saved locally so
that future iterations will maintain the same consistent obfuscation
pairing.

In the case of IP addresses, support is for IPv4 and IPv6 - effort is made
to keep network topology intact so that later analysis is as accurate and
easily understandable as possible. If an IP address is encountered that we
cannot determine the netmask for, a random IP address is used instead.

For IPv6, note that IPv4-mapped addresses, e.g. ::ffff:10.11.12.13, are
NOT supported currently, and will remain unobfuscated.

For hostnames, domains are obfuscated as whole units, leaving the TLD in
place.

For instance, 'example.com' may be obfuscated to 'obfuscateddomain0.com'
and 'foo.example.com' may end up being 'obfuscateddomain1.com'.

Users will be notified of a 'mapping' file that records all items and the
obfuscated counterpart mapped to them for ease of reference later on. This
file should be kept private.
z6Obfuscate sensitive networking information in a reportauto   N /etc/sos/cleaner/default_mappingF )archive_typedomainsdisable_parsersskip_cleaning_filesjobskeywordskeyword_filemap_file	no_updatekeep_binary_filestarget	usernamesc           	        > U(       d  [         TU ]  XU5        SU l        OUS   U l        US   U l        US   U l        US   U l        US   U l        SU l        [        U R                  S5      (       d  S	U R                  l	        S
U R                  l
        [        R                  " S5      U l        [        R                  " S5      U l        [        R                   " [        R"                  R%                  U R                  S5      SS9  U R'                  5         U R)                  5       U l        [        R,                  " S5        X@l        U R                  R1                  5       U l        U R                  R4                  R7                  S5      U l        U R                  R:                  n[=        U R*                  U5      [?        U R*                  U5      [A        U R*                  U5      [C        U R*                  U5      [E        U R*                  U5      [G        U R*                  U5      /U l$        U R                  RJ                   H  nU RH                   H  nURL                  RO                  5       RQ                  SSS9S   n	U	RS                  5       n
URO                  5       RS                  5       U
:X  d  Mb  U RU                  SU
 35        U R                  RW                  SU S35        U RH                  RY                  U5        M     M     [Z        [\        [^        [`        [b        [d        [f        /U l4        S U l5        U RU                  SU R                   35        g )NToptionstmpdirsys_tmppolicymanifestFr$   r   r   sossos_uicleanerexist_ok?   parser   )maxsplitr   zDisabling parser: zDisabling the 'zP' parser. Be aware that this may leave sensitive plain-text data in the archive.z#Cleaner initialized. From cmdline: )6super__init__from_cmdlineoptsr.   r/   r0   r1   hasattrr$   r    logging	getLoggersoslogui_logosmakedirspathjoinreview_parser_valuesload_map_filecleaner_mappingumaskin_placeget_preferred_hash_name	hash_name
componentsadd_section
cleaner_mdr#   r
   r   r   r	   r   r   parsersr"   namelowersplitstriplog_infowarningremover   r   r   r   r   r   r   archive_typesnested_archive)selfr8   argscmdlinerL   hook_commonsr#   _parser_loaded_temp_loaded_name	__class__s              6/usr/lib/python3/dist-packages/sos/cleaner/__init__.pyr<   SoSCleaner.__init__b   s   GV73 $D
 %Y/DI&x0DK'	2DL&x0DK(4DM %D499f--!"		%+DII"!++E2DK!++H5DK KKT[[)<tL!!##113
 <<>--22>>yI"ii;;d224GH,,.AB$..0CD--/BCT113FGd224GH
 yy00G<<**,228a2HK${{}==?((*l:MM$6|n"EFKK'')' 3O O LL''0 ( 1 !	
 #1$2C2C1DE	G    c                 *    SU(       a  SU 3OS SU 3$ )Nz[cleaner:r   z]  r\   msgcallers      re   _fmt_log_msgSoSCleaner._fmt_log_msg   s     &AfX,b9C5AArg   c                 X    U R                   R                  U R                  X5      5        g N)rB   debugrn   rk   s      re   	log_debugSoSCleaner.log_debug       $++C89rg   c                 X    U R                   R                  U R                  X5      5        g rq   )rB   inforn   rk   s      re   rW   SoSCleaner.log_info   s    **378rg   c                 X    U R                   R                  U R                  X5      5        g rq   )rB   errorrn   rk   s      re   	log_errorSoSCleaner.log_error   ru   rg   c                 \    UR                  S5        UR                  U R                  5        g )NzSoS Cleaner Detailed Help)	set_titleadd_text__doc__)clssections     re   display_helpSoSCleaner.display_help   s#    56%rg   c                 0   0 nSn[         R                  R                  U R                  R                  5      (       a#  [        SU R                  R                   S35      e[         R                  R                  U R                  R                  5      (       dE  U R                  R                  U:w  a)  U R                  SU R                  R                   S35        U$ [        U R                  R                  SSS9 n [        R                  " U5      nSSS5        U$ ! [        R                   a    U R                  S	5         N1[
         a5  nU R                  S
U R                  R                   SU 35         SnANjSnAff = f! , (       d  f       U$ = f)zVerifies that the map file exists and has usable content.

If the provided map file does not exist, or it is empty, we will print
a warning and continue on with cleaning building a fresh map
r   zRequested map file z is a directoryzERROR: map file z6 does not exist, will not load any obfuscation matchesrutf-8encodingzOERROR: Unable to parse map file, json is malformed. Will not load any mappings.zERROR: Could not load '': N)rD   rF   isdirr>   r'   	Exceptionexistsr{   openjsonloadJSONDecodeError)r\   _confdefault_mapmferrs        re   rI   SoSCleaner.load_map_file   sh    877==++,,1$))2D2D1E F( ( ) )ww~~dii0011yy!![0&tyy'9'9&: ;< <=  dii((#@BE IIbME A  ++ MNN $L M  ENN $''+yy'9'9&:#cU$D E EE A@ s<   <F>D%FF	F+E>9F>FF
Fc                    U R                  S5      nU R                  R                  S[         S35        U R                  R                  U5        U R                  R
                  (       d   [        S5        gg! [         a/    U R                  R                  S5        U R                  S5         g[         a  nU R                  SU5         SnAgSnAff = f)	zWhen we are directly running `sos clean`, rather than hooking into
SoSCleaner via report or collect, print a disclaimer banner
a  This command will attempt to obfuscate information that is generally considered to be potentially sensitive. Such information includes IP addresses, MAC addresses, domain names, and any user-provided keywords.

Note that this utility provides a best-effort approach to data obfuscation, but it does not guarantee that such obfuscation provides complete coverage of all such data in the archive, or that any obfuscation is provided to data that does not fit the description above.

Users should review any resulting data and/or archives generated or processed by this utility for remaining sensitive content before being passed to a third party.
z
sos clean (version z)
z-
Press ENTER to continue, or CTRL-C to quit.
z
Exiting on user cancel   r9   N)
_fmt_msgrC   rw   r   r>   batchinputKeyboardInterrupt_exitr   )r\   rl   es      re   print_disclaimerSoSCleaner.print_disclaimer   s     mm   	0SAByy!GH  %    !;<

3 !

1a  !s   ,A9 96C1	C:CCc           	          SUl         UR                  SS5      nUR                  SSSS9  UR                  SS	/ S
QSS9  UR                  SS/ SS9  UR                  SS/ SSS9  UR                  SSS/ SSS9  UR                  SSS[        SS9  UR                  SS/ SS S9  UR                  S!S S"S#S$9  UR                  S%S&S'S(S)9  UR                  S*S+S,S-S.S/9  UR                  S0S,S-S1S2S39  UR                  S4S5/ SS6S/9  g )7Nzsos clean|mask TARGET [options]zCleaner/Masking Optionsz7These options control how data obfuscation is performedr*   TARGETz%The directory or archive to obfuscate)metavarhelpz--archive-typer   )r   reportcollectinsightszdata-dirtarballz8Specify what kind of archive the target was generated as)defaultchoicesr   z	--domainsextendz!List of domain names to obfuscate)actionr   r   z--disable-parsersr"   zCDisable specific parsers, so that those elements are not obfuscated)r   r   destr   z--skip-cleaning-filesz--skip-masking-filesr#   zBList of files to skip/ignore during cleaning. Globs are supported.z-jz--jobsr   z&Number of concurrent archives to clean)r   typer   z
--keywordsr%   zList of keywords to obfuscatez--keyword-filer&   z&Provide a file a keywords to obfuscate)r   r   r   z
--map-filer'   r   z;Provide a previously generated mapping file for obfuscation)r   r   r   z--no-updater(   F
store_truez<Do not update the --map-file with new mappings from this run)r   r   r   r   z--keep-binary-filesr)   zGKeep unprocessable binary files in the archive instead of removing them)r   r   r   r   z--usernamesr+   zList of usernames to obfuscate)usageadd_argument_groupadd_argumentint)r   r8   	clean_grps      re   add_parser_optionsSoSCleaner.add_parser_options   s   8--%E
	 	x$K 	 	M/(K&8 	 	:
 	{8R$G 	 	I28')0A&C 	 	E 	68N&.$9&F 	 	H
 	tXqs$L 	 	N|Hb$.$C 	 	E 	/$2$L 	 	N 	|*'I&< 	 	> 	};&2%= 	 	> 	4e&2$7%G 	 	H
 	};&.$D 	 	Frg   c                 $    XR                   l        g)zFor use by report and collect to set the TARGET option appropriately
so that execute() can be called just as if we were running `sos clean`
directly from the cmdline.
N)r>   r*   )r\   rF   s     re   set_target_pathSoSCleaner.set_target_path%  s    
  		rg   c                 X   SnU R                   R                  S:w  as  U R                   R                  R                  SS5      nU R                   H<  nUR                  U:X  d  M  U" U R                   R
                  U R                  5      nM>     OfU R                   HV  nUR                  U R                   R
                  5      (       d  M/  U" U R                   R
                  U R                  5      n  O   U(       d  gU R                  R                  U5        UR                  (       aJ  U R                  R                  UR                  5       5        U R                  R                  U5        Xl        U R                  (       a&  U R                  R                  U R                  l        gg)zThe target path is not a directory, so inspect it for being an
archive or an archive of archives.

In the event the target path is not an archive, abort.
Nr   -_)r>   r    replacerZ   	type_namer*   r.   check_is_typereport_pathsappend	is_nestedr   get_nested_archivesrY   r[   descriptionui_name)r\   _arc
check_typearchivearcs        re   inspect_target_archive!SoSCleaner.inspect_target_archive,  s=    99!!V+//77SAJ--$$
2"499#3#3T[[AD . ))$$TYY%5%566tyy//=D *   &>>$$T%=%=%?@ $$T*"&*.*=*=*I*ID' rg   c                 6   U R                   R                   H0  n[        UR                  S5      5      S:  d  M#  [	        SU S35      e   U R                   R
                   Vs/ s H  n[        R                  " U5      PM     snU R                   l        gs  snf )zCheck any values passed to the parsers via the commandline:
- For the --domains option, ensure that they are valid for the parser
  in question.
- Convert --skip-cleaning-files from globs to regular expressions.
.   zInvalid value 'z0' given: --domains values must be actual domainsN)r>   r!   lenrU   r   r#   fnmatch	translate)r\   _domps      re   rH   SoSCleaner.review_parser_valuesI  s     II%%D4::c?#a'%dV ,% %  & *.)F)F)H)F CD):):1)=)F)H		% )Hs   # Bc                 	   U R                   R                  R                  S5      S   R                  S5      S   U l        U R                  (       a  U R                  5         / U l        [        R                  R                  U R                   R                  5      (       dC  U R                  R                  SU R                   R                   35        U R                  S5        U R                  5         U R                  (       d,  U R                  R                  S5        U R                  S5        / U l        U R                   H/  nUR                   S:X  d  M  UR"                  R%                  5         M1     U R'                  5         U R)                  5         U R+                  5         U R                  (       d>  U R,                  (       a  g	U R                  R/                  S
5        U R                  S5        U R                  R/                  S[1        U R                  5       S35        U R3                  5       nU R5                  U5      nU R7                  U5        U R9                  5         U R,                  (       a(  U R                   Vs/ s H  oDR:                  PM     nnX54$ S	n[1        U R                  5      S:  a  U R=                  5       nOU R                  S   nUR:                  nU R?                  UR:                  5      n	U	b  U RA                  UR                  S5      S    SU RB                   35      n
[E        [        R                  RG                  U RH                  U
5      SSS9 nURK                  U	5        S	S	S	5        U RM                  5         [        R                  RG                  U RH                  U RA                  UR                  S5      S   5      5      n[N        RP                  " Xv5        [        RR                  " U5      nU R                  R/                  SU 35        U R                  R/                  SU S35        U R                  R/                  S[U        URV                  5       35        U R                  R/                  S[Y        URZ                  5      R\                   S35        U R                  R/                  S5        U R_                  5         g	s  snf ! , (       d  f       GNq= f)a  SoSCleaner will begin by inspecting the TARGET option to determine
if it is a directory, archive, or archive of archives.

In the case of a directory, the default behavior will be to edit the
data in place. For an archive will we unpack the archive, iterate
over the contents, and then repack the archive. In the case of an
archive of archives, such as one from SoSCollector, each archive will
be unpacked, cleaned, and repacked and the final top-level archive will
then be repacked as well.
/z.tarr   z*Invalid target: no such file or directory r9   z'No valid archives or directories found
zHostname ParserNz#No reports obfuscated, aborting...
z
Successfully obfuscated z report(s)
r   wr   r   z2A mapping of obfuscated elements is available at
	z)
The obfuscated archive is available at
	
z	Size	z	Owner	zcPlease send the obfuscated archive to your support representative and keep the mapping file private)0r>   r*   rU   arc_namer=   r   r   rD   rF   r   rC   rz   r   r   completed_reportsrR   rS   mappingset_initial_countspreload_all_archives_into_mapsgenerate_parser_item_regexesobfuscate_report_pathsrL   rw   r   compile_mapping_dictwrite_map_for_archivewrite_map_for_configwrite_stats_to_manifestfinal_archive_pathrebuild_nested_archiveget_new_checksumobfuscate_stringrN   r   rG   r/   writewrite_cleaner_logshutilmovestatr   st_sizer   st_uidpw_namecleanup)r\   r8   _mapmap_patha	arc_paths
final_patharc_pathr   checksumchksum_namecfarcstats                re   executeSoSCleaner.executeX  s    		((..s3B7==fEaH!!#ww~~dii..//KKJ!%!1!1 24 5JJqM##%  KKHIJJqM "$ llF{{//113 # 	++-))+##%%%}}KKCDJJqM5 6 678F 	G ((*--d3!!$'$$&==7;7M7MN7M!--7MIN&&
t%%&*224H((+C--H,,S-C-CDH#"33~~c*2./q0@A "'',,t||[A3#*,/1HHX&, ""$WW\\LL!!(.."5b"9:

 	H)''*%
 	B8*M	
 	::,bI	
 	8$6w$G#HIJ9Xgnn%=%E%E$FbIJ L 	M 	U O, ,s   S#,S((
S7c                 :   U R                   S-   nU R                  US9  U R                   Hl  nUR                  R	                  S5      S   nU R                  UR                  5      nUc  MA  SU SU R                   3nU R                  R                  XES9  Mn     [        R                  " U R                  R                  5       H  u  pgnU H  n	[        R                  R                  Xi5      n
U
R	                  U R                  R                  5      S   nUR                  S5      nU R                  R!                  XS9  [        R"                  " U
5        M     M     U R%                  SS	9  U R                  R'                  U R(                  R*                  5      $ )
zxHandles repacking the nested tarball, now containing only obfuscated
copies of the reports, log files, manifest, etc...
z-obfuscated)rS   r   r   z
checksums/r   r   T)r   )r   setup_archiver   r   rU   r   rN   r   
add_stringrD   walkr[   extracted_pathrF   rG   lstripadd_filerY   r   finalizer>   compression_type)r\   r   r   arc_destr   dnamedirnr   filesfilenamefnames              re   r   !SoSCleaner.rebuild_nested_archive  sX   
 ===0)--G1177<R@H,,W-G-GHH#$XJa/?@'''= . !ggd&9&9&H&HINDU!T4D$7$7$F$FGKS)%%e%8		%  " J 	t,||$$TYY%?%?@@rg   c                     0 nU R                    H<  n0 XR                  '   XR                     R                  UR                  5       5        M>     U$ )zBuild a dict that contains each parser's map as a key, with the
contents as that key's value. This will then be written to disk in the
same directory as the obfuscated report so that sysadmins have a way
to 'decode' the obfuscation locally
)rR   map_file_keyupdateget_map_contents)r\   r   r8   s      re   r   SoSCleaner.compile_mapping_dict  sL     llF(*D$$%$$%,,V-D-D-FG # rg   c                     [        USSS9 nUR                  [        R                  " USS95        SSS5        U$ ! , (       d  f       U$ = f)zZWrite the mapping to a file on disk that is in the same location as
the final archive(s).
r   r   r   r   )indentN)r   r   r   dumps)r\   r   rF   r   s       re   write_map_to_fileSoSCleaner.write_map_to_file  sA     $g."HHTZZQ/0 / /.s	   %;
A
c                     [         R                  R                  U R                  U R	                  U R
                   S35      5      nU R                  X5      $ ! [         a  nU R                  SU 35         S nAg S nAff = f)Nz-private_mapz"Could not write private map file: )	rD   rF   rG   r/   r   r   r  r   r{   )r\   r   r   r   s       re   r    SoSCleaner.write_map_for_archive  ss    	ww||%%|&DEH ))$99 	NN?uEF	s   AA 
B#A<<Bc                    U R                   R                  (       a  U R                   R                  (       d  [        R                  R                  U R                   R                  5      n [        R                  " USS9  U R                  XR                   R                  5        U R                  SU R                   R                   35        ggg! [         a  nU R                  SU 35         SnAgSnAff = f)zmWrite the mapping to the config file so that subsequent runs are
able to provide the same consistent mapping
Tr5   zWrote mapping to z&Could not update mapping config file: N)r>   r'   r(   rD   rF   dirnamerE   r  rs   r   r{   )r\   r   cleaner_dirr   s       re   r   SoSCleaner.write_map_for_config  s     99dii&9&9''//$))*<*<=KOK$7&&tYY-?-?@!24993E3E2FGH ':  O!GuMNNOs   +A"C 
C8C33C8c                    [         R                  R                  U R                  U R                   S35      n[        USSS9 nU R                  R                  S5        U R                  R                  5        H  nUR                  U5        M     SSS5        U(       a,  U R                  U5        U R                  R                  USS9  gg! , (       d  f       NB= f)	zWhen invoked via the command line, the logging from SoSCleaner will
not be added to the archive(s) it processes, so we need to write it
separately to disk
z-obfuscation.logr   r   r   r   Nzsos_logs/cleaner.logr   )rD   rF   rG   r/   r   r   sos_log_fileseek	readlinesr   obfuscate_filer   r  )r\   r   log_namelogfilelines        re   r   SoSCleaner.write_cleaner_log  s    
 77<<LLT]]O+;<
 (C'2g""1%))335d# 6 3
 )LL!!(1G!H  32s   AC
Cc                 l    Sn[        US5       n[        R                  " U R                  5      n UR	                  U5      nU(       d  OUR                  U5        M,  UR                  5       S-   sSSS5        $ ! , (       d  f       g= f! [         a  nU R                  SU 35         SnAgSnAff = f)zfCalculate a new checksum for the obfuscated archive, as the previous
checksum will no longer be valid
i   rbr   Nz!Could not generate new checksum: )	r   hashlibnewrN   readr  	hexdigestr   rs   )r\   archive_path	hash_size
archive_fpdigesthashdatar   s          re   r   SoSCleaner.get_new_checksum  s    	FIlD)Z T^^4)y9H#MM(+	 
 '')D0 *))   	FNN>seDEE	Fs5   B A A:0	B :
BB B 
B3B..B3c                     S[        U R                  5       SU R                  R                   S3nU R                  R                  U5        U R                  R                  (       a  U R                  R                  S5        [        U R                  R                  5      nUR                  U R                  U R                  SS9  UR                  SS9  U R                  (       a,  U R                  5         U R                  U R                  5        gg! [         a4    U R                  R                  S	5        [        R                   " S
5         gf = f)zPerform the obfuscation for each archive or sos directory discovered
during setup.

Each archive is handled in a separate thread, up to self.opts.jobs will
be obfuscated concurrently.
zFound z. total reports to obfuscate, processing up to z concurrently
zpWARNING: binary files that potentially contain sensitive information will NOT be removed from the final archive
r9   )	chunksizeT)waitzExiting on user cancelr   N)r   r   r>   r$   rC   rw   r)   rX   r   mapobfuscate_reportshutdownr[   _replace_obfuscated_archivesr   rD   r   )r\   rl   pools      re   r   !SoSCleaner.obfuscate_report_paths"  s   	T../0 1$$(IINN#3?D  KKS!yy**##O &diinn5DHHT**D,=,=HKMMtM$""113%%d&9&9: # ! 	KK56HHSM	s   DD ;EEc                 ^   U R                    H  n[        R                  " UR                  5        U R                  R
                  nUR                  R                  S5      S   n[        R                  R                  X#5      n[        R                  " UR                  U5        XAl        M     g)zWhen we have a nested archive, we need to rebuild the original
archive, which entails replacing the existing archives with their
obfuscated counterparts
r   r   N)r   rD   rY   r.  r[   r  r   rU   rF   rG   r   r   )r\   r   r   r   	dest_names        re   r:  'SoSCleaner._replace_obfuscated_archives?  s~    
 --GIIg**+&&55D0066s;B?GT3IKK22D9)2& .rg   c                 J    U R                    H  nUR                  5         M     g)zFor the parsers that use prebuilt lists of items, generate those
regexes now since all the parsers should be preloaded by the archive(s)
as well as being handed cmdline options and mapping file configuration.
N)rR   generate_item_regexes)r\   r8   s     re   r   'SoSCleaner.generate_parser_item_regexesL  s    
 llF((* #rg   c                    U R                    GHJ  nUR                  R                  5       R                  5       S   R	                  5       nUR                  XA5       Hj  nUR                  U5      nU(       d  M  U R                  SU SU SUR                   35        UR                  5        H  n UR                  U5        M     Ml     UR                  XA5      n	U	(       aE  U R                  SU S	UR                   35        U	 H  n
UR                  R                  U
5        M      UR                  U    H  nUR                  R!                  U5        M      GMM     g! [         a%  nU R                  SU SU SU 35         SnAM  SnAff = f)
a  
For each archive we've determined we need to operate on, pass it to
each prepper so that we can extract necessary files and/or items for
direct regex replacement. Preppers define these methods per parser,
so it is possible that a single prepper will read the same file for
different parsers/mappings. This is preferable to the alternative of
building up monolithic lists of file paths, as we'd still need to
manipulate these on a per-archive basis.

:param archive: The archive we are currently using to prepare our
                mappings with
:type archive:  ``SoSObfuscationArchive`` subclass

:param prepper: The individual prepper we're using to source items
:type prepper:  ``SoSPrepper`` subclass
r   z	Prepping z parser with file z from zFailed to prep z
 map from : Nz mapping with items from )rR   rS   rT   rU   rV   get_parser_file_listget_file_contentrs   r   
splitlines
parse_liner   get_items_for_mapr   addregex_itemsadd_regex_item)r\   r   prepperr`   pname_filecontentr&  r   	map_itemsitemritems               re   _prepare_archive_with_prepper(SoSCleaner._prepare_archive_with_prepperT  sr   " ||GLL&&(..0399;E 55eE!22595'1CE7 K''.&7 9 :#..0D**40 1 F  11%AI5'1J")//!2 4 5%DOO''- & !,,U3..u5 4- $ % -eWJugRuM s   0E
F	'F	F	c              #     #    [        [        R                  R                  5      n/ nUR	                  5        H   nUR                  [        SU 35      5        M"     [        US S9 H  nU" U R                  S9v   M     g7f)z
Discover all locally available preppers so that we can prepare the
mappings with obfuscation matches in a controlled manner

:returns: All preppers that can be leveraged locally
:rtype:   A generator of `SoSPrepper` items
zsos.cleaner.preppers.c                     U R                   $ rq   )priority)xs    re   <lambda>)SoSCleaner.get_preppers.<locals>.<lambda>  s    1::rg   )key)r-   N)	r   r2   r4   preppersget_modulesr   r   sortedr>   )r\   helperpreps_preprM  s        re   get_preppersSoSCleaner.get_preppers~  sm        4 45'')ELL)>ug'FGH *e)=>G$)),, ?s   A?Bc                     U R                  S5        U R                  5        H'  nU R                   H  nU R                  X!5        M     M)     g)zBefore doing the actual obfuscation, if we have multiple archives
to obfuscate then we need to preload each of them into the mappings
to ensure that node1 is obfuscated in node2 as well as node2 being
obfuscated in node1's archive.
z.Pre-loading all archives into obfuscation mapsN)rW   rc  r   rT  )r\   rM  r   s      re   r   )SoSCleaner.preload_all_archives_into_maps  sB     	FG((*G,,227D - +rg   c                 <    U R                   R                  UR                  5      n[        R                  " 5       nUR                  SU5        UR                  (       d  UR                  5         UR                  S5        UR                  5        H  nUR                  UR                  S-   5      S   nUR                  U5      (       a  M<  U R                  R                  (       d)  UR                  U5      (       a  UR                  U5        M  U R                  R                  (       a(  [!        U5      (       d  UR                  U5      (       a  M   U R#                  XEUR                  5      nU(       a  UR%                  XV5        M  M      U R+                  U5         U R/                  U5        UR0                  (       d  UR3                  5       nU(       aM  UR                  S5         UR5                  U R7                  UR                  5      5        UR9                  U5        U R:                  R=                  U5        [        R                  " 5       n	UR                  SU	5        UR                  SX-
  5        UR                  S[?        UR@                  5      5        UR                  SURB                  5        Sn
URD                  (       a  Sn
XRD                  -  n
UR                  SU
 35        g! [&         a#  nU R)                  SU SU 35         SnAGM  SnAff = f! [&         a(  nU R-                  SU 3UR                  S	9   SnAGNSnAff = f! [&         a(  nU R-                  S
U 3UR                  S	9   SnAGNSnAff = f! [&         a?  nU R)                  SUR                   SU 35        UR                  SU 35         SnAgSnAff = f! [&         a5  nU RF                  RI                  SUR                   SU 35         SnAgSnAff = f)zIndividually handle each archive or directory we've discovered by
running through each file therein.

Positional arguments:

    :param report str:      Filepath to the directory or archive

start_timezBeginning obfuscation...r   r9   zUnable to parse file rD  Nz!Failed to obfuscate directories: rm   zFailed to obfuscate symlinks: zRe-compressing...zArchive z failed to compress: zFailed to re-compress archive: end_timerun_timefiles_obfuscatedtotal_substitutionsr   z! [removed %s unprocessable files]zObfuscation completedzException while processing )%rQ   rP   archive_namer   now	add_fieldis_extractedextract
report_msgget_file_listrU   should_skip_filer>   r)   should_remove_fileremove_filer   r#  update_sub_countr   rs   obfuscate_directory_namesrW   obfuscate_symlinksr   get_compressionrename_top_dirr   compressr   r   r   file_sub_listtotal_sub_countremoved_file_countrC   rw   )r\   r   arc_mdrh  r  
short_namecountr   methodrj  rmsgs              re   r8  SoSCleaner.obfuscate_report  s   M	?__001E1EFF!J\:6''!9: ..0"[[)=)=)CDQG
++J77		3322:>> ''
3II//'.. 33J?? P //070D0DFE00C ) 12;..w7
;''0 $$ 002&&':;
.. 11'2F2FG  ((0 &&--g6||~HZ2Z)>?/W5J5J1KL2G4K4KLD)):888!6tf=>U ! PNN%::,b#NOOP
  ; A#G%,%9%9  ;;  ; >seD%,%9%9  ;; % '2F2F1G H88;u(> ?**=cUCE&  	?KK: ' 4 45Ru> ? ?	?s   EO 4K6O L& !M 29O ,;N 'CO 6
L# LO L##O &
M0MO MO 
N%NO NO 
O5OO OO 
P&+PPc           
        ^ U(       d  gSnT(       d  UR                  S5      S   m[        R                  R                  U5      (       Gd,  U R                   Vs/ s H+  n[        U4S jUR                   5       5      (       a  M)  UPM-     nnU(       d  U R                  ST=(       d    U S35        gU R                  ST=(       d    U 3US	9  [        R                  " S
U R                  S9 n[        USSSS9 nU H,  n	 U R                  X5      u  pXJ-  nUR                  U	5        M.     SSS5        UR                  S5        U(       a!  [         R"                  " UR$                  U5        SSS5        U R'                  TR                  S5      S   5      nTR)                  TR                  S5      S   U5      nUT:w  a  UR                  T5      S   n[        R                  R+                  X5      n[        R                  R                  U5      (       d  [        R,                  " X5        U$ U R'                  [        R.                  " U5      5      n[        R0                  " U5        [        R2                  " UU5        U$ s  snf ! [         a"  nU R                  ST SU 3US	9   SnAGM  SnAff = f! , (       d  f       GN= f! , (       d  f       GNe= f)a  Obfuscate and individual file, line by line.

Lines processed, even if no substitutions occur, are then written to a
temp file without our own tmpdir. Once the file has been completely
iterated through, if there have been substitutions then the temp file
overwrites the original file. If there are no substitutions, then the
original file is left in place.

Positional arguments:

    :param filename str:        Filename relative to the extracted
                                archive root
Nr   r   r   c              3   D   >#    U  H  oR                  T5      v   M     g 7frq   match).0_skipr  s     re   	<genexpr>,SoSCleaner.obfuscate_file.<locals>.<genexpr>	  s      9IKK
++9I    zSkipping obfuscation of z" due to matching file skip patternzObfuscating ri  r   )modedirr   r   r   )r   errorszUnable to obfuscate ri   )rU   rD   rF   islinkrR   anyskip_patternsrs   tempfileNamedTemporaryFiler.   r   obfuscate_liner   r   r!  r   copyfilerS   r   r   rG   renamereadlinkrY   symlink)r\   r  r  r   subs_p_parserstfiler  r&  r  r   _ob_short_name_ob_filenamer   _ob_path
_target_obs     `              re   r#  SoSCleaner.obfuscate_file  s    !,R0Jww~~h'' "\\)r 9;9I9I  \   .z/EX.F G1 2 NN\**@)AB"*  ,,,#4;;G(C'!*,/4 %F*.*=*=d*MKD MD!KK-	 !&, 

1OOEJJ9 H  ..z/?/?/DR/HI!))**:*:3*?*C*8: :%~~j1!4Hww||H;H 77>>(++		(-  "222;;x3HI
 		(# 

:x0i.  ) F NN-A*Q.1U,4<D + F FF, , HGs[   (JJ/K<J<(J+J</AK
J9	J4	-J<4J9	9J<<
K	K
Kc                 ^  ^	 U R                  SUR                  S9  UR                  5        GHH  n UR                  UR                  5      S   R                  S5      m	U R                   Vs/ s H+  n[        U	4S jUR                   5       5      (       a  M)  UPM-     nnU(       d  U R                  ST	 S35        M  U R                  ST	 3UR                  S9  [        R                  " U5      n[        R                  R                  UR                  U R                  T	5      5      nU R                  U5      nXb:w  d  Xu:w  a/  [        R                  " U5        [        R                   " Xv5        GMH  GMK     gs  snf ! ["         a#  nU R                  S	U S
U 35         SnAGM{  SnAff = f)a  Iterate over symlinks in the archive and obfuscate their names.
The content of the link target will have already been cleaned, and this
second pass over just the names of the links is to ensure we avoid a
possible race condition dependent on the order in which the link or the
target get obfuscated.

:param archive:     The archive being obfuscated
:type archive:      ``SoSObfuscationArchive``
zObfuscating symlink namesri  r9   r   c              3   D   >#    U  H  oR                  T5      v   M     g 7frq   r  )r  r  _syms     re   r  0SoSCleaner.obfuscate_symlinks.<locals>.<genexpr>P  s     H7GeD))7Gr  z Skipping obfuscation of symlink z due to skip pattern matchzObfuscating symlink zError obfuscating symlink 'r   N)rW   rn  get_symlinksrU   r  r  rR   r  r  rs   rD   r  rF   rG   r   rY   r  r   )
r\   r   r  r  r  _target_ob_sym_name
_ob_targetr   r  s
            @re   rz  SoSCleaner.obfuscate_symlinks=  s    	1':N:NO++-G O}}W%;%;<Q?FFsK "&!-2Hr7G7GHH     NN:4& A( ) !5dV<&-&:&:  < ++g.  "ww||G,B,B,0,A,A$,G I "227;
 !+1FIIg&JJz8 2G; .4  O ;G9CuMNNOs6   ;E?,(E:E:E?>B3E?:E??
F,	F''F,c                 p   U R                  SUR                   35        [        UR                  5       SS9 H  n[        R
                  " U5       H  n[        R                  R                  X#5      nUR                  UR                  5      S   n[        R                  R                  U5      (       d  Mf  U R                  U5      nXc:w  d  M~  UR                  U5      n[        R                  R                  UR                  UR                  S5      U5      n[        R                  " XG5        M     M     g)zFor all directories that exist within the archive, obfuscate the
directory name if it contains sensitive strings found during execution
z'Obfuscating directory names in archive T)reverser   r   N)rW   rn  r_  get_directory_listrD   listdirrF   rG   rU   r  r   r   rstripr  r  )r\   r   dirpath_name_dirname_arc_dir_ob_dirname_ob_arc_dirs           re   ry  $SoSCleaner.obfuscate_directory_namesk  s     	? --.0 	1g88:DIGG,77<<7#>>'*@*@A"E77==**"&"7"7">K"+&.ooe&<&(ggll#22'..s3''
 		(8 - Jrg   c                     U R                    H  n UR                  U5      nM     U$ ! [         a  nU R                  SU 35         S nAM?  S nAff = f)NzError obfuscating string data: )rR   parse_string_for_keysr   rW   )r\   string_datar8   r   s       re   r   SoSCleaner.obfuscate_string  sY    llFG$::;G #
   G ?uEFFGs   (
AAAc                 
   SnUR                  5       (       d  X4$ Uc  U R                  nU H  n UR                  U5      u  pX5-  nM     X4$ ! [         a*  nU R	                  SU 3UR
                  5         SnAMQ  SnAff = f)a  Run a line through each of the obfuscation parsers, keeping a
cumulative total of substitutions done on that particular line.

Positional arguments:

    :param line str:        The raw line as read from the file being
                            processed
    :param parsers:         A list of parser objects to obfuscate
                            with. If None, use all.

Returns the fully obfuscated line and the number of substitutions made
r   Nzfailed to parse line: )rV   rR   rH  r   rs   rS   )r\   r&  rR   r  r8   _countr   s          re   r  SoSCleaner.obfuscate_line  s     zz||;?llGFL%006  {  L!7u=v{{KKLs   A
BA==Bc                 L   U R                   R                  S5      nU R                   Hy  nUR                  UR                  R	                  SS5      R                  5       5      nUR                  S[        UR                  R                  R                  5       5      5        M{     g)zLWrite some cleaner-level, non-report-specific stats to the manifest
        rR    r   entriesN)rQ   rP   rR   rS   r   rT   rp  r   r   datasetkeys)r\   	parse_secr8   _secs       re   r   "SoSCleaner.write_stats_to_manifest  sv     OO//	:	llF(()<)<S#)F)L)L)NODNN9c&..*@*@*E*E*G&HI #rg   )r   rZ   rJ   rQ   r   r=   rN   rL   r1   r[   r>   rR   r0   r   rB   r/   r.   rC   )NNNFNrq   )F)NN)+__name__
__module____qualname____firstlineno__r   descarg_defaultsr<   rn   rs   rW   r{   classmethodr   rI   r   r   r   r   rH   r   r   r   r  r   r   r   r   r   r:  r   rT  rc  r   r8  r#  rz  ry  r   r  r   __static_attributes____classcell__)rd   s   @re   r   r   *   s(   $L DD !6"L GL"EGNB:9: & &8!: /F /Fb J:H^@A0	OI"$:3+(6T-	EU?nL\,O\9*8J Jrg   r   )-r*  r   r@   rD   r   r  r   concurrent.futuresr   r   pwdr   sos.cleaner.preppersr2   r   sos.componentr   sos.cleaner.parsers.ip_parserr   sos.cleaner.parsers.mac_parserr	   #sos.cleaner.parsers.hostname_parserr
   "sos.cleaner.parsers.keyword_parserr   #sos.cleaner.parsers.username_parserr   sos.cleaner.parsers.ipv6_parserr   sos.cleaner.archives.sosr   r   r   r   sos.cleaner.archives.genericr   r   sos.cleaner.archives.insightsr   sos.utilitiesr   r   r   r   r   rj   rg   re   <module>r     sm       	    1     & 5 7 A ? A 9= = H 9+ +@J @Jrg   