diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3f547616..3f15eb5b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -28,7 +28,7 @@ jobs: if: ${{ needs.changed_states.outputs.matrix != '[]' }} strategy: matrix: - salt: [3004, 3005] + salt: [3006] os: [20.04, 22.04] state: ${{ fromJson(needs.changed_states.outputs.matrix) }} include: @@ -40,7 +40,7 @@ jobs: - os: 22.04 salt: 3004 container: - image: docker://ghcr.io/teamdfir/sift-saltstack-tester:${{ matrix.salt }}-${{ matrix.code }} + image: docker://ghcr.io/ekristen/cast-tools/saltstack-tester:${{ matrix.code }}-${{ matrix.salt }} steps: - uses: actions/checkout@v4 - name: test-state diff --git a/.github/workflows/weekly-tests.yml b/.github/workflows/weekly-tests.yml index 30564503..50abf37a 100644 --- a/.github/workflows/weekly-tests.yml +++ b/.github/workflows/weekly-tests.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - salt: [3004, 3005] + salt: [3006] os: [20.04, 22.04] state: - sift.packages.python2 @@ -22,11 +22,8 @@ jobs: code: focal - os: 22.04 code: jammy - exclude: - - salt: 3004 - os: 22.04 container: - image: docker://ghcr.io/teamdfir/sift-saltstack-tester:${{ matrix.salt }}-${{ matrix.code }} + image: docker://ghcr.io/ekristen/cast-tools/saltstack-tester:${{ matrix.code }}-${{ matrix.salt }} steps: - uses: actions/checkout@v4 - name: test-state diff --git a/README.md b/README.md index adf25fc3..b21851a1 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # SIFT +![GitHub Downloads (all assets, all releases)](https://img.shields.io/github/downloads/teamdfir/sift-saltstack/total) Built with [SaltStack](https://saltproject.io) installed with [Cast](https://github.com/ekristen/cast) diff --git a/sift/files/page-brute/page_brute-BETA.py b/sift/files/page-brute/page_brute-BETA.py index 212b5b7d..29c99faa 100755 --- a/sift/files/page-brute/page_brute-BETA.py +++ b/sift/files/page-brute/page_brute-BETA.py @@ -1,8 +1,8 @@ 
-#!/usr/bin/env python2 +#!/usr/bin/env python3 # -# page_brute.py -# by @matonis - secualexploits.blogspot.com - www.mike-matonis.com -# Summer of 2013 +# page_brute.py +# by @matonis - secualexploits.blogspot.com - www.mike-matonis.com +# Summer of 2013 # import sys import argparse @@ -13,215 +13,276 @@ import binascii try: - import yara + import yara except: - print "[!] - ERROR: Could not import YARA..." - print "...did you install yara and yara-python? Exiting." - sys.exit() + print("[!] - ERROR: Could not import YARA...") + print("...did you install yara and yara-python? Exiting.") + sys.exit() def is_block_null(block): - #Here we test to see if the block is null..if so, skip. - RAW_BLOCK=binascii.hexlify(block) - NULL_REF=binascii.hexlify(NULL_REFERENCE) - if RAW_BLOCK == NULL_REF: - return True - else: - return False + # Here we test to see if the block is null..if so, skip. + RAW_BLOCK = binascii.hexlify(block) + NULL_REF = binascii.hexlify(NULL_REFERENCE) + if RAW_BLOCK == NULL_REF: + return True + else: + return False + def build_ruleset(): - if RULETYPE == "FILE": - try: - rules=yara.compile(str(RULES)) - print "..... Ruleset Compilation Successful." - return rules - except: - print "[!] - Could not compile YARA rule: %s" % RULES - print "Exiting." - sys.exit() - - elif RULETYPE == "FOLDER": - RULEDATA="" - #::Get list of files ending in .yara - - RULE_COUNT = len(glob.glob1(RULES,"*.yar")) - if RULE_COUNT != 0: - for yara_file in glob.glob(os.path.join(RULES, "*.yar")): - try: - yara.compile(str(yara_file)) - print "..... Syntax appears to be OK: %s " % yara_file - try: - with open(yara_file, "r") as sig_file: - file_contents=sig_file.read() - RULEDATA=RULEDATA + "\n" + file_contents - except: - print "..... SKIPPING: Could not open file for reading: %s " % yara_file - except: - print "..... SKIPPING: Could not compile rule: %s " % yara_file - try: - rules=yara.compile(source=RULEDATA) - print "..... SUCCESS! 
Compiled noted yara rulesets.\n" - return rules - except: - print "[!] - Some catastropic error occurred in the compilation of signatureswithin the directory. Exiting." - sys.exit() - else: - print "No files ending in .yar within: %s " % RULES - print "Exiting." - sys.exit() - - elif RULETYPE == "DEFAULT": - rules=yara.compile(str(RULES)) - print "[+] - Ruleset Compilation Successful." - return rules - - else: - print "[!] - ERROR: Possible catastrophic error on build_ruleset. Exiting." - sys.exit() + if RULETYPE == "FILE": + try: + rules = yara.compile(str(RULES)) + print("..... Ruleset Compilation Successful.") + return rules + except: + print("[!] - Could not compile YARA rule: %s" % RULES) + print("Exiting.") + sys.exit() + + elif RULETYPE == "FOLDER": + RULEDATA = "" + #::Get list of files ending in .yara + + RULE_COUNT = len(glob.glob1(RULES, "*.yar")) + if RULE_COUNT != 0: + for yara_file in glob.glob(os.path.join(RULES, "*.yar")): + try: + yara.compile(str(yara_file)) + print("..... Syntax appears to be OK: %s " % yara_file) + try: + with open(yara_file, "r") as sig_file: + file_contents = sig_file.read() + RULEDATA = RULEDATA + "\n" + file_contents + except: + print( + "..... SKIPPING: Could not open file for reading: %s " + % yara_file + ) + except: + print("..... SKIPPING: Could not compile rule: %s " % yara_file) + try: + rules = yara.compile(source=RULEDATA) + print("..... SUCCESS! Compiled noted yara rulesets.\n") + return rules + except: + print( + "[!] - Some catastropic error occurred in the compilation of signatureswithin the directory. Exiting." + ) + sys.exit() + else: + print("No files ending in .yar within: %s " % RULES) + print("Exiting.") + sys.exit() + + elif RULETYPE == "DEFAULT": + rules = yara.compile(str(RULES)) + print("[+] - Ruleset Compilation Successful.") + return rules + + else: + print("[!] - ERROR: Possible catastrophic error on build_ruleset. 
Exiting.") + sys.exit() + def print_procedures(): - print "[+] - PAGE_BRUTE running with the following options:" - print "\t[-] - FILE: %s" % FILE - print "\t[-] - PAGE_SIZE: %s" % PAGE_SIZE - print "\t[-] - RULES TYPE: %s" % RULETYPE - print "\t[-] - RULE LOCATION: %s" % RULES - print "\t[-] - INVERSION SCAN: %s" % INVERT - print "\t[-] - WORKING DIR: %s" % WORKING_DIR - print "\t=================\n" + print("[+] - PAGE_BRUTE running with the following options:") + print("\t[-] - FILE: %s" % FILE) + print("\t[-] - PAGE_SIZE: %s" % PAGE_SIZE) + print("\t[-] - RULES TYPE: %s" % RULETYPE) + print("\t[-] - RULE LOCATION: %s" % RULES) + print("\t[-] - INVERSION SCAN: %s" % INVERT) + print("\t[-] - WORKING DIR: %s" % WORKING_DIR) + print("\t=================\n") + def main(): - global FILE - global PAGE_SIZE - global RULES - global SCANNAME - global INVERT - global RULETYPE - global NULL_REFERENCE - - argument_parser = argparse.ArgumentParser(description="Checks pages in pagefiles for YARA-based rule matches. 
Useful to identify forensic artifacts within Windows-based page files and characterize blocks based on regular expressions.") - - group_arg = argument_parser.add_argument_group() - group_arg.add_argument("-f", "--file", metavar="FILE", help="Pagefile or any chunk/block-based binary file") - group_arg.add_argument("-p", "--size", metavar="SIZE", help="Size of chunk/block in bytes (Default 4096)") - group_arg.add_argument("-o", "--scanname", metavar="SCANNAME", help="Descriptor of the scan session - used for output directory") - group_arg.add_argument("-i", "--invert", help="Given scan options, match all blocks that DO NOT match a ruleset",action='store_true') - - group_arg = argument_parser.add_mutually_exclusive_group() - group_arg.add_argument("-r", "--rules", metavar="RULEFILE", help="File/directory containing YARA signatures (must end with .yar)") - - args = argument_parser.parse_args() - - if len(sys.argv) < 2: - print argument_parser.print_help() - sys.exit() - - #::Check to see if file was provided::# - if args.file: - try: - with open(args.file): - FILE=args.file - print "[+] - PAGE_BRUTE processing file: %s" % FILE - except: - print "[!] - Could not open %s. Exiting." % FILE - sys.exit() - else: - print "[!] - No file provided. Use -f, --file to provide a file. Exiting." 
- sys.exit() - - #::Check to see if page size provided::# - if args.size: - PAGE_SIZE=int(args.size) - NULL_REFERENCE= '\x00' * PAGE_SIZE - else: - PAGE_SIZE=4096 - NULL_REFERENCE= '\x00' * PAGE_SIZE - - #::Check if --scan-name provided::# - if args.scanname: - SCANNAME=args.scanname - else: - SCANNAME="PAGE_BRUTE-" + datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + "-RESULTS" - - #::Check if --invert-match provided::# - if args.invert: - INVERT=True - else: - INVERT=False - - #::Check if --rule-file provdided - if not, use default ruleset::# - if args.rules: - RULES=args.rules - try: - #::Is File?::# - if os.path.isfile(RULES): - RULETYPE="FILE" - print "[+] - YARA rule of File type provided for compilation: %s" % RULES - elif os.path.isdir(RULES): - print "[+] - YARA rule of Folder type provided for compilation: %s" % RULES - RULETYPE="FOLDER" - except: - print "[!] - Possible catastrophic error with the provided rule file...exiting." - sys.exit() - else: - try: - with open("default_signatures.yar"): - RULES="default_signatures.yar" - RULETYPE="DEFAULT" - except: - print "[!] - Could not locate \"default_signature.yar\". Find it or provide custom signatures via --rules. Exiting." - sys.exit() - - #::Compile rules::# - authoritative_rules=build_ruleset() - #::Build directory structure - global WORKING_DIR - WORKING_DIR=SCANNAME - if not os.path.exists(WORKING_DIR): - os.makedirs(WORKING_DIR) - #::Let People Know what we're doing::# - print_procedures() - #::Find Evil::# - page_id=0 - with open(FILE, "rb") as page_file: - while True: - matched=False - raw_page=page_file.read(PAGE_SIZE) - if raw_page == "": - print "Done!" - print "Ending page_id is: %s" % page_id - break - if not is_block_null(raw_page): - #::Determine if block is null...: - for matches in authoritative_rules.match(data=raw_page): - if INVERT == True: - matched=True - else: - CHUNK_OUTPUT_DIR=os.path.join(WORKING_DIR,matches.rule) - print " [!] 
FLAGGED BLOCK " + str(page_id) + ": " + matches.rule - - if not os.path.exists(CHUNK_OUTPUT_DIR): - os.makedirs(CHUNK_OUTPUT_DIR) - - #::Save chunk to file::# - CHUNK_OUTPUT_FWD=os.path.join(CHUNK_OUTPUT_DIR,str(page_id) + ".block") - page_export=open(CHUNK_OUTPUT_FWD,'w+') - page_export.write(raw_page) - page_export.close() - - if INVERT == True: - if matched == False: - CHUNK_OUTPUT_DIR=os.path.join(WORKING_DIR,"INVERTED-MATCH") - print " [!] BLOCK DOES NOT MATCH ANY KNOWN SIGNATURE " + str(page_id) - if not os.path.exists(CHUNK_OUTPUT_DIR): - os.makedirs(CHUNK_OUTPUT_DIR) - - CHUNK_OUTPUT_FWD=os.path.join(CHUNK_OUTPUT_DIR,str(page_id) + ".block") - page_export=open(CHUNK_OUTPUT_FWD,'w+') - page_export.write(raw_page) - page_export.close() - #::Increment Counter for offset increment::# - page_id=page_id+1 + global FILE + global PAGE_SIZE + global RULES + global SCANNAME + global INVERT + global RULETYPE + global NULL_REFERENCE + + argument_parser = argparse.ArgumentParser( + description="Checks pages in pagefiles for YARA-based rule matches. Useful to identify forensic artifacts within Windows-based page files and characterize blocks based on regular expressions." 
+ ) + + group_arg = argument_parser.add_argument_group() + group_arg.add_argument( + "-f", + "--file", + metavar="FILE", + help="Pagefile or any chunk/block-based binary file", + ) + group_arg.add_argument( + "-p", + "--size", + metavar="SIZE", + help="Size of chunk/block in bytes (Default 4096)", + ) + group_arg.add_argument( + "-o", + "--scanname", + metavar="SCANNAME", + help="Descriptor of the scan session - used for output directory", + ) + group_arg.add_argument( + "-i", + "--invert", + help="Given scan options, match all blocks that DO NOT match a ruleset", + action="store_true", + ) + + group_arg = argument_parser.add_mutually_exclusive_group() + group_arg.add_argument( + "-r", + "--rules", + metavar="RULEFILE", + help="File/directory containing YARA signatures (must end with .yar)", + ) + + args = argument_parser.parse_args() + + if len(sys.argv) < 2: + argument_parser.print_help() + sys.exit() + + #::Check to see if file was provided::# + if args.file: + try: + with open(args.file): + FILE = args.file + print("[+] - PAGE_BRUTE processing file: %s" % FILE) + except: + print("[!] - Could not open %s. Exiting." % FILE) + sys.exit() + else: + print("[!] - No file provided. Use -f, --file to provide a file. 
Exiting.") + sys.exit() + + #::Check to see if page size provided::# + if args.size: + PAGE_SIZE = int(args.size) + NULL_REFERENCE = "\x00" * PAGE_SIZE + else: + PAGE_SIZE = 4096 + NULL_REFERENCE = "\x00" * PAGE_SIZE + + #::Check if --scan-name provided::# + if args.scanname: + SCANNAME = args.scanname + else: + SCANNAME = ( + "PAGE_BRUTE-" + + datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + + "-RESULTS" + ) + + #::Check if --invert-match provided::# + if args.invert: + INVERT = True + else: + INVERT = False + + #::Check if --rule-file provdided - if not, use default ruleset::# + if args.rules: + RULES = args.rules + try: + #::Is File?::# + if os.path.isfile(RULES): + RULETYPE = "FILE" + print( + "[+] - YARA rule of File type provided for compilation: %s" % RULES + ) + elif os.path.isdir(RULES): + print( + "[+] - YARA rule of Folder type provided for compilation: %s" + % RULES + ) + RULETYPE = "FOLDER" + except: + print( + "[!] - Possible catastrophic error with the provided rule file...exiting." + ) + sys.exit() + else: + try: + with open("default_signatures.yar"): + RULES = "default_signatures.yar" + RULETYPE = "DEFAULT" + except: + print( + '[!] - Could not locate "default_signature.yar". Find it or provide custom signatures via --rules. Exiting.' 
+ ) + sys.exit() + + #::Compile rules::# + authoritative_rules = build_ruleset() + #::Build directory structure + global WORKING_DIR + WORKING_DIR = SCANNAME + if not os.path.exists(WORKING_DIR): + os.makedirs(WORKING_DIR) + #::Let People Know what we're doing::# + print_procedures() + #::Find Evil::# + page_id = 0 + with open(FILE, "rb") as page_file: + while True: + matched = False + raw_page = page_file.read(PAGE_SIZE) + if raw_page == "": + print("Done!") + print("Ending page_id is: %s" % page_id) + break + if not is_block_null(raw_page): + #::Determine if block is null...: + for matches in authoritative_rules.match(data=raw_page): + if INVERT == True: + matched = True + else: + CHUNK_OUTPUT_DIR = os.path.join(WORKING_DIR, matches.rule) + print( + " [!] FLAGGED BLOCK " + + str(page_id) + + ": " + + matches.rule + ) + + if not os.path.exists(CHUNK_OUTPUT_DIR): + os.makedirs(CHUNK_OUTPUT_DIR) + + #::Save chunk to file::# + CHUNK_OUTPUT_FWD = os.path.join( + CHUNK_OUTPUT_DIR, str(page_id) + ".block" + ) + page_export = open(CHUNK_OUTPUT_FWD, "w+") + page_export.write(raw_page) + page_export.close() + + if INVERT == True: + if matched == False: + CHUNK_OUTPUT_DIR = os.path.join(WORKING_DIR, "INVERTED-MATCH") + print( + " [!] BLOCK DOES NOT MATCH ANY KNOWN SIGNATURE " + + str(page_id) + ) + if not os.path.exists(CHUNK_OUTPUT_DIR): + os.makedirs(CHUNK_OUTPUT_DIR) + + CHUNK_OUTPUT_FWD = os.path.join( + CHUNK_OUTPUT_DIR, str(page_id) + ".block" + ) + page_export = open(CHUNK_OUTPUT_FWD, "w+") + page_export.write(raw_page) + page_export.close() + #::Increment Counter for offset increment::# + page_id = page_id + 1 + if __name__ == "__main__": main() diff --git a/sift/files/pdf-tools/mPDF.py b/sift/files/pdf-tools/mPDF.py index 836931f2..f749d3f2 100644 --- a/sift/files/pdf-tools/mPDF.py +++ b/sift/files/pdf-tools/mPDF.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#! 
/usr/bin/env python3 # module with simple class to build PDF documents with basic PDF elements # Source code put in public domain by Didier Stevens, no Copyright @@ -6,7 +6,7 @@ # Use at your own risk # # History: -# +# # 2008/05/18: continue # 2008/05/19: continue # 2008/05/28: stream2 @@ -16,37 +16,102 @@ # 2011/03/03: Added support for info in trailer and xrefAndTrailer # 2011/07/01: V0.1.4: Added support for filters i and I; added support for Python 3 # 2012/02/25: fixed printing \n for filters i and I +# 2013/04/03: V0.2.0: Added cNameObfuscation; filter j and *; cFuzzer +# 2013/04/05: added docstrings +# 2013/04/11: added SetReference +# 2013/04/14: V0.2.1: added return value to stream method +# 2013/04/20: V0.2.2: added version parameter to header function +# 2014/09/25: V0.2.3: added comment method +# 2014/10/15: V0.2.4: added CObjectStream +# 2017/04/16: V0.2.5: added support for filter i## # Todo: # - add support for extra filters to stream2 __author__ = 'Didier Stevens' -__version__ = '0.1.4' -__date__ = '2012/02/25' +__version__ = '0.2.5' +__date__ = '2017/04/16' import sys import zlib import platform +import random +import re +import struct + +def ReadBinaryFile(name): + """Read a binary file and return the content, return None if error occured + """ + + try: + fBinary = open(name, 'rb') + except: + return None + try: + content = fBinary.read() + except: + return None + finally: + fBinary.close() + return content -def SplitByLength(input, length): +def ParseFilters(definition): + filters = [] + number = '' + for character in definition + ' ': + if character.isdigit(): + number += character + else: + if number != '': + filters.append(number) + number = '' + filters.append(character) result = [] - while len(input) > length: - result.append(input[0:length] + '\n') - input = input[length:] - result.append(input + '>') + filters = filters[:-1] + while filters != []: + token = filters[0] + filters = filters[1:] + if token.lower() == 'i': + if filters != [] 
and filters[0].isdigit(): + result.append((token, int(filters[0]))) + filters = filters[1:] + else: + result.append((token, 512)) + else: + result.append((token, None)) return result +def IsLastFilterI(filters): + if filters == []: + return False + return filters[-1][0].lower() == 'i' + class cPDF: + """ + Class to create a PDF file + """ def __init__(self, filename): + """ + class instantiation arguments: + + filename is the name of the PDF file to be created + """ self.filename = filename self.indirectObjects = {} - + self.objstms = [] + def appendString(self, str): + """ + Internal helper function + """ fPDF = open(self.filename, 'a') fPDF.write(str) fPDF.close() def appendBinary(self, str): + """ + Internal helper function + """ fPDF = open(self.filename, 'ab') if sys.version_info[0] == 2: fPDF.write(str) @@ -55,90 +120,202 @@ def appendBinary(self, str): fPDF.close() def filesize(self): + """ + Internal helper function + """ fPDF = open(self.filename, 'rb') fPDF.seek(0, 2) size = fPDF.tell() fPDF.close() return size - + def IsWindows(self): + """ + Internal helper function + """ return platform.system() in ('Windows', 'Microsoft') - - def header(self): + + def header(self, version='1.1'): + """ + Method to create a PDF header (%PDF-1.1) and output it + to the PDF file. + + By default, the version is 1.1, but can be specified with + the version argument. + """ fPDF = open(self.filename, 'w') - fPDF.write("%PDF-1.1\n") + fPDF.write('%%PDF-%s\n' % version) fPDF.close() - + def binary(self): + """ + Method to create a comment (%\\xD0\\xD0\\xD0\\xD0) and output it + to the PDF file. + Use this after the header to indicate a PDF file has binary + (not printable) content. + """ self.appendString("%\xD0\xD0\xD0\xD0\n") + def comment(self, comment): + """ + Method to create a comment and output it to the PDF file. 
+ """ + self.appendString('%' + comment + '\n') + def indirectobject(self, index, version, io): + """ + Method to create an indirect object and output it to the PDF file. + + index is the index number of the object. + + version is the version number of the object. Use 0 by convention. + + io is the content of the indirect object. + """ self.appendString("\n") self.indirectObjects[index] = self.filesize() self.appendString("%d %d obj\n%s\nendobj\n" % (index, version, io)) def stream(self, index, version, streamdata, dictionary="<< /Length %d >>"): + """ + Method to create an indirect object with a stream and output it + to the PDF file. + + index is the index number of the object. + + version is the version number of the object. Use 0 by convention. + + streamdata is the stream that will be put inside the object + without any modifications. + + dictionary is the PDF dictionary to be put before the stream. + By default this is << /Length %d >>. If you provide a dictionary, + you must include /Length %d. + + The return value is the file position of the stream data. + + Use this method when you want to provide the stream yourself. 
+ """ self.appendString("\n") self.indirectObjects[index] = self.filesize() self.appendString(("%d %d obj\n" + dictionary + "\nstream\n") % (index, version, len(streamdata))) + position = self.filesize() self.appendBinary(streamdata) self.appendString("\nendstream\nendobj\n") - def Data2HexStr(self, data): + return position + + def Data2HexStr(self, data, whitespace=0): + """ + Internal helper function + """ hex = '' if sys.version_info[0] == 2: for b in data: - hex += "%02x" % ord(b) + hex += "%02x%s" % (ord(b), ' ' * random.randint(0, whitespace)) else: for b in data: - hex += "%02x" % b + hex += "%02x%s" % (b, ' ' * random.randint(0, whitespace)) return hex - def stream2(self, index, version, streamdata, entries="", filters=""): - """ - * h ASCIIHexDecode - * H AHx - * i like ASCIIHexDecode but with 512 long lines - * I like AHx but with 512 long lines - * ASCII85Decode - * LZWDecode - * f FlateDecode - * F Fl - * RunLengthDecode - * CCITTFaxDecode - * JBIG2Decode - * DCTDecode - * JPXDecode - * Crypt - """ - + def stream2(self, index, version, streamdata, entries="", filters="", fuzzer=None): + """ + Method to create an indirect object with a stream and + output it to the PDF file. + + index is the index number of the object. + + version is the version number of the object. Use 0 by convention. + + streamdata is the stream that will be put inside the object + modified according to the filters. + + entries is a string with a list of entries to be put inside + the PDF dictionary. Empty string by default. + + filters is a string with the encoding filters to be applied. + Each filter is represented by a letter, and filters are applied + from left to right. + For example, "hf" will apply the ASCIIHexDecode encoding filter and + then the FlateDecode encoding filter. For more details regarding + filters, see below. + Empty string by default. + + fuzzer is a fuzzer object to be used by the fuzzer filter (*). 
+ If no object is provided, a default instance of class cFuzzer + is used. + + Use this method when you want the stream to be encoded. + + Implemented filters: + h ASCIIHexDecode + H AHx + i like ASCIIHexDecode but with 512 character long lines (default) + add number to speficy length of line, example: i80 for 80 characters + I like AHx but with 512 character long lines (default) + add number to speficy length of line, example: I80 for 80 characters + j like ASCIIHexDecode but with random whitespace + J like AHx but with random whitespace + f FlateDecode + F Fl + + Special filters (these are applied but not added to /Filters): + * for fuzzing + + Not implemented filters: + ASCII85Decode + LZWDecode + RunLengthDecode + CCITTFaxDecode + JBIG2Decode + DCTDecode + JPXDecode + Crypt + """ + + if fuzzer == None: + oFuzzer = cFuzzer() + else: + oFuzzer = fuzzer encodeddata = streamdata filter = [] + filters = ParseFilters(filters) for i in filters: - if i.lower() == "h": + if i[0].lower() == 'h': encodeddata = self.Data2HexStr(encodeddata) + '>' - if i == "h": + if i[0] == 'h': filter.insert(0, "/ASCIIHexDecode") else: filter.insert(0, "/AHx") - elif i.lower() == "i": - encodeddata = ''.join(SplitByLength(self.Data2HexStr(encodeddata), 512)) - if i == "i": + elif i[0].lower() == "i": + encodeddata = ''.join(self.SplitByLength(self.Data2HexStr(encodeddata), i[1])) + if i[0] == "i": filter.insert(0, "/ASCIIHexDecode") else: filter.insert(0, "/AHx") - elif i.lower() == "f": + elif i[0].lower() == "j": + encodeddata = self.Data2HexStr(encodeddata, 2) + '>' + if i[0] == "j": + filter.insert(0, "/ASCIIHexDecode") + else: + filter.insert(0, "/AHx") + elif i[0].lower() == "f": encodeddata = zlib.compress(encodeddata) - if i == "f": + if i[0] == "f": filter.insert(0, "/FlateDecode") else: filter.insert(0, "/Fl") + elif i[0] == "*": + encodeddata = oFuzzer.Fuzz(encodeddata) else: print("Error") return self.appendString("\n") self.indirectObjects[index] = self.filesize() - 
self.appendString("%d %d obj\n<<\n /Length %d\n" % (index, version, len(encodeddata))) + length = len(encodeddata) + if IsLastFilterI(filters) and self.IsWindows(): + length += encodeddata.count('\n') + self.appendString("%d %d obj\n<<\n /Length %d\n" % (index, version, length)) if len(filter) == 1: self.appendString(" /Filter %s\n" % filter[0]) if len(filter) > 1: @@ -146,45 +323,425 @@ def stream2(self, index, version, streamdata, entries="", filters=""): if entries != "": self.appendString(" %s\n" % entries) self.appendString(">>\nstream\n") - if filters[-1].lower() == 'i': + if IsLastFilterI(filters): self.appendString(encodeddata) else: self.appendBinary(encodeddata) self.appendString("\nendstream\nendobj\n") def xref(self): + """ + Method to create an xref table and output it to the PDF file. + + Returns the file position of the xref table and the size of the + xref table in a list. + """ self.appendString("\n") startxref = self.filesize() - max = 0 + maximumIndexValue = 0 for i in self.indirectObjects.keys(): - if i > max: - max = i - self.appendString("xref\n0 %d\n" % (max+1)) + if i > maximumIndexValue: + maximumIndexValue = i + self.appendString("xref\n0 %d\n" % (maximumIndexValue+1)) if self.IsWindows(): eol = '\n' else: eol = ' \n' - for i in range(0, max+1): + for i in range(0, maximumIndexValue+1): if i in self.indirectObjects: self.appendString("%010d %05d n%s" % (self.indirectObjects[i], 0, eol)) else: self.appendString("0000000000 65535 f%s" % eol) - return (startxref, (max+1)) + return (startxref, (maximumIndexValue+1)) def trailer(self, startxref, size, root, info=None): + """ + Method to create a trailer and output it to the PDF file. + + startxref is the file position of the xref table (this value is + returned by the xref method) + + size is the size of the xref table (this value is + returned by the xref method) + + root is a string with a reference to the root object (/Root). 
+ Example: "1 0 R" + + info is a string with a reference to the info object (/Info). + This argument is optional. + Example: "9 0 R" + """ if info == None: self.appendString("trailer\n<<\n /Size %d\n /Root %s\n>>\nstartxref\n%d\n%%%%EOF\n" % (size, root, startxref)) else: self.appendString("trailer\n<<\n /Size %d\n /Root %s\n /Info %s\n>>\nstartxref\n%d\n%%%%EOF\n" % (size, root, info, startxref)) def xrefAndTrailer(self, root, info=None): + """ + Method to create an xref table together with a trailer and + output it to the PDF file. + + root is a string with a reference to the root object (/Root). + Example: "1 0 R" + + info is a string with a reference to the info object (/Info). + This argument is optional. + Example: "9 0 R" + """ xrefdata = self.xref() self.trailer(xrefdata[0], xrefdata[1], root, info) def template1(self): + """ + Method to create 5 indirect objects that form a template for + the start of a PDF file. + """ self.indirectobject(1, 0, "<<\n /Type /Catalog\n /Outlines 2 0 R\n /Pages 3 0 R\n>>") self.indirectobject(2, 0, "<<\n /Type /Outlines\n /Count 0\n>>") self.indirectobject(3, 0, "<<\n /Type /Pages\n /Kids [4 0 R]\n /Count 1\n>>") self.indirectobject(4, 0, "<<\n /Type /Page\n /Parent 3 0 R\n /MediaBox [0 0 612 792]\n /Contents 5 0 R\n /Resources <<\n /ProcSet [/PDF /Text]\n /Font << /F1 6 0 R >>\n >>\n>>") self.indirectobject(6, 0, "<<\n /Type /Font\n /Subtype /Type1\n /Name /F1\n /BaseFont /Helvetica\n /Encoding /MacRomanEncoding\n>>") + def MatchDictionary(self, string): + """ + Internal helper function + """ + status = 0 + level = 0 + result = '' + for c in string: + result += c + if status == 0 and c == '<': + status = 1 + elif status == 1: + if c == '<': + level += 1 + status = 0 + elif status == 0 and c == '>': + status = 2 + elif status == 2: + if c == '>': + level -= 1 + if level == 0: + return result + status = 0 + return None + + def originalIncrementalUpdate(self, pdffilename): + """ + Method to start an incremental update of an 
existing PDF file. + + pdffilename is the name of the PDF file to be used for the + incremental update. + + This methods returns the dictionary of the root object, + the dictionary of the trailer and the file position of the + xrf table found in the existing PDF file. These 3 values are + returned in a list. + + Use this method to start an incremental update. + """ + original = ReadBinaryFile(pdffilename) + fPDF = open(self.filename, 'wb') + if sys.version_info[0] == 2: + fPDF.write(original) + else: + fPDF.write(bytes(original, 'ascii')) + fPDF.close() + startxrefs = re.findall(r'startxref\s+(\d+)', original) + if startxrefs == []: + return None, None, None + oMatch = re.search(r'trailer\s+', original[int(startxrefs[-1]):]) + if oMatch == None: + return None, None, None + positionDictionaryTrailer = oMatch.end() + int(startxrefs[-1]) + dictionaryTrailer = self.MatchDictionary(original[positionDictionaryTrailer:]) + if dictionaryTrailer == None: + return None, None, None + oDictionaryTrailer = cDictionary(dictionaryTrailer) + idRoot = oDictionaryTrailer.GetID('Root') + if idRoot == None: + return None, None, None + oMatch = re.search(r'\s+%d\s+0\s+obj\s+' % idRoot, original) + if oMatch == None: + return None, None, None + dictionaryRoot = self.MatchDictionary(original[oMatch.end():]) + if dictionaryRoot == None: + return None, None, None + oDictionaryRoot = cDictionary(dictionaryRoot) + return oDictionaryTrailer, oDictionaryRoot, int(startxrefs[-1]) + + def xrefIncrementalAndTrailer(self, dictionaryTrailer): + """ + Method to create an xref table together with a trailer for + an incremental update and output it to the PDF file. + + dictionaryTrailer is a (modified) dictionary returned by method + originalIncrementalUpdate. + + Use this method to terminate an incremental update. 
+ """ + if self.IsWindows(): + eol = '\n' + else: + eol = ' \n' + + self.appendString("\n") + startxref = self.filesize() + self.appendString("xref\n0 1\n") + self.appendString("0000000000 65535 f%s" % eol) + for i in self.indirectObjects.keys(): + self.appendString("%d 1\n" % i) + self.appendString("%010d %05d n%s" % (self.indirectObjects[i], 0, eol)) + self.appendString("trailer\n%s\nstartxref\n%d\n%%%%EOF\n" % (dictionaryTrailer, startxref)) + return startxref + + def SplitByLength(self, input, length): + """ + Internal helper function + """ + result = [] + while len(input) > length: + result.append(input[0:length] + '\n') + input = input[length:] + result.append(input + '>') + return result + + def objstm(self, oObjectStream): + """ + Method to add an object stream to the PDF file. + + oObjectStream is an instantiated object of class cObjectStream. + """ + self.stream2(oObjectStream.index, oObjectStream.version, oObjectStream.getStream(), oObjectStream.getDictionaryEntries(), oObjectStream.filters) + self.objstms.append(oObjectStream) + + def xrefobjAndTrailer(self, index, version, root): + """ + Method to create an xref object together with a trailer and + output it to the PDF file. + + index is the index number of the xref object. + + version is the version number of the xref object. Use 0 by convention. + + root is a string with a reference to the root object (/Root). 
+ Example: "1 0 R" + """ + maximumIndexValue = max(index, max(self.indirectObjects.keys())) + dObjects = {} + for objstm in self.objstms: + for indexIter in objstm.objects: + dObjects[indexIter] = objstm + maximumIndexValue = max(maximumIndexValue, max(dObjects.keys())) + + self.appendString('\n') + self.indirectObjects[index] = self.filesize() + + xrefFormat = '>BII' + xrefStream = '' + for iter in range(maximumIndexValue + 1): + if iter in self.indirectObjects.keys(): + xrefStream += struct.pack(xrefFormat, 1, self.indirectObjects[iter], 0) + elif iter in dObjects.keys(): + xrefStream += struct.pack(xrefFormat, 2, dObjects[iter].index, dObjects[iter].objects.index(iter)) + else: + xrefStream += struct.pack(xrefFormat, 0, 0, 0) + + formatSizes = ' '.join([str(size) for size in map(struct.calcsize, [c for c in xrefFormat]) if size != 0]) + self.appendString(('%d %d obj\n<< /Type /XRef /Length %d /W [%s] /Root %s /Size %d >>\nstream\n') % (index, version, len(xrefStream), formatSizes, root, maximumIndexValue + 1)) + self.appendBinary(xrefStream) + self.appendString('\nendstream\nendobj\n') + + self.appendString('\nstartxref\n%d\n%%%%EOF\n' % self.indirectObjects[index]) + +class cNameObfuscation: + """ + Class to implement random PDF name obfuscation + Example: /Page becomes /P#61ge + """ + + def __init__(self, probability=0.5, characters=1): + """ + class instantiation arguments: + + probability is a number between 0.0 and 1.0. It indicates + the probability a name gets obfuscated. 0.0 means a name will + never be obfuscated, 1.0 means a name will always be obfuscated. 
+ default 0.5 + + characters is the number of characters in the name to obfuscated + by replacing them with the hex-equivalent (#??); default 1 + """ + self.probability = probability + self.characters = characters + + def IsNameCharacter(self, c): + """ + Internal helper function + """ + return c.lower() >= 'a' and c.lower() <= 'z' or c >= '0' and c <= '9' + + def ObfuscateName(self, name): + """ + Internal helper function + """ + if random.random() < self.probability: + if self.characters >= len(name): + population = range(len(name)) + else: + population = random.sample(range(len(name)), self.characters) + for iIndex in population: + name[iIndex] = '#%02X' % ord(name[iIndex]) + return '/' + ''.join(name) + + def Obfuscate(self, str): + """ + Use this method to randomly obfuscate the names found in the + provided string according to the instantiated class parameters. + The return value is the string with obfuscated names. + """ + result = '' + foundName = False + for c in str: + if not foundName and c == '/': + foundName = True + name = [] + elif foundName: + if self.IsNameCharacter(c): + name.append(c) + else: + result += self.ObfuscateName(name) + result += c + foundName = False + name = [] + else: + result += c + if foundName: + result += self.ObfuscateName(name) + return result + +class cFuzzer: + """ + Class to implement a simple fuzzer + """ + + def __init__(self, count=10, minimum=1, maximum=10, character='A'): + """ + class instantiation arguments: + + count is the number of fuzzed sequences (i.e. 
overwritten bytes) + produced by the fuzzer; default 10 + + minimum is the minimum length of a fuzzed sequence; default 1 + + maximum is the maximum length of a fuzzed sequence; default 10 + + character is the character used to generate the + fuzzed sequences; default 'A' + """ + self.count = count + self.minimum = minimum + self.maximum = maximum + self.character = character + + def Fuzz(self, str): + """ + Use this method to fuzz a string according to the + instantiated class parameters. + The return value is the fuzzed string. + """ + exploded = [c for c in str] + for count in range(self.count): + size = random.randint(self.minimum, self.maximum) + position = random.randint(0, len(str) - size) + for iIter in range(size): + exploded[position + iIter] = self.character + return ''.join(exploded) + +class cDictionary: + """ + Helper class to get and set values in PDF dictionaries + """ + + def __init__(self, string): + self.dictionary = string + + def GetID(self, name): + result = re.findall(r'/' + name + r'\s+(\d+)\s+0\s+[rR]', self.dictionary) + if result == []: + return None + return int(result[0]) + + def GetNumber(self, name): + result = re.findall(r'/' + name + r'\s+(\d+)', self.dictionary) + if result == []: + return None + return int(result[0]) + + def SetNumber(self, name, value): + oMatch = re.search(r'/' + name + r'\s+(\d+)', self.dictionary) + if oMatch == None: + self.Insert(name, str(value)) + else: + self.dictionary = self.dictionary[0:oMatch.start()] + '/' + name + ' ' + str(value) + self.dictionary[oMatch.end():] + + def Insert(self, name, value): + self.dictionary = self.dictionary[0:2] + '/' + name + ' ' + value + self.dictionary[2:] + + def SetReference(self, name, value): + oMatch = re.search(r'/' + name + r'\s+(\d+)\s+(\d+)\s+R', self.dictionary) + if oMatch == None: + oMatch = re.search(r'/' + name + r'\s*\[[^\[\]]+\]', self.dictionary) + if oMatch == None: + self.Insert(name, str(value)) + else: + self.dictionary = 
self.dictionary[0:oMatch.start()] + '/' + name + ' ' + str(value) + self.dictionary[oMatch.end():] + +class cObjectStream: + """ + Class to create an object stream (/ObjStm) + """ + + def __init__(self, index, version, filters=''): + """ + class instantiation arguments: + + index is the index number of the /ObjStm object. + + version is the version number of the /ObjStm object. Use 0 by convention. + + filters is a string with the encoding filters to be applied (see method stream2) + """ + self.index = index + self.version = version + self.filters = filters + self.indices = '' + self.ios = '' + self.objects = [] + + def indirectobject(self, index, io): + """ + Method to add an indirect object to the object stream. + + index is the index number of the object. + + io is the content of the indirect object. + """ + if self.indices != '': + self.indices += ' ' + self.indices += '%d %d' % (index, len(self.ios)) + self.ios += io + self.objects.append(index) + + def getDictionaryEntries(self): + """ + Internal helper function + """ + return '/Type /ObjStm\n /N %d\n /First %d' % (len(self.objects), len(self.indices)) + + def getStream(self): + """ + Internal helper function + """ + return self.indices + self.ios diff --git a/sift/files/pdf-tools/make-pdf-embedded.py b/sift/files/pdf-tools/make-pdf-embedded.py index 1a080d13..71ff6c4e 100644 --- a/sift/files/pdf-tools/make-pdf-embedded.py +++ b/sift/files/pdf-tools/make-pdf-embedded.py @@ -1,9 +1,9 @@ -#!/usr/bin/python +#!/usr/bin/env python3 __description__ = 'tool to create a PDF document with an embedded file' __author__ = 'Didier Stevens' -__version__ = '0.5.0' -__date__ = '2011/07/01' +__version__ = '0.5.1' +__date__ = '2017/04/23' """ Source code put in public domain by Didier Stevens, no Copyright @@ -19,13 +19,28 @@ 2008/11/09: V0.3, added autostart and button 2009/06/15: V0.4.0: added stego 2011/07/01: V0.5.0: added support for Python 3 - + 2017/04/23: V0.5.1: added option -n + Todo: """ import mPDF import optparse 
+# CIC: Call If Callable +def CIC(expression): + if callable(expression): + return expression() + else: + return expression + +# IFF: IF Function +def IFF(expression, valueTrue, valueFalse): + if expression: + return CIC(valueTrue) + else: + return CIC(valueFalse) + def ReadBinaryFile(name): """Read a binary file and return the content, return None if error occured """ @@ -93,6 +108,7 @@ def Main(): oParser.add_option('-b', '--button', action='store_true', default=False, help='add a "button" to launch the embedded file') oParser.add_option('-s', '--stego', action='store_true', default=False, help='"hide" the embedded file by replacing /EmbeddedFiles with /Embeddedfiles') oParser.add_option('-m', '--message', default='', help='text to display in the PDF document') + oParser.add_option('-n', '--name', default='', help='filename to use in the PDF objects (by default same as file-to-embed name)') (options, args) = oParser.parse_args() if len(args) != 2: @@ -111,7 +127,7 @@ def Main(): if embeddedFileContent == None: print('Error opening/reading file %s' % embeddedFileName) else: - CreatePDFWithEmbeddedFile(pdfFileName, embeddedFileName, embeddedFileContent, options.filters, options.nobinary, options.autoopen, options.button, options.stego, options.message) + CreatePDFWithEmbeddedFile(pdfFileName, IFF(options.name == '', embeddedFileName, options.name), embeddedFileContent, options.filters, options.nobinary, options.autoopen, options.button, options.stego, options.message) if __name__ == '__main__': Main() diff --git a/sift/files/pdf-tools/make-pdf-helloworld.py b/sift/files/pdf-tools/make-pdf-helloworld.py index c191c1c6..0a1cf11e 100644 --- a/sift/files/pdf-tools/make-pdf-helloworld.py +++ b/sift/files/pdf-tools/make-pdf-helloworld.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 #20080518 #20080519 @@ -8,11 +8,11 @@ import sys if len(sys.argv) != 2: - print "Usage: make-pdf-helloworld pdf-file" - print " " - print " Source code put in the public 
domain by Didier Stevens, no Copyright" - print " Use at your own risk" - print " https://DidierStevens.com" + print("Usage: make-pdf-helloworld pdf-file") + print(" ") + print(" Source code put in the public domain by Didier Stevens, no Copyright") + print(" Use at your own risk") + print(" https://DidierStevens.com") else: pdffile = sys.argv[1] @@ -24,10 +24,10 @@ oPDF.template1() #oPDF.stream(5, 0, "BT /F1 24 Tf 100 700 Td (Hello World) Tj ET") - oPDF.stream(5, 0, """BT /F1 12 Tf 100 700 Td 15 TL -(Hello World) Tj -(Second Line) ' -(Third Line) ' + oPDF.stream(5, 0, """BT /F1 12 Tf 100 700 Td 15 TL +(Hello World) Tj +(Second Line) ' +(Third Line) ' ET 100 712 100 -100 re S""") diff --git a/sift/files/pdf-tools/make-pdf-javascript.py b/sift/files/pdf-tools/make-pdf-javascript.py index f22c6ecc..1a1a0287 100644 --- a/sift/files/pdf-tools/make-pdf-javascript.py +++ b/sift/files/pdf-tools/make-pdf-javascript.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python3 # V0.1 2008/05/23 # make-pdf-javascript, use it to create a PDF document with embedded JavaScript that will execute automatically when the document is opened @@ -8,7 +8,7 @@ # Use at your own risk # # History: -# +# # 2008/05/29: continue # 2008/11/09: cleanup for release @@ -26,24 +26,21 @@ def Main(): if len(args) != 1: parser.print_help() - print '' - print ' make-pdf-javascript, use it to create a PDF document with embedded JavaScript that will execute automatically when the document is opened' - print ' Source code put in the public domain by Didier Stevens, no Copyright' - print ' Use at your own risk' - print ' https://DidierStevens.com' - + print('') + print(' make-pdf-javascript, use it to create a PDF document with embedded JavaScript that will execute automatically when the document is opened') + print(' Source code put in the public domain by Didier Stevens, no Copyright') + print(' Use at your own risk') + print(' https://DidierStevens.com') + else: oPDF = mPDF.cPDF(args[0]) - 
oPDF.header() - oPDF.indirectobject(1, 0, '<<\n /Type /Catalog\n /Outlines 2 0 R\n /Pages 3 0 R\n /OpenAction 7 0 R\n>>') oPDF.indirectobject(2, 0, '<<\n /Type /Outlines\n /Count 0\n>>') oPDF.indirectobject(3, 0, '<<\n /Type /Pages\n /Kids [4 0 R]\n /Count 1\n>>') oPDF.indirectobject(4, 0, '<<\n /Type /Page\n /Parent 3 0 R\n /MediaBox [0 0 612 792]\n /Contents 5 0 R\n /Resources <<\n /ProcSet [/PDF /Text]\n /Font << /F1 6 0 R >>\n >>\n>>') oPDF.stream(5, 0, 'BT /F1 12 Tf 100 700 Td 15 TL (JavaScript example) Tj ET') oPDF.indirectobject(6, 0, '<<\n /Type /Font\n /Subtype /Type1\n /Name /F1\n /BaseFont /Helvetica\n /Encoding /MacRomanEncoding\n>>') - if options.javascript == None and options.javascriptfile == None: javascript = """app.alert({cMsg: 'Hello from PDF JavaScript', cTitle: 'Testing PDF JavaScript', nIcon: 3});""" elif options.javascript != None: @@ -52,19 +49,19 @@ def Main(): try: fileJavasScript = open(options.javascriptfile, 'rb') except: - print "error opening file %s" % options.javascriptfile + print("error opening file %s" % options.javascriptfile) return try: javascript = fileJavasScript.read() except: - print "error reading file %s" % options.javascriptfile + print("error reading file %s" % options.javascriptfile) return finally: fileJavasScript.close() - + oPDF.indirectobject(7, 0, '<<\n /Type /Action\n /S /JavaScript\n /JS (%s)\n>>' % javascript) - + oPDF.xrefAndTrailer('1 0 R') if __name__ == '__main__': diff --git a/sift/files/pdf-tools/pdf-parser.py b/sift/files/pdf-tools/pdf-parser.py index 190b6928..b4083288 100644 --- a/sift/files/pdf-tools/pdf-parser.py +++ b/sift/files/pdf-tools/pdf-parser.py @@ -1,11 +1,11 @@ -#!/usr/bin/python +#!/usr/bin/env python3 __description__ = 'pdf-parser, use it to parse a PDF document' __author__ = 'Didier Stevens' -__version__ = '0.6.0' -__date__ = '2015/01/11' +__version__ = '0.7.10' +__date__ = '2024/10/26' __minimum_python_version__ = (2, 5, 1) -__maximum_python_version__ = (3, 4, 2) 
+__maximum_python_version__ = (3, 11, 1) """ Source code put in public domain by Didier Stevens, no Copyright @@ -49,10 +49,37 @@ 2014/12/09: cleanup, refactoring 2014/12/13: Python 3 fixes 2015/01/11: Added support for multiple YARA rule files; added request to search in trailer + 2015/01/31: V0.6.1 Added optionyarastrings + 2015/02/09: Added decoders + 2015/04/05: V0.6.2 Added generateembedded + 2015/04/06: fixed bug reported by Kurt for stream produced by Ghostscript where endstream is not preceded by whitespace; fixed prettyprint bug + 2015/04/24: V0.6.3 when option dump's filename is -, content is dumped to stdout + 2015/08/12: V0.6.4 option hash now also calculates hashes of streams when selecting or searching objects; and displays hexasciidump first line + 2016/07/27: V0.6.5 bugfix whitespace 0x00 0x0C after stream 0x0D 0x0A reported by @mr_me + 2016/11/20: V0.6.6 added workaround zlib errors FlateDecode + 2016/12/17: V0.6.7 added option -k + 2017/01/07: V0.6.8 changed cPDFParseDictionary to handle strings () with % character + 2017/10/28: fixed bug + 2017/10/29: added # support for option -y + 2018/06/29: V0.6.9 added option --overridingfilters + 2018/10/20: added keywords to statistics + 2019/02/22: V0.7.0 added option -O --objstm to parse the stream of /ObjStm objects, inspired by a contributor wishing anonymity + 2019/03/01: V0.7.1 added ContainsName for correct keyword statistics (-a) + 2019/04/12: V0.7.2 Python 2.6.6 compatibility fix + 2019/07/30: bug fixes (including fixes Josef Hinteregger) + 2019/09/26: V0.7.3 added multiple id selection to option -o; added man page (-m); added environment variable PDFPARSER_OPTIONS; bug fixes + 2019/11/05: V0.7.4 fixed plugin path when compiled with pyinstaller, replaced eval with int + 2021/07/03: V0.7.5 bug fixes; fixed ASCII85Decode Python 3 bug thanks to R Primus + 2021/11/23: V0.7.6 Python 3 bug fixes + 2022/05/24: bug fixes + 2022/11/09: V0.7.7 added support for environment variable 
DSS_DEFAULT_HASH_ALGORITHMS + 2023/01/03: V0.7.8 added unreferenced objects to statistics + 2024/03/21: V0.7.9 added option jsonoutput; added verbose YARA rules + 2024/10/25: V0.7.10 /ObjStm fix (x9090 PR) + 2024/10/26: added pyzipper support Todo: - handle printf todo - - fix PrettyPrint - support for JS hex string EC61C64349DB8D88AF0523C4C06E0F4D.pdf.vir """ @@ -63,21 +90,28 @@ import binascii import hashlib import sys -import zipfile import time import os +import textwrap +import json if sys.version_info[0] >= 3: from io import StringIO import urllib.request urllib23 = urllib.request + import configparser as ConfigParser else: from cStringIO import StringIO import urllib2 urllib23 = urllib2 + import ConfigParser try: import yara except: pass +try: + import pyzipper as zipfile +except ImportError: + import zipfile CHAR_WHITESPACE = 1 CHAR_DELIMITER = 2 @@ -95,13 +129,67 @@ PDF_ELEMENT_STARTXREF = 5 PDF_ELEMENT_MALFORMED = 6 +dumplinelength = 16 + +def PrintManual(): + manual = ''' +Manual: + +This manual is a work in progress. + +There is a free PDF analysis book: +https://blog.didierstevens.com/2010/09/26/free-malicious-pdf-analysis-e-book/ + +Option -o is used to select objects by id. Provide a single id or multiple ids separated by a comma (,). + +When environment variable PDFPARSER_OPTIONS is defined, the options it defines are added implicitely to the command line arguments. +Use this to define options you want included with each use of pdf-parser.py. +Like option -O, to parse stream objects (/ObjStm). +By defining PDFPARSER_OPTIONS=-O, pdf-parser will always parse stream objects (when found). +PS: this feature is experimental. + +Option -H calculates the MD5 hash by default. +This can be changed by setting environment variable DSS_DEFAULT_HASH_ALGORITHMS. +Like this: set DSS_DEFAULT_HASH_ALGORITHMS=sha256 + +Option --jsonoutput produces JSON output with the stream content of all objects with streams. Options -f and --overridingfilters apply. 
+For example, if option -f is used, the JSON output contains the filtered streams, otherwise the JSON output contains the unfiltered streams. + +''' + for line in manual.split('\n'): + print(textwrap.fill(line)) + #Convert 2 Bytes If Python 3 def C2BIP3(string): if sys.version_info[0] > 2: - return bytes([ord(x) for x in string]) + if type(string) == bytes: + return string + else: + return bytes([ord(x) for x in string]) else: return string +#Convert 2 String If Python 3 +def C2SIP3(bytes): + if sys.version_info[0] > 2: + return ''.join([chr(byte) for byte in bytes]) + else: + return bytes + +# CIC: Call If Callable +def CIC(expression): + if callable(expression): + return expression() + else: + return expression + +# IFF: IF Function +def IFF(expression, valueTrue, valueFalse): + if expression: + return CIC(valueTrue) + else: + return CIC(valueFalse) + def Timestamp(epoch=None): if epoch == None: localTime = time.localtime() @@ -119,10 +207,18 @@ def CopyWithoutWhiteSpace(content): def Obj2Str(content): return ''.join(map(lambda x: repr(x[1])[1:-1], CopyWithoutWhiteSpace(content))) +def CreateZipFileObject(arg1, arg2): + if 'AESZipFile' in dir(zipfile): + return zipfile.AESZipFile(arg1, arg2) + else: + return zipfile.ZipFile(arg1, arg2) + class cPDFDocument: def __init__(self, file): self.file = file - if file.lower().startswith('http://') or file.lower().startswith('https://'): + if type(file) != str: + self.infile = file + elif file.lower().startswith('http://') or file.lower().startswith('https://'): try: if sys.hexversion >= 0x020601F0: self.infile = urllib23.urlopen(file, timeout=5) @@ -134,7 +230,7 @@ def __init__(self, file): sys.exit() elif file.lower().endswith('.zip'): try: - self.zipfile = zipfile.ZipFile(file, 'r') + self.zipfile = CreateZipFileObject(file, 'r') self.infile = self.zipfile.open(self.zipfile.infolist()[0], 'r', C2BIP3('infected')) except: print('Error opening file %s' % file) @@ -251,16 +347,25 @@ def TokenIgnoreWhiteSpace(self): token = 
self.Token() return token + def Tokens(self): + tokens = [] + token = self.Token() + while token != None: + tokens.append(token) + token = self.Token() + return tokens + def unget(self, byte): self.ungetted.append(byte) class cPDFParser: - def __init__(self, file, verbose=False, extract=None): + def __init__(self, file, verbose=False, extract=None, objstm=None): self.context = CONTEXT_NONE self.content = [] self.oPDFTokenizer = cPDFTokenizer(file) self.verbose = verbose self.extract = extract + self.objstm = objstm def GetObject(self): while True: @@ -300,7 +405,7 @@ def GetObject(self): else: if self.context == CONTEXT_OBJ: if self.token[1] == 'endobj': - self.oPDFElementIndirectObject = cPDFElementIndirectObject(self.objectId, self.objectVersion, self.content) + self.oPDFElementIndirectObject = cPDFElementIndirectObject(self.objectId, self.objectVersion, self.content, self.objstm) self.context = CONTEXT_NONE self.content = [] return self.oPDFElementIndirectObject @@ -330,8 +435,8 @@ def GetObject(self): if IsNumeric(self.token2[1]): self.token3 = self.oPDFTokenizer.TokenIgnoreWhiteSpace() if self.token3[1] == 'obj': - self.objectId = eval(self.token[1]) - self.objectVersion = eval(self.token2[1]) + self.objectId = int(self.token[1], 10) + self.objectVersion = int(self.token2[1], 10) self.context = CONTEXT_OBJ else: self.oPDFTokenizer.unget(self.token3) @@ -351,7 +456,7 @@ def GetObject(self): elif self.token[1] == 'startxref': self.token2 = self.oPDFTokenizer.TokenIgnoreWhiteSpace() if self.token2 and IsNumeric(self.token2[1]): - return cPDFElementStartxref(eval(self.token2[1])) + return cPDFElementStartxref(int(self.token2[1], 10)) else: self.oPDFTokenizer.unget(self.token2) if self.verbose: @@ -402,11 +507,31 @@ def IIf(expr, truepart, falsepart): return falsepart class cPDFElementIndirectObject: - def __init__(self, id, version, content): + def __init__(self, id, version, content, objstm=None): self.type = PDF_ELEMENT_INDIRECT_OBJECT self.id = id self.version 
= version self.content = content + self.objstm = objstm + #fix stream for Ghostscript bug reported by Kurt + if self.ContainsStream(): + position = len(self.content) - 1 + if position < 0: + return + while self.content[position][0] == CHAR_WHITESPACE and position >= 0: + position -= 1 + if position < 0: + return + if self.content[position][1].endswith('endstream\n'): + self.content = self.content[0:position] + [(self.content[position][0], self.content[position][1][:-len('endstream\n')])] + [(CHAR_REGULAR, 'endstream')] + self.content[position+1:] + return + if self.content[position][0] != CHAR_REGULAR: + return + if self.content[position][1] == 'endstream': + return + if not self.content[position][1].endswith('endstream'): + return + self.content = self.content[0:position] + [(self.content[position][0], self.content[position][1][:-len('endstream')])] + [(self.content[position][0], 'endstream')] + self.content[position+1:] def GetType(self): content = CopyWithoutWhiteSpace(self.content) @@ -449,12 +574,22 @@ def Contains(self, keyword): data += Canonicalize(self.content[i][1]) return data.upper().find(keyword.upper()) != -1 - def StreamContains(self, keyword, filter, casesensitive, regex): + def ContainsName(self, keyword): + for token in self.content: + if token[1] == 'stream': + return False + if token[0] == CHAR_DELIMITER and EqualCanonical(token[1], keyword): + return True + return False + + def StreamContains(self, keyword, filter, casesensitive, regex, overridingfilters): if not self.ContainsStream(): return False - streamData = self.Stream(filter) + streamData = self.Stream(filter, overridingfilters) if filter and streamData == 'No filters': - streamData = self.Stream(False) + streamData = self.Stream(False, overridingfilters) + if isinstance(streamData, bytes): + keyword = keyword.encode() if regex: return re.search(keyword, streamData, IIf(casesensitive, 0, re.I)) elif casesensitive: @@ -462,7 +597,7 @@ def StreamContains(self, keyword, filter, 
casesensitive, regex): else: return keyword.lower() in streamData.lower() - def Stream(self, filter=True): + def Stream(self, filter=True, overridingfilters=''): state = 'start' countDirectories = 0 data = '' @@ -492,13 +627,24 @@ def Stream(self, filter=True): if self.content[i][0] == CHAR_REGULAR and self.content[i][1] == 'stream': state = 'stream-whitespace' elif state == 'stream-whitespace': - if self.content[i][0] != CHAR_WHITESPACE: + if self.content[i][0] == CHAR_WHITESPACE: + whitespace = self.content[i][1] + if whitespace.startswith('\x0D\x0A') and len(whitespace) > 2: + data += whitespace[2:] + elif whitespace.startswith('\x0A') and len(whitespace) > 1: + data += whitespace[1:] + else: data += self.content[i][1] state = 'stream-concat' elif state == 'stream-concat': if self.content[i][0] == CHAR_REGULAR and self.content[i][1] == 'endstream': if filter: - return self.Decompress(data, filters) + if overridingfilters == '': + return self.Decompress(data, filters) + elif overridingfilters == 'raw': + return data + else: + return self.Decompress(data, overridingfilters.split(' ')) else: return data else: @@ -546,19 +692,30 @@ def Decompress(self, data, filters): else: return data - def StreamYARAMatch(self, rules, filter): + def StreamYARAMatch(self, rules, decoders, decoderoptions, filter, overridingfilters): if not self.ContainsStream(): return None - streamData = self.Stream(filter) + streamData = self.Stream(filter, overridingfilters) if filter and streamData == 'No filters': - streamData = self.Stream(False) - return rules.match(data=streamData) -# return rules.match(data=streamData, callback=mycallback) - -def mycallback(data): - print(data['rule']) - yara.CALLBACK_CONTINUE - + streamData = self.Stream(False, overridingfilters) + + oDecoders = [cIdentity(streamData, None)] + for cDecoder in decoders: + try: + oDecoder = cDecoder(streamData, decoderoptions) + oDecoders.append(oDecoder) + except Exception as e: + print('Error instantiating decoder: %s' % 
cDecoder.name) + raise e + results = [] + for oDecoder in oDecoders: + while oDecoder.Available(): + yaraResults = rules.match(data=oDecoder.Decode()) + if yaraResults != []: + results.append([oDecoder.Name(), yaraResults]) + + return results + class cPDFElementStartxref: def __init__(self, index): self.type = PDF_ELEMENT_STARTXREF @@ -586,7 +743,7 @@ def __init__(self, content, nocanonicalizedoutput): dataTrimmed = TrimLWhiteSpace(TrimRWhiteSpace(self.content)) if dataTrimmed == []: self.parsed = None - elif self.isOpenDictionary(dataTrimmed[0]) and self.isCloseDictionary(dataTrimmed[-1]): + elif self.isOpenDictionary(dataTrimmed[0]) and (self.isCloseDictionary(dataTrimmed[-1]) or self.couldBeCloseDictionary(dataTrimmed[-1])): self.parsed = self.ParseDictionary(dataTrimmed)[0] else: self.parsed = None @@ -597,6 +754,9 @@ def isOpenDictionary(self, token): def isCloseDictionary(self, token): return token[0] == CHAR_DELIMITER and token[1] == '>>' + def couldBeCloseDictionary(self, token): + return token[0] == CHAR_DELIMITER and token[1].rstrip().endswith('>>') + def ParseDictionary(self, tokens): state = 0 # start dictionary = [] @@ -634,6 +794,28 @@ def ParseDictionary(self, tokens): dictionary.append((key, value)) value = [] state = 1 + elif value == [] and tokens[0][1] == '(': + value.append(tokens[0][1]) + elif value != [] and value[0] == '(' and tokens[0][1] != ')': + if tokens[0][1][0] == '%': + tokens = [tokens[0]] + cPDFTokenizer(StringIO(tokens[0][1][1:])).Tokens() + tokens[1:] + value.append('%') + else: + value.append(tokens[0][1]) + elif value != [] and value[0] == '(' and tokens[0][1] == ')': + value.append(tokens[0][1]) + balanced = 0 + for item in value: + if item == '(': + balanced += 1 + elif item == ')': + balanced -= 1 + if balanced < 0 and self.verbose: + print('todo 11: ' + repr(value)) + if balanced < 1: + dictionary.append((key, value)) + value = [] + state = 1 elif value != [] and tokens[0][1][0] == '/': dictionary.append((key, value)) key = 
ConditionalCanonicalize(tokens[0][1], self.nocanonicalizedoutput) @@ -642,25 +824,33 @@ def ParseDictionary(self, tokens): else: value.append(ConditionalCanonicalize(tokens[0][1], self.nocanonicalizedoutput)) tokens = tokens[1:] + return None, tokens def Retrieve(self): return self.parsed + def PrettyPrintSubElement(self, prefix, e): + if e[1] == []: + print('%s %s' % (prefix, e[0])) + elif type(e[1][0]) == type(''): + if len(e[1]) == 3 and IsNumeric(e[1][0]) and e[1][1] == '0' and e[1][2] == 'R': + joiner = ' ' + else: + joiner = '' + value = joiner.join(e[1]).strip() + reprValue = repr(value) + if "'" + value + "'" != reprValue: + value = reprValue + print('%s %s %s' % (prefix, e[0], value)) + else: + print('%s %s' % (prefix, e[0])) + self.PrettyPrintSub(prefix + ' ', e[1]) + def PrettyPrintSub(self, prefix, dictionary): if dictionary != None: print('%s<<' % prefix) for e in dictionary: - if e[1] == []: - print('%s %s' % (prefix, e[0])) - elif type(e[1][0]) == type(''): - value = ''.join(e[1]).strip() - reprValue = repr(value) - if "'" + value + "'" != reprValue: - value = reprValue - print('%s %s %s' % (prefix, e[0], value)) - else: - print('%s %s' % (prefix, e[0])) - self.PrettyPrintSub(prefix + ' ', e[1]) + self.PrettyPrintSubElement(prefix, e) print('%s>>' % prefix) def PrettyPrint(self, prefix): @@ -672,17 +862,60 @@ def Get(self, select): return value return None + def GetNestedSub(self, dictionary, select): + for key, value in dictionary: + if key == select: + return self.PrettyPrintSubElement('', [select, value]) + if type(value) == type([]) and len(value) > 0 and type(value[0]) == type((None,)): + result = self.GetNestedSub(value, select) + if result !=None: + return self.PrettyPrintSubElement('', [select, result]) + return None + + def GetNested(self, select): + return self.GetNestedSub(self.parsed, select) + def FormatOutput(data, raw): if raw: if type(data) == type([]): return ''.join(map(lambda x: x[1], data)) else: return data + elif 
sys.version_info[0] > 2: + return ascii(data) else: return repr(data) +#Fix for http://bugs.python.org/issue11395 +def StdoutWriteChunked(data): + if sys.version_info[0] > 2: + sys.stdout.buffer.write(data) + else: + while data != '': + sys.stdout.write(data[0:10000]) + try: + sys.stdout.flush() + except IOError: + return + data = data[10000:] + +def IfWIN32SetBinary(io): + if sys.platform == 'win32': + import msvcrt + msvcrt.setmode(io.fileno(), os.O_BINARY) + def PrintOutputObject(object, options): + if options.dump == '-': + filtered = object.Stream(options.filter == True, options.overridingfilters) + if filtered == []: + filtered = '' + IfWIN32SetBinary(sys.stdout) + StdoutWriteChunked(filtered) + return + print('obj %d %d' % (object.id, object.version)) + if object.objstm != None: + print(' Containing /ObjStm: %d %d' % object.objstm) print(' Type: %s' % ConditionalCanonicalize(object.GetType(), options.nocanonicalizedoutput)) print(' Referencing: %s' % ', '.join(map(lambda x: '%s %s %s' % x, object.GetReferences()))) dataPrecedingStream = object.ContainsStream() @@ -692,6 +925,16 @@ def PrintOutputObject(object, options): if options.debug: print(' %s' % FormatOutput(dataPrecedingStream, options.raw)) oPDFParseDictionary = cPDFParseDictionary(dataPrecedingStream, options.nocanonicalizedoutput) + if options.hash: + streamContent = object.Stream(False, options.overridingfilters) + print(' unfiltered') + print(' len: %6d md5: %s' % (len(streamContent), hashlib.md5(streamContent).hexdigest())) + print(' %s' % HexAsciiDumpLine(streamContent)) + streamContent = object.Stream(True, options.overridingfilters) + print(' filtered') + print(' len: %6d md5: %s' % (len(streamContent), hashlib.md5(streamContent).hexdigest())) + print(' %s' % HexAsciiDumpLine(streamContent)) + streamContent = None else: if options.debug or options.raw: print(' %s' % FormatOutput(object.content, options.raw)) @@ -700,14 +943,14 @@ def PrintOutputObject(object, options): 
oPDFParseDictionary.PrettyPrint(' ') print('') if options.filter and not options.dump: - filtered = object.Stream() + filtered = object.Stream(overridingfilters=options.overridingfilters) if filtered == []: print(' %s' % FormatOutput(object.content, options.raw)) else: print(' %s' % FormatOutput(filtered, options.raw)) if options.content: if object.ContainsStream(): - stream = object.Stream(False) + stream = object.Stream(False, options.overridingfilters) if stream != []: print(' %s' % FormatOutput(stream, options.raw)) else: @@ -715,7 +958,7 @@ def PrintOutputObject(object, options): if options.dump: - filtered = object.Stream(options.filter == True) + filtered = object.Stream(options.filter == True, options.overridingfilters) if filtered == []: filtered = '' try: @@ -766,7 +1009,7 @@ def ConditionalCanonicalize(sIn, nocanonicalizedoutput): def ASCII85Decode(data): import struct n = b = 0 - out = '' + out = b'' for c in data: if '!' <= c and c <= 'u': n += 1 @@ -776,7 +1019,7 @@ def ASCII85Decode(data): n = b = 0 elif c == 'z': assert n == 0 - out += '\0\0\0\0' + out += b'\0\0\0\0' elif c == '~': if n: for _ in range(5-n): @@ -788,8 +1031,26 @@ def ASCII85Decode(data): def ASCIIHexDecode(data): return binascii.unhexlify(''.join([c for c in data if c not in ' \t\n\r']).rstrip('>')) +# if inflating fails, we try to inflate byte per byte (sample 4da299d6e52bbb79c0ac00bad6a1d51d4d5fe42965a8d94e88a359e5277117e2) def FlateDecode(data): - return zlib.decompress(C2BIP3(data)) + try: + return zlib.decompress(C2BIP3(data)) + except: + if len(data) <= 10: + raise + oDecompress = zlib.decompressobj() + oStringIO = StringIO() + count = 0 + for byte in C2BIP3(data): + try: + oStringIO.write(oDecompress.decompress(byte)) + count += 1 + except: + break + if len(data) - count <= 2: + return oStringIO.getvalue() + else: + raise def RunLengthDecode(data): f = StringIO(data) @@ -892,13 +1153,17 @@ def run(self): def LZWDecode(data): return ''.join(LZWDecoder(StringIO(data)).run()) 
-def PrintGenerateObject(object, options): +def PrintGenerateObject(object, options, newId=None): + if newId == None: + objectId = object.id + else: + objectId = newId dataPrecedingStream = object.ContainsStream() if dataPrecedingStream: if options.filter: - decompressed = object.Stream(True) + decompressed = object.Stream(True, options.overridingfilters) if decompressed == 'No filters' or decompressed.startswith('Unsupported filter: '): - print(' oPDF.stream(%d, %d, %s, %s)' % (object.id, object.version, repr(object.Stream(False).rstrip()), repr(re.sub('/Length\s+\d+', '/Length %d', FormatOutput(dataPrecedingStream, True)).strip()))) + print(' oPDF.stream(%d, %d, %s, %s)' % (objectId, object.version, repr(object.Stream(False, options.overridingfilters).rstrip()), repr(re.sub('/Length\s+\d+', '/Length %d', FormatOutput(dataPrecedingStream, True)).strip()))) else: dictionary = FormatOutput(dataPrecedingStream, True) dictionary = re.sub(r'/Length\s+\d+', '', dictionary) @@ -907,11 +1172,11 @@ def PrintGenerateObject(object, options): dictionary = re.sub(r'^\s*<<', '', dictionary) dictionary = re.sub(r'>>\s*$', '', dictionary) dictionary = dictionary.strip() - print(" oPDF.stream2(%d, %d, %s, %s, 'f')" % (object.id, object.version, repr(decompressed.rstrip()), repr(dictionary))) + print(" oPDF.stream2(%d, %d, %s, %s, 'f')" % (objectId, object.version, repr(decompressed.rstrip()), repr(dictionary))) else: - print(' oPDF.stream(%d, %d, %s, %s)' % (object.id, object.version, repr(object.Stream(False).rstrip()), repr(re.sub('/Length\s+\d+', '/Length %d', FormatOutput(dataPrecedingStream, True)).strip()))) + print(' oPDF.stream(%d, %d, %s, %s)' % (objectId, object.version, repr(object.Stream(False, options.overridingfilters).rstrip()), repr(re.sub('/Length\s+\d+', '/Length %d', FormatOutput(dataPrecedingStream, True)).strip()))) else: - print(' oPDF.indirectobject(%d, %d, %s)' % (object.id, object.version, repr(FormatOutput(object.content, True).strip()))) + print(' 
oPDF.indirectobject(%d, %d, %s)' % (objectId, object.version, repr(FormatOutput(object.content, True).strip()))) def PrintObject(object, options): if options.generate: @@ -941,31 +1206,243 @@ def ProcessAt(argument): else: return [argument] -def YARACompile(fileordirname): - dFilepaths = {} - if os.path.isdir(fileordirname): - for root, dirs, files in os.walk(fileordirname): - for file in files: - filename = os.path.join(root, file) +def YARACompile(ruledata): + if ruledata.startswith('#'): + if ruledata.startswith('#h#'): + rule = binascii.a2b_hex(ruledata[3:]) + elif ruledata.startswith('#b#'): + rule = binascii.a2b_base64(ruledata[3:]) + elif ruledata.startswith('#s#'): + rule = 'rule string {strings: $a = "%s" ascii wide nocase condition: $a}' % ruledata[3:] + elif ruledata.startswith('#q#'): + rule = ruledata[3:].replace("'", '"') + elif ruledata.startswith('#x#'): + rule = 'rule hexadecimal {strings: $a = { %s } condition: $a}' % ruledata[3:] + elif ruledata.startswith('#r#'): + rule = 'rule regex {strings: $a = /%s/ ascii wide nocase condition: $a}' % ruledata[3:] + else: + rule = ruledata[1:] + return yara.compile(source=rule), rule + else: + dFilepaths = {} + if os.path.isdir(ruledata): + for root, dirs, files in os.walk(ruledata): + for file in files: + filename = os.path.join(root, file) + dFilepaths[filename] = filename + else: + for filename in ProcessAt(ruledata): dFilepaths[filename] = filename + return yara.compile(filepaths=dFilepaths), ','.join(dFilepaths.values()) + +def AddDecoder(cClass): + global decoders + + decoders.append(cClass) + +class cDecoderParent(): + pass + +def GetScriptPath(): + if getattr(sys, 'frozen', False): + return os.path.dirname(sys.executable) + else: + return os.path.dirname(sys.argv[0]) + +def LoadDecoders(decoders, verbose): + if decoders == '': + return + scriptPath = GetScriptPath() + for decoder in sum(map(ProcessAt, decoders.split(',')), []): + try: + if not decoder.lower().endswith('.py'): + decoder += '.py' + if 
os.path.dirname(decoder) == '': + if not os.path.exists(decoder): + scriptDecoder = os.path.join(scriptPath, decoder) + if os.path.exists(scriptDecoder): + decoder = scriptDecoder + exec(open(decoder, 'r').read(), globals(), globals()) + except Exception as e: + print('Error loading decoder: %s' % decoder) + if verbose: + raise e + +class cIdentity(cDecoderParent): + name = 'Identity function decoder' + + def __init__(self, stream, options): + self.stream = stream + self.options = options + self.available = True + + def Available(self): + return self.available + + def Decode(self): + self.available = False + return self.stream + + def Name(self): + return '' + +def DecodeFunction(decoders, options, stream): + if decoders == []: + return stream + return decoders[0](stream, options.decoderoptions).Decode() + +class cDumpStream(): + def __init__(self): + self.text = '' + + def Addline(self, line): + if line != '': + self.text += line + '\n' + + def Content(self): + return self.text + +def HexDump(data): + oDumpStream = cDumpStream() + hexDump = '' + for i, b in enumerate(data): + if i % dumplinelength == 0 and hexDump != '': + oDumpStream.Addline(hexDump) + hexDump = '' + hexDump += IFF(hexDump == '', '', ' ') + '%02X' % ord(b) + oDumpStream.Addline(hexDump) + return oDumpStream.Content() + +def CombineHexAscii(hexDump, asciiDump): + if hexDump == '': + return '' + return hexDump + ' ' + (' ' * (3 * (dumplinelength - len(asciiDump)))) + asciiDump + +def HexAsciiDump(data): + oDumpStream = cDumpStream() + hexDump = '' + asciiDump = '' + for i, b in enumerate(data): + if i % dumplinelength == 0: + if hexDump != '': + oDumpStream.Addline(CombineHexAscii(hexDump, asciiDump)) + hexDump = '%08X:' % i + asciiDump = '' + hexDump+= ' %02X' % ord(b) + asciiDump += IFF(ord(b) >= 32, b, '.') + oDumpStream.Addline(CombineHexAscii(hexDump, asciiDump)) + return oDumpStream.Content() + +def HexAsciiDumpLine(data): + return HexAsciiDump(data[0:16])[10:-1] + +def ParseINIFile(): + 
oConfigParser = ConfigParser.ConfigParser(allow_no_value=True) + oConfigParser.optionxform = str + oConfigParser.read(os.path.join(GetScriptPath(), 'pdfid.ini')) + keywords = [] + if oConfigParser.has_section('keywords'): + for key, value in oConfigParser.items('keywords'): + if not key in keywords: + keywords.append(key) + return keywords + +def MatchObjectID(id, selection): + return str(id) in selection.split(',') + +def GetArguments(): + arguments = sys.argv[1:] + envvar = os.getenv('PDFPARSER_OPTIONS') + if envvar == None: + return arguments + return envvar.split(' ') + arguments + +class cHashCRC32(): + def __init__(self): + self.crc32 = None + + def update(self, data): + self.crc32 = zlib.crc32(data) + + def hexdigest(self): + return '%08x' % (self.crc32 & 0xffffffff) + +class cHashChecksum8(): + def __init__(self): + self.sum = 0 + + def update(self, data): + if sys.version_info[0] >= 3: + self.sum += sum(data) + else: + self.sum += sum(map(ord, data)) + + def hexdigest(self): + return '%08x' % (self.sum) + +dSpecialHashes = {'crc32': cHashCRC32, 'checksum8': cHashChecksum8} + +def GetHashObjects(algorithms): + global dSpecialHashes + + dHashes = {} + + if algorithms == '': + algorithms = os.getenv('DSS_DEFAULT_HASH_ALGORITHMS', 'md5') + if ',' in algorithms: + hashes = algorithms.split(',') else: - for filename in ProcessAt(fileordirname): - dFilepaths[filename] = filename - return yara.compile(filepaths=dFilepaths) + hashes = algorithms.split(';') + for name in hashes: + if not name in dSpecialHashes.keys() and not name in hashlib.algorithms_available: + print('Error: unknown hash algorithm: %s' % name) + print('Available hash algorithms: ' + ' '.join([name for name in list(hashlib.algorithms_available)] + list(dSpecialHashes.keys()))) + return [], {} + elif name in dSpecialHashes.keys(): + dHashes[name] = dSpecialHashes[name]() + else: + dHashes[name] = hashlib.new(name) + + return hashes, dHashes + +def CalculateChosenHash(data): + hashes, dHashes = 
GetHashObjects('') + dHashes[hashes[0]].update(data) + return dHashes[hashes[0]].hexdigest(), hashes[0] + +class cMyJSONOutput(): + + def __init__(self): + self.items = [] + self.counter = 1 + + def AddIdItem(self, id, name, data): + self.items.append({'id': id, 'name': name, 'content': binascii.b2a_base64(data).strip(b'\n').decode()}) + + def AddItem(self, name, data): + self.AddIdItem(self.counter, name, data) + self.counter += 1 + + def GetJSON(self): + return json.dumps({'version': 2, 'id': 'didierstevens.com', 'type': 'content', 'fields': ['id', 'name', 'content'], 'items': self.items}) def Main(): """pdf-parser, use it to parse a PDF document """ + global decoders + oParser = optparse.OptionParser(usage='usage: %prog [options] pdf-file|zip-file|url\n' + __description__, version='%prog ' + __version__) + oParser.add_option('-m', '--man', action='store_true', default=False, help='Print manual') oParser.add_option('-s', '--search', help='string to search in indirect objects (except streams)') oParser.add_option('-f', '--filter', action='store_true', default=False, help='pass stream object through filters (FlateDecode, ASCIIHexDecode, ASCII85Decode, LZWDecode and RunLengthDecode only)') - oParser.add_option('-o', '--object', help='id of indirect object to select (version independent)') + oParser.add_option('-o', '--object', help='id(s) of indirect object(s) to select, use comma (,) to separate ids (version independent)') oParser.add_option('-r', '--reference', help='id of indirect object being referenced (version independent)') oParser.add_option('-e', '--elements', help='type of elements to select (cxtsi)') oParser.add_option('-w', '--raw', action='store_true', default=False, help='raw output for data and filters') oParser.add_option('-a', '--stats', action='store_true', default=False, help='display stats for pdf document') oParser.add_option('-t', '--type', help='type of indirect object to select') + oParser.add_option('-O', '--objstm', action='store_true', 
default=False, help='parse stream of /ObjStm objects') oParser.add_option('-v', '--verbose', action='store_true', default=False, help='display malformed PDF elements') oParser.add_option('-x', '--extract', help='filename to extract malformed content to') oParser.add_option('-H', '--hash', action='store_true', default=False, help='display hash of objects') @@ -977,9 +1454,21 @@ def Main(): oParser.add_option('--unfiltered', action='store_true', default=False, help='search in unfiltered streams') oParser.add_option('--casesensitive', action='store_true', default=False, help='case sensitive search in streams') oParser.add_option('--regex', action='store_true', default=False, help='use regex to search in streams') + oParser.add_option('--overridingfilters', type=str, default='', help='override filters with given filters (use raw for the raw stream content)') oParser.add_option('-g', '--generate', action='store_true', default=False, help='generate a Python program that creates the parsed PDF file') + oParser.add_option('--generateembedded', type=int, default=0, help='generate a Python program that embeds the selected indirect object as a file') oParser.add_option('-y', '--yara', help='YARA rule (or directory or @file) to check streams (can be used with option --unfiltered)') - (options, args) = oParser.parse_args() + oParser.add_option('--yarastrings', action='store_true', default=False, help='Print YARA strings') + oParser.add_option('--decoders', type=str, default='', help='decoders to load (separate decoders with a comma , ; @file supported)') + oParser.add_option('--decoderoptions', type=str, default='', help='options for the decoder') + oParser.add_option('-k', '--key', help='key to search in dictionaries') + oParser.add_option('-j', '--jsonoutput', action='store_true', default=False, help='produce json output') + (options, args) = oParser.parse_args(GetArguments()) + + if options.man: + oParser.print_help() + PrintManual() + return 0 if len(args) != 1: 
oParser.print_help() @@ -990,6 +1479,9 @@ def Main(): print(' https://DidierStevens.com') else: + decoders = [] + LoadDecoders(options.decoders, True) + oPDFParser = cPDFParser(args[0], options.verbose, options.extract) cntComment = 0 cntXref = 0 @@ -997,6 +1489,19 @@ def Main(): cntStartXref = 0 cntIndirectObject = 0 dicObjectTypes = {} + objectsAll = set() + objectsReferenced = set() + objectsWithStream = [] + keywords = ['/JS', '/JavaScript', '/AA', '/OpenAction', '/AcroForm', '/RichMedia', '/Launch', '/EmbeddedFile', '/XFA', '/URI'] + for extrakeyword in ParseINIFile(): + if not extrakeyword in keywords: + keywords.append(extrakeyword) + +# dKeywords = {keyword: [] for keyword in keywords} +# Done for compatibility with 2.6.6 + dKeywords = {} + for keyword in keywords: + dKeywords[keyword] = [] selectComment = False selectXref = False @@ -1020,12 +1525,12 @@ def Main(): return else: selectIndirectObject = True - if not options.search and not options.object and not options.reference and not options.type and not options.searchstream: + if not options.search and not options.object and not options.reference and not options.type and not options.searchstream and not options.key: selectComment = True selectXref = True selectTrailer = True selectStartXref = True - if options.search: + if options.search or options.key or options.reference: selectTrailer = True if options.type == '-': @@ -1033,7 +1538,7 @@ def Main(): else: optionsType = options.type - if options.generate: + if options.generate or options.generateembedded != 0: savedRoot = ['1', '0', 'R'] print('#!/usr/bin/python') print('') @@ -1057,14 +1562,55 @@ def Main(): print(' return') print(' oPDF = mPDF.cPDF(sys.argv[1])') + if options.generateembedded != 0: + print(" oPDF.header('1.1')") + print(r" oPDF.comment('\xd0\xd0\xd0\xd0')") + print(r" oPDF.indirectobject(1, 0, '<<\r\n /Type /Catalog\r\n /Outlines 2 0 R\r\n /Pages 3 0 R\r\n /Names << /EmbeddedFiles << /Names [(test.bin) 7 0 R] >> >>\r\n>>')") + 
print(r" oPDF.indirectobject(2, 0, '<<\r\n /Type /Outlines\r\n /Count 0\r\n>>')") + print(r" oPDF.indirectobject(3, 0, '<<\r\n /Type /Pages\r\n /Kids [4 0 R]\r\n /Count 1\r\n>>')") + print(r" oPDF.indirectobject(4, 0, '<<\r\n /Type /Page\r\n /Parent 3 0 R\r\n /MediaBox [0 0 612 792]\r\n /Contents 5 0 R\r\n /Resources <<\r\n /ProcSet [/PDF /Text]\r\n /Font << /F1 6 0 R >>\r\n >>\r\n>>')") + print(r" oPDF.stream(5, 0, 'BT /F1 12 Tf 70 700 Td 15 TL (This PDF document embeds file test.bin) Tj ET', '<< /Length %d >>')") + print(r" oPDF.indirectobject(6, 0, '<<\r\n /Type /Font\r\n /Subtype /Type1\r\n /Name /F1\r\n /BaseFont /Helvetica\r\n /Encoding /MacRomanEncoding\r\n>>')") + print(r" oPDF.indirectobject(7, 0, '<<\r\n /Type /Filespec\r\n /F (test.bin)\r\n /EF << /F 8 0 R >>\r\n>>')") + if options.yara != None: if not 'yara' in sys.modules: print('Error: option yara requires the YARA Python module.') return - rules = YARACompile(options.yara) + rules, rulesVerbose = YARACompile(options.yara) + if options.verbose: + print(rulesVerbose) + oPDFParserOBJSTM = None + oMyJSONOutput = cMyJSONOutput() while True: - object = oPDFParser.GetObject() + if oPDFParserOBJSTM == None: + object = oPDFParser.GetObject() + else: + object = oPDFParserOBJSTM.GetObject() + if object == None: + oPDFParserOBJSTM = None + object = oPDFParser.GetObject() + if options.objstm and hasattr(object, 'GetType') and EqualCanonical(object.GetType(), '/ObjStm') and object.ContainsStream(): + # parsing objects inside an /ObjStm object by extracting & parsing the stream content to create a synthesized PDF document, that is then parsed by cPDFParser + oPDFParseDictionary = cPDFParseDictionary(object.ContainsStream(), options.nocanonicalizedoutput) + numberOfObjects = int(oPDFParseDictionary.Get('/N')[0]) + offsetFirstObject = int(oPDFParseDictionary.Get('/First')[0]) + indexes = list(map(int, C2SIP3(object.Stream())[:offsetFirstObject].strip().replace('\n', ' ').split(' '))) + if len(indexes) % 2 != 0 or 
len(indexes) / 2 != numberOfObjects: + raise Exception('Error in index of /ObjStm stream') + streamObject = C2SIP3(object.Stream()[offsetFirstObject:]) + synthesizedPDF = '' + while len(indexes) > 0: + objectNumber = indexes[0] + offset = indexes[1] + indexes = indexes[2:] + if len(indexes) >= 2: + offsetNextObject = indexes[1] + else: + offsetNextObject = len(streamObject) + synthesizedPDF += '%d 0 obj\n%s\nendobj\n' % (objectNumber, streamObject[offset:offsetNextObject]) + oPDFParserOBJSTM = cPDFParser(StringIO(synthesizedPDF), options.verbose, options.extract, (object.id, object.version)) if object != None: if options.stats: if object.type == PDF_ELEMENT_COMMENT: @@ -1073,6 +1619,10 @@ def Main(): cntXref += 1 elif object.type == PDF_ELEMENT_TRAILER: cntTrailer += 1 + oPDFParseDictionary = cPDFParseDictionary(object.content[1:], options.nocanonicalizedoutput) + for keyTrailer, valueTrailer in oPDFParseDictionary.parsed: + if len(valueTrailer) == 3 and valueTrailer[2] == 'R' and IsNumeric(valueTrailer[0]) and IsNumeric(valueTrailer[1]): + objectsReferenced.add(tuple(valueTrailer)) elif object.type == PDF_ELEMENT_STARTXREF: cntStartXref += 1 elif object.type == PDF_ELEMENT_INDIRECT_OBJECT: @@ -1082,6 +1632,21 @@ def Main(): dicObjectTypes[type1] = [object.id] else: dicObjectTypes[type1].append(object.id) + for keyword in dKeywords.keys(): + if object.ContainsName(keyword): + dKeywords[keyword].append(object.id) + if object.ContainsStream(): + objectsWithStream.append(object.id) + for reference in object.GetReferences(): + objectsReferenced.add(reference) + objectsAll.add((str(object.id), str(object.version), 'R')) + elif options.jsonoutput: + if object.type == PDF_ELEMENT_INDIRECT_OBJECT: + if object.ContainsStream(): + filtered = object.Stream(options.filter == True, options.overridingfilters) + if filtered == []: + filtered = '' + oMyJSONOutput.AddItem('obj %s %s' % (object.id, object.version), C2BIP3(filtered)) else: if object.type == PDF_ELEMENT_COMMENT and 
selectComment: if options.generate: @@ -1090,11 +1655,11 @@ def Main(): print(" oPDF.header('%s')" % comment[4:]) elif comment != '%EOF': print(' oPDF.comment(%s)' % repr(comment)) - elif options.yara == None: + elif options.yara == None and options.generateembedded == 0: print('PDF Comment %s' % FormatOutput(object.comment, options.raw)) print('') elif object.type == PDF_ELEMENT_XREF and selectXref: - if not options.generate and options.yara == None: + if not options.generate and options.yara == None and options.generateembedded == 0: if options.debug: print('xref %s' % FormatOutput(object.content, options.raw)) else: @@ -1106,24 +1671,43 @@ def Main(): result = oPDFParseDictionary.Get('/Root') if result != None: savedRoot = result - elif options.yara == None: - if not options.search or options.search and object.Contains(options.search): + elif options.yara == None and options.generateembedded == 0: + if not options.search and not options.key and not options.reference or options.search and object.Contains(options.search): if oPDFParseDictionary == None: print('trailer %s' % FormatOutput(object.content, options.raw)) else: print('trailer') oPDFParseDictionary.PrettyPrint(' ') print('') + elif options.key: + if oPDFParseDictionary.parsed != None: + result = oPDFParseDictionary.GetNested(options.key) + if result != None: + print(result) + elif options.reference: + for key, value in oPDFParseDictionary.Retrieve(): + if value == [str(options.reference), '0', 'R']: + print('trailer') + oPDFParseDictionary.PrettyPrint(' ') elif object.type == PDF_ELEMENT_STARTXREF and selectStartXref: - if not options.generate and options.yara == None: + if not options.generate and options.yara == None and options.generateembedded == 0: print('startxref %d' % object.index) print('') elif object.type == PDF_ELEMENT_INDIRECT_OBJECT and selectIndirectObject: if options.search: if object.Contains(options.search): PrintObject(object, options) + elif options.key: + contentDictionary = 
object.ContainsStream() + if not contentDictionary: + contentDictionary = object.content[1:] + oPDFParseDictionary = cPDFParseDictionary(contentDictionary, options.nocanonicalizedoutput) + if oPDFParseDictionary.parsed != None: + result = oPDFParseDictionary.GetNested(options.key) + if result != None: + print(result) elif options.object: - if object.id == eval(options.object): + if MatchObjectID(object.id, options.object): PrintObject(object, options) elif options.reference: if object.References(options.reference): @@ -1134,17 +1718,27 @@ def Main(): elif options.hash: print('obj %d %d' % (object.id, object.version)) rawContent = FormatOutput(object.content, True) - print(' len: %d md5: %s' % (len(rawContent), hashlib.md5(rawContent).hexdigest())) + hashHexdigest, hashAlgo = CalculateChosenHash(rawContent.encode('latin')) + print(' len: %d %s: %s' % (len(rawContent), hashAlgo, hashHexdigest)) print('') elif options.searchstream: - if object.StreamContains(options.searchstream, not options.unfiltered, options.casesensitive, options.regex): + if object.StreamContains(options.searchstream, not options.unfiltered, options.casesensitive, options.regex, options.overridingfilters): PrintObject(object, options) elif options.yara != None: - results = object.StreamYARAMatch(rules, not options.unfiltered) + results = object.StreamYARAMatch(rules, decoders, options.decoderoptions, not options.unfiltered, options.overridingfilters) if results != None and results != []: for result in results: - print('YARA rule: %s (%s)' % (result.rule, result.namespace)) - PrintObject(object, options) + for yaraResult in result[1]: + print('YARA rule%s: %s (%s)' % (IFF(result[0] == '', '', ' (stream decoder: %s)' % result[0]), yaraResult.rule, yaraResult.namespace)) + if options.yarastrings: + for stringdata in yaraResult.strings: + print('%06x %s:' % (stringdata[0], stringdata[1])) + print(' %s' % binascii.hexlify(C2BIP3(stringdata[2]))) + print(' %s' % repr(stringdata[2])) + 
PrintObject(object, options) + elif options.generateembedded != 0: + if object.id == options.generateembedded: + PrintGenerateObject(object, options, 8) else: PrintObject(object, options) elif object.type == PDF_ELEMENT_MALFORMED: @@ -1166,18 +1760,33 @@ def Main(): print('Trailer: %s' % cntTrailer) print('StartXref: %s' % cntStartXref) print('Indirect object: %s' % cntIndirectObject) - names = dicObjectTypes.keys() - names.sort() - for key in names: + print('Indirect objects with a stream: %s' % ', '.join([str(id) for id in objectsWithStream])) + objectsUnreferenced = objectsAll - objectsReferenced + for key in sorted(dicObjectTypes.keys()): print(' %s %d: %s' % (key, len(dicObjectTypes[key]), ', '.join(map(lambda x: '%d' % x, dicObjectTypes[key])))) - - if options.generate: + if len(objectsUnreferenced) > 0: + print('Unreferenced indirect objects: %s' % ', '.join([' '.join(reference) for reference in sorted(objectsUnreferenced, key=lambda a: int(a[0]))])) + if '/ObjStm' in dicObjectTypes: + objectsUnreferencedMinusObjStm = set() + for unreferencedObject in objectsUnreferenced: + if not int(unreferencedObject[0]) in dicObjectTypes['/ObjStm']: + objectsUnreferencedMinusObjStm.add(unreferencedObject) + print('Unreferenced indirect objects without /ObjStm objects: %s' % ', '.join([' '.join(reference) for reference in sorted(objectsUnreferencedMinusObjStm, key=lambda a: int(a[0]))])) + if sum(map(len, dKeywords.values())) > 0: + print('Search keywords:') + for keyword in keywords: + if len(dKeywords[keyword]) > 0: + print(' %s %d: %s' % (keyword, len(dKeywords[keyword]), ', '.join(map(lambda x: '%d' % x, dKeywords[keyword])))) + + if options.jsonoutput: + print(oMyJSONOutput.GetJSON()) + + if options.generate or options.generateembedded != 0: print(" oPDF.xrefAndTrailer('%s')" % ' '.join(savedRoot)) print('') print("if __name__ == '__main__':") print(' Main()') - def TestPythonVersion(enforceMaximumVersion=False, enforceMinimumVersion=False): if sys.version_info[0:3] 
> __maximum_python_version__: if enforceMaximumVersion: diff --git a/sift/files/pdf-tools/pdfid.ini b/sift/files/pdf-tools/pdfid.ini new file mode 100644 index 00000000..8ae6c6f5 --- /dev/null +++ b/sift/files/pdf-tools/pdfid.ini @@ -0,0 +1,2 @@ +[keywords] +/URI diff --git a/sift/files/pdf-tools/pdfid.py b/sift/files/pdf-tools/pdfid.py index 95c5b766..22ac85a9 100644 --- a/sift/files/pdf-tools/pdfid.py +++ b/sift/files/pdf-tools/pdfid.py @@ -1,9 +1,9 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 __description__ = 'Tool to test a PDF file' __author__ = 'Didier Stevens' -__version__ = '0.2.1' -__date__ = '2014/10/18' +__version__ = '0.2.9' +__date__ = '2024/10/26' """ @@ -45,6 +45,19 @@ 2014/09/30: added CSV header 2014/10/16: V0.2.1: added output when plugin & file not pdf 2014/10/18: some fixes for Python 3 + 2015/08/12: V0.2.2: added option pluginoptions + 2015/08/13: added plugin Instructions method + 2016/04/12: added option literal + 2017/10/29: added pdfid.ini support + 2017/11/05: V0.2.3: added option -n + 2018/01/03: V0.2.4: bugfix entropy calculation for PDFs without streams; sample 28cb208d976466b295ee879d2d233c8a https://twitter.com/DubinRan/status/947783629123416069 + 2018/01/15: bugfix ConfigParser privately reported + 2018/01/29: bugfix oPDFEOF.cntCharsAfterLastEOF when no %%EOF + 2018/07/05: V0.2.5 introduced cExpandFilenameArguments; renamed option literal to literalfilenames + 2019/09/30: V0.2.6 color bugfix, thanks to Leo + 2019/11/05: V0.2.7 fixed plugin path when compiled with pyinstaller + 2020/11/21: V0.2.8 added data argument to PDFiD function + 2024/10/26: V0.2.9 added pyzipper support Todo: - update XML example (entropy, EOF) @@ -61,15 +74,25 @@ import os.path import sys import json -import zipfile import collections import glob +import fnmatch +if sys.version_info[0] >= 3: + import urllib.request as urllib23 +else: + import urllib2 as urllib23 +if sys.version_info[0] >= 3: + import configparser as ConfigParser +else: + import 
ConfigParser +if sys.version_info[0] >= 3: + from io import BytesIO as DataIO +else: + from cStringIO import StringIO as DataIO try: - import urllib2 - urllib23 = urllib2 -except: - import urllib.request - urllib23 = urllib.request + import pyzipper as zipfile +except ImportError: + import zipfile #Convert 2 Bytes If Python 3 def C2BIP3(string): @@ -78,10 +101,18 @@ def C2BIP3(string): else: return string +def CreateZipFileObject(arg1, arg2): + if 'AESZipFile' in dir(zipfile): + return zipfile.AESZipFile(arg1, arg2) + else: + return zipfile.ZipFile(arg1, arg2) + class cBinaryFile: - def __init__(self, file): + def __init__(self, file, data=None): self.file = file - if file == '': + if data != None: + self.infile = DataIO(data) + elif file == '': self.infile = sys.stdin elif file.lower().startswith('http://') or file.lower().startswith('https://'): try: @@ -95,7 +126,7 @@ def __init__(self, file): sys.exit() elif file.lower().endswith('.zip'): try: - self.zipfile = zipfile.ZipFile(file, 'r') + self.zipfile = CreateZipFileObject(file, 'r') self.infile = self.zipfile.open(self.zipfile.infolist()[0], 'r', C2BIP3('infected')) except: print('Error opening file %s' % file) @@ -230,11 +261,14 @@ def removeInsideStream(self, byte): self.streamBucket[byte] -= 1 def calc(self): - self.nonStreamBucket = map(operator.sub, self.allBucket, self.streamBucket) + self.nonStreamBucket = list(map(operator.sub, self.allBucket, self.streamBucket)) allCount = sum(self.allBucket) streamCount = sum(self.streamBucket) nonStreamCount = sum(self.nonStreamBucket) - return (allCount, sum(map(lambda x: fEntropy(x, allCount), self.allBucket)), streamCount, sum(map(lambda x: fEntropy(x, streamCount), self.streamBucket)), nonStreamCount, sum(map(lambda x: fEntropy(x, nonStreamCount), self.nonStreamBucket))) + if streamCount == 0: + return (allCount, sum(map(lambda x: fEntropy(x, allCount), self.allBucket)), streamCount, None, nonStreamCount, sum(map(lambda x: fEntropy(x, nonStreamCount), 
self.nonStreamBucket))) + else: + return (allCount, sum(map(lambda x: fEntropy(x, allCount), self.allBucket)), streamCount, sum(map(lambda x: fEntropy(x, streamCount), self.streamBucket)), nonStreamCount, sum(map(lambda x: fEntropy(x, nonStreamCount), self.nonStreamBucket))) class cPDFEOF: def __init__(self): @@ -338,7 +372,7 @@ def __init__(self): self.count = 0 def Check(self, lastName, word): - if (lastName == '/Colors' and word.isdigit() and int(word) > 2^24): # decided to alert when the number of colors is expressed with more than 3 bytes + if (lastName == '/Colors' and word.isdigit() and int(word) > 2**24): # decided to alert when the number of colors is expressed with more than 3 bytes self.count += 1 def XMLAddAttribute(xmlDoc, name, value=None): @@ -346,8 +380,26 @@ def XMLAddAttribute(xmlDoc, name, value=None): xmlDoc.documentElement.setAttributeNode(att) if value != None: att.nodeValue = value + return att + +def GetScriptPath(): + if getattr(sys, 'frozen', False): + return os.path.dirname(sys.executable) + else: + return os.path.dirname(sys.argv[0]) -def PDFiD(file, allNames=False, extraData=False, disarm=False, force=False): +def ParseINIFile(): + oConfigParser = ConfigParser.ConfigParser(allow_no_value=True) + oConfigParser.optionxform = str + oConfigParser.read(os.path.join(GetScriptPath(), 'pdfid.ini')) + keywords = [] + if oConfigParser.has_section('keywords'): + for key, value in oConfigParser.items('keywords'): + if not key in keywords: + keywords.append(key) + return keywords + +def PDFiD(file, allNames=False, extraData=False, disarm=False, force=False, data=None): """Example of XML output: @@ -377,7 +429,7 @@ def PDFiD(file, allNames=False, extraData=False, disarm=False, force=False): hexcode = False lastName = '' insideStream = False - keywords = ('obj', + keywords = ['obj', 'endobj', 'stream', 'endstream', @@ -397,9 +449,12 @@ def PDFiD(file, allNames=False, extraData=False, disarm=False, force=False): '/Launch', '/EmbeddedFile', '/XFA', - ) 
+ ] words = {} dates = [] + for extrakeyword in ParseINIFile(): + if not extrakeyword in keywords: + keywords.append(extrakeyword) for keyword in keywords: words[keyword] = [0, 0] slash = '' @@ -416,7 +471,7 @@ def PDFiD(file, allNames=False, extraData=False, disarm=False, force=False): try: attIsPDF = xmlDoc.createAttribute('IsPDF') xmlDoc.documentElement.setAttributeNode(attIsPDF) - oBinaryFile = cBinaryFile(file) + oBinaryFile = cBinaryFile(file, data) if extraData: oPDFDate = cPDFDate() oEntropy = cEntropy() @@ -532,7 +587,10 @@ def PDFiD(file, allNames=False, extraData=False, disarm=False, force=False): (countAll, entropyAll , countStream, entropyStream, countNonStream, entropyNonStream) = oEntropy.calc() attEntropyAll.nodeValue = '%f' % entropyAll attCountAll.nodeValue = '%d' % countAll - attEntropyStream.nodeValue = '%f' % entropyStream + if entropyStream == None: + attEntropyStream.nodeValue = 'N/A ' + else: + attEntropyStream.nodeValue = '%f' % entropyStream attCountStream.nodeValue = '%d' % countStream attEntropyNonStream.nodeValue = '%f' % entropyNonStream attCountNonStream.nodeValue = '%d' % countNonStream @@ -549,7 +607,10 @@ def PDFiD(file, allNames=False, extraData=False, disarm=False, force=False): xmlDoc.documentElement.setAttributeNode(attCountCharsAfterLastEOF) if oPDFEOF != None: attCountEOF.nodeValue = '%d' % oPDFEOF.cntEOFs - attCountCharsAfterLastEOF.nodeValue = '%d' % oPDFEOF.cntCharsAfterLastEOF + if oPDFEOF.cntEOFs > 0: + attCountCharsAfterLastEOF.nodeValue = '%d' % oPDFEOF.cntCharsAfterLastEOF + else: + attCountCharsAfterLastEOF.nodeValue = '' else: attCountEOF.nodeValue = '' attCountCharsAfterLastEOF.nodeValue = '' @@ -608,7 +669,7 @@ def PDFiD(file, allNames=False, extraData=False, disarm=False, force=False): eleDate.setAttributeNode(att) return xmlDoc -def PDFiD2String(xmlDoc, force): +def PDFiD2String(xmlDoc, nozero, force): result = 'PDFiD %s %s\n' % (xmlDoc.documentElement.getAttribute('Version'), 
xmlDoc.documentElement.getAttribute('Filename')) if xmlDoc.documentElement.getAttribute('ErrorOccured') == 'True': return result + '***Error occured***\n%s\n' % xmlDoc.documentElement.getAttribute('ErrorMessage') @@ -616,10 +677,11 @@ def PDFiD2String(xmlDoc, force): return result + ' Not a PDF document\n' result += ' PDF Header: %s\n' % xmlDoc.documentElement.getAttribute('Header') for node in xmlDoc.documentElement.getElementsByTagName('Keywords')[0].childNodes: - result += ' %-16s %7d' % (node.getAttribute('Name'), int(node.getAttribute('Count'))) - if int(node.getAttribute('HexcodeCount')) > 0: - result += '(%d)' % int(node.getAttribute('HexcodeCount')) - result += '\n' + if not nozero or nozero and int(node.getAttribute('Count')) > 0: + result += ' %-16s %7d' % (node.getAttribute('Name'), int(node.getAttribute('Count'))) + if int(node.getAttribute('HexcodeCount')) > 0: + result += '(%d)' % int(node.getAttribute('HexcodeCount')) + result += '\n' if xmlDoc.documentElement.getAttribute('CountEOF') != '': result += ' %-16s %7d\n' % ('%%EOF', int(xmlDoc.documentElement.getAttribute('CountEOF'))) if xmlDoc.documentElement.getAttribute('CountCharsAfterLastEOF') != '': @@ -703,7 +765,7 @@ def MakeCSVLine(fields, separator=';', quote='"'): def ProcessFile(filename, options, plugins): xmlDoc = PDFiD(filename, options.all, options.extra, options.disarm, options.force) if plugins == [] and options.select == '': - Print(PDFiD2String(xmlDoc, options.force), options) + Print(PDFiD2String(xmlDoc, options.nozero, options.force), options) return oPDFiD = cPDFiD(xmlDoc, options.force) @@ -721,12 +783,12 @@ def ProcessFile(filename, options, plugins): if options.csv: Print(filename, options) else: - Print(PDFiD2String(xmlDoc, options.force), options) + Print(PDFiD2String(xmlDoc, options.nozero, options.force), options) else: for cPlugin in plugins: if not cPlugin.onlyValidPDF or not oPDFiD.errorOccured and oPDFiD.isPDF: try: - oPlugin = cPlugin(oPDFiD) + oPlugin = cPlugin(oPDFiD, 
options.pluginoptions) except Exception as e: Print('Error instantiating plugin: %s' % cPlugin.name, options) if options.verbose: @@ -746,8 +808,12 @@ def ProcessFile(filename, options, plugins): Print(MakeCSVLine((('%s', filename), ('%s', cPlugin.name), ('%.02f', score))), options) else: if score >= options.minimumscore: - Print(PDFiD2String(xmlDoc, options.force), options) - Print('%s score: %.02f' % (cPlugin.name, score), options) + Print(PDFiD2String(xmlDoc, options.nozero, options.force), options) + Print('%s score: %.02f' % (cPlugin.name, score), options) + try: + Print('%s instructions: %s' % (cPlugin.name, oPlugin.Instructions(score)), options) + except AttributeError: + pass else: if options.csv: if oPDFiD.errorOccured: @@ -755,7 +821,7 @@ def ProcessFile(filename, options, plugins): if not oPDFiD.isPDF: Print(MakeCSVLine((('%s', filename), ('%s', cPlugin.name), ('%s', 'Not a PDF document'))), options) else: - Print(PDFiD2String(xmlDoc, options.force), options) + Print(PDFiD2String(xmlDoc, options.nozero, options.force), options) def Scan(directory, options, plugins): @@ -842,8 +908,93 @@ def AddPlugin(cClass): plugins.append(cClass) -def ExpandFilenameArguments(filenames): - return list(collections.OrderedDict.fromkeys(sum(map(glob.glob, sum(map(ProcessAt, filenames), [])), []))) +class cExpandFilenameArguments(): + def __init__(self, filenames, literalfilenames=False, recursedir=False, checkfilenames=False, expressionprefix=None): + self.containsUnixShellStyleWildcards = False + self.warning = False + self.message = '' + self.filenameexpressions = [] + self.expressionprefix = expressionprefix + self.literalfilenames = literalfilenames + + expression = '' + if len(filenames) == 0: + self.filenameexpressions = [['', '']] + elif literalfilenames: + self.filenameexpressions = [[filename, ''] for filename in filenames] + elif recursedir: + for dirwildcard in filenames: + if expressionprefix != None and dirwildcard.startswith(expressionprefix): + expression = 
dirwildcard[len(expressionprefix):] + else: + if dirwildcard.startswith('@'): + for filename in ProcessAt(dirwildcard): + self.filenameexpressions.append([filename, expression]) + elif os.path.isfile(dirwildcard): + self.filenameexpressions.append([dirwildcard, expression]) + else: + if os.path.isdir(dirwildcard): + dirname = dirwildcard + basename = '*' + else: + dirname, basename = os.path.split(dirwildcard) + if dirname == '': + dirname = '.' + for path, dirs, files in os.walk(dirname): + for filename in fnmatch.filter(files, basename): + self.filenameexpressions.append([os.path.join(path, filename), expression]) + else: + for filename in list(collections.OrderedDict.fromkeys(sum(map(self.Glob, sum(map(ProcessAt, filenames), [])), []))): + if expressionprefix != None and filename.startswith(expressionprefix): + expression = filename[len(expressionprefix):] + else: + self.filenameexpressions.append([filename, expression]) + self.warning = self.containsUnixShellStyleWildcards and len(self.filenameexpressions) == 0 + if self.warning: + self.message = "Your filename argument(s) contain Unix shell-style wildcards, but no files were matched.\nCheck your wildcard patterns or use option literalfilenames if you don't want wildcard pattern matching." + return + if self.filenameexpressions == [] and expression != '': + self.filenameexpressions = [['', expression]] + if checkfilenames: + self.CheckIfFilesAreValid() + + def Glob(self, filename): + if not ('?' 
in filename or '*' in filename or ('[' in filename and ']' in filename)): + return [filename] + self.containsUnixShellStyleWildcards = True + return glob.glob(filename) + + def CheckIfFilesAreValid(self): + valid = [] + doesnotexist = [] + isnotafile = [] + for filename, expression in self.filenameexpressions: + hashfile = False + try: + hashfile = FilenameCheckHash(filename, self.literalfilenames)[0] == FCH_DATA + except: + pass + if filename == '' or hashfile: + valid.append([filename, expression]) + elif not os.path.exists(filename): + doesnotexist.append(filename) + elif not os.path.isfile(filename): + isnotafile.append(filename) + else: + valid.append([filename, expression]) + self.filenameexpressions = valid + if len(doesnotexist) > 0: + self.warning = True + self.message += 'The following files do not exist and will be skipped: ' + ' '.join(doesnotexist) + '\n' + if len(isnotafile) > 0: + self.warning = True + self.message += 'The following files are not regular files and will be skipped: ' + ' '.join(isnotafile) + '\n' + + def Filenames(self): + if self.expressionprefix == None: + return [filename for filename, expression in self.filenameexpressions] + else: + return self.filenameexpressions class cPluginParent(): onlyValidPDF = True @@ -851,7 +1002,7 @@ class cPluginParent(): def LoadPlugins(plugins, verbose): if plugins == '': return - scriptPath = os.path.dirname(sys.argv[0]) + scriptPath = GetScriptPath() for plugin in sum(map(ProcessAt, plugins.split(',')), []): try: if not plugin.lower().endswith('.py'): @@ -907,7 +1058,11 @@ def Main(): oParser.add_option('-m', '--minimumscore', type=float, default=0.0, help='minimum score for plugin results output') oParser.add_option('-v', '--verbose', action='store_true', default=False, help='verbose (will also raise catched exceptions)') oParser.add_option('-S', '--select', type=str, default='', help='selection expression') + oParser.add_option('-n', '--nozero', action='store_true', default=False, help='supress 
output for counts equal to zero') oParser.add_option('-o', '--output', type=str, default='', help='output to log file') + oParser.add_option('--pluginoptions', type=str, default='', help='options for the plugin') + oParser.add_option('-l', '--literalfilenames', action='store_true', default=False, help='take filenames literally, no wildcard matching') + oParser.add_option('--recursedir', action='store_true', default=False, help='Recurse directories (wildcards and here files (@...) allowed)') (options, args) = oParser.parse_args() if len(args) == 0: @@ -920,7 +1075,10 @@ def Main(): filenames = [''] else: try: - filenames = ExpandFilenameArguments(args) + oExpandFilenameArguments = cExpandFilenameArguments(args, options.literalfilenames, options.recursedir, False) + filenames = oExpandFilenameArguments.Filenames() + if oExpandFilenameArguments.warning: + print(oExpandFilenameArguments.message) except Exception as e: print(e) return diff --git a/sift/files/pdf-tools/pdftool.py b/sift/files/pdf-tools/pdftool.py new file mode 100644 index 00000000..8421f2e0 --- /dev/null +++ b/sift/files/pdf-tools/pdftool.py @@ -0,0 +1,1727 @@ +#!/usr/bin/env python3 + +from __future__ import print_function + +__description__ = 'Tool to process PDFs' +__author__ = 'Didier Stevens' +__version__ = '0.0.1' +__date__ = '2021/01/08' + +""" +Source code put in the public domain by Didier Stevens, no Copyright +https://DidierStevens.com +Use at your own risk + +History: + 2020/08/22: start + 2020/08/22: continue + 2020/10/21: Python 3 fix in cBinaryFile + 2021/01/06: man page + 2021/01/07: DataIO + 2021/01/08: sync with template; man + +Todo: + +""" + +import optparse +import sys +import os +import zipfile +import binascii +import random +import gzip +import collections +import glob +import textwrap +import re +import struct +import string +import math +import fnmatch +import json +import time +import hashlib +import csv +if sys.version_info[0] >= 3: + from io import BytesIO as DataIO +else: + 
from cStringIO import StringIO as DataIO +if sys.version_info[0] >= 3: + from io import StringIO +else: + from cStringIO import StringIO + +def PrintManual(): + manual = r''' +Manual: + +pdftool.py is a tool to process PDFs. + +For the moment, it has one command: iu (incremental updates). + +Command iu can be used to analyze PDFs with incremental updates. + +When a PDF without incremental updates is analyzed by this tool, one or two "versions" will be listed. +Each "version" is prefixed with a number (starting from 1) allowing selection of an update for further processing. + +For a non-linearized PDF without incremental updates, only one "version" will be listed: + +pdftool.py iu hello.pdf + +File: hello.pdf +1: objects= 6 length= 859 difference= 859 MD5= 8433a21e2ab8d9cb6655dec30aea1c2a + +For a linearized PDF without incremental updates, two "versions" will be listed: + +pdftool.py iu hello-linearized.pdf + +File: hello-linearized.pdf +1: objects= 1 (Linearized) length= 500 difference= 500 MD5= f8a1feff9f47b0bfacaacad1303ab1e2 +2: objects= 7 length= 1367 difference= 867 MD5= e0331685b60664a10f405325473541c5 + +Notice "(Linearized)" for the first "version": this indicates that this is a linearized PDF. 
+ +For a non-linearized PDF with incremental updates, at least two "versions" will be listed: + +pdftool.py iu pdf-puzzle.pdf + +File: pdf-puzzle.pdf +1: objects= 6 length= 933 difference= 933 MD5= 489a96a2621f9abe13156b22afca5fcf +2: objects= 1 length= 1243 difference= 310 MD5= dabe6cb9c5fe3d213a08fb75f8d33ac4 + + +And for a linearized PDF with incremental updates, at least three "versions" will be listed: + +pdftool.py iu data.pdf.zip + +File: data.pdf.zip (extracted) +1: objects= 1 (Linearized) length= 672 difference= 672 MD5= 9c818e7a9ecd864e3bae97f5c3fa0816 +2: objects= 35 length= 8535 difference= 7863 MD5= e0e44d544ac922c3d64c7408944bf60b +3: objects= 6 length= 14241 difference= 5706 MD5= 4943ccbe72c8ba2e81e5bc030b730f69 +4: objects= 5 length= 19639 difference= 5398 MD5= be45f57e2056745d6da0569b5f154ac2 +5: objects= 5 length= 25042 difference= 5403 MD5= 69953f5809e74cad3f3e63323f990cae +6: objects= 6 length= 30658 difference= 5616 MD5= 1a8e5242f21727959683fa8cc7aa94ad + +"Versions" can be selected with option -s. + +For example: + +pdftool.py -s 1 iu pdf-puzzle.pdf + +00000000: 25 50 44 46 2D 31 2E 31 0D 0A 0D 0A 31 20 30 20 %PDF-1.1....1 0 +00000010: 6F 62 6A 0D 0A 3C 3C 0D 0A 20 2F 54 79 70 65 20 obj..<<.. /Type +00000020: 2F 43 61 74 61 6C 6F 67 0D 0A 20 2F 4F 75 74 6C /Catalog.. /Outl +00000030: 69 6E 65 73 20 32 20 30 20 52 0D 0A 20 2F 50 61 ines 2 0 R.. /Pa +00000040: 67 65 73 20 33 20 30 20 52 0D 0A 3E 3E 0D 0A 65 ges 3 0 R..>>..e +... +00000360: 30 20 6E 0D 0A 74 72 61 69 6C 65 72 0D 0A 3C 3C 0 n..trailer..<< +00000370: 0D 0A 20 2F 53 69 7A 65 20 37 0D 0A 20 2F 52 6F .. /Size 7.. /Ro +00000380: 6F 74 20 31 20 30 20 52 0D 0A 3E 3E 0D 0A 73 74 ot 1 0 R..>>..st +00000390: 61 72 74 78 72 65 66 0D 0A 37 31 38 0D 0A 25 25 artxref..718..%% +000003A0: 45 4F 46 0D 0A EOF.. + +By default, a selected version is dumped as an hexadecimal & ascii dump. 
+This output format can be changed to pure hexadecimal (-x), binary (-d) or run-length compressed hexadecimal & ascii dump (-A). +The default hexadecimal & ascii dump is -a. + +When a version is selected, the complete PDF for the version is produced. If the desired output is the difference between the selected version and the previous version, suffix d (delta) must be used. +In this example, -s 2d selects the delta between version 1 and 2: + +pdftool.py -s 2d iu pdf-puzzle.pdf + +00000000: 0D 0A 35 20 30 20 6F 62 6A 0D 0A 3C 3C 0D 0A 20 ..5 0 obj..<<.. +00000010: 2F 4C 65 6E 67 74 68 20 38 39 0D 0A 20 2F 46 69 /Length 89.. /Fi +00000020: 6C 74 65 72 20 2F 41 53 43 49 49 38 35 44 65 63 lter /ASCII85Dec +00000030: 6F 64 65 0D 0A 3E 3E 0D 0A 73 74 72 65 61 6D 0D ode..>>..stream. +00000040: 0A 36 3C 23 27 5C 37 50 51 23 40 31 61 23 62 30 .6<#'\7PQ#@1a#b0 +00000050: 2B 3E 47 51 28 2B 3F 28 75 2E 2B 42 32 6B 6F 2D +>GQ(+?(u.+B2ko- +00000060: 72 61 6B 6B 2B 45 31 62 31 46 29 59 66 35 40 3C rakk+E1b1F)Yf5@< +00000070: 36 21 26 42 6C 62 44 21 3D 42 4A 5B 2D 3D 42 4A 6!&BlbD!=BJ[-=BJ +00000080: 5B 2D 3D 42 4A 5B 2D 3D 42 4A 5B 2D 3D 42 49 21 [-=BJ[-=BJ[-=BI! +00000090: 70 3C 2C 2A 4F 45 3B 75 7E 3E 0D 0A 65 6E 64 73 p<,*OE;u~>..ends +000000A0: 74 72 65 61 6D 0D 0A 65 6E 64 6F 62 6A 0D 0A 0D tream..endobj... +000000B0: 0A 78 72 65 66 0D 0A 30 20 31 0D 0A 30 30 30 30 .xref..0 1..0000 +000000C0: 30 30 30 30 30 30 20 36 35 35 33 35 20 66 0D 0A 000000 65535 f.. +000000D0: 35 20 31 0D 0A 30 30 30 30 30 30 30 39 33 35 20 5 1..0000000935 +000000E0: 30 30 30 30 30 20 6E 0D 0A 74 72 61 69 6C 65 72 00000 n..trailer +000000F0: 0D 0A 3C 3C 0D 0A 20 2F 53 69 7A 65 20 37 0D 0A ..<<.. /Size 7.. +00000100: 20 2F 52 6F 6F 74 20 31 20 30 20 52 0D 0A 20 2F /Root 1 0 R.. / +00000110: 50 72 65 76 20 37 31 38 0D 0A 3E 3E 0D 0A 73 74 Prev 718..>>..st +00000120: 61 72 74 78 72 65 66 0D 0A 31 31 31 30 0D 0A 25 artxref..1110..% +00000130: 25 45 4F 46 0D 0A %EOF.. 
+ + + +Output can also be directed to a file using option -o. + + +This tool is very versatile when it comes to handling files. This will be explained now. + +This tool reads files in binary mode. It can read files from disk, from standard input (stdin) and from "generated" files via the command line. +It can also partially read files (this is done with the cut operator). + +If no file arguments are provided to this tool, it will read data from standard input (stdin). This way, this tool can be used in a piped chain of commands, like this: + +oledump.py -s 4 -d sample.doc.vir | tool.py + +When one or more file arguments are provided to this tool, it will read the files and process the content. +How the files are read, depends on the type of file arguments that are provided. File arguments that start with character @ or # have special meaning, and will be explained later. + +If a file argument does not start with @ or #, it is considered to be a file on disk and the content will be read from disk. +If the file is not a compressed file, the binary content of the file is read from disk for processing. +Compressed files are solely recognized based on their extension: .zip and .gz. +If a file argument with extension .gz is provided, the tool will decompress the gzip file in memory and process the decompressed content. No checks are made to ensure that the file with extension .gz is an actual gzip compressed file. +If a file argument with extension .zip is provided and it contains a single file, the tool will extract the file from the ZIP file in memory and process the decompressed content. No checks are made to ensure that the file with extension .zip is an actual ZIP compressed file. +Password protected ZIP files can be processed too. The tool uses password 'infected' (without quotes) as default password. A different password can be provided using option --password. 
+ +Example: + +tool.py sample.zip + +To prevent the tool from decompressing .zip or .gz files, but to process the compressed file itself, use option --noextraction. + +File arguments that start with character @ ("here files"), are read as text files that contain file arguments (one per line) to be processed. +For example, we take a text file with filename list.txt and following content: + +sample-1.bin +sample-5.bin +sample-7.bin + +When using this file (list.txt) in the following command: + +tool.py @list.txt + +the tool will process the following files: sample-1.bin, sample-5.bin and sample-7.bin. +A single @ character as filename is a here file read from stdin. + +Wildcards are supported too. The classic *, ? and [] wildcard characters are supported. For example, use the following command to process all .exe and .dll files in the Windows directory: + +tool.py C:\Windows\*.exe C:\Windows\*.dll + +To prevent the tool from processing file arguments with wildcard characters or special initial characters (@ and #) differently, but to process them as normal files, use option --literalfilenames. + +The content of folders can be processed too: use option --recursedir and provide folder names as argument. Wildcards and here files (for folder names) can be used too. + +File arguments that start with character # have special meaning. These are not processed as actual files on disk (except when option --literalfilenames is used), but as file arguments that specify how to "generate" the file content. + +File arguments that start with #, #h#, #b# or #e# are used to "generate" the file content. +Arguments that start with #c# are not file arguments, but cut operators (explained later). +Arguments that start with #f# are not file arguments, but flags (explained later). + +Generating the file content with a # file argument means that the file content is not read from disk, but generated in memory based on the characteristics provided via the file argument. 
+ +When a file argument starts with # (and not with #h#, #b#, #e# or #c#), all characters that follow the # character specify the content of the generated file. +For example, file argument #ABCDE specifies a file containing exactly 5 bytes: ASCII characters A, B, C, D and E. +Thus the following command: + +tool.py #ABCDE + +will make the tool process data with binary content ABCDE. #ABCDE is not an actual file written on disk, but it is a notational convention to provide data via the command line. + +Since this notation can not be used to specify all possible byte values, hexadecimal encoding (#h#) and BASE64 encoding (#b#) notation is supported too. +For example, #h#4142434445 is an hexadecimal notation that generates data ABCDE. Hexadecimal notation allows the generation of non-printable characters for example, like NULL bytes: #h#00 +File argument #b#QUJDREU= is another example, this time BASE64 notation, that generates data ABCDE. + +File arguments that start with #e# are a notational convention to use expressions to generate data. An expression is a single function/string or the concatenation of several functions/strings (using character + as concatenation operator). +Strings can be characters enclosed by single quotes ('example') or hexadecimal strings prefixed by 0x (0xBEEF). +4 functions are available: random, loremipsum, repeat and chr. + +Function random takes exactly one argument: an integer (with value 1 or more). Integers can be specified using decimal notation or hexadecimal notation (prefix 0x). +The random function generates a sequence of bytes with a random value (between 0 and 255), the argument specifies how many bytes need to be generated. Remark that the random number generator that is used is just the Python random number generator, not a cryptographic random number generator. + +Example: + +tool.py #e#random(100) + +will make the tool process data consisting of a sequence of 100 random bytes. 
+ +Function loremipsum takes exactly one argument: an integer (with value 1 or more). +The loremipsum function generates "lorem ipsum" text (fake latin), the argument specifies the number of sentences to generate. + +Example: #e#loremipsum(2) generates this text: +Ipsum commodo proin pulvinar hac vel nunc dignissim neque eget odio erat magna lorem urna cursus fusce facilisis porttitor congue eleifend taciti. Turpis duis suscipit facilisi tristique dictum praesent natoque sem mi egestas venenatis per dui sit sodales est condimentum habitasse ipsum phasellus non bibendum hendrerit. + +Function chr takes one argument or two arguments. +chr with one argument takes an integer between 0 and 255, and generates a single byte with the value specified by the integer. +chr with two arguments takes two integers between 0 and 255, and generates a byte sequence with the values specified by the integers. +For example #e#chr(0x41,0x45) generates data ABCDE. + +Function repeat takes two arguments: an integer (with value 1 or more) and a byte sequence. This byte sequence can be a quoted string of characters (single quotes), like 'ABCDE' or an hexadecimal string prefixed with 0x, like 0x4142434445. +The repeat function will create a sequence of bytes consisting of the provided byte sequence (the second argument) repeated as many times as specified by the first argument. +For example, #e#repeat(3, 'AB') generates byte sequence ABABAB. + +When more than one function needs to be used, the byte sequences generated by the functions can be concatenated with the + operator. +For example, #e#repeat(10,0xFF)+random(100) will generate a byte sequence of 10 FF bytes followed by 100 random bytes. + +File arguments that start with #p# are a notational convention to pack a Python expression to generate data (using Python module struct). +The string after #p# must contain 2 expressions separated by a # character, like #p#I#123456. 
+The first expression (I in this example) is the format string for the Python struct.pack function, and the second expression (123456 in this example) is a Python expression that needs to be packed by struct.pack. +In this example, format string I represents an unsigned, 32-bit, little-endian integer, and thus #p#I#123456 generates byte sequence 40E20100 (hexadecimal). + +The cut argument (or cut operator) allows for the partial selection of the content of a file. This argument starts with #c# followed by a "cut-expression". Use this expression to "cut out" part of the content. +The cut-argument must be put in front of a file argument, like in this example: + +tool.py #c#0:100l data.bin + +With these arguments, tool.py will only process the first 100 bytes (0:100l) of file data.bin. + +A cut argument is applied to all file arguments that follow it. Example: + +tool.py #c#0:100l data-1.bin data-2.bin + +With these arguments, tool.py will only process the first 100 bytes (0:100l) of file data-1.bin and the first 100 bytes of file data-2.bin. + +More than one cut argument can be used, like in this example: + +tool.py #c#0:100l data-1.bin #c#0:200l data-2.bin + +With these arguments, tool.py will only process the first 100 bytes (0:100l) of file data-1.bin and the first 200 bytes (0:200l) of file data-2.bin. 
+ +A cut-expression is composed of 2 terms separated by a colon (:), like this: +termA:termB +termA and termB can be: +- nothing (an empty string) +- a positive decimal number; example: 10 +- an hexadecimal number (to be preceded by 0x); example: 0x10 +- a case sensitive ASCII string to search for (surrounded by square brackets and single quotes); example: ['MZ'] +- a case sensitive UNICODE string to search for (surrounded by square brackets and single quotes prefixed with u); example: [u'User'] +- an hexadecimal string to search for (surrounded by square brackets); example: [d0cf11e0] +If termA is nothing, then the cut section of bytes starts with the byte at position 0. +If termA is a number, then the cut section of bytes starts with the byte at the position given by the number (first byte has index 0). +If termA is a string to search for, then the cut section of bytes starts with the byte at the position where the string is first found. If the string is not found, the cut is empty (0 bytes). +If termB is nothing, then the cut section of bytes ends with the last byte. +If termB is a number, then the cut section of bytes ends with the byte at the position given by the number (first byte has index 0). +When termB is a number, it can have suffix letter l. This indicates that the number is a length (number of bytes), and not a position. +termB can also be a negative number (decimal or hexadecimal): in that case the position is counted from the end of the file. For example, :-5 selects the complete file except the last 5 bytes. +If termB is a string to search for, then the cut section of bytes ends with the last byte at the position where the string is first found. If the string is not found, the cut is empty (0 bytes). +No checks are made to assure that the position specified by termA is lower than the position specified by termB. This is left up to the user. 
+Search string expressions (ASCII, UNICODE and hexadecimal) can be followed by an instance (a number equal to 1 or greater) to indicate which instance needs to be taken. For example, ['ABC']2 will search for the second instance of string 'ABC'. If this instance is not found, then nothing is selected. +Search string expressions (ASCII, UNICODE and hexadecimal) can be followed by an offset (+ or - a number) to add (or subtract) an offset to the found instance. This number can be a decimal or hexadecimal (prefix 0x) value. For example, ['ABC']+3 will search for the first instance of string 'ABC' and then select the bytes after ABC (+ 3). +Finally, search string expressions (ASCII, UNICODE and hexadecimal) can be followed by an instance and an offset. +Examples: +This cut-expression can be used to dump the first 256 bytes of a PE file located inside the file content: ['MZ']:0x100l +This cut-expression can be used to dump the OLE file located inside the file content: [d0cf11e0]: + +A flag argument starts with #f# and is passed on for all files that are provided after the flag argument. It can be used to change the behavior of the tool for certain files. +Example: + +tool.py data-1.bin #f#-l data-2.bin + +data-2.bin will be processed differently (using flag option -l) than file data-1.bin. + +With option --jsoninput, the tool will parse the output produced by another tool using option --jsonoutput. 
+Example: +zipdump.py --jsonoutput Book1.xlsm | file-magic.py --jsoninput +[Content_Types].xml XML 1.0 document, ASCII text, with very long lines, with CRLF line terminators +_rels/.rels XML 1.0 document, ASCII text, with very long lines, with CRLF line terminators +xl/_rels/workbook.xml.rels XML 1.0 document, ASCII text, with very long lines, with CRLF line terminators +xl/workbook.xml XML 1.0 document, ASCII text, with very long lines, with CRLF line terminators +xl/drawings/drawing1.xml XML 1.0 document, ASCII text, with very long lines, with CRLF line terminators +xl/worksheets/_rels/sheet1.xml.rels XML 1.0 document, ASCII text, with very long lines, with CRLF line terminators +xl/theme/theme1.xml XML 1.0 document, UTF-8 Unicode text, with very long lines, with CRLF line terminators +xl/styles.xml XML 1.0 document, ASCII text, with very long lines, with CRLF line terminators +xl/worksheets/sheet1.xml XML 1.0 document, ASCII text, with very long lines, with CRLF line terminators +xl/vbaProject.bin Composite Document File V2 Document, Cannot read section info +xl/drawings/vmlDrawing1.vml ASCII text, with CRLF line terminators +docProps/app.xml XML 1.0 document, ASCII text, with very long lines, with CRLF line terminators +xl/ctrlProps/ctrlProp1.xml XML 1.0 document, ASCII text, with CRLF line terminators +docProps/core.xml XML 1.0 document, ASCII text, with very long lines, with CRLF line terminators + +In this example, zipdump is used to produce JSON data with the content of each file contained inside file Book1.xlsm (a ZIP container), which is then consumed by file-magic.py to identify (libmagic) the type of each file. + +With option --ignoreprocessingerrors, the tool will continue processing the next file when an error occurs while processing the current file. Files that can not be opened will always be skipped to move to the next file. 
+ +Option --logfile directs the tool to create a logfile, and option --logcomment can be used to add a comment to the log file. The log file will contain metadata and a list of processed files; it does not contain processing results. +It is best to use this option when option --ignoreprocessingerrors is used, to have a record of file processing errors. + +The lines are written to standard output, except when option -o is used. When option -o is used, the lines are written to the filename specified by option -o. +Filenames used with option -o starting with # have special meaning. +#c#example.txt will write output both to the console (stdout) and file example.txt. +#g# will write output to a file with a filename generated by the tool like this: toolname-date-time.txt. +#g#KEYWORD will write output to a file with a filename generated by the tool like this: toolname-KEYWORD-date-time.txt. +Use #p#filename to display execution progress. +To process several files while creating separate output files for each input file, use -o #s#%f%.result *. +This will create output files with the name of the inputfile and extension .result. +There are several variables available when creating separate output files: + %f% is the full filename (with directory if present) + %b% is the base name: the filename without directory + %d% is the directory + %r% is the root: the filename without extension + %ru% is the root made unique by appending a counter (if necessary) + %e% is the extension +#h# is like the head command: only the first 10 lines will be output. +#t# is like the tail command: only the last 10 lines will be output. +Most options can be combined, like #ps# for example. +#l# is used for literal filenames: if the output filename has to start with # (#example.txt for example), use filename #l##example.txt for example. 
+ +''' + for line in manual.split('\n'): + print(textwrap.fill(line, 79)) + +DEFAULT_SEPARATOR = ',' +QUOTE = '"' + +def PrintError(*args, **kwargs): + print(*args, file=sys.stderr, **kwargs) + +#Convert 2 Bytes If Python 3 +def C2BIP3(string): + if sys.version_info[0] > 2: + return bytes([ord(x) for x in string]) + else: + return string + +#Convert 2 Integer If Python 2 +def C2IIP2(data): + if sys.version_info[0] > 2: + return data + else: + return ord(data) + +# CIC: Call If Callable +def CIC(expression): + if callable(expression): + return expression() + else: + return expression + +# IFF: IF Function +def IFF(expression, valueTrue, valueFalse): + if expression: + return CIC(valueTrue) + else: + return CIC(valueFalse) + +#-BEGINCODE cBinaryFile------------------------------------------------------------------------------ +#import random +#import binascii +#import zipfile +#import gzip +#import sys +#if sys.version_info[0] >= 3: +# from io import BytesIO as DataIO +#else: +# from cStringIO import StringIO as DataIO + +def LoremIpsumSentence(minimum, maximum): + words = ['lorem', 'ipsum', 'dolor', 'sit', 'amet', 'consectetur', 'adipiscing', 'elit', 'etiam', 'tortor', 'metus', 'cursus', 'sed', 'sollicitudin', 'ac', 'sagittis', 'eget', 'massa', 'praesent', 'sem', 'fermentum', 'dignissim', 'in', 'vel', 'augue', 'scelerisque', 'auctor', 'libero', 'nam', 'a', 'gravida', 'odio', 'duis', 'vestibulum', 'vulputate', 'quam', 'nec', 'cras', 'nibh', 'feugiat', 'ut', 'vitae', 'ornare', 'justo', 'orci', 'varius', 'natoque', 'penatibus', 'et', 'magnis', 'dis', 'parturient', 'montes', 'nascetur', 'ridiculus', 'mus', 'curabitur', 'nisl', 'egestas', 'urna', 'iaculis', 'lectus', 'maecenas', 'ultrices', 'velit', 'eu', 'porta', 'hac', 'habitasse', 'platea', 'dictumst', 'integer', 'id', 'commodo', 'mauris', 'interdum', 'malesuada', 'fames', 'ante', 'primis', 'faucibus', 'accumsan', 'pharetra', 'aliquam', 'nunc', 'at', 'est', 'non', 'leo', 'nulla', 'sodales', 'porttitor', 'facilisis', 
'aenean', 'condimentum', 'rutrum', 'facilisi', 'tincidunt', 'laoreet', 'ultricies', 'neque', 'diam', 'euismod', 'consequat', 'tempor', 'elementum', 'lobortis', 'erat', 'ligula', 'risus', 'donec', 'phasellus', 'quisque', 'vivamus', 'pellentesque', 'tristique', 'venenatis', 'purus', 'mi', 'dictum', 'posuere', 'fringilla', 'quis', 'magna', 'pretium', 'felis', 'pulvinar', 'lacinia', 'proin', 'viverra', 'lacus', 'suscipit', 'aliquet', 'dui', 'molestie', 'dapibus', 'mollis', 'suspendisse', 'sapien', 'blandit', 'morbi', 'tellus', 'enim', 'maximus', 'semper', 'arcu', 'bibendum', 'convallis', 'hendrerit', 'imperdiet', 'finibus', 'fusce', 'congue', 'ullamcorper', 'placerat', 'nullam', 'eros', 'habitant', 'senectus', 'netus', 'turpis', 'luctus', 'volutpat', 'rhoncus', 'mattis', 'nisi', 'ex', 'tempus', 'eleifend', 'vehicula', 'class', 'aptent', 'taciti', 'sociosqu', 'ad', 'litora', 'torquent', 'per', 'conubia', 'nostra', 'inceptos', 'himenaeos'] + sample = random.sample(words, random.randint(minimum, maximum)) + sample[0] = sample[0].capitalize() + return ' '.join(sample) + '.' 
+ +def LoremIpsum(sentences): + return ' '.join([LoremIpsumSentence(15, 30) for i in range(sentences)]) + +STATE_START = 0 +STATE_IDENTIFIER = 1 +STATE_STRING = 2 +STATE_SPECIAL_CHAR = 3 +STATE_ERROR = 4 + +FUNCTIONNAME_REPEAT = 'repeat' +FUNCTIONNAME_RANDOM = 'random' +FUNCTIONNAME_CHR = 'chr' +FUNCTIONNAME_LOREMIPSUM = 'loremipsum' + +def Tokenize(expression): + result = [] + token = '' + state = STATE_START + while expression != '': + char = expression[0] + expression = expression[1:] + if char == "'": + if state == STATE_START: + state = STATE_STRING + elif state == STATE_IDENTIFIER: + result.append([STATE_IDENTIFIER, token]) + state = STATE_STRING + token = '' + elif state == STATE_STRING: + result.append([STATE_STRING, token]) + state = STATE_START + token = '' + elif char >= '0' and char <= '9' or char.lower() >= 'a' and char.lower() <= 'z': + if state == STATE_START: + token = char + state = STATE_IDENTIFIER + else: + token += char + elif char == ' ': + if state == STATE_IDENTIFIER: + result.append([STATE_IDENTIFIER, token]) + token = '' + state = STATE_START + elif state == STATE_STRING: + token += char + else: + if state == STATE_IDENTIFIER: + result.append([STATE_IDENTIFIER, token]) + token = '' + state = STATE_START + result.append([STATE_SPECIAL_CHAR, char]) + elif state == STATE_STRING: + token += char + else: + result.append([STATE_SPECIAL_CHAR, char]) + token = '' + if state == STATE_IDENTIFIER: + result.append([state, token]) + elif state == STATE_STRING: + result = [[STATE_ERROR, 'Error: string not closed', token]] + return result + +def ParseFunction(tokens): + if len(tokens) == 0: + print('Parsing error') + return None, tokens + if tokens[0][0] == STATE_STRING or tokens[0][0] == STATE_IDENTIFIER and tokens[0][1].startswith('0x'): + return [[FUNCTIONNAME_REPEAT, [[STATE_IDENTIFIER, '1'], tokens[0]]], tokens[1:]] + if tokens[0][0] != STATE_IDENTIFIER: + print('Parsing error') + return None, tokens + function = tokens[0][1] + tokens = tokens[1:] + 
if len(tokens) == 0: + print('Parsing error') + return None, tokens + if tokens[0][0] != STATE_SPECIAL_CHAR or tokens[0][1] != '(': + print('Parsing error') + return None, tokens + tokens = tokens[1:] + if len(tokens) == 0: + print('Parsing error') + return None, tokens + arguments = [] + while True: + if tokens[0][0] != STATE_IDENTIFIER and tokens[0][0] != STATE_STRING: + print('Parsing error') + return None, tokens + arguments.append(tokens[0]) + tokens = tokens[1:] + if len(tokens) == 0: + print('Parsing error') + return None, tokens + if tokens[0][0] != STATE_SPECIAL_CHAR or (tokens[0][1] != ',' and tokens[0][1] != ')'): + print('Parsing error') + return None, tokens + if tokens[0][0] == STATE_SPECIAL_CHAR and tokens[0][1] == ')': + tokens = tokens[1:] + break + tokens = tokens[1:] + if len(tokens) == 0: + print('Parsing error') + return None, tokens + return [[function, arguments], tokens] + +def Parse(expression): + tokens = Tokenize(expression) + if len(tokens) == 0: + print('Parsing error') + return None + if tokens[0][0] == STATE_ERROR: + print(tokens[0][1]) + print(tokens[0][2]) + print(expression) + return None + functioncalls = [] + while True: + functioncall, tokens = ParseFunction(tokens) + if functioncall == None: + return None + functioncalls.append(functioncall) + if len(tokens) == 0: + return functioncalls + if tokens[0][0] != STATE_SPECIAL_CHAR or tokens[0][1] != '+': + print('Parsing error') + return None + tokens = tokens[1:] + +def InterpretInteger(token): + if token[0] != STATE_IDENTIFIER: + return None + try: + return int(token[1]) + except: + return None + +def Hex2Bytes(hexadecimal): + if len(hexadecimal) % 2 == 1: + hexadecimal = '0' + hexadecimal + try: + return binascii.a2b_hex(hexadecimal) + except: + return None + +def InterpretHexInteger(token): + if token[0] != STATE_IDENTIFIER: + return None + if not token[1].startswith('0x'): + return None + bytes = Hex2Bytes(token[1][2:]) + if bytes == None: + return None + integer = 0 + for byte 
def CheckFunction(functionname, arguments, countarguments, maxcountarguments=None):
    """Check how many arguments were passed to an expression function.

    With maxcountarguments left at None, exactly countarguments arguments
    are required; otherwise any count from countarguments up to
    maxcountarguments (both inclusive) is accepted.

    Returns True when the count is wrong (after printing an error message)
    and False when the count is acceptable.
    """
    given = len(arguments)

    # Ranged form: a minimum and a maximum were supplied.
    if maxcountarguments != None:
        if countarguments <= given <= maxcountarguments:
            return False
        print('Error: function %s takes between %d and %d arguments, %d are given' % (functionname, countarguments, maxcountarguments, given))
        return True

    # Exact form: only the wording of the message depends on the expected count.
    if given == countarguments:
        return False
    if countarguments == 0:
        print('Error: function %s takes no arguments, %d are given' % (functionname, given))
    elif countarguments == 1:
        print('Error: function %s takes 1 argument, %d are given' % (functionname, given))
    else:
        print('Error: function %s takes %d arguments, %d are given' % (functionname, countarguments, given))
    return True
def ParsePackExpression(data):
    """Build binary data from a 'FORMAT#INTEGER' pack expression.

    The text before the first '#' is used as a struct format string and the
    text after it is converted with int() and packed with that format.

    Returns the packed bytes, or None when the expression has no '#', the
    value is not an integer, or struct.pack rejects the format or value.
    """
    try:
        packFormat, pythonExpression = data.split('#', 1)
        return struct.pack(packFormat, int(pythonExpression))
    except (ValueError, TypeError, AttributeError, OverflowError, struct.error):
        # Only parsing/packing failures map to None; the former bare
        # "except:" also hid KeyboardInterrupt and SystemExit.
        return None
def AnalyzeFileError(filename):
    """Report, through PrintError, why opening filename failed.

    Prints a generic message and the exception currently being handled
    (sys.exc_info()[1]), followed by a more specific reason when the path
    does not exist, is a directory, or is not a regular file.
    """
    PrintError('Error opening file %s' % filename)
    PrintError(sys.exc_info()[1])
    try:
        reason = None
        if not os.path.exists(filename):
            reason = 'The file does not exist'
        elif os.path.isdir(filename):
            reason = 'The file is a directory'
        elif not os.path.isfile(filename):
            reason = 'The file is not a regular file'
        if reason is not None:
            PrintError(reason)
    except:
        # Best effort only: a failure while diagnosing must not mask the
        # original error the caller is about to re-raise.
        pass
def File2Strings(filename):
    """Read a text file and return its lines as a list, without the trailing '\\n'.

    An empty filename means "read from sys.stdin"; stdin is never closed here.

    Returns a list of strings, or None when the file cannot be opened or read.
    """
    try:
        f = sys.stdin if filename == '' else open(filename, 'r')
    except Exception:
        return None
    try:
        # Return a real list: on Python 3, map() is a lazy iterator, and the
        # callers in this file concatenate the result with lists
        # (sum(..., [])), which raises TypeError for a map object.
        return [line.rstrip('\n') for line in f.readlines()]
    except Exception:
        return None
    finally:
        if f != sys.stdin:
            f.close()
self.filenameexpressionsflags = [['', '', '']] + elif literalfilenames: + self.filenameexpressionsflags = [[filename, '', ''] for filename in filenames] + elif recursedir: + for dirwildcard in filenames: + if expressionprefix != None and dirwildcard.startswith(expressionprefix): + expression = dirwildcard[len(expressionprefix):] + elif flagprefix != None and dirwildcard.startswith(flagprefix): + flag = dirwildcard[len(flagprefix):] + else: + if dirwildcard.startswith('@'): + for filename in ProcessAt(dirwildcard): + self.filenameexpressionsflags.append([filename, expression, flag]) + elif os.path.isfile(dirwildcard): + self.filenameexpressionsflags.append([dirwildcard, expression, flag]) + else: + if os.path.isdir(dirwildcard): + dirname = dirwildcard + basename = '*' + else: + dirname, basename = os.path.split(dirwildcard) + if dirname == '': + dirname = '.' + for path, dirs, files in os.walk(dirname): + for filename in fnmatch.filter(files, basename): + self.filenameexpressionsflags.append([os.path.join(path, filename), expression, flag]) + else: + for filename in list(collections.OrderedDict.fromkeys(sum(map(self.Glob, sum(map(ProcessAt, filenames), [])), []))): + if expressionprefix != None and filename.startswith(expressionprefix): + expression = filename[len(expressionprefix):] + elif flagprefix != None and filename.startswith(flagprefix): + flag = filename[len(flagprefix):] + else: + self.filenameexpressionsflags.append([filename, expression, flag]) + self.warning = self.containsUnixShellStyleWildcards and len(self.filenameexpressionsflags) == 0 + if self.warning: + self.message = "Your filename argument(s) contain Unix shell-style wildcards, but no files were matched.\nCheck your wildcard patterns or use option literalfilenames if you don't want wildcard pattern matching." 
def CheckJSON(stringJSON):
    """Validate a JSON document and return its items with decoded content.

    The document must be a dictionary with version 2, id 'didierstevens.com',
    type 'content', a 'fields' entry listing both 'name' and 'content', and
    an 'items' entry. Each item's 'content' value is base64-decoded in place.

    Every failure prints an error message and returns None; on success the
    list stored under 'items' is returned.
    """
    try:
        document = json.loads(stringJSON)
    except Exception:
        print('Error parsing JSON')
        print(sys.exc_info()[1])
        return None
    if not isinstance(document, dict):
        print('Error JSON is not a dictionary')
        return None

    # Header keys are checked in a fixed order: presence first, then value.
    for key, expected in (('version', 2), ('id', 'didierstevens.com'), ('type', 'content')):
        if key not in document:
            print('Error JSON dictionary has no %s' % key)
            return None
        if document[key] != expected:
            print('Error JSON dictionary has wrong %s' % key)
            return None

    if 'fields' not in document:
        print('Error JSON dictionary has no fields')
        return None
    for field in ('name', 'content'):
        if field not in document['fields']:
            print('Error JSON dictionary has no %s field' % field)
            return None
    if 'items' not in document:
        print('Error JSON dictionary has no items')
        return None

    try:
        for item in document['items']:
            item['content'] = binascii.a2b_base64(item['content'])
    except (KeyError, TypeError, ValueError):
        # An item without 'content', or with content that is not valid
        # base64 (binascii.Error is a ValueError), used to escape as an
        # uncaught exception; report it like every other validation error.
        print('Error JSON item has no valid base64 content')
        print(sys.exc_info()[1])
        return None
    return document['items']
re.match(r"\[u?\'(.+?)\'\](\d+)?([+-](?:0x[0-9a-f]+|\d+))?", argument) + else: + if len(oMatch.group(1)) % 2 == 1: + raise Exception("Uneven length hexadecimal string") + else: + return CUTTERM_FIND, (binascii.a2b_hex(oMatch.group(1)), int(Replace(oMatch.group(2), {None: '1'})), ParseInteger(Replace(oMatch.group(3), {None: '0'}))), argument[len(oMatch.group(0)):] + if oMatch == None: + return None, None, argument + else: + if argument.startswith("[u'"): + # convert ascii to unicode 16 byte sequence + searchtext = oMatch.group(1).decode('unicode_escape').encode('utf16')[2:] + else: + searchtext = oMatch.group(1) + return CUTTERM_FIND, (searchtext, int(Replace(oMatch.group(2), {None: '1'})), ParseInteger(Replace(oMatch.group(3), {None: '0'}))), argument[len(oMatch.group(0)):] + +def ParseCutArgument(argument): + type, value, remainder = ParseCutTerm(argument.strip()) + if type == CUTTERM_NOTHING: + return CUTTERM_NOTHING, None, CUTTERM_NOTHING, None + elif type == None: + if remainder.startswith(':'): + typeLeft = CUTTERM_NOTHING + valueLeft = None + remainder = remainder[1:] + else: + return None, None, None, None + else: + typeLeft = type + valueLeft = value + if typeLeft == CUTTERM_POSITION and valueLeft < 0: + return None, None, None, None + if typeLeft == CUTTERM_FIND and valueLeft[1] == 0: + return None, None, None, None + if remainder.startswith(':'): + remainder = remainder[1:] + else: + return None, None, None, None + type, value, remainder = ParseCutTerm(remainder) + if type == CUTTERM_POSITION and remainder == 'l': + return typeLeft, valueLeft, CUTTERM_LENGTH, value + elif type == None or remainder != '': + return None, None, None, None + elif type == CUTTERM_FIND and value[1] == 0: + return None, None, None, None + else: + return typeLeft, valueLeft, type, value + +def Find(data, value, nth, startposition=-1): + position = startposition + while nth > 0: + position = data.find(value, position + 1) + if position == -1: + return -1 + nth -= 1 + return 
def CutData(stream, cutArgument):
    """Cut a slice out of stream as described by a cut expression.

    cutArgument is parsed by ParseCutArgument into a left and a right term.
    The left term sets the begin position (start of data, an absolute
    position, or the position of a search match plus an offset). The right
    term sets the end position (end of data, a position counted from the
    end when negative, an inclusive absolute position, a length relative to
    the begin position, or the end of a search match plus an offset).

    Returns a list [data, positionBegin, positionEnd]. The whole stream with
    two None positions is returned when cutArgument is empty or cannot be
    parsed; an empty value with two None positions is returned when a
    search term is not found.

    Raises Exception when ParseCutArgument yields an unknown term type.
    """
    if cutArgument == '':
        return [stream, None, None]

    typeLeft, valueLeft, typeRight, valueRight = ParseCutArgument(cutArgument)

    if typeLeft is None:
        return [stream, None, None]

    # Empty result of the same type as the input: callers pass bytes on
    # Python 3, and a literal '' would later fail when combined with bytes.
    empty = stream[0:0]

    if typeLeft == CUTTERM_NOTHING:
        positionBegin = 0
    elif typeLeft == CUTTERM_POSITION:
        positionBegin = valueLeft
    elif typeLeft == CUTTERM_FIND:
        # valueLeft is (search value, nth occurrence, offset).
        positionBegin = Find(stream, valueLeft[0], valueLeft[1])
        if positionBegin == -1:
            return [empty, None, None]
        positionBegin += valueLeft[2]
    else:
        raise Exception("Unknown value typeLeft")

    if typeRight == CUTTERM_NOTHING:
        positionEnd = len(stream)
    elif typeRight == CUTTERM_POSITION and valueRight < 0:
        positionEnd = len(stream) + valueRight
    elif typeRight == CUTTERM_POSITION:
        # An absolute end position is inclusive.
        positionEnd = valueRight + 1
    elif typeRight == CUTTERM_LENGTH:
        positionEnd = positionBegin + valueRight
    elif typeRight == CUTTERM_FIND:
        # The search for the end term starts after the begin position.
        positionEnd = Find(stream, valueRight[0], valueRight[1], positionBegin)
        if positionEnd == -1:
            return [empty, None, None]
        positionEnd += len(valueRight[0])
        positionEnd += valueRight[2]
    else:
        raise Exception("Unknown value typeRight")

    return [stream[positionBegin:positionEnd], positionBegin, positionEnd]
return oDumpStream.Content() + + def CombineHexAscii(self, hexDump, asciiDump): + if hexDump == '': + return '' + countSpaces = 3 * (self.dumplinelength - len(asciiDump)) + if len(asciiDump) <= self.dumplinelength / 2: + countSpaces += 1 + return hexDump + ' ' + (' ' * countSpaces) + asciiDump + + def HexAsciiDump(self, rle=False): + oDumpStream = self.cDumpStream(self.prefix) + position = '' + hexDump = '' + asciiDump = '' + previousLine = None + countRLE = 0 + for i, b in enumerate(self.data): + b = self.C2IIP2(b) + if i % self.dumplinelength == 0: + if hexDump != '': + line = self.CombineHexAscii(hexDump, asciiDump) + if not rle or line != previousLine: + if countRLE > 0: + oDumpStream.Addline('* %d 0x%02x' % (countRLE, countRLE * self.dumplinelength)) + oDumpStream.Addline(position + line) + countRLE = 0 + else: + countRLE += 1 + previousLine = line + position = '%08X:' % (i + self.offset) + hexDump = '' + asciiDump = '' + if i % self.dumplinelength == self.dumplinelength / 2: + hexDump += ' ' + hexDump += ' %02X' % b + asciiDump += IFF(b >= 32 and b < 127, chr(b), '.') + if countRLE > 0: + oDumpStream.Addline('* %d 0x%02x' % (countRLE, countRLE * self.dumplinelength)) + oDumpStream.Addline(self.CombineHexAscii(position + hexDump, asciiDump)) + return oDumpStream.Content() + + def Base64Dump(self, nowhitespace=False): + encoded = binascii.b2a_base64(self.data).decode().strip() + if nowhitespace: + return encoded + oDumpStream = self.cDumpStream(self.prefix) + length = 64 + for i in range(0, len(encoded), length): + oDumpStream.Addline(encoded[0+i:length+i]) + return oDumpStream.Content() + + class cDumpStream(): + def __init__(self, prefix=''): + self.oStringIO = StringIO() + self.prefix = prefix + + def Addline(self, line): + if line != '': + self.oStringIO.write(self.prefix + line + '\n') + + def Content(self): + return self.oStringIO.getvalue() + + @staticmethod + def C2IIP2(data): + if sys.version_info[0] > 2: + return data + else: + return ord(data) 
#Fix for http://bugs.python.org/issue11395
def StdoutWriteChunked(data):
    """Write data to standard output.

    On Python 3, str is written to sys.stdout and anything else to
    sys.stdout.buffer. On Python 2 the data is written in slices of 10000
    characters with a flush after each slice; writing stops silently when a
    flush raises IOError.
    """
    if sys.version_info[0] <= 2:
        remaining = data
        while remaining != '':
            sys.stdout.write(remaining[0:10000])
            try:
                sys.stdout.flush()
            except IOError:
                return
            remaining = remaining[10000:]
        return

    # Python 3: text goes to the text layer, everything else to the raw buffer.
    sink = sys.stdout if isinstance(data, str) else sys.stdout.buffer
    sink.write(data)
self.filenameOption[position + 1:] + for switch in switches: + if switch == 's': + self.separateFiles = True + elif switch == 'p': + self.progress = True + elif switch == 'c': + self.console = True + elif switch == 'l': + pass + elif switch == 'g': + if self.filename != '': + extra = self.filename + '-' + else: + extra = '' + self.filename = '%s-%s%s.txt' % (os.path.splitext(os.path.basename(sys.argv[0]))[0], extra, self.FormatTime()) + elif switch == 'h': + self.head = True + elif switch == 't': + self.tail = True + else: + return False + return True + return False + + @staticmethod + def FormatTime(epoch=None): + if epoch == None: + epoch = time.time() + return '%04d%02d%02d-%02d%02d%02d' % time.localtime(epoch)[0:6] + + def RootUnique(self, root): + if not root in self.rootFilenames: + self.rootFilenames[root] = None + return root + iter = 1 + while True: + newroot = '%s_%04d' % (root, iter) + if not newroot in self.rootFilenames: + self.rootFilenames[newroot] = None + return newroot + iter += 1 + + def LineSub(self, line, eol): + if self.fOut == None or self.console: + try: + print(line, end=eol) + except UnicodeEncodeError: + encoding = sys.stdout.encoding + print(line.encode(encoding, errors='backslashreplace').decode(encoding), end=eol) +# sys.stdout.flush() + if self.fOut != None: + self.fOut.write(line + '\n') + self.fOut.flush() + + def Line(self, line, eol='\n'): + if self.head: + if self.headCounter < 10: + self.LineSub(line, eol) + elif self.tail: + self.tailQueue = self.tailQueue[-9:] + [[line, eol]] + self.headCounter += 1 + elif self.tail: + self.tailQueue = self.tailQueue[-9:] + [[line, eol]] + else: + self.LineSub(line, eol) + + def LineTimestamped(self, line): + self.Line('%s: %s' % (self.FormatTime(), line)) + + def WriteBinary(self, data): + if self.fOut != None: + self.fOut.write(data) + self.fOut.flush() + else: + IfWIN32SetBinary(sys.stdout) + StdoutWriteChunked(data) + + def CSVWriteRow(self, row): + if self.oCsvWriter == None: + 
def ToString(value):
    """Return value unchanged when it is already a str, otherwise str(value)."""
    return value if isinstance(value, str) else str(value)

def Quote(value, separator, quote):
    """Return value as text, wrapped in quote characters when needed.

    A text of more than one character that already starts and ends with
    quote is returned as is. A text that contains separator, or that is
    empty, is wrapped in quote; any other text is returned unchanged.
    """
    text = ToString(value)
    alreadyQuoted = len(text) > 1 and text[0] == quote and text[-1] == quote
    if alreadyQuoted:
        return text
    needsQuoting = separator in text or text == ''
    return quote + text + quote if needsQuoting else text
class cLogfile():
    """Optional run log written as separator-delimited lines through cOutput.

    When keyword is an empty string no output object is created and every
    logging call is a no-op; errors are still counted.
    """

    def __init__(self, keyword, comment):
        self.starttime = time.time()
        self.errors = 0
        self.oOutput = None
        if keyword != '':
            scriptname = os.path.splitext(os.path.basename(sys.argv[0]))[0]
            self.oOutput = cOutput('%s-%s-%s.log' % (scriptname, keyword, self.FormatTime()))
        # Header lines describing this run.
        self.Line('Start')
        self.Line('UTC', '%04d%02d%02d-%02d%02d%02d' % time.gmtime(time.time())[0:6])
        self.Line('Comment', comment)
        self.Line('Args', repr(sys.argv))
        self.Line('Version', __version__)
        self.Line('Python', repr(sys.version_info))
        self.Line('Platform', sys.platform)
        self.Line('CWD', repr(os.getcwd()))

    @staticmethod
    def FormatTime(epoch=None):
        """Format epoch (default: now) in local time as YYYYMMDD-HHMMSS."""
        moment = time.time() if epoch == None else epoch
        return '%04d%02d%02d-%02d%02d%02d' % time.localtime(moment)[0:6]

    def Line(self, *line):
        """Write one log line: a timestamp followed by the given fields."""
        if self.oOutput == None:
            return
        fields = (self.FormatTime(), ) + line
        self.oOutput.Line(MakeCSVLine(fields, DEFAULT_SEPARATOR, QUOTE))

    def LineError(self, *line):
        """Write an 'Error' log line and count the error."""
        self.Line('Error', *line)
        self.errors += 1

    def Close(self):
        """Write the closing summary line and close the output, if any."""
        if self.oOutput == None:
            return
        self.Line('Finish', '%d error(s)' % self.errors, '%d second(s)' % (time.time() - self.starttime))
        self.oOutput.Close()
def PDFIncrementalUpdatesSub(data, oOutput, options):
    """Split PDF data into versions, one per %%EOF marker found.

    The data is scanned byte by byte. Runs of ASCII letters, optionally
    introduced by '%' or '/', are collected as tokens; the tokens obj,
    endobj and /Linearized are counted, and each %%EOF token (including a
    directly following CR, LF or CR LF) closes a version. Data that follows
    the last %%EOF, or all of the data when there is no %%EOF at all, forms
    a final version.

    oOutput and options are accepted but not used by this function.

    Returns a list with one entry per version:
    [index (1-based), info text, counters dict, all bytes up to the end of
    this version, bytes added since the previous version].
    """
    accumulate = DataIO(b'')
    token = b''
    # A trailing sentinel byte guarantees that a token ending exactly at the
    # end of the data is still flushed; it is stripped again after the loop.
    data = data + b'\x00'
    dCounters = {}
    versions = []
    for position in range(len(data)):
        byte = data[position:position + 1]
        isLetter = b'a' <= byte <= b'z' or b'A' <= byte <= b'Z'
        if token == b'':
            if byte == b'%' or byte == b'/' or isLetter:
                token += byte
            else:
                accumulate.write(byte)
        elif token[0] == ord(b'%') and byte == b'%':
            token += byte
        elif isLetter:
            token += byte
        elif token == b'%%EOF' and byte in [b'\x0a', b'\x0d']:
            token += byte
        elif token == b'%%EOF\x0d' and byte == b'\x0a':
            token += byte
        else:
            # The token is complete: flush it, then handle the current byte.
            accumulate.write(token)
            if token[:5] == b'%%EOF':
                dCounters[b'%%EOF'] = 1
                if len(versions) == 0:
                    offset = 0
                else:
                    offset = len(versions[-1][1])
                versions.append([dCounters, accumulate.getvalue(), accumulate.getvalue()[offset:]])
                dCounters = {}
            else:
                if token in [b'obj', b'endobj', b'/Linearized']:
                    dCounters[token] = dCounters.get(token, 0) + 1
            token = b''
            accumulate.write(byte)

    # Drop the sentinel byte again.
    accumulated = accumulate.getvalue()[:-1]
    # With no %%EOF in the data, versions is empty; indexing versions[-1]
    # here used to raise IndexError. Treat "no previous version" as offset 0.
    offset = len(versions[-1][1]) if versions else 0
    if offset != len(accumulated):
        versions.append([dCounters, accumulated, accumulated[offset:]])

    newVersions = []
    for index, version in enumerate(versions):
        index += 1
        if b'obj' in version[0] and b'endobj' in version[0] and version[0][b'obj'] == version[0][b'endobj']:
            info = 'objects= %d' % version[0][b'obj']
            if b'/Linearized' in version[0]:
                info += ' (Linearized)'
        else:
            info = repr(version[0])
        if version[0] == {}:
            info = 'no objects'
        if WhitespaceOnly(version[2]):
            info = 'whitespace'
        newVersions.append([index, info] + version)
    return newVersions
def ProcessBinaryFile(command, filename, content, cutexpression, flag, oOutput, oLogfile, options, oParserFlag):
    """Run the PDF incremental-update analysis on one file or one data blob.

    When content is None the data is read from filename through cBinaryFile
    and cut with cutexpression; otherwise content is used as the data and
    no file is opened. Failures to open or read the file are logged through
    oLogfile and end processing of this file. Failures during processing
    are logged and re-raised unless options.ignoreprocessingerrors is set.
    """
    # Whether the data came out of an archive; stays False when content is
    # supplied directly, because no cBinaryFile object exists in that case.
    extracted = False
    if content is None:
        try:
            oBinaryFile = cBinaryFile(filename, C2BIP3(options.password), options.noextraction, options.literalfilenames)
        except Exception:
            oLogfile.LineError('Opening file %s %s' % (filename, repr(sys.exc_info()[1])))
            return
        oLogfile.Line('Success', 'Opening file %s' % filename)
        try:
            try:
                data = oBinaryFile.read()
            except Exception:
                oLogfile.LineError('Reading file %s %s' % (filename, repr(sys.exc_info()[1])))
                return
            extracted = oBinaryFile.extracted
        finally:
            # Close on every path, including a failed read.
            oBinaryFile.close()
        data = CutData(data, cutexpression)[0]
    else:
        data = content

    # The parsed flags are not used below; the call is kept so that flag
    # arguments are still validated by the option parser.
    (flagoptions, flagargs) = oParserFlag.parse_args(flag.split(' '))

    try:
        # ----- Put your data processing code here -----
        if options.select == '':
            oOutput.Line('File: %s%s' % (filename, IFF(extracted, ' (extracted)', '')))
        PDFIncrementalUpdates(data, oOutput, options)
        # ----------------------------------------------
    except Exception:
        oLogfile.LineError('Processing file %s %s' % (filename, repr(sys.exc_info()[1])))
        if not options.ignoreprocessingerrors:
            raise
in filenames: + oOutput.Filename(filename, index, len(filenames)) + index += 1 + ProcessBinaryFile(command, filename, None, cutexpression, flag, oOutput, oLogfile, options, oParserFlag) + +def Main(): + moredesc = ''' + +Source code put in the public domain by Didier Stevens, no Copyright +Use at your own risk +https://DidierStevens.com''' + + oParserFlag = optparse.OptionParser(usage='\nFlag arguments start with #f#:') + oParserFlag.add_option('-l', '--length', action='store_true', default=False, help='Print length of files') + + oParser = optparse.OptionParser(usage='usage: %prog [options] command [[@]file|cut-expression|flag-expression ...]\n' + __description__ + moredesc, version='%prog ' + __version__, epilog='This tool also accepts flag arguments (#f#), read the man page (-m) for more info.') + oParser.add_option('-m', '--man', action='store_true', default=False, help='Print manual') + oParser.add_option('-o', '--output', type=str, default='', help='Output to file (# supported)') + oParser.add_option('-s', '--select', default='', help='select item nr for dumping (a for all)') + oParser.add_option('-d', '--dump', action='store_true', default=False, help='perform dump') + oParser.add_option('-x', '--hexdump', action='store_true', default=False, help='perform hex dump') + oParser.add_option('-a', '--asciidump', action='store_true', default=False, help='perform ascii dump') + oParser.add_option('-A', '--asciidumprle', action='store_true', default=False, help='perform ascii dump with RLE') + oParser.add_option('-p', '--password', default='infected', help='The ZIP password to be used (default infected)') + oParser.add_option('-n', '--noextraction', action='store_true', default=False, help='Do not extract from archive file') + oParser.add_option('-l', '--literalfilenames', action='store_true', default=False, help='Do not interpret filenames') + oParser.add_option('-r', '--recursedir', action='store_true', default=False, help='Recurse directories (wildcards and here 
files (@...) allowed)') + oParser.add_option('--checkfilenames', action='store_true', default=False, help='Perform check if files exist prior to file processing') + oParser.add_option('-j', '--jsoninput', action='store_true', default=False, help='Consume JSON from stdin') + oParser.add_option('--logfile', type=str, default='', help='Create logfile with given keyword') + oParser.add_option('--logcomment', type=str, default='', help='A string with comments to be included in the log file') + oParser.add_option('--ignoreprocessingerrors', action='store_true', default=False, help='Ignore errors during file processing') + (options, args) = oParser.parse_args() + + if options.man: + oParser.print_help() + oParserFlag.print_help() + PrintManual() + return + + if len(args) == 0: + oParser.print_help() + return + + command = args[0] + args = args[1:] + + commands = ['iu'] + + if not command in commands: + print('Error: unknown command: %s' % command) + print('Available commands: %s' % ' '.join(commands)) + oParser.print_help() + return + + if len(args) != 0 and options.jsoninput: + print('Error: option -j can not be used with files') + return + + oLogfile = cLogfile(options.logfile, options.logcomment) + oExpandFilenameArguments = cExpandFilenameArguments(args, options.literalfilenames, options.recursedir, options.checkfilenames, '#c#', '#f#') + oLogfile.Line('FilesCount', str(len(oExpandFilenameArguments.Filenames()))) + oLogfile.Line('Files', repr(oExpandFilenameArguments.Filenames())) + if oExpandFilenameArguments.warning: + PrintError('\nWarning:') + PrintError(oExpandFilenameArguments.message) + oLogfile.Line('Warning', repr(oExpandFilenameArguments.message)) + + ProcessBinaryFiles(command, oExpandFilenameArguments.Filenames(), oLogfile, options, oParserFlag) + + if oLogfile.errors > 0: + PrintError('Number of errors: %d' % oLogfile.errors) + oLogfile.Close() + +if __name__ == '__main__': + Main() diff --git a/sift/files/pdf-tools/plugin_triage.py 
b/sift/files/pdf-tools/plugin_triage.py index 8245c6f5..b619bae5 100644 --- a/sift/files/pdf-tools/plugin_triage.py +++ b/sift/files/pdf-tools/plugin_triage.py @@ -1,22 +1,51 @@ #!/usr/bin/env python #2014/09/30 +#2015/08/12 added options; changed scoring: /ObjStm 0.75; obj/endobj or stream/endstream discrepancy: 0.50 +#2015/08/13 added instructions +#2017/10/29 added /URI class cPDFiDTriage(cPluginParent): -# onlyValidPDF = True + onlyValidPDF = False name = 'Triage plugin' - def __init__(self, oPDFiD): + def __init__(self, oPDFiD, options): + self.options = options self.oPDFiD = oPDFiD def Score(self): - for keyword in ('/ObjStm', '/JS', '/JavaScript', '/AA', '/OpenAction', '/AcroForm', '/JBIG2Decode', '/RichMedia', '/Launch', '/EmbeddedFile', '/XFA', '/Colors > 2^24'): + for keyword in ('/JS', '/JavaScript', '/AA', '/OpenAction', '/AcroForm', '/JBIG2Decode', '/RichMedia', '/Launch', '/EmbeddedFile', '/XFA', '/Colors > 2^24'): if keyword in self.oPDFiD.keywords and self.oPDFiD.keywords[keyword].count > 0: return 1.0 - if self.oPDFiD.keywords['obj'].count != self.oPDFiD.keywords['endobj'].count: - return 1.0 - if self.oPDFiD.keywords['stream'].count != self.oPDFiD.keywords['endstream'].count: - return 1.0 + if self.options != '--io': + for keyword in ('/ObjStm', ): + if keyword in self.oPDFiD.keywords and self.oPDFiD.keywords[keyword].count > 0: + return 0.75 + for keyword in ('/URI', ): + if keyword in self.oPDFiD.keywords and self.oPDFiD.keywords[keyword].count > 0: + return 0.6 + if self.oPDFiD.keywords['obj'].count != self.oPDFiD.keywords['endobj'].count: + return 0.5 + if self.oPDFiD.keywords['stream'].count != self.oPDFiD.keywords['endstream'].count: + return 0.5 return 0.0 + def Instructions(self, score): + if score == 1.0: + return 'Sample is likely malicious and requires further analysis' + + if score == 0.75: + return '/ObjStm detected, analyze sample with pdfid-objstm.bat' + + if score == 0.5: + return 'Sample is likely not malicious but requires 
further analysis' + + if score == 0.6: + return 'Sample is likely not malicious but could contain phishing or payload URL' + + if score == 0.0: + return 'Sample is likely not malicious, unless you suspect this is used in a targeted/sophisticated attack' + + return '' + AddPlugin(cPDFiDTriage) diff --git a/sift/files/volatility/__init__.py b/sift/files/volatility/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/sift/files/volatility/pstotal.py b/sift/files/volatility/pstotal.py deleted file mode 100644 index 18fde0bc..00000000 --- a/sift/files/volatility/pstotal.py +++ /dev/null @@ -1,227 +0,0 @@ -# pstotal -# Copyright (C) 2014 Sue Stirrup -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or (at -# your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -''' -Rewrite of and enhancements to the SANS(tm) Institute's text based pstotal plugin -based on Jesse Kornblum's original for Volatility 2.0. - -@author: Sue Stirrup -@license: GNU General Public License 2.0 or later -@contact: info@sans.org -@organization: The SANS(tm) Institute -Amendments + enhancements - * Default behaviour to display complete list of processes (process scan) - - Interesting column added to show processes hidden from pslist. 
- * Command line option to display only processes hidden from process list (original behaviour) - * Graphical visualisation option using Graphviz and .dot format via the command line added: - - Command line option to display process command (graphical representation only) - - Command line option to display process path name (graphical representation only) - - Processes from prior boot rendered in light blue (with exit time before current boot or not available - - Processes from prior boot rendered in medium blue (with exit time after current boot - - Processes from current boot but hidden from pslist rendered in red - - Suspected pid reuse rendered in yellow -''' - -import volatility.plugins.filescan as filescan -import volatility.plugins.common as common -import volatility.utils as utils -import volatility.obj as obj -import volatility.win32.tasks as tasks -import pdb -import re - -class pstotal(common.AbstractWindowsCommand): - ''' Combination of pslist,psscan & pstree --output=dot gives graphical representation ''' - - def __init__(self, config,*args, **kwargs): - common.AbstractWindowsCommand.__init__(self, config, *args, **kwargs) - config.add_option('SHORT', short_option = 'S', default = False, help = 'Interesting processes only', action = 'store_true') - config.add_option('CMD', short_option = 'c', default = False, help = 'Display process command line. 
All {} removed', action = 'store_true') - config.add_option('PATH', short_option = 'P', default = False, help = 'Display process image path', action = 'store_true') - - def render_text(self, outfd, data): - processes = data[0] - interest = data[1] - outfd.write("Offset (P) Name PID PPID PDB Time created Time exited Interesting \n" + \ - "----------- ---------------- ------ ------ ----------- ---------------------------- ---------------------------- ----------- \n") - for eprocess in processes: - if interest[processes[eprocess].obj_offset] == 1: - interesting = 'TRUE' - else: - interesting = ' ' - outfd.write("0x{0:09x} {1:16} {2:6} {3:6} 0x{4:09x} {5:28} {6:28} {7:7}\n".format( - processes[eprocess].obj_offset, - processes[eprocess].ImageFileName, - processes[eprocess].UniqueProcessId, - processes[eprocess].InheritedFromUniqueProcessId, - processes[eprocess].Pcb.DirectoryTableBase, - processes[eprocess].CreateTime or '', - processes[eprocess].ExitTime or '', interesting)) - - def render_dot(self, outfd, data): - objects = set() - links = set() - proc_seen = set() - procs_to_check = set() - proc_times = {} - processes = data[0] - filling = data[1] - cmdline = data[2] - pathname = data[3] - smssTime = ' ' - - # Obtain boot time - for proc in processes: - proc_name = processes[proc].ImageFileName - ppid = processes[proc].InheritedFromUniqueProcessId - processp = "%s" % (processes[proc].UniqueProcessId) - proc_times[processp] = processes[proc].CreateTime - if proc_name.find("System") == 0 and processes[proc].CreateTime: - smssTime = processes[proc].CreateTime - elif proc_name.find("smss.exe") == 0 and ppid == 4: - smssTime = processes[proc].CreateTime - - for eprocess in processes: - proc_offset = processes[eprocess].obj_offset - parentp = "%s" % (processes.get(eprocess).InheritedFromUniqueProcessId) - label = "{0} | offset (P)\\n0x{1:08x} | {2} | ".format(processes[eprocess].UniqueProcessId, - proc_offset, - processes[eprocess].ImageFileName) - # Display process 
command line option - if self._config.CMD : - try: - if not processes[eprocess].CreateTime < smssTime: - s = "%s" % (cmdline[proc_offset]) - s = s.replace('"', '') - s = s.replace('\\', '\\\\') - pos = s.find("csrss.exe") - if pos > 0: - pos = pos + 9 - s = s[:pos] + "\\n (Run pstree to get command parameters)" - pos = s.find("conhost.exe") - if pos > 0: - pos = pos + 11 - s = s[:pos] + "\\n (Run pstree to get command parameters)" - label += "command:\\n{0} | ".format(s or 'not available') - label = label.replace('{', '') - label = label.replace('}', '') - except KeyError: - pass - # Display process path option - if self._config.PATH : - try: - if not processes[eprocess].CreateTime < smssTime: - s = "%s" % (pathname[proc_offset]) - s = s.replace('"', '') - s = s.replace('\\', '\\\\') - pos = s.find("csrss.exe") - if pos > 0: - pos = pos + 9 - s = s[:pos] + "\\n (Run pstree to get command parameters)" - pos = s.find("conhost.exe") - if pos > 0: - pos = pos + 11 - s = s[:pos] + "\\n (Run pstree to get command parameters)" - label += "path:\\n{0} | ".format(s or 'not available') - except KeyError: - pass - label += "created:\\n{0} |".format(processes[eprocess].CreateTime or 'not available') - # Identify processes that have exited - if processes[eprocess].ExitTime: - label += "exited:\\n{0}".format(processes[eprocess].ExitTime) - options = ' style = "filled" fillcolor = "lightgray" ' - else: - label += "running" - options = '' - # Identify processes that are 'hidden' and relate to the current boot - if filling[proc_offset] == 1: - options = ' style = "filled" fillcolor = "red" ' - # Identify processes that are 'hidden' and relate to the previous boot - if processes[eprocess].CreateTime < smssTime and processes[eprocess].UniqueProcessId != 4: - options = ' style = "filled" fillcolor = "lightblue" ' - if not processes[eprocess].ExitTime: - label = label[:-7] - label += "not available\\nprior boot" - # Exit time is after current boot time - elif 
processes[eprocess].ExitTime > smssTime: - options = ' style = "filled" fillcolor = "darkblue" ' - label = "{" + label + "}" - # Sometimes windows creates duplicate process blocks - one in the doubly linked list and one scraped. We need to see both - pid = "%s" % (processes[eprocess].UniqueProcessId) - - if pid in proc_seen: - objects.add('pid{0}a [label="{1}" shape="record" {2}];\n'.format(processes[eprocess].UniqueProcessId, - label, options)) - links.add("pid{0} -> pid{1}a [];\n".format(processes[eprocess].InheritedFromUniqueProcessId, - processes[eprocess].UniqueProcessId)) - else: - proc_seen.add(pid) - if parentp in proc_times and (processes.get(eprocess).CreateTime < proc_times[parentp]): - links.add("pid{0}r -> pid{1} [];\n".format(processes[eprocess].InheritedFromUniqueProcessId, processes[eprocess].UniqueProcessId)) - parent = "%sr" % processes[eprocess].InheritedFromUniqueProcessId - if not parent in proc_seen: - proc_seen.add(parent) - objects.add('pid{0} [label="pid{1}" shape="oval" style = "filled" fillcolor = "yellow" ];\n'.format(parent, parentp)) - else: - links.add("pid{0} -> pid{1} [];\n".format(processes[eprocess].InheritedFromUniqueProcessId, - processes[eprocess].UniqueProcessId)) - objects.add('pid{0} [label="{1}" shape="record" {2}];\n'.format(processes[eprocess].UniqueProcessId, - label, options)) - - ## Now write the dot file - outfd.write("digraph processtree { \ngraph [rankdir = \"TB\"];\n") - for link in links: - outfd.write(link) - - for item in objects: - outfd.write(item) - outfd.write("}") - - def calculate(self): - eproc = {} - found = {} - cmdline = {} - pathname = {} - - # Brute force search for eproc blocks in pool memory - address_space = utils.load_as(self._config) - for eprocess in filescan.PSScan(self._config).calculate(): - eproc[eprocess.obj_offset] = eprocess - found[eprocess.obj_offset] = 1 - - # Walking the active process list. - # Remove any tasks we find here from the brute force search if the --short option is set. 
- # Anything left is something which was hidden/terminated/of interest. - address_space = utils.load_as(self._config) - for task in tasks.pslist(address_space): - phys = address_space.vtop(task.obj_offset) - if phys in eproc: - if self._config.SHORT : - del eproc[phys] - del found[phys] - else: - found[phys] = 0 - - # Grab command line and parameters - peb = task.Peb - if peb: - cmdline[phys] = peb.ProcessParameters.CommandLine - pathname[phys] = peb.ProcessParameters.ImagePathName - - ret = [eproc, found, cmdline, pathname] - - return ret diff --git a/sift/files/volatility/sqlite_help.py b/sift/files/volatility/sqlite_help.py deleted file mode 100755 index 23238ce5..00000000 --- a/sift/files/volatility/sqlite_help.py +++ /dev/null @@ -1,185 +0,0 @@ -# Copyright (C) 2014 Dave Lassalle (@superponible) -# Donated under Volatility Foundation, Inc. Individual Contributor Licensing Agreement -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or (at -# your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# - -""" -@author: Dave Lassalle (@superponible) -@license: GNU General Public License 2.0 or later -@contact: dave@superponible.com -""" -# Helper functions for working with a SQLite database - -import struct -import datetime -import math - -FORWARD = 1 -BACKWARD = -1 - -def unix_time(dt): - epoch = datetime.datetime.utcfromtimestamp(0) - delta = dt - epoch - return int(delta.total_seconds()) - -def get_wintime_from_msec(msec): - """ Convert windows epoch time in microseconds to a date string """ - seconds, msec= divmod(msec, 1000000) - days, seconds = divmod(seconds, 86400) - if days > 160000 or days < 140000: - days = 0 - seconds = 0 - msec = 0 - return datetime.datetime(1601, 1, 1) + datetime.timedelta(days, seconds, msec) - -def get_nixtime_from_sec(sec): - """ Convert unix epoch time in seconds to a date string """ - return get_nixtime_from_msec(sec*1000000) - -def get_nixtime_from_msec(msec): - """ Convert unix epoch time in microseconds to a date string """ - seconds, msec= divmod(msec, 1000000) - days, seconds = divmod(seconds, 86400) - if days > 20000 or days < 9000: - days = 0 - seconds = 0 - msec = 0 - return datetime.datetime(1970, 1, 1) + datetime.timedelta(days, seconds, msec) - -def varint_type_to_length(varint): - """ Return the number of bytes used by a varint type """ - if varint == 5: - return (6, "") - elif varint == 6 or varint == 7: - return (8, "") - elif varint == 8: - return (0,0) - elif varint == 9: - return (0,1) - else: - return (varint, "") - -def ones_comp(bin_str): - """ Return the ones complement of a string of 0s and 1s """ - output = "" - for i in bin_str: - if i == '0': - output += '1' - if i == '1': - output += '0' - return output - -def find_varint(buff, start, direct): - """ varint are 1-9 bytes, big-endian. 
The most sig bit is not used, which is why 128 is subtracted - in the for loops below. - See: http://www.evolane.com/support/manuals/shared/manuals/tcltk/sqlite/fileformat.html#varint_format""" - buff_len = len(buff) - varint_len = 1 - varint_buff = "" - begin = 0 - # at start index and going backwards, so only 1 byte available - if direct == BACKWARD and start == 0: - begin = 0 - # going backwards - elif direct == BACKWARD: - # set stopping point, lowest possible is start of the buffer - if start >= 9: - stop = start - 9 - else: - stop = 0 - for i in range(start, stop, direct): - if ord(buff[i-1]) < 128: - break - if i > stop + 1: - varint_len += 1 - begin = start - varint_len + 1 - # going forwards - else: - # set a stopping point, maximum length of 9 bytes - if start + 9 > buff_len: - stop = buff_len - else: - stop = start + 9 - begin = start - for i in range(start, stop, direct): - if ord(buff[i]) < 128: - break - if i < stop-1: - varint_len += 1 - # num_buff contains the varint that was extracted - num_buff = buff[begin:begin+varint_len] - - if num_buff == "": - return (-1, 0) - return (varint_to_int(num_buff), varint_len) - -def varint_to_int(buff): - """ convert a varint to an integer """ - - bin_str = "" - varint_len = len(buff) - # convert each byte to a binary string, keeping 7 bytes, unless the buffer is 9 bytes and - # and we are grabbing the last byte, then keep all 8 - for i in range(0,varint_len): - if i == 8 and varint_len == 9: - bin_str += bin(ord(buff[i]))[2:].zfill(8) - else: - bin_str += bin(ord(buff[i]))[2:].zfill(8)[1:] - - if len(bin_str) == 64 and bin_str[0] == '1': - # negative numbers use all 64 bits and will start with a 1. 
- # take the ones complement, add 1, then put a negative sign in front - sub_bin_str = ones_comp(bin_str) - value = -(int(sub_bin_str, 2) + 1) - else: - value = int(bin_str, 2) - - return value - -def varint_to_blob_length(l): - """ Blob field lengths are doubled and 12 is added so that they are even and at least 12 """ - if l == 0: - return 0 - else: - return (l - 12) / 2 - -def varint_to_text_length(l): - """ Text field lengths are doubled and 13 is added so that they are odd and at least 13 """ - if l == 0: - return 0 - else: - return (l - 13) / 2 - -def sql_unpack(buff): - """ Convert SQL integer bytes into decimal integer """ - size = len(buff) - value = "" - if size == 1: - value = struct.unpack(">b", buff)[0] - elif size == 2: - value = struct.unpack(">h", buff)[0] - elif size == 3: - tmp = "\x00" + buff - value = struct.unpack(">l", tmp)[0] - elif size == 4: - value = struct.unpack(">l", buff)[0] - elif size == 6: - tmp = "\x00\x00" + buff - value = struct.unpack(">q", tmp)[0] - elif size == 8: - value = struct.unpack(">q", buff)[0] - return value - diff --git a/sift/include-server.sls b/sift/include-server.sls index 8feb251d..adc224a3 100644 --- a/sift/include-server.sls +++ b/sift/include-server.sls @@ -1,7 +1,6 @@ include: - sift.repos - sift.packages - - sift.python-packages - sift.python3-packages - sift.perl-packages - sift.scripts @@ -12,7 +11,6 @@ sift-server-include: - require: - sls: sift.repos - sls: sift.packages - - sls: sift.python-packages - sls: sift.python3-packages - sls: sift.perl-packages - sls: sift.scripts diff --git a/sift/packages/aws-cli.sls b/sift/packages/aws-cli.sls index 46b3fa3e..79ac94bf 100644 --- a/sift/packages/aws-cli.sls +++ b/sift/packages/aws-cli.sls @@ -4,22 +4,23 @@ # Category: # Author: Amazon # License: Apache License v2.0 (https://github.com/aws/aws-cli/blob/develop/LICENSE.txt) -# Version: 2.15.21 +# Version: 2.15.24 # Notes: -{% set hash = '77c6c3d1ac23638b07a02587ed6b1d4adc84c561c9ecc7df1ff3778e43a4b49b' %} +{% 
set hash = 'c939005170508b19f53730ead3e15b959ec2d9d078a44668e43275864d73ef2f' %} +{% set version = '2.15.24' %} aws-cli-download: file.managed: - - name: /tmp/awscli-exe-linux-x86_64.zip - - source: https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip + - name: /tmp/awscli-exe-linux-x86_64-{{ version }}.zip + - source: https://awscli.amazonaws.com/awscli-exe-linux-x86_64-{{ version }}.zip - source_hash: sha256={{ hash }} - makedirs: True aws-cli-extract: archive.extracted: - name: /tmp/ - - source: /tmp/awscli-exe-linux-x86_64.zip + - source: /tmp/awscli-exe-linux-x86_64-{{ version }}.zip - enforce_toplevel: False - overwrite: True - trim_output: True @@ -37,7 +38,7 @@ aws-cli-install: aws-cli-cleanup: file.absent: - names: - - /tmp/awscli-exe-linux-x86_64.zip + - /tmp/awscli-exe-linux-x86_64-{{ version }}.zip - /tmp/aws/ - require: - cmd: aws-cli-install diff --git a/sift/packages/init.sls b/sift/packages/init.sls index bb451a40..f72df38e 100644 --- a/sift/packages/init.sls +++ b/sift/packages/init.sls @@ -6,6 +6,7 @@ include: - sift.packages.apache2 - sift.packages.arp-scan - sift.packages.autopsy + - sift.packages.aws-cli - sift.packages.avfs - sift.packages.bless - sift.packages.blt @@ -211,6 +212,7 @@ sift-packages: - sls: sift.packages.apache2 - sls: sift.packages.arp-scan - sls: sift.packages.autopsy + - sls: sift.packages.aws-cli - sls: sift.packages.avfs - sls: sift.packages.bless - sls: sift.packages.blt diff --git a/sift/packages/powershell.sls b/sift/packages/powershell.sls index 089f41c2..cd1d01f5 100644 --- a/sift/packages/powershell.sls +++ b/sift/packages/powershell.sls @@ -1,5 +1,5 @@ {# renovate: datasource=github-release-attachments depName=Powershell/Powershell #} -{%- set version = "7.4.1" -%} +{%- set version = "7.4.6" -%} {%- set hash = "625B7EE0B71147421723CB6022A41B5D8FC0D6E19DF25B1240008EE491BF6997" -%} {%- set filename = "powershell_" ~ version ~ "-1.deb_amd64.deb" -%} {%- set base_url = 
"https://github.com/Powershell/Powershell/releases/download/v" -%} diff --git a/sift/packages/python-wxgtk3.sls b/sift/packages/python-wxgtk3.sls deleted file mode 100644 index bae4bd18..00000000 --- a/sift/packages/python-wxgtk3.sls +++ /dev/null @@ -1,8 +0,0 @@ -include: - - sift.packages.python2 - -sift-package-python-wxgtk3: - pkg.installed: - - name: python-wxgtk3.0 - - require: - - sls: sift.packages.python2 diff --git a/sift/packages/python2-dev.sls b/sift/packages/python2-dev.sls deleted file mode 100644 index 8fbd8c57..00000000 --- a/sift/packages/python2-dev.sls +++ /dev/null @@ -1,3 +0,0 @@ -sift-packages-python2-dev: - pkg.installed: - - name: python2-dev diff --git a/sift/packages/python2-pip.sls b/sift/packages/python2-pip.sls deleted file mode 100644 index c4c4118b..00000000 --- a/sift/packages/python2-pip.sls +++ /dev/null @@ -1,18 +0,0 @@ -include: - - sift.packages.python2 - - sift.packages.curl - -sift-package-python2-pip-install-script: - cmd.run: - - name: curl -o /tmp/get-pip.py https://bootstrap.pypa.io/pip/2.7/get-pip.py - - unless: which pip2 - - require: - - sls: sift.packages.python2 - - sls: sift.packages.curl - -sift-package-python2-pip-install: - cmd.run: - - name: python2 /tmp/get-pip.py - - unless: which pip2 - - require: - - cmd: sift-package-python2-pip-install-script diff --git a/sift/packages/python2.sls b/sift/packages/python2.sls deleted file mode 100644 index ccbd59d8..00000000 --- a/sift/packages/python2.sls +++ /dev/null @@ -1,3 +0,0 @@ -sift-package-python2: - pkg.installed: - - name: python2 diff --git a/sift/packages/python3-keyrings-alt.sls b/sift/packages/python3-keyrings-alt.sls new file mode 100644 index 00000000..6b04cc9d --- /dev/null +++ b/sift/packages/python3-keyrings-alt.sls @@ -0,0 +1,3 @@ +sift-package-python3-keyrings-alt: + pkg.installed: + - name: python3-keyrings.alt diff --git a/sift/packages/python3-setuptools-rust.sls b/sift/packages/python3-setuptools-rust.sls new file mode 100644 index 
00000000..b4107de2 --- /dev/null +++ b/sift/packages/python3-setuptools-rust.sls @@ -0,0 +1,3 @@ +sift-package-python3-setuptools-rust: + pkg.installed: + - name: python3-setuptools-rust diff --git a/sift/packages/virtualenv.sls b/sift/packages/virtualenv.sls new file mode 100644 index 00000000..d7075d1e --- /dev/null +++ b/sift/packages/virtualenv.sls @@ -0,0 +1,3 @@ +sift-package-virtualenv: + pkg.installed: + - name: virtualenv diff --git a/sift/python-packages/analyzemft.sls b/sift/python-packages/analyzemft.sls deleted file mode 100644 index 0f6cd22b..00000000 --- a/sift/python-packages/analyzemft.sls +++ /dev/null @@ -1,14 +0,0 @@ -{%- set commit="64c71d7c8905a119b7abdf9813e6ef5f11d3ccf1" -%} -include: - - sift.packages.git - - sift.packages.python3-pip - - sift.packages.python2-pip - -analyzemft: - pip.installed: - - name: git+https://github.com/dkovar/analyzeMFT.git@{{ commit }} - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.git - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/appcompatprocessor.sls b/sift/python-packages/appcompatprocessor.sls deleted file mode 100644 index b81a040c..00000000 --- a/sift/python-packages/appcompatprocessor.sls +++ /dev/null @@ -1,19 +0,0 @@ -{%- set commit="6c847937c5a836e2ce2fe2b915f213c345a3c389" -%} - -include: - - sift.packages.git - - sift.packages.python3-pip - - sift.packages.python2-pip - - sift.packages.libregf - - sift.packages.python2-dev - -appcompatprocessor: - pip.installed: - - name: git+https://github.com/mbevilacqua/appcompatprocessor.git@{{ commit }} - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.git - - sls: sift.packages.python2-pip - - sls: sift.packages.libregf - - sls: sift.packages.python2-dev diff --git a/sift/python-packages/argparse.sls b/sift/python-packages/argparse.sls deleted file mode 100644 index b5bd02d0..00000000 --- a/sift/python-packages/argparse.sls +++ /dev/null @@ -1,11 +0,0 @@ -include: - - 
sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-packages-argparse: - pip.installed: - - name: argparse - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/bitstring.sls b/sift/python-packages/bitstring.sls deleted file mode 100644 index 878d939b..00000000 --- a/sift/python-packages/bitstring.sls +++ /dev/null @@ -1,11 +0,0 @@ -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-packages-bitstring: - pip.installed: - - name: bitstring - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/capstone.sls b/sift/python-packages/capstone.sls deleted file mode 100644 index 6ead9242..00000000 --- a/sift/python-packages/capstone.sls +++ /dev/null @@ -1,16 +0,0 @@ -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-packages-pkg-remove: - pkg.removed: - - name: python-capstone - -sift-python-packages-capstone: - pip.installed: - - name: capstone - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip - - pkg: sift-python-packages-pkg-remove diff --git a/sift/python-packages/colorama.sls b/sift/python-packages/colorama.sls deleted file mode 100644 index e1acfec8..00000000 --- a/sift/python-packages/colorama.sls +++ /dev/null @@ -1,11 +0,0 @@ -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-packages-colorama: - pip.installed: - - name: colorama - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/construct.sls b/sift/python-packages/construct.sls deleted file mode 100644 index 008885d5..00000000 --- a/sift/python-packages/construct.sls +++ /dev/null @@ -1,11 +0,0 @@ -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-packages-construct: - pip.installed: - - 
name: construct == 2.10.54 - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/distorm3.sls b/sift/python-packages/distorm3.sls deleted file mode 100644 index 7a819d96..00000000 --- a/sift/python-packages/distorm3.sls +++ /dev/null @@ -1,12 +0,0 @@ -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - - sift.packages.python2-dev - -sift-python-packages-distorm3: - pip.installed: - - name: distorm3 == 3.4.4 - - bin_env: /usr/bin/python2 - - require: - - sls: sift.packages.python2-pip - - sls: sift.packages.python2-dev diff --git a/sift/python-packages/docopt.sls b/sift/python-packages/docopt.sls deleted file mode 100644 index 745b1cc3..00000000 --- a/sift/python-packages/docopt.sls +++ /dev/null @@ -1,11 +0,0 @@ -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-packages-docopt: - pip.installed: - - name: docopt - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/dpapick.sls b/sift/python-packages/dpapick.sls deleted file mode 100644 index 88d209ce..00000000 --- a/sift/python-packages/dpapick.sls +++ /dev/null @@ -1,18 +0,0 @@ -# Note: not included in init.sls, only required by volatility - -include: - - sift.packages.libssl-dev - - sift.packages.python3-pip - - sift.packages.python2-pip - - sift.python-packages.m2crypto - -dpapick: - pip.installed: - - name: dpapick - - upgrade: True - - bin_env: /usr/bin/python2 - #- install_options: --upgrade-strategy=only-if-needed - - require: - - sls: sift.packages.libssl-dev - - sls: sift.packages.python2-pip - - sls: sift.python-packages.m2crypto diff --git a/sift/python-packages/geoip2.sls b/sift/python-packages/geoip2.sls deleted file mode 100644 index 501d06ed..00000000 --- a/sift/python-packages/geoip2.sls +++ /dev/null @@ -1,11 +0,0 @@ -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - 
-sift-python-packages-geoip2: - pip.installed: - - name: geoip2 - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/indxparse.sls b/sift/python-packages/indxparse.sls deleted file mode 100644 index b02c1f69..00000000 --- a/sift/python-packages/indxparse.sls +++ /dev/null @@ -1,41 +0,0 @@ -{% if grains['oscodename'] != "jammy" %} - -{%- set user = salt['pillar.get']('sift_user', 'sansforensics') -%} -{%- set commit = "ca08236b0f70798cb6f89785820c9b82ee0c66ff" -%} - -include: - - sift.packages.git - - sift.packages.g++ - - sift.packages.libfuse-dev - - sift.packages.pkg-config - - sift.packages.python3-pip - - sift.packages.python2-pip - - sift.packages.python2-dev - - sift.packages.python-wxgtk3 - -sift-python-packages-indxparse: - pip.installed: - - name: git+https://github.com/williballenthin/INDXParse.git@{{ commit }} - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.git - - sls: sift.packages.g++ - - sls: sift.packages.pkg-config - - sls: sift.packages.python2-pip - - sls: sift.packages.python2-dev - - sls: sift.packages.libfuse-dev - - sls: sift.packages.python-wxgtk3 - -sift-python-packages-indxparse-shebang: - file.prepend: - - name: /usr/local/bin/INDXParse.py - - text: '#!/usr/bin/env python2' - - watch: - - pip: sift-python-packages-indxparse - -{% else %} -python-wxgtk3-not-in-jammy: - test.nop - -{% endif %} diff --git a/sift/python-packages/init.sls b/sift/python-packages/init.sls deleted file mode 100644 index 6bd0298e..00000000 --- a/sift/python-packages/init.sls +++ /dev/null @@ -1,62 +0,0 @@ -include: - - sift.python-packages.analyzemft - - sift.python-packages.appcompatprocessor - - sift.python-packages.argparse - - sift.python-packages.bitstring - - sift.python-packages.colorama - - sift.python-packages.construct - - sift.python-packages.distorm3 - - sift.python-packages.docopt - - sift.python-packages.geoip2 - - 
sift.python-packages.indxparse - - sift.python-packages.ioc_writer - - sift.python-packages.lxml - - sift.python-packages.ntdsxtract - - sift.python-packages.pefile - - sift.python-packages.pillow - - sift.python-packages.poster - - sift.python-packages.pysocks - - sift.python-packages.python-dateutil - - sift.python-packages.python-magic - - sift.python-packages.python-registry - - sift.python-packages.setuptools - - sift.python-packages.shellbags - - sift.python-packages.six - - sift.python-packages.unicodecsv - - sift.python-packages.usnparser - - sift.python-packages.volatility - - sift.python-packages.wheel - - sift.python-packages.windowsprefetch - -sift-python-packages: - test.nop: - - name: sift-python-packages - - require: - - sls: sift.python-packages.analyzemft - - sls: sift.python-packages.appcompatprocessor - - sls: sift.python-packages.argparse - - sls: sift.python-packages.bitstring - - sls: sift.python-packages.colorama - - sls: sift.python-packages.construct - - sls: sift.python-packages.distorm3 - - sls: sift.python-packages.docopt - - sls: sift.python-packages.geoip2 - - sls: sift.python-packages.indxparse - - sls: sift.python-packages.ioc_writer - - sls: sift.python-packages.lxml - - sls: sift.python-packages.ntdsxtract - - sls: sift.python-packages.pefile - - sls: sift.python-packages.pillow - - sls: sift.python-packages.poster - - sls: sift.python-packages.pysocks - - sls: sift.python-packages.python-dateutil - - sls: sift.python-packages.python-magic - - sls: sift.python-packages.python-registry - - sls: sift.python-packages.setuptools - - sls: sift.python-packages.shellbags - - sls: sift.python-packages.six - - sls: sift.python-packages.unicodecsv - - sls: sift.python-packages.usnparser - - sls: sift.python-packages.volatility - - sls: sift.python-packages.wheel - - sls: sift.python-packages.windowsprefetch diff --git a/sift/python-packages/ioc_writer.sls b/sift/python-packages/ioc_writer.sls deleted file mode 100644 index 076521eb..00000000 
--- a/sift/python-packages/ioc_writer.sls +++ /dev/null @@ -1,14 +0,0 @@ -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - - sift.python-packages.lxml - -sift-python-packages-ioc-writer: - pip.installed: - - name: ioc_writer - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip - - sls: sift.python-packages.lxml - diff --git a/sift/python-packages/lxml.sls b/sift/python-packages/lxml.sls deleted file mode 100644 index 34077869..00000000 --- a/sift/python-packages/lxml.sls +++ /dev/null @@ -1,14 +0,0 @@ -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - - sift.packages.libxml2-dev - - sift.packages.libxslt-dev - -sift-python-packages-lxml: - pip.installed: - - name: lxml - - bin_env: /usr/bin/python2 - - require: - - sls: sift.packages.python2-pip - - sls: sift.packages.libxml2-dev - - sls: sift.packages.libxslt-dev diff --git a/sift/python-packages/m2crypto.sls b/sift/python-packages/m2crypto.sls deleted file mode 100644 index a005bb83..00000000 --- a/sift/python-packages/m2crypto.sls +++ /dev/null @@ -1,17 +0,0 @@ -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - - sift.packages.swig - - sift.packages.python2-dev - - sift.packages.libssl-dev - -sift-python-packages-m2crypto: - pip.installed: - - name: m2crypto==0.40.1 - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip - - sls: sift.packages.swig - - sls: sift.packages.python2-dev - - sls: sift.packages.libssl-dev diff --git a/sift/python-packages/ntdsxtract.sls b/sift/python-packages/ntdsxtract.sls deleted file mode 100644 index df847052..00000000 --- a/sift/python-packages/ntdsxtract.sls +++ /dev/null @@ -1,13 +0,0 @@ -include: - - sift.packages.git - - sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-ntdsxtract: - pip.installed: - - name: git+https://github.com/csababarta/ntdsxtract.git@7fa1c8c28cbbf97a42bef40f20009dba85e4c25f - - bin_env: 
/usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.git - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/openpyxl.sls b/sift/python-packages/openpyxl.sls deleted file mode 100644 index 6bb8d982..00000000 --- a/sift/python-packages/openpyxl.sls +++ /dev/null @@ -1,10 +0,0 @@ -include: - - sift.packages.python2-pip - - sift.packages.python3-pip - -openpyxl==2.1.2: - pip.installed: - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/pefile.sls b/sift/python-packages/pefile.sls deleted file mode 100644 index 570aa8c9..00000000 --- a/sift/python-packages/pefile.sls +++ /dev/null @@ -1,11 +0,0 @@ -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-packages-pefile: - pip.installed: - - name: pefile - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/pillow.sls b/sift/python-packages/pillow.sls deleted file mode 100644 index bf036117..00000000 --- a/sift/python-packages/pillow.sls +++ /dev/null @@ -1,11 +0,0 @@ -include: - - sift.packages.python2-pip - - sift.packages.python3-pip - -sift-python-packages-pillow: - pip.installed: - - name: pillow - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/poster.sls b/sift/python-packages/poster.sls deleted file mode 100644 index edb35706..00000000 --- a/sift/python-packages/poster.sls +++ /dev/null @@ -1,11 +0,0 @@ -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-packages-poster: - pip.installed: - - name: poster - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/pycoin.sls b/sift/python-packages/pycoin.sls deleted file mode 100644 index 100e745b..00000000 --- a/sift/python-packages/pycoin.sls +++ /dev/null @@ -1,13 
+0,0 @@ -# Note: not included in init.sls, only required by python-volatility - -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-packages-pycoin: - pip.installed: - - name: pycoin - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/pycrypto.sls b/sift/python-packages/pycrypto.sls deleted file mode 100644 index f5db146c..00000000 --- a/sift/python-packages/pycrypto.sls +++ /dev/null @@ -1,14 +0,0 @@ -# This package is only required for volatility, so it is not included in the init.sls - -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-packages-pycrypto: - pip.installed: - - name: pycrypto - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip - diff --git a/sift/python-packages/pydasm.sls b/sift/python-packages/pydasm.sls deleted file mode 100644 index 179f2660..00000000 --- a/sift/python-packages/pydasm.sls +++ /dev/null @@ -1,14 +0,0 @@ -include: - - sift.packages.git - - sift.packages.python3-pip - - sift.packages.python2-pip - -pydasm: - pip.installed: - - name: git+https://github.com/jtpereyda/libdasm.git@68d61b1#egg=version_subpkg&subdirectory=pydasm - - bin_env: /usr/bin/python2 -# - editable: git+https://github.com/jtpereyda/libdasm.git@68d61b1#egg=version_subpkg&subdirectory=pydasm - - upgrade: True - - require: - - sls: sift.packages.git - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/pysocks.sls b/sift/python-packages/pysocks.sls deleted file mode 100644 index e7b588cc..00000000 --- a/sift/python-packages/pysocks.sls +++ /dev/null @@ -1,10 +0,0 @@ -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-packages-pysocks: - pip.installed: - - name: pysocks - - bin_env: /usr/bin/python2 - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/python-dateutil.sls 
b/sift/python-packages/python-dateutil.sls deleted file mode 100644 index aec61491..00000000 --- a/sift/python-packages/python-dateutil.sls +++ /dev/null @@ -1,10 +0,0 @@ -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-packages-python-dateutil: - pip.installed: - - name: python-dateutil >= 2.4.2 - - bin_env: /usr/bin/python2 - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/python-magic.sls b/sift/python-packages/python-magic.sls deleted file mode 100644 index 01bf4af0..00000000 --- a/sift/python-packages/python-magic.sls +++ /dev/null @@ -1,11 +0,0 @@ -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-packages-python-magic: - pip.installed: - - name: python-magic - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/python-registry.sls b/sift/python-packages/python-registry.sls deleted file mode 100644 index a68ed743..00000000 --- a/sift/python-packages/python-registry.sls +++ /dev/null @@ -1,11 +0,0 @@ -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-packages-python-registry: - pip.installed: - - name: python-registry - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/requests.sls b/sift/python-packages/requests.sls deleted file mode 100644 index 74913c29..00000000 --- a/sift/python-packages/requests.sls +++ /dev/null @@ -1,11 +0,0 @@ -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-packages-requests: - pip.installed: - - name: requests - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/setuptools.sls b/sift/python-packages/setuptools.sls deleted file mode 100644 index 51fb55df..00000000 --- a/sift/python-packages/setuptools.sls +++ /dev/null @@ -1,11 +0,0 @@ 
-include: - - sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-packages-setuptools: - pip.installed: - - name: setuptools - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/shellbags.sls b/sift/python-packages/shellbags.sls deleted file mode 100644 index dea62f42..00000000 --- a/sift/python-packages/shellbags.sls +++ /dev/null @@ -1,15 +0,0 @@ -{%- set user = salt['pillar.get']('sift_user', 'sansforensics') -%} - -include: - - sift.packages.git - - sift.packages.python3-pip - - sift.packages.python2-pip - -shellbags: - pip.installed: - - name: git+https://github.com/williballenthin/shellbags.git@fee76eb - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.git - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/simplejson.sls b/sift/python-packages/simplejson.sls deleted file mode 100644 index 8681f5a3..00000000 --- a/sift/python-packages/simplejson.sls +++ /dev/null @@ -1,13 +0,0 @@ -# Note: not included in init.sls, only required by python-volatility - -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-packages-simplejson: - pip.installed: - - name: simplejson - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/six.sls b/sift/python-packages/six.sls deleted file mode 100644 index 1a095ac8..00000000 --- a/sift/python-packages/six.sls +++ /dev/null @@ -1,11 +0,0 @@ -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-packages-six: - pip.installed: - - name: six - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/unicodecsv.sls b/sift/python-packages/unicodecsv.sls deleted file mode 100644 index 686df4ac..00000000 --- a/sift/python-packages/unicodecsv.sls +++ /dev/null @@ -1,11 +0,0 @@ -include: - - 
sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-packages-unicodecsv: - pip.installed: - - name: unicodecsv - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/usnparser.sls b/sift/python-packages/usnparser.sls deleted file mode 100644 index 9fbe5ea7..00000000 --- a/sift/python-packages/usnparser.sls +++ /dev/null @@ -1,11 +0,0 @@ -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-packages-usnparser: - pip.installed: - - name: usnparser - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/volatility.sls b/sift/python-packages/volatility.sls deleted file mode 100644 index 97f27649..00000000 --- a/sift/python-packages/volatility.sls +++ /dev/null @@ -1,120 +0,0 @@ -{%- set remove_plugins = ["malprocfind.py","idxparser.py","chromehistory.py","mimikatz.py","openioc_scan.py","pstotal.py","firefoxhistory.py","autoruns.py","malfinddeep.py","prefetch.py","ssdeepscan.py","uninstallinfo.py","trustrecords.py","usnparser.py","apihooksdeep.py","editbox.py","javarat.py"] -%} - -# Name: Volatility Framework -# Website: https://github.com/volatilityfoundation/volatility -# Description: Memory forensics tool and framework -# Category: Perform Memory Forensics -# Author: https://github.com/volatilityfoundation/volatility/blob/2.6.1/AUTHORS.txt -# License: GNU General Public License (GPL) v2: https://github.com/volatilityfoundation/volatility/blob/2.6.1/LICENSE.txt -# Notes: Use vol.py to invoke this version of Volatility. To eliminate conflicts among command-line options for Volatility plugins, the following `yarascan` options have been changed: `-Y` became `-U` and `-C` became `-c`. 
- -include: - - sift.packages.git - - sift.packages.python3-pip - - sift.packages.python2-pip - - sift.python-packages.colorama - - sift.python-packages.construct - - sift.python-packages.dpapick - - sift.python-packages.distorm3 - - sift.python-packages.ioc_writer - - sift.python-packages.lxml - - sift.python-packages.openpyxl - - sift.python-packages.pefile - - sift.python-packages.pillow - - sift.python-packages.pycoin - - sift.python-packages.pycrypto - - sift.python-packages.pysocks - - sift.python-packages.requests - - sift.python-packages.simplejson - - sift.python-packages.yara-python - -sift-python-packages-volatility: - pip.installed: - - name: git+https://github.com/volatilityfoundation/volatility.git@master - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.git - - sls: sift.packages.python2-pip - -sift-python-volatility-community-plugins: - git.latest: - - name: https://github.com/sans-dfir/volatility-plugins-community.git - # Note: This path changed to /usr/local/lib starting in 18.04 - - target: /usr/local/lib/python2.7/dist-packages/volatility/plugins/community - - user: root - - branch: master - - force_fetch: True - - force_checkout: True - - force_clone: True - - force_reset: True - - require: - - sls: sift.packages.git - - pip: sift-python-packages-volatility - - sls: sift.python-packages.colorama - - sls: sift.python-packages.construct - - sls: sift.python-packages.dpapick - - sls: sift.python-packages.distorm3 - - sls: sift.python-packages.ioc_writer - - sls: sift.python-packages.lxml - - sls: sift.python-packages.openpyxl - - sls: sift.python-packages.pefile - - sls: sift.python-packages.pillow - - sls: sift.python-packages.pycoin - - sls: sift.python-packages.pycrypto - - sls: sift.python-packages.pysocks - - sls: sift.python-packages.requests - - sls: sift.python-packages.simplejson - - sls: sift.python-packages.yara-python - -sift-python-volatility-sift-plugins: - file.recurse: - # Note: This path changed 
to /usr/local/lib starting in 18.04 - - name: /usr/local/lib/python2.7/dist-packages/volatility/plugins/sift/ - - source: salt://sift/files/volatility - - makedirs: True - - file_mode: 644 - - include_pat: '*.py' - - watch: - - git: sift-python-volatility-community-plugins - - pip: sift-python-packages-volatility - -{% for plugin in remove_plugins -%} -sift-python-volatility-plugins-{{ plugin }}-absent: - file.absent: - # Note: This path changed to /usr/local/lib starting in 18.04 - - name: /usr/local/lib/python2.7/dist-packages/volatility/plugins/{{ plugin }} - - watch: - - git: sift-python-volatility-community-plugins - - pip: sift-python-packages-volatility -{% endfor -%} - -sift-python-volatility-mimikatz-plugin-update: - file.managed: - - name: /usr/local/lib/python2.7/dist-packages/volatility/plugins/community/FrancescoPicasso/mimikatz.py - - source: https://github.com/RealityNet/hotoloti/raw/master/volatility/mimikatz.py - - source_hash: sha256=75e2e6d3b09daffad83211ba0215ed3f204623b8c37c2a2950665b88a3d2ce86 - - mode: 644 - - watch: - - git: sift-python-volatility-community-plugins - - pip: sift-python-packages-volatility - -sift-python-packages-volatility-malfind-yarascan-options1: - file.replace: - - name: /usr/local/lib/python2.7/dist-packages/volatility/plugins/malware/malfind.py - - pattern: short_option = 'C' - - repl: short_option = 'c' - - prepend_if_not_found: False - - count: 1 - - require: - - git: sift-python-volatility-community-plugins - -sift-python-packages-volatility-malfind-yarascan-options2: - file.replace: - - name: /usr/local/lib/python2.7/dist-packages/volatility/plugins/malware/malfind.py - - pattern: short_option = 'Y' - - repl: short_option = 'U' - - prepend_if_not_found: False - - count: 1 - - require: - - file: sift-python-packages-volatility-malfind-yarascan-options1 diff --git a/sift/python-packages/wheel.sls b/sift/python-packages/wheel.sls deleted file mode 100644 index 815bac2c..00000000 --- a/sift/python-packages/wheel.sls 
+++ /dev/null @@ -1,11 +0,0 @@ -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-packages-wheel: - pip.installed: - - name: wheel - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/windowsprefetch.sls b/sift/python-packages/windowsprefetch.sls deleted file mode 100644 index 02beb15d..00000000 --- a/sift/python-packages/windowsprefetch.sls +++ /dev/null @@ -1,11 +0,0 @@ -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-packages-windowsprefetch: - pip.installed: - - name: windowsprefetch - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python-packages/yara-python.sls b/sift/python-packages/yara-python.sls deleted file mode 100644 index 5fec01d0..00000000 --- a/sift/python-packages/yara-python.sls +++ /dev/null @@ -1,13 +0,0 @@ -# Note: not included in init.sls, only required by python-volatility - -include: - - sift.packages.python3-pip - - sift.packages.python2-pip - -sift-python-packages-yara-python: - pip.installed: - - name: yara-python - - bin_env: /usr/bin/python2 - - upgrade: True - - require: - - sls: sift.packages.python2-pip diff --git a/sift/python3-packages/analyzemft.sls b/sift/python3-packages/analyzemft.sls new file mode 100644 index 00000000..03959820 --- /dev/null +++ b/sift/python3-packages/analyzemft.sls @@ -0,0 +1,10 @@ +include: + - sift.python3-packages.core + +analyzemft: + pip.installed: + - name: analyzeMFT + - bin_env: /usr/bin/python3 + - upgrade: True + - require: + - sls: sift.python3-packages.core diff --git a/sift/python3-packages/argparse.sls b/sift/python3-packages/argparse.sls index c3226407..1921d08d 100644 --- a/sift/python3-packages/argparse.sls +++ b/sift/python3-packages/argparse.sls @@ -1,9 +1,9 @@ include: - - sift.python3-packages.pip + - sift.python3-packages.core sift-python3-packages-argparse: pip.installed: - 
name: argparse - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: sift.python3-packages.core diff --git a/sift/python3-packages/bitstring.sls b/sift/python3-packages/bitstring.sls index 3f0eb9b9..7d5ca3bd 100644 --- a/sift/python3-packages/bitstring.sls +++ b/sift/python3-packages/bitstring.sls @@ -1,9 +1,9 @@ include: - - sift.python3-packages.pip + - sift.python3-packages.core sift-python3-packages-bitstring: pip.installed: - name: bitstring - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: sift.python3-packages.core diff --git a/sift/python3-packages/capstone.sls b/sift/python3-packages/capstone.sls new file mode 100644 index 00000000..f5eda6e2 --- /dev/null +++ b/sift/python3-packages/capstone.sls @@ -0,0 +1,10 @@ +include: + - sift.python3-packages.core + +sift-python3-package-capstone: + pip.installed: + - name: capstone + - bin_env: /usr/bin/python3 + - upgrade: True + - require: + - sls: sift.python3-packages.core diff --git a/sift/python3-packages/colorama.sls b/sift/python3-packages/colorama.sls index 57033ee4..2b985b26 100644 --- a/sift/python3-packages/colorama.sls +++ b/sift/python3-packages/colorama.sls @@ -1,9 +1,9 @@ include: - - sift.python3-packages.pip + - sift.python3-packages.core sift-python3-packages-colorama: pip.installed: - name: colorama - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: sift.python3-packages.core diff --git a/sift/python3-packages/core.sls b/sift/python3-packages/core.sls new file mode 100644 index 00000000..3b4cd898 --- /dev/null +++ b/sift/python3-packages/core.sls @@ -0,0 +1,12 @@ +include: + - sift.python3-packages.pip + - sift.python3-packages.wheel + - sift.python3-packages.setuptools + +sift-python3-packages-core: + test.nop: + - name: python3-packages-core + - require: + - sls: sift.python3-packages.pip + - sls: sift.python3-packages.wheel + - sls: sift.python3-packages.setuptools diff --git 
a/sift/python3-packages/defang.sls b/sift/python3-packages/defang.sls index d1bf3c66..74e31cb9 100644 --- a/sift/python3-packages/defang.sls +++ b/sift/python3-packages/defang.sls @@ -1,11 +1,11 @@ # WEBSITE: https://github.com/HurricaneLabs/machinae # LICENSE: MIT include: - - sift.python3-packages.pip + - sift.python3-packages.core sift-python3-packages-defang: pip.installed: - - name: defang==0.5.2 + - name: defang - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: sift.python3-packages.core diff --git a/sift/python3-packages/geoip2.sls b/sift/python3-packages/geoip2.sls index cfb7349d..0d4224a5 100644 --- a/sift/python3-packages/geoip2.sls +++ b/sift/python3-packages/geoip2.sls @@ -1,9 +1,9 @@ include: - - sift.python3-packages.pip + - sift.python3-packages.core sift-python3-packages-geoip2: pip.installed: - name: geoip2 - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: sift.python3-packages.core diff --git a/sift/python3-packages/ijson.sls b/sift/python3-packages/ijson.sls index 75ad193c..7558c4e4 100644 --- a/sift/python3-packages/ijson.sls +++ b/sift/python3-packages/ijson.sls @@ -1,9 +1,9 @@ include: - - sift.python3-packages.pip + - sift.python3-packages.core sift-python3-packages-ijson: pip.installed: - name: ijson - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: sift.python3-packages.core diff --git a/sift/python3-packages/imagemounter.sls b/sift/python3-packages/imagemounter.sls index 107c9656..478aab0c 100644 --- a/sift/python3-packages/imagemounter.sls +++ b/sift/python3-packages/imagemounter.sls @@ -1,5 +1,5 @@ include: - - sift.python3-packages.pip + - sift.python3-packages.core - sift.python3-packages.python-magic - sift.packages.afflib-tools - sift.packages.avfs @@ -24,7 +24,7 @@ sift-python3-packages-imagemounter: - name: imagemounter - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: sift.python3-packages.core - sls: 
sift.python3-packages.python-magic - sls: sift.packages.afflib-tools - sls: sift.packages.avfs diff --git a/sift/python3-packages/indxparse.sls b/sift/python3-packages/indxparse.sls new file mode 100644 index 00000000..0a269785 --- /dev/null +++ b/sift/python3-packages/indxparse.sls @@ -0,0 +1,16 @@ +{%- set user = salt['pillar.get']('sift_user', 'sansforensics') -%} +{%- set commit = "038e8ec836cf23600124db74b40757b7184c08c5" -%} + +include: + - sift.packages.git + - sift.packages.python3-pip + - sift.packages.python3-dev + +sift-python-packages-indxparse: + pip.installed: + - name: git+https://github.com/williballenthin/INDXParse.git@{{ commit }} + - bin_env: /usr/bin/python3 + - require: + - sls: sift.packages.git + - sls: sift.packages.python3-pip + - sls: sift.packages.python3-dev diff --git a/sift/python3-packages/init.sls b/sift/python3-packages/init.sls index 4d8a3aff..fde068cb 100644 --- a/sift/python3-packages/init.sls +++ b/sift/python3-packages/init.sls @@ -1,13 +1,13 @@ include: - - sift.python3-packages.python3-keyring - sift.python3-packages.pip - sift.python3-packages.python3-keyring + - sift.python3-packages.analyzemft - sift.python3-packages.argparse - sift.python3-packages.bitstring - sift.python3-packages.colorama - sift.python3-packages.geoip2 - sift.python3-packages.ioc_writer -### - sift.python3-packages.imagemounter + - sift.python3-packages.imagemounter - sift.python3-packages.keyrings-alt - sift.python3-packages.lxml - sift.python3-packages.machinae @@ -18,28 +18,31 @@ include: - sift.python3-packages.python-evtx - sift.python3-packages.python-magic - sift.python3-packages.python-registry - - sift.python3-packages.setuptools - sift.python3-packages.setuptools-rust - sift.python3-packages.six - sift.python3-packages.stix-validator - sift.python3-packages.stix - sift.python3-packages.virustotal-api - - sift.python3-packages.wheel - sift.python3-packages.yara-python + - sift.python3-packages.indxparse + - sift.python3-packages.pe-carver + - 
sift.python3-packages.usbdeviceforensics + - sift.python3-packages.usnparser + - sift.python3-packages.windowsprefetch sift-python3-packages: test.nop: - name: sift-python3-packages - require: - - sls: sift.python3-packages.python3-keyring - sls: sift.python3-packages.pip - sls: sift.python3-packages.python3-keyring + - sls: sift.python3-packages.analyzemft - sls: sift.python3-packages.argparse - sls: sift.python3-packages.bitstring - sls: sift.python3-packages.colorama - sls: sift.python3-packages.geoip2 - sls: sift.python3-packages.ioc_writer -### - sls: sift.python3-packages.imagemounter + - sls: sift.python3-packages.imagemounter - sls: sift.python3-packages.keyrings-alt - sls: sift.python3-packages.lxml - sls: sift.python3-packages.machinae @@ -50,11 +53,14 @@ sift-python3-packages: - sls: sift.python3-packages.python-evtx - sls: sift.python3-packages.python-magic - sls: sift.python3-packages.python-registry - - sls: sift.python3-packages.setuptools - sls: sift.python3-packages.setuptools-rust - sls: sift.python3-packages.six - sls: sift.python3-packages.stix-validator - sls: sift.python3-packages.stix - sls: sift.python3-packages.virustotal-api - - sls: sift.python3-packages.wheel - sls: sift.python3-packages.yara-python + - sls: sift.python3-packages.indxparse + - sls: sift.python3-packages.pe-carver + - sls: sift.python3-packages.usbdeviceforensics + - sls: sift.python3-packages.usnparser + - sls: sift.python3-packages.windowsprefetch diff --git a/sift/python3-packages/ioc_writer.sls b/sift/python3-packages/ioc_writer.sls index 34161993..0c271746 100644 --- a/sift/python3-packages/ioc_writer.sls +++ b/sift/python3-packages/ioc_writer.sls @@ -1,5 +1,5 @@ include: - - sift.python3-packages.pip + - sift.python3-packages.core - sift.python3-packages.lxml - sift.python3-packages.yara-python @@ -8,6 +8,6 @@ sift-python3-packages-ioc-writer: - name: ioc_writer - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: 
sift.python3-packages.core - sls: sift.python3-packages.lxml - sls: sift.python3-packages.yara-python diff --git a/sift/python3-packages/keyrings-alt.sls b/sift/python3-packages/keyrings-alt.sls index 6761c66e..6f96b9b2 100644 --- a/sift/python3-packages/keyrings-alt.sls +++ b/sift/python3-packages/keyrings-alt.sls @@ -1,9 +1,9 @@ include: - - sift.python3-packages.pip + - sift.python3-packages.core sift-python3-packages-keyrings-alt: pip.installed: - name: keyrings.alt - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: sift.python3-packages.core diff --git a/sift/python3-packages/lxml.sls b/sift/python3-packages/lxml.sls index 98bd67f5..af9c3605 100644 --- a/sift/python3-packages/lxml.sls +++ b/sift/python3-packages/lxml.sls @@ -1,5 +1,5 @@ include: - - sift.python3-packages.pip + - sift.python3-packages.core - sift.packages.libxml2-dev - sift.packages.libxslt-dev @@ -8,6 +8,6 @@ sift-python3-packages-lxml: - name: lxml - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: sift.python3-packages.core - sls: sift.packages.libxml2-dev - sls: sift.packages.libxslt-dev diff --git a/sift/python3-packages/machinae.sls b/sift/python3-packages/machinae.sls index 5203d7d2..8a43b892 100644 --- a/sift/python3-packages/machinae.sls +++ b/sift/python3-packages/machinae.sls @@ -1,7 +1,7 @@ # WEBSITE: https://github.com/HurricaneLabs/machinae # LICENSE: MIT include: - - sift.python3-packages.pip + - sift.python3-packages.core - sift.python3-packages.defang sift-python3-packages-machinae: @@ -9,5 +9,5 @@ sift-python3-packages-machinae: - name: machinae - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: sift.python3-packages.core - sls: sift.python3-packages.defang diff --git a/sift/python3-packages/pe-carver.sls b/sift/python3-packages/pe-carver.sls new file mode 100644 index 00000000..6a31c165 --- /dev/null +++ b/sift/python3-packages/pe-carver.sls @@ -0,0 +1,13 @@ +# 
source=https://github.com/digitalsleuth/pe-carver +# license=apache2.0 + +include: + - sift.python3-packages.core + +sift-python3-package-pe-carver: + pip.installed: + - name: pe-carver + - bin_env: /usr/bin/python3 + - upgrade: True + - require: + - sls: sift.python3-packages.core diff --git a/sift/python3-packages/pe-scanner.sls b/sift/python3-packages/pe-scanner.sls new file mode 100644 index 00000000..23cd1384 --- /dev/null +++ b/sift/python3-packages/pe-scanner.sls @@ -0,0 +1,15 @@ +# source=https://github.com/digitalsleuth/pe-scanner +# license=gplv3.0 + +include: + - sift.python3-packages.core + - sift.packages.git + +sift-python3-package-pe-scanner: + pip.installed: + - name: git+https://github.com/digitalsleuth/pe-scanner.git + - bin_env: /usr/bin/python3 + - upgrade: True + - require: + - sls: sift.python3-packages.core + - sls: sift.packages.git diff --git a/sift/python3-packages/pefile.sls b/sift/python3-packages/pefile.sls index 2602c403..ef69bdb1 100644 --- a/sift/python3-packages/pefile.sls +++ b/sift/python3-packages/pefile.sls @@ -1,10 +1,10 @@ include: - - sift.python3-packages.pip + - sift.python3-packages.core sift-python3-packages-pefile: pip.installed: - name: pefile - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: sift.python3-packages.core diff --git a/sift/python3-packages/pillow.sls b/sift/python3-packages/pillow.sls index 5c5cfbe7..6bc3c3e0 100644 --- a/sift/python3-packages/pillow.sls +++ b/sift/python3-packages/pillow.sls @@ -1,9 +1,9 @@ include: - - sift.python3-packages.pip + - sift.python3-packages.core sift-python3-packages-pillow: pip.installed: - name: pillow - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: sift.python3-packages.core diff --git a/sift/python3-packages/pip.sls b/sift/python3-packages/pip.sls index 1f059aeb..478248da 100644 --- a/sift/python3-packages/pip.sls +++ b/sift/python3-packages/pip.sls @@ -1,9 +1,8 @@ -include: - - sift.packages.python3-pip 
- sift-python3-packages-pip: - pip.installed: - - name: pip==21.0.1 - - bin_env: /usr/bin/python3 + cmd.run: + - names: + - /usr/bin/python3 -m pip install --upgrade pip + - /usr/bin/python3 -m pip install setuptools==70.0.0 + - /usr/bin/python3 -m pip install --upgrade wheel - require: - - sls: sift.packages.python3-pip + - pkg: python3-pip diff --git a/sift/python3-packages/pyhindsight.sls b/sift/python3-packages/pyhindsight.sls index 3fe531af..9aab70fb 100644 --- a/sift/python3-packages/pyhindsight.sls +++ b/sift/python3-packages/pyhindsight.sls @@ -1,5 +1,5 @@ include: - - sift.python3-packages.pip + - sift.python3-packages.core - sift.python3-packages.setuptools-rust - sift.python3-packages.keyrings-alt @@ -8,7 +8,7 @@ sift-python3-packages-pyhindsight: - name: pyhindsight - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: sift.python3-packages.core - sls: sift.python3-packages.setuptools-rust - sls: sift.python3-packages.keyrings-alt diff --git a/sift/python3-packages/python-dateutil.sls b/sift/python3-packages/python-dateutil.sls index b347a7d9..33f989af 100644 --- a/sift/python3-packages/python-dateutil.sls +++ b/sift/python3-packages/python-dateutil.sls @@ -1,9 +1,9 @@ include: - - sift.python3-packages.pip + - sift.python3-packages.core sift-python3-packages-python-dateutil: pip.installed: - name: python-dateutil - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: sift.python3-packages.core diff --git a/sift/python3-packages/python-evtx.sls b/sift/python3-packages/python-evtx.sls index cde132cc..23a8c234 100644 --- a/sift/python3-packages/python-evtx.sls +++ b/sift/python3-packages/python-evtx.sls @@ -7,7 +7,7 @@ # Notes: evtx_dates.py, evtx_dump.py, evtx_dump_chunk_slack.py, evtx_dump_json.py, evtx_info.py include: - - sift.python3-packages.pip + - sift.python3-packages.core - sift.packages.git sift-python3-packages-python-evtx: @@ -15,5 +15,5 @@ sift-python3-packages-python-evtx: - name: 
git+https://github.com/williballenthin/python-evtx.git - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: sift.python3-packages.core - sls: sift.packages.git diff --git a/sift/python3-packages/python-magic.sls b/sift/python3-packages/python-magic.sls index 2c88fffd..f83ab01c 100644 --- a/sift/python3-packages/python-magic.sls +++ b/sift/python3-packages/python-magic.sls @@ -1,9 +1,9 @@ include: - - sift.python3-packages.pip + - sift.python3-packages.core sift-python3-packages-python-magic: pip.installed: - name: python-magic - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: sift.python3-packages.core diff --git a/sift/python3-packages/python-registry.sls b/sift/python3-packages/python-registry.sls index e8c3ab71..50246330 100644 --- a/sift/python3-packages/python-registry.sls +++ b/sift/python3-packages/python-registry.sls @@ -1,9 +1,9 @@ include: - - sift.python3-packages.pip + - sift.python3-packages.core sift-python3-packages-python-registry: pip.installed: - name: python-registry - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: sift.python3-packages.core diff --git a/sift/python-packages/s2sphere.sls b/sift/python3-packages/s2sphere.sls similarity index 58% rename from sift/python-packages/s2sphere.sls rename to sift/python3-packages/s2sphere.sls index b1f3ba7a..5c9831a2 100644 --- a/sift/python-packages/s2sphere.sls +++ b/sift/python3-packages/s2sphere.sls @@ -1,11 +1,10 @@ include: - sift.packages.python3-pip - - sift.packages.python2-pip sift-python-packages-s2sphere: pip.installed: - name: s2sphere - - bin_env: /usr/bin/python2 + - bin_env: /usr/bin/python3 - upgrade: True - require: - - sls: sift.packages.python2-pip + - sls: sift.packages.python3-pip diff --git a/sift/python3-packages/setuptools-rust.sls b/sift/python3-packages/setuptools-rust.sls index 439c1625..97b31abc 100644 --- a/sift/python3-packages/setuptools-rust.sls +++ 
b/sift/python3-packages/setuptools-rust.sls @@ -1,9 +1,9 @@ include: - - sift.python3-packages.pip + - sift.python3-packages.core sift-python3-packages-setuptools-rust: pip.installed: - name: setuptools_rust - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: sift.python3-packages.core diff --git a/sift/python3-packages/setuptools.sls b/sift/python3-packages/setuptools.sls index 51837150..a31bd8ec 100644 --- a/sift/python3-packages/setuptools.sls +++ b/sift/python3-packages/setuptools.sls @@ -3,7 +3,7 @@ include: sift-python3-packages-setuptools: pip.installed: - - name: 'setuptools<66.0.0' + - name: setuptools==70.0.0 - bin_env: /usr/bin/python3 - require: - sls: sift.python3-packages.pip diff --git a/sift/python3-packages/six.sls b/sift/python3-packages/six.sls index 43f7c2a2..45fdb73f 100644 --- a/sift/python3-packages/six.sls +++ b/sift/python3-packages/six.sls @@ -1,9 +1,10 @@ include: - - sift.python3-packages.pip + - sift.python3-packages.core sift-python3-packages-six: pip.installed: - name: six - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: sift.python3-packages.core + diff --git a/sift/python3-packages/stix-validator.sls b/sift/python3-packages/stix-validator.sls index e9e8d559..f840a902 100644 --- a/sift/python3-packages/stix-validator.sls +++ b/sift/python3-packages/stix-validator.sls @@ -1,5 +1,5 @@ include: - - sift.python3-packages.pip + - sift.python3-packages.core - sift.python3-packages.stix sift-python3-packages-stix-validator: @@ -7,5 +7,5 @@ sift-python3-packages-stix-validator: - name: stix-validator - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: sift.python3-packages.core - sls: sift.python3-packages.stix diff --git a/sift/python3-packages/stix.sls b/sift/python3-packages/stix.sls index fd1c2f0e..eb94e494 100644 --- a/sift/python3-packages/stix.sls +++ b/sift/python3-packages/stix.sls @@ -1,5 +1,5 @@ include: - - sift.python3-packages.pip + 
- sift.python3-packages.core - sift.python3-packages.lxml sift-python3-packages-stix: @@ -7,5 +7,5 @@ sift-python3-packages-stix: - name: stix - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: sift.python3-packages.core - sls: sift.python3-packages.lxml diff --git a/sift/python3-packages/upgrade.sls b/sift/python3-packages/upgrade.sls deleted file mode 100644 index ae8a4459..00000000 --- a/sift/python3-packages/upgrade.sls +++ /dev/null @@ -1,72 +0,0 @@ -argparse.sls: - upgrade: True -bitstring.sls: - upgrade: True -colorama.sls: - upgrade: True -geoip2.sls: - upgrade: True -ioc_writer.sls: - upgrade: True -lxml.sls: - upgrade: True -pefile.sls: - upgrade: True -pillow.sls: - upgrade: True -pyhindsight.sls: - upgrade: True -python-dateutil.sls: - upgrade: True -python-evtx.sls: - upgrade: True -python-magic.sls: - upgrade: True -python-registry.sls: - upgrade: True -setuptools.sls: - upgrade: True -six.sls: - upgrade: True -stix-validator.sls: - upgrade: True -stix.sls: - upgrade: True -virustotal-api.sls: - upgrade: True -wheel.sls: - upgrade: True -yara-python.sls: - upgrade: True - -include: - - sift.python3-packages.pip - - sift.python3-packages.argparse - - sift.python3-packages.bitstring - - sift.python3-packages.colorama - - sift.python3-packages.geoip2 - - sift.python3-packages.ioc_writer - - sift.python3-packages.lxml - - sift.python3-packages.pefile - - sift.python3-packages.pillow - - sift.python3-packages.pyhindsight - - sift.python3-packages.python-dateutil - - sift.python3-packages.python-evtx - - sift.python3-packages.python-magic - - sift.python3-packages.python-registry - - sift.python3-packages.setuptools - - sift.python3-packages.setuptools-rust - - sift.python3-packages.six - - sift.python3-packages.stix-validator - - sift.python3-packages.stix - - sift.python3-packages.virustotal-api - - sift.python3-packages.wheel - - sift.python3-packages.yara-python - -sift-python3-packages-upgrade: - cmd.run: - - name: 
/usr/bin/python3 -m pip install --upgrade argparse bitstring colorama geoip2 ioc_writer lxml pefile pillow pyhindsight python-dateutil python-evtx python-magic python-registry setuptools setuptools_rust six stix-validator stix virustotal-api wheel yara-python pip - - require: - - sls: sift.python3-packages.pip - - sls: sift.python3-packages.argparse - - sls: sift.python3-packages.bitstring - - sls: sift.python3-packages.colorama - - sls: sift.python3-packages.geoip2 - - sls: sift.python3-packages.ioc_writer - - sls: sift.python3-packages.lxml - - sls: sift.python3-packages.pefile - - sls: sift.python3-packages.pillow - - sls: sift.python3-packages.pyhindsight - - sls: sift.python3-packages.python-dateutil - - sls: sift.python3-packages.python-evtx - - sls: sift.python3-packages.python-magic - - sls: sift.python3-packages.python-registry - - sls: sift.python3-packages.setuptools - - sls: sift.python3-packages.setuptools-rust - - sls: sift.python3-packages.six - - sls: sift.python3-packages.stix-validator - - sls: sift.python3-packages.stix - - sls: sift.python3-packages.virustotal-api - - sls: sift.python3-packages.wheel - - sls: sift.python3-packages.yara-python - diff --git a/sift/python3-packages/usbdeviceforensics.sls b/sift/python3-packages/usbdeviceforensics.sls new file mode 100644 index 00000000..68c62bf4 --- /dev/null +++ b/sift/python3-packages/usbdeviceforensics.sls @@ -0,0 +1,15 @@ +# source=https://github.com/digitalsleuth/usbdeviceforensics +# license=unknown + +include: + - sift.python3-packages.core + - sift.packages.git + +sift-python3-package-usbdeviceforensics: + pip.installed: + - name: git+https://github.com/digitalsleuth/usbdeviceforensics.git + - bin_env: /usr/bin/python3 + - require: + - sls: sift.python3-packages.core + - sls: sift.packages.git + diff --git a/sift/python3-packages/usnparser.sls b/sift/python3-packages/usnparser.sls new file mode 100644 index 00000000..80d04c7c --- /dev/null +++ b/sift/python3-packages/usnparser.sls @@ -0,0 
+1,9 @@ +include: + - sift.python3-packages.core + +sift-python3-package-usnparser: + pip.installed: + - name: git+https://github.com/digitalsleuth/USN-Journal-Parser.git + - bin_env: /usr/bin/python3 + - require: + - sls: sift.python3-packages.core diff --git a/sift/python3-packages/virustotal-api.sls b/sift/python3-packages/virustotal-api.sls index 580cc85e..952eeef0 100644 --- a/sift/python3-packages/virustotal-api.sls +++ b/sift/python3-packages/virustotal-api.sls @@ -1,9 +1,9 @@ include: - - sift.python3-packages.pip + - sift.python3-packages.core sift-python3-packages-virustotal-api: pip.installed: - name: virustotal-api - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: sift.python3-packages.core diff --git a/sift/python3-packages/windowsprefetch.sls b/sift/python3-packages/windowsprefetch.sls new file mode 100644 index 00000000..32266f09 --- /dev/null +++ b/sift/python3-packages/windowsprefetch.sls @@ -0,0 +1,10 @@ +include: + - sift.python3-packages.core + +sift-python3-package-windowsprefetch: + pip.installed: + - name: windowsprefetch + - bin_env: /usr/bin/python3 + - upgrade: True + - require: + - sls: sift.python3-packages.core diff --git a/sift/python3-packages/yara-python.sls b/sift/python3-packages/yara-python.sls index 382cdedc..9e6affb2 100644 --- a/sift/python3-packages/yara-python.sls +++ b/sift/python3-packages/yara-python.sls @@ -1,9 +1,9 @@ include: - - sift.python3-packages.pip + - sift.python3-packages.core sift-python3-packages-yara-python: pip.installed: - name: yara-python - bin_env: /usr/bin/python3 - require: - - sls: sift.python3-packages.pip + - sls: sift.python3-packages.core diff --git a/sift/scripts/4n6.sls b/sift/scripts/4n6.sls index b635d382..35e6bb87 100644 --- a/sift/scripts/4n6.sls +++ b/sift/scripts/4n6.sls @@ -1,6 +1,6 @@ {% set files = [('Android', ['fbmsg-extractor.py','imgcache-parse-mod.py','imgcache-parse.py','print_apk_perms.py','wwf-chat-parser.py']), ('Ford', 
['sync3-unisearch.py','sync3-unisearch2kml.py']), - ('Google_Takeout_Records', ['gRecordsActivity_ijson_date.py']), + ('Google_Takeout_Records', ['gRecordsActivity_json_date.py']), ('Samsung_Gallery3d_2022', ['java-hashcode.py','samsung_gallery3d_filesysmon_parser_v11.py','samsung_gallery3d_log_parser_v10.py','samsung_gallery3d_log_parser_v11.py','samsung_gallery3d_trash_parser_v10.py']), ('WindowsPhone8', ['WP8_AppPerms.py','wp8-1-callhistory.py','wp8-1-contacts.py','wp8-1-mms-filesort.py','wp8-1-mms.py','wp8-1-sms.py','wp8-callhistory.py','wp8-contacts.py','wp8-fb-msg.py','wp8-sha256-pin-finder.py','wp8-sms.py']), ('iOS', ['ios14_maps_history.py','vmail-db-2-html.pl']), @@ -14,9 +14,8 @@ include: - sift.python3-packages.ijson - - sift.python-packages.s2sphere + - sift.python3-packages.s2sphere - sift.packages.git - - sift.packages.python2 - sift.packages.python3 - sift.perl-packages.exiftool - sift.perl-packages.cgi @@ -28,15 +27,14 @@ include: sift-scripts-4n6-git: git.latest: - - name: https://github.com/cheeky4n6monkey/4n6-scripts.git + - name: https://github.com/digitalsleuth/4n6-scripts.git - target: /usr/local/src/4n6-scripts - user: root - - rev: f57a5301b317a9842c0d43853595161843086923 + - rev: master - force_clone: True - force_reset: True - require: - sls: sift.packages.git - - sls: sift.packages.python2 - sls: sift.packages.python3 {% for folder, file_list in files %} @@ -52,61 +50,3 @@ sift-scripts-4n6-{{ file }}: {% endfor %} {% endfor %} -sift-scripts-4n6-gLocationHistoryActivity: - file.copy: - - name: /usr/local/bin/gLocationHistoryActivity.py - - source: '/usr/local/src/4n6-scripts/Google_Takeout_Location_History/# gLocationHistoryActivity.py' - - force: True - - mode: 755 - - watch: - - git: sift-scripts-4n6-git - -{% for file in fixshebangpy2 %} -sift-scripts-4n6-python2-{{ file }}: - file.replace: - - name: /usr/local/bin/{{ file }} - - pattern: '#! /usr/bin/env python\n' - - repl: '#! 
/usr/bin/env python2\n' - - count: 1 - - watch: - - git: sift-scripts-4n6-git -{% endfor %} - -{% for file in fixshebangpy3 %} -sift-scripts-4n6-python3-{{ file }}: - file.replace: - - name: /usr/local/bin/{{ file }} - - pattern: '#! /usr/bin/env python\n' - - repl: '#! /usr/bin/env python3\n' - - count: 1 - - watch: - - git: sift-scripts-4n6-git -{% endfor %} - -{% for file in fixshebangpy3 %} -sift-scripts-4n6-python3-CRLF{{ file }}: - file.replace: - - name: /usr/local/bin/{{ file }} - - pattern: '#! /usr/bin/env python\r' - - repl: '#! /usr/bin/env python3\n' - - count: 1 - - watch: - - git: sift-scripts-4n6-git -{% endfor %} - -{%- for file in noshebang %} -sift-scripts-4n6-add-shebang-{{ file }}: - file.prepend: - - name: /usr/local/bin/{{ file }} - - text: '#!/usr/bin/env python2' - - watch: - - git: sift-scripts-4n6-git -{% endfor %} - -sift-scripts-4n6-plistdb2py-shebang: - file.prepend: - - name: /usr/local/bin/plist2db.py - - text: '#!/usr/bin/env python3' - - watch: - - git: sift-scripts-4n6-git - diff --git a/sift/scripts/amcache.sls b/sift/scripts/amcache.sls deleted file mode 100644 index 0d6b2764..00000000 --- a/sift/scripts/amcache.sls +++ /dev/null @@ -1,22 +0,0 @@ -# source=https://github.com/williballenthin/python-registry -# license=apache2 -# license_source=https://github.com/williballenthin/python-registry/blob/master/LICENSE.TXT - -{% set commit = "1a669eada6f7933798751e0cf482a9eb654c739b" -%} -{% set hash = "1065c23fdea1fde90e931bf5ccabc93b508bee0f6855a6ef2b3b9fd74495e279" -%} - -sift-scripts-amcache: - file.managed: - - name: /usr/local/bin/amcache.py - - source: https://raw.githubusercontent.com/williballenthin/python-registry/{{ commit }}/samples/amcache.py - - source_hash: sha256={{ hash }} - - mode: 755 - -sift-scripts-amcache-shebang: - file.replace: - - name: /usr/local/bin/amcache.py - - pattern: '#!/usr/bin/python' - - repl: '#!/usr/bin/env python2' - - count: 1 - - watch: - - file: sift-scripts-amcache diff --git 
a/sift/scripts/exiftool.sls b/sift/scripts/exiftool.sls deleted file mode 100644 index a381209e..00000000 --- a/sift/scripts/exiftool.sls +++ /dev/null @@ -1,65 +0,0 @@ -# source=https://owl.phy.queensu.ca/~phil/exiftool/ -# license=free - -{% set exiftool_version = '10.60' -%} -{% set exiftool_sha256 = 'df0988f60e1a6c086799e1f2ecd419e8abbad4dfb5dfa66c6080c78a5cb7acfa' -%} - -include: - - sift.packages.patch - -sift-exiftool-source: - file.managed: - - name: /var/cache/sift/archives/Image-ExifTool-{{ exiftool_version }}.tar.gz - - source: https://owl.phy.queensu.ca/~phil/exiftool/Image-ExifTool-{{ exiftool_version }}.tar.gz - - source_hash: sha256={{ exiftool_sha256 }} - - makedirs: True - -sift-exiftool-extracted: - archive.extracted: - - name: /usr/local/src/exiftool-{{ exiftool_version }} - - source: /var/cache/sift/archives/Image-ExifTool-{{ exiftool_version }}.tar.gz - - source_hash: sha256={{ exiftool_sha256 }} - - watch: - - file: sift-exiftool-source - -sift-exiftool-patch-file: - file.managed: - - name: /usr/local/src/exiftool-{{ exiftool_version }}/exiftool.patch - - contents: | - diff --git 1/exiftool 2/exiftool - index eeff10b..a7c1259 100755 - --- 1/exiftool - +++ 2/exiftool - @@ -18,7 +18,7 @@ my $version = '10.60'; - my $exeDir; - BEGIN { - # get exe directory - - $exeDir = ($0 =~ /(.*)[\\\/]/) ? 
$1 : '.'; - + $exeDir = "/usr/local/share/exiftool-$version/Image-ExifTool-$version"; - # add lib directory at start of include path - unshift @INC, "$exeDir/lib"; - # load or disable config file if specified - - watch: - - archive: sift-exiftool-extracted - -sift-exiftool-patch: - file.patch: - - name: /usr/local/src/exiftool-{{ exiftool_version }}/Image-ExifTool-{{ exiftool_version }}/exiftool - - source: /usr/local/src/exiftool-{{ exiftool_version }}/exiftool.patch - - hash: sha256=8790e165825aa7028d3a71ce656c876f8430d2505c6ca5aa058e74b16faee611 - - require: - - sls: sift.packages.patch - - file: sift-exiftool-patch-file - - watch: - - archive: sift-exiftool-extracted - -sift-exiftool-binary: - file.managed: - - name: /usr/local/bin/exiftool - - source: /usr/local/src/exiftool-{{ exiftool_version }}/Image-ExifTool-{{ exiftool_version }}/exiftool - - mode: 755 - - watch: - - file: sift-exiftool-patch - - - diff --git a/sift/scripts/image-mounter.sls b/sift/scripts/image-mounter.sls deleted file mode 100644 index 20709190..00000000 --- a/sift/scripts/image-mounter.sls +++ /dev/null @@ -1,22 +0,0 @@ -# source=https://github.com/kevthehermit/Scripts -# license=unknown - -{% set commit = "28b3e08a5ad16576ffe487691376f3e2a2bc0cf5" -%} -{% set hash = "sha256=7e810482b5aa58f8085a7a03be266c113530145306c73c75ba9956ba83e39151" -%} - -sift-scripts-image-mounter: - file.managed: - - name: /usr/local/bin/imageMounter.py - - source: https://raw.githubusercontent.com/kevthehermit/Scripts/{{ commit }}/imageMounter.py - - source_hash: {{ hash }} - - mode: 755 - -sift-scripts-image-mounter-shebang: - file.replace: - - name: /usr/local/bin/imageMounter.py - - pattern: '#!/usr/bin/env python\n' - - repl: '#!/usr/bin/env python2\n' - - count: 1 - - prepend_if_not_found: False - - watch: - - file: sift-scripts-image-mounter diff --git a/sift/scripts/init.sls b/sift/scripts/init.sls index 4097a35a..a92f4fb3 100644 --- a/sift/scripts/init.sls +++ b/sift/scripts/init.sls @@ -1,31 +1,19 
@@ include: - sift.scripts.4n6 - sift.scripts.afterglow - - sift.scripts.amcache - sift.scripts.cyberchef - sift.scripts.densityscout - sift.scripts.dumbpig - sift.scripts.dump-mft-entry - - sift.scripts.image-mounter - - sift.scripts.java-idx-parser - - sift.scripts.jobparser - sift.scripts.keydet-tools - sift.scripts.packerid - sift.scripts.page-brute - - sift.scripts.parseusn - sift.scripts.pdf-tools - - sift.scripts.pe-carver - - sift.scripts.pescanner - sift.scripts.regripper - sift.scripts.screen-scale - - sift.scripts.shim-cache-parser - sift.scripts.sift - sift.scripts.sorter - sift.scripts.sqlite_miner - - sift.scripts.sqlparser - - sift.scripts.usbdeviceforensics - - sift.scripts.virustotal-tools - - sift.scripts.vshot sift-scripts: test.nop: @@ -33,28 +21,16 @@ sift-scripts: - require: - sls: sift.scripts.4n6 - sls: sift.scripts.afterglow - - sls: sift.scripts.amcache - sls: sift.scripts.cyberchef - sls: sift.scripts.densityscout - sls: sift.scripts.dumbpig - sls: sift.scripts.dump-mft-entry - - sls: sift.scripts.image-mounter - - sls: sift.scripts.java-idx-parser - - sls: sift.scripts.jobparser - sls: sift.scripts.keydet-tools - sls: sift.scripts.packerid - sls: sift.scripts.page-brute - - sls: sift.scripts.parseusn - sls: sift.scripts.pdf-tools - - sls: sift.scripts.pe-carver - - sls: sift.scripts.pescanner - sls: sift.scripts.regripper - sls: sift.scripts.screen-scale - - sls: sift.scripts.shim-cache-parser - sls: sift.scripts.sift - sls: sift.scripts.sorter - sls: sift.scripts.sqlite_miner - - sls: sift.scripts.sqlparser - - sls: sift.scripts.usbdeviceforensics - - sls: sift.scripts.virustotal-tools - - sls: sift.scripts.vshot diff --git a/sift/scripts/java-idx-parser.sls b/sift/scripts/java-idx-parser.sls deleted file mode 100644 index 73fdf18c..00000000 --- a/sift/scripts/java-idx-parser.sls +++ /dev/null @@ -1,20 +0,0 @@ -# source=https://github.com/Rurik/Java_IDX_Parser -# license=apache2.0 -# 
license_source=https://github.com/Rurik/Java_IDX_Parser#copyright-and-license - -{% set commit = "f9b7a3aeb66a86e891e28d5e762483dff5e15851" -%} -{% set hash = "sha256=963d5f38b93016f147295ab6871dcba326c9315ea9402652745ae6290b594f45" -%} - -sift-scripts-java-idx-parser: - file.managed: - - name: /usr/local/bin/idx_parser.py - - source: https://raw.githubusercontent.com/Rurik/Java_IDX_Parser/{{ commit }}/idx_parser.py - - source_hash: {{ hash }} - - mode: 755 - -sift-scripts-java-idx-parser-shebang: - file.prepend: - - name: /usr/local/bin/idx_parser.py - - text: '#!/usr/bin/env python2' - - watch: - - file: sift-scripts-java-idx-parser diff --git a/sift/scripts/jobparser.sls b/sift/scripts/jobparser.sls deleted file mode 100644 index a15b7026..00000000 --- a/sift/scripts/jobparser.sls +++ /dev/null @@ -1,21 +0,0 @@ -# source=https://github.com/gleeda/misc-scripts -# license=gplv2 - -{% set commit = "03a0d9126359c6b4b0b508062d3422bea9b69036" -%} -{% set hash = "sha256=a6869e7f0f2f360681ff67a67b65c627b0084ebec25d7a9bb44abe8a1cdfb467" -%} - -sift-scripts-jobparser: - file.managed: - - name: /usr/local/bin/jobparser.py - - source: https://raw.githubusercontent.com/gleeda/misc-scripts/{{ commit }}/misc_python/jobparser.py - - source_hash: {{ hash }} - - mode: 755 - -sift-scripts-jobparser-python: - file.replace: - - name: /usr/local/bin/jobparser.py - - pattern: '#!/usr/bin/env python\n' - - repl: '#!/usr/bin/env python2\n' - - count: 1 - - watch: - - file: sift-scripts-jobparser diff --git a/sift/scripts/packerid.sls b/sift/scripts/packerid.sls index 172fda40..776be671 100644 --- a/sift/scripts/packerid.sls +++ b/sift/scripts/packerid.sls @@ -5,9 +5,9 @@ {% set hash = "sha256=be589d4cbe70ecdc3424a6da48d8fc24630d51a6ebf92e5328b36e39423eb038" -%} include: - - sift.packages.python2 - - sift.python-packages.pefile - - sift.python-packages.capstone + - sift.python3-packages.core + - sift.python3-packages.pefile + - sift.python3-packages.capstone sift-scripts-packerid: 
file.managed: @@ -16,15 +16,15 @@ sift-scripts-packerid: - source_hash: {{ hash }} - mode: 755 - require: - - sls: sift.packages.python2 - - sls: sift.python-packages.pefile - - sls: sift.python-packages.capstone + - sls: sift.python3-packages.core + - sls: sift.python3-packages.pefile + - sls: sift.python3-packages.capstone sift-scripts-packerid-shebang: file.replace: - name: /usr/local/bin/packerid.py - pattern: '#!/usr/local/bin/python' - - repl: '#!/usr/bin/env python2' + - repl: '#!/usr/bin/env python3' - count: 1 - watch: - file: sift-scripts-packerid diff --git a/sift/scripts/page-brute.sls b/sift/scripts/page-brute.sls index 273d349b..0f12ccbd 100644 --- a/sift/scripts/page-brute.sls +++ b/sift/scripts/page-brute.sls @@ -4,12 +4,3 @@ sift-scripts-page-brute: - source: salt://sift/files/page-brute - file_mode: 755 - include_pat: '*.py' - -sift-scripts-page-brute-shebang: - file.replace: - - name: /usr/local/bin/page_brute-BETA.py - - pattern: '#!/usr/bin/python\n' - - repl: '#!/usr/bin/env python2\n' - - count: 1 - - watch: - - file: sift-scripts-page-brute diff --git a/sift/scripts/parseusn.sls b/sift/scripts/parseusn.sls deleted file mode 100644 index 8e63021e..00000000 --- a/sift/scripts/parseusn.sls +++ /dev/null @@ -1,21 +0,0 @@ -# source=https://github.com/superponible/DFIR/ -# license=MIT - -{% set commit = "ee681a07a0c32a5ccaea788cd7d012d19872f181" -%} -{% set hash = "sha256=4540eba4cdddcb0eab1bc21ccea6a6ab7c010936909bb233807dc9bf4189ab10" -%} - -sift-scripts-parseusn: - file.managed: - - name: /usr/local/bin/parseusn.py - - source: https://raw.githubusercontent.com/superponible/DFIR/{{ commit }}/parseusn.py - - source_hash: {{ hash }} - - mode: 755 - -sift-scripts-parseusn-shebang: - file.replace: - - name: /usr/local/bin/parseusn.py - - pattern: '#!/usr/bin/env python\n' - - repl: '#!/usr/bin/env python2\n' - - count: 1 - - watch: - - file: sift-scripts-parseusn diff --git a/sift/scripts/pdf-tools.sls b/sift/scripts/pdf-tools.sls index 
83a7ea11..1ae9e317 100644 --- a/sift/scripts/pdf-tools.sls +++ b/sift/scripts/pdf-tools.sls @@ -3,11 +3,3 @@ sift-scripts-pdf-tools: - name: /usr/local/bin - source: salt://sift/files/pdf-tools - file_mode: 755 - -sift-scripts-pdf-tools-helloworld-shebang: - file.prepend: - - name: /usr/local/bin/make-pdf-helloworld.py - - text: '#!/usr/bin/env python2' - - watch: - - file: sift-scripts-pdf-tools - diff --git a/sift/scripts/pe-carver.sls b/sift/scripts/pe-carver.sls deleted file mode 100644 index f52a1ec4..00000000 --- a/sift/scripts/pe-carver.sls +++ /dev/null @@ -1,26 +0,0 @@ -# source=https://github.com/Rurik/PE_Carver -# license=unknown - -{% set commit = "9026cd2ca4bd0633f9898a93cb798cd19cffc8f6" -%} -{% set hash = "sha256=6b245decadde4652ff6d1e2b24f6496dd252bee4bf57e7c934fbb9c9f21df849" -%} - -include: - - sift.python-packages.bitstring - - sift.python-packages.pefile - -sift-scripts-pecarve: - file.managed: - - name: /usr/local/bin/pecarve.py - - source: https://raw.githubusercontent.com/Rurik/PE_Carver/{{ commit }}/pe_carve.py - - source_hash: {{ hash }} - - mode: 755 - - require: - - sls: sift.python-packages.bitstring - - sls: sift.python-packages.pefile - -sift-scripts-pecarve-shebang: - file.prepend: - - name: /usr/local/bin/pecarve.py - - text: '#!/usr/bin/env python2' - - watch: - - file: sift-scripts-pecarve diff --git a/sift/scripts/pescanner.sls b/sift/scripts/pescanner.sls deleted file mode 100644 index eefbfb8a..00000000 --- a/sift/scripts/pescanner.sls +++ /dev/null @@ -1,34 +0,0 @@ -# source=https://github.com/hiddenillusion/AnalyzePE/ -# license=unknown - -{% set commit = "9c76ecbc3ac417bc07439c244f2d5ed19af06578" -%} -{% set hash = "sha256=0c4e2a8916df3de0bde67ef47543db6f6068b267fa2b665667a52bc6002e6529" -%} - -include: - - sift.packages.python2 - - sift.python-packages.pefile - - sift.python-packages.pydasm - - sift.python-packages.python-magic - - sift.python-packages.yara-python - -sift-scripts-pescanner: - file.managed: - - name: 
/usr/local/bin/pescanner.py - - source: https://raw.githubusercontent.com/hiddenillusion/AnalyzePE/{{ commit }}/pescanner.py - - source_hash: {{ hash }} - - mode: 755 - - require: - - sls: sift.packages.python2 - - sls: sift.python-packages.pefile - - sls: sift.python-packages.pydasm - - sls: sift.python-packages.python-magic - - sls: sift.python-packages.yara-python - -sift-scripts-pescanner-shebang: - file.replace: - - name: /usr/local/bin/pescanner.py - - pattern: '#!/usr/bin/env python\n' - - repl: '#!/usr/bin/env python2\n' - - count: 1 - - watch: - - file: sift-scripts-pescanner diff --git a/sift/scripts/shim-cache-parser.sls b/sift/scripts/shim-cache-parser.sls deleted file mode 100644 index bc712ab6..00000000 --- a/sift/scripts/shim-cache-parser.sls +++ /dev/null @@ -1,25 +0,0 @@ -# source=https://github.com/mandiant/ShimCacheParser -# license=apache2 -# license_source=https://github.com/mandiant/ShimCacheParser/blob/master/LICENSE - -{% set commit = "d7c517af9f3b09b810c5859ee52a6540f3b25855" -%} -{% set shasum = "sha256=61e75e485c0efc862e7b1c7746a493ca944afcf3e96512fb864706089f89d9aa" -%} - -include: - - sift.python-packages.python-registry - -sift-scripts-shim-cache-parser: - file.managed: - - name: /usr/local/bin/ShimCacheParser.py - - source: https://raw.githubusercontent.com/mandiant/ShimCacheParser/{{ commit }}/ShimCacheParser.py - - source_hash: {{ shasum }} - - mode: 755 - - require: - - sls: sift.python-packages.python-registry - -sift-scripts-shim-cache-parser-shebang: - file.prepend: - - name: /usr/local/bin/ShimCacheParser.py - - text: '#!/usr/bin/env python2' - - watch: - - file: sift-scripts-shim-cache-parser diff --git a/sift/scripts/sqlparser.sls b/sift/scripts/sqlparser.sls deleted file mode 100644 index e5b0e291..00000000 --- a/sift/scripts/sqlparser.sls +++ /dev/null @@ -1,18 +0,0 @@ -# source=https://github.com/mdegrazia/SQLite-Deleted-Records-Parser -# license=unknown - -{% set hash = 
"sha256=0bb28498141380821d5adc43cc3557ce6a96aeb8a33c414a48e3ccc2a1aad8c9" -%} - -sift-scripts-sqlparser: - file.managed: - - name: /usr/local/bin/sqlparser.py - - source: https://github.com/mdegrazia/SQLite-Deleted-Records-Parser/releases/download/v.1.1/sqlparse_v1.1.py - - source_hash: {{ hash }} - - mode: 755 - -sift-scripts-sqlparser-shebang: - file.prepend: - - name: /usr/local/bin/sqlparser.py - - text: '#!/usr/bin/env python2' - - watch: - - file: sift-scripts-sqlparser diff --git a/sift/scripts/usbdeviceforensics.sls b/sift/scripts/usbdeviceforensics.sls deleted file mode 100644 index 2aa04ae5..00000000 --- a/sift/scripts/usbdeviceforensics.sls +++ /dev/null @@ -1,21 +0,0 @@ -# source=https://github.com/woanware/usbdeviceforensics -# license=unknown - -{% set commit = "5a0705d5beca09eab2fd5a47a52240dbc0db5bc9" -%} -{% set hash = "sha256=cc643ae2ccd7b772f6d8a2abaa0e9dd33514c60328c5bc3b7d60bb69398b9637" -%} - -sift-scripts-usbdeviceforensics: - file.managed: - - name: /usr/local/bin/usbdeviceforensics.py - - source: https://raw.githubusercontent.com/woanware/usbdeviceforensics/{{ commit }}/usbdeviceforensics.py - - source_hash: {{ hash }} - - mode: 755 - -sift-scripts-usbdeviceforensics-shebang: - file.replace: - - name: /usr/local/bin/usbdeviceforensics.py - - pattern: '#!/usr/bin/python' - - repl: '#!/usr/bin/env python2' - - count: 1 - - watch: - - file: sift-scripts-usbdeviceforensics diff --git a/sift/scripts/virustotal-tools.sls b/sift/scripts/virustotal-tools.sls deleted file mode 100644 index 0e2cef0c..00000000 --- a/sift/scripts/virustotal-tools.sls +++ /dev/null @@ -1,56 +0,0 @@ -# source=https://blog.didierstevens.com/programs/virustotal-tools/ -# license=unknown - -include: - - sift.python-packages.poster - -sift-scripts-virustotal-search-archive: - archive.extracted: - - name: /usr/local/src/virustotal-search-v0.1.4 - - source: https://didierstevens.com/files/software/virustotal-search_V0_1_4.zip - - source_hash: 
sha256=8c033b3c46767590c54c191aeedc0162b3b8ccde0d7b75841a6552ca9de76044 - - enforce_toplevel: False - -sift-scripts-virustotal-search-script: - file.managed: - - name: /usr/local/bin/virustotal-search.py - - source: /usr/local/src/virustotal-search-v0.1.4/virustotal-search.py - - mode: 755 - - watch: - - archive: sift-scripts-virustotal-search-archive - -sift-scripts-virustotal-submit-archive: - archive.extracted: - - name: /usr/local/src/virustotal-submit-v0.0.3 - - source: https://didierstevens.com/files/software/virustotal-submit_V0_0_3.zip - - source_hash: sha256=37cce3e8469de097912cb23bac6b909c9c7f5a5cee09c9279d32bdb9d6e23bcc - - enforce_toplevel: False - -sift-scripts-virustotal-submit-script: - file.managed: - - name: /usr/local/bin/virustotal-submit.py - - source: /usr/local/src/virustotal-submit-v0.0.3/virustotal-submit.py - - mode: 755 - - watch: - - archive: sift-scripts-virustotal-submit-archive - - require: - - sls: sift.python-packages.poster - -sift-scripts-virustotal-search-shebang: - file.replace: - - name: /usr/local/bin/virustotal-search.py - - pattern: '#!/usr/bin/env python\n' - - repl: '#!/usr/bin/env python2\n' - - count: 1 - - watch: - - file: sift-scripts-virustotal-search-script - -sift-scripts-virustotal-submit-shebang: - file.replace: - - name: /usr/local/bin/virustotal-submit.py - - pattern: '#!/usr/bin/env python\n' - - repl: '#!/usr/bin/env python2\n' - - count: 1 - - watch: - - file: sift-scripts-virustotal-submit-script - diff --git a/sift/scripts/vshot.sls b/sift/scripts/vshot.sls deleted file mode 100644 index acd11fa8..00000000 --- a/sift/scripts/vshot.sls +++ /dev/null @@ -1,35 +0,0 @@ -{% set commit = "62d8ae4ed1ca276f2a1ffe251e1750d10538ae52" -%} -{% set hash = "sha256=590fb825df2d17f2e83fcbf1a578f39d8c7bd38017d85edfb250c0fb92db8b3a" -%} - -include: - - sift.packages.bulk-extractor - - sift.python-packages.volatility - -sift-scripts-vshot: - file.managed: - - name: /usr/local/bin/vshot - - source: 
https://raw.githubusercontent.com/CrowdStrike/Forensics/{{ commit }}/vshot - - source_hash: {{ hash }} - - mode: 755 - - require: - - sls: sift.python-packages.volatility - - sls: sift.packages.bulk-extractor - -sift-scripts-vshot-config-volatility: - file.replace: - - name: /usr/local/bin/vshot - - pattern: 'volpath="/usr/bin/volatility"' - - repl: 'volpath="/usr/local/bin/vol.py"' - - count: 1 - - watch: - - file: sift-scripts-vshot - -sift-scripts-vshot-config-bulk-extractor: - file.replace: - - name: /usr/local/bin/vshot - - pattern: 'bulkpath="/usr/local/bin/bulk_extractor"' - - repl: 'bulkpath="/usr/bin/bulk_extractor"' - - count: 1 - - watch: - - file: sift-scripts-vshot - diff --git a/sift/tests/gift.sls b/sift/tests/gift.sls index 70008282..5ee9b6f7 100644 --- a/sift/tests/gift.sls +++ b/sift/tests/gift.sls @@ -11,16 +11,15 @@ include: - sift.packages.libevtx-tools - sift.packages.libewf - sift.packages.libewf-dev - - sift.packages.libewf-python + - sift.packages.libewf-python3 - sift.packages.libewf-tools - sift.packages.libfvde - sift.packages.libfvde-tools - sift.packages.libvmdk - sift.packages.libvshadow - sift.packages.libvshadow-dev - - sift.packages.libvshadow-python + - sift.packages.libvshadow-python3 - sift.packages.libvshadow-tools - - sift.packages.plaso-data - sift.packages.plaso-tools - sift.packages.python3-plaso @@ -40,15 +39,14 @@ sift-tests-gift: - sls: sift.packages.libevtx-tools - sls: sift.packages.libewf - sls: sift.packages.libewf-dev - - sls: sift.packages.libewf-python + - sls: sift.packages.libewf-python3 - sls: sift.packages.libewf-tools - sls: sift.packages.libfvde - sls: sift.packages.libfvde-tools - sls: sift.packages.libvmdk - sls: sift.packages.libvshadow - sls: sift.packages.libvshadow-dev - - sls: sift.packages.libvshadow-python + - sls: sift.packages.libvshadow-python3 - sls: sift.packages.libvshadow-tools - - sls: sift.packages.plaso-data - sls: sift.packages.plaso-tools - sls: sift.packages.python3-plaso