Commit 099fd67a authored by Devon Kearns

Imported Upstream version 1.0

parent a7e262b1
#!/usr/bin/env python
import sys
import os.path
import binwalk
from threading import Thread
from binwalk.common import str2int
from getopt import GetoptError, getopt as GetOpt
def display_status(bwalk):
while True:
# Display the current scan progress when the enter key is pressed.
print "Progress: %.2f%% (%d / %d)\n" % (((float(bwalk.total_scanned) / float(bwalk.scan_length)) * 100), bwalk.total_scanned, bwalk.scan_length)
def usage(fd):
fd.write("Binwalk v%s\n" % binwalk.Config.VERSION)
fd.write("Craig Heffner,\n")
fd.write("Usage: %s [OPTIONS] [FILE1] [FILE2] [FILE3] ...\n" % os.path.basename(sys.argv[0]))
fd.write("\t-o, --offset=<int> Start scan at this file offset\n")
fd.write("\t-l, --length=<int> Number of bytes to scan\n")
fd.write("\t-b, --align=<int> Set byte alignment [default: 1]\n")
fd.write("\t-m, --magic=<file> Specify an alternate magic file to use\n")
fd.write("\t-i, --include=<filter> Include matches that are normally excluded and that have <filter> in their description\n")
fd.write("\t-x, --exclude=<filter> Exclude matches that have <filter> in their description\n")
fd.write("\t-y, --search=<filter> Only search for matches that have <filter> in their description\n")
fd.write("\t-g, --grep=<text> Grep results for the specified text\n")
fd.write("\t-R, --raw-bytes=<string> Search for a sequence of raw bytes instead of using the default magic signatures\n")
fd.write("\t-f, --file=<file> Log results to file\n")
fd.write("\t-D, --dd=<type:ext[:cmd]> Extract entries whose descriptions match <type>, give them file extension <ext>, and execute <cmd>\n")
fd.write("\t-e, --extract=[file] Automatically extract known file types. Load rules from file, if specified.\n")
fd.write("\t-r, --rm Cleanup extracted files and zero-size files\n")
fd.write("\t-d, --delay Delay file extraction for files with known footers\n")
fd.write("\t-a, --all Include all short signatures\n")
fd.write("\t-I, --show-invalid Show results marked as invalid\n")
fd.write("\t-A, --opcodes Scan for executable code\n")
fd.write("\t-C, --cast Cast file contents as various data types\n")
fd.write("\t-k, --keep-going Show all matching results at a given offset, not just the first one\n")
fd.write("\t-q, --quiet Supress output to stdout\n")
fd.write("\t-v, --verbose Be verbose (specify twice for very verbose)\n")
fd.write("\t-u, --update Update magic signature files\n")
fd.write("\t-h, --help Show help output\n")
if fd == sys.stderr:
def main():
align = 1
offset = 0
length = 0
quiet = False
pre_filter = True
verbose = 0
log_file = None
show_invalid = False
short_sig = True
custom_signature = None
delay_extraction = False
extract_rules_file = None
extract_from_config = False
cleanup_after_extract = False
magic_flags = binwalk.magic.MAGIC_NONE
options = []
magic_files = []
target_files = []
greps = []
includes = []
excludes = []
searches = []
extracts = []
config = binwalk.Config()
short_options = "aACdhkeqruvPIf:o:l:b:i:x:y:D:m:R:g:"
long_options = [
# Require at least one argument (the target file)
if len(sys.argv) < MIN_ARGC:
opts, args = GetOpt(sys.argv[1:], short_options, long_options)
except GetoptError, e:
sys.stderr.write("%s\n" % str(e))
for opt, arg in opts:
if opt in ("-h", "--help"):
elif opt in ("-d", "--delay"):
delay_extraction = True
elif opt in ("-f", "--file"):
log_file = arg
elif opt in ("-q", "--quiet"):
quiet = True
elif opt in ("-v", "--verbose"):
verbose += 1
elif opt in ("-o", "--offset"):
offset = str2int(arg)
elif opt in ("-l", "--length"):
length = str2int(arg)
elif opt in ("-b", "--align"):
align = str2int(arg)
elif opt in ("-i", "--include"):
elif opt in ("-y", "--search"):
elif opt in ("-x", "--exclude"):
elif opt in ("-D", "--dd"):
elif opt in ("-g", "--grep"):
elif opt in ("-e", "--extract"):
if arg:
extract_rules_file = arg
extract_from_config = True
elif opt in ("-r", "--rm"):
cleanup_after_extract = True
elif opt in ("-m", "--magic"):
elif opt in ("-a", "--all"):
short_sig = False
elif opt in ("-k", "--keep-going"):
magic_flags |= binwalk.magic.MAGIC_CONTINUE
elif opt in ("-I", "--show-invalid"):
show_invalid = True
elif opt in ("-A", "--opcodes"):
# Check every single offset
align = 1
# Don't filter out short signatures as some opcode sigs are only 2 bytes
short_sig = False
# Load user file first so its signatures take precedence
elif opt in ("-C", "--cast"):
# Check every single offset
align = 1
# Don't stop at the first match (everything matches everything in this scan)
magic_flags |= binwalk.magic.MAGIC_CONTINUE
# Disable all pre filtering; we want to check everything for this scan
pre_filter = False
# Don't filter short signatures, or else some casts won't be displayed
short_sig = False
# Load user file first so its signatures take precedence
elif opt in ("-R", "--raw-bytes"):
# Disable short signature filtering, as the supplied string may be short
short_sig = False
custom_signature = arg
elif opt in ("-u", "--update"):
sys.stdout.write("Updating signatures...")
except Exception, e:
if 'Permission denied' in str(e):
sys.stderr.write("failed (permission denied). Check your user permissions, or run the update as root.\n")
sys.stderr.write('\n' + str(e) + '\n')
# The --profile option is handled prior to calling main()
elif opt not in ('-P', '--profile'):
# Append the option and argument to the list of processed options
# This is used later to determine which argv entries are file names
options.append("%s%s" % (opt, arg))
options.append("%s=%s" % (opt, arg))
# Treat any command line options not processed by getopt as target file paths
for opt in sys.argv[1:]:
#TODO: Do we really want to not process valid files that start with a '-'?
# This is probably OK, and ensures that no options are treated as target files.
if opt not in options and not opt.startswith('-'):
# If more than one target file was specified, enable verbose mode; else, there is
# nothing in the output to indicate which scan corresponds to which file.
if len(target_files) > 1:
verbose = True
# Instantiate the Binwalk class
bwalk = binwalk.Binwalk(flags=magic_flags, verbose=verbose, log=log_file, quiet=quiet)
# If a custom signature was specified, create a temporary magic file containing the custom signature
# and ensure that it is the only magic file that will be loaded when Binwalk.scan() is called.
if custom_signature is not None:
magic_files = bwalk.parser.file_from_string(custom_signature)
# Set any specified filters
bwalk.filter.include(includes, exclusive=False)
# Add any specified extract rules
# If -e was specified, load the default extract rules
if extract_from_config:
# If --extract was specified, load the specified extraction rules file
if extract_rules_file is not None:
# Set the extractor cleanup value (True to clean up files, False to leave them on disk)
# Enable delayed extraction, which will prevent supported file types from having trailing data when extracted
# Load the magic file(s)
bwalk.load_signatures(magic_files=magic_files, pre_filter_signatures=pre_filter, filter_short_signatures=short_sig)
# Scan each target file
for target_file in target_files:
# Start the display_status function as a daemon thread
t = Thread(target=display_status, args=(bwalk,))
# Catch keyboard interrupts so that we can properly clean up after the scan
except KeyboardInterrupt:
# Be sure to drink your ovaltine.
# And also to clean up any temporary magic files.
# Special options for profiling the code. For debug use only.
if '--profile' in sys.argv or '-P' in sys.argv:
import cProfile'main()')
except KeyboardInterrupt:
import os
import magic
from config import *
from update import *
from filter import *
from parser import *
from smartsig import *
from extractor import *
from prettyprint import *
from common import file_size
class Binwalk:
Primary Binwalk class.
Interesting class objects:
self.filter - An instance of the MagicFilter class.
self.extractor - An instance of the Extractor class.
self.parser - An instance of the MagicParser class.
self.display - An instance of the PrettyPrint class.
self.magic_files - A list of magic file path strings to use whenever the scan() method is invoked.
self.scan_length - The total number of bytes to be scanned.
self.total_scanned - The number of bytes that have already been scanned.
# Default libmagic flags. Basically disable anything we don't need in the name of speed.
# The MAX_SIGNATURE_SIZE limits the amount of data available to a signature.
# While most headers/signatures are far less than this value, some may reference
# pointers in the header structure which may point well beyond the header itself.
# Passing the entire remaining buffer to libmagic is resource intensive and will
# significantly slow the scan; this value represents a reasonable buffer size to
# pass to libmagic which will not drastically affect scan time.
# Max number of bytes to process at one time. Everyone should have 50MB of memory, right?
READ_BLOCK_SIZE = 50 * 1024 * 1024
# Minimum verbosity level at which to enable extractor verbosity.
# Scan every byte by default.
def __init__(self, magic_files=[], flags=magic.MAGIC_NONE, log=None, quiet=False, verbose=0):
Class constructor.
@magic_files - A list of magic files to use.
@flags - Flags to pass to magic_open. [TODO: Might this be more appropriate as an argument to load_signatures?]
@log - Output PrettyPrint data to log file as well as to stdout.
@quiet - If set to True, suppress PrettyPrint output to stdout.
@verbose - Verbosity level.
Returns None.
self.flags = self.DEFAULT_FLAGS | flags
self.magic_files = magic_files
self.verbose = verbose
self.total_scanned = 0
self.scan_length = 0
self.total_read = 0
self.magic = None
self.mfile = None
# Instantiate the config class so we can access file/directory paths
self.config = Config()
# Use the system default magic file if no other was specified
if not self.magic_files or self.magic_files is None:
# Append the user's magic file first so that those signatures take precedence
self.magic_files = [
# Only set the extractor verbosity if told to be very verbose
if self.verbose >= self.VERY_VERBOSE:
extractor_verbose = True
extractor_verbose = False
# Create an instance of the PrettyPrint class, which can be used to print results to screen/file.
self.display = PrettyPrint(log=log, quiet=quiet, verbose=verbose, bwalk=self)
# Create MagicFilter and Extractor class instances. These can be used to:
# o Create include/exclude filters
# o Specify file extraction rules to be applied during a scan
self.filter = MagicFilter()
self.extractor = Extractor(verbose=extractor_verbose)
# Create SmartSignature and MagicParser class instances. These are mostly for internal use. = SmartSignature(self.filter)
self.parser = MagicParser(self.filter,
def __del__(self):
Class destructor.
def cleanup(self):
Cleanup any temporary files generated by the internal instance of MagicParser.
Returns None.
def load_signatures(self, magic_files=[], pre_filter_signatures=True, filter_short_signatures=True):
Load signatures from magic file(s).
Called automatically by Binwalk.scan() with all defaults, if not already called manually.
@magic_files - A list of magic files to use (default: self.magic_files).
@pre_filter_signatures - Set to False to disable pre-filtering of signatures before invoking libmagic.
@filter_short_signatures - Set to False to include signatures with short (<= 2 byte) magic strings.
Returns None.
# Disable pre filtering in the smart signature class instance.
# This is also checked by Binwalk.scan() before performing pre-filtering. = pre_filter_signatures
# The magic files specified here override any already set
if magic_files and magic_files is not None:
self.magic_files = magic_files
# Parse the magic file(s) and initialize libmagic
self.mfile = self.parser.parse(self.magic_files, filter_short_signatures=filter_short_signatures, pre_filter_signatures=pre_filter_signatures)
self.magic =
def scan(self, target_file, offset=0, length=0, align=DEFAULT_BYTE_ALIGNMENT, show_invalid_results=False, callback=None):
Performs a Binwalk scan on the target file.
@target_file - File to scan.
@offset - Starting offset at which to start the scan.
@length - Number of bytes to scan.
@align - Look for signatures every align bytes.
@show_invalid_results - Set to True to display invalid results.
@callback - Callback function to be invoked when matches are found.
The callback function is passed two arguments: the offset at which the results were identified, and a list of
result dictionaries containing the scan results (one result per dict). Example callback function:
def my_callback(offset, results):
print "Found %d results at offset %d:" % (len(results), offset)
for result in results:
print "\t%s" % result['description']
Upon completion, the scan method returns a sorted list of tuples containing a list of results dictionaries
and the offsets at which those results were identified:
scan_items = [
(0, [{description : "LZMA compressed data..."}]),
(112, [{description : "gzip compressed data..."}])
See SmartSignature.parse for a more detailed description of the results dictionary structure.
scan_results = {}
self.total_read = 0
self.total_scanned = 0
self.scan_length = length
self.filter.show_invalid_results = show_invalid_results
# Load the default signatures if self.load_signatures has not already been invoked
if self.magic is None:
# Get a local copy of the signature sets generated by self.parser.build_signature_set.
# This is accessed heavily throughout the scan, and there is less overhead for accessing local variables in Python.
signature_set = self.parser.build_signature_set()
# Need the total size of the target file, even if we aren't scanning the whole thing
fsize = file_size(target_file)
# Open the target file and seek to the specified start offset
fd = open(target_file)
# If no length was specified, make the length the size of the target file minus the starting offset
if self.scan_length == 0:
self.scan_length = fsize - offset
# Sanity check on the byte alignment; default to 1
if align <= 0:
align = 1
# Main loop, scan through all the data
while True:
i = 0
# Read in the next block of data from the target file and make sure it's valid
(data, dlen) = self._read_block(fd)
if data is None or dlen == 0:
# The total number of bytes scanned could be bigger than the total number
# of bytes read from the file under the following circumstances:
# o The previous dlen was not a multiple of align
# o A previous result specified a jump offset that was beyond the end of the
# then current data block
# If this is the case, we need to index into this data block appropriately in order to
# resume the scan from the appropriate offset, and adjust dlen accordingly.
bufindex = self.total_scanned - self.total_read
if bufindex > 0:
# If the total_scanned > total_read, then the total_scanned offset is in a subsequent block.
# Set i to bufindex, which will cause i to be greater than dlen and this block will be skipped.
i = bufindex
elif bufindex < 0:
# If the total_scanned offset is less than total_read, then the total_scanned offset is
# somewhere inside this block. Set i to index into the block appropriately.
i = dlen + bufindex
# If the total_scanned offset ends at the end of this block, don't scan any of this block
i = dlen
# Scan through each block of data looking for signatures
while i < dlen:
smart = {}
results = []
results_offset = -1
pre_filter_ok = False
smart_jump_done = False
# Pre-filter data by checking to see if the parser thinks this might be a valid match.
# This eliminates unnecessary calls into libmagic, which are very expensive.
# Ideally, this should be done in the MagicParser class, but function calls are expensive.
# Doing it here greatly decreases the scan time.
for (sig_offset, sigset) in signature_set:
if data[i+sig_offset:i+sig_offset+self.parser.MATCH_INDEX_SIZE] in sigset:
pre_filter_ok = True
pre_filter_ok = True
if pre_filter_ok:
# Pass the data to libmagic, and split out multiple results into a list
for magic_result in self.parser.split(self.magic.buffer(data[i:i+self.MAX_SIGNATURE_SIZE])):
# Some file names are not NULL byte terminated, but rather their length is
# specified in a size field. To ensure these are not marked as invalid due to
# non-printable characters existing in the file name, parse the filename(s) and
# trim them to the specified filename length, if one was specified.
magic_result =
# Make sure this is a valid result before further processing
if not self.filter.invalid(magic_result):
# The smart filter parser returns a dictionary of keyword values and the signature description.
smart =
# Validate the jump value and check if the response description should be displayed
if smart['jump'] > -1 and self._should_display(smart['description']):
# If multiple results are returned and one of them has smart['jump'] set to a non-zero value,
# the calculated results offset will be wrong since i will have been incremented. Only set the
# results_offset value when the first match is encountered.
if results_offset < 0:
results_offset = offset + smart['adjust'] + self.total_scanned
# Double check to make sure the smart['adjust'] value is sane.
# If it makes results_offset negative, then it is not sane.
if results_offset >= 0:
# Extract the result, if it matches one of the extract rules and is not a delayed extract.
if self.extractor.enabled and not (self.extractor.delayed and smart['delay']):
# If the signature did not specify a size, extract to the end of the file.
if smart['size'] == 0:
smart['size'] = fsize-results_offset
smart['extract'] = self.extractor.extract( results_offset,
# This appears to be a valid result, so append it to the results list.
# Jump to the offset specified by jump. Only do this once, so that if multiple results
# are returned each of which specify a jump offset, only the first will be honored.
if smart['jump'] > 0 and not smart_jump_done:
# Once a jump offset has been honored, we need to start scanning every byte since the
# jump offset may have thrown off the original alignment. In terms of speed this is fine,
# since the jump offset usually saves more time anyway. If this is not what the user
# wanted/intended, disabling pre filtering will disable jump offset processing completely.
smart_jump_done = True
i += (smart['jump'] - align)
self.total_scanned += (smart['jump'] - align)
# Did we find any valid results?
if results_offset >= 0:
scan_results[results_offset] = results
if callback is not None:
callback(results_offset, results)
# Track the number of bytes scanned in this block, and the total number of bytes scanned.
i += align
self.total_scanned += align
# Sort the results before returning them
scan_items = scan_results.items()
# Do delayed extraction, if specified.
if self.extractor.enabled and self.extractor.delayed:
scan_items = self.extractor.delayed_extract(scan_items, target_file, fsize)
return scan_items
def _should_display(self, data):
    '''
    Determines if a result string should be displayed to the user or not.

    @data - Display string.

    Returns True if the string should be displayed.
    Returns False if the string should not be displayed.
    '''
    # Empty or missing descriptions are never displayed. Return an explicit
    # False here rather than handing the falsy input back to the caller.
    if not data:
        return False

    # Anything the filter considers invalid is dropped before the
    # include/exclude rules are consulted (same evaluation order as before).
    if self.filter.invalid(data):
        return False

    # Display the result unless the filter explicitly excludes it.
    return self.filter.filter(data) != self.filter.FILTER_EXCLUDE
def _read_block(self, fd):
Reads in a block of data from the target file.
@fd - File object for the target file.
Returns a tuple of (file block data, block data length).
dlen = 0
data = None
# Read in READ_BLOCK_SIZE plus MAX_SIGNATURE_SIZE bytes, but return a max dlen value
# of READ_BLOCK_SIZE. This ensures that there is a MAX_SIGNATURE_SIZE buffer at the
# end of the returned data in case a signature is found at or near data[dlen].
if self.total_read < self.scan_length:
data =
if data and data is not None:
# Get the actual length of the read in data
dlen = len(data)
# If we've read in more data than the scan length, truncate the dlen value
if (self.total_read + dlen) >= self.scan_length:
dlen = self.scan_length - self.total_read
# If dlen is the expected rlen size, it should be set to READ_BLOCK_SIZE
elif dlen == rlen:
dlen = self.READ_BLOCK_SIZE
# Increment self.total_read to reflect the amount of data that has been read
# for processing (actual read size is larger of course, due to the MAX_SIGNATURE_SIZE
# buffer of data at the end of each block).
self.total_read += dlen
# Seek to the self.total_read offset so the next read can pick up where this one left off
return (data, dlen)
# Common functions.
import os
import re
def file_size(filename):
    '''
    Obtains the size of a given file.

    @filename - Path to the file.

    Returns the size of the file.
    Raises an Exception with a descriptive message if the size cannot be obtained.
    '''
    # Using open/lseek works on both regular files and block devices
    fd = os.open(filename, os.O_RDONLY)
    try:
        # Seeking to the end of the file returns the resulting offset,
        # which is the total size of the file.
        return os.lseek(fd, 0, os.SEEK_END)
    except Exception as e:
        raise Exception("file_size failed to obtain the size of '%s': %s" % (filename, str(e)))
    finally:
        # Always release the descriptor, whether or not the seek succeeded,
        # so repeated calls do not leak file descriptors.
        os.close(fd)
def str2int(string):
    '''
    Attempts to convert string to a base 10 integer; if that fails, then base 16.

    @string - String to convert to an integer.

    Returns the integer value on success.
    Throws an exception if the string cannot be converted into either a base 10 or base 16 integer value.
    '''
    try:
        # Base 10 is tried first, so a string such as "10" is always decimal ten
        return int(string)
    except ValueError:
        # Not a valid decimal string; fall back to hexadecimal. An optional
        # "0x" prefix is accepted by int() when the base is 16. If this also
        # fails, the resulting exception propagates to the caller.
        return int(string, 16)
def strip_quoted_strings(string):
Strips out data in between double quotes.
@string - String to strip.
Returns a sanitized string.
# This regex removes all quoted data from string.
# Note that this removes everything in between the first and last double quote.
# This is intentional, as printed (and quoted) strings from a target file may contain
# double quotes, and this function should ignore those. However, it also means that any
# data between two quoted strings (ex: '"quote 1" you won't see me "quote 2"') will also be stripped.