Commit 20c7e28c authored by Devon Kearns

Imported Upstream version 1.2.2-1

parent e2e4d59b
......@@ -4,12 +4,12 @@ import sys
import os.path
import binwalk
from threading import Thread
from getopt import GetoptError, getopt as GetOpt
from getopt import GetoptError, gnu_getopt as GetOpt
def display_status():
global bwalk
while True:
while bwalk is not None:
# Display the current scan progress when the enter key is pressed.
......@@ -29,6 +29,10 @@ Extracting files from firmware:
\t$ %s -Me firmware.bin
Hueristic compression/encryption analysis:
\t$ %s -H firmware.bin
Scanning firmware for executable code:
\t$ %s -A firmware.bin
......@@ -45,8 +49,12 @@ Display identified file signatures on entropy graph:
\t$ %s -EB firmware.bin
Diffing multiple files:
\t$ %s -W firmware1.bin firmware2.bin firmware3.bin
See for more.
""" % (name, name, name, name, name, name)
""" % (name, name, name, name, name, name, name, name)
def usage(fd):
......@@ -68,6 +76,7 @@ def usage(fd):
fd.write("\t-x, --exclude=<filter> Exclude matches that have <filter> in their description\n")
fd.write("\t-y, --include=<filter> Only search for matches that have <filter> in their description\n")
fd.write("\t-I, --show-invalid Show results marked as invalid\n")
fd.write("\t-T, --ignore-time-skew Do not show results that have timestamps more than 1 year in the future\n")
fd.write("\t-k, --keep-going Show all matching results at a given offset, not just the first one\n")
fd.write("\t-b, --dumb Disable smart signature keywords\n")
......@@ -79,14 +88,24 @@ def usage(fd):
fd.write("Entropy Analysis:\n")
fd.write("\t-E, --entropy Plot file entropy (may be combined with -B, -R, -A, or -S)\n")
fd.write("\t-K, --block=<int> Set the block size for entropy analysis\n")
fd.write("\t-a, --shannon Use the Shannon entropy algorithm\n")
fd.write("\t-H, --heuristic Identify unknown compression/encryption based on entropy heuristics (implies -E)\n")
fd.write("\t-K, --block=<int> Set the block size for entropy analysis (default: %d)\n" % binwalk.entropy.FileEntropy.DEFAULT_BLOCK_SIZE)
fd.write("\t-a, --gzip Use gzip compression ratios to measure entropy\n")
fd.write("\t-N, --no-plot Do not generate an entropy plot graph\n")
fd.write("\t-F, --marker=<offset:name> Add a marker to the entropy plot graph\n")
fd.write("\t-Q, --no-legend Omit the legend from the entropy plot graph\n")
fd.write("\t-J, --save-plot Save plot as an SVG (implied if multiple files are specified)\n")
fd.write("Binary Diffing:\n")
fd.write("\t-W, --diff Hexdump / diff the specified files\n")
fd.write("\t-K, --block=<int> Number of bytes to display per line (default: %d)\n" % binwalk.hexdiff.HexDiff.DEFAULT_BLOCK_SIZE)
fd.write("\t-G, --green Only show hex dump lines that contain bytes which were the same in all files\n")
fd.write("\t-i, --red Only show hex dump lines that contain bytes which were different in all files\n")
fd.write("\t-U, --blue Only show hex dump lines that contain bytes which were different in some files\n")
fd.write("\t-w, --terse Diff all files, but only display a hex dump of the first file\n")
fd.write("Extraction Options:\n")
fd.write("\t-D, --dd=<type:ext[:cmd]> Extract <type> signatures, give the files an extension of <ext>, and execute <cmd>\n")
fd.write("\t-e, --extract=[file] Automatically extract known file types; load rules from file, if specified\n")
......@@ -134,9 +153,10 @@ def main():
strlen = 0
verbose = 0
matryoshka = 1
entropy_block = 0
block_size = 0
failed_open_count = 0
quiet = False
do_comp = False
do_files = False
log_file = None
do_csv = False
......@@ -150,9 +170,11 @@ def main():
format_to_terminal = False
custom_signature = None
delay_extraction = False
ignore_time_skew = True
extract_rules_file = None
ignore_failed_open = False
extract_from_config = False
show_single_hex_dump = False
cleanup_after_extract = False
explicit_signature_scan = False
ignore_signature_keywords = False
......@@ -172,10 +194,13 @@ def main():
config = binwalk.Config()
short_options = "AaBbCcdEehIJkLMNnOPpQqrStuv?D:F:f:g:K:o:l:m:R:s:X:x:Y:y:"
short_options = "AaBbCcdEeGHhIiJkLMNnOPpQqrSTtUuvWw?D:F:f:g:K:o:l:m:R:s:X:x:Y:y:"
long_options = [
......@@ -186,14 +211,19 @@ def main():
......@@ -248,10 +278,16 @@ def main():
show_legend = False
elif opt in ("-J", "--save-plot"):
save_plot = True
elif opt in ("-N", "--no-plot"):
show_plot = False
elif opt in ("-E", "--entropy"):
elif opt in ("-a", "--shannon"):
entropy_algorithm = 'shannon'
elif opt in ("-W", "--diff"):
elif opt in ("-w", "--terse"):
show_single_hex_dump = True
elif opt in ("-a", "--gzip"):
entropy_algorithm = 'gzip'
elif opt in("-t", "--term", "--tim"):
format_to_terminal = True
elif opt in("-p", "--disable-plugins"):
......@@ -260,8 +296,6 @@ def main():
ignore_signature_keywords = True
elif opt in ("-v", "--verbose"):
verbose += 1
elif opt in ("-N", "--no-plot"):
show_plot = False
elif opt in ("-S", "--strings"):
elif opt in ("-O", "--skip-unopened"):
......@@ -278,6 +312,12 @@ def main():
elif opt in ("-g", "--grep"):
elif opt in ("-G", "--green"):
elif opt in ("-i", "--red"):
elif opt in ("-U", "--blue"):
elif opt in ("-r", "--rm"):
cleanup_after_extract = True
elif opt in ("-m", "--magic"):
......@@ -292,12 +332,18 @@ def main():
# Original Zvyozdochkin matryoshka set had 8 dolls. This is a good number.
matryoshka = 8
elif opt in ("-K", "--block"):
entropy_block = binwalk.common.str2int(arg)
block_size = binwalk.common.str2int(arg)
elif opt in ("-X", "--disable-plugin"):
elif opt in ("-Y", "--enable-plugin"):
elif opt in ("-T", "--ignore-time-skew"):
ignore_time_skew = False
elif opt in ("-H", "--heuristic", "--math"):
do_comp = True
if binwalk.Binwalk.ENTROPY not in requested_scans:
elif opt in ("-F", "--marker"):
if ':' in arg:
(location, description) = arg.split(':', 1)
......@@ -402,12 +448,12 @@ def main():
# Instantiate the Binwalk class
bwalk = binwalk.Binwalk(flags=magic_flags, verbose=verbose, log=log_file, quiet=quiet, ignore_smart_keywords=ignore_signature_keywords, load_plugins=enable_plugins)
bwalk = binwalk.Binwalk(magic_files=magic_files, flags=magic_flags, verbose=verbose, log=log_file, quiet=quiet, ignore_smart_keywords=ignore_signature_keywords, load_plugins=enable_plugins, ignore_time_skews=ignore_time_skew)
# If a custom signature was specified, create a temporary magic file containing the custom signature
# and ensure that it is the only magic file that will be loaded when Binwalk.scan() is called.
if custom_signature is not None:
magic_files = bwalk.parser.file_from_string(custom_signature)
bwalk.magic_files = [bwalk.parser.file_from_string(custom_signature)]
# Set any specified filters
......@@ -432,7 +478,7 @@ def main():
# Load the magic file(s)
# If --term was specified, enable output formatting to terminal
if format_to_terminal:
......@@ -488,14 +534,18 @@ def main():
bwalk.concatenate_results(results, r)
elif scan_type == binwalk.Binwalk.COMPRESSION:
r = bwalk.analyze_compression(target_files, offset=offset, length=length)
bwalk.concatenate_results(results, r)
elif scan_type == binwalk.Binwalk.ENTROPY:
if not results:
......@@ -511,19 +561,26 @@ def main():
elif scan_type == binwalk.Binwalk.HEXDIFF:
bwalk.hexdiff(target_files, offset=offset, length=length, block=block_size, first=show_single_hex_dump)
except KeyboardInterrupt:
# except Exception, e:
# print "Unexpected error:", str(e)
except IOError:
except Exception, e:
print "Unexpected error:", str(e)
This diff is collapsed.
#!/usr/bin/env python
# Routines to perform Monte Carlo Pi approximation and Chi Squared tests.
# Used for fingerprinting unknown areas of high entropy (e.g., is this block of high entropy data compressed or encrypted?).
# Inspired by people who actually know what they're doing:
import math
class MonteCarloPi(object):
    """
    Performs a Monte Carlo Pi approximation.

    Currently unused.
    """

    def __init__(self):
        """
        Class constructor.

        Returns None.
        """
        # Start from a clean state so update() / montecarlo() can be called
        # on a freshly constructed instance.
        self.reset()

    def reset(self):
        """
        Reset state to the beginning.

        Returns None.
        """
        self.pi = 0
        self.error = 0
        # m counts the points that fell inside the circle, n counts all points.
        self.m = 0
        self.n = 0

    def update(self, data):
        """
        Update the pi approximation with new data.

        @data - A string of bytes to update (length must be >= 6).
                Trailing bytes that do not fill a complete 6-byte sample are ignored.

        Returns None.
        """
        # Normalize each element to an integer byte value. Iterating a string
        # yields 1-character strings (which need ord()), while bytes/bytearray
        # objects may yield integers directly.
        values = [b if isinstance(b, int) else ord(b) for b in data]

        c = 0
        dlen = len(values)

        # '<=' so that the final complete 6-byte sample (including an input of
        # exactly 6 bytes) is consumed rather than silently dropped.
        while (c + 6) <= dlen:
            # Treat 3 bytes as an x coordinate, the next 3 bytes as a y coordinate.
            # Our box is 1x1, so divide by 2^24 to put the x y values inside the box.
            x = ((values[c] << 16) + (values[c + 1] << 8) + values[c + 2]) / 16777216.0
            y = ((values[c + 3] << 16) + (values[c + 4] << 8) + values[c + 5]) / 16777216.0
            c += 6

            # Does the x,y point lie inside the quarter of the radius 1 circle
            # that overlaps our box?
            if ((x ** 2) + (y ** 2)) <= 1:
                self.m += 1
            self.n += 1

    def montecarlo(self):
        """
        Approximates the value of Pi based on the provided data.

        Returns a tuple of (approximated value of pi, percent deviation).
        Returns (0.0, 0.0) if the approximation is zero (e.g., no data yet).
        """
        if self.n:
            # Ratio of points inside the quarter circle to all points, times 4.
            self.pi = (float(self.m) / float(self.n) * 4.0)

        if self.pi:
            self.error = math.fabs(1.0 - (math.pi / self.pi)) * 100.0
            return (self.pi, self.error)

        return (0.0, 0.0)
class ChiSquare(object):
    """
    Performs a Chi Squared test against the provided data.
    """

    # Number of possible byte values.
    IDEAL = 256.0

    def __init__(self):
        """
        Class constructor.

        Returns None.
        """
        self.bytes = {}
        self.freedom = self.IDEAL - 1

        # Initialize the self.bytes dictionary with keys for all possible byte values (0 - 255)
        for i in range(0, int(self.IDEAL)):
            self.bytes[chr(i)] = 0

        # Make sure xc2 and byte_count exist before update() / chisq() are called.
        self.reset()

    def reset(self):
        """
        Reset the statistic, the total byte count and every per-byte counter to zero.

        Returns None.
        """
        self.xc2 = 0.0
        self.byte_count = 0

        for key in self.bytes:
            self.bytes[key] = 0

    def update(self, data):
        """
        Updates the current byte counts with new data.

        @data - String of bytes to update.

        Returns None.
        """
        # Count the number of occurrences of each byte value.
        # Elements that iterate as integers (bytes/bytearray objects) are mapped
        # to the chr() keys that self.bytes is indexed by.
        for i in data:
            if isinstance(i, int):
                i = chr(i)
            self.bytes[i] += 1

        self.byte_count += len(data)

    def chisq(self):
        """
        Calculate the Chi Square critical value.

        Returns the critical value (0.0 if no data has been counted).
        """
        # Recompute from scratch so that repeated calls without an intervening
        # reset() return the same value instead of accumulating.
        self.xc2 = 0.0

        # Count each byte value would have if the data were uniformly distributed.
        expected = self.byte_count / self.IDEAL

        if expected:
            for count in self.bytes.values():
                self.xc2 += ((count - expected) ** 2) / expected

        return self.xc2
class CompressionEntropyAnalyzer(object):
Class wrapper around ChiSquare.
Performs analysis and attempts to interpret the results.
DESCRIPTION = "Statistical Compression Analysis"
def __init__(self, fname, start, length, binwalk=None, fp=None):
Class constructor.
@fname - The file to scan.
@start - The start offset to begin analysis at.
@length - The number of bytes to analyze.
@callback - Callback function compatible with Binwalk.display.
Returns None.
if fname:
self.fp = open(fname, 'rb')
self.fp = fp
self.start = start
self.length = length
self.binwalk = binwalk
def analyze(self):
Perform analysis and interpretation.
Returns a descriptive string containing the results and attempted interpretation.
i = 0
num_error = 0
analyzer_results = []
if self.binwalk:
self.binwalk.display.header(, description=self.DESCRIPTION)
chi = ChiSquare()
while i < self.length:
rsize = self.length - i
if rsize > self.BLOCK_SIZE:
rsize = self.BLOCK_SIZE
d =
if len(d) != rsize:
if chi.chisq() >= self.CHI_CUTOFF:
num_error += 1
i += rsize
if num_error > 0:
verdict = 'Moderate entropy data, best guess: compressed'
verdict = 'High entropy data, best guess: encrypted'
result = [{'offset' : self.start, 'description' : '%s, size: %d, %d low entropy blocks' % (verdict, self.length, num_error)}]
if self.binwalk:
self.binwalk.display.results(self.start, result)
return result
......@@ -33,7 +33,7 @@ class Config:
o PLUGINS - Path to the plugins directory.
# Release version
VERSION = "1.2.1"
VERSION = "1.2.2-1"
# Sub directories
BINWALK_USER_DIR = ".binwalk"
......@@ -47,6 +47,7 @@ class Config:
def __init__(self):
......@@ -74,6 +75,7 @@ class Config:
self.paths['system'][self.BINWALK_MAGIC_FILE] = self._system_path(self.BINWALK_MAGIC_DIR, self.BINWALK_MAGIC_FILE)
self.paths['system'][self.BINCAST_MAGIC_FILE] = self._system_path(self.BINWALK_MAGIC_DIR, self.BINCAST_MAGIC_FILE)
self.paths['system'][self.BINARCH_MAGIC_FILE] = self._system_path(self.BINWALK_MAGIC_DIR, self.BINARCH_MAGIC_FILE)
self.paths['system'][self.ZLIB_MAGIC_FILE] = self._system_path(self.BINWALK_MAGIC_DIR, self.ZLIB_MAGIC_FILE)
self.paths['system'][self.EXTRACT_FILE] = self._system_path(self.BINWALK_CONFIG_DIR, self.EXTRACT_FILE)
self.paths['system'][self.PLUGINS] = self._system_path(self.BINWALK_PLUGINS_DIR)
......@@ -14,16 +14,31 @@
^posix tar archive:tar:tar xvf '%e'
^rar archive data:rar:unrar e '%e'
^arj archive data.*comment header:arj:arj e '%e'
^iso 9660:iso:7z x '%e' -oiso-root
# These assume the firmware-mod-kit is installed to /opt/firmware-mod-kit.
# If not, change the file paths appropriately.
^squashfs filesystem:squashfs:/opt/firmware-mod-kit/ '%e'
^jffs2 filesystem:jffs2:/opt/firmware-mod-kit/src/jffs2/unjffs2 '%e'
^ascii cpio archive:cpio:/opt/firmware-mod-kit/ '%e'
^cramfs filesystem:cramfs:/opt/firmware-mod-kit/ '%e'
^bff volume entry:bff:/opt/firmware-mod-kit/src/bff/ '%e'
^wdk file system:wdk:/opt/firmware-mod-kit/src/firmware-tools/ '%e'
^zlib header:zlib:/opt/firmware-mod-kit/src/firmware-tools/ '%e'
^ext2 filesystem:ext2:/opt/firmware-mod-kit/src/mountcp/mountcp '%e' ext2-root
^romfs filesystem:romfs:/opt/firmware-mod-kit/src/mountcp/mountcp '%e' romfs-root
# These paths are for the deprecated firmware-mod-kit file paths, which included the 'trunk' directory.
# These will only be run if the above file paths don't exist.
^squashfs filesystem:squashfs:/opt/firmware-mod-kit/trunk/ '%e'
^jffs2 filesystem:jffs2:/opt/firmware-mod-kit/trunk/src/jffs2/unjffs2 '%e' # requires root
^ascii cpio archive:cpio:/opt/firmware-mod-kit/trunk/ '%e'
^cramfs filesystem:cramfs:/opt/firmware-mod-kit/trunk/ '%e'
^bff volume entry:bff:/opt/firmware-mod-kit/trunk/src/bff/ '%e'
# If FMK isn't installed, try the system's unsquashfs for SquashFS files
^squashfs filesystem:squashfs:unsquashfs '%e'
# Extract, but don't run anything
^ext2 filesystem:ext2
^romfs filesystem:romfs
^private key:key
private key:key
This diff is collapsed.
......@@ -56,7 +56,7 @@ class Extractor:
self.enabled = False
self.delayed = False
self.verbose = verbose
self.extract_rules = {}
self.extract_rules = []
self.remove_after_execute = False
self.extract_path = os.getcwd()
......@@ -110,12 +110,8 @@ class Extractor:
r['cmd'] = cmd
# Verify that the match string and file extension were retrieved.
# Only add the rule if it is a new one (first come, first served).
if match and r['extension'] and not self.extract_rules.has_key(match):
self.extract_rules[match] = {}
self.extract_rules[match]['cmd'] = r['cmd']
self.extract_rules[match]['extension'] = r['extension']
self.extract_rules[match]['regex'] = r['regex']
if match and r['extension']:
# Once any rule is added, set self.enabled to True
self.enabled = True
......@@ -127,13 +123,16 @@ class Extractor:
Returns the number of rules removed.
i = 0
rm = []
for key in self.extract_rules.keys():
if self.extract_rules[key]['regex'].match(text):
del self.extract_rules[key]
i += 1
return i
for i in range(0, len(self.extract_rules)):
if self.extract_rules[i]['regex'].match(text):
for i in rm:
return len(rm)
def clear_rules(self):
......@@ -141,12 +140,12 @@ class Extractor:
Returns None.
self.extract_rules = {}
self.extract_rules = []
self.enabled = False
def get_rules(self):
Returns a dictionary of all extraction rules.
Returns a list of all extraction rules.
return self.extract_rules
......@@ -247,10 +246,18 @@ class Extractor:
if os.path.isfile(file_path):
rules = self._match(description)
# Loop through each extraction rule until one succeeds
for i in range(0, len(rules)):
rule = rules[i]
rule = self._match(description)
if rule is not None:
# Copy out the data to disk, if we haven't already
fname = self._dd(file_path, offset, size, rule['extension'], output_file_name=name)
# If there was a command specified for this rule, try to execute it.
# If execution fails, the next rule will be attempted.
if rule['cmd']:
# Many extraction utilities will extract the file to a new file, just without
......@@ -263,7 +270,7 @@ class Extractor:
cleanup_extracted_fname = False
# Execute the specified command against the extracted file
self._execute(rule['cmd'], fname)
extract_ok = self._execute(rule['cmd'], fname)
# Only clean up files if remove_after_execute was specified
if self.remove_after_execute:
......@@ -282,11 +289,28 @@ class Extractor:
fname = os.path.join(self.extract_path, fname)
# If the command executed OK, don't try any more rules
if extract_ok:
# Else, remove the extracted file if this isn't the last rule in the list.
# If it is the last rule, leave the file on disk for the user to examine.
elif i != len(rules):
# If there was no command to execute, just use the first rule
# If a file was extracted, return the full path to that file
if fname:
fname = os.path.join(self.extract_path, fname)
return fname
def delayed_extract(self, results, file_name, size):
......@@ -355,12 +379,13 @@ class Extractor:
Returns the associated rule dictionary if a match is found.
Returns None if no match is found.
rules = []
description = description.lower()
for (m, rule) in self.extract_rules.iteritems():
for rule in self.extract_rules:
if rule['regex'].search(description):
return rule
return None
return rules
def _parse_rule(self, rule):
......@@ -432,9 +457,10 @@ class Extractor:
@cmd - Command to execute.
@fname - File to run command against.
Returns None.
Returns True on success, False on failure.
tmp = None
retval = True
if callable(cmd):
......@@ -450,10 +476,16 @@ class Extractor:
# Execute., stdout=tmp, stderr=tmp)
except Exception, e:
sys.stderr.write("WARNING: Extractor.execute failed to run '%s': %s\n" % (cmd, str(e)))
# Silently ignore no such file or directory errors. Why? Because these will inevitably be raised when
# making the switch to the new firmware mod kit directory structure. We handle this elsewhere, but it's
# annoying to see this spammed out to the console every time.
if e.errno != 2:
sys.stderr.write("WARNING: Extractor.execute failed to run '%s': %s\n" % (str(cmd), str(e)))
retval = False
if tmp is not None:
return retval
......@@ -155,7 +155,7 @@ class MagicFilter:
if self.INVALID_RESULT in common.strip_quoted_strings(
return True
# There should be no non-printable data in any of the data
# There should be no non-printable characters in any of the data
if self.NON_PRINTABLE_RESULT in data:
return True
#!/usr/bin/env python
import os
import sys
import string
import curses
import platform
class HexDiff(object):
'red' : '31',
'green' : '32',
'blue' : '34',
def __init__(self, binwalk=None):
self.block_hex = ""