Commit 099fd67a authored by Devon Kearns's avatar Devon Kearns

Imported Upstream version 1.0

parent a7e262b1
This diff is collapsed.
This diff is collapsed.
# Common functions.
import os
import re
def file_size(filename):
    '''
    Obtains the size of a given file.

    @filename - Path to the file.

    Returns the size of the file, as the offset reported by seeking to its end.
    Raises an Exception describing the failure if the seek fails.
    Errors raised while opening the file propagate unchanged.
    '''
    # Using open/lseek works on both regular files and block devices
    fd = os.open(filename, os.O_RDONLY)
    try:
        return os.lseek(fd, 0, os.SEEK_END)
    except Exception as e:
        # 'as' form is valid on Python 2.6+ and Python 3 (the old 'except X, e' is 2.x only)
        raise Exception("file_size failed to obtain the size of '%s': %s" % (filename, str(e)))
    finally:
        # Always release the descriptor, whether or not the seek succeeded
        os.close(fd)
def str2int(string):
    '''
    Attempts to convert string to a base 10 integer; if that fails, then base 16.

    @string - String to convert to an integer.

    Returns the integer value on success.
    Throws an exception if the string cannot be converted into either a base 10 or base 16 integer value.
    '''
    try:
        return int(string)
    except ValueError:
        # Only a failed base 10 parse is worth retrying as hex; anything else
        # (e.g. KeyboardInterrupt) must not be silently swallowed.
        return int(string, 16)
def strip_quoted_strings(string):
    '''
    Removes double-quoted data from a string.

    @string - String to strip.

    Returns the string with the quoted region (quotes included) removed.
    '''
    # The match is greedy on purpose: it spans from the FIRST double quote to the LAST one.
    # Strings printed from a target file may themselves contain double quotes, and those
    # must not terminate the match early. The trade-off is that unquoted text sitting
    # between two quoted strings (ex: '"quote 1" you won't see me "quote 2"') is removed too.
    quoted_region = re.compile(r'\"(.*)\"')
    return quoted_region.sub("", string)
def get_quoted_strings(string):
    '''
    Returns a string comprised of all data in between double quotes.

    @string - String to get quoted data from.

    Returns a string of quoted data on success.
    Returns a blank string if no quoted data is present, or if string is not searchable text.
    '''
    try:
        # This regex grabs all quoted data from string.
        # Note that this gets everything in between the first and last double quote.
        # This is intentional, as printed (and quoted) strings from a target file may contain
        # double quotes, and this function should ignore those. However, it also means that any
        # data between two quoted strings (ex: '"quote 1" non-quoted data "quote 2"') will also be included.
        return re.findall(r'\"(.*)\"', string)[0]
    except (IndexError, TypeError):
        # IndexError: no quoted data was found.
        # TypeError: string was not something the regex can search (e.g. None).
        return ''
import os
class Config:
    '''
    Binwalk configuration class, used for accessing user and system file paths.

    After instantiating the class, file paths can be accessed via the self.paths dictionary.
    System file paths are listed under the 'system' key, user file paths under the 'user' key.

    Note that instantiating the class attempts to create any of these directories and files
    that do not already exist (see _file_path); failures to create them are ignored.

    For example, to get the path to both the user and system binwalk magic files:

        from binwalk import Config

        conf = Config()
        user_binwalk_file = conf.paths['user'][conf.BINWALK_MAGIC_FILE]
        system_binwalk_file = conf.paths['system'][conf.BINWALK_MAGIC_FILE]

    There is also an instance of this class available via the Binwalk.config object:

        import binwalk

        bw = binwalk.Binwalk()
        user_binwalk_file = bw.config.paths['user'][bw.config.BINWALK_MAGIC_FILE]
        system_binwalk_file = bw.config.paths['system'][bw.config.BINWALK_MAGIC_FILE]

    Valid file names under both the 'user' and 'system' keys are as follows:

        o BINWALK_MAGIC_FILE - Path to the default binwalk magic file.
        o BINCAST_MAGIC_FILE - Path to the bincast magic file (used when -C is specified with the command line binwalk script)
        o BINARCH_MAGIC_FILE - Path to the binarch magic file (used when -A is specified with the command line binwalk script)
        o EXTRACT_FILE - Path to the extract configuration file (used when -e is specified with the command line binwalk script)
    '''

    # Release version
    VERSION = "1.0"

    # Sub directories
    BINWALK_USER_DIR = ".binwalk"
    BINWALK_MAGIC_DIR = "magic"
    BINWALK_CONFIG_DIR = "config"

    # File names
    EXTRACT_FILE = "extract.conf"
    BINWALK_MAGIC_FILE = "binwalk"
    BINCAST_MAGIC_FILE = "bincast"
    BINARCH_MAGIC_FILE = "binarch"

    def __init__(self):
        '''
        Class constructor. Enumerates file paths and populates self.paths.
        '''
        # Path to the user binwalk directory
        self.user_dir = self._get_user_dir()
        # Path to the system wide binwalk directory
        self.system_dir = self._get_system_dir()

        # Dictionary of all absolute user/system file paths
        self.paths = {
            'user' : {},
            'system' : {},
        }

        # (sub directory, file name) for every file tracked in self.paths
        managed_files = [
            (self.BINWALK_MAGIC_DIR, self.BINWALK_MAGIC_FILE),
            (self.BINWALK_MAGIC_DIR, self.BINCAST_MAGIC_FILE),
            (self.BINWALK_MAGIC_DIR, self.BINARCH_MAGIC_FILE),
            (self.BINWALK_CONFIG_DIR, self.EXTRACT_FILE),
        ]

        # Build the paths to all user-specific files
        for (subdir, basename) in managed_files:
            self.paths['user'][basename] = self._user_file(subdir, basename)

        # Build the paths to all system-wide files
        for (subdir, basename) in managed_files:
            self.paths['system'][basename] = self._system_file(subdir, basename)

    def _get_system_dir(self):
        '''
        Find the directory where the binwalk module is installed on the system.

        Returns the directory containing this module, or a blank string if it cannot be determined.
        '''
        try:
            root = __file__
            if os.path.islink(root):
                root = os.path.realpath(root)
            return os.path.dirname(os.path.abspath(root))
        except Exception:
            # Best effort: e.g. __file__ may be undefined in some execution contexts
            return ''

    def _get_user_dir(self):
        '''
        Get the user's home directory.

        Returns the value of USERPROFILE or HOME, or a blank string if neither is set.
        '''
        # This should work in both Windows and Unix environments.
        # os.getenv returns None (it does not raise) for an unset variable, so fall back
        # to '' explicitly; a None here would break os.path.join in _user_file.
        return os.getenv('USERPROFILE') or os.getenv('HOME') or ''

    def _file_path(self, dirname, filename):
        '''
        Builds a file path and creates the directory and file if they don't already exist.
        Creation is best effort; if the directory or file cannot be created, the path is still returned.

        @dirname - Directory path.
        @filename - File name.

        Returns a full path of 'dirname/filename'.
        '''
        if not os.path.exists(dirname):
            try:
                os.makedirs(dirname)
            except (IOError, OSError):
                # e.g. insufficient permissions for the system directory
                pass

        fpath = os.path.join(dirname, filename)

        if not os.path.exists(fpath):
            try:
                # Create an empty file so that later opens for reading succeed
                open(fpath, "w").close()
            except (IOError, OSError):
                pass

        return fpath

    def _user_file(self, subdir, basename):
        '''
        Gets the full path to the 'subdir/basename' file in the user binwalk directory.

        @subdir - Subdirectory inside the user binwalk directory.
        @basename - File name inside the subdirectory.

        Returns the full path to the 'subdir/basename' file.
        '''
        return self._file_path(os.path.join(self.user_dir, self.BINWALK_USER_DIR, subdir), basename)

    def _system_file(self, subdir, basename):
        '''
        Gets the full path to the 'subdir/basename' file in the system binwalk directory.

        @subdir - Subdirectory inside the system binwalk directory.
        @basename - File name inside the subdirectory.

        Returns the full path to the 'subdir/basename' file.
        '''
        return self._file_path(os.path.join(self.system_dir, subdir), basename)
#################################################################################################################
# Default extract rules loaded when --extract is specified.
#
# <case-insensitive unique string from binwalk output text>:<desired file extension>:<command to execute>
#
# Note that %e is a placeholder for the extracted file name.
#################################################################################################################
# Assumes these utilities are installed in $PATH.
gzip compressed data:gz:gzip -d -f '%e'
lzma compressed data:7z:7zr e -y '%e'
bzip2 compressed data:bz2:bzip2 -d -f '%e'
zip archive data:zip:jar xf '%e' # jar does a better job of unzipping than unzip does...
posix tar archive:tar:tar xvf '%e'
# These assume the firmware-mod-kit is installed to /opt/firmware-mod-kit.
# If not, change the file paths appropriately.
squashfs filesystem:squashfs:/opt/firmware-mod-kit/trunk/unsquashfs_all.sh '%e'
jffs2 filesystem:jffs2:/opt/firmware-mod-kit/trunk/src/jffs2/unjffs2 '%e' # requires root
cpio archive:cpio:/opt/firmware-mod-kit/trunk/uncpio.sh '%e'
# Extract, but don't run anything
ext2 filesystem:ext2
romfs filesystem:romfs
cramfs filesystem:cramfs
private key:key
This diff is collapsed.
import common
from smartsig import SmartSignature
class MagicFilter:
    '''
    Class to filter libmagic results based on include/exclude rules and false positive detection.
    An instance of this class is available via the Binwalk.filter object.

    Include/exclude filters are checked in the order in which they were added, and the first
    filter whose text is found in a result decides that result (see the filter method).
    A filter that must take precedence over another should therefore be added first.

    Example code which creates exclude, include, and grep filters before running a Binwalk scan:

        import binwalk

        bw = binwalk.Binwalk()

        # Exclude all signatures whose descriptions contain the string 'jffs2', even if those signatures are normally included.
        # This filter is added first so that it wins over the broader 'filesystem' include filter below.
        bw.filter.exclude('jffs2')

        # Include all signatures whose descriptions contain the string 'filesystem' in the first line of the signature, even if those signatures are normally excluded.
        # Note that if exclusive=False was specified, this would merely add these signatures to the default signatures.
        # Since exclusive=True (the default) has been specified, ONLY those matching signatures will be loaded; all others will be ignored.
        # In this case, we are now searching for all filesystem signatures, except JFFS2.
        bw.filter.include('filesystem')

        # Add a grep filter. Unlike the include and exclude filters, it does not affect which results are returned by Binwalk.scan(), but it does affect which results
        # are printed by Binwalk.display.results(). This is particularly useful for cases like the bincast scan, where multiple lines of results are returned per offset,
        # but you only want certain ones displayed. In this case, only file systems whose description contain the string '2012' will be displayed.
        bw.filter.grep(filters=['2012'])

        bw.scan('firmware.bin')
    '''

    # A libmagic result that is exactly "data", or that contains the text
    # 'invalid' or a backslash, is known to be an invalid/false positive result.
    DATA_RESULT = "data"
    INVALID_RESULTS = ["invalid", "\\"]
    INVALID_RESULT = "invalid"
    NON_PRINTABLE_RESULT = "\\"

    FILTER_INCLUDE = 0
    FILTER_EXCLUDE = 1

    def __init__(self, show_invalid_results=False):
        '''
        Class constructor.

        @show_invalid_results - Set to True to display results marked as invalid.

        Returns None.
        '''
        self.filters = []
        self.grep_filters = []
        self.show_invalid_results = show_invalid_results
        self.exclusive_filter = False
        self.smart = SmartSignature(self)

    def _add_filters(self, filter_type, match):
        '''
        Appends one filter entry of the given type for each non-empty match text.

        @filter_type - FILTER_INCLUDE or FILTER_EXCLUDE.
        @match - Text, or list of texts, to match.

        Returns the number of filters that were added.
        '''
        matches = match if isinstance(match, list) else [match]
        added = 0

        for m in matches:
            if m:
                # Each entry must be its own dictionary; sharing one dictionary between
                # entries would leave every entry holding the last match text.
                self.filters.append({'type' : filter_type, 'filter' : m.lower()})
                added += 1

        return added

    def include(self, match, exclusive=True):
        '''
        Adds a new filter which explicitly includes results that contain
        the specified matching text.

        @match - Case insensitive text, or list of texts, to match.
        @exclusive - If True, then results that do not explicitly contain
                     a FILTER_INCLUDE match will be excluded. If False,
                     signatures that contain the FILTER_INCLUDE match will
                     be included in the scan, but will not cause non-matching
                     results to be excluded.

        Returns None.
        '''
        # Exclusive mode is only switched on if at least one filter was actually added
        if self._add_filters(self.FILTER_INCLUDE, match) and exclusive:
            self.exclusive_filter = True

    def exclude(self, match):
        '''
        Adds a new filter which explicitly excludes results that contain
        the specified matching text.

        @match - Case insensitive text, or list of texts, to match.

        Returns None.
        '''
        self._add_filters(self.FILTER_EXCLUDE, match)

    def filter(self, data):
        '''
        Checks to see if a given string should be excluded from or included in the results.
        Called internally by Binwalk.scan().

        Filters are checked in the order they were added; the first one found in data wins.

        @data - String to check.

        Returns FILTER_INCLUDE if the string should be included.
        Returns FILTER_EXCLUDE if the string should be excluded.
        '''
        data = data.lower()

        # Loop through the filters to see if any of them are a match.
        # If so, return the registered type for the matching filter (FILTER_INCLUDE | FILTER_EXCLUDE).
        for f in self.filters:
            if f['filter'] in data:
                return f['type']

        # If there was no explicit match and exclusive filtering is enabled, return FILTER_EXCLUDE.
        if self.exclusive_filter:
            return self.FILTER_EXCLUDE

        return self.FILTER_INCLUDE

    def invalid(self, data):
        '''
        Checks if the given string contains invalid data.
        Called internally by Binwalk.scan().

        @data - String to validate.

        Returns True if data is invalid, False if valid.
        '''
        # A result of 'data' is never ever valid.
        if data == self.DATA_RESULT:
            return True

        # If showing invalid results, just return False.
        if self.show_invalid_results:
            return False

        # Don't include quoted strings or keyword arguments in this search, as
        # strings from the target file may legitimately contain the INVALID_RESULT text.
        if self.INVALID_RESULT in common.strip_quoted_strings(self.smart._strip_tags(data)):
            return True

        # There should be no non-printable data in any of the data
        if self.NON_PRINTABLE_RESULT in data:
            return True

        return False

    def grep(self, data=None, filters=[]):
        '''
        Add or check case-insensitive grep filters against the supplied data string.

        @data - Data string to check grep filters against. Not required if filters is specified.
        @filters - Filter, or list of filters, to add to the grep filters list. Not required if data is specified.
                   (The default list is only read, never modified.)

        Returns None if data is not specified.
        If data is specified, returns True if the data contains a grep filter, or if no grep filters exist.
        If data is specified, returns False if the data does not contain any grep filters.
        '''
        # Add any specified filters to self.grep_filters
        if filters:
            gfilters = filters if isinstance(filters, list) else [filters]
            for gfilter in gfilters:
                # Filters are case insensitive
                self.grep_filters.append(gfilter.lower())

        # Nothing to check, filters were only being registered
        if data is None:
            return None

        # If no grep filters have been created, always return True
        if not self.grep_filters:
            return True

        # Filters are case insensitive
        data = data.lower()

        # True if any filter exists in data, else False
        for gfilter in self.grep_filters:
            if gfilter in data:
                return True
        return False

    def clear(self):
        '''
        Clears all include, exclude and grep filters.

        Returns None.
        '''
        self.filters = []
        self.grep_filters = []
        # With no include filters left, exclusive mode would make filter() exclude every result
        self.exclusive_filter = False
This diff is collapsed.
This diff is collapsed.
import sys
import hashlib
from datetime import datetime
class PrettyPrint:
    '''
    Class for printing Binwalk results to screen/log files.

    An instance of PrettyPrint is available via the Binwalk.display object.
    The PrettyPrint.results() method is of particular interest, as it is suitable for use as a Binwalk.scan() callback function,
    and can be used to print Binwalk.scan() results to stdout, a log file, or both.

    Example usage:

        import binwalk

        bw = binwalk.Binwalk()

        bw.display.header()
        bw.scan('firmware.bin', callback=bw.display.results)
        bw.display.footer()
    '''

    def __init__(self, log=None, quiet=False, bwalk=None, verbose=0):
        '''
        Class constructor.

        @log - Output log file. If specified, the file is opened for writing (truncating any existing file).
        @quiet - If True, results will not be displayed to screen.
        @bwalk - The Binwalk class instance.
        @verbose - If set to True, target file information will be displayed when header() is called with a file name.

        Returns None.
        '''
        self.fp = None
        self.log = log
        self.quiet = quiet
        self.binwalk = bwalk
        self.verbose = verbose

        if self.log is not None:
            self.fp = open(log, "w")

    def __del__(self):
        '''
        Class deconstructor.
        '''
        # Close the log file, if there is one. Errors are ignored because
        # there is nothing useful to do about them during object teardown.
        try:
            self.fp.close()
        except Exception:
            pass

    def _log(self, data):
        '''
        Log data to the log file. Does nothing if no log file was specified.
        '''
        if self.fp is not None:
            self.fp.write(data)

    def _pprint(self, data):
        '''
        Print data to stdout (unless quiet) and the log file.
        '''
        if not self.quiet:
            sys.stdout.write(data)
        self._log(data)

    def _file_md5(self, file_name):
        '''
        Generate an MD5 hash of the specified file.

        Returns the hex digest string.
        '''
        md5 = hashlib.md5()

        # Hash the file in chunks so that large files are not read into memory at once
        with open(file_name, 'rb') as f:
            for chunk in iter(lambda: f.read(128*md5.block_size), b''):
                md5.update(chunk)

        return md5.hexdigest()

    def file_info(self, file_name):
        '''
        Prints detailed info about the specified file, including file name, scan time and the file's MD5 sum.
        Called internally by self.header if self.verbose is not 0.

        The signature count line is omitted if no Binwalk class instance was supplied to the constructor.

        @file_name - The path to the target file.

        Returns None.
        '''
        self._pprint("\n")
        self._pprint("Scan Time: %s\n" % datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
        if self.binwalk is not None:
            self._pprint("Signatures: %d\n" % self.binwalk.parser.signature_count)
        self._pprint("Target File: %s\n" % file_name)
        self._pprint("MD5 Checksum: %s\n" % self._file_md5(file_name))

    def header(self, file_name=None):
        '''
        Prints the Binwalk header, typically used just before starting a scan.

        @file_name - If specified, and if self.verbose > 0, then detailed file info will be included in the header.

        Returns None.
        '''
        if self.verbose and file_name is not None:
            self.file_info(file_name)

        self._pprint("\nDECIMAL \tHEX \tDESCRIPTION\n")
        self._pprint("-------------------------------------------------------------------------------------------------------\n")

    def footer(self):
        '''
        Prints the Binwalk footer, typically used just after completing a scan.

        Returns None.
        '''
        self._pprint("\n")

    def results(self, offset, results):
        '''
        Prints the results of a scan. Suitable for use as a callback function for Binwalk.scan().

        @offset - The offset at which the results were found.
        @results - A list of result dictionaries; the 'description' entry of each is printed.

        Returns None.
        '''
        offset_printed = False

        for info in results:
            # Check for any grep filters before printing. Without a Binwalk instance
            # there are no grep filters to consult, so every result is displayed.
            if self.binwalk is None or self.binwalk.filter.grep(info['description']):
                # Only display the offset once per list of results
                if not offset_printed:
                    self._pprint("%-10d\t0x%-8X\t%s\n" % (offset, offset, info['description']))
                    offset_printed = True
                else:
                    self._pprint("%s\t %s\t%s\n" % (' '*10, ' '*8, info['description']))
import re
from common import str2int, get_quoted_strings
class SmartSignature:
'''
Class for parsing smart signature tags in libmagic result strings.
This class is intended for internal use only, but a list of supported 'smart keywords' that may be used
in magic files is available via the SmartSignature.KEYWORDS dictionary:
from binwalk import SmartSignature
for (i, keyword) in SmartSignature().KEYWORDS.iteritems():
print keyword
'''
KEYWORD_DELIM_START = "{"
KEYWORD_DELIM_END = "}"
KEYWORDS = {
'jump' : '%sjump-to-offset:' % KEYWORD_DELIM_START,
'filename' : '%sfile-name:' % KEYWORD_DELIM_START,
'filesize' : '%sfile-size:' % KEYWORD_DELIM_START,
'raw-string' : '%sraw-string:' % KEYWORD_DELIM_START, # This one is special and must come last in a signature block
'raw-size' : '%sraw-string-length:' % KEYWORD_DELIM_START,
'adjust' : '%soffset-adjust:' % KEYWORD_DELIM_START,
'delay' : '%sextract-delay:' % KEYWORD_DELIM_START,
'raw-replace' : '%sraw-replace%s' % (KEYWORD_DELIM_START, KEYWORD_DELIM_END),
'one-of-many' : '%sone-of-many%s' % (KEYWORD_DELIM_START, KEYWORD_DELIM_END),
'include' : '%sfilter-include%s' % (KEYWORD_DELIM_START, KEYWORD_DELIM_END),
'exclude' : '%sfilter-exclude%s' % (KEYWORD_DELIM_START, KEYWORD_DELIM_END),
}
def __init__(self, filter, pre_filter_signatures=True):
'''
Class constructor.
@filter - Instance of the MagicFilter class.
@pre_filter_signatures - Set to False to disable the pre-filtering of magic signatures.
Returns None.
'''
self.filter = filter
self.last_one_of_many = None
self.pre_filter_signatures = pre_filter_signatures
def parse(self, data):
'''
Parse a given data string for smart signature keywords. If any are found, interpret them and strip them.
@data - String to parse, as returned by libmagic.
Returns a dictionary of parsed values.
'''
results = {
'description' : '', # The libmagic data string, stripped of all keywords
'name' : '', # The original name of the file, if known
'delay' : '', # Extract delay description
'extract' : '', # Name of the extracted file, filled in by Binwalk.Scan.
'jump' : 0, # The relative offset to resume the scan from
'size' : 0, # The size of the file, if known
'adjust' : 0, # The relative offset to add to the reported offset
}
# If pre-filtering is disabled, or the result data is not valid (i.e., potentially malicious),
# don't parse anything, just return the raw data as the description.
if not self.pre_filter_signatures or not self._is_valid(data):
results['description'] = data
else:
# Parse the offset-adjust value. This is used to adjust the reported offset at which
# a signature was located due to the fact that MagicParser.match expects all signatures
# to be located at offset 0, which some wil not be.
results['adjust'] = self._get_math_arg(data, 'adjust')
# Parse the file-size value. This is used to determine how many bytes should be extracted
# when extraction is enabled. If not specified, everything to the end of the file will be
# extracted (see Binwalk.scan).
try:
results['size'] = str2int(self._get_keyword_arg(data, 'filesize'))
except:
pass
results['delay'] = self._get_keyword_arg(data, 'delay')
# Parse the string for the jump-to-offset keyword.
# This keyword is honored, even if this string result is one of many.
results['jump'] = self._get_math_arg(data, 'jump')
# If this is one of many, don't do anything and leave description as a blank string.
# Else, strip all keyword tags from the string and process additional keywords as necessary.
if not self._one_of_many(data):
results['name'] = self._get_keyword_arg(data, 'filename').strip('"')
results['description'] = self._strip_tags(data)
return results
def _is_valid(self, data):
    '''
    Validates that result data does not contain smart keywords in file-supplied strings.

    @data - Data string to validate.

    Returns True if data is OK.
    Returns False if data is not OK.
    '''
    # All strings printed from the target file should be placed in strings, else there is
    # no way to distinguish between intended keywords and unintended keywords. Get all the
    # quoted strings.
    quoted_data = get_quoted_strings(data)

    # Check to see if there was any quoted data, and if so, if it contained the keyword starting delimiter
    if quoted_data and self.KEYWORD_DELIM_START in quoted_data:
        # If so, check to see if the quoted data contains any of our keywords.
        # If any keywords are found inside of quoted data, consider the keywords invalid.
        # Only the keyword text is needed, and values() works on both Python 2 and 3.
        for keyword in self.KEYWORDS.values():
            if keyword in quoted_data:
                return False

    return True
def _one_of_many(self, data):
'''
Determines if a given data string is one result of many.
@data - String result data.