Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test():
    """Run LogToDataFrame over the log files found under ../data and print the results."""
    import os
    pd.set_option('display.width', 1000)
    from zat.utils import file_utils

    # Directory (relative to this file) that holds the test logs
    data_path = file_utils.relative_dir(__file__, '../data')

    # Convert a single log first and show both its head and its column dtypes
    log_to_df = LogToDataFrame()
    conn_df = log_to_df.create_dataframe(os.path.join(data_path, 'conn.log'))
    print(conn_df.head())
    print(conn_df.dtypes)

    # Then push a whole batch of logs through the same converter
    log_names = [
        'app_stats.log', 'dns.log', 'http.log', 'notice.log', 'tor_ssl.log',
        'conn.log', 'dhcp_002.log', 'files.log', 'smtp.log', 'weird.log',
        'ftp.log', 'ssl.log', 'x509.log',
    ]
    for log_name in log_names:
        log_path = os.path.join(data_path, log_name)
        print('Testing: {:s}...'.format(log_path))
        print(log_to_df.create_dataframe(log_path).head())
# The path may start with a tilde, so expand it first
args.bro_log = os.path.expanduser(args.bro_log)

# Only http and dns logs are supported; anything else stops the script here
if 'http' not in args.bro_log and 'dns' not in args.bro_log:
    print('This example only works with Zeek with http.log or dns.log files..')
    sys.exit(1)

# Choose the feature columns that go with the log type ('http' wins if both match)
if 'http' in args.bro_log:
    log_type = 'http'
    features = ['id.resp_p', 'method', 'resp_mime_types', 'request_body_len']
else:
    log_type = 'dns'
    features = ['Z', 'proto', 'qtype_name', 'query_length', 'answer_length', 'entropy']

# Load the Zeek log into a Pandas dataframe, exiting if an IOError is raised
try:
    log_to_df = log_to_dataframe.LogToDataFrame()
    bro_df = log_to_df.create_dataframe(args.bro_log)
    print(bro_df.head())
except IOError:
    print('Could not open or parse the specified logfile: %s' % args.bro_log)
    sys.exit(1)
print('Read in {:d} Rows...'.format(len(bro_df)))

# For dns logs, derive the extra columns named in the feature list:
# the query and answer lengths (vectorized string ops) and the per-query entropy
if log_type == 'dns':
    bro_df['query_length'] = bro_df['query'].str.len()
    bro_df['answer_length'] = bro_df['answers'].str.len()
    bro_df['entropy'] = bro_df['query'].map(lambda query: entropy(query))
# Use the zat DataframeToMatrix class
# Refuse to run if the parser left any arguments unrecognized
if commands:
    print('Unrecognized args: %s' % commands)
    sys.exit(1)

# This example is dns-only: the path has to contain 'dns'
if 'dns' not in args.bro_log:
    print('This example only works with Zeek dns.log files..')
    sys.exit(1)

# Expand a leading tilde in the path, if a path was given
if args.bro_log:
    args.bro_log = os.path.expanduser(args.bro_log)

# Load the Zeek log into a Pandas dataframe
log_to_df = log_to_dataframe.LogToDataFrame()
bro_df = log_to_df.create_dataframe(args.bro_log)

# Derived column: length of each query string
bro_df['query_length'] = bro_df['query'].str.len()

# Min-max normalization of that column is currently commented out:
#ql = bro_df['query_length']
#bro_df['query_length_norm'] = (ql - ql.min()) / (ql.max()-ql.min())

# Columns handed to the matrix conversion (some of them are categorical)
features = [
    'AA', 'RA', 'RD', 'TC', 'Z', 'rejected',
    'proto', 'qtype_name', 'rcode_name', 'query_length',
]
feature_df = bro_df[features]

# DataFrameToMatrix turns the feature dataframe into a matrix
to_matrix = dataframe_to_matrix.DataFrameToMatrix()
bro_matrix = to_matrix.fit_transform(feature_df)
# Parse the command line: one positional argument, the path to the log
parser = argparse.ArgumentParser()
parser.add_argument('bro_log', type=str, help='Specify a bro log to run BroLogReader test on')
args, commands = parser.parse_known_args()

# Anything the parser did not recognize is treated as an error
if commands:
    print('Unrecognized args: %s' % commands)
    sys.exit(1)

# Expand a leading tilde in the path, if a path was given
if args.bro_log:
    args.bro_log = os.path.expanduser(args.bro_log)

# Load the Zeek log into a Pandas dataframe
log_to_df = LogToDataFrame()
bro_df = log_to_df.create_dataframe(args.bro_log)

# Show the first rows followed by the column dtypes
print(bro_df.head())
print(bro_df.dtypes)

# Report the dataframe shape, then per-entry memory usage in MB and the total in GB
print('DF Shape: {:s}'.format(str(bro_df.shape)))
print('DF Memory:')
memory_usage = bro_df.memory_usage(deep=True)
total = memory_usage.sum()
for label, n_bytes in memory_usage.items():
    print('\t {:s}: \t{:.2f} MB'.format(label, n_bytes/1e6))
print('DF Total: {:.2f} GB'.format(total/(1e9)))