merge master => import_export_v1

This commit is contained in:
Terrtia 2020-02-19 09:35:00 +01:00
commit 62e3d95f22
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
13 changed files with 154 additions and 297 deletions

View file

@ -5,7 +5,7 @@
The BankAccount Module
======================
It apply IBAN regexes on paste content and warn if above a threshold.
It applies IBAN regexes to item content and warns if above a threshold.
"""
@ -17,7 +17,7 @@ import re
import string
from itertools import chain
from packages import Paste
from packages import Item
from pubsublogger import publisher
from Helper import Process
@ -49,7 +49,7 @@ def is_valid_iban(iban):
return True
return False
def check_all_iban(l_iban, paste, filename):
def check_all_iban(l_iban, obj_id):
nb_valid_iban = 0
for iban in l_iban:
iban = iban[0]+iban[1]+iban[2]
@ -65,14 +65,14 @@ def check_all_iban(l_iban, paste, filename):
server_statistics.hincrby('iban_by_country:'+date, iban[0:2], 1)
if(nb_valid_iban > 0):
to_print = 'Iban;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name)
to_print = 'Iban;{};{};{};'.format(Item.get_source(obj_id), Item.get_item_date(obj_id), Item.get_basename(obj_id))
publisher.warning('{}Checked found {} IBAN;{}'.format(
to_print, nb_valid_iban, paste.p_rel_path))
msg = 'infoleak:automatic-detection="iban";{}'.format(filename)
to_print, nb_valid_iban, obj_id))
msg = 'infoleak:automatic-detection="iban";{}'.format(obj_id)
p.populate_set_out(msg, 'Tags')
#Send to duplicate
p.populate_set_out(filename, 'Duplicate')
p.populate_set_out(obj_id, 'Duplicate')
if __name__ == "__main__":
publisher.port = 6380
@ -103,21 +103,21 @@ if __name__ == "__main__":
if message is not None:
filename = message
paste = Paste.Paste(filename)
content = paste.get_p_content()
obj_id = Item.get_item_id(message)
content = Item.get_item_content(obj_id)
signal.alarm(max_execution_time)
try:
l_iban = iban_regex.findall(content)
except TimeoutException:
print ("{0} processing timeout".format(paste.p_rel_path))
print ("{0} processing timeout".format(obj_id))
continue
else:
signal.alarm(0)
if(len(l_iban) > 0):
check_all_iban(l_iban, paste, filename)
check_all_iban(l_iban, obj_id)
else:
publisher.debug("Script BankAccount is Idling 10s")

View file

@ -17,7 +17,6 @@ import datetime
from pubsublogger import publisher
from Helper import Process
from packages import Paste
from packages import Item
from lib import Decoded
@ -56,14 +55,14 @@ def decode_string(content, item_id, item_date, encoded_list, decoder_name, encod
Decoded.save_item_relationship(sha1_string, item_id)
Decoded.create_decoder_matadata(sha1_string, item_id, decoder_name)
#remove encoded from paste content
#remove encoded from item content
content = content.replace(encoded, '', 1)
if(find):
set_out_paste(decoder_name, item_id)
set_out_item(decoder_name, message)
return content
def set_out_paste(decoder_name, message):
def set_out_item(decoder_name, message):
publisher.warning(decoder_name+' decoded')
#Send to duplicate
p.populate_set_out(message, 'Duplicate')
@ -130,12 +129,11 @@ if __name__ == '__main__':
time.sleep(1)
continue
filename = message
paste = Paste.Paste(filename)
obj_id = Item.get_item_id(message)
# Do something with the message from the queue
content = paste.get_p_content()
date = str(paste._get_p_date())
content = Item.get_item_content(obj_id)
date = Item.get_item_date(obj_id)
for decoder in decoder_order: # add threshold and size limit
@ -146,7 +144,7 @@ if __name__ == '__main__':
except TimeoutException:
encoded_list = []
p.incr_module_timeout_statistic() # add encoder type
print ("{0} processing timeout".format(paste.p_rel_path))
print ("{0} processing timeout".format(obj_id))
continue
else:
signal.alarm(0)

View file

@ -21,14 +21,35 @@ Requirements
"""
import base64
import hashlib
import io
import gzip
import os
import sys
import time
import uuid
import datetime
import redis
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
from pubsublogger import publisher
from Helper import Process
import magic
config_loader = ConfigLoader.ConfigLoader()
r_stats = config_loader.get_redis_conn("ARDB_Statistics")
config_loader = None
def gunzip_bytes_obj(bytes_obj):
    """Decompress an in-memory gzip payload and return the raw bytes.

    Args:
        bytes_obj: gzip-compressed data as a bytes-like object.

    Returns:
        The decompressed content as ``bytes``.

    Raises:
        OSError: if the data is not a valid gzip stream.
        EOFError: if the gzip stream is truncated.
    """
    # Seed the buffer directly with the payload: the stream then starts at
    # position 0, so no separate write()/seek(0) round trip is needed.
    with gzip.GzipFile(fileobj=io.BytesIO(bytes_obj), mode='rb') as fo:
        return fo.read()
def rreplace(s, old, new, occurrence):
li = s.rsplit(old, occurrence)
@ -45,8 +66,10 @@ if __name__ == '__main__':
p = Process(config_section)
# get and sanitize PASTE DIRECTORY
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"))
PASTES_FOLDERS = PASTES_FOLDER + '/'
PASTES_FOLDERS = os.path.join(os.path.realpath(PASTES_FOLDERS), '')
# LOGGING #
publisher.info("Feed Script started to receive & publish.")
@ -65,9 +88,9 @@ if __name__ == '__main__':
publisher.debug("Empty Paste: {0} not processed".format(message))
continue
else:
print("Empty Queues: Waiting...")
#print("Empty Queues: Waiting...")
if int(time.time() - time_1) > 30:
to_print = 'Global; ; ; ;glob Processed {0} paste(s)'.format(processed_paste)
to_print = 'Global; ; ; ;glob Processed {0} paste(s) in {1} s'.format(processed_paste, time.time() - time_1)
print(to_print)
#publisher.info(to_print)
time_1 = time.time()
@ -75,6 +98,10 @@ if __name__ == '__main__':
time.sleep(1)
continue
# remove PASTES_FOLDER from item path (crawled item + submitted)
if PASTES_FOLDERS in paste:
paste = paste.replace(PASTES_FOLDERS, '', 1)
file_name_paste = paste.split('/')[-1]
if len(file_name_paste)>255:
new_file_name_paste = '{}{}.gz'.format(file_name_paste[:215], str(uuid.uuid4()))
@ -82,33 +109,73 @@ if __name__ == '__main__':
# Creating the full filepath
filename = os.path.join(PASTES_FOLDER, paste)
filename = os.path.realpath(filename)
dirname = os.path.dirname(filename)
if not os.path.exists(dirname):
os.makedirs(dirname)
# incorrect filename
if not os.path.commonprefix([filename, PASTES_FOLDER]) == PASTES_FOLDER:
print('Path traversal detected {}'.format(filename))
publisher.warning('Global; Path traversal detected')
else:
decoded = base64.standard_b64decode(gzip64encoded)
# decode compressed base64
decoded = base64.standard_b64decode(gzip64encoded)
with open(filename, 'wb') as f:
f.write(decoded)
'''try:
decoded2 = gunzip_bytes_obj(decoded)
except:
decoded2 =''
# check if file exist
if os.path.isfile(filename):
print('File already exist {}'.format(filename))
publisher.warning('Global; File already exist')
type = magic.from_buffer(decoded2, mime=True)
try:
with gzip.open(filename, 'rb') as f:
curr_file_content = f.read()
except EOFError:
publisher.warning('Global; Incomplete file: {}'.format(filename))
# save daily stats
r_stats.zincrby('module:Global:incomplete_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
# discard item
continue
except OSError:
publisher.warning('Global; Not a gzipped file: {}'.format(filename))
# save daily stats
r_stats.zincrby('module:Global:invalid_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
# discard item
continue
if type!= 'text/x-c++' and type!= 'text/html' and type!= 'text/x-c' and type!= 'text/x-python' and type!= 'text/x-php' and type!= 'application/xml' and type!= 'text/x-shellscript' and type!= 'text/plain' and type!= 'text/x-diff' and type!= 'text/x-ruby':
curr_file_md5 = hashlib.md5(curr_file_content).hexdigest()
print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------')
print(filename)
print(type)
print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------')
'''
new_file_content = gunzip_bytes_obj(decoded)
new_file_md5 = hashlib.md5(new_file_content).hexdigest()
# remove PASTES_FOLDER from item path (crawled item + submitted)
if PASTES_FOLDERS in paste:
paste = paste.replace(PASTES_FOLDERS, '', 1)
if new_file_md5 != curr_file_md5:
p.populate_set_out(paste)
processed_paste+=1
if filename.endswith('.gz'):
filename = '{}_{}.gz'.format(filename[:-3], new_file_md5)
else:
filename = '{}_{}'.format(filename, new_file_md5)
# continue if new file already exist
if os.path.isfile(filename):
print('ignore duplicated file')
continue
print('new file: {}'.format(filename))
# ignore duplicate
else:
print('ignore duplicated file')
continue
# create subdir
dirname = os.path.dirname(filename)
if not os.path.exists(dirname):
os.makedirs(dirname)
with open(filename, 'wb') as f:
f.write(decoded)
paste = filename
# remove PASTES_FOLDER from item path
if PASTES_FOLDERS in paste:
paste = paste.replace(PASTES_FOLDERS, '', 1)
p.populate_set_out(paste)
processed_paste+=1

View file

@ -6,7 +6,7 @@ The IP Module
This module is consuming the global channel.
It first performs a regex to find IP addresses and then matches those IPs to
It first performs a regex to find IP addresses and then matches those IPs to
some configured ip ranges.
The list of IP ranges are expected to be in CIDR format (e.g. 192.168.0.0/16)
@ -16,6 +16,7 @@ and should be defined in the config.cfg file, under the [IP] section
import time
import re
import sys
from pubsublogger import publisher
from packages import Paste
from Helper import Process
@ -31,8 +32,9 @@ def search_ip(message):
results = reg_ip.findall(content)
matching_ips = []
for res in results:
address = IPv4Address(res)
for ip in results:
ip = '.'.join([str(int(x)) for x in ip.split('.')])
address = IPv4Address(ip)
for network in ip_networks:
if address in network:
matching_ips.append(address)
@ -60,8 +62,12 @@ if __name__ == '__main__':
p = Process(config_section)
ip_networks = []
for network in p.config.get("IP", "networks").split(","):
ip_networks.append(IPv4Network(network))
try:
for network in p.config.get("IP", "networks").split(","):
ip_networks.append(IPv4Network(network))
except:
print('Please provide a list of valid IP addresses')
sys.exit(0)
# Send a description of the module to the logging
@ -78,4 +84,3 @@ if __name__ == '__main__':
# Do something with the message from the queue
search_ip(message)

View file

@ -26,6 +26,7 @@ publisher.channel = "Script"
def sendEmailNotification(recipient, alert_name, content):
sender = config_loader.get_config_str("Notifications", "sender")
sender_user = config_loader.get_config_str("Notifications", "sender_user")
sender_host = config_loader.get_config_str("Notifications", "sender_host")
sender_port = config_loader.get_config_int("Notifications", "sender_port")
sender_pw = config_loader.get_config_str("Notifications", "sender_pw")
@ -49,7 +50,10 @@ def sendEmailNotification(recipient, alert_name, content):
smtp_server = smtplib.SMTP_SSL(sender_host, sender_port)
smtp_server.ehlo()
smtp_server.login(sender, sender_pw)
if sender_user is not None:
smtp_server.login(sender_user, sender_pw)
else:
smtp_server.login(sender, sender_pw)
else:
smtp_server = smtplib.SMTP(sender_host, sender_port)

View file

@ -30,7 +30,7 @@ sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk import tokenize
from nltk import tokenize, download
# Config Variables
accepted_Mime_type = ['text/plain']
@ -62,7 +62,12 @@ def Analyse(message, server):
combined_datetime = datetime.datetime.combine(the_date, the_time)
timestamp = calendar.timegm(combined_datetime.timetuple())
sentences = tokenize.sent_tokenize(p_content)
try:
sentences = tokenize.sent_tokenize(p_content)
except:
# use the NLTK Downloader to obtain the resource
download('punkt')
sentences = tokenize.sent_tokenize(p_content)
if len(sentences) > 0:
avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}

View file

@ -29,7 +29,10 @@ num_day_to_look = 5 # the detection of the progression start num_day_to_lo
def analyse(server, field_name, date, url_parsed):
field = url_parsed[field_name]
if field is not None:
field = field.decode('utf8')
try: # faup version
field = field.decode()
except:
pass
server.hincrby(field, date, 1)
if field_name == "domain": #save domain in a set for the monthly plot
domain_set_name = "domain_set_" + date[0:6]

View file

@ -37,6 +37,9 @@ def exist_item(item_id):
else:
return False
def get_basename(item_id):
    """Return the final path component of ``item_id`` (its file name).

    An id ending with a path separator yields an empty string.
    """
    _parent_dir, file_name = os.path.split(item_id)
    return file_name
def get_item_id(full_path):
    """Derive an item id from ``full_path``.

    The first occurrence of the module-level ``PASTES_FOLDER`` string is
    removed from the path; a path that does not contain it is returned
    unchanged.
    """
    item_id = full_path.replace(PASTES_FOLDER, '', 1)
    return item_id

View file

@ -394,6 +394,11 @@ def add_tag(object_type, tag, object_id, obj_date=None):
# new tag
if not is_obj_tagged(object_id, tag):
# # TODO: # FIXME: sanitize object_type
if obj_date:
try:
obj_date = int(obj_date)
except:
obj_date = None
if not obj_date:
obj_date = get_obj_date(object_type, object_id)
add_global_tag(tag, object_type=object_type)
@ -455,7 +460,7 @@ def delete_obj_tags(object_id, object_type, tags=[]):
return res
def sanitise_tags_date_range(l_tags, date_from=None, date_to=None):
if date_from or date_to is None:
if date_from is None or date_to is None:
date_from = get_tags_min_last_seen(l_tags, r_int=False)
date_to = date_from
return Date.sanitise_date_range(date_from, date_to)

View file

@ -28,6 +28,9 @@ sender = sender@example.com
sender_host = smtp.example.com
sender_port = 1337
sender_pw = None
# Only needed when the email server credentials require a username instead of an email address
#sender_user = sender
sender_user =
# optional for using with authenticated SMTP over SSL
# sender_pw = securepassword

View file

@ -154,5 +154,5 @@ def domains_explorer_web():
except:
page = 1
dict_data = Domain.domains_up_by_page('regular', page=page)
dict_data = Domain.get_domains_up_by_filers('regular', page=page, date_from=date_from, date_to=date_to)
return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='regular')

View file

@ -83,6 +83,9 @@
{%else%}
{% set target_url=url_for('crawler_splash.domains_explorer_web') + "?domain_type=regular" %}
{%endif%}
{%if 'date_from' in dict_data %}
{% set target_url = target_url + '&date_from=' + dict_data['date_from'] + '&date_to=' + dict_data['date_to'] %}
{%endif%}
{% include 'pagination.html' %}
{% endwith %}
{%endif%}

View file

@ -1,239 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<title>Show Domain - AIL</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png') }}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js') }}"></script>
<style>
.card-columns {
column-count: 4;
}
</style>
</head>
<body>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'crawler/menu_sidebar.html' %}
<div class="col-12 col-lg-10" id="core_content">
<div class="card my-2 border-secondary" >
<div class="card-body py-2">
<div class="row">
<div class="col-md-3 text-center">
<button class="btn btn-primary" onclick="blocks.value=0;pixelate_all();">
<i class="fas fa-eye-slash"></i>
<span class="label-icon">Hide</span>
</button>
</div>
<div class="col-md-6">
<input class="custom-range mt-2" id="blocks" type="range" min="1" max="50" value="5">
</div>
<div class="col-md-3 text-center">
<button class="btn btn-primary" onclick="blocks.value=50;pixelate_all();">
<i class="fas fa-plus-square"></i>
<span class="label-icon">Full resolution</span>
</button>
</div>
</div>
</div>
</div>
{% for dict_domain in dict_data['list_elem'] %}
{% if loop.index0 % 4 == 0 %}
<div class="card-deck mt-3">
{% endif %}
<div class="card">
<div class="text-center">
<canvas id="canvas_{{loop.index0}}" style="max-height: 400px; max-width: 100%;"></canvas>
<!-- <img style="height:400px;" src="{{url_for('showsavedpastes.screenshot', filename='a6/b9/33/f5/f1/0a/16d8b1467093dd5469bfd86bdb2c12f3694677c44406fa758f8b')}}" alt="Card image cap"> -->
</div>
<div class="card-body">
<h5 class="card-title">
<a target="_blank" href="{{ url_for('crawler_splash.showDomain') }}?domain={{dict_domain["id"]}}">
{{dict_domain["id"]}}
</a>
</h5>
<p class="card-text">
<small class="text-muted">
First seen: {{dict_domain["first_seen"]}}<br>
Last_seen: {{dict_domain["first_seen"]}}<br>
Ports: {{dict_domain["ports"]}}
</small>
</p>
<small class="text-muted">Status: </small>
{% if dict_domain["status"] %}
<span style="color:Green;">
<i class="fas fa-check-circle"></i>
UP
</span>
{% else %}
<span style="color:Red;">
<i class="fas fa-times-circle"></i>
DOWN
</span>
{% endif %}
<div>
{% for tag in dict_domain['tags'] %}
<a href="{{ url_for('tags_ui.get_obj_by_tags') }}?object_type=domain&ltags={{ tag }}">
<span class="badge badge-{{ bootstrap_label[loop.index0 % 5] }}">{{ tag }}</span>
</a>
{% endfor %}
</div>
</div>
</div>
{% if loop.index0 % 4 == 3 %}
</div>
{% endif %}
{% endfor %}
{% if dict_data['list_elem']|length % 4 != 0 %}
</div>
{% endif %}
<br>
<br>
{%if 'list_elem' in dict_data%}
{% with page=dict_data['page'], nb_page_max=dict_data['nb_pages'], nb_first_elem=dict_data['nb_first_elem'], nb_last_elem=dict_data['nb_last_elem'], nb_all_elem=dict_data['nb_all_elem'] %}
{% set object_name="domain" %}
{%if domain_type=='onion'%}
{% set target_url=url_for('crawler_splash.domains_explorer_onion') + "?domain_type=onion" %}
{%else%}
{% set target_url=url_for('crawler_splash.domains_explorer_web') + "?domain_type=regular" %}
{%endif%}
{% include 'pagination.html' %}
{% endwith %}
{%endif%}
</div>
</div>
</div>
</body>
<script>
$(document).ready(function(){
});
function toggle_sidebar(){
if($('#nav_menu').is(':visible')){
$('#nav_menu').hide();
$('#side_menu').removeClass('border-right')
$('#side_menu').removeClass('col-lg-2')
$('#core_content').removeClass('col-lg-10')
}else{
$('#nav_menu').show();
$('#side_menu').addClass('border-right')
$('#side_menu').addClass('col-lg-2')
$('#core_content').addClass('col-lg-10')
}
}
</script>
<script>
// img_url
// ctx
// canevas_id
var dict_canevas_blurr_img = {}
function init_canevas_blurr_img(canevas_id, img_url){
// ctx, turn off image smoothin
dict_canevas_blurr_img[canevas_id] = {}
var canvas_container = document.getElementById(canevas_id);
var ctx = canvas_container.getContext('2d');
ctx.webkitImageSmoothingEnabled = false;
ctx.imageSmoothingEnabled = false;
dict_canevas_blurr_img[canevas_id]["ctx"] = ctx;
// img
dict_canevas_blurr_img[canevas_id]["img"] = new Image();
dict_canevas_blurr_img[canevas_id]["img"].onload = function() {pixelate_img(canevas_id);};
dict_canevas_blurr_img[canevas_id]["img"].addEventListener("error", function() {img_error(canevas_id);});
dict_canevas_blurr_img[canevas_id]["img"].src = img_url;
}
function pixelate_all(){
Object.entries(dict_canevas_blurr_img).forEach(([key, value]) => {
pixelate_img(key);
});
}
function pixelate_img(canevas_id) {
if (typeof canevas_id !== 'undefined') {
var canevas_to_blurr = document.getElementById(canevas_id);
/// use slider value
if( blocks.value == 50 ){
size = 1;
} else {
var size = (blocks.value) * 0.01;
}
canevas_to_blurr.width = dict_canevas_blurr_img[canevas_id]["img"].width;
canevas_to_blurr.height = dict_canevas_blurr_img[canevas_id]["img"].height;
/// cache scaled width and height
w = canevas_to_blurr.width * size;
h = canevas_to_blurr.height * size;
/// draw original image to the scaled size
dict_canevas_blurr_img[canevas_id]["ctx"].drawImage(dict_canevas_blurr_img[canevas_id]["img"], 0, 0, w, h);
/// pixelated
dict_canevas_blurr_img[canevas_id]["ctx"].drawImage(canevas_to_blurr, 0, 0, w, h, 0, 0, canevas_to_blurr.width, canevas_to_blurr.height);
}
}
function img_error(canevas_id) {
dict_canevas_blurr_img[canevas_id]["img"].onerror=null;
dict_canevas_blurr_img[canevas_id]["img"].src="{{ url_for('static', filename='image/AIL.png') }}";
dict_canevas_blurr_img[canevas_id]["img"].width = 50
}
blocks.addEventListener('change', pixelate_all, false);
{% for dict_domain in dict_data['list_elem'] %}
{% if 'screenshot' in dict_domain %}
{% if dict_domain['is_tags_safe'] %}
var screenshot_url = "{{ url_for('showsavedpastes.screenshot', filename="") }}{{dict_domain['screenshot']}}";
{% else %}
var screenshot_url = "{{ url_for('static', filename='image/misp-logo.png') }}";
{% endif %}
init_canevas_blurr_img("canvas_{{loop.index0}}", screenshot_url);
{% endif %}
{% endfor %}
</script>
</html>