mirror of https://github.com/ail-project/ail-framework.git
chg: [Duplicates module] refactor module + DB keys
This commit is contained in:
parent 8672671e51
commit 2f8a5a333a

11 changed files with 480 additions and 290 deletions
@@ -217,6 +217,8 @@ def item_submit_migration():
    # /!\ KEY COLISION
    # # TODO: change db

 def tags_migration():
     pass

@@ -1,198 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-"""
-The Duplicate module
-====================
-
-This huge module is, in short term, checking duplicates.
-Its input comes from other modules, namely:
-    Credential, CreditCard, Keys, Mails, SQLinjectionDetection, CVE and Phone
-
-This one differ from v1 by only using redis and not json file stored on disk
-
-Perform comparisions with ssdeep and tlsh
-
-Requirements:
--------------
-
-
-"""
-import redis
-import os
-import time
-from datetime import datetime, timedelta
-import json
-import ssdeep
-import tlsh
-from packages import Paste
-from pubsublogger import publisher
-
-from Helper import Process
-
-if __name__ == "__main__":
-    publisher.port = 6380
-    publisher.channel = "Script"
-
-    config_section = 'Duplicates'
-
-    p = Process(config_section)
-
-    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"))
-
-    maximum_month_range = int(p.config.get("Modules_Duplicates", "maximum_month_range"))
-    threshold_duplicate_ssdeep = int(p.config.get("Modules_Duplicates", "threshold_duplicate_ssdeep"))
-    threshold_duplicate_tlsh = int(p.config.get("Modules_Duplicates", "threshold_duplicate_tlsh"))
-    threshold_set = {}
-    threshold_set['ssdeep'] = threshold_duplicate_ssdeep
-    threshold_set['tlsh'] = threshold_duplicate_tlsh
-    min_paste_size = float(p.config.get("Modules_Duplicates", "min_paste_size"))
-
-    # REDIS #
-    dico_redis = {}
-    date_today = datetime.today()
-    for year in range(2013, date_today.year+1):
-        for month in range(0, 13):
-            dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
-                host=p.config.get("ARDB_DB", "host"),
-                port=p.config.get("ARDB_DB", "port"),
-                db=str(year) + str(month),
-                decode_responses=True)
-
-    # FUNCTIONS #
-    publisher.info("Script duplicate started")
-
-    while True:
-        try:
-            hash_dico = {}
-            dupl = set()
-            dico_range_list = []
-
-            x = time.time()
-
-            message = p.get_from_set()
-            if message is not None:
-                path = message
-                PST = Paste.Paste(path)
-            else:
-                publisher.debug("Script Attribute is idling 10s")
-                print('sleeping')
-                time.sleep(10)
-                continue
-
-            # the paste is too small
-            if (PST._get_p_size() < min_paste_size):
-                continue
-
-            PST._set_p_hash_kind("ssdeep")
-            PST._set_p_hash_kind("tlsh")
-
-            # Assignate the correct redis connexion
-            r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month]
-
-            # Creating the dico name: yyyymm
-            # Get the date of the range
-            date_range = date_today - timedelta(days = maximum_month_range*30.4166666)
-            num_of_month = (date_today.year - date_range.year)*12 + (date_today.month - date_range.month)
-            for diff_month in range(0, num_of_month+1):
-                curr_date_range = date_today - timedelta(days = diff_month*30.4166666)
-                to_append = str(curr_date_range.year)+str(curr_date_range.month).zfill(2)
-                dico_range_list.append(to_append)
-
-            # Use all dico in range
-            dico_range_list = dico_range_list[0:maximum_month_range]
-
-            # UNIQUE INDEX HASHS TABLE
-            yearly_index = str(date_today.year)+'00'
-            r_serv0 = dico_redis[yearly_index]
-            r_serv0.incr("current_index")
-            index = (r_serv0.get("current_index")) + str(PST.p_date)
-
-            # Open selected dico range
-            opened_dico = []
-            for dico_name in dico_range_list:
-                opened_dico.append([dico_name, dico_redis[dico_name]])
-
-            # retrieve hash from paste
-            paste_hashes = PST._get_p_hash()
-
-            # Go throught the Database of the dico (of the month)
-            for curr_dico_name, curr_dico_redis in opened_dico:
-                for hash_type, paste_hash in paste_hashes.items():
-                    for dico_hash in curr_dico_redis.smembers('HASHS_'+hash_type):
-
-                        try:
-                            if hash_type == 'ssdeep':
-                                percent = 100-ssdeep.compare(dico_hash, paste_hash)
-                            else:
-                                percent = tlsh.diffxlen(dico_hash, paste_hash)
-                                if percent > 100:
-                                    percent = 100
-
-                            threshold_duplicate = threshold_set[hash_type]
-                            if percent < threshold_duplicate:
-                                percent = 100 - percent if hash_type == 'ssdeep' else percent #recovert the correct percent value for ssdeep
-                                # Go throught the Database of the dico filter (month)
-                                r_serv_dico = dico_redis[curr_dico_name]
-
-                                # index of paste
-                                index_current = r_serv_dico.get(dico_hash)
-                                index_current = index_current
-                                paste_path = r_serv_dico.get(index_current)
-                                paste_path = paste_path
-                                paste_date = r_serv_dico.get(index_current+'_date')
-                                paste_date = paste_date
-                                paste_date = paste_date if paste_date != None else "No date available"
-                                if paste_path != None:
-                                    paste_path = paste_path.replace(PASTES_FOLDER+'/', '', 1)
-                                    if paste_path != PST.p_rel_path:
-                                        hash_dico[dico_hash] = (hash_type, paste_path, percent, paste_date)
-
-                                print('['+hash_type+'] '+'comparing: ' + str(PST.p_rel_path) + ' and ' + str(paste_path) + ' percentage: ' + str(percent))
-
-                        except Exception:
-                            print('hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash)
-
-            # Add paste in DB after checking to prevent its analysis twice
-            # hash_type_i -> index_i AND index_i -> PST.PATH
-            r_serv1.set(index, PST.p_rel_path)
-            r_serv1.set(index+'_date', PST._get_p_date())
-            r_serv1.sadd("INDEX", index)
-            # Adding hashes in Redis
-            for hash_type, paste_hash in paste_hashes.items():
-                r_serv1.set(paste_hash, index)
-                #bad hash
-                if paste_hash == '':
-                    print('bad Hash: ' + hash_type)
-                else:
-                    r_serv1.sadd("HASHS_"+hash_type, paste_hash)
-
-            ##################### Similarity found #######################
-
-            # if there is data in this dictionnary
-            if len(hash_dico) != 0:
-                # paste_tuple = (hash_type, date, paste_path, percent)
-                for dico_hash, paste_tuple in hash_dico.items():
-                    dupl.add(paste_tuple)
-
-                # Creating the object attribute and save it.
-                to_print = 'Duplicate;{};{};{};'.format(
-                    PST.p_source, PST.p_date, PST.p_name)
-                if dupl != []:
-                    dupl = list(dupl)
-                    PST.__setattr__("p_duplicate", dupl)
-                    PST.save_attribute_duplicate(dupl)
-                    PST.save_others_pastes_attribute_duplicate(dupl)
-                    publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_rel_path))
-                    print('{}Detected {}'.format(to_print, len(dupl)))
-                print('')
-
-            y = time.time()
-
-            publisher.debug('{}Processed in {} sec'.format(to_print, y-x))
-
-        except IOError:
-            to_print = 'Duplicate;{};{};{};'.format(
-                PST.p_source, PST.p_date, PST.p_name)
-            print("CRC Checksum Failed on :", PST.p_rel_path)
-            publisher.error('{}CRC Checksum Failed'.format(to_print))
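Summary sketch (illustrative, not part of the commit): the storage layout moves from one ARDB database per month to namespaced keys in a single Kvrocks database, as the new bin/lib/Duplicate.py below shows.

    # Old layout: one ARDB database per month (db index "yyyymm"), each holding:
    #   HASHS_ssdeep / HASHS_tlsh    SET of all hashes seen that month
    #   <paste_hash>                 STRING -> paste index
    #   <index>                      STRING -> paste path
    #   <index>_date                 STRING -> paste date
    #
    # New layout: a single Kvrocks database, keyed by algorithm and month:
    #   duplicates:hashs:{algo}:{yyyymm}          HASH: hash -> object id
    #   obj:duplicates:{obj_type}:{subtype}:{id}  SET of "similarity:algo:other_id"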
bin/lib/Duplicate.py (new executable file, 130 additions)
@@ -0,0 +1,130 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import os
+import ssdeep
+import sys
+import time
+import tlsh
+
+import datetime
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib.ConfigLoader import ConfigLoader
+
+config_loader = ConfigLoader()
+r_serv_db = config_loader.get_redis_conn("Kvrocks_DB")
+MIN_ITEM_SIZE = float(config_loader.get_config_str('Modules_Duplicates', 'min_paste_size')) # # TODO: RENAME ME
+config_loader = None
+
+#
+#
+# Hash != Duplicates => New correlation HASH => check if same hash if duplicate == 100
+#
+# Object Hash => correlation decoded => don't need correlation to exists
+#
+# New CORRELATION => HASH
+# -> compute/get(if exist we have a correlation) hash -> get correlation same hash
+#
+#
+# Duplicates between differents objects ?????
+# Diff Decoded -> Item => Diff Item decoded - Item
+#
+# Duplicates domains != Duplicates items
+
+
+def get_ssdeep_hash(content):
+    return ssdeep.hash(content)
+
+def get_ssdeep_similarity(obj_hash, other_hash):
+    return ssdeep.compare(obj_hash, other_hash)
+
+def get_tlsh_hash(content):
+    return tlsh.hash(content)
+
+def get_tlsh_similarity(obj_hash, other_hash):
+    similarity = tlsh.diffxlen(obj_hash, other_hash)
+    if similarity > 100:
+        similarity = 100
+    similarity = 100 - similarity
+    return similarity
+
+def get_algo_similarity(algo, obj_hash, other_hash):
+    if algo == 'ssdeep':
+        return get_ssdeep_similarity(obj_hash, other_hash)
+    elif algo == 'tlsh':
+        return get_tlsh_similarity(obj_hash, other_hash)
+
+def get_algo_hashs_by_month(algo, date_ymonth):
+    return r_serv_db.hkeys(f'duplicates:hashs:{algo}:{date_ymonth}')
+
+def exists_algo_hash_by_month(algo, hash, date_ymonth):
+    return r_serv_db.hexists(f'duplicates:hashs:{algo}:{date_ymonth}', hash)
+
+def get_object_id_by_hash(algo, hash, date_ymonth):
+    return r_serv_db.hget(f'duplicates:hashs:{algo}:{date_ymonth}', hash)
+
+def save_object_hash(algo, date_ymonth, hash, obj_id):
+    r_serv_db.hset(f'duplicates:hashs:{algo}:{date_ymonth}', hash, obj_id)
+
+
+def get_duplicates(obj_type, subtype, id):
+    dict_dup = {}
+    duplicates = r_serv_db.smembers(f'obj:duplicates:{obj_type}:{subtype}:{id}')
+    for str_dup in duplicates:
+        similarity, algo, id = str_dup.split(':', 2)
+        if not dict_dup.get(id):
+            dict_dup[id] = []
+        dict_dup[id].append({'algo': algo, 'similarity': int(similarity)})
+    return dict_dup
+
+
+def _add_obj_duplicate(algo, similarity, obj_type, subtype, id, id_2):
+    r_serv_db.sadd(f'obj:duplicates:{obj_type}:{subtype}:{id}', f'{similarity}:{algo}:{id_2}')
+
+def add_obj_duplicate(algo, hash, similarity, obj_type, subtype, id, date_ymonth):
+    obj2_id = get_object_id_by_hash(algo, hash, date_ymonth)
+    # same content
+    if similarity == 100:
+        dups = get_duplicates(obj_type, subtype, id)
+        for dup_id in dups:
+            for algo_dict in dups[dup_id]:
+                if algo_dict['similarity'] == 100 and algo_dict['algo'] == algo:
+                    _add_obj_duplicate(algo, similarity, obj_type, subtype, id, dups[dup_id])
+                    _add_obj_duplicate(algo, similarity, obj_type, subtype, dups[dup_id], id)
+    _add_obj_duplicate(algo, similarity, obj_type, subtype, id, obj2_id)
+    _add_obj_duplicate(algo, similarity, obj_type, subtype, obj2_id, id)
+
+
+def get_last_x_month_dates(nb_months):
+    now = datetime.datetime.now()
+    result = [now.strftime("%Y%m")]
+    for x in range(0, nb_months):
+        now = now.replace(day=1) - datetime.timedelta(days=1)
+        result.append(now.strftime("%Y%m"))
+    return result
+
+
+if __name__ == '__main__':
+    res = get_last_x_month_dates(7)
+    print(res)
+
+
+#################################
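Usage sketch for the helpers above (illustrative, not part of the commit; ids and content are made up, and a reachable Kvrocks plus the ssdeep binding are assumed). The empty string stands in for the item subtype, matching how the Duplicates module below calls these helpers.

    from lib import Duplicate

    content = b'some item content'           # hypothetical content
    obj_id = 'submitted/2022/01/01/foo.gz'   # hypothetical item id
    ymonth = '202201'

    obj_hash = Duplicate.get_ssdeep_hash(content)
    if Duplicate.exists_algo_hash_by_month('ssdeep', obj_hash, ymonth):
        # exact hash already seen this month: record a 100% duplicate (both directions)
        Duplicate.add_obj_duplicate('ssdeep', obj_hash, 100, 'item', '', obj_id, ymonth)
    # index the hash so later items can match against this one
    Duplicate.save_object_hash('ssdeep', ymonth, obj_hash, obj_id)

    # duplicates come back grouped by object id, one dict per algorithm
    print(Duplicate.get_duplicates('item', '', obj_id))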
@@ -54,6 +54,16 @@ def is_crawled(item_id):
 def get_item_domain(item_id):
     return item_id[19:-36]

+def get_item_content_binary(item_id):
+    item_full_path = os.path.join(PASTES_FOLDER, item_id)
+    try:
+        with gzip.open(item_full_path, 'rb') as f:
+            item_content = f.read()
+    except Exception as e:
+        print(e)
+        item_content = ''
+    return item_content
+
 def get_item_content(item_id):
     item_full_path = os.path.join(PASTES_FOLDER, item_id)
     try:
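A short note on the split (an inference from this diff, not stated in it): the binary accessor feeds the hashers, which want raw bytes, while get_item_content keeps serving decoded text to the UI. With a hypothetical id:

    item_id = 'archive/pastebin.com_pro/2020/01/02/example.gz'   # hypothetical
    raw = get_item_content_binary(item_id)   # bytes ('' if decompression fails, per the except branch)
    text = get_item_content(item_id)         # decoded text, unchanged behaviour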
@@ -91,11 +91,14 @@ class Item(AbstractObject):
         else:
             return filename

-    def get_content(self):
+    def get_content(self, binary=False):
         """
         Returns Item content
         """
-        return item_basic.get_item_content(self.id)
+        if binary:
+            return item_basic.get_item_content_binary(self.id)
+        else:
+            return item_basic.get_item_content(self.id)

     def get_raw_content(self):
         filepath = self.get_filename()
@@ -110,15 +113,34 @@ class Item(AbstractObject):
         content = base64.b64encode(content)
         return content.decode()

+    def get_html2text_content(self, content=None, ignore_links=False):
+        if not content:
+            content = self.get_content()
+        h = html2text.HTML2Text()
+        h.ignore_links = ignore_links
+        h.ignore_images = ignore_links
+        return h.handle(content)
+
+    def get_size(self, str=False):
+        size = os.path.getsize(self.get_filename())/1024.0
+        if str:
+            size = round(size, 2)
+        return size
+
     def get_ail_2_ail_payload(self):
         payload = {'raw': self.get_gzip_content(b64=True)}
         return payload

-    def set_origin(self): # set_parent ?
-        pass
+    def set_father(self, father_id): # UPDATE KEYS ?????????????????????????????
+        r_serv_metadata.sadd(f'paste_children:{father_id}', self.id)
+        r_serv_metadata.hset(f'paste_metadata:{self.id}', 'father', father_id)
+
+        #f'obj:children:{obj_type}:{subtype}:{id}, {obj_type}:{subtype}:{id}
+        #f'obj:metadata:{obj_type}:{subtype}:{id}', 'father', fathe
+        # => ON Object LEVEL ?????????

-    def add_duplicate(self):
-        pass

     def sanitize_id(self):
         pass
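Taken together (illustrative, not part of the commit), the new Item helpers compose like this; the refactored view and module code further down relies on exactly these calls:

    item = Item('submitted/2022/01/01/foo.gz')   # hypothetical id
    text = item.get_content()                    # str, as before
    raw = item.get_content(binary=True)          # bytes, for ssdeep/tlsh hashing
    size_kb = item.get_size(str=True)            # size in Kb, rounded to 2 decimals
    preview = item.get_html2text_content()       # html2text rendering of the content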
@@ -150,18 +172,25 @@ class Item(AbstractObject):
     # origin
     # duplicate -> all item iterations ???
     #
-    def create(self, content, tags, origin=None, duplicate=None):
-        self.save_on_disk(content, binary=True, compressed=False, base64=False)
+    def create(self, content, tags, father=None, duplicates=[], _save=True):
+        if _save:
+            self.save_on_disk(content, binary=True, compressed=False, base64=False)

         # # TODO:
         # for tag in tags:
         #     self.add_tag(tag)

-        if origin:
+        if father:
             pass

-        if duplicate:
-            pass
+        for obj_id in duplicates:
+            for dup in duplicates[obj_id]:
+                self.add_duplicate(obj_id, dup['algo'], dup['similarity'])

     # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
     # TODO: DELETE ITEM CORRELATION + TAGS + METADATA + ...
@@ -204,6 +233,80 @@ class Item(AbstractObject):
     def exist_correlation(self):
         pass

+    def is_crawled(self):
+        return self.id.startswith('crawled')
+
+    # if is_crawled
+    def get_domain(self):
+        return self.id[19:-36]
+
+    def get_screenshot(self):
+        s = r_serv_metadata.hget(f'paste_metadata:{self.id}', 'screenshot')
+        if s:
+            return os.path.join(s[0:2], s[2:4], s[4:6], s[6:8], s[8:10], s[10:12], s[12:])
+
+    def get_har(self):
+        har_path = os.path.join(har_directory, self.id) + '.json'
+        if os.path.isfile(har_path):
+            return har_path
+        else:
+            return None
+
+    def get_url(self):
+        return r_serv_metadata.hget(f'paste_metadata:{self.id}', 'real_link')
+
+    # options: set of optional meta fields
+    def get_meta(self, options=set()):
+        meta = {}
+        meta['id'] = self.id
+        meta['date'] = self.get_date(separator=True) ############################ # TODO:
+        meta['source'] = self.get_source()
+        meta['tags'] = self.get_tags()
+        # optional meta fields
+        if 'content' in options:
+            meta['content'] = self.get_content()
+        if 'crawler' in options:
+            if self.is_crawled():
+                tags = meta.get('tags')
+                meta['crawler'] = self.get_meta_crawler(tags=tags)
+        if 'duplicates' in options:
+            meta['duplicates'] = self.get_duplicates()
+        if 'lines' in options:
+            content = meta.get('content')
+            meta['lines'] = self.get_meta_lines(content=content)
+        if 'size' in options:
+            meta['size'] = self.get_size(str=True)
+
+        # # TODO: ADD GET FATHER
+
+        # meta['encoding'] = None
+        return meta
+
+    def get_meta_crawler(self, tags=[]):
+        crawler = {}
+        if self.is_crawled():
+            crawler['domain'] = self.get_domain()
+            crawler['har'] = self.get_har()
+            crawler['screenshot'] = self.get_screenshot()
+            crawler['url'] = self.get_url()
+            if not tags:
+                tags = self.get_tags()
+            crawler['is_tags_safe'] = Tag.is_tags_safe(tags)
+        return crawler
+
+    def get_meta_lines(self, content=None):
+        if not content:
+            content = self.get_content()
+        max_length = 0
+        line_id = 0
+        nb_line = 0
+        for line in content.splitlines():
+            length = len(line)
+            if length > max_length:
+                max_length = length
+            nb_line += 1
+        return {'nb': nb_line, 'max_length': max_length}
+
 ############################################################################
 ############################################################################
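For reference (illustrative, not part of the commit): the dict returned by get_meta() is what showItem() now hands to the template, with optional keys present only when requested:

    item = Item('crawled/2022/01/02/example.gz')   # hypothetical id
    meta = item.get_meta(options=set(['content', 'crawler', 'duplicates', 'lines', 'size']))
    # roughly:
    # {'id': ..., 'date': ..., 'source': ..., 'tags': [...],
    #  'content': '...',
    #  'crawler': {'domain': ..., 'har': ..., 'screenshot': ..., 'url': ..., 'is_tags_safe': ...},
    #  'duplicates': {other_id: [{'algo': 'ssdeep', 'similarity': 90}, ...]},
    #  'lines': {'nb': ..., 'max_length': ...},
    #  'size': ...}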
@@ -547,7 +650,7 @@ def get_item_list_desc(list_item_id):
 def is_crawled(item_id):
     return item_basic.is_crawled(item_id)

-def get_crawler_matadata(item_id, ltags=None):
+def get_crawler_matadata(item_id, tags=None):
     dict_crawler = {}
     if is_crawled(item_id):
         dict_crawler['domain'] = get_item_domain(item_id)
@@ -759,5 +862,7 @@ def delete_domain_node(item_id):

 if __name__ == '__main__':
     content = 'test file content'
+    duplicates = {'tests/2020/01/02/test.gz': [{'algo':'ssdeep', 'similarity':75}, {'algo':'tlsh', 'similarity':45}]}
+
     item = Item('tests/2020/01/02/test_save.gz')
-    item.save_on_disk(content, binary=False)
+    item.create(content, _save=False)
@@ -17,6 +17,7 @@ sys.path.append(os.environ['AIL_BIN'])
 # Import Project packages
 ##################################
 from packages import Tag
+from lib import Duplicate
 from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations
 from lib.Tracker import is_obj_tracked, get_obj_all_trackers, delete_obj_trackers
@@ -69,6 +70,9 @@ class AbstractObject(ABC):
         tags = set(tags)
         return tags

+    def get_duplicates(self):
+        return Duplicate.get_duplicates(self.type, self.get_subtype(r_str=True), self.id)
+
     ## ADD TAGS ????
     #def add_tags(self):
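Each duplicate is stored as a flat "similarity:algo:id" string in a per-object set, and get_duplicates() parses it back (sketch, not part of the commit). The maxsplit of 2 only splits off the first two fields, so the rest of the string is kept as the object id whatever characters it contains:

    entry = '90:ssdeep:submitted/2022/01/01/bar.gz'   # as written by _add_obj_duplicate
    similarity, algo, other_id = entry.split(':', 2)
    print(int(similarity), algo, other_id)            # 90 ssdeep submitted/2022/01/01/bar.gz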
@@ -113,21 +113,49 @@ class AbstractSubtypeObject(AbstractObject):
         if date > last_seen:
             self.set_last_seen(date)

-    def add(self, date):
+    def add(self, date, item_id):
         self.update_correlation_daterange()
         # daily
         r_metadata.hincrby(f'{self.type}:{self.subtype}:{date}', self.id, 1)
         # all type
         r_metadata.zincrby(f'{self.type}_all:{self.subtype}', self.id, 1)

+        #######################################################################
+        #######################################################################
+        # REPLACE WITH CORRELATION ?????
+
+        # global set
+        r_serv_metadata.sadd(f'set_{self.type}_{self.subtype}:{self.id}', item_id)
+
+        ## object_metadata
+        # item
+        r_serv_metadata.sadd(f'item_{self.type}_{self.subtype}:{item_id}', self.id)
+
+        # new correlation
+        #
+        # How to filter by correlation type ????
+        #
+        f'correlation:obj:{self.type}:{self.subtype}:{self.id}', f'{obj_type}:{obj_subtype}:{obj_id}'
+        f'correlation:obj:{self.type}:{self.subtype}:{obj_type}:{self.id}', f'{obj_subtype}:{obj_id}'
+
+        #
+        #
+        #
+        #
+        #
+        #
+        #
+        #

     # # domain
     # if item_basic.is_crawled(item_id):
     #     domain = item_basic.get_item_domain(item_id)
     #     self.save_domain_correlation(domain, subtype, obj_id)

+    def create(self, first_seen, last_seen):
+        pass
+

     def _delete(self):
bin/modules/Duplicates.py (new executable file, 108 additions)
@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+"""
+The Duplicate module
+====================
+
+This huge module is, in short term, checking duplicates.
+Its input comes from other modules, namely:
+    Credential
+
+Perform comparisions with ssdeep and tlsh
+
+"""
+import redis
+
+import os
+import sys
+import time
+
+#from datetime import datetime, timedelta
+import datetime
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from modules.abstract_module import AbstractModule
+from lib.ConfigLoader import ConfigLoader
+from lib import Duplicate
+from lib.objects.Items import Item
+
+
+class Duplicates(AbstractModule):
+    """Duplicates module."""
+
+    def __init__(self):
+        super(Duplicates, self).__init__()
+
+        config_loader = ConfigLoader()
+        THRESHOLD_SSDEEP = config_loader.get_config_int('Modules_Duplicates', 'threshold_duplicate_ssdeep')
+        THRESHOLD_TLSH = config_loader.get_config_int('Modules_Duplicates', 'threshold_duplicate_tlsh')
+        self.min_item_size = float(config_loader.get_config_str('Modules_Duplicates', 'min_paste_size')) # # TODO: # FIXME: rename me
+        self.maximum_month_range = config_loader.get_config_int('Modules_Duplicates', 'maximum_month_range')
+
+        self.algos = {
+            "ssdeep": {"threshold": THRESHOLD_SSDEEP},
+            "tlsh": {"threshold": THRESHOLD_TLSH}
+        }
+
+        self.redis_logger.info(f"Module: {self.module_name} Launched")
+
+    def compute(self, message):
+        # IOError: "CRC Checksum Failed on : {id}"
+
+        item = Item(message)
+
+        # Check file size
+        if item.get_size() < self.min_item_size:
+            return None
+
+        # one month
+        curr_date_ymonth = datetime.datetime.now().strftime("%Y%m")
+        last_month_dates = Duplicate.get_last_x_month_dates(self.maximum_month_range)
+
+        x = time.time()
+
+        # Get Hashs
+        content = item.get_content(binary=True)
+        self.algos['ssdeep']['hash'] = Duplicate.get_ssdeep_hash(content)
+        self.algos['tlsh']['hash'] = Duplicate.get_tlsh_hash(content)
+
+        # TODO: Handle coputed duplicates
+
+        nb_duplicates = 0
+
+        for algo in self.algos:
+            obj_hash = self.algos[algo]['hash']
+            for date_ymonth in last_month_dates:
+                if Duplicate.exists_algo_hash_by_month(algo, obj_hash, date_ymonth):
+                    Duplicate.add_obj_duplicate(algo, obj_hash, 100, 'item', '', item.get_id(), date_ymonth)
+                    nb_duplicates +=1
+                else:
+                    for hash in Duplicate.get_algo_hashs_by_month(algo, date_ymonth):
+                        # # FIXME: try - catch 'hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash
+                        similarity = Duplicate.get_algo_similarity(algo, obj_hash, hash)
+                        print(f'[{algo}] comparing: {obj_hash} and {hash} similarity: {similarity}') # DEBUG:
+                        if similarity >= self.algos[algo]['threshold']:
+                            Duplicate.add_obj_duplicate(algo, hash, similarity, 'item', '', item.get_id(), date_ymonth)
+                            nb_duplicates +=1
+
+            # Save Hashs
+            Duplicate.save_object_hash(algo, curr_date_ymonth, self.algos[algo]['hash'], item.get_id())
+
+        if nb_duplicates:
+            self.redis_logger.info(f'Duplicate;{item.get_source()};{item.get_date()};{item.get_basename()};Detected {nb_duplicates};{item.get_id()}')
+
+        y = time.time()
+        print(f'{item.get_id()} Processed in {y-x} sec')
+        #self.redis_logger.debug('{}Processed in {} sec'.format(to_print, y-x))
+
+
+if __name__ == "__main__":
+
+    module = Duplicates()
+    module.run()
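Sketch of driving the module by hand (assumes a configured AIL environment; the item id is made up). compute() takes the item id as its message, so a single item can be processed without the queueing infrastructure:

    module = Duplicates()
    module.compute('submitted/2022/01/01/foo.gz')   # hypothetical item id
    # under the normal queueing infrastructure this is simply: module.run()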
@@ -66,15 +66,15 @@ publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Urls,Redis_Credential,R

 [CreditCards]
 subscribe = Redis_CreditCards
-publish = Redis_Duplicate,Redis_ModuleStats,Redis_Tags
+publish = Redis_ModuleStats,Redis_Tags

 [BankAccount]
 subscribe = Redis_Global
-publish = Redis_Duplicate,Redis_Tags
+publish = Redis_Tags

 [Mail]
 subscribe = Redis_Mail
-publish = Redis_Duplicate,Redis_ModuleStats,Redis_Tags
+publish = Redis_ModuleStats,Redis_Tags

 [Onion]
 subscribe = Redis_Onion

@@ -92,11 +92,11 @@ publish = Redis_Url

 [LibInjection]
 subscribe = Redis_Url
-publish = Redis_Duplicate,Redis_Tags
+publish = Redis_Tags

 [SQLInjectionDetection]
 subscribe = Redis_Url
-publish = Redis_Duplicate,Redis_Tags
+publish = Redis_Tags

 [ModuleStats]
 subscribe = Redis_ModuleStats

@@ -128,31 +128,31 @@ publish = Redis_Duplicate,Redis_ModuleStats,Redis_Tags

 [Cve]
 subscribe = Redis_Cve
-publish = Redis_Duplicate,Redis_Tags
+publish = Redis_Tags

 [Phone]
 subscribe = Redis_Global
-publish = Redis_Duplicate,Redis_Tags
+publish = Redis_Tags

 [Keys]
 subscribe = Redis_Global
-publish = Redis_Duplicate,Redis_PgpDump,Redis_Tags
+publish = Redis_PgpDump,Redis_Tags

 [PgpDump]
 subscribe = Redis_PgpDump
-publish = Redis_Duplicate,Redis_Tags
+publish = Redis_Tags

 [ApiKey]
 subscribe = Redis_ApiKey
-publish = Redis_Duplicate,Redis_Tags
+publish = Redis_Tags

 [Decoder]
 subscribe = Redis_Global
-publish = Redis_Duplicate,Redis_Tags
+publish = Redis_Tags

 [Bitcoin]
 subscribe = Redis_Global
-publish = Redis_Duplicate,Redis_Tags
+publish = Redis_Tags

 [submit_paste]
 subscribe = Redis

@@ -164,7 +164,8 @@ publish = Redis_Mixer,Redis_Tags

 [IP]
 subscribe = Redis_Global
-publish = Redis_Duplicate,Redis_Tags
+publish = Redis_Tags

 [Zerobins]
 subscribe = Redis_Url
@@ -15,12 +15,15 @@ from flask_login import login_required, current_user
 # Import Role_Manager
 from Role_Manager import login_admin, login_analyst, login_read_only

-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
-import Item
-import Tag
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib import item_basic
+from lib.objects.Items import Item
+from export import Export
+from packages import Tag

-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'export'))
-import Export

 # ============ BLUEPRINT ============
 objects_item = Blueprint('objects_item', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/item'))
@@ -38,28 +41,22 @@ bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
 @login_read_only
 def showItem(): # # TODO: support post
     item_id = request.args.get('id')
-    if not item_id or not Item.exist_item(item_id):
+    if not item_id or not item_basic.exist_item(item_id):
         abort(404)

-    dict_item = {}
-    dict_item['id'] = item_id
-    dict_item['name'] = dict_item['id'].replace('/', ' / ')
-    dict_item['father'] = Item.get_item_parent(item_id)
-    dict_item['content'] = Item.get_item_content(item_id)
-    dict_item['metadata'] = Item.get_item_metadata(item_id, item_content=dict_item['content'])
-    dict_item['tags'] = Tag.get_obj_tag(item_id)
-    #dict_item['duplicates'] = Item.get_item_nb_duplicates(item_id)
-    dict_item['duplicates'] = Item.get_item_duplicates_dict(item_id)
-    dict_item['crawler'] = Item.get_crawler_matadata(item_id, ltags=dict_item['tags'])
+    item = Item(item_id)
+    meta = item.get_meta(options=set(['content', 'crawler', 'duplicates', 'lines', 'size']))

+    meta['name'] = meta['id'].replace('/', ' / ')
+    meta['father'] = item_basic.get_item_parent(item_id)
     ## EXPORT SECTION
     # # TODO: ADD in Export SECTION
-    dict_item['hive_case'] = Export.get_item_hive_cases(item_id)
+    meta['hive_case'] = Export.get_item_hive_cases(item_id)

     return render_template("show_item.html", bootstrap_label=bootstrap_label,
-                            modal_add_tags=Tag.get_modal_add_tags(dict_item['id'], object_type='item'),
+                            modal_add_tags=Tag.get_modal_add_tags(meta['id'], object_type='item'),
                             is_hive_connected=Export.get_item_hive_cases(item_id),
-                            dict_item=dict_item)
+                            meta=meta)

 # kvrocks data
@@ -74,24 +71,27 @@ def showItem(): # # TODO: support post
 @login_read_only
 def html2text(): # # TODO: support post
     item_id = request.args.get('id')
-    if not item_id or not Item.exist_item(item_id):
+    if not item_id or not item_basic.exist_item(item_id):
         abort(404)
-    return Item.get_item_content_html2text(item_id)
+    item = Item(item_id)
+    return item.get_html2text_content()

 @objects_item.route("/object/item/raw_content")
 @login_required
 @login_read_only
 def item_raw_content(): # # TODO: support post
     item_id = request.args.get('id')
-    if not item_id or not Item.exist_item(item_id):
+    if not item_id or not item_basic.exist_item(item_id):
         abort(404)
-    return Response(Item.get_item_content(item_id), mimetype='text/plain')
+    item = Item(item_id)
+    return Response(item.get_content(), mimetype='text/plain')

 @objects_item.route("/object/item/download")
 @login_required
 @login_read_only
 def item_download(): # # TODO: support post
     item_id = request.args.get('id')
-    if not item_id or not Item.exist_item(item_id):
+    if not item_id or not item_basic.exist_item(item_id):
         abort(404)
-    return send_file(Item.get_raw_content(item_id), attachment_filename=item_id, as_attachment=True)
+    item = Item(item_id)
+    return send_file(item.get_raw_content(), attachment_filename=item_id, as_attachment=True)
@@ -38,7 +38,7 @@

 <div class="card my-2 mx-2">
   <div class="card-header bg-dark">
-    <h3 class="text-white text-center" >{{ dict_item['name'] }}</h3>
+    <h3 class="text-white text-center" >{{ meta['name'] }}</h3>
   </div>
   <div class="card-body pb-1">
     <table class="table table-condensed">

@@ -46,7 +46,7 @@
 <tr>
   <th>Date</th>
   <th>Source</th>
-  <th>Encoding</th>
+  <!-- <th>Encoding</th> -->
   <th>Size (Kb)</th>
   <th>Number of lines</th>
   <th>Max line length</th>

@@ -54,12 +54,12 @@
 </thead>
 <tbody>
 <tr>
-  <td>{{ dict_item['metadata']['date'] }}</td>
-  <td>{{ dict_item['metadata']['source'] }}</td>
-  <td>{{ dict_item['metadata']['encoding'] }}</td>
-  <td>{{ dict_item['metadata']['size'] }}</td>
-  <td>{{ dict_item['metadata']['lines']['nb'] }}</td>
-  <td>{{ dict_item['metadata']['lines']['max_length'] }}</td>
+  <td>{{ meta['date'] }}</td>
+  <td>{{ meta['source'] }}</td>
+  <!-- <td>{{ meta['encoding'] }}</td> -->
+  <td>{{ meta['size'] }}</td>
+  <td>{{ meta['lines']['nb'] }}</td>
+  <td>{{ meta['lines']['max_length'] }}</td>
 </tr>
 </tbody>
 </table>

@@ -68,9 +68,9 @@
 <h5>
   <div>
     {% include 'modals/edit_tag.html' %}
-    {% for tag in dict_item['tags'] %}
+    {% for tag in meta['tags'] %}
       <button class="btn btn-{{ bootstrap_label[loop.index0 % 5] }}" data-toggle="modal" data-target="#edit_tags_modal"
-        data-tagid="{{ tag }}" data-objtype="item" data-objid="{{ dict_item['id'] }}">
+        data-tagid="{{ tag }}" data-objtype="item" data-objid="{{ meta['id'] }}">
         {{ tag }}
       </button>

@@ -84,21 +84,21 @@
   </h5>
 </div>

-{% if dict_item['father'] %}
+{% if meta['father'] %}
   <div class="mt-3">
-    Father: <a href="{{ url_for('objects_item.showItem')}}?id={{dict_item['father']}}" target="_blank">{{dict_item['father']}}</a>
+    Father: <a href="{{ url_for('objects_item.showItem')}}?id={{meta['father']}}" target="_blank">{{meta['father']}}</a>
   </div>
 {% endif %}

 <div class="d-flex flex-row-reverse bd-highlight">
   <div>
-    <a href="{{ url_for('correlation.show_correlation')}}?object_type=paste&correlation_id={{ dict_item['id'] }}&correlation_objects=paste" target="_blank">
+    <a href="{{ url_for('correlation.show_correlation')}}?object_type=paste&correlation_id={{ meta['id'] }}&correlation_objects=paste" target="_blank">
       <button class="btn btn-lg btn-info"><i class="fas fa-project-diagram"></i> Correlations Graph
       </button>
     </a>
   </div>
   <div>
-    {% with obj_type='item', obj_id=dict_item['id'], obj_subtype=''%}
+    {% with obj_type='item', obj_id=meta['id'], obj_subtype=''%}
       {% include 'modals/investigations_register_obj.html' %}
     {% endwith %}
     <div class="mr-2">

@@ -108,7 +108,7 @@
     </div>
   </div>
   <div class="mx-2">
-    {% with obj_type='item', obj_id=dict_item['id'], obj_lvl=0%}
+    {% with obj_type='item', obj_id=meta['id'], obj_lvl=0%}
       {% include 'import_export/block_add_user_object_to_export.html' %}
     {% endwith %}
   </div>

@@ -134,14 +134,14 @@
   </div>
 {% endif %}

-{% if dict_item['hive_case'] %}
+{% if meta['hive_case'] %}
   <div class="list-group" id="misp_event">
     <li class="list-group-item active">The Hive Case already Created</li>
     <a target="_blank" href="{{ hive_url }}" class="list-group-item">{{ hive_url }}</a>
   </div>
 {% endif %}

-{% if dict_item['duplicates'] != 0 %}
+{% if meta['duplicates'] != 0 %}
   <div id="accordionDuplicate" class="mb-2 mx-3">
     <div class="card">
       <div class="card-header py-1" id="headingDuplicate">

@@ -149,7 +149,7 @@
 <div class="col-11">
   <div class="mt-2">
     <i class="far fa-clone"></i> duplicates
-    <div class="badge badge-warning">{{dict_item['duplicates']|length}}</div>
+    <div class="badge badge-warning">{{meta['duplicates']|length}}</div>
   </div>
 </div>
 <div class="col-1">

@@ -173,19 +173,19 @@
   </tr>
 </thead>
 <tbody>
-{% for duplicate_id in dict_item['duplicates'] %}
+{% for duplicate_id in meta['duplicates'] %}
   <tr>
-    <td>{{dict_item['duplicates'][duplicate_id]['date']}}</td>
+    <td>{{meta['duplicates'][duplicate_id]['date']}}</td>
     <td class="py-0">
       <table class="table table-borderless table-sm my-0">
         <tbody>
-        {%for algo in dict_item['duplicates'][duplicate_id]['algo']|sort()%}
+        {%for dict_algo in meta['duplicates'][duplicate_id]|sort(attribute='algo')%}
          <tr>
-           <td class="py-0">{{algo}}</td>
+           <td class="py-0">{{dict_algo['algo']}}</td>
            <td class="w-100 py-0">
              <div class="progress mt-1">
-               <div class="progress-bar progress-bar-striped {%if algo=='tlsh'%}bg-secondary{%endif%}" role="progressbar" style="width: {{dict_item['duplicates'][duplicate_id]['algo'][algo]}}%;" aria-valuenow="{{dict_item['duplicates'][duplicate_id]['algo'][algo]}}" aria-valuemin="0" aria-valuemax="100">
-                 {{dict_item['duplicates'][duplicate_id]['algo'][algo]}}%
+               <div class="progress-bar progress-bar-striped {%if dict_algo['algo']=='tlsh'%}bg-secondary{%endif%}" role="progressbar" style="width: {{dict_algo['similarity']}}%;" aria-valuenow="{{dict_algo['similarity']}}" aria-valuemin="0" aria-valuemax="100">
+                 {{dict_algo['similarity']}}%
               </div>
             </div>
           </td>

@@ -200,7 +200,7 @@
     </a>
   </td>
   <td>
-    <a target="_blank" href="{{ url_for('showsavedpastes.showDiff') }}?s1={{dict_item['id']}}&s2={{duplicate_id}}" class="fa fa-columns" title="Show diff"></a>
+    <a target="_blank" href="{{ url_for('showsavedpastes.showDiff') }}?s1={{meta['id']}}&s2={{duplicate_id}}" class="fa fa-columns" title="Show diff"></a>
   </td>
 </tr>
 {% endfor %}

@@ -261,7 +261,7 @@
 {% endif %}


-{% if dict_item['crawler'] %}
+{% if meta['crawler'] %}
   <div id="accordionCrawler" class="mb-3 mx-3">
     <div class="card">
       <div class="card-header py-1" id="headingCrawler">

@@ -294,18 +294,18 @@
 <tr>
   <td><i class="far fa-file"></i></td>
   <td>
-    <a class="badge" target="_blank" href="{{ url_for('objects_item.showItem', paste=dict_item['father']) }}" />{{ dict_item['father'] }}</a>
+    <a class="badge" target="_blank" href="{{ url_for('objects_item.showItem', paste=meta['father']) }}" />{{ meta['father'] }}</a>
   </td>
 </tr>
   <td><i class="fab fa-html5"></i></td>
   <td>
-    <a class="badge" target="_blank" href="{{ url_for('crawler_splash.showDomain', domain=dict_item['crawler']['domain']) }}" />{{ dict_item['crawler']['domain'] }}</a>
+    <a class="badge" target="_blank" href="{{ url_for('crawler_splash.showDomain', domain=meta['crawler']['domain']) }}" />{{ meta['crawler']['domain'] }}</a>
   </td>
 </tr>
 <tr>
   <td>url</td>
   <td>
-    {{ dict_item['crawler']['url'] }}
+    {{ meta['crawler']['url'] }}
   </td>
 </tr>
 </tbody>

@@ -318,11 +318,11 @@
 <div class="card-body py-2">
   <div class="row">
     <div class="col-md-8">
-      <input class="custom-range mt-2" id="blocks" type="range" min="1" max="50" value="{%if dict_item['crawler']['is_tags_safe']%}13{%else%}0{%endif%}">
+      <input class="custom-range mt-2" id="blocks" type="range" min="1" max="50" value="{%if meta['crawler']['is_tags_safe']%}13{%else%}0{%endif%}">
     </div>
     <div class="col-md-4">
-      <button class="btn {%if dict_item['crawler']['is_tags_safe']%}btn-primary{%else%}btn-danger{%endif%}" onclick="blocks.value=50;pixelate();">
-        {%if dict_item['crawler']['is_tags_safe']%}
+      <button class="btn {%if meta['crawler']['is_tags_safe']%}btn-primary{%else%}btn-danger{%endif%}" onclick="blocks.value=50;pixelate();">
+        {%if meta['crawler']['is_tags_safe']%}
           <i class="fas fas fa-plus-square"></i>
         {%else%}
           <i class="fas fa-exclamation-triangle"></i>

@@ -358,8 +358,8 @@
 <li class="nav-item dropdown">
   <a class="nav-link dropdown-toggle" data-toggle="dropdown" href="#">Others</a>
   <div class="dropdown-menu">
-    <a class="dropdown-item" href="{{ url_for('objects_item.item_raw_content', id=dict_item['id']) }}"><i class="far fa-file"></i> Raw Content</a>
-    <a class="dropdown-item" href="{{ url_for('objects_item.item_download', id=dict_item['id']) }}"><i class="fas fa-download"></i> Download</i></a>
+    <a class="dropdown-item" href="{{ url_for('objects_item.item_raw_content', id=meta['id']) }}"><i class="far fa-file"></i> Raw Content</a>
+    <a class="dropdown-item" href="{{ url_for('objects_item.item_download', id=meta['id']) }}"><i class="fas fa-download"></i> Download</i></a>
   </div>
 </li>
 </ul>

@@ -367,7 +367,7 @@

 <div class="tab-content" id="pills-tabContent">
   <div class="tab-pane fade show active" id="pills-content" role="tabpanel" aria-labelledby="pills-content-tab">
-    <p class="my-0"> <pre class="border">{{ dict_item['content'] }}</pre></p>
+    <p class="my-0"> <pre class="border">{{ meta['content'] }}</pre></p>
   </div>
   <div class="tab-pane fade" id="pills-html2text" role="tabpanel" aria-labelledby="pills-html2text-tab">
     <p class="my-0"> <pre id="html2text-container" class="border"></pre></p>

@@ -393,7 +393,7 @@

 $('#pills-html2text-tab').on('shown.bs.tab', function (e) {
   if ($('#html2text-container').is(':empty')){
-    $.get("{{ url_for('objects_item.html2text') }}?id={{ dict_item['id'] }}").done(function(data){
+    $.get("{{ url_for('objects_item.html2text') }}?id={{ meta['id'] }}").done(function(data){
       $('#html2text-container').text(data);
     });

@@ -401,7 +401,7 @@
 });
 </script>

-{% if dict_item['crawler'] %}
+{% if meta['crawler'] %}
 <script>
 var ctx = canvas.getContext('2d'), img = new Image();

@@ -413,7 +413,7 @@
 img.addEventListener("error", img_error);
 var draw_img = false;

-img.src = "{{ url_for('showsavedpastes.screenshot', filename=dict_item['crawler']['screenshot']) }}";
+img.src = "{{ url_for('showsavedpastes.screenshot', filename=meta['crawler']['screenshot']) }}";

 function pixelate() {
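The template rework tracks the new duplicates shape: a list of algo/similarity dicts per duplicate id instead of the old nested 'algo' mapping. In Python terms (illustrative; the sample data is the one from the Items.py test block above) the table loop does this:

    duplicates = {'tests/2020/01/02/test.gz': [{'algo': 'ssdeep', 'similarity': 75},
                                               {'algo': 'tlsh', 'similarity': 45}]}
    for duplicate_id in duplicates:
        # mirrors the template's |sort(attribute='algo')
        for dict_algo in sorted(duplicates[duplicate_id], key=lambda d: d['algo']):
            print(duplicate_id, dict_algo['algo'], f"{dict_algo['similarity']}%")

Note that the date cell still reads meta['duplicates'][duplicate_id]['date'], which appears to assume the old nested shape rather than the new list.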