mirror of
synced 2025-02-18 15:26:25 +00:00
chg; [Duplicates module] refactor module + DB keys
This commit is contained in:
11 changed files with 480 additions and 290 deletions
@ -218,6 +218,8 @@ def item_submit_migration():
# # TODO: change db
def tags_migration():
def items_migration():
@ -1,198 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
The Duplicate module
This huge module is, in short term, checking duplicates.
Its input comes from other modules, namely:
Credential, CreditCard, Keys, Mails, SQLinjectionDetection, CVE and Phone
This one differ from v1 by only using redis and not json file stored on disk
Perform comparisions with ssdeep and tlsh
import redis
import os
import time
from datetime import datetime, timedelta
import json
import ssdeep
import tlsh
from packages import Paste
from pubsublogger import publisher
from Helper import Process
if __name__ == "__main__":
publisher.port = 6380
publisher.channel = "Script"
config_section = 'Duplicates'
p = Process(config_section)
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"))
maximum_month_range = int(p.config.get("Modules_Duplicates", "maximum_month_range"))
threshold_duplicate_ssdeep = int(p.config.get("Modules_Duplicates", "threshold_duplicate_ssdeep"))
threshold_duplicate_tlsh = int(p.config.get("Modules_Duplicates", "threshold_duplicate_tlsh"))
threshold_set = {}
threshold_set['ssdeep'] = threshold_duplicate_ssdeep
threshold_set['tlsh'] = threshold_duplicate_tlsh
min_paste_size = float(p.config.get("Modules_Duplicates", "min_paste_size"))
dico_redis = {}
date_today = datetime.today()
for year in range(2013, date_today.year+1):
for month in range(0, 13):
dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
host=p.config.get("ARDB_DB", "host"),
port=p.config.get("ARDB_DB", "port"),
db=str(year) + str(month),
publisher.info("Script duplicate started")
while True:
hash_dico = {}
dupl = set()
dico_range_list = []
x = time.time()
message = p.get_from_set()
if message is not None:
path = message
PST = Paste.Paste(path)
publisher.debug("Script Attribute is idling 10s")
# the paste is too small
if (PST._get_p_size() < min_paste_size):
# Assignate the correct redis connexion
r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month]
# Creating the dico name: yyyymm
# Get the date of the range
date_range = date_today - timedelta(days = maximum_month_range*30.4166666)
num_of_month = (date_today.year - date_range.year)*12 + (date_today.month - date_range.month)
for diff_month in range(0, num_of_month+1):
curr_date_range = date_today - timedelta(days = diff_month*30.4166666)
to_append = str(curr_date_range.year)+str(curr_date_range.month).zfill(2)
# Use all dico in range
dico_range_list = dico_range_list[0:maximum_month_range]
yearly_index = str(date_today.year)+'00'
r_serv0 = dico_redis[yearly_index]
index = (r_serv0.get("current_index")) + str(PST.p_date)
# Open selected dico range
opened_dico = []
for dico_name in dico_range_list:
opened_dico.append([dico_name, dico_redis[dico_name]])
# retrieve hash from paste
paste_hashes = PST._get_p_hash()
# Go throught the Database of the dico (of the month)
for curr_dico_name, curr_dico_redis in opened_dico:
for hash_type, paste_hash in paste_hashes.items():
for dico_hash in curr_dico_redis.smembers('HASHS_'+hash_type):
if hash_type == 'ssdeep':
percent = 100-ssdeep.compare(dico_hash, paste_hash)
percent = tlsh.diffxlen(dico_hash, paste_hash)
if percent > 100:
percent = 100
threshold_duplicate = threshold_set[hash_type]
if percent < threshold_duplicate:
percent = 100 - percent if hash_type == 'ssdeep' else percent #recovert the correct percent value for ssdeep
# Go throught the Database of the dico filter (month)
r_serv_dico = dico_redis[curr_dico_name]
# index of paste
index_current = r_serv_dico.get(dico_hash)
index_current = index_current
paste_path = r_serv_dico.get(index_current)
paste_path = paste_path
paste_date = r_serv_dico.get(index_current+'_date')
paste_date = paste_date
paste_date = paste_date if paste_date != None else "No date available"
if paste_path != None:
paste_path = paste_path.replace(PASTES_FOLDER+'/', '', 1)
if paste_path != PST.p_rel_path:
hash_dico[dico_hash] = (hash_type, paste_path, percent, paste_date)
print('['+hash_type+'] '+'comparing: ' + str(PST.p_rel_path) + ' and ' + str(paste_path) + ' percentage: ' + str(percent))
except Exception:
print('hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash)
# Add paste in DB after checking to prevent its analysis twice
# hash_type_i -> index_i AND index_i -> PST.PATH
r_serv1.set(index, PST.p_rel_path)
r_serv1.set(index+'_date', PST._get_p_date())
r_serv1.sadd("INDEX", index)
# Adding hashes in Redis
for hash_type, paste_hash in paste_hashes.items():
r_serv1.set(paste_hash, index)
#bad hash
if paste_hash == '':
print('bad Hash: ' + hash_type)
r_serv1.sadd("HASHS_"+hash_type, paste_hash)
##################### Similarity found #######################
# if there is data in this dictionnary
if len(hash_dico) != 0:
# paste_tuple = (hash_type, date, paste_path, percent)
for dico_hash, paste_tuple in hash_dico.items():
# Creating the object attribute and save it.
to_print = 'Duplicate;{};{};{};'.format(
PST.p_source, PST.p_date, PST.p_name)
if dupl != []:
dupl = list(dupl)
PST.__setattr__("p_duplicate", dupl)
publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_rel_path))
print('{}Detected {}'.format(to_print, len(dupl)))
y = time.time()
publisher.debug('{}Processed in {} sec'.format(to_print, y-x))
except IOError:
to_print = 'Duplicate;{};{};{};'.format(
PST.p_source, PST.p_date, PST.p_name)
print("CRC Checksum Failed on :", PST.p_rel_path)
publisher.error('{}CRC Checksum Failed'.format(to_print))
Executable file
Executable file
@ -0,0 +1,130 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import ssdeep
import sys
import time
import tlsh
import datetime
# Import Project packages
from lib.ConfigLoader import ConfigLoader
config_loader = ConfigLoader()
r_serv_db = config_loader.get_redis_conn("Kvrocks_DB")
MIN_ITEM_SIZE = float(config_loader.get_config_str('Modules_Duplicates', 'min_paste_size')) # # TODO: RENAME ME
config_loader = None
# Hash != Duplicates => New correlation HASH => check if same hash if duplicate == 100
# Object Hash => correlation decoded => don't need correlation to exist
# -> compute/get(if exist we have a correlation) hash -> get correlation same hash
# Duplicates between different objects ?????
# Diff Decoded -> Item => Diff Item decoded - Item
# Duplicates domains != Duplicates items
def get_ssdeep_hash(content):
    """Compute the ssdeep fuzzy hash of ``content``.

    Thin wrapper around ``ssdeep.hash``; its result is returned unchanged.
    """
    fuzzy_hash = ssdeep.hash(content)
    return fuzzy_hash
def get_ssdeep_similarity(obj_hash, other_hash):
    """Compare two ssdeep hashes.

    Returns whatever ``ssdeep.compare`` yields for the pair (presumably a
    0-100 match score -- confirm against the ssdeep binding in use).
    """
    score = ssdeep.compare(obj_hash, other_hash)
    return score
def get_tlsh_hash(content):
    """Compute the TLSH hash of ``content``.

    Thin wrapper around ``tlsh.hash``; its result is returned unchanged.
    """
    digest = tlsh.hash(content)
    return digest
def get_tlsh_similarity(obj_hash, other_hash):
    """Convert the TLSH difference between two hashes into a similarity.

    The value returned by ``tlsh.diffxlen`` is capped at 100 and then
    subtracted from 100, so a difference of 0 gives 100 and any difference
    of 100 or more gives 0.
    """
    capped_diff = min(tlsh.diffxlen(obj_hash, other_hash), 100)
    return 100 - capped_diff
def get_algo_similarity(algo, obj_hash, other_hash):
    """Compute the similarity of two hashes with the comparator for ``algo``.

    ``algo`` is ``'ssdeep'`` or ``'tlsh'``; any other value yields ``None``.
    """
    if algo == 'tlsh':
        return get_tlsh_similarity(obj_hash, other_hash)
    if algo == 'ssdeep':
        return get_ssdeep_similarity(obj_hash, other_hash)
    # Unknown algorithm: nothing to compare with.
    return None
def get_algo_hashs_by_month(algo, date_ymonth):
    """Return every hash stored for ``algo`` in the month ``date_ymonth``.

    The hashes are the field names of the Redis hash
    ``duplicates:hashs:{algo}:{date_ymonth}``.
    """
    month_key = f'duplicates:hashs:{algo}:{date_ymonth}'
    return r_serv_db.hkeys(month_key)
def exists_algo_hash_by_month(algo, hash, date_ymonth):
    """Tell whether ``hash`` is already recorded for ``algo`` in ``date_ymonth``.

    Returns the result of ``HEXISTS`` on ``duplicates:hashs:{algo}:{date_ymonth}``.
    """
    month_key = f'duplicates:hashs:{algo}:{date_ymonth}'
    return r_serv_db.hexists(month_key, hash)
def get_object_id_by_hash(algo, hash, date_ymonth):
    """Look up the object id stored under ``hash`` for ``algo`` / ``date_ymonth``.

    Returns the result of ``HGET`` on ``duplicates:hashs:{algo}:{date_ymonth}``.
    """
    month_key = f'duplicates:hashs:{algo}:{date_ymonth}'
    return r_serv_db.hget(month_key, hash)
def save_object_hash(algo, date_ymonth, hash, obj_id):
    """Record that ``obj_id`` produced ``hash`` with ``algo`` in ``date_ymonth``.

    Writes field ``hash`` -> ``obj_id`` in ``duplicates:hashs:{algo}:{date_ymonth}``.
    """
    month_key = f'duplicates:hashs:{algo}:{date_ymonth}'
    r_serv_db.hset(month_key, hash, obj_id)
def get_duplicates(obj_type, subtype, id):
    """Return the duplicates recorded for one object, grouped by duplicate id.

    Reads the set ``obj:duplicates:{obj_type}:{subtype}:{id}``, whose members
    have the form ``"{similarity}:{algo}:{duplicate_id}"``.

    Returns:
        dict mapping each duplicate id to a list of
        ``{'algo': <str>, 'similarity': <int>}`` entries.
    """
    dict_dup = {}
    duplicates = r_serv_db.smembers(f'obj:duplicates:{obj_type}:{subtype}:{id}')
    for str_dup in duplicates:
        # maxsplit=2 keeps any ':' inside the duplicate id intact.
        # A dedicated local name is used so the ``id`` parameter is not
        # overwritten while iterating.
        similarity, algo, dup_id = str_dup.split(':', 2)
        dict_dup.setdefault(dup_id, []).append({'algo': algo, 'similarity': int(similarity)})
    return dict_dup
def _add_obj_duplicate(algo, similarity, obj_type, subtype, id, id_2):
    """Store one directed duplicate link: ``id`` -> ``id_2``.

    Adds the member ``"{similarity}:{algo}:{id_2}"`` to the set
    ``obj:duplicates:{obj_type}:{subtype}:{id}``.
    """
    set_key = f'obj:duplicates:{obj_type}:{subtype}:{id}'
    member = f'{similarity}:{algo}:{id_2}'
    r_serv_db.sadd(set_key, member)
def add_obj_duplicate(algo, hash, similarity, obj_type, subtype, id, date_ymonth):
    """Register object ``id`` and the owner of ``hash`` as duplicates of each other.

    Args:
        algo: hashing algorithm name used for the comparison.
        hash: hash of the *other* object, as stored for ``date_ymonth``.
        similarity: similarity score between the two objects.
        obj_type, subtype, id: identify the object being processed.
        date_ymonth: month bucket in which ``hash`` is looked up.

    The link is written in both directions. When ``similarity`` is 100, the
    links between ``id`` and its already-known 100% duplicates for the same
    ``algo`` are (re)written in both directions as well.
    """
    obj2_id = get_object_id_by_hash(algo, hash, date_ymonth)
    if obj2_id is None:
        # Hash not present in this month bucket: there is no object to link,
        # and writing the literal "None" as an id would corrupt the sets.
        return

    # same content
    if similarity == 100:
        dups = get_duplicates(obj_type, subtype, id)
        for dup_id, dup_entries in dups.items():
            if any(entry['similarity'] == 100 and entry['algo'] == algo for entry in dup_entries):
                # Fix: link against the duplicate's id, not its list of
                # {'algo', 'similarity'} entries.
                _add_obj_duplicate(algo, similarity, obj_type, subtype, id, dup_id)
                _add_obj_duplicate(algo, similarity, obj_type, subtype, dup_id, id)

    _add_obj_duplicate(algo, similarity, obj_type, subtype, id, obj2_id)
    _add_obj_duplicate(algo, similarity, obj_type, subtype, obj2_id, id)
def get_last_x_month_dates(nb_months):
    """Return ``'YYYYMM'`` strings for the current month and the months before it.

    Args:
        nb_months: how many previous months to include.

    Returns:
        A list of ``nb_months + 1`` strings, newest first, starting with the
        current month.
    """
    cursor = datetime.datetime.now()
    result = [cursor.strftime("%Y%m")]
    for _ in range(nb_months):
        # First day of the month minus one day = last day of the previous month.
        cursor = cursor.replace(day=1) - datetime.timedelta(days=1)
        result.append(cursor.strftime("%Y%m"))
    return result
if __name__ == '__main__':
res = get_last_x_month_dates(7)
@ -54,6 +54,16 @@ def is_crawled(item_id):
def get_item_domain(item_id):
return item_id[19:-36]
def get_item_content_binary(item_id):
item_full_path = os.path.join(PASTES_FOLDER, item_id)
with gzip.open(item_full_path, 'rb') as f:
item_content = f.read()
except Exception as e:
item_content = ''
return item_content
def get_item_content(item_id):
item_full_path = os.path.join(PASTES_FOLDER, item_id)
@ -91,11 +91,14 @@ class Item(AbstractObject):
return filename
def get_content(self):
def get_content(self, binary=False):
Returns Item content
return item_basic.get_item_content(self.id)
if binary:
return item_basic.get_item_content_binary(self.id)
return item_basic.get_item_content(self.id)
def get_raw_content(self):
filepath = self.get_filename()
@ -110,15 +113,34 @@ class Item(AbstractObject):
content = base64.b64encode(content)
return content.decode()
def get_html2text_content(self, content=None, ignore_links=False):
    """Render HTML as text with ``html2text``.

    ``content`` defaults to the item's own content when empty or omitted.
    ``ignore_links`` is applied to both links and images.
    """
    html = content or self.get_content()
    converter = html2text.HTML2Text()
    converter.ignore_links = ignore_links
    converter.ignore_images = ignore_links
    return converter.handle(html)
def get_size(self, str=False):
    """Return the on-disk size of the item's file, in units of 1024 bytes.

    When ``str`` is truthy the value is rounded to two decimals.
    """
    size_kb = os.path.getsize(self.get_filename()) / 1024.0
    return round(size_kb, 2) if str else size_kb
def get_ail_2_ail_payload(self):
payload = {'raw': self.get_gzip_content(b64=True)}
return payload
def set_origin(self): # set_parent ?
def set_father(self, father_id): # UPDATE KEYS ?????????????????????????????
r_serv_metadata.sadd(f'paste_children:{father_id}', self.id)
r_serv_metadata.hset(f'paste_metadata:{self.id}', 'father', father_id)
#f'obj:children:{obj_type}:{subtype}:{id}, {obj_type}:{subtype}:{id}
#f'obj:metadata:{obj_type}:{subtype}:{id}', 'father', fathe
# => ON Object LEVEL ?????????
def add_duplicate(self):
def sanitize_id(self):
@ -150,18 +172,25 @@ class Item(AbstractObject):
# origin
# duplicate -> all item iterations ???
def create(self, content, tags, origin=None, duplicate=None):
self.save_on_disk(content, binary=True, compressed=False, base64=False)
def create(self, content, tags, father=None, duplicates=[], _save=True):
if _save:
self.save_on_disk(content, binary=True, compressed=False, base64=False)
# # TODO:
# for tag in tags:
# self.add_tag(tag)
if origin:
if father:
for obj_id in duplicates:
for dup in duplicates[obj_id]:
self.add_duplicate(obj_id, dup['algo'], dup['similarity'])
if duplicate:
@ -204,6 +233,80 @@ class Item(AbstractObject):
def exist_correlation(self):
def is_crawled(self):
    """Return True when the item id begins with ``'crawled'``."""
    item_id = self.id
    return item_id.startswith('crawled')
# if is_crawled
def get_domain(self):
    """Return the part of the item id between its first 19 and last 36 characters.

    NOTE(review): presumably this strips a ``crawled/YYYY/MM/DD/`` prefix and a
    36-character UUID suffix -- confirm against the crawler's id format.
    """
    prefix_len, suffix_len = 19, 36
    return self.id[prefix_len:-suffix_len]
def get_screenshot(self):
    """Return the relative path of the item's screenshot, or None.

    The ``screenshot`` field of ``paste_metadata:{id}`` is split into six
    two-character directory levels followed by the remaining characters.
    """
    name = r_serv_metadata.hget(f'paste_metadata:{self.id}', 'screenshot')
    if not name:
        return None
    levels = [name[pos:pos + 2] for pos in range(0, 12, 2)]
    return os.path.join(*levels, name[12:])
def get_har(self):
    """Return the path of the item's HAR file (``<har_directory>/<id>.json``).

    Returns None when no such file exists.
    """
    candidate = os.path.join(har_directory, self.id) + '.json'
    return candidate if os.path.isfile(candidate) else None
def get_url(self):
    """Return the ``real_link`` field stored in ``paste_metadata:{id}``."""
    meta_key = f'paste_metadata:{self.id}'
    return r_serv_metadata.hget(meta_key, 'real_link')
# options: set of optional meta fields
def get_meta(self, options=None):
    """Build the metadata dict of the item.

    Args:
        options: collection of optional field names to include. Recognised
            names are ``'content'``, ``'crawler'``, ``'duplicates'``,
            ``'lines'`` and ``'size'``. Defaults to none of them.

    Returns:
        dict that always holds ``id``, ``date``, ``source`` and ``tags``,
        plus one entry per requested option. ``crawler`` is only set when
        the item is a crawled item.
    """
    # None instead of a shared mutable default (``set()``).
    if options is None:
        options = set()

    meta = {}
    meta['id'] = self.id
    meta['date'] = self.get_date(separator=True)  # TODO: (left open in the original)
    meta['source'] = self.get_source()
    meta['tags'] = self.get_tags()

    # optional meta fields
    if 'content' in options:
        meta['content'] = self.get_content()
    if 'crawler' in options:
        if self.is_crawled():
            # Reuse the tags fetched above rather than querying them again.
            meta['crawler'] = self.get_meta_crawler(tags=meta.get('tags'))
    if 'duplicates' in options:
        meta['duplicates'] = self.get_duplicates()
    if 'lines' in options:
        # Reuse the content if it was requested; otherwise get_meta_lines
        # receives None.
        meta['lines'] = self.get_meta_lines(content=meta.get('content'))
    if 'size' in options:
        meta['size'] = self.get_size(str=True)

    # meta['encoding'] = None
    return meta
def get_meta_crawler(self, tags=None):
    """Build the crawler-related metadata of the item.

    Args:
        tags: tags of the item, if the caller already has them; they are
            fetched with ``get_tags()`` when empty or omitted.

    Returns:
        An empty dict for non-crawled items; otherwise a dict with
        ``domain``, ``har``, ``screenshot``, ``url`` and ``is_tags_safe``.

    NOTE(review): the nesting of the tag handling under the ``is_crawled()``
    check is reconstructed from a view without indentation -- confirm.
    """
    # ``None`` replaces the former shared mutable default (``[]``).
    crawler = {}
    if self.is_crawled():
        crawler['domain'] = self.get_domain()
        crawler['har'] = self.get_har()
        crawler['screenshot'] = self.get_screenshot()
        crawler['url'] = self.get_url()
        if not tags:
            tags = self.get_tags()
        crawler['is_tags_safe'] = Tag.is_tags_safe(tags)
    return crawler
def get_meta_lines(self, content=None):
    """Count the lines of ``content`` and measure the longest one.

    Args:
        content: text to analyse; the item's own content is used when it is
            empty or omitted.

    Returns:
        ``{'nb': <number of lines>, 'max_length': <length of longest line>}``;
        both are 0 when there are no lines.
    """
    if not content:
        content = self.get_content()
    line_lengths = [len(line) for line in content.splitlines()]
    # default=0 covers content that has no lines at all.
    return {'nb': len(line_lengths), 'max_length': max(line_lengths, default=0)}
@ -547,7 +650,7 @@ def get_item_list_desc(list_item_id):
def is_crawled(item_id):
return item_basic.is_crawled(item_id)
def get_crawler_matadata(item_id, ltags=None):
def get_crawler_matadata(item_id, tags=None):
dict_crawler = {}
if is_crawled(item_id):
dict_crawler['domain'] = get_item_domain(item_id)
@ -759,5 +862,7 @@ def delete_domain_node(item_id):
if __name__ == '__main__':
content = 'test file content'
duplicates = {'tests/2020/01/02/test.gz': [{'algo':'ssdeep', 'similarity':75}, {'algo':'tlsh', 'similarity':45}]}
item = Item('tests/2020/01/02/test_save.gz')
item.save_on_disk(content, binary=False)
item.create(content, _save=False)
@ -17,6 +17,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages
from packages import Tag
from lib import Duplicate
from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations
from lib.Tracker import is_obj_tracked, get_obj_all_trackers, delete_obj_trackers
@ -69,6 +70,9 @@ class AbstractObject(ABC):
tags = set(tags)
return tags
def get_duplicates(self):
    """Return the duplicates recorded for this object.

    Delegates to ``Duplicate.get_duplicates`` with the object's type,
    its subtype (requested with ``r_str=True``) and its id.
    """
    subtype = self.get_subtype(r_str=True)
    return Duplicate.get_duplicates(self.type, subtype, self.id)
## ADD TAGS ????
#def add_tags(self):
@ -113,14 +113,39 @@ class AbstractSubtypeObject(AbstractObject):
if date > last_seen:
def add(self, date):
def add(self, date, item_id):
# daily
r_metadata.hincrby(f'{self.type}:{self.subtype}:{date}', self.id, 1)
# all type
r_metadata.zincrby(f'{self.type}_all:{self.subtype}', self.id, 1)
# global set
r_serv_metadata.sadd(f'set_{self.type}_{self.subtype}:{self.id}', item_id)
## object_metadata
# item
r_serv_metadata.sadd(f'item_{self.type}_{self.subtype}:{item_id}', self.id)
# new correlation
# How to filter by correlation type ????
f'correlation:obj:{self.type}:{self.subtype}:{self.id}', f'{obj_type}:{obj_subtype}:{obj_id}'
f'correlation:obj:{self.type}:{self.subtype}:{obj_type}:{self.id}', f'{obj_subtype}:{obj_id}'
# # domain
@ -128,6 +153,9 @@ class AbstractSubtypeObject(AbstractObject):
# domain = item_basic.get_item_domain(item_id)
# self.save_domain_correlation(domain, subtype, obj_id)
def create(self, first_seen, last_seen):
def _delete(self):
Executable file
Executable file
@ -0,0 +1,108 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
The Duplicate module
This huge module is, in short term, checking duplicates.
Its input comes from other modules, namely:
Perform comparisons with ssdeep and tlsh
import redis
import os
import sys
import time
#from datetime import datetime, timedelta
import datetime
# Import Project packages
from modules.abstract_module import AbstractModule
from lib.ConfigLoader import ConfigLoader
from lib import Duplicate
from lib.objects.Items import Item
class Duplicates(AbstractModule):
"""Duplicates module."""
def __init__(self):
super(Duplicates, self).__init__()
config_loader = ConfigLoader()
THRESHOLD_SSDEEP = config_loader.get_config_int('Modules_Duplicates', 'threshold_duplicate_ssdeep')
THRESHOLD_TLSH = config_loader.get_config_int('Modules_Duplicates', 'threshold_duplicate_tlsh')
self.min_item_size = float(config_loader.get_config_str('Modules_Duplicates', 'min_paste_size')) # # TODO: # FIXME: rename me
self.maximum_month_range = config_loader.get_config_int('Modules_Duplicates', 'maximum_month_range')
self.algos = {
"ssdeep": {"threshold": THRESHOLD_SSDEEP},
"tlsh": {"threshold": THRESHOLD_TLSH}
self.redis_logger.info(f"Module: {self.module_name} Launched")
def compute(self, message):
# IOError: "CRC Checksum Failed on : {id}"
item = Item(message)
# Check file size
if item.get_size() < self.min_item_size:
return None
# one month
curr_date_ymonth = datetime.datetime.now().strftime("%Y%m")
last_month_dates = Duplicate.get_last_x_month_dates(self.maximum_month_range)
x = time.time()
# Get Hashs
content = item.get_content(binary=True)
self.algos['ssdeep']['hash'] = Duplicate.get_ssdeep_hash(content)
self.algos['tlsh']['hash'] = Duplicate.get_tlsh_hash(content)
# TODO: Handle coputed duplicates
nb_duplicates = 0
for algo in self.algos:
obj_hash = self.algos[algo]['hash']
for date_ymonth in last_month_dates:
if Duplicate.exists_algo_hash_by_month(algo, obj_hash, date_ymonth):
Duplicate.add_obj_duplicate(algo, obj_hash, 100, 'item', '', item.get_id(), date_ymonth)
nb_duplicates +=1
for hash in Duplicate.get_algo_hashs_by_month(algo, date_ymonth):
# # FIXME: try - catch 'hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash
similarity = Duplicate.get_algo_similarity(algo, obj_hash, hash)
print(f'[{algo}] comparing: {obj_hash} and {hash} similarity: {similarity}') # DEBUG:
if similarity >= self.algos[algo]['threshold']:
Duplicate.add_obj_duplicate(algo, hash, similarity, 'item', '', item.get_id(), date_ymonth)
nb_duplicates +=1
# Save Hashs
Duplicate.save_object_hash(algo, curr_date_ymonth, self.algos[algo]['hash'], item.get_id())
if nb_duplicates:
self.redis_logger.info(f'Duplicate;{item.get_source()};{item.get_date()};{item.get_basename()};Detected {nb_duplicates};{item.get_id()}')
y = time.time()
print(f'{item.get_id()} Processed in {y-x} sec')
#self.redis_logger.debug('{}Processed in {} sec'.format(to_print, y-x))
if __name__ == "__main__":
module = Duplicates()
@ -66,15 +66,15 @@ publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Urls,Redis_Credential,R
subscribe = Redis_CreditCards
publish = Redis_Duplicate,Redis_ModuleStats,Redis_Tags
publish = Redis_ModuleStats,Redis_Tags
subscribe = Redis_Global
publish = Redis_Duplicate,Redis_Tags
publish = Redis_Tags
subscribe = Redis_Mail
publish = Redis_Duplicate,Redis_ModuleStats,Redis_Tags
publish = Redis_ModuleStats,Redis_Tags
subscribe = Redis_Onion
@ -92,11 +92,11 @@ publish = Redis_Url
subscribe = Redis_Url
publish = Redis_Duplicate,Redis_Tags
publish = Redis_Tags
subscribe = Redis_Url
publish = Redis_Duplicate,Redis_Tags
publish = Redis_Tags
subscribe = Redis_ModuleStats
@ -128,31 +128,31 @@ publish = Redis_Duplicate,Redis_ModuleStats,Redis_Tags
subscribe = Redis_Cve
publish = Redis_Duplicate,Redis_Tags
publish = Redis_Tags
subscribe = Redis_Global
publish = Redis_Duplicate,Redis_Tags
publish = Redis_Tags
subscribe = Redis_Global
publish = Redis_Duplicate,Redis_PgpDump,Redis_Tags
publish = Redis_PgpDump,Redis_Tags
subscribe = Redis_PgpDump
publish = Redis_Duplicate,Redis_Tags
publish = Redis_Tags
subscribe = Redis_ApiKey
publish = Redis_Duplicate,Redis_Tags
publish = Redis_Tags
subscribe = Redis_Global
publish = Redis_Duplicate,Redis_Tags
publish = Redis_Tags
subscribe = Redis_Global
publish = Redis_Duplicate,Redis_Tags
publish = Redis_Tags
subscribe = Redis
@ -164,7 +164,8 @@ publish = Redis_Mixer,Redis_Tags
subscribe = Redis_Global
publish = Redis_Duplicate,Redis_Tags
publish = Redis_Tags
subscribe = Redis_Url
@ -15,12 +15,15 @@ from flask_login import login_required, current_user
# Import Role_Manager
from Role_Manager import login_admin, login_analyst, login_read_only
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
import Item
import Tag
# Import Project packages
from lib import item_basic
from lib.objects.Items import Item
from export import Export
from packages import Tag
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'export'))
import Export
# ============ BLUEPRINT ============
objects_item = Blueprint('objects_item', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/item'))
@ -38,28 +41,22 @@ bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
def showItem(): # # TODO: support post
item_id = request.args.get('id')
if not item_id or not Item.exist_item(item_id):
if not item_id or not item_basic.exist_item(item_id):
dict_item = {}
dict_item['id'] = item_id
dict_item['name'] = dict_item['id'].replace('/', ' / ')
dict_item['father'] = Item.get_item_parent(item_id)
dict_item['content'] = Item.get_item_content(item_id)
dict_item['metadata'] = Item.get_item_metadata(item_id, item_content=dict_item['content'])
dict_item['tags'] = Tag.get_obj_tag(item_id)
#dict_item['duplicates'] = Item.get_item_nb_duplicates(item_id)
dict_item['duplicates'] = Item.get_item_duplicates_dict(item_id)
dict_item['crawler'] = Item.get_crawler_matadata(item_id, ltags=dict_item['tags'])
item = Item(item_id)
meta = item.get_meta(options=set(['content', 'crawler', 'duplicates', 'lines', 'size']))
meta['name'] = meta['id'].replace('/', ' / ')
meta['father'] = item_basic.get_item_parent(item_id)
# # TODO: ADD in Export SECTION
dict_item['hive_case'] = Export.get_item_hive_cases(item_id)
meta['hive_case'] = Export.get_item_hive_cases(item_id)
return render_template("show_item.html", bootstrap_label=bootstrap_label,
modal_add_tags=Tag.get_modal_add_tags(dict_item['id'], object_type='item'),
modal_add_tags=Tag.get_modal_add_tags(meta['id'], object_type='item'),
# kvrocks data
@ -74,24 +71,27 @@ def showItem(): # # TODO: support post
def html2text(): # # TODO: support post
item_id = request.args.get('id')
if not item_id or not Item.exist_item(item_id):
if not item_id or not item_basic.exist_item(item_id):
return Item.get_item_content_html2text(item_id)
item = Item(item_id)
return item.get_html2text_content()
def item_raw_content(): # # TODO: support post
item_id = request.args.get('id')
if not item_id or not Item.exist_item(item_id):
if not item_id or not item_basic.exist_item(item_id):
return Response(Item.get_item_content(item_id), mimetype='text/plain')
item = Item(item_id)
return Response(item.get_content(), mimetype='text/plain')
def item_download(): # # TODO: support post
item_id = request.args.get('id')
if not item_id or not Item.exist_item(item_id):
if not item_id or not item_basic.exist_item(item_id):
return send_file(Item.get_raw_content(item_id), attachment_filename=item_id, as_attachment=True)
item = Item(item_id)
return send_file(item.get_raw_content(), attachment_filename=item_id, as_attachment=True)
@ -38,7 +38,7 @@
<div class="card my-2 mx-2">
<div class="card-header bg-dark">
<h3 class="text-white text-center" >{{ dict_item['name'] }}</h3>
<h3 class="text-white text-center" >{{ meta['name'] }}</h3>
<div class="card-body pb-1">
<table class="table table-condensed">
@ -46,7 +46,7 @@
<!-- <th>Encoding</th> -->
<th>Size (Kb)</th>
<th>Number of lines</th>
<th>Max line length</th>
@ -54,12 +54,12 @@
<td>{{ dict_item['metadata']['date'] }}</td>
<td>{{ dict_item['metadata']['source'] }}</td>
<td>{{ dict_item['metadata']['encoding'] }}</td>
<td>{{ dict_item['metadata']['size'] }}</td>
<td>{{ dict_item['metadata']['lines']['nb'] }}</td>
<td>{{ dict_item['metadata']['lines']['max_length'] }}</td>
<td>{{ meta['date'] }}</td>
<td>{{ meta['source'] }}</td>
<!-- <td>{{ meta['encoding'] }}</td> -->
<td>{{ meta['size'] }}</td>
<td>{{ meta['lines']['nb'] }}</td>
<td>{{ meta['lines']['max_length'] }}</td>
@ -68,9 +68,9 @@
{% include 'modals/edit_tag.html' %}
{% for tag in dict_item['tags'] %}
{% for tag in meta['tags'] %}
<button class="btn btn-{{ bootstrap_label[loop.index0 % 5] }}" data-toggle="modal" data-target="#edit_tags_modal"
data-tagid="{{ tag }}" data-objtype="item" data-objid="{{ dict_item['id'] }}">
data-tagid="{{ tag }}" data-objtype="item" data-objid="{{ meta['id'] }}">
{{ tag }}
@ -84,21 +84,21 @@
{% if dict_item['father'] %}
{% if meta['father'] %}
<div class="mt-3">
Father: <a href="{{ url_for('objects_item.showItem')}}?id={{dict_item['father']}}" target="_blank">{{dict_item['father']}}</a>
Father: <a href="{{ url_for('objects_item.showItem')}}?id={{meta['father']}}" target="_blank">{{meta['father']}}</a>
{% endif %}
<div class="d-flex flex-row-reverse bd-highlight">
<a href="{{ url_for('correlation.show_correlation')}}?object_type=paste&correlation_id={{ dict_item['id'] }}&correlation_objects=paste" target="_blank">
<a href="{{ url_for('correlation.show_correlation')}}?object_type=paste&correlation_id={{ meta['id'] }}&correlation_objects=paste" target="_blank">
<button class="btn btn-lg btn-info"><i class="fas fa-project-diagram"></i> Correlations Graph
{% with obj_type='item', obj_id=dict_item['id'], obj_subtype=''%}
{% with obj_type='item', obj_id=meta['id'], obj_subtype=''%}
{% include 'modals/investigations_register_obj.html' %}
{% endwith %}
<div class="mr-2">
@ -108,7 +108,7 @@
<div class="mx-2">
{% with obj_type='item', obj_id=dict_item['id'], obj_lvl=0%}
{% with obj_type='item', obj_id=meta['id'], obj_lvl=0%}
{% include 'import_export/block_add_user_object_to_export.html' %}
{% endwith %}
@ -134,14 +134,14 @@
{% endif %}
{% if dict_item['hive_case'] %}
{% if meta['hive_case'] %}
<div class="list-group" id="misp_event">
<li class="list-group-item active">The Hive Case already Created</li>
<a target="_blank" href="{{ hive_url }}" class="list-group-item">{{ hive_url }}</a>
{% endif %}
{% if dict_item['duplicates'] != 0 %}
{% if meta['duplicates'] != 0 %}
<div id="accordionDuplicate" class="mb-2 mx-3">
<div class="card">
<div class="card-header py-1" id="headingDuplicate">
@ -149,7 +149,7 @@
<div class="col-11">
<div class="mt-2">
<i class="far fa-clone"></i> duplicates
<div class="badge badge-warning">{{dict_item['duplicates']|length}}</div>
<div class="badge badge-warning">{{meta['duplicates']|length}}</div>
<div class="col-1">
@ -173,19 +173,19 @@
{% for duplicate_id in dict_item['duplicates'] %}
{% for duplicate_id in meta['duplicates'] %}
<td class="py-0">
<table class="table table-borderless table-sm my-0">
{%for algo in dict_item['duplicates'][duplicate_id]['algo']|sort()%}
{%for dict_algo in meta['duplicates'][duplicate_id]|sort(attribute='algo')%}
<td class="py-0">{{algo}}</td>
<td class="py-0">{{dict_algo['algo']}}</td>
<td class="w-100 py-0">
<div class="progress mt-1">
<div class="progress-bar progress-bar-striped {%if algo=='tlsh'%}bg-secondary{%endif%}" role="progressbar" style="width: {{dict_item['duplicates'][duplicate_id]['algo'][algo]}}%;" aria-valuenow="{{dict_item['duplicates'][duplicate_id]['algo'][algo]}}" aria-valuemin="0" aria-valuemax="100">
<div class="progress-bar progress-bar-striped {%if dict_algo['algo']=='tlsh'%}bg-secondary{%endif%}" role="progressbar" style="width: {{dict_algo['similarity']}}%;" aria-valuenow="{{dict_algo['similarity']}}" aria-valuemin="0" aria-valuemax="100">
@ -200,7 +200,7 @@
<a target="_blank" href="{{ url_for('showsavedpastes.showDiff') }}?s1={{dict_item['id']}}&s2={{duplicate_id}}" class="fa fa-columns" title="Show diff"></a>
<a target="_blank" href="{{ url_for('showsavedpastes.showDiff') }}?s1={{meta['id']}}&s2={{duplicate_id}}" class="fa fa-columns" title="Show diff"></a>
{% endfor %}
@ -261,7 +261,7 @@
{% endif %}
{% if dict_item['crawler'] %}
{% if meta['crawler'] %}
<div id="accordionCrawler" class="mb-3 mx-3">
<div class="card">
<div class="card-header py-1" id="headingCrawler">
@ -294,18 +294,18 @@
<td><i class="far fa-file"></i></td>
<a class="badge" target="_blank" href="{{ url_for('objects_item.showItem', paste=dict_item['father']) }}" />{{ dict_item['father'] }}</a>
<a class="badge" target="_blank" href="{{ url_for('objects_item.showItem', paste=meta['father']) }}" />{{ meta['father'] }}</a>
<td><i class="fab fa-html5"></i></td>
<a class="badge" target="_blank" href="{{ url_for('crawler_splash.showDomain', domain=dict_item['crawler']['domain']) }}" />{{ dict_item['crawler']['domain'] }}</a>
<a class="badge" target="_blank" href="{{ url_for('crawler_splash.showDomain', domain=meta['crawler']['domain']) }}" />{{ meta['crawler']['domain'] }}</a>
{{ dict_item['crawler']['url'] }}
{{ meta['crawler']['url'] }}
@ -318,11 +318,11 @@
<div class="card-body py-2">
<div class="row">
<div class="col-md-8">
<input class="custom-range mt-2" id="blocks" type="range" min="1" max="50" value="{%if dict_item['crawler']['is_tags_safe']%}13{%else%}0{%endif%}">
<input class="custom-range mt-2" id="blocks" type="range" min="1" max="50" value="{%if meta['crawler']['is_tags_safe']%}13{%else%}0{%endif%}">
<div class="col-md-4">
<button class="btn {%if dict_item['crawler']['is_tags_safe']%}btn-primary{%else%}btn-danger{%endif%}" onclick="blocks.value=50;pixelate();">
{%if dict_item['crawler']['is_tags_safe']%}
<button class="btn {%if meta['crawler']['is_tags_safe']%}btn-primary{%else%}btn-danger{%endif%}" onclick="blocks.value=50;pixelate();">
{%if meta['crawler']['is_tags_safe']%}
<i class="fas fas fa-plus-square"></i>
<i class="fas fa-exclamation-triangle"></i>
@ -358,8 +358,8 @@
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" data-toggle="dropdown" href="#">Others</a>
<div class="dropdown-menu">
<a class="dropdown-item" href="{{ url_for('objects_item.item_raw_content', id=dict_item['id']) }}"><i class="far fa-file"></i> Raw Content</a>
<a class="dropdown-item" href="{{ url_for('objects_item.item_download', id=dict_item['id']) }}"><i class="fas fa-download"></i> Download</i></a>
<a class="dropdown-item" href="{{ url_for('objects_item.item_raw_content', id=meta['id']) }}"><i class="far fa-file"></i> Raw Content</a>
<a class="dropdown-item" href="{{ url_for('objects_item.item_download', id=meta['id']) }}"><i class="fas fa-download"></i> Download</i></a>
@ -367,7 +367,7 @@
<div class="tab-content" id="pills-tabContent">
<div class="tab-pane fade show active" id="pills-content" role="tabpanel" aria-labelledby="pills-content-tab">
<p class="my-0"> <pre class="border">{{ dict_item['content'] }}</pre></p>
<p class="my-0"> <pre class="border">{{ meta['content'] }}</pre></p>
<div class="tab-pane fade" id="pills-html2text" role="tabpanel" aria-labelledby="pills-html2text-tab">
<p class="my-0"> <pre id="html2text-container" class="border"></pre></p>
@ -393,7 +393,7 @@
$('#pills-html2text-tab').on('shown.bs.tab', function (e) {
if ($('#html2text-container').is(':empty')){
$.get("{{ url_for('objects_item.html2text') }}?id={{ dict_item['id'] }}").done(function(data){
$.get("{{ url_for('objects_item.html2text') }}?id={{ meta['id'] }}").done(function(data){
@ -401,7 +401,7 @@
{% if dict_item['crawler'] %}
{% if meta['crawler'] %}
var ctx = canvas.getContext('2d'), img = new Image();
@ -413,7 +413,7 @@
img.addEventListener("error", img_error);
var draw_img = false;
img.src = "{{ url_for('showsavedpastes.screenshot', filename=dict_item['crawler']['screenshot']) }}";
img.src = "{{ url_for('showsavedpastes.screenshot', filename=meta['crawler']['screenshot']) }}";
function pixelate() {
Add table
Reference in a new issue