2019-12-17 14:13:36 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*-coding:UTF-8 -*
|
|
|
|
|
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
import redis
|
|
|
|
|
2020-01-31 16:01:47 +00:00
|
|
|
from io import BytesIO
|
2019-12-17 14:13:36 +00:00
|
|
|
|
|
|
|
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
|
|
|
|
import Item
|
|
|
|
import Date
|
2020-01-14 15:14:21 +00:00
|
|
|
import Tag
|
2019-12-17 14:13:36 +00:00
|
|
|
|
2020-01-31 16:01:47 +00:00
|
|
|
import Correlate_object
|
2019-12-17 14:13:36 +00:00
|
|
|
import ConfigLoader
|
|
|
|
|
|
|
|
# Module-level setup: build the datastore handles and the screenshot root
# directory once, at import time.
config_loader = ConfigLoader.ConfigLoader()
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
# Screenshot root: $AIL_HOME/<Directories.crawled_screenshot>/screenshot
SCREENSHOT_FOLDER = os.path.join(
    os.environ['AIL_HOME'],
    config_loader.get_config_str("Directories", "crawled_screenshot"),
    'screenshot',
)
# The loader is only needed for the setup above; drop the reference.
config_loader = None
|
|
|
|
|
|
|
|
# get screenshot relative path
|
|
|
|
def get_screenshot_rel_path(sha256_string, add_extension=False):
    '''
    Build the relative storage path of a screenshot from its sha256.

    The first twelve characters are split into six two-character directory
    levels; the remainder of the string is the final path component.

    :param sha256_string: sha256 of the screenshot
    :param add_extension: when true, append '.png' to the returned path

    :return: relative path of the screenshot
    '''
    # Six directory levels of two characters each, then the rest of the hash.
    path_parts = [sha256_string[start:start + 2] for start in range(0, 12, 2)]
    path_parts.append(sha256_string[12:])
    rel_path = os.path.join(*path_parts)
    return rel_path + '.png' if add_extension else rel_path
|
|
|
|
|
2020-01-31 16:01:47 +00:00
|
|
|
def get_screenshot_filepath(sha256_string):
    '''
    Return the path of a screenshot file under SCREENSHOT_FOLDER.

    :param sha256_string: sha256 of the screenshot

    :return: SCREENSHOT_FOLDER joined with the relative path ('.png' included)
    '''
    rel_path = get_screenshot_rel_path(sha256_string, add_extension=True)
    return os.path.join(SCREENSHOT_FOLDER, rel_path)
|
|
|
|
|
2019-12-17 14:13:36 +00:00
|
|
|
def exist_screenshot(sha256_string):
    '''
    Check whether the screenshot file is present on disk.

    :param sha256_string: sha256 of the screenshot

    :return: True if the screenshot path is an existing regular file
    :rtype: bool
    '''
    return os.path.isfile(get_screenshot_filepath(sha256_string))
|
|
|
|
|
|
|
|
def get_metadata(sha256_string):
    '''
    Collect the metadata of a screenshot.

    :param sha256_string: sha256 of the screenshot

    :return: dict with the keys 'img' (relative path, no extension),
             'tags' and 'is_tags_safe'
    :rtype: dict
    '''
    img_path = get_screenshot_rel_path(sha256_string)
    tags = get_screenshot_tags(sha256_string)
    return {
        'img': img_path,
        'tags': tags,
        'is_tags_safe': Tag.is_tags_safe(tags),
    }
|
|
|
|
|
2020-01-14 15:14:21 +00:00
|
|
|
def get_screenshot_tags(sha256_string):
    '''
    Return the tags of a screenshot, as given by Tag.get_obj_tag.

    :param sha256_string: sha256 of the screenshot
    '''
    screenshot_tags = Tag.get_obj_tag(sha256_string)
    return screenshot_tags
|
|
|
|
|
2019-12-17 14:13:36 +00:00
|
|
|
|
|
|
|
def get_screenshot_items_list(sha256_string):
    '''
    Return the members of the 'screenshot:<sha256>' set as a list.

    :param sha256_string: sha256 of the screenshot

    :return: list of set members (presumably item ids), empty if none
    :rtype: list
    '''
    members = r_serv_onion.smembers('screenshot:{}'.format(sha256_string))
    return list(members) if members else []
|
|
|
|
|
2020-01-06 16:07:52 +00:00
|
|
|
def get_item_screenshot(item_id):
    '''
    Return the 'screenshot' field stored in the item's 'paste_metadata' hash.

    :param item_id: item id

    :return: whatever hget returns for that field (falsy when unset)
    '''
    metadata_key = 'paste_metadata:{}'.format(item_id)
    return r_serv_metadata.hget(metadata_key, 'screenshot')
|
|
|
|
|
2019-12-17 14:13:36 +00:00
|
|
|
def get_item_screenshot_list(item_id):
    '''
    Return the screenshot of a given item id, wrapped in a list.

    :param item_id: item id

    :return: one-element list holding the item's screenshot, or an empty
             list when the item has none
    :rtype: list
    '''
    screenshot = get_item_screenshot(item_id)
    return [screenshot] if screenshot else []
|
|
|
|
|
|
|
|
def get_domain_screenshot(domain):
    '''
    Return all screenshots of a given domain.

    :param domain: crawled domain

    :return: members of the 'domain_screenshot:<domain>' set, empty if none
    :rtype: list
    '''
    screenshots = r_serv_onion.smembers('domain_screenshot:{}'.format(domain))
    return list(screenshots) if screenshots else []
|
|
|
|
|
2020-01-23 14:43:54 +00:00
|
|
|
def get_randon_domain_screenshot(domain, r_path=True):
    '''
    Return one random screenshot of a given domain.

    :param domain: crawled domain
    :param r_path: when true, convert the picked member to its relative path

    :return: relative path of the picked screenshot when r_path is true and
             a member was found; otherwise the raw srandmember result
    '''
    picked = r_serv_onion.srandmember('domain_screenshot:{}'.format(domain))
    # Nothing found, or the caller wants the raw value: hand it back as is.
    if not picked or not r_path:
        return picked
    return get_screenshot_rel_path(picked)
|
|
|
|
|
2019-12-17 14:13:36 +00:00
|
|
|
def get_screenshot_domain(sha256_string):
    '''
    Return all domains of a given screenshot.

    :param sha256_string: sha256 of the screenshot

    :return: members of the 'screenshot_domain:<sha256>' set, empty if none
    :rtype: list
    '''
    domains = r_serv_onion.smembers('screenshot_domain:{}'.format(sha256_string))
    return list(domains) if domains else []
|
|
|
|
|
|
|
|
def get_screenshot_correlated_object(sha256_string, correlation_objects=None):
    '''
    Return all correlations of a given screenshot sha256.

    :param sha256_string: sha256 of the screenshot
    :type sha256_string: str
    :param correlation_objects: correlation object types to look up; when
        omitted (None), every type returned by
        Correlate_object.get_all_correlation_objects() is used
    :type correlation_objects: iterable or None

    :return: a dict mapping each requested correlation type to its non-empty
             result; only 'paste' and 'domain' are resolved here, other
             types are ignored
    :rtype: dict
    '''
    # The default used to be a mutable [] that never matched this None check,
    # so omitting the argument always produced an empty result.
    if correlation_objects is None:
        correlation_objects = Correlate_object.get_all_correlation_objects()
    decoded_correlation = {}
    for correlation_object in correlation_objects:
        if correlation_object == 'paste':
            res = get_screenshot_items_list(sha256_string)
        elif correlation_object == 'domain':
            res = get_screenshot_domain(sha256_string)
        else:
            # Correlation type not handled for screenshots.
            res = None
        # Only keep correlation types that actually have results.
        if res:
            decoded_correlation[correlation_object] = res
    return decoded_correlation
|
2020-01-31 16:01:47 +00:00
|
|
|
|
|
|
|
|
|
|
|
def get_screenshot_file_content(sha256_string):
    '''
    Read a screenshot file and return its content as an in-memory buffer.

    :param sha256_string: sha256 of the screenshot

    :return: BytesIO holding the full file content
    '''
    with open(get_screenshot_filepath(sha256_string), 'rb') as screenshot_file:
        raw_bytes = screenshot_file.read()
    return BytesIO(raw_bytes)
|