Mirror of https://github.com/ail-project/ail-framework.git (synced 2024-11-22 14:17:16 +00:00)
chg: [crawler + core + cve] migrate crawler to lacus + add new CVE object and correlation + migrate core
This commit is contained in:
parent eeff786ea5
commit 104eaae793
109 changed files with 4310 additions and 4551 deletions
457  bin/Crawler.py
|
@ -1,457 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
import uuid
|
||||
import json
|
||||
import redis
|
||||
import datetime
|
||||
import time
|
||||
import subprocess
|
||||
import requests
|
||||
|
||||
from collections import deque
|
||||
from pyfaup.faup import Faup
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
from Helper import Process
|
||||
from pubsublogger import publisher
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
||||
import crawlers
|
||||
|
||||
# ======== FUNCTIONS ========
|
||||
|
||||
def load_blacklist(service_type):
|
||||
try:
|
||||
with open(os.environ['AIL_BIN']+'/torcrawler/blacklist_{}.txt'.format(service_type), 'r') as f:
|
||||
redis_crawler.delete('blacklist_{}'.format(service_type))
|
||||
lines = f.read().splitlines()
|
||||
for line in lines:
|
||||
redis_crawler.sadd('blacklist_{}'.format(service_type), line)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def update_auto_crawler():
|
||||
current_epoch = int(time.time())
|
||||
list_to_crawl = redis_crawler.zrangebyscore('crawler_auto_queue', '-inf', current_epoch)
|
||||
for elem_to_crawl in list_to_crawl:
|
||||
mess, type = elem_to_crawl.rsplit(';', 1)
|
||||
redis_crawler.sadd('{}_crawler_priority_queue'.format(type), mess)
|
||||
redis_crawler.zrem('crawler_auto_queue', elem_to_crawl)
|
||||
|
||||
# Extract info form url (url, domain, domain url, ...)
|
||||
def unpack_url(url):
|
||||
to_crawl = {}
|
||||
faup.decode(url)
|
||||
url_unpack = faup.get()
|
||||
# # FIXME: # TODO: remove me
|
||||
try:
|
||||
to_crawl['domain'] = url_unpack['domain'].decode()
|
||||
except:
|
||||
to_crawl['domain'] = url_unpack['domain']
|
||||
to_crawl['domain'] = to_crawl['domain'].lower()
|
||||
|
||||
|
||||
# force lower case domain/subdomain (rfc4343)
|
||||
# # FIXME: # TODO: remove me
|
||||
try:
|
||||
url_host = url_unpack['host'].decode()
|
||||
except:
|
||||
url_host = url_unpack['host']
|
||||
|
||||
new_url_host = url_host.lower()
|
||||
url_lower_case = url.replace(url_host, new_url_host, 1)
|
||||
|
||||
if url_unpack['scheme'] is None:
|
||||
to_crawl['scheme'] = 'http'
|
||||
url= 'http://{}'.format(url_lower_case)
|
||||
else:
|
||||
# # FIXME: # TODO: remove me
|
||||
try:
|
||||
scheme = url_unpack['scheme'].decode()
|
||||
except Exception as e:
|
||||
scheme = url_unpack['scheme']
|
||||
if scheme in default_proto_map:
|
||||
to_crawl['scheme'] = scheme
|
||||
url = url_lower_case
|
||||
else:
|
||||
redis_crawler.sadd('new_proto', '{} {}'.format(scheme, url_lower_case))
|
||||
to_crawl['scheme'] = 'http'
|
||||
url= 'http://{}'.format(url_lower_case.replace(scheme, '', 1))
|
||||
|
||||
if url_unpack['port'] is None:
|
||||
to_crawl['port'] = default_proto_map[to_crawl['scheme']]
|
||||
else:
|
||||
# # FIXME: # TODO: remove me
|
||||
try:
|
||||
port = url_unpack['port'].decode()
|
||||
except:
|
||||
port = url_unpack['port']
|
||||
# Verify port number #################### make function to verify/correct port number
|
||||
try:
|
||||
int(port)
|
||||
# Invalid port Number
|
||||
except Exception as e:
|
||||
port = default_proto_map[to_crawl['scheme']]
|
||||
to_crawl['port'] = port
|
||||
|
||||
#if url_unpack['query_string'] is None:
|
||||
# if to_crawl['port'] == 80:
|
||||
# to_crawl['url']= '{}://{}'.format(to_crawl['scheme'], url_unpack['host'].decode())
|
||||
# else:
|
||||
# to_crawl['url']= '{}://{}:{}'.format(to_crawl['scheme'], url_unpack['host'].decode(), to_crawl['port'])
|
||||
#else:
|
||||
# to_crawl['url']= '{}://{}:{}{}'.format(to_crawl['scheme'], url_unpack['host'].decode(), to_crawl['port'], url_unpack['query_string'].decode())
|
||||
|
||||
to_crawl['url'] = url
|
||||
if to_crawl['port'] == 80:
|
||||
to_crawl['domain_url'] = '{}://{}'.format(to_crawl['scheme'], new_url_host)
|
||||
else:
|
||||
to_crawl['domain_url'] = '{}://{}:{}'.format(to_crawl['scheme'], new_url_host, to_crawl['port'])
|
||||
|
||||
# # FIXME: # TODO: remove me
|
||||
try:
|
||||
to_crawl['tld'] = url_unpack['tld'].decode()
|
||||
except:
|
||||
to_crawl['tld'] = url_unpack['tld']
|
||||
|
||||
return to_crawl
|
||||
|
||||
def get_crawler_config(redis_server, mode, service_type, domain, url=None):
|
||||
crawler_options = {}
|
||||
if mode=='auto':
|
||||
config = redis_server.get('crawler_config:{}:{}:{}:{}'.format(mode, service_type, domain, url))
|
||||
else:
|
||||
config = redis_server.get('crawler_config:{}:{}:{}'.format(mode, service_type, domain))
|
||||
if config is None:
|
||||
config = {}
|
||||
else:
|
||||
config = json.loads(config)
|
||||
for option in default_crawler_config:
|
||||
if option in config:
|
||||
crawler_options[option] = config[option]
|
||||
else:
|
||||
crawler_options[option] = default_crawler_config[option]
|
||||
if mode == 'auto':
|
||||
crawler_options['time'] = int(config['time'])
|
||||
elif mode == 'manual':
|
||||
redis_server.delete('crawler_config:{}:{}:{}'.format(mode, service_type, domain))
|
||||
return crawler_options
|
||||
|
||||
def load_crawler_config(queue_type, service_type, domain, paste, url, date):
|
||||
crawler_config = {}
|
||||
crawler_config['splash_url'] = f'http://{splash_url}'
|
||||
crawler_config['item'] = paste
|
||||
crawler_config['service_type'] = service_type
|
||||
crawler_config['domain'] = domain
|
||||
crawler_config['date'] = date
|
||||
|
||||
if queue_type and queue_type != 'tor':
|
||||
service_type = queue_type
|
||||
|
||||
# Auto and Manual Crawling
|
||||
# Auto ################################################# create new entry, next crawling => here or when ended ?
|
||||
if paste == 'auto':
|
||||
crawler_config['crawler_options'] = get_crawler_config(redis_crawler, 'auto', service_type, domain, url=url)
|
||||
crawler_config['requested'] = True
|
||||
# Manual
|
||||
elif paste == 'manual':
|
||||
crawler_config['crawler_options'] = get_crawler_config(r_cache, 'manual', service_type, domain)
|
||||
crawler_config['requested'] = True
|
||||
# default crawler
|
||||
else:
|
||||
crawler_config['crawler_options'] = get_crawler_config(redis_crawler, 'default', service_type, domain)
|
||||
crawler_config['requested'] = False
|
||||
return crawler_config
|
||||
|
||||
def is_domain_up_day(domain, type_service, date_day):
|
||||
if redis_crawler.sismember('{}_up:{}'.format(type_service, date_day), domain):
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def set_crawled_domain_metadata(type_service, date, domain, father_item):
|
||||
# first seen
|
||||
if not redis_crawler.hexists('{}_metadata:{}'.format(type_service, domain), 'first_seen'):
|
||||
redis_crawler.hset('{}_metadata:{}'.format(type_service, domain), 'first_seen', date['date_day'])
|
||||
|
||||
redis_crawler.hset('{}_metadata:{}'.format(type_service, domain), 'paste_parent', father_item)
|
||||
# last check
|
||||
redis_crawler.hset('{}_metadata:{}'.format(type_service, domain), 'last_check', date['date_day'])
|
||||
|
||||
# Put message back on queue
|
||||
def on_error_send_message_back_in_queue(type_service, domain, message):
|
||||
if not redis_crawler.sismember('{}_domain_crawler_queue'.format(type_service), domain):
|
||||
redis_crawler.sadd('{}_domain_crawler_queue'.format(type_service), domain)
|
||||
redis_crawler.sadd('{}_crawler_priority_queue'.format(type_service), message)
|
||||
|
||||
def crawl_onion(url, domain, port, type_service, message, crawler_config):
|
||||
crawler_config['url'] = url
|
||||
crawler_config['port'] = port
|
||||
print('Launching Crawler: {}'.format(url))
|
||||
|
||||
r_cache.hset('metadata_crawler:{}'.format(splash_url), 'crawling_domain', domain)
|
||||
r_cache.hset('metadata_crawler:{}'.format(splash_url), 'started_time', datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))
|
||||
|
||||
retry = True
|
||||
nb_retry = 0
|
||||
while retry:
|
||||
try:
|
||||
r = requests.get(f'http://{splash_url}' , timeout=30.0)
|
||||
retry = False
|
||||
except Exception:
|
||||
# TODO: relaunch docker or send error message
|
||||
nb_retry += 1
|
||||
|
||||
if nb_retry == 2:
|
||||
crawlers.restart_splash_docker(splash_url, splash_name)
|
||||
time.sleep(20)
|
||||
|
||||
if nb_retry == 6:
|
||||
on_error_send_message_back_in_queue(type_service, domain, message)
|
||||
publisher.error('{} SPASH DOWN'.format(splash_url))
|
||||
print('--------------------------------------')
|
||||
print(' \033[91m DOCKER SPLASH DOWN\033[0m')
|
||||
print(' {} DOWN'.format(splash_url))
|
||||
r_cache.hset('metadata_crawler:{}'.format(splash_url), 'status', 'SPLASH DOWN')
|
||||
nb_retry == 0
|
||||
|
||||
print(' \033[91m DOCKER SPLASH NOT AVAILABLE\033[0m')
|
||||
print(' Retry({}) in 10 seconds'.format(nb_retry))
|
||||
time.sleep(10)
|
||||
|
||||
if r.status_code == 200:
|
||||
r_cache.hset('metadata_crawler:{}'.format(splash_url), 'status', 'Crawling')
|
||||
# save config in cash
|
||||
UUID = str(uuid.uuid4())
|
||||
r_cache.set('crawler_request:{}'.format(UUID), json.dumps(crawler_config))
|
||||
|
||||
process = subprocess.Popen(["python", './torcrawler/tor_crawler.py', UUID],
|
||||
stdout=subprocess.PIPE)
|
||||
while process.poll() is None:
|
||||
time.sleep(1)
|
||||
|
||||
if process.returncode == 0:
|
||||
output = process.stdout.read().decode()
|
||||
print(output)
|
||||
# error: splash:Connection to proxy refused
|
||||
if 'Connection to proxy refused' in output:
|
||||
on_error_send_message_back_in_queue(type_service, domain, message)
|
||||
publisher.error('{} SPASH, PROXY DOWN OR BAD CONFIGURATION'.format(splash_url))
|
||||
print('------------------------------------------------------------------------')
|
||||
print(' \033[91m SPLASH: Connection to proxy refused')
|
||||
print('')
|
||||
print(' PROXY DOWN OR BAD CONFIGURATION\033[0m'.format(splash_url))
|
||||
print('------------------------------------------------------------------------')
|
||||
r_cache.hset('metadata_crawler:{}'.format(splash_url), 'status', 'Error')
|
||||
exit(-2)
|
||||
else:
|
||||
crawlers.update_splash_manager_connection_status(True)
|
||||
else:
|
||||
print(process.stdout.read())
|
||||
exit(-1)
|
||||
else:
|
||||
on_error_send_message_back_in_queue(type_service, domain, message)
|
||||
print('--------------------------------------')
|
||||
print(' \033[91m DOCKER SPLASH DOWN\033[0m')
|
||||
print(' {} DOWN'.format(splash_url))
|
||||
r_cache.hset('metadata_crawler:{}'.format(splash_url), 'status', 'Crawling')
|
||||
exit(1)
|
||||
|
||||
# check external links (full_crawl)
|
||||
def search_potential_source_domain(type_service, domain):
|
||||
external_domains = set()
|
||||
for link in redis_crawler.smembers('domain_{}_external_links:{}'.format(type_service, domain)):
|
||||
# unpack url
|
||||
url_data = unpack_url(link)
|
||||
if url_data['domain'] != domain:
|
||||
if url_data['tld'] == 'onion' or url_data['tld'] == 'i2p':
|
||||
external_domains.add(url_data['domain'])
|
||||
# # TODO: add special tag ?
|
||||
if len(external_domains) >= 20:
|
||||
redis_crawler.sadd('{}_potential_source'.format(type_service), domain)
|
||||
print('New potential source found: domain')
|
||||
redis_crawler.delete('domain_{}_external_links:{}'.format(type_service, domain))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
if len(sys.argv) != 2:
|
||||
print('usage:', 'Crawler.py', 'splash_url')
|
||||
exit(1)
|
||||
##################################################
|
||||
splash_url = sys.argv[1]
|
||||
|
||||
splash_name = crawlers.get_splash_name_by_url(splash_url)
|
||||
proxy_name = crawlers.get_splash_proxy(splash_name)
|
||||
crawler_type = crawlers.get_splash_crawler_type(splash_name)
|
||||
|
||||
print(f'SPLASH Name: {splash_name}')
|
||||
print(f'Proxy Name: {proxy_name}')
|
||||
print(f'Crawler Type: {crawler_type}')
|
||||
|
||||
#time.sleep(10)
|
||||
#sys.exit(0)
|
||||
|
||||
#rotation_mode = deque(['onion', 'regular'])
|
||||
all_crawler_queues = crawlers.get_crawler_queue_types_by_splash_name(splash_name)
|
||||
rotation_mode = deque(all_crawler_queues)
|
||||
print(rotation_mode)
|
||||
|
||||
default_proto_map = {'http': 80, 'https': 443}
|
||||
######################################################## add ftp ???
|
||||
|
||||
publisher.port = 6380
|
||||
publisher.channel = "Script"
|
||||
publisher.info("Script Crawler started")
|
||||
config_section = 'Crawler'
|
||||
|
||||
# Setup the I/O queues
|
||||
p = Process(config_section)
|
||||
|
||||
print('splash url: {}'.format(splash_url))
|
||||
|
||||
r_cache = redis.StrictRedis(
|
||||
host=p.config.get("Redis_Cache", "host"),
|
||||
port=p.config.getint("Redis_Cache", "port"),
|
||||
db=p.config.getint("Redis_Cache", "db"),
|
||||
decode_responses=True)
|
||||
|
||||
redis_crawler = redis.StrictRedis(
|
||||
host=p.config.get("ARDB_Onion", "host"),
|
||||
port=p.config.getint("ARDB_Onion", "port"),
|
||||
db=p.config.getint("ARDB_Onion", "db"),
|
||||
decode_responses=True)
|
||||
|
||||
faup = crawlers.get_faup()
|
||||
|
||||
# get HAR files
|
||||
default_crawler_har = p.config.getboolean("Crawler", "default_crawler_har")
|
||||
if default_crawler_har:
|
||||
default_crawler_har = True
|
||||
else:
|
||||
default_crawler_har = False
|
||||
|
||||
# get PNG files
|
||||
default_crawler_png = p.config.getboolean("Crawler", "default_crawler_png")
|
||||
if default_crawler_png:
|
||||
default_crawler_png = True
|
||||
else:
|
||||
default_crawler_png = False
|
||||
|
||||
# Default crawler options
|
||||
default_crawler_config = {'html': True,
|
||||
'har': default_crawler_har,
|
||||
'png': default_crawler_png,
|
||||
'depth_limit': p.config.getint("Crawler", "crawler_depth_limit"),
|
||||
'closespider_pagecount': p.config.getint("Crawler", "default_crawler_closespider_pagecount"),
|
||||
'cookiejar_uuid': None,
|
||||
'user_agent': p.config.get("Crawler", "default_crawler_user_agent")}
|
||||
|
||||
# Track launched crawler
|
||||
r_cache.sadd('all_splash_crawlers', splash_url)
|
||||
r_cache.hset('metadata_crawler:{}'.format(splash_url), 'status', 'Waiting')
|
||||
r_cache.hset('metadata_crawler:{}'.format(splash_url), 'started_time', datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))
|
||||
|
||||
# update hardcoded blacklist
|
||||
load_blacklist('onion')
|
||||
load_blacklist('regular')
|
||||
|
||||
while True:
|
||||
|
||||
update_auto_crawler()
|
||||
|
||||
rotation_mode.rotate()
|
||||
to_crawl = crawlers.get_elem_to_crawl_by_queue_type(rotation_mode)
|
||||
if to_crawl:
|
||||
url_data = unpack_url(to_crawl['url'])
|
||||
# remove domain from queue
|
||||
redis_crawler.srem('{}_domain_crawler_queue'.format(to_crawl['type_service']), url_data['domain'])
|
||||
|
||||
print()
|
||||
print()
|
||||
print('\033[92m------------------START CRAWLER------------------\033[0m')
|
||||
print('crawler type: {}'.format(to_crawl['type_service']))
|
||||
print('\033[92m-------------------------------------------------\033[0m')
|
||||
print('url: {}'.format(url_data['url']))
|
||||
print('domain: {}'.format(url_data['domain']))
|
||||
print('domain_url: {}'.format(url_data['domain_url']))
|
||||
print()
|
||||
|
||||
# Check blacklist
|
||||
if not redis_crawler.sismember('blacklist_{}'.format(to_crawl['type_service']), url_data['domain']):
|
||||
date = {'date_day': datetime.datetime.now().strftime("%Y%m%d"),
|
||||
'date_month': datetime.datetime.now().strftime("%Y%m"),
|
||||
'epoch': int(time.time())}
|
||||
|
||||
# Update crawler status type
|
||||
r_cache.hset('metadata_crawler:{}'.format(splash_url), 'type', to_crawl['type_service'])
|
||||
|
||||
crawler_config = load_crawler_config(to_crawl['queue_type'], to_crawl['type_service'], url_data['domain'], to_crawl['paste'], to_crawl['url'], date)
|
||||
# check if default crawler
|
||||
if not crawler_config['requested']:
|
||||
# Auto crawl only if service not up this month
|
||||
if redis_crawler.sismember('month_{}_up:{}'.format(to_crawl['type_service'], date['date_month']), url_data['domain']):
|
||||
continue
|
||||
|
||||
set_crawled_domain_metadata(to_crawl['type_service'], date, url_data['domain'], to_crawl['paste'])
|
||||
|
||||
|
||||
#### CRAWLER ####
|
||||
# Manual and Auto Crawler
|
||||
if crawler_config['requested']:
|
||||
|
||||
######################################################crawler strategy
|
||||
# CRAWL domain
|
||||
crawl_onion(url_data['url'], url_data['domain'], url_data['port'], to_crawl['type_service'], to_crawl['original_message'], crawler_config)
|
||||
|
||||
# Default Crawler
|
||||
else:
|
||||
# CRAWL domain
|
||||
crawl_onion(url_data['domain_url'], url_data['domain'], url_data['port'], to_crawl['type_service'], to_crawl['original_message'], crawler_config)
|
||||
#if url != domain_url and not is_domain_up_day(url_data['domain'], to_crawl['type_service'], date['date_day']):
|
||||
# crawl_onion(url_data['url'], url_data['domain'], to_crawl['original_message'])
|
||||
|
||||
|
||||
# Save last_status day (DOWN)
|
||||
if not is_domain_up_day(url_data['domain'], to_crawl['type_service'], date['date_day']):
|
||||
redis_crawler.sadd('{}_down:{}'.format(to_crawl['type_service'], date['date_day']), url_data['domain'])
|
||||
|
||||
# if domain was UP at least one time
|
||||
if redis_crawler.exists('crawler_history_{}:{}:{}'.format(to_crawl['type_service'], url_data['domain'], url_data['port'])):
|
||||
# add crawler history (if domain is down)
|
||||
if not redis_crawler.zrangebyscore('crawler_history_{}:{}:{}'.format(to_crawl['type_service'], url_data['domain'], url_data['port']), date['epoch'], date['epoch']):
|
||||
# Domain is down
|
||||
redis_crawler.zadd('crawler_history_{}:{}:{}'.format(to_crawl['type_service'], url_data['domain'], url_data['port']), int(date['epoch']), int(date['epoch']))
|
||||
|
||||
############################
|
||||
# extract page content
|
||||
############################
|
||||
|
||||
# update list, last crawled domains
|
||||
redis_crawler.lpush('last_{}'.format(to_crawl['type_service']), '{}:{};{}'.format(url_data['domain'], url_data['port'], date['epoch']))
|
||||
redis_crawler.ltrim('last_{}'.format(to_crawl['type_service']), 0, 15)
|
||||
|
||||
#update crawler status
|
||||
r_cache.hset('metadata_crawler:{}'.format(splash_url), 'status', 'Waiting')
|
||||
r_cache.hdel('metadata_crawler:{}'.format(splash_url), 'crawling_domain')
|
||||
|
||||
# Update crawler status type
|
||||
r_cache.hdel('metadata_crawler:{}'.format(splash_url), 'type', to_crawl['type_service'])
|
||||
|
||||
# add next auto Crawling in queue:
|
||||
if to_crawl['paste'] == 'auto':
|
||||
redis_crawler.zadd('crawler_auto_queue', int(time.time()+crawler_config['crawler_options']['time']) , '{};{}'.format(to_crawl['original_message'], to_crawl['type_service']))
|
||||
# update list, last auto crawled domains
|
||||
redis_crawler.lpush('last_auto_crawled', '{}:{};{}'.format(url_data['domain'], url_data['port'], date['epoch']))
|
||||
redis_crawler.ltrim('last_auto_crawled', 0, 9)
|
||||
else:
|
||||
print(' Blacklisted Domain')
|
||||
print()
|
||||
print()
|
||||
|
||||
else:
|
||||
time.sleep(1)
|
|
@ -119,11 +119,11 @@ def core_migration():
|
|||
# Auto Export Migration
|
||||
ail_misp = r_serv_db.get('ail:misp')
|
||||
if ail_misp != 'True':
|
||||
ail_misp == 'False'
|
||||
ail_misp = 'False'
|
||||
r_kvrocks.set('ail:misp', ail_misp)
|
||||
ail_thehive = r_serv_db.get('ail:thehive')
|
||||
if ail_thehive != 'True':
|
||||
ail_thehive == 'False'
|
||||
ail_thehive = 'False'
|
||||
r_kvrocks.set('ail:thehive', ail_thehive)
|
||||
|
||||
|
||||
|
@ -494,7 +494,7 @@ def domain_migration():
|
|||
domain = Domains.Domain(dom)
|
||||
domain.update_daterange(first_seen)
|
||||
domain.update_daterange(last_check)
|
||||
domain._set_ports(ports)
|
||||
domain._set_ports(ports) # TODO ############################################################################
|
||||
if last_origin:
|
||||
domain.set_last_origin(last_origin)
|
||||
for language in languages:
|
||||
|
@ -520,13 +520,13 @@ def domain_migration():
|
|||
epoch = history['epoch']
|
||||
# DOMAIN DOWN
|
||||
if not history.get('status'): # domain DOWN
|
||||
domain.add_history(epoch, port)
|
||||
domain.add_history(epoch)
|
||||
print(f'DOWN {epoch}')
|
||||
# DOMAIN UP
|
||||
else:
|
||||
root_id = history.get('root')
|
||||
if root_id:
|
||||
domain.add_history(epoch, port, root_item=root_id)
|
||||
domain.add_history(epoch, root_item=root_id)
|
||||
print(f'UP {root_id}')
|
||||
crawled_items = get_crawled_items(dom, root_id)
|
||||
for item_id in crawled_items:
|
||||
|
@ -534,7 +534,7 @@ def domain_migration():
|
|||
item_father = get_item_father(item_id)
|
||||
if item_father and url:
|
||||
print(f'{url} {item_id}')
|
||||
domain.add_crawled_item(url, port, item_id, item_father)
|
||||
domain.add_crawled_item(url, item_id, item_father)
|
||||
|
||||
|
||||
#print()
|
||||
|
|
|
@ -18,17 +18,18 @@ import time
|
|||
import re
|
||||
import sys
|
||||
from pubsublogger import publisher
|
||||
from packages import Paste
|
||||
from lib.objects.Items import Item
|
||||
from Helper import Process
|
||||
from ipaddress import IPv4Network, IPv4Address
|
||||
|
||||
# TODO REWRITE ME -> IMPROVE + MIGRATE TO MODULE
|
||||
|
||||
def search_ip(message):
|
||||
paste = Paste.Paste(message)
|
||||
content = paste.get_p_content()
|
||||
item = Item(message)
|
||||
content = item.get_content()
|
||||
# regex to find IPs
|
||||
reg_ip = re.compile(r'^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)', flags=re.MULTILINE)
|
||||
# list of the regex results in the Paste, may be null
|
||||
# list of the regex results in the Item, may be null
|
||||
results = reg_ip.findall(content)
|
||||
matching_ips = []
|
||||
|
||||
|
@ -40,14 +41,13 @@ def search_ip(message):
|
|||
matching_ips.append(address)
|
||||
|
||||
if len(matching_ips) > 0:
|
||||
print('{} contains {} IPs'.format(paste.p_name, len(matching_ips)))
|
||||
publisher.warning('{} contains {} IPs'.format(paste.p_name, len(matching_ips)))
|
||||
print(f'{item.get_id()} contains {len(matching_ips)} IPs')
|
||||
publisher.warning(f'{item.get_id()} contains {len(matching_ips)} IPs')
|
||||
|
||||
#Tag message with IP
|
||||
msg = 'infoleak:automatic-detection="ip";{}'.format(message)
|
||||
# Tag message with IP
|
||||
msg = f'infoleak:automatic-detection="ip";{item.get_id()}'
|
||||
p.populate_set_out(msg, 'Tags')
|
||||
#Send to duplicate
|
||||
p.populate_set_out(message, 'Duplicate')
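For readers without the full file: the part of search_ip elided by this hunk is the loop that filters the regex hits before matching_ips.append(address) is reached. A minimal, hedged reconstruction of that step is shown below; the monitored network list is a placeholder, the real values come from the module configuration.

from ipaddress import IPv4Address, IPv4Network

# Assumed filtering step: keep only regex hits that parse as IPv4 addresses
# and fall inside the monitored networks (placeholder network below).
ip_networks = [IPv4Network('0.0.0.0/0')]
matching_ips = []
for res in results:
    try:
        address = IPv4Address(res)
    except ValueError:
        continue
    for network in ip_networks:
        if address in network:
            matching_ips.append(address)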
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
||||
|
|
|
@ -40,7 +40,6 @@ is_ail_core=`screen -ls | egrep '[0-9]+.Core_AIL' | cut -d. -f1`
|
|||
is_ail_2_ail=`screen -ls | egrep '[0-9]+.AIL_2_AIL' | cut -d. -f1`
|
||||
isscripted=`screen -ls | egrep '[0-9]+.Script_AIL' | cut -d. -f1`
|
||||
isflasked=`screen -ls | egrep '[0-9]+.Flask_AIL' | cut -d. -f1`
|
||||
iscrawler=`screen -ls | egrep '[0-9]+.Crawler_AIL' | cut -d. -f1`
|
||||
isfeeded=`screen -ls | egrep '[0-9]+.Feeder_Pystemon' | cut -d. -f1`
|
||||
|
||||
function helptext {
|
||||
|
@ -126,6 +125,8 @@ function launching_logs {
|
|||
screen -S "Logging_AIL" -X screen -t "LogScript" bash -c "cd ${AIL_BIN}; ${AIL_VENV}/bin/log_subscriber -p 6380 -c Script -l ../logs/; read x"
|
||||
sleep 0.1
|
||||
screen -S "Logging_AIL" -X screen -t "LogScript" bash -c "cd ${AIL_BIN}; ${AIL_VENV}/bin/log_subscriber -p 6380 -c Sync -l ../logs/; read x"
|
||||
sleep 0.1
|
||||
screen -S "Logging_AIL" -X screen -t "LogScript" bash -c "cd ${AIL_BIN}; ${AIL_VENV}/bin/log_subscriber -p 6380 -c Crawler -l ../logs/; read x"
|
||||
}
|
||||
|
||||
function launching_queues {
|
||||
|
@ -174,8 +175,6 @@ function launching_scripts {
|
|||
|
||||
screen -S "Script_AIL" -X screen -t "JSON_importer" bash -c "cd ${AIL_BIN}/import; ${ENV_PY} ./JSON_importer.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "Crawler_manager" bash -c "cd ${AIL_BIN}/core; ${ENV_PY} ./Crawler_manager.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "D4_client" bash -c "cd ${AIL_BIN}/core; ${ENV_PY} ./D4_client.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "DbCleaner" bash -c "cd ${AIL_BIN}/core; ${ENV_PY} ./DbCleaner.py; read x"
|
||||
|
@ -202,6 +201,9 @@ function launching_scripts {
|
|||
screen -S "Script_AIL" -X screen -t "SubmitPaste" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./submit_paste.py; read x"
|
||||
sleep 0.1
|
||||
|
||||
screen -S "Script_AIL" -X screen -t "Crawler" bash -c "cd ${AIL_BIN}/crawlers; ${ENV_PY} ./Crawler.py; read x"
|
||||
sleep 0.1
|
||||
|
||||
screen -S "Script_AIL" -X screen -t "Sync_module" bash -c "cd ${AIL_BIN}/core; ${ENV_PY} ./Sync_module.py; read x"
|
||||
sleep 0.1
|
||||
|
||||
|
@ -225,8 +227,6 @@ function launching_scripts {
|
|||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "Mail" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Mail.py; read x"
|
||||
sleep 0.1
|
||||
# screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./SentimentAnalysis.py; read x"
|
||||
# sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "ModuleStats" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./ModuleStats.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "Onion" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Onion.py; read x"
|
||||
|
@ -265,8 +265,12 @@ function launching_scripts {
|
|||
##################################
|
||||
# DISABLED MODULES #
|
||||
##################################
|
||||
#screen -S "Script_AIL" -X screen -t "Phone" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Phone.py; read x"
|
||||
#sleep 0.1
|
||||
# screen -S "Script_AIL" -X screen -t "Phone" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Phone.py; read x"
|
||||
# sleep 0.1
|
||||
# screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./SentimentAnalysis.py; read x"
|
||||
# sleep 0.1
|
||||
# screen -S "Script_AIL" -X screen -t "Release" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Release.py; read x"
|
||||
# sleep 0.1
|
||||
|
||||
##################################
|
||||
# #
|
||||
|
@ -285,8 +289,6 @@ function launching_scripts {
|
|||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "IPAddress" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./IPAddress.py; read x"
|
||||
|
||||
#screen -S "Script_AIL" -X screen -t "Release" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Release.py; read x"
|
||||
#sleep 0.1
|
||||
|
||||
}
|
||||
|
||||
|
@ -476,19 +478,19 @@ function launch_feeder {
|
|||
}
|
||||
|
||||
function killscript {
|
||||
if [[ $islogged || $isqueued || $is_ail_core || $isscripted || $isflasked || $isfeeded || $iscrawler || $is_ail_2_ail ]]; then
|
||||
if [[ $islogged || $isqueued || $is_ail_core || $isscripted || $isflasked || $isfeeded || $is_ail_2_ail ]]; then
|
||||
echo -e $GREEN"Killing Script"$DEFAULT
|
||||
kill $islogged $isqueued $is_ail_core $isscripted $isflasked $isfeeded $iscrawler $is_ail_2_ail
|
||||
kill $islogged $isqueued $is_ail_core $isscripted $isflasked $isfeeded $is_ail_2_ail
|
||||
sleep 0.2
|
||||
echo -e $ROSE`screen -ls`$DEFAULT
|
||||
echo -e $GREEN"\t* $islogged $isqueued $is_ail_core $isscripted $isflasked $isfeeded $iscrawler $is_ail_2_ail killed."$DEFAULT
|
||||
echo -e $GREEN"\t* $islogged $isqueued $is_ail_core $isscripted $isflasked $isfeeded $is_ail_2_ail killed."$DEFAULT
|
||||
else
|
||||
echo -e $RED"\t* No script to kill"$DEFAULT
|
||||
fi
|
||||
}
|
||||
|
||||
function killall {
|
||||
if [[ $isredis || $isardb || $iskvrocks || $islogged || $isqueued || $is_ail_2_ail || $isscripted || $isflasked || $isfeeded || $iscrawler || $is_ail_core || $is_ail_2_ail ]]; then
|
||||
if [[ $isredis || $isardb || $iskvrocks || $islogged || $isqueued || $is_ail_2_ail || $isscripted || $isflasked || $isfeeded || $is_ail_core || $is_ail_2_ail ]]; then
|
||||
if [[ $isredis ]]; then
|
||||
echo -e $GREEN"Gracefully closing redis servers"$DEFAULT
|
||||
shutting_down_redis;
|
||||
|
@ -503,10 +505,10 @@ function killall {
|
|||
shutting_down_kvrocks;
|
||||
fi
|
||||
echo -e $GREEN"Killing all"$DEFAULT
|
||||
kill $isredis $isardb $iskvrocks $islogged $isqueued $is_ail_core $isscripted $isflasked $isfeeded $iscrawler $is_ail_2_ail
|
||||
kill $isredis $isardb $iskvrocks $islogged $isqueued $is_ail_core $isscripted $isflasked $isfeeded $is_ail_2_ail
|
||||
sleep 0.2
|
||||
echo -e $ROSE`screen -ls`$DEFAULT
|
||||
echo -e $GREEN"\t* $isredis $isardb $iskvrocks $islogged $isqueued $isscripted $is_ail_2_ail $isflasked $isfeeded $iscrawler $is_ail_core killed."$DEFAULT
|
||||
echo -e $GREEN"\t* $isredis $isardb $iskvrocks $islogged $isqueued $isscripted $is_ail_2_ail $isflasked $isfeeded $is_ail_core killed."$DEFAULT
|
||||
else
|
||||
echo -e $RED"\t* No screen to kill"$DEFAULT
|
||||
fi
|
||||
|
|
|
@ -15,7 +15,7 @@ import json
|
|||
import redis
|
||||
import psutil
|
||||
from subprocess import PIPE, Popen
|
||||
from packages import Paste
|
||||
from lib.objects.Items import Item
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
||||
import ConfigLoader
|
||||
|
@ -51,7 +51,7 @@ QUEUE_STATUS = {}
|
|||
CPU_TABLE = {}
|
||||
CPU_OBJECT_TABLE = {}
|
||||
|
||||
# Path of the current paste for a pid
|
||||
# Path of the current item for a pid
|
||||
COMPLETE_PASTE_PATH_PER_PID = {}
|
||||
|
||||
'''
|
||||
|
@ -443,10 +443,10 @@ class Show_paste(Frame):
|
|||
self.label_list[i]._text = ""
|
||||
return
|
||||
|
||||
paste = Paste.Paste(COMPLETE_PASTE_PATH_PER_PID[current_selected_value])
|
||||
old_content = paste.get_p_content()[0:4000] # Limit number of char to be displayed
|
||||
item = Item(COMPLETE_PASTE_PATH_PER_PID[current_selected_value])
|
||||
old_content = item.get_content()[0:4000] # Limit number of char to be displayed
|
||||
|
||||
#Replace unprintable char by ?
|
||||
# Replace unprintable char by ?
|
||||
content = ""
|
||||
for i, c in enumerate(old_content):
|
||||
if ord(c) > 127: # Used to avoid printing unprintable char
|
||||
|
@ -456,7 +456,7 @@ class Show_paste(Frame):
|
|||
else:
|
||||
content += c
|
||||
|
||||
#Print in the correct label, END or more
|
||||
# Print in the correct label, END or more
|
||||
to_print = ""
|
||||
i = 0
|
||||
for line in content.split("\n"):
|
||||
|
@ -472,7 +472,7 @@ class Show_paste(Frame):
|
|||
self.label_list[i]._text = "- END of PASTE -"
|
||||
i += 1
|
||||
|
||||
while i<self.num_label: #Clear out remaining lines
|
||||
while i<self.num_label: # Clear out remaining lines
|
||||
self.label_list[i]._text = ""
|
||||
i += 1
|
||||
|
||||
|
@ -491,6 +491,7 @@ class Show_paste(Frame):
|
|||
for i in range(2,self.num_label):
|
||||
self.label_list[i]._text = ""
|
||||
|
||||
|
||||
'''
|
||||
END SCENES DEFINITION
|
||||
'''
|
||||
|
|
|
@ -72,6 +72,7 @@ def sendEmailNotification(recipient, mail_subject, mail_body):
|
|||
traceback.print_tb(err.__traceback__)
|
||||
publisher.warning(err)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(description='Test notification sender.')
|
||||
parser.add_argument("addr", help="Test mail 'to' address")
|
||||
|
|
|
@ -180,9 +180,9 @@ if __name__ == '__main__':
|
|||
|
||||
key_id_str = 'Key ID - '
|
||||
regex_key_id = '{}.+'.format(key_id_str)
|
||||
regex_pgp_public_blocs = '-----BEGIN PGP PUBLIC KEY BLOCK-----[\s\S]+?-----END PGP PUBLIC KEY BLOCK-----'
|
||||
regex_pgp_signature = '-----BEGIN PGP SIGNATURE-----[\s\S]+?-----END PGP SIGNATURE-----'
|
||||
regex_pgp_message = '-----BEGIN PGP MESSAGE-----[\s\S]+?-----END PGP MESSAGE-----'
|
||||
regex_pgp_public_blocs = r'-----BEGIN PGP PUBLIC KEY BLOCK-----[\s\S]+?-----END PGP PUBLIC KEY BLOCK-----'
|
||||
regex_pgp_signature = r'-----BEGIN PGP SIGNATURE-----[\s\S]+?-----END PGP SIGNATURE-----'
|
||||
regex_pgp_message = r'-----BEGIN PGP MESSAGE-----[\s\S]+?-----END PGP MESSAGE-----'
|
||||
regex_tool_version = r"\bVersion:.*\n"
|
||||
regex_block_comment = r"\bComment:.*\n"
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
import time
|
||||
from packages import Paste
|
||||
from lib.objects.Items import Item
|
||||
from pubsublogger import publisher
|
||||
from Helper import Process
|
||||
import re
|
||||
|
@ -45,8 +45,8 @@ if __name__ == "__main__":
|
|||
time.sleep(10)
|
||||
continue
|
||||
|
||||
paste = Paste.Paste(filepath)
|
||||
content = paste.get_p_content()
|
||||
item = Item(filepath)
|
||||
content = item.get_content()
|
||||
|
||||
#signal.alarm(max_execution_time)
|
||||
try:
|
||||
|
@ -54,7 +54,7 @@ if __name__ == "__main__":
|
|||
if len(releases) == 0:
|
||||
continue
|
||||
|
||||
to_print = 'Release;{};{};{};{} releases;{}'.format(paste.p_source, paste.p_date, paste.p_name, len(releases), paste.p_rel_path)
|
||||
to_print = f'Release;{item.get_source()};{item.get_date()};{item.get_basename()};{len(releases)} releases;{item.get_id()}'
|
||||
print(to_print)
|
||||
if len(releases) > 30:
|
||||
publisher.warning(to_print)
|
||||
|
@ -63,7 +63,7 @@ if __name__ == "__main__":
|
|||
|
||||
except TimeoutException:
|
||||
p.incr_module_timeout_statistic()
|
||||
print ("{0} processing timeout".format(paste.p_rel_path))
|
||||
print(f"{item.get_id()} processing timeout")
|
||||
continue
|
||||
else:
|
||||
signal.alarm(0)
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
import time
|
||||
from packages import Paste
|
||||
from lib.objects.Items import Item
|
||||
from pubsublogger import publisher
|
||||
from Helper import Process
|
||||
import re
|
||||
|
@ -15,17 +15,17 @@ if __name__ == "__main__":
|
|||
|
||||
critical = 0 # AS TO BE IMPORTANT, MIGHT BE REMOVED
|
||||
|
||||
#RELEVANTS LANGUAGES
|
||||
shell = "[a-zA-Z0-9]+@[a-zA-Z0-9\-]+\:\~\$"
|
||||
c = "\#include\ \<[a-z\/]+.h\>"
|
||||
php = "\<\?php"
|
||||
python = "import\ [\w]+"
|
||||
bash = "#!\/[\w]*\/bash"
|
||||
javascript = "function\(\)"
|
||||
ruby = "require \ [\w]+"
|
||||
adr = "0x[a-f0-9]{2}"
|
||||
# RELEVANT LANGUAGES
|
||||
shell = r"[a-zA-Z0-9]+@[a-zA-Z0-9\-]+\:\~\$"
|
||||
c = r"\#include\ \<[a-z\/]+.h\>"
|
||||
php = r"\<\?php"
|
||||
python = r"import\ [\w]+"
|
||||
bash = r"#!\/[\w]*\/bash"
|
||||
javascript = r"function\(\)"
|
||||
ruby = r"require \ [\w]+"
|
||||
adr = r"0x[a-f0-9]{2}"
|
||||
|
||||
#asm = "\"((?s).{1}x[0-9a-f]{2}){3,}" ISSUES WITH FINDALL, pattern like \x54\xaf\x23\..
|
||||
# asm = r"\"((?s).{1}x[0-9a-f]{2}){3,}" ISSUES WITH FINDALL, pattern like \x54\xaf\x23\..
|
||||
|
||||
languages = [shell, c, php, bash, python, javascript, bash, ruby, adr]
|
||||
regex = '|'.join(languages)
|
||||
|
@ -41,13 +41,13 @@ if __name__ == "__main__":
|
|||
|
||||
filepath, count = message.split()
|
||||
|
||||
paste = Paste.Paste(filepath)
|
||||
content = paste.get_p_content()
|
||||
item = Item(filepath)
|
||||
content = item.get_content()
|
||||
match_set = set(re.findall(regex, content))
|
||||
if len(match_set) == 0:
|
||||
continue
|
||||
|
||||
to_print = 'SourceCode;{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message)
|
||||
to_print = f'SourceCode;{item.get_source()};{item.get_date()};{item.get_basename()};{item.get_id()}'
|
||||
|
||||
if len(match_set) > critical:
|
||||
publisher.warning(to_print)
|
||||
|
|
|
@ -1,68 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
||||
import ConfigLoader
|
||||
import crawlers
|
||||
|
||||
config_loader = ConfigLoader.ConfigLoader()
|
||||
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
|
||||
config_loader = None
|
||||
|
||||
# # TODO: lauch me in core screen
|
||||
# # TODO: check if already launched in tor screen
|
||||
|
||||
# # TODO: handle mutltiple splash_manager
|
||||
if __name__ == '__main__':
|
||||
|
||||
is_manager_connected = crawlers.ping_splash_manager()
|
||||
if not is_manager_connected:
|
||||
print('Error, Can\'t connect to Splash manager')
|
||||
session_uuid = None
|
||||
else:
|
||||
print('Splash manager connected')
|
||||
session_uuid = crawlers.get_splash_manager_session_uuid()
|
||||
is_manager_connected = crawlers.reload_splash_and_proxies_list()
|
||||
print(is_manager_connected)
|
||||
if is_manager_connected:
|
||||
if crawlers.test_ail_crawlers():
|
||||
crawlers.relaunch_crawlers()
|
||||
last_check = int(time.time())
|
||||
|
||||
while True:
|
||||
|
||||
# # TODO: avoid multiple ping
|
||||
|
||||
# check if manager is connected
|
||||
if int(time.time()) - last_check > 60:
|
||||
is_manager_connected = crawlers.is_splash_manager_connected()
|
||||
current_session_uuid = crawlers.get_splash_manager_session_uuid()
|
||||
# reload proxy and splash list
|
||||
if current_session_uuid and current_session_uuid != session_uuid:
|
||||
is_manager_connected = crawlers.reload_splash_and_proxies_list()
|
||||
if is_manager_connected:
|
||||
print('reload proxies and splash list')
|
||||
if crawlers.test_ail_crawlers():
|
||||
crawlers.relaunch_crawlers()
|
||||
session_uuid = current_session_uuid
|
||||
if not is_manager_connected:
|
||||
print('Error, Can\'t connect to Splash manager')
|
||||
last_check = int(time.time())
|
||||
|
||||
# # TODO: lauch crawlers if was never connected
|
||||
# refresh splash and proxy list
|
||||
elif False:
|
||||
crawlers.reload_splash_and_proxies_list()
|
||||
print('list of splash and proxies refreshed')
|
||||
else:
|
||||
time.sleep(5)
|
||||
|
||||
# kill/launch new crawler / crawler manager check if already launched
|
||||
|
||||
|
||||
# # TODO: handle mutltiple splash_manager
|
||||
# catch reload request
|
331  bin/crawlers/Crawler.py (new executable file)
|
@ -0,0 +1,331 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from lib import crawlers
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
from lib.objects.Domains import Domain
|
||||
from lib.objects import Screenshots
|
||||
|
||||
class Crawler(AbstractModule):
|
||||
|
||||
def __init__(self):
|
||||
super(Crawler, self, ).__init__(logger_channel='Crawler')
|
||||
|
||||
# Waiting time in seconds between two processed messages
|
||||
self.pending_seconds = 1
|
||||
|
||||
config_loader = ConfigLoader()
|
||||
self.r_log_submit = config_loader.get_redis_conn('Redis_Log_submit')
|
||||
|
||||
self.default_har = config_loader.get_config_boolean('Crawler', 'default_har')
|
||||
self.default_screenshot = config_loader.get_config_boolean('Crawler', 'default_screenshot')
|
||||
self.default_depth_limit = config_loader.get_config_int('Crawler', 'default_depth_limit')
|
||||
|
||||
# TODO: LIMIT MAX NUMBERS OF CRAWLED PAGES
|
||||
|
||||
# update hardcoded blacklist
|
||||
crawlers.load_blacklist()
|
||||
# update captures cache
|
||||
crawlers.reload_crawler_captures()
|
||||
|
||||
# LACUS
|
||||
self.lacus = crawlers.get_lacus()
|
||||
|
||||
# Capture
|
||||
self.har = None
|
||||
self.screenshot = None
|
||||
self.root_item = None
|
||||
self.har_dir = None
|
||||
self.items_dir = None
|
||||
self.domain = None
|
||||
|
||||
# Send module state to logs
|
||||
self.redis_logger.info('Crawler initialized')
|
||||
|
||||
def print_crawler_start_info(self, url, domain, domain_url):
|
||||
print()
|
||||
print()
|
||||
print('\033[92m------------------START CRAWLER------------------\033[0m')
|
||||
print(f'crawler type: {domain}')
|
||||
print('\033[92m-------------------------------------------------\033[0m')
|
||||
print(f'url: {url}')
|
||||
print(f'domain: {domain}')
|
||||
print(f'domain_url: {domain_url}')
|
||||
print()
|
||||
|
||||
def get_message(self):
|
||||
# Check if a new Capture can be Launched
|
||||
if crawlers.get_nb_crawler_captures() < crawlers.get_crawler_max_captures():
|
||||
task_row = crawlers.get_crawler_task_from_queue()
|
||||
if task_row:
|
||||
print(task_row)
|
||||
task_uuid, priority = task_row
|
||||
self.enqueue_capture(task_uuid, priority)
|
||||
|
||||
# Check if a Capture is Done
|
||||
capture = crawlers.get_crawler_capture()
|
||||
if capture:
|
||||
print(capture)
|
||||
capture_uuid = capture[0][0]
|
||||
capture_status = self.lacus.get_capture_status(capture_uuid)
|
||||
if capture_status != crawlers.CaptureStatus.DONE: # TODO ADD GLOBAL TIMEOUT-> Save start time
|
||||
crawlers.update_crawler_capture(capture_uuid)
|
||||
print(capture_uuid, capture_status, int(time.time()))
|
||||
else:
|
||||
self.compute(capture_uuid)
|
||||
crawlers.remove_crawler_capture(capture_uuid)
|
||||
print('capture', capture_uuid, 'completed')
|
||||
|
||||
|
||||
time.sleep(self.pending_seconds)
|
||||
|
||||
def enqueue_capture(self, task_uuid, priority):
|
||||
task = crawlers.get_crawler_task(task_uuid)
|
||||
print(task)
|
||||
# task = {
|
||||
# 'uuid': task_uuid,
|
||||
# 'url': 'https://foo.be',
|
||||
# 'domain': 'foo.be',
|
||||
# 'depth': 1,
|
||||
# 'har': True,
|
||||
# 'screenshot': True,
|
||||
# 'user_agent': crawlers.get_default_user_agent(),
|
||||
# 'cookiejar': [],
|
||||
# 'header': '',
|
||||
# 'proxy': 'force_tor',
|
||||
# 'parent': 'manual',
|
||||
# }
|
||||
url = task['url']
|
||||
force = priority != 0
|
||||
|
||||
# TODO unpack cookiejar
|
||||
|
||||
# TODO HEADER
|
||||
|
||||
capture_uuid = self.lacus.enqueue(url=url,
|
||||
depth=task['depth'],
|
||||
user_agent=task['user_agent'],
|
||||
proxy=task['proxy'],
|
||||
cookies=[],
|
||||
force=force,
|
||||
general_timeout_in_sec=90)
|
||||
|
||||
crawlers.add_crawler_capture(task_uuid, capture_uuid)
|
||||
print(task_uuid, capture_uuid, 'launched')
|
||||
return capture_uuid
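Taken together, get_message, enqueue_capture and compute implement a simple enqueue/poll/fetch cycle against Lacus. A condensed, hedged sketch of that cycle follows; lacus stands for whatever crawlers.get_lacus() returns, and only the method names already used in this file are assumed.

import time

# Enqueue a capture (argument values are illustrative).
capture_uuid = lacus.enqueue(url='http://example.onion', depth=1,
                             user_agent='Mozilla/5.0', proxy='force_tor',
                             cookies=[], force=False,
                             general_timeout_in_sec=90)

# Poll until Lacus reports the capture as done.
while lacus.get_capture_status(capture_uuid) != crawlers.CaptureStatus.DONE:
    time.sleep(1)

# Fetch the result: a dict with 'status', 'html', 'har', 'png', 'children', ...
entries = lacus.get_capture(capture_uuid)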
|
||||
|
||||
# CRAWL DOMAIN
|
||||
# TODO: CATCH ERRORS
|
||||
def compute(self, capture_uuid):
|
||||
|
||||
print('saving capture', capture_uuid)
|
||||
|
||||
task_uuid = crawlers.get_crawler_capture_task_uuid(capture_uuid)
|
||||
task = crawlers.get_crawler_task(task_uuid)
|
||||
|
||||
print(task['domain'])
|
||||
|
||||
self.domain = Domain(task['domain'])
|
||||
|
||||
# TODO CHANGE EPOCH
|
||||
epoch = int(time.time())
|
||||
parent_id = task['parent']
|
||||
print(task)
|
||||
|
||||
entries = self.lacus.get_capture(capture_uuid)
|
||||
print(entries['status'])
|
||||
self.har = task['har']
|
||||
self.screenshot = task['screenshot']
|
||||
str_date = crawlers.get_current_date(separator=True)
|
||||
self.har_dir = crawlers.get_date_har_dir(str_date)
|
||||
self.items_dir = crawlers.get_date_crawled_items_source(str_date)
|
||||
self.root_item = None
|
||||
|
||||
# Save Capture
|
||||
self.save_capture_response(parent_id, entries)
|
||||
|
||||
self.domain.update_daterange(str_date.replace('/', ''))
|
||||
# Origin + History
|
||||
if self.root_item:
|
||||
# domain.add_ports(port)
|
||||
self.domain.set_last_origin(parent_id)
|
||||
self.domain.add_history(epoch, root_item=self.root_item)
|
||||
elif self.domain.was_up():
|
||||
self.domain.add_history(epoch, root_item=epoch)
|
||||
|
||||
crawlers.update_last_crawled_domain(self.domain.get_domain_type(), self.domain.id, epoch)
|
||||
crawlers.clear_crawler_task(task_uuid, self.domain.get_domain_type())
|
||||
|
||||
def save_capture_response(self, parent_id, entries):
|
||||
print(entries.keys())
|
||||
if 'error' in entries:
|
||||
# TODO IMPROVE ERROR MESSAGE
|
||||
self.redis_logger.warning(str(entries['error']))
|
||||
print(entries['error'])
|
||||
if entries.get('html'):
|
||||
print('retrieved content')
|
||||
# print(entries.get('html'))
|
||||
|
||||
# TODO LOGS IF != domain
|
||||
if 'last_redirected_url' in entries and entries['last_redirected_url']:
|
||||
last_url = entries['last_redirected_url']
|
||||
unpacked_last_url = crawlers.unpack_url(last_url)
|
||||
current_domain = unpacked_last_url['domain']
|
||||
# REDIRECTION TODO CHECK IF WEB
|
||||
if current_domain != self.domain.id and not self.root_item:
|
||||
self.redis_logger.warning(f'External redirection {self.domain.id} -> {current_domain}')
|
||||
print(f'External redirection {self.domain.id} -> {current_domain}')
|
||||
if not self.root_item:
|
||||
self.domain = Domain(current_domain)
|
||||
# TODO LAST URL
|
||||
# FIXME
|
||||
else:
|
||||
last_url = f'http://{self.domain.id}'
|
||||
|
||||
if 'html' in entries and entries['html']:
|
||||
item_id = crawlers.create_item_id(self.items_dir, self.domain.id)
|
||||
print(item_id)
|
||||
gzip64encoded = crawlers.get_gzipped_b64_item(item_id, entries['html'])
|
||||
# send item to Global
|
||||
relay_message = f'{item_id} {gzip64encoded}'
|
||||
self.send_message_to_queue(relay_message, 'Mixer')
|
||||
# increase nb of paste by feeder name
|
||||
self.r_log_submit.hincrby('mixer_cache:list_feeder', 'crawler', 1)
|
||||
|
||||
# Tag
|
||||
msg = f'infoleak:submission="crawler";{item_id}'
|
||||
self.send_message_to_queue(msg, 'Tags')
|
||||
|
||||
crawlers.create_item_metadata(item_id, self.domain.id, last_url, parent_id)
|
||||
if self.root_item is None:
|
||||
self.root_item = item_id
|
||||
parent_id = item_id
|
||||
|
||||
# SCREENSHOT
|
||||
if self.screenshot:
|
||||
if 'png' in entries and entries['png']:
|
||||
screenshot = Screenshots.create_screenshot(entries['png'], b64=False)
|
||||
if screenshot:
|
||||
# Create Correlations
|
||||
screenshot.add_correlation('item', '', item_id)
|
||||
screenshot.add_correlation('domain', '', self.domain.id)
|
||||
# HAR
|
||||
if self.har:
|
||||
if 'har' in entries and entries['har']:
|
||||
crawlers.save_har(self.har_dir, item_id, entries['har'])
|
||||
# Next Children
|
||||
entries_children = entries.get('children')
|
||||
if entries_children:
|
||||
for children in entries_children:
|
||||
self.save_capture_response(parent_id, children)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
module = Crawler()
|
||||
module.debug = True
|
||||
# module.compute(('ooooo', 0))
|
||||
module.run()
|
||||
|
||||
|
||||
##################################
|
||||
##################################
|
||||
##################################
|
||||
##################################
|
||||
##################################
|
||||
|
||||
|
||||
# from Helper import Process
|
||||
# from pubsublogger import publisher
|
||||
|
||||
|
||||
# ======== FUNCTIONS ========
|
||||
|
||||
|
||||
# def update_auto_crawler():
|
||||
# current_epoch = int(time.time())
|
||||
# list_to_crawl = redis_crawler.zrangebyscore('crawler_auto_queue', '-inf', current_epoch)
|
||||
# for elem_to_crawl in list_to_crawl:
|
||||
# mess, type = elem_to_crawl.rsplit(';', 1)
|
||||
# redis_crawler.sadd('{}_crawler_priority_queue'.format(type), mess)
|
||||
# redis_crawler.zrem('crawler_auto_queue', elem_to_crawl)
|
||||
|
||||
# Extract info form url (url, domain, domain url, ...)
|
||||
# def unpack_url(url):
|
||||
# to_crawl = {}
|
||||
# faup.decode(url)
|
||||
# url_unpack = faup.get()
|
||||
# to_crawl['domain'] = to_crawl['domain'].lower()
|
||||
# new_url_host = url_host.lower()
|
||||
# url_lower_case = url.replace(url_host, new_url_host, 1)
|
||||
#
|
||||
# if url_unpack['scheme'] is None:
|
||||
# to_crawl['scheme'] = 'http'
|
||||
# url= 'http://{}'.format(url_lower_case)
|
||||
# else:
|
||||
# try:
|
||||
# scheme = url_unpack['scheme'].decode()
|
||||
# except Exception as e:
|
||||
# scheme = url_unpack['scheme']
|
||||
# if scheme in default_proto_map:
|
||||
# to_crawl['scheme'] = scheme
|
||||
# url = url_lower_case
|
||||
# else:
|
||||
# redis_crawler.sadd('new_proto', '{} {}'.format(scheme, url_lower_case))
|
||||
# to_crawl['scheme'] = 'http'
|
||||
# url= 'http://{}'.format(url_lower_case.replace(scheme, '', 1))
|
||||
#
|
||||
# if url_unpack['port'] is None:
|
||||
# to_crawl['port'] = default_proto_map[to_crawl['scheme']]
|
||||
# else:
|
||||
# try:
|
||||
# port = url_unpack['port'].decode()
|
||||
# except:
|
||||
# port = url_unpack['port']
|
||||
# # Verify port number #################### make function to verify/correct port number
|
||||
# try:
|
||||
# int(port)
|
||||
# # Invalid port Number
|
||||
# except Exception as e:
|
||||
# port = default_proto_map[to_crawl['scheme']]
|
||||
# to_crawl['port'] = port
|
||||
#
|
||||
# #if url_unpack['query_string'] is None:
|
||||
# # if to_crawl['port'] == 80:
|
||||
# # to_crawl['url']= '{}://{}'.format(to_crawl['scheme'], url_unpack['host'].decode())
|
||||
# # else:
|
||||
# # to_crawl['url']= '{}://{}:{}'.format(to_crawl['scheme'], url_unpack['host'].decode(), to_crawl['port'])
|
||||
# #else:
|
||||
# # to_crawl['url']= '{}://{}:{}{}'.format(to_crawl['scheme'], url_unpack['host'].decode(), to_crawl['port'], url_unpack['query_string'].decode())
|
||||
#
|
||||
# to_crawl['url'] = url
|
||||
# if to_crawl['port'] == 80:
|
||||
# to_crawl['domain_url'] = '{}://{}'.format(to_crawl['scheme'], new_url_host)
|
||||
# else:
|
||||
# to_crawl['domain_url'] = '{}://{}:{}'.format(to_crawl['scheme'], new_url_host, to_crawl['port'])
|
||||
#
|
||||
# try:
|
||||
# to_crawl['tld'] = url_unpack['tld'].decode()
|
||||
# except:
|
||||
# to_crawl['tld'] = url_unpack['tld']
|
||||
#
|
||||
# return to_crawl
|
||||
|
||||
# ##################################################### add ftp ???
|
||||
# update_auto_crawler()
|
||||
|
||||
# # add next auto Crawling in queue:
|
||||
# if to_crawl['paste'] == 'auto':
|
||||
# redis_crawler.zadd('crawler_auto_queue', int(time.time()+crawler_config['crawler_options']['time']) , '{};{}'.format(to_crawl['original_message'], to_crawl['type_service']))
|
||||
# # update list, last auto crawled domains
|
||||
# redis_crawler.lpush('last_auto_crawled', '{}:{};{}'.format(url_data['domain'], url_data['port'], date['epoch']))
|
||||
# redis_crawler.ltrim('last_auto_crawled', 0, 9)
|
||||
#
|
|
@ -1,7 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
from packages import Paste
|
||||
from lib.objects.Items import Item
|
||||
from Helper import Process
|
||||
|
||||
import os
|
||||
|
@ -12,11 +12,13 @@ import configparser
|
|||
|
||||
from collections import defaultdict
|
||||
|
||||
# TODO FIX ME OR REMOVE ME
|
||||
|
||||
def get_dict_cve(list_paste_cve, only_one_same_cve_by_paste=False):
|
||||
dict_keyword = {}
|
||||
|
||||
for paste_cve in list_paste_cve:
|
||||
paste_content = Paste.Paste(paste_cve).get_p_content()
|
||||
paste_content = Item(paste_cve).get_content()
|
||||
|
||||
cve_list = reg_cve.findall(paste_content)
|
||||
if only_one_same_cve_by_paste:
|
||||
|
|
|
@ -35,17 +35,17 @@ class ConfigLoader(object):
|
|||
else:
|
||||
self.cfg.read(default_config_file)
|
||||
|
||||
def get_redis_conn(self, redis_name, decode_responses=True): ## TODO: verify redis name
|
||||
return redis.StrictRedis( host=self.cfg.get(redis_name, "host"),
|
||||
def get_redis_conn(self, redis_name, decode_responses=True):
|
||||
return redis.StrictRedis(host=self.cfg.get(redis_name, "host"),
|
||||
port=self.cfg.getint(redis_name, "port"),
|
||||
db=self.cfg.getint(redis_name, "db"),
|
||||
decode_responses=decode_responses )
|
||||
decode_responses=decode_responses)
|
||||
|
||||
def get_db_conn(self, db_name, decode_responses=True): ## TODO: verify redis name
|
||||
return redis.StrictRedis( host=self.cfg.get(db_name, "host"),
|
||||
def get_db_conn(self, db_name, decode_responses=True):
|
||||
return redis.StrictRedis(host=self.cfg.get(db_name, "host"),
|
||||
port=self.cfg.getint(db_name, "port"),
|
||||
password=self.cfg.get(db_name, "password"),
|
||||
decode_responses=decode_responses )
|
||||
decode_responses=decode_responses)
|
||||
|
||||
def get_files_directory(self, key_name):
|
||||
directory_path = self.cfg.get('Directories', key_name)
|
||||
|
@ -79,3 +79,33 @@ class ConfigLoader(object):
|
|||
return all_keys_values
|
||||
else:
|
||||
return []
|
||||
|
||||
# # # # Directory Config # # # #
|
||||
|
||||
config_loader = ConfigLoader()
|
||||
ITEMS_FOLDER = config_loader.get_config_str("Directories", "pastes")
|
||||
if ITEMS_FOLDER == 'PASTES':
|
||||
ITEMS_FOLDER = os.path.join(os.environ['AIL_HOME'], ITEMS_FOLDER)
|
||||
ITEMS_FOLDER = ITEMS_FOLDER + '/'
|
||||
ITEMS_FOLDER = os.path.join(os.path.realpath(ITEMS_FOLDER), '')
|
||||
|
||||
HARS_DIR = config_loader.get_files_directory('har')
|
||||
if HARS_DIR == 'CRAWLED_SCREENSHOT':
|
||||
HARS_DIR = os.path.join(os.environ['AIL_HOME'], HARS_DIR)
|
||||
|
||||
SCREENSHOTS_FOLDER = config_loader.get_files_directory('screenshot')
|
||||
if SCREENSHOTS_FOLDER == 'CRAWLED_SCREENSHOT/screenshot':
|
||||
SCREENSHOTS_FOLDER = os.path.join(os.environ['AIL_HOME'], SCREENSHOTS_FOLDER)
|
||||
config_loader = None
|
||||
|
||||
def get_hars_dir():
|
||||
return HARS_DIR
|
||||
|
||||
def get_items_dir():
|
||||
return ITEMS_FOLDER
|
||||
|
||||
def get_screenshots_dir():
|
||||
return SCREENSHOTS_FOLDER
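The three helpers above centralise the item, HAR and screenshot directories that other files in this commit (for example lib/item_basic.py further down) now use instead of rebuilding PASTES_FOLDER locally. A minimal usage sketch, with an illustrative item id:

import os
import sys
sys.path.append(os.environ['AIL_BIN'])
from lib import ConfigLoader

item_id = 'crawled/2022/09/23/example.onion_some-uuid'  # illustrative id
filepath = os.path.join(ConfigLoader.get_items_dir(), item_id)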
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -58,7 +58,6 @@ def get_item_stats_nb_by_date():
|
|||
def _set_item_stats_nb_by_date(date, source):
|
||||
return r_statistics.zrange(f'providers_set_{date}', )
|
||||
|
||||
|
||||
# # TODO: load ZSET IN CACHE => FAST UPDATE
|
||||
def update_item_stats_size_nb(item_id, source, size, date):
|
||||
# Add/Update in Redis
|
||||
|
@ -106,7 +105,7 @@ def update_module_stats(module_name, num, keyword, date):
|
|||
# check if this keyword is eligible for progression
|
||||
keyword_total_sum = 0
|
||||
|
||||
curr_value = r_statistics.hget(date, module+'-'+keyword)
|
||||
curr_value = r_statistics.hget(date, f'{module_name}-{keyword}')
|
||||
keyword_total_sum += int(curr_value) if curr_value is not None else 0
|
||||
|
||||
if r_statistics.zcard(f'top_{module_name}_set_{date}') < PIE_CHART_MAX_CARDINALITY:
|
||||
|
|
|
@ -22,7 +22,7 @@ def get_ail_uuid():
|
|||
|
||||
# # TODO: check change paste => item
|
||||
def get_all_objects():
|
||||
return ['domain', 'item', 'pgp', 'cryptocurrency', 'decoded', 'screenshot', 'username']
|
||||
return ['cve', 'domain', 'item', 'pgp', 'cryptocurrency', 'decoded', 'screenshot', 'username']
|
||||
|
||||
def get_object_all_subtypes(obj_type):
|
||||
if obj_type == 'cryptocurrency':
|
||||
|
|
|
@ -43,12 +43,13 @@ config_loader = None
|
|||
|
||||
CORRELATION_TYPES_BY_OBJ = {
|
||||
"cryptocurrency" : ["domain", "item"],
|
||||
"decoded" : ["domain", "item"],
|
||||
"domain": ["cryptocurrency", "decoded", "item", "pgp", "username", "screenshot"],
|
||||
"item": ["cryptocurrency", "decoded", "domain", "pgp", "username", "screenshot"],
|
||||
"cve": ["domain", "item"],
|
||||
"decoded": ["domain", "item"],
|
||||
"domain": ["cve", "cryptocurrency", "decoded", "item", "pgp", "username", "screenshot"],
|
||||
"item": ["cve", "cryptocurrency", "decoded", "domain", "pgp", "username", "screenshot"],
|
||||
"pgp" : ["domain", "item"],
|
||||
"username" : ["domain", "item"],
|
||||
"screenshot" : ["domain", "item"],
|
||||
"username": ["domain", "item"],
|
||||
"screenshot": ["domain", "item"],
|
||||
}
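With the new "cve" entries, CVE objects correlate with domains and items and vice versa; the get_obj_correl_types helper below presumably reads straight from this table. An illustrative lookup, assuming the dict is used as-is:

# Which object types can a CVE be correlated with?
print(CORRELATION_TYPES_BY_OBJ.get('cve', []))      # ['domain', 'item']
# Domains can now also be correlated with CVEs:
print('cve' in CORRELATION_TYPES_BY_OBJ['domain'])  # True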
|
||||
|
||||
def get_obj_correl_types(obj_type):
|
||||
|
|
1435  bin/lib/crawlers.py
File diff suppressed because it is too large.
|
@@ -8,7 +8,7 @@ sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
 import ConfigLoader

 config_loader = ConfigLoader.ConfigLoader()
-r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
+r_serv_db = config_loader.get_db_conn("Kvrocks_Objects")
 config_loader = None

 def get_first_object_date(object_type, subtype, field=''):
@@ -24,15 +24,15 @@ def get_last_object_date(object_type, subtype, field=''):
     return int(last_date)

 def _set_first_object_date(object_type, subtype, date, field=''):
-    return r_serv_db.zadd('objs:first_date', f'{object_type}:{subtype}:{field}', date)
+    return r_serv_db.zadd('objs:first_date', {f'{object_type}:{subtype}:{field}': date})

 def _set_last_object_date(object_type, subtype, date, field=''):
-    return r_serv_db.zadd('objs:last_date', f'{object_type}:{subtype}:{field}', date)
+    return r_serv_db.zadd('objs:last_date', {f'{object_type}:{subtype}:{field}': float(date)})

 def update_first_object_date(object_type, subtype, date, field=''):
     first_date = get_first_object_date(object_type, subtype, field=field)
     if int(date) < first_date:
-        _set_first_object_date(object_typel, subtype, date, field=field)
+        _set_first_object_date(object_type, subtype, date, field=field)
         return date
     else:
         return first_date
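The zadd calls above are updated for the redis-py 3.x API, which expects a {member: score} mapping instead of the old positional member/score arguments. A minimal hedged sketch of the new calling convention (the connection, key and member below are illustrative, not taken from the commit):

import redis

r = redis.Redis()  # assumes a reachable Redis/Kvrocks instance

# redis-py >= 3.0: pass members and scores as a mapping
r.zadd('objs:first_date', {'domain:onion:': 20220101})

# scores are stored and returned as floats
print(r.zscore('objs:first_date', 'domain:onion:'))  # 20220101.0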
@@ -7,15 +7,15 @@ import gzip
 import magic

-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
-import ConfigLoader
-import Tag
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib import ConfigLoader
+from lib import Tag

 config_loader = ConfigLoader.ConfigLoader()
-# get and sanityze PASTE DIRECTORY
-PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
-PASTES_FOLDER = os.path.join(os.path.realpath(PASTES_FOLDER), '')

 r_cache = config_loader.get_redis_conn("Redis_Cache")
 r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
 config_loader = None
@@ -28,15 +28,15 @@ def exist_item(item_id):
     return False

 def get_item_filepath(item_id):
-    filename = os.path.join(PASTES_FOLDER, item_id)
+    filename = os.path.join(ConfigLoader.get_items_dir(), item_id)
     return os.path.realpath(filename)

 def get_item_date(item_id, add_separator=False):
-    l_directory = item_id.split('/')
+    l_dir = item_id.split('/')
     if add_separator:
-        return '{}/{}/{}'.format(l_directory[-4], l_directory[-3], l_directory[-2])
+        return f'{l_dir[-4]}/{l_dir[-3]}/{l_dir[-2]}'
     else:
-        return '{}{}{}'.format(l_directory[-4], l_directory[-3], l_directory[-2])
+        return f'{l_dir[-4]}{l_dir[-3]}{l_dir[-2]}'

 def get_basename(item_id):
     return os.path.basename(item_id)
@ -53,17 +53,17 @@ def get_item_domain(item_id):
|
|||
return item_id[19:-36]
|
||||
|
||||
def get_item_content_binary(item_id):
|
||||
item_full_path = os.path.join(PASTES_FOLDER, item_id)
|
||||
item_full_path = os.path.join(ConfigLoader.get_items_dir(), item_id)
|
||||
try:
|
||||
with gzip.open(item_full_path, 'rb') as f:
|
||||
item_content = f.read()
|
||||
except Exception as e:
|
||||
print(e)
|
||||
item_content = ''
|
||||
item_content = b''
|
||||
return item_content
|
||||
|
||||
def get_item_content(item_id):
|
||||
item_full_path = os.path.join(PASTES_FOLDER, item_id)
|
||||
item_full_path = os.path.join(ConfigLoader.get_items_dir(), item_id)
|
||||
try:
|
||||
item_content = r_cache.get(item_full_path)
|
||||
except UnicodeDecodeError:
|
||||
|
@ -84,7 +84,7 @@ def get_item_content(item_id):
|
|||
def get_item_mimetype(item_id):
|
||||
return magic.from_buffer(get_item_content(item_id), mime=True)
|
||||
|
||||
#### TREE CHILD/FATHER ####
|
||||
# # # # TREE CHILD/FATHER # # # #
|
||||
def is_father(item_id):
|
||||
return r_serv_metadata.exists('paste_children:{}'.format(item_id))
|
||||
|
||||
|
@ -127,6 +127,18 @@ def is_domain_root(item_id):
|
|||
def get_item_url(item_id):
|
||||
return r_serv_metadata.hget(f'paste_metadata:{item_id}', 'real_link')
|
||||
|
||||
def get_item_har(item_id):
|
||||
har = '/'.join(item_id.rsplit('/')[-4:])
|
||||
har = f'{har}.json'
|
||||
path = os.path.join(ConfigLoader.get_hars_dir(), har)
|
||||
if os.path.isfile(path):
|
||||
return har
|
||||
|
||||
def get_item_har_content(har):
|
||||
with open(har, 'rb') as f:
|
||||
har_content = f.read()
|
||||
return har_content
|
||||
|
||||
def get_nb_children(item_id):
|
||||
return r_serv_metadata.scard('paste_children:{}'.format(item_id))
|
||||
|
||||
|
@ -140,7 +152,7 @@ def get_item_children(item_id):
|
|||
# # TODO: handle domain last origin in domain lib
|
||||
def _delete_node(item_id):
|
||||
# only if item isn't deleted
|
||||
#if is_crawled(item_id):
|
||||
# if is_crawled(item_id):
|
||||
# r_serv_metadata.hrem('paste_metadata:{}'.format(item_id), 'real_link')
|
||||
for children_id in get_item_children(item_id):
|
||||
r_serv_metadata.hdel('paste_metadata:{}'.format(children_id), 'father')
|
||||
|
@ -210,7 +222,7 @@ def _get_dir_source_name(directory, source_name=None, l_sources_name=set(), filt
|
|||
else:
|
||||
for src_name in l_dir:
|
||||
if len(src_name) == 4:
|
||||
#try:
|
||||
# try:
|
||||
int(src_name)
|
||||
to_add = os.path.join(source_name)
|
||||
# filter sources, remove first directory
|
||||
|
@ -218,7 +230,7 @@ def _get_dir_source_name(directory, source_name=None, l_sources_name=set(), filt
|
|||
to_add = to_add.replace('archive/', '').replace('alerts/', '')
|
||||
l_sources_name.add(to_add)
|
||||
return l_sources_name
|
||||
#except:
|
||||
# except:
|
||||
# pass
|
||||
if source_name:
|
||||
src_name = os.path.join(source_name, src_name)
|
||||
|
@ -227,7 +239,7 @@ def _get_dir_source_name(directory, source_name=None, l_sources_name=set(), filt
|
|||
|
||||
|
||||
def get_all_items_sources(filter_dir=False, r_list=False):
|
||||
res = _get_dir_source_name(PASTES_FOLDER, filter_dir=filter_dir)
|
||||
res = _get_dir_source_name(ConfigLoader.get_items_dir(), filter_dir=filter_dir)
|
||||
if res:
|
||||
if r_list:
|
||||
res = list(res)
|
||||
|
|
|
@@ -52,9 +52,9 @@ class CryptoCurrency(AbstractSubtypeObject):

     def get_link(self, flask_context=False):
         if flask_context:
-            url = url_for('correlation.show_correlation', object_type=self.type, type_id=self.subtype, correlation_id=self.id)
+            url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id)
         else:
-            url = f'{baseurl}/correlation/show_correlation?object_type={self.type}&type_id={self.subtype}&correlation_id={self.id}'
+            url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}'
         return url

     def get_svg_icon(self):
@@ -89,7 +89,11 @@ class CryptoCurrency(AbstractSubtypeObject):
         return obj

     def get_meta(self, options=set()):
-        return self._get_meta()
+        meta = self._get_meta()
+        meta['id'] = self.id
+        meta['subtype'] = self.subtype
+        meta['tags'] = self.get_tags()
+        return meta
99  bin/lib/objects/Cves.py  Executable file
@@ -0,0 +1,99 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*

import os
import sys

from flask import url_for

sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_daterange_object import AbstractDaterangeObject
from packages import Date

config_loader = ConfigLoader()
r_objects = config_loader.get_db_conn("Kvrocks_Objects")
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
config_loader = None


################################################################################
################################################################################
################################################################################

# # TODO: COMPLETE CLASS

class Cve(AbstractDaterangeObject):
    """
    AIL Cve Object.
    """

    def __init__(self, id):
        super(Cve, self).__init__('cve', id)

    # def get_ail_2_ail_payload(self):
    #     payload = {'raw': self.get_gzip_content(b64=True),
    #                 'compress': 'gzip'}
    #     return payload

    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        # # TODO:
        pass

    def get_link(self, flask_context=False):
        if flask_context:
            url = url_for('correlation.show_correlation', type=self.type, id=self.id)
        else:
            url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
        return url

    # TODO # CHANGE COLOR
    def get_svg_icon(self):
        return {'style': 'fas', 'icon': '\uf188', 'color': '#1E88E5', 'radius': 5}

    # TODO # TODO # TODO # TODO # TODO # TODO # TODO # TODO
    def get_misp_object(self):
        pass

    def get_meta(self, options=set()):
        meta = self._get_meta(options=options)
        meta['id'] = self.id
        meta['subtype'] = self.subtype
        meta['tags'] = self.get_tags()
        return meta

    def add(self, date, item_id):
        self._add(date, item_id)


# TODO # TODO # TODO # TODO # TODO # TODO # TODO # TODO # TODO # TODO
def get_all_cves():
    cves = []
    return cves

def get_cves_by_date(date):
    # return r_objects.zrange(f'cve:date:{date}', 0, -1)
    return set(r_objects.hkeys(f'cve:date:{date}'))

def get_cves_by_daterange(date_from, date_to):
    cves = set()
    for date in Date.substract_date(date_from, date_to):
        cves |= get_cves_by_date(date)
    return cves

def get_cves_meta(cves_id, options=set()):
    dict_cve = {}
    for cve_id in cves_id:
        cve = Cve(cve_id)
        dict_cve[cve_id] = cve.get_meta(options=options)
    return dict_cve

def api_get_cves_meta_by_daterange(date_from, date_to):
    date = Date.sanitise_date_range(date_from, date_to)
    return get_cves_meta(get_cves_by_daterange(date['date_from'], date['date_to']), options=['sparkline'])

# if __name__ == '__main__':
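A short hedged sketch of how the new Cve object might be used by a module once a CVE identifier is spotted in an item; the CVE id, date and item id below are illustrative, not taken from the commit:

from lib.objects.Cves import Cve, get_cves_meta

cve = Cve('CVE-2022-1234')
# records first_seen/last_seen, the daily counter and the item/domain correlations
cve.add('20220922', 'crawled/2022/09/22/example.onion/uuid')

# summary metadata for a set of CVE ids, including the 6-day sparkline
print(get_cves_meta(['CVE-2022-1234'], options=['sparkline']))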
@ -69,9 +69,9 @@ class Decoded(AbstractObject):
|
|||
|
||||
def get_link(self, flask_context=False):
|
||||
if flask_context:
|
||||
url = url_for('correlation.show_correlation', object_type="decoded", correlation_id=self.id)
|
||||
url = url_for('correlation.show_correlation', type="decoded", id=self.id)
|
||||
else:
|
||||
url = f'{baseurl}/correlation/show_correlation?object_type={self.type}&correlation_id={self.id}'
|
||||
url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
|
||||
return url
|
||||
|
||||
def get_svg_icon(self):
|
||||
|
@ -90,7 +90,7 @@ class Decoded(AbstractObject):
|
|||
return {'style': 'fas', 'icon': icon, 'color': '#88CCEE', 'radius':5}
|
||||
|
||||
'''
|
||||
Return the estimed type of a given decoded item.
|
||||
Return the estimated type of a given decoded item.
|
||||
|
||||
:param sha1_string: sha1_string
|
||||
'''
|
||||
|
@ -170,8 +170,11 @@ class Decoded(AbstractObject):
|
|||
if date > last_seen:
|
||||
self.set_last_seen(date)
|
||||
|
||||
def get_meta(self):
|
||||
pass
|
||||
def get_meta(self, options=set()):
|
||||
meta = {'id': self.id,
|
||||
'subtype': self.subtype,
|
||||
'tags': self.get_tags()}
|
||||
return meta
|
||||
|
||||
def get_meta_vt(self):
|
||||
meta = {}
|
||||
|
@ -209,7 +212,7 @@ class Decoded(AbstractObject):
|
|||
|
||||
def is_seen_this_day(self, date):
|
||||
for decoder in get_decoders_names():
|
||||
if r_metadata.zscore(f'{decoder_name}_date:{date}', self.id):
|
||||
if r_metadata.zscore(f'{decoder}_date:{date}', self.id):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
@ -324,6 +327,9 @@ class Decoded(AbstractObject):
|
|||
#######################################################################################
|
||||
#######################################################################################
|
||||
|
||||
def is_vt_enabled(self):
|
||||
return VT_ENABLED
|
||||
|
||||
def set_vt_report(self, report):
|
||||
r_metadata.hset(f'metadata_hash:{self.id}', 'vt_report', report)
|
||||
|
||||
|
@ -354,7 +360,6 @@ class Decoded(AbstractObject):
|
|||
print(report)
|
||||
return report
|
||||
elif response.status_code == 403:
|
||||
Flask_config.vt_enabled = False
|
||||
return 'Virustotal key is incorrect (e.g. for public API not for virustotal intelligence), authentication failed'
|
||||
elif response.status_code == 204:
|
||||
return 'Rate Limited'
|
||||
|
|
|
@ -4,18 +4,31 @@
|
|||
import os
|
||||
import sys
|
||||
import time
|
||||
import zipfile
|
||||
|
||||
from datetime import datetime
|
||||
from flask import url_for
|
||||
from io import BytesIO
|
||||
from pymisp import MISPObject
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from lib import ConfigLoader
|
||||
from lib.objects.abstract_object import AbstractObject
|
||||
|
||||
from lib.item_basic import get_item_children, get_item_date, get_item_url
|
||||
from lib.item_basic import get_item_children, get_item_date, get_item_url, get_item_har
|
||||
from lib import data_retention_engine
|
||||
|
||||
config_loader = ConfigLoader()
|
||||
r_onion = config_loader.get_redis_conn("ARDB_Onion")
|
||||
from packages import Date
|
||||
|
||||
config_loader = ConfigLoader.ConfigLoader()
|
||||
r_crawler = config_loader.get_db_conn("Kvrocks_Crawler")
|
||||
|
||||
r_metadata = config_loader.get_redis_conn("ARDB_Metadata") ######################################
|
||||
|
||||
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
|
||||
config_loader = None
|
||||
|
||||
|
||||
|
@ -42,36 +55,37 @@ class Domain(AbstractObject):
|
|||
if str(self.id).endswith('.onion'):
|
||||
return 'onion'
|
||||
else:
|
||||
return 'regular'
|
||||
return 'web'
|
||||
|
||||
def exists(self):
|
||||
return r_onion.exists(f'{self.domain_type}_metadata:{self.id}')
|
||||
return r_crawler.exists(f'domain:meta:{self.id}')
|
||||
|
||||
def get_first_seen(self, r_int=False, separator=True):
|
||||
first_seen = r_onion.hget(f'{self.domain_type}_metadata:{self.id}', 'first_seen')
|
||||
first_seen = r_crawler.hget(f'domain:meta:{self.id}', 'first_seen')
|
||||
if first_seen:
|
||||
if separator:
|
||||
first_seen = f'{first_seen[0:4]}/{first_seen[4:6]}/{first_seen[6:8]}'
|
||||
elif r_int==True:
|
||||
if r_int:
|
||||
first_seen = int(first_seen)
|
||||
elif separator:
|
||||
first_seen = f'{first_seen[0:4]}/{first_seen[4:6]}/{first_seen[6:8]}'
|
||||
return first_seen
|
||||
|
||||
def get_last_check(self, r_int=False, separator=True):
|
||||
last_check = r_onion.hget(f'{self.domain_type}_metadata:{self.id}', 'last_check')
|
||||
last_check = r_crawler.hget(f'domain:meta:{self.id}', 'last_check')
|
||||
if last_check is not None:
|
||||
if separator:
|
||||
last_check = f'{last_check[0:4]}/{last_check[4:6]}/{last_check[6:8]}'
|
||||
elif r_format=="int":
|
||||
if r_int:
|
||||
last_check = int(last_check)
|
||||
elif separator:
|
||||
last_check = f'{last_check[0:4]}/{last_check[4:6]}/{last_check[6:8]}'
|
||||
return last_check
|
||||
|
||||
def _set_first_seen(self, date):
|
||||
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'first_seen', date)
|
||||
r_crawler.hset(f'domain:meta:{self.id}', 'first_seen', date)
|
||||
|
||||
def _set_last_check(self, date):
|
||||
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'last_check', date)
|
||||
r_crawler.hset(f'domain:meta:{self.id}', 'last_check', date)
|
||||
|
||||
def update_daterange(self, date):
|
||||
date = int(date)
|
||||
first_seen = self.get_first_seen(r_int=True)
|
||||
last_check = self.get_last_check(r_int=True)
|
||||
if not first_seen:
|
||||
|
@ -82,65 +96,101 @@ class Domain(AbstractObject):
|
|||
elif int(last_check) < date:
|
||||
self._set_last_check(date)
|
||||
|
||||
def get_last_origin(self):
|
||||
return r_onion.hget(f'{self.domain_type}_metadata:{self.id}', 'paste_parent')
|
||||
def get_last_origin(self, obj=False):
|
||||
origin = {'item': r_crawler.hget(f'domain:meta:{self.id}', 'last_origin')}
|
||||
if obj and origin['item']:
|
||||
if origin['item'] != 'manual' and origin['item'] != 'auto':
|
||||
item_id = origin['item']
|
||||
origin['domain'] = r_metadata.hget(f'paste_metadata:{item_id}', 'domain')
|
||||
origin['url'] = r_metadata.hget(f'paste_metadata:{item_id}', 'url')
|
||||
return origin
|
||||
|
||||
def set_last_origin(self, origin_id):
|
||||
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'paste_parent', origin_id)
|
||||
r_crawler.hset(f'domain:meta:{self.id}', 'last_origin', origin_id)
|
||||
|
||||
def is_up(self, ports=[]):
|
||||
if not ports:
|
||||
ports = self.get_ports()
|
||||
for port in ports:
|
||||
res = r_onion.zrevrange(f'crawler_history_{self.domain_type}:{self.id}:{port}', 0, 0, withscores=True)
|
||||
def is_up(self):
|
||||
res = r_crawler.zrevrange(f'domain:history:{self.id}', 0, 0, withscores=True)
|
||||
if res:
|
||||
item_core, epoch = res[0]
|
||||
try:
|
||||
epoch = int(item_core)
|
||||
except:
|
||||
print('True')
|
||||
int(item_core)
|
||||
except ValueError:
|
||||
return True
|
||||
print('False')
|
||||
return False
|
||||
|
||||
def was_up(self):
|
||||
return r_onion.hexists(f'{self.domain_type}_metadata:{self.id}', 'ports')
|
||||
return r_crawler.exists(f'domain:history:{self.id}')
|
||||
|
||||
def is_up_by_month(self, date_month):
|
||||
# FIXME DIRTY PATCH
|
||||
if r_crawler.exists(f'month_{self.domain_type}_up:{date_month}'):
|
||||
return r_crawler.sismember(f'month_{self.domain_type}_up:{date_month}', self.get_id())
|
||||
else:
|
||||
return False
|
||||
|
||||
def is_up_this_month(self):
|
||||
date_month = datetime.now().strftime("%Y%m")
|
||||
return self.is_up_by_month(date_month)
|
||||
|
||||
def is_down_by_day(self, date):
|
||||
# FIXME DIRTY PATCH
|
||||
if r_crawler.exists(f'{self.domain_type}_down:{date}'):
|
||||
return r_crawler.sismember(f'{self.domain_type}_down:{date}', self.id)
|
||||
else:
|
||||
return False
|
||||
|
||||
def is_down_today(self):
|
||||
date = datetime.now().strftime("%Y%m%d")
|
||||
return self.is_down_by_day(date)
|
||||
|
||||
def is_up_by_epoch(self, epoch):
|
||||
history = r_crawler.zrevrangebyscore(f'domain:history:{self.id}', int(epoch), int(epoch))
|
||||
if not history:
|
||||
return False
|
||||
else:
|
||||
history = history[0]
|
||||
try:
|
||||
int(history)
|
||||
return False
|
||||
except ValueError:
|
||||
return True
|
||||
|
||||
def get_ports(self, r_set=False):
|
||||
l_ports = r_onion.hget(f'{self.domain_type}_metadata:{self.id}', 'ports')
|
||||
l_ports = r_crawler.hget(f'domain:meta:{self.id}', 'ports')
|
||||
if l_ports:
|
||||
l_ports = l_ports.split(";")
|
||||
else:
|
||||
l_ports = []
|
||||
if r_set:
|
||||
return set(l_ports)
|
||||
else:
|
||||
return l_ports
|
||||
return []
|
||||
|
||||
def _set_ports(self, ports):
|
||||
ports = ';'.join(ports)
|
||||
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'ports', ports)
|
||||
ports = ';'.join(str(p) for p in ports)
|
||||
r_crawler.hset(f'domain:meta:{self.id}', 'ports', ports)
|
||||
|
||||
def add_ports(self, port):
|
||||
ports = self.get_ports(r_set=True)
|
||||
ports.add(port)
|
||||
self._set_ports(ports)
|
||||
|
||||
-    def get_history_by_port(self, port, status=False, root=False):
-        '''
+    def get_history(self, status=False, root=False):
+        """
         Return .

         :return:
         :rtype: list of tuple (item_core, epoch)
-        '''
-        history_tuple = r_onion.zrange(f'crawler_history_{self.domain_type}:{self.id}:{port}', 0, -1, withscores=True)
+        """
+        history_tuple = r_crawler.zrange(f'domain:history:{self.id}', 0, -1, withscores=True)
         history = []
         for root_id, epoch in history_tuple:
             dict_history = {}
             epoch = int(epoch)  # force int
             dict_history["epoch"] = epoch
-            dict_history["date"] = time.strftime('%Y/%m/%d - %H:%M.%S', time.gmtime(epoch_val))
+            dict_history["date"] = time.strftime('%Y/%m/%d - %H:%M.%S', time.gmtime(epoch))
             try:
-                int(root_item)
+                int(root_id)
                 if status:
                     dict_history['status'] = False
             except ValueError:
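The history migration above replaces the per-port crawler_history_… keys with a single domain:history:{domain} sorted set. The convention used by is_up, was_up and get_history is that the member is the root item id when the crawl succeeded and the epoch itself when the domain was down, so a member that parses as an integer means "down". A hedged sketch of reading that structure (domain name and epoch are illustrative):

import redis

r_crawler = redis.Redis(decode_responses=True)  # assumes the Kvrocks_Crawler connection

def domain_status_at(domain, epoch):
    # member stored at this epoch: root item id (up) or the epoch itself (down)
    entries = r_crawler.zrangebyscore(f'domain:history:{domain}', epoch, epoch)
    if not entries:
        return None
    try:
        int(entries[0])   # an integer member means the domain was down
        return 'down'
    except ValueError:
        return 'up'

print(domain_status_at('example.onion', 1663848000))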
@ -152,30 +202,31 @@ class Domain(AbstractObject):
|
|||
return history
|
||||
|
||||
def get_languages(self):
|
||||
return r_onion.smembers(f'domain:language:{self.id}')
|
||||
return r_crawler.smembers(f'domain:language:{self.id}')
|
||||
|
||||
def get_meta_keys(self):
|
||||
return ['type', 'first_seen', 'last_check', 'last_origin', 'ports', 'status', 'tags', 'languages']
|
||||
|
||||
# options: set of optional meta fields
|
||||
def get_meta(self, options=set()):
|
||||
meta = {}
|
||||
meta['type'] = self.domain_type
|
||||
meta['first_seen'] = self.get_first_seen()
|
||||
meta['last_check'] = self.get_last_check()
|
||||
meta['tags'] = self.get_tags(r_list=True)
|
||||
meta['ports'] = self.get_ports()
|
||||
meta['status'] = self.is_up(ports=meta['ports'])
|
||||
meta = {'type': self.domain_type,
|
||||
'id': self.id,
|
||||
'domain': self.id, # TODO Remove me -> Fix templates
|
||||
'first_seen': self.get_first_seen(),
|
||||
'last_check': self.get_last_check(),
|
||||
'tags': self.get_tags(r_list=True),
|
||||
'status': self.is_up()
|
||||
}
|
||||
# meta['ports'] = self.get_ports()
|
||||
|
||||
if 'last_origin' in options:
|
||||
meta['last_origin'] = self.get_last_origin()
|
||||
#meta['is_tags_safe'] = ##################################
|
||||
meta['last_origin'] = self.get_last_origin(obj=True)
|
||||
# meta['is_tags_safe'] = ##################################
|
||||
if 'languages' in options:
|
||||
meta['languages'] = self.get_languages()
|
||||
#meta['screenshot'] =
|
||||
# meta['screenshot'] =
|
||||
return meta
|
||||
|
||||
|
||||
# # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
|
||||
def delete(self):
|
||||
# # TODO:
|
||||
|
@ -196,12 +247,12 @@ class Domain(AbstractObject):
|
|||
else:
|
||||
style = 'fab'
|
||||
icon = '\uf13b'
|
||||
return {'style': style, 'icon': icon, 'color':color, 'radius':5}
|
||||
return {'style': style, 'icon': icon, 'color': color, 'radius': 5}
|
||||
|
||||
def is_crawled_item(self, item_id):
|
||||
domain_lenght = len(self.id)
|
||||
if len(item_id) > (domain_lenght+48):
|
||||
if item_id[-36-domain_lenght:-36] == self.id:
|
||||
domain_length = len(self.id)
|
||||
if len(item_id) > (domain_length+48):
|
||||
if item_id[-36-domain_length:-36] == self.id:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
@ -215,169 +266,231 @@ class Domain(AbstractObject):
|
|||
for item_id in get_item_children(root_id):
|
||||
if self.is_crawled_item(item_id):
|
||||
crawled_items.append(item_id)
|
||||
crawled_items.extend(self.get_crawled_items_children(self.id, item_id))
|
||||
crawled_items.extend(self.get_crawled_items_children(item_id))
|
||||
return crawled_items
|
||||
|
||||
def get_all_urls(self, date=False): ## parameters to add first_seen/last_seen ??????????????????????????????
|
||||
def get_last_item_root(self):
|
||||
root_item = r_crawler.zrevrange(f'domain:history:{self.id}', 0, 0, withscores=True)
|
||||
if not root_item:
|
||||
return None
|
||||
root_item = root_item[0][0]
|
||||
try:
|
||||
int(root_item)
|
||||
return None
|
||||
except ValueError:
|
||||
pass
|
||||
return root_item
|
||||
|
||||
def get_item_root_by_epoch(self, epoch):
|
||||
root_item = r_crawler.zrevrangebyscore(f'domain:history:{self.id}', int(epoch), int(epoch), withscores=True)
|
||||
if not root_item:
|
||||
return None
|
||||
root_item = root_item[0][0]
|
||||
try:
|
||||
int(root_item)
|
||||
return None
|
||||
except ValueError:
|
||||
pass
|
||||
return root_item
|
||||
|
||||
def get_crawled_items_by_epoch(self, epoch=None):
|
||||
if epoch:
|
||||
root_item = self.get_item_root_by_epoch(epoch)
|
||||
else:
|
||||
root_item = self.get_last_item_root()
|
||||
if root_item:
|
||||
return self.get_crawled_items(root_item)
|
||||
|
||||
# TODO FIXME
|
||||
def get_all_urls(self, date=False, epoch=None):
|
||||
if date:
|
||||
urls = {}
|
||||
else:
|
||||
urls = set()
|
||||
for port in self.get_ports():
|
||||
for history in self.get_history_by_port(port, root=True):
|
||||
if history.get('root'):
|
||||
for item_id in self.get_crawled_items(history.get('root')):
|
||||
|
||||
items = self.get_crawled_items_by_epoch(epoch=epoch)
|
||||
if items:
|
||||
for item_id in items:
|
||||
url = get_item_url(item_id)
|
||||
if url:
|
||||
if date:
|
||||
item_date = int(get_item_date(item_id))
|
||||
if url not in urls:
|
||||
urls[url] = {'first_seen': item_date,'last_seen': item_date}
|
||||
urls[url] = {'first_seen': item_date, 'last_seen': item_date}
|
||||
else: # update first_seen / last_seen
|
||||
if item_date < urls[url]['first_seen']:
|
||||
all_url[url]['first_seen'] = item_date
|
||||
urls[url]['first_seen'] = item_date
|
||||
if item_date > urls[url]['last_seen']:
|
||||
all_url[url]['last_seen'] = item_date
|
||||
urls[url]['last_seen'] = item_date
|
||||
else:
|
||||
urls.add(url)
|
||||
return urls
|
||||
|
||||
def get_misp_object(self):
|
||||
def get_misp_object(self, epoch=None):
|
||||
# create domain-ip obj
|
||||
obj_attrs = []
|
||||
obj = MISPObject('domain-crawled', standalone=True)
|
||||
obj.first_seen = self.get_first_seen()
|
||||
obj.last_seen = self.get_last_check()
|
||||
|
||||
obj_attrs.append( obj.add_attribute('domain', value=self.id) )
|
||||
urls = self.get_all_urls(date=True)
|
||||
obj_attrs.append(obj.add_attribute('domain', value=self.id))
|
||||
urls = self.get_all_urls(date=True, epoch=epoch)
|
||||
for url in urls:
|
||||
attribute = obj.add_attribute('url', value=url)
|
||||
attribute.first_seen = str(urls[url]['first_seen'])
|
||||
attribute.last_seen = str(urls[url]['last_seen'])
|
||||
obj_attrs.append( attribute )
|
||||
obj_attrs.append(attribute)
|
||||
for obj_attr in obj_attrs:
|
||||
for tag in self.get_tags():
|
||||
obj_attr.add_tag(tag)
|
||||
return obj
|
||||
|
||||
+    # TODO ADD MISP Event Export
+    # TODO DOWN DOMAIN
+    def get_download_zip(self, epoch=None):
+        hars_dir = ConfigLoader.get_hars_dir()
+        items_dir = ConfigLoader.get_items_dir()
+        screenshots_dir = ConfigLoader.get_screenshots_dir()
+        items = self.get_crawled_items_by_epoch(epoch=epoch)
+        if not items:
+            return None
+        map_file = 'ITEM ID : URL'
+        # zip buffer
+        zip_buffer = BytesIO()
+        with zipfile.ZipFile(zip_buffer, "a") as zf:
+            for item_id in items:
+                url = get_item_url(item_id)
+                basename = os.path.basename(item_id)
+                # Item
+                _write_in_zip_buffer(zf, os.path.join(items_dir, item_id), f'{basename}.gz')
+                map_file = map_file + f'\n{item_id} : {url}'
+                # HAR
+                har = get_item_har(item_id)
+                if har:
+                    print(har)
+                    _write_in_zip_buffer(zf, os.path.join(hars_dir, har), f'{basename}.json')
+                # Screenshot
+                screenshot = self._get_external_correlation('item', '', item_id, 'screenshot')
+                if screenshot:
+                    screenshot = screenshot['screenshot'].pop()[1:]
+                    screenshot = os.path.join(screenshot[0:2], screenshot[2:4], screenshot[4:6], screenshot[6:8],
+                                              screenshot[8:10], screenshot[10:12], screenshot[12:])
+                    _write_in_zip_buffer(zf, os.path.join(screenshots_dir, f'{screenshot}.png'), f'{basename}.png')
+
+            zf.writestr('_URL_MAP_', BytesIO(map_file.encode()).getvalue())
+            misp_object = self.get_misp_object().to_json().encode()
+            zf.writestr('misp.json', BytesIO(misp_object).getvalue())
+        zip_buffer.seek(0)
+        return zip_buffer
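get_download_zip returns an in-memory BytesIO archive rather than writing to disk. A hedged sketch of how a caller, for example a Flask view, might consume it; the Flask 2.x send_file signature, the endpoint context and the filename are assumptions for illustration:

from flask import send_file

# `domain` is assumed to be a lib.objects.Domains.Domain instance
zip_buffer = domain.get_download_zip()
if zip_buffer:
    # stream the archive to the client without touching the disk
    return send_file(zip_buffer, as_attachment=True,
                     download_name=f'{domain.get_id()}.zip')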
||||
def add_language(self, language):
|
||||
r_onion.sadd('all_domains_languages', language)
|
||||
r_onion.sadd(f'all_domains_languages:{self.domain_type}', language)
|
||||
r_onion.sadd(f'language:domains:{self.domain_type}:{language}', self.id)
|
||||
r_onion.sadd(f'domain:language:{self.id}', language)
|
||||
|
||||
r_crawler.sadd('all_domains_languages', language)
|
||||
r_crawler.sadd(f'all_domains_languages:{self.domain_type}', language)
|
||||
r_crawler.sadd(f'language:domains:{self.domain_type}:{language}', self.id)
|
||||
r_crawler.sadd(f'domain:language:{self.id}', language)
|
||||
|
||||
############################################################################
|
||||
############################################################################
|
||||
|
||||
|
||||
def create(self, first_seen, last_check, ports, status, tags, languages):
|
||||
def create(self, first_seen, last_check, status, tags, languages):
|
||||
|
||||
|
||||
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'first_seen', first_seen)
|
||||
r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'last_check', last_check)
|
||||
r_crawler.hset(f'domain:meta:{self.id}', 'first_seen', first_seen)
|
||||
r_crawler.hset(f'domain:meta:{self.id}', 'last_check', last_check)
|
||||
|
||||
for language in languages:
|
||||
self.add_language(language)
|
||||
|
||||
#### CRAWLER ####
|
||||
|
||||
# add root_item to history
|
||||
# if domain down -> root_item = epoch
|
||||
def _add_history_root_item(self, root_item, epoch, port):
|
||||
def _add_history_root_item(self, root_item, epoch):
|
||||
# Create/Update crawler history
|
||||
r_onion.zadd(f'crawler_history_{self.domain_type}:{self.id}:{port}', epoch, int(root_item))
|
||||
r_crawler.zadd(f'domain:history:{self.id}', {root_item: epoch})
|
||||
|
||||
# if domain down -> root_item = epoch
|
||||
def add_history(self, epoch, port, root_item=None, date=None):
|
||||
def add_history(self, epoch, root_item=None, date=None):
|
||||
if not date:
|
||||
date = time.strftime('%Y%m%d', time.gmtime(epoch))
|
||||
try:
|
||||
int(root_item)
|
||||
except ValueError:
|
||||
root_item = None
|
||||
root_item = int(root_item)
|
||||
status = False
|
||||
except (ValueError, TypeError):
|
||||
status = True
|
||||
|
||||
data_retention_engine.update_object_date('domain', self.domain_type, date)
|
||||
update_first_object_date(date, self.domain_type)
|
||||
update_last_object_date(date, self.domain_type)
|
||||
# UP
|
||||
if root_item:
|
||||
r_onion.srem(f'full_{self.domain_type}_down', self.id)
|
||||
r_onion.sadd(f'full_{self.domain_type}_up', self.id)
|
||||
r_onion.sadd(f'{self.domain_type}_up:{date}', self.id) # # TODO: -> store first day
|
||||
r_onion.sadd(f'month_{self.domain_type}_up:{date[0:6]}', self.id) # # TODO: -> store first month
|
||||
self._add_history_root_item(root_item, epoch, port)
|
||||
if status:
|
||||
r_crawler.srem(f'full_{self.domain_type}_down', self.id)
|
||||
r_crawler.sadd(f'full_{self.domain_type}_up', self.id)
|
||||
r_crawler.sadd(f'{self.domain_type}_up:{date}', self.id) # # TODO: -> store first day
|
||||
r_crawler.sadd(f'month_{self.domain_type}_up:{date[0:6]}', self.id) # # TODO: -> store first month
|
||||
self._add_history_root_item(root_item, epoch)
|
||||
else:
|
||||
if port:
|
||||
r_onion.sadd(f'{self.domain_type}_down:{date}', self.id) # # TODO: -> store first month
|
||||
self._add_history_root_item(epoch, epoch, port)
|
||||
r_crawler.sadd(f'{self.domain_type}_down:{date}', self.id)
|
||||
if self.was_up():
|
||||
self._add_history_root_item(epoch, epoch)
|
||||
else:
|
||||
r_onion.sadd(f'{self.domain_type}_down:{date}', self.id)
|
||||
if not self.was_up():
|
||||
r_onion.sadd(f'full_{self.domain_type}_down', self.id)
|
||||
r_crawler.sadd(f'full_{self.domain_type}_down', self.id)
|
||||
|
||||
def add_crawled_item(self, url, port, item_id, item_father):
|
||||
# TODO RENAME PASTE_METADATA
|
||||
def add_crawled_item(self, url, item_id, item_father):
|
||||
r_metadata.hset(f'paste_metadata:{item_id}', 'father', item_father)
|
||||
r_metadata.hset(f'paste_metadata:{item_id}', 'domain', f'{self.id}:{port}')
|
||||
r_metadata.hset(f'paste_metadata:{item_id}', 'domain', self.id) # FIXME REMOVE ME -> extract for real link ?????????
|
||||
r_metadata.hset(f'paste_metadata:{item_id}', 'real_link', url)
|
||||
# add this item_id to his father
|
||||
r_metadata.sadd(f'paste_children:{item_father}', item_id)
|
||||
|
||||
##-- CRAWLER --##
|
||||
|
||||
############################################################################
|
||||
# In memory zipfile
|
||||
def _write_in_zip_buffer(zf, path, filename):
|
||||
with open(path, "rb") as f:
|
||||
content = f.read()
|
||||
zf.writestr( filename, BytesIO(content).getvalue())
|
||||
|
||||
############################################################################
|
||||
############################################################################
|
||||
############################################################################
|
||||
|
||||
def get_all_domains_types():
|
||||
return ['onion', 'regular'] # i2p
|
||||
return ['onion', 'web'] # i2p
|
||||
|
||||
def get_all_domains_languages():
|
||||
return r_onion.smembers('all_domains_languages')
|
||||
return r_crawler.smembers('all_domains_languages')
|
||||
|
||||
def get_domains_up_by_type(domain_type):
|
||||
return r_onion.smembers(f'full_{domain_type}_up')
|
||||
return r_crawler.smembers(f'full_{domain_type}_up')
|
||||
|
||||
def get_domains_down_by_type(domain_type):
|
||||
return r_onion.smembers(f'full_{domain_type}_down')
|
||||
return r_crawler.smembers(f'full_{domain_type}_down')
|
||||
|
||||
def get_first_object_date(subtype, field=''):
|
||||
first_date = r_onion.zscore('objs:first_date', f'domain:{subtype}:{field}')
|
||||
if not first_date:
|
||||
first_date = 99999999
|
||||
return int(first_date)
|
||||
def get_domains_up_by_date(date, domain_type):
|
||||
return r_crawler.smembers(f'{domain_type}_up:{date}')
|
||||
|
||||
def get_last_object_date(subtype, field=''):
|
||||
last_date = r_onion.zscore('objs:last_date', f'domain:{subtype}:{field}')
|
||||
if not last_date:
|
||||
last_date = 0
|
||||
return int(last_date)
|
||||
def get_domains_down_by_date(date, domain_type):
|
||||
return r_crawler.smembers(f'{domain_type}_down:{date}')
|
||||
|
||||
def _set_first_object_date(date, subtype, field=''):
|
||||
return r_onion.zadd('objs:first_date', f'domain:{subtype}:{field}', date)
|
||||
|
||||
def _set_last_object_date(date, subtype, field=''):
|
||||
return r_onion.zadd('objs:last_date', f'domain:{subtype}:{field}', date)
|
||||
|
||||
def update_first_object_date(date, subtype, field=''):
|
||||
first_date = get_first_object_date(subtype, field=field)
|
||||
if int(date) < first_date:
|
||||
_set_first_object_date(date, subtype, field=field)
|
||||
return date
|
||||
else:
|
||||
return first_date
|
||||
|
||||
def update_last_object_date(date, subtype, field=''):
|
||||
last_date = get_last_object_date(subtype, field=field)
|
||||
if int(date) > last_date:
|
||||
_set_last_object_date(date, subtype, field=field)
|
||||
return date
|
||||
else:
|
||||
return last_date
|
||||
def get_domains_by_daterange(date_from, date_to, domain_type, up=True, down=False):
|
||||
date_domains = {}
|
||||
for date in Date.substract_date(date_from, date_to):
|
||||
domains = []
|
||||
if up:
|
||||
domains.extend(get_domains_up_by_date(date, domain_type))
|
||||
if down:
|
||||
domains.extend(get_domains_down_by_date(date, domain_type))
|
||||
if domains:
|
||||
date_domains[date] = list(domains)
|
||||
return date_domains
|
||||
|
||||
def get_domains_meta(domains):
|
||||
metas = []
|
||||
for domain in domains:
|
||||
dom = Domain(domain)
|
||||
metas.append(dom.get_meta())
|
||||
return metas
|
||||
|
||||
################################################################################
|
||||
################################################################################
|
||||
|
||||
#if __name__ == '__main__':
|
||||
if __name__ == '__main__':
|
||||
dom = Domain('')
|
||||
dom.get_download_zip()
|
||||
|
|
|
@ -3,10 +3,10 @@
|
|||
|
||||
import base64
|
||||
import gzip
|
||||
import magic
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import redis
|
||||
import cld3
|
||||
import html2text
|
||||
|
||||
|
@ -233,8 +233,9 @@ class Item(AbstractObject):
|
|||
return self.id[19:-36]
|
||||
|
||||
def get_screenshot(self):
|
||||
s = r_serv_metadata.hget(f'paste_metadata:{self.id}', 'screenshot')
|
||||
s = self.get_correlation('screenshot')
|
||||
if s:
|
||||
s = s['screenshot'].pop()[1:]
|
||||
return os.path.join(s[0:2], s[2:4], s[4:6], s[6:8], s[8:10], s[10:12], s[12:])
|
||||
|
||||
def get_har(self):
|
||||
|
@ -315,6 +316,11 @@ class Item(AbstractObject):
|
|||
all_languages.append(lang)
|
||||
return all_languages
|
||||
|
||||
def get_mimetype(self, content=None):
|
||||
if not content:
|
||||
content = self.get_content()
|
||||
return magic.from_buffer(content, mime=True)
|
||||
|
||||
############################################################################
|
||||
############################################################################
|
||||
|
||||
|
|
|
@ -41,14 +41,18 @@ class Pgp(AbstractSubtypeObject):
|
|||
pass
|
||||
|
||||
# # TODO:
|
||||
def get_meta(self):
|
||||
return None
|
||||
def get_meta(self, options=set()):
|
||||
meta = self._get_meta()
|
||||
meta['id'] = self.id
|
||||
meta['subtype'] = self.subtype
|
||||
meta['tags'] = self.get_tags()
|
||||
return meta
|
||||
|
||||
def get_link(self, flask_context=False):
|
||||
if flask_context:
|
||||
url = url_for('correlation.show_correlation', object_type=self.type, type_id=self.subtype, correlation_id=self.id)
|
||||
url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id)
|
||||
else:
|
||||
url = f'{baseurl}/correlation/show_correlation?object_type={self.type}&type_id={self.subtype}&correlation_id={self.id}'
|
||||
url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}'
|
||||
return url
|
||||
|
||||
def get_svg_icon(self):
|
||||
|
|
|
@ -1,14 +1,18 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
import base64
|
||||
import os
|
||||
import sys
|
||||
|
||||
from hashlib import sha256
|
||||
from io import BytesIO
|
||||
from flask import url_for
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
#from lib import Tag
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
from lib.objects.abstract_object import AbstractObject
|
||||
|
||||
|
@ -17,14 +21,15 @@ r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
|
|||
SCREENSHOT_FOLDER = config_loader.get_files_directory('screenshot')
|
||||
config_loader = None
|
||||
|
||||
|
||||
class Screenshot(AbstractObject):
|
||||
"""
|
||||
AIL Screenshot Object. (strings)
|
||||
"""
|
||||
|
||||
# ID = SHA256
|
||||
def __init__(self, id):
|
||||
super(Screenshot, self).__init__('screenshot', id)
|
||||
def __init__(self, screenshot_id):
|
||||
super(Screenshot, self).__init__('screenshot', screenshot_id)
|
||||
|
||||
# def get_ail_2_ail_payload(self):
|
||||
# payload = {'raw': self.get_gzip_content(b64=True),
|
||||
|
@ -41,13 +46,13 @@ class Screenshot(AbstractObject):
|
|||
|
||||
def get_link(self, flask_context=False):
|
||||
if flask_context:
|
||||
url = url_for('correlation.show_correlation', object_type=self.type, correlation_id=self.id)
|
||||
url = url_for('correlation.show_correlation', type=self.type, id=self.id)
|
||||
else:
|
||||
url = f'{baseurl}/correlation/show_correlation?object_type={self.type}&correlation_id={self.id}'
|
||||
url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
|
||||
return url
|
||||
|
||||
def get_svg_icon(self):
|
||||
return {'style': 'fas', 'icon': '\uf03e', 'color': '#E1F5DF', 'radius':5}
|
||||
return {'style': 'fas', 'icon': '\uf03e', 'color': '#E1F5DF', 'radius': 5}
|
||||
|
||||
def get_rel_path(self, add_extension=False):
|
||||
rel_path = os.path.join(self.id[0:2], self.id[2:4], self.id[4:6], self.id[6:8], self.id[8:10], self.id[10:12], self.id[12:])
|
||||
|
@ -77,12 +82,11 @@ class Screenshot(AbstractObject):
|
|||
return obj
|
||||
|
||||
def get_meta(self, options=set()):
|
||||
meta = {}
|
||||
meta['id'] = self.id
|
||||
meta = {'id': self.id}
|
||||
meta['img'] = get_screenshot_rel_path(self.id) ######### # TODO: Rename ME ??????
|
||||
meta['tags'] = self.get_tags(r_list=True)
|
||||
# TODO: ADD IN ABSTRACT CLASS
|
||||
#meta['is_tags_safe'] = Tag.is_tags_safe(metadata_dict['tags']) ################## # TODO: ADD IN ABSZTRACT CLASS
|
||||
#meta['is_tags_safe'] = Tag.is_tags_safe(metadata_dict['tags']) ################## # TODO: ADD IN ABSTRACT CLASS
|
||||
return meta
|
||||
|
||||
def get_screenshot_dir():
|
||||
|
@@ -106,5 +110,22 @@ def get_all_screenshots():
         screenshots.append(screenshot_id)
     return screenshots

+# FIXME STR SIZE LIMIT
+def create_screenshot(content, size_limit=5000000, b64=True, force=False):
+    size = (len(content)*3) / 4
+    if size <= size_limit or size_limit < 0 or force:
+        if b64:
+            content = base64.standard_b64decode(content.encode())
+        screenshot_id = sha256(content).hexdigest()
+        screenshot = Screenshot(screenshot_id)
+        if not screenshot.exists():
+            filepath = screenshot.get_filepath()
+            dirname = os.path.dirname(filepath)
+            if not os.path.exists(dirname):
+                os.makedirs(dirname)
+            with open(filepath, 'wb') as f:
+                f.write(content)
+        return screenshot
+    return None
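create_screenshot receives the image as a base64 string, so (len(content) * 3) / 4 estimates the decoded size before paying the cost of decoding it. A small hedged sketch of that check in isolation (the sample payload is illustrative):

import base64

raw = b'\x89PNG fake image bytes'                 # illustrative image bytes
b64 = base64.standard_b64encode(raw).decode()

estimated = (len(b64) * 3) / 4                     # decoded-size estimate, in bytes
print(estimated, len(raw))                         # estimate is within ~2 bytes of the real size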
|
||||
|
||||
#if __name__ == '__main__':
|
||||
|
|
|
@ -10,12 +10,14 @@ from pymisp import MISPObject
|
|||
|
||||
# sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
||||
import ConfigLoader
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
|
||||
|
||||
config_loader = ConfigLoader.ConfigLoader()
|
||||
config_loader = ConfigLoader()
|
||||
|
||||
config_loader = None
|
||||
|
||||
|
@ -44,9 +46,9 @@ class Username(AbstractSubtypeObject):
|
|||
|
||||
def get_link(self, flask_context=False):
|
||||
if flask_context:
|
||||
url = url_for('correlation.show_correlation', object_type=self.type, type_id=self.subtype, correlation_id=self.id)
|
||||
url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id)
|
||||
else:
|
||||
url = f'{baseurl}/correlation/show_correlation?object_type={self.type}&type_id={self.subtype}&correlation_id={self.id}'
|
||||
url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}'
|
||||
return url
|
||||
|
||||
def get_svg_icon(self):
|
||||
|
@ -61,6 +63,13 @@ class Username(AbstractSubtypeObject):
|
|||
icon = '\uf007'
|
||||
return {'style': style, 'icon': icon, 'color': '#4dffff', 'radius':5}
|
||||
|
||||
def get_meta(self, options=set()):
|
||||
meta = self._get_meta()
|
||||
meta['id'] = self.id
|
||||
meta['subtype'] = self.subtype
|
||||
meta['tags'] = self.get_tags()
|
||||
return meta
|
||||
|
||||
def get_misp_object(self):
|
||||
obj_attrs = []
|
||||
if self.subtype == 'telegram':
|
||||
|
|
139  bin/lib/objects/abstract_daterange_object.py  Executable file
@ -0,0 +1,139 @@
|
|||
# -*-coding:UTF-8 -*
|
||||
"""
|
||||
Base Class for AIL Objects
|
||||
"""
|
||||
|
||||
##################################
|
||||
# Import External packages
|
||||
##################################
|
||||
import os
|
||||
import sys
|
||||
from abc import abstractmethod, ABC
|
||||
|
||||
#from flask import url_for
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from lib.objects.abstract_object import AbstractObject
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
from lib.item_basic import is_crawled, get_item_domain
|
||||
|
||||
from packages import Date
|
||||
|
||||
# LOAD CONFIG
|
||||
config_loader = ConfigLoader()
|
||||
# r_metadata = config_loader.get_redis_conn("ARDB_Metadata")
|
||||
r_object = config_loader.get_db_conn("Kvrocks_Objects")
|
||||
config_loader = None
|
||||
|
||||
class AbstractDaterangeObject(AbstractObject, ABC):
|
||||
"""
|
||||
Abstract Subtype Object
|
||||
"""
|
||||
|
||||
def __init__(self, obj_type, id):
|
||||
""" Abstract for all the AIL object
|
||||
|
||||
:param obj_type: object type (item, ...)
|
||||
:param id: Object ID
|
||||
"""
|
||||
super().__init__(obj_type, id)
|
||||
|
||||
def exists(self):
|
||||
return r_object.exists(f'{self.type}:meta:{self.id}')
|
||||
|
||||
def get_first_seen(self, r_int=False):
|
||||
first_seen = r_object.hget(f'{self.type}:meta:{self.id}', 'first_seen')
|
||||
if r_int:
|
||||
if first_seen:
|
||||
return int(first_seen)
|
||||
else:
|
||||
return 99999999
|
||||
else:
|
||||
return first_seen
|
||||
|
||||
def get_last_seen(self, r_int=False):
|
||||
last_seen = r_object.hget(f'{self.type}:meta:{self.id}', 'last_seen')
|
||||
if r_int:
|
||||
if last_seen:
|
||||
return int(last_seen)
|
||||
else:
|
||||
return 0
|
||||
else:
|
||||
return last_seen
|
||||
|
||||
def get_nb_seen(self):
|
||||
return r_object.hget(f'{self.type}:meta:{self.id}', 'nb')
|
||||
|
||||
def get_nb_seen_by_date(self, date):
|
||||
nb = r_object.hget(f'{self.type}:date:{date}', self.id)
|
||||
if nb is None:
|
||||
return 0
|
||||
else:
|
||||
return int(nb)
|
||||
|
||||
def _get_meta(self, options=[]):
|
||||
meta_dict = {'first_seen': self.get_first_seen(),
|
||||
'last_seen': self.get_last_seen(),
|
||||
'nb_seen': self.get_nb_seen()}
|
||||
if 'sparkline' in options:
|
||||
meta_dict['sparkline'] = self.get_sparkline()
|
||||
return meta_dict
|
||||
|
||||
def set_first_seen(self, first_seen):
|
||||
r_object.hset(f'{self.type}:meta:{self.id}', 'first_seen', first_seen)
|
||||
|
||||
def set_last_seen(self, last_seen):
|
||||
r_object.hset(f'{self.type}:meta:{self.id}', 'last_seen', last_seen)
|
||||
|
||||
def update_daterange(self, date):
|
||||
date = int(date)
|
||||
# obj don't exit
|
||||
if not self.exists():
|
||||
self.set_first_seen(date)
|
||||
self.set_last_seen(date)
|
||||
else:
|
||||
first_seen = self.get_first_seen(r_int=True)
|
||||
last_seen = self.get_last_seen(r_int=True)
|
||||
if date < first_seen:
|
||||
self.set_first_seen(date)
|
||||
if date > last_seen:
|
||||
self.set_last_seen(date)
|
||||
|
||||
def get_sparkline(self):
|
||||
sparkline = []
|
||||
for date in Date.get_previous_date_list(6):
|
||||
sparkline.append(self.get_nb_seen_by_date(date))
|
||||
return sparkline
|
||||
|
||||
    def _add(self, date, item_id):
        if not self.exists():
            self.set_first_seen(date)
            self.set_last_seen(date)
            r_object.sadd(f'{self.type}:all', self.id)
        else:
            self.update_daterange(date)

        # NB Object seen by day
        r_object.hincrby(f'{self.type}:date:{date}', self.id, 1)
        r_object.zincrby(f'{self.type}:date:{date}', self.id, 1)  # # # # # # # # # #
        # NB Object seen
        r_object.hincrby(f'{self.type}:meta:{self.id}', 'nb', 1)

        # Correlations
        self.add_correlation('item', '', item_id)
        if is_crawled(item_id):  # Domain
            domain = get_item_domain(item_id)
            self.add_correlation('domain', '', domain)

    # TODO:ADD objects + Stats
    def _create(self, first_seen, last_seen):
        self.set_first_seen(first_seen)
        self.set_last_seen(last_seen)
        r_object.sadd(f'{self.type}:all', self.id)

    # TODO
    def _delete(self):
        pass
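AbstractDaterangeObject keeps its state in a few simple Kvrocks/Redis keys: {type}:meta:{id} (first_seen, last_seen, nb), {type}:date:{date} (per-day counters keyed by object id) and {type}:all (the set of known ids). A hedged sketch of how those keys look for the new cve type; all values shown are illustrative:

import redis

r_object = redis.Redis(decode_responses=True)  # assumes the Kvrocks_Objects connection

# per-object metadata hash
print(r_object.hgetall('cve:meta:CVE-2022-1234'))
# e.g. {'first_seen': '20220920', 'last_seen': '20220922', 'nb': '3'}

# per-day counter hash, one field per object id
print(r_object.hget('cve:date:20220922', 'CVE-2022-1234'))   # e.g. '2'

# registry of every id of this type
print(r_object.sismember('cve:all', 'CVE-2022-1234'))         # True once added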
@ -144,7 +144,7 @@ class AbstractObject(ABC):
|
|||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_meta(self):
|
||||
def get_meta(self, options=set()):
|
||||
"""
|
||||
get Object metadata
|
||||
"""
|
||||
|
@ -165,6 +165,18 @@ class AbstractObject(ABC):
|
|||
def get_misp_object(self):
|
||||
pass
|
||||
|
||||
def _get_external_correlation(self, req_type, req_subtype, req_id, obj_type):
|
||||
"""
|
||||
Get object correlation
|
||||
"""
|
||||
return get_correlations(req_type, req_subtype, req_id, filter_types=[obj_type])
|
||||
|
||||
def get_correlation(self, obj_type):
|
||||
"""
|
||||
Get object correlation
|
||||
"""
|
||||
return get_correlations(self.type, self.subtype, self.id, filter_types=[obj_type])
|
||||
|
||||
def get_correlations(self):
|
||||
"""
|
||||
Get object correlations
|
||||
|
|
|
@ -20,6 +20,8 @@ from lib.objects.abstract_object import AbstractObject
|
|||
from lib.ConfigLoader import ConfigLoader
|
||||
from lib.item_basic import is_crawled, get_item_domain
|
||||
|
||||
from packages import Date
|
||||
|
||||
# LOAD CONFIG
|
||||
config_loader = ConfigLoader()
|
||||
r_metadata = config_loader.get_redis_conn("ARDB_Metadata")
|
||||
|
@ -115,6 +117,11 @@ class AbstractSubtypeObject(AbstractObject):
|
|||
if date > last_seen:
|
||||
self.set_last_seen(date)
|
||||
|
||||
def get_sparkline(self):
|
||||
sparkline = []
|
||||
for date in Date.get_previous_date_list(6):
|
||||
sparkline.append(self.get_nb_seen_by_date(date))
|
||||
return sparkline
|
||||
#
|
||||
# HANDLE Others objects ????
|
||||
#
|
||||
|
|
|
@ -12,11 +12,15 @@ from flask import url_for
|
|||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
from lib.ail_core import get_all_objects
|
||||
from lib import correlations_engine
|
||||
from lib import btc_ail
|
||||
from lib import Tag
|
||||
|
||||
from lib.objects.CryptoCurrencies import CryptoCurrency
|
||||
from lib.objects.Cves import Cve
|
||||
from lib.objects.Decodeds import Decoded
|
||||
from lib.objects.Domains import Domain
|
||||
from lib.objects.Items import Item
|
||||
|
@ -39,12 +43,11 @@ def is_valid_object_type(obj_type):
|
|||
|
||||
def sanitize_objs_types(objs):
|
||||
l_types = []
|
||||
print('sanitize')
|
||||
print(objs)
|
||||
print(get_all_objects())
|
||||
for obj in objs:
|
||||
if is_valid_object_type(obj):
|
||||
l_types.append(obj)
|
||||
if not l_types:
|
||||
l_types = get_all_objects()
|
||||
return l_types
|
||||
|
||||
def get_object(obj_type, subtype, id):
|
||||
|
@ -54,6 +57,8 @@ def get_object(obj_type, subtype, id):
|
|||
return Domain(id)
|
||||
elif obj_type == 'decoded':
|
||||
return Decoded(id)
|
||||
elif obj_type == 'cve':
|
||||
return Cve(id)
|
||||
elif obj_type == 'screenshot':
|
||||
return Screenshot(id)
|
||||
elif obj_type == 'cryptocurrency':
|
||||
|
@ -63,23 +68,48 @@ def get_object(obj_type, subtype, id):
|
|||
elif obj_type == 'username':
|
||||
return Username(id, subtype)
|
||||
|
||||
def exists_obj(obj_type, subtype, id):
|
||||
object = get_object(obj_type, subtype, id)
|
||||
return object.exists()
|
||||
def exists_obj(obj_type, subtype, obj_id):
|
||||
obj = get_object(obj_type, subtype, obj_id)
|
||||
if obj:
|
||||
return obj.exists()
|
||||
else:
|
||||
return False
|
||||
|
||||
def get_object_link(obj_type, subtype, id, flask_context=False):
|
||||
object = get_object(obj_type, subtype, id)
|
||||
return object.get_link(flask_context=flask_context)
|
||||
obj = get_object(obj_type, subtype, id)
|
||||
return obj.get_link(flask_context=flask_context)
|
||||
|
||||
def get_object_svg(obj_type, subtype, id):
|
||||
object = get_object(obj_type, subtype, id)
|
||||
return object.get_svg_icon()
|
||||
obj = get_object(obj_type, subtype, id)
|
||||
return obj.get_svg_icon()
|
||||
|
||||
def get_object_meta(obj_type, subtype, id, flask_context=False):
|
||||
object = get_object(obj_type, subtype, id)
|
||||
meta = object.get_meta()
|
||||
meta['icon'] = object.get_svg_icon()
|
||||
meta['link'] = object.get_link(flask_context=flask_context)
|
||||
def get_object_meta(obj_type, subtype, id, options=[], flask_context=False):
|
||||
obj = get_object(obj_type, subtype, id)
|
||||
meta = obj.get_meta(options=options)
|
||||
meta['icon'] = obj.get_svg_icon()
|
||||
meta['link'] = obj.get_link(flask_context=flask_context)
|
||||
return meta
|
||||
|
||||
def get_objects_meta(objs, options=[], flask_context=False):
|
||||
metas = []
|
||||
for obj_dict in objs:
|
||||
metas.append(get_object_meta(obj_dict['type'], obj_dict['subtype'], obj_dict['id'], options=options, flask_context=flask_context))
|
||||
return metas
|
||||
|
||||
+def get_object_card_meta(obj_type, subtype, id, related_btc=False):
+    obj = get_object(obj_type, subtype, id)
+    meta = obj.get_meta()
+    meta['icon'] = obj.get_svg_icon()
+    if subtype or obj_type == 'cve':
+        meta['sparkline'] = obj.get_sparkline()
+        if subtype == 'bitcoin' and related_btc:
+            meta["related_btc"] = btc_ail.get_bitcoin_info(obj.id)
+    if obj.get_type() == 'decoded':
+        meta["vt"] = obj.get_meta_vt()
+        meta["vt"]["status"] = obj.is_vt_enabled()
+    # TAGS MODAL
+    if obj.get_type() == 'screenshot' or obj.get_type() == 'decoded':
+        meta["add_tags_modal"] = Tag.get_modal_add_tags(obj.id, object_type=obj.get_type())
+    return meta
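With cve wired into the object factory above, any module or API endpoint can resolve a CVE by type and id through the generic helpers. A hedged usage sketch, assuming these helpers live in lib/ail_objects.py as in the usual AIL layout; the identifiers are illustrative:

from lib import ail_objects

# generic metadata lookup; subtype is empty for objects without subtypes (cve, decoded, ...)
meta = ail_objects.get_object_meta('cve', '', 'CVE-2022-1234', options=['sparkline'])
print(meta['id'], meta['link'], meta.get('sparkline'))

# card metadata used by the UI, which adds the sparkline for subtyped objects and CVEs
card = ail_objects.get_object_card_meta('cve', '', 'CVE-2022-1234')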
def get_ui_obj_tag_table_keys(obj_type):
|
||||
|
@ -203,7 +233,6 @@ def create_correlation_graph_nodes(nodes_set, obj_str_id, flask_context=True):
|
|||
dict_node['style']['node_radius'] = dict_node['style']['radius']
|
||||
# # TODO: # FIXME: in UI
|
||||
|
||||
dict_node['style']
|
||||
dict_node['text'] = obj_id
|
||||
if node_id == obj_str_id:
|
||||
dict_node["style"]["node_color"] = 'orange'
|
||||
|
|
|
@@ -36,17 +36,19 @@ def _regex_findall(redis_key, regex, item_content, r_set):
     all_items = re.findall(regex, item_content)
     if r_set:
         if len(all_items) > 1:
-            r_serv_cache.sadd(redis_key, *all_items)
+            for item in all_items:
+                r_serv_cache.sadd(redis_key, str(item))
             r_serv_cache.expire(redis_key, 360)
         elif all_items:
-            r_serv_cache.sadd(redis_key, all_items[0])
+            r_serv_cache.sadd(redis_key, str(all_items[0]))
             r_serv_cache.expire(redis_key, 360)
     else:
         if len(all_items) > 1:
-            r_serv_cache.lpush(redis_key, *all_items)
+            for item in all_items:
+                r_serv_cache.lpush(redis_key, str(item))
             r_serv_cache.expire(redis_key, 360)
         elif all_items:
-            r_serv_cache.lpush(redis_key, all_items[0])
+            r_serv_cache.lpush(redis_key, str(all_items[0]))
             r_serv_cache.expire(redis_key, 360)

 def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time=30, r_set=True):
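Casting each match to str() matters because re.findall returns tuples when the pattern contains capture groups, and redis-py refuses to store a tuple as-is. A hedged illustration of the difference; the pattern and text are illustrative only:

import re

text = 'user@example.com:hunter2'

# no capture group: findall returns plain strings
print(re.findall(r'[a-z]+@[a-z]+\.[a-z]+', text))          # ['user@example.com']

# with capture groups: findall returns tuples, which must be
# stringified (or flattened) before being pushed to Redis
print(re.findall(r'([a-z]+)@([a-z]+\.[a-z]+)', text))      # [('user', 'example.com')]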
@ -11,16 +11,16 @@ Search for API keys on an item content.
|
|||
|
||||
"""
|
||||
|
||||
import re
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN']))
|
||||
|
||||
# project packages
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages.Item import Item
|
||||
from lib import regex_helper
|
||||
from lib.objects.Items import Item
|
||||
|
||||
class ApiKey(AbstractModule):
|
||||
"""ApiKey module for AIL framework"""
|
||||
|
@ -28,13 +28,11 @@ class ApiKey(AbstractModule):
|
|||
def __init__(self):
|
||||
super(ApiKey, self).__init__()
|
||||
|
||||
self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
|
||||
|
||||
# # TODO: ENUM or dict
|
||||
|
||||
# TODO improve REGEX
|
||||
#r'(?<![A-Z0-9])=[A-Z0-9]{20}(?![A-Z0-9])'
|
||||
#r'(?<!=[A-Za-z0-9+])=[A-Za-z0-9+]{40}(?![A-Za-z0-9+])'
|
||||
# r'(?<![A-Z0-9])=[A-Z0-9]{20}(?![A-Z0-9])'
|
||||
# r'(?<!=[A-Za-z0-9+])=[A-Za-z0-9+]{40}(?![A-Za-z0-9+])'
|
||||
self.re_aws_access_key = r'AKIA[0-9A-Z]{16}'
|
||||
self.re_aws_secret_key = r'[0-9a-zA-Z/+]{40}'
|
||||
re.compile(self.re_aws_access_key)
|
||||
|
@ -48,15 +46,14 @@ class ApiKey(AbstractModule):
|
|||
self.redis_logger.info(f"Module {self.module_name} initialized")
|
||||
|
||||
def compute(self, message, r_result=False):
|
||||
id, score = message.split()
|
||||
item = Item(id)
|
||||
item_id, score = message.split()
|
||||
item = Item(item_id)
|
||||
item_content = item.get_content()
|
||||
|
||||
google_api_key = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.re_google_api_key, item.get_id(), item_content)
|
||||
|
||||
aws_access_key = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.re_aws_access_key, item.get_id(), item_content)
|
||||
google_api_key = self.regex_findall(self.re_google_api_key, item.get_id(), item_content)
|
||||
aws_access_key = self.regex_findall(self.re_aws_access_key, item.get_id(), item_content)
|
||||
if aws_access_key:
|
||||
aws_secret_key = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.re_aws_secret_key, item.get_id(), item_content)
|
||||
aws_secret_key = self.regex_findall(self.re_aws_secret_key, item.get_id(), item_content)
|
||||
|
||||
if aws_access_key or google_api_key:
|
||||
to_print = f'ApiKey;{item.get_source()};{item.get_date()};{item.get_basename()};'
|
||||
|
@ -68,7 +65,7 @@ class ApiKey(AbstractModule):
|
|||
msg = f'infoleak:automatic-detection="google-api-key";{item.get_id()}'
|
||||
self.send_message_to_queue(msg, 'Tags')
|
||||
|
||||
# # TODO: # FIXME: AWS regex/validate/sanityze KEY + SECRET KEY
|
||||
# # TODO: # FIXME: AWS regex/validate/sanitize KEY + SECRET KEY
|
||||
if aws_access_key:
|
||||
print(f'found AWS key: {to_print}')
|
||||
self.redis_logger.warning(f'{to_print}Checked {len(aws_access_key)} found AWS Key;{item.get_id()}')
|
||||
|
@ -87,7 +84,8 @@ class ApiKey(AbstractModule):
|
|||
self.send_message_to_queue(item.get_id(), 'Duplicate')
|
||||
|
||||
if r_result:
|
||||
return (google_api_key, aws_access_key, aws_secret_key)
|
||||
return google_api_key, aws_access_key, aws_secret_key
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
module = ApiKey()
|
||||
|
|
|
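The ApiKey hunks replace direct `regex_helper.regex_findall(...)` calls with the inherited `self.regex_findall(...)` wrapper; the detection itself stays plain regex matching. A standalone sketch using the AWS patterns shown above. The Google pattern is an assumption, since `re_google_api_key` is referenced but not defined in this diff.

    import re

    # AWS patterns taken from the module above; the Google one is an assumed shape.
    RE_AWS_ACCESS_KEY = re.compile(r'AKIA[0-9A-Z]{16}')
    RE_AWS_SECRET_KEY = re.compile(r'[0-9a-zA-Z/+]{40}')
    RE_GOOGLE_API_KEY = re.compile(r'AIza[0-9A-Za-z\-_]{35}')

    def find_api_keys(content):
        found = {'aws_access': RE_AWS_ACCESS_KEY.findall(content),
                 'google': RE_GOOGLE_API_KEY.findall(content)}
        # The broad secret-key regex is noisy, so only apply it when an access
        # key was already found, mirroring the module's logic.
        found['aws_secret'] = RE_AWS_SECRET_KEY.findall(content) if found['aws_access'] else []
        return found

    print(find_api_keys('key=AKIAIOSFODNN7EXAMPLE'))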
@@ -43,7 +43,7 @@ sys.path.append(os.environ['AIL_BIN'])
 # Import Project packages
 ##################################
 from modules.abstract_module import AbstractModule
-from packages.Item import Item
+from lib.objects.Items import Item


 class Categ(AbstractModule):
|
||||
|
|
|
@ -30,7 +30,6 @@ import os
|
|||
import sys
|
||||
import time
|
||||
import re
|
||||
import redis
|
||||
from datetime import datetime
|
||||
from pyfaup.faup import Faup
|
||||
|
||||
|
@ -39,9 +38,8 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
# Import Project packages
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages.Item import Item
|
||||
from lib.objects.Items import Item
|
||||
from lib import ConfigLoader
|
||||
from lib import regex_helper
|
||||
from lib import Statistics
|
||||
|
||||
|
||||
|
@ -60,21 +58,18 @@ class Credential(AbstractModule):
|
|||
REDIS_KEY_ALL_PATH_SET_REV = 'AllPathRev'
|
||||
REDIS_KEY_MAP_CRED_TO_PATH = 'CredToPathMapping'
|
||||
|
||||
|
||||
def __init__(self):
|
||||
super(Credential, self).__init__()
|
||||
|
||||
self.faup = Faup()
|
||||
|
||||
self.regex_web = "((?:https?:\/\/)[\.-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
|
||||
self.regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
|
||||
self.regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"
|
||||
|
||||
self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
|
||||
self.regex_web = r"((?:https?:\/\/)[\.-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
|
||||
self.regex_cred = r"[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
|
||||
self.regex_site_for_stats = r"@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"
|
||||
|
||||
# Database
|
||||
config_loader = ConfigLoader.ConfigLoader()
|
||||
#self.server_cred = config_loader.get_redis_conn("ARDB_TermCred")
|
||||
# self.server_cred = config_loader.get_redis_conn("ARDB_TermCred")
|
||||
self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics")
|
||||
|
||||
# Config values
|
||||
|
@ -83,29 +78,27 @@ class Credential(AbstractModule):
|
|||
|
||||
self.max_execution_time = 30
|
||||
|
||||
# Waiting time in secondes between to message proccessed
|
||||
# Waiting time in seconds between to message processed
|
||||
self.pending_seconds = 10
|
||||
|
||||
# Send module state to logs
|
||||
self.redis_logger.info(f"Module {self.module_name} initialized")
|
||||
|
||||
|
||||
def compute(self, message):
|
||||
|
||||
id, count = message.split()
|
||||
item = Item(id)
|
||||
item_id, count = message.split()
|
||||
item = Item(item_id)
|
||||
|
||||
item_content = item.get_content()
|
||||
|
||||
# TODO: USE SETS
|
||||
# Extract all credentials
|
||||
all_credentials = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_cred, item.get_id(), item_content, max_time=self.max_execution_time)
|
||||
|
||||
all_credentials = self.regex_findall(self.regex_cred, item.get_id(), item_content)
|
||||
if all_credentials:
|
||||
nb_cred = len(all_credentials)
|
||||
message = f'Checked {nb_cred} credentials found.'
|
||||
|
||||
all_sites = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_web, item.get_id(), item_content, max_time=self.max_execution_time)
|
||||
all_sites = self.regex_findall(self.regex_web, item.get_id(), item_content)
|
||||
if all_sites:
|
||||
discovered_sites = ', '.join(all_sites)
|
||||
message += f' Related websites: {discovered_sites}'
|
||||
|
@ -114,7 +107,7 @@ class Credential(AbstractModule):
|
|||
|
||||
to_print = f'Credential;{item.get_source()};{item.get_date()};{item.get_basename()};{message};{item.get_id()}'
|
||||
|
||||
#num of creds above tresh, publish an alert
|
||||
# num of creds above threshold, publish an alert
|
||||
if nb_cred > self.criticalNumberToAlert:
|
||||
print(f"========> Found more than 10 credentials in this file : {item.get_id()}")
|
||||
self.redis_logger.warning(to_print)
|
||||
|
@ -122,11 +115,11 @@ class Credential(AbstractModule):
|
|||
msg = f'infoleak:automatic-detection="credential";{item.get_id()}'
|
||||
self.send_message_to_queue(msg, 'Tags')
|
||||
|
||||
site_occurence = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_site_for_stats, item.get_id(), item_content, max_time=self.max_execution_time, r_set=False)
|
||||
site_occurrence = self.regex_findall(self.regex_site_for_stats, item.get_id(), item_content)
|
||||
|
||||
creds_sites = {}
|
||||
|
||||
for site in site_occurence:
|
||||
for site in site_occurrence:
|
||||
site_domain = site[1:-1].lower()
|
||||
if site_domain in creds_sites.keys():
|
||||
creds_sites[site_domain] += 1
|
||||
|
@ -136,7 +129,7 @@ class Credential(AbstractModule):
|
|||
for url in all_sites:
|
||||
self.faup.decode(url)
|
||||
domain = self.faup.get()['domain']
|
||||
## TODO: # FIXME: remove me, check faup versionb
|
||||
# # TODO: # FIXME: remove me, check faup versionb
|
||||
try:
|
||||
domain = domain.decode()
|
||||
except:
|
||||
|
@ -159,10 +152,10 @@ class Credential(AbstractModule):
|
|||
date = datetime.now().strftime("%Y%m")
|
||||
nb_tlds = {}
|
||||
for cred in all_credentials:
|
||||
maildomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0]
|
||||
maildomains = re.findall(r"@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0]
|
||||
self.faup.decode(maildomains)
|
||||
tld = self.faup.get()['tld']
|
||||
## TODO: # FIXME: remove me
|
||||
# # TODO: # FIXME: remove me
|
||||
try:
|
||||
tld = tld.decode()
|
||||
except:
|
||||
|
|
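Credential now also goes through `self.regex_findall(...)`, then tallies how often each mail domain appears so per-site statistics can be updated. A compact sketch of that extraction and tally, with the two regexes copied verbatim from the hunks above (`count_cred_sites` is an illustrative name, not the module's).

    import re
    from collections import Counter

    REGEX_CRED = r"[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
    REGEX_SITE_FOR_STATS = r"@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"

    def count_cred_sites(content):
        credentials = re.findall(REGEX_CRED, content)
        # '@example.com:' -> 'example.com', tallied per domain as in the module
        sites = Counter(site[1:-1].lower() for site in re.findall(REGEX_SITE_FOR_STATS, content))
        return credentials, sites

    creds, sites = count_cred_sites("user@example.com:hunter2\nadmin@example.com:letmein")
    print(len(creds), dict(sites))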
|
@ -17,14 +17,13 @@ It apply credit card regexes on item content and warn if a valid card number is
|
|||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages.Item import Item
|
||||
from lib.objects.Items import Item
|
||||
from packages import lib_refine
|
||||
|
||||
class CreditCards(AbstractModule):
|
||||
|
@ -53,15 +52,14 @@ class CreditCards(AbstractModule):
|
|||
# Send module state to logs
|
||||
self.redis_logger.info(f"Module {self.module_name} initialized")
|
||||
|
||||
|
||||
def compute(self, message, r_result=False):
|
||||
id, score = message.split()
|
||||
item = Item(id)
|
||||
item_id, score = message.split()
|
||||
item = Item(item_id)
|
||||
content = item.get_content()
|
||||
all_cards = re.findall(self.regex, content)
|
||||
|
||||
if len(all_cards) > 0:
|
||||
#self.redis_logger.debug(f'All matching {all_cards}')
|
||||
# self.redis_logger.debug(f'All matching {all_cards}')
|
||||
creditcard_set = set([])
|
||||
|
||||
for card in all_cards:
|
||||
|
@ -70,9 +68,9 @@ class CreditCards(AbstractModule):
|
|||
self.redis_logger.debug(f'{clean_card} is valid')
|
||||
creditcard_set.add(clean_card)
|
||||
|
||||
#pprint.pprint(creditcard_set)
|
||||
# pprint.pprint(creditcard_set)
|
||||
to_print = f'CreditCard;{item.get_source()};{item.get_date()};{item.get_basename()};'
|
||||
if (len(creditcard_set) > 0):
|
||||
if len(creditcard_set) > 0:
|
||||
self.redis_logger.warning(f'{to_print}Checked {len(creditcard_set)} valid number(s);{item.get_id()}')
|
||||
|
||||
msg = f'infoleak:automatic-detection="credit-card";{item.get_id()}'
|
||||
|
@ -83,7 +81,7 @@ class CreditCards(AbstractModule):
|
|||
else:
|
||||
self.redis_logger.info(f'{to_print}CreditCard related;{item.get_id()}')
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
if __name__ == '__main__':
|
||||
module = CreditCards()
|
||||
module.run()
|
||||
|
|
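CreditCards matches candidate numbers with `self.regex` and only keeps the ones `lib_refine` accepts as valid. Card validation is conventionally a Luhn checksum; the sketch below shows that standard algorithm, not necessarily the exact check `lib_refine` performs.

    def luhn_valid(card_number: str) -> bool:
        """Return True if the digit string passes the Luhn checksum."""
        digits = [int(c) for c in card_number if c.isdigit()]
        if len(digits) < 12:
            return False
        checksum = 0
        # Double every second digit from the right, subtracting 9 on overflow.
        for i, d in enumerate(reversed(digits)):
            if i % 2 == 1:
                d = d * 2
                if d > 9:
                    d -= 9
            checksum += d
        return checksum % 10 == 0

    print(luhn_valid('4111 1111 1111 1111'))  # classic test number -> True
    print(luhn_valid('4111 1111 1111 1112'))  # -> False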
|
@ -22,6 +22,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
# Import Project packages
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from lib.objects import Cves
|
||||
from lib.objects.Items import Item
|
||||
|
||||
|
||||
|
@ -36,13 +37,12 @@ class Cve(AbstractModule):
|
|||
# regex to find CVE
|
||||
self.reg_cve = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,5}')
|
||||
|
||||
# Waiting time in secondes between to message proccessed
|
||||
# Waiting time in seconds between to message processed
|
||||
self.pending_seconds = 1
|
||||
|
||||
# Send module state to logs
|
||||
self.redis_logger.info(f'Module {self.module_name} initialized')
|
||||
|
||||
|
||||
def compute(self, message):
|
||||
|
||||
item_id, count = message.split()
|
||||
|
@ -51,17 +51,23 @@ class Cve(AbstractModule):
|
|||
|
||||
cves = self.regex_findall(self.reg_cve, item_id, item.get_content())
|
||||
if cves:
|
||||
print(cves)
|
||||
date = item.get_date()
|
||||
for cve_id in cves:
|
||||
cve = Cves.Cve(cve_id)
|
||||
cve.add(date, item_id)
|
||||
|
||||
warning = f'{item_id} contains CVEs {cves}'
|
||||
print(warning)
|
||||
self.redis_logger.warning(warning)
|
||||
|
||||
msg = f'infoleak:automatic-detection="cve";{item_id}'
|
||||
# Send to Tags Queue
|
||||
self.send_message_to_queue(msg, 'Tags')
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
module = Cve()
|
||||
module.run()
|
||||
# module.run()
|
||||
module.compute('crawled/2022/09/15/circl.lu1e4f9721-06dc-404f-aabf-3c3bd0b533bd 9')
|
||||
|
|
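The new Cve module pulls CVE identifiers out of item content with the regex above and records each one on a `Cves.Cve` object before tagging the item. The extraction step is easy to exercise on its own; only the regex comes from the diff, the correlation objects are left out.

    import re

    REG_CVE = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,5}')

    def extract_cves(content):
        # Deduplicate for readability; the module iterates the raw matches.
        return sorted(set(REG_CVE.findall(content)))

    text = "Exploits for CVE-2021-44228 and CVE-2022-22965 were observed."
    print(extract_cves(text))  # ['CVE-2021-44228', 'CVE-2022-22965']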
|
@ -65,49 +65,45 @@ class Decoder(AbstractModule):
|
|||
#hexStr = ''.join( hex_string.split(" ") )
|
||||
return bytes(bytearray([int(hexStr[i:i+2], 16) for i in range(0, len(hexStr), 2)]))
|
||||
|
||||
|
||||
# TODO to lambda expr
|
||||
def binary_decoder(self, binary_string):
|
||||
return bytes(bytearray([int(binary_string[i:i+8], 2) for i in range(0, len(binary_string), 8)]))
|
||||
|
||||
|
||||
# TODO to lambda expr
|
||||
def base64_decoder(self, base64_string):
|
||||
return base64.b64decode(base64_string)
|
||||
|
||||
|
||||
def __init__(self):
|
||||
super(Decoder, self).__init__()
|
||||
|
||||
regex_binary = '[0-1]{40,}'
|
||||
#regex_hex = '(0[xX])?[A-Fa-f0-9]{40,}'
|
||||
regex_hex = '[A-Fa-f0-9]{40,}'
|
||||
regex_base64 = '(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)'
|
||||
regex_binary = r'[0-1]{40,}'
|
||||
# regex_hex = r'(0[xX])?[A-Fa-f0-9]{40,}'
|
||||
regex_hex = r'[A-Fa-f0-9]{40,}'
|
||||
regex_base64 = r'(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)'
|
||||
|
||||
cmp_regex_binary = re.compile(regex_binary)
|
||||
cmp_regex_hex = re.compile(regex_hex)
|
||||
cmp_regex_base64 = re.compile(regex_base64)
|
||||
|
||||
# map decoder function
|
||||
self.decoder_function = {'binary':self.binary_decoder,'hexadecimal':self.hex_decoder, 'base64':self.base64_decoder}
|
||||
self.decoder_function = {'binary': self.binary_decoder, 'hexadecimal': self.hex_decoder, 'base64': self.base64_decoder}
|
||||
|
||||
# list all decoder with regex,
|
||||
decoder_binary = {'name': 'binary', 'regex': cmp_regex_binary, 'encoded_min_size': 300, 'max_execution_time': binary_max_execution_time}
|
||||
decoder_hexadecimal = {'name': 'hexadecimal', 'regex': cmp_regex_hex, 'encoded_min_size': 300, 'max_execution_time': hex_max_execution_time}
|
||||
decoder_base64 = {'name': 'base64', 'regex': cmp_regex_base64, 'encoded_min_size': 40, 'max_execution_time': base64_max_execution_time}
|
||||
|
||||
self.decoder_order = [ decoder_base64, decoder_binary, decoder_hexadecimal, decoder_base64]
|
||||
self.decoder_order = [decoder_base64, decoder_binary, decoder_hexadecimal, decoder_base64]
|
||||
|
||||
for decoder in self.decoder_order:
|
||||
serv_metadata.sadd('all_decoder', decoder['name'])
|
||||
|
||||
# Waiting time in secondes between to message proccessed
|
||||
# Waiting time in seconds between to message processed
|
||||
self.pending_seconds = 1
|
||||
|
||||
# Send module state to logs
|
||||
self.redis_logger.info(f'Module {self.module_name} initialized')
|
||||
|
||||
|
||||
def compute(self, message):
|
||||
|
||||
item = Item(message)
|
||||
|
@ -128,10 +124,9 @@ class Decoder(AbstractModule):
|
|||
else:
|
||||
signal.alarm(0)
|
||||
|
||||
if(len(encoded_list) > 0):
|
||||
if len(encoded_list) > 0:
|
||||
content = self.decode_string(content, item.id, date, encoded_list, decoder['name'], decoder['encoded_min_size'])
|
||||
|
||||
|
||||
def decode_string(self, content, item_id, date, encoded_list, decoder_name, encoded_min_size):
|
||||
find = False
|
||||
for encoded in encoded_list:
|
||||
|
@ -153,12 +148,12 @@ class Decoder(AbstractModule):
|
|||
|
||||
save_item_relationship(sha1_string, item_id) ################################
|
||||
|
||||
#remove encoded from item content
|
||||
# remove encoded from item content
|
||||
content = content.replace(encoded, '', 1)
|
||||
|
||||
self.redis_logger.debug(f'{item_id} : {decoder_name} - {mimetype}')
|
||||
print(f'{item_id} : {decoder_name} - {mimetype}')
|
||||
if(find):
|
||||
if find:
|
||||
self.redis_logger.info(f'{decoder_name} decoded')
|
||||
print(f'{decoder_name} decoded')
|
||||
|
||||
|
@ -169,6 +164,7 @@ class Decoder(AbstractModule):
|
|||
# perf: remove encoded from item content
|
||||
return content
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
# # TODO: TEST ME
|
||||
|
|
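The Decoder hunks mostly turn the regexes into raw strings; the three decoding helpers keep the conversions shown above. Reproduced here as standalone functions with a small round-trip check.

    import base64

    def hex_decoder(hex_string):
        return bytes(bytearray([int(hex_string[i:i + 2], 16) for i in range(0, len(hex_string), 2)]))

    def binary_decoder(binary_string):
        return bytes(bytearray([int(binary_string[i:i + 8], 2) for i in range(0, len(binary_string), 8)]))

    def base64_decoder(base64_string):
        return base64.b64decode(base64_string)

    payload = b'AIL'
    assert hex_decoder(payload.hex()) == payload
    assert binary_decoder(''.join(f'{b:08b}' for b in payload)) == payload
    assert base64_decoder(base64.b64encode(payload)) == payload
    print('all decoders round-trip')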
|
@ -15,7 +15,6 @@ the out output of the Global module.
|
|||
##################################
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import DomainClassifier.domainclassifier
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
|
@ -23,11 +22,8 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
# Import Project packages
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages.Item import Item
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
||||
import d4
|
||||
import item_basic
|
||||
from lib.objects.Items import Item
|
||||
from lib import d4
|
||||
|
||||
|
||||
class DomClassifier(AbstractModule):
|
||||
|
@ -38,7 +34,7 @@ class DomClassifier(AbstractModule):
|
|||
def __init__(self):
|
||||
super(DomClassifier, self).__init__()
|
||||
|
||||
# Waiting time in secondes between to message proccessed
|
||||
# Waiting time in seconds between to message processed
|
||||
self.pending_seconds = 1
|
||||
|
||||
addr_dns = self.process.config.get("DomClassifier", "dns")
|
||||
|
@ -51,11 +47,10 @@ class DomClassifier(AbstractModule):
|
|||
# Send module state to logs
|
||||
self.redis_logger.info(f"Module: {self.module_name} Launched")
|
||||
|
||||
|
||||
def compute(self, message, r_result=False):
|
||||
host, id = message.split()
|
||||
host, item_id = message.split()
|
||||
|
||||
item = Item(id)
|
||||
item = Item(item_id)
|
||||
item_basename = item.get_basename()
|
||||
item_date = item.get_date()
|
||||
item_source = item.get_source()
|
||||
|
@ -64,7 +59,7 @@ class DomClassifier(AbstractModule):
|
|||
self.c.text(rawtext=host)
|
||||
print(self.c.domain)
|
||||
self.c.validdomain(passive_dns=True, extended=False)
|
||||
#self.redis_logger.debug(self.c.vdomain)
|
||||
# self.redis_logger.debug(self.c.vdomain)
|
||||
|
||||
print(self.c.vdomain)
|
||||
print()
|
||||
|
|
|
@ -12,14 +12,12 @@ Its input comes from other modules, namely:
|
|||
Perform comparisions with ssdeep and tlsh
|
||||
|
||||
"""
|
||||
import redis
|
||||
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
#from datetime import datetime, timedelta
|
||||
# from datetime import datetime, timedelta
|
||||
import datetime
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
|
@ -51,7 +49,6 @@ class Duplicates(AbstractModule):
|
|||
|
||||
self.redis_logger.info(f"Module: {self.module_name} Launched")
|
||||
|
||||
|
||||
def compute(self, message):
|
||||
# IOError: "CRC Checksum Failed on : {id}"
|
||||
|
||||
|
@ -72,7 +69,7 @@ class Duplicates(AbstractModule):
|
|||
self.algos['ssdeep']['hash'] = Duplicate.get_ssdeep_hash(content)
|
||||
self.algos['tlsh']['hash'] = Duplicate.get_tlsh_hash(content)
|
||||
|
||||
# TODO: Handle coputed duplicates
|
||||
# TODO: Handle computed duplicates
|
||||
|
||||
nb_duplicates = 0
|
||||
|
||||
|
@ -99,7 +96,7 @@ class Duplicates(AbstractModule):
|
|||
|
||||
y = time.time()
|
||||
print(f'{item.get_id()} Processed in {y-x} sec')
|
||||
#self.redis_logger.debug('{}Processed in {} sec'.format(to_print, y-x))
|
||||
# self.redis_logger.debug('{}Processed in {} sec'.format(to_print, y-x))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
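Duplicates computes an ssdeep and a tlsh hash per item and compares them against recently seen items. A hedged sketch of the comparison step, assuming the `ssdeep` and `py-tlsh` Python bindings; `ssdeep.hash`/`compare` and `tlsh.hash`/`diff` are those libraries' calls, not AIL's wrappers, and tlsh needs a reasonably long, non-trivial input.

    import ssdeep  # pip install ssdeep
    import tlsh    # pip install py-tlsh

    def similarity(content_a: bytes, content_b: bytes):
        ssdeep_score = ssdeep.compare(ssdeep.hash(content_a), ssdeep.hash(content_b))  # 0..100, higher = closer
        tlsh_distance = tlsh.diff(tlsh.hash(content_a), tlsh.hash(content_b))          # lower = closer
        return ssdeep_score, tlsh_distance

    a = bytes(i % 256 for i in range(1024))
    b = a[:512] + b'X' * 16 + a[528:]
    print(similarity(a, b))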
|
@ -31,7 +31,6 @@ import os
|
|||
import sys
|
||||
import time
|
||||
import datetime
|
||||
import redis
|
||||
|
||||
from hashlib import md5
|
||||
from uuid import uuid4
|
||||
|
@ -57,19 +56,18 @@ class Global(AbstractModule):
|
|||
self.processed_item = 0
|
||||
self.time_last_stats = time.time()
|
||||
|
||||
# Get and sanityze ITEM DIRECTORY
|
||||
# Get and sanitize ITEM DIRECTORY
|
||||
# # TODO: rename PASTE => ITEM
|
||||
self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], self.process.config.get("Directories", "pastes"))
|
||||
self.PASTES_FOLDERS = self.PASTES_FOLDER + '/'
|
||||
self.PASTES_FOLDERS = os.path.join(os.path.realpath(self.PASTES_FOLDERS), '')
|
||||
|
||||
# Waiting time in secondes between to message proccessed
|
||||
# Waiting time in seconds between to message processed
|
||||
self.pending_seconds = 0.5
|
||||
|
||||
# Send module state to logs
|
||||
self.redis_logger.info(f"Module {self.module_name} initialized")
|
||||
|
||||
|
||||
def computeNone(self):
|
||||
difftime = time.time() - self.time_last_stats
|
||||
if int(difftime) > 30:
|
||||
|
@ -80,7 +78,6 @@ class Global(AbstractModule):
|
|||
self.time_last_stats = time.time()
|
||||
self.processed_item = 0
|
||||
|
||||
|
||||
def compute(self, message, r_result=False):
|
||||
# Recovering the streamed message informations
|
||||
splitted = message.split()
|
||||
|
@ -129,7 +126,8 @@ class Global(AbstractModule):
|
|||
item_id = item_id.replace(self.PASTES_FOLDERS, '', 1)
|
||||
|
||||
self.send_message_to_queue(item_id)
|
||||
self.processed_item+=1
|
||||
self.processed_item += 1
|
||||
print(item_id)
|
||||
if r_result:
|
||||
return item_id
|
||||
|
||||
|
@ -137,7 +135,6 @@ class Global(AbstractModule):
|
|||
self.redis_logger.debug(f"Empty Item: {message} not processed")
|
||||
print(f"Empty Item: {message} not processed")
|
||||
|
||||
|
||||
def check_filename(self, filename, new_file_content):
|
||||
"""
|
||||
Check if file is not a duplicated file
|
||||
|
@ -181,10 +178,8 @@ class Global(AbstractModule):
|
|||
# File not unzipped
|
||||
filename = None
|
||||
|
||||
|
||||
return filename
|
||||
|
||||
|
||||
def gunzip_file(self, filename):
|
||||
"""
|
||||
Unzip a file
|
||||
|
@ -224,7 +219,6 @@ class Global(AbstractModule):
|
|||
|
||||
return gunzipped_bytes_obj
|
||||
|
||||
|
||||
def rreplace(self, s, old, new, occurrence):
|
||||
li = s.rsplit(old, occurrence)
|
||||
return new.join(li)
|
||||
|
|
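Global receives base64-encoded gzip blobs, gunzips them, rejects duplicated files (it imports `md5` and checks filenames) and writes the item under the sanitized pastes directory. A minimal sketch of the decode step only; `decode_item` is an illustrative name.

    import base64
    import gzip
    import io
    from hashlib import md5

    def decode_item(gzip64encoded: str) -> bytes:
        """base64 -> gzip -> raw item content, as Global does before writing to disk."""
        gzipped = base64.standard_b64decode(gzip64encoded)
        with gzip.GzipFile(fileobj=io.BytesIO(gzipped)) as f:
            return f.read()

    raw = b'some pasted content'
    blob = base64.standard_b64encode(gzip.compress(raw)).decode()
    content = decode_item(blob)
    assert content == raw
    print(md5(content).hexdigest())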
|
@ -17,7 +17,6 @@ It is looking for Hosts
|
|||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
|
@ -25,9 +24,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
from lib import regex_helper
|
||||
#from lib.objects.Items import Item
|
||||
from packages.Item import Item
|
||||
from lib.objects.Items import Item
|
||||
|
||||
class Hosts(AbstractModule):
|
||||
"""
|
||||
|
@ -40,12 +37,10 @@ class Hosts(AbstractModule):
|
|||
config_loader = ConfigLoader()
|
||||
self.r_cache = config_loader.get_redis_conn("Redis_Cache")
|
||||
|
||||
self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
|
||||
|
||||
# regex timeout
|
||||
self.regex_timeout = 30
|
||||
|
||||
# Waiting time in secondes between to message proccessed
|
||||
# Waiting time in seconds between to message processed
|
||||
self.pending_seconds = 1
|
||||
|
||||
self.host_regex = r'\b([a-zA-Z\d-]{,63}(?:\.[a-zA-Z\d-]{,63})+)\b'
|
||||
|
@ -53,7 +48,6 @@ class Hosts(AbstractModule):
|
|||
|
||||
self.redis_logger.info(f"Module: {self.module_name} Launched")
|
||||
|
||||
|
||||
def compute(self, message):
|
||||
item = Item(message)
|
||||
|
||||
|
@ -61,18 +55,16 @@ class Hosts(AbstractModule):
|
|||
# if mimetype.split('/')[0] == "text":
|
||||
|
||||
content = item.get_content()
|
||||
|
||||
hosts = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.host_regex, item.get_id(), content)
|
||||
hosts = self.regex_findall(self.host_regex, item.get_id(), content)
|
||||
if hosts:
|
||||
print(f'{len(hosts)} host {item.get_id()}')
|
||||
for host in hosts:
|
||||
#print(host)
|
||||
# print(host)
|
||||
|
||||
msg = f'{host} {item.get_id()}'
|
||||
self.send_message_to_queue(msg, 'Host')
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
module = Hosts()
|
||||
|
|
|
@@ -44,7 +44,7 @@ class Iban(AbstractModule):
         self.pending_seconds = 10

         self.regex_timeout = 30
-        #iban_regex = re.compile(r'\b[A-Za-z]{2}[0-9]{2}(?:[ ]?[0-9]{4}){4}(?:[ ]?[0-9]{1,2})?\b')
+        # iban_regex = re.compile(r'\b[A-Za-z]{2}[0-9]{2}(?:[ ]?[0-9]{4}){4}(?:[ ]?[0-9]{1,2})?\b')
         self.iban_regex = re.compile(r'\b([A-Za-z]{2}[ \-]?[0-9]{2})(?=(?:[ \-]?[A-Za-z0-9]){9,30})((?:[ \-]?[A-Za-z0-9]{3,5}){2,6})([ \-]?[A-Za-z0-9]{1,3})\b')
         self.iban_regex_verify = re.compile(r'^([A-Z]{2})([0-9]{2})([A-Z0-9]{9,30})$')

@@ -90,6 +90,7 @@ class Iban(AbstractModule):
             msg = f'infoleak:automatic-detection="iban";{item_id}'
             self.send_message_to_queue(msg, 'Tags')

+
 if __name__ == '__main__':

     module = Iban()
|
||||
|
|
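Iban matches candidates with `iban_regex`, then normalizes them and verifies them against `iban_regex_verify` plus a checksum. The standard IBAN checksum is mod-97 over the rearranged string; a generic sketch of that check, not necessarily byte-for-byte what the module does.

    import re

    IBAN_VERIFY = re.compile(r'^([A-Z]{2})([0-9]{2})([A-Z0-9]{9,30})$')

    def is_valid_iban(candidate: str) -> bool:
        iban = candidate.replace(' ', '').replace('-', '').upper()
        if not IBAN_VERIFY.match(iban):
            return False
        # Move the first 4 chars to the end, map letters to numbers (A=10..Z=35), then mod 97.
        rearranged = iban[4:] + iban[:4]
        digits = ''.join(str(int(c, 36)) for c in rearranged)
        return int(digits) % 97 == 1

    print(is_valid_iban('GB82 WEST 1234 5698 7654 32'))  # True
    print(is_valid_iban('GB82 WEST 1234 5698 7654 33'))  # False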
|
@ -26,7 +26,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
# Import Project packages
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages.Item import Item
|
||||
from lib.objects.Items import Item
|
||||
|
||||
|
||||
class Indexer(AbstractModule):
|
||||
|
@ -57,9 +57,7 @@ class Indexer(AbstractModule):
|
|||
self.ix = None
|
||||
|
||||
if self.indexertype == "whoosh":
|
||||
self.schema = Schema(title=TEXT(stored=True), path=ID(stored=True,
|
||||
unique=True),
|
||||
content=TEXT)
|
||||
self.schema = Schema(title=TEXT(stored=True), path=ID(stored=True, unique=True), content=TEXT)
|
||||
if not os.path.exists(self.baseindexpath):
|
||||
os.mkdir(self.baseindexpath)
|
||||
|
||||
|
@ -96,7 +94,6 @@ class Indexer(AbstractModule):
|
|||
|
||||
self.last_refresh = time_now
|
||||
|
||||
|
||||
def compute(self, message):
|
||||
docpath = message.split(" ", -1)[-1]
|
||||
|
||||
|
@ -109,7 +106,7 @@ class Indexer(AbstractModule):
|
|||
|
||||
try:
|
||||
# Avoid calculating the index's size at each message
|
||||
if(time.time() - self.last_refresh > self.TIME_WAIT):
|
||||
if time.time() - self.last_refresh > self.TIME_WAIT:
|
||||
self.last_refresh = time.time()
|
||||
if self.check_index_size() >= self.INDEX_SIZE_THRESHOLD*(1000*1000):
|
||||
timestamp = int(time.time())
|
||||
|
@ -145,10 +142,8 @@ class Indexer(AbstractModule):
|
|||
cur_sum = 0
|
||||
for root, dirs, files in os.walk(the_index_name):
|
||||
cur_sum += sum(getsize(join(root, name)) for name in files)
|
||||
|
||||
return cur_sum
|
||||
|
||||
|
||||
def move_index_into_old_index_folder(self):
|
||||
for cur_file in os.listdir(self.baseindexpath):
|
||||
if not cur_file == "old_index":
|
||||
|
|
|
@ -17,7 +17,6 @@ RSA private key, certificate messages
|
|||
##################################
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from enum import Enum
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
|
@ -25,7 +24,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
# Import Project packages
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages.Item import Item
|
||||
from lib.objects.Items import Item
|
||||
|
||||
|
||||
class KeyEnum(Enum):
|
||||
|
@ -53,10 +52,9 @@ class Keys(AbstractModule):
|
|||
def __init__(self):
|
||||
super(Keys, self).__init__()
|
||||
|
||||
# Waiting time in secondes between to message proccessed
|
||||
# Waiting time in seconds between to message processed
|
||||
self.pending_seconds = 1
|
||||
|
||||
|
||||
def compute(self, message):
|
||||
item = Item(message)
|
||||
content = item.get_content()
|
||||
|
@ -169,11 +167,12 @@ class Keys(AbstractModule):
|
|||
if get_pgp_content:
|
||||
self.send_message_to_queue(item.get_id(), 'PgpDump')
|
||||
|
||||
if find :
|
||||
#Send to duplicate
|
||||
self.send_message_to_queue(item.get_id(), 'Duplicate')
|
||||
self.redis_logger.debug(f'{item.get_id()} has key(s)')
|
||||
print(f'{item.get_id()} has key(s)')
|
||||
# if find :
|
||||
# # Send to duplicate
|
||||
# self.send_message_to_queue(item.get_id(), 'Duplicate')
|
||||
# self.redis_logger.debug(f'{item.get_id()} has key(s)')
|
||||
# print(f'{item.get_id()} has key(s)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
|
|
|
@@ -11,7 +11,7 @@ sys.path.append(os.environ['AIL_BIN'])
 from modules.abstract_module import AbstractModule
 from lib.objects.Domains import Domain
 from lib.objects.Items import Item
-#from lib.ConfigLoader import ConfigLoader
+# from lib.ConfigLoader import ConfigLoader

 class Languages(AbstractModule):
     """
@@ -31,6 +31,7 @@ class Languages(AbstractModule):
         for lang in item.get_languages(min_probability=0.8):
             domain.add_language(lang.language)

+
 if __name__ == '__main__':
     module = Languages()
     module.run()
|
||||
|
|
|
@ -13,12 +13,12 @@ It tries to identify SQL Injections with libinjection.
|
|||
|
||||
import os
|
||||
import sys
|
||||
import redis
|
||||
import urllib.request
|
||||
import pylibinjection
|
||||
|
||||
from datetime import datetime
|
||||
from pyfaup.faup import Faup
|
||||
from urllib.parse import unquote
|
||||
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
|
@ -27,7 +27,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
from packages.Item import Item
|
||||
from lib.objects.Items import Item
|
||||
|
||||
class LibInjection(AbstractModule):
|
||||
"""docstring for LibInjection module."""
|
||||
|
@ -43,38 +43,38 @@ class LibInjection(AbstractModule):
|
|||
self.redis_logger.info(f"Module: {self.module_name} Launched")
|
||||
|
||||
def compute(self, message):
|
||||
url, id = message.split()
|
||||
url, item_id = message.split()
|
||||
|
||||
self.faup.decode(url)
|
||||
url_parsed = self.faup.get()
|
||||
## TODO: # FIXME: remove me
|
||||
# # TODO: # FIXME: remove me
|
||||
try:
|
||||
resource_path = url_parsed['resource_path'].encode()
|
||||
except:
|
||||
resource_path = url_parsed['resource_path']
|
||||
|
||||
## TODO: # FIXME: remove me
|
||||
# # TODO: # FIXME: remove me
|
||||
try:
|
||||
query_string = url_parsed['query_string'].encode()
|
||||
except:
|
||||
query_string = url_parsed['query_string']
|
||||
|
||||
result_path = {'sqli' : False}
|
||||
result_query = {'sqli' : False}
|
||||
result_path = {'sqli': False}
|
||||
result_query = {'sqli': False}
|
||||
|
||||
if resource_path is not None:
|
||||
result_path = pylibinjection.detect_sqli(resource_path)
|
||||
#print(f'path is sqli : {result_path}')
|
||||
# print(f'path is sqli : {result_path}')
|
||||
|
||||
if query_string is not None:
|
||||
result_query = pylibinjection.detect_sqli(query_string)
|
||||
#print(f'query is sqli : {result_query}')
|
||||
# print(f'query is sqli : {result_query}')
|
||||
|
||||
if result_path['sqli'] is True or result_query['sqli'] is True:
|
||||
item = Item(id)
|
||||
item = Item(item_id)
|
||||
item_id = item.get_id()
|
||||
print(f"Detected (libinjection) SQL in URL: {item_id}")
|
||||
print(urllib.request.unquote(url))
|
||||
print(unquote(url))
|
||||
|
||||
to_print = f'LibInjection;{item.get_source()};{item.get_date()};{item.get_basename()};Detected SQL in URL;{item_id}'
|
||||
self.redis_logger.warning(to_print)
|
||||
|
@ -86,8 +86,8 @@ class LibInjection(AbstractModule):
|
|||
msg = f'infoleak:automatic-detection="sql-injection";{item_id}'
|
||||
self.send_message_to_queue(msg, 'Tags')
|
||||
|
||||
#statistics
|
||||
## TODO: # FIXME: remove me
|
||||
# statistics
|
||||
# # TODO: # FIXME: remove me
|
||||
try:
|
||||
tld = url_parsed['tld'].decode()
|
||||
except:
|
||||
|
@ -96,7 +96,7 @@ class LibInjection(AbstractModule):
|
|||
date = datetime.now().strftime("%Y%m")
|
||||
self.server_statistics.hincrby(f'SQLInjection_by_tld:{date}', tld, 1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
if __name__ == "__main__":
|
||||
module = LibInjection()
|
||||
module.run()
|
||||
|
|
|
@ -13,9 +13,7 @@ It apply mail regexes on item content and warn if above a threshold.
|
|||
|
||||
import os
|
||||
import re
|
||||
import redis
|
||||
import sys
|
||||
import time
|
||||
import datetime
|
||||
|
||||
import dns.resolver
|
||||
|
@ -52,7 +50,7 @@ class Mail(AbstractModule):
|
|||
self.mail_threshold = 10
|
||||
|
||||
self.regex_timeout = 30
|
||||
self.email_regex = "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}"
|
||||
self.email_regex = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}"
|
||||
re.compile(self.email_regex)
|
||||
|
||||
def is_mxdomain_in_cache(self, mxdomain):
|
||||
|
@ -64,8 +62,8 @@ class Mail(AbstractModule):
|
|||
def check_mx_record(self, set_mxdomains):
|
||||
"""Check if emails MX domains are responding.
|
||||
|
||||
:param adress_set: -- (set) This is a set of emails domains
|
||||
:return: (int) Number of adress with a responding and valid MX domains
|
||||
:param set_mxdomains: -- (set) This is a set of emails domains
|
||||
:return: (int) Number of address with a responding and valid MX domains
|
||||
|
||||
"""
|
||||
resolver = dns.resolver.Resolver()
|
||||
|
@ -107,7 +105,7 @@ class Mail(AbstractModule):
|
|||
self.redis_logger.debug('SyntaxError: EmptyLabel')
|
||||
print('SyntaxError: EmptyLabel')
|
||||
except dns.resolver.NXDOMAIN:
|
||||
#save_mxdomain_in_cache(mxdomain)
|
||||
# save_mxdomain_in_cache(mxdomain)
|
||||
self.redis_logger.debug('The query name does not exist.')
|
||||
print('The query name does not exist.')
|
||||
except dns.name.LabelTooLong:
|
||||
|
@ -115,12 +113,12 @@ class Mail(AbstractModule):
|
|||
print('The Label is too long')
|
||||
except dns.exception.Timeout:
|
||||
print('dns timeout')
|
||||
#save_mxdomain_in_cache(mxdomain)
|
||||
# save_mxdomain_in_cache(mxdomain)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
return valid_mxdomain
|
||||
|
||||
# # TODO: sanityze mails
|
||||
# # TODO: sanitize mails
|
||||
def compute(self, message):
|
||||
item_id, score = message.split()
|
||||
item = Item(item_id)
|
||||
|
@ -134,7 +132,7 @@ class Mail(AbstractModule):
|
|||
mxdomains_email[mxdomain] = set()
|
||||
mxdomains_email[mxdomain].add(mail)
|
||||
|
||||
## TODO: add MAIL trackers
|
||||
# # TODO: add MAIL trackers
|
||||
|
||||
valid_mx = self.check_mx_record(mxdomains_email.keys())
|
||||
print(f'valid_mx: {valid_mx}')
|
||||
|
@ -144,7 +142,7 @@ class Mail(AbstractModule):
|
|||
nb_mails = len(mxdomains_email[domain_mx])
|
||||
num_valid_email += nb_mails
|
||||
|
||||
# Create doamin_mail stats
|
||||
# Create domain_mail stats
|
||||
msg = f'mail;{nb_mails};{domain_mx};{item_date}'
|
||||
self.send_message_to_queue(msg, 'ModuleStats')
|
||||
|
||||
|
@ -159,8 +157,8 @@ class Mail(AbstractModule):
|
|||
for tld in mx_tlds:
|
||||
Statistics.add_module_tld_stats_by_date('mail', item_date, tld, mx_tlds[tld])
|
||||
|
||||
if num_valid_email > self.mail_threshold:
|
||||
msg = f'Mails;{item.get_source()};{item_date};{item.get_basename()};Checked {num_valid_email} e-mail(s);{item_id}'
|
||||
if num_valid_email > self.mail_threshold:
|
||||
print(f'{item_id} Checked {num_valid_email} e-mail(s)')
|
||||
self.redis_logger.warning(msg)
|
||||
# Tags
|
||||
|
@ -170,8 +168,6 @@ class Mail(AbstractModule):
|
|||
self.redis_logger.info(msg)
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
module = Mail()
|
||||
#module.compute('tests/2021/01/01/mails.gz 50')
|
||||
module.run()
|
||||
|
|
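Mail groups extracted addresses by their MX domain and keeps only the domains that answer an MX query, caching results for a while. A hedged sketch of the DNS side with dnspython; `resolve` is the dnspython 2.x call (older versions use `query`), and the resolver address below is illustrative, not AIL's configured one.

    import dns.resolver
    import dns.exception

    def responding_mx_domains(domains, nameserver='9.9.9.9', timeout=5.0):
        resolver = dns.resolver.Resolver()
        resolver.nameservers = [nameserver]
        resolver.timeout = timeout
        resolver.lifetime = timeout
        valid = set()
        for domain in domains:
            try:
                resolver.resolve(domain, 'MX')   # raises NXDOMAIN / NoAnswer / Timeout on failure
            except dns.exception.DNSException:
                continue
            valid.add(domain)
        return valid

    print(responding_mx_domains({'example.com', 'does-not-exist.invalid'}))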
|
@ -25,12 +25,11 @@ class ModuleStats(AbstractModule):
|
|||
Module Statistics module for AIL framework
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self):
|
||||
|
||||
super(ModuleStats, self).__init__()
|
||||
|
||||
# Waiting time in secondes between to message proccessed
|
||||
# Waiting time in seconds between to message processed
|
||||
self.pending_seconds = 20
|
||||
|
||||
def compute(self, message):
|
||||
|
@ -38,9 +37,10 @@ class ModuleStats(AbstractModule):
|
|||
# MODULE STATS
|
||||
if len(message.split(';')) > 1:
|
||||
module_name, num, keyword, date = message.split(';')
|
||||
Statisticsupdate_module_stats(module_name, num, keyword, date)
|
||||
Statistics.update_module_stats(module_name, num, keyword, date)
|
||||
# ITEM STATS
|
||||
else:
|
||||
item_id = message
|
||||
item = Item(item_id)
|
||||
source = item.get_source()
|
||||
date = item.get_date()
|
||||
|
|
|
@ -13,8 +13,6 @@ Requirements
|
|||
*Need running Redis instances. (Redis)
|
||||
|
||||
"""
|
||||
import time
|
||||
import datetime
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
|
@ -25,68 +23,8 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
from lib.objects.Items import Item
|
||||
from lib import crawlers
|
||||
from lib import regex_helper
|
||||
from packages.Item import Item
|
||||
|
||||
## Manually fetch first page if crawler is disabled
|
||||
# import base64
|
||||
# import subprocess
|
||||
#
|
||||
# torclient_host = '127.0.0.1'
|
||||
# torclient_port = 9050
|
||||
#
|
||||
# def fetch(p, r_cache, urls, domains):
|
||||
# now = datetime.datetime.now()
|
||||
# path = os.path.join('onions', str(now.year).zfill(4),
|
||||
# str(now.month).zfill(2),
|
||||
# str(now.day).zfill(2),
|
||||
# str(int(time.mktime(now.utctimetuple()))))
|
||||
# failed = []
|
||||
# downloaded = []
|
||||
# print('{} Urls to fetch'.format(len(urls)))
|
||||
# for url, domain in zip(urls, domains):
|
||||
# if r_cache.exists(url) or url in failed:
|
||||
# continue
|
||||
# to_fetch = base64.standard_b64encode(url.encode('utf8'))
|
||||
# print('fetching url: {}'.format(to_fetch))
|
||||
# process = subprocess.Popen(["python", './tor_fetcher.py', to_fetch],
|
||||
# stdout=subprocess.PIPE)
|
||||
# while process.poll() is None:
|
||||
# time.sleep(1)
|
||||
#
|
||||
# if process.returncode == 0:
|
||||
# r_cache.setbit(url, 0, 1)
|
||||
# r_cache.expire(url, 360000)
|
||||
# downloaded.append(url)
|
||||
# print('downloaded : {}'.format(downloaded))
|
||||
# '''tempfile = process.stdout.read().strip()
|
||||
# tempfile = tempfile.decode('utf8')
|
||||
# #with open(tempfile, 'r') as f:
|
||||
# filename = path + domain + '.gz'
|
||||
# fetched = f.read()
|
||||
# content = base64.standard_b64decode(fetched)
|
||||
# save_path = os.path.join(os.environ['AIL_HOME'],
|
||||
# p.config.get("Directories", "pastes"),
|
||||
# filename)
|
||||
# dirname = os.path.dirname(save_path)
|
||||
# if not os.path.exists(dirname):
|
||||
# os.makedirs(dirname)
|
||||
# with open(save_path, 'w') as ff:
|
||||
# ff.write(content)
|
||||
# p.populate_set_out(save_path, 'Global')
|
||||
# p.populate_set_out(url, 'ValidOnion')
|
||||
# p.populate_set_out(fetched, 'FetchedOnion')'''
|
||||
# yield url
|
||||
# #os.unlink(tempfile)
|
||||
# else:
|
||||
# r_cache.setbit(url, 0, 0)
|
||||
# r_cache.expire(url, 3600)
|
||||
# failed.append(url)
|
||||
# print('Failed at downloading', url)
|
||||
# print(process.stdout.read())
|
||||
# print('Failed:', len(failed), 'Downloaded:', len(downloaded))
|
||||
|
||||
|
||||
class Onion(AbstractModule):
|
||||
"""docstring for Onion module."""
|
||||
|
@ -103,68 +41,63 @@ class Onion(AbstractModule):
|
|||
self.regex_timeout = 30
|
||||
|
||||
self.faup = crawlers.get_faup()
|
||||
self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
|
||||
|
||||
# activate_crawler = p.config.get("Crawler", "activate_crawler")
|
||||
|
||||
|
||||
self.url_regex = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
|
||||
self.i2p_regex = "((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
|
||||
re.compile(self.url_regex)
|
||||
re.compile(self.i2p_regex)
|
||||
self.onion_regex = r"((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
|
||||
# self.i2p_regex = r"((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
|
||||
re.compile(self.onion_regex)
|
||||
# re.compile(self.i2p_regex)
|
||||
|
||||
self.redis_logger.info(f"Module: {self.module_name} Launched")
|
||||
|
||||
# TEMP var: SAVE I2P Domain (future I2P crawler)
|
||||
self.save_i2p = config_loader.get_config_boolean("Onion", "save_i2p")
|
||||
# self.save_i2p = config_loader.get_config_boolean("Onion", "save_i2p")
|
||||
|
||||
def compute(self, message):
|
||||
# list of tuples: (url, subdomains, domain)
|
||||
urls_to_crawl = []
|
||||
onion_urls = []
|
||||
domains = []
|
||||
|
||||
id, score = message.split()
|
||||
item = Item(id)
|
||||
item_id, score = message.split()
|
||||
item = Item(item_id)
|
||||
item_content = item.get_content()
|
||||
|
||||
# max execution time on regex
|
||||
res = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.url_regex, item.get_id(), item_content)
|
||||
res = self.regex_findall(self.onion_regex, item.get_id(), item_content)
|
||||
for x in res:
|
||||
# String to tuple
|
||||
x = x[2:-2].replace(" '", "").split("',")
|
||||
url = x[0]
|
||||
subdomain = x[4].lower()
|
||||
self.faup.decode(url)
|
||||
url_unpack = self.faup.get()
|
||||
try: ## TODO: # FIXME: check faup version
|
||||
domain = url_unpack['domain'].decode().lower()
|
||||
except Exception as e:
|
||||
domain = url_unpack['domain'].lower()
|
||||
print(url)
|
||||
|
||||
# TODO Crawl subdomain
|
||||
url_unpack = crawlers.unpack_url(url)
|
||||
domain = url_unpack['domain']
|
||||
if crawlers.is_valid_onion_domain(domain):
|
||||
urls_to_crawl.append((url, subdomain, domain))
|
||||
domains.append(domain)
|
||||
onion_urls.append(url)
|
||||
|
||||
if onion_urls:
|
||||
if crawlers.is_crawler_activated():
|
||||
for domain in domains:# TODO LOAD DEFAULT SCREENSHOT + HAR
|
||||
task_uuid = crawlers.add_crawler_task(domain, parent=item.get_id())
|
||||
if task_uuid:
|
||||
print(f'{domain} added to crawler queue: {task_uuid}')
|
||||
else:
|
||||
to_print = f'Onion;{item.get_source()};{item.get_date()};{item.get_basename()};'
|
||||
if not urls_to_crawl:
|
||||
self.redis_logger.info(f'{to_print}Onion related;{item.get_id()}')
|
||||
return
|
||||
print(f'{to_print}Detected {len(domains)} .onion(s);{item.get_id()}')
|
||||
self.redis_logger.warning(f'{to_print}Detected {len(domains)} .onion(s);{item.get_id()}')
|
||||
|
||||
# TAG Item
|
||||
msg = f'infoleak:automatic-detection="onion";{item.get_id()}'
|
||||
self.send_message_to_queue(msg, 'Tags')
|
||||
|
||||
if crawlers.is_crawler_activated():
|
||||
for to_crawl in urls_to_crawl:
|
||||
print(f'{to_crawl[2]} added to crawler queue: {to_crawl[0]}')
|
||||
crawlers.add_item_to_discovery_queue('onion', to_crawl[2], to_crawl[1], to_crawl[0], item.get_id())
|
||||
else:
|
||||
print(f'{to_print}Detected {len(urls_to_crawl)} .onion(s);{item.get_id()}')
|
||||
self.redis_logger.warning(f'{to_print}Detected {len(urls_to_crawl)} .onion(s);{item.get_id()}')
|
||||
# keep manual fetcher ????
|
||||
## Manually fetch first page if crawler is disabled
|
||||
# for url in fetch(p, r_cache, urls, domains_list):
|
||||
# publisher.info('{}Checked {};{}'.format(to_print, url, PST.p_rel_path))
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
module = Onion()
|
||||
# module.compute('submitted/2022/10/10/submitted_705d1d92-7e9a-4a44-8c21-ccd167bfb7db.gz 9')
|
||||
module.run()
|
||||
|
||||
|
||||
# 5ajw6aqf3ep7sijnscdzw77t7xq4xjpsy335yb2wiwgouo7yfxtjlmid.onion to debian.org
|
|
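Onion now relies on `crawlers.unpack_url` and `crawlers.is_valid_onion_domain` before queueing a crawl task. A hedged sketch of what validating a current v3 onion address boils down to, namely 56 base32 characters plus the `.onion` suffix; the real helper may be stricter or accept other forms. The example address is the one quoted at the end of the module above.

    import re

    ONION_V3 = re.compile(r'^[a-z2-7]{56}\.onion$')

    def is_probably_valid_onion_domain(domain: str) -> bool:
        domain = domain.lower()
        # Strip an optional subdomain, keep the final '<56 chars>.onion' part.
        parts = domain.rsplit('.', 2)
        if len(parts) >= 2:
            domain = '.'.join(parts[-2:])
        return bool(ONION_V3.match(domain))

    print(is_probably_valid_onion_domain(
        '5ajw6aqf3ep7sijnscdzw77t7xq4xjpsy335yb2wiwgouo7yfxtjlmid.onion'))  # True
    print(is_probably_valid_onion_domain('not-an-onion.example.com'))        # False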
@ -17,7 +17,6 @@ It apply phone number regexes on item content and warn if above a threshold.
|
|||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import phonenumbers
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
|
@ -25,7 +24,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
# Import Project packages
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages.Item import Item
|
||||
from lib.objects.Items import Item
|
||||
|
||||
# # TODO: # FIXME: improve regex / filter false positives
|
||||
class Phone(AbstractModule):
|
||||
|
@ -37,14 +36,12 @@ class Phone(AbstractModule):
|
|||
# reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
|
||||
REG_PHONE = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')
|
||||
|
||||
|
||||
def __init__(self):
|
||||
super(Phone, self).__init__()
|
||||
|
||||
# Waiting time in secondes between to message proccessed
|
||||
# Waiting time in seconds between to message processed
|
||||
self.pending_seconds = 1
|
||||
|
||||
|
||||
def compute(self, message):
|
||||
item = Item(message)
|
||||
content = item.get_content()
|
||||
|
@ -79,6 +76,5 @@ class Phone(AbstractModule):
|
|||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
module = Phone()
|
||||
module.run()
|
||||
|
|
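Phone pre-filters with `REG_PHONE`; confirming candidates is typically done with the `phonenumbers` library that the module imports. A hedged sketch of that confirmation step, library usage only; how AIL picks the default region may differ.

    import phonenumbers

    def extract_phone_numbers(content, default_region=None):
        """Yield E.164-formatted numbers that phonenumbers considers valid."""
        for match in phonenumbers.PhoneNumberMatcher(content, default_region):
            if phonenumbers.is_valid_number(match.number):
                yield phonenumbers.format_number(match.number, phonenumbers.PhoneNumberFormat.E164)

    text = "Call +33 1 40 20 50 50 or +1 650-253-0000 for details."
    print(list(extract_phone_numbers(text)))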
|
@ -14,11 +14,11 @@ It test different possibility to makes some sqlInjection.
|
|||
import os
|
||||
import sys
|
||||
import re
|
||||
import redis
|
||||
import urllib.request
|
||||
|
||||
from datetime import datetime
|
||||
from pyfaup.faup import Faup
|
||||
from urllib.parse import unquote
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
|
@ -26,7 +26,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
from packages.Item import Item
|
||||
from lib.objects.Items import Item
|
||||
|
||||
class SQLInjectionDetection(AbstractModule):
|
||||
"""docstring for SQLInjectionDetection module."""
|
||||
|
@ -46,13 +46,13 @@ class SQLInjectionDetection(AbstractModule):
|
|||
self.redis_logger.info(f"Module: {self.module_name} Launched")
|
||||
|
||||
def compute(self, message):
|
||||
url, id = message.split()
|
||||
url, item_id = message.split()
|
||||
|
||||
if self.is_sql_injection(url):
|
||||
self.faup.decode(url)
|
||||
url_parsed = self.faup.get()
|
||||
|
||||
item = Item(id)
|
||||
item = Item(item_id)
|
||||
item_id = item.get_id()
|
||||
print(f"Detected SQL in URL: {item_id}")
|
||||
print(urllib.request.unquote(url))
|
||||
|
@ -69,7 +69,7 @@ class SQLInjectionDetection(AbstractModule):
|
|||
# statistics
|
||||
tld = url_parsed['tld']
|
||||
if tld is not None:
|
||||
## TODO: # FIXME: remove me
|
||||
# # TODO: # FIXME: remove me
|
||||
try:
|
||||
tld = tld.decode()
|
||||
except:
|
||||
|
@ -77,15 +77,13 @@ class SQLInjectionDetection(AbstractModule):
|
|||
date = datetime.now().strftime("%Y%m")
|
||||
self.server_statistics.hincrby(f'SQLInjection_by_tld:{date}', tld, 1)
|
||||
|
||||
# Try to detect if the url passed might be an sql injection by appliying the regex
|
||||
# Try to detect if the url passed might be an sql injection by applying the regex
|
||||
# defined above on it.
|
||||
def is_sql_injection(self, url_parsed):
|
||||
line = urllib.request.unquote(url_parsed)
|
||||
|
||||
line = unquote(url_parsed)
|
||||
return re.search(SQLInjectionDetection.SQLI_REGEX, line, re.I) is not None
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
module = SQLInjectionDetection()
|
||||
module.run()
|
||||
|
|
|
@ -34,9 +34,8 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
# Import Project packages
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages import Paste
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
||||
import ConfigLoader
|
||||
from lib.objects.Items import Item
|
||||
from lib import ConfigLoader
|
||||
|
||||
|
||||
class TimeoutException(Exception):
|
||||
|
@ -53,12 +52,10 @@ class SentimentAnalysis(AbstractModule):
|
|||
SentimentAnalysis module for AIL framework
|
||||
"""
|
||||
|
||||
|
||||
# Config Variables
|
||||
accepted_Mime_type = ['text/plain']
|
||||
line_max_length_threshold = 1000
|
||||
|
||||
|
||||
def __init__(self):
|
||||
super(SentimentAnalysis, self).__init__()
|
||||
|
||||
|
@ -75,7 +72,6 @@ class SentimentAnalysis(AbstractModule):
|
|||
# Send module state to logs
|
||||
self.redis_logger.info(f"Module {self.module_name} initialized")
|
||||
|
||||
|
||||
def compute(self, message):
|
||||
# Max time to compute one entry
|
||||
signal.alarm(60)
|
||||
|
@ -87,16 +83,31 @@ class SentimentAnalysis(AbstractModule):
|
|||
else:
|
||||
signal.alarm(0)
|
||||
|
||||
def get_p_content_with_removed_lines(self, threshold, item_content):
|
||||
num_line_removed = 0
|
||||
line_length_threshold = threshold
|
||||
string_content = ""
|
||||
f = item_content
|
||||
for line_id, line in enumerate(f):
|
||||
length = len(line)
|
||||
|
||||
if length < line_length_threshold:
|
||||
string_content += line
|
||||
else:
|
||||
num_line_removed += 1
|
||||
|
||||
return num_line_removed, string_content
|
||||
|
||||
def analyse(self, message):
|
||||
|
||||
paste = Paste.Paste(message)
|
||||
item = Item(message)
|
||||
|
||||
# get content with removed line + number of them
|
||||
num_line_removed, p_content = paste.get_p_content_with_removed_lines(SentimentAnalysis.line_max_length_threshold)
|
||||
provider = paste.p_source
|
||||
p_date = str(paste._get_p_date())
|
||||
p_MimeType = paste._get_p_encoding()
|
||||
num_line_removed, p_content = self.get_p_content_with_removed_lines(SentimentAnalysis.line_max_length_threshold,
|
||||
item.get_content())
|
||||
provider = item.get_source()
|
||||
p_date = item.get_date()
|
||||
p_MimeType = item.get_mimetype()
|
||||
|
||||
# Perform further analysis
|
||||
if p_MimeType == "text/plain":
|
||||
|
|
|
@ -20,7 +20,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
# Import Project packages
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages.Item import Item
|
||||
from lib.objects.Items import Item
|
||||
from lib import Tag
|
||||
|
||||
|
||||
|
@ -32,13 +32,12 @@ class Tags(AbstractModule):
|
|||
def __init__(self):
|
||||
super(Tags, self).__init__()
|
||||
|
||||
# Waiting time in secondes between to message proccessed
|
||||
# Waiting time in seconds between to message processed
|
||||
self.pending_seconds = 10
|
||||
|
||||
# Send module state to logs
|
||||
self.redis_logger.info(f'Module {self.module_name} initialized')
|
||||
|
||||
|
||||
def compute(self, message):
|
||||
# Extract item ID and tag from message
|
||||
mess_split = message.split(';')
|
||||
|
@ -62,6 +61,5 @@ class Tags(AbstractModule):
|
|||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
module = Tags()
|
||||
module.run()
|
||||
|
|
|
@@ -16,7 +16,7 @@ sys.path.append(os.environ['AIL_BIN'])
 # Import Project packages
 ##################################
 from modules.abstract_module import AbstractModule
-from packages.Item import Item
+from lib.objects.Items import Item
 from lib import regex_helper
 from lib import telegram

@@ -78,7 +78,7 @@ class Telegram(AbstractModule):

         # CREATE TAG
         if invite_code_found:
-            #tags
+            # tags
             msg = f'infoleak:automatic-detection="telegram-invite-hash";{item.get_id()}'
             self.send_message_to_queue(msg, 'Tags')
|
||||
|
||||
|
|
|
@ -13,7 +13,6 @@ This module extract URLs from an item and send them to others modules.
|
|||
# Import External packages
|
||||
##################################
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
from pyfaup.faup import Faup
|
||||
|
@ -23,8 +22,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
# Import Project packages
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages.Item import Item
|
||||
from lib import regex_helper
|
||||
from lib.objects.Items import Item
|
||||
|
||||
# # TODO: Faup packages: Add new binding: Check TLD
|
||||
|
||||
|
@ -40,7 +38,6 @@ class Urls(AbstractModule):
|
|||
super(Urls, self).__init__()
|
||||
|
||||
self.faup = Faup()
|
||||
self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
|
||||
|
||||
# Protocol file path
|
||||
protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
|
||||
|
@ -58,21 +55,26 @@ class Urls(AbstractModule):
|
|||
# Send module state to logs
|
||||
self.redis_logger.info(f"Module {self.module_name} initialized")
|
||||
|
||||
|
||||
def compute(self, message):
|
||||
"""
|
||||
Search for Web links from given message
|
||||
"""
|
||||
# Extract item
|
||||
id, score = message.split()
|
||||
item_id, score = message.split()
|
||||
|
||||
item = Item(id)
|
||||
item = Item(item_id)
|
||||
item_content = item.get_content()
|
||||
|
||||
l_urls = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.url_regex, item.get_id(), item_content)
|
||||
# TODO Handle invalid URL
|
||||
l_urls = self.regex_findall(self.url_regex, item.get_id(), item_content)
|
||||
for url in l_urls:
|
||||
self.faup.decode(url)
|
||||
unpack_url = self.faup.get()
|
||||
url_decoded = self.faup.get()
|
||||
# decode URL
|
||||
try:
|
||||
url = url_decoded['url'].decode()
|
||||
except AttributeError:
|
||||
url = url_decoded['url']
|
||||
|
||||
to_send = f"{url} {item.get_id()}"
|
||||
print(to_send)
|
||||
|
@ -83,7 +85,7 @@ class Urls(AbstractModule):
|
|||
to_print = f'Urls;{item.get_source()};{item.get_date()};{item.get_basename()};'
|
||||
self.redis_logger.info(f'{to_print}Detected {len(l_urls)} URL;{item.get_id()}')
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
if __name__ == '__main__':
|
||||
module = Urls()
|
||||
module.run()
|
||||
|
|
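Urls decodes every match with pyfaup and, as the try/except AttributeError in the hunk shows, has to cope with the binding returning either bytes or str depending on the installed version. A small helper that captures that pattern; `faup_get_str` is an illustrative name, and the field keys are the ones used throughout this diff.

    from pyfaup.faup import Faup

    faup = Faup()

    def faup_get_str(url, field='url'):
        """Decode a URL with faup and return the requested field as str, whatever the binding returns."""
        faup.decode(url)
        value = faup.get()[field]
        try:
            return value.decode()   # older bindings return bytes
        except AttributeError:
            return value            # newer bindings already return str (or None)

    print(faup_get_str('http://example.com/path?q=1', field='domain'))  # example.com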
|
@ -11,9 +11,8 @@ This module spots zerobins-like services for further processing
|
|||
##################################
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import pdb
|
||||
import re
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
|
@ -42,33 +41,31 @@ class Zerobins(AbstractModule):
|
|||
# Send module state to logs
|
||||
self.redis_logger.info(f'Module {self.module_name} initialized')
|
||||
|
||||
|
||||
def computeNone(self):
|
||||
"""
|
||||
Compute when no message in queue
|
||||
"""
|
||||
self.redis_logger.debug("No message in queue")
|
||||
|
||||
|
||||
def compute(self, message):
|
||||
"""regex_helper.regex_findall(self.module_name, self.redis_cache_key
|
||||
"""
|
||||
Compute a message in queue
|
||||
"""
|
||||
print(message)
|
||||
url, id = message.split()
|
||||
url, item_id = message.split()
|
||||
|
||||
# Extract zerobins addresses
|
||||
matching_binz = self.regex_findall(self.regex, id, url)
|
||||
matching_binz = self.regex_findall(self.regex, item_id, url)
|
||||
|
||||
if len(matching_binz) > 0:
|
||||
for bin in matching_binz:
|
||||
print("send {} to crawler".format(bin))
|
||||
crawlers.create_crawler_task(bin, screenshot=False, har=False, depth_limit=1, max_pages=1, auto_crawler=False, crawler_delta=3600, crawler_type=None, cookiejar_uuid=None, user_agent=None)
|
||||
for bin_url in matching_binz:
|
||||
print(f'send {bin_url} to crawler')
|
||||
crawlers.add_crawler_task(bin_url, depth=0, har=False, screenshot=False, proxy='force_tor',
|
||||
parent='manual', priority=10)
|
||||
|
||||
self.redis_logger.debug("Compute message in queue")
|
||||
|
||||
|
||||
# TODO TEST ME
|
||||
if __name__ == '__main__':
|
||||
|
||||
module = Zerobins()
|
||||
module.run()
|
|
@@ -59,6 +59,9 @@ class AbstractModule(ABC):
         # Setup the I/O queues
         self.process = Process(self.queue_name)

+        # Debug Mode
+        self.debug = False
+
     def get_message(self):
         """
         Get message from the Redis Queue (QueueIn)
@@ -104,6 +107,8 @@ class AbstractModule(ABC):
                 # Module processing with the message from the queue
                 self.compute(message)
             except Exception as err:
+                if self.debug:
+                    raise err
                 trace = traceback.format_tb(err.__traceback__)
                 trace = ''.join(trace)
                 self.redis_logger.critical(f"Error in module {self.module_name}: {err}")
|
||||
|
|
|
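The AbstractModule hunks add a `self.debug` flag: when it is set, an exception raised by `compute()` is re-raised instead of only being logged, which is what makes the direct `module.compute(...)` test calls at the bottom of several modules practical. A stripped-down sketch of that run-loop behaviour, with no Redis and an illustrative class name.

    import traceback

    class MiniModule:
        def __init__(self, debug=False):
            self.debug = debug

        def compute(self, message):
            raise ValueError(f'cannot process {message!r}')

        def process(self, message):
            try:
                self.compute(message)
            except Exception as err:
                if self.debug:
                    raise          # surface the real traceback while developing
                trace = ''.join(traceback.format_tb(err.__traceback__))
                print(f'Error in module {self.__class__.__name__}: {err}\n{trace}')

    MiniModule(debug=False).process('item-1')   # logged and swallowed
    # MiniModule(debug=True).process('item-1')  # would raise ValueError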
@ -16,7 +16,6 @@ import os
|
|||
import sys
|
||||
import gzip
|
||||
import io
|
||||
import redis
|
||||
import base64
|
||||
import datetime
|
||||
import time
|
||||
|
@ -51,6 +50,7 @@ class SubmitPaste(AbstractModule):
|
|||
"""
|
||||
super(SubmitPaste, self).__init__(queue_name='submit_paste')
|
||||
|
||||
# TODO KVROCKS
|
||||
self.r_serv_db = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_DB")
|
||||
self.r_serv_log_submit = ConfigLoader.ConfigLoader().get_redis_conn("Redis_Log_submit")
|
||||
self.r_serv_tags = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Tags")
|
||||
|
@ -61,7 +61,6 @@ class SubmitPaste(AbstractModule):
|
|||
|
||||
self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], ConfigLoader.ConfigLoader().get_config_str("Directories", "pastes")) + '/'
|
||||
|
||||
|
||||
def compute(self, uuid):
|
||||
"""
|
||||
Main method of the Module to implement
|
||||
|
@ -129,7 +128,6 @@ class SubmitPaste(AbstractModule):
|
|||
self.redis_logger.debug(f'{self.module_name}, waiting for new message, Idling {self.pending_seconds}s')
|
||||
time.sleep(self.pending_seconds)
|
||||
|
||||
|
||||
def _manage_text(self, uuid, paste_content, ltags, ltagsgalaxies, source):
|
||||
"""
|
||||
Create a paste for given text
|
||||
|
@ -141,7 +139,6 @@ class SubmitPaste(AbstractModule):
|
|||
else:
|
||||
self.abord_file_submission(uuid, f'Text size is over {SubmitPaste.TEXT_MAX_SIZE} bytes')
|
||||
|
||||
|
||||
def _manage_file(self, uuid, file_full_path, ltags, ltagsgalaxies, source):
|
||||
"""
|
||||
Create a paste for given file
|
||||
|
@ -230,7 +227,6 @@ class SubmitPaste(AbstractModule):
|
|||
else:
|
||||
self.abord_file_submission(uuid, "Server Error, the archive can't be found")
|
||||
|
||||
|
||||
def _is_compressed_type(self, file_type):
|
||||
"""
|
||||
Check if file type is in the list of compressed file extensions format
|
||||
|
@ -239,7 +235,6 @@ class SubmitPaste(AbstractModule):
|
|||
|
||||
return file_type in compressed_type
|
||||
|
||||
|
||||
def remove_submit_uuid(self, uuid):
|
||||
# save temp value on disk
|
||||
self.r_serv_db.delete(f'{uuid}:ltags')
|
||||
|
@ -262,7 +257,6 @@ class SubmitPaste(AbstractModule):
|
|||
self.redis_logger.debug(f'{uuid} all file submitted')
|
||||
print(f'{uuid} all file submitted')
|
||||
|
||||
|
||||
def create_paste(self, uuid, paste_content, ltags, ltagsgalaxies, name, source=None):
|
||||
# # TODO: Use Item create
|
||||
|
||||
|
@ -272,7 +266,7 @@ class SubmitPaste(AbstractModule):
|
|||
source = source if source else 'submitted'
|
||||
save_path = source + '/' + now.strftime("%Y") + '/' + now.strftime("%m") + '/' + now.strftime("%d") + '/submitted_' + name + '.gz'
|
||||
|
||||
full_path = filename = os.path.join(os.environ['AIL_HOME'],
|
||||
full_path = os.path.join(os.environ['AIL_HOME'],
|
||||
self.process.config.get("Directories", "pastes"), save_path)
|
||||
|
||||
self.redis_logger.debug(f'file path of the paste {full_path}')
|
||||
|
@ -281,7 +275,7 @@ class SubmitPaste(AbstractModule):
|
|||
# file does not exist in AIL paste directory
|
||||
self.redis_logger.debug(f"new paste {paste_content}")
|
||||
|
||||
gzip64encoded = self._compress_encode_content(paste_content)
|
||||
gzip64encoded = self._compress_encode_content(paste_content, uuid)
|
||||
|
||||
if gzip64encoded:
|
||||
|
||||
|
@ -321,36 +315,30 @@ class SubmitPaste(AbstractModule):
|
|||
|
||||
return result
|
||||
|
||||
|
||||
def _compress_encode_content(self, content):
|
||||
def _compress_encode_content(self, content, uuid):
|
||||
gzip64encoded = None
|
||||
|
||||
try:
|
||||
gzipencoded = gzip.compress(content)
|
||||
gzip64encoded = base64.standard_b64encode(gzipencoded).decode()
|
||||
except:
|
||||
self.abord_file_submission(uuid, "file error")
|
||||
|
||||
return gzip64encoded
|
||||
|
||||
|
||||
def addError(self, uuid, errorMessage):
|
||||
self.redis_logger.debug(errorMessage)
|
||||
print(errorMessage)
|
||||
error = self.r_serv_log_submit.get(f'{uuid}:error')
|
||||
if error != None:
|
||||
if error is not None:
|
||||
self.r_serv_log_submit.set(f'{uuid}:error', error + '<br></br>' + errorMessage)
|
||||
|
||||
self.r_serv_log_submit.incr(f'{uuid}:nb_end')
|
||||
|
||||
|
||||
def abord_file_submission(self, uuid, errorMessage):
|
||||
self.redis_logger.debug(f'abord {uuid}, {errorMessage}')
|
||||
|
||||
self.addError(uuid, errorMessage)
|
||||
self.r_serv_log_submit.set(f'{uuid}:end', 1)
|
||||
curr_date = datetime.date.today()
|
||||
self.serv_statistics.hincrby(curr_date.strftime("%Y%m%d"),'submit_abord', 1)
|
||||
self.serv_statistics.hincrby(curr_date.strftime("%Y%m%d"), 'submit_abord', 1)
|
||||
self.remove_submit_uuid(uuid)
|
||||
|
||||
# # TODO: use Item function
|
||||
|
@ -358,14 +346,13 @@ class SubmitPaste(AbstractModule):
|
|||
l_directory = item_filename.split('/')
|
||||
return f'{l_directory[-4]}{l_directory[-3]}{l_directory[-2]}'
|
||||
|
||||
|
||||
def verify_extention_filename(self, filename):
|
||||
if not '.' in filename:
|
||||
return True
|
||||
else:
|
||||
file_type = filename.rsplit('.', 1)[1]
|
||||
|
||||
#txt file
|
||||
# txt file
|
||||
if file_type in SubmitPaste.ALLOWED_EXTENSIONS:
|
||||
return True
|
||||
else:
|
||||
|
@ -373,6 +360,5 @@ class SubmitPaste(AbstractModule):
|
|||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
module = SubmitPaste()
|
||||
module.run()
|
||||
|
|
|
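Note: submitted pastes are stored as base64-encoded gzip blobs. A standalone sketch of the encoding step performed by _compress_encode_content(), using only the standard library:

    import gzip
    import base64

    def compress_encode(content: bytes) -> str:
        # gzip-compress the raw paste, then base64-encode it for storage/transport
        return base64.standard_b64encode(gzip.compress(content)).decode()

    # round-trip check
    assert gzip.decompress(base64.standard_b64decode(compress_encode(b'test'))) == b'test'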
@ -153,6 +153,9 @@ def sanitise_date_range(date_from, date_to, separator='', date_type='str'):
|
|||
date_from = date_to
|
||||
elif not date_to and date_from:
|
||||
date_to = date_from
|
||||
elif not date_to and not date_from:
|
||||
date = datetime.date.today().strftime("%Y%m%d")
|
||||
return {"date_from": date, "date_to": date}
|
||||
|
||||
if date_type=='str':
|
||||
# remove separators
|
||||
|
|
|
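Note: the hunk above adds a default when no date bound is supplied. A minimal sketch of the resulting behaviour (separator handling and string/date conversion omitted):

    import datetime

    def sanitise_date_range(date_from=None, date_to=None):
        if not date_from and not date_to:
            today = datetime.date.today().strftime("%Y%m%d")
            return {"date_from": today, "date_to": today}   # new default: today/today
        if not date_from:
            date_from = date_to
        if not date_to:
            date_to = date_from
        return {"date_from": date_from, "date_to": date_to}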
@ -320,39 +320,6 @@ class HiddenServices(object):
|
|||
har_path = os.path.join(self.screenshot_directory, item_path) + '.json'
|
||||
return har_path
|
||||
|
||||
def create_domain_basic_archive(self, l_pastes):
|
||||
all_har = self.get_all_har(l_pastes, filename=True)
|
||||
all_screenshot = self.get_all_domain_screenshot(l_pastes, filename=True)
|
||||
all_items = self.get_all_item_full_path(l_pastes, filename=True)
|
||||
|
||||
# try:
|
||||
|
||||
# zip buffer
|
||||
zip_buffer = BytesIO()
|
||||
|
||||
with zipfile.ZipFile(zip_buffer, "a") as zf:
|
||||
|
||||
#print(all_har)
|
||||
self.write_in_zip_buffer(zf, all_har)
|
||||
self.write_in_zip_buffer(zf, all_screenshot)
|
||||
self.write_in_zip_buffer(zf, all_items)
|
||||
|
||||
# write map url
|
||||
map_file_content = self.get_metadata_file(l_pastes).encode()
|
||||
zf.writestr( '_URL_MAP_', BytesIO(map_file_content).getvalue())
|
||||
|
||||
zip_buffer.seek(0)
|
||||
return zip_buffer
|
||||
|
||||
# except Exception as e:
|
||||
# print(e)
|
||||
# return 'Server Error'
|
||||
|
||||
def write_in_zip_buffer(self, zf, list_file):
|
||||
for file_path, file_name in list_file:
|
||||
with open(file_path, "rb") as f:
|
||||
har_content = f.read()
|
||||
zf.writestr( file_name, BytesIO(har_content).getvalue())
|
||||
|
||||
def get_metadata_file(self, list_items):
|
||||
file_content = ''
|
||||
|
|
|
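Note: create_domain_basic_archive() builds the download archive entirely in memory. A minimal sketch of that pattern with the standard library (file list and metadata entry are illustrative):

    import zipfile
    from io import BytesIO

    def build_zip(files):
        # files: iterable of (path_on_disk, name_in_zip) pairs
        zip_buffer = BytesIO()
        with zipfile.ZipFile(zip_buffer, "a") as zf:
            for file_path, file_name in files:
                with open(file_path, "rb") as f:
                    zf.writestr(file_name, f.read())
            zf.writestr('_URL_MAP_', b'one url mapping per line')   # extra metadata entry
        zip_buffer.seek(0)
        return zip_buffer   # ready to stream back, e.g. via Flask's send_file()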
@ -20,7 +20,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
# Import Project packages
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
|
||||
# from lib.objects.Items import Item
|
||||
|
||||
class Template(AbstractModule):
|
||||
"""
|
||||
|
@ -36,19 +36,20 @@ class Template(AbstractModule):
|
|||
# Send module state to logs
|
||||
self.redis_logger.info(f'Module {self.module_name} initialized')
|
||||
|
||||
|
||||
def computeNone(self):
|
||||
"""
|
||||
Compute when no message in queue
|
||||
Do something when there is no message in the queue
|
||||
"""
|
||||
self.redis_logger.debug("No message in queue")
|
||||
|
||||
|
||||
def compute(self, message):
|
||||
"""
|
||||
Compute a message in queue
|
||||
Compute a message in queue / process the message (item_id, ...)
|
||||
"""
|
||||
self.redis_logger.debug("Compute message in queue")
|
||||
# # if message is an item_id:
|
||||
# item = Item(message)
|
||||
# content = item.get_content()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
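Note: for readers starting from this template, a sketch of what a filled-in compute() body typically looks like, following the commented hints above (the detection rule is purely illustrative):

    def compute(self, message):
        item = Item(message)                    # the message is an item_id
        content = item.get_content()
        if 'BEGIN PGP MESSAGE' in content:      # placeholder detection logic
            self.redis_logger.info(f'{self.module_name}: match in {item.get_id()}')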
@ -1,71 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
import socks
|
||||
import socket
|
||||
import urllib.request
|
||||
import io
|
||||
import gzip
|
||||
import base64
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
# Max size in Mb
|
||||
max_size = 5
|
||||
|
||||
def create_connection(address, timeout=None, source_address=None):
|
||||
sock = socks.socksocket()
|
||||
sock.connect(address)
|
||||
return sock
|
||||
|
||||
|
||||
def get_page(url, torclient_host='127.0.0.1', torclient_port=9050):
|
||||
|
||||
request = urllib.request.Request(url)
|
||||
# UA of the Tor browser bundle
|
||||
request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0')
|
||||
return urllib.request.urlopen(request, timeout=5).read(max_size * 100000)
|
||||
|
||||
# FIXME: doesn't work at all
|
||||
def makegzip64(s):
|
||||
|
||||
out = io.BytesIO()
|
||||
|
||||
with gzip.GzipFile(fileobj=out, mode='ab') as fo:
|
||||
fo.write(base64.standard_b64encode(s))
|
||||
|
||||
return out.getvalue()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
if len(sys.argv) != 2:
|
||||
print('usage:', 'tor_fetcher.py', 'URL (base64 encoded)')
|
||||
exit(1)
|
||||
|
||||
try:
|
||||
url = base64.standard_b64decode(sys.argv[1]).decode('utf8')
|
||||
print(url)
|
||||
except:
|
||||
print('unable to decode')
|
||||
exit(1)
|
||||
|
||||
torclient_host = '127.0.0.1'
|
||||
torclient_port = 9050
|
||||
# Setup Proxy
|
||||
socks.set_default_proxy(socks.SOCKS5, torclient_host, torclient_port, True)
|
||||
socket.socket = socks.socksocket
|
||||
socket.create_connection = create_connection
|
||||
|
||||
try:
|
||||
page = get_page(url)
|
||||
except:
|
||||
print('unable to fetch')
|
||||
exit(1)
|
||||
|
||||
to_write = makegzip64(page)
|
||||
t, path = tempfile.mkstemp()
|
||||
#with open(path, 'w') as f:
|
||||
#f.write(to_write)
|
||||
print(path)
|
||||
exit(0)
|
|
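Note: the deleted tor_fetcher.py flagged makegzip64() as broken (it base64-encoded before compressing). For reference, an order-consistent sketch matching the gzip-then-base64 convention used elsewhere in the framework:

    import gzip
    import base64

    def makegzip64(raw: bytes) -> str:
        # compress the raw page first, then base64-encode the result
        return base64.standard_b64encode(gzip.compress(raw)).decode()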
@ -1,328 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
import os
|
||||
import sys
|
||||
import uuid
|
||||
import datetime
|
||||
import redis
|
||||
import json
|
||||
import time
|
||||
|
||||
from hashlib import sha256
|
||||
|
||||
from scrapy.spidermiddlewares.httperror import HttpError
|
||||
from twisted.internet.error import DNSLookupError
|
||||
from twisted.internet.error import TimeoutError
|
||||
from twisted.web._newclient import ResponseNeverReceived
|
||||
|
||||
from scrapy import Spider
|
||||
from scrapy.linkextractors import LinkExtractor
|
||||
from scrapy.crawler import CrawlerProcess, Crawler
|
||||
|
||||
from scrapy_splash import SplashRequest, SplashJsonResponse
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
from Helper import Process
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
||||
import ConfigLoader
|
||||
import Screenshot
|
||||
import crawlers
|
||||
|
||||
script_cookie = """
|
||||
function main(splash, args)
|
||||
-- Default values
|
||||
splash.js_enabled = true
|
||||
splash.private_mode_enabled = true
|
||||
splash.images_enabled = true
|
||||
splash.webgl_enabled = true
|
||||
splash.media_source_enabled = true
|
||||
|
||||
-- Force enable things
|
||||
splash.plugins_enabled = true
|
||||
splash.request_body_enabled = true
|
||||
splash.response_body_enabled = true
|
||||
|
||||
splash.indexeddb_enabled = true
|
||||
splash.html5_media_enabled = true
|
||||
splash.http2_enabled = true
|
||||
|
||||
-- User Agent
|
||||
splash:set_user_agent(args.user_agent)
|
||||
|
||||
-- User defined
|
||||
splash.resource_timeout = args.resource_timeout
|
||||
splash.timeout = args.timeout
|
||||
|
||||
-- Allow to pass cookies
|
||||
splash:init_cookies(args.cookies)
|
||||
|
||||
-- Run
|
||||
ok, reason = splash:go{args.url}
|
||||
if not ok and not reason:find("http") then
|
||||
return {
|
||||
error = reason,
|
||||
last_url = splash:url()
|
||||
}
|
||||
end
|
||||
if reason == "http504" then
|
||||
splash:set_result_status_code(504)
|
||||
return ''
|
||||
end
|
||||
|
||||
splash:wait{args.wait}
|
||||
-- Page instrumentation
|
||||
-- splash.scroll_position = {y=1000}
|
||||
-- splash:wait{args.wait}
|
||||
-- Response
|
||||
return {
|
||||
har = splash:har(),
|
||||
html = splash:html(),
|
||||
png = splash:png{render_all=true},
|
||||
cookies = splash:get_cookies(),
|
||||
last_url = splash:url(),
|
||||
}
|
||||
end
|
||||
"""
|
||||
|
||||
class TorSplashCrawler():
|
||||
|
||||
def __init__(self, splash_url, crawler_options):
|
||||
self.process = CrawlerProcess({'LOG_ENABLED': True})
|
||||
self.crawler = Crawler(self.TorSplashSpider, {
|
||||
'USER_AGENT': crawler_options['user_agent'], # /!\ overwritten by lua script
|
||||
'SPLASH_URL': splash_url,
|
||||
'ROBOTSTXT_OBEY': False,
|
||||
'DOWNLOADER_MIDDLEWARES': {'scrapy_splash.SplashCookiesMiddleware': 723,
|
||||
'scrapy_splash.SplashMiddleware': 725,
|
||||
'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware': 810,
|
||||
'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,
|
||||
},
|
||||
'SPIDER_MIDDLEWARES': {'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,},
|
||||
'DUPEFILTER_CLASS': 'scrapy_splash.SplashAwareDupeFilter',
|
||||
'HTTPERROR_ALLOW_ALL': True,
|
||||
'RETRY_TIMES': 2,
|
||||
'CLOSESPIDER_PAGECOUNT': crawler_options['closespider_pagecount'],
|
||||
'DEPTH_LIMIT': crawler_options['depth_limit'],
|
||||
'SPLASH_COOKIES_DEBUG': False
|
||||
})
|
||||
|
||||
def crawl(self, splash_url, type, crawler_options, date, requested_mode, url, domain, port, cookies, original_item):
|
||||
self.process.crawl(self.crawler, splash_url=splash_url, type=type, crawler_options=crawler_options, date=date, requested_mode=requested_mode, url=url, domain=domain, port=port, cookies=cookies, original_item=original_item)
|
||||
self.process.start()
|
||||
|
||||
class TorSplashSpider(Spider):
|
||||
name = 'TorSplashSpider'
|
||||
|
||||
def __init__(self, splash_url, type, crawler_options, date, requested_mode, url, domain, port, cookies, original_item, *args, **kwargs):
|
||||
self.splash_url = splash_url
|
||||
self.domain_type = type
|
||||
self.requested_mode = requested_mode
|
||||
self.original_item = original_item
|
||||
self.root_key = None
|
||||
self.start_urls = url
|
||||
self.domains = [domain]
|
||||
self.port = str(port)
|
||||
date_str = '{}/{}/{}'.format(date['date_day'][0:4], date['date_day'][4:6], date['date_day'][6:8])
|
||||
self.full_date = date['date_day']
|
||||
self.date_month = date['date_month']
|
||||
self.date_epoch = int(date['epoch'])
|
||||
|
||||
self.user_agent = crawler_options['user_agent']
|
||||
self.png = crawler_options['png']
|
||||
self.har = crawler_options['har']
|
||||
self.cookies = cookies
|
||||
|
||||
config_section = 'Crawler'
|
||||
self.p = Process(config_section)
|
||||
self.item_dir = os.path.join(self.p.config.get("Directories", "crawled"), date_str )
|
||||
|
||||
config_loader = ConfigLoader.ConfigLoader()
|
||||
self.har_dir = os.path.join(config_loader.get_files_directory('har') , date_str )
|
||||
config_loader = None
|
||||
|
||||
self.r_serv_log_submit = redis.StrictRedis(
|
||||
host=self.p.config.get("Redis_Log_submit", "host"),
|
||||
port=self.p.config.getint("Redis_Log_submit", "port"),
|
||||
db=self.p.config.getint("Redis_Log_submit", "db"),
|
||||
decode_responses=True)
|
||||
|
||||
self.root_key = None
|
||||
|
||||
def build_request_arg(self, cookies):
|
||||
return {'wait': 10,
|
||||
'resource_timeout': 30, # /!\ Weird behaviour if timeout < resource_timeout /!\
|
||||
'timeout': 30,
|
||||
'user_agent': self.user_agent,
|
||||
'cookies': cookies,
|
||||
'lua_source': script_cookie
|
||||
}
|
||||
|
||||
def start_requests(self):
|
||||
l_cookies = self.build_request_arg(self.cookies)
|
||||
yield SplashRequest(
|
||||
self.start_urls,
|
||||
self.parse,
|
||||
errback=self.errback_catcher,
|
||||
endpoint='execute',
|
||||
meta={'father': self.original_item, 'current_url': self.start_urls},
|
||||
args=l_cookies
|
||||
)
|
||||
|
||||
# # TODO: remove duplicate and anchor
|
||||
def parse(self,response):
|
||||
#print(response.headers)
|
||||
#print(response.status)
|
||||
#print(response.meta)
|
||||
#print(response.data) # # TODO: handle lua script error
|
||||
#{'type': 'ScriptError', 'info': {'error': "'}' expected (to close '{' at line 47) near 'error_retry'",
|
||||
#'message': '[string "..."]:53: \'}\' expected (to close \'{\' at line 47) near \'error_retry\'',
|
||||
#'type': 'LUA_INIT_ERROR', 'source': '[string "..."]', 'line_number': 53},
|
||||
#'error': 400, 'description': 'Error happened while executing Lua script'}
|
||||
if response.status == 504:
|
||||
# no response
|
||||
#print('504 detected')
|
||||
pass
|
||||
|
||||
# LUA ERROR # # TODO: logs errors
|
||||
elif 'error' in response.data:
|
||||
if(response.data['error'] == 'network99'):
|
||||
## splash restart ##
|
||||
error_retry = response.meta.get('error_retry', 0)
|
||||
if error_retry < 3:
|
||||
error_retry += 1
|
||||
url = response.data['last_url']
|
||||
father = response.meta['father']
|
||||
|
||||
self.logger.error('Splash, ResponseNeverReceived for %s, retry in 10s ...', url)
|
||||
time.sleep(10)
|
||||
if 'cookies' in response.data:
|
||||
all_cookies = response.data['cookies'] # # TODO: use initial cookie ?????
|
||||
else:
|
||||
all_cookies = []
|
||||
l_cookies = self.build_request_arg(all_cookies)
|
||||
yield SplashRequest(
|
||||
url,
|
||||
self.parse,
|
||||
errback=self.errback_catcher,
|
||||
endpoint='execute',
|
||||
dont_filter=True,
|
||||
meta={'father': father, 'current_url': url, 'error_retry': error_retry},
|
||||
args=l_cookies
|
||||
)
|
||||
else:
|
||||
if self.requested_mode == 'test':
|
||||
crawlers.save_test_ail_crawlers_result(False, 'Connection to proxy refused')
|
||||
print('Connection to proxy refused')
|
||||
elif response.data['error'] == 'network3':
|
||||
if self.requested_mode == 'test':
|
||||
crawlers.save_test_ail_crawlers_result(False, 'HostNotFoundError: the remote host name was not found (invalid hostname)')
|
||||
print('HostNotFoundError: the remote host name was not found (invalid hostname)')
|
||||
else:
|
||||
if self.requested_mode == 'test':
|
||||
crawlers.save_test_ail_crawlers_result(False, response.data['error'])
|
||||
print(response.data['error'])
|
||||
|
||||
elif response.status != 200:
|
||||
print('other response: {}'.format(response.status))
|
||||
# detect connection to proxy refused
|
||||
error_log = (json.loads(response.body.decode()))
|
||||
print(error_log)
|
||||
#elif crawlers.is_redirection(self.domains[0], response.data['last_url']):
|
||||
# pass # ignore response
|
||||
else:
|
||||
## TEST MODE ##
|
||||
if self.requested_mode == 'test':
|
||||
if 'It works!' in response.data['html']:
|
||||
crawlers.save_test_ail_crawlers_result(True, 'It works!')
|
||||
else:
|
||||
print('TEST ERROR')
|
||||
crawlers.save_test_ail_crawlers_result(False, 'TEST ERROR')
|
||||
return
|
||||
## -- ##
|
||||
|
||||
item_id = crawlers.create_item_id(self.item_dir, self.domains[0])
|
||||
self.save_crawled_item(item_id, response.data['html'])
|
||||
crawlers.create_item_metadata(item_id, self.domains[0], response.data['last_url'], self.port, response.meta['father'])
|
||||
|
||||
if self.root_key is None:
|
||||
self.root_key = item_id
|
||||
crawlers.add_domain_root_item(item_id, self.domain_type, self.domains[0], self.date_epoch, self.port)
|
||||
crawlers.create_domain_metadata(self.domain_type, self.domains[0], self.port, self.full_date, self.date_month)
|
||||
|
||||
if 'cookies' in response.data:
|
||||
all_cookies = response.data['cookies']
|
||||
else:
|
||||
all_cookies = []
|
||||
|
||||
# SCREENSHOT
|
||||
if 'png' in response.data and self.png:
|
||||
sha256_string = Screenshot.save_crawled_screeshot(response.data['png'], 5000000, f_save=self.requested_mode)
|
||||
if sha256_string:
|
||||
Screenshot.save_item_relationship(sha256_string, item_id)
|
||||
Screenshot.save_domain_relationship(sha256_string, self.domains[0])
|
||||
# HAR
|
||||
if 'har' in response.data and self.har:
|
||||
crawlers.save_har(self.har_dir, item_id, response.data['har'])
|
||||
|
||||
le = LinkExtractor(allow_domains=self.domains, unique=True)
|
||||
for link in le.extract_links(response):
|
||||
l_cookies = self.build_request_arg(all_cookies)
|
||||
yield SplashRequest(
|
||||
link.url,
|
||||
self.parse,
|
||||
errback=self.errback_catcher,
|
||||
endpoint='execute',
|
||||
meta={'father': item_id, 'current_url': link.url},
|
||||
args=l_cookies
|
||||
)
|
||||
|
||||
def errback_catcher(self, failure):
|
||||
# catch all errback failures,
|
||||
self.logger.error(repr(failure))
|
||||
|
||||
if failure.check(ResponseNeverReceived):
|
||||
## DEBUG ##
|
||||
self.logger.error(failure.request)
|
||||
if failure.value.response:
|
||||
self.logger.error(failure.value.response)
|
||||
## ----- ##
|
||||
|
||||
# Extract request metadata
|
||||
url = failure.request.meta['current_url']
|
||||
father = failure.request.meta['father']
|
||||
l_cookies = self.build_request_arg(failure.request.meta['splash']['args']['cookies'])
|
||||
|
||||
# Check if Splash restarted
|
||||
if not crawlers.is_splash_reachable(self.splash_url):
|
||||
self.logger.error('Splash, ResponseNeverReceived for %s, retry in 30s ...', url)
|
||||
time.sleep(30)
|
||||
|
||||
yield SplashRequest(
|
||||
url,
|
||||
self.parse,
|
||||
errback=self.errback_catcher,
|
||||
endpoint='execute',
|
||||
meta={'father': father, 'current_url': url},
|
||||
args=l_cookies
|
||||
)
|
||||
|
||||
else:
|
||||
self.logger.error(failure.type)
|
||||
self.logger.error(failure.getErrorMessage())
|
||||
|
||||
def save_crawled_item(self, item_id, item_content):
|
||||
gzip64encoded = crawlers.save_crawled_item(item_id, item_content)
|
||||
|
||||
# Send item to queue
|
||||
# send paste to Global
|
||||
relay_message = "{0} {1}".format(item_id, gzip64encoded)
|
||||
self.p.populate_set_out(relay_message, 'Mixer')
|
||||
|
||||
# increase nb of paste by feeder name
|
||||
self.r_serv_log_submit.hincrby("mixer_cache:list_feeder", "crawler", 1)
|
||||
|
||||
# tag crawled paste
|
||||
msg = 'infoleak:submission="crawler";{}'.format(item_id)
|
||||
self.p.populate_set_out(msg, 'Tags')
|
|
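Note: this deleted Splash spider is what the commit replaces with Lacus (pylacus is added to the requirements below). A heavily hedged sketch of the equivalent capture flow; the endpoint URL and proxy are placeholders, and the pylacus method names should be checked against the installed version:

    from pylacus import PyLacus   # dependency added in this commit

    lacus = PyLacus('http://127.0.0.1:7100')   # hypothetical Lacus instance
    if lacus.is_up:
        capture_uuid = lacus.enqueue(url='http://example.onion', depth=0,
                                     proxy='socks5://127.0.0.1:9050')
        status = lacus.get_capture_status(capture_uuid)
        capture = lacus.get_capture(capture_uuid)   # html / har / screenshot once finished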
@ -1,80 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
issplashed=`screen -ls | egrep '[0-9]+.Docker_Splash' | cut -d. -f1`
|
||||
|
||||
usage() { echo "Usage: sudo $0 [-f <config_absolute_path>] [-p <port_start>] [-n <number_of_splash_servers>]" 1>&2;
|
||||
echo " -f: absolute path to splash docker proxy-profiles directory (used for proxy configuration)";
|
||||
echo " -p: number of the first splash server port number. This number is incremented for the others splash server";
|
||||
echo " -n: number of splash servers to start";
|
||||
echo "";
|
||||
echo " -options:";
|
||||
echo " -u: max unbound in-memory cache (Mb, Restart Splash when full, default=3000 Mb)";
|
||||
echo "";
|
||||
echo "example:";
|
||||
echo "sudo ./launch_splash_crawler.sh -f /home/my_user/AIL-framework/configs/docker/splash_onion/etc/splash/proxy-profiles/ -p 8050 -n 3";
|
||||
exit 1;
|
||||
}
|
||||
|
||||
while getopts ":p:f:n:u:" o; do
|
||||
case "${o}" in
|
||||
p)
|
||||
p=${OPTARG}
|
||||
;;
|
||||
f)
|
||||
f=${OPTARG}
|
||||
;;
|
||||
n)
|
||||
n=${OPTARG}
|
||||
;;
|
||||
u)
|
||||
u=${OPTARG}
|
||||
;;
|
||||
*)
|
||||
usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
shift $((OPTIND-1))
|
||||
|
||||
if [ -z "${p}" ] || [ -z "${f}" ] || [ -z "${n}" ]; then
|
||||
usage;
|
||||
fi
|
||||
|
||||
RED="\\033[1;31m"
|
||||
DEFAULT="\\033[0;39m"
|
||||
GREEN="\\033[1;32m"
|
||||
WHITE="\\033[0;02m"
|
||||
|
||||
if [ "$EUID" -ne 0 ]; then
|
||||
echo -e $RED"\t* Please run as root or sudo.\n"$DEFAULT
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -d "${f}" ]; then
|
||||
printf "$RED\n Error -f, proxy-profiles directory: $WHITE${f}$RED not found\n$DEFAULT Please check if you enter the correct path\n"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -f "${f}default.ini" ]; then
|
||||
printf "$RED\n Error -f, proxy configuration file:$WHITE default.ini$RED not found\n$DEFAULT Please check if you enter the correct path\n"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ $issplashed ]]; then
|
||||
echo -e $RED"\t* A screen is already launched, please kill it before creating another one."$DEFAULT
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -z "${u}" ]; then
|
||||
u=3000;
|
||||
fi
|
||||
|
||||
screen -dmS "Docker_Splash"
|
||||
sleep 0.1
|
||||
|
||||
for ((i=0;i<=$((${n} - 1));i++)); do
|
||||
port_number=$((${p} + $i))
|
||||
screen -S "Docker_Splash" -X screen -t "docker_splash:$port_number" bash -c 'sudo docker run -d -p '$port_number':8050 --restart=always --cpus=1 --memory=2G -v '$f':/etc/splash/proxy-profiles/ --net="bridge" scrapinghub/splash --maxrss '$u'; read x'
|
||||
sleep 0.1
|
||||
printf "$GREEN Splash server launched on port $port_number$DEFAULT\n"
|
||||
done
|
|
@ -1,53 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import redis
|
||||
from TorSplashCrawler import TorSplashCrawler
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
||||
import ConfigLoader
|
||||
import crawlers
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
if len(sys.argv) != 2:
|
||||
print('usage:', 'tor_crawler.py', 'uuid')
|
||||
exit(1)
|
||||
|
||||
|
||||
config_loader = ConfigLoader.ConfigLoader()
|
||||
redis_cache = config_loader.get_redis_conn("Redis_Cache")
|
||||
config_loader = None
|
||||
|
||||
# get crawler config key
|
||||
uuid = sys.argv[1]
|
||||
|
||||
# get configs
|
||||
crawler_json = json.loads(redis_cache.get('crawler_request:{}'.format(uuid)))
|
||||
|
||||
splash_url = crawler_json['splash_url']
|
||||
service_type = crawler_json['service_type']
|
||||
url = crawler_json['url']
|
||||
domain = crawler_json['domain']
|
||||
port = crawler_json['port']
|
||||
original_item = crawler_json['item']
|
||||
crawler_options = crawler_json['crawler_options']
|
||||
date = crawler_json['date']
|
||||
requested_mode = crawler_json['requested']
|
||||
|
||||
if crawler_options['cookiejar_uuid']:
|
||||
cookies = crawlers.load_crawler_cookies(crawler_options['cookiejar_uuid'], domain, crawler_type=service_type)
|
||||
else:
|
||||
cookies = []
|
||||
|
||||
redis_cache.delete('crawler_request:{}'.format(uuid))
|
||||
|
||||
try:
|
||||
crawler = TorSplashCrawler(splash_url, crawler_options)
|
||||
crawler.crawl(splash_url, service_type, crawler_options, date, requested_mode, url, domain, port, cookies, original_item)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
print(e, file=sys.stderr)
|
|
@ -10,7 +10,6 @@ The Retro_Hunt trackers module
|
|||
# Import External packages
|
||||
##################################
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import yara
|
||||
|
@ -20,15 +19,15 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
# Import Project packages
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages.Item import Item
|
||||
from packages.Item import Date
|
||||
from lib.objects.Items import Item
|
||||
from packages import Date
|
||||
from lib import Tracker
|
||||
|
||||
import NotificationHelper # # TODO: refactor
|
||||
|
||||
class Retro_Hunt(AbstractModule):
|
||||
|
||||
#mail_body_template = "AIL Framework,\nNew YARA match: {}\nitem id: {}\nurl: {}{}"
|
||||
# mail_body_template = "AIL Framework,\nNew YARA match: {}\nitem id: {}\nurl: {}{}"
|
||||
|
||||
"""
|
||||
Retro_Hunt module for AIL framework
|
||||
|
@ -39,9 +38,6 @@ class Retro_Hunt(AbstractModule):
|
|||
|
||||
self.full_item_url = self.process.config.get("Notifications", "ail_domain") + "/object/item?id="
|
||||
|
||||
self.refresh_deleta = 10
|
||||
self.last_refresh = 0
|
||||
|
||||
# reset on each loop
|
||||
self.task_uuid = None
|
||||
self.date_from = 0
|
||||
|
@ -49,13 +45,12 @@ class Retro_Hunt(AbstractModule):
|
|||
self.nb_src_done = 0
|
||||
self.progress = 0
|
||||
self.item = None
|
||||
self.tags = []
|
||||
|
||||
self.redis_logger.info(f"Module: {self.module_name} Launched")
|
||||
|
||||
|
||||
# # TODO: send mails
|
||||
# # TODO: # start_time
|
||||
# end_time
|
||||
# # TODO: # start_time # end_time
|
||||
|
||||
def compute(self, task_uuid):
|
||||
self.redis_logger.warning(f'{self.module_name}, starting Retro hunt task {task_uuid}')
|
||||
|
@ -75,7 +70,7 @@ class Retro_Hunt(AbstractModule):
|
|||
self.tags = Tracker.get_retro_hunt_task_tags(task_uuid)
|
||||
curr_date = Tracker.get_retro_hunt_task_current_date(task_uuid)
|
||||
self.nb_src_done = Tracker.get_retro_hunt_task_nb_src_done(task_uuid, sources=sources)
|
||||
self.progress = self.update_progress(sources, curr_date)
|
||||
self.update_progress(sources, curr_date)
|
||||
# iterate on date
|
||||
filter_last = True
|
||||
while int(curr_date) <= int(self.date_to):
|
||||
|
@ -91,14 +86,15 @@ class Retro_Hunt(AbstractModule):
|
|||
self.redis_logger.debug(f'{self.module_name}, Retro Hunt searching in directory {dir}')
|
||||
l_obj = Tracker.get_items_to_analyze(dir)
|
||||
for id in l_obj:
|
||||
#print(f'{dir} / {id}')
|
||||
# print(f'{dir} / {id}')
|
||||
self.item = Item(id)
|
||||
# save current item in cache
|
||||
Tracker.set_cache_retro_hunt_task_id(task_uuid, id)
|
||||
|
||||
self.redis_logger.debug(f'{self.module_name}, Retro Hunt rule {task_uuid}, searching item {id}')
|
||||
|
||||
yara_match = rule.match(data=self.item.get_content(), callback=self.yara_rules_match, which_callbacks=yara.CALLBACK_MATCHES, timeout=timeout)
|
||||
yara_match = rule.match(data=self.item.get_content(), callback=self.yara_rules_match,
|
||||
which_callbacks=yara.CALLBACK_MATCHES, timeout=timeout)
|
||||
|
||||
# save last item
|
||||
if nb_id % 10 == 0: # # TODO: Add nb before save in DB
|
||||
|
@ -110,7 +106,7 @@ class Retro_Hunt(AbstractModule):
|
|||
self.update_progress(sources, curr_date)
|
||||
if Tracker.check_retro_hunt_pause(task_uuid):
|
||||
Tracker.set_retro_hunt_last_analyzed(task_uuid, id)
|
||||
#self.update_progress(sources, curr_date, save_db=True)
|
||||
# self.update_progress(sources, curr_date, save_db=True)
|
||||
Tracker.pause_retro_hunt_task(task_uuid)
|
||||
Tracker.clear_retro_hunt_task_cache(task_uuid)
|
||||
return None
|
||||
|
@ -142,7 +138,7 @@ class Retro_Hunt(AbstractModule):
|
|||
|
||||
def yara_rules_match(self, data):
|
||||
id = self.item.get_id()
|
||||
#print(data)
|
||||
# print(data)
|
||||
task_uuid = data['namespace']
|
||||
|
||||
self.redis_logger.info(f'{self.module_name}, Retro hunt {task_uuid} match found: {id}')
|
||||
|
@ -177,9 +173,9 @@ class Retro_Hunt(AbstractModule):
|
|||
if task_uuid:
|
||||
# Module processing with the message from the queue
|
||||
self.redis_logger.debug(task_uuid)
|
||||
#try:
|
||||
# try:
|
||||
self.compute(task_uuid)
|
||||
#except Exception as err:
|
||||
# except Exception as err:
|
||||
# self.redis_logger.error(f'Error in module {self.module_name}: {err}')
|
||||
# # Remove uuid ref
|
||||
# self.remove_submit_uuid(uuid)
|
||||
|
|
|
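Note: Retro_Hunt (and Tracker_Yara below) use yara-python's callback interface shown in the reformatted match() calls. A self-contained example of that pattern (the rule source is illustrative):

    import yara

    rules = yara.compile(source='rule demo { strings: $a = "password" condition: $a }')

    def on_match(data):
        # data carries 'rule', 'namespace', 'strings', ... for each matching rule
        print(f"match: {data['rule']} ({data['namespace']})")
        return yara.CALLBACK_CONTINUE

    rules.match(data=b'leaked password dump', callback=on_match,
                which_callbacks=yara.CALLBACK_MATCHES, timeout=60)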
@ -9,7 +9,6 @@ It processes every item coming from the global module and test the regex
|
|||
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import requests
|
||||
|
@ -19,10 +18,9 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
# Import Project packages
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages.Item import Item
|
||||
from lib.objects.Items import Item
|
||||
from packages import Term
|
||||
from lib import Tracker
|
||||
from lib import regex_helper
|
||||
|
||||
import NotificationHelper
|
||||
|
||||
|
@ -42,8 +40,6 @@ class Tracker_Regex(AbstractModule):
|
|||
|
||||
self.full_item_url = self.process.config.get("Notifications", "ail_domain") + "/object/item?id="
|
||||
|
||||
self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
|
||||
|
||||
# refresh Tracked Regex
|
||||
self.dict_regex_tracked = Term.get_regex_tracked_words_dict()
|
||||
self.last_refresh = time.time()
|
||||
|
@ -63,7 +59,7 @@ class Tracker_Regex(AbstractModule):
|
|||
item_content = item.get_content()
|
||||
|
||||
for regex in self.dict_regex_tracked:
|
||||
matched = regex_helper.regex_search(self.module_name, self.redis_cache_key, self.dict_regex_tracked[regex], item_id, item_content, max_time=self.max_execution_time)
|
||||
matched = self.regex_findall(self.dict_regex_tracked[regex], item_id, item_content)
|
||||
if matched:
|
||||
self.new_tracker_found(regex, 'regex', item)
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
import NotificationHelper
|
||||
from packages.Item import Item
|
||||
from lib.objects.Items import Item
|
||||
from packages import Term
|
||||
from lib import Tracker
|
||||
|
||||
|
@ -96,7 +96,7 @@ class Tracker_Term(AbstractModule):
|
|||
# Term.create_token_statistics(item_date, word, dict_words_freq[word])
|
||||
|
||||
# check solo words
|
||||
####### # TODO: check if source needed #######
|
||||
# ###### # TODO: check if source needed #######
|
||||
for word in self.list_tracked_words:
|
||||
if word in dict_words_freq:
|
||||
self.new_term_found(word, 'word', item)
|
||||
|
@ -162,7 +162,6 @@ class Tracker_Term(AbstractModule):
|
|||
self.redis_logger.error(f"Webhook request failed for {webhook_to_post}\nReason: Something went wrong")
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
module = Tracker_Term()
|
||||
module.run()
|
||||
|
|
|
@ -8,7 +8,6 @@
|
|||
# Import External packages
|
||||
##################################
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import yara
|
||||
|
@ -20,7 +19,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages import Term
|
||||
from packages.Item import Item
|
||||
from lib.objects.Items import Item
|
||||
from lib import Tracker
|
||||
|
||||
import NotificationHelper # # TODO: refactor
|
||||
|
@ -46,7 +45,6 @@ class Tracker_Yara(AbstractModule):
|
|||
|
||||
self.redis_logger.info(f"Module: {self.module_name} Launched")
|
||||
|
||||
|
||||
def compute(self, item_id):
|
||||
# refresh YARA list
|
||||
if self.last_refresh < Tracker.get_tracker_last_updated_by_type('yara'):
|
||||
|
@ -58,7 +56,8 @@ class Tracker_Yara(AbstractModule):
|
|||
self.item = Item(item_id)
|
||||
item_content = self.item.get_content()
|
||||
try:
|
||||
yara_match = self.rules.match(data=item_content, callback=self.yara_rules_match, which_callbacks=yara.CALLBACK_MATCHES, timeout=60)
|
||||
yara_match = self.rules.match(data=item_content, callback=self.yara_rules_match,
|
||||
which_callbacks=yara.CALLBACK_MATCHES, timeout=60)
|
||||
if yara_match:
|
||||
self.redis_logger.info(f'{self.item.get_id()}: {yara_match}')
|
||||
print(f'{self.item.get_id()}: {yara_match}')
|
||||
|
@ -116,7 +115,6 @@ class Tracker_Yara(AbstractModule):
|
|||
except:
|
||||
self.redis_logger.error(f"Webhook request failed for {webhook_to_post}\nReason: Something went wrong")
|
||||
|
||||
|
||||
return yara.CALLBACK_CONTINUE
|
||||
|
||||
|
||||
|
|
|
@ -262,14 +262,10 @@ db = 0
|
|||
|
||||
[Crawler]
|
||||
activate_crawler = False
|
||||
crawler_depth_limit = 1
|
||||
default_crawler_har = True
|
||||
default_crawler_png = True
|
||||
default_crawler_closespider_pagecount = 50
|
||||
default_crawler_user_agent = Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0
|
||||
splash_url = http://127.0.0.1
|
||||
splash_port = 8050-8052
|
||||
domain_proxy = onion.foundation
|
||||
default_depth_limit = 1
|
||||
default_har = True
|
||||
default_screenshot = True
|
||||
onion_proxy = onion.foundation
|
||||
|
||||
[IP]
|
||||
# list of comma-separated CIDR that you wish to be alerted for. e.g:
|
||||
|
|
|
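Note: the [Crawler] section is reduced to the keys shown above. A hedged sketch of reading them through AIL's ConfigLoader (getter names follow the rest of the codebase and should be treated as assumptions):

    import os
    import sys
    sys.path.append(os.environ['AIL_BIN'])
    from lib.ConfigLoader import ConfigLoader

    config = ConfigLoader()
    depth      = config.get_config_int('Crawler', 'default_depth_limit')     # 1
    save_har   = config.get_config_boolean('Crawler', 'default_har')         # True
    screenshot = config.get_config_boolean('Crawler', 'default_screenshot')  # True
    proxy      = config.get_config_str('Crawler', 'onion_proxy')             # onion.foundation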
@ -94,7 +94,7 @@ DEFAULT_HOME=$(pwd)
|
|||
#### KVROCKS ####
|
||||
test ! -d kvrocks/ && git clone https://github.com/apache/incubator-kvrocks.git kvrocks
|
||||
pushd kvrocks
|
||||
./build.sh build
|
||||
./x.py build
|
||||
popd
|
||||
|
||||
DEFAULT_KVROCKS_DATA=$DEFAULT_HOME/DATA_KVROCKS
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
pyail
|
||||
pylacus
|
||||
pymisp>=2.4.144
|
||||
d4-pyclient>=0.1.6
|
||||
|
||||
thehive4py
|
||||
|
||||
# Core
|
||||
redis==2.10.6
|
||||
redis==3.0.0
|
||||
python-magic>0.4.15
|
||||
yara-python>4.0.2
|
||||
|
||||
|
|
|
@ -40,6 +40,7 @@ class Test_Module_ApiKey(unittest.TestCase):
|
|||
|
||||
def setUp(self):
|
||||
self.module_obj = ApiKey()
|
||||
self.module_obj.debug = True
|
||||
|
||||
def test_module(self):
|
||||
item_id = 'tests/2021/01/01/api_keys.gz'
|
||||
|
@ -56,6 +57,7 @@ class Test_Module_Categ(unittest.TestCase):
|
|||
|
||||
def setUp(self):
|
||||
self.module_obj = Categ()
|
||||
self.module_obj.debug = True
|
||||
|
||||
def test_module(self):
|
||||
item_id = 'tests/2021/01/01/categ.gz'
|
||||
|
@ -69,6 +71,7 @@ class Test_Module_CreditCards(unittest.TestCase):
|
|||
|
||||
def setUp(self):
|
||||
self.module_obj = CreditCards()
|
||||
self.module_obj.debug = True
|
||||
|
||||
def test_module(self):
|
||||
item_id = 'tests/2021/01/01/credit_cards.gz 7'
|
||||
|
@ -86,6 +89,7 @@ class Test_Module_DomClassifier(unittest.TestCase):
|
|||
|
||||
def setUp(self):
|
||||
self.module_obj = DomClassifier()
|
||||
self.module_obj.debug = True
|
||||
|
||||
def test_module(self):
|
||||
test_host = 'foo.be'
|
||||
|
@ -98,6 +102,7 @@ class Test_Module_Global(unittest.TestCase):
|
|||
|
||||
def setUp(self):
|
||||
self.module_obj = Global()
|
||||
self.module_obj.debug = True
|
||||
|
||||
def test_module(self):
|
||||
# # TODO: delete item
|
||||
|
@ -138,6 +143,7 @@ class Test_Module_Keys(unittest.TestCase):
|
|||
|
||||
def setUp(self):
|
||||
self.module_obj = Keys()
|
||||
self.module_obj.debug = True
|
||||
|
||||
def test_module(self):
|
||||
item_id = 'tests/2021/01/01/keys.gz'
|
||||
|
@ -148,6 +154,7 @@ class Test_Module_Onion(unittest.TestCase):
|
|||
|
||||
def setUp(self):
|
||||
self.module_obj = Onion()
|
||||
self.module_obj.debug = True
|
||||
|
||||
def test_module(self):
|
||||
item_id = 'tests/2021/01/01/onion.gz'
|
||||
|
@ -157,7 +164,7 @@ class Test_Module_Onion(unittest.TestCase):
|
|||
|
||||
self.module_obj.compute(f'{item_id} 3')
|
||||
if crawlers.is_crawler_activated():
|
||||
## check domain queues
|
||||
# # check domain queues
|
||||
# all domains queue
|
||||
self.assertTrue(crawlers.is_domain_in_queue('onion', domain_1))
|
||||
# all url/item queue
|
||||
|
@ -177,11 +184,13 @@ class Test_Module_Telegram(unittest.TestCase):
|
|||
|
||||
def setUp(self):
|
||||
self.module_obj = Telegram()
|
||||
self.module_obj.debug = True
|
||||
|
||||
def test_module(self):
|
||||
item_id = 'tests/2021/01/01/keys.gz'
|
||||
# # TODO: check results
|
||||
result = self.module_obj.compute(item_id)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
@ -12,10 +12,8 @@ Requirements:
|
|||
|
||||
|
||||
"""
|
||||
import redis
|
||||
import os
|
||||
import time
|
||||
from packages import Paste
|
||||
from pubsublogger import publisher
|
||||
from Helper import Process
|
||||
|
|
@ -37,8 +37,9 @@ def get_object_correlation_json(correlation_id, subtype, max_nodes):
|
|||
object_type = 'cryptocurrency'
|
||||
max_nodes = sanitise_nb_max_nodes(max_nodes)
|
||||
|
||||
# FIXME
|
||||
# ALL correlations
|
||||
correlation_names = Correlate_object.sanitise_correlation_names('')
|
||||
#correlation_names = Correlate_object.sanitise_correlation_names('')
|
||||
#correlation_objects = Correlate_object.sanitise_correlation_objects('')
|
||||
correlation_objects = ['domain']
|
||||
|
||||
|
|
|
@ -29,12 +29,10 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
from lib.Users import User
|
||||
from lib import Tag
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
||||
import ConfigLoader
|
||||
|
||||
# Import config
|
||||
import Flask_config
|
||||
|
||||
|
@ -50,14 +48,14 @@ from blueprints.hunters import hunters
|
|||
from blueprints.old_endpoints import old_endpoints
|
||||
from blueprints.ail_2_ail_sync import ail_2_ail_sync
|
||||
from blueprints.settings_b import settings_b
|
||||
from blueprints.objects_cve import objects_cve
|
||||
from blueprints.objects_decoded import objects_decoded
|
||||
from blueprints.objects_range import objects_range
|
||||
|
||||
|
||||
Flask_dir = os.environ['AIL_FLASK']
|
||||
|
||||
# CONFIG #
|
||||
config_loader = ConfigLoader.ConfigLoader()
|
||||
config_loader = ConfigLoader()
|
||||
baseUrl = config_loader.get_config_str("Flask", "baseurl")
|
||||
host = config_loader.get_config_str("Flask", "host")
|
||||
baseUrl = baseUrl.replace('/', '')
|
||||
|
@ -111,8 +109,8 @@ app.register_blueprint(hunters, url_prefix=baseUrl)
|
|||
app.register_blueprint(old_endpoints, url_prefix=baseUrl)
|
||||
app.register_blueprint(ail_2_ail_sync, url_prefix=baseUrl)
|
||||
app.register_blueprint(settings_b, url_prefix=baseUrl)
|
||||
app.register_blueprint(objects_cve, url_prefix=baseUrl)
|
||||
app.register_blueprint(objects_decoded, url_prefix=baseUrl)
|
||||
app.register_blueprint(objects_range, url_prefix=baseUrl)
|
||||
# ========= =========#
|
||||
|
||||
# ========= Cookie name ========
|
||||
|
@ -162,33 +160,32 @@ for root, dirs, files in os.walk(os.path.join(Flask_dir, 'modules')):
|
|||
if name == 'Flask_config.py':
|
||||
continue
|
||||
name = name.strip('.py')
|
||||
#print('importing {}'.format(name))
|
||||
importlib.import_module(name)
|
||||
elif name == 'header_{}.html'.format(module_name):
|
||||
with open(join(root, name), 'r') as f:
|
||||
to_add_to_header_dico[module_name] = f.read()
|
||||
|
||||
#create header.html
|
||||
# create header.html
|
||||
complete_header = ""
|
||||
with open(os.path.join(Flask_dir, 'templates', 'header_base.html'), 'r') as f:
|
||||
complete_header = f.read()
|
||||
modified_header = complete_header
|
||||
|
||||
#Add the header in the supplied order
|
||||
# Add the header in the supplied order
|
||||
for module_name, txt in list(to_add_to_header_dico.items()):
|
||||
to_replace = '<!--{}-->'.format(module_name)
|
||||
if to_replace in complete_header:
|
||||
modified_header = modified_header.replace(to_replace, txt)
|
||||
del to_add_to_header_dico[module_name]
|
||||
|
||||
#Add the header for no-supplied order
|
||||
# Add the header for no-supplied order
|
||||
to_add_to_header = []
|
||||
for module_name, txt in to_add_to_header_dico.items():
|
||||
to_add_to_header.append(txt)
|
||||
|
||||
modified_header = modified_header.replace('<!--insert here-->', '\n'.join(to_add_to_header))
|
||||
|
||||
#Write the header.html file
|
||||
# Write the header.html file
|
||||
with open(os.path.join(Flask_dir, 'templates', 'header.html'), 'w') as f:
|
||||
f.write(modified_header)
|
||||
|
||||
|
@ -250,6 +247,7 @@ def page_not_found(e):
|
|||
# avoid endpoint enumeration
|
||||
return render_template('error/404.html'), 404
|
||||
|
||||
|
||||
# ========== INITIAL taxonomies ============
|
||||
default_taxonomies = ["infoleak", "gdpr", "fpf", "dark-web"]
|
||||
|
||||
|
|
|
@ -26,22 +26,6 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
##################################
|
||||
from lib.objects import ail_objects
|
||||
|
||||
################################################################################
|
||||
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
||||
import Correlate_object
|
||||
import Domain
|
||||
import Screenshot
|
||||
import btc_ail
|
||||
import Username
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
|
||||
import Cryptocurrency
|
||||
import Pgp
|
||||
import Decoded
|
||||
import Tag
|
||||
|
||||
bootstrap_label = Flask_config.bootstrap_label
|
||||
vt_enabled = Flask_config.vt_enabled
|
||||
|
||||
|
@ -74,77 +58,15 @@ def sanitise_nb_max_nodes(nb_max_nodes):
|
|||
nb_max_nodes = 300
|
||||
return nb_max_nodes
|
||||
|
||||
def sanitise_correlation_names(correlation_names):
|
||||
'''
|
||||
correlation_names ex = 'pgp,crypto'
|
||||
'''
|
||||
all_correlation_names = Correlate_object.get_all_correlation_names()
|
||||
if correlation_names is None:
|
||||
return all_correlation_names
|
||||
else:
|
||||
l_correlation_names = []
|
||||
for correl in correlation_names.split(','):
|
||||
if correl in all_correlation_names:
|
||||
l_correlation_names.append(correl)
|
||||
if l_correlation_names:
|
||||
return l_correlation_names
|
||||
else:
|
||||
return all_correlation_names
|
||||
|
||||
def sanitise_correlation_objects(correlation_objects):
|
||||
'''
|
||||
correlation_objects ex = 'domain,decoded'
|
||||
'''
|
||||
all_correlation_objects = Correlate_object.get_all_correlation_objects()
|
||||
if correlation_objects is None:
|
||||
return all_correlation_objects
|
||||
else:
|
||||
l_correlation_objects = []
|
||||
for correl in correlation_objects.split(','):
|
||||
if correl in all_correlation_objects:
|
||||
l_correlation_objects.append(correl)
|
||||
if l_correlation_objects:
|
||||
return l_correlation_objects
|
||||
else:
|
||||
return all_correlation_objects
|
||||
|
||||
def get_card_metadata(object_type, correlation_id, type_id=None, expand_card=False):
|
||||
card_dict = {}
|
||||
if object_type == 'cryptocurrency':
|
||||
card_dict["sparkline"] = Cryptocurrency.cryptocurrency.get_list_nb_previous_correlation_object(type_id, correlation_id, 6)
|
||||
card_dict["icon"] = Correlate_object.get_correlation_node_icon(object_type, type_id)
|
||||
if type_id == 'bitcoin' and expand_card:
|
||||
card_dict["related_btc"] = btc_ail.get_bitcoin_info(correlation_id)
|
||||
elif object_type == 'pgp':
|
||||
card_dict["sparkline"] = Pgp.pgp.get_list_nb_previous_correlation_object(type_id, correlation_id, 6)
|
||||
card_dict["icon"] = Correlate_object.get_correlation_node_icon(object_type, type_id)
|
||||
elif object_type == 'username':
|
||||
card_dict["sparkline"] = Username.correlation.get_list_nb_previous_correlation_object(type_id, correlation_id, 6)
|
||||
card_dict["icon"] = Correlate_object.get_correlation_node_icon(object_type, type_id)
|
||||
elif object_type == 'decoded':
|
||||
card_dict["sparkline"] = Decoded.get_list_nb_previous_hash(correlation_id, 6)
|
||||
card_dict["icon"] = Correlate_object.get_correlation_node_icon(object_type, value=correlation_id)
|
||||
card_dict["vt"] = Decoded.get_decoded_vt_report(correlation_id)
|
||||
card_dict["vt"]["status"] = vt_enabled
|
||||
card_dict["add_tags_modal"] = Tag.get_modal_add_tags(correlation_id, object_type='decoded')
|
||||
elif object_type == 'domain':
|
||||
card_dict["icon"] = Correlate_object.get_correlation_node_icon(object_type, value=correlation_id)
|
||||
card_dict["tags"] = Domain.get_domain_tags(correlation_id)
|
||||
elif object_type == 'screenshot':
|
||||
card_dict["add_tags_modal"] = Tag.get_modal_add_tags(correlation_id, object_type='image')
|
||||
elif object_type == 'paste':
|
||||
card_dict["icon"] = Correlate_object.get_correlation_node_icon(object_type, value=correlation_id)
|
||||
return card_dict
|
||||
|
||||
# ============= ROUTES ==============
|
||||
@correlation.route('/correlation/show_correlation', methods=['GET', 'POST']) # GET + POST
|
||||
@correlation.route('/correlation/show', methods=['GET', 'POST']) # GET + POST
|
||||
@login_required
|
||||
@login_read_only
|
||||
def show_correlation():
|
||||
if request.method == 'POST':
|
||||
object_type = request.form.get('object_type')
|
||||
type_id = request.form.get('type_id')
|
||||
correlation_id = request.form.get('correlation_id')
|
||||
object_type = request.form.get('obj_type')
|
||||
subtype = request.form.get('subtype')
|
||||
obj_id = request.form.get('obj_id')
|
||||
max_nodes = request.form.get('max_nb_nodes_in')
|
||||
mode = request.form.get('mode')
|
||||
if mode:
|
||||
|
@ -153,73 +75,71 @@ def show_correlation():
|
|||
mode = 'union'
|
||||
|
||||
## get all selected correlations
|
||||
correlation_names = []
|
||||
correlation_objects = []
|
||||
#correlation_names
|
||||
filter_types = []
|
||||
correl_option = request.form.get('CveCheck')
|
||||
if correl_option:
|
||||
filter_types.append('cve')
|
||||
correl_option = request.form.get('CryptocurrencyCheck')
|
||||
if correl_option:
|
||||
correlation_names.append('cryptocurrency')
|
||||
filter_types.append('cryptocurrency')
|
||||
correl_option = request.form.get('PgpCheck')
|
||||
if correl_option:
|
||||
correlation_names.append('pgp')
|
||||
filter_types.append('pgp')
|
||||
correl_option = request.form.get('UsernameCheck')
|
||||
if correl_option:
|
||||
correlation_names.append('username')
|
||||
filter_types.append('username')
|
||||
correl_option = request.form.get('DecodedCheck')
|
||||
if correl_option:
|
||||
correlation_names.append('decoded')
|
||||
filter_types.append('decoded')
|
||||
correl_option = request.form.get('ScreenshotCheck')
|
||||
if correl_option:
|
||||
correlation_names.append('screenshot')
|
||||
filter_types.append('screenshot')
|
||||
# correlation_objects
|
||||
correl_option = request.form.get('DomainCheck')
|
||||
if correl_option:
|
||||
correlation_objects.append('domain')
|
||||
correl_option = request.form.get('PasteCheck')
|
||||
filter_types.append('domain')
|
||||
correl_option = request.form.get('ItemCheck')
|
||||
if correl_option:
|
||||
correlation_objects.append('item')
|
||||
filter_types.append('item')
|
||||
|
||||
# list as params
|
||||
correlation_names = ",".join(correlation_names)
|
||||
correlation_objects = ",".join(correlation_objects)
|
||||
filter_types = ",".join(filter_types)
|
||||
|
||||
# redirect to keep history and bookmark
|
||||
return redirect(url_for('correlation.show_correlation', object_type=object_type, type_id=type_id, correlation_id=correlation_id, mode=mode,
|
||||
max_nodes=max_nodes, correlation_names=correlation_names, correlation_objects=correlation_objects))
|
||||
return redirect(url_for('correlation.show_correlation', type=object_type, subtype=subtype, id=obj_id, mode=mode,
|
||||
max_nodes=max_nodes, filter=filter_types))
|
||||
|
||||
# request.method == 'GET'
|
||||
else:
|
||||
object_type = request.args.get('object_type')
|
||||
type_id = request.args.get('type_id')
|
||||
correlation_id = request.args.get('correlation_id')
|
||||
obj_type = request.args.get('type')
|
||||
subtype = request.args.get('subtype', '')
|
||||
obj_id = request.args.get('id')
|
||||
max_nodes = sanitise_nb_max_nodes(request.args.get('max_nodes'))
|
||||
mode = sanitise_graph_mode(request.args.get('mode'))
|
||||
|
||||
expand_card = request.args.get('expand_card')
|
||||
related_btc = bool(request.args.get('expand_card', False))
|
||||
|
||||
correlation_names = ail_objects.sanitize_objs_types(request.args.get('correlation_names', '').split(','))
|
||||
correlation_objects = ail_objects.sanitize_objs_types(request.args.get('correlation_objects', '').split(','))
|
||||
filter_types = ail_objects.sanitize_objs_types(request.args.get('filter', '').split(','))
|
||||
|
||||
# # TODO: remove me, rename screenshot to image
|
||||
if object_type == 'image':
|
||||
object_type == 'screenshot'
|
||||
if obj_type == 'image':
|
||||
obj_type = 'screenshot'
|
||||
|
||||
# check if correlation_id exist
|
||||
if not Correlate_object.exist_object(object_type, correlation_id, type_id=type_id):
|
||||
# check if obj_id exist
|
||||
if not ail_objects.exists_obj(obj_type, subtype, obj_id):
|
||||
abort(404) # return 404
|
||||
# oject exist
|
||||
# object exist
|
||||
else:
|
||||
dict_object = {"object_type": object_type, "correlation_id": correlation_id}
|
||||
dict_object["max_nodes"] = max_nodes
|
||||
dict_object["mode"] = mode
|
||||
dict_object["correlation_names"] = correlation_names
|
||||
dict_object["correlation_names_str"] = ",".join(correlation_names)
|
||||
dict_object["correlation_objects"] = correlation_objects
|
||||
dict_object["correlation_objects_str"] = ",".join(correlation_objects)
|
||||
dict_object["metadata"] = Correlate_object.get_object_metadata(object_type, correlation_id, type_id=type_id)
|
||||
if type_id:
|
||||
dict_object["metadata"]['type_id'] = type_id
|
||||
dict_object["metadata_card"] = get_card_metadata(object_type, correlation_id, type_id=type_id, expand_card=expand_card)
|
||||
dict_object = {"object_type": obj_type,
|
||||
"correlation_id": obj_id,
|
||||
"max_nodes": max_nodes, "mode": mode,
|
||||
"filter": filter_types, "filter_str": ",".join(filter_types),
|
||||
"metadata": ail_objects.get_object_meta(obj_type, subtype, obj_id, flask_context=True)
|
||||
}
|
||||
print(dict_object)
|
||||
if subtype:
|
||||
dict_object["metadata"]['type_id'] = subtype
|
||||
dict_object["metadata_card"] = ail_objects.get_object_card_meta(obj_type, subtype, obj_id, related_btc=related_btc)
|
||||
return render_template("show_correlation.html", dict_object=dict_object, bootstrap_label=bootstrap_label)
|
||||
|
||||
@correlation.route('/correlation/get/description')
|
||||
|
@ -254,19 +174,17 @@ def get_description():
|
|||
@login_required
|
||||
@login_read_only
|
||||
def graph_node_json():
|
||||
obj_id = request.args.get('correlation_id') #######################3
|
||||
subtype = request.args.get('type_id') #######################
|
||||
obj_type = request.args.get('object_type') #######################
|
||||
obj_id = request.args.get('id')
|
||||
subtype = request.args.get('subtype')
|
||||
obj_type = request.args.get('type')
|
||||
max_nodes = sanitise_nb_max_nodes(request.args.get('max_nodes'))
|
||||
|
||||
correlation_names = ail_objects.sanitize_objs_types(request.args.get('correlation_names', '').split(','))
|
||||
correlation_objects = ail_objects.sanitize_objs_types(request.args.get('correlation_objects', '').split(','))
|
||||
filter_types = ail_objects.sanitize_objs_types(request.args.get('filter', '').split(','))
|
||||
|
||||
# # TODO: remove me, rename screenshot
|
||||
if obj_type == 'image':
|
||||
obj_type == 'screenshot'
|
||||
obj_type = 'screenshot'
|
||||
|
||||
filter_types = correlation_names + correlation_objects
|
||||
json_graph = ail_objects.get_correlations_graph_node(obj_type, subtype, obj_id, filter_types=filter_types, max_nodes=max_nodes, level=2, flask_context=True)
|
||||
#json_graph = Correlate_object.get_graph_node_object_correlation(obj_type, obj_id, 'union', correlation_names, correlation_objects, requested_correl_type=subtype, max_nodes=max_nodes)
|
||||
return jsonify(json_graph)
|
||||
|
|
|
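Note: the correlation view now takes generic object parameters (type, subtype, id, filter) instead of the old object_type/type_id/correlation_id/correlation_names ones. An illustrative request against the renamed endpoint; host, port, object id and TLS handling are placeholders:

    import requests   # illustrative only; the UI normally builds this URL itself

    params = {
        'type': 'decoded',            # was object_type
        'subtype': '',                # was type_id
        'id': 'b1a2c3...',            # hypothetical object id (was correlation_id)
        'max_nodes': 300,
        'mode': 'union',
        'filter': 'domain,item,pgp',  # was correlation_names / correlation_objects
    }
    r = requests.get('https://127.0.0.1:7000/correlation/show', params=params, verify=False)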
@ -6,11 +6,13 @@
|
|||
'''
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import random
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, make_response
|
||||
from flask import render_template, jsonify, request, Blueprint, redirect, url_for, Response, send_file, abort
|
||||
from flask_login import login_required, current_user, login_user, logout_user
|
||||
|
||||
sys.path.append('modules')
|
||||
|
@ -19,15 +21,6 @@ import Flask_config
|
|||
# Import Role_Manager
|
||||
from Role_Manager import login_admin, login_analyst, login_read_only
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
|
||||
import Tag
|
||||
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
|
@ -36,6 +29,10 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
from lib import crawlers
|
||||
from lib import Language
|
||||
from lib.objects import Domains
|
||||
from lib.objects.Items import Item
|
||||
from lib import Tag
|
||||
|
||||
from packages import Date
|
||||
|
||||
from lib import Domain # # # # # # # # # # # # # # # # TODO:
|
||||
|
||||
|
@ -50,9 +47,9 @@ crawler_splash = Blueprint('crawler_splash', __name__, template_folder=os.path.j
|
|||
|
||||
|
||||
# ============ FUNCTIONS ============
|
||||
def api_validator(api_response):
|
||||
if api_response:
|
||||
return Response(json.dumps(api_response[0], indent=2, sort_keys=True), mimetype='application/json'), api_response[1]
|
||||
def api_validator(message, code):
|
||||
if message and code:
|
||||
return Response(json.dumps(message, indent=2, sort_keys=True), mimetype='application/json'), code
|
||||
|
||||
def create_json_response(data, status_code):
|
||||
return Response(json.dumps(data, indent=2, sort_keys=True), mimetype='application/json'), status_code
|
||||
|
@ -62,26 +59,26 @@ def create_json_response(data, status_code):
|
|||
@login_required
|
||||
@login_read_only
|
||||
def crawlers_dashboard():
|
||||
# # TODO: get splash manager status
|
||||
is_manager_connected = crawlers.get_splash_manager_connection_metadata()
|
||||
all_splash_crawler_status = crawlers.get_all_spash_crawler_status()
|
||||
splash_crawlers_latest_stats = crawlers.get_splash_crawler_latest_stats()
|
||||
is_manager_connected = crawlers.get_lacus_connection_metadata()
|
||||
crawlers_status = crawlers.get_crawler_capture_status()
|
||||
print(crawlers_status)
|
||||
crawlers_latest_stats = crawlers.get_crawlers_stats()
|
||||
print(crawlers_latest_stats)
|
||||
date = crawlers.get_current_date()
|
||||
|
||||
return render_template("dashboard_splash_crawler.html", all_splash_crawler_status = all_splash_crawler_status,
|
||||
is_manager_connected=is_manager_connected, date=date,
|
||||
splash_crawlers_latest_stats=splash_crawlers_latest_stats)
|
||||
return render_template("dashboard_crawler.html", date=date,
|
||||
is_manager_connected=is_manager_connected,
|
||||
crawlers_status=crawlers_status,
|
||||
crawlers_latest_stats=crawlers_latest_stats)
|
||||
|
||||
@crawler_splash.route("/crawlers/crawler_dashboard_json", methods=['GET'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
def crawler_dashboard_json():
|
||||
crawlers_status = crawlers.get_crawler_capture_status()
|
||||
crawlers_latest_stats = crawlers.get_crawlers_stats()
|
||||
|
||||
all_splash_crawler_status = crawlers.get_all_spash_crawler_status()
|
||||
splash_crawlers_latest_stats = crawlers.get_splash_crawler_latest_stats()
|
||||
|
||||
return jsonify({'all_splash_crawler_status': all_splash_crawler_status,
|
||||
'splash_crawlers_latest_stats':splash_crawlers_latest_stats})
|
||||
return jsonify({'crawlers_status': crawlers_status,
|
||||
'stats': crawlers_latest_stats})
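A minimal client-side sketch (not part of this commit) of polling the dashboard JSON route added above; the base URL and session cookie are assumptions, only the route path and the 'crawlers_status'/'stats' keys come from the handler.

import requests

AIL_URL = 'https://127.0.0.1:7000'                 # assumed AIL instance URL
SESSION_COOKIE = {'session': '<flask-login session cookie>'}  # assumed auth cookie

def poll_crawler_dashboard():
    # Route from this commit: /crawlers/crawler_dashboard_json
    r = requests.get(f'{AIL_URL}/crawlers/crawler_dashboard_json',
                     cookies=SESSION_COOKIE, verify=False)
    r.raise_for_status()
    data = r.json()
    # 'crawlers_status' lists the current captures, 'stats' the latest counters.
    return data['crawlers_status'], data['stats']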
|
||||
|
||||
@crawler_splash.route("/crawlers/manual", methods=['GET'])
|
||||
@login_required
|
||||
|
@ -89,12 +86,12 @@ def crawler_dashboard_json():
|
|||
def manual():
|
||||
user_id = current_user.get_id()
|
||||
l_cookiejar = crawlers.api_get_cookies_list_select(user_id)
|
||||
all_crawlers_types = crawlers.get_all_crawlers_queues_types()
|
||||
all_splash_name = crawlers.get_all_crawlers_to_launch_splash_name()
|
||||
crawlers_types = crawlers.get_crawler_all_types()
|
||||
proxies = [] # TODO HANDLE PROXIES
|
||||
return render_template("crawler_manual.html",
|
||||
is_manager_connected=crawlers.get_splash_manager_connection_metadata(),
|
||||
all_crawlers_types=all_crawlers_types,
|
||||
all_splash_name=all_splash_name,
|
||||
is_manager_connected=crawlers.get_lacus_connection_metadata(),
|
||||
crawlers_types=crawlers_types,
|
||||
proxies=proxies,
|
||||
l_cookiejar=l_cookiejar)
|
||||
|
||||
@crawler_splash.route("/crawlers/send_to_spider", methods=['POST'])
|
||||
|
@ -106,17 +103,16 @@ def send_to_spider():
|
|||
# POST val
|
||||
url = request.form.get('url_to_crawl')
|
||||
crawler_type = request.form.get('crawler_queue_type')
|
||||
splash_name = request.form.get('splash_name')
|
||||
auto_crawler = request.form.get('crawler_type')
|
||||
crawler_delta = request.form.get('crawler_epoch')
|
||||
proxy = request.form.get('proxy_name')
|
||||
auto_crawler = request.form.get('crawler_type') # TODO Auto Crawler
|
||||
crawler_delta = request.form.get('crawler_epoch') # TODO Auto Crawler
|
||||
screenshot = request.form.get('screenshot')
|
||||
har = request.form.get('har')
|
||||
depth_limit = request.form.get('depth_limit')
|
||||
max_pages = request.form.get('max_pages')
|
||||
cookiejar_uuid = request.form.get('cookiejar')
|
||||
|
||||
if splash_name:
|
||||
crawler_type = splash_name
|
||||
if crawler_type == 'onion':
|
||||
proxy = 'force_tor'
|
||||
|
||||
if cookiejar_uuid:
|
||||
if cookiejar_uuid == 'None':
|
||||
|
@ -125,13 +121,55 @@ def send_to_spider():
|
|||
cookiejar_uuid = cookiejar_uuid.rsplit(':')
|
||||
cookiejar_uuid = cookiejar_uuid[-1].replace(' ', '')
|
||||
|
||||
res = crawlers.api_create_crawler_task(user_id, url, screenshot=screenshot, har=har, depth_limit=depth_limit, max_pages=max_pages,
|
||||
crawler_type=crawler_type,
|
||||
auto_crawler=auto_crawler, crawler_delta=crawler_delta, cookiejar_uuid=cookiejar_uuid)
|
||||
if res:
|
||||
data = {'url': url, 'depth': depth_limit, 'har': har, 'screenshot': screenshot}
|
||||
if proxy:
|
||||
data['proxy'] = proxy
|
||||
if cookiejar_uuid:
|
||||
data['cookiejar'] = cookiejar_uuid
|
||||
res = crawlers.api_add_crawler_task(data, user_id=user_id)
|
||||
|
||||
if res[1] != 200:
|
||||
return create_json_response(res[0], res[1])
|
||||
return redirect(url_for('crawler_splash.manual'))
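For illustration, a hedged sketch of the task dictionary the new handler passes to crawlers.api_add_crawler_task(); the concrete values are placeholders, the keys (url, depth, har, screenshot, proxy, cookiejar) are the ones assembled in send_to_spider() above.

# Sketch only: mirrors the payload built by send_to_spider().
task = {
    'url': 'http://example.onion',      # placeholder URL
    'depth': 1,                         # depth_limit from the form
    'har': True,                        # capture an HAR archive
    'screenshot': True,                 # capture a screenshot
}
# Optional fields, added only when the form provides them:
task['proxy'] = 'force_tor'             # onion targets are forced through Tor above
task['cookiejar'] = '<cookiejar uuid>'  # value elided; taken from the select box

# res = crawlers.api_add_crawler_task(task, user_id=user_id)
# res is a (message, status_code) tuple; anything but 200 is returned as JSON.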
|
||||
|
||||
@crawler_splash.route("/crawlers/last/domains", methods=['GET'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
def crawlers_last_domains():
|
||||
domain_type = request.args.get('type')
|
||||
if domain_type not in crawlers.get_crawler_all_types():
|
||||
return jsonify({'error': 'Invalid domain type'}), 400
|
||||
|
||||
# TODO STAT by EPOCH
|
||||
domains = []
|
||||
for domain_row in crawlers.get_last_crawled_domains(domain_type):
|
||||
domain, epoch = domain_row.split(':', 1)
|
||||
dom = Domains.Domain(domain)
|
||||
meta = dom.get_meta()
|
||||
meta['epoch'] = epoch
|
||||
meta['status_epoch'] = dom.is_up_by_epoch(epoch)
|
||||
domains.append(meta)
|
||||
crawler_stats = crawlers.get_crawlers_stats(domain_type=domain_type)
|
||||
|
||||
now = datetime.now()
|
||||
date = now.strftime("%Y%m%d")
|
||||
date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
|
||||
return render_template("last_crawled.html", domains=domains, type=domain_type,
|
||||
is_manager_connected=crawlers.get_lacus_connection_metadata(),
|
||||
date_from=date_string, date_to=date_string,
|
||||
crawler_stats=crawler_stats)
|
||||
|
||||
@crawler_splash.route('/crawlers/last/domains/json')
|
||||
@login_required
|
||||
@login_read_only
|
||||
def crawlers_last_domains_json():
|
||||
domain_type = request.args.get('type')
|
||||
if domain_type not in crawlers.get_crawler_all_types():
|
||||
return jsonify({'error': 'Invalid domain type'}), 400
|
||||
stats = []
|
||||
for date in Date.get_date_range(7):
|
||||
stats.append(crawlers.get_crawlers_stats_by_day(date, domain_type))
|
||||
return jsonify(stats)
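A minimal usage sketch for the per-day statistics endpoint above; the instance URL and cookie handling are assumptions, the route and the 7-day window come from the code.

import requests

AIL_URL = 'https://127.0.0.1:7000'   # assumed instance URL

def last_domains_stats(domain_type='onion', cookies=None):
    # /crawlers/last/domains/json returns one entry per day over the last 7 days,
    # built from crawlers.get_crawlers_stats_by_day(date, domain_type).
    r = requests.get(f'{AIL_URL}/crawlers/last/domains/json',
                     params={'type': domain_type}, cookies=cookies, verify=False)
    if r.status_code == 400:
        raise ValueError('Invalid domain type')   # mirrors the handler's error
    return r.json()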
|
||||
|
||||
#### Domains ####
|
||||
|
||||
|
@ -143,37 +181,70 @@ def showDomain():
|
|||
if request.method == 'POST':
|
||||
domain_name = request.form.get('in_show_domain')
|
||||
epoch = None
|
||||
port = None
|
||||
else:
|
||||
domain_name = request.args.get('domain')
|
||||
epoch = request.args.get('epoch')
|
||||
port = request.args.get('port')
|
||||
|
||||
res = api_validator(Domain.api_verify_if_domain_exist(domain_name))
|
||||
if res:
|
||||
return res
|
||||
try:
|
||||
epoch = int(epoch)
|
||||
except (ValueError, TypeError):
|
||||
epoch = None
|
||||
|
||||
domain = Domains.Domain(domain_name)
|
||||
dom = Domain.Domain(domain_name, port=port)
|
||||
if not domain.exists():
|
||||
abort(404)
|
||||
|
||||
dict_domain = dom.get_domain_metadata()
|
||||
dict_domain['domain'] = domain_name
|
||||
if dom.domain_was_up():
|
||||
dict_domain = domain.get_meta(options=['last_origin', 'languages'])
|
||||
dict_domain['domain'] = domain.id
|
||||
if domain.was_up():
|
||||
dict_domain = {**dict_domain, **domain.get_correlations()}
|
||||
print(dict_domain)
|
||||
dict_domain['correlation_nb'] = len(dict_domain['decoded']) + len(dict_domain['username']) + len(dict_domain['pgp']) + len(dict_domain['cryptocurrency']) + len(dict_domain['screenshot'])
|
||||
dict_domain['father'] = dom.get_domain_father()
|
||||
dict_domain['languages'] = Language.get_languages_from_iso(dom.get_domain_languages(), sort=True)
|
||||
dict_domain['tags'] = dom.get_domain_tags()
|
||||
dict_domain['tags_safe'] = Tag.is_tags_safe(dict_domain['tags'])
|
||||
dict_domain['history'] = dom.get_domain_history_with_status()
|
||||
dict_domain['crawler_history'] = dom.get_domain_items_crawled(items_link=True, epoch=epoch, item_screenshot=True, item_tag=True) # # TODO: handle multiple port
|
||||
if dict_domain['crawler_history'].get('items', []):
|
||||
dict_domain['crawler_history']['random_item'] = random.choice(dict_domain['crawler_history']['items'])
|
||||
dict_domain['history'] = domain.get_history(status=True)
|
||||
curr_epoch = None
|
||||
# Select valid epoch
|
||||
if epoch:
|
||||
for row in dict_domain['history']:
|
||||
if row['epoch'] == epoch:
|
||||
curr_epoch = row['epoch']
|
||||
break
|
||||
else:
|
||||
curr_epoch = -1
|
||||
for row in dict_domain['history']:
|
||||
if row['epoch'] > curr_epoch:
|
||||
curr_epoch = row['epoch']
|
||||
dict_domain['epoch'] = curr_epoch
|
||||
dict_domain["date"] = time.strftime('%Y/%m/%d - %H:%M.%S', time.gmtime(curr_epoch))
|
||||
|
||||
return render_template("showDomain.html", dict_domain=dict_domain, bootstrap_label=bootstrap_label,
|
||||
print(dict_domain['epoch'])
|
||||
|
||||
dict_domain['crawler_history_items'] = []
|
||||
for item_id in domain.get_crawled_items_by_epoch(epoch):
|
||||
dict_domain['crawler_history_items'].append(Item(item_id).get_meta(options=['crawler']))
|
||||
if dict_domain['crawler_history_items']:
|
||||
dict_domain['random_item'] = random.choice(dict_domain['crawler_history_items'])
|
||||
|
||||
return render_template("showDomain.html",
|
||||
dict_domain=dict_domain, bootstrap_label=bootstrap_label,
|
||||
modal_add_tags=Tag.get_modal_add_tags(dict_domain['domain'], object_type="domain"))
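The epoch-selection logic in showDomain() (use the requested epoch if it exists in the crawl history, otherwise the most recent one) can be read as a small standalone helper; this is a sketch for clarity, not code from the commit.

def select_epoch(history, requested_epoch=None):
    # history: list of dicts with an 'epoch' key, as returned by domain.get_history(status=True)
    if requested_epoch is not None:
        for row in history:
            if row['epoch'] == requested_epoch:
                return requested_epoch
        return None  # requested epoch not present in the crawl history
    # No epoch requested: fall back to the most recent capture.
    return max((row['epoch'] for row in history), default=None)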
|
||||
|
||||
@crawler_splash.route('/crawlers/domain/download', methods=['GET'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
def crawlers_domain_download():
|
||||
domain = request.args.get('domain')
|
||||
epoch = request.args.get('epoch')
|
||||
try:
|
||||
epoch = int(epoch)
|
||||
except (ValueError, TypeError):
|
||||
epoch = None
|
||||
dom = Domains.Domain(domain)
|
||||
if not dom.exists():
|
||||
abort(404)
|
||||
zip_file = dom.get_download_zip(epoch=epoch)
|
||||
if not zip_file:
|
||||
abort(404)
|
||||
return send_file(zip_file, download_name=f'{dom.get_id()}.zip', as_attachment=True)
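A hedged client example for the new download route; the instance URL, authentication and output path are assumptions, the route and its domain/epoch parameters are taken from the handler above.

import requests

AIL_URL = 'https://127.0.0.1:7000'   # assumed instance URL

def download_domain_zip(domain, epoch=None, cookies=None, dest='domain.zip'):
    params = {'domain': domain}
    if epoch is not None:
        params['epoch'] = epoch
    r = requests.get(f'{AIL_URL}/crawlers/domain/download',
                     params=params, cookies=cookies, verify=False)
    if r.status_code == 404:
        return None   # unknown domain or no capture for that epoch
    with open(dest, 'wb') as f:
        f.write(r.content)
    return dest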
|
||||
|
||||
@crawler_splash.route('/domains/explorer/domain_type_post', methods=['POST'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
|
@ -304,13 +375,36 @@ def domains_search_name():
|
|||
l_dict_domains=l_dict_domains, bootstrap_label=bootstrap_label,
|
||||
domains_types=domains_types)
|
||||
|
||||
@crawler_splash.route('/domains/TODO', methods=['GET'])
|
||||
@crawler_splash.route('/domains/date', methods=['GET'])
|
||||
@login_required
|
||||
@login_analyst
|
||||
def domains_todo():
|
||||
def domains_search_date():
|
||||
# TODO sanitize type + date
|
||||
domain_type = request.args.get('type')
|
||||
last_domains = Domain.get_last_crawled_domains(domain_type)
|
||||
date_from = request.args.get('date_from')
|
||||
date_to = request.args.get('date_to')
|
||||
# page = request.args.get('page')
|
||||
|
||||
date = Date.sanitise_date_range(date_from, date_to)
|
||||
domains_date = Domains.get_domains_by_daterange(date['date_from'], date['date_to'], domain_type)
|
||||
dict_domains = {}
|
||||
for d in domains_date:
|
||||
dict_domains[d] = Domains.get_domains_meta(domains_date[d])
|
||||
date_from = f"{date['date_from'][0:4]}-{date['date_from'][4:6]}-{date['date_from'][6:8]}"
|
||||
date_to = f"{date['date_to'][0:4]}-{date['date_to'][4:6]}-{date['date_to'][6:8]}"
|
||||
|
||||
return render_template("domains_daterange.html", date_from=date_from, date_to=date_to,
|
||||
bootstrap_label=bootstrap_label,
|
||||
dict_domains=dict_domains, type=domain_type)
|
||||
|
||||
@crawler_splash.route('/domains/date/post', methods=['POST'])
|
||||
@login_required
|
||||
@login_analyst
|
||||
def domains_search_date_post():
|
||||
domain_type = request.form.get('type')
|
||||
date_from = request.form.get('date_from')
|
||||
date_to = request.form.get('date_to')
|
||||
return redirect(url_for('crawler_splash.domains_search_date', date_from=date_from, date_to=date_to, type=domain_type))
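The YYYYMMDD to YYYY-MM-DD reformatting used in domains_search_date() above recurs in several routes; a tiny helper (illustrative only, not part of the commit) makes the intent explicit.

def to_iso_date(yyyymmdd):
    # '20221004' -> '2022-10-04'; assumes the value was already validated
    # by Date.sanitise_date_range().
    return f'{yyyymmdd[0:4]}-{yyyymmdd[4:6]}-{yyyymmdd[6:8]}'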
|
||||
|
||||
|
||||
##-- --##
|
||||
|
@ -521,49 +615,8 @@ def crawler_cookiejar_cookie_json_add_post():
|
|||
|
||||
return redirect(url_for('crawler_splash.crawler_cookiejar_cookie_add', cookiejar_uuid=cookiejar_uuid))
|
||||
|
||||
@crawler_splash.route('/crawler/settings', methods=['GET'])
|
||||
@login_required
|
||||
@login_analyst
|
||||
def crawler_splash_setings():
|
||||
all_proxies = crawlers.get_all_proxies_metadata()
|
||||
all_splash = crawlers.get_all_splash_crawler_metadata()
|
||||
splash_manager_url = crawlers.get_splash_manager_url()
|
||||
api_key = crawlers.get_hidden_splash_api_key()
|
||||
is_manager_connected = crawlers.get_splash_manager_connection_metadata(force_ping=True)
|
||||
#--- Cookiejar ---#
|
||||
|
||||
nb_crawlers_to_launch = crawlers.get_nb_crawlers_to_launch()
|
||||
#crawler_full_config = Config_DB.get_full_config_by_section('crawler')
|
||||
is_crawler_working = crawlers.is_test_ail_crawlers_successful()
|
||||
crawler_error_mess = crawlers.get_test_ail_crawlers_message()
|
||||
|
||||
return render_template("settings_splash_crawler.html",
|
||||
is_manager_connected=is_manager_connected,
|
||||
splash_manager_url=splash_manager_url, api_key=api_key,
|
||||
all_splash=all_splash, all_proxies=all_proxies,
|
||||
nb_crawlers_to_launch=nb_crawlers_to_launch,
|
||||
is_crawler_working=is_crawler_working,
|
||||
crawler_error_mess=crawler_error_mess,
|
||||
#crawler_full_config=crawler_full_config
|
||||
)
|
||||
|
||||
@crawler_splash.route('/crawler/settings/crawler_manager', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
@login_admin
|
||||
def crawler_splash_setings_crawler_manager():
|
||||
if request.method == 'POST':
|
||||
splash_manager_url = request.form.get('splash_manager_url')
|
||||
api_key = request.form.get('api_key')
|
||||
|
||||
res = crawlers.api_save_splash_manager_url_api({'url':splash_manager_url, 'api_key':api_key})
|
||||
if res[1] != 200:
|
||||
return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]
|
||||
else:
|
||||
return redirect(url_for('crawler_splash.crawler_splash_setings'))
|
||||
else:
|
||||
splash_manager_url = crawlers.get_splash_manager_url()
|
||||
api_key = crawlers.get_splash_api_key()
|
||||
return render_template("settings_edit_splash_crawler_manager.html",
|
||||
splash_manager_url=splash_manager_url, api_key=api_key)
|
||||
|
||||
@crawler_splash.route('/crawler/settings/crawlers_to_lauch', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
|
@ -583,13 +636,6 @@ def crawler_splash_setings_crawlers_to_lauch():
|
|||
return render_template("settings_edit_crawlers_to_launch.html",
|
||||
nb_crawlers_to_launch=nb_crawlers_to_launch)
|
||||
|
||||
@crawler_splash.route('/crawler/settings/test_crawler', methods=['GET'])
|
||||
@login_required
|
||||
@login_admin
|
||||
def crawler_splash_setings_test_crawler():
|
||||
crawlers.test_ail_crawlers()
|
||||
return redirect(url_for('crawler_splash.crawler_splash_setings'))
|
||||
|
||||
@crawler_splash.route('/crawler/settings/relaunch_crawler', methods=['GET'])
|
||||
@login_required
|
||||
@login_admin
|
||||
|
@ -598,3 +644,59 @@ def crawler_splash_setings_relaunch_crawler():
|
|||
return redirect(url_for('crawler_splash.crawler_splash_setings'))
|
||||
|
||||
## - - ##
|
||||
|
||||
#### LACUS ####
|
||||
|
||||
@crawler_splash.route('/crawler/settings', methods=['GET'])
|
||||
@login_required
|
||||
@login_analyst
|
||||
def crawler_settings():
|
||||
lacus_url = crawlers.get_lacus_url()
|
||||
api_key = crawlers.get_hidden_lacus_api_key()
|
||||
|
||||
is_manager_connected = crawlers.get_lacus_connection_metadata(force_ping=True)
|
||||
is_crawler_working = crawlers.is_test_ail_crawlers_successful()
|
||||
crawler_error_mess = crawlers.get_test_ail_crawlers_message()
|
||||
|
||||
# TODO REGISTER PROXY
|
||||
# all_proxies = crawlers.get_all_proxies_metadata()
|
||||
|
||||
# nb_crawlers_to_launch = crawlers.get_nb_crawlers_to_launch()
|
||||
# crawler_full_config = Config_DB.get_full_config_by_section('crawler')
|
||||
|
||||
return render_template("settings_crawler.html",
|
||||
is_manager_connected=is_manager_connected,
|
||||
lacus_url=lacus_url, api_key=api_key,
|
||||
#all_proxies=all_proxies,
|
||||
#nb_crawlers_to_launch=nb_crawlers_to_launch,
|
||||
is_crawler_working=is_crawler_working,
|
||||
crawler_error_mess=crawler_error_mess,
|
||||
)
|
||||
|
||||
@crawler_splash.route('/crawler/settings/crawler/manager', methods=['GET', 'POST'])
|
||||
@login_required
|
||||
@login_admin
|
||||
def crawler_lacus_settings_crawler_manager():
|
||||
if request.method == 'POST':
|
||||
lacus_url = request.form.get('lacus_url')
|
||||
api_key = request.form.get('api_key')
|
||||
|
||||
res = crawlers.api_save_lacus_url_key({'url': lacus_url, 'api_key': api_key})
|
||||
print(res)
|
||||
if res[1] != 200:
|
||||
return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]
|
||||
else:
|
||||
return redirect(url_for('crawler_splash.crawler_settings'))
|
||||
else:
|
||||
lacus_url = crawlers.get_lacus_url()
|
||||
api_key = crawlers.get_lacus_api_key()
|
||||
return render_template("settings_edit_lacus_crawler.html", lacus_url=lacus_url, api_key=api_key)
|
||||
|
||||
@crawler_splash.route('/crawler/settings/crawler/test', methods=['GET'])
|
||||
@login_required
|
||||
@login_admin
|
||||
def crawler_settings_crawler_test():
|
||||
crawlers.test_ail_crawlers()
|
||||
return redirect(url_for('crawler_splash.crawler_settings'))
|
||||
|
||||
#--- LACUS ---#
|
|
@ -53,7 +53,7 @@ def show_investigation():
|
|||
investigation_uuid = request.args.get("uuid")
|
||||
investigation = Investigations.Investigation(investigation_uuid)
|
||||
metadata = investigation.get_metadata(r_str=True)
|
||||
objs = ail_objects.get_objects_meta(investigation.get_objects(), icon=True, url=True, flask_context=True)
|
||||
objs = ail_objects.get_objects_meta(investigation.get_objects(), flask_context=True)
|
||||
return render_template("view_investigation.html", bootstrap_label=bootstrap_label,
|
||||
metadata=metadata, investigation_objs=objs)
|
||||
|
||||
|
|
82
var/www/blueprints/objects_cve.py
Normal file

|
@ -0,0 +1,82 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
'''
|
||||
Blueprint Flask: CVE objects endpoints: daterange, search ...
|
||||
'''
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
|
||||
from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort, send_file
|
||||
from flask_login import login_required, current_user
|
||||
|
||||
# Import Role_Manager
|
||||
from Role_Manager import login_admin, login_analyst, login_read_only
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from lib.objects import Cves
|
||||
from packages import Date
|
||||
|
||||
# ============ BLUEPRINT ============
|
||||
objects_cve = Blueprint('objects_cve', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/cve'))
|
||||
|
||||
# ============ VARIABLES ============
|
||||
bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
|
||||
|
||||
|
||||
# ============ FUNCTIONS ============
|
||||
@objects_cve.route("/objects/cve", methods=['GET'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
def objects_cves():
|
||||
date_from = request.args.get('date_from')
|
||||
date_to = request.args.get('date_to')
|
||||
show_objects = request.args.get('show_objects')
|
||||
date = Date.sanitise_date_range(date_from, date_to)
|
||||
date_from = date['date_from']
|
||||
date_to = date['date_to']
|
||||
|
||||
# barchart_type
|
||||
# correlation_type_search_endpoint
|
||||
|
||||
dict_objects = Cves.api_get_cves_meta_by_daterange(date_from, date_to)
|
||||
print(date_from, date_to, dict_objects)
|
||||
return render_template("CveDaterange.html", date_from=date_from, date_to=date_to,
|
||||
dict_objects=dict_objects, show_objects=show_objects)
|
||||
|
||||
@objects_cve.route("/objects/cve/post", methods=['POST'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
def objects_cves_post():
|
||||
date_from = request.form.get('date_from')
|
||||
date_to = request.form.get('date_to')
|
||||
show_objects = request.form.get('show_objects')
|
||||
return redirect(url_for('objects_cve.objects_cves', date_from=date_from, date_to=date_to, show_objects=show_objects))
|
||||
|
||||
@objects_cve.route("/objects/cve/range/json", methods=['GET'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
def objects_cve_range_json():
|
||||
return None
|
||||
|
||||
@objects_cve.route("/objects/cve/search", methods=['POST'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
def objects_cve_search():
|
||||
to_search = request.form.get('object_id')
|
||||
|
||||
# TODO SANITIZE ID
|
||||
# TODO Search all
|
||||
cve = Cves.Cve(to_search)
|
||||
if not cve.exists():
|
||||
abort(404)
|
||||
else:
|
||||
return redirect(cve.get_link(flask_context=True))
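The handler above leaves a "TODO SANITIZE ID"; one possible check (an illustration, not the project's implementation) is a simple format test before the existence lookup.

import re

CVE_ID_RE = re.compile(r'^CVE-\d{4}-\d{4,}$', re.IGNORECASE)

def is_valid_cve_id(cve_id):
    # CVE identifiers look like CVE-2022-1234 (4-digit year, 4+ digit sequence).
    return bool(CVE_ID_RE.match(cve_id or ''))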
|
||||
|
||||
# ============= ROUTES ==============
|
||||
|
|
@ -36,7 +36,7 @@ bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
|
|||
|
||||
|
||||
# ============= ROUTES ==============
|
||||
@objects_item.route("/object/item") #completely shows the paste in a new tab
|
||||
@objects_item.route("/object/item")
|
||||
@login_required
|
||||
@login_read_only
|
||||
def showItem(): # # TODO: support post
|
||||
|
@ -45,7 +45,7 @@ def showItem(): # # TODO: support post
|
|||
abort(404)
|
||||
|
||||
item = Item(item_id)
|
||||
meta = item.get_meta(options=set(['content', 'crawler', 'duplicates', 'lines', 'size']))
|
||||
meta = item.get_meta(options=['content', 'crawler', 'duplicates', 'lines', 'size'])
|
||||
|
||||
meta['name'] = meta['id'].replace('/', ' / ')
|
||||
meta['father'] = item_basic.get_item_parent(item_id)
|
||||
|
@ -94,4 +94,4 @@ def item_download(): # # TODO: support post
|
|||
if not item_id or not item_basic.exist_item(item_id):
|
||||
abort(404)
|
||||
item = Item(item_id)
|
||||
return send_file(item.get_raw_content(), attachment_filename=item_id, as_attachment=True)
|
||||
return send_file(item.get_raw_content(), download_name=item_id, as_attachment=True)
|
||||
|
|
|
@ -17,7 +17,6 @@ import redis
|
|||
import unicodedata
|
||||
import uuid
|
||||
from io import BytesIO
|
||||
from Date import Date
|
||||
|
||||
from functools import wraps
|
||||
|
||||
|
@ -31,9 +30,9 @@ from flask_login import login_required
|
|||
# Import Project packages
|
||||
##################################
|
||||
from lib import Tag
|
||||
from lib.objects.Items import Item
|
||||
|
||||
import Paste
|
||||
import Import_helper
|
||||
from packages import Import_helper
|
||||
from pytaxonomies import Taxonomies
|
||||
from pymispgalaxies import Galaxies, Clusters
|
||||
|
||||
|
@ -98,8 +97,6 @@ def limit_content_length():
|
|||
|
||||
|
||||
# ============ FUNCTIONS ============
|
||||
def one():
|
||||
return 1
|
||||
|
||||
def allowed_file(filename):
|
||||
if not '.' in filename:
|
||||
|
@ -126,15 +123,14 @@ def date_to_str(date):
|
|||
|
||||
def misp_create_event(distribution, threat_level_id, analysis, info, l_tags, publish, path):
|
||||
|
||||
paste = Paste.Paste(path)
|
||||
source = path.split('/')[-6:]
|
||||
source = '/'.join(source)[:-3]
|
||||
item = Item(path)
|
||||
source = item.get_source()
|
||||
ail_uuid = r_serv_db.get('ail:uuid')
|
||||
pseudofile = BytesIO(paste.get_p_content().encode())
|
||||
pseudofile = BytesIO(item.get_content(binary=True))
|
||||
|
||||
temp = paste._get_p_duplicate()
|
||||
temp = item.get_duplicates()
|
||||
|
||||
#beautifier
|
||||
# beautifier
|
||||
if not temp:
|
||||
temp = ''
|
||||
|
||||
|
@ -181,7 +177,7 @@ def misp_create_event(distribution, threat_level_id, analysis, info, l_tags, pub
|
|||
leak_obj = MISPObject(obj_name)
|
||||
leak_obj.add_attribute('sensor', value=ail_uuid, type="text")
|
||||
leak_obj.add_attribute('origin', value=source, type='text')
|
||||
leak_obj.add_attribute('last-seen', value=date_to_str(paste.p_date), type='datetime')
|
||||
leak_obj.add_attribute('last-seen', value=date_to_str(item.get_date()), type='datetime')
|
||||
leak_obj.add_attribute('raw-data', value=source, data=pseudofile, type="attachment")
|
||||
|
||||
if p_duplicate_number > 0:
|
||||
|
@ -192,7 +188,8 @@ def misp_create_event(distribution, threat_level_id, analysis, info, l_tags, pub
|
|||
templateID = [x['ObjectTemplate']['id'] for x in pymisp.get_object_templates_list()['response'] if x['ObjectTemplate']['name'] == obj_name][0]
|
||||
except IndexError:
|
||||
valid_types = ", ".join([x['ObjectTemplate']['name'] for x in pymisp.get_object_templates_list()])
|
||||
print ("Template for type {} not found! Valid types are: {%s}".format(obj_name, valid_types))
|
||||
print (f"Template for type {obj_name} not found! Valid types are: {valid_types}")
|
||||
return False
|
||||
r = pymisp.add_object(eventid, templateID, leak_obj)
|
||||
if 'errors' in r:
|
||||
print(r)
|
||||
|
@ -206,7 +203,7 @@ def hive_create_case(hive_tlp, threat_level, hive_description, hive_case_title,
|
|||
ail_uuid = r_serv_db.get('ail:uuid')
|
||||
source = path.split('/')[-6:]
|
||||
source = '/'.join(source)[:-3]
|
||||
# get paste date
|
||||
# get item date
|
||||
var = path.split('/')
|
||||
last_seen = "{0}-{1}-{2}".format(var[-4], var[-3], var[-2])
|
||||
|
||||
|
|
|
@ -41,45 +41,6 @@ dic_type_name={'onion':'Onion', 'regular':'Website'}
|
|||
|
||||
# ============ FUNCTIONS ============
|
||||
|
||||
def one():
|
||||
return 1
|
||||
|
||||
def get_date_range(num_day):
|
||||
curr_date = datetime.date.today()
|
||||
date = Date( '{}{}{}'.format(str(curr_date.year), str(curr_date.month).zfill(2), str(curr_date.day).zfill(2)) )
|
||||
date_list = []
|
||||
|
||||
for i in range(0, num_day):
|
||||
date_list.append(date.substract_day(i))
|
||||
|
||||
return list(reversed(date_list))
|
||||
|
||||
def substract_date(date_from, date_to):
|
||||
date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8]))
|
||||
date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8]))
|
||||
delta = date_to - date_from # timedelta
|
||||
l_date = []
|
||||
for i in range(delta.days + 1):
|
||||
date = date_from + datetime.timedelta(i)
|
||||
l_date.append( date.strftime('%Y%m%d') )
|
||||
return l_date
|
||||
|
||||
def unpack_paste_tags(p_tags):
|
||||
l_tags = []
|
||||
for tag in p_tags:
|
||||
complete_tag = tag
|
||||
tag = tag.split('=')
|
||||
if len(tag) > 1:
|
||||
if tag[1] != '':
|
||||
tag = tag[1][1:-1]
|
||||
# no value
|
||||
else:
|
||||
tag = tag[0][1:-1]
|
||||
# use for custom tags
|
||||
else:
|
||||
tag = tag[0]
|
||||
l_tags.append( (tag, complete_tag) )
|
||||
return l_tags
|
||||
|
||||
def is_valid_domain(domain):
|
||||
faup.decode(domain)
|
||||
|
@ -89,26 +50,6 @@ def is_valid_domain(domain):
|
|||
else:
|
||||
return False
|
||||
|
||||
def is_valid_service_type(service_type):
|
||||
accepted_service = ['onion', 'regular']
|
||||
if service_type in accepted_service:
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def get_onion_status(domain, date):
|
||||
if r_serv_onion.sismember('onion_up:'+date , domain):
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def get_domain_type(domain):
|
||||
type_id = domain.split(':')[-1]
|
||||
if type_id == 'onion':
|
||||
return 'onion'
|
||||
else:
|
||||
return 'regular'
|
||||
|
||||
def get_type_domain(domain):
|
||||
if domain is None:
|
||||
type = 'regular'
|
||||
|
@ -133,18 +74,6 @@ def get_domain_from_url(url):
|
|||
def get_last_domains_crawled(type): # DONE
|
||||
return r_serv_onion.lrange('last_{}'.format(type), 0 ,-1)
|
||||
|
||||
def get_nb_domains_inqueue(type):
|
||||
nb = r_serv_onion.scard('{}_crawler_queue'.format(type))
|
||||
nb += r_serv_onion.scard('{}_crawler_priority_queue'.format(type))
|
||||
return nb
|
||||
|
||||
def get_stats_last_crawled_domains(type, date):
|
||||
statDomains = {}
|
||||
statDomains['domains_up'] = r_serv_onion.scard('{}_up:{}'.format(type, date))
|
||||
statDomains['domains_down'] = r_serv_onion.scard('{}_down:{}'.format(type, date))
|
||||
statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
|
||||
statDomains['domains_queue'] = get_nb_domains_inqueue(type)
|
||||
return statDomains
|
||||
|
||||
def get_last_crawled_domains_metadata(list_domains_crawled, date, type=None, auto_mode=False):
|
||||
list_crawled_metadata = []
|
||||
|
@ -201,22 +130,6 @@ def get_last_crawled_domains_metadata(list_domains_crawled, date, type=None, aut
|
|||
list_crawled_metadata.append(metadata_domain)
|
||||
return list_crawled_metadata
|
||||
|
||||
def get_crawler_splash_status(type):
|
||||
crawler_metadata = []
|
||||
all_crawlers = r_cache.smembers('{}_crawlers'.format(type))
|
||||
for crawler in all_crawlers:
|
||||
crawling_domain = r_cache.hget('metadata_crawler:{}'.format(crawler), 'crawling_domain')
|
||||
started_time = r_cache.hget('metadata_crawler:{}'.format(crawler), 'started_time')
|
||||
status_info = r_cache.hget('metadata_crawler:{}'.format(crawler), 'status')
|
||||
crawler_info = '{} - {}'.format(crawler, started_time)
|
||||
if status_info=='Waiting' or status_info=='Crawling':
|
||||
status=True
|
||||
else:
|
||||
status=False
|
||||
crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status})
|
||||
|
||||
return crawler_metadata
|
||||
|
||||
def delete_auto_crawler(url):
|
||||
domain = get_domain_from_url(url)
|
||||
type = get_type_domain(domain)
|
||||
|
@ -231,67 +144,6 @@ def delete_auto_crawler(url):
|
|||
|
||||
# ============= ROUTES ==============
|
||||
|
||||
# @hiddenServices.route("/crawlers/", methods=['GET'])
|
||||
# @login_required
|
||||
# @login_read_only
|
||||
# def dashboard():
|
||||
# crawler_metadata_onion = get_crawler_splash_status('onion')
|
||||
# crawler_metadata_regular = get_crawler_splash_status('regular')
|
||||
#
|
||||
# now = datetime.datetime.now()
|
||||
# date = now.strftime("%Y%m%d")
|
||||
# statDomains_onion = get_stats_last_crawled_domains('onion', date)
|
||||
# statDomains_regular = get_stats_last_crawled_domains('regular', date)
|
||||
#
|
||||
# return render_template("Crawler_dashboard.html", crawler_metadata_onion = crawler_metadata_onion,
|
||||
# date=date,
|
||||
# crawler_metadata_regular=crawler_metadata_regular,
|
||||
# statDomains_onion=statDomains_onion, statDomains_regular=statDomains_regular)
|
||||
|
||||
@hiddenServices.route("/crawlers/crawler_splash_onion", methods=['GET'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
def crawler_splash_onion():
|
||||
type = 'onion'
|
||||
last_onions = get_last_domains_crawled(type)
|
||||
list_onion = []
|
||||
|
||||
now = datetime.datetime.now()
|
||||
date = now.strftime("%Y%m%d")
|
||||
statDomains = get_stats_last_crawled_domains(type, date)
|
||||
|
||||
list_onion = get_last_crawled_domains_metadata(last_onions, date, type=type)
|
||||
crawler_metadata = get_crawler_splash_status(type)
|
||||
|
||||
date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
|
||||
return render_template("Crawler_Splash_onion.html", last_onions=list_onion, statDomains=statDomains,
|
||||
crawler_metadata=crawler_metadata, date_from=date_string, date_to=date_string)
|
||||
|
||||
@hiddenServices.route("/crawlers/Crawler_Splash_last_by_type", methods=['GET'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
def Crawler_Splash_last_by_type():
|
||||
type = request.args.get('type')
|
||||
# verify user input
|
||||
if type not in list_types:
|
||||
type = 'onion'
|
||||
type_name = dic_type_name[type]
|
||||
list_domains = []
|
||||
|
||||
now = datetime.datetime.now()
|
||||
date = now.strftime("%Y%m%d")
|
||||
date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
|
||||
|
||||
statDomains = get_stats_last_crawled_domains(type, date)
|
||||
|
||||
list_domains = get_last_crawled_domains_metadata(get_last_domains_crawled(type), date, type=type)
|
||||
crawler_metadata = get_crawler_splash_status(type)
|
||||
|
||||
return render_template("Crawler_Splash_last_by_type.html", type=type, type_name=type_name,
|
||||
is_manager_connected=crawlers.get_splash_manager_connection_metadata(),
|
||||
last_domains=list_domains, statDomains=statDomains,
|
||||
crawler_metadata=crawler_metadata, date_from=date_string, date_to=date_string)
|
||||
|
||||
@hiddenServices.route("/crawlers/blacklisted_domains", methods=['GET'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
|
@ -424,7 +276,7 @@ def auto_crawler():
|
|||
|
||||
return render_template("Crawler_auto.html", page=page, nb_page_max=nb_page_max,
|
||||
last_domains=last_domains,
|
||||
is_manager_connected=crawlers.get_splash_manager_connection_metadata(),
|
||||
is_manager_connected=crawlers.get_lacus_connection_metadata(),
|
||||
auto_crawler_domain_onions_metadata=auto_crawler_domain_onions_metadata,
|
||||
auto_crawler_domain_regular_metadata=auto_crawler_domain_regular_metadata)
|
||||
|
||||
|
@ -439,285 +291,6 @@ def remove_auto_crawler():
|
|||
delete_auto_crawler(url)
|
||||
return redirect(url_for('hiddenServices.auto_crawler', page=page))
|
||||
|
||||
# # TODO: refractor
|
||||
@hiddenServices.route("/hiddenServices/last_crawled_domains_with_stats_json", methods=['GET'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
def last_crawled_domains_with_stats_json():
|
||||
last_onions = r_serv_onion.lrange('last_onion', 0 ,-1)
|
||||
list_onion = []
|
||||
|
||||
now = datetime.datetime.now()
|
||||
date = '{}{}{}'.format(now.strftime("%Y"), now.strftime("%m"), now.strftime("%d"))
|
||||
statDomains = {}
|
||||
statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date))
|
||||
statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date))
|
||||
statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
|
||||
statDomains['domains_queue'] = r_serv_onion.scard('onion_domain_crawler_queue')
|
||||
|
||||
for onion in last_onions:
|
||||
metadata_onion = {}
|
||||
metadata_onion['domain'] = onion
|
||||
metadata_onion['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'last_check')
|
||||
if metadata_onion['last_check'] is None:
|
||||
metadata_onion['last_check'] = '********'
|
||||
metadata_onion['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'first_seen')
|
||||
if metadata_onion['first_seen'] is None:
|
||||
metadata_onion['first_seen'] = '********'
|
||||
if get_onion_status(onion, metadata_onion['last_check']):
|
||||
metadata_onion['status_text'] = 'UP'
|
||||
metadata_onion['status_color'] = 'Green'
|
||||
metadata_onion['status_icon'] = 'fa-check-circle'
|
||||
else:
|
||||
metadata_onion['status_text'] = 'DOWN'
|
||||
metadata_onion['status_color'] = 'Red'
|
||||
metadata_onion['status_icon'] = 'fa-times-circle'
|
||||
list_onion.append(metadata_onion)
|
||||
|
||||
crawler_metadata=[]
|
||||
all_onion_crawler = r_cache.smembers('all_crawler:onion')
|
||||
for crawler in all_onion_crawler:
|
||||
crawling_domain = r_cache.hget('metadata_crawler:{}'.format(crawler), 'crawling_domain')
|
||||
started_time = r_cache.hget('metadata_crawler:{}'.format(crawler), 'started_time')
|
||||
status_info = r_cache.hget('metadata_crawler:{}'.format(crawler), 'status')
|
||||
crawler_info = '{} - {}'.format(crawler, started_time)
|
||||
if status_info=='Waiting' or status_info=='Crawling':
|
||||
status=True
|
||||
else:
|
||||
status=False
|
||||
crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status})
|
||||
|
||||
date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
|
||||
|
||||
return jsonify({'last_onions': list_onion, 'statDomains': statDomains, 'crawler_metadata':crawler_metadata})
|
||||
|
||||
@hiddenServices.route("/hiddenServices/get_onions_by_daterange", methods=['POST'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
def get_onions_by_daterange():
|
||||
date_from = request.form.get('date_from')
|
||||
date_to = request.form.get('date_to')
|
||||
service_type = request.form.get('service_type')
|
||||
domains_up = request.form.get('domains_up')
|
||||
domains_down = request.form.get('domains_down')
|
||||
domains_tags = request.form.get('domains_tags')
|
||||
|
||||
return redirect(url_for('hiddenServices.show_domains_by_daterange', date_from=date_from, date_to=date_to, service_type=service_type, domains_up=domains_up, domains_down=domains_down, domains_tags=domains_tags))
|
||||
|
||||
@hiddenServices.route("/hiddenServices/show_domains_by_daterange", methods=['GET'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
def show_domains_by_daterange():
|
||||
date_from = request.args.get('date_from')
|
||||
date_to = request.args.get('date_to')
|
||||
service_type = request.args.get('service_type')
|
||||
domains_up = request.args.get('domains_up')
|
||||
domains_down = request.args.get('domains_down')
|
||||
domains_tags = request.args.get('domains_tags')
|
||||
|
||||
# incorrect service type
|
||||
if not is_valid_service_type(service_type):
|
||||
service_type = 'onion'
|
||||
|
||||
type_name = dic_type_name[service_type]
|
||||
|
||||
date_range = []
|
||||
if date_from is not None and date_to is not None:
|
||||
#change format
|
||||
try:
|
||||
if len(date_from) != 8:
|
||||
date_from = date_from[0:4] + date_from[5:7] + date_from[8:10]
|
||||
date_to = date_to[0:4] + date_to[5:7] + date_to[8:10]
|
||||
date_range = substract_date(date_from, date_to)
|
||||
except:
|
||||
pass
|
||||
|
||||
if not date_range:
|
||||
date_range.append(datetime.date.today().strftime("%Y%m%d"))
|
||||
date_from = date_range[0][0:4] + '-' + date_range[0][4:6] + '-' + date_range[0][6:8]
|
||||
date_to = date_from
|
||||
|
||||
else:
|
||||
date_from = date_from[0:4] + '-' + date_from[4:6] + '-' + date_from[6:8]
|
||||
date_to = date_to[0:4] + '-' + date_to[4:6] + '-' + date_to[6:8]
|
||||
|
||||
statDomains = {}
|
||||
statDomains['domains_up'] = 0
|
||||
statDomains['domains_down'] = 0
|
||||
statDomains['total'] = 0
|
||||
statDomains['domains_queue'] = get_nb_domains_inqueue(service_type)
|
||||
|
||||
domains_by_day = {}
|
||||
domain_metadata = {}
|
||||
stats_by_date = {}
|
||||
for date in date_range:
|
||||
stats_by_date[date] = {}
|
||||
stats_by_date[date]['domain_up'] = 0
|
||||
stats_by_date[date]['domain_down'] = 0
|
||||
if domains_up:
|
||||
domains_up = True
|
||||
domains_by_day[date] = list(r_serv_onion.smembers('{}_up:{}'.format(service_type, date)))
|
||||
for domain in domains_by_day[date]:
|
||||
h = HiddenServices(domain, 'onion')
|
||||
domain_metadata[domain] = {}
|
||||
if domains_tags:
|
||||
domains_tags = True
|
||||
domain_metadata[domain]['tags'] = h.get_domain_tags(update=True)
|
||||
|
||||
domain_metadata[domain]['last_check'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'last_check')
|
||||
if domain_metadata[domain]['last_check'] is None:
|
||||
domain_metadata[domain]['last_check'] = '********'
|
||||
domain_metadata[domain]['first_seen'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'first_seen')
|
||||
if domain_metadata[domain]['first_seen'] is None:
|
||||
domain_metadata[domain]['first_seen'] = '********'
|
||||
domain_metadata[domain]['status_text'] = 'UP'
|
||||
domain_metadata[domain]['status_color'] = 'Green'
|
||||
domain_metadata[domain]['status_icon'] = 'fa-check-circle'
|
||||
statDomains['domains_up'] += 1
|
||||
stats_by_date[date]['domain_up'] += 1
|
||||
|
||||
if domains_down:
|
||||
domains_down = True
|
||||
domains_by_day_down = list(r_serv_onion.smembers('{}_down:{}'.format(service_type, date)))
|
||||
if domains_up:
|
||||
domains_by_day[date].extend(domains_by_day_down)
|
||||
else:
|
||||
domains_by_day[date] = domains_by_day_down
|
||||
for domain in domains_by_day_down:
|
||||
#h = HiddenServices(onion_domain, 'onion')
|
||||
domain_metadata[domain] = {}
|
||||
#domain_metadata[domain]['tags'] = h.get_domain_tags()
|
||||
|
||||
domain_metadata[domain]['last_check'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'last_check')
|
||||
if domain_metadata[domain]['last_check'] is None:
|
||||
domain_metadata[domain]['last_check'] = '********'
|
||||
domain_metadata[domain]['first_seen'] = r_serv_onion.hget('{}_metadata:{}'.format(service_type, domain), 'first_seen')
|
||||
if domain_metadata[domain]['first_seen'] is None:
|
||||
domain_metadata[domain]['first_seen'] = '********'
|
||||
|
||||
domain_metadata[domain]['status_text'] = 'DOWN'
|
||||
domain_metadata[domain]['status_color'] = 'Red'
|
||||
domain_metadata[domain]['status_icon'] = 'fa-times-circle'
|
||||
statDomains['domains_down'] += 1
|
||||
stats_by_date[date]['domain_down'] += 1
|
||||
|
||||
statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
|
||||
|
||||
return render_template("domains.html", date_range=date_range, domains_by_day=domains_by_day,
|
||||
statDomains=statDomains, type_name=type_name,
|
||||
domain_metadata=domain_metadata,
|
||||
stats_by_date=stats_by_date,
|
||||
date_from=date_from, date_to=date_to, domains_up=domains_up, domains_down=domains_down,
|
||||
domains_tags=domains_tags, type=service_type, bootstrap_label=bootstrap_label)
|
||||
|
||||
@hiddenServices.route("/crawlers/download_domain", methods=['GET'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
@no_cache
|
||||
def download_domain():
|
||||
domain = request.args.get('domain')
|
||||
epoch = request.args.get('epoch')
|
||||
try:
|
||||
epoch = int(epoch)
|
||||
except:
|
||||
epoch = None
|
||||
port = request.args.get('port')
|
||||
faup.decode(domain)
|
||||
unpack_url = faup.get()
|
||||
|
||||
## TODO: # FIXME: remove me
|
||||
try:
|
||||
domain = unpack_url['domain'].decode()
|
||||
except:
|
||||
domain = unpack_url['domain']
|
||||
|
||||
if not port:
|
||||
if unpack_url['port']:
|
||||
try:
|
||||
port = unpack_url['port'].decode()
|
||||
except:
|
||||
port = unpack_url['port']
|
||||
else:
|
||||
port = 80
|
||||
try:
|
||||
port = int(port)
|
||||
except:
|
||||
port = 80
|
||||
type = get_type_domain(domain)
|
||||
if domain is None or not r_serv_onion.exists('{}_metadata:{}'.format(type, domain)):
|
||||
return '404'
|
||||
# # TODO: FIXME return 404
|
||||
|
||||
origin_paste = r_serv_onion.hget('{}_metadata:{}'.format(type, domain), 'paste_parent')
|
||||
|
||||
h = HiddenServices(domain, type, port=port)
|
||||
item_core = h.get_domain_crawled_core_item(epoch=epoch)
|
||||
if item_core:
|
||||
l_pastes = h.get_last_crawled_pastes(item_root=item_core['root_item'])
|
||||
else:
|
||||
l_pastes = []
|
||||
#dict_links = h.get_all_links(l_pastes)
|
||||
|
||||
zip_file = h.create_domain_basic_archive(l_pastes)
|
||||
filename = domain + '.zip'
|
||||
|
||||
return send_file(zip_file, attachment_filename=filename, as_attachment=True)
|
||||
|
||||
|
||||
@hiddenServices.route("/hiddenServices/onion_son", methods=['GET'])
|
||||
@login_required
|
||||
@login_analyst
|
||||
def onion_son():
|
||||
onion_domain = request.args.get('onion_domain')
|
||||
|
||||
h = HiddenServices(onion_domain, 'onion')
|
||||
l_pastes = h.get_last_crawled_pastes()
|
||||
l_son = h.get_domain_son(l_pastes)
|
||||
return 'l_son'
|
||||
|
||||
# ============= JSON ==============
|
||||
@hiddenServices.route("/hiddenServices/domain_crawled_7days_json", methods=['GET'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
def domain_crawled_7days_json():
|
||||
type = 'onion'
|
||||
## TODO: # FIXME: 404 error
|
||||
|
||||
date_range = get_date_range(7)
|
||||
json_domain_stats = []
|
||||
#try:
|
||||
for date in date_range:
|
||||
nb_domain_up = r_serv_onion.scard('{}_up:{}'.format(type, date))
|
||||
nb_domain_down = r_serv_onion.scard('{}_up:{}'.format(type, date))
|
||||
date = date[0:4] + '-' + date[4:6] + '-' + date[6:8]
|
||||
json_domain_stats.append({ 'date': date, 'value': int( nb_domain_up ), 'nb_domain_down': int( nb_domain_down )})
|
||||
#except:
|
||||
#return jsonify()
|
||||
|
||||
return jsonify(json_domain_stats)
|
||||
|
||||
@hiddenServices.route('/hiddenServices/domain_crawled_by_type_json')
|
||||
@login_required
|
||||
@login_read_only
|
||||
def domain_crawled_by_type_json():
|
||||
current_date = request.args.get('date')
|
||||
type = request.args.get('type')
|
||||
if type in list_types:
|
||||
|
||||
num_day_type = 7
|
||||
date_range = get_date_range(num_day_type)
|
||||
range_decoder = []
|
||||
for date in date_range:
|
||||
day_crawled = {}
|
||||
day_crawled['date']= date[0:4] + '-' + date[4:6] + '-' + date[6:8]
|
||||
day_crawled['UP']= nb_domain_up = r_serv_onion.scard('{}_up:{}'.format(type, date))
|
||||
day_crawled['DOWN']= nb_domain_up = r_serv_onion.scard('{}_down:{}'.format(type, date))
|
||||
range_decoder.append(day_crawled)
|
||||
|
||||
return jsonify(range_decoder)
|
||||
|
||||
else:
|
||||
return jsonify('Incorrect Type')
|
||||
|
||||
# ========= REGISTRATION =========
|
||||
app.register_blueprint(hiddenServices, url_prefix=baseUrl)
|
||||
|
|
|
@ -1,476 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
|
||||
<html>
|
||||
<head>
|
||||
<title>AIL-Framework</title>
|
||||
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
|
||||
<!-- Core CSS -->
|
||||
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
|
||||
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
|
||||
<link href="{{ url_for('static', filename='css/daterangepicker.min.css') }}" rel="stylesheet">
|
||||
|
||||
<!-- JS -->
|
||||
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
|
||||
<script language="javascript" src="{{ url_for('static', filename='js/moment.min.js') }}"></script>
|
||||
<script language="javascript" src="{{ url_for('static', filename='js/jquery.daterangepicker.min.js') }}"></script>
|
||||
<script language="javascript" src="{{ url_for('static', filename='js/d3.min.js') }}"></script>
|
||||
|
||||
<style>
|
||||
.bar {
|
||||
fill: steelblue;
|
||||
}
|
||||
.bar:hover{
|
||||
fill: brown;
|
||||
cursor: pointer;
|
||||
}
|
||||
.bar_stack:hover{
|
||||
cursor: pointer;
|
||||
}
|
||||
div.tooltip {
|
||||
position: absolute;
|
||||
text-align: center;
|
||||
padding: 2px;
|
||||
font: 12px sans-serif;
|
||||
background: #ebf4fb;
|
||||
border: 2px solid #b7ddf2;
|
||||
border-radius: 8px;
|
||||
pointer-events: none;
|
||||
color: #000000;
|
||||
}
|
||||
</style>
|
||||
|
||||
</head>
|
||||
|
||||
<body>
|
||||
|
||||
{% include 'nav_bar.html' %}
|
||||
|
||||
<div class="container-fluid">
|
||||
<div class="row">
|
||||
|
||||
{% include 'crawler/menu_sidebar.html' %}
|
||||
|
||||
<div class="col-12 col-lg-10" id="core_content">
|
||||
|
||||
<div class="row">
|
||||
<div class="col-12 col-xl-6">
|
||||
|
||||
<div class="table-responsive mt-1 table-hover table-borderless table-striped">
|
||||
<table class="table">
|
||||
<thead class="thead-dark">
|
||||
<tr>
|
||||
<th>Domain</th>
|
||||
<th>First Seen</th>
|
||||
<th>Last Check</th>
|
||||
<th>Status</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody id="tbody_last_crawled">
|
||||
{% for metadata_onion in last_onions %}
|
||||
<tr>
|
||||
<td><a target="_blank" href="{{ url_for('hiddenServices.onion_domain') }}?onion_domain={{ metadata_onion['domain'] }}">{{ metadata_onion['domain'] }}</a></td>
|
||||
<td>{{'{}/{}/{}'.format(metadata_onion['first_seen'][0:4], metadata_onion['first_seen'][4:6], metadata_onion['first_seen'][6:8])}}</td>
|
||||
<td>{{'{}/{}/{}'.format(metadata_onion['last_check'][0:4], metadata_onion['last_check'][4:6], metadata_onion['last_check'][6:8])}}</td>
|
||||
<td><div style="color:{{metadata_onion['status_color']}}; display:inline-block">
|
||||
<i class="fas {{metadata_onion['status_icon']}} "></i>
|
||||
{{metadata_onion['status_text']}}
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<a href="{{ url_for('hiddenServices.blacklisted_onion') }}">
|
||||
<button type="button" class="btn btn-outline-danger">Show Blacklisted Onion</button>
|
||||
</a>
|
||||
|
||||
</div>
|
||||
<div class="col-12 col-xl-6">
|
||||
|
||||
<div class="card text-white bg-dark mb-3 mt-1">
|
||||
<div class="card-header">
|
||||
<div class="row">
|
||||
<div class="col-6">
|
||||
<span class="badge badge-success">{{ statDomains['domains_up'] }}</span> UP
|
||||
<span class="badge badge-danger ml-md-3">{{ statDomains['domains_down'] }}</span> DOWN
|
||||
</div>
|
||||
<div class="col-6">
|
||||
<span class="badge badge-success">{{ statDomains['total'] }}</span> Crawled
|
||||
<span class="badge badge-warning ml-md-3">{{ statDomains['domains_queue'] }}</span> Queue
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<h5 class="card-title">Select domains by date range :</h5>
|
||||
<p class="card-text">Some quick example text to build on the card title and make up the bulk of the card's content.</p>
|
||||
<form action="{{ url_for('hiddenServices.get_onions_by_daterange') }}" id="hash_selector_form" method='post'>
|
||||
<div class="row">
|
||||
<div class="col-6">
|
||||
<div class="input-group" id="date-range-from">
|
||||
<div class="input-group-prepend"><span class="input-group-text"><i class="far fa-calendar-alt" aria-hidden="true"></i></span></div>
|
||||
<input class="form-control" id="date-range-from-input" placeholder="yyyy-mm-dd" value="{{ date_from }}" name="date_from">
|
||||
</div>
|
||||
<div class="input-group" id="date-range-to">
|
||||
<div class="input-group-prepend"><span class="input-group-text"><i class="far fa-calendar-alt" aria-hidden="true"></i></span></div>
|
||||
<input class="form-control" id="date-range-to-input" placeholder="yyyy-mm-dd" value="{{ date_to }}" name="date_to">
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-6">
|
||||
<div class="custom-control custom-switch">
|
||||
<input class="custom-control-input" type="checkbox" name="domains_up" value="True" id="domains_up_id" checked>
|
||||
<label class="custom-control-label" for="domains_up_id">
|
||||
<span class="badge badge-success"><i class="fas fa-check-circle"></i> Domains UP </span>
|
||||
</label>
|
||||
</div>
|
||||
<div class="custom-control custom-switch">
|
||||
<input class="custom-control-input" type="checkbox" name="domains_down" value="True" id="domains_down_id">
|
||||
<label class="custom-control-label" for="domains_down_id">
|
||||
<span class="badge badge-danger"><i class="fas fa-times-circle"></i> Domains DOWN</span>
|
||||
</label>
|
||||
</div>
|
||||
<div class="custom-control custom-switch mt-2">
|
||||
<input class="custom-control-input" type="checkbox" name="domains_tags" value="True" id="domains_tags_id">
|
||||
<label class="custom-control-label" for="domains_tags_id">
|
||||
<span class="badge badge-dark"><i class="fas fa-tags"></i> Domains Tags</span>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<button class="btn btn-primary">
|
||||
<i class="fas fa-eye"></i> Show Onions
|
||||
</button>
|
||||
<form>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="barchart_type">
|
||||
</div>
|
||||
|
||||
<div class="card mt-1 mb-1">
|
||||
<div class="card-header text-white bg-dark">
|
||||
Crawlers Status
|
||||
</div>
|
||||
<div class="card-body px-0 py-0 ">
|
||||
<table class="table">
|
||||
<tbody id="tbody_crawler_info">
|
||||
{% for crawler in crawler_metadata %}
|
||||
<tr>
|
||||
<td>
|
||||
<i class="fas fa-{%if crawler['status']%}check{%else%}times{%endif%}-circle" style="color:{%if crawler['status']%}Green{%else%}Red{%endif%};"></i> {{crawler['crawler_info']}}
|
||||
</td>
|
||||
<td>
|
||||
{{crawler['crawling_domain']}}
|
||||
</td>
|
||||
<td style="color:{%if crawler['status']%}Green{%else%}Red{%endif%};">
|
||||
{{crawler['status_info']}}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
</body>
|
||||
|
||||
<script>
|
||||
var chart = {};
|
||||
$(document).ready(function(){
|
||||
$("#page-Crawler").addClass("active");
|
||||
$("#nav_onion_crawler").addClass("active");
|
||||
|
||||
$('#date-range-from').dateRangePicker({
|
||||
separator : ' to ',
|
||||
getValue: function(){
|
||||
if ($('#date-range-from-input').val() && $('#date-range-to-input').val() )
|
||||
return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
|
||||
else
|
||||
return '';
|
||||
},
|
||||
setValue: function(s,s1,s2){
|
||||
$('#date-range-from-input').val(s1);
|
||||
$('#date-range-to-input').val(s2);
|
||||
}
|
||||
});
|
||||
$('#date-range-to').dateRangePicker({
|
||||
separator : ' to ',
|
||||
getValue: function(){
|
||||
if ($('#date-range-from-input').val() && $('#date-range-to-input').val() )
|
||||
return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
|
||||
else
|
||||
return '';
|
||||
},
|
||||
setValue: function(s,s1,s2){
|
||||
$('#date-range-from-input').val(s1);
|
||||
$('#date-range-to-input').val(s2);
|
||||
}
|
||||
});
|
||||
chart.stackBarChart =barchart_type_stack("{{ url_for('hiddenServices.automatic_onion_crawler_json') }}", 'id');
|
||||
|
||||
chart.onResize();
|
||||
$(window).on("resize", function() {
|
||||
chart.onResize();
|
||||
});
|
||||
|
||||
});
|
||||
|
||||
function toggle_sidebar(){
|
||||
if($('#nav_menu').is(':visible')){
|
||||
$('#nav_menu').hide();
|
||||
$('#side_menu').removeClass('border-right')
|
||||
$('#side_menu').removeClass('col-lg-2')
|
||||
$('#core_content').removeClass('col-lg-10')
|
||||
}else{
|
||||
$('#nav_menu').show();
|
||||
$('#side_menu').addClass('border-right')
|
||||
$('#side_menu').addClass('col-lg-2')
|
||||
$('#core_content').addClass('col-lg-10')
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
<script>/*
|
||||
function refresh_list_crawled(){
|
||||
|
||||
$.getJSON("{{ url_for('hiddenServices.last_crawled_domains_with_stats_json') }}",
|
||||
function(data) {
|
||||
|
||||
var tableRef = document.getElementById('tbody_last_crawled');
|
||||
$("#tbody_last_crawled").empty()
|
||||
|
||||
for (var i = 0; i < data.last_onions.length; i++) {
|
||||
var data_domain = data.last_onions[i]
|
||||
var newRow = tableRef.insertRow(tableRef.rows.length);
|
||||
|
||||
var newCell = newRow.insertCell(0);
|
||||
newCell.innerHTML = "<td><a target=\"_blank\" href=\"{{ url_for('hiddenServices.onion_domain') }}?onion_domain="+data_domain['domain']+"\">"+data_domain['domain']+"</a></td>";
|
||||
|
||||
newCell = newRow.insertCell(1);
|
||||
newCell.innerHTML = "<td>"+data_domain['first_seen'].substr(0, 4)+"/"+data_domain['first_seen'].substr(4, 2)+"/"+data_domain['first_seen'].substr(6, 2)+"</td>"
|
||||
|
||||
newCell = newRow.insertCell(2);
|
||||
newCell.innerHTML = "<td>"+data_domain['last_check'].substr(0, 4)+"/"+data_domain['last_check'].substr(4, 2)+"/"+data_domain['last_check'].substr(6, 2)+"</td>"
|
||||
|
||||
newCell = newRow.insertCell(3);
|
||||
newCell.innerHTML = "<td><div style=\"color:"+data_domain['status_color']+"; display:inline-block\"><i class=\"fa "+data_domain['status_icon']+" fa-2x\"></i>"+data_domain['status_text']+"</div></td>"
|
||||
|
||||
}
|
||||
var statDomains = data.statDomains
|
||||
document.getElementById('text_domain_up').innerHTML = statDomains['domains_up']
|
||||
document.getElementById('text_domain_down').innerHTML = statDomains['domains_down']
|
||||
document.getElementById('text_domain_queue').innerHTML = statDomains['domains_queue']
|
||||
document.getElementById('text_total_domains').innerHTML = statDomains['total']
|
||||
|
||||
if(data.crawler_metadata.length!=0){
|
||||
$("#tbody_crawler_info").empty();
|
||||
var tableRef = document.getElementById('tbody_crawler_info');
|
||||
for (var i = 0; i < data.crawler_metadata.length; i++) {
|
||||
var crawler = data.crawler_metadata[i];
|
||||
var newRow = tableRef.insertRow(tableRef.rows.length);
|
||||
var text_color;
|
||||
var icon;
|
||||
if(crawler['status']){
|
||||
text_color = 'Green';
|
||||
icon = 'check';
|
||||
} else {
|
||||
text_color = 'Red';
|
||||
icon = 'times';
|
||||
}
|
||||
|
||||
var newCell = newRow.insertCell(0);
|
||||
newCell.innerHTML = "<td><i class=\"fa fa-"+icon+"-circle\" style=\"color:"+text_color+";\"></i>"+crawler['crawler_info']+"</td>";
|
||||
|
||||
newCell = newRow.insertCell(1);
|
||||
newCell.innerHTML = "<td><a target=\"_blank\" href=\"{{ url_for('hiddenServices.onion_domain') }}?onion_domain="+crawler['crawling_domain']+"\">"+crawler['crawling_domain']+"</a></td>";
|
||||
|
||||
newCell = newRow.insertCell(2);
|
||||
newCell.innerHTML = "<td><div style=\"color:"+text_color+";\">"+crawler['status_info']+"</div></td>";
|
||||
|
||||
$("#panel_crawler").show();
|
||||
}
|
||||
} else {
|
||||
$("#panel_crawler").hide();
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
if (to_refresh) {
|
||||
setTimeout("refresh_list_crawled()", 10000);
|
||||
}
|
||||
}*/
|
||||
</script>
|
||||
|
||||
<script>
|
||||
var margin = {top: 20, right: 90, bottom: 55, left: 0},
|
||||
width = parseInt(d3.select('#barchart_type').style('width'), 10);
|
||||
width = 1000 - margin.left - margin.right,
|
||||
height = 500 - margin.top - margin.bottom;
|
||||
var x = d3.scaleBand().rangeRound([0, width]).padding(0.1);
|
||||
|
||||
var y = d3.scaleLinear().rangeRound([height, 0]);
|
||||
|
||||
var xAxis = d3.axisBottom(x);
|
||||
|
||||
var yAxis = d3.axisLeft(y);
|
||||
|
||||
var color = d3.scaleOrdinal(d3.schemeSet3);
|
||||
|
||||
var svg = d3.select("#barchart_type").append("svg")
|
||||
.attr("id", "thesvg")
|
||||
.attr("viewBox", "0 0 "+width+" 500")
|
||||
.attr("width", width + margin.left + margin.right)
|
||||
.attr("height", height + margin.top + margin.bottom)
|
||||
.append("g")
|
||||
.attr("transform", "translate(" + margin.left + "," + margin.top + ")");
|
||||
|
||||
|
||||
function barchart_type_stack(url, id) {
|
||||
|
||||
d3.json(url)
|
||||
.then(function(data){
|
||||
|
||||
var labelVar = 'date'; //A
|
||||
var varNames = d3.keys(data[0])
|
||||
.filter(function (key) { return key !== labelVar;}); //B
|
||||
|
||||
data.forEach(function (d) { //D
|
||||
var y0 = 0;
|
||||
d.mapping = varNames.map(function (name) {
|
||||
return {
|
||||
name: name,
|
||||
label: d[labelVar],
|
||||
y0: y0,
|
||||
y1: y0 += +d[name]
|
||||
};
|
||||
});
|
||||
d.total = d.mapping[d.mapping.length - 1].y1;
|
||||
});
|
||||
|
||||
x.domain(data.map(function (d) { return (d.date); })); //E
|
||||
y.domain([0, d3.max(data, function (d) { return d.total; })]);
|
||||
|
||||
svg.append("g")
|
||||
.attr("class", "x axis")
|
||||
.attr("transform", "translate(0," + height + ")")
|
||||
.call(xAxis)
|
||||
.selectAll("text")
|
||||
.attr("class", "bar")
|
||||
.on("click", function (d) { window.location.href = "#" })
|
||||
.attr("transform", "rotate(-18)" )
|
||||
//.attr("transform", "rotate(-40)" )
|
||||
.style("text-anchor", "end");
|
||||
|
||||
svg.append("g")
|
||||
.attr("class", "y axis")
|
||||
.call(yAxis)
|
||||
.append("text")
|
||||
.attr("transform", "rotate(-90)")
|
||||
.attr("y", 6)
|
||||
.attr("dy", ".71em")
|
||||
.style("text-anchor", "end");
|
||||
|
||||
var selection = svg.selectAll(".series")
|
||||
.data(data)
|
||||
.enter().append("g")
|
||||
.attr("class", "series")
|
||||
.attr("transform", function (d) { return "translate(" + x((d.date)) + ",0)"; });
|
||||
|
||||
selection.selectAll("rect")
|
||||
.data(function (d) { return d.mapping; })
|
||||
.enter().append("rect")
|
||||
.attr("class", "bar_stack")
|
||||
.attr("width", x.bandwidth())
|
||||
.attr("y", function (d) { return y(d.y1); })
|
||||
.attr("height", function (d) { return y(d.y0) - y(d.y1); })
|
||||
.style("fill", function (d) { return color(d.name); })
|
||||
.style("stroke", "grey")
|
||||
.on("mouseover", function (d) { showPopover.call(this, d); })
|
||||
.on("mouseout", function (d) { removePopovers(); })
|
||||
.on("click", function(d){ window.location.href = "#" });
|
||||
|
||||
|
||||
data.forEach(function(d) {
|
||||
if(d.total != 0){
|
||||
svg.append("text")
|
||||
.attr("class", "bar")
|
||||
.attr("dy", "-.35em")
|
||||
.attr('x', x(d.date) + x.bandwidth()/2)
|
||||
.attr('y', y(d.total))
|
||||
.on("click", function () {window.location.href = "#" })
|
||||
.style("text-anchor", "middle")
|
||||
.text(d.total);
|
||||
}
|
||||
});
|
||||
|
||||
drawLegend(varNames);
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
function drawLegend (varNames) {
|
||||
var legend = svg.selectAll(".legend")
|
||||
.data(varNames.slice().reverse())
|
||||
.enter().append("g")
|
||||
.attr("class", "legend")
|
||||
.attr("transform", function (d, i) { return "translate(0," + i * 20 + ")"; });
|
||||
|
||||
legend.append("rect")
|
||||
.attr("x", 943)
|
||||
.attr("width", 10)
|
||||
.attr("height", 10)
|
||||
.style("fill", color)
|
||||
.style("stroke", "grey");
|
||||
|
||||
legend.append("text")
|
||||
.attr("class", "svgText")
|
||||
.attr("x", 941)
|
||||
.attr("y", 6)
|
||||
.attr("dy", ".35em")
|
||||
.style("text-anchor", "end")
|
||||
.text(function (d) { return d; });
|
||||
}
|
||||
|
||||
function removePopovers () {
|
||||
$('.popover').each(function() {
|
||||
$(this).remove();
|
||||
});
|
||||
}
|
||||
|
||||
function showPopover (d) {
|
||||
$(this).popover({
|
||||
title: d.name,
|
||||
placement: 'top',
|
||||
container: 'body',
|
||||
trigger: 'manual',
|
||||
html : true,
|
||||
content: function() {
|
||||
return d.label +
|
||||
"<br/>num: " + d3.format(",")(d.value ? d.value: d.y1 - d.y0); }
|
||||
});
|
||||
$(this).popover('show')
|
||||
}
|
||||
|
||||
chart.onResize = function () {
|
||||
var aspect = width / height, chart = $("#thesvg");
|
||||
var targetWidth = chart.parent().width();
|
||||
chart.attr("width", targetWidth);
|
||||
chart.attr("height", targetWidth / 2);
|
||||
}
|
||||
|
||||
window.chart = chart;
|
||||
|
||||
</script>
|
|
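The chart.stackBarChart call and the barchart_type_stack() function above drive the stacked bar chart of crawled domains: the JSON endpoint they point at must return one row per day, where the 'date' key is used as the x-axis label and every other key becomes one stacked numeric series (see varNames in the script above). A minimal sketch of a payload builder in that shape; the function name and the zeroed counters are illustrative only, not the project's actual endpoint.

# Illustrative sketch only: builds the row shape consumed by barchart_type_stack() above.
# A real endpoint would fill the per-type counters from its own storage.
import json
from datetime import date, timedelta

def automatic_crawler_rows(nb_days=7):
    rows = []
    today = date.today()
    for n in range(nb_days, 0, -1):
        day = today - timedelta(days=n)
        rows.append({
            'date': day.isoformat(),   # x-axis label
            'onion': 0,                # one stacked segment per key other than 'date'
            'regular': 0,
        })
    return json.dumps(rows)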
@ -4,7 +4,6 @@
|
|||
'''
|
||||
Flask functions and routes for the trending modules page
|
||||
'''
|
||||
import redis
|
||||
import json
|
||||
import os
|
||||
import datetime
|
||||
|
@ -14,11 +13,12 @@ from flask import Flask, render_template, jsonify, request, Blueprint
|
|||
from Role_Manager import login_admin, login_analyst
|
||||
from flask_login import login_required
|
||||
|
||||
import Paste
|
||||
from whoosh import index
|
||||
from whoosh.fields import Schema, TEXT, ID
|
||||
from whoosh.qparser import QueryParser
|
||||
|
||||
from lib.objects.Items import Item
|
||||
|
||||
import time
|
||||
|
||||
# ============ VARIABLES ============
|
||||
|
@ -27,7 +27,6 @@ import Flask_config
|
|||
app = Flask_config.app
|
||||
config_loader = Flask_config.config_loader
|
||||
baseUrl = Flask_config.baseUrl
|
||||
r_serv_metadata = Flask_config.r_serv_metadata
|
||||
max_preview_char = Flask_config.max_preview_char
|
||||
max_preview_modal = Flask_config.max_preview_modal
|
||||
bootstrap_label = Flask_config.bootstrap_label
|
||||
|
@ -128,15 +127,14 @@ def search():
|
|||
for x in results:
|
||||
r.append(x.items()[0][1].replace(PASTES_FOLDER, '', 1))
|
||||
path = x.items()[0][1].replace(PASTES_FOLDER, '', 1)
|
||||
paste = Paste.Paste(path)
|
||||
content = paste.get_p_content()
|
||||
item = Item(path)
|
||||
content = item.get_content()
|
||||
content_range = max_preview_char if len(content)>max_preview_char else len(content)-1
|
||||
c.append(content[0:content_range])
|
||||
curr_date = str(paste._get_p_date())
|
||||
curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:]
|
||||
curr_date = item.get_date(separator=True)
|
||||
paste_date.append(curr_date)
|
||||
paste_size.append(paste._get_p_size())
|
||||
p_tags = r_serv_metadata.smembers('tag:'+path)
|
||||
paste_size.append(item.get_size())
|
||||
p_tags = item.get_tags()
|
||||
l_tags = []
|
||||
for tag in p_tags:
|
||||
complete_tag = tag
|
||||
|
@ -205,15 +203,14 @@ def get_more_search_result():
|
|||
path = x.items()[0][1]
|
||||
path = path.replace(PASTES_FOLDER, '', 1)
|
||||
path_array.append(path)
|
||||
paste = Paste.Paste(path)
|
||||
content = paste.get_p_content()
|
||||
item = Item(path)
|
||||
content = item.get_content()
|
||||
content_range = max_preview_char if len(content)>max_preview_char else len(content)-1
|
||||
preview_array.append(content[0:content_range])
|
||||
curr_date = str(paste._get_p_date())
|
||||
curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:]
|
||||
curr_date = item.get_date(separator=True)
|
||||
date_array.append(curr_date)
|
||||
size_array.append(paste._get_p_size())
|
||||
p_tags = r_serv_metadata.smembers('tag:'+path)
|
||||
size_array.append(item.get_size())
|
||||
p_tags = item.get_tags()
|
||||
l_tags = []
|
||||
for tag in p_tags:
|
||||
complete_tag = tag
|
||||
|
|
|
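The two hunks above replace the legacy Paste helpers with the new lib.objects.Items API in the search views. For orientation, a condensed sketch of the accessor mapping, using only the methods that actually appear in the hunks; the helper name and return shape are illustrative.

# Sketch of the Paste -> Item accessor mapping used in the hunks above.
from lib.objects.Items import Item

def get_item_preview(path, max_preview_char=250):
    item = Item(path)
    content = item.get_content()                  # was paste.get_p_content()
    curr_date = item.get_date(separator=True)     # was paste._get_p_date() + manual slicing
    size = item.get_size()                        # was paste._get_p_size()
    tags = item.get_tags()                        # was r_serv_metadata.smembers('tag:'+path)
    content_range = max_preview_char if len(content) > max_preview_char else len(content) - 1
    return {'date': curr_date, 'size': size, 'tags': list(tags),
            'preview': content[0:content_range]}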
@ -4,7 +4,6 @@
|
|||
'''
|
||||
Flask functions and routes for the trending modules page
|
||||
'''
|
||||
import redis
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
@ -15,17 +14,14 @@ from Role_Manager import login_admin, login_analyst, login_read_only, no_cache
|
|||
from flask_login import login_required
|
||||
|
||||
import difflib
|
||||
import ssdeep
|
||||
|
||||
import Paste
|
||||
import requests
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
|
||||
import Tag
|
||||
import Item
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
||||
import Domain
|
||||
|
||||
# ============ VARIABLES ============
|
||||
import Flask_config
|
||||
|
@ -57,209 +53,6 @@ def get_item_screenshot_path(item):
|
|||
screenshot = ''
|
||||
return screenshot
|
||||
|
||||
def showpaste(content_range, requested_path):
|
||||
if PASTES_FOLDER not in requested_path:
|
||||
# remove full path
|
||||
requested_path_full = os.path.join(requested_path, PASTES_FOLDER)
|
||||
else:
|
||||
requested_path_full = requested_path
|
||||
requested_path = requested_path.replace(PASTES_FOLDER, '', 1)
|
||||
|
||||
# escape directory transversal
|
||||
if os.path.commonprefix((requested_path_full,PASTES_FOLDER)) != PASTES_FOLDER:
|
||||
return 'path transversal detected'
|
||||
|
||||
vt_enabled = Flask_config.vt_enabled
|
||||
|
||||
try:
|
||||
paste = Paste.Paste(requested_path)
|
||||
except FileNotFoundError:
|
||||
abort(404)
|
||||
|
||||
p_date = str(paste._get_p_date())
|
||||
p_date = p_date[6:]+'/'+p_date[4:6]+'/'+p_date[0:4]
|
||||
p_source = paste.p_source
|
||||
p_encoding = paste._get_p_encoding()
|
||||
p_language = 'None'
|
||||
p_size = paste.p_size
|
||||
p_mime = paste.p_mime
|
||||
p_lineinfo = paste.get_lines_info()
|
||||
p_content = paste.get_p_content()
|
||||
p_duplicate_str_full_list = paste._get_p_duplicate()
|
||||
|
||||
p_duplicate_full_list = []
|
||||
p_duplicate_list = []
|
||||
p_simil_list = []
|
||||
p_date_list = []
|
||||
p_hashtype_list = []
|
||||
|
||||
for dup_list in p_duplicate_str_full_list:
|
||||
dup_list = dup_list[1:-1].replace('\'', '').replace(' ', '').split(',')
|
||||
if dup_list[0] == "tlsh":
|
||||
dup_list[2] = 100 - int(dup_list[2])
|
||||
else:
|
||||
dup_list[2] = int(dup_list[2])
|
||||
p_duplicate_full_list.append(dup_list)
|
||||
|
||||
#p_duplicate_full_list.sort(lambda x,y: cmp(x[2], y[2]), reverse=True)
|
||||
|
||||
# Combine multiple duplicate paste name and format for display
|
||||
new_dup_list = []
|
||||
dup_list_removed = []
|
||||
for dup_list_index in range(0, len(p_duplicate_full_list)):
|
||||
if dup_list_index in dup_list_removed:
|
||||
continue
|
||||
indices = [i for i, x in enumerate(p_duplicate_full_list) if x[1] == p_duplicate_full_list[dup_list_index][1]]
|
||||
hash_types = []
|
||||
comp_vals = []
|
||||
for i in indices:
|
||||
hash_types.append(p_duplicate_full_list[i][0])
|
||||
comp_vals.append(p_duplicate_full_list[i][2])
|
||||
dup_list_removed.append(i)
|
||||
|
||||
#hash_types = str(hash_types).replace("[","").replace("]","") if len(hash_types)==1 else str(hash_types)
|
||||
#comp_vals = str(comp_vals).replace("[","").replace("]","") if len(comp_vals)==1 else str(comp_vals)
|
||||
|
||||
if len(p_duplicate_full_list[dup_list_index]) > 3:
|
||||
try:
|
||||
date_paste = str(int(p_duplicate_full_list[dup_list_index][3]))
|
||||
date_paste = date_paste[0:4]+"-"+date_paste[4:6]+"-"+date_paste[6:8]
|
||||
except ValueError:
|
||||
date_paste = str(p_duplicate_full_list[dup_list_index][3])
|
||||
else:
|
||||
date_paste = "No date available"
|
||||
new_dup_list.append([hash_types, p_duplicate_full_list[dup_list_index][1], comp_vals, date_paste])
|
||||
|
||||
# Create the list to pass to the webpage
|
||||
for dup_list in new_dup_list:
|
||||
hash_type, path, simil_percent, date_paste = dup_list
|
||||
p_duplicate_list.append(path)
|
||||
p_simil_list.append(simil_percent)
|
||||
p_hashtype_list.append(hash_type)
|
||||
p_date_list.append(date_paste)
|
||||
|
||||
if content_range != 0:
|
||||
p_content = p_content[0:content_range]
|
||||
|
||||
#active taxonomies
|
||||
active_taxonomies = r_serv_tags.smembers('active_taxonomies')
|
||||
|
||||
l_tags = r_serv_metadata.smembers('tag:'+requested_path)
|
||||
tags_safe = Tag.is_tags_safe(l_tags)
|
||||
|
||||
#active galaxies
|
||||
active_galaxies = r_serv_tags.smembers('active_galaxies')
|
||||
|
||||
list_tags = []
|
||||
|
||||
for tag in l_tags:
|
||||
if(tag[9:28] == 'automatic-detection'):
|
||||
automatic = True
|
||||
else:
|
||||
automatic = False
|
||||
|
||||
if r_serv_statistics.sismember('tp:'+tag, requested_path):
|
||||
tag_status_tp = True
|
||||
else:
|
||||
tag_status_tp = False
|
||||
if r_serv_statistics.sismember('fp:'+tag, requested_path):
|
||||
tag_status_fp = True
|
||||
else:
|
||||
tag_status_fp = False
|
||||
|
||||
list_tags.append( (tag, automatic, tag_status_tp, tag_status_fp) )
|
||||
|
||||
l_64 = []
|
||||
# load hash files
|
||||
if r_serv_metadata.scard('hash_paste:'+requested_path) > 0:
|
||||
set_b64 = r_serv_metadata.smembers('hash_paste:'+requested_path)
|
||||
for hash in set_b64:
|
||||
nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path)
|
||||
# item list not updated
|
||||
if nb_in_file is None:
|
||||
l_pastes = r_serv_metadata.zrange('nb_seen_hash:'+hash, 0, -1)
|
||||
for paste_name in l_pastes:
|
||||
# dynamic update
|
||||
if PASTES_FOLDER in paste_name:
|
||||
score = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), paste_name)
|
||||
r_serv_metadata.zrem('nb_seen_hash:{}'.format(hash), paste_name)
|
||||
paste_name = paste_name.replace(PASTES_FOLDER, '', 1)
|
||||
r_serv_metadata.zadd('nb_seen_hash:{}'.format(hash), score, paste_name)
|
||||
nb_in_file = r_serv_metadata.zscore('nb_seen_hash:'+hash, requested_path)
|
||||
nb_in_file = int(nb_in_file)
|
||||
estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type')
|
||||
file_type = estimated_type.split('/')[0]
|
||||
# set file icon
|
||||
if file_type == 'application':
|
||||
file_icon = 'fa-file-o '
|
||||
elif file_type == 'audio':
|
||||
file_icon = 'fa-file-video-o '
|
||||
elif file_type == 'image':
|
||||
file_icon = 'fa-file-image-o'
|
||||
elif file_type == 'text':
|
||||
file_icon = 'fa-file-text-o'
|
||||
else:
|
||||
file_icon = 'fa-file'
|
||||
saved_path = r_serv_metadata.hget('metadata_hash:'+hash, 'saved_path')
|
||||
if r_serv_metadata.hexists('metadata_hash:'+hash, 'vt_link'):
|
||||
b64_vt = True
|
||||
b64_vt_link = r_serv_metadata.hget('metadata_hash:'+hash, 'vt_link')
|
||||
b64_vt_report = r_serv_metadata.hget('metadata_hash:'+hash, 'vt_report')
|
||||
else:
|
||||
b64_vt = False
|
||||
b64_vt_link = ''
|
||||
b64_vt_report = r_serv_metadata.hget('metadata_hash:'+hash, 'vt_report')
|
||||
# hash never refreshed
|
||||
if b64_vt_report is None:
|
||||
b64_vt_report = ''
|
||||
|
||||
l_64.append( (file_icon, estimated_type, hash, saved_path, nb_in_file, b64_vt, b64_vt_link, b64_vt_report) )
|
||||
|
||||
crawler_metadata = {}
|
||||
if 'infoleak:submission="crawler"' in l_tags:
|
||||
crawler_metadata['get_metadata'] = True
|
||||
crawler_metadata['domain'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'domain')
|
||||
crawler_metadata['domain'] = crawler_metadata['domain'].rsplit(':', 1)[0]
|
||||
if tags_safe:
|
||||
tags_safe = Tag.is_tags_safe(Domain.get_domain_tags(crawler_metadata['domain']))
|
||||
crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'father')
|
||||
crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+requested_path,'real_link')
|
||||
crawler_metadata['screenshot'] = get_item_screenshot_path(requested_path)
|
||||
else:
|
||||
crawler_metadata['get_metadata'] = False
|
||||
|
||||
item_parent = Item.get_item_parent(requested_path)
|
||||
|
||||
if Flask_config.pymisp is False:
|
||||
misp = False
|
||||
else:
|
||||
misp = True
|
||||
|
||||
if Flask_config.HiveApi is False:
|
||||
hive = False
|
||||
else:
|
||||
hive = True
|
||||
|
||||
misp_event = r_serv_metadata.get('misp_events:' + requested_path)
|
||||
if misp_event is None:
|
||||
misp_eventid = False
|
||||
misp_url = ''
|
||||
else:
|
||||
misp_eventid = True
|
||||
misp_url = misp_event_url + misp_event
|
||||
|
||||
hive_case = r_serv_metadata.get('hive_cases:' + requested_path)
|
||||
if hive_case is None:
|
||||
hive_caseid = False
|
||||
hive_url = ''
|
||||
else:
|
||||
hive_caseid = True
|
||||
hive_url = hive_case_url.replace('id_here', hive_case)
|
||||
|
||||
return render_template("show_saved_paste.html", date=p_date, bootstrap_label=bootstrap_label, active_taxonomies=active_taxonomies, active_galaxies=active_galaxies, list_tags=list_tags, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list, date_list=p_date_list,
|
||||
crawler_metadata=crawler_metadata, tags_safe=tags_safe, item_parent=item_parent,
|
||||
l_64=l_64, vt_enabled=vt_enabled, misp=misp, hive=hive, misp_eventid=misp_eventid, misp_url=misp_url, hive_caseid=hive_caseid, hive_url=hive_url)
|
||||
|
||||
def get_item_basic_info(item):
|
||||
item_basic_info = {}
|
||||
item_basic_info['date'] = str(item.get_p_date())
|
||||
|
@ -286,7 +79,7 @@ def show_item_min(requested_path , content_range=0):
|
|||
else:
|
||||
relative_path = requested_path.replace(PASTES_FOLDER, '', 1)
|
||||
# remove old full path
|
||||
#requested_path = requested_path.replace(PASTES_FOLDER, '')
|
||||
# requested_path = requested_path.replace(PASTES_FOLDER, '')
|
||||
# escape directory transversal
|
||||
if os.path.commonprefix((os.path.realpath(requested_path),PASTES_FOLDER)) != PASTES_FOLDER:
|
||||
return 'path transversal detected'
|
||||
|
@ -370,7 +163,7 @@ def show_item_min(requested_path , content_range=0):
|
|||
crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+relative_path, 'father')
|
||||
crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+relative_path,'real_link')
|
||||
crawler_metadata['screenshot'] = get_item_screenshot_path(relative_path)
|
||||
#crawler_metadata['har_file'] = Item.get_item_har(relative_path)
|
||||
# crawler_metadata['har_file'] = Item.get_item_har(relative_path)
|
||||
else:
|
||||
crawler_metadata['get_metadata'] = False
|
||||
|
||||
|
@ -462,13 +255,6 @@ def showDiff():
|
|||
def screenshot(filename):
|
||||
return send_from_directory(SCREENSHOT_FOLDER, filename+'.png', as_attachment=True)
|
||||
|
||||
# @showsavedpastes.route('/har/paste/<path:filename>')
|
||||
# @login_required
|
||||
# @login_read_only
|
||||
# def har(filename):
|
||||
# har_file = Item.get_item_har(filename)
|
||||
# return jsonify(har_file)
|
||||
|
||||
@showsavedpastes.route('/send_file_to_vt/', methods=['POST'])
|
||||
@login_required
|
||||
@login_analyst
|
||||
|
|
|
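The show_item_min() hunks above keep the directory-traversal guard built on os.path.realpath() plus os.path.commonprefix() against the items directory. A standalone sketch of that guard, with PASTES_FOLDER standing in for the configured items directory:

# Minimal sketch of the traversal guard retained by show_item_min() above.
import os

PASTES_FOLDER = '/opt/ail/PASTES/'   # placeholder for the configured items directory

def is_safe_item_path(requested_path):
    # Resolve symlinks and '..' components, then check the result still
    # lives under the items directory.
    real = os.path.realpath(os.path.join(PASTES_FOLDER, requested_path))
    # Note: os.path.commonpath() would be a stricter check; commonprefix()
    # mirrors the string-prefix test used in the code above.
    return os.path.commonprefix((real, PASTES_FOLDER)) == PASTES_FOLDER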
@ -15,14 +15,11 @@ from flask import Flask, render_template, jsonify, request, Blueprint, url_for,
|
|||
|
||||
from Role_Manager import login_admin, login_analyst, login_user_no_api, login_read_only
|
||||
from flask_login import login_required, current_user
|
||||
|
||||
import re
|
||||
from pprint import pprint
|
||||
import Levenshtein
|
||||
|
||||
# ---------------------------------------------------------------
|
||||
|
||||
import Paste
|
||||
from lib.objects.Items import Item
|
||||
import Term
|
||||
|
||||
# ============ VARIABLES ============
|
||||
|
@ -268,15 +265,15 @@ def credentials_management_query_paste():
|
|||
paste_info = []
|
||||
for pathNum in allPath:
|
||||
path = r_serv_cred.hget(REDIS_KEY_ALL_PATH_SET_REV, pathNum)
|
||||
paste = Paste.Paste(path)
|
||||
p_date = str(paste._get_p_date())
|
||||
p_date = p_date[0:4]+'/'+p_date[4:6]+'/'+p_date[6:8]
|
||||
p_source = paste.p_source
|
||||
p_encoding = paste._get_p_encoding()
|
||||
p_size = paste.p_size
|
||||
p_mime = paste.p_mime
|
||||
p_lineinfo = paste.get_lines_info()
|
||||
p_content = paste.get_p_content()
|
||||
item = Item(path)
|
||||
p_date = item.get_date(separator=True)
|
||||
p_source = item.get_source()
|
||||
p_content = item.get_content()
|
||||
p_encoding = item.get_mimetype()
|
||||
p_size = item.get_size()
|
||||
p_mime = p_encoding
|
||||
lineinfo = item.get_meta_lines(content=p_content)
|
||||
p_lineinfo = lineinfo['nb'], lineinfo['max_length']
|
||||
if p_content != 0:
|
||||
p_content = p_content[0:400]
|
||||
paste_info.append({"path": path, "date": p_date, "source": p_source, "encoding": p_encoding, "size": p_size, "mime": p_mime, "lineinfo": p_lineinfo, "content": p_content})
|
||||
|
|
|
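In the credentials hunk above, the old Paste.get_lines_info() pair is rebuilt from Item.get_meta_lines(), which, as used here, returns a dict with 'nb' and 'max_length', and the mimetype is reused as the encoding field. A condensed sketch of that replacement; the helper name and the preview size are illustrative.

# Sketch of the per-paste info dict built in the hunk above.
from lib.objects.Items import Item

def get_credential_paste_info(path, preview_size=400):
    item = Item(path)
    content = item.get_content()
    lineinfo = item.get_meta_lines(content=content)   # dict with 'nb' and 'max_length'
    return {
        'path': path,
        'date': item.get_date(separator=True),
        'source': item.get_source(),
        'encoding': item.get_mimetype(),               # the hunk reuses the mimetype here
        'size': item.get_size(),
        'lineinfo': (lineinfo['nb'], lineinfo['max_length']),
        'content': content[0:preview_size],
    }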
@ -51,7 +51,7 @@
|
|||
<th>Total sent</th>
|
||||
<th>Balance</th>
|
||||
<th>Inputs address seen in AIL</th>
|
||||
<th>Ouputs address seen in AIL</th>
|
||||
<th>Outputs address seen in AIL</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
|
@ -62,12 +62,12 @@
|
|||
<td>{{ dict_object["metadata_card"]["related_btc"]["final_balance"] }}</td>
|
||||
<td>
|
||||
{% for btc_addr in dict_object["metadata_card"]["related_btc"]["btc_in"] %}
|
||||
<a target="_blank" href="{{ url_for('correlation.show_correlation') }}?object_type=cryptocurrency&type_id=bitcoin&correlation_id={{ btc_addr }}&correlation_objects=paste">{{ btc_addr }}</a>
|
||||
<a target="_blank" href="{{ url_for('correlation.show_correlation') }}?type=cryptocurrency&subtype=bitcoin&id={{ btc_addr }}">{{ btc_addr }}</a>
|
||||
{% endfor %}
|
||||
</td>
|
||||
<td>
|
||||
{% for btc_addr in dict_object["metadata_card"]["related_btc"]["btc_out"] %}
|
||||
<a target="_blank" href="{{ url_for('correlation.show_correlation') }}?object_type=cryptocurrency&type_id=bitcoin&correlation_id={{ btc_addr }}&correlation_objects=paste">{{ btc_addr }}</a>
|
||||
<a target="_blank" href="{{ url_for('correlation.show_correlation') }}?type=cryptocurrency&subtype=bitcoin&id={{ btc_addr }}">{{ btc_addr }}</a>
|
||||
{% endfor %}
|
||||
</td>
|
||||
</tr>
|
||||
|
@ -75,7 +75,7 @@
|
|||
</table>
|
||||
</div>
|
||||
{% else %}
|
||||
<a class="btn btn-secondary" href="{{ url_for('correlation.show_correlation') }}?object_type=cryptocurrency&type_id=bitcoin&correlation_id={{ dict_object['correlation_id'] }}&expand_card=True&correlation_objects=paste">Expand Bitcoin address</a>
|
||||
<a class="btn btn-secondary" href="{{ url_for('correlation.show_correlation') }}?type=cryptocurrency&subtype=bitcoin&id={{ dict_object['correlation_id'] }}&related_btc=True">Expand Bitcoin address</a>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
|
||||
|
|
172
var/www/templates/correlation/metadata_card_cve.html
Normal file
|
@ -0,0 +1,172 @@
|
|||
<link href="{{ url_for('static', filename='css/tags.css') }}" rel="stylesheet" type="text/css" />
|
||||
<script src="{{ url_for('static', filename='js/tags.js') }}"></script>
|
||||
|
||||
{#{% with modal_add_tags=dict_object['metadata_card']['add_tags_modal']%}#}
|
||||
{# {% include 'modals/add_tags.html' %}#}
|
||||
{#{% endwith %}#}
|
||||
|
||||
{% include 'modals/edit_tag.html' %}
|
||||
|
||||
<div class="card my-3">
|
||||
<div class="card-header" style="background-color:#d9edf7;font-size: 15px">
|
||||
<h4 class="text-secondary">{{ dict_object["correlation_id"] }} :</h4>
|
||||
<ul class="list-group mb-2">
|
||||
<li class="list-group-item py-0">
|
||||
<div class="row">
|
||||
<div class="col-md-10">
|
||||
<table class="table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Object type</th>
|
||||
<th>First seen</th>
|
||||
<th>Last seen</th>
|
||||
<th>Nb seen</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>{{ dict_object["object_type"] }}</td>
|
||||
<td>
|
||||
<svg height="26" width="26">
|
||||
<g class="nodes">
|
||||
<circle cx="13" cy="13" r="13" fill="orange"></circle>
|
||||
<text x="13" y="13" text-anchor="middle" dominant-baseline="central" class="graph_node_icon {{ dict_object["metadata_card"]["icon"]["icon_class"] }}" font-size="16px">{{ dict_object["metadata_card"]["icon"]["icon_text"] }}</text>
|
||||
</g>
|
||||
</svg>
|
||||
</td>
|
||||
<td>{{ dict_object["metadata"]['first_seen'] }}</td>
|
||||
<td>{{ dict_object["metadata"]['last_seen'] }}</td>
|
||||
<td>{{ dict_object["metadata"]['nb_seen'] }}</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<div class="col-md-1">
|
||||
<div id="sparkline"></div>
|
||||
</div>
|
||||
</div>
|
||||
</li>
|
||||
{# <li class="list-group-item py-0">#}
|
||||
{# <br>#}
|
||||
{# <div class="mb-3">#}
|
||||
{# Tags:#}
|
||||
{# {% for tag in dict_object["metadata"]['tags'] %}#}
|
||||
{# <button class="btn btn-{{ bootstrap_label[loop.index0 % 5] }}" data-toggle="modal" data-target="#edit_tags_modal"#}
|
||||
{# data-tagid="{{ tag }}" data-objtype="decoded" data-objid="{{ dict_object["correlation_id"] }}">#}
|
||||
{# {{ tag }}#}
|
||||
{# </button>#}
|
||||
{# {% endfor %}#}
|
||||
{# <button type="button" class="btn btn-light" data-toggle="modal" data-target="#add_tags_modal">#}
|
||||
{# <i class="far fa-plus-square"></i>#}
|
||||
{# </button>#}
|
||||
{# </div>#}
|
||||
{# </li>#}
|
||||
</ul>
|
||||
|
||||
|
||||
{% with obj_type='decoded', obj_id=dict_object['correlation_id'], obj_subtype='' %}
|
||||
{% include 'modals/investigations_register_obj.html' %}
|
||||
{% endwith %}
|
||||
<button type="button" class="btn btn-primary" data-toggle="modal" data-target="#investigations_register_obj_modal">
|
||||
<i class="fas fa-microscope"></i> Investigations
|
||||
</button>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script src="{{ url_for('static', filename='js/d3/sparklines.js')}}"></script>
|
||||
<script>
|
||||
sparkline("sparkline", {{ dict_object["metadata_card"]["sparkline"] }}, {});
|
||||
</script>
|
||||
|
||||
|
||||
<script>
|
||||
function create_line_chart(id, url){
|
||||
|
||||
var width = 900;
|
||||
var height = Math.round(width / 4);
|
||||
|
||||
var margin = {top: 20, right: 55, bottom: 50, left: 40};
|
||||
|
||||
var x = d3.scaleTime().range([0, width]);
|
||||
var y = d3.scaleLinear().rangeRound([height, 0]);
|
||||
|
||||
var xAxis = d3.axisBottom(x);
|
||||
var yAxis = d3.axisLeft(y);
|
||||
|
||||
var parseTime = d3.timeParse("%Y-%m-%d");
|
||||
|
||||
var line = d3.line()
|
||||
.x(function(d) {
|
||||
return x(d.date);
|
||||
}).y(function(d) {
|
||||
return y(d.value);
|
||||
});
|
||||
|
||||
var svg_line = d3.select('#'+id).append('svg')
|
||||
.attr("id", "graph_div")
|
||||
.attr("width", width + margin.left + margin.right)
|
||||
.attr("height", height + margin.top + margin.bottom)
|
||||
.append('g')
|
||||
.attr('transform', "translate("+ margin.left +","+ margin.top +")");
|
||||
|
||||
var div = d3.select('body').append('div')
|
||||
.attr('class', 'tooltip')
|
||||
.style('opacity', 0);
|
||||
|
||||
//add div tooltip
|
||||
|
||||
d3.json(url)
|
||||
.then(function(data){
|
||||
|
||||
data.forEach(function(d) {
|
||||
d.date_label = d.date;
|
||||
d.date = parseTime(d.date);
|
||||
d.value = +d.value;
|
||||
});
|
||||
|
||||
// fit the data
|
||||
x.domain(d3.extent(data, function(d) { return d.date; }));
|
||||
//x.domain(data.map(function (d) { return d.date; })); //E
|
||||
y.domain([0, d3.max(data, function(d){ return d.value ; })]);
|
||||
|
||||
//line
|
||||
svg_line.append("path")
|
||||
.data([data])
|
||||
.attr("class", "line_graph")
|
||||
.attr("d", line);
|
||||
|
||||
// add X axis
|
||||
svg_line.append("g")
|
||||
.attr("transform", "translate(0," + height + ")")
|
||||
.call(d3.axisBottom(x))
|
||||
.selectAll("text")
|
||||
.style("text-anchor", "end")
|
||||
.attr("transform", "rotate(-45)" );
|
||||
|
||||
// Add the Y Axis
|
||||
svg_line.append("g")
|
||||
.call(d3.axisLeft(y));
|
||||
|
||||
//add a dot circle
|
||||
svg_line.selectAll('dot')
|
||||
.data(data).enter()
|
||||
.append('circle')
|
||||
.attr('r', 2)
|
||||
.attr('cx', function(d) { return x(d.date); })
|
||||
.attr('cy', function(d) { return y(d.value); })
|
||||
|
||||
.on('mouseover', function(d) {
|
||||
div.transition().style('opacity', .9);
|
||||
div.html('' + d.date_label+ '<br/>' + d.value).style('left', (d3.event.pageX) + 'px')
|
||||
.style("left", (d3.event.pageX) + "px")
|
||||
.style("top", (d3.event.pageY - 28) + "px");
|
||||
})
|
||||
.on('mouseout', function(d)
|
||||
{
|
||||
div.transition().style('opacity', 0);
|
||||
});
|
||||
|
||||
});
|
||||
}
|
||||
</script>
|
|
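The create_line_chart() function in the new CVE card above parses each point with d3.timeParse("%Y-%m-%d") and reads a numeric value field, so whatever endpoint it is given must return a JSON array of {date: 'YYYY-MM-DD', value: N} objects. A minimal sketch of a payload in that shape; the function name and the zeroed counts are placeholders, not the project's actual route.

# Illustrative payload builder for create_line_chart() above: one point per day.
import json
from datetime import date, timedelta

def cve_daily_points(nb_days=30):
    points = []
    today = date.today()
    for n in range(nb_days, 0, -1):
        day = today - timedelta(days=n)
        points.append({'date': day.isoformat(),  # 'YYYY-MM-DD', matches d3.timeParse("%Y-%m-%d")
                       'value': 0})              # daily count; 0 is a placeholder
    return json.dumps(points)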
@ -39,7 +39,7 @@
|
|||
</div>
|
||||
<div class="mb-2 float-right">
|
||||
<a href="{{ url_for('objects_item.showItem')}}?id={{ dict_object["correlation_id"] }}" target="_blank" style="font-size: 15px">
|
||||
<button class="btn btn-info"><i class="fas fa-search"></i> Show Paste
|
||||
<button class="btn btn-info"><i class="fas fa-search"></i> Show Item
|
||||
</button>
|
||||
</a>
|
||||
</div>
|
|
@ -99,12 +99,14 @@
|
|||
{% include 'correlation/metadata_card_username.html' %}
|
||||
{% elif dict_object["object_type"] == "decoded" %}
|
||||
{% include 'correlation/metadata_card_decoded.html' %}
|
||||
{% elif dict_object["object_type"] == "cve" %}
|
||||
{% include 'correlation/metadata_card_cve.html' %}
|
||||
{% elif dict_object["object_type"] == "domain" %}
|
||||
{% include 'correlation/metadata_card_domain.html' %}
|
||||
{% elif dict_object["object_type"] == "screenshot" %}
|
||||
{% include 'correlation/metadata_card_screenshot.html' %}
|
||||
{% elif dict_object["object_type"] == "paste" %}
|
||||
{% include 'correlation/metadata_card_paste.html' %}
|
||||
{% elif dict_object["object_type"] == "item" %}
|
||||
{% include 'correlation/metadata_card_item.html' %}
|
||||
{% endif %}
|
||||
|
||||
<div class="row">
|
||||
|
@ -146,41 +148,46 @@
|
|||
<div class="card-body text-center px-0 py-0">
|
||||
|
||||
<ul class="list-group">
|
||||
<li class="list-group-item list-group-item-info">Select Correlation</i></li>
|
||||
<li class="list-group-item list-group-item-info">Select Correlation</li>
|
||||
<form action="{{ url_for('correlation.show_correlation') }}" method="post">
|
||||
<li class="list-group-item text-left">
|
||||
|
||||
<form action="{{ url_for('correlation.show_correlation') }}" method="post">
|
||||
<input type="hidden" id="object_type" name="object_type" value="{{ dict_object["object_type"] }}">
|
||||
<input type="hidden" id="type_id" name="type_id" value="{{ dict_object["metadata"]["type_id"] }}">
|
||||
<input type="hidden" id="correlation_id" name="correlation_id" value="{{ dict_object["correlation_id"] }}">
|
||||
<input type="hidden" id="obj_type" name="obj_type" value="{{ dict_object["object_type"] }}">
|
||||
<input type="hidden" id="subtype" name="subtype" value="{{ dict_object["metadata"]["type_id"] }}">
|
||||
<input type="hidden" id="obj_id" name="obj_id" value="{{ dict_object["correlation_id"] }}">
|
||||
|
||||
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="checkbox" value="True" id="CryptocurrencyCheck" name="CryptocurrencyCheck" {%if "cryptocurrency" in dict_object["correlation_names"]%}checked{%endif%}>
|
||||
<input class="form-check-input" type="checkbox" value="True" id="CveCheck" name="CveCheck" {%if "cve" in dict_object["filter"]%}checked{%endif%}>
|
||||
<label class="form-check-label" for="CveCheck">Cve</label>
|
||||
</div>
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="checkbox" value="True" id="CryptocurrencyCheck" name="CryptocurrencyCheck" {%if "cryptocurrency" in dict_object["filter"]%}checked{%endif%}>
|
||||
<label class="form-check-label" for="CryptocurrencyCheck">Cryptocurrency</label>
|
||||
</div>
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="checkbox" value="True" id="DecodedCheck" name="DecodedCheck" {%if "decoded" in dict_object["correlation_names"]%}checked{%endif%}>
|
||||
<input class="form-check-input" type="checkbox" value="True" id="DecodedCheck" name="DecodedCheck" {%if "decoded" in dict_object["filter"]%}checked{%endif%}>
|
||||
<label class="form-check-label" for="DecodedCheck">Decoded</label>
|
||||
</div>
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="checkbox" value="True" id="ScreenshotCheck" name="ScreenshotCheck" {%if "screenshot" in dict_object["correlation_names"]%}checked{%endif%}>
|
||||
<input class="form-check-input" type="checkbox" value="True" id="ScreenshotCheck" name="ScreenshotCheck" {%if "screenshot" in dict_object["filter"]%}checked{%endif%}>
|
||||
<label class="form-check-label" for="ScreenshotCheck">Screenshot</label>
|
||||
</div>
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="checkbox" value="True" id="PgpCheck" name="PgpCheck" {%if "pgp" in dict_object["correlation_names"]%}checked{%endif%}>
|
||||
<input class="form-check-input" type="checkbox" value="True" id="PgpCheck" name="PgpCheck" {%if "pgp" in dict_object["filter"]%}checked{%endif%}>
|
||||
<label class="form-check-label" for="PgpCheck">PGP</label>
|
||||
</div>
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="checkbox" value="True" id="UsernameCheck" name="UsernameCheck" {%if "username" in dict_object["correlation_names"]%}checked{%endif%}>
|
||||
<input class="form-check-input" type="checkbox" value="True" id="UsernameCheck" name="UsernameCheck" {%if "username" in dict_object["filter"]%}checked{%endif%}>
|
||||
<label class="form-check-label" for="UsernameCheck">Username</label>
|
||||
</div>
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="checkbox" value="True" id="DomainCheck" name="DomainCheck" {%if "domain" in dict_object["correlation_objects"]%}checked{%endif%}>
|
||||
<input class="form-check-input" type="checkbox" value="True" id="DomainCheck" name="DomainCheck" {%if "domain" in dict_object["filter"]%}checked{%endif%}>
|
||||
<label class="form-check-label" for="DomainCheck">Domain</label>
|
||||
</div>
|
||||
<div class="form-check">
|
||||
<input class="form-check-input" type="checkbox" value="True" id="PasteCheck" name="PasteCheck" {%if "item" in dict_object["correlation_objects"]%}checked{%endif%}>
|
||||
<label class="form-check-label" for="PasteCheck">Item</label>
|
||||
<input class="form-check-input" type="checkbox" value="True" id="ItemCheck" name="ItemCheck" {%if "item" in dict_object["filter"]%}checked{%endif%}>
|
||||
<label class="form-check-label" for="ItemCheck">Item</label>
|
||||
</div>
|
||||
|
||||
</li>
|
||||
|
@ -204,9 +211,9 @@
|
|||
<input class="btn btn-primary" type="submit" value="Redraw Graph">
|
||||
</div>
|
||||
|
||||
</form>
|
||||
|
||||
</li>
|
||||
</form>
|
||||
</ul>
|
||||
|
||||
<ul class="list-group">
|
||||
|
@ -236,7 +243,7 @@
|
|||
</div>
|
||||
</div>
|
||||
|
||||
{% if dict_object["object_type"] in ["decoded", "pgp", "cryptocurrency"] %}
|
||||
{% if dict_object["object_type"] in ["cve", "decoded", "pgp", "cryptocurrency"] %}
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<i class="fas fa-chart-bar"></i> Graph
|
||||
|
@ -257,13 +264,15 @@ var all_graph = {};
|
|||
$(document).ready(function(){
|
||||
$("#page-Decoded").addClass("active");
|
||||
|
||||
all_graph.node_graph = create_graph("{{ url_for('correlation.graph_node_json') }}?correlation_id={{ dict_object["correlation_id"] }}&object_type={{ dict_object["object_type"] }}&mode={{ dict_object["mode"] }}&correlation_names={{ dict_object["correlation_names_str"] }}&correlation_objects={{ dict_object["correlation_objects_str"] }}&max_nodes={{dict_object["max_nodes"]}}{% if 'type_id' in dict_object["metadata"] %}&type_id={{ dict_object["metadata"]["type_id"] }}{% endif %}");
|
||||
all_graph.node_graph = create_graph("{{ url_for('correlation.graph_node_json') }}?id={{ dict_object["correlation_id"] }}&type={{ dict_object["object_type"] }}&mode={{ dict_object["mode"] }}&filter={{ dict_object["filter_str"] }}&max_nodes={{dict_object["max_nodes"]}}{% if 'type_id' in dict_object["metadata"] %}&subtype={{ dict_object["metadata"]["type_id"] }}{% endif %}");
|
||||
{% if dict_object["object_type"] == "pgp" %}
|
||||
all_graph.line_chart = create_line_chart('graph_line', "{{ url_for('hashDecoded.pgpdump_graph_line_json') }}?type_id={{dict_object["metadata"]["type_id"]}}&key_id={{dict_object["correlation_id"]}}");
|
||||
{% elif dict_object["object_type"] == "cryptocurrency" %}
|
||||
all_graph.line_chart = create_line_chart('graph_line', "{{ url_for('hashDecoded.cryptocurrency_graph_line_json') }}?type_id={{dict_object["metadata"]["type_id"]}}&key_id={{dict_object["correlation_id"]}}");
|
||||
{% elif dict_object["object_type"] == "decoded" %}
|
||||
all_graph.line_chart = create_line_chart('graph_line', "{{ url_for('hashDecoded.hash_graph_line_json') }}?hash={{dict_object["correlation_id"]}}");
|
||||
{% elif dict_object["object_type"] == "cve" %}
|
||||
all_graph.line_chart = create_line_chart('graph_line', "{{ url_for('hashDecoded.hash_graph_line_json') }}?hash={{dict_object["correlation_id"]}}");
|
||||
{% endif %}
|
||||
all_graph.onResize();
|
||||
});
|
||||
|
|
|
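The show_correlation template changes above rename the correlation parameters from object_type/type_id/correlation_id plus correlation_names/correlation_objects to the shorter type/subtype/id plus a single filter value, both in the redraw form (obj_type/subtype/obj_id hidden fields, per-object checkboxes) and in the graph_node_json query string. A hedged sketch of how a handler might read the renamed query parameters; the route shown here is only an illustration of the parsing, and treating filter as a comma-separated list is an assumption based on filter_str in the template.

# Sketch: reading the renamed correlation parameters (names taken from the template above).
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route('/correlation/graph_node_json')
def graph_node_json_sketch():
    obj_type = request.args.get('type')        # was object_type
    subtype = request.args.get('subtype')      # was type_id
    obj_id = request.args.get('id')            # was correlation_id
    max_nodes = request.args.get('max_nodes', 300)
    # assumed: 'filter' is a comma-separated list of object types to keep in the graph
    filter_types = [t for t in request.args.get('filter', '').split(',') if t]
    return jsonify({'type': obj_type, 'subtype': subtype, 'id': obj_id,
                    'max_nodes': int(max_nodes), 'filter': filter_types})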
@ -37,7 +37,7 @@
|
|||
<h5 class="card-title">Crawl a Domain</h5>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<p class="card-text">Enter a domain and choose what kind of data you want.</p>
|
||||
<p class="card-text">Enter an url or a domain and choose what kind of option you want.</p>
|
||||
<form action="{{ url_for('crawler_splash.send_to_spider') }}" method='post'>
|
||||
<div class="row">
|
||||
<div class="col-12 col-lg-6">
|
||||
|
@ -49,22 +49,22 @@
|
|||
<div class="custom-control custom-switch">
|
||||
<input class="custom-control-input" type="checkbox" name="queue_type_selector" value="True" id="queue_type_selector">
|
||||
<label class="custom-control-label" for="queue_type_selector">
|
||||
<i class="fas fa-splotch"></i> Splash Name
|
||||
<i class="fas fa-splotch"></i> Proxy
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
<div id="div_crawler_queue_type">
|
||||
<select class="custom-select form-control" name="crawler_queue_type" id="crawler_queue_type">
|
||||
{%for crawler_type in all_crawlers_types%}
|
||||
{%for crawler_type in crawlers_types%}
|
||||
<option value="{{crawler_type}}" {%if crawler_type=='tor'%}selected{%endif%}>{{crawler_type}}</option>
|
||||
{%endfor%}
|
||||
</select>
|
||||
</div>
|
||||
<div id="div_splash_name">
|
||||
<select class="custom-select form-control" name="splash_name" id="splash_name">
|
||||
<option value="None" selected>Don't use a special splash crawler</option>
|
||||
{%for splash_name in all_splash_name%}
|
||||
<option value="{{splash_name}}">{{splash_name}}</option>
|
||||
<div id="div_proxy_name">
|
||||
<select class="custom-select form-control" name="proxy_name" id="proxy_name">
|
||||
<option value="None" selected>Use a proxy</option>
|
||||
{%for proxy in proxies%}
|
||||
<option value="{{proxy}}">{{proxy}}</option>
|
||||
{%endfor%}
|
||||
</select>
|
||||
</div>
|
||||
|
@ -122,15 +122,16 @@
|
|||
<span class="input-group-text">Depth Limit</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="input-group mt-2">
|
||||
<div class="input-group-prepend">
|
||||
<span class="input-group-text bg-light"><i class="fas fa-copy"></i> </span>
|
||||
</div>
|
||||
<input class="form-control" type="number" id="max_pages" name="max_pages" min="1" value="1" required>
|
||||
<div class="input-group-append">
|
||||
<span class="input-group-text">Max Pages</span>
|
||||
</div>
|
||||
</div>
|
||||
{# TEMPORARY DISABLED #}
|
||||
{# <div class="input-group mt-2">#}
|
||||
{# <div class="input-group-prepend">#}
|
||||
{# <span class="input-group-text bg-light"><i class="fas fa-copy"></i> </span>#}
|
||||
{# </div>#}
|
||||
{# <input class="form-control" type="number" id="max_pages" name="max_pages" min="1" value="1" required>#}
|
||||
{# <div class="input-group-append">#}
|
||||
{# <span class="input-group-text">Max Pages</span>#}
|
||||
{# </div>#}
|
||||
{# </div>#}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
@ -204,10 +205,10 @@ function manual_crawler_input_controler() {
|
|||
function queue_type_selector_input_controler() {
|
||||
if($('#queue_type_selector').is(':checked')){
|
||||
$("#div_crawler_queue_type").hide();
|
||||
$("#div_splash_name").show();
|
||||
$("#div_proxy_name").show();
|
||||
}else{
|
||||
$("#div_crawler_queue_type").show();
|
||||
$("#div_splash_name").hide();
|
||||
$("#div_proxy_name").hide();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
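The crawl form above now posts a proxy_name select (shown when the Proxy switch is on) instead of the old splash_name, alongside crawler_queue_type and the depth/pages inputs. A hedged sketch of reading those fields server-side; the url_to_crawl and depth_limit field names are assumptions, only crawler_queue_type, proxy_name and max_pages are visible in the template, and the real send_to_spider handler is not part of this diff.

# Sketch: reading the crawler form fields named in the template above.
# Intended use inside the POST handler: fields = read_crawl_request(request.form)
def read_crawl_request(form):
    url = form.get('url_to_crawl')                  # assumed name of the URL/domain input
    queue_type = form.get('crawler_queue_type', 'tor')
    proxy = form.get('proxy_name')
    if proxy == 'None':                             # 'None' is the template's "no specific proxy" value
        proxy = None
    depth = int(form.get('depth_limit', 1))         # assumed name of the Depth Limit input
    # max_pages exists in the template but is temporarily disabled there
    return {'url': url, 'queue_type': queue_type, 'proxy': proxy, 'depth': depth}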
@ -7,10 +7,13 @@
|
|||
<!-- Core CSS -->
|
||||
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
|
||||
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
|
||||
<link href="{{ url_for('static', filename='css/daterangepicker.min.css') }}" rel="stylesheet">
|
||||
|
||||
<!-- JS -->
|
||||
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/moment.min.js') }}"></script>
|
||||
<script src="{{ url_for('static', filename='js/jquery.daterangepicker.min.js') }}"></script>
|
||||
|
||||
|
||||
</head>
|
||||
|
@ -33,15 +36,15 @@
|
|||
|
||||
<div class="card mt-1 mb-1">
|
||||
<div class="card-header text-white bg-dark">
|
||||
<h5><a class="text-info" href="{{ url_for('hiddenServices.Crawler_Splash_last_by_type')}}?type=onion"><i class="fas fa-user-secret"></i> Onions Crawlers</a></h5>
|
||||
<h5><a class="text-info" href="{{ url_for('crawler_splash.crawlers_last_domains')}}?type=onion"><i class="fas fa-user-secret"></i> Onions Crawlers</a></h5>
|
||||
<div class="row">
|
||||
<div class="col-6">
|
||||
<a href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?service_type=onion&domains_up=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_onion_domain_up">{{ splash_crawlers_latest_stats['onion']['domains_up'] }}</a> UP
|
||||
<a href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?service_type=onion&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-danger ml-md-3" id="stat_onion_domain_down">{{ splash_crawlers_latest_stats['onion']['domains_down'] }}</a> DOWN
|
||||
<a href="{{ url_for('crawler_splash.domains_search_date') }}?service_type=onion&domains_up=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_onion_domain_up">{{ crawlers_latest_stats['onion']['up'] }}</a> UP
|
||||
<a href="{{ url_for('crawler_splash.domains_search_date') }}?service_type=onion&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-danger ml-md-3" id="stat_onion_domain_down">{{ crawlers_latest_stats['onion']['down'] }}</a> DOWN
|
||||
</div>
|
||||
<div class="col-6">
|
||||
<a href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?service_type=onion&domains_up=True&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_onion_total">{{ splash_crawlers_latest_stats['onion']['total'] }}</a> Crawled
|
||||
<span class="badge badge-warning ml-md-3" id="stat_onion_queue">{{ splash_crawlers_latest_stats['onion']['domains_queue'] }}</span> Queue
|
||||
<a href="{{ url_for('crawler_splash.domains_search_date') }}?service_type=onion&domains_up=True&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_onion_total">{{ crawlers_latest_stats['onion']['crawled'] }}</a> Crawled
|
||||
<span class="badge badge-warning ml-md-3" id="stat_onion_queue">{{ crawlers_latest_stats['onion']['queue'] }}</span> Queue
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
@ -51,15 +54,15 @@
|
|||
<div class="col-xl-6">
|
||||
<div class="card mt-1 mb-1">
|
||||
<div class="card-header text-white bg-dark">
|
||||
<h5><a class="text-info" href="{{ url_for('hiddenServices.Crawler_Splash_last_by_type')}}?type=regular"><i class="fab fa-html5"></i> Regular Crawlers</a></h5>
|
||||
<h5><a class="text-info" href="{{ url_for('crawler_splash.crawlers_last_domains')}}?type=web"><i class="fab fa-html5"></i> Web Crawlers</a></h5>
|
||||
<div class="row">
|
||||
<div class="col-6">
|
||||
<a href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?service_type=regular&domains_up=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_regular_domain_up">{{ splash_crawlers_latest_stats['regular']['domains_up'] }}</a> UP
|
||||
<a href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?service_type=regular&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-danger ml-md-3" id="stat_regular_domain_down">{{ splash_crawlers_latest_stats['regular']['domains_down'] }}</a> DOWN
|
||||
<a href="{{ url_for('crawler_splash.domains_search_date') }}?service_type=web&domains_up=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_web_domain_up">{{ crawlers_latest_stats['web']['up'] }}</a> UP
|
||||
<a href="{{ url_for('crawler_splash.domains_search_date') }}?service_type=web&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-danger ml-md-3" id="stat_web_domain_down">{{ crawlers_latest_stats['web']['down'] }}</a> DOWN
|
||||
</div>
|
||||
<div class="col-6">
|
||||
<a href="{{ url_for('hiddenServices.show_domains_by_daterange') }}?service_type=regular&domains_up=True&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_regular_total">{{ splash_crawlers_latest_stats['regular']['total'] }}</a> Crawled
|
||||
<span class="badge badge-warning ml-md-3" id="stat_regular_queue">{{ splash_crawlers_latest_stats['regular']['domains_queue'] }}</span> Queue
|
||||
<a href="{{ url_for('crawler_splash.domains_search_date') }}?service_type=web&domains_up=True&domains_down=True&date_from={{date}}&date_to={{date}}" class="badge badge-success" id="stat_web_total">{{ crawlers_latest_stats['web']['crawled'] }}</a> Crawled
|
||||
<span class="badge badge-warning ml-md-3" id="stat_web_queue">{{ crawlers_latest_stats['web']['queue'] }}</span> Queue
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
@ -69,23 +72,23 @@
|
|||
|
||||
<table class="table">
|
||||
<tbody id="tbody_crawler_onion_info">
|
||||
{% for splash_crawler in all_splash_crawler_status %}
|
||||
{% for crawler in crawlers_status %}
|
||||
<tr>
|
||||
<td>
|
||||
<i class="fas fa-{%if splash_crawler['status']%}check{%else%}times{%endif%}-circle" style="color:{%if splash_crawler['status']%}Green{%else%}Red{%endif%};"></i> {{splash_crawler['crawler_info']}}
|
||||
<i class="fas fa-{%if crawler['status']%}check{%else%}times{%endif%}-circle" style="color:{%if crawler['status']%}Green{%else%}Red{%endif%};"></i> {{crawler['start_time']}}
|
||||
</td>
|
||||
<td>
|
||||
{%if splash_crawler['type']=='onion'%}
|
||||
{%if crawler['type']=='onion'%}
|
||||
<i class="fas fa-user-secret"></i>
|
||||
{%else%}
|
||||
<i class="fab fa-html5">
|
||||
{%endif%}
|
||||
</td>
|
||||
<td>
|
||||
{{splash_crawler['crawling_domain']}}
|
||||
{{crawler['domain']}}
|
||||
</td>
|
||||
<td style="color:{%if splash_crawler['status']%}Green{%else%}Red{%endif%};">
|
||||
{{splash_crawler['status_info']}}
|
||||
<td style="color:{%if crawler['status']%}Green{%else%}Red{%endif%};">
|
||||
{{crawler['status']}}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
|
@ -93,6 +96,9 @@
|
|||
</table>
|
||||
|
||||
{% include 'domains/block_domains_name_search.html' %}
|
||||
<div class="d-flex justify-content-center my-4">
|
||||
{% include 'crawler/show_domains_by_daterange.html' %}
|
||||
</div>
|
||||
|
||||
<hr>
|
||||
<div class="row mb-3">
|
||||
|
@ -134,6 +140,32 @@ var to_refresh = false
|
|||
$(document).ready(function(){
|
||||
$("#page-Crawler").addClass("active");
|
||||
$("#nav_dashboard").addClass("active");
|
||||
$('#date-range-from').dateRangePicker({
|
||||
separator : ' to ',
|
||||
getValue: function(){
|
||||
if ($('#date-range-from-input').val() && $('#date-range-to-input').val() )
|
||||
return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
|
||||
else
|
||||
return '';
|
||||
},
|
||||
setValue: function(s,s1,s2){
|
||||
$('#date-range-from-input').val(s1);
|
||||
$('#date-range-to-input').val(s2);
|
||||
}
|
||||
});
|
||||
$('#date-range-to').dateRangePicker({
|
||||
separator : ' to ',
|
||||
getValue: function(){
|
||||
if ($('#date-range-from-input').val() && $('#date-range-to-input').val() )
|
||||
return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
|
||||
else
|
||||
return '';
|
||||
},
|
||||
setValue: function(s,s1,s2){
|
||||
$('#date-range-from-input').val(s1);
|
||||
$('#date-range-to-input').val(s2);
|
||||
}
|
||||
});
|
||||
$( window ).on("focus", function() {
|
||||
to_refresh = true
|
||||
refresh_crawler_status();
|
||||
|
@ -144,6 +176,7 @@ $(document).ready(function(){
|
|||
|
||||
to_refresh = true
|
||||
refresh_crawler_status();
|
||||
|
||||
});
|
||||
|
||||
function toggle_sidebar(){
|
||||
|
@ -165,21 +198,21 @@ function refresh_crawler_status(){
|
|||
$.getJSON("{{ url_for('crawler_splash.crawler_dashboard_json') }}",
|
||||
function(data) {
|
||||
|
||||
$('#stat_onion_domain_up').text(data.splash_crawlers_latest_stats['onion']['domains_up']);
|
||||
$('#stat_onion_domain_down').text(data.splash_crawlers_latest_stats['onion']['domains_down']);
|
||||
$('#stat_onion_total').text(data.splash_crawlers_latest_stats['onion']['total']);
|
||||
$('#stat_onion_queue').text(data.splash_crawlers_latest_stats['onion']['domains_queue']);
|
||||
$('#stat_onion_domain_up').text(data.stats['onion']['up']);
|
||||
$('#stat_onion_domain_down').text(data.stats['onion']['down']);
|
||||
$('#stat_onion_total').text(data.stats['onion']['crawled']);
|
||||
$('#stat_onion_queue').text(data.stats['onion']['queue']);
|
||||
|
||||
$('#stat_regular_domain_up').text(data.splash_crawlers_latest_stats['regular']['domains_up']);
|
||||
$('#stat_regular_domain_down').text(data.splash_crawlers_latest_stats['regular']['domains_down']);
|
||||
$('#stat_regular_total').text(data.splash_crawlers_latest_stats['regular']['total']);
|
||||
$('#stat_regular_queue').text(data.splash_crawlers_latest_stats['regular']['domains_queue']);
|
||||
$('#stat_web_domain_up').text(data.stats['web']['up']);
|
||||
$('#stat_web_domain_down').text(data.stats['web']['down']);
|
||||
$('#stat_web_total').text(data.stats['web']['crawled']);
|
||||
$('#stat_web_queue').text(data.stats['web']['queue']);
|
||||
|
||||
if(data.all_splash_crawler_status.length!=0){
|
||||
if(data.crawlers_status.length!=0){
|
||||
$("#tbody_crawler_onion_info").empty();
|
||||
var tableRef = document.getElementById('tbody_crawler_onion_info');
|
||||
for (var i = 0; i < data.all_splash_crawler_status.length; i++) {
|
||||
var crawler = data.all_splash_crawler_status[i];
|
||||
for (var i = 0; i < data.crawlers_status.length; i++) {
|
||||
var crawler = data.crawlers_status[i];
|
||||
var newRow = tableRef.insertRow(tableRef.rows.length);
|
||||
var text_color;
|
||||
var icon;
|
||||
|
@ -198,16 +231,16 @@ function refresh_crawler_status(){
|
|||
}
|
||||
|
||||
var newCell = newRow.insertCell(0);
|
||||
newCell.innerHTML = "<td><i class=\"fas fa-"+icon+"-circle\" style=\"color:"+text_color+";\"></i> "+crawler['crawler_info']+"</td>";
|
||||
newCell.innerHTML = "<td><i class=\"fas fa-"+icon+"-circle\" style=\"color:"+text_color+";\"></i> "+crawler['start_time']+"</td>";
|
||||
|
||||
var newCell = newRow.insertCell(1);
|
||||
newCell.innerHTML = "<td><i class=\""+icon_t+"\"></i></td>";
|
||||
|
||||
newCell = newRow.insertCell(2);
|
||||
newCell.innerHTML = "<td>"+crawler['crawling_domain']+"</td>";
|
||||
newCell.innerHTML = "<td>"+crawler['domain']+"</td>";
|
||||
|
||||
newCell = newRow.insertCell(3);
|
||||
newCell.innerHTML = "<td><div style=\"color:"+text_color+";\">"+crawler['status_info']+"</div></td>";
|
||||
newCell.innerHTML = "<td><div style=\"color:"+text_color+";\">"+crawler['status']+"</div></td>";
|
||||
|
||||
//$("#panel_crawler").show();
|
||||
}
|
154
var/www/templates/crawler/crawler_splash/domains_daterange.html
Normal file
|
@ -0,0 +1,154 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>AIL-Framework</title>
|
||||
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
|
||||
<!-- Core CSS -->
|
||||
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
|
||||
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
|
||||
<link href="{{ url_for('static', filename='css/daterangepicker.min.css') }}" rel="stylesheet">
|
||||
<link href="{{ url_for('static', filename='css/dataTables.bootstrap.min.css') }}" rel="stylesheet">
|
||||
|
||||
<!-- JS -->
|
||||
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/jquery.dataTables.min.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/dataTables.bootstrap.min.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/moment.min.js') }}"></script>
|
||||
<script src="{{ url_for('static', filename='js/jquery.daterangepicker.min.js') }}"></script>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
|
||||
{% include 'nav_bar.html' %}
|
||||
|
||||
<div class="container-fluid">
|
||||
<div class="row">
|
||||
|
||||
{% include 'crawler/menu_sidebar.html' %}
|
||||
|
||||
<div class="col-12 col-lg-10" id="core_content">
|
||||
|
||||
{% include 'crawler/show_domains_by_daterange.html' %}
|
||||
|
||||
|
||||
{% for date in dict_domains %}
|
||||
<div class="card text-center mt-1 mb-3">
|
||||
<div class="card-header bg-dark text-white">
|
||||
<h3 style="text-align:center;">{{'{}/{}/{}'.format(date[0:4], date[4:6], date[6:8])}}</h3>
|
||||
</div>
|
||||
<div class="card-body px-1">
|
||||
|
||||
<table id="table_{{date}}" class="table table-striped table-bordered">
|
||||
<thead class="bg-dark text-white">
|
||||
<tr>
|
||||
<th>Domain</th>
|
||||
<th>First Seen</th>
|
||||
<th>Last Check</th>
|
||||
<th>Status</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for dict_domain in dict_domains[date] %}
|
||||
<tr>
|
||||
<td>
|
||||
<a target="_blank" href="{{ url_for('crawler_splash.showDomain') }}?domain={{ dict_domain['domain'] }}">{{ dict_domain['domain'] }}</a>
|
||||
<div>
|
||||
{% for tag in dict_domain['tags'] %}
|
||||
<a href="{{ url_for('tags_ui.get_obj_by_tags') }}?object_type=domain<ags={{ tag }}">
|
||||
<span class="badge badge-{{ bootstrap_label[loop.index0 % 5] }}">{{ tag }}</span>
|
||||
</a>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</td>
|
||||
<td>{{dict_domain['first_seen']}}</td>
|
||||
<td>{{dict_domain['last_check']}}</td>
|
||||
<td>
|
||||
{% if dict_domain['status'] %}
|
||||
<div style="color:Green; display:inline-block">
|
||||
<i class="fas fa-check-circle"></i> UP
|
||||
</div>
|
||||
{% else %}
|
||||
<div style="color:Red; display:inline-block">
|
||||
<i class="fas fa-times-circle"></i> DOWN
|
||||
</div>
|
||||
{% endif %}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% endfor %}
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</body>
|
||||
|
||||
<script>
|
||||
$(document).ready(function(){
|
||||
$("#page-Crawler").addClass("active");
|
||||
|
||||
$('#date-range-from').dateRangePicker({
|
||||
separator : ' to ',
|
||||
getValue: function(){
|
||||
if ($('#date-range-from-input').val() && $('#date-range-to-input').val() )
|
||||
return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
|
||||
else
|
||||
return '';
|
||||
},
|
||||
setValue: function(s,s1,s2){
|
||||
$('#date-range-from-input').val(s1);
|
||||
$('#date-range-to-input').val(s2);
|
||||
}
|
||||
});
|
||||
$('#date-range-to').dateRangePicker({
|
||||
separator : ' to ',
|
||||
getValue: function(){
|
||||
if ($('#date-range-from-input').val() && $('#date-range-to-input').val() )
|
||||
return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
|
||||
else
|
||||
return '';
|
||||
},
|
||||
setValue: function(s,s1,s2){
|
||||
$('#date-range-from-input').val(s1);
|
||||
$('#date-range-to-input').val(s2);
|
||||
}
|
||||
});
|
||||
|
||||
{% for date in dict_domains %}
|
||||
$('#table_{{date}}').DataTable({
|
||||
"aLengthMenu": [[5, 15, 30, -1], [5, 15, 30, "All"]],
|
||||
"iDisplayLength": 15,
|
||||
"order": [[ 0, "asc" ]]
|
||||
});
|
||||
{% endfor %}
|
||||
|
||||
});
|
||||
|
||||
|
||||
function toggle_sidebar(){
|
||||
if($('#nav_menu').is(':visible')){
|
||||
$('#nav_menu').hide();
|
||||
$('#side_menu').removeClass('border-right')
|
||||
$('#side_menu').removeClass('col-lg-2')
|
||||
$('#core_content').removeClass('col-lg-10')
|
||||
}else{
|
||||
$('#nav_menu').show();
|
||||
$('#side_menu').addClass('border-right')
|
||||
$('#side_menu').addClass('col-lg-2')
|
||||
$('#core_content').addClass('col-lg-10')
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
</html>
|
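Editor's note: the template above iterates a `dict_domains` mapping keyed by `YYYYMMDD` dates, where each entry carries `domain`, `tags`, `first_seen`, `last_check` and `status`, and initialises one DataTable per date. A minimal sketch of the context it expects, with hypothetical values; the view name, template path and data below are illustrative assumptions, not the framework's actual code:

```python
# Minimal sketch of the context the per-date domain table expects.
# Keys mirror the fields referenced in the Jinja2 loops above; values are made up.
from flask import render_template

def render_domains_by_daterange():  # hypothetical view name
    dict_domains = {
        '20221004': [  # YYYYMMDD, rendered as {{date[0:4]}}/{{date[4:6]}}/{{date[6:8]}} in the card header
            {
                'domain': 'example.onion',
                'tags': ['infoleak:automatic-detection="onion"'],
                'first_seen': '20221001',
                'last_check': '20221004',
                'status': True,  # True renders the green UP badge, False the red DOWN badge
            },
        ],
    }
    # cycled per tag via bootstrap_label[loop.index0 % 5]; assumed label list
    bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
    return render_template('crawler/crawler_splash/domains_by_daterange.html',  # hypothetical template path
                           dict_domains=dict_domains, bootstrap_label=bootstrap_label)
```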
338
var/www/templates/crawler/crawler_splash/last_crawled.html
Normal file

@@ -0,0 +1,338 @@
<!DOCTYPE html>

<html>
  <head>
    <title>AIL-Framework</title>
    <link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
    <!-- Core CSS -->
    <link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
    <link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
    <link href="{{ url_for('static', filename='css/daterangepicker.min.css') }}" rel="stylesheet">

    <!-- JS -->
    <script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
    <script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
    <script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
    <script language="javascript" src="{{ url_for('static', filename='js/moment.min.js') }}"></script>
    <script language="javascript" src="{{ url_for('static', filename='js/jquery.daterangepicker.min.js') }}"></script>
    <script language="javascript" src="{{ url_for('static', filename='js/d3.min.js') }}"></script>

    <style>
      .bar {
        fill: steelblue;
      }
      .bar:hover{
        fill: brown;
        cursor: pointer;
      }
      .bar_stack:hover{
        cursor: pointer;
      }
      .popover{
        max-width: 100%;
      }
      .domain_name {
        display:inline-block;
        overflow: hidden;
        white-space: nowrap;
        text-overflow: ellipsis;
        max-width: 400px;
      }
    </style>

  </head>

  <body>

    {% include 'nav_bar.html' %}

    <div class="container-fluid">
      <div class="row">

        {% include 'crawler/menu_sidebar.html' %}

        <div class="col-12 col-lg-10" id="core_content">

          {% include 'crawler/crawler_disabled.html' %}

          <div class="row">
            <div class="col-12 col-xl-6">
              <div class="table-responsive mt-1 table-hover table-borderless table-striped">
                <table class="table">
                  <thead class="thead-dark">
                    <tr>
                      <th>Domain</th>
                      <th>First Seen</th>
                      <th>Last Check</th>
                      <th>Status</th>
                    </tr>
                  </thead>
                  <tbody id="tbody_last_crawled">
                    {% for domain in domains %}
                      <tr data-toggle="popover" data-trigger="hover"
                          title="<span class='badge badge-dark'>{{domain['domain']}}</span>"
                          data-content="epoch: {{domain['epoch']}}<br>last status: {{ domain['status'] }}">
                        <td><a target="_blank" class="domain_name" href="{{ url_for('crawler_splash.showDomain') }}?domain={{ domain['domain'] }}&epoch={{domain['epoch']}}">{{ domain['domain'] }}</a></td>
                        <td>{{domain['first_seen']}}</td>
                        <td>{{domain['last_check']}}</td>
                        <td>
                          {% if domain['status_epoch'] %}
                            <div style="color:Green; display:inline-block">
                              <i class="fas fa-check-circle"></i> UP
                            </div>
                          {% else %}
                            <div style="color:Red; display:inline-block">
                              <i class="fas fa-times-circle"></i> DOWN
                            </div>
                          {% endif %}
                        </td>
                      </tr>
                    {% endfor %}
                  </tbody>
                </table>
              </div>

              <a href="{{ url_for('hiddenServices.blacklisted_domains') }}?type={{type}}">
                <button type="button" class="btn btn-outline-danger">Show Blacklisted {{type_name}}s</button>
              </a>

            </div>
            <div class="col-12 col-xl-6">
              {% include 'crawler/show_domains_by_daterange.html' %}

              <div id="barchart_type"></div>

            </div>
          </div>

        </div>

      </div>
    </div>

  </body>
  <script>
    var chart = {};
    $(document).ready(function(){
      $("#page-Crawler").addClass("active");
      $("#nav_{{type}}_crawler").addClass("active");

      $('#date-range-from').dateRangePicker({
        separator : ' to ',
        getValue: function(){
          if ($('#date-range-from-input').val() && $('#date-range-to-input').val() )
            return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
          else
            return '';
        },
        setValue: function(s,s1,s2){
          $('#date-range-from-input').val(s1);
          $('#date-range-to-input').val(s2);
        }
      });
      $('#date-range-to').dateRangePicker({
        separator : ' to ',
        getValue: function(){
          if ($('#date-range-from-input').val() && $('#date-range-to-input').val() )
            return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
          else
            return '';
        },
        setValue: function(s,s1,s2){
          $('#date-range-from-input').val(s1);
          $('#date-range-to-input').val(s2);
        }
      });
      chart.stackBarChart = barchart_type_stack("{{ url_for('crawler_splash.crawlers_last_domains_json') }}?type={{type}}", 'id');

      chart.onResize();
      $(window).on("resize", function() {
        chart.onResize();
      });

      $('[data-toggle="popover"]').popover({
        placement: 'top',
        container: 'body',
        html : true,
      });

    });

    function toggle_sidebar(){
      if($('#nav_menu').is(':visible')){
        $('#nav_menu').hide();
        $('#side_menu').removeClass('border-right')
        $('#side_menu').removeClass('col-lg-2')
        $('#core_content').removeClass('col-lg-10')
      }else{
        $('#nav_menu').show();
        $('#side_menu').addClass('border-right')
        $('#side_menu').addClass('col-lg-2')
        $('#core_content').addClass('col-lg-10')
      }
    }
  </script>

  <script>
    var margin = {top: 20, right: 90, bottom: 55, left: 0},
        width = parseInt(d3.select('#barchart_type').style('width'), 10);
        width = 1000 - margin.left - margin.right,
        height = 500 - margin.top - margin.bottom;
    var x = d3.scaleBand().rangeRound([0, width]).padding(0.1);

    var y = d3.scaleLinear().rangeRound([height, 0]);

    var xAxis = d3.axisBottom(x);

    var yAxis = d3.axisLeft(y);

    var color = d3.scaleOrdinal(d3.schemeSet3);

    var svg = d3.select("#barchart_type").append("svg")
      .attr("id", "thesvg")
      .attr("viewBox", "0 0 "+width+" 500")
      .attr("width", width + margin.left + margin.right)
      .attr("height", height + margin.top + margin.bottom)
      .append("g")
      .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

    function barchart_type_stack(url, id) {

      d3.json(url)
        .then(function(data){

          var labelVar = 'date'; //A
          var varNames = d3.keys(data[0])
            .filter(function (key) { return key !== labelVar;}); //B

          data.forEach(function (d) { //D
            var y0 = 0;
            d.mapping = varNames.map(function (name) {
              return {
                name: name,
                label: d[labelVar],
                y0: y0,
                y1: y0 += +d[name]
              };
            });
            d.total = d.mapping[d.mapping.length - 1].y1;
          });

          x.domain(data.map(function (d) { return (d.date); })); //E
          y.domain([0, d3.max(data, function (d) { return d.total; })]);

          svg.append("g")
            .attr("class", "x axis")
            .attr("transform", "translate(0," + height + ")")
            .call(xAxis)
            .selectAll("text")
            .attr("class", "bar")
            .on("click", function (d) { window.location.href = "#" })
            .attr("transform", "rotate(-18)" )
            //.attr("transform", "rotate(-40)" )
            .style("text-anchor", "end");

          svg.append("g")
            .attr("class", "y axis")
            .call(yAxis)
            .append("text")
            .attr("transform", "rotate(-90)")
            .attr("y", 6)
            .attr("dy", ".71em")
            .style("text-anchor", "end");

          var selection = svg.selectAll(".series")
            .data(data)
            .enter().append("g")
            .attr("class", "series")
            .attr("transform", function (d) { return "translate(" + x((d.date)) + ",0)"; });

          selection.selectAll("rect")
            .data(function (d) { return d.mapping; })
            .enter().append("rect")
            .attr("class", "bar_stack")
            .attr("width", x.bandwidth())
            .attr("y", function (d) { return y(d.y1); })
            .attr("height", function (d) { return y(d.y0) - y(d.y1); })
            .style("fill", function (d) { return color(d.name); })
            .style("stroke", "grey")
            .on("mouseover", function (d) { showPopover.call(this, d); })
            .on("mouseout", function (d) { removePopovers(); })
            .on("click", function(d){ window.location.href = "#" });

          data.forEach(function(d) {
            if(d.total != 0){
              svg.append("text")
                .attr("class", "bar")
                .attr("dy", "-.35em")
                .attr('x', x(d.date) + x.bandwidth()/2)
                .attr('y', y(d.total))
                .on("click", function () {window.location.href = "#" })
                .style("text-anchor", "middle")
                .text(d.total);
            }
          });

          drawLegend(varNames);
        });

    }

    function drawLegend (varNames) {
      var legend = svg.selectAll(".legend")
        .data(varNames.slice().reverse())
        .enter().append("g")
        .attr("class", "legend")
        .attr("transform", function (d, i) { return "translate(0," + i * 20 + ")"; });

      legend.append("rect")
        .attr("x", 943)
        .attr("width", 10)
        .attr("height", 10)
        .style("fill", color)
        .style("stroke", "grey");

      legend.append("text")
        .attr("class", "svgText")
        .attr("x", 941)
        .attr("y", 6)
        .attr("dy", ".35em")
        .style("text-anchor", "end")
        .text(function (d) { return d; });
    }

    function removePopovers () {
      $('.popover').each(function() {
        $(this).remove();
      });
    }

    function showPopover (d) {
      $(this).popover({
        title: d.name,
        placement: 'top',
        container: 'body',
        trigger: 'manual',
        html : true,
        content: function() {
          return d.label +
            "<br/>num: " + d3.format(",")(d.value ? d.value: d.y1 - d.y0); }
      });
      $(this).popover('show')
    }

    chart.onResize = function () {
      var aspect = width / height, chart = $("#thesvg");
      var targetWidth = chart.parent().width();
      chart.attr("width", targetWidth);
      chart.attr("height", targetWidth / 2);
    }

    window.chart = chart;

  </script>
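Editor's note: `barchart_type_stack()` above builds its series dynamically: every key of the returned JSON objects other than `date` becomes one stack segment, and each bar's total is the sum of those fields. A minimal sketch of a payload in the shape the chart consumes, assuming `UP`/`DOWN` series names and made-up counts; the real data comes from `crawler_splash.crawlers_last_domains_json`, and the helper name below is illustrative only:

```python
# Hedged sketch of the JSON shape barchart_type_stack() expects: a list of flat
# objects carrying a 'date' label plus one numeric field per stacked series.
from flask import Flask, jsonify

app = Flask(__name__)

@app.route('/sketch/crawlers_last_domains_json')  # hypothetical route, for illustration only
def crawlers_last_domains_json_sketch():
    stats = [
        {'date': '2022-10-01', 'UP': 12, 'DOWN': 3},
        {'date': '2022-10-02', 'UP': 9,  'DOWN': 7},
        {'date': '2022-10-03', 'UP': 15, 'DOWN': 1},
    ]
    # In the chart, d3.keys(data[0]) minus 'date' yields the series ('UP', 'DOWN'),
    # and the per-date bar total is the sum of those fields.
    return jsonify(stats)
```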
@@ -68,17 +68,17 @@
<table class="table table-sm">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>Splash Manager URL</td>
|
||||
<td>{{splash_manager_url}}</td>
|
||||
<td>Lacus URL</td>
|
||||
<td>{{lacus_url}}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>API Key</td>
|
||||
{# <td>API Key</td>#}
|
||||
{# <td>#}
|
||||
{# {{api_key}}#}
|
||||
{# <!-- <a class="ml-3" href="/settings/new_token"><i class="fa fa-random"></i></a> -->#}
|
||||
{# </td>#}
|
||||
<td>
|
||||
{{api_key}}
|
||||
<!-- <a class="ml-3" href="/settings/new_token"><i class="fa fa-random"></i></a> -->
|
||||
</td>
|
||||
<td>
|
||||
<a href="{{ url_for('crawler_splash.crawler_splash_setings_crawler_manager') }}">
|
||||
<a href="{{ url_for('crawler_splash.crawler_lacus_settings_crawler_manager') }}">
|
||||
<button type="button" class="btn btn-info">
|
||||
Edit <i class="fas fa-pencil-alt"></i>
|
||||
</button>
|
||||
|
@@ -92,85 +92,18 @@
          </div>
        </div>

        <div {%if not is_manager_connected%}class="hidden"{%endif%}>

          <div class="card border-secondary mb-4">
            <div class="card-body text-dark">
              <h5 class="card-title">All Splash Crawlers:</h5>
              <table class="table table-striped">
                <thead class="bg-info text-white">
                  <th>
                    Splash name
                  </th>
                  <th>
                    Proxy
                  </th>
                  <th>
                    Crawler type
                  </th>
                  <th>
                    Description
                  </th>
                  <th></th>
                </thead>
                <tbody>
                  {% for splash_name in all_splash %}
                    <tr>
                      <td>
                        {{splash_name}}
                      </td>
                      <td>
                        {{all_splash[splash_name]['proxy']}}
                      </td>
                      <td>
                        {%if all_splash[splash_name]['type']=='tor'%}
                          <i class="fas fa-user-secret"></i>
                        {%else%}
                          <i class="fab fa-html5">
                        {%endif%}
                        {{all_splash[splash_name]['type']}}
                      </td>
                      <td>
                        {{all_splash[splash_name]['description']}}
                      </td>
                      <td>
                        <div class="d-flex justify-content-end">
                          <!-- <button class="btn btn-outline-dark px-1 py-0">
                            <i class="fas fa-pencil-alt"></i>
                          </button> -->
                        </div>
                      </td>
                    </tr>
                  {% endfor %}
                </tbody>
              </table>
            </div>
          </div>

          <div class="card border-secondary">
            <div class="card-body text-dark">
              <h5 class="card-title">All Proxies:</h5>
              <table class="table table-striped">
                <thead class="bg-info text-white">
                  <th>
                    Proxy name
                  </th>
                  <th>
                    Host
                  </th>
                  <th>
                    Port
                  </th>
                  <th>
                    Type
                  </th>
                  <th>
                    Crawler Type
                  </th>
                  <th>
                    Description
                  </th>
                  <tr>
                    <th>Proxy name</th>
                    <th>URL</th>
                    <th>Crawler Type</th>
                    <th>Description</th>
                    <th></th>
                  </tr>
                </thead>
                <tbody>
                  {% for proxy_name in all_proxies %}
@@ -179,19 +112,13 @@
                        {{proxy_name}}
                      </td>
                      <td>
                        {{all_proxies[proxy_name]['host']}}
                      </td>
                      <td>
                        {{all_proxies[proxy_name]['port']}}
                      </td>
                      <td>
                        {{all_proxies[proxy_name]['type']}}
                        {{all_proxies[proxy_name]['url']}}
                      </td>
                      <td>
                        {%if all_proxies[proxy_name]['crawler_type']=='tor'%}
                          <i class="fas fa-user-secret"></i>
                        {%else%}
                          <i class="fab fa-html5">
                          <i class="fab fa-html5"></i>
                        {%endif%}
                        {{all_proxies[proxy_name]['crawler_type']}}
                      </td>
@@ -211,7 +138,6 @@
              </table>
            </div>
          </div>
        </div>

      </div>
    </div>
@@ -242,7 +168,7 @@

{{crawler_error_mess}}
              </pre>
              <a href="{{ url_for('crawler_splash.crawler_splash_setings_test_crawler') }}">
              <a href="{{ url_for('crawler_splash.crawler_settings_crawler_test') }}">
                <button type="button" class="btn btn-primary">
                  ReRun Test <i class="fas fa-rocket"></i>
                </button>
@@ -0,0 +1,61 @@
<!DOCTYPE html>

<html>
  <head>
    <title>AIL-Framework</title>
    <link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
    <!-- Core CSS -->
    <link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
    <link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">

    <!-- JS -->
    <script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
    <script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>

  </head>

  <body>

    {% include 'nav_bar.html' %}

    <div class="container-fluid">
      <div class="row">

        {% include 'crawler/menu_sidebar.html' %}

        <div class="col-12 col-lg-10" id="core_content">

          <div class="card my-2">
            <div class="card-header bg-dark text-white">
              <h3 class="card-title"> Lacus Config:</h3>
              <form action="{{ url_for('crawler_splash.crawler_lacus_settings_crawler_manager') }}" method="post" enctype="multipart/form-data">
                <div class="form-group">
                  <label for="splash_manager_url">Lacus Server URL</label>
                  <input type="text" class="form-control" id="splash_manager_url" required placeholder="https://lacus_url" name="lacus_url" {%if lacus_url%}value="{{lacus_url}}"{%endif%}>
                </div>
{#              <div class="form-group">#}
{#                <label for="api_key">API Key</label>#}
{#                <input type="text" class="form-control" id="api_key" placeholder="API Key" name="api_key" {%if api_key%}value="{{api_key}}"{%endif%}>#}
{#              </div>#}
                <button type="submit" class="btn btn-primary">Edit Lacus <i class="fas fa-pencil-alt"></i></button>
              </form>
            </div>
          </div>

        </div>
      </div>
    </div>

  </body>

  <script>
    var to_refresh = false
    $(document).ready(function(){
      $("#page-Crawler").addClass("active");
      $("#nav_settings").addClass("active");
    });

  </script>
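Editor's note: the form above posts a single `lacus_url` field to the `crawler_splash.crawler_lacus_settings_crawler_manager` endpoint. A minimal Flask sketch of a matching route follows; only the endpoint name and form field come from the template, while the URL rule, persistence helper and template path are labelled assumptions:

```python
# Hedged sketch of a route matching the Lacus Config form above.
from flask import Blueprint, request, render_template, redirect, url_for

crawler_splash = Blueprint('crawler_splash', __name__)

def save_lacus_url(url):
    """Hypothetical stand-in for however AIL persists the Lacus server URL."""
    print('would save Lacus URL:', url)

@crawler_splash.route('/crawler/settings/crawler/lacus', methods=['GET', 'POST'])  # assumed URL rule
def crawler_lacus_settings_crawler_manager():
    if request.method == 'POST':
        # 'lacus_url' matches the input name in the form above
        save_lacus_url(request.form.get('lacus_url'))
        return redirect(url_for('crawler_splash.crawler_lacus_settings_crawler_manager'))
    # assumed template path; it would render the settings form shown above
    return render_template('crawler/crawler_splash/settings_edit_lacus_crawler.html', lacus_url=None)
```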
@@ -1,55 +0,0 @@
<!DOCTYPE html>

<html>
  <head>
    <title>AIL-Framework</title>
    <link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png')}}">
    <!-- Core CSS -->
    <link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
    <link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">

    <!-- JS -->
    <script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
    <script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>

  </head>

  <body>

    {% include 'nav_bar.html' %}

    <div class="container-fluid">
      <div class="row">

        {% include 'crawler/menu_sidebar.html' %}

        <div class="col-12 col-lg-10" id="core_content">

          <form action="{{ url_for('crawler_splash.crawler_splash_setings_crawler_manager') }}" method="post" enctype="multipart/form-data">
            <div class="form-group">
              <label for="splash_manager_url">Splash Manager URL</label>
              <input type="text" class="form-control" id="splash_manager_url" placeholder="https://splash_manager_url" name="splash_manager_url" {%if splash_manager_url%}value="{{splash_manager_url}}"{%endif%}>
            </div>
            <div class="form-group">
              <label for="api_key">API Key</label>
              <input type="text" class="form-control" id="api_key" placeholder="API Key" name="api_key" {%if api_key%}value="{{api_key}}"{%endif%}>
            </div>
            <button type="submit" class="btn btn-primary">Edit <i class="fas fa-pencil-alt"></i></button>
          </form>

        </div>
      </div>
    </div>

  </body>

  <script>
    var to_refresh = false
    $(document).ready(function(){
      $("#page-Crawler").addClass("active");
      $("#nav_settings").addClass("active");
    });

  </script>