Merge pull request #201 from CIRCL/python3

Python 3 migration + many new features + fixes
Alexandre Dulaunoy 2018-05-11 16:26:58 +02:00 committed by GitHub
commit 36e79f2f30
90 changed files with 1732 additions and 1412 deletions

View file

@ -1,7 +1,7 @@
language: python
python:
- "2.7"
- "3.5"
sudo: required
@ -16,6 +16,7 @@ env:
install:
- ./installing_deps.sh
- pip install coveralls codecov nose
script:
- pushd bin
@ -23,13 +24,11 @@ script:
- ./launch_lvldb.sh
- ./launch_logs.sh
- ./launch_queues.sh
- ./launch_scripts.sh
- sleep 120
- ./Shutdown.py
- popd
- find logs/* -exec cat {} \;
- cd tests
- nosetests --with-coverage --cover-package=../bin -d
notifications:
email:
on_success: change
on_failure: change
after_success:
- codecov
- coveralls

View file

@ -31,6 +31,10 @@ Features
* Terms, Set of terms and Regex tracking and occurrence
* Many more modules for extracting phone numbers, credentials and others
* Alerting to [MISP](https://github.com/MISP/MISP) to share found leaks within a threat intelligence platform using [MISP standard](https://www.misp-project.org/objects.html#_ail_leak)
* Detect and decode Base64 and store files
* Detect Amazon AWS and Google API keys
* Detect Bitcoin addresses and Bitcoin private keys
* Detect private keys and certificates
Installation
------------
@ -53,6 +57,11 @@ linux based distributions, you can replace it with [installing_deps_archlinux.sh
There is also a [Travis file](.travis.yml) used for automating the installation that can be used to build and install AIL on other systems.
Python 3 Upgrade
------------
To upgrade an existing AIL installation, run [python3_upgrade.sh](./python3_upgrade.sh); this script deletes the old virtual environment and creates a new one. The script **will upgrade the packages but won't keep your previous data** (nevertheless, the data is copied into a directory called `old`). If you install from scratch, you don't need to run [python3_upgrade.sh](./python3_upgrade.sh).
Docker Quick Start (Ubuntu 16.04 LTS)
------------

bin/ApiKey.py (new executable file, 87 lines added)
View file

@ -0,0 +1,87 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The ApiKey Module
======================
This module consumes the Redis list created by the Categ module.
It applies API key regexes to paste content and warns if the number of matches is above a threshold.
"""
import redis
import pprint
import time
import re
from packages import Paste
from packages import lib_refine
from pubsublogger import publisher
from Helper import Process
def search_api_key(message):
filename, score = message.split()
paste = Paste.Paste(filename)
content = paste.get_p_content()
aws_access_key = regex_aws_access_key.findall(content)
aws_secret_key = regex_aws_secret_key.findall(content)
google_api_key = regex_google_api_key.findall(content)
if(len(aws_access_key) > 0 or len(aws_secret_key) > 0 or len(google_api_key) > 0):
to_print = 'ApiKey;{};{};{};'.format(
paste.p_source, paste.p_date, paste.p_name)
if(len(google_api_key) > 0):
print('found google api key')
print(to_print)
publisher.warning('{}Checked {} found Google API Key;{}'.format(
to_print, len(google_api_key), paste.p_path))
if(len(aws_access_key) > 0 or len(aws_secret_key) > 0):
print('found AWS key')
print(to_print)
total = len(aws_access_key) + len(aws_secret_key)
publisher.warning('{}Checked {} found AWS Key;{}'.format(
to_print, total, paste.p_path))
msg = 'apikey;{}'.format(filename)
p.populate_set_out(msg, 'alertHandler')
#Send to duplicate
p.populate_set_out(filename, 'Duplicate')
if __name__ == "__main__":
publisher.port = 6380
publisher.channel = "Script"
config_section = 'ApiKey'
p = Process(config_section)
publisher.info("ApiKey started")
message = p.get_from_set()
# TODO improve REGEX
regex_aws_access_key = re.compile(r'(?<![A-Z0-9])=[A-Z0-9]{20}(?![A-Z0-9])')
regex_aws_secret_key = re.compile(r'(?<!=[A-Za-z0-9+])=[A-Za-z0-9+]{40}(?![A-Za-z0-9+])')
regex_google_api_key = re.compile(r'=AIza[0-9a-zA-Z-_]{35}')
while True:
message = p.get_from_set()
if message is not None:
search_api_key(message)
else:
publisher.debug("Script ApiKey is Idling 10s")
time.sleep(10)
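
For reference, the matching done in search_api_key can be exercised standalone. The regexes below are copied verbatim from this file (note they only match keys preceded by '=', hence the TODO above); the sample line is made up, using AWS's documented example access key ID:

```python
import re

# Regexes copied from bin/ApiKey.py above; they only match keys preceded by '='.
regex_aws_access_key = re.compile(r'(?<![A-Z0-9])=[A-Z0-9]{20}(?![A-Z0-9])')
regex_aws_secret_key = re.compile(r'(?<!=[A-Za-z0-9+])=[A-Za-z0-9+]{40}(?![A-Za-z0-9+])')
regex_google_api_key = re.compile(r'=AIza[0-9a-zA-Z-_]{35}')

# Hypothetical paste line; AKIAIOSFODNN7EXAMPLE is AWS's documented example key ID.
content = 'aws_access_key_id=AKIAIOSFODNN7EXAMPLE'
print(regex_aws_access_key.findall(content))   # ['=AKIAIOSFODNN7EXAMPLE']
```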

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
@ -33,7 +33,7 @@ if __name__ == "__main__":
PST = Paste.Paste(message)
else:
publisher.debug("Script Attribute is idling 1s")
print 'sleeping'
print('sleeping')
time.sleep(1)
continue
@ -45,6 +45,6 @@ if __name__ == "__main__":
# FIXME Not used.
PST.store.sadd("Pastes_Objects", PST.p_path)
except IOError:
print "CRC Checksum Failed on :", PST.p_path
print("CRC Checksum Failed on :", PST.p_path)
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
PST.p_source, PST.p_date, PST.p_name))

bin/Base64.py (new executable file, 136 lines added)
View file

@ -0,0 +1,136 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
Base64 module
Detect Base64 and decode it
"""
import time
import os
import datetime
from pubsublogger import publisher
from Helper import Process
from packages import Paste
import re
import base64
from hashlib import sha1
import magic
import json
import signal
class TimeoutException(Exception):
pass
def timeout_handler(signum, frame):
raise TimeoutException
signal.signal(signal.SIGALRM, timeout_handler)
def search_base64(content, message):
find = False
base64_list = re.findall(regex_base64, content)
if(len(base64_list) > 0):
for b64 in base64_list:
if len(b64) >= 40 :
decode = base64.b64decode(b64)
type = magic.from_buffer(decode, mime=True)
#print(type)
#print(decode)
find = True
hash = sha1(decode).hexdigest()
data = {}
data['name'] = hash
data['date'] = datetime.datetime.now().strftime("%d/%m/%y")
data['origin'] = message
data['estimated type'] = type
json_data = json.dumps(data)
save_base64_as_file(decode, type, hash, json_data)
print('found {} '.format(type))
if(find):
publisher.warning('base64 decoded')
#Send to duplicate
p.populate_set_out(message, 'Duplicate')
#send to Browse_warning_paste
msg = ('base64;{}'.format(message))
p.populate_set_out( msg, 'alertHandler')
def save_base64_as_file(decode, type, hash, json_data):
filename_b64 = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "base64"), type, hash[:2], hash)
filename_json = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "base64"), type, hash[:2], hash + '.json')
dirname = os.path.dirname(filename_b64)
if not os.path.exists(dirname):
os.makedirs(dirname)
with open(filename_b64, 'wb') as f:
f.write(decode)
with open(filename_json, 'w') as f:
f.write(json_data)
if __name__ == '__main__':
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
# Port of the redis instance used by pubsublogger
publisher.port = 6380
# Script is the default channel used for the modules.
publisher.channel = 'Script'
# Section name in bin/packages/modules.cfg
config_section = 'Base64'
# Setup the I/O queues
p = Process(config_section)
max_execution_time = p.config.getint("Base64", "max_execution_time")
# Sent to the logging a description of the module
publisher.info("Base64 started")
regex_base64 = '(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)'
re.compile(regex_base64)
# Endless loop getting messages from the input queue
while True:
# Get one message from the input queue
message = p.get_from_set()
if message is None:
publisher.debug("{} queue is empty, waiting".format(config_section))
time.sleep(1)
continue
filename = message
paste = Paste.Paste(filename)
signal.alarm(max_execution_time)
try:
# Do something with the message from the queue
#print(filename)
content = paste.get_p_content()
search_base64(content,message)
# (Optional) Send that thing to the next queue
#p.populate_set_out(something_has_been_done)
except TimeoutException:
print ("{0} processing timeout".format(paste.p_path))
continue
else:
signal.alarm(0)
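
The core of search_base64/save_base64_as_file can be sketched standalone: find base64-looking runs with this module's regex, decode anything 40 characters or longer, and key the result by its SHA-1. The magic type detection and the Paste/Process plumbing are omitted here, and the content string is synthetic:

```python
import base64
import re
from hashlib import sha1

# Regex copied from bin/Base64.py above (it expects '='-padded base64 runs).
regex_base64 = '(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)'

# Synthetic paste content embedding a 48-character base64 blob.
blob = base64.b64encode(b'hello hello hello hello hello hello').decode()
content = 'some paste text ' + blob + ' more text'

for b64 in re.findall(regex_base64, content):
    if len(b64) >= 40:                       # same length cutoff as the module
        decoded = base64.b64decode(b64)
        print(sha1(decoded).hexdigest(), decoded)
```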

bin/Bitcoin.py (new executable file, 101 lines added)
View file

@ -0,0 +1,101 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The Bitcoin Module
============================
It tries to extract Bitcoin addresses and private keys from pastes
..seealso:: Paste method (get_regex)
Requirements
------------
*Need running Redis instances. (Redis).
"""
from packages import Paste
from Helper import Process
from pubsublogger import publisher
import re
import time
from hashlib import sha256
#### thanks to http://rosettacode.org/wiki/Bitcoin/address_validation#Python for these 2 functions
def decode_base58(bc, length):
n = 0
for char in bc:
n = n * 58 + digits58.index(char)
return n.to_bytes(length, 'big')
def check_bc(bc):
try:
bcbytes = decode_base58(bc, 25)
return bcbytes[-4:] == sha256(sha256(bcbytes[:-4]).digest()).digest()[:4]
except Exception:
return False
########################################################
def search_key(content, message, paste):
bitcoin_address = re.findall(regex_bitcoin_public_address, content)
bitcoin_private_key = re.findall(regex_bitcoin_private_key, content)
validate_address = False
key = False
if(len(bitcoin_address) >0):
#print(message)
for address in bitcoin_address:
if(check_bc(address)):
validate_address = True
print('Bitcoin address found : {}'.format(address))
if(len(bitcoin_private_key) > 0):
for private_key in bitcoin_private_key:
print('Bitcoin private key found : {}'.format(private_key))
key = True
if(validate_address):
p.populate_set_out(message, 'Duplicate')
to_print = 'Bitcoin found: {} address and {} private Keys'.format(len(bitcoin_address), len(bitcoin_private_key))
print(to_print)
publisher.warning(to_print)
msg = ('bitcoin;{}'.format(message))
p.populate_set_out( msg, 'alertHandler')
if(key):
to_print = 'Bitcoin;{};{};{};'.format(paste.p_source, paste.p_date,
paste.p_name)
publisher.warning('{}Detected {} Bitcoin private key;{}'.format(
to_print, len(bitcoin_private_key),paste.p_path))
if __name__ == "__main__":
publisher.port = 6380
publisher.channel = "Script"
config_section = 'Bitcoin'
# Setup the I/O queues
p = Process(config_section)
# Sent to the logging a description of the module
publisher.info("Run Keys module ")
digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
regex_bitcoin_public_address = re.compile(r'(?<![a-km-zA-HJ-NP-Z0-9])[13][a-km-zA-HJ-NP-Z0-9]{26,33}(?![a-km-zA-HJ-NP-Z0-9])')
regex_bitcoin_private_key = re.compile(r'[5KL][1-9A-HJ-NP-Za-km-z]{50,51}')
# Endless loop getting messages from the input queue
while True:
# Get one message from the input queue
message = p.get_from_set()
if message is None:
publisher.debug("{} queue is empty, waiting".format(config_section))
time.sleep(1)
continue
# Do something with the message from the queue
paste = Paste.Paste(message)
content = paste.get_p_content()
search_key(content, message, paste)
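
The validation added here is the Rosetta Code base58 plus double-SHA-256 checksum check. An isolated sketch of just that check, with the two functions copied from this file and the well-known genesis-block address as a test vector:

```python
from hashlib import sha256

digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'

def decode_base58(bc, length):
    # Base58-decode an address into `length` big-endian bytes.
    n = 0
    for char in bc:
        n = n * 58 + digits58.index(char)
    return n.to_bytes(length, 'big')

def check_bc(bc):
    # Last 4 bytes must equal the double-SHA-256 checksum of the first 21 bytes.
    try:
        bcbytes = decode_base58(bc, 25)
        return bcbytes[-4:] == sha256(sha256(bcbytes[:-4]).digest()).digest()[:4]
    except Exception:
        return False

print(check_bc('1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa'))  # genesis-block address, expected True
print(check_bc('1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNb'))  # corrupted last character, expected False
```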

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The ZMQ_PubSub_Categ Module
@ -67,13 +67,13 @@ if __name__ == "__main__":
# FUNCTIONS #
publisher.info("Script Categ started")
categories = ['CreditCards', 'Mail', 'Onion', 'Web', 'Credential', 'Cve']
categories = ['CreditCards', 'Mail', 'Onion', 'Web', 'Credential', 'Cve', 'ApiKey']
tmp_dict = {}
for filename in categories:
bname = os.path.basename(filename)
tmp_dict[bname] = []
with open(os.path.join(args.d, filename), 'r') as f:
patterns = [r'%s' % re.escape(s.strip()) for s in f]
patterns = [r'%s' % ( re.escape(s.strip()) ) for s in f]
tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE)
prec_filename = None
@ -82,18 +82,25 @@ if __name__ == "__main__":
filename = p.get_from_set()
if filename is None:
publisher.debug("Script Categ is Idling 10s")
print 'Sleeping'
print('Sleeping')
time.sleep(10)
continue
paste = Paste.Paste(filename)
content = paste.get_p_content()
#print('-----------------------------------------------------')
#print(filename)
#print(content)
#print('-----------------------------------------------------')
for categ, pattern in tmp_dict.items():
found = set(re.findall(pattern, content))
if len(found) >= matchingThreshold:
msg = '{} {}'.format(paste.p_path, len(found))
print msg, categ
#msg = " ".join( [paste.p_path, bytes(len(found))] )
print(msg, categ)
p.populate_set_out(msg, categ)
publisher.info(

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
@ -53,34 +53,34 @@ if __name__ == "__main__":
faup = Faup()
server_cred = redis.StrictRedis(
host=p.config.get("Redis_Level_DB_TermCred", "host"),
port=p.config.get("Redis_Level_DB_TermCred", "port"),
db=p.config.get("Redis_Level_DB_TermCred", "db"))
host=p.config.get("ARDB_TermCred", "host"),
port=p.config.get("ARDB_TermCred", "port"),
db=p.config.get("ARDB_TermCred", "db"),
decode_responses=True)
criticalNumberToAlert = p.config.getint("Credential", "criticalNumberToAlert")
minTopPassList = p.config.getint("Credential", "minTopPassList")
regex_web = "((?:https?:\/\/)[-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
#regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"
while True:
message = p.get_from_set()
if message is None:
publisher.debug("Script Credential is Idling 10s")
print('sleeping 10s')
#print('sleeping 10s')
time.sleep(10)
continue
filepath, count = message.split()
if count < minTopPassList:
# Less than 5 matches from the top password list, false positive.
print("false positive:", count)
continue
filepath, count = message.split(' ')
paste = Paste.Paste(filepath)
content = paste.get_p_content()
creds = set(re.findall(regex_cred, content))
publisher.warning('to_print')
if len(creds) == 0:
continue
@ -89,7 +89,7 @@ if __name__ == "__main__":
message = 'Checked {} credentials found.'.format(len(creds))
if sites_set:
message += ' Related websites: {}'.format(', '.join(sites_set))
message += ' Related websites: {}'.format( (', '.join(sites_set)) )
to_print = 'Credential;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message, paste.p_path)
@ -97,12 +97,13 @@ if __name__ == "__main__":
#num of creds above tresh, publish an alert
if len(creds) > criticalNumberToAlert:
print("========> Found more than 10 credentials in this file : {}".format(filepath))
print("========> Found more than 10 credentials in this file : {}".format( filepath ))
publisher.warning(to_print)
#Send to duplicate
p.populate_set_out(filepath, 'Duplicate')
#Send to alertHandler
p.populate_set_out('credential;{}'.format(filepath), 'alertHandler')
msg = 'credential;{}'.format(filepath)
p.populate_set_out(msg, 'alertHandler')
#Put in form, count occurences, then send to moduleStats
creds_sites = {}
@ -122,9 +123,11 @@ if __name__ == "__main__":
else:
creds_sites[domain] = 1
for site, num in creds_sites.iteritems(): # Send for each different site to moduleStats
print 'credential;{};{};{}'.format(num, site, paste.p_date)
p.populate_set_out('credential;{};{};{}'.format(num, site, paste.p_date), 'ModuleStats')
for site, num in creds_sites.items(): # Send for each different site to moduleStats
mssg = 'credential;{};{};{}'.format(num, site, paste.p_date)
print(mssg)
p.populate_set_out(mssg, 'ModuleStats')
if sites_set:
print("=======> Probably on : {}".format(', '.join(sites_set)))
@ -158,4 +161,3 @@ if __name__ == "__main__":
for partCred in splitedCred:
if len(partCred) > minimumLengthThreshold:
server_cred.sadd(partCred, uniq_num_cred)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
@ -18,6 +18,7 @@ from packages import Paste
from packages import lib_refine
from pubsublogger import publisher
import re
import sys
from Helper import Process
@ -58,13 +59,14 @@ if __name__ == "__main__":
content = paste.get_p_content()
all_cards = re.findall(regex, content)
if len(all_cards) > 0:
print 'All matching', all_cards
print('All matching', all_cards)
creditcard_set = set([])
for card in all_cards:
clean_card = re.sub('[^0-9]', '', card)
clean_card = clean_card
if lib_refine.is_luhn_valid(clean_card):
print clean_card, 'is valid'
print(clean_card, 'is valid')
creditcard_set.add(clean_card)
paste.__setattr__(channel, creditcard_set)
@ -76,13 +78,15 @@ if __name__ == "__main__":
if (len(creditcard_set) > 0):
publisher.warning('{}Checked {} valid number(s);{}'.format(
to_print, len(creditcard_set), paste.p_path))
print('{}Checked {} valid number(s);{}'.format(
to_print, len(creditcard_set), paste.p_path))
#Send to duplicate
p.populate_set_out(filename, 'Duplicate')
#send to Browse_warning_paste
p.populate_set_out('creditcard;{}'.format(filename), 'alertHandler')
msg = 'creditcard;{}'.format(filename)
p.populate_set_out(msg, 'alertHandler')
else:
publisher.info('{}CreditCard related;{}'.format(to_print, paste.p_path))
else:
publisher.debug("Script creditcard is idling 1m")
time.sleep(10)
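
lib_refine.is_luhn_valid is an existing AIL helper; for reference, a self-contained Luhn check over the digits-only string produced by the re.sub above could look like this (the test number is the standard Visa test value):

```python
def is_luhn_valid(card_number):
    # Classic Luhn checksum: double every second digit from the right,
    # subtract 9 when the doubled digit exceeds 9, then sum everything.
    digits = [int(d) for d in card_number]
    checksum = 0
    for i, d in enumerate(reversed(digits)):
        if i % 2 == 1:
            d = d * 2
            if d > 9:
                d -= 9
        checksum += d
    return checksum % 10 == 0

print(is_luhn_valid('4111111111111111'))  # standard Visa test number, expected True
```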

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
This module is consuming the Redis-list created by the ZMQ_Sub_Curve_Q Module.
@ -53,7 +53,7 @@ def check_if_tracked_term(term, path):
#add_paste to tracked_word_set
set_name = "tracked_" + term
server_term.sadd(set_name, path)
print term, 'addded', set_name, '->', path
print(term, 'addded', set_name, '->', path)
p.populate_set_out("New Term added", 'CurveManageTopSets')
# Send a notification only when the member is in the set
@ -82,14 +82,16 @@ if __name__ == "__main__":
# REDIS #
r_serv1 = redis.StrictRedis(
host=p.config.get("Redis_Level_DB_Curve", "host"),
port=p.config.get("Redis_Level_DB_Curve", "port"),
db=p.config.get("Redis_Level_DB_Curve", "db"))
host=p.config.get("ARDB_Curve", "host"),
port=p.config.get("ARDB_Curve", "port"),
db=p.config.get("ARDB_Curve", "db"),
decode_responses=True)
server_term = redis.StrictRedis(
host=p.config.get("Redis_Level_DB_TermFreq", "host"),
port=p.config.get("Redis_Level_DB_TermFreq", "port"),
db=p.config.get("Redis_Level_DB_TermFreq", "db"))
host=p.config.get("ARDB_TermFreq", "host"),
port=p.config.get("ARDB_TermFreq", "port"),
db=p.config.get("ARDB_TermFreq", "db"),
decode_responses=True)
# FUNCTIONS #
publisher.info("Script Curve started")
@ -149,15 +151,16 @@ if __name__ == "__main__":
if generate_new_graph:
generate_new_graph = False
print 'Building graph'
print('Building graph')
today = datetime.date.today()
year = today.year
month = today.month
lib_words.create_curve_with_word_file(r_serv1, csv_path,
wordfile_path, year,
month)
publisher.debug("Script Curve is Idling")
print "sleeping"
print("sleeping")
time.sleep(10)
message = p.get_from_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
@ -16,7 +16,7 @@ from packages import lib_words
import datetime
import calendar
import os
import ConfigParser
import configparser
# Config Variables
Refresh_rate = 60*5 #sec
@ -68,26 +68,26 @@ def manage_top_set():
# convert dico into sorted array
array_month = []
for w, v in dico.iteritems():
for w, v in dico.items():
array_month.append((w, v))
array_month.sort(key=lambda tup: -tup[1])
array_month = array_month[0:20]
array_week = []
for w, v in dico_week.iteritems():
for w, v in dico_week.items():
array_week.append((w, v))
array_week.sort(key=lambda tup: -tup[1])
array_week = array_week[0:20]
# convert dico_per_paste into sorted array
array_month_per_paste = []
for w, v in dico_per_paste.iteritems():
for w, v in dico_per_paste.items():
array_month_per_paste.append((w, v))
array_month_per_paste.sort(key=lambda tup: -tup[1])
array_month_per_paste = array_month_per_paste[0:20]
array_week_per_paste = []
for w, v in dico_week_per_paste.iteritems():
for w, v in dico_week_per_paste.items():
array_week_per_paste.append((w, v))
array_week_per_paste.sort(key=lambda tup: -tup[1])
array_week_per_paste = array_week_per_paste[0:20]
@ -114,7 +114,7 @@ def manage_top_set():
timestamp = int(time.mktime(datetime.datetime.now().timetuple()))
value = str(timestamp) + ", " + "-"
r_temp.set("MODULE_"+ "CurveManageTopSets" + "_" + str(os.getpid()), value)
print "refreshed module"
print("refreshed module")
@ -131,7 +131,7 @@ if __name__ == '__main__':
Did you set environment variables? \
Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read(configfile)
@ -139,7 +139,8 @@ if __name__ == '__main__':
r_temp = redis.StrictRedis(
host=cfg.get('RedisPubSub', 'host'),
port=cfg.getint('RedisPubSub', 'port'),
db=cfg.getint('RedisPubSub', 'db'))
db=cfg.getint('RedisPubSub', 'db'),
decode_responses=True)
timestamp = int(time.mktime(datetime.datetime.now().timetuple()))
value = str(timestamp) + ", " + "-"
@ -147,9 +148,10 @@ if __name__ == '__main__':
r_temp.sadd("MODULE_TYPE_"+ "CurveManageTopSets" , str(os.getpid()))
server_term = redis.StrictRedis(
host=cfg.get("Redis_Level_DB_TermFreq", "host"),
port=cfg.getint("Redis_Level_DB_TermFreq", "port"),
db=cfg.getint("Redis_Level_DB_TermFreq", "db"))
host=cfg.get("ARDB_TermFreq", "host"),
port=cfg.getint("ARDB_TermFreq", "port"),
db=cfg.getint("ARDB_TermFreq", "db"),
decode_responses=True)
publisher.info("Script Curve_manage_top_set started")
@ -162,4 +164,3 @@ if __name__ == '__main__':
# Get one message from the input queue (module only work if linked with a queue)
time.sleep(Refresh_rate) # sleep a long time then manage the set
manage_top_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The CVE Module
@ -32,7 +32,8 @@ def search_cve(message):
publisher.warning('{} contains CVEs'.format(paste.p_name))
#send to Browse_warning_paste
p.populate_set_out('cve;{}'.format(filepath), 'alertHandler')
msg = 'cve;{}'.format(filepath)
p.populate_set_out(msg, 'alertHandler')
#Send to duplicate
p.populate_set_out(filepath, 'Duplicate')
@ -63,4 +64,3 @@ if __name__ == '__main__':
# Do something with the message from the queue
search_cve(message)

View file

@ -1,18 +1,18 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import argparse
import redis
from pubsublogger import publisher
from packages.lib_words import create_dirfile
import ConfigParser
import configparser
def main():
"""Main Function"""
# CONFIG #
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read('./packages/config.cfg')
parser = argparse.ArgumentParser(
@ -36,7 +36,8 @@ def main():
r_serv = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"),
port=cfg.getint("Redis_Queues", "port"),
db=cfg.getint("Redis_Queues", "db"))
db=cfg.getint("Redis_Queues", "db"),
decode_responses=True)
publisher.port = 6380
publisher.channel = "Script"

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
@ -24,10 +24,11 @@ def main():
config_section = 'DomClassifier'
p = Process(config_section)
addr_dns = p.config.get("DomClassifier", "dns")
publisher.info("""ZMQ DomainClassifier is Running""")
c = DomainClassifier.domainclassifier.Extract(rawtext="")
c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])
cc = p.config.get("DomClassifier", "cc")
cc_tld = p.config.get("DomClassifier", "cc_tld")
@ -44,6 +45,7 @@ def main():
continue
paste = PST.get_p_content()
mimetype = PST._get_p_encoding()
if mimetype == "text/plain":
c.text(rawtext=paste)
c.potentialdomain()
@ -59,7 +61,7 @@ def main():
publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path))
except IOError:
print "CRC Checksum Failed on :", PST.p_path
print("CRC Checksum Failed on :", PST.p_path)
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
PST.p_source, PST.p_date, PST.p_name))

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
from pubsublogger import publisher
@ -23,7 +23,7 @@ if __name__ == "__main__":
if message is not None:
f = open(dump_file, 'a')
while message is not None:
print message
print(message)
date = datetime.datetime.now()
if message is not None:
f.write(date.isoformat() + ' ' + message + '\n')

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
@ -49,12 +49,13 @@ if __name__ == "__main__":
# REDIS #
dico_redis = {}
date_today = datetime.today()
for year in xrange(2013, date_today.year+1):
for month in xrange(0, 13):
for year in range(2013, date_today.year+1):
for month in range(0, 13):
dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
host=p.config.get("Redis_Level_DB", "host"), port=year,
db=month)
#print("dup: "+str(year)+str(month).zfill(2)+"\n")
host=p.config.get("ARDB_DB", "host"),
port=p.config.get("ARDB_DB", "port"),
db=str(year) + str(month),
decode_responses=True)
# FUNCTIONS #
publisher.info("Script duplicate started")
@ -62,7 +63,7 @@ if __name__ == "__main__":
while True:
try:
hash_dico = {}
dupl = []
dupl = set()
dico_range_list = []
x = time.time()
@ -73,6 +74,7 @@ if __name__ == "__main__":
PST = Paste.Paste(path)
else:
publisher.debug("Script Attribute is idling 10s")
print('sleeping')
time.sleep(10)
continue
@ -90,7 +92,7 @@ if __name__ == "__main__":
# Get the date of the range
date_range = date_today - timedelta(days = maximum_month_range*30.4166666)
num_of_month = (date_today.year - date_range.year)*12 + (date_today.month - date_range.month)
for diff_month in xrange(0, num_of_month+1):
for diff_month in range(0, num_of_month+1):
curr_date_range = date_today - timedelta(days = diff_month*30.4166666)
to_append = str(curr_date_range.year)+str(curr_date_range.month).zfill(2)
dico_range_list.append(to_append)
@ -102,7 +104,7 @@ if __name__ == "__main__":
yearly_index = str(date_today.year)+'00'
r_serv0 = dico_redis[yearly_index]
r_serv0.incr("current_index")
index = r_serv0.get("current_index")+str(PST.p_date)
index = (r_serv0.get("current_index")) + str(PST.p_date)
# Open selected dico range
opened_dico = []
@ -114,13 +116,16 @@ if __name__ == "__main__":
# Go throught the Database of the dico (of the month)
for curr_dico_name, curr_dico_redis in opened_dico:
for hash_type, paste_hash in paste_hashes.iteritems():
for hash_type, paste_hash in paste_hashes.items():
for dico_hash in curr_dico_redis.smembers('HASHS_'+hash_type):
try:
if hash_type == 'ssdeep':
percent = 100-ssdeep.compare(dico_hash, paste_hash)
else:
percent = tlsh.diffxlen(dico_hash, paste_hash)
if percent > 100:
percent = 100
threshold_duplicate = threshold_set[hash_type]
if percent < threshold_duplicate:
@ -130,16 +135,20 @@ if __name__ == "__main__":
# index of paste
index_current = r_serv_dico.get(dico_hash)
index_current = index_current
paste_path = r_serv_dico.get(index_current)
paste_path = paste_path
paste_date = r_serv_dico.get(index_current+'_date')
paste_date = paste_date
paste_date = paste_date if paste_date != None else "No date available"
if paste_path != None:
if paste_path != PST.p_path:
hash_dico[dico_hash] = (hash_type, paste_path, percent, paste_date)
print '['+hash_type+'] '+'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent)
except Exception,e:
print str(e)
#print 'hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash
print('['+hash_type+'] '+'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent))
except Exception:
print('hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash)
# Add paste in DB after checking to prevent its analysis twice
# hash_type_i -> index_i AND index_i -> PST.PATH
@ -147,7 +156,7 @@ if __name__ == "__main__":
r_serv1.set(index+'_date', PST._get_p_date())
r_serv1.sadd("INDEX", index)
# Adding hashes in Redis
for hash_type, paste_hash in paste_hashes.iteritems():
for hash_type, paste_hash in paste_hashes.items():
r_serv1.set(paste_hash, index)
r_serv1.sadd("HASHS_"+hash_type, paste_hash)
@ -157,24 +166,25 @@ if __name__ == "__main__":
if len(hash_dico) != 0:
# paste_tuple = (hash_type, date, paste_path, percent)
for dico_hash, paste_tuple in hash_dico.items():
dupl.append(paste_tuple)
dupl.add(paste_tuple)
# Creating the object attribute and save it.
to_print = 'Duplicate;{};{};{};'.format(
PST.p_source, PST.p_date, PST.p_name)
if dupl != []:
dupl = list(dupl)
PST.__setattr__("p_duplicate", dupl)
PST.save_attribute_redis("p_duplicate", dupl)
PST.save_others_pastes_attribute_duplicate("p_duplicate", dupl)
publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_path))
print '{}Detected {}'.format(to_print, len(dupl))
print('{}Detected {}'.format(to_print, len(dupl)))
y = time.time()
publisher.debug('{}Processed in {} sec'.format(to_print, y-x))
#print '{}Processed in {} sec'.format(to_print, y-x)
except IOError:
to_print = 'Duplicate;{};{};{};'.format(
PST.p_source, PST.p_date, PST.p_name)
print "CRC Checksum Failed on :", PST.p_path
print("CRC Checksum Failed on :", PST.p_path)
publisher.error('{}CRC Checksum Failed'.format(to_print))
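
The inner comparison above reduces to a per-hash-type distance check. Isolated, it looks roughly like the sketch below; ssdeep and tlsh are the libraries the module already imports, while the threshold values here are placeholders since the real threshold_set comes from configuration not shown in this hunk:

```python
import ssdeep  # fuzzy-hash library already used by Duplicates.py
import tlsh    # locality-sensitive hash library already used by Duplicates.py

# Placeholder thresholds; the module reads the real threshold_set from its config.
threshold_set = {'ssdeep': 50, 'tlsh': 100}

def is_duplicate(hash_type, known_hash, candidate_hash):
    if hash_type == 'ssdeep':
        percent = 100 - ssdeep.compare(known_hash, candidate_hash)  # similarity -> distance
    else:
        percent = tlsh.diffxlen(known_hash, candidate_hash)
    percent = min(percent, 100)            # same cap as the "if percent > 100" branch above
    return percent < threshold_set[hash_type], percent
```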

View file

@ -1,166 +0,0 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
"""
The Duplicate module
====================
This huge module is, in short term, checking duplicates.
Requirements:
-------------
"""
import redis
import os
import time
from packages import Paste
from pubsublogger import publisher
from pybloomfilter import BloomFilter
from Helper import Process
if __name__ == "__main__":
publisher.port = 6380
publisher.channel = "Script"
config_section = 'Duplicates'
p = Process(config_section)
# REDIS #
# DB OBJECT & HASHS ( DISK )
# FIXME increase flexibility
dico_redis = {}
for year in xrange(2013, 2017):
for month in xrange(0, 16):
dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
host=p.config.get("Redis_Level_DB", "host"), port=year,
db=month)
#print("dup: "+str(year)+str(month).zfill(2)+"\n")
# FUNCTIONS #
publisher.info("Script duplicate started")
set_limit = 100
bloompath = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "bloomfilters"))
bloop_path_set = set()
while True:
try:
super_dico = {}
hash_dico = {}
dupl = []
nb_hash_current = 0
x = time.time()
message = p.get_from_set()
if message is not None:
path = message
PST = Paste.Paste(path)
else:
publisher.debug("Script Attribute is idling 10s")
time.sleep(10)
continue
PST._set_p_hash_kind("md5")
# Assignate the correct redis connexion
r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month]
# Creating the bloom filter name: bloomyyyymm
filebloompath = os.path.join(bloompath, 'bloom' + PST.p_date.year +
PST.p_date.month)
if os.path.exists(filebloompath):
bloom = BloomFilter.open(filebloompath)
bloop_path_set.add(filebloompath)
else:
bloom = BloomFilter(100000000, 0.01, filebloompath)
bloop_path_set.add(filebloompath)
# UNIQUE INDEX HASHS TABLE
r_serv0 = dico_redis["201600"]
r_serv0.incr("current_index")
index = r_serv0.get("current_index")+str(PST.p_date)
# HASHTABLES PER MONTH (because of r_serv1 changing db)
r_serv1.set(index, PST.p_path)
r_serv1.sadd("INDEX", index)
# For each bloom filter
opened_bloom = []
for bloo in bloop_path_set:
# Opening blooms
opened_bloom.append(BloomFilter.open(bloo))
# For each hash of the paste
for line_hash in PST._get_hash_lines(min=5, start=1, jump=0):
nb_hash_current += 1
# Adding the hash in Redis & limiting the set
if r_serv1.scard(line_hash) <= set_limit:
r_serv1.sadd(line_hash, index)
r_serv1.sadd("HASHS", line_hash)
# Adding the hash in the bloom of the month
bloom.add(line_hash)
# Go throught the Database of the bloom filter (of the month)
for bloo in opened_bloom:
if line_hash in bloo:
db = bloo.name[-6:]
# Go throught the Database of the bloom filter (month)
r_serv_bloom = dico_redis[db]
# set of index paste: set([1,2,4,65])
hash_current = r_serv_bloom.smembers(line_hash)
# removing itself from the list
hash_current = hash_current - set([index])
# if the hash is present at least in 1 files
# (already processed)
if len(hash_current) != 0:
hash_dico[line_hash] = hash_current
# if there is data in this dictionnary
if len(hash_dico) != 0:
super_dico[index] = hash_dico
###########################################################################
# if there is data in this dictionnary
if len(super_dico) != 0:
# current = current paste, phash_dico = {hash: set, ...}
occur_dico = {}
for current, phash_dico in super_dico.items():
# phash = hash, pset = set([ pastes ...])
for phash, pset in hash_dico.items():
for p_fname in pset:
occur_dico.setdefault(p_fname, 0)
# Count how much hash is similar per file occuring
# in the dictionnary
if occur_dico[p_fname] >= 0:
occur_dico[p_fname] = occur_dico[p_fname] + 1
for paste, count in occur_dico.items():
percentage = round((count/float(nb_hash_current))*100, 2)
if percentage >= 50:
dupl.append((paste, percentage))
else:
print 'percentage: ' + str(percentage)
# Creating the object attribute and save it.
to_print = 'Duplicate;{};{};{};'.format(
PST.p_source, PST.p_date, PST.p_name)
if dupl != []:
PST.__setattr__("p_duplicate", dupl)
PST.save_attribute_redis("p_duplicate", dupl)
publisher.info('{}Detected {}'.format(to_print, len(dupl)))
print '{}Detected {}'.format(to_print, len(dupl))
y = time.time()
publisher.debug('{}Processed in {} sec'.format(to_print, y-x))
except IOError:
print "CRC Checksum Failed on :", PST.p_path
publisher.error('{}CRC Checksum Failed'.format(to_print))

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The ZMQ_Feed_Q Module
@ -27,6 +27,19 @@ from pubsublogger import publisher
from Helper import Process
import magic
import io
#import gzip
'''
def gunzip_bytes_obj(bytes_obj):
in_ = io.BytesIO()
in_.write(bytes_obj)
in_.seek(0)
with gzip.GzipFile(fileobj=in_, mode='rb') as fo:
gunzipped_bytes_obj = fo.read()
return gunzipped_bytes_obj.decode()'''
if __name__ == '__main__':
publisher.port = 6380
@ -44,6 +57,7 @@ if __name__ == '__main__':
while True:
message = p.get_from_set()
#print(message)
# Recovering the streamed message informations.
if message is not None:
splitted = message.split()
@ -51,14 +65,14 @@ if __name__ == '__main__':
paste, gzip64encoded = splitted
else:
# TODO Store the name of the empty paste inside a Redis-list.
print "Empty Paste: not processed"
print("Empty Paste: not processed")
publisher.debug("Empty Paste: {0} not processed".format(message))
continue
else:
print "Empty Queues: Waiting..."
print("Empty Queues: Waiting...")
if int(time.time() - time_1) > 30:
to_print = 'Global; ; ; ;glob Processed {0} paste(s)'.format(processed_paste)
print to_print
print(to_print)
#publisher.info(to_print)
time_1 = time.time()
processed_paste = 0
@ -67,11 +81,28 @@ if __name__ == '__main__':
# Creating the full filepath
filename = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "pastes"), paste)
dirname = os.path.dirname(filename)
if not os.path.exists(dirname):
os.makedirs(dirname)
decoded = base64.standard_b64decode(gzip64encoded)
with open(filename, 'wb') as f:
f.write(base64.standard_b64decode(gzip64encoded))
f.write(decoded)
'''try:
decoded2 = gunzip_bytes_obj(decoded)
except:
decoded2 =''
type = magic.from_buffer(decoded2, mime=True)
if type!= 'text/x-c++' and type!= 'text/html' and type!= 'text/x-c' and type!= 'text/x-python' and type!= 'text/x-php' and type!= 'application/xml' and type!= 'text/x-shellscript' and type!= 'text/plain' and type!= 'text/x-diff' and type!= 'text/x-ruby':
print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------')
print(filename)
print(type)
print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------')
'''
p.populate_set_out(filename)
processed_paste+=1

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
Queue helper module
@ -12,11 +12,7 @@ the same Subscriber name in both of them.
"""
import redis
try: # dirty to support python3
import ConfigParser
except:
import configparser
ConfigParser = configparser
import configparser
import os
import zmq
import time
@ -32,7 +28,7 @@ class PubSub(object):
raise Exception('Unable to find the configuration file. \
Did you set environment variables? \
Or activate the virtualenv.')
self.config = ConfigParser.ConfigParser()
self.config = configparser.ConfigParser()
self.config.read(configfile)
self.redis_sub = False
self.zmq_sub = False
@ -49,7 +45,8 @@ class PubSub(object):
r = redis.StrictRedis(
host=self.config.get('RedisPubSub', 'host'),
port=self.config.get('RedisPubSub', 'port'),
db=self.config.get('RedisPubSub', 'db'))
db=self.config.get('RedisPubSub', 'db'),
decode_responses=True)
self.subscribers = r.pubsub(ignore_subscribe_messages=True)
self.subscribers.psubscribe(channel)
elif conn_name.startswith('ZMQ'):
@ -61,7 +58,8 @@ class PubSub(object):
for address in addresses.split(','):
new_sub = context.socket(zmq.SUB)
new_sub.connect(address)
new_sub.setsockopt(zmq.SUBSCRIBE, channel)
# bytes64 encode bytes to ascii only bytes
new_sub.setsockopt_string(zmq.SUBSCRIBE, channel)
self.subscribers.append(new_sub)
def setup_publish(self, conn_name):
@ -72,7 +70,8 @@ class PubSub(object):
if conn_name.startswith('Redis'):
r = redis.StrictRedis(host=self.config.get('RedisPubSub', 'host'),
port=self.config.get('RedisPubSub', 'port'),
db=self.config.get('RedisPubSub', 'db'))
db=self.config.get('RedisPubSub', 'db'),
decode_responses=True)
self.publishers['Redis'].append((r, channel))
elif conn_name.startswith('ZMQ'):
context = zmq.Context()
@ -85,10 +84,12 @@ class PubSub(object):
channel_message = m.get('channel')
for p, channel in self.publishers['Redis']:
if channel_message is None or channel_message == channel:
p.publish(channel, m['message'])
p.publish(channel, ( m['message']) )
for p, channel in self.publishers['ZMQ']:
if channel_message is None or channel_message == channel:
p.send('{} {}'.format(channel, m['message']))
#p.send(b' '.join( [channel, mess] ) )
def subscribe(self):
if self.redis_sub:
@ -100,7 +101,7 @@ class PubSub(object):
for sub in self.subscribers:
try:
msg = sub.recv(zmq.NOBLOCK)
yield msg.split(' ', 1)[1]
yield msg.split(b" ", 1)[1]
except zmq.error.Again as e:
time.sleep(0.2)
pass
@ -117,9 +118,9 @@ class Process(object):
Did you set environment variables? \
Or activate the virtualenv.')
modulesfile = os.path.join(os.environ['AIL_BIN'], 'packages/modules.cfg')
self.config = ConfigParser.ConfigParser()
self.config = configparser.ConfigParser()
self.config.read(configfile)
self.modules = ConfigParser.ConfigParser()
self.modules = configparser.ConfigParser()
self.modules.read(modulesfile)
self.subscriber_name = conf_section
@ -131,11 +132,11 @@ class Process(object):
self.r_temp = redis.StrictRedis(
host=self.config.get('RedisPubSub', 'host'),
port=self.config.get('RedisPubSub', 'port'),
db=self.config.get('RedisPubSub', 'db'))
db=self.config.get('RedisPubSub', 'db'),
decode_responses=True)
self.moduleNum = os.getpid()
def populate_set_in(self):
# monoproc
src = self.modules.get(self.subscriber_name, 'subscribe')
@ -152,6 +153,7 @@ class Process(object):
self.r_temp.hset('queues', self.subscriber_name,
int(self.r_temp.scard(in_set)))
message = self.r_temp.spop(in_set)
timestamp = int(time.mktime(datetime.datetime.now().timetuple()))
dir_name = os.environ['AIL_HOME']+self.config.get('Directories', 'pastes')
@ -159,37 +161,46 @@ class Process(object):
return None
else:
try:
if ".gz" in message:
#try:
if '.gz' in message:
path = message.split(".")[-2].split("/")[-1]
#find start of path with AIL_HOME
index_s = message.find(os.environ['AIL_HOME'])
#Stop when .gz
index_e = message.find(".gz")+3
if(index_s == -1):
complete_path = message[0:index_e]
else:
complete_path = message[index_s:index_e]
else:
path = "?"
path = "-"
complete_path = "?"
value = str(timestamp) + ", " + path
self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value)
self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", complete_path)
self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum))
return message
except:
path = "?"
value = str(timestamp) + ", " + path
self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value)
self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", "?")
self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum))
return message
#except:
#print('except')
#path = "?"
#value = str(timestamp) + ", " + path
#self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value)
#self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", "?")
#self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum))
#return message
def populate_set_out(self, msg, channel=None):
# multiproc
msg = {'message': msg}
if channel is not None:
msg.update({'channel': channel})
self.r_temp.sadd(self.subscriber_name + 'out', json.dumps(msg))
# bytes64 encode bytes to ascii only bytes
j = json.dumps(msg)
self.r_temp.sadd(self.subscriber_name + 'out', j)
def publish(self):
# monoproc
@ -201,6 +212,7 @@ class Process(object):
self.pubsub.setup_publish(name)
while True:
message = self.r_temp.spop(self.subscriber_name + 'out')
if message is None:
time.sleep(1)
continue
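
A recurring change in this file, and across the whole diff, is adding decode_responses=True to redis.StrictRedis(...). Under Python 3 redis-py returns bytes by default, and the flag makes it return str so the existing string handling keeps working. A small illustration, assuming a local Redis on the default port:

```python
import redis

r_bytes = redis.StrictRedis(host='localhost', port=6379, db=0)
r_str = redis.StrictRedis(host='localhost', port=6379, db=0, decode_responses=True)

r_str.set('ail:demo', 'value')   # hypothetical key, purely for illustration
print(r_bytes.get('ail:demo'))   # b'value'  (bytes, what Python 3 gets by default)
print(r_str.get('ail:demo'))     # 'value'   (str, what the migrated modules expect)
```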

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
@ -107,10 +107,11 @@ if __name__ == "__main__":
continue
docpath = message.split(" ", -1)[-1]
paste = PST.get_p_content()
print "Indexing - "+indexname+" :", docpath
print("Indexing - " + indexname + " :", docpath)
if time.time() - last_refresh > TIME_WAIT: #avoid calculating the index's size at each message
#avoid calculating the index's size at each message
if( time.time() - last_refresh > TIME_WAIT):
last_refresh = time.time()
if check_index_size(baseindexpath, indexname) >= INDEX_SIZE_THRESHOLD*(1000*1000):
timestamp = int(time.time())
@ -128,11 +129,11 @@ if __name__ == "__main__":
if indexertype == "whoosh":
indexwriter = ix.writer()
indexwriter.update_document(
title=unicode(docpath, errors='ignore'),
path=unicode(docpath, errors='ignore'),
content=unicode(paste, errors='ignore'))
title=docpath,
path=docpath,
content=paste)
indexwriter.commit()
except IOError:
print "CRC Checksum Failed on :", PST.p_path
print("CRC Checksum Failed on :", PST.p_path)
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
PST.p_source, PST.p_date, PST.p_name))

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
@ -15,16 +15,19 @@ RSA private key, certificate messages
import time
from pubsublogger import publisher
from Helper import Process
#from bin.packages import Paste
#from bin.Helper import Process
from packages import Paste
from Helper import Process
def search_key(message):
paste = Paste.Paste(message)
def search_key(paste):
content = paste.get_p_content()
find = False
if '-----BEGIN PGP MESSAGE-----' in content:
publisher.warning('{} has a PGP enc message'.format(paste.p_name))
find = True
if '-----BEGIN CERTIFICATE-----' in content:
@ -32,15 +35,40 @@ def search_key(message):
find = True
if '-----BEGIN RSA PRIVATE KEY-----' in content:
publisher.warning('{} has a RSA key message'.format(paste.p_name))
publisher.warning('{} has a RSA private key message'.format(paste.p_name))
print('rsa private key message found')
find = True
if '-----BEGIN PRIVATE KEY-----' in content:
publisher.warning('{} has a private message'.format(paste.p_name))
publisher.warning('{} has a private key message'.format(paste.p_name))
print('private key message found')
find = True
if '-----BEGIN ENCRYPTED PRIVATE KEY-----' in content:
publisher.warning('{} has an encrypted private message'.format(paste.p_name))
publisher.warning('{} has an encrypted private key message'.format(paste.p_name))
print('encrypted private key message found')
find = True
if '-----BEGIN OPENSSH PRIVATE KEY-----' in content:
publisher.warning('{} has an openssh private key message'.format(paste.p_name))
print('openssh private key message found')
find = True
if '-----BEGIN OpenVPN Static key V1-----' in content:
publisher.warning('{} has an openssh private key message'.format(paste.p_name))
print('OpenVPN Static key message found')
find = True
if '-----BEGIN DSA PRIVATE KEY-----' in content:
publisher.warning('{} has a dsa private key message'.format(paste.p_name))
find = True
if '-----BEGIN EC PRIVATE KEY-----' in content:
publisher.warning('{} has an ec private key message'.format(paste.p_name))
find = True
if '-----BEGIN PGP PRIVATE KEY BLOCK-----' in content:
publisher.warning('{} has a pgp private key block message'.format(paste.p_name))
find = True
if find :
@ -48,7 +76,9 @@ def search_key(message):
#Send to duplicate
p.populate_set_out(message, 'Duplicate')
#send to Browse_warning_paste
p.populate_set_out('keys;{}'.format(message), 'alertHandler')
msg = ('keys;{}'.format(message))
print(message)
p.populate_set_out( msg, 'alertHandler')
if __name__ == '__main__':
@ -77,6 +107,7 @@ if __name__ == '__main__':
continue
# Do something with the message from the queue
search_key(message)
paste = Paste.Paste(message)
search_key(paste)
# (Optional) Send that thing to the next queue
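
Functionally, these additions extend the list of PEM/armor markers that search_key scans for with plain substring checks. A compact equivalent over the same markers, with the warning and queue plumbing omitted:

```python
# Markers taken verbatim from the checks in bin/Keys.py above.
PEM_MARKERS = [
    '-----BEGIN PGP MESSAGE-----',
    '-----BEGIN CERTIFICATE-----',
    '-----BEGIN RSA PRIVATE KEY-----',
    '-----BEGIN PRIVATE KEY-----',
    '-----BEGIN ENCRYPTED PRIVATE KEY-----',
    '-----BEGIN OPENSSH PRIVATE KEY-----',
    '-----BEGIN OpenVPN Static key V1-----',
    '-----BEGIN DSA PRIVATE KEY-----',
    '-----BEGIN EC PRIVATE KEY-----',
    '-----BEGIN PGP PRIVATE KEY BLOCK-----',
]

def find_key_material(content):
    return [marker for marker in PEM_MARKERS if marker in content]

print(find_key_material('-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA...'))
```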

View file

@ -11,11 +11,11 @@ CYAN="\\033[1;36m"
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_LEVELDB" ] && echo "Needs the env var AIL_LEVELDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_LEVELDB:$PATH
export PATH=$AIL_ARDB:$PATH
function helptext {
echo -e $YELLOW"
@ -40,7 +40,7 @@ function helptext {
(Inside screen Daemons)
"$RED"
But first of all you'll need to edit few path where you installed
your redis & leveldb servers.
your redis & ardb servers.
"$DEFAULT"
Usage:
-----
@ -58,33 +58,17 @@ function launching_redis {
screen -S "Redis_AIL" -X screen -t "6380" bash -c 'redis-server '$conf_dir'6380.conf ; read x'
sleep 0.1
screen -S "Redis_AIL" -X screen -t "6381" bash -c 'redis-server '$conf_dir'6381.conf ; read x'
# For Words and curves
sleep 0.1
screen -S "Redis_AIL" -X screen -t "6382" bash -c 'redis-server '$conf_dir'6382.conf ; read x'
}
function launching_lvldb {
lvdbhost='127.0.0.1'
lvdbdir="${AIL_HOME}/LEVEL_DB_DATA/"
nb_db=13
function launching_ardb {
conf_dir="${AIL_HOME}/configs/"
db_y=`date +%Y`
#Verify that a dir with the correct year exists, create it otherwise
if [ ! -d "$lvdbdir$db_y" ]; then
mkdir -p "$db_y"
fi
screen -dmS "LevelDB_AIL"
screen -dmS "ARDB_AIL"
sleep 0.1
echo -e $GREEN"\t* Launching Levels DB servers"$DEFAULT
echo -e $GREEN"\t* Launching ARDB servers"$DEFAULT
#Launch a DB for each dir
for pathDir in $lvdbdir*/ ; do
yDir=$(basename "$pathDir")
sleep 0.1
screen -S "LevelDB_AIL" -X screen -t "$yDir" bash -c 'redis-leveldb -H '$lvdbhost' -D '$pathDir'/ -P '$yDir' -M '$nb_db'; read x'
done
screen -S "ARDB_AIL" -X screen -t "6382" bash -c 'ardb-server '$conf_dir'6382.conf ; read x'
}
function launching_logs {
@ -101,12 +85,12 @@ function launching_queues {
sleep 0.1
echo -e $GREEN"\t* Launching all the queues"$DEFAULT
screen -S "Queue_AIL" -X screen -t "Queues" bash -c './launch_queues.py; read x'
screen -S "Queue_AIL" -X screen -t "Queues" bash -c 'python3 launch_queues.py; read x'
}
function launching_scripts {
echo -e "\t* Checking configuration"
bash -c "./Update-conf.py"
bash -c "python3 Update-conf.py"
exitStatus=$?
if [ $exitStatus -ge 1 ]; then
echo -e $RED"\t* Configuration not up-to-date"$DEFAULT
@ -142,6 +126,8 @@ function launching_scripts {
sleep 0.1
screen -S "Script_AIL" -X screen -t "Mail" bash -c './Mail.py; read x'
sleep 0.1
screen -S "Script_AIL" -X screen -t "ApiKey" bash -c './ApiKey.py; read x'
sleep 0.1
screen -S "Script_AIL" -X screen -t "Web" bash -c './Web.py; read x'
sleep 0.1
screen -S "Script_AIL" -X screen -t "Credential" bash -c './Credential.py; read x'
@ -158,6 +144,10 @@ function launching_scripts {
sleep 0.1
screen -S "Script_AIL" -X screen -t "Keys" bash -c './Keys.py; read x'
sleep 0.1
screen -S "Script_AIL" -X screen -t "Base64" bash -c './Base64.py; read x'
sleep 0.1
screen -S "Script_AIL" -X screen -t "Bitcoin" bash -c './Bitcoin.py; read x'
sleep 0.1
screen -S "Script_AIL" -X screen -t "Phone" bash -c './Phone.py; read x'
sleep 0.1
screen -S "Script_AIL" -X screen -t "Release" bash -c './Release.py; read x'
@ -183,7 +173,10 @@ function shutting_down_redis {
bash -c $redis_dir'redis-cli -p 6380 SHUTDOWN'
sleep 0.1
bash -c $redis_dir'redis-cli -p 6381 SHUTDOWN'
sleep 0.1
}
function shutting_down_ardb {
redis_dir=${AIL_HOME}/redis/src/
bash -c $redis_dir'redis-cli -p 6382 SHUTDOWN'
}
@ -208,12 +201,21 @@ function checking_redis {
flag_redis=1
fi
sleep 0.1
return $flag_redis;
}
function checking_ardb {
flag_ardb=0
redis_dir=${AIL_HOME}/redis/src/
sleep 0.2
bash -c $redis_dir'redis-cli -p 6382 PING | grep "PONG" &> /dev/null'
if [ ! $? == 0 ]; then
echo -e $RED"\t6382 not ready"$DEFAULT
flag_redis=1
flag_ardb=1
fi
return $flag_redis;
return $flag_ardb;
}
#If no params, display the help
@ -223,12 +225,12 @@ helptext;
############### TESTS ###################
isredis=`screen -ls | egrep '[0-9]+.Redis_AIL' | cut -d. -f1`
islvldb=`screen -ls | egrep '[0-9]+.LevelDB_AIL' | cut -d. -f1`
isardb=`screen -ls | egrep '[0-9]+.ARDB_AIL' | cut -d. -f1`
islogged=`screen -ls | egrep '[0-9]+.Logging_AIL' | cut -d. -f1`
isqueued=`screen -ls | egrep '[0-9]+.Queue_AIL' | cut -d. -f1`
isscripted=`screen -ls | egrep '[0-9]+.Script_AIL' | cut -d. -f1`
options=("Redis" "LevelDB" "Logs" "Queues" "Scripts" "Killall" "Shutdown" "Update-config")
options=("Redis" "Ardb" "Logs" "Queues" "Scripts" "Killall" "Shutdown" "Update-config")
menu() {
echo "What do you want to Launch?:"
@ -259,9 +261,9 @@ for i in ${!options[@]}; do
echo -e $RED"\t* A screen is already launched"$DEFAULT
fi
;;
LevelDB)
if [[ ! $islvldb ]]; then
launching_lvldb;
Ardb)
if [[ ! $isardb ]]; then
launching_ardb;
else
echo -e $RED"\t* A screen is already launched"$DEFAULT
fi
@ -282,12 +284,13 @@ for i in ${!options[@]}; do
;;
Scripts)
if [[ ! $isscripted ]]; then
if checking_redis; then
sleep 1
if checking_redis && checking_ardb; then
launching_scripts;
else
echo -e $YELLOW"\tScript not started, waiting 3 secondes"$DEFAULT
sleep 3
if checking_redis; then
echo -e $YELLOW"\tScript not started, waiting 5 secondes"$DEFAULT
sleep 5
if checking_redis && checking_ardb; then
launching_scripts;
else
echo -e $RED"\tScript not started"$DEFAULT
@ -298,14 +301,17 @@ for i in ${!options[@]}; do
fi
;;
Killall)
if [[ $isredis || $islvldb || $islogged || $isqueued || $isscripted ]]; then
if [[ $isredis || $isardb || $islogged || $isqueued || $isscripted ]]; then
echo -e $GREEN"Gracefully closing redis servers"$DEFAULT
shutting_down_redis;
sleep 0.2
echo -e $GREEN"Gracefully closing ardb servers"$DEFAULT
shutting_down_ardb;
echo -e $GREEN"Killing all"$DEFAULT
kill $isredis $islvldb $islogged $isqueued $isscripted
kill $isredis $isardb $islogged $isqueued $isscripted
sleep 0.2
echo -e $ROSE`screen -ls`$DEFAULT
echo -e $GREEN"\t* $isredis $islvldb $islogged $isqueued $isscripted killed."$DEFAULT
echo -e $GREEN"\t* $isredis $isardb $islogged $isqueued $isscripted killed."$DEFAULT
else
echo -e $RED"\t* No screen to kill"$DEFAULT
fi

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
@ -61,7 +61,7 @@ if __name__ == '__main__':
while True:
try:
message = p.get_from_set()
print message
print(message)
if message is not None:
PST = Paste.Paste(message)
else:
@ -77,8 +77,8 @@ if __name__ == '__main__':
# FIXME Not used.
PST.store.sadd("Pastes_Objects", PST.p_path)
if lines_infos[1] < args.max:
p.populate_set_out(PST.p_path, 'LinesShort')
p.populate_set_out( PST.p_path , 'LinesShort')
else:
p.populate_set_out(PST.p_path, 'LinesLong')
p.populate_set_out( PST.p_path , 'LinesLong')
except IOError:
print "CRC Checksum Error on : ", PST.p_path
print("CRC Checksum Error on : ", PST.p_path)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
@ -28,12 +28,14 @@ if __name__ == "__main__":
config_section = 'Mail'
p = Process(config_section)
addr_dns = p.config.get("Mail", "dns")
# REDIS #
r_serv2 = redis.StrictRedis(
host=p.config.get("Redis_Cache", "host"),
port=p.config.getint("Redis_Cache", "port"),
db=p.config.getint("Redis_Cache", "db"))
db=p.config.getint("Redis_Cache", "db"),
decode_responses=True)
# FUNCTIONS #
publisher.info("Suscribed to channel mails_categ")
@ -56,7 +58,7 @@ if __name__ == "__main__":
if prec_filename is None or filename != prec_filename:
PST = Paste.Paste(filename)
MX_values = lib_refine.checking_MX_record(
r_serv2, PST.get_regex(email_regex))
r_serv2, PST.get_regex(email_regex), addr_dns)
if MX_values[0] >= 1:
@ -78,14 +80,14 @@ if __name__ == "__main__":
publisher.info(to_print)
#Send to ModuleStats
for mail in MX_values[1]:
print 'mail;{};{};{}'.format(1, mail, PST.p_date)
print('mail;{};{};{}'.format(1, mail, PST.p_date))
p.populate_set_out('mail;{};{};{}'.format(1, mail, PST.p_date), 'ModuleStats')
prec_filename = filename
else:
publisher.debug("Script Mails is Idling 10s")
print 'Sleeping'
print('Sleeping')
time.sleep(10)
message = p.get_from_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The Mixer Module
@ -35,7 +35,7 @@ import os
import time
from pubsublogger import publisher
import redis
import ConfigParser
import configparser
from Helper import Process
@ -58,14 +58,15 @@ if __name__ == '__main__':
Did you set environment variables? \
Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read(configfile)
# REDIS #
server = redis.StrictRedis(
host=cfg.get("Redis_Mixer_Cache", "host"),
port=cfg.getint("Redis_Mixer_Cache", "port"),
db=cfg.getint("Redis_Mixer_Cache", "db"))
db=cfg.getint("Redis_Mixer_Cache", "db"),
decode_responses=True)
# LOGGING #
publisher.info("Feed Script started to receive & publish.")
@ -89,9 +90,13 @@ if __name__ == '__main__':
splitted = message.split()
if len(splitted) == 2:
complete_paste, gzip64encoded = splitted
try:
#feeder_name = ( complete_paste.replace("archive/","") ).split("/")[0]
feeder_name, paste_name = complete_paste.split('>')
feeder_name.replace(" ","")
paste_name = complete_paste
except ValueError as e:
feeder_name = "unnamed_feeder"
paste_name = complete_paste
@ -106,7 +111,9 @@ if __name__ == '__main__':
duplicated_paste_per_feeder[feeder_name] = 0
relay_message = "{0} {1}".format(paste_name, gzip64encoded)
digest = hashlib.sha1(gzip64encoded).hexdigest()
#relay_message = b" ".join( [paste_name, gzip64encoded] )
digest = hashlib.sha1(gzip64encoded.encode('utf8')).hexdigest()
# Avoid any duplicate coming from any sources
if operation_mode == 1:
@ -173,26 +180,26 @@ if __name__ == '__main__':
else:
# TODO Store the name of the empty paste inside a Redis-list.
print "Empty Paste: not processed"
print("Empty Paste: not processed")
publisher.debug("Empty Paste: {0} not processed".format(message))
else:
print "Empty Queues: Waiting..."
print("Empty Queues: Waiting...")
if int(time.time() - time_1) > refresh_time:
print processed_paste_per_feeder
print(processed_paste_per_feeder)
to_print = 'Mixer; ; ; ;mixer_all All_feeders Processed {0} paste(s) in {1}sec'.format(processed_paste, refresh_time)
print to_print
print(to_print)
publisher.info(to_print)
processed_paste = 0
for feeder, count in processed_paste_per_feeder.iteritems():
for feeder, count in processed_paste_per_feeder.items():
to_print = 'Mixer; ; ; ;mixer_{0} {0} Processed {1} paste(s) in {2}sec'.format(feeder, count, refresh_time)
print to_print
print(to_print)
publisher.info(to_print)
processed_paste_per_feeder[feeder] = 0
for feeder, count in duplicated_paste_per_feeder.iteritems():
for feeder, count in duplicated_paste_per_feeder.items():
to_print = 'Mixer; ; ; ;mixer_{0} {0} Duplicated {1} paste(s) in {2}sec'.format(feeder, count, refresh_time)
print to_print
print(to_print)
publisher.info(to_print)
duplicated_paste_per_feeder[feeder] = 0
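
The digest line changed above is one of the standard str/bytes fixes in this port: hashlib.sha1 only accepts bytes on Python 3, hence the added .encode('utf8'). A minimal illustration with a placeholder payload:

```python
import hashlib

gzip64encoded = 'H4sIAAAAAAAA...'  # placeholder for the base64 payload the Mixer receives
digest = hashlib.sha1(gzip64encoded.encode('utf8')).hexdigest()
print(digest)
# hashlib.sha1(gzip64encoded) without .encode() raises TypeError on Python 3.
```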

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
'''
@ -20,7 +20,7 @@ import os
import signal
import argparse
from subprocess import PIPE, Popen
import ConfigParser
import configparser
import json
from terminaltables import AsciiTable
import textwrap
@ -51,7 +51,7 @@ last_refresh = 0
def getPid(module):
p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
for line in p.stdout:
print line
print(line)
splittedLine = line.split()
if 'python2' in splittedLine:
return int(splittedLine[0])
@ -76,7 +76,7 @@ def cleanRedis():
flag_pid_valid = True
if not flag_pid_valid:
print flag_pid_valid, 'cleaning', pid, 'in', k
print(flag_pid_valid, 'cleaning', pid, 'in', k)
server.srem(k, pid)
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, moduleName, pid, "Cleared invalid pid in " + k])
@ -85,11 +85,11 @@ def cleanRedis():
def kill_module(module, pid):
print ''
print '-> trying to kill module:', module
print('')
print('-> trying to kill module:', module)
if pid is None:
print 'pid was None'
print('pid was None')
printarrayGlob.insert(1, [0, module, pid, "PID was None"])
printarrayGlob.pop()
pid = getPid(module)
@ -102,15 +102,15 @@ def kill_module(module, pid):
try:
os.kill(pid, signal.SIGUSR1)
except OSError:
print pid, 'already killed'
print(pid, 'already killed')
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Already killed"])
printarrayGlob.pop()
return
time.sleep(1)
if getPid(module) is None:
print module, 'has been killed'
print 'restarting', module, '...'
print(module, 'has been killed')
print('restarting', module, '...')
p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killed"])
@ -119,7 +119,7 @@ def kill_module(module, pid):
printarrayGlob.pop()
else:
print 'killing failed, retrying...'
print('killing failed, retrying...')
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killing #1 failed."])
printarrayGlob.pop()
@ -128,8 +128,8 @@ def kill_module(module, pid):
os.kill(pid, signal.SIGUSR1)
time.sleep(1)
if getPid(module) is None:
print module, 'has been killed'
print 'restarting', module, '...'
print(module, 'has been killed')
print('restarting', module, '...')
p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killed"])
@ -137,12 +137,12 @@ def kill_module(module, pid):
printarrayGlob.pop()
printarrayGlob.pop()
else:
print 'killing failed!'
print('killing failed!')
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed!"])
printarrayGlob.pop()
else:
print 'Module does not exist'
print('Module does not exist')
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed, module not found"])
printarrayGlob.pop()
@ -192,14 +192,15 @@ if __name__ == "__main__":
Did you set environment variables? \
Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read(configfile)
# REDIS #
server = redis.StrictRedis(
host=cfg.get("Redis_Queues", "host"),
port=cfg.getint("Redis_Queues", "port"),
db=cfg.getint("Redis_Queues", "db"))
db=cfg.getint("Redis_Queues", "db"),
decode_responses=True)
if args.clear == 1:
clearRedisModuleInfo()
@ -227,7 +228,7 @@ if __name__ == "__main__":
printarray1 = []
printarray2 = []
printarray3 = []
for queue, card in server.hgetall("queues").iteritems():
for queue, card in server.hgetall("queues").items():
all_queue.add(queue)
key = "MODULE_" + queue + "_"
keySet = "MODULE_TYPE_" + queue
@ -337,15 +338,15 @@ if __name__ == "__main__":
legend = AsciiTable(legend_array, title="Legend")
legend.column_max_width(1)
print legend.table
print '\n'
print t1.table
print '\n'
print t2.table
print '\n'
print t3.table
print '\n'
print t4.table
print(legend.table)
print('\n')
print(t1.table)
print('\n')
print(t2.table)
print('\n')
print(t3.table)
print('\n')
print(t4.table)
if (datetime.datetime.now() - lastTime).total_seconds() > args.refresh*5:
lastTime = datetime.datetime.now()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
This module makes statistics for some modules and providers
@ -52,10 +52,10 @@ def compute_most_posted(server, message):
# Member set is a list of (value, score) pairs
if int(member_set[0][1]) < keyword_total_sum:
#remove min from set and add the new one
print module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
print(module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
server.zrem(redis_progression_name_set, member_set[0][0])
server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)
print redis_progression_name_set
print(redis_progression_name_set)
def compute_provider_info(server_trend, server_pasteName, path):
@ -94,7 +94,7 @@ def compute_provider_info(server_trend, server_pasteName, path):
# Member set is a list of (value, score) pairs
if float(member_set[0][1]) < new_avg:
#remove min from set and add the new one
print 'Size - adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
print('Size - adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
server_trend.zrem(redis_sum_size_set, member_set[0][0])
server_trend.zadd(redis_sum_size_set, float(sum_size), paste_provider)
server_trend.zrem(redis_avg_size_name_set, member_set[0][0])
@ -110,7 +110,7 @@ def compute_provider_info(server_trend, server_pasteName, path):
# Member set is a list of (value, score) pairs
if int(member_set[0][1]) < num_paste:
#remove min from set and add the new one
print 'Num - adding ' +paste_provider+ '(' +str(num_paste)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')'
print('Num - adding ' +paste_provider+ '(' +str(num_paste)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
server_trend.zrem(member_set[0][0])
server_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider)
@ -133,14 +133,16 @@ if __name__ == '__main__':
# REDIS #
r_serv_trend = redis.StrictRedis(
host=p.config.get("Redis_Level_DB_Trending", "host"),
port=p.config.get("Redis_Level_DB_Trending", "port"),
db=p.config.get("Redis_Level_DB_Trending", "db"))
host=p.config.get("ARDB_Trending", "host"),
port=p.config.get("ARDB_Trending", "port"),
db=p.config.get("ARDB_Trending", "db"),
decode_responses=True)
r_serv_pasteName = redis.StrictRedis(
host=p.config.get("Redis_Paste_Name", "host"),
port=p.config.get("Redis_Paste_Name", "port"),
db=p.config.get("Redis_Paste_Name", "db"))
db=p.config.get("Redis_Paste_Name", "db"),
decode_responses=True)
# Endless loop getting messages from the input queue
while True:
@ -149,7 +151,7 @@ if __name__ == '__main__':
if message is None:
publisher.debug("{} queue is empty, waiting".format(config_section))
print 'sleeping'
print('sleeping')
time.sleep(20)
continue

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
from asciimatics.widgets import Frame, ListBox, Layout, Divider, Text, \
@ -10,7 +10,7 @@ from asciimatics.event import Event
from asciimatics.event import KeyboardEvent, MouseEvent
import sys, os
import time, datetime
import argparse, ConfigParser
import argparse, configparser
import json
import redis
import psutil
@ -497,9 +497,8 @@ MANAGE MODULES AND GET INFOS
def getPid(module):
p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
for line in p.stdout:
print line
splittedLine = line.split()
if 'python2' in splittedLine:
if 'python3' in splittedLine:
return int(splittedLine[0])
return None
@ -517,15 +516,20 @@ def cleanRedis():
proc = Popen([command_search_name.format(pid)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
try:
for line in proc.stdout:
line = line.decode('utf8')
splittedLine = line.split()
if ('python2' in splittedLine or 'python' in splittedLine) and "./"+moduleName+".py" in splittedLine:
if ('python3.5' in splittedLine or 'python3' in splittedLine or 'python' in splittedLine):
moduleCommand = "./"+moduleName + ".py"
moduleCommand2 = moduleName + ".py"
if(moduleCommand in splittedLine or moduleCommand2 in splittedLine):
flag_pid_valid = True
if not flag_pid_valid:
#print flag_pid_valid, 'cleaning', pid, 'in', k
server.srem(k, pid)
inst_time = datetime.datetime.fromtimestamp(int(time.time()))
log(([str(inst_time).split(' ')[1], moduleName, pid, "Cleared invalid pid in " + k], 0))
log(([str(inst_time).split(' ')[1], moduleName, pid, "Cleared invalid pid in " + (k)], 0))
#Error due to resize, interrupted sys call
except IOError as e:
@ -601,7 +605,7 @@ def fetchQueueData():
printarray_running = []
printarray_idle = []
printarray_notrunning = []
for queue, card in server.hgetall("queues").iteritems():
for queue, card in iter(server.hgetall("queues").items()):
all_queue.add(queue)
key = "MODULE_" + queue + "_"
keySet = "MODULE_TYPE_" + queue
@ -609,7 +613,9 @@ def fetchQueueData():
for moduleNum in server.smembers(keySet):
value = server.get(key + str(moduleNum))
complete_paste_path = server.get(key + str(moduleNum) + "_PATH")
complete_paste_path = ( server.get(key + str(moduleNum) + "_PATH") )
if(complete_paste_path is not None):
complete_paste_path = complete_paste_path
COMPLETE_PASTE_PATH_PER_PID[moduleNum] = complete_paste_path
if value is not None:
@ -644,6 +650,7 @@ def fetchQueueData():
cpu_avg = sum(CPU_TABLE[moduleNum])/len(CPU_TABLE[moduleNum])
if len(CPU_TABLE[moduleNum]) > args.refresh*10:
CPU_TABLE[moduleNum].pop()
mem_percent = CPU_OBJECT_TABLE[int(moduleNum)].memory_percent()
except psutil.NoSuchProcess:
del CPU_OBJECT_TABLE[int(moduleNum)]
@ -652,6 +659,7 @@ def fetchQueueData():
cpu_avg = cpu_percent
mem_percent = 0
except KeyError:
#print('key error2')
try:
CPU_OBJECT_TABLE[int(moduleNum)] = psutil.Process(int(moduleNum))
cpu_percent = CPU_OBJECT_TABLE[int(moduleNum)].cpu_percent()
@ -671,7 +679,7 @@ def fetchQueueData():
printarray_idle.append( ([" <K> ", str(queue), str(moduleNum), str(processed_time_readable), str(path)], moduleNum) )
PID_NAME_DICO[int(moduleNum)] = str(queue)
array_module_type.sort(lambda x,y: cmp(x[0][4], y[0][4]), reverse=True) #Sort by num of pastes
#array_module_type.sort(lambda x,y: cmp(x[0][4], y[0][4]), reverse=True) #Sort by num of pastes
for e in array_module_type:
printarray_running.append(e)
@ -715,6 +723,7 @@ def format_string(tab, padding_row):
text=""
for ite, elem in enumerate(the_array):
if len(elem) > padding_row[ite]:
text += "*" + elem[-padding_row[ite]+6:]
padd_off = " "*5
@ -761,7 +770,7 @@ def demo(screen):
if time.time() - time_cooldown > args.refresh:
cleanRedis()
for key, val in fetchQueueData().iteritems(): #fetch data and put it into the tables
for key, val in iter(fetchQueueData().items()): #fetch data and put it into the tables
TABLES[key] = val
TABLES["logs"] = format_string(printarrayLog, TABLES_PADDING["logs"])
@ -790,14 +799,15 @@ if __name__ == "__main__":
Did you set environment variables? \
Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read(configfile)
# REDIS #
server = redis.StrictRedis(
host=cfg.get("Redis_Queues", "host"),
port=cfg.getint("Redis_Queues", "port"),
db=cfg.getint("Redis_Queues", "db"))
db=cfg.getint("Redis_Queues", "db"),
decode_responses=True)
if args.clear == 1:
clearRedisModuleInfo()

View file

@ -1,10 +1,10 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import ConfigParser
import configparser
import os
import smtplib
from email.MIMEMultipart import MIMEMultipart
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
"""
@ -28,7 +28,7 @@ def sendEmailNotification(recipient, term):
Did you set environment variables? \
Or activate the virtualenv?')
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read(configfile)
sender = cfg.get("Notifications", "sender"),
@ -76,8 +76,5 @@ def sendEmailNotification(recipient, term):
smtp_server.quit()
except Exception as e:
print str(e)
print(str(e))
# raise e
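
The only substantive change to the notification helper is the import path: in Python 3 the MIME classes live in the lowercase email.mime.* modules. For reference, a tiny sketch with illustrative addresses:

from email.mime.multipart import MIMEMultipart   # email.MIMEMultipart no longer exists
from email.mime.text import MIMEText

mime_msg = MIMEMultipart()
mime_msg['From'] = 'ail@example.org'
mime_msg['To'] = 'analyst@example.org'
mime_msg['Subject'] = 'AIL framework, term detected'
mime_msg.attach(MIMEText('Term tracked by AIL', 'plain'))
print(mime_msg.as_string())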

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The ZMQ_Sub_Onion Module
@ -37,11 +37,12 @@ from Helper import Process
def fetch(p, r_cache, urls, domains, path):
failed = []
downloaded = []
print len(urls), 'Urls to fetch.'
print('{} Urls to fetch'.format(len(urls)))
for url, domain in zip(urls, domains):
if r_cache.exists(url) or url in failed:
continue
to_fetch = base64.standard_b64encode(url)
to_fetch = base64.standard_b64encode(url.encode('utf8'))
print('fetching url: {}'.format(to_fetch))
process = subprocess.Popen(["python", './tor_fetcher.py', to_fetch],
stdout=subprocess.PIPE)
while process.poll() is None:
@ -51,8 +52,10 @@ def fetch(p, r_cache, urls, domains, path):
r_cache.setbit(url, 0, 1)
r_cache.expire(url, 360000)
downloaded.append(url)
tempfile = process.stdout.read().strip()
with open(tempfile, 'r') as f:
print('downloaded : {}'.format(downloaded))
'''tempfile = process.stdout.read().strip()
tempfile = tempfile.decode('utf8')
#with open(tempfile, 'r') as f:
filename = path + domain + '.gz'
fetched = f.read()
content = base64.standard_b64decode(fetched)
@ -66,16 +69,16 @@ def fetch(p, r_cache, urls, domains, path):
ff.write(content)
p.populate_set_out(save_path, 'Global')
p.populate_set_out(url, 'ValidOnion')
p.populate_set_out(fetched, 'FetchedOnion')
p.populate_set_out(fetched, 'FetchedOnion')'''
yield url
os.unlink(tempfile)
#os.unlink(tempfile)
else:
r_cache.setbit(url, 0, 0)
r_cache.expire(url, 3600)
failed.append(url)
print 'Failed at downloading', url
print process.stdout.read()
print 'Failed:', len(failed), 'Downloaded:', len(downloaded)
print('Failed at downloading', url)
print(process.stdout.read())
print('Failed:', len(failed), 'Downloaded:', len(downloaded))
if __name__ == "__main__":
@ -91,7 +94,8 @@ if __name__ == "__main__":
r_cache = redis.StrictRedis(
host=p.config.get("Redis_Cache", "host"),
port=p.config.getint("Redis_Cache", "port"),
db=p.config.getint("Redis_Cache", "db"))
db=p.config.getint("Redis_Cache", "db"),
decode_responses=True)
# FUNCTIONS #
publisher.info("Script subscribed to channel onion_categ")
@ -109,7 +113,7 @@ if __name__ == "__main__":
while True:
if message is not None:
print message
print(message)
filename, score = message.split()
# "For each new paste"
@ -131,6 +135,8 @@ if __name__ == "__main__":
PST.save_attribute_redis(channel, domains_list)
to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date,
PST.p_name)
print(len(domains_list))
if len(domains_list) > 0:
publisher.warning('{}Detected {} .onion(s);{}'.format(
@ -144,7 +150,7 @@ if __name__ == "__main__":
PST.p_date,
PST.p_name)
for url in fetch(p, r_cache, urls, domains_list, path):
publisher.warning('{}Checked {};{}'.format(to_print, url, PST.p_path))
publisher.info('{}Checked {};{}'.format(to_print, url, PST.p_path))
p.populate_set_out('onion;{}'.format(PST.p_path), 'alertHandler')
else:
publisher.info('{}Onion related;{}'.format(to_print, PST.p_path))
@ -152,6 +158,6 @@ if __name__ == "__main__":
prec_filename = filename
else:
publisher.debug("Script url is Idling 10s")
print 'Sleeping'
#print('Sleeping')
time.sleep(10)
message = p.get_from_set()
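
base64.standard_b64encode() is another bytes-only API in Python 3, which is why the onion fetcher now encodes the URL before handing it to tor_fetcher.py. A round-trip sketch, not part of the diff (URL illustrative):

import base64

url = 'http://example2foobar.onion/page'                    # illustrative
to_fetch = base64.standard_b64encode(url.encode('utf8'))    # bytes in, bytes out
print('fetching url: {}'.format(to_fetch))

# tor_fetcher.py does the reverse on its side of the subprocess call
assert base64.standard_b64decode(to_fetch).decode('utf8') == url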

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
@ -30,10 +30,11 @@ def search_phone(message):
# if the list is greater than 4, we consider the Paste may contain a list of phone numbers
if len(results) > 4:
print results
print(results)
publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name))
#send to Browse_warning_paste
p.populate_set_out('phone;{}'.format(message), 'alertHandler')
msg = 'phone;{}'.format(message)
p.populate_set_out(msg, 'alertHandler')
#Send to duplicate
p.populate_set_out(message, 'Duplicate')
stats = {}

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
from pubsublogger import publisher

View file

@ -1,9 +1,9 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import redis
import argparse
import ConfigParser
import configparser
import time
import os
from pubsublogger import publisher
@ -14,7 +14,7 @@ def main():
"""Main Function"""
# CONFIG #
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read('./packages/config.cfg')
# SCRIPT PARSER #
@ -30,7 +30,8 @@ def main():
r_serv = redis.StrictRedis(
host=cfg.get("Redis_Queues", "host"),
port=cfg.getint("Redis_Queues", "port"),
db=cfg.getint("Redis_Queues", "db"))
db=cfg.getint("Redis_Queues", "db"),
decode_responses=True)
# LOGGING #
publisher.port = 6380
@ -49,7 +50,7 @@ def main():
row.sort()
table.add_rows(row, header=False)
os.system('clear')
print table.draw()
print(table.draw())
if __name__ == "__main__":

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
This Module is used for term frequency.
@ -54,9 +54,10 @@ if __name__ == "__main__":
# REDIS #
server_term = redis.StrictRedis(
host=p.config.get("Redis_Level_DB_TermFreq", "host"),
port=p.config.get("Redis_Level_DB_TermFreq", "port"),
db=p.config.get("Redis_Level_DB_TermFreq", "db"))
host=p.config.get("ARDB_TermFreq", "host"),
port=p.config.get("ARDB_TermFreq", "port"),
db=p.config.get("ARDB_TermFreq", "db"),
decode_responses=True)
# FUNCTIONS #
publisher.info("RegexForTermsFrequency script started")
@ -115,6 +116,6 @@ if __name__ == "__main__":
else:
publisher.debug("Script RegexForTermsFrequency is Idling")
print "sleeping"
print("sleeping")
time.sleep(5)
message = p.get_from_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import time
from packages import Paste
@ -6,6 +6,16 @@ from pubsublogger import publisher
from Helper import Process
import re
import signal
class TimeoutException(Exception):
pass
def timeout_handler(signum, frame):
raise TimeoutException
signal.signal(signal.SIGALRM, timeout_handler)
'''
This module takes its input from the global module.
It applies some regex and publish matched content
@ -16,6 +26,7 @@ if __name__ == "__main__":
publisher.channel = "Script"
config_section = "Release"
p = Process(config_section)
max_execution_time = p.config.getint("Curve", "max_execution_time")
publisher.info("Release scripts to find release names")
movie = "[a-zA-Z0-9.]+\.[0-9]{4}.[a-zA-Z0-9.]+\-[a-zA-Z]+"
@ -29,18 +40,28 @@ if __name__ == "__main__":
filepath = p.get_from_set()
if filepath is None:
publisher.debug("Script Release is Idling 10s")
print 'Sleeping'
print('Sleeping')
time.sleep(10)
continue
paste = Paste.Paste(filepath)
content = paste.get_p_content()
signal.alarm(max_execution_time)
try:
releases = set(re.findall(regex, content))
if len(releases) == 0:
continue
to_print = 'Release;{};{};{};{} releases;{}'.format(paste.p_source, paste.p_date, paste.p_name, len(releases), paste.p_path)
print(to_print)
if len(releases) > 30:
publisher.warning(to_print)
else:
publisher.info(to_print)
except TimeoutException:
print ("{0} processing timeout".format(paste.p_path))
continue
else:
signal.alarm(0)
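
Release now wraps its regex pass in a SIGALRM-based timeout, with the limit read from the new [Curve] max_execution_time option. The pattern in isolation (Unix-only, since signal.alarm() is unavailable on Windows; the content string is illustrative):

import re
import signal

class TimeoutException(Exception):
    pass

def timeout_handler(signum, frame):
    raise TimeoutException

signal.signal(signal.SIGALRM, timeout_handler)

max_execution_time = 90                             # illustrative; the module reads it from config.cfg
content = 'Some.Movie.2017.1080p.BluRay-GROUP'      # illustrative paste content
regex = r'[a-zA-Z0-9.]+\.[0-9]{4}.[a-zA-Z0-9.]+\-[a-zA-Z]+'

signal.alarm(max_execution_time)                    # arm the alarm before the expensive findall
try:
    releases = set(re.findall(regex, content))
except TimeoutException:
    print('processing timeout')
else:
    signal.alarm(0)                                 # disarm once the regex finished in time
    print(releases)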

View file

@ -1,9 +1,9 @@
#!/usr/bin/python2.7
#!/usr/bin/python3
# -*-coding:UTF-8 -*
import redis
import argparse
import ConfigParser
import configparser
from datetime import datetime
from pubsublogger import publisher
@ -14,7 +14,7 @@ def main():
"""Main Function"""
# CONFIG #
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read('./packages/config.cfg')
# SCRIPT PARSER #
@ -33,9 +33,10 @@ def main():
# port generated automatically depending on the date
curYear = datetime.now().year if args.year is None else args.year
r_serv = redis.StrictRedis(
host=cfg.get("Redis_Level_DB_Hashs", "host"),
port=curYear,
db=cfg.getint("Redis_Level_DB_Hashs", "db"))
host=cfg.get("ARDB_Hashs", "host"),
port=cfg.getint("ARDB_Hashs", "port"),
db=curYear,
decode_responses=True)
# LOGGING #
publisher.port = 6380

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
@ -13,7 +13,7 @@ It test different possibility to makes some sqlInjection.
import time
import string
import urllib2
import urllib.request
import re
from pubsublogger import publisher
from Helper import Process
@ -66,16 +66,16 @@ def analyse(url, path):
result_query = 0
if resource_path is not None:
result_path = is_sql_injection(resource_path)
result_path = is_sql_injection(resource_path.decode('utf8'))
if query_string is not None:
result_query = is_sql_injection(query_string)
result_query = is_sql_injection(query_string.decode('utf8'))
if (result_path > 0) or (result_query > 0):
paste = Paste.Paste(path)
if (result_path > 1) or (result_query > 1):
print "Detected SQL in URL: "
print urllib2.unquote(url)
print("Detected SQL in URL: ")
print(urllib.request.unquote(url))
to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_path)
publisher.warning(to_print)
#Send to duplicate
@ -83,8 +83,8 @@ def analyse(url, path):
#send to Browse_warning_paste
p.populate_set_out('sqlinjection;{}'.format(path), 'alertHandler')
else:
print "Potential SQL injection:"
print urllib2.unquote(url)
print("Potential SQL injection:")
print(urllib.request.unquote(url))
to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Potential SQL injection", paste.p_path)
publisher.info(to_print)
@ -92,8 +92,8 @@ def analyse(url, path):
# Try to detect if the url passed might be an sql injection by appliying the regex
# defined above on it.
def is_sql_injection(url_parsed):
line = urllib2.unquote(url_parsed)
line = string.upper(line)
line = urllib.request.unquote(url_parsed)
line = str.upper(line)
result = []
result_suspect = []
@ -104,20 +104,20 @@ def is_sql_injection(url_parsed):
for word_list in word_injection:
for word in word_list:
temp_res = string.find(line, string.upper(word))
temp_res = str.find(line, str.upper(word))
if temp_res!=-1:
result.append(line[temp_res:temp_res+len(word)])
for word in word_injection_suspect:
temp_res = string.find(line, string.upper(word))
temp_res = str.find(line, str.upper(word))
if temp_res!=-1:
result_suspect.append(line[temp_res:temp_res+len(word)])
if len(result)>0:
print result
print(result)
return 2
elif len(result_suspect)>0:
print result_suspect
print(result_suspect)
return 1
else:
return 0
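
Most of the SQLInjectionDetection changes come from two stdlib renames: urllib2.unquote() becomes urllib.request.unquote(), and the removed string-module helpers give way to str methods. A compact sketch, outside the diff (query string illustrative):

import urllib.request

line = urllib.request.unquote('id%3D1%20UNION%20SELECT%20password')  # illustrative query
line = str.upper(line)                       # replaces string.upper(line)

word = 'union select'
temp_res = str.find(line, str.upper(word))   # replaces string.find(...)
if temp_res != -1:
    print(line[temp_res:temp_res + len(word)])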

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
Sentiment analyser module.
@ -33,7 +33,7 @@ size_threshold = 250
line_max_length_threshold = 1000
import os
import ConfigParser
import configparser
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
if not os.path.exists(configfile):
@ -41,7 +41,7 @@ if not os.path.exists(configfile):
Did you set environment variables? \
Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read(configfile)
sentiment_lexicon_file = cfg.get("Directories", "sentiment_lexicon_file")
@ -69,7 +69,7 @@ def Analyse(message, server):
combined_datetime = datetime.datetime.combine(the_date, the_time)
timestamp = calendar.timegm(combined_datetime.timetuple())
sentences = tokenize.sent_tokenize(p_content.decode('utf-8', 'ignore'))
sentences = tokenize.sent_tokenize(p_content)
if len(sentences) > 0:
avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}
@ -109,11 +109,11 @@ def Analyse(message, server):
provider_timestamp = provider + '_' + str(timestamp)
server.incr('UniqID')
UniqID = server.get('UniqID')
print provider_timestamp, '->', UniqID, 'dropped', num_line_removed, 'lines'
print(provider_timestamp, '->', UniqID, 'dropped', num_line_removed, 'lines')
server.sadd(provider_timestamp, UniqID)
server.set(UniqID, avg_score)
else:
print 'Dropped:', p_MimeType
print('Dropped:', p_MimeType)
def isJSON(content):
@ -121,7 +121,7 @@ def isJSON(content):
json.loads(content)
return True
except Exception,e:
except Exception:
return False
import signal
@ -152,9 +152,10 @@ if __name__ == '__main__':
# REDIS_LEVEL_DB #
server = redis.StrictRedis(
host=p.config.get("Redis_Level_DB_Sentiment", "host"),
port=p.config.get("Redis_Level_DB_Sentiment", "port"),
db=p.config.get("Redis_Level_DB_Sentiment", "db"))
host=p.config.get("ARDB_Sentiment", "host"),
port=p.config.get("ARDB_Sentiment", "port"),
db=p.config.get("ARDB_Sentiment", "db"),
decode_responses=True)
while True:
message = p.get_from_set()
@ -170,4 +171,3 @@ if __name__ == '__main__':
continue
else:
signal.alarm(0)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
This Module is used for term frequency.
@ -52,9 +52,10 @@ if __name__ == "__main__":
# REDIS #
server_term = redis.StrictRedis(
host=p.config.get("Redis_Level_DB_TermFreq", "host"),
port=p.config.get("Redis_Level_DB_TermFreq", "port"),
db=p.config.get("Redis_Level_DB_TermFreq", "db"))
host=p.config.get("ARDB_TermFreq", "host"),
port=p.config.get("ARDB_TermFreq", "port"),
db=p.config.get("ARDB_TermFreq", "db"),
decode_responses=True)
# FUNCTIONS #
publisher.info("RegexForTermsFrequency script started")
@ -126,6 +127,6 @@ if __name__ == "__main__":
else:
publisher.debug("Script RegexForTermsFrequency is Idling")
print "sleeping"
print("sleeping")
time.sleep(5)
message = p.get_from_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The ZMQ_Feed_Q Module
@ -21,7 +21,7 @@ Requirements
"""
import redis
import ConfigParser
import configparser
import os
configfile = os.path.join(os.environ['AIL_BIN'], './packages/config.cfg')
@ -31,13 +31,14 @@ def main():
"""Main Function"""
# CONFIG #
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read(configfile)
# REDIS
r_serv = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"),
port=cfg.getint("Redis_Queues", "port"),
db=cfg.getint("Redis_Queues", "db"))
db=cfg.getint("Redis_Queues", "db"),
decode_responses=True)
# FIXME: automatic based on the queue name.
# ### SCRIPTS ####

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import time
from packages import Paste
@ -29,7 +29,7 @@ if __name__ == "__main__":
languages = [shell, c, php, bash, python, javascript, bash, ruby, adr]
regex = '|'.join(languages)
print regex
print(regex)
while True:
message = p.get_from_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The Tokenize Module
@ -50,7 +50,7 @@ if __name__ == "__main__":
while True:
message = p.get_from_set()
print message
print(message)
if message is not None:
paste = Paste.Paste(message)
signal.alarm(5)
@ -67,4 +67,4 @@ if __name__ == "__main__":
else:
publisher.debug("Tokeniser is idling 10s")
time.sleep(10)
print "sleepin"
print("Sleeping")

View file

@ -1,8 +1,8 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import ConfigParser
from ConfigParser import ConfigParser as cfgP
import configparser
from configparser import ConfigParser as cfgP
import os
from collections import OrderedDict
import sys
@ -20,9 +20,9 @@ def main():
Or activate the virtualenv.')
configfileSample = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg.sample')
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read(configfile)
cfgSample = ConfigParser.ConfigParser()
cfgSample = configparser.ConfigParser()
cfgSample.read(configfileSample)
sections = cfgP.sections(cfg)
@ -63,12 +63,12 @@ def main():
print(" - "+item[0])
print("+--------------------------------------------------------------------+")
resp = raw_input("Do you want to auto fix it? [y/n] ")
resp = input("Do you want to auto fix it? [y/n] ")
if resp != 'y':
return False
else:
resp2 = raw_input("Do you want to keep a backup of the old configuration file? [y/n] ")
resp2 = input("Do you want to keep a backup of the old configuration file? [y/n] ")
if resp2 == 'y':
shutil.move(configfile, configfileBackup)
@ -109,4 +109,3 @@ if __name__ == "__main__":
sys.exit()
else:
sys.exit(1)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
@ -47,7 +47,8 @@ if __name__ == "__main__":
r_serv2 = redis.StrictRedis(
host=p.config.get("Redis_Cache", "host"),
port=p.config.getint("Redis_Cache", "port"),
db=p.config.getint("Redis_Cache", "db"))
db=p.config.getint("Redis_Cache", "db"),
decode_responses=True)
# Protocol file path
protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
@ -95,17 +96,23 @@ if __name__ == "__main__":
subdomain = faup.get_subdomain()
f1 = None
domains_list.append(domain)
publisher.debug('{} Published'.format(url))
if f1 == "onion":
print domain
print(domain)
if subdomain is not None:
subdomain = subdomain.decode('utf8')
if domain is not None:
domain = domain.decode('utf8')
domains_list.append(domain)
hostl = avoidNone(subdomain) + avoidNone(domain)
hostl = unicode(avoidNone(subdomain)+avoidNone(domain))
try:
socket.setdefaulttimeout(1)
ip = socket.gethostbyname(unicode(hostl))
ip = socket.gethostbyname(hostl)
except:
# If the resolver is not giving any IPv4 address,
# ASN/CC lookup is skip.
@ -113,32 +120,36 @@ if __name__ == "__main__":
try:
l = client.lookup(ip, qType='IP')
except ipaddress.AddressValueError:
continue
cc = getattr(l, 'cc')
asn = getattr(l, 'asn')
if getattr(l, 'asn') is not None:
asn = getattr(l, 'asn')[2:] #remobe b'
# EU is not an official ISO 3166 code (but used by RIPE
# IP allocation)
if cc is not None and cc != "EU":
print hostl, asn, cc, \
pycountry.countries.get(alpha_2=cc).name
print(hostl, asn, cc, \
pycountry.countries.get(alpha_2=cc).name)
if cc == cc_critical:
to_print = 'Url;{};{};{};Detected {} {}'.format(
PST.p_source, PST.p_date, PST.p_name,
hostl, cc)
#publisher.warning(to_print)
print to_print
print(to_print)
else:
print hostl, asn, cc
print(hostl, asn, cc)
A_values = lib_refine.checking_A_record(r_serv2,
domains_list)
if A_values[0] >= 1:
PST.__setattr__(channel, A_values)
PST.save_attribute_redis(channel, (A_values[0],
list(A_values[1])))
pprint.pprint(A_values)
publisher.info('Url;{};{};{};Checked {} URL;{}'.format(
PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_path))
@ -146,7 +157,7 @@ if __name__ == "__main__":
else:
publisher.debug("Script url is Idling 10s")
print 'Sleeping'
print('Sleeping')
time.sleep(10)
message = p.get_from_set()
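
pyfaup hands back bytes under Python 3, so Web now decodes the subdomain and domain before resolving them, and the unicode() wrapper around gethostbyname() disappears. A hedged sketch of that step, with literals standing in for the faup results:

import socket

subdomain = None            # faup returns None when the URL has no subdomain
domain = b'example.com'     # faup.get_domain() returns bytes under Python 3

if subdomain is not None:
    subdomain = subdomain.decode('utf8')
if domain is not None:
    domain = domain.decode('utf8')

hostl = (subdomain or '') + (domain or '')   # same concatenation idea as avoidNone()
try:
    socket.setdefaulttimeout(1)
    ip = socket.gethostbyname(hostl)         # plain str, no unicode() needed
    print(hostl, ip)
except OSError:
    # no IPv4 answer: the ASN/CC lookup is skipped, as in the module
    pass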

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
@ -29,11 +29,12 @@ num_day_to_look = 5 # the detection of the progression start num_day_to_lo
def analyse(server, field_name, date, url_parsed):
field = url_parsed[field_name]
if field is not None:
field = field.decode('utf8')
server.hincrby(field, date, 1)
if field_name == "domain": #save domain in a set for the monthly plot
domain_set_name = "domain_set_" + date[0:6]
server.sadd(domain_set_name, field)
print "added in " + domain_set_name +": "+ field
print("added in " + domain_set_name +": "+ field)
def get_date_range(num_day):
curr_date = datetime.date.today()
@ -113,9 +114,10 @@ if __name__ == '__main__':
# REDIS #
r_serv_trend = redis.StrictRedis(
host=p.config.get("Redis_Level_DB_Trending", "host"),
port=p.config.get("Redis_Level_DB_Trending", "port"),
db=p.config.get("Redis_Level_DB_Trending", "db"))
host=p.config.get("ARDB_Trending", "host"),
port=p.config.get("ARDB_Trending", "port"),
db=p.config.get("ARDB_Trending", "db"),
decode_responses=True)
# FILE CURVE SECTION #
csv_path_proto = os.path.join(os.environ['AIL_HOME'],
@ -145,24 +147,25 @@ if __name__ == '__main__':
year = today.year
month = today.month
print 'Building protocol graph'
print('Building protocol graph')
lib_words.create_curve_with_word_file(r_serv_trend, csv_path_proto,
protocolsfile_path, year,
month)
print 'Building tld graph'
print('Building tld graph')
lib_words.create_curve_with_word_file(r_serv_trend, csv_path_tld,
tldsfile_path, year,
month)
print 'Building domain graph'
print('Building domain graph')
lib_words.create_curve_from_redis_set(r_serv_trend, csv_path_domain,
"domain", year,
month)
print 'end building'
print('end building')
publisher.debug("{} queue is empty, waiting".format(config_section))
print 'sleeping'
print('sleeping')
time.sleep(5*60)
continue
@ -173,9 +176,13 @@ if __name__ == '__main__':
faup.decode(url)
url_parsed = faup.get()
analyse(r_serv_trend, 'scheme', date, url_parsed) #Scheme analysis
analyse(r_serv_trend, 'tld', date, url_parsed) #Tld analysis
analyse(r_serv_trend, 'domain', date, url_parsed) #Domain analysis
# Scheme analysis
analyse(r_serv_trend, 'scheme', date, url_parsed)
# Tld analysis
analyse(r_serv_trend, 'tld', date, url_parsed)
# Domain analysis
analyse(r_serv_trend, 'domain', date, url_parsed)
compute_progression(r_serv_trend, 'scheme', num_day_to_look, url_parsed)
compute_progression(r_serv_trend, 'tld', num_day_to_look, url_parsed)
compute_progression(r_serv_trend, 'domain', num_day_to_look, url_parsed)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python3.5
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
from pymisp.tools.abstractgenerator import AbstractMISPObjectGenerator
@ -15,7 +15,7 @@ class AilleakObject(AbstractMISPObjectGenerator):
self._p_source = p_source.split('/')[-5:]
self._p_source = '/'.join(self._p_source)[:-3] # -3 removes .gz
self._p_date = p_date
self._p_content = p_content.encode('utf8')
self._p_content = p_content
self._p_duplicate = p_duplicate
self._p_duplicate_number = p_duplicate_number
self.generate_attributes()
@ -45,13 +45,10 @@ class ObjectWrapper:
self.paste = Paste.Paste(path)
self.p_date = self.date_to_str(self.paste.p_date)
self.p_source = self.paste.p_path
self.p_content = self.paste.get_p_content().decode('utf8')
self.p_content = self.paste.get_p_content()
temp = self.paste._get_p_duplicate()
try:
temp = temp.decode('utf8')
except AttributeError:
pass
#beautifier
temp = json.loads(temp)
self.p_duplicate_number = len(temp) if len(temp) >= 0 else 0

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python3.5
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
@ -52,9 +52,10 @@ if __name__ == "__main__":
# port generated automatically depending on the date
curYear = datetime.now().year
server = redis.StrictRedis(
host=p.config.get("Redis_Level_DB", "host"),
port=curYear,
db=p.config.get("Redis_Level_DB", "db"))
host=p.config.get("ARDB_DB", "host"),
port=p.config.get("ARDB_DB", "port"),
db=curYear,
decode_responses=True)
# FUNCTIONS #
publisher.info("Script duplicate started")
@ -62,8 +63,8 @@ if __name__ == "__main__":
while True:
message = p.get_from_set()
if message is not None:
message = message.decode('utf8') #decode because of pyhton3
module_name, p_path = message.split(';')
print("new alert : {}".format(module_name))
#PST = Paste.Paste(p_path)
else:
publisher.debug("Script Attribute is idling 10s")

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
@ -27,10 +27,9 @@ if __name__ == "__main__":
config_section = ['Curve']
for queue in config_section:
print 'dropping: ' + queue
print('dropping: ' + queue)
p = Process(queue)
while True:
message = p.get_from_set()
if message is None:
break

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# This file is part of AIL framework - Analysis Information Leak framework
@ -25,7 +25,7 @@ import time
import redis
import base64
import os
import ConfigParser
import configparser
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
if not os.path.exists(configfile):
@ -33,7 +33,7 @@ if not os.path.exists(configfile):
Did you set environment variables? \
Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read(configfile)
if cfg.has_option("ZMQ_Global", "bind"):
@ -50,7 +50,7 @@ socket = context.socket(zmq.PUB)
socket.bind(zmq_url)
# check https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L16
r = redis.StrictRedis(host='localhost', db=10)
r = redis.StrictRedis(host='localhost', db=10, decode_responses=True)
# 101 pastes processed feed
# 102 raw pastes feed
@ -59,6 +59,7 @@ while True:
time.sleep(base_sleeptime + sleep_inc)
topic = 101
paste = r.lpop("pastes")
print(paste)
if paste is None:
continue
socket.send("%d %s" % (topic, paste))

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# This file is part of AIL framework - Analysis Information Leak framework
@ -24,9 +24,10 @@ socket.setsockopt(zmq.SUBSCRIBE, topicfilter)
while True:
message = socket.recv()
print('b1')
print (message)
if topicfilter == "102":
topic, paste, messagedata = message.split()
print paste, messagedata
print(paste, messagedata)
else:
print (message)

View file

@ -1,9 +1,9 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import zmq
import base64
import StringIO
from io import StringIO
import gzip
import argparse
import os
@ -31,8 +31,7 @@ import mimetypes
'
'''
import StringIO
import gzip
def is_hierachy_valid(path):
var = path.split('/')
try:
@ -72,7 +71,12 @@ if __name__ == "__main__":
wanted_path = wanted_path.split('/')
wanted_path = '/'.join(wanted_path[-(4+args.hierarchy):])
messagedata = open(complete_path).read()
with gzip.open(complete_path, 'rb') as f:
messagedata = f.read()
#print(type(complete_path))
#file = open(complete_path)
#messagedata = file.read()
#if paste do not have a 'date hierarchy' ignore it
if not is_hierachy_valid(complete_path):
@ -90,5 +94,8 @@ if __name__ == "__main__":
print(args.name+'>'+wanted_path)
path_to_send = args.name + '>' + wanted_path
socket.send('{} {} {}'.format(args.channel, path_to_send, base64.b64encode(messagedata)))
#s = b'{} {} {}'.format(args.channel, path_to_send, base64.b64encode(messagedata))
# use bytes object
s = b' '.join( [ args.channel.encode(), path_to_send.encode(), base64.b64encode(messagedata) ] )
socket.send(s)
time.sleep(args.seconds)
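
zmq sockets want bytes in Python 3, so import_dir.py now reads the paste in binary mode and assembles the frame with b' '.join() instead of str.format(). The framing on its own (channel and path illustrative; the zmq send is left commented out):

import base64

channel = '102'                                          # illustrative feed channel
path_to_send = 'feeder1>archive/2018/05/11/example.gz'   # illustrative name>path pair
messagedata = b'raw paste bytes read with gzip.open(path, "rb")'

# every str part is encoded so the whole frame is a single bytes object
s = b' '.join([channel.encode(), path_to_send.encode(), base64.b64encode(messagedata)])
print(s)
# socket.send(s)   # with the zmq.PUB socket bound earlier in the script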

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# This file is part of AIL framework - Analysis Information Leak framework
@ -10,7 +10,7 @@
#
# Copyright (c) 2014 Alexandre Dulaunoy - a@foo.be
import ConfigParser
import configparser
import argparse
import gzip
import os
@ -23,7 +23,7 @@ def readdoc(path=None):
return f.read()
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read(configfile)
# Indexer configuration - index dir and schema setup
@ -51,7 +51,7 @@ ix = index.open_dir(indexpath)
from whoosh.qparser import QueryParser
if args.n:
print ix.doc_count_all()
print(ix.doc_count_all())
exit(0)
if args.l:

View file

@ -1,8 +1,8 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import ConfigParser
import configparser
import os
import subprocess
import time
@ -23,21 +23,21 @@ if __name__ == '__main__':
raise Exception('Unable to find the configuration file. \
Did you set environment variables? \
Or activate the virtualenv.')
config = ConfigParser.ConfigParser()
config = configparser.ConfigParser()
config.read(configfile)
modules = config.sections()
pids = {}
for module in modules:
pin = subprocess.Popen(["python", './QueueIn.py', '-c', module])
pout = subprocess.Popen(["python", './QueueOut.py', '-c', module])
pin = subprocess.Popen(["python3", './QueueIn.py', '-c', module])
pout = subprocess.Popen(["python3", './QueueOut.py', '-c', module])
pids[module] = (pin, pout)
is_running = True
try:
while is_running:
time.sleep(5)
is_running = False
for module, p in pids.iteritems():
for module, p in pids.items():
pin, pout = p
if pin is None:
# already dead
@ -57,7 +57,7 @@ if __name__ == '__main__':
is_running = True
pids[module] = (pin, pout)
except KeyboardInterrupt:
for module, p in pids.iteritems():
for module, p in pids.items():
pin, pout = p
if pin is not None:
pin.kill()

View file

@ -1,3 +1,5 @@
#!/usr/bin/python3
class Date(object):
"""docstring for Date"""
def __init__(self, *args):
@ -38,4 +40,3 @@ class Date(object):
comp_month = str(computed_date.month).zfill(2)
comp_day = str(computed_date.day).zfill(2)
return comp_year + comp_month + comp_day

View file

@ -1,3 +1,5 @@
#!/usr/bin/python3
import hashlib
import crcmod
import mmh3

View file

@ -1,4 +1,4 @@
#!/usr/bin/python2.7
#!/usr/bin/python3
"""
The ``Paste Class``
@ -24,15 +24,8 @@ import operator
import string
import re
import json
try: # dirty to support python3
import ConfigParser
except:
import configparser
ConfigParser = configparser
try: # dirty to support python3
import cStringIO
except:
from io import StringIO as cStringIO
import configparser
from io import StringIO
import sys
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
from Date import Date
@ -71,25 +64,29 @@ class Paste(object):
Did you set environment variables? \
Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read(configfile)
self.cache = redis.StrictRedis(
host=cfg.get("Redis_Queues", "host"),
port=cfg.getint("Redis_Queues", "port"),
db=cfg.getint("Redis_Queues", "db"))
db=cfg.getint("Redis_Queues", "db"),
decode_responses=True)
self.store = redis.StrictRedis(
host=cfg.get("Redis_Data_Merging", "host"),
port=cfg.getint("Redis_Data_Merging", "port"),
db=cfg.getint("Redis_Data_Merging", "db"))
db=cfg.getint("Redis_Data_Merging", "db"),
decode_responses=True)
self.p_path = p_path
self.p_name = os.path.basename(self.p_path)
self.p_size = round(os.path.getsize(self.p_path)/1024.0, 2)
self.p_mime = magic.from_buffer("test", mime=True)
self.p_mime = magic.from_buffer(self.get_p_content(), mime=True)
# Assuming that the paste will alway be in a day folder which is itself
# in a month folder which is itself in a year folder.
# /year/month/day/paste.gz
var = self.p_path.split('/')
self.p_date = Date(var[-4], var[-3], var[-2])
self.p_source = var[-5]
@ -117,17 +114,18 @@ class Paste(object):
paste = self.cache.get(self.p_path)
if paste is None:
try:
with gzip.open(self.p_path, 'rb') as f:
with gzip.open(self.p_path, 'r') as f:
paste = f.read()
self.cache.set(self.p_path, paste)
self.cache.expire(self.p_path, 300)
except:
return ''
pass
return paste
paste = ''
return str(paste)
def get_p_content_as_file(self):
return cStringIO.StringIO(self.get_p_content())
message = StringIO(self.get_p_content())
return message
def get_p_content_with_removed_lines(self, threshold):
num_line_removed = 0
@ -137,6 +135,7 @@ class Paste(object):
line_id = 0
for line_id, line in enumerate(f):
length = len(line)
if length < line_length_threshold:
string_content += line
else:
@ -202,8 +201,8 @@ class Paste(object):
.. seealso:: _set_p_hash_kind("md5")
"""
for hash_name, the_hash in self.p_hash_kind.iteritems():
self.p_hash[hash_name] = the_hash.Calculate(self.get_p_content())
for hash_name, the_hash in self.p_hash_kind.items():
self.p_hash[hash_name] = the_hash.Calculate(self.get_p_content().encode())
return self.p_hash
def _get_p_language(self):
@ -274,7 +273,10 @@ class Paste(object):
def _get_p_duplicate(self):
self.p_duplicate = self.store.hget(self.p_path, "p_duplicate")
return self.p_duplicate if self.p_duplicate is not None else '[]'
if self.p_duplicate is not None:
return self.p_duplicate
else:
return '[]'
def save_all_attributes_redis(self, key=None):
"""
@ -321,6 +323,28 @@ class Paste(object):
else:
self.store.hset(self.p_path, attr_name, json.dumps(value))
def save_others_pastes_attribute_duplicate(self, attr_name, list_value):
"""
Save a new duplicate on others pastes
"""
for hash_type, path, percent, date in list_value:
#get json
json_duplicate = self.store.hget(path, attr_name)
#json save on redis
if json_duplicate is not None:
list_duplicate = (json.loads(json_duplicate))
# avoid duplicate, a paste can be send by multiples modules
to_add = [hash_type, self.p_path, percent, date]
if to_add not in list_duplicate:
list_duplicate.append(to_add)
self.store.hset(path, attr_name, json.dumps(list_duplicate))
else:
# create the new list
list_duplicate = [[hash_type, self.p_path, percent, date]]
self.store.hset(path, attr_name, json.dumps(list_duplicate))
def _get_from_redis(self, r_serv):
ans = {}
for hash_name, the_hash in self.p_hash:
@ -342,7 +366,7 @@ class Paste(object):
tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
gaps=True, discard_empty=True)
blob = TextBlob(clean(self.get_p_content()), tokenizer=tokenizer)
blob = TextBlob(clean( (self.get_p_content()) ), tokenizer=tokenizer)
for word in blob.tokens:
if word in words.keys():
@ -351,7 +375,7 @@ class Paste(object):
num = 0
words[word] = num + 1
if sort:
var = sorted(words.iteritems(), key=operator.itemgetter(1), reverse=True)
var = sorted(words.items(), key=operator.itemgetter(1), reverse=True)
else:
var = words
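
The new save_others_pastes_attribute_duplicate() stores each paste's duplicate list as a JSON array inside a Redis hash field and only appends entries that are not already present. The pattern in isolation (key names and values illustrative; assumes a local Redis):

import json
import redis

store = redis.StrictRedis(host='localhost', port=6379, db=1, decode_responses=True)

path = 'archive/2018/05/11/other_paste.gz'                               # illustrative paste path
to_add = ['ssdeep', 'archive/2018/05/11/this_paste.gz', 87, '20180511']  # [hash_type, path, percent, date]

json_duplicate = store.hget(path, 'p_duplicate')
if json_duplicate is not None:
    list_duplicate = json.loads(json_duplicate)
    if to_add not in list_duplicate:      # the same duplicate can be reported by several modules
        list_duplicate.append(to_add)
        store.hset(path, 'p_duplicate', json.dumps(list_duplicate))
else:
    store.hset(path, 'p_duplicate', json.dumps([to_add]))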

View file

@ -2,6 +2,7 @@
bloomfilters = Blooms
dicofilters = Dicos
pastes = PASTES
base64 = BASE64
wordtrending_csv = var/www/static/csv/wordstrendingdata
wordsfile = files/wordfile
@ -53,13 +54,20 @@ criticalNumberToAlert=8
#Will be considered as false positive if less that X matches from the top password list
minTopPassList=5
[Curve]
max_execution_time = 90
[Base64]
path = Base64/
max_execution_time = 60
[Modules_Duplicates]
#Number of month to look back
maximum_month_range = 3
#The value where two pastes are considerate duplicate for ssdeep.
threshold_duplicate_ssdeep = 50
#The value where two pastes are considerate duplicate for tlsh.
threshold_duplicate_tlsh = 100
threshold_duplicate_tlsh = 52
#Minimum size of the paste considered
min_paste_size = 0.3
@ -104,46 +112,56 @@ host = localhost
port = 6381
db = 1
##### LevelDB #####
[Redis_Level_DB_Curve]
##### ARDB #####
[ARDB_Curve]
host = localhost
port = 6382
db = 1
[Redis_Level_DB_Sentiment]
[ARDB_Sentiment]
host = localhost
port = 6382
db = 4
[Redis_Level_DB_TermFreq]
[ARDB_TermFreq]
host = localhost
port = 6382
db = 2
[Redis_Level_DB_TermCred]
[ARDB_TermCred]
host = localhost
port = 6382
db = 5
[Redis_Level_DB]
[ARDB_DB]
host = localhost
port = 6382
db = 0
[Redis_Level_DB_Trending]
[ARDB_Trending]
host = localhost
port = 6382
db = 3
[Redis_Level_DB_Hashs]
[ARDB_Hashs]
host = localhost
db = 1
[ARDB_Tags]
host = localhost
port = 6382
db = 6
[Url]
cc_critical = DE
[DomClassifier]
cc = DE
cc_tld = r'\.de$'
dns = 8.8.8.8
[Mail]
dns = 8.8.8.8
# Indexer configuration
[Indexer]
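
The renamed ARDB_* sections and the new integer options above are read with the stdlib configparser, exactly as the migrated modules do. A standalone sketch (assumes the usual AIL_BIN layout):

import configparser
import os

configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
cfg = configparser.ConfigParser()
cfg.read(configfile)

max_execution_time = cfg.getint('Curve', 'max_execution_time')   # 90 in the sample above
ardb_host = cfg.get('ARDB_Trending', 'host')
ardb_port = cfg.getint('ARDB_Trending', 'port')
print(max_execution_time, ardb_host, ardb_port)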

View file

@ -1,3 +1,5 @@
#!/usr/bin/python3
import re
import dns.resolver
@ -17,24 +19,29 @@ def is_luhn_valid(card_number):
return (sum(r[0::2]) + sum(sum(divmod(d*2, 10)) for d in r[1::2])) % 10 == 0
def checking_MX_record(r_serv, adress_set):
def checking_MX_record(r_serv, adress_set, addr_dns):
"""Check if emails MX domains are responding.
:param r_serv: -- Redis connexion database
:param adress_set: -- (set) This is a set of emails adress
:param addr_dns: -- (str) This is a server dns address
:return: (int) Number of adress with a responding and valid MX domains
This function will split the email adress and try to resolve their domains
names: on example@gmail.com it will try to resolve gmail.com
"""
#remove duplicate
adress_set = list(set(adress_set))
score = 0
num = len(adress_set)
WalidMX = set([])
# Transforming the set into a string
MXdomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", str(adress_set).lower())
resolver = dns.resolver.Resolver()
resolver.nameservers = ['149.13.33.69']
resolver.nameservers = [addr_dns]
resolver.timeout = 5
resolver.lifetime = 2
if MXdomains != []:
@ -58,25 +65,31 @@ def checking_MX_record(r_serv, adress_set):
except dns.resolver.NoNameservers:
publisher.debug('NoNameserver, No non-broken nameservers are available to answer the query.')
print('NoNameserver, No non-broken nameservers are available to answer the query.')
except dns.resolver.NoAnswer:
publisher.debug('NoAnswer, The response did not contain an answer to the question.')
print('NoAnswer, The response did not contain an answer to the question.')
except dns.name.EmptyLabel:
publisher.debug('SyntaxError: EmptyLabel')
print('SyntaxError: EmptyLabel')
except dns.resolver.NXDOMAIN:
r_serv.setex(MXdomain[1:], 1, timedelta(days=1))
publisher.debug('The query name does not exist.')
print('The query name does not exist.')
except dns.name.LabelTooLong:
publisher.debug('The Label is too long')
print('The Label is too long')
except dns.resolver.Timeout:
print('timeout')
r_serv.setex(MXdomain[1:], 1, timedelta(days=1))
except Exception as e:
print e
print(e)
publisher.debug("emails before: {0} after: {1} (valid)".format(num, score))
return (num, WalidMX)
@ -125,7 +138,7 @@ def checking_A_record(r_serv, domains_set):
publisher.debug('The Label is too long')
except Exception as e:
print e
print(e)
publisher.debug("URLs before: {0} after: {1} (valid)".format(num, score))
return (num, WalidA)
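
checking_MX_record() now takes the resolver address as a parameter instead of a hard-coded IP, fed from the new [Mail] dns option. The dnspython part of it, sketched alone (nameserver and domain illustrative):

import dns.resolver

addr_dns = '8.8.8.8'                      # illustrative; the module reads it from config.cfg
resolver = dns.resolver.Resolver()
resolver.nameservers = [addr_dns]
resolver.timeout = 5
resolver.lifetime = 2

domain = 'gmail.com'                      # the part after the @ of a harvested address
try:
    answers = resolver.query(domain, 'MX')
    for rdata in answers:
        print(domain, 'MX', rdata.exchange)
except dns.resolver.NXDOMAIN:
    print('The query name does not exist.')
except dns.resolver.NoAnswer:
    print('The response did not contain an answer to the question.')
except dns.resolver.Timeout:
    print('timeout')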

View file

@ -1,3 +1,5 @@
#!/usr/bin/python3
import os
import string
@ -81,17 +83,17 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month
to keep the timeline of the curve correct.
"""
threshold = 50
first_day = date(year, month, 01)
threshold = 30
first_day = date(year, month, 1)
last_day = date(year, month, calendar.monthrange(year, month)[1])
words = []
with open(feederfilename, 'rb') as f:
with open(feederfilename, 'r') as f:
# words of the files
words = sorted([word.strip() for word in f if word.strip()[0:2]!='//' and word.strip()!='' ])
headers = ['Date'] + words
with open(csvfilename+'.csv', 'wb') as f:
with open(csvfilename+'.csv', 'w') as f:
writer = csv.writer(f)
writer.writerow(headers)
@ -103,11 +105,14 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month
# from the 1srt day to the last of the list
for word in words:
value = r_serv.hget(word, curdate)
if value is None:
row.append(0)
else:
# if the word have a value for the day
# FIXME Due to performance issues (too many tlds, leads to more than 7s to perform this procedure), I added a threshold
value = r_serv.hget(word, curdate)
value = int(value)
if value >= threshold:
row.append(value)
writer.writerow(row)
@ -127,14 +132,15 @@ def create_curve_from_redis_set(server, csvfilename, set_to_plot, year, month):
"""
first_day = date(year, month, 01)
first_day = date(year, month, 1)
last_day = date(year, month, calendar.monthrange(year, month)[1])
redis_set_name = set_to_plot + "_set_" + str(year) + str(month).zfill(2)
words = list(server.smembers(redis_set_name))
#words = [x.decode('utf-8') for x in words]
headers = ['Date'] + words
with open(csvfilename+'.csv', 'wb') as f:
with open(csvfilename+'.csv', 'w') as f:
writer = csv.writer(f)
writer.writerow(headers)
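
Both CSV writers in lib_words drop the 'wb' mode because Python 3's csv module writes text rather than bytes. A minimal illustration (path and columns illustrative; newline='' is the csv-recommended way to open the file):

import csv

headers = ['Date', 'http', 'https', 'ftp']        # illustrative word columns
with open('/tmp/protocolstrending.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(headers)
    writer.writerow(['20180511', 12, 40, 0])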

View file

@ -45,7 +45,7 @@ subscribe = Redis_CurveManageTopSets
[Categ]
subscribe = Redis_Global
publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve
publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve,Redis_ApiKey
[CreditCards]
subscribe = Redis_CreditCards
@ -105,3 +105,15 @@ publish = Redis_Duplicate,Redis_alertHandler
[Keys]
subscribe = Redis_Global
publish = Redis_Duplicate,Redis_alertHandler
[ApiKey]
subscribe = Redis_ApiKey
publish = Redis_Duplicate,Redis_alertHandler
[Base64]
subscribe = Redis_Global
publish = Redis_Duplicate,Redis_alertHandler
[Bitcoin]
subscribe = Redis_Global
publish = Redis_Duplicate,Redis_alertHandler

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
'''
@ -48,7 +48,7 @@ if __name__ == '__main__':
message = p.get_from_set()
if message is None:
publisher.debug("{} queue is empty, waiting".format(config_section))
print "queue empty"
print("queue empty")
time.sleep(1)
continue

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
Template for new modules

View file

@ -1,10 +1,10 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import socks
import socket
import urllib2
import StringIO
import urllib.request
import io
import gzip
import base64
import sys
@ -21,17 +21,20 @@ def create_connection(address, timeout=None, source_address=None):
def get_page(url, torclient_host='127.0.0.1', torclient_port=9050):
request = urllib2.Request(url)
request = urllib.request.Request(url)
# UA of the Tor browser bundle
request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0')
return urllib2.urlopen(request, timeout=5).read(max_size * 100000)
return urllib.request.urlopen(request, timeout=5).read(max_size * 100000)
#FIXME don't work at all
def makegzip64(s):
out = StringIO.StringIO()
with gzip.GzipFile(fileobj=out, mode="w") as f:
f.write(s)
return base64.standard_b64encode(out.getvalue())
out = io.BytesIO()
with gzip.GzipFile(fileobj=out, mode='ab') as fo:
fo.write(base64.standard_b64encode(s))
return out.getvalue()
if __name__ == "__main__":
@ -41,7 +44,8 @@ if __name__ == "__main__":
exit(1)
try:
url = base64.standard_b64decode(sys.argv[1])
url = base64.standard_b64decode(sys.argv[1]).decode('utf8')
print(url)
except:
print('unable to decode')
exit(1)
@ -61,7 +65,7 @@ if __name__ == "__main__":
to_write = makegzip64(page)
t, path = tempfile.mkstemp()
with open(path, 'w') as f:
f.write(to_write)
print path
#with open(path, 'w') as f:
#f.write(to_write)
print(path)
exit(0)
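
The rewritten makegzip64() is marked "#FIXME don't work at all" above: it base64-encodes the page before gzipping it, whereas the Python 2 version (and the Onion consumer) produced gzip first, base64 second. A possible Python 3 rendering of the original order, offered purely as a sketch:

import base64
import gzip
import io

def makegzip64(s):
    # gzip-compress the raw page bytes, then base64 the compressed stream,
    # matching what the Python 2 tor_fetcher.py used to emit
    out = io.BytesIO()
    with gzip.GzipFile(fileobj=out, mode='wb') as f:
        f.write(s)
    return base64.standard_b64encode(out.getvalue())

page = b'<html>example onion page</html>'         # illustrative fetched content
print(makegzip64(page)[:60])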

728 configs/6382.conf Normal file → Executable file
View file

@ -1,4 +1,7 @@
# Redis configuration file example
# Ardb configuration file example, modified from redis's conf file.
# Home dir for ardb instance, it can be referenced by ${ARDB_HOME} in this config file
home ../DATA_ARDB/
# Note on units: when memory size is needed, it is possible to specify
# it in the usual form of 1k 5GB 4M and so forth:
@ -12,63 +15,71 @@
#
# units are case insensitive so 1GB 1Gb 1gB are all the same.
################################## INCLUDES ###################################
# Include one or more other config files here. This is useful if you
# have a standard template that goes to all Redis server but also need
# to customize a few per-server settings. Include files can include
# other files, so use this wisely.
#
# Notice option "include" won't be rewritten by command "CONFIG REWRITE"
# from admin or Redis Sentinel. Since Redis always uses the last processed
# line as value of a configuration directive, you'd better put includes
# at the beginning of this file to avoid overwriting config change at runtime.
#
# If instead you are interested in using includes to override configuration
# options, it is better to use include as the last line.
#
# include /path/to/local.conf
# include /path/to/other.conf
################################ GENERAL #####################################
# By default Redis does not run as a daemon. Use 'yes' if you need it.
# Note that Redis will write a pid file in /var/run/redis.pid when daemonized.
# By default Ardb does not run as a daemon. Use 'yes' if you need it.
daemonize no
# When running daemonized, Redis writes a pid file in /var/run/redis.pid by
# When running daemonized, Ardb writes a pid file in ${ARDB_HOME}/ardb.pid by
# default. You can specify a custom pid file location here.
#pidfile /var/run/redis.pid
pidfile ${ARDB_HOME}/ardb.pid
# Accept connections on the specified port, default is 6379.
# If port 0 is specified Redis will not listen on a TCP socket.
port 6382
# The thread pool size for the corresponding all listen servers, -1 means current machine's cpu number
thread-pool-size 4
# TCP listen() backlog.
#
# In high requests-per-second environments you need an high backlog in order
# to avoid slow clients connections issues. Note that the Linux kernel
# will silently truncate it to the value of /proc/sys/net/core/somaxconn so
# make sure to raise both the value of somaxconn and tcp_max_syn_backlog
# in order to get the desired effect.
tcp-backlog 511
#Accept connections on the specified host&port/unix socket, default is 0.0.0.0:16379.
server[0].listen 127.0.0.1:6382
# If current qps exceed the limit, Ardb would return an error.
#server[0].qps-limit 1000
# By default Redis listens for connections from all the network interfaces
# available on the server. It is possible to listen to just one or multiple
# interfaces using the "bind" configuration directive, followed by one or
# more IP addresses.
#
# Examples:
#
# bind 192.168.1.100 10.0.0.1
# bind 127.0.0.1
#listen on unix socket
#server[1].listen /tmp/ardb.sock
#server[1].unixsocketperm 755
#server[1].qps-limit 1000
# Specify the path for the Unix socket that will be used to listen for
# incoming connections. There is no default, so Redis will not listen
# on a unix socket when not specified.
# 'qps-limit-per-host' limits the requests per second from the same host
# 'qps-limit-per-connection' limits the requests per second from the same connection
qps-limit-per-host 0
qps-limit-per-connection 0
# Specify the optimized RocksDB compaction strategies.
# If anything other than none is set then the rocksdb.options will not be used.
# The property can be one of:
# OptimizeLevelStyleCompaction
# OptimizeUniversalStyleCompaction
# none
#
#unixsocket /tmp/redis.sock
#unixsocketperm 755
rocksdb.compaction OptimizeLevelStyleCompaction
# Enable this to indicate that the hscan/sscan/zscan commands use total order mode for the rocksdb engine
rocksdb.scan-total-order false
# Disabling the RocksDB WAL may improve write performance, but
# data in the un-flushed memtables might be lost in case of a RocksDB shutdown.
# Disabling the WAL provides guarantees similar to Redis.
rocksdb.disableWAL false
#rocksdb's options
rocksdb.options write_buffer_size=512M;max_write_buffer_number=5;min_write_buffer_number_to_merge=3;compression=kSnappyCompression;\
bloom_locality=1;memtable_prefix_bloom_size_ratio=0.1;\
block_based_table_factory={block_cache=512M;filter_policy=bloomfilter:10:true};\
create_if_missing=true;max_open_files=10000;rate_limiter_bytes_per_sec=50M;\
use_direct_io_for_flush_and_compaction=true;use_adaptive_mutex=true
#leveldb's options
leveldb.options block_cache_size=512M,write_buffer_size=128M,max_open_files=5000,block_size=4k,block_restart_interval=16,\
bloom_bits=10,compression=snappy,logenable=yes,max_file_size=2M
#lmdb's options
lmdb.options database_maxsize=10G,database_maxdbs=4096,readahead=no,batch_commit_watermark=1024
#perconaft's options
perconaft.options cache_size=128M,compression=snappy
#wiredtiger's options
wiredtiger.options cache_size=512M,session_max=8k,chunk_size=100M,block_size=4k,bloom_bits=10,\
mmap=false,compressor=snappy
#forestdb's options
forestdb.options chunksize=8,blocksize=4K
# Close the connection after a client is idle for N seconds (0 to disable)
timeout 0
@ -91,92 +102,21 @@ tcp-keepalive 0
# Specify the server verbosity level.
# This can be one of:
# debug (a lot of information, useful for development/testing)
# verbose (many rarely useful info, but not a mess like the debug level)
# notice (moderately verbose, what you want in production probably)
# warning (only very important / critical messages are logged)
loglevel notice
# error
# warn
# info
# debug
# trace
loglevel info
# Specify the log file name. Also the empty string can be used to force
# Specify the log file name. Also 'stdout' can be used to force
# Redis to log on the standard output. Note that if you use standard
# output for logging but daemonize, logs will be sent to /dev/null
logfile ""
#logfile ${ARDB_HOME}/log/ardb-server.log
logfile stdout
# To enable logging to the system logger, just set 'syslog-enabled' to yes,
# and optionally update the other syslog parameters to suit your needs.
# syslog-enabled no
# Specify the syslog identity.
# syslog-ident redis
# Specify the syslog facility. Must be USER or between LOCAL0-LOCAL7.
# syslog-facility local0
# Set the number of databases. The default database is DB 0, you can select
# a different one on a per-connection basis using SELECT <dbid> where
# dbid is a number between 0 and 'databases'-1
databases 16
################################ SNAPSHOTTING ################################
#
# Save the DB on disk:
#
# save <seconds> <changes>
#
# Will save the DB if both the given number of seconds and the given
# number of write operations against the DB occurred.
#
# In the example below the behaviour will be to save:
# after 900 sec (15 min) if at least 1 key changed
# after 300 sec (5 min) if at least 10 keys changed
# after 60 sec if at least 10000 keys changed
#
# Note: you can disable saving at all commenting all the "save" lines.
#
# It is also possible to remove all the previously configured save
# points by adding a save directive with a single empty string argument
# like in the following example:
#
# save ""
#save 900 1
#save 300 10
save 300 100000
# By default Redis will stop accepting writes if RDB snapshots are enabled
# (at least one save point) and the latest background save failed.
# This will make the user aware (in a hard way) that data is not persisting
# on disk properly, otherwise chances are that no one will notice and some
# disaster will happen.
#
# If the background saving process will start working again Redis will
# automatically allow writes again.
#
# However if you have setup your proper monitoring of the Redis server
# and persistence, you may want to disable this feature so that Redis will
# continue to work as usual even if there are problems with disk,
# permissions, and so forth.
stop-writes-on-bgsave-error yes
# Compress string objects using LZF when dump .rdb databases?
# By default that's set to 'yes' as it's almost always a win.
# If you want to save some CPU in the saving child set it to 'no' but
# the dataset will likely be bigger if you have compressible values or keys.
rdbcompression yes
# Since version 5 of RDB a CRC64 checksum is placed at the end of the file.
# This makes the format more resistant to corruption but there is a performance
# hit to pay (around 10%) when saving and loading RDB files, so you can disable it
# for maximum performances.
#
# RDB files created with checksum disabled have a checksum of zero that will
# tell the loading code to skip the check.
rdbchecksum yes
# The filename where to dump the DB
dbfilename dump6382.rdb
# The working directory.
# The working data directory.
#
# The DB will be written inside this directory, with the filename specified
# above using the 'dbfilename' configuration directive.
@ -184,22 +124,29 @@ dbfilename dump6382.rdb
# The Append Only File will also be created inside this directory.
#
# Note that you must specify a directory here, not a file name.
dir ../dumps/
data-dir ${ARDB_HOME}/data
################################# REPLICATION #################################
# Master-Slave replication. Use slaveof to make a Redis instance a copy of
# another Redis server. Note that the configuration is local to the slave
# Master-Slave replication. Use slaveof to make a Ardb instance a copy of
# another Ardb server. Note that the configuration is local to the slave
# so for example it is possible to configure the slave to save the DB with a
# different interval, or to listen to another port, and so on.
#
# slaveof <masterip> <masterport>
# slaveof <masterip>:<masterport>
#slaveof 127.0.0.1:6379
# By default, ardb use 2 threads to execute commands synced from master.
# -1 means use current CPU number threads instead.
slave-workers 2
# Max synced command queue size in memory.
max-slave-worker-queue 1024
# The directory for replication.
repl-dir ${ARDB_HOME}/repl
# If the master is password protected (using the "requirepass" configuration
# directive below) it is possible to tell the slave to authenticate before
# starting the replication synchronization process, otherwise the master will
# refuse the slave request.
#
# masterauth <master-password>
# When a slave loses its connection with the master, or when the replication
# is still in progress, the slave can act in two different ways:
@ -214,33 +161,55 @@ dir ../dumps/
#
slave-serve-stale-data yes
# The slave priority is an integer number published by Ardb/Redis in the INFO output.
# It is used by Redis Sentinel in order to select a slave to promote into a
# master if the master is no longer working correctly.
#
# A slave with a low priority number is considered better for promotion, so
# for instance if there are three slaves with priority 10, 100, 25 Sentinel will
# pick the one with priority 10, that is the lowest.
#
# However a special priority of 0 marks the slave as not able to perform the
# role of master, so a slave with priority of 0 will never be selected by
# Redis Sentinel for promotion.
#
# By default the priority is 100.
slave-priority 100
# You can configure a slave instance to accept writes or not. Writing against
# a slave instance may be useful to store some ephemeral data (because data
# written on a slave will be easily deleted after resync with the master) but
# may also cause problems if clients are writing to it because of a
# misconfiguration.
#
# Since Redis 2.6 by default slaves are read-only.
#
# Note: read only slaves are not designed to be exposed to untrusted clients
# on the internet. It's just a protection layer against misuse of the instance.
# Still a read only slave exports by default all the administrative commands
# such as CONFIG, DEBUG, and so forth. To a limited extent you can improve
# security of read only slaves using 'rename-command' to shadow all the
# administrative / dangerous commands.
#
# Note: any write requests processed by non read-only slaves are not written to the replication
# log and are not synced to connected slaves.
slave-read-only yes
# The directory for backup.
backup-dir ${ARDB_HOME}/backup
#
# You can configure the backup file format as 'redis' or 'ardb'. The 'ardb' format
# can only be used by an ardb instance, while a 'redis' format file can be used by both redis
# and ardb instances.
backup-file-format ardb
# Slaves send PINGs to server in a predefined interval. It's possible to change
# this interval with the repl_ping_slave_period option. The default value is 10
# seconds.
#
# repl-ping-slave-period 10
# The following option sets the replication timeout for:
#
# 1) Bulk transfer I/O during SYNC, from the point of view of slave.
# 2) Master timeout from the point of view of slaves (data, pings).
# 3) Slave timeout from the point of view of masters (REPLCONF ACK pings).
# The following option sets a timeout for both bulk transfer I/O and
# master data or ping responses. The default value is 60 seconds.
#
# It is important to make sure that this value is greater than the value
# specified for repl-ping-slave-period otherwise a timeout will be detected
@ -250,7 +219,7 @@ slave-read-only yes
# Disable TCP_NODELAY on the slave socket after SYNC?
#
# If you select "yes" Redis will use a smaller number of TCP packets and
# If you select "yes" Ardb will use a smaller number of TCP packets and
# less bandwidth to send data to slaves. But this can add a delay for
# the data to appear on the slave side, up to 40 milliseconds with
# Linux kernels using a default configuration.
@ -272,9 +241,46 @@ repl-disable-tcp-nodelay no
# The bigger the replication backlog, the longer the time the slave can be
# disconnected and later be able to perform a partial resynchronization.
#
# The backlog is only allocated once there is at least a slave connected.
# If the size is configured to 0, then the Ardb instance can NOT serve as a master.
#
# repl-backlog-size 1mb
# repl-backlog-size 500m
repl-backlog-size 1G
repl-backlog-cache-size 100M
snapshot-max-lag-offset 500M
# Set the max number of snapshots. By default this limit is set to 10 snapshots.
# Once the limit is reached, Ardb tries to remove the oldest snapshots.
maxsnapshots 10
# It is possible for a master to stop accepting writes if there are less than
# N slaves connected, having a lag less or equal than M seconds.
#
# The N slaves need to be in "online" state.
#
# The lag in seconds, that must be <= the specified value, is calculated from
# the last ping received from the slave, that is usually sent every second.
#
# This option does not GUARANTEE that N replicas will accept the write, but
# will limit the window of exposure for lost writes in case not enough slaves
# are available, to the specified number of seconds.
#
# For example to require at least 3 slaves with a lag <= 10 seconds use:
#
# min-slaves-to-write 3
# min-slaves-max-lag 10
# When a slave loses its connection with the master, or when the replication
# is still in progress, the slave can act in two different ways:
#
# 1) if slave-serve-stale-data is set to 'yes' (the default) the slave will
# still reply to client requests, possibly with out of date data, or the
# data set may just be empty if this is the first synchronization.
#
# 2) if slave-serve-stale-data is set to 'no' the slave will reply with
# an error "SYNC with master in progress" to all the kind of commands
# but to INFO and SLAVEOF.
#
slave-serve-stale-data yes
# After a master has no longer connected slaves for some time, the backlog
# will be freed. The following option configures the amount of seconds that
@ -285,42 +291,32 @@ repl-disable-tcp-nodelay no
#
# repl-backlog-ttl 3600
# The slave priority is an integer number published by Redis in the INFO output.
# It is used by Redis Sentinel in order to select a slave to promote into a
# master if the master is no longer working correctly.
#
# A slave with a low priority number is considered better for promotion, so
# for instance if there are three slaves with priority 10, 100, 25 Sentinel will
# pick the one with priority 10, that is the lowest.
#
# However a special priority of 0 marks the slave as not able to perform the
# role of master, so a slave with priority of 0 will never be selected by
# Redis Sentinel for promotion.
#
# By default the priority is 100.
slave-priority 100
# The slave clears its current data store before a full resync with the master.
# This makes sure that the slave stays consistent with the master's data, but clearing the data
# may take a long time, depending on the size of the data set.
# If set to 'no', the slave may end up holding different data than the master.
slave-cleardb-before-fullresync yes
# It is possible for a master to stop accepting writes if there are less than
# N slaves connected, having a lag less or equal than M seconds.
# Master/slave instances persist their sync state every 'repl-backlog-sync-period' seconds.
repl-backlog-sync-period 5
# If set to 'yes', the slave ignores any 'expire' setting received from replicated commands.
# This can be used when the master is a redis instance serving hot data with expire settings, and the slave is
# an ardb instance that persists all data.
# Since the master redis instance generates a 'del' for each expired key, the slave should ignore
# all 'del' commands too by setting 'slave-ignore-del' to 'yes' for this scenario.
slave-ignore-expire no
slave-ignore-del no
# After a master has no longer connected slaves for some time, the backlog
# will be freed. The following option configures the amount of seconds that
# need to elapse, starting from the time the last slave disconnected, for
# the backlog buffer to be freed.
#
# The N slaves need to be in "online" state.
# A value of 0 means to never release the backlog.
#
# The lag in seconds, that must be <= the specified value, is calculated from
# the last ping received from the slave, that is usually sent every second.
#
# This option does not GUARANTEE that N replicas will accept the write, but
# will limit the window of exposure for lost writes in case not enough slaves
# are available, to the specified number of seconds.
#
# For example to require at least 3 slaves with a lag <= 10 seconds use:
#
# min-slaves-to-write 3
# min-slaves-max-lag 10
#
# Setting one or the other to 0 disables the feature.
#
# By default min-slaves-to-write is set to 0 (feature disabled) and
# min-slaves-max-lag is set to 10.
# repl-backlog-ttl 3600
################################## SECURITY ###################################
@ -356,6 +352,15 @@ slave-priority 100
# Please note that changing the name of commands that are logged into the
# AOF file or transmitted to slaves may cause problems.
################################ CLUSTER ###############################
# Max execution time of a Lua script in milliseconds.
#zookeeper-servers 127.0.0.1:2181,127.0.0.1:2182,127.0.0.1:2183
#zk-recv-timeout 10000
#zk-clientid-file ${ARDB_HOME}/ardb.zkclientid
cluster-name ardb-cluster
################################### LIMITS ####################################
# Set the max number of connected clients at the same time. By default
@ -369,173 +374,13 @@ slave-priority 100
#
# maxclients 10000
# Don't use more memory than the specified amount of bytes.
# When the memory limit is reached Redis will try to remove keys
# according to the eviction policy selected (see maxmemory-policy).
#
# If Redis can't remove keys according to the policy, or if the policy is
# set to 'noeviction', Redis will start to reply with errors to commands
# that would use more memory, like SET, LPUSH, and so on, and will continue
# to reply to read-only commands like GET.
#
# This option is usually useful when using Redis as an LRU cache, or to set
# a hard memory limit for an instance (using the 'noeviction' policy).
#
# WARNING: If you have slaves attached to an instance with maxmemory on,
# the size of the output buffers needed to feed the slaves are subtracted
# from the used memory count, so that network problems / resyncs will
# not trigger a loop where keys are evicted, and in turn the output
# buffer of slaves is full with DELs of keys evicted triggering the deletion
# of more keys, and so forth until the database is completely emptied.
#
# In short... if you have slaves attached it is suggested that you set a lower
# limit for maxmemory so that there is some free RAM on the system for slave
# output buffers (but this is not needed if the policy is 'noeviction').
#
# maxmemory <bytes>
# MAXMEMORY POLICY: how Redis will select what to remove when maxmemory
# is reached. You can select among five behaviors:
#
# volatile-lru -> remove the key with an expire set using an LRU algorithm
# allkeys-lru -> remove any key accordingly to the LRU algorithm
# volatile-random -> remove a random key with an expire set
# allkeys-random -> remove a random key, any key
# volatile-ttl -> remove the key with the nearest expire time (minor TTL)
# noeviction -> don't expire at all, just return an error on write operations
#
# Note: with any of the above policies, Redis will return an error on write
# operations, when there are not suitable keys for eviction.
#
# At the time of writing these commands are: set setnx setex append
# incr decr rpush lpush rpushx lpushx linsert lset rpoplpush sadd
# sinter sinterstore sunion sunionstore sdiff sdiffstore zadd zincrby
# zunionstore zinterstore hset hsetnx hmset hincrby incrby decrby
# getset mset msetnx exec sort
#
# The default is:
#
# maxmemory-policy volatile-lru
# LRU and minimal TTL algorithms are not precise algorithms but approximated
# algorithms (in order to save memory), so you can select as well the sample
# size to check. By default Redis will check three keys and
# pick the one that was used least recently; you can change the sample size
# using the following configuration directive.
#
# maxmemory-samples 3
############################## APPEND ONLY MODE ###############################
# By default Redis asynchronously dumps the dataset on disk. This mode is
# good enough in many applications, but an issue with the Redis process or
# a power outage may result into a few minutes of writes lost (depending on
# the configured save points).
#
# The Append Only File is an alternative persistence mode that provides
# much better durability. For instance using the default data fsync policy
# (see later in the config file) Redis can lose just one second of writes in a
# dramatic event like a server power outage, or a single write if something
# wrong with the Redis process itself happens, but the operating system is
# still running correctly.
#
# AOF and RDB persistence can be enabled at the same time without problems.
# If the AOF is enabled on startup Redis will load the AOF, that is the file
# with the better durability guarantees.
#
# Please check http://redis.io/topics/persistence for more information.
appendonly no
# The name of the append only file (default: "appendonly.aof")
appendfilename "appendonly.aof"
# The fsync() call tells the Operating System to actually write data on disk
# instead to wait for more data in the output buffer. Some OS will really flush
# data on disk, some other OS will just try to do it ASAP.
#
# Redis supports three different modes:
#
# no: don't fsync, just let the OS flush the data when it wants. Faster.
# always: fsync after every write to the append only log. Slow, Safest.
# everysec: fsync only one time every second. Compromise.
#
# The default is "everysec", as that's usually the right compromise between
# speed and data safety. It's up to you to understand if you can relax this to
# "no" that will let the operating system flush the output buffer when
# it wants, for better performances (but if you can live with the idea of
# some data loss consider the default persistence mode that's snapshotting),
# or on the contrary, use "always" that's very slow but a bit safer than
# everysec.
#
# More details please check the following article:
# http://antirez.com/post/redis-persistence-demystified.html
#
# If unsure, use "everysec".
# appendfsync always
appendfsync everysec
# appendfsync no
# When the AOF fsync policy is set to always or everysec, and a background
# saving process (a background save or AOF log background rewriting) is
# performing a lot of I/O against the disk, in some Linux configurations
# Redis may block too long on the fsync() call. Note that there is no fix for
# this currently, as even performing fsync in a different thread will block
# our synchronous write(2) call.
#
# In order to mitigate this problem it's possible to use the following option
# that will prevent fsync() from being called in the main process while a
# BGSAVE or BGREWRITEAOF is in progress.
#
# This means that while another child is saving, the durability of Redis is
# the same as "appendfsync none". In practical terms, this means that it is
# possible to lose up to 30 seconds of log in the worst scenario (with the
# default Linux settings).
#
# If you have latency problems turn this to "yes". Otherwise leave it as
# "no" that is the safest pick from the point of view of durability.
no-appendfsync-on-rewrite no
# Automatic rewrite of the append only file.
# Redis is able to automatically rewrite the log file implicitly calling
# BGREWRITEAOF when the AOF log size grows by the specified percentage.
#
# This is how it works: Redis remembers the size of the AOF file after the
# latest rewrite (if no rewrite has happened since the restart, the size of
# the AOF at startup is used).
#
# This base size is compared to the current size. If the current size is
# bigger than the specified percentage, the rewrite is triggered. Also
# you need to specify a minimal size for the AOF file to be rewritten, this
# is useful to avoid rewriting the AOF file even if the percentage increase
# is reached but it is still pretty small.
#
# Specify a percentage of zero in order to disable the automatic AOF
# rewrite feature.
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb
################################ LUA SCRIPTING ###############################
# Max execution time of a Lua script in milliseconds.
#
# If the maximum execution time is reached Redis will log that a script is
# still in execution after the maximum allowed time and will start to
# reply to queries with an error.
#
# When a long running script exceeds the maximum execution time, only the
# SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be
# used to stop a script that has not yet called write commands. The second
# is the only way to shut down the server in case a write command was
# already issued by the script but the user does not want to wait for the natural
# termination of the script.
#
# Set it to 0 or a negative value for unlimited execution without warnings.
lua-time-limit 5000
# The client output buffer limits can be used to force disconnection of clients
# that are not reading data from the server fast enough for some reason (a
# common reason is that a Pub/Sub/Slave client can't consume messages as fast as the
# publisher can produce them).
slave-client-output-buffer-limit 256mb
pubsub-client-output-buffer-limit 32mb
################################## SLOW LOG ###################################
@ -561,156 +406,63 @@ slowlog-log-slower-than 10000
# You can reclaim memory used by the slow log with SLOWLOG RESET.
slowlog-max-len 128
############################# Event notification ##############################
################################ LUA SCRIPTING ###############################
# Redis can notify Pub/Sub clients about events happening in the key space.
# This feature is documented at http://redis.io/topics/keyspace-events
# Max execution time of a Lua script in milliseconds.
#
# For instance if keyspace events notification is enabled, and a client
# performs a DEL operation on key "foo" stored in the Database 0, two
# messages will be published via Pub/Sub:
# If the maximum execution time is reached Redis will log that a script is
# still in execution after the maximum allowed time and will start to
# reply to queries with an error.
#
# PUBLISH __keyspace@0__:foo del
# PUBLISH __keyevent@0__:del foo
# When a long running script exceeds the maximum execution time, only the
# SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be
# used to stop a script that has not yet called write commands. The second
# is the only way to shut down the server in case a write command was
# already issued by the script but the user does not want to wait for the natural
# termination of the script.
#
# It is possible to select the events that Redis will notify among a set
# of classes. Every class is identified by a single character:
#
# K Keyspace events, published with __keyspace@<db>__ prefix.
# E Keyevent events, published with __keyevent@<db>__ prefix.
# g Generic commands (non-type specific) like DEL, EXPIRE, RENAME, ...
# $ String commands
# l List commands
# s Set commands
# h Hash commands
# z Sorted set commands
# x Expired events (events generated every time a key expires)
# e Evicted events (events generated when a key is evicted for maxmemory)
# A Alias for g$lshzxe, so that the "AKE" string means all the events.
#
# The "notify-keyspace-events" takes as argument a string that is composed
# by zero or multiple characters. The empty string means that notifications
# are disabled at all.
#
# Example: to enable list and generic events, from the point of view of the
# event name, use:
#
# notify-keyspace-events Elg
#
# Example 2: to get the stream of the expired keys subscribing to channel
# name __keyevent@0__:expired use:
#
# notify-keyspace-events Ex
#
# By default all notifications are disabled because most users don't need
# this feature and the feature has some overhead. Note that if you don't
# specify at least one of K or E, no events will be delivered.
notify-keyspace-events ""
# Set it to 0 or a negative value for unlimited execution without warnings.
lua-time-limit 5000
############################### ADVANCED CONFIG ###############################
## Since some redis clients check the info command's output, this configuration
## is added to the 'misc' section of info's output
#additional-misc-info redis_version:2.8.9\nredis_trick:yes
# Hashes are encoded using a memory efficient data structure when they have a
# small number of entries, and the biggest entry does not exceed a given
# threshold. These thresholds can be configured using the following directives.
hash-max-ziplist-entries 512
hash-max-ziplist-value 64
# Similarly to hashes, small lists are also encoded in a special way in order
# to save a lot of space. The special representation is only used when
# you are under the following limits:
list-max-ziplist-entries 512
list-max-ziplist-value 64
# HyperLogLog sparse representation bytes limit. The limit includes the
# 16 bytes header. When an HyperLogLog using the sparse representation crosses
# this limit, it is converted into the dense representation.
#
# A value greater than 16000 is totally useless, since at that point the
# dense representation is more memory efficient.
#
# The suggested value is ~ 3000 in order to have the benefits of
# the space efficient encoding without slowing down too much PFADD,
# which is O(N) with the sparse encoding. The value can be raised to
# ~ 10000 when CPU is not a concern, but space is, and the data set is
# composed of many HyperLogLogs with cardinality in the 0 - 15000 range.
hll-sparse-max-bytes 3000
# Sets have a special encoding in just one case: when a set is composed
# of just strings that happen to be integers in radix 10 in the range
# of 64 bit signed integers.
# The following configuration setting sets the limit in the size of the
# set in order to use this special memory saving encoding.
set-max-intset-entries 512
#trusted-ip 10.10.10.10
#trusted-ip 10.10.10.*
# Similarly to hashes and lists, sorted sets are also specially encoded in
# order to save a lot of space. This encoding is only used when the length and
# elements of a sorted set are below the following limits:
zset-max-ziplist-entries 128
zset-max-ziplist-value 64
# By default Ardb does not compact the whole db after loading a snapshot, which may happen
# when a slave syncs from the master or when processing an 'import' command from a client.
# This configuration only works with the rocksdb engine.
# If ardb does not compact data after loading a snapshot file, read performance is poor until rocksdb
# completes the next compaction task internally, and that compaction can take a very long time for a huge data set.
compact-after-snapshot-load false
# Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in
# order to help rehashing the main Redis hash table (the one mapping top-level
# keys to values). The hash table implementation Redis uses (see dict.c)
# performs a lazy rehashing: the more operations you run against a hash table
# that is rehashing, the more rehashing "steps" are performed, so if the
# server is idle the rehashing is never complete and some more memory is used
# by the hash table.
#
# The default is to use this millisecond 10 times every second in order to
# actively rehash the main dictionaries, freeing memory when possible.
#
# If unsure:
# use "activerehashing no" if you have hard latency requirements and it is
# not a good thing in your environment that Redis can reply from time to time
# to queries with 2 milliseconds delay.
#
# use "activerehashing yes" if you don't have such hard requirements but
# want to free memory asap when possible.
activerehashing yes
# Ardb stores the scan cursor in memory
scan-redis-compatible yes
scan-cursor-expire-after 60
# The client output buffer limits can be used to force disconnection of clients
# that are not reading data from the server fast enough for some reason (a
# common reason is that a Pub/Sub client can't consume messages as fast as the
# publisher can produce them).
#
# The limit can be set differently for the three different classes of clients:
#
# normal -> normal clients
# slave -> slave clients and MONITOR clients
# pubsub -> clients subscribed to at least one pubsub channel or pattern
#
# The syntax of every client-output-buffer-limit directive is the following:
#
# client-output-buffer-limit <class> <hard limit> <soft limit> <soft seconds>
#
# A client is immediately disconnected once the hard limit is reached, or if
# the soft limit is reached and remains reached for the specified number of
# seconds (continuously).
# So for instance if the hard limit is 32 megabytes and the soft limit is
# 16 megabytes / 10 seconds, the client will get disconnected immediately
# if the size of the output buffers reach 32 megabytes, but will also get
# disconnected if the client reaches 16 megabytes and continuously overcomes
# the limit for 10 seconds.
#
# By default normal clients are not limited because they don't receive data
# without asking (in a push way), but just after a request, so only
# asynchronous clients may create a scenario where data is requested faster
# than it can be read.
#
# Instead there is a default limit for pubsub and slave clients, since
# subscribers and slaves receive data in a push fashion.
#
# Both the hard or the soft limit can be disabled by setting them to zero.
client-output-buffer-limit normal 0 0 0
client-output-buffer-limit slave 256mb 64mb 60
client-output-buffer-limit pubsub 32mb 8mb 60
redis-compatible-mode yes
redis-compatible-version 2.8.0
# Redis calls an internal function to perform many background tasks, like
# closing connections of clients in timeout, purging expired keys that are
# never requested, and so forth.
#
# Not all tasks are performed with the same frequency, but Redis checks for
# tasks to perform accordingly to the specified "hz" value.
#
# By default "hz" is set to 10. Raising the value will use more CPU when
# Redis is idle, but at the same time will make Redis more responsive when
# there are many keys expiring at the same time, and timeouts may be
# handled with more precision.
#
# The range is between 1 and 500, however a value over 100 is usually not
# a good idea. Most users should use the default of 10 and raise this up to
# 100 only in environments where very low latency is required.
hz 10
statistics-log-period 600
# When a child rewrites the AOF file, if the following option is enabled
# the file will be fsync-ed every 32 MB of data generated. This is useful
# in order to commit the file to the disk more incrementally and avoid
# big latency spikes.
aof-rewrite-incremental-fsync yes
# Range deletion min size trigger
range-delete-min-size 100
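Because redis-compatible-mode is enabled above, the ARDB instance listening on 127.0.0.1:6382 can be queried with a plain redis client; a hedged sketch (database index and key are placeholders):

import redis

# server[0].listen 127.0.0.1:6382, as configured above
r = redis.StrictRedis(host='127.0.0.1', port=6382, db=0, decode_responses=True)
r.set('ail:selftest', 'ok')            # placeholder key, illustration only
print(r.get('ail:selftest'))           # -> 'ok' (str, thanks to decode_responses)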

5
files/ApiKey Normal file
View file

@ -0,0 +1,5 @@
amazon
amazonaws
amzn
aws
googleapis
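Purely as an illustration of how a keyword file such as files/ApiKey can be matched against paste content (the function names and matching logic below are assumptions, not the framework's actual routing code):

def load_wordlist(path='files/ApiKey'):
    # one lowercase keyword per line, blank lines ignored
    with open(path) as f:
        return [w.strip().lower() for w in f if w.strip()]

def matching_keywords(content, words):
    lowered = content.lower()
    return [w for w in words if w in lowered]

# e.g. matching_keywords(paste_text, load_wordlist()) might return ['amazonaws', 'aws']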

View file

@ -5,7 +5,7 @@ set -x
sudo apt-get update
sudo apt-get install python-pip python-virtualenv python-dev libfreetype6-dev \
sudo apt-get install python3-pip python-virtualenv python3-dev libfreetype6-dev \
screen g++ python-tk unzip libsnappy-dev cmake -y
#optional tor install
@ -15,7 +15,7 @@ sudo apt-get install tor
sudo apt-get install libssl-dev libfreetype6-dev python-numpy -y
#pyMISP
sudo apt-get -y install python3-pip
#sudo apt-get -y install python3-pip
# DNS deps
sudo apt-get install libadns1 libadns1-dev -y
@ -60,11 +60,9 @@ sudo ldconfig
popd
popd
# REDIS LEVEL DB #
test ! -d redis-leveldb/ && git clone https://github.com/KDr2/redis-leveldb.git
pushd redis-leveldb/
git submodule init
git submodule update
# ARDB #
test ! -d ardb/ && git clone https://github.com/yinqiwen/ardb.git
pushd ardb/
make
popd
@ -73,18 +71,18 @@ if [ ! -f bin/packages/config.cfg ]; then
fi
pushd var/www/
./update_thirdparty.sh
sudo ./update_thirdparty.sh
popd
if [ -z "$VIRTUAL_ENV" ]; then
virtualenv AILENV
virtualenv -p python3 AILENV
echo export AIL_HOME=$(pwd) >> ./AILENV/bin/activate
echo export AIL_BIN=$(pwd)/bin/ >> ./AILENV/bin/activate
echo export AIL_FLASK=$(pwd)/var/www/ >> ./AILENV/bin/activate
echo export AIL_REDIS=$(pwd)/redis/src/ >> ./AILENV/bin/activate
echo export AIL_LEVELDB=$(pwd)/redis-leveldb/ >> ./AILENV/bin/activate
echo export AIL_ARDB=$(pwd)/ardb/src/ >> ./AILENV/bin/activate
. ./AILENV/bin/activate
@ -93,28 +91,29 @@ fi
year1=20`date +%y`
year2=20`date --date='-1 year' +%y`
mkdir -p $AIL_HOME/{PASTES,Blooms,dumps}
mkdir -p $AIL_HOME/LEVEL_DB_DATA/{$year1,$year2}
pip install -U pip
pip install -U -r pip_packages_requirement.txt
pip3 install -U pip
pip3 install -U -r pip3_packages_requirement.txt
# Pyfaup
pushd faup/src/lib/bindings/python/
python setup.py install
python3 setup.py install
popd
# Py tlsh
pushd tlsh/py_ext
python setup.py build
python setup.py install
sudo python3 setup.py build
sudo python3 setup.py install
#python setup.py build
#python setup.py install
python3 setup.py build
python3 setup.py install
# Download the necessary NLTK corpora and sentiment vader
HOME=$(pwd) python -m textblob.download_corpora
python -m nltk.downloader vader_lexicon
python -m nltk.downloader punkt
HOME=$(pwd) python3 -m textblob.download_corpora
python3 -m nltk.downloader vader_lexicon
python3 -m nltk.downloader punkt
# install nosetests
sudo pip install nose
#Create the file all_module and update the graph in doc
$AIL_HOME/doc/generate_modules_data_flow_graph.sh
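Once installing_deps.sh has run and AILENV is activated, the variables it writes into AILENV/bin/activate can be checked from Python; a small sanity-check sketch:

import os

# variables exported by AILENV/bin/activate in the installer above
for var in ('AIL_HOME', 'AIL_BIN', 'AIL_FLASK', 'AIL_REDIS', 'AIL_ARDB'):
    print(var, '=', os.environ.get(var, '<not set>'))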

View file

@ -1,13 +1,64 @@
pymisp
redis
filemagic
#filemagic conflict with magic
crcmod
mmh3
ssdeep
nltk
textblob
pubsublogger
zmq
langid
#Essential
redis
pyzmq
dnspython
logbook
pubsublogger
textblob
#Tokeniser
nltk
#Graph
numpy
matplotlib
networkx
terminaltables
colorama
asciimatics
# Hashlib
crcmod
mmh3
ssdeep
python-Levenshtein
#Others
python-magic
pybloomfiltermmap
psutil
phonenumbers
ipython
flask
texttable
#DomainClassifier
DomainClassifier
#Indexer requirements
whoosh
ipaddress
pycountry
# To fetch Onion urls
PySocks
#ASN lookup requirements
#https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/adns-python/adns-python-1.2.1.tar.gz
https://github.com/trolldbois/python3-adns/archive/master.zip
https://github.com/trolldbois/python-cymru-services/archive/master.zip
https://github.com/saffsd/langid.py/archive/master.zip

View file

@ -1,51 +0,0 @@
#Essential
redis
pyzmq
dnspython
logbook
pubsublogger
textblob
#Graph
numpy
matplotlib
networkx
terminaltables
colorama
asciimatics
#Tokeniser
nltk
# Hashlib
crcmod
mmh3
ssdeep
python-Levenshtein
#Others
python-magic
pybloomfiltermmap
psutil
phonenumbers
ipython
flask
texttable
#DomainClassifier
DomainClassifier
#Indexer requirements
whoosh
ipaddress
pycountry
# To fetch Onion urls
PySocks
#ASN lookup requirements
https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/adns-python/adns-python-1.2.1.tar.gz
https://github.com/trolldbois/python-cymru-services/archive/master.zip
https://github.com/saffsd/langid.py/archive/master.zip

9
python3_upgrade.sh Executable file
View file

@ -0,0 +1,9 @@
#!/bin/bash
sudo rm -rf AILENV
mkdir old
sudo mv indexdir old/old_indexdir_python2
sudo mv LEVEL_DB_DATA old/old_LEVEL_DB_DATA
sudo mv dumps old/old_dumps
./installing_deps.sh

Binary file not shown.

0
tests/__init__.py Normal file
View file

22
tests/testHelper.py Normal file
View file

@ -0,0 +1,22 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import unittest
import sys,os
sys.path.append(os.environ['AIL_BIN'])
from Helper import Process
class TestHelper(unittest.TestCase):
def setUp(self):
config_section = 'Keys'
def test_Process_Constructor_using_key_module(self):
conf_section = 'Keys'
process = Process(conf_section)
self.assertEqual(process.subscriber_name, 'Keys')

36
tests/testKeys.py Normal file
View file

@ -0,0 +1,36 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys,os
import unittest
import magic
sys.path.append(os.environ['AIL_BIN'])
from packages.Paste import Paste
import Keys as Keys
from Helper import Process
from pubsublogger import publisher
class TestKeysModule(unittest.TestCase):
def setUp(self):
self.paste = Paste('../samples/2018/01/01/keys_certificat_sample.gz')
# Section name in bin/packages/modules.cfg
self.config_section = 'Keys'
# Setup the I/O queues
p = Process(self.config_section)
def test_search_key(self):
with self.assertRaises(pubsublogger.exceptions.NoChannelError):
Keys.search_key(self.paste)
def test_search_key(self):
with self.assertRaises(NameError):
publisher.port = 6380
publisher.channel = 'Script'
Keys.search_key(self.paste)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
'''

View file

@ -1,8 +1,8 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import redis
import ConfigParser
import configparser
import json
import datetime
import time
@ -72,7 +72,7 @@ with open('templates/header_base.html', 'r') as f:
modified_header = complete_header
#Add the header in the supplied order
for module_name, txt in to_add_to_header_dico.items():
for module_name, txt in list(to_add_to_header_dico.items()):
to_replace = '<!--{}-->'.format(module_name)
if to_replace in complete_header:
modified_header = modified_header.replace(to_replace, txt)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"Hepler to create a new webpage associated with a module."
@ -37,7 +37,7 @@ def createModuleFolder(modulename):
def main():
rep1 = raw_input('New module name: ')
rep1 = input('New module name: ')
createModuleFolder(rep1)
if __name__ == '__main__':

View file

@ -1,10 +1,10 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
'''
Flask global variables shared across modules
'''
import ConfigParser
import configparser
import redis
import os
@ -18,7 +18,7 @@ if not os.path.exists(configfile):
Did you set environment variables? \
Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser()
cfg = configparser.ConfigParser()
cfg.read(configfile)
@ -26,41 +26,47 @@ cfg.read(configfile)
r_serv = redis.StrictRedis(
host=cfg.get("Redis_Queues", "host"),
port=cfg.getint("Redis_Queues", "port"),
db=cfg.getint("Redis_Queues", "db"))
db=cfg.getint("Redis_Queues", "db"),
decode_responses=True)
r_serv_log = redis.StrictRedis(
host=cfg.get("Redis_Log", "host"),
port=cfg.getint("Redis_Log", "port"),
db=cfg.getint("Redis_Log", "db"))
db=cfg.getint("Redis_Log", "db"),
decode_responses=True)
r_serv_charts = redis.StrictRedis(
host=cfg.get("Redis_Level_DB_Trending", "host"),
port=cfg.getint("Redis_Level_DB_Trending", "port"),
db=cfg.getint("Redis_Level_DB_Trending", "db"))
host=cfg.get("ARDB_Trending", "host"),
port=cfg.getint("ARDB_Trending", "port"),
db=cfg.getint("ARDB_Trending", "db"),
decode_responses=True)
r_serv_sentiment = redis.StrictRedis(
host=cfg.get("Redis_Level_DB_Sentiment", "host"),
port=cfg.getint("Redis_Level_DB_Sentiment", "port"),
db=cfg.getint("Redis_Level_DB_Sentiment", "db"))
host=cfg.get("ARDB_Sentiment", "host"),
port=cfg.getint("ARDB_Sentiment", "port"),
db=cfg.getint("ARDB_Sentiment", "db"),
decode_responses=True)
r_serv_term = redis.StrictRedis(
host=cfg.get("Redis_Level_DB_TermFreq", "host"),
port=cfg.getint("Redis_Level_DB_TermFreq", "port"),
db=cfg.getint("Redis_Level_DB_TermFreq", "db"))
host=cfg.get("ARDB_TermFreq", "host"),
port=cfg.getint("ARDB_TermFreq", "port"),
db=cfg.getint("ARDB_TermFreq", "db"),
decode_responses=True)
r_serv_cred = redis.StrictRedis(
host=cfg.get("Redis_Level_DB_TermCred", "host"),
port=cfg.getint("Redis_Level_DB_TermCred", "port"),
db=cfg.getint("Redis_Level_DB_TermCred", "db"))
host=cfg.get("ARDB_TermCred", "host"),
port=cfg.getint("ARDB_TermCred", "port"),
db=cfg.getint("ARDB_TermCred", "db"),
decode_responses=True)
r_serv_pasteName = redis.StrictRedis(
host=cfg.get("Redis_Paste_Name", "host"),
port=cfg.getint("Redis_Paste_Name", "port"),
db=cfg.getint("Redis_Paste_Name", "db"))
db=cfg.getint("Redis_Paste_Name", "db"),
decode_responses=True)
# VARIABLES #
max_preview_char = int(cfg.get("Flask", "max_preview_char")) # Maximum number of characters to display in the tooltip
max_preview_modal = int(cfg.get("Flask", "max_preview_modal")) # Maximum number of characters to display in the modal
tlsh_to_percent = 1000.0 # Used to display the estimated percentage instead of a raw TLSH value
DiffMaxLineLength = int(cfg.get("Flask", "DiffMaxLineLength")) # Maximum line length accepted when computing a diff between two pastes
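The decode_responses=True flag added to every connection above is what keeps the Flask code str-based under Python 3; a hedged before/after illustration (host, port and db are placeholders):

import redis

raw = redis.StrictRedis(host='localhost', port=6379, db=1)
txt = redis.StrictRedis(host='localhost', port=6379, db=1, decode_responses=True)

raw.set('k', 'value')
print(raw.get('k'))    # b'value' -> bytes, would need .decode() at every call site
print(txt.get('k'))    # 'value'  -> str, matching the Python 3 code paths above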

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
'''
@ -23,21 +23,22 @@ max_preview_modal = Flask_config.max_preview_modal
#init all lvlDB servers
curYear = datetime.now().year
int_year = int(curYear)
r_serv_db = {}
# port generated automatically depending on available levelDB date
yearList = []
lvdbdir= os.path.join(os.environ['AIL_HOME'], "LEVEL_DB_DATA/")
for year in os.listdir(lvdbdir):
try:
intYear = int(year)
except:
continue
yearList.append([year, intYear, int(curYear) == intYear])
for x in range(0, (int_year - 2018) + 1):
intYear = int_year - x
yearList.append([str(intYear), intYear, int(curYear) == intYear])
r_serv_db[intYear] = redis.StrictRedis(
host=cfg.get("Redis_Level_DB", "host"),
port=intYear,
db=cfg.getint("Redis_Level_DB", "db"))
host=cfg.get("ARDB_DB", "host"),
port=cfg.getint("ARDB_DB", "port"),
db=intYear,
decode_responses=True)
yearList.sort(reverse=True)
browsepastes = Blueprint('browsepastes', __name__, template_folder='templates')
@ -48,16 +49,18 @@ def getPastebyType(server, module_name):
all_path = []
for path in server.smembers('WARNING_'+module_name):
all_path.append(path)
return all_path
def event_stream_getImportantPasteByModule(module_name, year):
index = 0
all_pastes_list = getPastebyType(r_serv_db[year], module_name)
for path in all_pastes_list:
index += 1
paste = Paste.Paste(path)
content = paste.get_p_content().decode('utf8', 'ignore')
content = paste.get_p_content()
content_range = max_preview_char if len(content)>max_preview_char else len(content)-1
curr_date = str(paste._get_p_date())
curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:]
@ -83,7 +86,13 @@ def browseImportantPaste():
@browsepastes.route("/importantPasteByModule/", methods=['GET'])
def importantPasteByModule():
module_name = request.args.get('moduleName')
# # TODO: VERIFY YEAR VALIDITY
try:
currentSelectYear = int(request.args.get('year'))
except:
print('Invalid year input')
currentSelectYear = int(datetime.now().year)
all_content = []
paste_date = []
@ -94,7 +103,7 @@ def importantPasteByModule():
for path in allPastes[0:10]:
all_path.append(path)
paste = Paste.Paste(path)
content = paste.get_p_content().decode('utf8', 'ignore')
content = paste.get_p_content()
content_range = max_preview_char if len(content)>max_preview_char else len(content)-1
all_content.append(content[0:content_range].replace("\"", "\'").replace("\r", " ").replace("\n", " "))
curr_date = str(paste._get_p_date())

View file

@ -87,9 +87,12 @@
<li name='nav-pan'><a data-toggle="tab" href="#sqlinjection-tab" data-attribute-name="sqlinjection" data-panel="sqlinjection-panel">SQL injections</a></li>
<li name='nav-pan'><a data-toggle="tab" href="#cve-tab" data-attribute-name="cve" data-panel="cve-panel">CVEs</a></li>
<li name='nav-pan'><a data-toggle="tab" href="#keys-tab" data-attribute-name="keys" data-panel="keys-panel">Keys</a></li>
<li name='nav-pan'><a data-toggle="tab" href="#apikey-tab" data-attribute-name="apikey" data-panel="apikey-panel">API Keys</a></li>
<li name='nav-pan'><a data-toggle="tab" href="#mail-tab" data-attribute-name="mail" data-panel="mail-panel">Mails</a></li>
<li name='nav-pan'><a data-toggle="tab" href="#phone-tab" data-attribute-name="phone" data-panel="phone-panel">Phones</a></li>
<li name='nav-pan'><a data-toggle="tab" href="#onion-tab" data-attribute-name="onion" data-panel="onion-panel">Onions</a></li>
<li name='nav-pan'><a data-toggle="tab" href="#bitcoin-tab" data-attribute-name="bitcoin" data-panel="bitcoin-panel">Bitcoin</a></li>
<li name='nav-pan'><a data-toggle="tab" href="#base64-tab" data-attribute-name="base64" data-panel="base64-panel">Base64</a></li>
</ul>
</br>
@ -110,6 +113,9 @@
<div class="col-lg-12 tab-pane fade" id="keys-tab">
<img id="loading-gif-modal" src="{{url_for('static', filename='image/loading.gif') }}" style="margin: 4px;">
</div>
<div class="col-lg-12 tab-pane fade" id="apikey-tab">
<img id="loading-gif-modal" src="{{url_for('static', filename='image/loading.gif') }}" style="margin: 4px;">
</div>
<div class="col-lg-12 tab-pane fade" id="mail-tab">
<img id="loading-gif-modal" src="{{url_for('static', filename='image/loading.gif') }}" style="margin: 4px;">
</div>
@ -119,6 +125,12 @@
<div class="col-lg-12 tab-pane fade" id="onion-tab">
<img id="loading-gif-modal" src="{{url_for('static', filename='image/loading.gif') }}" style="margin: 4px;">
</div>
<div class="col-lg-12 tab-pane fade" id="bitcoin-tab">
<img id="loading-gif-modal" src="{{url_for('static', filename='image/loading.gif') }}" style="margin: 4px;">
</div>
<div class="col-lg-12 tab-pane fade" id="base64-tab">
<img id="loading-gif-modal" src="{{url_for('static', filename='image/loading.gif') }}" style="margin: 4px;">
</div>
</div> <!-- tab-content -->
<!-- /.row -->
</div>

View file

@ -245,4 +245,3 @@ $(document).ready(function(){
} );
</script>

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
'''
@ -26,20 +26,30 @@ def event_stream():
pubsub = r_serv_log.pubsub()
pubsub.psubscribe("Script" + '.*')
for msg in pubsub.listen():
level = msg['channel'].split('.')[1]
type = msg['type']
pattern = msg['pattern']
channel = msg['channel']
data = msg['data']
msg = {'channel': channel, 'type': type, 'pattern': pattern, 'data': data}
level = (msg['channel']).split('.')[1]
if msg['type'] == 'pmessage' and level != "DEBUG":
yield 'data: %s\n\n' % json.dumps(msg)
def get_queues(r):
# We may want to put the llen in a pipeline to do only one query.
newData = []
for queue, card in r.hgetall("queues").iteritems():
for queue, card in r.hgetall("queues").items():
key = "MODULE_" + queue + "_"
keySet = "MODULE_TYPE_" + queue
for moduleNum in r.smembers(keySet):
value = r.get(key + str(moduleNum))
if value is not None:
timestamp, path = value.split(", ")
if timestamp is not None:

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
'''

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
'''
@ -108,7 +108,7 @@ def search():
for path in r_serv_pasteName.smembers(q[0]):
r.append(path)
paste = Paste.Paste(path)
content = paste.get_p_content().decode('utf8', 'ignore')
content = paste.get_p_content()
content_range = max_preview_char if len(content)>max_preview_char else len(content)-1
c.append(content[0:content_range])
curr_date = str(paste._get_p_date())
@ -126,7 +126,7 @@ def search():
for x in results:
r.append(x.items()[0][1])
paste = Paste.Paste(x.items()[0][1])
content = paste.get_p_content().decode('utf8', 'ignore')
content = paste.get_p_content()
content_range = max_preview_char if len(content)>max_preview_char else len(content)-1
c.append(content[0:content_range])
curr_date = str(paste._get_p_date())
@ -175,7 +175,7 @@ def get_more_search_result():
for x in results:
path_array.append(x.items()[0][1])
paste = Paste.Paste(x.items()[0][1])
content = paste.get_p_content().decode('utf8', 'ignore')
content = paste.get_p_content()
content_range = max_preview_char if len(content)>max_preview_char else len(content)-1
preview_array.append(content[0:content_range])
curr_date = str(paste._get_p_date())

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
'''
@ -57,15 +57,19 @@ def sentiment_analysis_getplotdata():
if getAllProviders == 'True':
if allProvider == "True":
range_providers = r_serv_charts.smembers('all_provider_set')
return jsonify(list(range_providers))
else:
range_providers = r_serv_charts.zrevrangebyscore('providers_set_'+ get_date_range(0)[0], '+inf', '-inf', start=0, num=8)
# if empty, get yesterday top providers
range_providers = r_serv_charts.zrevrangebyscore('providers_set_'+ get_date_range(1)[1], '+inf', '-inf', start=0, num=8) if range_providers == [] else range_providers
# if still empty, takes from all providers
if range_providers == []:
print 'today provider empty'
print('today provider empty')
range_providers = r_serv_charts.smembers('all_provider_set')
return jsonify(list(range_providers))
elif provider is not None:
@ -78,7 +82,7 @@ def sentiment_analysis_getplotdata():
list_value = []
for cur_id in r_serv_sentiment.smembers(cur_set_name):
cur_value = r_serv_sentiment.get(cur_id)
cur_value = (r_serv_sentiment.get(cur_id))
list_value.append(cur_value)
list_date[cur_timestamp] = list_value
to_return[provider] = list_date
@ -130,7 +134,7 @@ def sentiment_analysis_plot_tool_getdata():
list_value = []
for cur_id in r_serv_sentiment.smembers(cur_set_name):
cur_value = r_serv_sentiment.get(cur_id)
cur_value = (r_serv_sentiment.get(cur_id))
list_value.append(cur_value)
list_date[cur_timestamp] = list_value
to_return[cur_provider] = list_date
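The provider ranking above relies on redis-py's zrevrangebyscore; a minimal illustration of the same call shape (key name, members and the connection are placeholders; the dict form of zadd assumes redis-py >= 3.0):

import redis

r = redis.StrictRedis(decode_responses=True)   # placeholder connection
r.zadd('providers_set_20180511', {'pastebin.com': 42, 'example.org': 7})

# top 8 providers by score, highest first
top = r.zrevrangebyscore('providers_set_20180511', '+inf', '-inf', start=0, num=8)
print(top)   # ['pastebin.com', 'example.org']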

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
'''
@ -20,7 +20,6 @@ cfg = Flask_config.cfg
r_serv_pasteName = Flask_config.r_serv_pasteName
max_preview_char = Flask_config.max_preview_char
max_preview_modal = Flask_config.max_preview_modal
tlsh_to_percent = Flask_config.tlsh_to_percent
DiffMaxLineLength = Flask_config.DiffMaxLineLength
showsavedpastes = Blueprint('showsavedpastes', __name__, template_folder='templates')
@ -38,7 +37,7 @@ def showpaste(content_range):
p_size = paste.p_size
p_mime = paste.p_mime
p_lineinfo = paste.get_lines_info()
p_content = paste.get_p_content().decode('utf-8', 'ignore')
p_content = paste.get_p_content()
p_duplicate_full_list = json.loads(paste._get_p_duplicate())
p_duplicate_list = []
p_simil_list = []
@ -48,11 +47,13 @@ def showpaste(content_range):
for dup_list in p_duplicate_full_list:
if dup_list[0] == "tlsh":
dup_list[2] = int(((tlsh_to_percent - float(dup_list[2])) / tlsh_to_percent)*100)
dup_list[2] = 100 - int(dup_list[2])
else:
print('dup_list')
print(dup_list)
dup_list[2] = int(dup_list[2])
p_duplicate_full_list.sort(lambda x,y: cmp(x[2], y[2]), reverse=True)
#p_duplicate_full_list.sort(lambda x,y: cmp(x[2], y[2]), reverse=True)
# Combine multiple duplicate paste name and format for display
new_dup_list = []
@ -64,12 +65,13 @@ def showpaste(content_range):
hash_types = []
comp_vals = []
for i in indices:
hash_types.append(p_duplicate_full_list[i][0].encode('utf8'))
hash_types.append(p_duplicate_full_list[i][0])
comp_vals.append(p_duplicate_full_list[i][2])
dup_list_removed.append(i)
hash_types = str(hash_types).replace("[","").replace("]","") if len(hash_types)==1 else str(hash_types)
comp_vals = str(comp_vals).replace("[","").replace("]","") if len(comp_vals)==1 else str(comp_vals)
if len(p_duplicate_full_list[dup_list_index]) > 3:
try:
date_paste = str(int(p_duplicate_full_list[dup_list_index][3]))
@ -91,7 +93,6 @@ def showpaste(content_range):
if content_range != 0:
p_content = p_content[0:content_range]
return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list, date_list=p_date_list)
# ============ ROUTES ============
@ -100,6 +101,12 @@ def showpaste(content_range):
def showsavedpaste():
return showpaste(0)
@showsavedpastes.route("/showsavedrawpaste/") #shows raw
def showsavedrawpaste():
requested_path = request.args.get('paste', '')
paste = Paste.Paste(requested_path)
content = paste.get_p_content()
return content, 200, {'Content-Type': 'text/plain'}
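A hedged client-side illustration of the new raw-paste route (host, port and the paste path are placeholders, and requests is used here only for the example, it is not one of the project's dependencies):

import requests

resp = requests.get('http://127.0.0.1:7000/showsavedrawpaste/',
                    params={'paste': 'PASTES/archive/pastebin.com_pro/2018/01/01/example.gz'})
print(resp.headers.get('Content-Type'))   # text/plain, as set by the route above
print(resp.text[:200])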
@showsavedpastes.route("/showpreviewpaste/")
def showpreviewpaste():
@ -111,7 +118,7 @@ def showpreviewpaste():
def getmoredata():
requested_path = request.args.get('paste', '')
paste = Paste.Paste(requested_path)
p_content = paste.get_p_content().decode('utf-8', 'ignore')
p_content = paste.get_p_content()
to_return = p_content[max_preview_modal-1:]
return to_return
@ -126,8 +133,8 @@ def showDiff():
if maxLengthLine1 > DiffMaxLineLength or maxLengthLine2 > DiffMaxLineLength:
return "Can't make the difference as the lines are too long."
htmlD = difflib.HtmlDiff()
lines1 = p1.get_p_content().decode('utf8', 'ignore').splitlines()
lines2 = p2.get_p_content().decode('utf8', 'ignore').splitlines()
lines1 = p1.get_p_content().splitlines()
lines2 = p2.get_p_content().splitlines()
the_html = htmlD.make_file(lines1, lines2)
return the_html
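The showDiff view above uses the standard library's difflib.HtmlDiff; a self-contained illustration of the same call:

import difflib

lines1 = 'user: admin\npassword: 1234'.splitlines()
lines2 = 'user: admin\npassword: letmein'.splitlines()

html_diff = difflib.HtmlDiff().make_file(lines1, lines2)
print(html_diff[:80])   # an HTML page with a side-by-side table of the two inputs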

View file

@ -69,18 +69,18 @@
<tbody>
{% for dup_path in duplicate_list %}
<tr>
<td>{{ hashtype_list[i] }}</td>
<td>Similarity: {{ simil_list[i] }}%</td>
<td>{{ date_list[i] }}</td>
<td>{{ hashtype_list[loop.index - 1] }}</td>
<td>Similarity: {{ simil_list[loop.index - 1] }}%</td>
<td>{{ date_list[loop.index - 1] }}</td>
<td><a target="_blank" href="{{ url_for('showsavedpastes.showsavedpaste') }}?paste={{ dup_path }}" id='dup_path'>{{ dup_path }}</a></td>
<td><a target="_blank" href="{{ url_for('showsavedpastes.showDiff') }}?s1={{ request.args.get('paste') }}&s2={{ dup_path }}" class="fa fa-columns" title="Show differences"></a></td>
</tr>
{% set i = i + 1 %}
{% endfor %}
</tbody>
</table>
{% endif %}
<h3> Content: </h3>
<a href="{{ url_for('showsavedpastes.showsavedrawpaste') }}?paste={{ request.args.get('paste') }}" id='raw_paste' > [Raw content] </a>
<p data-initsize="{{ initsize }}"> <pre id="paste-holder">{{ content }}</pre></p>
</div>
</div>

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
'''
@ -158,7 +158,7 @@ def terms_management():
trackReg_list_num_of_paste = []
for tracked_regex in r_serv_term.smembers(TrackedRegexSet_Name):
notificationEMailTermMapping[tracked_regex] = "\n".join(r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_regex))
notificationEMailTermMapping[tracked_regex] = "\n".join( (r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_regex)) )
if tracked_regex not in notificationEnabledDict:
notificationEnabledDict[tracked_regex] = False
@@ -182,8 +182,9 @@ def terms_management():
trackSet_list_values = []
trackSet_list_num_of_paste = []
for tracked_set in r_serv_term.smembers(TrackedSetSet_Name):
tracked_set = tracked_set
notificationEMailTermMapping[tracked_set] = "\n".join(r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_set))
notificationEMailTermMapping[tracked_set] = "\n".join( (r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_set)) )
if tracked_set not in notificationEnabledDict:
@@ -209,7 +210,7 @@ def terms_management():
track_list_num_of_paste = []
for tracked_term in r_serv_term.smembers(TrackedTermsSet_Name):
notificationEMailTermMapping[tracked_term] = "\n".join(r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_term))
notificationEMailTermMapping[tracked_term] = "\n".join( r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_term))
if tracked_term not in notificationEnabledDict:
notificationEnabledDict[tracked_term] = False
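Each of the three tracking types (regex, set and term) builds its notification mapping the same way: the Redis set of e-mail addresses stored under a per-entry key is joined into one newline-separated string for the UI. A hedged redis-py sketch of that pattern (connection settings and key prefix are illustrative, not the AIL configuration):

    # Sketch of the notification-mail lookup; key prefix and connection are illustrative.
    import redis

    r_serv_term = redis.StrictRedis(host='localhost', port=6379, db=0,
                                    decode_responses=True)   # str instead of bytes on Python 3

    TrackedTermsNotificationEmailsPrefix_Name = "TrackedNotificationEmails_"   # placeholder prefix
    tracked_term = "leak"

    emails = r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_term)
    notification_block = "\n".join(emails)   # one address per line for the textarea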
@@ -220,7 +221,9 @@ def terms_management():
term_date = r_serv_term.hget(TrackedTermsDate_Name, tracked_term)
set_paste_name = "tracked_" + tracked_term
track_list_num_of_paste.append(r_serv_term.scard(set_paste_name))
track_list_num_of_paste.append( r_serv_term.scard(set_paste_name) )
term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded"
value_range.append(term_date)
track_list_values.append(value_range)
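The tracked-term date is kept as a Unix timestamp in a Redis hash and may be absent, hence the `utcfromtimestamp(...)` guard with the "No date recorded" fallback. A short standalone illustration of that conversion:

    # Standalone illustration of the term_date guard.
    import datetime

    def format_term_date(term_date):
        # term_date is a Unix timestamp (str or int from Redis) or None
        if term_date is None:
            return "No date recorded"
        return datetime.datetime.utcfromtimestamp(int(term_date))

    print(format_term_date("1525960000"))   # 2018-05-10 13:46:40
    print(format_term_date(None))           # No date recorded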
@@ -268,7 +271,7 @@ def terms_management_query_paste():
p_size = paste.p_size
p_mime = paste.p_mime
p_lineinfo = paste.get_lines_info()
p_content = paste.get_p_content().decode('utf-8', 'ignore')
p_content = paste.get_p_content()
if p_content != 0:
p_content = p_content[0:400]
paste_info.append({"path": path, "date": p_date, "source": p_source, "encoding": p_encoding, "size": p_size, "mime": p_mime, "lineinfo": p_lineinfo, "content": p_content})
@@ -310,7 +313,7 @@ def terms_management_action():
term = request.args.get('term')
notificationEmailsParam = request.args.get('emailAddresses')
if action is None or term is None:
if action is None or term is None or notificationEmailsParam is None:
return "None"
else:
if section == "followTerm":
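The action handler now also requires `emailAddresses`, so all three query parameters must be present before anything is written to Redis; `request.args.get()` returns `None` for a missing parameter. A standalone illustration of the guard (plain function, no Flask context):

    # Standalone illustration of the stricter parameter check: only a missing
    # parameter (None) is rejected, an empty string still passes.
    def validate(action, term, notificationEmailsParam):
        if action is None or term is None or notificationEmailsParam is None:
            return "None"
        return "ok"

    print(validate("add", "leak", ""))      # ok
    print(validate("add", "leak", None))    # None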
@@ -386,7 +389,6 @@ def terms_management_action():
r_serv_term.hdel(TrackedRegexDate_Name, term)
elif term.startswith('\\') and term.endswith('\\'):
r_serv_term.srem(TrackedSetSet_Name, term)
print(term)
r_serv_term.hdel(TrackedSetDate_Name, term)
else:
r_serv_term.srem(TrackedTermsSet_Name, term.lower())
@@ -524,7 +526,7 @@ def credentials_management_query_paste():
p_size = paste.p_size
p_mime = paste.p_mime
p_lineinfo = paste.get_lines_info()
p_content = paste.get_p_content().decode('utf-8', 'ignore')
p_content = paste.get_p_content()
if p_content != 0:
p_content = p_content[0:400]
paste_info.append({"path": path, "date": p_date, "source": p_source, "encoding": p_encoding, "size": p_size, "mime": p_mime, "lineinfo": p_lineinfo, "content": p_content})
@@ -534,7 +536,7 @@ def credentials_management_query_paste():
@terms.route("/credentials_management_action/", methods=['GET'])
def cred_management_action():
supplied = request.args.get('term').encode('utf-8')
supplied = request.args.get('term')
action = request.args.get('action')
section = request.args.get('section')
extensive = request.args.get('extensive')
@@ -565,7 +567,7 @@ def cred_management_action():
iter_num += 1
if poss in tempUsername:
num = r_serv_cred.hget(REDIS_KEY_ALL_CRED_SET, tempUsername)
num = (r_serv_cred.hget(REDIS_KEY_ALL_CRED_SET, tempUsername))
if num is not None:
uniq_num_set.add(num)
for num in r_serv_cred.smembers(tempUsername):
@@ -574,7 +576,7 @@ def cred_management_action():
data = {'usr': [], 'path': [], 'numPaste': [], 'simil': []}
for Unum in uniq_num_set:
levenRatio = 2.0
username = r_serv_cred.hget(REDIS_KEY_ALL_CRED_SET_REV, Unum)
username = (r_serv_cred.hget(REDIS_KEY_ALL_CRED_SET_REV, Unum))
# Calculate Levenshtein distance, ignore negative ratio
supp_splitted = supplied.split()
@@ -585,7 +587,10 @@ def cred_management_action():
levenRatioStr = "{:.1%}".format(levenRatio)
data['usr'].append(username)
allPathNum = list(r_serv_cred.smembers(REDIS_KEY_MAP_CRED_TO_PATH+'_'+Unum))
data['path'].append(allPathNum)
data['numPaste'].append(len(allPathNum))
data['simil'].append(levenRatioStr)
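The credential lookup scores the supplied username against every stored candidate with a string-similarity ratio and renders it as a percentage. A hedged sketch of that comparison using the stdlib `difflib.SequenceMatcher` as a stand-in (AIL may use a dedicated Levenshtein package; the usernames are made up):

    # Sketch of the similarity scoring behind cred_management_action(), with
    # difflib.SequenceMatcher standing in for a Levenshtein ratio.
    import difflib

    supplied = "admin@example.com"
    candidates = ["admin@example.com", "admin1@example.org", "root@example.com"]

    for username in candidates:
        levenRatio = difflib.SequenceMatcher(None, supplied, username).ratio()
        levenRatioStr = "{:.1%}".format(levenRatio)
        print(username, levenRatioStr)      # e.g. "admin@example.com 100.0%"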

View file

@@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
'''
@@ -28,6 +28,7 @@ def get_date_range(num_day):
for i in range(0, num_day+1):
date_list.append(date.substract_day(i))
return date_list
@@ -46,6 +47,7 @@ def progressionCharts():
date_range = get_date_range(num_day)
# Retreive all data from the last num_day
for date in date_range:
curr_value = r_serv_charts.hget(attribute_name, date)
bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], int(curr_value if curr_value is not None else 0)])
bar_values.insert(0, attribute_name)
@@ -54,6 +56,7 @@ def progressionCharts():
else:
redis_progression_name = "z_top_progression_" + trending_name
keyw_value = r_serv_charts.zrevrangebyscore(redis_progression_name, '+inf', '-inf', withscores=True, start=0, num=10)
return jsonify(keyw_value)
@trendings.route("/wordstrending/")
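The top-10 progression is read straight from a Redis sorted set and returned as JSON. A hedged redis-py sketch of the same query (key name and connection settings are illustrative; `json.dumps` stands in for Flask's `jsonify`):

    # Sketch of the sorted-set query behind progressionCharts().
    import json
    import redis

    r_serv_charts = redis.StrictRedis(host='localhost', port=6379, db=0,
                                      decode_responses=True)

    redis_progression_name = "z_top_progression_wordstrending"   # illustrative key
    keyw_value = r_serv_charts.zrevrangebyscore(redis_progression_name, '+inf', '-inf',
                                                withscores=True, start=0, num=10)
    # keyw_value is a list of (member, score) tuples, highest score first
    print(json.dumps(keyw_value))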

View file

@@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
'''
@@ -28,6 +28,7 @@ def get_top_relevant_data(server, module_name):
for date in get_date_range(15):
redis_progression_name_set = 'top_'+ module_name +'_set_' + date
member_set = server.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True)
if len(member_set) == 0: #No data for this date
days += 1
else:
@@ -85,9 +86,17 @@ def providersChart():
date_range = get_date_range(num_day)
# Retreive all data from the last num_day
for date in date_range:
curr_value_size = r_serv_charts.hget(keyword_name+'_'+'size', date)
curr_value_size = ( r_serv_charts.hget(keyword_name+'_'+'size', date) )
if curr_value_size is not None:
curr_value_size = curr_value_size
curr_value_num = r_serv_charts.hget(keyword_name+'_'+'num', date)
curr_value_size_avg = r_serv_charts.hget(keyword_name+'_'+'avg', date)
if curr_value_size_avg is not None:
curr_value_size_avg = curr_value_size_avg
if module_name == "size":
curr_value = float(curr_value_size_avg if curr_value_size_avg is not None else 0)
else:
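Per-provider statistics are stored in separate Redis hashes (size, num, avg) keyed by date, and `hget()` returns `None` when a date has no entry, hence the `... if ... is not None else 0` fallbacks. A short sketch of that defaulting (the non-size branch is assumed to fall back to the raw count, since the hunk is cut off here):

    # Sketch of the None-to-zero defaulting used when charting provider stats.
    def chart_value(curr_value_size_avg, curr_value_num, module_name="size"):
        # Redis hget() returns None when the hash has no field for this date
        if module_name == "size":
            return float(curr_value_size_avg if curr_value_size_avg is not None else 0)
        return float(curr_value_num if curr_value_num is not None else 0)

    print(chart_value("12456.7", None))      # 12456.7
    print(chart_value(None, None))           # 0.0
    print(chart_value(None, "42", "num"))    # 42.0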
@@ -103,6 +112,7 @@ def providersChart():
redis_provider_name_set = redis_provider_name_set + get_date_range(0)[0]
member_set = r_serv_charts.zrevrangebyscore(redis_provider_name_set, '+inf', '-inf', withscores=True, start=0, num=8)
# Member set is a list of (value, score) pairs
if len(member_set) == 0:
member_set.append(("No relevant data", float(100)))
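When the provider sorted set holds nothing for the requested day, a placeholder pair is appended so the chart still has something to render. A tiny standalone illustration:

    # Standalone illustration of the placeholder used when no provider data exists.
    member_set = []   # what zrevrangebyscore() yields for an empty or missing key
    if len(member_set) == 0:
        member_set.append(("No relevant data", float(100)))
    print(member_set)   # [('No relevant data', 100.0)]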