Merge pull request #201 from CIRCL/python3

Python 3 migration + many new features + fixes
This commit is contained in:
Alexandre Dulaunoy 2018-05-11 16:26:58 +02:00 committed by GitHub
commit 36e79f2f30
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
90 changed files with 1732 additions and 1412 deletions

View file

@ -1,7 +1,7 @@
language: python language: python
python: python:
- "2.7" - "3.5"
sudo: required sudo: required
@ -16,6 +16,7 @@ env:
install: install:
- ./installing_deps.sh - ./installing_deps.sh
- pip install coveralls codecov nose
script: script:
- pushd bin - pushd bin
@ -23,13 +24,11 @@ script:
- ./launch_lvldb.sh - ./launch_lvldb.sh
- ./launch_logs.sh - ./launch_logs.sh
- ./launch_queues.sh - ./launch_queues.sh
- ./launch_scripts.sh
- sleep 120
- ./Shutdown.py
- popd - popd
- find logs/* -exec cat {} \; - cd tests
- nosetests --with-coverage --cover-package=../bin -d
notifications:
email: after_success:
on_success: change - codecov
on_failure: change - coveralls

View file

@ -31,6 +31,10 @@ Features
* Terms, Set of terms and Regex tracking and occurrence
* Many more modules for extracting phone numbers, credentials and others
* Alerting to [MISP](https://github.com/MISP/MISP) to share found leaks within a threat intelligence platform using [MISP standard](https://www.misp-project.org/objects.html#_ail_leak)
* Detect and decode Base64 and store files
* Detect Amazon AWS and Google API keys
* Detect Bitcoin addresses and Bitcoin private keys
* Detect private keys and certificates
Installation
------------
@ -53,6 +57,11 @@ linux based distributions, you can replace it with [installing_deps_archlinux.sh
There is also a [Travis file](.travis.yml) used for automating the installation that can be used to build and install AIL on other systems.
Python 3 Upgrade
------------
To upgrade from an existing AIL installation, run [python3_upgrade.sh](./python3_upgrade.sh); this script will delete the existing virtual environment and create a new one. The script **will upgrade the packages but won't keep your previous data** (nevertheless, the data is copied into a directory called `old`). If you install from scratch, you don't need to run [python3_upgrade.sh](./python3_upgrade.sh).
Docker Quick Start (Ubuntu 16.04 LTS)
------------

87 bin/ApiKey.py Executable file
View file

@ -0,0 +1,87 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The ApiKey Module
======================
This module is consuming the Redis-list created by the Categ module.
It applies API key regexes to paste content and warns if the number of matches is above a threshold.
"""
import redis
import pprint
import time
import re
from packages import Paste
from packages import lib_refine
from pubsublogger import publisher
from Helper import Process
def search_api_key(message):
filename, score = message.split()
paste = Paste.Paste(filename)
content = paste.get_p_content()
aws_access_key = regex_aws_access_key.findall(content)
aws_secret_key = regex_aws_secret_key.findall(content)
google_api_key = regex_google_api_key.findall(content)
if(len(aws_access_key) > 0 or len(aws_secret_key) > 0 or len(google_api_key) > 0):
to_print = 'ApiKey;{};{};{};'.format(
paste.p_source, paste.p_date, paste.p_name)
if(len(google_api_key) > 0):
print('found google api key')
print(to_print)
publisher.warning('{}Checked {} found Google API Key;{}'.format(
to_print, len(google_api_key), paste.p_path))
if(len(aws_access_key) > 0 or len(aws_secret_key) > 0):
print('found AWS key')
print(to_print)
total = len(aws_access_key) + len(aws_secret_key)
publisher.warning('{}Checked {} found AWS Key;{}'.format(
to_print, total, paste.p_path))
msg = 'apikey;{}'.format(filename)
p.populate_set_out(msg, 'alertHandler')
#Send to duplicate
p.populate_set_out(filename, 'Duplicate')
if __name__ == "__main__":
publisher.port = 6380
publisher.channel = "Script"
config_section = 'ApiKey'
p = Process(config_section)
publisher.info("ApiKey started")
message = p.get_from_set()
# TODO improve REGEX
regex_aws_access_key = re.compile(r'(?<![A-Z0-9])=[A-Z0-9]{20}(?![A-Z0-9])')
regex_aws_secret_key = re.compile(r'(?<!=[A-Za-z0-9+])=[A-Za-z0-9+]{40}(?![A-Za-z0-9+])')
regex_google_api_key = re.compile(r'=AIza[0-9a-zA-Z-_]{35}')
while True:
message = p.get_from_set()
if message is not None:
search_api_key(message)
else:
publisher.debug("Script ApiKey is Idling 10s")
time.sleep(10)
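A small, hypothetical sketch (not part of the commit) of how the ApiKey regexes above behave on sample text; the key value is fabricated filler, not a real credential.

```python
import re

# Regex copied from bin/ApiKey.py above; the sample string is invented.
regex_google_api_key = re.compile(r'=AIza[0-9a-zA-Z-_]{35}')

sample = 'token=AIza' + 'A' * 35 + ' found in a config dump'
print(regex_google_api_key.findall(sample))  # one match when the "=AIza..." form is present
```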

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -33,7 +33,7 @@ if __name__ == "__main__":
PST = Paste.Paste(message) PST = Paste.Paste(message)
else: else:
publisher.debug("Script Attribute is idling 1s") publisher.debug("Script Attribute is idling 1s")
print 'sleeping' print('sleeping')
time.sleep(1) time.sleep(1)
continue continue
@ -45,6 +45,6 @@ if __name__ == "__main__":
# FIXME Not used. # FIXME Not used.
PST.store.sadd("Pastes_Objects", PST.p_path) PST.store.sadd("Pastes_Objects", PST.p_path)
except IOError: except IOError:
print "CRC Checksum Failed on :", PST.p_path print("CRC Checksum Failed on :", PST.p_path)
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
PST.p_source, PST.p_date, PST.p_name)) PST.p_source, PST.p_date, PST.p_name))

136 bin/Base64.py Executable file
View file

@ -0,0 +1,136 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
Base64 module
Detect Base64 and decode it
"""
import time
import os
import datetime
from pubsublogger import publisher
from Helper import Process
from packages import Paste
import re
import base64
from hashlib import sha1
import magic
import json
import signal
class TimeoutException(Exception):
pass
def timeout_handler(signum, frame):
raise TimeoutException
signal.signal(signal.SIGALRM, timeout_handler)
def search_base64(content, message):
find = False
base64_list = re.findall(regex_base64, content)
if(len(base64_list) > 0):
for b64 in base64_list:
if len(b64) >= 40 :
decode = base64.b64decode(b64)
type = magic.from_buffer(decode, mime=True)
#print(type)
#print(decode)
find = True
hash = sha1(decode).hexdigest()
data = {}
data['name'] = hash
data['date'] = datetime.datetime.now().strftime("%d/%m/%y")
data['origin'] = message
data['estimated type'] = type
json_data = json.dumps(data)
save_base64_as_file(decode, type, hash, json_data)
print('found {} '.format(type))
if(find):
publisher.warning('base64 decoded')
#Send to duplicate
p.populate_set_out(message, 'Duplicate')
#send to Browse_warning_paste
msg = ('base64;{}'.format(message))
p.populate_set_out( msg, 'alertHandler')
def save_base64_as_file(decode, type, hash, json_data):
filename_b64 = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "base64"), type, hash[:2], hash)
filename_json = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "base64"), type, hash[:2], hash + '.json')
dirname = os.path.dirname(filename_b64)
if not os.path.exists(dirname):
os.makedirs(dirname)
with open(filename_b64, 'wb') as f:
f.write(decode)
with open(filename_json, 'w') as f:
f.write(json_data)
if __name__ == '__main__':
# If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
# Port of the redis instance used by pubsublogger
publisher.port = 6380
# Script is the default channel used for the modules.
publisher.channel = 'Script'
# Section name in bin/packages/modules.cfg
config_section = 'Base64'
# Setup the I/O queues
p = Process(config_section)
max_execution_time = p.config.getint("Base64", "max_execution_time")
# Send a description of the module to the logging system
publisher.info("Base64 started")
regex_base64 = '(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)'
re.compile(regex_base64)
# Endless loop getting messages from the input queue
while True:
# Get one message from the input queue
message = p.get_from_set()
if message is None:
publisher.debug("{} queue is empty, waiting".format(config_section))
time.sleep(1)
continue
filename = message
paste = Paste.Paste(filename)
signal.alarm(max_execution_time)
try:
# Do something with the message from the queue
#print(filename)
content = paste.get_p_content()
search_base64(content,message)
# (Optional) Send that thing to the next queue
#p.populate_set_out(something_has_been_done)
except TimeoutException:
print ("{0} processing timeout".format(paste.p_path))
continue
else:
signal.alarm(0)
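A minimal sketch of the detection logic above, assuming an invented payload; the `magic` mime lookup and the file storage step are omitted so the snippet stays self-contained.

```python
import base64
import re
from hashlib import sha1

# Regex and length threshold taken from bin/Base64.py above.
regex_base64 = '(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)'

content = 'paste body ... ' + base64.b64encode(b'fake binary payload for the example...').decode() + ' ...'
for b64 in re.findall(regex_base64, content):
    if len(b64) >= 40:
        decoded = base64.b64decode(b64)
        print(sha1(decoded).hexdigest())  # decoded blobs are stored under their SHA1 hash
```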

101 bin/Bitcoin.py Executable file
View file

@ -0,0 +1,101 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The Bitcoin Module
============================
It tries to extract Bitcoin addresses and private keys from pastes
..seealso:: Paste method (get_regex)
Requirements
------------
*Need running Redis instances. (Redis).
"""
from packages import Paste
from Helper import Process
from pubsublogger import publisher
import re
import time
from hashlib import sha256
#### thanks to http://rosettacode.org/wiki/Bitcoin/address_validation#Python for these 2 functions
def decode_base58(bc, length):
n = 0
for char in bc:
n = n * 58 + digits58.index(char)
return n.to_bytes(length, 'big')
def check_bc(bc):
try:
bcbytes = decode_base58(bc, 25)
return bcbytes[-4:] == sha256(sha256(bcbytes[:-4]).digest()).digest()[:4]
except Exception:
return False
########################################################
def search_key(content, message, paste):
bitcoin_address = re.findall(regex_bitcoin_public_address, content)
bitcoin_private_key = re.findall(regex_bitcoin_private_key, content)
validate_address = False
key = False
if(len(bitcoin_address) >0):
#print(message)
for address in bitcoin_address:
if(check_bc(address)):
validate_address = True
print('Bitcoin address found : {}'.format(address))
if(len(bitcoin_private_key) > 0):
for private_key in bitcoin_private_key:
print('Bitcoin private key found : {}'.format(private_key))
key = True
if(validate_address):
p.populate_set_out(message, 'Duplicate')
to_print = 'Bitcoin found: {} address and {} private Keys'.format(len(bitcoin_address), len(bitcoin_private_key))
print(to_print)
publisher.warning(to_print)
msg = ('bitcoin;{}'.format(message))
p.populate_set_out( msg, 'alertHandler')
if(key):
to_print = 'Bitcoin;{};{};{};'.format(paste.p_source, paste.p_date,
paste.p_name)
publisher.warning('{}Detected {} Bitcoin private key;{}'.format(
to_print, len(bitcoin_private_key),paste.p_path))
if __name__ == "__main__":
publisher.port = 6380
publisher.channel = "Script"
config_section = 'Bitcoin'
# Setup the I/O queues
p = Process(config_section)
# Send a description of the module to the logging system
publisher.info("Run Keys module ")
digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
regex_bitcoin_public_address = re.compile(r'(?<![a-km-zA-HJ-NP-Z0-9])[13][a-km-zA-HJ-NP-Z0-9]{26,33}(?![a-km-zA-HJ-NP-Z0-9])')
regex_bitcoin_private_key = re.compile(r'[5KL][1-9A-HJ-NP-Za-km-z]{50,51}')
# Endless loop getting messages from the input queue
while True:
# Get one message from the input queue
message = p.get_from_set()
if message is None:
publisher.debug("{} queue is empty, waiting".format(config_section))
time.sleep(1)
continue
# Do something with the message from the queue
paste = Paste.Paste(message)
content = paste.get_p_content()
search_key(content, message, paste)
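For reference, a standalone run of the Base58Check validation used above (functions copied from the module), applied to the well-known genesis block address:

```python
from hashlib import sha256

digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'

def decode_base58(bc, length):
    n = 0
    for char in bc:
        n = n * 58 + digits58.index(char)
    return n.to_bytes(length, 'big')

def check_bc(bc):
    # the last 4 bytes are a double-SHA256 checksum of the first 21 bytes
    bcbytes = decode_base58(bc, 25)
    return bcbytes[-4:] == sha256(sha256(bcbytes[:-4]).digest()).digest()[:4]

print(check_bc('1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa'))  # True for a valid address
```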

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
The ZMQ_PubSub_Categ Module The ZMQ_PubSub_Categ Module
@ -67,13 +67,13 @@ if __name__ == "__main__":
# FUNCTIONS # # FUNCTIONS #
publisher.info("Script Categ started") publisher.info("Script Categ started")
categories = ['CreditCards', 'Mail', 'Onion', 'Web', 'Credential', 'Cve'] categories = ['CreditCards', 'Mail', 'Onion', 'Web', 'Credential', 'Cve', 'ApiKey']
tmp_dict = {} tmp_dict = {}
for filename in categories: for filename in categories:
bname = os.path.basename(filename) bname = os.path.basename(filename)
tmp_dict[bname] = [] tmp_dict[bname] = []
with open(os.path.join(args.d, filename), 'r') as f: with open(os.path.join(args.d, filename), 'r') as f:
patterns = [r'%s' % re.escape(s.strip()) for s in f] patterns = [r'%s' % ( re.escape(s.strip()) ) for s in f]
tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE) tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE)
prec_filename = None prec_filename = None
@ -82,18 +82,25 @@ if __name__ == "__main__":
filename = p.get_from_set() filename = p.get_from_set()
if filename is None: if filename is None:
publisher.debug("Script Categ is Idling 10s") publisher.debug("Script Categ is Idling 10s")
print 'Sleeping' print('Sleeping')
time.sleep(10) time.sleep(10)
continue continue
paste = Paste.Paste(filename) paste = Paste.Paste(filename)
content = paste.get_p_content() content = paste.get_p_content()
#print('-----------------------------------------------------')
#print(filename)
#print(content)
#print('-----------------------------------------------------')
for categ, pattern in tmp_dict.items(): for categ, pattern in tmp_dict.items():
found = set(re.findall(pattern, content)) found = set(re.findall(pattern, content))
if len(found) >= matchingThreshold: if len(found) >= matchingThreshold:
msg = '{} {}'.format(paste.p_path, len(found)) msg = '{} {}'.format(paste.p_path, len(found))
print msg, categ #msg = " ".join( [paste.p_path, bytes(len(found))] )
print(msg, categ)
p.populate_set_out(msg, categ) p.populate_set_out(msg, categ)
publisher.info( publisher.info(
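The category matching above builds one case-insensitive pattern per keyword file; a hedged sketch of that approach with invented keywords:

```python
import re

keywords = ['visa', 'mastercard', 'cvv']  # stand-in for a category file such as CreditCards
pattern = re.compile('|'.join(re.escape(k.strip()) for k in keywords), re.IGNORECASE)

content = 'dump includes VISA numbers with CVV codes'
found = set(re.findall(pattern, content))
print(len(found))  # compared against matchingThreshold before forwarding to the category queue
```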

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -53,34 +53,34 @@ if __name__ == "__main__":
faup = Faup() faup = Faup()
server_cred = redis.StrictRedis( server_cred = redis.StrictRedis(
host=p.config.get("Redis_Level_DB_TermCred", "host"), host=p.config.get("ARDB_TermCred", "host"),
port=p.config.get("Redis_Level_DB_TermCred", "port"), port=p.config.get("ARDB_TermCred", "port"),
db=p.config.get("Redis_Level_DB_TermCred", "db")) db=p.config.get("ARDB_TermCred", "db"),
decode_responses=True)
criticalNumberToAlert = p.config.getint("Credential", "criticalNumberToAlert") criticalNumberToAlert = p.config.getint("Credential", "criticalNumberToAlert")
minTopPassList = p.config.getint("Credential", "minTopPassList") minTopPassList = p.config.getint("Credential", "minTopPassList")
regex_web = "((?:https?:\/\/)[-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)" regex_web = "((?:https?:\/\/)[-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)"
regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+" #regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"
regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:" regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:"
while True: while True:
message = p.get_from_set() message = p.get_from_set()
if message is None: if message is None:
publisher.debug("Script Credential is Idling 10s") publisher.debug("Script Credential is Idling 10s")
print('sleeping 10s') #print('sleeping 10s')
time.sleep(10) time.sleep(10)
continue continue
filepath, count = message.split() filepath, count = message.split(' ')
if count < minTopPassList:
# Less than 5 matches from the top password list, false positive.
print("false positive:", count)
continue
paste = Paste.Paste(filepath) paste = Paste.Paste(filepath)
content = paste.get_p_content() content = paste.get_p_content()
creds = set(re.findall(regex_cred, content)) creds = set(re.findall(regex_cred, content))
publisher.warning('to_print')
if len(creds) == 0: if len(creds) == 0:
continue continue
@ -89,7 +89,7 @@ if __name__ == "__main__":
message = 'Checked {} credentials found.'.format(len(creds)) message = 'Checked {} credentials found.'.format(len(creds))
if sites_set: if sites_set:
message += ' Related websites: {}'.format(', '.join(sites_set)) message += ' Related websites: {}'.format( (', '.join(sites_set)) )
to_print = 'Credential;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message, paste.p_path) to_print = 'Credential;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message, paste.p_path)
@ -97,12 +97,13 @@ if __name__ == "__main__":
#num of creds above tresh, publish an alert #num of creds above tresh, publish an alert
if len(creds) > criticalNumberToAlert: if len(creds) > criticalNumberToAlert:
print("========> Found more than 10 credentials in this file : {}".format(filepath)) print("========> Found more than 10 credentials in this file : {}".format( filepath ))
publisher.warning(to_print) publisher.warning(to_print)
#Send to duplicate #Send to duplicate
p.populate_set_out(filepath, 'Duplicate') p.populate_set_out(filepath, 'Duplicate')
#Send to alertHandler #Send to alertHandler
p.populate_set_out('credential;{}'.format(filepath), 'alertHandler') msg = 'credential;{}'.format(filepath)
p.populate_set_out(msg, 'alertHandler')
#Put in form, count occurences, then send to moduleStats #Put in form, count occurences, then send to moduleStats
creds_sites = {} creds_sites = {}
@ -122,9 +123,11 @@ if __name__ == "__main__":
else: else:
creds_sites[domain] = 1 creds_sites[domain] = 1
for site, num in creds_sites.iteritems(): # Send for each different site to moduleStats for site, num in creds_sites.items(): # Send for each different site to moduleStats
print 'credential;{};{};{}'.format(num, site, paste.p_date)
p.populate_set_out('credential;{};{};{}'.format(num, site, paste.p_date), 'ModuleStats') mssg = 'credential;{};{};{}'.format(num, site, paste.p_date)
print(mssg)
p.populate_set_out(mssg, 'ModuleStats')
if sites_set: if sites_set:
print("=======> Probably on : {}".format(', '.join(sites_set))) print("=======> Probably on : {}".format(', '.join(sites_set)))
@ -158,4 +161,3 @@ if __name__ == "__main__":
for partCred in splitedCred: for partCred in splitedCred:
if len(partCred) > minimumLengthThreshold: if len(partCred) > minimumLengthThreshold:
server_cred.sadd(partCred, uniq_num_cred) server_cred.sadd(partCred, uniq_num_cred)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -18,6 +18,7 @@ from packages import Paste
from packages import lib_refine from packages import lib_refine
from pubsublogger import publisher from pubsublogger import publisher
import re import re
import sys
from Helper import Process from Helper import Process
@ -58,13 +59,14 @@ if __name__ == "__main__":
content = paste.get_p_content() content = paste.get_p_content()
all_cards = re.findall(regex, content) all_cards = re.findall(regex, content)
if len(all_cards) > 0: if len(all_cards) > 0:
print 'All matching', all_cards print('All matching', all_cards)
creditcard_set = set([]) creditcard_set = set([])
for card in all_cards: for card in all_cards:
clean_card = re.sub('[^0-9]', '', card) clean_card = re.sub('[^0-9]', '', card)
clean_card = clean_card
if lib_refine.is_luhn_valid(clean_card): if lib_refine.is_luhn_valid(clean_card):
print clean_card, 'is valid' print(clean_card, 'is valid')
creditcard_set.add(clean_card) creditcard_set.add(clean_card)
paste.__setattr__(channel, creditcard_set) paste.__setattr__(channel, creditcard_set)
@ -76,13 +78,15 @@ if __name__ == "__main__":
if (len(creditcard_set) > 0): if (len(creditcard_set) > 0):
publisher.warning('{}Checked {} valid number(s);{}'.format( publisher.warning('{}Checked {} valid number(s);{}'.format(
to_print, len(creditcard_set), paste.p_path)) to_print, len(creditcard_set), paste.p_path))
print('{}Checked {} valid number(s);{}'.format(
to_print, len(creditcard_set), paste.p_path))
#Send to duplicate #Send to duplicate
p.populate_set_out(filename, 'Duplicate') p.populate_set_out(filename, 'Duplicate')
#send to Browse_warning_paste #send to Browse_warning_paste
p.populate_set_out('creditcard;{}'.format(filename), 'alertHandler') msg = 'creditcard;{}'.format(filename)
p.populate_set_out(msg, 'alertHandler')
else: else:
publisher.info('{}CreditCard related;{}'.format(to_print, paste.p_path)) publisher.info('{}CreditCard related;{}'.format(to_print, paste.p_path))
else: else:
publisher.debug("Script creditcard is idling 1m") publisher.debug("Script creditcard is idling 1m")
time.sleep(10) time.sleep(10)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
This module is consuming the Redis-list created by the ZMQ_Sub_Curve_Q Module. This module is consuming the Redis-list created by the ZMQ_Sub_Curve_Q Module.
@ -53,7 +53,7 @@ def check_if_tracked_term(term, path):
#add_paste to tracked_word_set #add_paste to tracked_word_set
set_name = "tracked_" + term set_name = "tracked_" + term
server_term.sadd(set_name, path) server_term.sadd(set_name, path)
print term, 'addded', set_name, '->', path print(term, 'added', set_name, '->', path)
p.populate_set_out("New Term added", 'CurveManageTopSets') p.populate_set_out("New Term added", 'CurveManageTopSets')
# Send a notification only when the member is in the set # Send a notification only when the member is in the set
@ -82,14 +82,16 @@ if __name__ == "__main__":
# REDIS # # REDIS #
r_serv1 = redis.StrictRedis( r_serv1 = redis.StrictRedis(
host=p.config.get("Redis_Level_DB_Curve", "host"), host=p.config.get("ARDB_Curve", "host"),
port=p.config.get("Redis_Level_DB_Curve", "port"), port=p.config.get("ARDB_Curve", "port"),
db=p.config.get("Redis_Level_DB_Curve", "db")) db=p.config.get("ARDB_Curve", "db"),
decode_responses=True)
server_term = redis.StrictRedis( server_term = redis.StrictRedis(
host=p.config.get("Redis_Level_DB_TermFreq", "host"), host=p.config.get("ARDB_TermFreq", "host"),
port=p.config.get("Redis_Level_DB_TermFreq", "port"), port=p.config.get("ARDB_TermFreq", "port"),
db=p.config.get("Redis_Level_DB_TermFreq", "db")) db=p.config.get("ARDB_TermFreq", "db"),
decode_responses=True)
# FUNCTIONS # # FUNCTIONS #
publisher.info("Script Curve started") publisher.info("Script Curve started")
@ -149,15 +151,16 @@ if __name__ == "__main__":
if generate_new_graph: if generate_new_graph:
generate_new_graph = False generate_new_graph = False
print 'Building graph' print('Building graph')
today = datetime.date.today() today = datetime.date.today()
year = today.year year = today.year
month = today.month month = today.month
lib_words.create_curve_with_word_file(r_serv1, csv_path, lib_words.create_curve_with_word_file(r_serv1, csv_path,
wordfile_path, year, wordfile_path, year,
month) month)
publisher.debug("Script Curve is Idling") publisher.debug("Script Curve is Idling")
print "sleeping" print("sleeping")
time.sleep(10) time.sleep(10)
message = p.get_from_set() message = p.get_from_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -16,7 +16,7 @@ from packages import lib_words
import datetime import datetime
import calendar import calendar
import os import os
import ConfigParser import configparser
# Config Variables # Config Variables
Refresh_rate = 60*5 #sec Refresh_rate = 60*5 #sec
@ -68,26 +68,26 @@ def manage_top_set():
# convert dico into sorted array # convert dico into sorted array
array_month = [] array_month = []
for w, v in dico.iteritems(): for w, v in dico.items():
array_month.append((w, v)) array_month.append((w, v))
array_month.sort(key=lambda tup: -tup[1]) array_month.sort(key=lambda tup: -tup[1])
array_month = array_month[0:20] array_month = array_month[0:20]
array_week = [] array_week = []
for w, v in dico_week.iteritems(): for w, v in dico_week.items():
array_week.append((w, v)) array_week.append((w, v))
array_week.sort(key=lambda tup: -tup[1]) array_week.sort(key=lambda tup: -tup[1])
array_week = array_week[0:20] array_week = array_week[0:20]
# convert dico_per_paste into sorted array # convert dico_per_paste into sorted array
array_month_per_paste = [] array_month_per_paste = []
for w, v in dico_per_paste.iteritems(): for w, v in dico_per_paste.items():
array_month_per_paste.append((w, v)) array_month_per_paste.append((w, v))
array_month_per_paste.sort(key=lambda tup: -tup[1]) array_month_per_paste.sort(key=lambda tup: -tup[1])
array_month_per_paste = array_month_per_paste[0:20] array_month_per_paste = array_month_per_paste[0:20]
array_week_per_paste = [] array_week_per_paste = []
for w, v in dico_week_per_paste.iteritems(): for w, v in dico_week_per_paste.items():
array_week_per_paste.append((w, v)) array_week_per_paste.append((w, v))
array_week_per_paste.sort(key=lambda tup: -tup[1]) array_week_per_paste.sort(key=lambda tup: -tup[1])
array_week_per_paste = array_week_per_paste[0:20] array_week_per_paste = array_week_per_paste[0:20]
@ -114,7 +114,7 @@ def manage_top_set():
timestamp = int(time.mktime(datetime.datetime.now().timetuple())) timestamp = int(time.mktime(datetime.datetime.now().timetuple()))
value = str(timestamp) + ", " + "-" value = str(timestamp) + ", " + "-"
r_temp.set("MODULE_"+ "CurveManageTopSets" + "_" + str(os.getpid()), value) r_temp.set("MODULE_"+ "CurveManageTopSets" + "_" + str(os.getpid()), value)
print "refreshed module" print("refreshed module")
@ -131,7 +131,7 @@ if __name__ == '__main__':
Did you set environment variables? \ Did you set environment variables? \
Or activate the virtualenv.') Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read(configfile) cfg.read(configfile)
@ -139,7 +139,8 @@ if __name__ == '__main__':
r_temp = redis.StrictRedis( r_temp = redis.StrictRedis(
host=cfg.get('RedisPubSub', 'host'), host=cfg.get('RedisPubSub', 'host'),
port=cfg.getint('RedisPubSub', 'port'), port=cfg.getint('RedisPubSub', 'port'),
db=cfg.getint('RedisPubSub', 'db')) db=cfg.getint('RedisPubSub', 'db'),
decode_responses=True)
timestamp = int(time.mktime(datetime.datetime.now().timetuple())) timestamp = int(time.mktime(datetime.datetime.now().timetuple()))
value = str(timestamp) + ", " + "-" value = str(timestamp) + ", " + "-"
@ -147,9 +148,10 @@ if __name__ == '__main__':
r_temp.sadd("MODULE_TYPE_"+ "CurveManageTopSets" , str(os.getpid())) r_temp.sadd("MODULE_TYPE_"+ "CurveManageTopSets" , str(os.getpid()))
server_term = redis.StrictRedis( server_term = redis.StrictRedis(
host=cfg.get("Redis_Level_DB_TermFreq", "host"), host=cfg.get("ARDB_TermFreq", "host"),
port=cfg.getint("Redis_Level_DB_TermFreq", "port"), port=cfg.getint("ARDB_TermFreq", "port"),
db=cfg.getint("Redis_Level_DB_TermFreq", "db")) db=cfg.getint("ARDB_TermFreq", "db"),
decode_responses=True)
publisher.info("Script Curve_manage_top_set started") publisher.info("Script Curve_manage_top_set started")
@ -162,4 +164,3 @@ if __name__ == '__main__':
# Get one message from the input queue (module only work if linked with a queue) # Get one message from the input queue (module only work if linked with a queue)
time.sleep(Refresh_rate) # sleep a long time then manage the set time.sleep(Refresh_rate) # sleep a long time then manage the set
manage_top_set() manage_top_set()
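The top-set management above repeatedly turns a counter dict into a sorted top-20 list; a tiny sketch of that pattern with invented counts:

```python
dico = {'password': 42, 'bitcoin': 17, 'leak': 8}  # invented term counters

array_month = []
for w, v in dico.items():
    array_month.append((w, v))
array_month.sort(key=lambda tup: -tup[1])  # highest count first
array_month = array_month[0:20]            # keep the top 20 terms
print(array_month)
```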

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
The CVE Module The CVE Module
@ -32,7 +32,8 @@ def search_cve(message):
publisher.warning('{} contains CVEs'.format(paste.p_name)) publisher.warning('{} contains CVEs'.format(paste.p_name))
#send to Browse_warning_paste #send to Browse_warning_paste
p.populate_set_out('cve;{}'.format(filepath), 'alertHandler') msg = 'cve;{}'.format(filepath)
p.populate_set_out(msg, 'alertHandler')
#Send to duplicate #Send to duplicate
p.populate_set_out(filepath, 'Duplicate') p.populate_set_out(filepath, 'Duplicate')
@ -63,4 +64,3 @@ if __name__ == '__main__':
# Do something with the message from the queue # Do something with the message from the queue
search_cve(message) search_cve(message)

View file

@ -1,18 +1,18 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import argparse import argparse
import redis import redis
from pubsublogger import publisher from pubsublogger import publisher
from packages.lib_words import create_dirfile from packages.lib_words import create_dirfile
import ConfigParser import configparser
def main(): def main():
"""Main Function""" """Main Function"""
# CONFIG # # CONFIG #
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read('./packages/config.cfg') cfg.read('./packages/config.cfg')
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
@ -36,7 +36,8 @@ def main():
r_serv = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"), r_serv = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"),
port=cfg.getint("Redis_Queues", "port"), port=cfg.getint("Redis_Queues", "port"),
db=cfg.getint("Redis_Queues", "db")) db=cfg.getint("Redis_Queues", "db"),
decode_responses=True)
publisher.port = 6380 publisher.port = 6380
publisher.channel = "Script" publisher.channel = "Script"

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -24,10 +24,11 @@ def main():
config_section = 'DomClassifier' config_section = 'DomClassifier'
p = Process(config_section) p = Process(config_section)
addr_dns = p.config.get("DomClassifier", "dns")
publisher.info("""ZMQ DomainClassifier is Running""") publisher.info("""ZMQ DomainClassifier is Running""")
c = DomainClassifier.domainclassifier.Extract(rawtext="") c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])
cc = p.config.get("DomClassifier", "cc") cc = p.config.get("DomClassifier", "cc")
cc_tld = p.config.get("DomClassifier", "cc_tld") cc_tld = p.config.get("DomClassifier", "cc_tld")
@ -44,6 +45,7 @@ def main():
continue continue
paste = PST.get_p_content() paste = PST.get_p_content()
mimetype = PST._get_p_encoding() mimetype = PST._get_p_encoding()
if mimetype == "text/plain": if mimetype == "text/plain":
c.text(rawtext=paste) c.text(rawtext=paste)
c.potentialdomain() c.potentialdomain()
@ -59,7 +61,7 @@ def main():
publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format( publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path)) PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path))
except IOError: except IOError:
print "CRC Checksum Failed on :", PST.p_path print("CRC Checksum Failed on :", PST.p_path)
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
PST.p_source, PST.p_date, PST.p_name)) PST.p_source, PST.p_date, PST.p_name))

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
from pubsublogger import publisher from pubsublogger import publisher
@ -23,7 +23,7 @@ if __name__ == "__main__":
if message is not None: if message is not None:
f = open(dump_file, 'a') f = open(dump_file, 'a')
while message is not None: while message is not None:
print message print(message)
date = datetime.datetime.now() date = datetime.datetime.now()
if message is not None: if message is not None:
f.write(date.isoformat() + ' ' + message + '\n') f.write(date.isoformat() + ' ' + message + '\n')

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -49,12 +49,13 @@ if __name__ == "__main__":
# REDIS # # REDIS #
dico_redis = {} dico_redis = {}
date_today = datetime.today() date_today = datetime.today()
for year in xrange(2013, date_today.year+1): for year in range(2013, date_today.year+1):
for month in xrange(0, 13): for month in range(0, 13):
dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis( dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
host=p.config.get("Redis_Level_DB", "host"), port=year, host=p.config.get("ARDB_DB", "host"),
db=month) port=p.config.get("ARDB_DB", "port"),
#print("dup: "+str(year)+str(month).zfill(2)+"\n") db=str(year) + str(month),
decode_responses=True)
# FUNCTIONS # # FUNCTIONS #
publisher.info("Script duplicate started") publisher.info("Script duplicate started")
@ -62,7 +63,7 @@ if __name__ == "__main__":
while True: while True:
try: try:
hash_dico = {} hash_dico = {}
dupl = [] dupl = set()
dico_range_list = [] dico_range_list = []
x = time.time() x = time.time()
@ -73,6 +74,7 @@ if __name__ == "__main__":
PST = Paste.Paste(path) PST = Paste.Paste(path)
else: else:
publisher.debug("Script Attribute is idling 10s") publisher.debug("Script Attribute is idling 10s")
print('sleeping')
time.sleep(10) time.sleep(10)
continue continue
@ -90,7 +92,7 @@ if __name__ == "__main__":
# Get the date of the range # Get the date of the range
date_range = date_today - timedelta(days = maximum_month_range*30.4166666) date_range = date_today - timedelta(days = maximum_month_range*30.4166666)
num_of_month = (date_today.year - date_range.year)*12 + (date_today.month - date_range.month) num_of_month = (date_today.year - date_range.year)*12 + (date_today.month - date_range.month)
for diff_month in xrange(0, num_of_month+1): for diff_month in range(0, num_of_month+1):
curr_date_range = date_today - timedelta(days = diff_month*30.4166666) curr_date_range = date_today - timedelta(days = diff_month*30.4166666)
to_append = str(curr_date_range.year)+str(curr_date_range.month).zfill(2) to_append = str(curr_date_range.year)+str(curr_date_range.month).zfill(2)
dico_range_list.append(to_append) dico_range_list.append(to_append)
@ -102,7 +104,7 @@ if __name__ == "__main__":
yearly_index = str(date_today.year)+'00' yearly_index = str(date_today.year)+'00'
r_serv0 = dico_redis[yearly_index] r_serv0 = dico_redis[yearly_index]
r_serv0.incr("current_index") r_serv0.incr("current_index")
index = r_serv0.get("current_index")+str(PST.p_date) index = (r_serv0.get("current_index")) + str(PST.p_date)
# Open selected dico range # Open selected dico range
opened_dico = [] opened_dico = []
@ -114,13 +116,16 @@ if __name__ == "__main__":
# Go throught the Database of the dico (of the month) # Go throught the Database of the dico (of the month)
for curr_dico_name, curr_dico_redis in opened_dico: for curr_dico_name, curr_dico_redis in opened_dico:
for hash_type, paste_hash in paste_hashes.iteritems(): for hash_type, paste_hash in paste_hashes.items():
for dico_hash in curr_dico_redis.smembers('HASHS_'+hash_type): for dico_hash in curr_dico_redis.smembers('HASHS_'+hash_type):
try: try:
if hash_type == 'ssdeep': if hash_type == 'ssdeep':
percent = 100-ssdeep.compare(dico_hash, paste_hash) percent = 100-ssdeep.compare(dico_hash, paste_hash)
else: else:
percent = tlsh.diffxlen(dico_hash, paste_hash) percent = tlsh.diffxlen(dico_hash, paste_hash)
if percent > 100:
percent = 100
threshold_duplicate = threshold_set[hash_type] threshold_duplicate = threshold_set[hash_type]
if percent < threshold_duplicate: if percent < threshold_duplicate:
@ -130,16 +135,20 @@ if __name__ == "__main__":
# index of paste # index of paste
index_current = r_serv_dico.get(dico_hash) index_current = r_serv_dico.get(dico_hash)
index_current = index_current
paste_path = r_serv_dico.get(index_current) paste_path = r_serv_dico.get(index_current)
paste_path = paste_path
paste_date = r_serv_dico.get(index_current+'_date') paste_date = r_serv_dico.get(index_current+'_date')
paste_date = paste_date
paste_date = paste_date if paste_date != None else "No date available" paste_date = paste_date if paste_date != None else "No date available"
if paste_path != None: if paste_path != None:
if paste_path != PST.p_path:
hash_dico[dico_hash] = (hash_type, paste_path, percent, paste_date) hash_dico[dico_hash] = (hash_type, paste_path, percent, paste_date)
print '['+hash_type+'] '+'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent) print('['+hash_type+'] '+'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent))
except Exception,e:
print str(e) except Exception:
#print 'hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash print('hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash)
# Add paste in DB after checking to prevent its analysis twice # Add paste in DB after checking to prevent its analysis twice
# hash_type_i -> index_i AND index_i -> PST.PATH # hash_type_i -> index_i AND index_i -> PST.PATH
@ -147,7 +156,7 @@ if __name__ == "__main__":
r_serv1.set(index+'_date', PST._get_p_date()) r_serv1.set(index+'_date', PST._get_p_date())
r_serv1.sadd("INDEX", index) r_serv1.sadd("INDEX", index)
# Adding hashes in Redis # Adding hashes in Redis
for hash_type, paste_hash in paste_hashes.iteritems(): for hash_type, paste_hash in paste_hashes.items():
r_serv1.set(paste_hash, index) r_serv1.set(paste_hash, index)
r_serv1.sadd("HASHS_"+hash_type, paste_hash) r_serv1.sadd("HASHS_"+hash_type, paste_hash)
@ -157,24 +166,25 @@ if __name__ == "__main__":
if len(hash_dico) != 0: if len(hash_dico) != 0:
# paste_tuple = (hash_type, date, paste_path, percent) # paste_tuple = (hash_type, date, paste_path, percent)
for dico_hash, paste_tuple in hash_dico.items(): for dico_hash, paste_tuple in hash_dico.items():
dupl.append(paste_tuple) dupl.add(paste_tuple)
# Creating the object attribute and save it. # Creating the object attribute and save it.
to_print = 'Duplicate;{};{};{};'.format( to_print = 'Duplicate;{};{};{};'.format(
PST.p_source, PST.p_date, PST.p_name) PST.p_source, PST.p_date, PST.p_name)
if dupl != []: if dupl != []:
dupl = list(dupl)
PST.__setattr__("p_duplicate", dupl) PST.__setattr__("p_duplicate", dupl)
PST.save_attribute_redis("p_duplicate", dupl) PST.save_attribute_redis("p_duplicate", dupl)
PST.save_others_pastes_attribute_duplicate("p_duplicate", dupl)
publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_path)) publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_path))
print '{}Detected {}'.format(to_print, len(dupl)) print('{}Detected {}'.format(to_print, len(dupl)))
y = time.time() y = time.time()
publisher.debug('{}Processed in {} sec'.format(to_print, y-x)) publisher.debug('{}Processed in {} sec'.format(to_print, y-x))
#print '{}Processed in {} sec'.format(to_print, y-x)
except IOError: except IOError:
to_print = 'Duplicate;{};{};{};'.format( to_print = 'Duplicate;{};{};{};'.format(
PST.p_source, PST.p_date, PST.p_name) PST.p_source, PST.p_date, PST.p_name)
print "CRC Checksum Failed on :", PST.p_path print("CRC Checksum Failed on :", PST.p_path)
publisher.error('{}CRC Checksum Failed'.format(to_print)) publisher.error('{}CRC Checksum Failed'.format(to_print))
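Illustrative only: how the ssdeep branch of the similarity computation above derives its `percent` value. This assumes the ssdeep Python binding used by the module; the sample strings are made up.

```python
import ssdeep

h1 = ssdeep.hash('The quick brown fox jumps over the lazy dog. ' * 20)
h2 = ssdeep.hash('The quick brown fox jumps over the lazy cat. ' * 20)

# ssdeep.compare returns a 0-100 similarity score; the module converts it into a
# distance with "100 - score" before checking it against threshold_duplicate.
percent = 100 - ssdeep.compare(h1, h2)
print(percent)
```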

View file

@ -1,166 +0,0 @@
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
"""
The Duplicate module
====================
This huge module is, in short term, checking duplicates.
Requirements:
-------------
"""
import redis
import os
import time
from packages import Paste
from pubsublogger import publisher
from pybloomfilter import BloomFilter
from Helper import Process
if __name__ == "__main__":
publisher.port = 6380
publisher.channel = "Script"
config_section = 'Duplicates'
p = Process(config_section)
# REDIS #
# DB OBJECT & HASHS ( DISK )
# FIXME increase flexibility
dico_redis = {}
for year in xrange(2013, 2017):
for month in xrange(0, 16):
dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
host=p.config.get("Redis_Level_DB", "host"), port=year,
db=month)
#print("dup: "+str(year)+str(month).zfill(2)+"\n")
# FUNCTIONS #
publisher.info("Script duplicate started")
set_limit = 100
bloompath = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "bloomfilters"))
bloop_path_set = set()
while True:
try:
super_dico = {}
hash_dico = {}
dupl = []
nb_hash_current = 0
x = time.time()
message = p.get_from_set()
if message is not None:
path = message
PST = Paste.Paste(path)
else:
publisher.debug("Script Attribute is idling 10s")
time.sleep(10)
continue
PST._set_p_hash_kind("md5")
# Assignate the correct redis connexion
r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month]
# Creating the bloom filter name: bloomyyyymm
filebloompath = os.path.join(bloompath, 'bloom' + PST.p_date.year +
PST.p_date.month)
if os.path.exists(filebloompath):
bloom = BloomFilter.open(filebloompath)
bloop_path_set.add(filebloompath)
else:
bloom = BloomFilter(100000000, 0.01, filebloompath)
bloop_path_set.add(filebloompath)
# UNIQUE INDEX HASHS TABLE
r_serv0 = dico_redis["201600"]
r_serv0.incr("current_index")
index = r_serv0.get("current_index")+str(PST.p_date)
# HASHTABLES PER MONTH (because of r_serv1 changing db)
r_serv1.set(index, PST.p_path)
r_serv1.sadd("INDEX", index)
# For each bloom filter
opened_bloom = []
for bloo in bloop_path_set:
# Opening blooms
opened_bloom.append(BloomFilter.open(bloo))
# For each hash of the paste
for line_hash in PST._get_hash_lines(min=5, start=1, jump=0):
nb_hash_current += 1
# Adding the hash in Redis & limiting the set
if r_serv1.scard(line_hash) <= set_limit:
r_serv1.sadd(line_hash, index)
r_serv1.sadd("HASHS", line_hash)
# Adding the hash in the bloom of the month
bloom.add(line_hash)
# Go throught the Database of the bloom filter (of the month)
for bloo in opened_bloom:
if line_hash in bloo:
db = bloo.name[-6:]
# Go throught the Database of the bloom filter (month)
r_serv_bloom = dico_redis[db]
# set of index paste: set([1,2,4,65])
hash_current = r_serv_bloom.smembers(line_hash)
# removing itself from the list
hash_current = hash_current - set([index])
# if the hash is present at least in 1 files
# (already processed)
if len(hash_current) != 0:
hash_dico[line_hash] = hash_current
# if there is data in this dictionnary
if len(hash_dico) != 0:
super_dico[index] = hash_dico
###########################################################################
# if there is data in this dictionnary
if len(super_dico) != 0:
# current = current paste, phash_dico = {hash: set, ...}
occur_dico = {}
for current, phash_dico in super_dico.items():
# phash = hash, pset = set([ pastes ...])
for phash, pset in hash_dico.items():
for p_fname in pset:
occur_dico.setdefault(p_fname, 0)
# Count how much hash is similar per file occuring
# in the dictionnary
if occur_dico[p_fname] >= 0:
occur_dico[p_fname] = occur_dico[p_fname] + 1
for paste, count in occur_dico.items():
percentage = round((count/float(nb_hash_current))*100, 2)
if percentage >= 50:
dupl.append((paste, percentage))
else:
print 'percentage: ' + str(percentage)
# Creating the object attribute and save it.
to_print = 'Duplicate;{};{};{};'.format(
PST.p_source, PST.p_date, PST.p_name)
if dupl != []:
PST.__setattr__("p_duplicate", dupl)
PST.save_attribute_redis("p_duplicate", dupl)
publisher.info('{}Detected {}'.format(to_print, len(dupl)))
print '{}Detected {}'.format(to_print, len(dupl))
y = time.time()
publisher.debug('{}Processed in {} sec'.format(to_print, y-x))
except IOError:
print "CRC Checksum Failed on :", PST.p_path
publisher.error('{}CRC Checksum Failed'.format(to_print))

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
The ZMQ_Feed_Q Module The ZMQ_Feed_Q Module
@ -27,6 +27,19 @@ from pubsublogger import publisher
from Helper import Process from Helper import Process
import magic
import io
#import gzip
'''
def gunzip_bytes_obj(bytes_obj):
in_ = io.BytesIO()
in_.write(bytes_obj)
in_.seek(0)
with gzip.GzipFile(fileobj=in_, mode='rb') as fo:
gunzipped_bytes_obj = fo.read()
return gunzipped_bytes_obj.decode()'''
if __name__ == '__main__': if __name__ == '__main__':
publisher.port = 6380 publisher.port = 6380
@ -44,6 +57,7 @@ if __name__ == '__main__':
while True: while True:
message = p.get_from_set() message = p.get_from_set()
#print(message)
# Recovering the streamed message informations. # Recovering the streamed message informations.
if message is not None: if message is not None:
splitted = message.split() splitted = message.split()
@ -51,14 +65,14 @@ if __name__ == '__main__':
paste, gzip64encoded = splitted paste, gzip64encoded = splitted
else: else:
# TODO Store the name of the empty paste inside a Redis-list. # TODO Store the name of the empty paste inside a Redis-list.
print "Empty Paste: not processed" print("Empty Paste: not processed")
publisher.debug("Empty Paste: {0} not processed".format(message)) publisher.debug("Empty Paste: {0} not processed".format(message))
continue continue
else: else:
print "Empty Queues: Waiting..." print("Empty Queues: Waiting...")
if int(time.time() - time_1) > 30: if int(time.time() - time_1) > 30:
to_print = 'Global; ; ; ;glob Processed {0} paste(s)'.format(processed_paste) to_print = 'Global; ; ; ;glob Processed {0} paste(s)'.format(processed_paste)
print to_print print(to_print)
#publisher.info(to_print) #publisher.info(to_print)
time_1 = time.time() time_1 = time.time()
processed_paste = 0 processed_paste = 0
@ -67,11 +81,28 @@ if __name__ == '__main__':
# Creating the full filepath # Creating the full filepath
filename = os.path.join(os.environ['AIL_HOME'], filename = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "pastes"), paste) p.config.get("Directories", "pastes"), paste)
dirname = os.path.dirname(filename) dirname = os.path.dirname(filename)
if not os.path.exists(dirname): if not os.path.exists(dirname):
os.makedirs(dirname) os.makedirs(dirname)
decoded = base64.standard_b64decode(gzip64encoded)
with open(filename, 'wb') as f: with open(filename, 'wb') as f:
f.write(base64.standard_b64decode(gzip64encoded)) f.write(decoded)
'''try:
decoded2 = gunzip_bytes_obj(decoded)
except:
decoded2 =''
type = magic.from_buffer(decoded2, mime=True)
if type!= 'text/x-c++' and type!= 'text/html' and type!= 'text/x-c' and type!= 'text/x-python' and type!= 'text/x-php' and type!= 'application/xml' and type!= 'text/x-shellscript' and type!= 'text/plain' and type!= 'text/x-diff' and type!= 'text/x-ruby':
print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------')
print(filename)
print(type)
print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------')
'''
p.populate_set_out(filename) p.populate_set_out(filename)
processed_paste+=1 processed_paste+=1
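The feeder message handled above carries a paste path plus a base64-encoded gzip blob; the sketch below mirrors the commented-out gunzip_bytes_obj helper with an invented payload.

```python
import base64
import gzip
import io

original = b'paste body\n'
gzip64encoded = base64.standard_b64encode(gzip.compress(original)).decode()

decoded = base64.standard_b64decode(gzip64encoded)   # what gets written to disk
with gzip.GzipFile(fileobj=io.BytesIO(decoded), mode='rb') as fo:
    print(fo.read())                                  # b'paste body\n'
```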

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
Queue helper module Queue helper module
@ -12,11 +12,7 @@ the same Subscriber name in both of them.
""" """
import redis import redis
try: # dirty to support python3 import configparser
import ConfigParser
except:
import configparser
ConfigParser = configparser
import os import os
import zmq import zmq
import time import time
@ -32,7 +28,7 @@ class PubSub(object):
raise Exception('Unable to find the configuration file. \ raise Exception('Unable to find the configuration file. \
Did you set environment variables? \ Did you set environment variables? \
Or activate the virtualenv.') Or activate the virtualenv.')
self.config = ConfigParser.ConfigParser() self.config = configparser.ConfigParser()
self.config.read(configfile) self.config.read(configfile)
self.redis_sub = False self.redis_sub = False
self.zmq_sub = False self.zmq_sub = False
@ -49,7 +45,8 @@ class PubSub(object):
r = redis.StrictRedis( r = redis.StrictRedis(
host=self.config.get('RedisPubSub', 'host'), host=self.config.get('RedisPubSub', 'host'),
port=self.config.get('RedisPubSub', 'port'), port=self.config.get('RedisPubSub', 'port'),
db=self.config.get('RedisPubSub', 'db')) db=self.config.get('RedisPubSub', 'db'),
decode_responses=True)
self.subscribers = r.pubsub(ignore_subscribe_messages=True) self.subscribers = r.pubsub(ignore_subscribe_messages=True)
self.subscribers.psubscribe(channel) self.subscribers.psubscribe(channel)
elif conn_name.startswith('ZMQ'): elif conn_name.startswith('ZMQ'):
@ -61,7 +58,8 @@ class PubSub(object):
for address in addresses.split(','): for address in addresses.split(','):
new_sub = context.socket(zmq.SUB) new_sub = context.socket(zmq.SUB)
new_sub.connect(address) new_sub.connect(address)
new_sub.setsockopt(zmq.SUBSCRIBE, channel) # bytes64 encode bytes to ascii only bytes
new_sub.setsockopt_string(zmq.SUBSCRIBE, channel)
self.subscribers.append(new_sub) self.subscribers.append(new_sub)
def setup_publish(self, conn_name): def setup_publish(self, conn_name):
@ -72,7 +70,8 @@ class PubSub(object):
if conn_name.startswith('Redis'): if conn_name.startswith('Redis'):
r = redis.StrictRedis(host=self.config.get('RedisPubSub', 'host'), r = redis.StrictRedis(host=self.config.get('RedisPubSub', 'host'),
port=self.config.get('RedisPubSub', 'port'), port=self.config.get('RedisPubSub', 'port'),
db=self.config.get('RedisPubSub', 'db')) db=self.config.get('RedisPubSub', 'db'),
decode_responses=True)
self.publishers['Redis'].append((r, channel)) self.publishers['Redis'].append((r, channel))
elif conn_name.startswith('ZMQ'): elif conn_name.startswith('ZMQ'):
context = zmq.Context() context = zmq.Context()
@ -85,10 +84,12 @@ class PubSub(object):
channel_message = m.get('channel') channel_message = m.get('channel')
for p, channel in self.publishers['Redis']: for p, channel in self.publishers['Redis']:
if channel_message is None or channel_message == channel: if channel_message is None or channel_message == channel:
p.publish(channel, m['message']) p.publish(channel, ( m['message']) )
for p, channel in self.publishers['ZMQ']: for p, channel in self.publishers['ZMQ']:
if channel_message is None or channel_message == channel: if channel_message is None or channel_message == channel:
p.send('{} {}'.format(channel, m['message'])) p.send('{} {}'.format(channel, m['message']))
#p.send(b' '.join( [channel, mess] ) )
def subscribe(self): def subscribe(self):
if self.redis_sub: if self.redis_sub:
@ -100,7 +101,7 @@ class PubSub(object):
for sub in self.subscribers: for sub in self.subscribers:
try: try:
msg = sub.recv(zmq.NOBLOCK) msg = sub.recv(zmq.NOBLOCK)
yield msg.split(' ', 1)[1] yield msg.split(b" ", 1)[1]
except zmq.error.Again as e: except zmq.error.Again as e:
time.sleep(0.2) time.sleep(0.2)
pass pass
@ -117,9 +118,9 @@ class Process(object):
Did you set environment variables? \ Did you set environment variables? \
Or activate the virtualenv.') Or activate the virtualenv.')
modulesfile = os.path.join(os.environ['AIL_BIN'], 'packages/modules.cfg') modulesfile = os.path.join(os.environ['AIL_BIN'], 'packages/modules.cfg')
self.config = ConfigParser.ConfigParser() self.config = configparser.ConfigParser()
self.config.read(configfile) self.config.read(configfile)
self.modules = ConfigParser.ConfigParser() self.modules = configparser.ConfigParser()
self.modules.read(modulesfile) self.modules.read(modulesfile)
self.subscriber_name = conf_section self.subscriber_name = conf_section
@ -131,11 +132,11 @@ class Process(object):
self.r_temp = redis.StrictRedis( self.r_temp = redis.StrictRedis(
host=self.config.get('RedisPubSub', 'host'), host=self.config.get('RedisPubSub', 'host'),
port=self.config.get('RedisPubSub', 'port'), port=self.config.get('RedisPubSub', 'port'),
db=self.config.get('RedisPubSub', 'db')) db=self.config.get('RedisPubSub', 'db'),
decode_responses=True)
self.moduleNum = os.getpid() self.moduleNum = os.getpid()
def populate_set_in(self): def populate_set_in(self):
# monoproc # monoproc
src = self.modules.get(self.subscriber_name, 'subscribe') src = self.modules.get(self.subscriber_name, 'subscribe')
@ -152,6 +153,7 @@ class Process(object):
self.r_temp.hset('queues', self.subscriber_name, self.r_temp.hset('queues', self.subscriber_name,
int(self.r_temp.scard(in_set))) int(self.r_temp.scard(in_set)))
message = self.r_temp.spop(in_set) message = self.r_temp.spop(in_set)
timestamp = int(time.mktime(datetime.datetime.now().timetuple())) timestamp = int(time.mktime(datetime.datetime.now().timetuple()))
dir_name = os.environ['AIL_HOME']+self.config.get('Directories', 'pastes') dir_name = os.environ['AIL_HOME']+self.config.get('Directories', 'pastes')
@ -159,37 +161,46 @@ class Process(object):
return None return None
else: else:
try: #try:
if ".gz" in message: if '.gz' in message:
path = message.split(".")[-2].split("/")[-1] path = message.split(".")[-2].split("/")[-1]
#find start of path with AIL_HOME #find start of path with AIL_HOME
index_s = message.find(os.environ['AIL_HOME']) index_s = message.find(os.environ['AIL_HOME'])
#Stop when .gz #Stop when .gz
index_e = message.find(".gz")+3 index_e = message.find(".gz")+3
if(index_s == -1):
complete_path = message[0:index_e]
else:
complete_path = message[index_s:index_e] complete_path = message[index_s:index_e]
else: else:
path = "?" path = "-"
complete_path = "?"
value = str(timestamp) + ", " + path value = str(timestamp) + ", " + path
self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value) self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value)
self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", complete_path) self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", complete_path)
self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum)) self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum))
return message return message
except: #except:
path = "?" #print('except')
value = str(timestamp) + ", " + path #path = "?"
self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value) #value = str(timestamp) + ", " + path
self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", "?") #self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value)
self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum)) #self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", "?")
return message #self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum))
#return message
def populate_set_out(self, msg, channel=None): def populate_set_out(self, msg, channel=None):
# multiproc # multiproc
msg = {'message': msg} msg = {'message': msg}
if channel is not None: if channel is not None:
msg.update({'channel': channel}) msg.update({'channel': channel})
self.r_temp.sadd(self.subscriber_name + 'out', json.dumps(msg))
# bytes64 encode bytes to ascii only bytes
j = json.dumps(msg)
self.r_temp.sadd(self.subscriber_name + 'out', j)
def publish(self): def publish(self):
# monoproc # monoproc
@ -201,6 +212,7 @@ class Process(object):
self.pubsub.setup_publish(name) self.pubsub.setup_publish(name)
while True: while True:
message = self.r_temp.spop(self.subscriber_name + 'out') message = self.r_temp.spop(self.subscriber_name + 'out')
if message is None: if message is None:
time.sleep(1) time.sleep(1)
continue continue
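The decode_responses=True added throughout this diff matters because redis-py returns bytes under Python 3 unless told otherwise; a minimal sketch, assuming a local Redis instance on the default port:

```python
import redis

r_bytes = redis.StrictRedis(host='localhost', port=6379, db=0)
r_str = redis.StrictRedis(host='localhost', port=6379, db=0, decode_responses=True)

r_bytes.set('AIL:example', 'value')
print(type(r_bytes.get('AIL:example')))  # <class 'bytes'> -> breaks code expecting str
print(type(r_str.get('AIL:example')))    # <class 'str'>
```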

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -107,10 +107,11 @@ if __name__ == "__main__":
continue continue
docpath = message.split(" ", -1)[-1] docpath = message.split(" ", -1)[-1]
paste = PST.get_p_content() paste = PST.get_p_content()
print "Indexing - "+indexname+" :", docpath print("Indexing - " + indexname + " :", docpath)
if time.time() - last_refresh > TIME_WAIT: #avoid calculating the index's size at each message #avoid calculating the index's size at each message
if( time.time() - last_refresh > TIME_WAIT):
last_refresh = time.time() last_refresh = time.time()
if check_index_size(baseindexpath, indexname) >= INDEX_SIZE_THRESHOLD*(1000*1000): if check_index_size(baseindexpath, indexname) >= INDEX_SIZE_THRESHOLD*(1000*1000):
timestamp = int(time.time()) timestamp = int(time.time())
@ -128,11 +129,11 @@ if __name__ == "__main__":
if indexertype == "whoosh": if indexertype == "whoosh":
indexwriter = ix.writer() indexwriter = ix.writer()
indexwriter.update_document( indexwriter.update_document(
title=unicode(docpath, errors='ignore'), title=docpath,
path=unicode(docpath, errors='ignore'), path=docpath,
content=unicode(paste, errors='ignore')) content=paste)
indexwriter.commit() indexwriter.commit()
except IOError: except IOError:
print "CRC Checksum Failed on :", PST.p_path print("CRC Checksum Failed on :", PST.p_path)
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
PST.p_source, PST.p_date, PST.p_name)) PST.p_source, PST.p_date, PST.p_name))
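
The Indexer hunks above drop the unicode(..., errors='ignore') conversions because docpath and the paste content are already str under Python 3, and they go straight into Whoosh's update_document. A self-contained sketch of those Whoosh calls, using a throwaway schema and index directory (both illustrative, not the module's real configuration):

    import os
    from whoosh.index import create_in
    from whoosh.fields import Schema, TEXT, ID
    from whoosh.qparser import QueryParser

    schema = Schema(title=TEXT(stored=True), path=ID(stored=True, unique=True), content=TEXT)
    os.makedirs('indexdir_demo', exist_ok=True)
    ix = create_in('indexdir_demo', schema)

    writer = ix.writer()
    # plain str values on Python 3, no unicode(..., errors='ignore') wrapper needed
    writer.update_document(title='demo paste', path='/tmp/demo', content='paste body with a leaked key')
    writer.commit()

    with ix.searcher() as searcher:
        results = searcher.search(QueryParser('content', ix.schema).parse('leaked'))
        print([hit['path'] for hit in results])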

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -15,16 +15,19 @@ RSA private key, certificate messages
import time import time
from pubsublogger import publisher from pubsublogger import publisher
from Helper import Process #from bin.packages import Paste
#from bin.Helper import Process
from packages import Paste from packages import Paste
from Helper import Process
def search_key(message): def search_key(paste):
paste = Paste.Paste(message)
content = paste.get_p_content() content = paste.get_p_content()
find = False find = False
if '-----BEGIN PGP MESSAGE-----' in content: if '-----BEGIN PGP MESSAGE-----' in content:
publisher.warning('{} has a PGP enc message'.format(paste.p_name)) publisher.warning('{} has a PGP enc message'.format(paste.p_name))
find = True find = True
if '-----BEGIN CERTIFICATE-----' in content: if '-----BEGIN CERTIFICATE-----' in content:
@ -32,15 +35,40 @@ def search_key(message):
find = True find = True
if '-----BEGIN RSA PRIVATE KEY-----' in content: if '-----BEGIN RSA PRIVATE KEY-----' in content:
publisher.warning('{} has a RSA key message'.format(paste.p_name)) publisher.warning('{} has a RSA private key message'.format(paste.p_name))
print('rsa private key message found')
find = True find = True
if '-----BEGIN PRIVATE KEY-----' in content: if '-----BEGIN PRIVATE KEY-----' in content:
publisher.warning('{} has a private message'.format(paste.p_name)) publisher.warning('{} has a private key message'.format(paste.p_name))
print('private key message found')
find = True find = True
if '-----BEGIN ENCRYPTED PRIVATE KEY-----' in content: if '-----BEGIN ENCRYPTED PRIVATE KEY-----' in content:
publisher.warning('{} has an encrypted private message'.format(paste.p_name)) publisher.warning('{} has an encrypted private key message'.format(paste.p_name))
print('encrypted private key message found')
find = True
if '-----BEGIN OPENSSH PRIVATE KEY-----' in content:
publisher.warning('{} has an openssh private key message'.format(paste.p_name))
print('openssh private key message found')
find = True
if '-----BEGIN OpenVPN Static key V1-----' in content:
publisher.warning('{} has an OpenVPN static key message'.format(paste.p_name))
print('OpenVPN Static key message found')
find = True
if '-----BEGIN DSA PRIVATE KEY-----' in content:
publisher.warning('{} has a dsa private key message'.format(paste.p_name))
find = True
if '-----BEGIN EC PRIVATE KEY-----' in content:
publisher.warning('{} has an ec private key message'.format(paste.p_name))
find = True
if '-----BEGIN PGP PRIVATE KEY BLOCK-----' in content:
publisher.warning('{} has a pgp private key block message'.format(paste.p_name))
find = True find = True
if find : if find :
@ -48,7 +76,9 @@ def search_key(message):
#Send to duplicate #Send to duplicate
p.populate_set_out(message, 'Duplicate') p.populate_set_out(message, 'Duplicate')
#send to Browse_warning_paste #send to Browse_warning_paste
p.populate_set_out('keys;{}'.format(message), 'alertHandler') msg = ('keys;{}'.format(message))
print(message)
p.populate_set_out( msg, 'alertHandler')
if __name__ == '__main__': if __name__ == '__main__':
@ -77,6 +107,7 @@ if __name__ == '__main__':
continue continue
# Do something with the message from the queue # Do something with the message from the queue
search_key(message) paste = Paste.Paste(message)
search_key(paste)
# (Optional) Send that thing to the next queue # (Optional) Send that thing to the next queue
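
The Keys.py additions above all follow one pattern: look for a PEM-style BEGIN marker in the paste and emit a warning naming the kind of material found. A compact equivalent using a marker-to-label table (the table, function name and sample content are illustrative):

    PEM_MARKERS = {
        '-----BEGIN PGP MESSAGE-----': 'PGP encrypted message',
        '-----BEGIN CERTIFICATE-----': 'certificate',
        '-----BEGIN RSA PRIVATE KEY-----': 'RSA private key',
        '-----BEGIN PRIVATE KEY-----': 'private key',
        '-----BEGIN ENCRYPTED PRIVATE KEY-----': 'encrypted private key',
        '-----BEGIN OPENSSH PRIVATE KEY-----': 'OpenSSH private key',
        '-----BEGIN OpenVPN Static key V1-----': 'OpenVPN static key',
        '-----BEGIN DSA PRIVATE KEY-----': 'DSA private key',
        '-----BEGIN EC PRIVATE KEY-----': 'EC private key',
        '-----BEGIN PGP PRIVATE KEY BLOCK-----': 'PGP private key block',
    }

    def find_key_material(content):
        # return the label of every marker present in the paste content
        return [label for marker, label in PEM_MARKERS.items() if marker in content]

    print(find_key_material('noise\n-----BEGIN RSA PRIVATE KEY-----\nMIIE...'))
    # ['RSA private key']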

View file

@ -11,11 +11,11 @@ CYAN="\\033[1;36m"
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; [ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; [ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_LEVELDB" ] && echo "Needs the env var AIL_LEVELDB. Run the script from the virtual environment." && exit 1; [ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_LEVELDB:$PATH export PATH=$AIL_ARDB:$PATH
function helptext { function helptext {
echo -e $YELLOW" echo -e $YELLOW"
@ -40,7 +40,7 @@ function helptext {
(Inside screen Daemons) (Inside screen Daemons)
"$RED" "$RED"
But first of all you'll need to edit few path where you installed But first of all you'll need to edit few path where you installed
your redis & leveldb servers. your redis & ardb servers.
"$DEFAULT" "$DEFAULT"
Usage: Usage:
----- -----
@ -58,33 +58,17 @@ function launching_redis {
screen -S "Redis_AIL" -X screen -t "6380" bash -c 'redis-server '$conf_dir'6380.conf ; read x' screen -S "Redis_AIL" -X screen -t "6380" bash -c 'redis-server '$conf_dir'6380.conf ; read x'
sleep 0.1 sleep 0.1
screen -S "Redis_AIL" -X screen -t "6381" bash -c 'redis-server '$conf_dir'6381.conf ; read x' screen -S "Redis_AIL" -X screen -t "6381" bash -c 'redis-server '$conf_dir'6381.conf ; read x'
# For Words and curves
sleep 0.1
screen -S "Redis_AIL" -X screen -t "6382" bash -c 'redis-server '$conf_dir'6382.conf ; read x'
} }
function launching_lvldb { function launching_ardb {
lvdbhost='127.0.0.1' conf_dir="${AIL_HOME}/configs/"
lvdbdir="${AIL_HOME}/LEVEL_DB_DATA/"
nb_db=13
db_y=`date +%Y` screen -dmS "ARDB_AIL"
#Verify that a dir with the correct year exists, create it otherwise
if [ ! -d "$lvdbdir$db_y" ]; then
mkdir -p "$db_y"
fi
screen -dmS "LevelDB_AIL"
sleep 0.1 sleep 0.1
echo -e $GREEN"\t* Launching Levels DB servers"$DEFAULT echo -e $GREEN"\t* Launching ARDB servers"$DEFAULT
#Launch a DB for each dir
for pathDir in $lvdbdir*/ ; do
yDir=$(basename "$pathDir")
sleep 0.1 sleep 0.1
screen -S "LevelDB_AIL" -X screen -t "$yDir" bash -c 'redis-leveldb -H '$lvdbhost' -D '$pathDir'/ -P '$yDir' -M '$nb_db'; read x' screen -S "ARDB_AIL" -X screen -t "6382" bash -c 'ardb-server '$conf_dir'6382.conf ; read x'
done
} }
function launching_logs { function launching_logs {
@ -101,12 +85,12 @@ function launching_queues {
sleep 0.1 sleep 0.1
echo -e $GREEN"\t* Launching all the queues"$DEFAULT echo -e $GREEN"\t* Launching all the queues"$DEFAULT
screen -S "Queue_AIL" -X screen -t "Queues" bash -c './launch_queues.py; read x' screen -S "Queue_AIL" -X screen -t "Queues" bash -c 'python3 launch_queues.py; read x'
} }
function launching_scripts { function launching_scripts {
echo -e "\t* Checking configuration" echo -e "\t* Checking configuration"
bash -c "./Update-conf.py" bash -c "python3 Update-conf.py"
exitStatus=$? exitStatus=$?
if [ $exitStatus -ge 1 ]; then if [ $exitStatus -ge 1 ]; then
echo -e $RED"\t* Configuration not up-to-date"$DEFAULT echo -e $RED"\t* Configuration not up-to-date"$DEFAULT
@ -142,6 +126,8 @@ function launching_scripts {
sleep 0.1 sleep 0.1
screen -S "Script_AIL" -X screen -t "Mail" bash -c './Mail.py; read x' screen -S "Script_AIL" -X screen -t "Mail" bash -c './Mail.py; read x'
sleep 0.1 sleep 0.1
screen -S "Script_AIL" -X screen -t "ApiKey" bash -c './ApiKey.py; read x'
sleep 0.1
screen -S "Script_AIL" -X screen -t "Web" bash -c './Web.py; read x' screen -S "Script_AIL" -X screen -t "Web" bash -c './Web.py; read x'
sleep 0.1 sleep 0.1
screen -S "Script_AIL" -X screen -t "Credential" bash -c './Credential.py; read x' screen -S "Script_AIL" -X screen -t "Credential" bash -c './Credential.py; read x'
@ -158,6 +144,10 @@ function launching_scripts {
sleep 0.1 sleep 0.1
screen -S "Script_AIL" -X screen -t "Keys" bash -c './Keys.py; read x' screen -S "Script_AIL" -X screen -t "Keys" bash -c './Keys.py; read x'
sleep 0.1 sleep 0.1
screen -S "Script_AIL" -X screen -t "Base64" bash -c './Base64.py; read x'
sleep 0.1
screen -S "Script_AIL" -X screen -t "Bitcoin" bash -c './Bitcoin.py; read x'
sleep 0.1
screen -S "Script_AIL" -X screen -t "Phone" bash -c './Phone.py; read x' screen -S "Script_AIL" -X screen -t "Phone" bash -c './Phone.py; read x'
sleep 0.1 sleep 0.1
screen -S "Script_AIL" -X screen -t "Release" bash -c './Release.py; read x' screen -S "Script_AIL" -X screen -t "Release" bash -c './Release.py; read x'
@ -183,7 +173,10 @@ function shutting_down_redis {
bash -c $redis_dir'redis-cli -p 6380 SHUTDOWN' bash -c $redis_dir'redis-cli -p 6380 SHUTDOWN'
sleep 0.1 sleep 0.1
bash -c $redis_dir'redis-cli -p 6381 SHUTDOWN' bash -c $redis_dir'redis-cli -p 6381 SHUTDOWN'
sleep 0.1 }
function shutting_down_ardb {
redis_dir=${AIL_HOME}/redis/src/
bash -c $redis_dir'redis-cli -p 6382 SHUTDOWN' bash -c $redis_dir'redis-cli -p 6382 SHUTDOWN'
} }
@ -208,12 +201,21 @@ function checking_redis {
flag_redis=1 flag_redis=1
fi fi
sleep 0.1 sleep 0.1
return $flag_redis;
}
function checking_ardb {
flag_ardb=0
redis_dir=${AIL_HOME}/redis/src/
sleep 0.2
bash -c $redis_dir'redis-cli -p 6382 PING | grep "PONG" &> /dev/null' bash -c $redis_dir'redis-cli -p 6382 PING | grep "PONG" &> /dev/null'
if [ ! $? == 0 ]; then if [ ! $? == 0 ]; then
echo -e $RED"\t6382 not ready"$DEFAULT echo -e $RED"\t6382 not ready"$DEFAULT
flag_redis=1 flag_ardb=1
fi fi
return $flag_redis;
return $flag_ardb;
} }
#If no params, display the help #If no params, display the help
@ -223,12 +225,12 @@ helptext;
############### TESTS ################### ############### TESTS ###################
isredis=`screen -ls | egrep '[0-9]+.Redis_AIL' | cut -d. -f1` isredis=`screen -ls | egrep '[0-9]+.Redis_AIL' | cut -d. -f1`
islvldb=`screen -ls | egrep '[0-9]+.LevelDB_AIL' | cut -d. -f1` isardb=`screen -ls | egrep '[0-9]+.ARDB_AIL' | cut -d. -f1`
islogged=`screen -ls | egrep '[0-9]+.Logging_AIL' | cut -d. -f1` islogged=`screen -ls | egrep '[0-9]+.Logging_AIL' | cut -d. -f1`
isqueued=`screen -ls | egrep '[0-9]+.Queue_AIL' | cut -d. -f1` isqueued=`screen -ls | egrep '[0-9]+.Queue_AIL' | cut -d. -f1`
isscripted=`screen -ls | egrep '[0-9]+.Script_AIL' | cut -d. -f1` isscripted=`screen -ls | egrep '[0-9]+.Script_AIL' | cut -d. -f1`
options=("Redis" "LevelDB" "Logs" "Queues" "Scripts" "Killall" "Shutdown" "Update-config") options=("Redis" "Ardb" "Logs" "Queues" "Scripts" "Killall" "Shutdown" "Update-config")
menu() { menu() {
echo "What do you want to Launch?:" echo "What do you want to Launch?:"
@ -259,9 +261,9 @@ for i in ${!options[@]}; do
echo -e $RED"\t* A screen is already launched"$DEFAULT echo -e $RED"\t* A screen is already launched"$DEFAULT
fi fi
;; ;;
LevelDB) Ardb)
if [[ ! $islvldb ]]; then if [[ ! $isardb ]]; then
launching_lvldb; launching_ardb;
else else
echo -e $RED"\t* A screen is already launched"$DEFAULT echo -e $RED"\t* A screen is already launched"$DEFAULT
fi fi
@ -282,12 +284,13 @@ for i in ${!options[@]}; do
;; ;;
Scripts) Scripts)
if [[ ! $isscripted ]]; then if [[ ! $isscripted ]]; then
if checking_redis; then sleep 1
if checking_redis && checking_ardb; then
launching_scripts; launching_scripts;
else else
echo -e $YELLOW"\tScript not started, waiting 3 secondes"$DEFAULT echo -e $YELLOW"\tScript not started, waiting 5 seconds"$DEFAULT
sleep 3 sleep 5
if checking_redis; then if checking_redis && checking_ardb; then
launching_scripts; launching_scripts;
else else
echo -e $RED"\tScript not started"$DEFAULT echo -e $RED"\tScript not started"$DEFAULT
@ -298,14 +301,17 @@ for i in ${!options[@]}; do
fi fi
;; ;;
Killall) Killall)
if [[ $isredis || $islvldb || $islogged || $isqueued || $isscripted ]]; then if [[ $isredis || $isardb || $islogged || $isqueued || $isscripted ]]; then
echo -e $GREEN"Gracefully closing redis servers"$DEFAULT echo -e $GREEN"Gracefully closing redis servers"$DEFAULT
shutting_down_redis; shutting_down_redis;
sleep 0.2
echo -e $GREEN"Gracefully closing ardb servers"$DEFAULT
shutting_down_ardb;
echo -e $GREEN"Killing all"$DEFAULT echo -e $GREEN"Killing all"$DEFAULT
kill $isredis $islvldb $islogged $isqueued $isscripted kill $isredis $isardb $islogged $isqueued $isscripted
sleep 0.2 sleep 0.2
echo -e $ROSE`screen -ls`$DEFAULT echo -e $ROSE`screen -ls`$DEFAULT
echo -e $GREEN"\t* $isredis $islvldb $islogged $isqueued $isscripted killed."$DEFAULT echo -e $GREEN"\t* $isredis $isardb $islogged $isqueued $isscripted killed."$DEFAULT
else else
echo -e $RED"\t* No screen to kill"$DEFAULT echo -e $RED"\t* No screen to kill"$DEFAULT
fi fi
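
LAUNCH.sh now waits until both the Redis instances (6379-6381) and the ARDB instance on 6382 answer PING before the scripts are started. The same readiness check can be sketched from Python with redis-py; the host, ports and helper name are illustrative and assume the default local setup:

    import time
    import redis

    def wait_until_ready(ports, host='localhost', timeout=5):
        # True once every instance answers PING, False if one never does within the timeout
        deadline = time.time() + timeout
        pending = set(ports)
        while pending and time.time() < deadline:
            for port in list(pending):
                try:
                    if redis.StrictRedis(host=host, port=port).ping():
                        pending.discard(port)
                except redis.exceptions.ConnectionError:
                    pass
            time.sleep(0.2)
        return not pending

    print(wait_until_ready([6379, 6380, 6381, 6382]))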

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -61,7 +61,7 @@ if __name__ == '__main__':
while True: while True:
try: try:
message = p.get_from_set() message = p.get_from_set()
print message print(message)
if message is not None: if message is not None:
PST = Paste.Paste(message) PST = Paste.Paste(message)
else: else:
@ -77,8 +77,8 @@ if __name__ == '__main__':
# FIXME Not used. # FIXME Not used.
PST.store.sadd("Pastes_Objects", PST.p_path) PST.store.sadd("Pastes_Objects", PST.p_path)
if lines_infos[1] < args.max: if lines_infos[1] < args.max:
p.populate_set_out(PST.p_path, 'LinesShort') p.populate_set_out( PST.p_path , 'LinesShort')
else: else:
p.populate_set_out(PST.p_path, 'LinesLong') p.populate_set_out( PST.p_path , 'LinesLong')
except IOError: except IOError:
print "CRC Checksum Error on : ", PST.p_path print("CRC Checksum Error on : ", PST.p_path)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -28,12 +28,14 @@ if __name__ == "__main__":
config_section = 'Mail' config_section = 'Mail'
p = Process(config_section) p = Process(config_section)
addr_dns = p.config.get("Mail", "dns")
# REDIS # # REDIS #
r_serv2 = redis.StrictRedis( r_serv2 = redis.StrictRedis(
host=p.config.get("Redis_Cache", "host"), host=p.config.get("Redis_Cache", "host"),
port=p.config.getint("Redis_Cache", "port"), port=p.config.getint("Redis_Cache", "port"),
db=p.config.getint("Redis_Cache", "db")) db=p.config.getint("Redis_Cache", "db"),
decode_responses=True)
# FUNCTIONS # # FUNCTIONS #
publisher.info("Suscribed to channel mails_categ") publisher.info("Suscribed to channel mails_categ")
@ -56,7 +58,7 @@ if __name__ == "__main__":
if prec_filename is None or filename != prec_filename: if prec_filename is None or filename != prec_filename:
PST = Paste.Paste(filename) PST = Paste.Paste(filename)
MX_values = lib_refine.checking_MX_record( MX_values = lib_refine.checking_MX_record(
r_serv2, PST.get_regex(email_regex)) r_serv2, PST.get_regex(email_regex), addr_dns)
if MX_values[0] >= 1: if MX_values[0] >= 1:
@ -78,14 +80,14 @@ if __name__ == "__main__":
publisher.info(to_print) publisher.info(to_print)
#Send to ModuleStats #Send to ModuleStats
for mail in MX_values[1]: for mail in MX_values[1]:
print 'mail;{};{};{}'.format(1, mail, PST.p_date) print('mail;{};{};{}'.format(1, mail, PST.p_date))
p.populate_set_out('mail;{};{};{}'.format(1, mail, PST.p_date), 'ModuleStats') p.populate_set_out('mail;{};{};{}'.format(1, mail, PST.p_date), 'ModuleStats')
prec_filename = filename prec_filename = filename
else: else:
publisher.debug("Script Mails is Idling 10s") publisher.debug("Script Mails is Idling 10s")
print 'Sleeping' print('Sleeping')
time.sleep(10) time.sleep(10)
message = p.get_from_set() message = p.get_from_set()
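
Mail.py now reads a dns address from its configuration and hands it to lib_refine.checking_MX_record, so MX lookups go through a chosen resolver instead of the system default. A rough sketch of such a lookup with dnspython (assumed here; the real logic lives in lib_refine, and the nameserver and domain below are placeholders):

    import dns.resolver
    import dns.exception

    def has_mx_record(domain, nameserver='8.8.8.8'):
        resolver = dns.resolver.Resolver()
        resolver.nameservers = [nameserver]   # force lookups through the configured DNS server
        try:
            answers = resolver.query(domain, 'MX')   # resolver.resolve() in dnspython >= 2.0
            return len(answers) > 0
        except (dns.resolver.NXDOMAIN, dns.resolver.NoAnswer, dns.exception.Timeout):
            return False

    print(has_mx_record('circl.lu'))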

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
The Mixer Module The Mixer Module
@ -35,7 +35,7 @@ import os
import time import time
from pubsublogger import publisher from pubsublogger import publisher
import redis import redis
import ConfigParser import configparser
from Helper import Process from Helper import Process
@ -58,14 +58,15 @@ if __name__ == '__main__':
Did you set environment variables? \ Did you set environment variables? \
Or activate the virtualenv.') Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read(configfile) cfg.read(configfile)
# REDIS # # REDIS #
server = redis.StrictRedis( server = redis.StrictRedis(
host=cfg.get("Redis_Mixer_Cache", "host"), host=cfg.get("Redis_Mixer_Cache", "host"),
port=cfg.getint("Redis_Mixer_Cache", "port"), port=cfg.getint("Redis_Mixer_Cache", "port"),
db=cfg.getint("Redis_Mixer_Cache", "db")) db=cfg.getint("Redis_Mixer_Cache", "db"),
decode_responses=True)
# LOGGING # # LOGGING #
publisher.info("Feed Script started to receive & publish.") publisher.info("Feed Script started to receive & publish.")
@ -89,9 +90,13 @@ if __name__ == '__main__':
splitted = message.split() splitted = message.split()
if len(splitted) == 2: if len(splitted) == 2:
complete_paste, gzip64encoded = splitted complete_paste, gzip64encoded = splitted
try: try:
#feeder_name = ( complete_paste.replace("archive/","") ).split("/")[0]
feeder_name, paste_name = complete_paste.split('>') feeder_name, paste_name = complete_paste.split('>')
feeder_name.replace(" ","") feeder_name.replace(" ","")
paste_name = complete_paste
except ValueError as e: except ValueError as e:
feeder_name = "unnamed_feeder" feeder_name = "unnamed_feeder"
paste_name = complete_paste paste_name = complete_paste
@ -106,7 +111,9 @@ if __name__ == '__main__':
duplicated_paste_per_feeder[feeder_name] = 0 duplicated_paste_per_feeder[feeder_name] = 0
relay_message = "{0} {1}".format(paste_name, gzip64encoded) relay_message = "{0} {1}".format(paste_name, gzip64encoded)
digest = hashlib.sha1(gzip64encoded).hexdigest() #relay_message = b" ".join( [paste_name, gzip64encoded] )
digest = hashlib.sha1(gzip64encoded.encode('utf8')).hexdigest()
# Avoid any duplicate coming from any sources # Avoid any duplicate coming from any sources
if operation_mode == 1: if operation_mode == 1:
@ -173,26 +180,26 @@ if __name__ == '__main__':
else: else:
# TODO Store the name of the empty paste inside a Redis-list. # TODO Store the name of the empty paste inside a Redis-list.
print "Empty Paste: not processed" print("Empty Paste: not processed")
publisher.debug("Empty Paste: {0} not processed".format(message)) publisher.debug("Empty Paste: {0} not processed".format(message))
else: else:
print "Empty Queues: Waiting..." print("Empty Queues: Waiting...")
if int(time.time() - time_1) > refresh_time: if int(time.time() - time_1) > refresh_time:
print processed_paste_per_feeder print(processed_paste_per_feeder)
to_print = 'Mixer; ; ; ;mixer_all All_feeders Processed {0} paste(s) in {1}sec'.format(processed_paste, refresh_time) to_print = 'Mixer; ; ; ;mixer_all All_feeders Processed {0} paste(s) in {1}sec'.format(processed_paste, refresh_time)
print to_print print(to_print)
publisher.info(to_print) publisher.info(to_print)
processed_paste = 0 processed_paste = 0
for feeder, count in processed_paste_per_feeder.iteritems(): for feeder, count in processed_paste_per_feeder.items():
to_print = 'Mixer; ; ; ;mixer_{0} {0} Processed {1} paste(s) in {2}sec'.format(feeder, count, refresh_time) to_print = 'Mixer; ; ; ;mixer_{0} {0} Processed {1} paste(s) in {2}sec'.format(feeder, count, refresh_time)
print to_print print(to_print)
publisher.info(to_print) publisher.info(to_print)
processed_paste_per_feeder[feeder] = 0 processed_paste_per_feeder[feeder] = 0
for feeder, count in duplicated_paste_per_feeder.iteritems(): for feeder, count in duplicated_paste_per_feeder.items():
to_print = 'Mixer; ; ; ;mixer_{0} {0} Duplicated {1} paste(s) in {2}sec'.format(feeder, count, refresh_time) to_print = 'Mixer; ; ; ;mixer_{0} {0} Duplicated {1} paste(s) in {2}sec'.format(feeder, count, refresh_time)
print to_print print(to_print)
publisher.info(to_print) publisher.info(to_print)
duplicated_paste_per_feeder[feeder] = 0 duplicated_paste_per_feeder[feeder] = 0
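
The Mixer change above is a typical Python 3 fix: hashlib.sha1 only accepts bytes, and with decode_responses the gzip64encoded payload arrives as str, so it has to be encoded before hashing; the digest is then used as the cross-feeder deduplication key. A minimal illustration (the paste body is made up):

    import base64
    import gzip
    import hashlib

    raw = gzip.compress(b'paste body')
    gzip64encoded = base64.standard_b64encode(raw).decode('ascii')   # str, as it comes off the queue

    # Python 2 accepted hashlib.sha1(gzip64encoded); Python 3 needs explicit bytes
    digest = hashlib.sha1(gzip64encoded.encode('utf8')).hexdigest()
    print(digest)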

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
''' '''
@ -20,7 +20,7 @@ import os
import signal import signal
import argparse import argparse
from subprocess import PIPE, Popen from subprocess import PIPE, Popen
import ConfigParser import configparser
import json import json
from terminaltables import AsciiTable from terminaltables import AsciiTable
import textwrap import textwrap
@ -51,7 +51,7 @@ last_refresh = 0
def getPid(module): def getPid(module):
p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
for line in p.stdout: for line in p.stdout:
print line print(line)
splittedLine = line.split() splittedLine = line.split()
if 'python2' in splittedLine: if 'python2' in splittedLine:
return int(splittedLine[0]) return int(splittedLine[0])
@ -76,7 +76,7 @@ def cleanRedis():
flag_pid_valid = True flag_pid_valid = True
if not flag_pid_valid: if not flag_pid_valid:
print flag_pid_valid, 'cleaning', pid, 'in', k print(flag_pid_valid, 'cleaning', pid, 'in', k)
server.srem(k, pid) server.srem(k, pid)
inst_time = datetime.datetime.fromtimestamp(int(time.time())) inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, moduleName, pid, "Cleared invalid pid in " + k]) printarrayGlob.insert(1, [inst_time, moduleName, pid, "Cleared invalid pid in " + k])
@ -85,11 +85,11 @@ def cleanRedis():
def kill_module(module, pid): def kill_module(module, pid):
print '' print('')
print '-> trying to kill module:', module print('-> trying to kill module:', module)
if pid is None: if pid is None:
print 'pid was None' print('pid was None')
printarrayGlob.insert(1, [0, module, pid, "PID was None"]) printarrayGlob.insert(1, [0, module, pid, "PID was None"])
printarrayGlob.pop() printarrayGlob.pop()
pid = getPid(module) pid = getPid(module)
@ -102,15 +102,15 @@ def kill_module(module, pid):
try: try:
os.kill(pid, signal.SIGUSR1) os.kill(pid, signal.SIGUSR1)
except OSError: except OSError:
print pid, 'already killed' print(pid, 'already killed')
inst_time = datetime.datetime.fromtimestamp(int(time.time())) inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Already killed"]) printarrayGlob.insert(1, [inst_time, module, pid, "Already killed"])
printarrayGlob.pop() printarrayGlob.pop()
return return
time.sleep(1) time.sleep(1)
if getPid(module) is None: if getPid(module) is None:
print module, 'has been killed' print(module, 'has been killed')
print 'restarting', module, '...' print('restarting', module, '...')
p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
inst_time = datetime.datetime.fromtimestamp(int(time.time())) inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killed"]) printarrayGlob.insert(1, [inst_time, module, pid, "Killed"])
@ -119,7 +119,7 @@ def kill_module(module, pid):
printarrayGlob.pop() printarrayGlob.pop()
else: else:
print 'killing failed, retrying...' print('killing failed, retrying...')
inst_time = datetime.datetime.fromtimestamp(int(time.time())) inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killing #1 failed."]) printarrayGlob.insert(1, [inst_time, module, pid, "Killing #1 failed."])
printarrayGlob.pop() printarrayGlob.pop()
@ -128,8 +128,8 @@ def kill_module(module, pid):
os.kill(pid, signal.SIGUSR1) os.kill(pid, signal.SIGUSR1)
time.sleep(1) time.sleep(1)
if getPid(module) is None: if getPid(module) is None:
print module, 'has been killed' print(module, 'has been killed')
print 'restarting', module, '...' print('restarting', module, '...')
p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
inst_time = datetime.datetime.fromtimestamp(int(time.time())) inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killed"]) printarrayGlob.insert(1, [inst_time, module, pid, "Killed"])
@ -137,12 +137,12 @@ def kill_module(module, pid):
printarrayGlob.pop() printarrayGlob.pop()
printarrayGlob.pop() printarrayGlob.pop()
else: else:
print 'killing failed!' print('killing failed!')
inst_time = datetime.datetime.fromtimestamp(int(time.time())) inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed!"]) printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed!"])
printarrayGlob.pop() printarrayGlob.pop()
else: else:
print 'Module does not exist' print('Module does not exist')
inst_time = datetime.datetime.fromtimestamp(int(time.time())) inst_time = datetime.datetime.fromtimestamp(int(time.time()))
printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed, module not found"]) printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed, module not found"])
printarrayGlob.pop() printarrayGlob.pop()
@ -192,14 +192,15 @@ if __name__ == "__main__":
Did you set environment variables? \ Did you set environment variables? \
Or activate the virtualenv.') Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read(configfile) cfg.read(configfile)
# REDIS # # REDIS #
server = redis.StrictRedis( server = redis.StrictRedis(
host=cfg.get("Redis_Queues", "host"), host=cfg.get("Redis_Queues", "host"),
port=cfg.getint("Redis_Queues", "port"), port=cfg.getint("Redis_Queues", "port"),
db=cfg.getint("Redis_Queues", "db")) db=cfg.getint("Redis_Queues", "db"),
decode_responses=True)
if args.clear == 1: if args.clear == 1:
clearRedisModuleInfo() clearRedisModuleInfo()
@ -227,7 +228,7 @@ if __name__ == "__main__":
printarray1 = [] printarray1 = []
printarray2 = [] printarray2 = []
printarray3 = [] printarray3 = []
for queue, card in server.hgetall("queues").iteritems(): for queue, card in server.hgetall("queues").items():
all_queue.add(queue) all_queue.add(queue)
key = "MODULE_" + queue + "_" key = "MODULE_" + queue + "_"
keySet = "MODULE_TYPE_" + queue keySet = "MODULE_TYPE_" + queue
@ -337,15 +338,15 @@ if __name__ == "__main__":
legend = AsciiTable(legend_array, title="Legend") legend = AsciiTable(legend_array, title="Legend")
legend.column_max_width(1) legend.column_max_width(1)
print legend.table print(legend.table)
print '\n' print('\n')
print t1.table print(t1.table)
print '\n' print('\n')
print t2.table print(t2.table)
print '\n' print('\n')
print t3.table print(t3.table)
print '\n' print('\n')
print t4.table print(t4.table)
if (datetime.datetime.now() - lastTime).total_seconds() > args.refresh*5: if (datetime.datetime.now() - lastTime).total_seconds() > args.refresh*5:
lastTime = datetime.datetime.now() lastTime = datetime.datetime.now()
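
Two changes recur throughout these monitoring scripts: StrictRedis is now created with decode_responses=True so replies come back as str instead of bytes, and the Python 2 iteritems() calls become items(). A short sketch of the difference, assuming a local Redis (the hash name is illustrative):

    import redis

    r_bytes = redis.StrictRedis(host='localhost', port=6379, db=0)
    r_str = redis.StrictRedis(host='localhost', port=6379, db=0, decode_responses=True)

    r_str.hset('queues_demo', 'Mail', 3)

    print(r_bytes.hgetall('queues_demo'))   # {b'Mail': b'3'} -- bytes keys and values
    print(r_str.hgetall('queues_demo'))     # {'Mail': '3'}   -- plain str

    for queue, card in r_str.hgetall('queues_demo').items():   # .iteritems() is gone in Python 3
        print(queue, int(card))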

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
This module makes statistics for some modules and providers This module makes statistics for some modules and providers
@ -52,10 +52,10 @@ def compute_most_posted(server, message):
# Member set is a list of (value, score) pairs # Member set is a list of (value, score) pairs
if int(member_set[0][1]) < keyword_total_sum: if int(member_set[0][1]) < keyword_total_sum:
#remove min from set and add the new one #remove min from set and add the new one
print module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' print(module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
server.zrem(redis_progression_name_set, member_set[0][0]) server.zrem(redis_progression_name_set, member_set[0][0])
server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword) server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword)
print redis_progression_name_set print(redis_progression_name_set)
def compute_provider_info(server_trend, server_pasteName, path): def compute_provider_info(server_trend, server_pasteName, path):
@ -94,7 +94,7 @@ def compute_provider_info(server_trend, server_pasteName, path):
# Member set is a list of (value, score) pairs # Member set is a list of (value, score) pairs
if float(member_set[0][1]) < new_avg: if float(member_set[0][1]) < new_avg:
#remove min from set and add the new one #remove min from set and add the new one
print 'Size - adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' print('Size - adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
server_trend.zrem(redis_sum_size_set, member_set[0][0]) server_trend.zrem(redis_sum_size_set, member_set[0][0])
server_trend.zadd(redis_sum_size_set, float(sum_size), paste_provider) server_trend.zadd(redis_sum_size_set, float(sum_size), paste_provider)
server_trend.zrem(redis_avg_size_name_set, member_set[0][0]) server_trend.zrem(redis_avg_size_name_set, member_set[0][0])
@ -110,7 +110,7 @@ def compute_provider_info(server_trend, server_pasteName, path):
# Member set is a list of (value, score) pairs # Member set is a list of (value, score) pairs
if int(member_set[0][1]) < num_paste: if int(member_set[0][1]) < num_paste:
#remove min from set and add the new one #remove min from set and add the new one
print 'Num - adding ' +paste_provider+ '(' +str(num_paste)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' print('Num - adding ' +paste_provider+ '(' +str(num_paste)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
server_trend.zrem(member_set[0][0]) server_trend.zrem(member_set[0][0])
server_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider) server_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider)
@ -133,14 +133,16 @@ if __name__ == '__main__':
# REDIS # # REDIS #
r_serv_trend = redis.StrictRedis( r_serv_trend = redis.StrictRedis(
host=p.config.get("Redis_Level_DB_Trending", "host"), host=p.config.get("ARDB_Trending", "host"),
port=p.config.get("Redis_Level_DB_Trending", "port"), port=p.config.get("ARDB_Trending", "port"),
db=p.config.get("Redis_Level_DB_Trending", "db")) db=p.config.get("ARDB_Trending", "db"),
decode_responses=True)
r_serv_pasteName = redis.StrictRedis( r_serv_pasteName = redis.StrictRedis(
host=p.config.get("Redis_Paste_Name", "host"), host=p.config.get("Redis_Paste_Name", "host"),
port=p.config.get("Redis_Paste_Name", "port"), port=p.config.get("Redis_Paste_Name", "port"),
db=p.config.get("Redis_Paste_Name", "db")) db=p.config.get("Redis_Paste_Name", "db"),
decode_responses=True)
# Endless loop getting messages from the input queue # Endless loop getting messages from the input queue
while True: while True:
@ -149,7 +151,7 @@ if __name__ == '__main__':
if message is None: if message is None:
publisher.debug("{} queue is empty, waiting".format(config_section)) publisher.debug("{} queue is empty, waiting".format(config_section))
print 'sleeping' print('sleeping')
time.sleep(20) time.sleep(20)
continue continue
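
compute_most_posted and compute_provider_info above both maintain a bounded "top N" sorted set: fetch the current minimum with its score and, if the newcomer beats it, zrem the minimum and zadd the newcomer. A standalone sketch of that pattern (set name, size limit and sample values are illustrative; note the diff uses the older zadd(key, score, member) call order, while redis-py 3.x takes a mapping):

    import redis

    r = redis.StrictRedis(host='localhost', port=6379, db=0, decode_responses=True)
    TOP_SET = 'top_mail_demo'
    MAX_SET_CARDINALITY = 5

    def record(keyword, total):
        if r.zcard(TOP_SET) < MAX_SET_CARDINALITY:
            r.zadd(TOP_SET, {keyword: total})
            return
        member, score = r.zrangebyscore(TOP_SET, '-inf', '+inf',
                                        start=0, num=1, withscores=True)[0]
        if score < total:
            # evict the current minimum and promote the newcomer
            r.zrem(TOP_SET, member)
            r.zadd(TOP_SET, {keyword: total})

    record('leak@example.com', 42)
    print(r.zrange(TOP_SET, 0, -1, withscores=True))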

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
from asciimatics.widgets import Frame, ListBox, Layout, Divider, Text, \ from asciimatics.widgets import Frame, ListBox, Layout, Divider, Text, \
@ -10,7 +10,7 @@ from asciimatics.event import Event
from asciimatics.event import KeyboardEvent, MouseEvent from asciimatics.event import KeyboardEvent, MouseEvent
import sys, os import sys, os
import time, datetime import time, datetime
import argparse, ConfigParser import argparse, configparser
import json import json
import redis import redis
import psutil import psutil
@ -497,9 +497,8 @@ MANAGE MODULES AND GET INFOS
def getPid(module): def getPid(module):
p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
for line in p.stdout: for line in p.stdout:
print line
splittedLine = line.split() splittedLine = line.split()
if 'python2' in splittedLine: if 'python3' in splittedLine:
return int(splittedLine[0]) return int(splittedLine[0])
return None return None
@ -517,15 +516,20 @@ def cleanRedis():
proc = Popen([command_search_name.format(pid)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) proc = Popen([command_search_name.format(pid)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
try: try:
for line in proc.stdout: for line in proc.stdout:
line = line.decode('utf8')
splittedLine = line.split() splittedLine = line.split()
if ('python2' in splittedLine or 'python' in splittedLine) and "./"+moduleName+".py" in splittedLine: if ('python3.5' in splittedLine or 'python3' in splittedLine or 'python' in splittedLine):
moduleCommand = "./"+moduleName + ".py"
moduleCommand2 = moduleName + ".py"
if(moduleCommand in splittedLine or moduleCommand2 in splittedLine):
flag_pid_valid = True flag_pid_valid = True
if not flag_pid_valid: if not flag_pid_valid:
#print flag_pid_valid, 'cleaning', pid, 'in', k #print flag_pid_valid, 'cleaning', pid, 'in', k
server.srem(k, pid) server.srem(k, pid)
inst_time = datetime.datetime.fromtimestamp(int(time.time())) inst_time = datetime.datetime.fromtimestamp(int(time.time()))
log(([str(inst_time).split(' ')[1], moduleName, pid, "Cleared invalid pid in " + k], 0)) log(([str(inst_time).split(' ')[1], moduleName, pid, "Cleared invalid pid in " + (k)], 0))
#Error due to resize, interrupted sys call #Error due to resize, interrupted sys call
except IOError as e: except IOError as e:
@ -601,7 +605,7 @@ def fetchQueueData():
printarray_running = [] printarray_running = []
printarray_idle = [] printarray_idle = []
printarray_notrunning = [] printarray_notrunning = []
for queue, card in server.hgetall("queues").iteritems(): for queue, card in iter(server.hgetall("queues").items()):
all_queue.add(queue) all_queue.add(queue)
key = "MODULE_" + queue + "_" key = "MODULE_" + queue + "_"
keySet = "MODULE_TYPE_" + queue keySet = "MODULE_TYPE_" + queue
@ -609,7 +613,9 @@ def fetchQueueData():
for moduleNum in server.smembers(keySet): for moduleNum in server.smembers(keySet):
value = server.get(key + str(moduleNum)) value = server.get(key + str(moduleNum))
complete_paste_path = server.get(key + str(moduleNum) + "_PATH") complete_paste_path = ( server.get(key + str(moduleNum) + "_PATH") )
if(complete_paste_path is not None):
complete_paste_path = complete_paste_path
COMPLETE_PASTE_PATH_PER_PID[moduleNum] = complete_paste_path COMPLETE_PASTE_PATH_PER_PID[moduleNum] = complete_paste_path
if value is not None: if value is not None:
@ -644,6 +650,7 @@ def fetchQueueData():
cpu_avg = sum(CPU_TABLE[moduleNum])/len(CPU_TABLE[moduleNum]) cpu_avg = sum(CPU_TABLE[moduleNum])/len(CPU_TABLE[moduleNum])
if len(CPU_TABLE[moduleNum]) > args.refresh*10: if len(CPU_TABLE[moduleNum]) > args.refresh*10:
CPU_TABLE[moduleNum].pop() CPU_TABLE[moduleNum].pop()
mem_percent = CPU_OBJECT_TABLE[int(moduleNum)].memory_percent() mem_percent = CPU_OBJECT_TABLE[int(moduleNum)].memory_percent()
except psutil.NoSuchProcess: except psutil.NoSuchProcess:
del CPU_OBJECT_TABLE[int(moduleNum)] del CPU_OBJECT_TABLE[int(moduleNum)]
@ -652,6 +659,7 @@ def fetchQueueData():
cpu_avg = cpu_percent cpu_avg = cpu_percent
mem_percent = 0 mem_percent = 0
except KeyError: except KeyError:
#print('key error2')
try: try:
CPU_OBJECT_TABLE[int(moduleNum)] = psutil.Process(int(moduleNum)) CPU_OBJECT_TABLE[int(moduleNum)] = psutil.Process(int(moduleNum))
cpu_percent = CPU_OBJECT_TABLE[int(moduleNum)].cpu_percent() cpu_percent = CPU_OBJECT_TABLE[int(moduleNum)].cpu_percent()
@ -671,7 +679,7 @@ def fetchQueueData():
printarray_idle.append( ([" <K> ", str(queue), str(moduleNum), str(processed_time_readable), str(path)], moduleNum) ) printarray_idle.append( ([" <K> ", str(queue), str(moduleNum), str(processed_time_readable), str(path)], moduleNum) )
PID_NAME_DICO[int(moduleNum)] = str(queue) PID_NAME_DICO[int(moduleNum)] = str(queue)
array_module_type.sort(lambda x,y: cmp(x[0][4], y[0][4]), reverse=True) #Sort by num of pastes #array_module_type.sort(lambda x,y: cmp(x[0][4], y[0][4]), reverse=True) #Sort by num of pastes
for e in array_module_type: for e in array_module_type:
printarray_running.append(e) printarray_running.append(e)
@ -715,6 +723,7 @@ def format_string(tab, padding_row):
text="" text=""
for ite, elem in enumerate(the_array): for ite, elem in enumerate(the_array):
if len(elem) > padding_row[ite]: if len(elem) > padding_row[ite]:
text += "*" + elem[-padding_row[ite]+6:] text += "*" + elem[-padding_row[ite]+6:]
padd_off = " "*5 padd_off = " "*5
@ -761,7 +770,7 @@ def demo(screen):
if time.time() - time_cooldown > args.refresh: if time.time() - time_cooldown > args.refresh:
cleanRedis() cleanRedis()
for key, val in fetchQueueData().iteritems(): #fetch data and put it into the tables for key, val in iter(fetchQueueData().items()): #fetch data and put it into the tables
TABLES[key] = val TABLES[key] = val
TABLES["logs"] = format_string(printarrayLog, TABLES_PADDING["logs"]) TABLES["logs"] = format_string(printarrayLog, TABLES_PADDING["logs"])
@ -790,14 +799,15 @@ if __name__ == "__main__":
Did you set environment variables? \ Did you set environment variables? \
Or activate the virtualenv.') Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read(configfile) cfg.read(configfile)
# REDIS # # REDIS #
server = redis.StrictRedis( server = redis.StrictRedis(
host=cfg.get("Redis_Queues", "host"), host=cfg.get("Redis_Queues", "host"),
port=cfg.getint("Redis_Queues", "port"), port=cfg.getint("Redis_Queues", "port"),
db=cfg.getint("Redis_Queues", "db")) db=cfg.getint("Redis_Queues", "db"),
decode_responses=True)
if args.clear == 1: if args.clear == 1:
clearRedisModuleInfo() clearRedisModuleInfo()
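
The curses-style manager above builds its CPU and memory columns from psutil Process objects keyed by module PID, falling back gracefully when a module dies between refreshes (psutil.NoSuchProcess). A minimal version of that probe, run here against the current process purely for illustration:

    import os
    import psutil

    def probe(pid):
        try:
            proc = psutil.Process(pid)
            # the first cpu_percent() call primes the counter; a short interval gives a usable value
            return proc.cpu_percent(interval=0.1), proc.memory_percent()
        except psutil.NoSuchProcess:
            return None

    print(probe(os.getpid()))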

View file

@ -1,10 +1,10 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import ConfigParser import configparser
import os import os
import smtplib import smtplib
from email.MIMEMultipart import MIMEMultipart from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText from email.mime.text import MIMEText
""" """
@ -28,7 +28,7 @@ def sendEmailNotification(recipient, term):
Did you set environment variables? \ Did you set environment variables? \
Or activate the virtualenv?') Or activate the virtualenv?')
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read(configfile) cfg.read(configfile)
sender = cfg.get("Notifications", "sender"), sender = cfg.get("Notifications", "sender"),
@ -76,8 +76,5 @@ def sendEmailNotification(recipient, term):
smtp_server.quit() smtp_server.quit()
except Exception as e: except Exception as e:
print str(e) print(str(e))
# raise e # raise e
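
The notification helper now imports MIMEMultipart from email.mime.multipart, its Python 3 location (email.MIMEMultipart no longer exists). A minimal sketch of building and sending such a message; the SMTP host, sender and recipient are placeholders, and the send is left commented so the snippet does not require a mail server:

    import smtplib
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText

    def build_notification(recipient, term, sender='ail@example.com'):
        msg = MIMEMultipart()
        msg['From'] = sender
        msg['To'] = recipient
        msg['Subject'] = 'AIL framework: tracked term {} detected'.format(term)
        msg.attach(MIMEText('A tracked term showed up in a paste.', 'plain'))
        return msg

    msg = build_notification('analyst@example.com', 'mykeyword')
    # with smtplib.SMTP('localhost', 25) as smtp_server:
    #     smtp_server.sendmail(msg['From'], [msg['To']], msg.as_string())
    print(msg['Subject'])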

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
The ZMQ_Sub_Onion Module The ZMQ_Sub_Onion Module
@ -37,11 +37,12 @@ from Helper import Process
def fetch(p, r_cache, urls, domains, path): def fetch(p, r_cache, urls, domains, path):
failed = [] failed = []
downloaded = [] downloaded = []
print len(urls), 'Urls to fetch.' print('{} Urls to fetch'.format(len(urls)))
for url, domain in zip(urls, domains): for url, domain in zip(urls, domains):
if r_cache.exists(url) or url in failed: if r_cache.exists(url) or url in failed:
continue continue
to_fetch = base64.standard_b64encode(url) to_fetch = base64.standard_b64encode(url.encode('utf8'))
print('fetching url: {}'.format(to_fetch))
process = subprocess.Popen(["python", './tor_fetcher.py', to_fetch], process = subprocess.Popen(["python", './tor_fetcher.py', to_fetch],
stdout=subprocess.PIPE) stdout=subprocess.PIPE)
while process.poll() is None: while process.poll() is None:
@ -51,8 +52,10 @@ def fetch(p, r_cache, urls, domains, path):
r_cache.setbit(url, 0, 1) r_cache.setbit(url, 0, 1)
r_cache.expire(url, 360000) r_cache.expire(url, 360000)
downloaded.append(url) downloaded.append(url)
tempfile = process.stdout.read().strip() print('downloaded : {}'.format(downloaded))
with open(tempfile, 'r') as f: '''tempfile = process.stdout.read().strip()
tempfile = tempfile.decode('utf8')
#with open(tempfile, 'r') as f:
filename = path + domain + '.gz' filename = path + domain + '.gz'
fetched = f.read() fetched = f.read()
content = base64.standard_b64decode(fetched) content = base64.standard_b64decode(fetched)
@ -66,16 +69,16 @@ def fetch(p, r_cache, urls, domains, path):
ff.write(content) ff.write(content)
p.populate_set_out(save_path, 'Global') p.populate_set_out(save_path, 'Global')
p.populate_set_out(url, 'ValidOnion') p.populate_set_out(url, 'ValidOnion')
p.populate_set_out(fetched, 'FetchedOnion') p.populate_set_out(fetched, 'FetchedOnion')'''
yield url yield url
os.unlink(tempfile) #os.unlink(tempfile)
else: else:
r_cache.setbit(url, 0, 0) r_cache.setbit(url, 0, 0)
r_cache.expire(url, 3600) r_cache.expire(url, 3600)
failed.append(url) failed.append(url)
print 'Failed at downloading', url print('Failed at downloading', url)
print process.stdout.read() print(process.stdout.read())
print 'Failed:', len(failed), 'Downloaded:', len(downloaded) print('Failed:', len(failed), 'Downloaded:', len(downloaded))
if __name__ == "__main__": if __name__ == "__main__":
@ -91,7 +94,8 @@ if __name__ == "__main__":
r_cache = redis.StrictRedis( r_cache = redis.StrictRedis(
host=p.config.get("Redis_Cache", "host"), host=p.config.get("Redis_Cache", "host"),
port=p.config.getint("Redis_Cache", "port"), port=p.config.getint("Redis_Cache", "port"),
db=p.config.getint("Redis_Cache", "db")) db=p.config.getint("Redis_Cache", "db"),
decode_responses=True)
# FUNCTIONS # # FUNCTIONS #
publisher.info("Script subscribed to channel onion_categ") publisher.info("Script subscribed to channel onion_categ")
@ -109,7 +113,7 @@ if __name__ == "__main__":
while True: while True:
if message is not None: if message is not None:
print message print(message)
filename, score = message.split() filename, score = message.split()
# "For each new paste" # "For each new paste"
@ -131,6 +135,8 @@ if __name__ == "__main__":
PST.save_attribute_redis(channel, domains_list) PST.save_attribute_redis(channel, domains_list)
to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date, to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date,
PST.p_name) PST.p_name)
print(len(domains_list))
if len(domains_list) > 0: if len(domains_list) > 0:
publisher.warning('{}Detected {} .onion(s);{}'.format( publisher.warning('{}Detected {} .onion(s);{}'.format(
@ -144,7 +150,7 @@ if __name__ == "__main__":
PST.p_date, PST.p_date,
PST.p_name) PST.p_name)
for url in fetch(p, r_cache, urls, domains_list, path): for url in fetch(p, r_cache, urls, domains_list, path):
publisher.warning('{}Checked {};{}'.format(to_print, url, PST.p_path)) publisher.info('{}Checked {};{}'.format(to_print, url, PST.p_path))
p.populate_set_out('onion;{}'.format(PST.p_path), 'alertHandler') p.populate_set_out('onion;{}'.format(PST.p_path), 'alertHandler')
else: else:
publisher.info('{}Onion related;{}'.format(to_print, PST.p_path)) publisher.info('{}Onion related;{}'.format(to_print, PST.p_path))
@ -152,6 +158,6 @@ if __name__ == "__main__":
prec_filename = filename prec_filename = filename
else: else:
publisher.debug("Script url is Idling 10s") publisher.debug("Script url is Idling 10s")
print 'Sleeping' #print('Sleeping')
time.sleep(10) time.sleep(10)
message = p.get_from_set() message = p.get_from_set()
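
Onion.py now encodes the URL to bytes before base64-encoding it, because base64.standard_b64encode rejects str under Python 3; the encoded value is what gets passed to tor_fetcher.py on the command line. A small illustration of the encode/decode pair (the onion URL is a placeholder):

    import base64

    url = 'http://examplev3onionaddress.onion/page'

    to_fetch = base64.standard_b64encode(url.encode('utf8'))   # bytes, safe to hand to the subprocess
    print(to_fetch)

    # the fetcher side reverses it
    print(base64.standard_b64decode(to_fetch).decode('utf8'))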

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -30,10 +30,11 @@ def search_phone(message):
# if the list is greater than 4, we consider the Paste may contain a list of phone numbers # if the list is greater than 4, we consider the Paste may contain a list of phone numbers
if len(results) > 4: if len(results) > 4:
print results print(results)
publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name)) publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name))
#send to Browse_warning_paste #send to Browse_warning_paste
p.populate_set_out('phone;{}'.format(message), 'alertHandler') msg = 'phone;{}'.format(message)
p.populate_set_out(msg, 'alertHandler')
#Send to duplicate #Send to duplicate
p.populate_set_out(message, 'Duplicate') p.populate_set_out(message, 'Duplicate')
stats = {} stats = {}

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
from pubsublogger import publisher from pubsublogger import publisher

View file

@ -1,9 +1,9 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import redis import redis
import argparse import argparse
import ConfigParser import configparser
import time import time
import os import os
from pubsublogger import publisher from pubsublogger import publisher
@ -14,7 +14,7 @@ def main():
"""Main Function""" """Main Function"""
# CONFIG # # CONFIG #
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read('./packages/config.cfg') cfg.read('./packages/config.cfg')
# SCRIPT PARSER # # SCRIPT PARSER #
@ -30,7 +30,8 @@ def main():
r_serv = redis.StrictRedis( r_serv = redis.StrictRedis(
host=cfg.get("Redis_Queues", "host"), host=cfg.get("Redis_Queues", "host"),
port=cfg.getint("Redis_Queues", "port"), port=cfg.getint("Redis_Queues", "port"),
db=cfg.getint("Redis_Queues", "db")) db=cfg.getint("Redis_Queues", "db"),
decode_responses=True)
# LOGGING # # LOGGING #
publisher.port = 6380 publisher.port = 6380
@ -49,7 +50,7 @@ def main():
row.sort() row.sort()
table.add_rows(row, header=False) table.add_rows(row, header=False)
os.system('clear') os.system('clear')
print table.draw() print(table.draw())
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
This Module is used for term frequency. This Module is used for term frequency.
@ -54,9 +54,10 @@ if __name__ == "__main__":
# REDIS # # REDIS #
server_term = redis.StrictRedis( server_term = redis.StrictRedis(
host=p.config.get("Redis_Level_DB_TermFreq", "host"), host=p.config.get("ARDB_TermFreq", "host"),
port=p.config.get("Redis_Level_DB_TermFreq", "port"), port=p.config.get("ARDB_TermFreq", "port"),
db=p.config.get("Redis_Level_DB_TermFreq", "db")) db=p.config.get("ARDB_TermFreq", "db"),
decode_responses=True)
# FUNCTIONS # # FUNCTIONS #
publisher.info("RegexForTermsFrequency script started") publisher.info("RegexForTermsFrequency script started")
@ -115,6 +116,6 @@ if __name__ == "__main__":
else: else:
publisher.debug("Script RegexForTermsFrequency is Idling") publisher.debug("Script RegexForTermsFrequency is Idling")
print "sleeping" print("sleeping")
time.sleep(5) time.sleep(5)
message = p.get_from_set() message = p.get_from_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import time import time
from packages import Paste from packages import Paste
@ -6,6 +6,16 @@ from pubsublogger import publisher
from Helper import Process from Helper import Process
import re import re
import signal
class TimeoutException(Exception):
pass
def timeout_handler(signum, frame):
raise TimeoutException
signal.signal(signal.SIGALRM, timeout_handler)
''' '''
This module takes its input from the global module. This module takes its input from the global module.
It applies some regex and publish matched content It applies some regex and publish matched content
@ -16,6 +26,7 @@ if __name__ == "__main__":
publisher.channel = "Script" publisher.channel = "Script"
config_section = "Release" config_section = "Release"
p = Process(config_section) p = Process(config_section)
max_execution_time = p.config.getint("Curve", "max_execution_time")
publisher.info("Release scripts to find release names") publisher.info("Release scripts to find release names")
movie = "[a-zA-Z0-9.]+\.[0-9]{4}.[a-zA-Z0-9.]+\-[a-zA-Z]+" movie = "[a-zA-Z0-9.]+\.[0-9]{4}.[a-zA-Z0-9.]+\-[a-zA-Z]+"
@ -29,18 +40,28 @@ if __name__ == "__main__":
filepath = p.get_from_set() filepath = p.get_from_set()
if filepath is None: if filepath is None:
publisher.debug("Script Release is Idling 10s") publisher.debug("Script Release is Idling 10s")
print 'Sleeping' print('Sleeping')
time.sleep(10) time.sleep(10)
continue continue
paste = Paste.Paste(filepath) paste = Paste.Paste(filepath)
content = paste.get_p_content() content = paste.get_p_content()
signal.alarm(max_execution_time)
try:
releases = set(re.findall(regex, content)) releases = set(re.findall(regex, content))
if len(releases) == 0: if len(releases) == 0:
continue continue
to_print = 'Release;{};{};{};{} releases;{}'.format(paste.p_source, paste.p_date, paste.p_name, len(releases), paste.p_path) to_print = 'Release;{};{};{};{} releases;{}'.format(paste.p_source, paste.p_date, paste.p_name, len(releases), paste.p_path)
print(to_print)
if len(releases) > 30: if len(releases) > 30:
publisher.warning(to_print) publisher.warning(to_print)
else: else:
publisher.info(to_print) publisher.info(to_print)
except TimeoutException:
print ("{0} processing timeout".format(paste.p_path))
continue
else:
signal.alarm(0)
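
Release.py now guards the regex search with signal.alarm: a SIGALRM handler raises TimeoutException, the alarm is armed before re.findall runs and disarmed once it finishes, so a pathological paste cannot stall the module. A self-contained sketch of the pattern (the regex and timeout value are illustrative, and signal.alarm is Unix-only):

    import re
    import signal

    class TimeoutException(Exception):
        pass

    def timeout_handler(signum, frame):
        raise TimeoutException

    signal.signal(signal.SIGALRM, timeout_handler)

    def findall_with_timeout(regex, content, max_execution_time=5):
        signal.alarm(max_execution_time)        # arm: SIGALRM fires after max_execution_time seconds
        try:
            return set(re.findall(regex, content))
        except TimeoutException:
            print('processing timeout')
            return set()
        finally:
            signal.alarm(0)                     # always disarm

    print(findall_with_timeout(r'[a-zA-Z0-9.]+\.[0-9]{4}\.[a-zA-Z0-9.]+-[a-zA-Z]+',
                               'Some.Movie.2018.1080p.WEB-GROUP'))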

View file

@ -1,9 +1,9 @@
#!/usr/bin/python2.7 #!/usr/bin/python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import redis import redis
import argparse import argparse
import ConfigParser import configparser
from datetime import datetime from datetime import datetime
from pubsublogger import publisher from pubsublogger import publisher
@ -14,7 +14,7 @@ def main():
"""Main Function""" """Main Function"""
# CONFIG # # CONFIG #
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read('./packages/config.cfg') cfg.read('./packages/config.cfg')
# SCRIPT PARSER # # SCRIPT PARSER #
@ -33,9 +33,10 @@ def main():
# port generated automatically depending on the date # port generated automatically depending on the date
curYear = datetime.now().year if args.year is None else args.year curYear = datetime.now().year if args.year is None else args.year
r_serv = redis.StrictRedis( r_serv = redis.StrictRedis(
host=cfg.get("Redis_Level_DB_Hashs", "host"), host=cfg.get("ARDB_Hashs", "host"),
port=curYear, port=cfg.getint("ARDB_Hashs", "port"),
db=cfg.getint("Redis_Level_DB_Hashs", "db")) db=curYear,
decode_responses=True)
# LOGGING # # LOGGING #
publisher.port = 6380 publisher.port = 6380

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -13,7 +13,7 @@ It test different possibility to makes some sqlInjection.
import time import time
import string import string
import urllib2 import urllib.request
import re import re
from pubsublogger import publisher from pubsublogger import publisher
from Helper import Process from Helper import Process
@ -66,16 +66,16 @@ def analyse(url, path):
result_query = 0 result_query = 0
if resource_path is not None: if resource_path is not None:
result_path = is_sql_injection(resource_path) result_path = is_sql_injection(resource_path.decode('utf8'))
if query_string is not None: if query_string is not None:
result_query = is_sql_injection(query_string) result_query = is_sql_injection(query_string.decode('utf8'))
if (result_path > 0) or (result_query > 0): if (result_path > 0) or (result_query > 0):
paste = Paste.Paste(path) paste = Paste.Paste(path)
if (result_path > 1) or (result_query > 1): if (result_path > 1) or (result_query > 1):
print "Detected SQL in URL: " print("Detected SQL in URL: ")
print urllib2.unquote(url) print(urllib.request.unquote(url))
to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_path) to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_path)
publisher.warning(to_print) publisher.warning(to_print)
#Send to duplicate #Send to duplicate
@ -83,8 +83,8 @@ def analyse(url, path):
#send to Browse_warning_paste #send to Browse_warning_paste
p.populate_set_out('sqlinjection;{}'.format(path), 'alertHandler') p.populate_set_out('sqlinjection;{}'.format(path), 'alertHandler')
else: else:
print "Potential SQL injection:" print("Potential SQL injection:")
print urllib2.unquote(url) print(urllib.request.unquote(url))
to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Potential SQL injection", paste.p_path) to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Potential SQL injection", paste.p_path)
publisher.info(to_print) publisher.info(to_print)
@ -92,8 +92,8 @@ def analyse(url, path):
# Try to detect if the url passed might be an sql injection by appliying the regex # Try to detect if the url passed might be an sql injection by appliying the regex
# defined above on it. # defined above on it.
def is_sql_injection(url_parsed): def is_sql_injection(url_parsed):
line = urllib2.unquote(url_parsed) line = urllib.request.unquote(url_parsed)
line = string.upper(line) line = str.upper(line)
result = [] result = []
result_suspect = [] result_suspect = []
@ -104,20 +104,20 @@ def is_sql_injection(url_parsed):
for word_list in word_injection: for word_list in word_injection:
for word in word_list: for word in word_list:
temp_res = string.find(line, string.upper(word)) temp_res = str.find(line, str.upper(word))
if temp_res!=-1: if temp_res!=-1:
result.append(line[temp_res:temp_res+len(word)]) result.append(line[temp_res:temp_res+len(word)])
for word in word_injection_suspect: for word in word_injection_suspect:
temp_res = string.find(line, string.upper(word)) temp_res = str.find(line, str.upper(word))
if temp_res!=-1: if temp_res!=-1:
result_suspect.append(line[temp_res:temp_res+len(word)]) result_suspect.append(line[temp_res:temp_res+len(word)])
if len(result)>0: if len(result)>0:
print result print(result)
return 2 return 2
elif len(result_suspect)>0: elif len(result_suspect)>0:
print result_suspect print(result_suspect)
return 1 return 1
else: else:
return 0 return 0
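
The SQLInjection hunks above swap the Python 2 helpers for their Python 3 equivalents: urllib2.unquote becomes urllib.request.unquote (urllib.parse.unquote is the same function), and the removed string module calls become str methods, written as str.upper/str.find in the diff. A short sketch of the decode-and-match step (the encoded URL fragment is a placeholder):

    import urllib.request

    url_parsed = 'id%3D1%27%20OR%20%271%27%3D%271'

    line = urllib.request.unquote(url_parsed)   # Python 3 home of unquote
    line = line.upper()                         # replaces string.upper(line)

    for word in ('SELECT', 'UNION', "OR '1'='1"):
        pos = line.find(word)                   # replaces string.find(line, word)
        if pos != -1:
            print('hit:', line[pos:pos + len(word)])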

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
Sentiment analyser module. Sentiment analyser module.
@ -33,7 +33,7 @@ size_threshold = 250
line_max_length_threshold = 1000 line_max_length_threshold = 1000
import os import os
import ConfigParser import configparser
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
if not os.path.exists(configfile): if not os.path.exists(configfile):
@ -41,7 +41,7 @@ if not os.path.exists(configfile):
Did you set environment variables? \ Did you set environment variables? \
Or activate the virtualenv.') Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read(configfile) cfg.read(configfile)
sentiment_lexicon_file = cfg.get("Directories", "sentiment_lexicon_file") sentiment_lexicon_file = cfg.get("Directories", "sentiment_lexicon_file")
@ -69,7 +69,7 @@ def Analyse(message, server):
combined_datetime = datetime.datetime.combine(the_date, the_time) combined_datetime = datetime.datetime.combine(the_date, the_time)
timestamp = calendar.timegm(combined_datetime.timetuple()) timestamp = calendar.timegm(combined_datetime.timetuple())
sentences = tokenize.sent_tokenize(p_content.decode('utf-8', 'ignore')) sentences = tokenize.sent_tokenize(p_content)
if len(sentences) > 0: if len(sentences) > 0:
avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0} avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}
@ -109,11 +109,11 @@ def Analyse(message, server):
provider_timestamp = provider + '_' + str(timestamp) provider_timestamp = provider + '_' + str(timestamp)
server.incr('UniqID') server.incr('UniqID')
UniqID = server.get('UniqID') UniqID = server.get('UniqID')
print provider_timestamp, '->', UniqID, 'dropped', num_line_removed, 'lines' print(provider_timestamp, '->', UniqID, 'dropped', num_line_removed, 'lines')
server.sadd(provider_timestamp, UniqID) server.sadd(provider_timestamp, UniqID)
server.set(UniqID, avg_score) server.set(UniqID, avg_score)
else: else:
print 'Dropped:', p_MimeType print('Dropped:', p_MimeType)
def isJSON(content): def isJSON(content):
@ -121,7 +121,7 @@ def isJSON(content):
json.loads(content) json.loads(content)
return True return True
except Exception,e: except Exception:
return False return False
import signal import signal
@ -152,9 +152,10 @@ if __name__ == '__main__':
# REDIS_LEVEL_DB # # REDIS_LEVEL_DB #
server = redis.StrictRedis( server = redis.StrictRedis(
host=p.config.get("Redis_Level_DB_Sentiment", "host"), host=p.config.get("ARDB_Sentiment", "host"),
port=p.config.get("Redis_Level_DB_Sentiment", "port"), port=p.config.get("ARDB_Sentiment", "port"),
db=p.config.get("Redis_Level_DB_Sentiment", "db")) db=p.config.get("ARDB_Sentiment", "db"),
decode_responses=True)
while True: while True:
message = p.get_from_set() message = p.get_from_set()
@ -170,4 +171,3 @@ if __name__ == '__main__':
continue continue
else: else:
signal.alarm(0) signal.alarm(0)
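The module's flow is: sentence-tokenize the paste, average the VADER scores per sentence, then store the result under a provider_timestamp key with an incrementing UniqID. A hedged sketch of the per-sentence averaging with NLTK's VADER analyser (it simplifies the compoundPos/compoundNeg split and assumes the vader_lexicon and punkt data have been downloaded):

```python
# Hedged sketch of sentence-level sentiment averaging with NLTK's VADER.
from nltk import tokenize
from nltk.sentiment.vader import SentimentIntensityAnalyzer

def average_scores(text):
    sentences = tokenize.sent_tokenize(text)
    if not sentences:
        return None
    avg = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0}
    analyser = SentimentIntensityAnalyzer()
    for sentence in sentences:
        scores = analyser.polarity_scores(sentence)
        for key in avg:
            avg[key] += scores[key] / len(sentences)
    return avg

print(average_scores("This leak is terrible. The fix works great."))
```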

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
This Module is used for term frequency. This Module is used for term frequency.
@ -52,9 +52,10 @@ if __name__ == "__main__":
# REDIS # # REDIS #
server_term = redis.StrictRedis( server_term = redis.StrictRedis(
host=p.config.get("Redis_Level_DB_TermFreq", "host"), host=p.config.get("ARDB_TermFreq", "host"),
port=p.config.get("Redis_Level_DB_TermFreq", "port"), port=p.config.get("ARDB_TermFreq", "port"),
db=p.config.get("Redis_Level_DB_TermFreq", "db")) db=p.config.get("ARDB_TermFreq", "db"),
decode_responses=True)
# FUNCTIONS # # FUNCTIONS #
publisher.info("RegexForTermsFrequency script started") publisher.info("RegexForTermsFrequency script started")
@ -126,6 +127,6 @@ if __name__ == "__main__":
else: else:
publisher.debug("Script RegexForTermsFrequency is Idling") publisher.debug("Script RegexForTermsFrequency is Idling")
print "sleeping" print("sleeping")
time.sleep(5) time.sleep(5)
message = p.get_from_set() message = p.get_from_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
The ZMQ_Feed_Q Module The ZMQ_Feed_Q Module
@ -21,7 +21,7 @@ Requirements
""" """
import redis import redis
import ConfigParser import configparser
import os import os
configfile = os.path.join(os.environ['AIL_BIN'], './packages/config.cfg') configfile = os.path.join(os.environ['AIL_BIN'], './packages/config.cfg')
@ -31,13 +31,14 @@ def main():
"""Main Function""" """Main Function"""
# CONFIG # # CONFIG #
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read(configfile) cfg.read(configfile)
# REDIS # REDIS
r_serv = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"), r_serv = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"),
port=cfg.getint("Redis_Queues", "port"), port=cfg.getint("Redis_Queues", "port"),
db=cfg.getint("Redis_Queues", "db")) db=cfg.getint("Redis_Queues", "db"),
decode_responses=True)
# FIXME: automatic based on the queue name. # FIXME: automatic based on the queue name.
# ### SCRIPTS #### # ### SCRIPTS ####

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import time import time
from packages import Paste from packages import Paste
@ -29,7 +29,7 @@ if __name__ == "__main__":
languages = [shell, c, php, bash, python, javascript, bash, ruby, adr] languages = [shell, c, php, bash, python, javascript, bash, ruby, adr]
regex = '|'.join(languages) regex = '|'.join(languages)
print regex print(regex)
while True: while True:
message = p.get_from_set() message = p.get_from_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
The Tokenize Module The Tokenize Module
@ -50,7 +50,7 @@ if __name__ == "__main__":
while True: while True:
message = p.get_from_set() message = p.get_from_set()
print message print(message)
if message is not None: if message is not None:
paste = Paste.Paste(message) paste = Paste.Paste(message)
signal.alarm(5) signal.alarm(5)
@ -67,4 +67,4 @@ if __name__ == "__main__":
else: else:
publisher.debug("Tokeniser is idling 10s") publisher.debug("Tokeniser is idling 10s")
time.sleep(10) time.sleep(10)
print "sleepin" print("Sleeping")

View file

@ -1,8 +1,8 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import ConfigParser import configparser
from ConfigParser import ConfigParser as cfgP from configparser import ConfigParser as cfgP
import os import os
from collections import OrderedDict from collections import OrderedDict
import sys import sys
@ -20,9 +20,9 @@ def main():
Or activate the virtualenv.') Or activate the virtualenv.')
configfileSample = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg.sample') configfileSample = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg.sample')
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read(configfile) cfg.read(configfile)
cfgSample = ConfigParser.ConfigParser() cfgSample = configparser.ConfigParser()
cfgSample.read(configfileSample) cfgSample.read(configfileSample)
sections = cfgP.sections(cfg) sections = cfgP.sections(cfg)
@ -63,12 +63,12 @@ def main():
print(" - "+item[0]) print(" - "+item[0])
print("+--------------------------------------------------------------------+") print("+--------------------------------------------------------------------+")
resp = raw_input("Do you want to auto fix it? [y/n] ") resp = input("Do you want to auto fix it? [y/n] ")
if resp != 'y': if resp != 'y':
return False return False
else: else:
resp2 = raw_input("Do you want to keep a backup of the old configuration file? [y/n] ") resp2 = input("Do you want to keep a backup of the old configuration file? [y/n] ")
if resp2 == 'y': if resp2 == 'y':
shutil.move(configfile, configfileBackup) shutil.move(configfile, configfileBackup)
@ -109,4 +109,3 @@ if __name__ == "__main__":
sys.exit() sys.exit()
else: else:
sys.exit(1) sys.exit(1)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -47,7 +47,8 @@ if __name__ == "__main__":
r_serv2 = redis.StrictRedis( r_serv2 = redis.StrictRedis(
host=p.config.get("Redis_Cache", "host"), host=p.config.get("Redis_Cache", "host"),
port=p.config.getint("Redis_Cache", "port"), port=p.config.getint("Redis_Cache", "port"),
db=p.config.getint("Redis_Cache", "db")) db=p.config.getint("Redis_Cache", "db"),
decode_responses=True)
# Protocol file path # Protocol file path
protocolsfile_path = os.path.join(os.environ['AIL_HOME'], protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
@ -95,17 +96,23 @@ if __name__ == "__main__":
subdomain = faup.get_subdomain() subdomain = faup.get_subdomain()
f1 = None f1 = None
domains_list.append(domain)
publisher.debug('{} Published'.format(url)) publisher.debug('{} Published'.format(url))
if f1 == "onion": if f1 == "onion":
print domain print(domain)
if subdomain is not None:
subdomain = subdomain.decode('utf8')
if domain is not None:
domain = domain.decode('utf8')
domains_list.append(domain)
hostl = avoidNone(subdomain) + avoidNone(domain)
hostl = unicode(avoidNone(subdomain)+avoidNone(domain))
try: try:
socket.setdefaulttimeout(1) socket.setdefaulttimeout(1)
ip = socket.gethostbyname(unicode(hostl)) ip = socket.gethostbyname(hostl)
except: except:
# If the resolver is not giving any IPv4 address, # If the resolver is not giving any IPv4 address,
# ASN/CC lookup is skipped. # ASN/CC lookup is skipped.
@ -113,32 +120,36 @@ if __name__ == "__main__":
try: try:
l = client.lookup(ip, qType='IP') l = client.lookup(ip, qType='IP')
except ipaddress.AddressValueError: except ipaddress.AddressValueError:
continue continue
cc = getattr(l, 'cc') cc = getattr(l, 'cc')
asn = getattr(l, 'asn') if getattr(l, 'asn') is not None:
asn = getattr(l, 'asn')[2:] #remove b'
# EU is not an official ISO 3166 code (but used by RIPE # EU is not an official ISO 3166 code (but used by RIPE
# IP allocation) # IP allocation)
if cc is not None and cc != "EU": if cc is not None and cc != "EU":
print hostl, asn, cc, \ print(hostl, asn, cc, \
pycountry.countries.get(alpha_2=cc).name pycountry.countries.get(alpha_2=cc).name)
if cc == cc_critical: if cc == cc_critical:
to_print = 'Url;{};{};{};Detected {} {}'.format( to_print = 'Url;{};{};{};Detected {} {}'.format(
PST.p_source, PST.p_date, PST.p_name, PST.p_source, PST.p_date, PST.p_name,
hostl, cc) hostl, cc)
#publisher.warning(to_print) #publisher.warning(to_print)
print to_print print(to_print)
else: else:
print hostl, asn, cc print(hostl, asn, cc)
A_values = lib_refine.checking_A_record(r_serv2, A_values = lib_refine.checking_A_record(r_serv2,
domains_list) domains_list)
if A_values[0] >= 1: if A_values[0] >= 1:
PST.__setattr__(channel, A_values) PST.__setattr__(channel, A_values)
PST.save_attribute_redis(channel, (A_values[0], PST.save_attribute_redis(channel, (A_values[0],
list(A_values[1]))) list(A_values[1])))
pprint.pprint(A_values) pprint.pprint(A_values)
publisher.info('Url;{};{};{};Checked {} URL;{}'.format( publisher.info('Url;{};{};{};Checked {} URL;{}'.format(
PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_path)) PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_path))
@ -146,7 +157,7 @@ if __name__ == "__main__":
else: else:
publisher.debug("Script url is Idling 10s") publisher.debug("Script url is Idling 10s")
print 'Sleeping' print('Sleeping')
time.sleep(10) time.sleep(10)
message = p.get_from_set() message = p.get_from_set()

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -29,11 +29,12 @@ num_day_to_look = 5 # the detection of the progression start num_day_to_lo
def analyse(server, field_name, date, url_parsed): def analyse(server, field_name, date, url_parsed):
field = url_parsed[field_name] field = url_parsed[field_name]
if field is not None: if field is not None:
field = field.decode('utf8')
server.hincrby(field, date, 1) server.hincrby(field, date, 1)
if field_name == "domain": #save domain in a set for the monthly plot if field_name == "domain": #save domain in a set for the monthly plot
domain_set_name = "domain_set_" + date[0:6] domain_set_name = "domain_set_" + date[0:6]
server.sadd(domain_set_name, field) server.sadd(domain_set_name, field)
print "added in " + domain_set_name +": "+ field print("added in " + domain_set_name +": "+ field)
def get_date_range(num_day): def get_date_range(num_day):
curr_date = datetime.date.today() curr_date = datetime.date.today()
@ -113,9 +114,10 @@ if __name__ == '__main__':
# REDIS # # REDIS #
r_serv_trend = redis.StrictRedis( r_serv_trend = redis.StrictRedis(
host=p.config.get("Redis_Level_DB_Trending", "host"), host=p.config.get("ARDB_Trending", "host"),
port=p.config.get("Redis_Level_DB_Trending", "port"), port=p.config.get("ARDB_Trending", "port"),
db=p.config.get("Redis_Level_DB_Trending", "db")) db=p.config.get("ARDB_Trending", "db"),
decode_responses=True)
# FILE CURVE SECTION # # FILE CURVE SECTION #
csv_path_proto = os.path.join(os.environ['AIL_HOME'], csv_path_proto = os.path.join(os.environ['AIL_HOME'],
@ -145,24 +147,25 @@ if __name__ == '__main__':
year = today.year year = today.year
month = today.month month = today.month
print 'Building protocol graph' print('Building protocol graph')
lib_words.create_curve_with_word_file(r_serv_trend, csv_path_proto, lib_words.create_curve_with_word_file(r_serv_trend, csv_path_proto,
protocolsfile_path, year, protocolsfile_path, year,
month) month)
print 'Building tld graph' print('Building tld graph')
lib_words.create_curve_with_word_file(r_serv_trend, csv_path_tld, lib_words.create_curve_with_word_file(r_serv_trend, csv_path_tld,
tldsfile_path, year, tldsfile_path, year,
month) month)
print 'Building domain graph' print('Building domain graph')
lib_words.create_curve_from_redis_set(r_serv_trend, csv_path_domain, lib_words.create_curve_from_redis_set(r_serv_trend, csv_path_domain,
"domain", year, "domain", year,
month) month)
print 'end building' print('end building')
publisher.debug("{} queue is empty, waiting".format(config_section)) publisher.debug("{} queue is empty, waiting".format(config_section))
print 'sleeping' print('sleeping')
time.sleep(5*60) time.sleep(5*60)
continue continue
@ -173,9 +176,13 @@ if __name__ == '__main__':
faup.decode(url) faup.decode(url)
url_parsed = faup.get() url_parsed = faup.get()
analyse(r_serv_trend, 'scheme', date, url_parsed) #Scheme analysis # Scheme analysis
analyse(r_serv_trend, 'tld', date, url_parsed) #Tld analysis analyse(r_serv_trend, 'scheme', date, url_parsed)
analyse(r_serv_trend, 'domain', date, url_parsed) #Domain analysis # Tld analysis
analyse(r_serv_trend, 'tld', date, url_parsed)
# Domain analysis
analyse(r_serv_trend, 'domain', date, url_parsed)
compute_progression(r_serv_trend, 'scheme', num_day_to_look, url_parsed) compute_progression(r_serv_trend, 'scheme', num_day_to_look, url_parsed)
compute_progression(r_serv_trend, 'tld', num_day_to_look, url_parsed) compute_progression(r_serv_trend, 'tld', num_day_to_look, url_parsed)
compute_progression(r_serv_trend, 'domain', num_day_to_look, url_parsed) compute_progression(r_serv_trend, 'domain', num_day_to_look, url_parsed)

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python3.5 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
from pymisp.tools.abstractgenerator import AbstractMISPObjectGenerator from pymisp.tools.abstractgenerator import AbstractMISPObjectGenerator
@ -15,7 +15,7 @@ class AilleakObject(AbstractMISPObjectGenerator):
self._p_source = p_source.split('/')[-5:] self._p_source = p_source.split('/')[-5:]
self._p_source = '/'.join(self._p_source)[:-3] # -3 removes .gz self._p_source = '/'.join(self._p_source)[:-3] # -3 removes .gz
self._p_date = p_date self._p_date = p_date
self._p_content = p_content.encode('utf8') self._p_content = p_content
self._p_duplicate = p_duplicate self._p_duplicate = p_duplicate
self._p_duplicate_number = p_duplicate_number self._p_duplicate_number = p_duplicate_number
self.generate_attributes() self.generate_attributes()
@ -45,13 +45,10 @@ class ObjectWrapper:
self.paste = Paste.Paste(path) self.paste = Paste.Paste(path)
self.p_date = self.date_to_str(self.paste.p_date) self.p_date = self.date_to_str(self.paste.p_date)
self.p_source = self.paste.p_path self.p_source = self.paste.p_path
self.p_content = self.paste.get_p_content().decode('utf8') self.p_content = self.paste.get_p_content()
temp = self.paste._get_p_duplicate() temp = self.paste._get_p_duplicate()
try:
temp = temp.decode('utf8')
except AttributeError:
pass
#beautifier #beautifier
temp = json.loads(temp) temp = json.loads(temp)
self.p_duplicate_number = len(temp) if len(temp) >= 0 else 0 self.p_duplicate_number = len(temp) if len(temp) >= 0 else 0

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python3.5 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -52,9 +52,10 @@ if __name__ == "__main__":
# port generated automatically depending on the date # port generated automatically depending on the date
curYear = datetime.now().year curYear = datetime.now().year
server = redis.StrictRedis( server = redis.StrictRedis(
host=p.config.get("Redis_Level_DB", "host"), host=p.config.get("ARDB_DB", "host"),
port=curYear, port=p.config.get("ARDB_DB", "port"),
db=p.config.get("Redis_Level_DB", "db")) db=curYear,
decode_responses=True)
# FUNCTIONS # # FUNCTIONS #
publisher.info("Script duplicate started") publisher.info("Script duplicate started")
@ -62,8 +63,8 @@ if __name__ == "__main__":
while True: while True:
message = p.get_from_set() message = p.get_from_set()
if message is not None: if message is not None:
message = message.decode('utf8') #decode because of pyhton3
module_name, p_path = message.split(';') module_name, p_path = message.split(';')
print("new alert : {}".format(module_name))
#PST = Paste.Paste(p_path) #PST = Paste.Paste(p_path)
else: else:
publisher.debug("Script Attribute is idling 10s") publisher.debug("Script Attribute is idling 10s")

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
@ -27,10 +27,9 @@ if __name__ == "__main__":
config_section = ['Curve'] config_section = ['Curve']
for queue in config_section: for queue in config_section:
print 'dropping: ' + queue print('dropping: ' + queue)
p = Process(queue) p = Process(queue)
while True: while True:
message = p.get_from_set() message = p.get_from_set()
if message is None: if message is None:
break break

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# This file is part of AIL framework - Analysis Information Leak framework # This file is part of AIL framework - Analysis Information Leak framework
@ -25,7 +25,7 @@ import time
import redis import redis
import base64 import base64
import os import os
import ConfigParser import configparser
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
if not os.path.exists(configfile): if not os.path.exists(configfile):
@ -33,7 +33,7 @@ if not os.path.exists(configfile):
Did you set environment variables? \ Did you set environment variables? \
Or activate the virtualenv.') Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read(configfile) cfg.read(configfile)
if cfg.has_option("ZMQ_Global", "bind"): if cfg.has_option("ZMQ_Global", "bind"):
@ -50,7 +50,7 @@ socket = context.socket(zmq.PUB)
socket.bind(zmq_url) socket.bind(zmq_url)
# check https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L16 # check https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L16
r = redis.StrictRedis(host='localhost', db=10) r = redis.StrictRedis(host='localhost', db=10, decode_responses=True)
# 101 pastes processed feed # 101 pastes processed feed
# 102 raw pastes feed # 102 raw pastes feed
@ -59,6 +59,7 @@ while True:
time.sleep(base_sleeptime + sleep_inc) time.sleep(base_sleeptime + sleep_inc)
topic = 101 topic = 101
paste = r.lpop("pastes") paste = r.lpop("pastes")
print(paste)
if paste is None: if paste is None:
continue continue
socket.send("%d %s" % (topic, paste)) socket.send("%d %s" % (topic, paste))
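Note that `socket.send("%d %s" % (topic, paste))` still passes a str; under Python 3, pyzmq expects bytes, so a send_string() or an explicit encode() is needed. A hedged sketch (the endpoint is an example):

```python
# Hedged sketch: pyzmq's send() wants bytes under Python 3, so either
# encode the formatted string or use send_string(), which encodes to UTF-8.
import zmq

context = zmq.Context()
socket = context.socket(zmq.PUB)
socket.bind("tcp://127.0.0.1:5556")   # hypothetical endpoint

topic, paste = 101, "paste_id_example"
socket.send_string("{} {}".format(topic, paste))
# equivalently: socket.send("{} {}".format(topic, paste).encode())
```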

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# This file is part of AIL framework - Analysis Information Leak framework # This file is part of AIL framework - Analysis Information Leak framework
@ -24,9 +24,10 @@ socket.setsockopt(zmq.SUBSCRIBE, topicfilter)
while True: while True:
message = socket.recv() message = socket.recv()
print('b1')
print (message) print (message)
if topicfilter == "102": if topicfilter == "102":
topic, paste, messagedata = message.split() topic, paste, messagedata = message.split()
print paste, messagedata print(paste, messagedata)
else: else:
print (message) print (message)

View file

@ -1,9 +1,9 @@
#!/usr/bin/env python #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import zmq import zmq
import base64 import base64
import StringIO from io import StringIO
import gzip import gzip
import argparse import argparse
import os import os
@ -31,8 +31,7 @@ import mimetypes
' '
''' '''
import StringIO
import gzip
def is_hierachy_valid(path): def is_hierachy_valid(path):
var = path.split('/') var = path.split('/')
try: try:
@ -72,7 +71,12 @@ if __name__ == "__main__":
wanted_path = wanted_path.split('/') wanted_path = wanted_path.split('/')
wanted_path = '/'.join(wanted_path[-(4+args.hierarchy):]) wanted_path = '/'.join(wanted_path[-(4+args.hierarchy):])
messagedata = open(complete_path).read() with gzip.open(complete_path, 'rb') as f:
messagedata = f.read()
#print(type(complete_path))
#file = open(complete_path)
#messagedata = file.read()
#if paste do not have a 'date hierarchy' ignore it #if paste do not have a 'date hierarchy' ignore it
if not is_hierachy_valid(complete_path): if not is_hierachy_valid(complete_path):
@ -90,5 +94,8 @@ if __name__ == "__main__":
print(args.name+'>'+wanted_path) print(args.name+'>'+wanted_path)
path_to_send = args.name + '>' + wanted_path path_to_send = args.name + '>' + wanted_path
socket.send('{} {} {}'.format(args.channel, path_to_send, base64.b64encode(messagedata))) #s = b'{} {} {}'.format(args.channel, path_to_send, base64.b64encode(messagedata))
# use bytes object
s = b' '.join( [ args.channel.encode(), path_to_send.encode(), base64.b64encode(messagedata) ] )
socket.send(s)
time.sleep(args.seconds) time.sleep(args.seconds)
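The feeder now builds the message as a single bytes object: channel, path and the base64-encoded paste content joined by spaces. A hedged sketch of the receiving side (channel, path and content are invented, and the channel/path parts are assumed not to contain spaces), splitting on the first two spaces and reversing the base64 step:

```python
# Hedged sketch of decoding the bytes message built above.
import base64

def decode_feed_message(message: bytes):
    # channel and path are assumed space-free; the payload is base64 text
    channel, path, payload = message.split(b' ', 2)
    return channel.decode(), path.decode(), base64.b64decode(payload)

# round-trip check against the send side shown above
msg = b' '.join([b'102', b'feeder>2018/05/11/test.gz',
                 base64.b64encode(b'some paste content')])
print(decode_feed_message(msg))
```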

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# This file is part of AIL framework - Analysis Information Leak framework # This file is part of AIL framework - Analysis Information Leak framework
@ -10,7 +10,7 @@
# #
# Copyright (c) 2014 Alexandre Dulaunoy - a@foo.be # Copyright (c) 2014 Alexandre Dulaunoy - a@foo.be
import ConfigParser import configparser
import argparse import argparse
import gzip import gzip
import os import os
@ -23,7 +23,7 @@ def readdoc(path=None):
return f.read() return f.read()
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read(configfile) cfg.read(configfile)
# Indexer configuration - index dir and schema setup # Indexer configuration - index dir and schema setup
@ -51,7 +51,7 @@ ix = index.open_dir(indexpath)
from whoosh.qparser import QueryParser from whoosh.qparser import QueryParser
if args.n: if args.n:
print ix.doc_count_all() print(ix.doc_count_all())
exit(0) exit(0)
if args.l: if args.l:

View file

@ -1,8 +1,8 @@
#!/usr/bin/env python #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import ConfigParser import configparser
import os import os
import subprocess import subprocess
import time import time
@ -23,21 +23,21 @@ if __name__ == '__main__':
raise Exception('Unable to find the configuration file. \ raise Exception('Unable to find the configuration file. \
Did you set environment variables? \ Did you set environment variables? \
Or activate the virtualenv.') Or activate the virtualenv.')
config = ConfigParser.ConfigParser() config = configparser.ConfigParser()
config.read(configfile) config.read(configfile)
modules = config.sections() modules = config.sections()
pids = {} pids = {}
for module in modules: for module in modules:
pin = subprocess.Popen(["python", './QueueIn.py', '-c', module]) pin = subprocess.Popen(["python3", './QueueIn.py', '-c', module])
pout = subprocess.Popen(["python", './QueueOut.py', '-c', module]) pout = subprocess.Popen(["python3", './QueueOut.py', '-c', module])
pids[module] = (pin, pout) pids[module] = (pin, pout)
is_running = True is_running = True
try: try:
while is_running: while is_running:
time.sleep(5) time.sleep(5)
is_running = False is_running = False
for module, p in pids.iteritems(): for module, p in pids.items():
pin, pout = p pin, pout = p
if pin is None: if pin is None:
# already dead # already dead
@ -57,7 +57,7 @@ if __name__ == '__main__':
is_running = True is_running = True
pids[module] = (pin, pout) pids[module] = (pin, pout)
except KeyboardInterrupt: except KeyboardInterrupt:
for module, p in pids.iteritems(): for module, p in pids.items():
pin, pout = p pin, pout = p
if pin is not None: if pin is not None:
pin.kill() pin.kill()

View file

@ -1,3 +1,5 @@
#!/usr/bin/python3
class Date(object): class Date(object):
"""docstring for Date""" """docstring for Date"""
def __init__(self, *args): def __init__(self, *args):
@ -38,4 +40,3 @@ class Date(object):
comp_month = str(computed_date.month).zfill(2) comp_month = str(computed_date.month).zfill(2)
comp_day = str(computed_date.day).zfill(2) comp_day = str(computed_date.day).zfill(2)
return comp_year + comp_month + comp_day return comp_year + comp_month + comp_day

View file

@ -1,3 +1,5 @@
#!/usr/bin/python3
import hashlib import hashlib
import crcmod import crcmod
import mmh3 import mmh3

View file

@ -1,4 +1,4 @@
#!/usr/bin/python2.7 #!/usr/bin/python3
""" """
The ``Paste Class`` The ``Paste Class``
@ -24,15 +24,8 @@ import operator
import string import string
import re import re
import json import json
try: # dirty to support python3 import configparser
import ConfigParser from io import StringIO
except:
import configparser
ConfigParser = configparser
try: # dirty to support python3
import cStringIO
except:
from io import StringIO as cStringIO
import sys import sys
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
from Date import Date from Date import Date
@ -71,25 +64,29 @@ class Paste(object):
Did you set environment variables? \ Did you set environment variables? \
Or activate the virtualenv.') Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read(configfile) cfg.read(configfile)
self.cache = redis.StrictRedis( self.cache = redis.StrictRedis(
host=cfg.get("Redis_Queues", "host"), host=cfg.get("Redis_Queues", "host"),
port=cfg.getint("Redis_Queues", "port"), port=cfg.getint("Redis_Queues", "port"),
db=cfg.getint("Redis_Queues", "db")) db=cfg.getint("Redis_Queues", "db"),
decode_responses=True)
self.store = redis.StrictRedis( self.store = redis.StrictRedis(
host=cfg.get("Redis_Data_Merging", "host"), host=cfg.get("Redis_Data_Merging", "host"),
port=cfg.getint("Redis_Data_Merging", "port"), port=cfg.getint("Redis_Data_Merging", "port"),
db=cfg.getint("Redis_Data_Merging", "db")) db=cfg.getint("Redis_Data_Merging", "db"),
decode_responses=True)
self.p_path = p_path self.p_path = p_path
self.p_name = os.path.basename(self.p_path) self.p_name = os.path.basename(self.p_path)
self.p_size = round(os.path.getsize(self.p_path)/1024.0, 2) self.p_size = round(os.path.getsize(self.p_path)/1024.0, 2)
self.p_mime = magic.from_buffer("test", mime=True)
self.p_mime = magic.from_buffer(self.get_p_content(), mime=True) self.p_mime = magic.from_buffer(self.get_p_content(), mime=True)
# Assuming that the paste will always be in a day folder which is itself # in a month folder which is itself in a year folder.
# in a month folder which is itself in a year folder. # in a month folder which is itself in a year folder.
# /year/month/day/paste.gz # /year/month/day/paste.gz
var = self.p_path.split('/') var = self.p_path.split('/')
self.p_date = Date(var[-4], var[-3], var[-2]) self.p_date = Date(var[-4], var[-3], var[-2])
self.p_source = var[-5] self.p_source = var[-5]
@ -117,17 +114,18 @@ class Paste(object):
paste = self.cache.get(self.p_path) paste = self.cache.get(self.p_path)
if paste is None: if paste is None:
try: try:
with gzip.open(self.p_path, 'rb') as f: with gzip.open(self.p_path, 'r') as f:
paste = f.read() paste = f.read()
self.cache.set(self.p_path, paste) self.cache.set(self.p_path, paste)
self.cache.expire(self.p_path, 300) self.cache.expire(self.p_path, 300)
except: except:
return '' paste = ''
pass
return paste return str(paste)
def get_p_content_as_file(self): def get_p_content_as_file(self):
return cStringIO.StringIO(self.get_p_content()) message = StringIO(self.get_p_content())
return message
def get_p_content_with_removed_lines(self, threshold): def get_p_content_with_removed_lines(self, threshold):
num_line_removed = 0 num_line_removed = 0
@ -137,6 +135,7 @@ class Paste(object):
line_id = 0 line_id = 0
for line_id, line in enumerate(f): for line_id, line in enumerate(f):
length = len(line) length = len(line)
if length < line_length_threshold: if length < line_length_threshold:
string_content += line string_content += line
else: else:
@ -202,8 +201,8 @@ class Paste(object):
.. seealso:: _set_p_hash_kind("md5") .. seealso:: _set_p_hash_kind("md5")
""" """
for hash_name, the_hash in self.p_hash_kind.iteritems(): for hash_name, the_hash in self.p_hash_kind.items():
self.p_hash[hash_name] = the_hash.Calculate(self.get_p_content()) self.p_hash[hash_name] = the_hash.Calculate(self.get_p_content().encode())
return self.p_hash return self.p_hash
def _get_p_language(self): def _get_p_language(self):
@ -274,7 +273,10 @@ class Paste(object):
def _get_p_duplicate(self): def _get_p_duplicate(self):
self.p_duplicate = self.store.hget(self.p_path, "p_duplicate") self.p_duplicate = self.store.hget(self.p_path, "p_duplicate")
return self.p_duplicate if self.p_duplicate is not None else '[]' if self.p_duplicate is not None:
return self.p_duplicate
else:
return '[]'
def save_all_attributes_redis(self, key=None): def save_all_attributes_redis(self, key=None):
""" """
@ -321,6 +323,28 @@ class Paste(object):
else: else:
self.store.hset(self.p_path, attr_name, json.dumps(value)) self.store.hset(self.p_path, attr_name, json.dumps(value))
def save_others_pastes_attribute_duplicate(self, attr_name, list_value):
"""
Save a new duplicate on others pastes
"""
for hash_type, path, percent, date in list_value:
#get json
json_duplicate = self.store.hget(path, attr_name)
#json save on redis
if json_duplicate is not None:
list_duplicate = (json.loads(json_duplicate))
# avoid duplicates, a paste can be sent by multiple modules
to_add = [hash_type, self.p_path, percent, date]
if to_add not in list_duplicate:
list_duplicate.append(to_add)
self.store.hset(path, attr_name, json.dumps(list_duplicate))
else:
# create the new list
list_duplicate = [[hash_type, self.p_path, percent, date]]
self.store.hset(path, attr_name, json.dumps(list_duplicate))
def _get_from_redis(self, r_serv): def _get_from_redis(self, r_serv):
ans = {} ans = {}
for hash_name, the_hash in self.p_hash: for hash_name, the_hash in self.p_hash:
@ -342,7 +366,7 @@ class Paste(object):
tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+', tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
gaps=True, discard_empty=True) gaps=True, discard_empty=True)
blob = TextBlob(clean(self.get_p_content()), tokenizer=tokenizer) blob = TextBlob(clean( (self.get_p_content()) ), tokenizer=tokenizer)
for word in blob.tokens: for word in blob.tokens:
if word in words.keys(): if word in words.keys():
@ -351,7 +375,7 @@ class Paste(object):
num = 0 num = 0
words[word] = num + 1 words[word] = num + 1
if sort: if sort:
var = sorted(words.iteritems(), key=operator.itemgetter(1), reverse=True) var = sorted(words.items(), key=operator.itemgetter(1), reverse=True)
else: else:
var = words var = words
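The new save_others_pastes_attribute_duplicate() keeps, on every other paste involved, a JSON list of [hash_type, path, percent, date] entries inside a redis hash field, appending only if the entry is not already present. A hedged sketch of that JSON-in-a-hash pattern outside the Paste class (connection parameters and paths are invented):

```python
# Hedged sketch of the JSON-list-in-a-redis-hash pattern used for duplicates.
import json
import redis

r = redis.StrictRedis(host='localhost', port=6379, db=1, decode_responses=True)

def add_duplicate(other_paste, attr_name, entry):
    current = r.hget(other_paste, attr_name)
    duplicates = json.loads(current) if current is not None else []
    if entry not in duplicates:   # a paste can be reported by several modules
        duplicates.append(entry)
    r.hset(other_paste, attr_name, json.dumps(duplicates))

add_duplicate('archive/2018/05/11/other.gz', 'p_duplicate',
              ['ssdeep', 'archive/2018/05/11/new.gz', 85, '20180511'])
```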

View file

@ -2,6 +2,7 @@
bloomfilters = Blooms bloomfilters = Blooms
dicofilters = Dicos dicofilters = Dicos
pastes = PASTES pastes = PASTES
base64 = BASE64
wordtrending_csv = var/www/static/csv/wordstrendingdata wordtrending_csv = var/www/static/csv/wordstrendingdata
wordsfile = files/wordfile wordsfile = files/wordfile
@ -53,13 +54,20 @@ criticalNumberToAlert=8
#Will be considered as false positive if less than X matches from the top password list #Will be considered as false positive if less than X matches from the top password list
minTopPassList=5 minTopPassList=5
[Curve]
max_execution_time = 90
[Base64]
path = Base64/
max_execution_time = 60
[Modules_Duplicates] [Modules_Duplicates]
#Number of month to look back #Number of month to look back
maximum_month_range = 3 maximum_month_range = 3
#The value where two pastes are considered duplicates for ssdeep. #The value where two pastes are considered duplicates for ssdeep.
threshold_duplicate_ssdeep = 50 threshold_duplicate_ssdeep = 50
#The value where two pastes are considered duplicates for tlsh. #The value where two pastes are considered duplicates for tlsh.
threshold_duplicate_tlsh = 100 threshold_duplicate_tlsh = 52
#Minimum size of the paste considered #Minimum size of the paste considered
min_paste_size = 0.3 min_paste_size = 0.3
@ -104,46 +112,56 @@ host = localhost
port = 6381 port = 6381
db = 1 db = 1
##### LevelDB ##### ##### ARDB #####
[Redis_Level_DB_Curve] [ARDB_Curve]
host = localhost host = localhost
port = 6382 port = 6382
db = 1 db = 1
[Redis_Level_DB_Sentiment] [ARDB_Sentiment]
host = localhost host = localhost
port = 6382 port = 6382
db = 4 db = 4
[Redis_Level_DB_TermFreq] [ARDB_TermFreq]
host = localhost host = localhost
port = 6382 port = 6382
db = 2 db = 2
[Redis_Level_DB_TermCred] [ARDB_TermCred]
host = localhost host = localhost
port = 6382 port = 6382
db = 5 db = 5
[Redis_Level_DB] [ARDB_DB]
host = localhost host = localhost
port = 6382
db = 0 db = 0
[Redis_Level_DB_Trending] [ARDB_Trending]
host = localhost host = localhost
port = 6382 port = 6382
db = 3 db = 3
[Redis_Level_DB_Hashs] [ARDB_Hashs]
host = localhost host = localhost
db = 1 db = 1
[ARDB_Tags]
host = localhost
port = 6382
db = 6
[Url] [Url]
cc_critical = DE cc_critical = DE
[DomClassifier] [DomClassifier]
cc = DE cc = DE
cc_tld = r'\.de$' cc_tld = r'\.de$'
dns = 8.8.8.8
[Mail]
dns = 8.8.8.8
# Indexer configuration # Indexer configuration
[Indexer] [Indexer]
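The renamed ARDB_* sections are read the same way as before, through configparser and redis-py, now with decode_responses=True so values come back as str under Python 3. A hedged sketch (the config path is an assumption):

```python
# Hedged sketch: reading one of the renamed ARDB_* sections and opening
# the connection with decode_responses=True.
import configparser
import redis

cfg = configparser.ConfigParser()
cfg.read('bin/packages/config.cfg')   # assumed path

r_trend = redis.StrictRedis(
    host=cfg.get("ARDB_Trending", "host"),
    port=cfg.getint("ARDB_Trending", "port"),
    db=cfg.getint("ARDB_Trending", "db"),
    decode_responses=True)

print(r_trend.ping())
```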

View file

@ -1,3 +1,5 @@
#!/usr/bin/python3
import re import re
import dns.resolver import dns.resolver
@ -17,24 +19,29 @@ def is_luhn_valid(card_number):
return (sum(r[0::2]) + sum(sum(divmod(d*2, 10)) for d in r[1::2])) % 10 == 0 return (sum(r[0::2]) + sum(sum(divmod(d*2, 10)) for d in r[1::2])) % 10 == 0
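The Luhn check doubles every second digit from the right, adds the digit sums, and requires the total to be a multiple of 10. A self-contained restatement with a worked example (the digit-reversal line is assumed from the standard implementation, since it sits outside this hunk; 4111111111111111 is the usual test number, not real data):

```python
# Worked Luhn example matching the return expression above.
def is_luhn_valid(card_number):
    r = [int(ch) for ch in str(card_number)][::-1]   # assumed reversal step
    return (sum(r[0::2]) + sum(sum(divmod(d * 2, 10)) for d in r[1::2])) % 10 == 0

print(is_luhn_valid(4111111111111111))  # True  (8 + 22 = 30, divisible by 10)
print(is_luhn_valid(4111111111111112))  # False (9 + 22 = 31)
```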
def checking_MX_record(r_serv, adress_set): def checking_MX_record(r_serv, adress_set, addr_dns):
"""Check if emails MX domains are responding. """Check if emails MX domains are responding.
:param r_serv: -- Redis connection database :param r_serv: -- Redis connection database
:param adress_set: -- (set) This is a set of email addresses :param adress_set: -- (set) This is a set of email addresses
:param addr_dns: -- (str) This is the DNS server address
:return: (int) Number of addresses with a responding and valid MX domain :return: (int) Number of addresses with a responding and valid MX domain
This function will split the email addresses and try to resolve their domain This function will split the email addresses and try to resolve their domain
names: on example@gmail.com it will try to resolve gmail.com names: on example@gmail.com it will try to resolve gmail.com
""" """
#remove duplicate
adress_set = list(set(adress_set))
score = 0 score = 0
num = len(adress_set) num = len(adress_set)
WalidMX = set([]) WalidMX = set([])
# Transforming the set into a string # Transforming the set into a string
MXdomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", str(adress_set).lower()) MXdomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", str(adress_set).lower())
resolver = dns.resolver.Resolver() resolver = dns.resolver.Resolver()
resolver.nameservers = ['149.13.33.69'] resolver.nameservers = [addr_dns]
resolver.timeout = 5 resolver.timeout = 5
resolver.lifetime = 2 resolver.lifetime = 2
if MXdomains != []: if MXdomains != []:
@ -58,25 +65,31 @@ def checking_MX_record(r_serv, adress_set):
except dns.resolver.NoNameservers: except dns.resolver.NoNameservers:
publisher.debug('NoNameserver, No non-broken nameservers are available to answer the query.') publisher.debug('NoNameserver, No non-broken nameservers are available to answer the query.')
print('NoNameserver, No non-broken nameservers are available to answer the query.')
except dns.resolver.NoAnswer: except dns.resolver.NoAnswer:
publisher.debug('NoAnswer, The response did not contain an answer to the question.') publisher.debug('NoAnswer, The response did not contain an answer to the question.')
print('NoAnswer, The response did not contain an answer to the question.')
except dns.name.EmptyLabel: except dns.name.EmptyLabel:
publisher.debug('SyntaxError: EmptyLabel') publisher.debug('SyntaxError: EmptyLabel')
print('SyntaxError: EmptyLabel')
except dns.resolver.NXDOMAIN: except dns.resolver.NXDOMAIN:
r_serv.setex(MXdomain[1:], 1, timedelta(days=1)) r_serv.setex(MXdomain[1:], 1, timedelta(days=1))
publisher.debug('The query name does not exist.') publisher.debug('The query name does not exist.')
print('The query name does not exist.')
except dns.name.LabelTooLong: except dns.name.LabelTooLong:
publisher.debug('The Label is too long') publisher.debug('The Label is too long')
print('The Label is too long')
except dns.resolver.Timeout: except dns.resolver.Timeout:
print('timeout')
r_serv.setex(MXdomain[1:], 1, timedelta(days=1)) r_serv.setex(MXdomain[1:], 1, timedelta(days=1))
except Exception as e: except Exception as e:
print e print(e)
publisher.debug("emails before: {0} after: {1} (valid)".format(num, score)) publisher.debug("emails before: {0} after: {1} (valid)".format(num, score))
return (num, WalidMX) return (num, WalidMX)
@ -125,7 +138,7 @@ def checking_A_record(r_serv, domains_set):
publisher.debug('The Label is too long') publisher.debug('The Label is too long')
except Exception as e: except Exception as e:
print e print(e)
publisher.debug("URLs before: {0} after: {1} (valid)".format(num, score)) publisher.debug("URLs before: {0} after: {1} (valid)".format(num, score))
return (num, WalidA) return (num, WalidA)
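checking_MX_record() now takes the resolver address from the new [Mail] dns option instead of a hard-coded IP, de-duplicates the address set, and prints each resolver exception alongside the existing debug log. A hedged sketch of one MX lookup as performed per extracted domain (query() is the dnspython 1.x call of that era, renamed resolve() later; the domain is an example):

```python
# Hedged sketch of a single MX lookup with a configurable nameserver.
import dns.resolver

resolver = dns.resolver.Resolver()
resolver.nameservers = ['8.8.8.8']   # mirrors the [Mail] dns option
resolver.timeout = 5
resolver.lifetime = 2

try:
    answers = resolver.query('gmail.com', 'MX')
    print([str(rdata.exchange) for rdata in answers])
except dns.resolver.NXDOMAIN:
    print('The query name does not exist.')
except dns.resolver.Timeout:
    print('timeout')
```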

View file

@ -1,3 +1,5 @@
#!/usr/bin/python3
import os import os
import string import string
@ -81,17 +83,17 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month
to keep the timeline of the curve correct. to keep the timeline of the curve correct.
""" """
threshold = 50 threshold = 30
first_day = date(year, month, 01) first_day = date(year, month, 1)
last_day = date(year, month, calendar.monthrange(year, month)[1]) last_day = date(year, month, calendar.monthrange(year, month)[1])
words = [] words = []
with open(feederfilename, 'rb') as f: with open(feederfilename, 'r') as f:
# words of the files # words of the files
words = sorted([word.strip() for word in f if word.strip()[0:2]!='//' and word.strip()!='' ]) words = sorted([word.strip() for word in f if word.strip()[0:2]!='//' and word.strip()!='' ])
headers = ['Date'] + words headers = ['Date'] + words
with open(csvfilename+'.csv', 'wb') as f: with open(csvfilename+'.csv', 'w') as f:
writer = csv.writer(f) writer = csv.writer(f)
writer.writerow(headers) writer.writerow(headers)
@ -103,11 +105,14 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month
# from the 1st day to the last of the list # from the 1st day to the last of the list
for word in words: for word in words:
value = r_serv.hget(word, curdate) value = r_serv.hget(word, curdate)
if value is None: if value is None:
row.append(0) row.append(0)
else: else:
# if the word have a value for the day # if the word have a value for the day
# FIXME Due to performance issues (too many tlds, leads to more than 7s to perform this procedure), I added a threshold # FIXME Due to performance issues (too many tlds, leads to more than 7s to perform this procedure), I added a threshold
value = r_serv.hget(word, curdate)
value = int(value)
if value >= threshold: if value >= threshold:
row.append(value) row.append(value)
writer.writerow(row) writer.writerow(row)
@ -127,14 +132,15 @@ def create_curve_from_redis_set(server, csvfilename, set_to_plot, year, month):
""" """
first_day = date(year, month, 01) first_day = date(year, month, 1)
last_day = date(year, month, calendar.monthrange(year, month)[1]) last_day = date(year, month, calendar.monthrange(year, month)[1])
redis_set_name = set_to_plot + "_set_" + str(year) + str(month).zfill(2) redis_set_name = set_to_plot + "_set_" + str(year) + str(month).zfill(2)
words = list(server.smembers(redis_set_name)) words = list(server.smembers(redis_set_name))
#words = [x.decode('utf-8') for x in words]
headers = ['Date'] + words headers = ['Date'] + words
with open(csvfilename+'.csv', 'wb') as f: with open(csvfilename+'.csv', 'w') as f:
writer = csv.writer(f) writer = csv.writer(f)
writer.writerow(headers) writer.writerow(headers)
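Both curve builders now open the CSV in text mode ('w'), use date(year, month, 1) instead of the Python 2 octal-looking literal, and cast the redis hash value to int before the threshold test. A hedged sketch of the day-by-word matrix the word-file variant writes (words and counts are invented; the real values come from the redis hashes):

```python
# Hedged sketch of the per-day CSV layout produced by the curve builders.
import csv
from datetime import date, timedelta

words = ['http', 'https', 'ftp']
counts = {'20180501': {'http': 120, 'https': 340}, '20180502': {'ftp': 42}}

first_day, last_day = date(2018, 5, 1), date(2018, 5, 2)
with open('protocolstrending.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerow(['Date'] + words)
    day = first_day
    while day <= last_day:
        curdate = day.strftime("%Y%m%d")
        row = [curdate] + [counts.get(curdate, {}).get(w, 0) for w in words]
        writer.writerow(row)
        day += timedelta(days=1)
```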

View file

@ -45,7 +45,7 @@ subscribe = Redis_CurveManageTopSets
[Categ] [Categ]
subscribe = Redis_Global subscribe = Redis_Global
publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve,Redis_ApiKey
[CreditCards] [CreditCards]
subscribe = Redis_CreditCards subscribe = Redis_CreditCards
@ -105,3 +105,15 @@ publish = Redis_Duplicate,Redis_alertHandler
[Keys] [Keys]
subscribe = Redis_Global subscribe = Redis_Global
publish = Redis_Duplicate,Redis_alertHandler publish = Redis_Duplicate,Redis_alertHandler
[ApiKey]
subscribe = Redis_ApiKey
publish = Redis_Duplicate,Redis_alertHandler
[Base64]
subscribe = Redis_Global
publish = Redis_Duplicate,Redis_alertHandler
[Bitcoin]
subscribe = Redis_Global
publish = Redis_Duplicate,Redis_alertHandler
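Each new [Section] wires a module to the queue it consumes (subscribe) and the queues it feeds (publish) through the Helper Process class. A hedged skeleton of such a module's consume loop, following the pattern visible in the other modules of this diff ('Bitcoin' is only an example section; the publisher setup lines mirror the project's usual boilerplate and are an assumption):

```python
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
# Hedged skeleton of a module consuming its queue via the Helper Process class.
import time
from pubsublogger import publisher
from Helper import Process

if __name__ == '__main__':
    publisher.port = 6380          # assumed, as in the project's other modules
    publisher.channel = 'Script'
    config_section = 'Bitcoin'     # example section name from modules.cfg
    p = Process(config_section)

    while True:
        message = p.get_from_set()
        if message is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)
            continue
        # process the paste path received from the subscribed queue here
        print(message)
```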

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
''' '''
@ -48,7 +48,7 @@ if __name__ == '__main__':
message = p.get_from_set() message = p.get_from_set()
if message is None: if message is None:
publisher.debug("{} queue is empty, waiting".format(config_section)) publisher.debug("{} queue is empty, waiting".format(config_section))
print "queue empty" print("queue empty")
time.sleep(1) time.sleep(1)
continue continue

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
""" """
Template for new modules Template for new modules

View file

@ -1,10 +1,10 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import socks import socks
import socket import socket
import urllib2 import urllib.request
import StringIO import io
import gzip import gzip
import base64 import base64
import sys import sys
@ -21,17 +21,20 @@ def create_connection(address, timeout=None, source_address=None):
def get_page(url, torclient_host='127.0.0.1', torclient_port=9050): def get_page(url, torclient_host='127.0.0.1', torclient_port=9050):
request = urllib2.Request(url) request = urllib.request.Request(url)
# UA of the Tor browser bundle # UA of the Tor browser bundle
request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0') request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0')
return urllib2.urlopen(request, timeout=5).read(max_size * 100000) return urllib.request.urlopen(request, timeout=5).read(max_size * 100000)
#FIXME doesn't work at all
def makegzip64(s): def makegzip64(s):
out = StringIO.StringIO()
with gzip.GzipFile(fileobj=out, mode="w") as f: out = io.BytesIO()
f.write(s)
return base64.standard_b64encode(out.getvalue()) with gzip.GzipFile(fileobj=out, mode='ab') as fo:
fo.write(base64.standard_b64encode(s))
return out.getvalue()
if __name__ == "__main__": if __name__ == "__main__":
@ -41,7 +44,8 @@ if __name__ == "__main__":
exit(1) exit(1)
try: try:
url = base64.standard_b64decode(sys.argv[1]) url = base64.standard_b64decode(sys.argv[1]).decode('utf8')
print(url)
except: except:
print('unable to decode') print('unable to decode')
exit(1) exit(1)
@ -61,7 +65,7 @@ if __name__ == "__main__":
to_write = makegzip64(page) to_write = makegzip64(page)
t, path = tempfile.mkstemp() t, path = tempfile.mkstemp()
with open(path, 'w') as f: #with open(path, 'w') as f:
f.write(to_write) #f.write(to_write)
print path print(path)
exit(0) exit(0)
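The rewritten makegzip64() base64-encodes the page and then gzips the result, and its FIXME admits it does not work. A hedged sketch of the presumably intended order, gzip first and base64 second so the output stays transport-safe text:

```python
# Hedged sketch of the intended helper: compress the fetched page, then
# base64-encode the compressed bytes.
import base64
import gzip
import io

def makegzip64(s: bytes) -> bytes:
    out = io.BytesIO()
    with gzip.GzipFile(fileobj=out, mode='wb') as fo:
        fo.write(s)
    return base64.standard_b64encode(out.getvalue())

# round trip
blob = makegzip64(b'<html>fetched onion page</html>')
print(gzip.decompress(base64.standard_b64decode(blob)))
```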

configs/6382.conf Normal file → Executable file
View file

@ -1,4 +1,7 @@
# Redis configuration file example # Ardb configuration file example, modified from redis's conf file.
# Home dir for ardb instance, it can be referenced by ${ARDB_HOME} in this config file
home ../DATA_ARDB/
# Note on units: when memory size is needed, it is possible to specify # Note on units: when memory size is needed, it is possible to specify
# it in the usual form of 1k 5GB 4M and so forth: # it in the usual form of 1k 5GB 4M and so forth:
@ -12,63 +15,71 @@
# #
# units are case insensitive so 1GB 1Gb 1gB are all the same. # units are case insensitive so 1GB 1Gb 1gB are all the same.
################################## INCLUDES ################################### # By default Ardb does not run as a daemon. Use 'yes' if you need it.
# Include one or more other config files here. This is useful if you
# have a standard template that goes to all Redis server but also need
# to customize a few per-server settings. Include files can include
# other files, so use this wisely.
#
# Notice option "include" won't be rewritten by command "CONFIG REWRITE"
# from admin or Redis Sentinel. Since Redis always uses the last processed
# line as value of a configuration directive, you'd better put includes
# at the beginning of this file to avoid overwriting config change at runtime.
#
# If instead you are interested in using includes to override configuration
# options, it is better to use include as the last line.
#
# include /path/to/local.conf
# include /path/to/other.conf
################################ GENERAL #####################################
# By default Redis does not run as a daemon. Use 'yes' if you need it.
# Note that Redis will write a pid file in /var/run/redis.pid when daemonized.
daemonize no daemonize no
# When running daemonized, Redis writes a pid file in /var/run/redis.pid by # When running daemonized, Ardb writes a pid file in ${ARDB_HOME}/ardb.pid by
# default. You can specify a custom pid file location here. # default. You can specify a custom pid file location here.
#pidfile /var/run/redis.pid pidfile ${ARDB_HOME}/ardb.pid
# Accept connections on the specified port, default is 6379. # The thread pool size for the corresponding all listen servers, -1 means current machine's cpu number
# If port 0 is specified Redis will not listen on a TCP socket. thread-pool-size 4
port 6382
# TCP listen() backlog. #Accept connections on the specified host&port/unix socket, default is 0.0.0.0:16379.
# server[0].listen 127.0.0.1:6382
# In high requests-per-second environments you need an high backlog in order # If current qps exceed the limit, Ardb would return an error.
# to avoid slow clients connections issues. Note that the Linux kernel #server[0].qps-limit 1000
# will silently truncate it to the value of /proc/sys/net/core/somaxconn so
# make sure to raise both the value of somaxconn and tcp_max_syn_backlog
# in order to get the desired effect.
tcp-backlog 511
# By default Redis listens for connections from all the network interfaces #listen on unix socket
# available on the server. It is possible to listen to just one or multiple #server[1].listen /tmp/ardb.sock
# interfaces using the "bind" configuration directive, followed by one or #server[1].unixsocketperm 755
# more IP addresses. #server[1].qps-limit 1000
#
# Examples:
#
# bind 192.168.1.100 10.0.0.1
# bind 127.0.0.1
# Specify the path for the Unix socket that will be used to listen for # 'qps-limit-per-host' used to limit the request per second from same host
# incoming connections. There is no default, so Redis will not listen # 'qps-limit-per-connection' used to limit the request per second from same connection
# on a unix socket when not specified. qps-limit-per-host 0
qps-limit-per-connection 0
# Specify the optimized RocksDB compaction strategies.
# If anything other than none is set then the rocksdb.options will not be used.
# The property can be one of:
# OptimizeLevelStyleCompaction
# OptimizeUniversalStyleCompaction
# none
# #
#unixsocket /tmp/redis.sock rocksdb.compaction OptimizeLevelStyleCompaction
#unixsocketperm 755
# Enable this to indicate that hsca/sscan/zscan command use total order mode for rocksdb engine
rocksdb.scan-total-order false
# Disable RocksDB WAL may improve the write performance but
# data in the un-flushed memtables might be lost in case of a RocksDB shutdown.
# Disabling WAL provides similar guarantees as Redis.
rocksdb.disableWAL false
#rocksdb's options
rocksdb.options write_buffer_size=512M;max_write_buffer_number=5;min_write_buffer_number_to_merge=3;compression=kSnappyCompression;\
bloom_locality=1;memtable_prefix_bloom_size_ratio=0.1;\
block_based_table_factory={block_cache=512M;filter_policy=bloomfilter:10:true};\
create_if_missing=true;max_open_files=10000;rate_limiter_bytes_per_sec=50M;\
use_direct_io_for_flush_and_compaction=true;use_adaptive_mutex=true
#leveldb's options
leveldb.options block_cache_size=512M,write_buffer_size=128M,max_open_files=5000,block_size=4k,block_restart_interval=16,\
bloom_bits=10,compression=snappy,logenable=yes,max_file_size=2M
#lmdb's options
lmdb.options database_maxsize=10G,database_maxdbs=4096,readahead=no,batch_commit_watermark=1024
#perconaft's options
perconaft.options cache_size=128M,compression=snappy
#wiredtiger's options
wiredtiger.options cache_size=512M,session_max=8k,chunk_size=100M,block_size=4k,bloom_bits=10,\
mmap=false,compressor=snappy
#forestdb's options
forestdb.options chunksize=8,blocksize=4K
# Close the connection after a client is idle for N seconds (0 to disable) # Close the connection after a client is idle for N seconds (0 to disable)
timeout 0 timeout 0
@ -91,92 +102,21 @@ tcp-keepalive 0
# Specify the server verbosity level. # Specify the server verbosity level.
# This can be one of: # This can be one of:
# debug (a lot of information, useful for development/testing) # error
# verbose (many rarely useful info, but not a mess like the debug level) # warn
# notice (moderately verbose, what you want in production probably) # info
# warning (only very important / critical messages are logged) # debug
loglevel notice # trace
loglevel info
# Specify the log file name. Also the empty string can be used to force # Specify the log file name. Also 'stdout' can be used to force
# Redis to log on the standard output. Note that if you use standard # Redis to log on the standard output. Note that if you use standard
# output for logging but daemonize, logs will be sent to /dev/null # output for logging but daemonize, logs will be sent to /dev/null
logfile "" #logfile ${ARDB_HOME}/log/ardb-server.log
logfile stdout
# To enable logging to the system logger, just set 'syslog-enabled' to yes,
# and optionally update the other syslog parameters to suit your needs.
# syslog-enabled no
# Specify the syslog identity. # The working data directory.
# syslog-ident redis
# Specify the syslog facility. Must be USER or between LOCAL0-LOCAL7.
# syslog-facility local0
# Set the number of databases. The default database is DB 0, you can select
# a different one on a per-connection basis using SELECT <dbid> where
# dbid is a number between 0 and 'databases'-1
databases 16
################################ SNAPSHOTTING ################################
#
# Save the DB on disk:
#
# save <seconds> <changes>
#
# Will save the DB if both the given number of seconds and the given
# number of write operations against the DB occurred.
#
# In the example below the behaviour will be to save:
# after 900 sec (15 min) if at least 1 key changed
# after 300 sec (5 min) if at least 10 keys changed
# after 60 sec if at least 10000 keys changed
#
# Note: you can disable saving at all commenting all the "save" lines.
#
# It is also possible to remove all the previously configured save
# points by adding a save directive with a single empty string argument
# like in the following example:
#
# save ""
#save 900 1
#save 300 10
save 300 100000
# By default Redis will stop accepting writes if RDB snapshots are enabled
# (at least one save point) and the latest background save failed.
# This will make the user aware (in a hard way) that data is not persisting
# on disk properly, otherwise chances are that no one will notice and some
# disaster will happen.
#
# If the background saving process will start working again Redis will
# automatically allow writes again.
#
# However if you have setup your proper monitoring of the Redis server
# and persistence, you may want to disable this feature so that Redis will
# continue to work as usual even if there are problems with disk,
# permissions, and so forth.
stop-writes-on-bgsave-error yes
# Compress string objects using LZF when dump .rdb databases?
# For default that's set to 'yes' as it's almost always a win.
# If you want to save some CPU in the saving child set it to 'no' but
# the dataset will likely be bigger if you have compressible values or keys.
rdbcompression yes
# Since version 5 of RDB a CRC64 checksum is placed at the end of the file.
# This makes the format more resistant to corruption but there is a performance
# hit to pay (around 10%) when saving and loading RDB files, so you can disable it
# for maximum performances.
#
# RDB files created with checksum disabled have a checksum of zero that will
# tell the loading code to skip the check.
rdbchecksum yes
# The filename where to dump the DB
dbfilename dump6382.rdb
# The working directory.
# #
# The DB will be written inside this directory, with the filename specified # The DB will be written inside this directory, with the filename specified
# above using the 'dbfilename' configuration directive. # above using the 'dbfilename' configuration directive.
@ -184,22 +124,29 @@ dbfilename dump6382.rdb
# The Append Only File will also be created inside this directory. # The Append Only File will also be created inside this directory.
# #
# Note that you must specify a directory here, not a file name. # Note that you must specify a directory here, not a file name.
dir ../dumps/ data-dir ${ARDB_HOME}/data
################################# REPLICATION ################################# ################################# REPLICATION #################################
# Master-Slave replication. Use slaveof to make a Redis instance a copy of # Master-Slave replication. Use slaveof to make a Ardb instance a copy of
# another Redis server. Note that the configuration is local to the slave # another Ardb server. Note that the configuration is local to the slave
# so for example it is possible to configure the slave to save the DB with a # so for example it is possible to configure the slave to save the DB with a
# different interval, or to listen to another port, and so on. # different interval, or to listen to another port, and so on.
# #
# slaveof <masterip> <masterport> # slaveof <masterip>:<masterport>
#slaveof 127.0.0.1:6379
# By default, ardb use 2 threads to execute commands synced from master.
# -1 means use current CPU number threads instead.
slave-workers 2
# Max synced command queue size in memory.
max-slave-worker-queue 1024
# The directory for replication.
repl-dir ${ARDB_HOME}/repl
# If the master is password protected (using the "requirepass" configuration
# directive below) it is possible to tell the slave to authenticate before
# starting the replication synchronization process, otherwise the master will
# refuse the slave request.
#
# masterauth <master-password>
# When a slave loses its connection with the master, or when the replication # When a slave loses its connection with the master, or when the replication
# is still in progress, the slave can act in two different ways: # is still in progress, the slave can act in two different ways:
@ -214,33 +161,55 @@ dir ../dumps/
# #
slave-serve-stale-data yes slave-serve-stale-data yes
# The slave priority is an integer number published by Ardb/Redis in the INFO output.
# It is used by Redis Sentinel in order to select a slave to promote into a
# master if the master is no longer working correctly.
#
# A slave with a low priority number is considered better for promotion, so
# for instance if there are three slaves with priority 10, 100, 25 Sentinel will
# pick the one with priority 10, that is the lowest.
#
# However a special priority of 0 marks the slave as not able to perform the
# role of master, so a slave with priority of 0 will never be selected by
# Redis Sentinel for promotion.
#
# By default the priority is 100.
slave-priority 100
# You can configure a slave instance to accept writes or not. Writing against # You can configure a slave instance to accept writes or not. Writing against
# a slave instance may be useful to store some ephemeral data (because data # a slave instance may be useful to store some ephemeral data (because data
# written on a slave will be easily deleted after resync with the master) but # written on a slave will be easily deleted after resync with the master) but
# may also cause problems if clients are writing to it because of a # may also cause problems if clients are writing to it because of a
# misconfiguration. # misconfiguration.
# #
# Since Redis 2.6 by default slaves are read-only.
#
# Note: read only slaves are not designed to be exposed to untrusted clients # Note: read only slaves are not designed to be exposed to untrusted clients
# on the internet. It's just a protection layer against misuse of the instance. # on the internet. It's just a protection layer against misuse of the instance.
# Still a read only slave exports by default all the administrative commands # Still a read only slave exports by default all the administrative commands
# such as CONFIG, DEBUG, and so forth. To a limited extent you can improve # such as CONFIG, DEBUG, and so forth. To a limited extent you can improve
# security of read only slaves using 'rename-command' to shadow all the # security of read only slaves using 'rename-command' to shadow all the
# administrative / dangerous commands. # administrative / dangerous commands.
#
# Note: any requests processed by non read only slaves would no write to replication
# log and sync to connected slaves.
slave-read-only yes slave-read-only yes
# The directory for backup.
backup-dir ${ARDB_HOME}/backup
#
# You can configure the backup file format as 'redis' or 'ardb'. The 'ardb' format
# can only used by ardb instance, while 'redis' format file can be used by redis
# and ardb instance.
backup-file-format ardb
# Slaves send PINGs to server in a predefined interval. It's possible to change # Slaves send PINGs to server in a predefined interval. It's possible to change
# this interval with the repl_ping_slave_period option. The default value is 10 # this interval with the repl_ping_slave_period option. The default value is 10
# seconds. # seconds.
# #
# repl-ping-slave-period 10 # repl-ping-slave-period 10
# The following option sets the replication timeout for: # The following option sets a timeout for both Bulk transfer I/O timeout and
# # master data or ping response timeout. The default value is 60 seconds.
# 1) Bulk transfer I/O during SYNC, from the point of view of slave.
# 2) Master timeout from the point of view of slaves (data, pings).
# 3) Slave timeout from the point of view of masters (REPLCONF ACK pings).
# #
# It is important to make sure that this value is greater than the value # It is important to make sure that this value is greater than the value
# specified for repl-ping-slave-period otherwise a timeout will be detected # specified for repl-ping-slave-period otherwise a timeout will be detected
@ -250,7 +219,7 @@ slave-read-only yes
# Disable TCP_NODELAY on the slave socket after SYNC? # Disable TCP_NODELAY on the slave socket after SYNC?
# #
# If you select "yes" Redis will use a smaller number of TCP packets and # If you select "yes" Ardb will use a smaller number of TCP packets and
# less bandwidth to send data to slaves. But this can add a delay for # less bandwidth to send data to slaves. But this can add a delay for
# the data to appear on the slave side, up to 40 milliseconds with # the data to appear on the slave side, up to 40 milliseconds with
# Linux kernels using a default configuration. # Linux kernels using a default configuration.
@ -272,9 +241,46 @@ repl-disable-tcp-nodelay no
# The biggest the replication backlog, the longer the time the slave can be # The biggest the replication backlog, the longer the time the slave can be
# disconnected and later be able to perform a partial resynchronization. # disconnected and later be able to perform a partial resynchronization.
# #
# The backlog is only allocated once there is at least a slave connected. # If the size is configured by 0, then Ardb instance can NOT serve as a master.
# #
# repl-backlog-size 1mb # repl-backlog-size 500m
repl-backlog-size 1G
repl-backlog-cache-size 100M
snapshot-max-lag-offset 500M
# Set the max number of snapshots. By default this limit is set to 10 snapshot.
# Once the limit is reached Ardb would try to remove the oldest snapshots
maxsnapshots 10
# It is possible for a master to stop accepting writes if there are less than
# N slaves connected, having a lag less or equal than M seconds.
#
# The N slaves need to be in "online" state.
#
# The lag in seconds, that must be <= the specified value, is calculated from
# the last ping received from the slave, that is usually sent every second.
#
# This option does not GUARANTEE that N replicas will accept the write, but
# will limit the window of exposure for lost writes in case not enough slaves
# are available, to the specified number of seconds.
#
# For example to require at least 3 slaves with a lag <= 10 seconds use:
#
# min-slaves-to-write 3
# min-slaves-max-lag 10
# When a slave loses its connection with the master, or when the replication
# is still in progress, the slave can act in two different ways:
#
# 1) if slave-serve-stale-data is set to 'yes' (the default) the slave will
# still reply to client requests, possibly with out of date data, or the
# data set may just be empty if this is the first synchronization.
#
# 2) if slave-serve-stale-data is set to 'no' the slave will reply with
# an error "SYNC with master in progress" to all the kind of commands
# but to INFO and SLAVEOF.
#
slave-serve-stale-data yes
# After a master has no longer connected slaves for some time, the backlog # After a master has no longer connected slaves for some time, the backlog
# will be freed. The following option configures the amount of seconds that # will be freed. The following option configures the amount of seconds that
@ -285,42 +291,32 @@ repl-disable-tcp-nodelay no
# #
# repl-backlog-ttl 3600 # repl-backlog-ttl 3600
# The slave priority is an integer number published by Redis in the INFO output. # Slave clear current data store before full resync to master.
# It is used by Redis Sentinel in order to select a slave to promote into a # It makes sure that the slave stays consistent with the master's data. But the slave may take a
# master if the master is no longer working correctly. # long time to delete data, it depends on
# # If set by no, then slave may have different data with master.
# A slave with a low priority number is considered better for promotion, so slave-cleardb-before-fullresync yes
# for instance if there are three slaves with priority 10, 100, 25 Sentinel will
# pick the one with priority 10, that is the lowest.
#
# However a special priority of 0 marks the slave as not able to perform the
# role of master, so a slave with priority of 0 will never be selected by
# Redis Sentinel for promotion.
#
# By default the priority is 100.
slave-priority 100
# It is possible for a master to stop accepting writes if there are less than # Master/Slave instance would persist sync state every 'repl-backlog-sync-period' secs.
# N slaves connected, having a lag less or equal than M seconds. repl-backlog-sync-period 5
# Slave would ignore any 'expire' setting from replication command if set by 'yes'.
# It could be used if master is redis instance serve hot data with expire setting, slave is
# ardb instance which persist all data.
# Since master redis instance would generate a 'del' for each expired key, slave should ignore
# all 'del' command too by setting 'slave-ignore-del' to 'yes' for this scenario.
slave-ignore-expire no
slave-ignore-del no
# After a master has no longer connected slaves for some time, the backlog
# will be freed. The following option configures the amount of seconds that
# need to elapse, starting from the time the last slave disconnected, for
# the backlog buffer to be freed.
# #
# The N slaves need to be in "online" state. # A value of 0 means to never release the backlog.
# #
# The lag in seconds, that must be <= the specified value, is calculated from # repl-backlog-ttl 3600
# the last ping received from the slave, that is usually sent every second.
#
# This option does not GUARANTEES that N replicas will accept the write, but
# will limit the window of exposure for lost writes in case not enough slaves
# are available, to the specified number of seconds.
#
# For example to require at least 3 slaves with a lag <= 10 seconds use:
#
# min-slaves-to-write 3
# min-slaves-max-lag 10
#
# Setting one or the other to 0 disables the feature.
#
# By default min-slaves-to-write is set to 0 (feature disabled) and
# min-slaves-max-lag is set to 10.
################################## SECURITY ################################### ################################## SECURITY ###################################
@ -356,6 +352,15 @@ slave-priority 100
# Please note that changing the name of commands that are logged into the # Please note that changing the name of commands that are logged into the
# AOF file or transmitted to slaves may cause problems. # AOF file or transmitted to slaves may cause problems.
################################ CLUSTER ###############################
# Max execution time of a Lua script in milliseconds.
#zookeeper-servers 127.0.0.1:2181,127.0.0.1:2182,127.0.0.1:2183
#zk-recv-timeout 10000
#zk-clientid-file ${ARDB_HOME}/ardb.zkclientid
cluster-name ardb-cluster
################################### LIMITS #################################### ################################### LIMITS ####################################
# Set the max number of connected clients at the same time. By default # Set the max number of connected clients at the same time. By default
@ -369,173 +374,13 @@ slave-priority 100
# #
# maxclients 10000 # maxclients 10000
# Don't use more memory than the specified amount of bytes.
# When the memory limit is reached Redis will try to remove keys
# according to the eviction policy selected (see maxmemory-policy).
#
# If Redis can't remove keys according to the policy, or if the policy is
# set to 'noeviction', Redis will start to reply with errors to commands
# that would use more memory, like SET, LPUSH, and so on, and will continue
# to reply to read-only commands like GET.
#
# This option is usually useful when using Redis as an LRU cache, or to set
# a hard memory limit for an instance (using the 'noeviction' policy).
#
# WARNING: If you have slaves attached to an instance with maxmemory on,
# the size of the output buffers needed to feed the slaves are subtracted
# from the used memory count, so that network problems / resyncs will
# not trigger a loop where keys are evicted, and in turn the output
# buffer of slaves is full with DELs of keys evicted triggering the deletion
# of more keys, and so forth until the database is completely emptied.
#
# In short... if you have slaves attached it is suggested that you set a lower
# limit for maxmemory so that there is some free RAM on the system for slave
# output buffers (but this is not needed if the policy is 'noeviction').
#
# maxmemory <bytes>
# MAXMEMORY POLICY: how Redis will select what to remove when maxmemory # The client output buffer limits can be used to force disconnection of clients
# is reached. You can select among five behaviors: # that are not reading data from the server fast enough for some reason (a
# # common reason is that a Pub/Sub/Slave client can't consume messages as fast as the
# volatile-lru -> remove the key with an expire set using an LRU algorithm # publisher can produce them).
# allkeys-lru -> remove any key accordingly to the LRU algorithm slave-client-output-buffer-limit 256mb
# volatile-random -> remove a random key with an expire set pubsub-client-output-buffer-limit 32mb
# allkeys-random -> remove a random key, any key
# volatile-ttl -> remove the key with the nearest expire time (minor TTL)
# noeviction -> don't expire at all, just return an error on write operations
#
# Note: with any of the above policies, Redis will return an error on write
# operations, when there are not suitable keys for eviction.
#
# At the date of writing this commands are: set setnx setex append
# incr decr rpush lpush rpushx lpushx linsert lset rpoplpush sadd
# sinter sinterstore sunion sunionstore sdiff sdiffstore zadd zincrby
# zunionstore zinterstore hset hsetnx hmset hincrby incrby decrby
# getset mset msetnx exec sort
#
# The default is:
#
# maxmemory-policy volatile-lru
# LRU and minimal TTL algorithms are not precise algorithms but approximated
# algorithms (in order to save memory), so you can select as well the sample
# size to check. For instance for default Redis will check three keys and
# pick the one that was used less recently, you can change the sample size
# using the following configuration directive.
#
# maxmemory-samples 3
############################## APPEND ONLY MODE ###############################
# By default Redis asynchronously dumps the dataset on disk. This mode is
# good enough in many applications, but an issue with the Redis process or
# a power outage may result into a few minutes of writes lost (depending on
# the configured save points).
#
# The Append Only File is an alternative persistence mode that provides
# much better durability. For instance using the default data fsync policy
# (see later in the config file) Redis can lose just one second of writes in a
# dramatic event like a server power outage, or a single write if something
# wrong with the Redis process itself happens, but the operating system is
# still running correctly.
#
# AOF and RDB persistence can be enabled at the same time without problems.
# If the AOF is enabled on startup Redis will load the AOF, that is the file
# with the better durability guarantees.
#
# Please check http://redis.io/topics/persistence for more information.
appendonly no
# The name of the append only file (default: "appendonly.aof")
appendfilename "appendonly.aof"
# The fsync() call tells the Operating System to actually write data on disk
# instead to wait for more data in the output buffer. Some OS will really flush
# data on disk, some other OS will just try to do it ASAP.
#
# Redis supports three different modes:
#
# no: don't fsync, just let the OS flush the data when it wants. Faster.
# always: fsync after every write to the append only log . Slow, Safest.
# everysec: fsync only one time every second. Compromise.
#
# The default is "everysec", as that's usually the right compromise between
# speed and data safety. It's up to you to understand if you can relax this to
# "no" that will let the operating system flush the output buffer when
# it wants, for better performances (but if you can live with the idea of
# some data loss consider the default persistence mode that's snapshotting),
# or on the contrary, use "always" that's very slow but a bit safer than
# everysec.
#
# More details please check the following article:
# http://antirez.com/post/redis-persistence-demystified.html
#
# If unsure, use "everysec".
# appendfsync always
appendfsync everysec
# appendfsync no
# When the AOF fsync policy is set to always or everysec, and a background
# saving process (a background save or AOF log background rewriting) is
# performing a lot of I/O against the disk, in some Linux configurations
# Redis may block too long on the fsync() call. Note that there is no fix for
# this currently, as even performing fsync in a different thread will block
# our synchronous write(2) call.
#
# In order to mitigate this problem it's possible to use the following option
# that will prevent fsync() from being called in the main process while a
# BGSAVE or BGREWRITEAOF is in progress.
#
# This means that while another child is saving, the durability of Redis is
# the same as "appendfsync none". In practical terms, this means that it is
# possible to lose up to 30 seconds of log in the worst scenario (with the
# default Linux settings).
#
# If you have latency problems turn this to "yes". Otherwise leave it as
# "no" that is the safest pick from the point of view of durability.
no-appendfsync-on-rewrite no
# Automatic rewrite of the append only file.
# Redis is able to automatically rewrite the log file implicitly calling
# BGREWRITEAOF when the AOF log size grows by the specified percentage.
#
# This is how it works: Redis remembers the size of the AOF file after the
# latest rewrite (if no rewrite has happened since the restart, the size of
# the AOF at startup is used).
#
# This base size is compared to the current size. If the current size is
# bigger than the specified percentage, the rewrite is triggered. Also
# you need to specify a minimal size for the AOF file to be rewritten, this
# is useful to avoid rewriting the AOF file even if the percentage increase
# is reached but it is still pretty small.
#
# Specify a percentage of zero in order to disable the automatic AOF
# rewrite feature.
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb
################################ LUA SCRIPTING ###############################
# Max execution time of a Lua script in milliseconds.
#
# If the maximum execution time is reached Redis will log that a script is
# still in execution after the maximum allowed time and will start to
# reply to queries with an error.
#
# When a long running script exceed the maximum execution time only the
# SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be
# used to stop a script that did not yet called write commands. The second
# is the only way to shut down the server in the case a write commands was
# already issue by the script but the user don't want to wait for the natural
# termination of the script.
#
# Set it to 0 or a negative value for unlimited execution without warnings.
lua-time-limit 5000
################################## SLOW LOG ################################### ################################## SLOW LOG ###################################
@ -561,156 +406,63 @@ slowlog-log-slower-than 10000
# You can reclaim memory used by the slow log with SLOWLOG RESET. # You can reclaim memory used by the slow log with SLOWLOG RESET.
slowlog-max-len 128 slowlog-max-len 128
############################# Event notification ############################## ################################ LUA SCRIPTING ###############################
# Redis can notify Pub/Sub clients about events happening in the key space. # Max execution time of a Lua script in milliseconds.
# This feature is documented at http://redis.io/topics/keyspace-events
# #
# For instance if keyspace events notification is enabled, and a client # If the maximum execution time is reached Redis will log that a script is
# performs a DEL operation on key "foo" stored in the Database 0, two # still in execution after the maximum allowed time and will start to
# messages will be published via Pub/Sub: # reply to queries with an error.
# #
# PUBLISH __keyspace@0__:foo del # When a long running script exceed the maximum execution time only the
# PUBLISH __keyevent@0__:del foo # SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be
# used to stop a script that did not yet called write commands. The second
# is the only way to shut down the server in the case a write commands was
# already issue by the script but the user don't want to wait for the natural
# termination of the script.
# #
# It is possible to select the events that Redis will notify among a set # Set it to 0 or a negative value for unlimited execution without warnings.
# of classes. Every class is identified by a single character: lua-time-limit 5000
#
# K Keyspace events, published with __keyspace@<db>__ prefix.
# E Keyevent events, published with __keyevent@<db>__ prefix.
# g Generic commands (non-type specific) like DEL, EXPIRE, RENAME, ...
# $ String commands
# l List commands
# s Set commands
# h Hash commands
# z Sorted set commands
# x Expired events (events generated every time a key expires)
# e Evicted events (events generated when a key is evicted for maxmemory)
# A Alias for g$lshzxe, so that the "AKE" string means all the events.
#
# The "notify-keyspace-events" takes as argument a string that is composed
# by zero or multiple characters. The empty string means that notifications
# are disabled at all.
#
# Example: to enable list and generic events, from the point of view of the
# event name, use:
#
# notify-keyspace-events Elg
#
# Example 2: to get the stream of the expired keys subscribing to channel
# name __keyevent@0__:expired use:
#
# notify-keyspace-events Ex
#
# By default all notifications are disabled because most users don't need
# this feature and the feature has some overhead. Note that if you don't
# specify at least one of K or E, no events will be delivered.
notify-keyspace-events ""
############################### ADVANCED CONFIG ############################### ############################### ADVANCED CONFIG ###############################
## Since some redis clients would check info command's output, this configuration
## would be set in 'misc' section of 'info's output
#additional-misc-info redis_version:2.8.9\nredis_trick:yes
# Hashes are encoded using a memory efficient data structure when they have a
# small number of entries, and the biggest entry does not exceed a given
# threshold. These thresholds can be configured using the following directives.
hash-max-ziplist-entries 512
hash-max-ziplist-value 64
# Similarly to hashes, small lists are also encoded in a special way in order # HyperLogLog sparse representation bytes limit. The limit includes the
# to save a lot of space. The special representation is only used when # 16 bytes header. When an HyperLogLog using the sparse representation crosses
# you are under the following limits: # this limit, it is converted into the dense representation.
list-max-ziplist-entries 512 #
list-max-ziplist-value 64 # A value greater than 16000 is totally useless, since at that point the
# dense representation is more memory efficient.
#
# The suggested value is ~ 3000 in order to have the benefits of
# the space efficient encoding without slowing down too much PFADD,
# which is O(N) with the sparse encoding. The value can be raised to
# ~ 10000 when CPU is not a concern, but space is, and the data set is
# composed of many HyperLogLogs with cardinality in the 0 - 15000 range.
hll-sparse-max-bytes 3000
# Sets have a special encoding in just one case: when a set is composed #trusted-ip 10.10.10.10
# of just strings that happens to be integers in radix 10 in the range #trusted-ip 10.10.10.*
# of 64 bit signed integers.
# The following configuration setting sets the limit in the size of the
# set in order to use this special memory saving encoding.
set-max-intset-entries 512
# Similarly to hashes and lists, sorted sets are also specially encoded in # By default Ardb would not compact whole db after loading a snapshot, which may happens
# order to save a lot of space. This encoding is only used when the length and # when slave syncing from master, processing 'import' command from client.
# elements of a sorted set are below the following limits: # This configuration only works with rocksdb engine.
zset-max-ziplist-entries 128 # If ardb does not compact data after loading snapshot file, there would be poor read performance before rocksdb
zset-max-ziplist-value 64 # completes the next compaction task internally. While the compaction task would cost very long time for a huge data set.
compact-after-snapshot-load false
# Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in # Ardb would store cursor in memory
# order to help rehashing the main Redis hash table (the one mapping top-level scan-redis-compatible yes
# keys to values). The hash table implementation Redis uses (see dict.c) scan-cursor-expire-after 60
# performs a lazy rehashing: the more operation you run into a hash table
# that is rehashing, the more rehashing "steps" are performed, so if the
# server is idle the rehashing is never complete and some more memory is used
# by the hash table.
#
# The default is to use this millisecond 10 times every second in order to
# active rehashing the main dictionaries, freeing memory when possible.
#
# If unsure:
# use "activerehashing no" if you have hard latency requirements and it is
# not a good thing in your environment that Redis can reply form time to time
# to queries with 2 milliseconds delay.
#
# use "activerehashing yes" if you don't have such hard requirements but
# want to free memory asap when possible.
activerehashing yes
# The client output buffer limits can be used to force disconnection of clients redis-compatible-mode yes
# that are not reading data from the server fast enough for some reason (a redis-compatible-version 2.8.0
# common reason is that a Pub/Sub client can't consume messages as fast as the
# publisher can produce them).
#
# The limit can be set differently for the three different classes of clients:
#
# normal -> normal clients
# slave -> slave clients and MONITOR clients
# pubsub -> clients subscribed to at least one pubsub channel or pattern
#
# The syntax of every client-output-buffer-limit directive is the following:
#
# client-output-buffer-limit <class> <hard limit> <soft limit> <soft seconds>
#
# A client is immediately disconnected once the hard limit is reached, or if
# the soft limit is reached and remains reached for the specified number of
# seconds (continuously).
# So for instance if the hard limit is 32 megabytes and the soft limit is
# 16 megabytes / 10 seconds, the client will get disconnected immediately
# if the size of the output buffers reach 32 megabytes, but will also get
# disconnected if the client reaches 16 megabytes and continuously overcomes
# the limit for 10 seconds.
#
# By default normal clients are not limited because they don't receive data
# without asking (in a push way), but just after a request, so only
# asynchronous clients may create a scenario where data is requested faster
# than it can read.
#
# Instead there is a default limit for pubsub and slave clients, since
# subscribers and slaves receive data in a push fashion.
#
# Both the hard or the soft limit can be disabled by setting them to zero.
client-output-buffer-limit normal 0 0 0
client-output-buffer-limit slave 256mb 64mb 60
client-output-buffer-limit pubsub 32mb 8mb 60
# Redis calls an internal function to perform many background tasks, like statistics-log-period 600
# closing connections of clients in timeout, purging expired keys that are
# never requested, and so forth.
#
# Not all tasks are performed with the same frequency, but Redis checks for
# tasks to perform accordingly to the specified "hz" value.
#
# By default "hz" is set to 10. Raising the value will use more CPU when
# Redis is idle, but at the same time will make Redis more responsive when
# there are many keys expiring at the same time, and timeouts may be
# handled with more precision.
#
# The range is between 1 and 500, however a value over 100 is usually not
# a good idea. Most users should use the default of 10 and raise this up to
# 100 only in environments where very low latency is required.
hz 10
# When a child rewrites the AOF file, if the following option is enabled
# the file will be fsync-ed every 32 MB of data generated. This is useful
# in order to commit the file to the disk more incrementally and avoid
# big latency spikes.
aof-rewrite-incremental-fsync yes
# Range deletion min size trigger
range-delete-min-size 100
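The scan-redis-compatible / scan-cursor-expire-after settings above make ardb emulate Redis' cursor-based SCAN, so existing redis-py code keeps working. A minimal sketch, assuming an ardb instance reachable locally (host, port and db are placeholders, not values taken from this config):

    import redis

    # iterate keys through the redis-compatible SCAN emulation described above;
    # scan_iter() handles the cursor, which ardb keeps server-side for
    # scan-cursor-expire-after seconds
    r = redis.StrictRedis(host='127.0.0.1', port=6382, db=0, decode_responses=True)
    for key in r.scan_iter(match='*', count=100):
        print(key)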

5
files/ApiKey Normal file
View file

@ -0,0 +1,5 @@
amazon
amazonaws
amzn
aws
googleapis

View file

@ -5,7 +5,7 @@ set -x
sudo apt-get update sudo apt-get update
sudo apt-get install python-pip python-virtualenv python-dev libfreetype6-dev \ sudo apt-get install python3-pip python-virtualenv python3-dev libfreetype6-dev \
screen g++ python-tk unzip libsnappy-dev cmake -y screen g++ python-tk unzip libsnappy-dev cmake -y
#optional tor install #optional tor install
@ -15,7 +15,7 @@ sudo apt-get install tor
sudo apt-get install libssl-dev libfreetype6-dev python-numpy -y sudo apt-get install libssl-dev libfreetype6-dev python-numpy -y
#pyMISP #pyMISP
sudo apt-get -y install python3-pip #sudo apt-get -y install python3-pip
# DNS deps # DNS deps
sudo apt-get install libadns1 libadns1-dev -y sudo apt-get install libadns1 libadns1-dev -y
@ -60,11 +60,9 @@ sudo ldconfig
popd popd
popd popd
# REDIS LEVEL DB # # ARDB #
test ! -d redis-leveldb/ && git clone https://github.com/KDr2/redis-leveldb.git test ! -d ardb/ && git clone https://github.com/yinqiwen/ardb.git
pushd redis-leveldb/ pushd ardb/
git submodule init
git submodule update
make make
popd popd
@ -73,18 +71,18 @@ if [ ! -f bin/packages/config.cfg ]; then
fi fi
pushd var/www/ pushd var/www/
./update_thirdparty.sh sudo ./update_thirdparty.sh
popd popd
if [ -z "$VIRTUAL_ENV" ]; then if [ -z "$VIRTUAL_ENV" ]; then
virtualenv AILENV virtualenv -p python3 AILENV
echo export AIL_HOME=$(pwd) >> ./AILENV/bin/activate echo export AIL_HOME=$(pwd) >> ./AILENV/bin/activate
echo export AIL_BIN=$(pwd)/bin/ >> ./AILENV/bin/activate echo export AIL_BIN=$(pwd)/bin/ >> ./AILENV/bin/activate
echo export AIL_FLASK=$(pwd)/var/www/ >> ./AILENV/bin/activate echo export AIL_FLASK=$(pwd)/var/www/ >> ./AILENV/bin/activate
echo export AIL_REDIS=$(pwd)/redis/src/ >> ./AILENV/bin/activate echo export AIL_REDIS=$(pwd)/redis/src/ >> ./AILENV/bin/activate
echo export AIL_LEVELDB=$(pwd)/redis-leveldb/ >> ./AILENV/bin/activate echo export AIL_ARDB=$(pwd)/ardb/src/ >> ./AILENV/bin/activate
. ./AILENV/bin/activate . ./AILENV/bin/activate
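Because the activate script above exports AIL_HOME, AIL_BIN, AIL_FLASK, AIL_REDIS and AIL_ARDB, every module and test can resolve its paths from the environment. A minimal sketch of how that is typically consumed (the config.cfg join mirrors the bin/packages/config.cfg check above; it is only an illustration):

    import os

    # available once 'AILENV/bin/activate' has been sourced
    ail_home = os.environ['AIL_HOME']
    ail_bin = os.environ['AIL_BIN']
    config_file = os.path.join(ail_home, 'bin', 'packages', 'config.cfg')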
@ -93,28 +91,29 @@ fi
year1=20`date +%y` year1=20`date +%y`
year2=20`date --date='-1 year' +%y` year2=20`date --date='-1 year' +%y`
mkdir -p $AIL_HOME/{PASTES,Blooms,dumps} mkdir -p $AIL_HOME/{PASTES,Blooms,dumps}
mkdir -p $AIL_HOME/LEVEL_DB_DATA/{$year1,$year2}
pip install -U pip pip3 install -U pip
pip install -U -r pip_packages_requirement.txt
pip3 install -U -r pip3_packages_requirement.txt pip3 install -U -r pip3_packages_requirement.txt
# Pyfaup # Pyfaup
pushd faup/src/lib/bindings/python/ pushd faup/src/lib/bindings/python/
python setup.py install python3 setup.py install
popd popd
# Py tlsh # Py tlsh
pushd tlsh/py_ext pushd tlsh/py_ext
python setup.py build #python setup.py build
python setup.py install #python setup.py install
sudo python3 setup.py build python3 setup.py build
sudo python3 setup.py install python3 setup.py install
# Download the necessary NLTK corpora and sentiment vader # Download the necessary NLTK corpora and sentiment vader
HOME=$(pwd) python -m textblob.download_corpora HOME=$(pwd) python3 -m textblob.download_corpora
python -m nltk.downloader vader_lexicon python3 -m nltk.downloader vader_lexicon
python -m nltk.downloader punkt python3 -m nltk.downloader punkt
# install nosetests
sudo pip install nose
#Create the file all_module and update the graph in doc #Create the file all_module and update the graph in doc
$AIL_HOME/doc/generate_modules_data_flow_graph.sh $AIL_HOME/doc/generate_modules_data_flow_graph.sh

View file

@ -1,13 +1,64 @@
pymisp pymisp
redis redis
filemagic #filemagic conflict with magic
crcmod crcmod
mmh3 mmh3
ssdeep ssdeep
nltk
textblob
pubsublogger pubsublogger
zmq zmq
langid langid
#Essential
redis
pyzmq
dnspython
logbook
pubsublogger
textblob
#Tokeniser
nltk
#Graph
numpy
matplotlib
networkx
terminaltables
colorama
asciimatics
# Hashlib
crcmod
mmh3
ssdeep
python-Levenshtein
#Others
python-magic
pybloomfiltermmap
psutil
phonenumbers
ipython
flask
texttable
#DomainClassifier
DomainClassifier
#Indexer requirements
whoosh
ipaddress
pycountry
# To fetch Onion urls
PySocks
#ASN lookup requirements
#https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/adns-python/adns-python-1.2.1.tar.gz
https://github.com/trolldbois/python3-adns/archive/master.zip
https://github.com/trolldbois/python-cymru-services/archive/master.zip
https://github.com/saffsd/langid.py/archive/master.zip

View file

@ -1,51 +0,0 @@
#Essential
redis
pyzmq
dnspython
logbook
pubsublogger
textblob
#Graph
numpy
matplotlib
networkx
terminaltables
colorama
asciimatics
#Tokeniser
nltk
# Hashlib
crcmod
mmh3
ssdeep
python-Levenshtein
#Others
python-magic
pybloomfiltermmap
psutil
phonenumbers
ipython
flask
texttable
#DomainClassifier
DomainClassifier
#Indexer requirements
whoosh
ipaddress
pycountry
# To fetch Onion urls
PySocks
#ASN lookup requirements
https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/adns-python/adns-python-1.2.1.tar.gz
https://github.com/trolldbois/python-cymru-services/archive/master.zip
https://github.com/saffsd/langid.py/archive/master.zip

9
python3_upgrade.sh Executable file
View file

@ -0,0 +1,9 @@
#!/bin/bash
sudo rm -rf AILENV
mkdir old
sudo mv indexdir old/old_indexdir_python2
sudo mv LEVEL_DB_DATA old/old_LEVEL_DB_DATA
sudo mv dumps old/old_dumps
./installing_deps.sh

Binary file not shown.

0
tests/__init__.py Normal file
View file

22
tests/testHelper.py Normal file
View file

@ -0,0 +1,22 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import unittest
import sys,os
sys.path.append(os.environ['AIL_BIN'])
from Helper import Process
class TestHelper(unittest.TestCase):
def setUp(self):
config_section = 'Keys'
def test_Process_Constructor_using_key_module(self):
conf_section = 'Keys'
process = Process(conf_section)
self.assertEqual(process.subscriber_name, 'Keys')

36
tests/testKeys.py Normal file
View file

@ -0,0 +1,36 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys,os
import unittest
import magic
sys.path.append(os.environ['AIL_BIN'])
from packages.Paste import Paste
import Keys as Keys
from Helper import Process
from pubsublogger import publisher
class TestKeysModule(unittest.TestCase):
def setUp(self):
self.paste = Paste('../samples/2018/01/01/keys_certificat_sample.gz')
# Section name in bin/packages/modules.cfg
self.config_section = 'Keys'
# Setup the I/O queues
p = Process(self.config_section)
def test_search_key(self):
with self.assertRaises(pubsublogger.exceptions.NoChannelError):
Keys.search_key(self.paste)
def test_search_key(self):
with self.assertRaises(NameError):
publisher.port = 6380
publisher.channel = 'Script'
Keys.search_key(self.paste)
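A minimal way to run the new test files locally, assuming the AILENV virtualenv is active and the AIL_* variables are exported; the discovery pattern below is an assumption, not the CI entry point:

    import unittest

    # discover and run tests/testHelper.py and tests/testKeys.py
    suite = unittest.defaultTestLoader.discover('tests', pattern='test*.py')
    unittest.TextTestRunner(verbosity=2).run(suite)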

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
''' '''

View file

@ -1,8 +1,8 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import redis import redis
import ConfigParser import configparser
import json import json
import datetime import datetime
import time import time
@ -72,7 +72,7 @@ with open('templates/header_base.html', 'r') as f:
modified_header = complete_header modified_header = complete_header
#Add the header in the supplied order #Add the header in the supplied order
for module_name, txt in to_add_to_header_dico.items(): for module_name, txt in list(to_add_to_header_dico.items()):
to_replace = '<!--{}-->'.format(module_name) to_replace = '<!--{}-->'.format(module_name)
if to_replace in complete_header: if to_replace in complete_header:
modified_header = modified_header.replace(to_replace, txt) modified_header = modified_header.replace(to_replace, txt)
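The loop above fills per-module placeholders of the form <!--module_name--> in the shared header; a standalone sketch of the same replacement, with a made-up module name and markup:

    complete_header = '<ul><!--search--><!--terms--></ul>'
    to_add_to_header_dico = {'search': '<li>Search</li>'}

    modified_header = complete_header
    for module_name, txt in list(to_add_to_header_dico.items()):
        to_replace = '<!--{}-->'.format(module_name)
        if to_replace in modified_header:
            modified_header = modified_header.replace(to_replace, txt)
    # '<ul><li>Search</li><!--terms--></ul>' -- unknown placeholders stay untouched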

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
"Hepler to create a new webpage associated with a module." "Hepler to create a new webpage associated with a module."
@ -37,7 +37,7 @@ def createModuleFolder(modulename):
def main(): def main():
rep1 = raw_input('New module name: ') rep1 = input('New module name: ')
createModuleFolder(rep1) createModuleFolder(rep1)
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -1,10 +1,10 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
''' '''
Flask global variables shared across modules Flask global variables shared across modules
''' '''
import ConfigParser import configparser
import redis import redis
import os import os
@ -18,7 +18,7 @@ if not os.path.exists(configfile):
Did you set environment variables? \ Did you set environment variables? \
Or activate the virtualenv.') Or activate the virtualenv.')
cfg = ConfigParser.ConfigParser() cfg = configparser.ConfigParser()
cfg.read(configfile) cfg.read(configfile)
@ -26,41 +26,47 @@ cfg.read(configfile)
r_serv = redis.StrictRedis( r_serv = redis.StrictRedis(
host=cfg.get("Redis_Queues", "host"), host=cfg.get("Redis_Queues", "host"),
port=cfg.getint("Redis_Queues", "port"), port=cfg.getint("Redis_Queues", "port"),
db=cfg.getint("Redis_Queues", "db")) db=cfg.getint("Redis_Queues", "db"),
decode_responses=True)
r_serv_log = redis.StrictRedis( r_serv_log = redis.StrictRedis(
host=cfg.get("Redis_Log", "host"), host=cfg.get("Redis_Log", "host"),
port=cfg.getint("Redis_Log", "port"), port=cfg.getint("Redis_Log", "port"),
db=cfg.getint("Redis_Log", "db")) db=cfg.getint("Redis_Log", "db"),
decode_responses=True)
r_serv_charts = redis.StrictRedis( r_serv_charts = redis.StrictRedis(
host=cfg.get("Redis_Level_DB_Trending", "host"), host=cfg.get("ARDB_Trending", "host"),
port=cfg.getint("Redis_Level_DB_Trending", "port"), port=cfg.getint("ARDB_Trending", "port"),
db=cfg.getint("Redis_Level_DB_Trending", "db")) db=cfg.getint("ARDB_Trending", "db"),
decode_responses=True)
r_serv_sentiment = redis.StrictRedis( r_serv_sentiment = redis.StrictRedis(
host=cfg.get("Redis_Level_DB_Sentiment", "host"), host=cfg.get("ARDB_Sentiment", "host"),
port=cfg.getint("Redis_Level_DB_Sentiment", "port"), port=cfg.getint("ARDB_Sentiment", "port"),
db=cfg.getint("Redis_Level_DB_Sentiment", "db")) db=cfg.getint("ARDB_Sentiment", "db"),
decode_responses=True)
r_serv_term = redis.StrictRedis( r_serv_term = redis.StrictRedis(
host=cfg.get("Redis_Level_DB_TermFreq", "host"), host=cfg.get("ARDB_TermFreq", "host"),
port=cfg.getint("Redis_Level_DB_TermFreq", "port"), port=cfg.getint("ARDB_TermFreq", "port"),
db=cfg.getint("Redis_Level_DB_TermFreq", "db")) db=cfg.getint("ARDB_TermFreq", "db"),
decode_responses=True)
r_serv_cred = redis.StrictRedis( r_serv_cred = redis.StrictRedis(
host=cfg.get("Redis_Level_DB_TermCred", "host"), host=cfg.get("ARDB_TermCred", "host"),
port=cfg.getint("Redis_Level_DB_TermCred", "port"), port=cfg.getint("ARDB_TermCred", "port"),
db=cfg.getint("Redis_Level_DB_TermCred", "db")) db=cfg.getint("ARDB_TermCred", "db"),
decode_responses=True)
r_serv_pasteName = redis.StrictRedis( r_serv_pasteName = redis.StrictRedis(
host=cfg.get("Redis_Paste_Name", "host"), host=cfg.get("Redis_Paste_Name", "host"),
port=cfg.getint("Redis_Paste_Name", "port"), port=cfg.getint("Redis_Paste_Name", "port"),
db=cfg.getint("Redis_Paste_Name", "db")) db=cfg.getint("Redis_Paste_Name", "db"),
decode_responses=True)
# VARIABLES # # VARIABLES #
max_preview_char = int(cfg.get("Flask", "max_preview_char")) # Maximum number of character to display in the tooltip max_preview_char = int(cfg.get("Flask", "max_preview_char")) # Maximum number of character to display in the tooltip
max_preview_modal = int(cfg.get("Flask", "max_preview_modal")) # Maximum number of character to display in the modal max_preview_modal = int(cfg.get("Flask", "max_preview_modal")) # Maximum number of character to display in the modal
tlsh_to_percent = 1000.0 #Use to display the estimated percentage instead of a raw value
DiffMaxLineLength = int(cfg.get("Flask", "DiffMaxLineLength"))#Use to display the estimated percentage instead of a raw value DiffMaxLineLength = int(cfg.get("Flask", "DiffMaxLineLength"))#Use to display the estimated percentage instead of a raw value
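All connections above now pass decode_responses=True, which matters for the Python 3 migration: redis-py returns bytes by default, while the Flask views compare and slice str. A minimal illustration, assuming a reachable Redis/ardb instance on a placeholder host and port:

    import redis

    raw = redis.StrictRedis(host='127.0.0.1', port=6379, db=0)
    txt = redis.StrictRedis(host='127.0.0.1', port=6379, db=0, decode_responses=True)

    raw.set('example', 'value')
    raw.get('example')   # b'value' (bytes under Python 3)
    txt.get('example')   # 'value'  (str, ready for templates and string handling)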

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
''' '''
@ -23,21 +23,22 @@ max_preview_modal = Flask_config.max_preview_modal
#init all lvlDB servers #init all lvlDB servers
curYear = datetime.now().year curYear = datetime.now().year
int_year = int(curYear)
r_serv_db = {} r_serv_db = {}
# port generated automatically depending on available levelDB date # port generated automatically depending on available levelDB date
yearList = [] yearList = []
lvdbdir= os.path.join(os.environ['AIL_HOME'], "LEVEL_DB_DATA/")
for year in os.listdir(lvdbdir):
try:
intYear = int(year)
except:
continue
yearList.append([year, intYear, int(curYear) == intYear]) for x in range(0, (int_year - 2018) + 1):
intYear = int_year - x
yearList.append([str(intYear), intYear, int(curYear) == intYear])
r_serv_db[intYear] = redis.StrictRedis( r_serv_db[intYear] = redis.StrictRedis(
host=cfg.get("Redis_Level_DB", "host"), host=cfg.get("ARDB_DB", "host"),
port=intYear, port=cfg.getint("ARDB_DB", "port"),
db=cfg.getint("Redis_Level_DB", "db")) db=intYear,
decode_responses=True)
yearList.sort(reverse=True) yearList.sort(reverse=True)
browsepastes = Blueprint('browsepastes', __name__, template_folder='templates') browsepastes = Blueprint('browsepastes', __name__, template_folder='templates')
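The loop above replaces one levelDB instance per year (one port per year) with a single ARDB instance where the year selects the database number; a worked example, assuming the current year is 2020:

    int_year = 2020
    yearList = []
    for x in range(0, (int_year - 2018) + 1):
        intYear = int_year - x
        yearList.append([str(intYear), intYear, int_year == intYear])

    # yearList == [['2020', 2020, True], ['2019', 2019, False], ['2018', 2018, False]]
    # each entry maps to redis.StrictRedis(..., db=intYear) on the single ARDB_DB port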
@ -48,16 +49,18 @@ def getPastebyType(server, module_name):
all_path = [] all_path = []
for path in server.smembers('WARNING_'+module_name): for path in server.smembers('WARNING_'+module_name):
all_path.append(path) all_path.append(path)
return all_path return all_path
def event_stream_getImportantPasteByModule(module_name, year): def event_stream_getImportantPasteByModule(module_name, year):
index = 0 index = 0
all_pastes_list = getPastebyType(r_serv_db[year], module_name) all_pastes_list = getPastebyType(r_serv_db[year], module_name)
for path in all_pastes_list: for path in all_pastes_list:
index += 1 index += 1
paste = Paste.Paste(path) paste = Paste.Paste(path)
content = paste.get_p_content().decode('utf8', 'ignore') content = paste.get_p_content()
content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 content_range = max_preview_char if len(content)>max_preview_char else len(content)-1
curr_date = str(paste._get_p_date()) curr_date = str(paste._get_p_date())
curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:]
@ -83,7 +86,13 @@ def browseImportantPaste():
@browsepastes.route("/importantPasteByModule/", methods=['GET']) @browsepastes.route("/importantPasteByModule/", methods=['GET'])
def importantPasteByModule(): def importantPasteByModule():
module_name = request.args.get('moduleName') module_name = request.args.get('moduleName')
# # TODO: VERIFY YEAR VALIDITY
try:
currentSelectYear = int(request.args.get('year')) currentSelectYear = int(request.args.get('year'))
except:
print('Invalid year input')
currentSelectYear = int(datetime.now().year)
all_content = [] all_content = []
paste_date = [] paste_date = []
@ -94,7 +103,7 @@ def importantPasteByModule():
for path in allPastes[0:10]: for path in allPastes[0:10]:
all_path.append(path) all_path.append(path)
paste = Paste.Paste(path) paste = Paste.Paste(path)
content = paste.get_p_content().decode('utf8', 'ignore') content = paste.get_p_content()
content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 content_range = max_preview_char if len(content)>max_preview_char else len(content)-1
all_content.append(content[0:content_range].replace("\"", "\'").replace("\r", " ").replace("\n", " ")) all_content.append(content[0:content_range].replace("\"", "\'").replace("\r", " ").replace("\n", " "))
curr_date = str(paste._get_p_date()) curr_date = str(paste._get_p_date())

View file

@ -87,9 +87,12 @@
<li name='nav-pan'><a data-toggle="tab" href="#sqlinjection-tab" data-attribute-name="sqlinjection" data-panel="sqlinjection-panel">SQL injections</a></li> <li name='nav-pan'><a data-toggle="tab" href="#sqlinjection-tab" data-attribute-name="sqlinjection" data-panel="sqlinjection-panel">SQL injections</a></li>
<li name='nav-pan'><a data-toggle="tab" href="#cve-tab" data-attribute-name="cve" data-panel="cve-panel">CVEs</a></li> <li name='nav-pan'><a data-toggle="tab" href="#cve-tab" data-attribute-name="cve" data-panel="cve-panel">CVEs</a></li>
<li name='nav-pan'><a data-toggle="tab" href="#keys-tab" data-attribute-name="keys" data-panel="keys-panel">Keys</a></li> <li name='nav-pan'><a data-toggle="tab" href="#keys-tab" data-attribute-name="keys" data-panel="keys-panel">Keys</a></li>
<li name='nav-pan'><a data-toggle="tab" href="#apikey-tab" data-attribute-name="apikey" data-panel="apikey-panel">API Keys</a></li>
<li name='nav-pan'><a data-toggle="tab" href="#mail-tab" data-attribute-name="mail" data-panel="mail-panel">Mails</a></li> <li name='nav-pan'><a data-toggle="tab" href="#mail-tab" data-attribute-name="mail" data-panel="mail-panel">Mails</a></li>
<li name='nav-pan'><a data-toggle="tab" href="#phone-tab" data-attribute-name="phone" data-panel="phone-panel">Phones</a></li> <li name='nav-pan'><a data-toggle="tab" href="#phone-tab" data-attribute-name="phone" data-panel="phone-panel">Phones</a></li>
<li name='nav-pan'><a data-toggle="tab" href="#onion-tab" data-attribute-name="onion" data-panel="onion-panel">Onions</a></li> <li name='nav-pan'><a data-toggle="tab" href="#onion-tab" data-attribute-name="onion" data-panel="onion-panel">Onions</a></li>
<li name='nav-pan'><a data-toggle="tab" href="#bitcoin-tab" data-attribute-name="bitcoin" data-panel="bitcoin-panel">Bitcoin</a></li>
<li name='nav-pan'><a data-toggle="tab" href="#base64-tab" data-attribute-name="base64" data-panel="base64-panel">Base64</a></li>
</ul> </ul>
</br> </br>
@ -110,6 +113,9 @@
<div class="col-lg-12 tab-pane fade" id="keys-tab"> <div class="col-lg-12 tab-pane fade" id="keys-tab">
<img id="loading-gif-modal" src="{{url_for('static', filename='image/loading.gif') }}" style="margin: 4px;"> <img id="loading-gif-modal" src="{{url_for('static', filename='image/loading.gif') }}" style="margin: 4px;">
</div> </div>
<div class="col-lg-12 tab-pane fade" id="apikey-tab">
<img id="loading-gif-modal" src="{{url_for('static', filename='image/loading.gif') }}" style="margin: 4px;">
</div>
<div class="col-lg-12 tab-pane fade" id="mail-tab"> <div class="col-lg-12 tab-pane fade" id="mail-tab">
<img id="loading-gif-modal" src="{{url_for('static', filename='image/loading.gif') }}" style="margin: 4px;"> <img id="loading-gif-modal" src="{{url_for('static', filename='image/loading.gif') }}" style="margin: 4px;">
</div> </div>
@ -119,6 +125,12 @@
<div class="col-lg-12 tab-pane fade" id="onion-tab"> <div class="col-lg-12 tab-pane fade" id="onion-tab">
<img id="loading-gif-modal" src="{{url_for('static', filename='image/loading.gif') }}" style="margin: 4px;"> <img id="loading-gif-modal" src="{{url_for('static', filename='image/loading.gif') }}" style="margin: 4px;">
</div> </div>
<div class="col-lg-12 tab-pane fade" id="bitcoin-tab">
<img id="loading-gif-modal" src="{{url_for('static', filename='image/loading.gif') }}" style="margin: 4px;">
</div>
<div class="col-lg-12 tab-pane fade" id="base64-tab">
<img id="loading-gif-modal" src="{{url_for('static', filename='image/loading.gif') }}" style="margin: 4px;">
</div>
</div> <!-- tab-content --> </div> <!-- tab-content -->
<!-- /.row --> <!-- /.row -->
</div> </div>

View file

@ -245,4 +245,3 @@ $(document).ready(function(){
} ); } );
</script> </script>

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
''' '''
@ -26,20 +26,30 @@ def event_stream():
pubsub = r_serv_log.pubsub() pubsub = r_serv_log.pubsub()
pubsub.psubscribe("Script" + '.*') pubsub.psubscribe("Script" + '.*')
for msg in pubsub.listen(): for msg in pubsub.listen():
level = msg['channel'].split('.')[1]
type = msg['type']
pattern = msg['pattern']
channel = msg['channel']
data = msg['data']
msg = {'channel': channel, 'type': type, 'pattern': pattern, 'data': data}
level = (msg['channel']).split('.')[1]
if msg['type'] == 'pmessage' and level != "DEBUG": if msg['type'] == 'pmessage' and level != "DEBUG":
yield 'data: %s\n\n' % json.dumps(msg) yield 'data: %s\n\n' % json.dumps(msg)
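event_stream() rebuilds the pubsub message into a plain dict so it can be JSON-encoded and pushed as a Server-Sent Event; a sketch of what one emitted frame looks like (the field values are illustrative only):

    import json

    msg = {'channel': 'Script.INFO', 'type': 'pmessage',
           'pattern': 'Script.*', 'data': 'some log line'}
    frame = 'data: %s\n\n' % json.dumps(msg)
    # 'data: {"channel": "Script.INFO", ...}\n\n' -- one SSE frame per log message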
def get_queues(r): def get_queues(r):
# We may want to put the llen in a pipeline to do only one query. # We may want to put the llen in a pipeline to do only one query.
newData = [] newData = []
for queue, card in r.hgetall("queues").iteritems(): for queue, card in r.hgetall("queues").items():
key = "MODULE_" + queue + "_" key = "MODULE_" + queue + "_"
keySet = "MODULE_TYPE_" + queue keySet = "MODULE_TYPE_" + queue
for moduleNum in r.smembers(keySet): for moduleNum in r.smembers(keySet):
value = r.get(key + str(moduleNum)) value = r.get(key + str(moduleNum))
if value is not None: if value is not None:
timestamp, path = value.split(", ") timestamp, path = value.split(", ")
if timestamp is not None: if timestamp is not None:

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
''' '''

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
''' '''
@ -108,7 +108,7 @@ def search():
for path in r_serv_pasteName.smembers(q[0]): for path in r_serv_pasteName.smembers(q[0]):
r.append(path) r.append(path)
paste = Paste.Paste(path) paste = Paste.Paste(path)
content = paste.get_p_content().decode('utf8', 'ignore') content = paste.get_p_content()
content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 content_range = max_preview_char if len(content)>max_preview_char else len(content)-1
c.append(content[0:content_range]) c.append(content[0:content_range])
curr_date = str(paste._get_p_date()) curr_date = str(paste._get_p_date())
@ -126,7 +126,7 @@ def search():
for x in results: for x in results:
r.append(x.items()[0][1]) r.append(x.items()[0][1])
paste = Paste.Paste(x.items()[0][1]) paste = Paste.Paste(x.items()[0][1])
content = paste.get_p_content().decode('utf8', 'ignore') content = paste.get_p_content()
content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 content_range = max_preview_char if len(content)>max_preview_char else len(content)-1
c.append(content[0:content_range]) c.append(content[0:content_range])
curr_date = str(paste._get_p_date()) curr_date = str(paste._get_p_date())
@ -175,7 +175,7 @@ def get_more_search_result():
for x in results: for x in results:
path_array.append(x.items()[0][1]) path_array.append(x.items()[0][1])
paste = Paste.Paste(x.items()[0][1]) paste = Paste.Paste(x.items()[0][1])
content = paste.get_p_content().decode('utf8', 'ignore') content = paste.get_p_content()
content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 content_range = max_preview_char if len(content)>max_preview_char else len(content)-1
preview_array.append(content[0:content_range]) preview_array.append(content[0:content_range])
curr_date = str(paste._get_p_date()) curr_date = str(paste._get_p_date())

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
''' '''
@ -57,15 +57,19 @@ def sentiment_analysis_getplotdata():
if getAllProviders == 'True': if getAllProviders == 'True':
if allProvider == "True": if allProvider == "True":
range_providers = r_serv_charts.smembers('all_provider_set') range_providers = r_serv_charts.smembers('all_provider_set')
return jsonify(list(range_providers)) return jsonify(list(range_providers))
else: else:
range_providers = r_serv_charts.zrevrangebyscore('providers_set_'+ get_date_range(0)[0], '+inf', '-inf', start=0, num=8) range_providers = r_serv_charts.zrevrangebyscore('providers_set_'+ get_date_range(0)[0], '+inf', '-inf', start=0, num=8)
# if empty, get yesterday top providers # if empty, get yesterday top providers
range_providers = r_serv_charts.zrevrangebyscore('providers_set_'+ get_date_range(1)[1], '+inf', '-inf', start=0, num=8) if range_providers == [] else range_providers range_providers = r_serv_charts.zrevrangebyscore('providers_set_'+ get_date_range(1)[1], '+inf', '-inf', start=0, num=8) if range_providers == [] else range_providers
# if still empty, takes from all providers # if still empty, takes from all providers
if range_providers == []: if range_providers == []:
print 'today provider empty' print('today provider empty')
range_providers = r_serv_charts.smembers('all_provider_set') range_providers = r_serv_charts.smembers('all_provider_set')
return jsonify(list(range_providers)) return jsonify(list(range_providers))
elif provider is not None: elif provider is not None:
@ -78,7 +82,7 @@ def sentiment_analysis_getplotdata():
list_value = [] list_value = []
for cur_id in r_serv_sentiment.smembers(cur_set_name): for cur_id in r_serv_sentiment.smembers(cur_set_name):
cur_value = r_serv_sentiment.get(cur_id) cur_value = (r_serv_sentiment.get(cur_id))
list_value.append(cur_value) list_value.append(cur_value)
list_date[cur_timestamp] = list_value list_date[cur_timestamp] = list_value
to_return[provider] = list_date to_return[provider] = list_date
@ -130,7 +134,7 @@ def sentiment_analysis_plot_tool_getdata():
list_value = [] list_value = []
for cur_id in r_serv_sentiment.smembers(cur_set_name): for cur_id in r_serv_sentiment.smembers(cur_set_name):
cur_value = r_serv_sentiment.get(cur_id) cur_value = (r_serv_sentiment.get(cur_id))
list_value.append(cur_value) list_value.append(cur_value)
list_date[cur_timestamp] = list_value list_date[cur_timestamp] = list_value
to_return[cur_provider] = list_date to_return[cur_provider] = list_date
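The provider ranking queried earlier in this view is an ordinary sorted set keyed by date; a small sketch of the same zrevrangebyscore call, assuming redis-py >= 3.0 and a placeholder connection (the key name mirrors the 'providers_set_' + date pattern above, the members and scores are made up):

    import redis

    r = redis.StrictRedis(host='127.0.0.1', port=6382, db=1, decode_responses=True)
    r.zadd('providers_set_20180511', {'pastebin.com_pro': 42, 'snippet.host': 7})

    # top 8 providers for that day, highest score first
    top = r.zrevrangebyscore('providers_set_20180511', '+inf', '-inf', start=0, num=8)
    # ['pastebin.com_pro', 'snippet.host']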

View file

@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
''' '''
@ -20,7 +20,6 @@ cfg = Flask_config.cfg
r_serv_pasteName = Flask_config.r_serv_pasteName r_serv_pasteName = Flask_config.r_serv_pasteName
max_preview_char = Flask_config.max_preview_char max_preview_char = Flask_config.max_preview_char
max_preview_modal = Flask_config.max_preview_modal max_preview_modal = Flask_config.max_preview_modal
tlsh_to_percent = Flask_config.tlsh_to_percent
DiffMaxLineLength = Flask_config.DiffMaxLineLength DiffMaxLineLength = Flask_config.DiffMaxLineLength
showsavedpastes = Blueprint('showsavedpastes', __name__, template_folder='templates') showsavedpastes = Blueprint('showsavedpastes', __name__, template_folder='templates')
@ -38,7 +37,7 @@ def showpaste(content_range):
p_size = paste.p_size p_size = paste.p_size
p_mime = paste.p_mime p_mime = paste.p_mime
p_lineinfo = paste.get_lines_info() p_lineinfo = paste.get_lines_info()
p_content = paste.get_p_content().decode('utf-8', 'ignore') p_content = paste.get_p_content()
p_duplicate_full_list = json.loads(paste._get_p_duplicate()) p_duplicate_full_list = json.loads(paste._get_p_duplicate())
p_duplicate_list = [] p_duplicate_list = []
p_simil_list = [] p_simil_list = []
@ -48,11 +47,13 @@ def showpaste(content_range):
for dup_list in p_duplicate_full_list: for dup_list in p_duplicate_full_list:
if dup_list[0] == "tlsh": if dup_list[0] == "tlsh":
dup_list[2] = int(((tlsh_to_percent - float(dup_list[2])) / tlsh_to_percent)*100) dup_list[2] = 100 - int(dup_list[2])
else: else:
print('dup_list')
print(dup_list)
dup_list[2] = int(dup_list[2]) dup_list[2] = int(dup_list[2])
p_duplicate_full_list.sort(lambda x,y: cmp(x[2], y[2]), reverse=True) #p_duplicate_full_list.sort(lambda x,y: cmp(x[2], y[2]), reverse=True)
# Combine multiple duplicate paste name and format for display # Combine multiple duplicate paste name and format for display
new_dup_list = [] new_dup_list = []
@ -64,12 +65,13 @@ def showpaste(content_range):
hash_types = [] hash_types = []
comp_vals = [] comp_vals = []
for i in indices: for i in indices:
hash_types.append(p_duplicate_full_list[i][0].encode('utf8')) hash_types.append(p_duplicate_full_list[i][0])
comp_vals.append(p_duplicate_full_list[i][2]) comp_vals.append(p_duplicate_full_list[i][2])
dup_list_removed.append(i) dup_list_removed.append(i)
hash_types = str(hash_types).replace("[","").replace("]","") if len(hash_types)==1 else str(hash_types) hash_types = str(hash_types).replace("[","").replace("]","") if len(hash_types)==1 else str(hash_types)
comp_vals = str(comp_vals).replace("[","").replace("]","") if len(comp_vals)==1 else str(comp_vals) comp_vals = str(comp_vals).replace("[","").replace("]","") if len(comp_vals)==1 else str(comp_vals)
if len(p_duplicate_full_list[dup_list_index]) > 3: if len(p_duplicate_full_list[dup_list_index]) > 3:
try: try:
date_paste = str(int(p_duplicate_full_list[dup_list_index][3])) date_paste = str(int(p_duplicate_full_list[dup_list_index][3]))
@ -91,7 +93,6 @@ def showpaste(content_range):
if content_range != 0: if content_range != 0:
p_content = p_content[0:content_range] p_content = p_content[0:content_range]
return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list, date_list=p_date_list) return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list, date_list=p_date_list)
# ============ ROUTES ============ # ============ ROUTES ============
@ -100,6 +101,12 @@ def showpaste(content_range):
def showsavedpaste(): def showsavedpaste():
return showpaste(0) return showpaste(0)
@showsavedpastes.route("/showsavedrawpaste/") #shows raw
def showsavedrawpaste():
requested_path = request.args.get('paste', '')
paste = Paste.Paste(requested_path)
content = paste.get_p_content()
return content, 200, {'Content-Type': 'text/plain'}
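
The new /showsavedrawpaste/ endpoint returns the paste body verbatim with a text/plain content type. A minimal client sketch (host, port and paste path are hypothetical, not taken from the commit):

    import requests

    url = 'http://127.0.0.1:7000/showsavedrawpaste/'
    resp = requests.get(url, params={'paste': '/path/to/PASTES/source/2018/05/11/example.gz'})
    print(resp.headers.get('Content-Type'))   # expected: text/plain
    print(resp.text[:200])                    # raw paste content, no HTML around it
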
@showsavedpastes.route("/showpreviewpaste/") @showsavedpastes.route("/showpreviewpaste/")
def showpreviewpaste(): def showpreviewpaste():
@ -111,7 +118,7 @@ def showpreviewpaste():
def getmoredata(): def getmoredata():
requested_path = request.args.get('paste', '') requested_path = request.args.get('paste', '')
paste = Paste.Paste(requested_path) paste = Paste.Paste(requested_path)
p_content = paste.get_p_content().decode('utf-8', 'ignore') p_content = paste.get_p_content()
to_return = p_content[max_preview_modal-1:] to_return = p_content[max_preview_modal-1:]
return to_return return to_return
@ -126,8 +133,8 @@ def showDiff():
if maxLengthLine1 > DiffMaxLineLength or maxLengthLine2 > DiffMaxLineLength: if maxLengthLine1 > DiffMaxLineLength or maxLengthLine2 > DiffMaxLineLength:
return "Can't make the difference as the lines are too long." return "Can't make the difference as the lines are too long."
htmlD = difflib.HtmlDiff() htmlD = difflib.HtmlDiff()
lines1 = p1.get_p_content().decode('utf8', 'ignore').splitlines() lines1 = p1.get_p_content().splitlines()
lines2 = p2.get_p_content().decode('utf8', 'ignore').splitlines() lines2 = p2.get_p_content().splitlines()
the_html = htmlD.make_file(lines1, lines2) the_html = htmlD.make_file(lines1, lines2)
return the_html return the_html
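
showDiff keeps the same difflib-based comparison and only drops the manual decoding. For reference, a self-contained sketch of what difflib.HtmlDiff().make_file() produces:

    import difflib

    lines1 = ['user: admin', 'password: 123456']
    lines2 = ['user: admin', 'password: hunter2']
    # make_file() returns a complete standalone HTML page containing a
    # side-by-side diff table, which the view hands back to the browser.
    the_html = difflib.HtmlDiff().make_file(lines1, lines2)
    print(the_html[:80])
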
View file
@ -69,18 +69,18 @@
<tbody> <tbody>
{% for dup_path in duplicate_list %} {% for dup_path in duplicate_list %}
<tr> <tr>
<td>{{ hashtype_list[i] }}</td> <td>{{ hashtype_list[loop.index - 1] }}</td>
<td>Similarity: {{ simil_list[i] }}%</td> <td>Similarity: {{ simil_list[loop.index - 1] }}%</td>
<td>{{ date_list[i] }}</td> <td>{{ date_list[loop.index - 1] }}</td>
<td><a target="_blank" href="{{ url_for('showsavedpastes.showsavedpaste') }}?paste={{ dup_path }}" id='dup_path'>{{ dup_path }}</a></td> <td><a target="_blank" href="{{ url_for('showsavedpastes.showsavedpaste') }}?paste={{ dup_path }}" id='dup_path'>{{ dup_path }}</a></td>
<td><a target="_blank" href="{{ url_for('showsavedpastes.showDiff') }}?s1={{ request.args.get('paste') }}&s2={{ dup_path }}" class="fa fa-columns" title="Show differences"></a></td> <td><a target="_blank" href="{{ url_for('showsavedpastes.showDiff') }}?s1={{ request.args.get('paste') }}&s2={{ dup_path }}" class="fa fa-columns" title="Show differences"></a></td>
</tr> </tr>
{% set i = i + 1 %}
{% endfor %} {% endfor %}
</tbody> </tbody>
</table> </table>
{% endif %} {% endif %}
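
The template switches from a hand-maintained counter to Jinja's built-in loop.index, since a {% set %} inside a for block does not carry over between iterations in Jinja2. A quick Python-side sketch of the behaviour (illustrative template, not the project's):

    from jinja2 import Template

    # loop.index is 1-based, so "loop.index - 1" indexes the parallel lists.
    tpl = Template("{% for d in dups %}{{ loop.index - 1 }}:{{ d }} {% endfor %}")
    print(tpl.render(dups=['paste_a', 'paste_b', 'paste_c']))   # 0:paste_a 1:paste_b 2:paste_c
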
<h3> Content: </h3> <h3> Content: </h3>
<a href="{{ url_for('showsavedpastes.showsavedrawpaste') }}?paste={{ request.args.get('paste') }}" id='raw_paste' > [Raw content] </a>
<p data-initsize="{{ initsize }}"> <pre id="paste-holder">{{ content }}</pre></p> <p data-initsize="{{ initsize }}"> <pre id="paste-holder">{{ content }}</pre></p>
</div> </div>
</div> </div>
View file
@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
''' '''
@ -158,7 +158,7 @@ def terms_management():
trackReg_list_num_of_paste = [] trackReg_list_num_of_paste = []
for tracked_regex in r_serv_term.smembers(TrackedRegexSet_Name): for tracked_regex in r_serv_term.smembers(TrackedRegexSet_Name):
notificationEMailTermMapping[tracked_regex] = "\n".join(r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_regex)) notificationEMailTermMapping[tracked_regex] = "\n".join( (r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_regex)) )
if tracked_regex not in notificationEnabledDict: if tracked_regex not in notificationEnabledDict:
notificationEnabledDict[tracked_regex] = False notificationEnabledDict[tracked_regex] = False
@ -182,8 +182,9 @@ def terms_management():
trackSet_list_values = [] trackSet_list_values = []
trackSet_list_num_of_paste = [] trackSet_list_num_of_paste = []
for tracked_set in r_serv_term.smembers(TrackedSetSet_Name): for tracked_set in r_serv_term.smembers(TrackedSetSet_Name):
tracked_set = tracked_set
notificationEMailTermMapping[tracked_set] = "\n".join(r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_set)) notificationEMailTermMapping[tracked_set] = "\n".join( (r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_set)) )
if tracked_set not in notificationEnabledDict: if tracked_set not in notificationEnabledDict:
@ -209,7 +210,7 @@ def terms_management():
track_list_num_of_paste = [] track_list_num_of_paste = []
for tracked_term in r_serv_term.smembers(TrackedTermsSet_Name): for tracked_term in r_serv_term.smembers(TrackedTermsSet_Name):
notificationEMailTermMapping[tracked_term] = "\n".join(r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_term)) notificationEMailTermMapping[tracked_term] = "\n".join( r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_term))
if tracked_term not in notificationEnabledDict: if tracked_term not in notificationEnabledDict:
notificationEnabledDict[tracked_term] = False notificationEnabledDict[tracked_term] = False
@ -220,7 +221,9 @@ def terms_management():
term_date = r_serv_term.hget(TrackedTermsDate_Name, tracked_term) term_date = r_serv_term.hget(TrackedTermsDate_Name, tracked_term)
set_paste_name = "tracked_" + tracked_term set_paste_name = "tracked_" + tracked_term
track_list_num_of_paste.append(r_serv_term.scard(set_paste_name))
track_list_num_of_paste.append( r_serv_term.scard(set_paste_name) )
term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded"
value_range.append(term_date) value_range.append(term_date)
track_list_values.append(value_range) track_list_values.append(value_range)
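
The tracked-term date is still stored as a Unix timestamp and converted for display; a self-contained sketch of that conversion (the timestamp value is illustrative):

    import datetime

    term_date = '1526048818'   # illustrative epoch value as returned by hget
    term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded"
    print(term_date)   # 2018-05-11 14:26:58
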
@ -268,7 +271,7 @@ def terms_management_query_paste():
p_size = paste.p_size p_size = paste.p_size
p_mime = paste.p_mime p_mime = paste.p_mime
p_lineinfo = paste.get_lines_info() p_lineinfo = paste.get_lines_info()
p_content = paste.get_p_content().decode('utf-8', 'ignore') p_content = paste.get_p_content()
if p_content != 0: if p_content != 0:
p_content = p_content[0:400] p_content = p_content[0:400]
paste_info.append({"path": path, "date": p_date, "source": p_source, "encoding": p_encoding, "size": p_size, "mime": p_mime, "lineinfo": p_lineinfo, "content": p_content}) paste_info.append({"path": path, "date": p_date, "source": p_source, "encoding": p_encoding, "size": p_size, "mime": p_mime, "lineinfo": p_lineinfo, "content": p_content})
@ -310,7 +313,7 @@ def terms_management_action():
term = request.args.get('term') term = request.args.get('term')
notificationEmailsParam = request.args.get('emailAddresses') notificationEmailsParam = request.args.get('emailAddresses')
if action is None or term is None: if action is None or term is None or notificationEmailsParam is None:
return "None" return "None"
else: else:
if section == "followTerm": if section == "followTerm":
@ -386,7 +389,6 @@ def terms_management_action():
r_serv_term.hdel(TrackedRegexDate_Name, term) r_serv_term.hdel(TrackedRegexDate_Name, term)
elif term.startswith('\\') and term.endswith('\\'): elif term.startswith('\\') and term.endswith('\\'):
r_serv_term.srem(TrackedSetSet_Name, term) r_serv_term.srem(TrackedSetSet_Name, term)
print(term)
r_serv_term.hdel(TrackedSetDate_Name, term) r_serv_term.hdel(TrackedSetDate_Name, term)
else: else:
r_serv_term.srem(TrackedTermsSet_Name, term.lower()) r_serv_term.srem(TrackedTermsSet_Name, term.lower())
@ -524,7 +526,7 @@ def credentials_management_query_paste():
p_size = paste.p_size p_size = paste.p_size
p_mime = paste.p_mime p_mime = paste.p_mime
p_lineinfo = paste.get_lines_info() p_lineinfo = paste.get_lines_info()
p_content = paste.get_p_content().decode('utf-8', 'ignore') p_content = paste.get_p_content()
if p_content != 0: if p_content != 0:
p_content = p_content[0:400] p_content = p_content[0:400]
paste_info.append({"path": path, "date": p_date, "source": p_source, "encoding": p_encoding, "size": p_size, "mime": p_mime, "lineinfo": p_lineinfo, "content": p_content}) paste_info.append({"path": path, "date": p_date, "source": p_source, "encoding": p_encoding, "size": p_size, "mime": p_mime, "lineinfo": p_lineinfo, "content": p_content})
@ -534,7 +536,7 @@ def credentials_management_query_paste():
@terms.route("/credentials_management_action/", methods=['GET']) @terms.route("/credentials_management_action/", methods=['GET'])
def cred_management_action(): def cred_management_action():
supplied = request.args.get('term').encode('utf-8') supplied = request.args.get('term')
action = request.args.get('action') action = request.args.get('action')
section = request.args.get('section') section = request.args.get('section')
extensive = request.args.get('extensive') extensive = request.args.get('extensive')
@ -565,7 +567,7 @@ def cred_management_action():
iter_num += 1 iter_num += 1
if poss in tempUsername: if poss in tempUsername:
num = r_serv_cred.hget(REDIS_KEY_ALL_CRED_SET, tempUsername) num = (r_serv_cred.hget(REDIS_KEY_ALL_CRED_SET, tempUsername))
if num is not None: if num is not None:
uniq_num_set.add(num) uniq_num_set.add(num)
for num in r_serv_cred.smembers(tempUsername): for num in r_serv_cred.smembers(tempUsername):
@ -574,7 +576,7 @@ def cred_management_action():
data = {'usr': [], 'path': [], 'numPaste': [], 'simil': []} data = {'usr': [], 'path': [], 'numPaste': [], 'simil': []}
for Unum in uniq_num_set: for Unum in uniq_num_set:
levenRatio = 2.0 levenRatio = 2.0
username = r_serv_cred.hget(REDIS_KEY_ALL_CRED_SET_REV, Unum) username = (r_serv_cred.hget(REDIS_KEY_ALL_CRED_SET_REV, Unum))
# Calculate Levenshtein distance, ignore negative ratio # Calculate Levenshtein distance, ignore negative ratio
supp_splitted = supplied.split() supp_splitted = supplied.split()
@ -585,7 +587,10 @@ def cred_management_action():
levenRatioStr = "{:.1%}".format(levenRatio) levenRatioStr = "{:.1%}".format(levenRatio)
data['usr'].append(username) data['usr'].append(username)
allPathNum = list(r_serv_cred.smembers(REDIS_KEY_MAP_CRED_TO_PATH+'_'+Unum)) allPathNum = list(r_serv_cred.smembers(REDIS_KEY_MAP_CRED_TO_PATH+'_'+Unum))
data['path'].append(allPathNum) data['path'].append(allPathNum)
data['numPaste'].append(len(allPathNum)) data['numPaste'].append(len(allPathNum))
data['simil'].append(levenRatioStr) data['simil'].append(levenRatioStr)
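
The credential search ranks stored usernames against the supplied one with a Levenshtein-style ratio (levenRatio, computed in a part of the file not shown in this hunk). As an illustration only (not necessarily the library this module uses), a comparable ratio can be computed with the standard library:

    from difflib import SequenceMatcher

    supplied = 'admin@example.com'
    username = 'admin@exemple.com'
    levenRatio = SequenceMatcher(None, supplied, username).ratio()
    levenRatioStr = "{:.1%}".format(levenRatio)
    print(levenRatioStr)   # 94.1% for this illustrative pair
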
View file
@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
''' '''
@ -28,6 +28,7 @@ def get_date_range(num_day):
for i in range(0, num_day+1): for i in range(0, num_day+1):
date_list.append(date.substract_day(i)) date_list.append(date.substract_day(i))
return date_list return date_list
@ -46,6 +47,7 @@ def progressionCharts():
date_range = get_date_range(num_day) date_range = get_date_range(num_day)
# Retreive all data from the last num_day # Retreive all data from the last num_day
for date in date_range: for date in date_range:
curr_value = r_serv_charts.hget(attribute_name, date) curr_value = r_serv_charts.hget(attribute_name, date)
bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], int(curr_value if curr_value is not None else 0)]) bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], int(curr_value if curr_value is not None else 0)])
bar_values.insert(0, attribute_name) bar_values.insert(0, attribute_name)
@ -54,6 +56,7 @@ def progressionCharts():
else: else:
redis_progression_name = "z_top_progression_" + trending_name redis_progression_name = "z_top_progression_" + trending_name
keyw_value = r_serv_charts.zrevrangebyscore(redis_progression_name, '+inf', '-inf', withscores=True, start=0, num=10) keyw_value = r_serv_charts.zrevrangebyscore(redis_progression_name, '+inf', '-inf', withscores=True, start=0, num=10)
return jsonify(keyw_value) return jsonify(keyw_value)
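
With withscores=True, redis-py returns the top progression entries as a list of (member, score) tuples, which jsonify serialises directly as nested arrays. A small sketch against a local test Redis (key and members are illustrative; the zadd mapping form assumes redis-py 3.x):

    import redis

    r = redis.StrictRedis(host='localhost', port=6379, db=0, decode_responses=True)
    r.zadd('z_top_progression_demo', {'leaked-term': 42, 'other-term': 7})   # redis-py >= 3.0 syntax
    keyw_value = r.zrevrangebyscore('z_top_progression_demo', '+inf', '-inf',
                                    withscores=True, start=0, num=10)
    print(keyw_value)   # [('leaked-term', 42.0), ('other-term', 7.0)]
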
@trendings.route("/wordstrending/") @trendings.route("/wordstrending/")
View file
@ -1,4 +1,4 @@
#!/usr/bin/env python2 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
''' '''
@ -28,6 +28,7 @@ def get_top_relevant_data(server, module_name):
for date in get_date_range(15): for date in get_date_range(15):
redis_progression_name_set = 'top_'+ module_name +'_set_' + date redis_progression_name_set = 'top_'+ module_name +'_set_' + date
member_set = server.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True) member_set = server.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True)
if len(member_set) == 0: #No data for this date if len(member_set) == 0: #No data for this date
days += 1 days += 1
else: else:
@ -85,9 +86,17 @@ def providersChart():
date_range = get_date_range(num_day) date_range = get_date_range(num_day)
# Retreive all data from the last num_day # Retreive all data from the last num_day
for date in date_range: for date in date_range:
curr_value_size = r_serv_charts.hget(keyword_name+'_'+'size', date) curr_value_size = ( r_serv_charts.hget(keyword_name+'_'+'size', date) )
if curr_value_size is not None:
curr_value_size = curr_value_size
curr_value_num = r_serv_charts.hget(keyword_name+'_'+'num', date) curr_value_num = r_serv_charts.hget(keyword_name+'_'+'num', date)
curr_value_size_avg = r_serv_charts.hget(keyword_name+'_'+'avg', date) curr_value_size_avg = r_serv_charts.hget(keyword_name+'_'+'avg', date)
if curr_value_size_avg is not None:
curr_value_size_avg = curr_value_size_avg
if module_name == "size": if module_name == "size":
curr_value = float(curr_value_size_avg if curr_value_size_avg is not None else 0) curr_value = float(curr_value_size_avg if curr_value_size_avg is not None else 0)
else: else:
@ -103,6 +112,7 @@ def providersChart():
redis_provider_name_set = redis_provider_name_set + get_date_range(0)[0] redis_provider_name_set = redis_provider_name_set + get_date_range(0)[0]
member_set = r_serv_charts.zrevrangebyscore(redis_provider_name_set, '+inf', '-inf', withscores=True, start=0, num=8) member_set = r_serv_charts.zrevrangebyscore(redis_provider_name_set, '+inf', '-inf', withscores=True, start=0, num=8)
# Member set is a list of (value, score) pairs # Member set is a list of (value, score) pairs
if len(member_set) == 0: if len(member_set) == 0:
member_set.append(("No relevant data", float(100))) member_set.append(("No relevant data", float(100)))
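
providersChart keeps the usual None-guarding when a day has no data, and falls back to a placeholder pair when the provider set is empty so the chart still renders. A short sketch of both patterns with illustrative values:

    curr_value_size_avg = None   # hget returns None when no average was stored for the date
    curr_value = float(curr_value_size_avg if curr_value_size_avg is not None else 0)
    print(curr_value)    # 0.0 when the value is missing

    member_set = []      # e.g. an empty zrevrangebyscore result
    if len(member_set) == 0:
        member_set.append(("No relevant data", float(100)))
    print(member_set)    # [('No relevant data', 100.0)]
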