diff --git a/.travis.yml b/.travis.yml index 554d0967..41117210 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,7 @@ language: python python: - - "2.7" + - "3.5" sudo: required @@ -16,6 +16,7 @@ env: install: - ./installing_deps.sh + - pip install coveralls codecov nose script: - pushd bin @@ -23,13 +24,11 @@ script: - ./launch_lvldb.sh - ./launch_logs.sh - ./launch_queues.sh - - ./launch_scripts.sh - - sleep 120 - - ./Shutdown.py - popd - - find logs/* -exec cat {} \; + - cd tests + - nosetests --with-coverage --cover-package=../bin -d -notifications: - email: - on_success: change - on_failure: change + +after_success: + - codecov + - coveralls diff --git a/README.md b/README.md index 9e9ccd2a..b977c185 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,10 @@ Features * Terms, Set of terms and Regex tracking and occurrence * Many more modules for extracting phone numbers, credentials and others * Alerting to [MISP](https://github.com/MISP/MISP) to share found leaks within a threat intelligence platform using [MISP standard](https://www.misp-project.org/objects.html#_ail_leak) +* Detect and decode Base64 and store files +* Detect Amazon AWS and Google API keys +* Detect Bitcoin address and Bitcoin private keys +* Detect private keys and certificate Installation ------------ @@ -53,6 +57,11 @@ linux based distributions, you can replace it with [installing_deps_archlinux.sh There is also a [Travis file](.travis.yml) used for automating the installation that can be used to build and install AIL on other systems. +Python 3 Upgrade +------------ + +To upgrade from an existing AIL installation, you have to launch [python3_upgrade.sh](./python3_upgrade.sh), this script will delete and create a new virtual environment. The script **will upgrade the packages but won't keep your previous data** (neverthless the data is copied into a directory called `old`). If you install from scratch, you don't require to launch the [python3_upgrade.sh](./python3_upgrade.sh). + Docker Quick Start (Ubuntu 16.04 LTS) ------------ diff --git a/bin/ApiKey.py b/bin/ApiKey.py new file mode 100755 index 00000000..8ce7e2b4 --- /dev/null +++ b/bin/ApiKey.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +""" +The ApiKey Module +====================== + +This module is consuming the Redis-list created by the Categ module. + +It apply API_key regexes on paste content and warn if above a threshold. 
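
For reference, a minimal sketch of this detection step. The module's own compiled patterns are truncated in the hunk below, so the regexes here are assumptions based on the publicly documented AWS access-key-ID and Google API-key formats, and the sample string is made up:

import re

# Assumed patterns (not copied from the module): AWS access key IDs start with
# "AKIA" followed by 16 upper-case alphanumerics, Google API keys with "AIza"
# followed by 35 URL-safe characters.
regex_aws_access_key = re.compile(r'AKIA[0-9A-Z]{16}')
regex_google_api_key = re.compile(r'AIza[0-9A-Za-z\-_]{35}')

content = 'dump: AKIAIOSFODNN7EXAMPLE AIzaSyA1bC2dE3fG4hI5jK6lM7nO8pQ9rS0tU1v'
aws_access_key = regex_aws_access_key.findall(content)
google_api_key = regex_google_api_key.findall(content)

if aws_access_key or google_api_key:
    print('found {} AWS and {} Google API key(s)'.format(
        len(aws_access_key), len(google_api_key)))
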
+ +""" + +import redis +import pprint +import time +import re + +from packages import Paste +from packages import lib_refine +from pubsublogger import publisher + +from Helper import Process + + +def search_api_key(message): + filename, score = message.split() + paste = Paste.Paste(filename) + content = paste.get_p_content() + + aws_access_key = regex_aws_access_key.findall(content) + aws_secret_key = regex_aws_secret_key.findall(content) + google_api_key = regex_google_api_key.findall(content) + + if(len(aws_access_key) > 0 or len(aws_secret_key) > 0 or len(google_api_key) > 0): + + to_print = 'ApiKey;{};{};{};'.format( + paste.p_source, paste.p_date, paste.p_name) + if(len(google_api_key) > 0): + print('found google api key') + print(to_print) + publisher.warning('{}Checked {} found Google API Key;{}'.format( + to_print, len(google_api_key), paste.p_path)) + + if(len(aws_access_key) > 0 or len(aws_secret_key) > 0): + print('found AWS key') + print(to_print) + total = len(aws_access_key) + len(aws_secret_key) + publisher.warning('{}Checked {} found AWS Key;{}'.format( + to_print, total, paste.p_path)) + + + msg = 'apikey;{}'.format(filename) + p.populate_set_out(msg, 'alertHandler') + #Send to duplicate + p.populate_set_out(filename, 'Duplicate') + +if __name__ == "__main__": + publisher.port = 6380 + publisher.channel = "Script" + + config_section = 'ApiKey' + + p = Process(config_section) + + publisher.info("ApiKey started") + + message = p.get_from_set() + + # TODO improve REGEX + regex_aws_access_key = re.compile(r'(? 0): + + for b64 in base64_list: + if len(b64) >= 40 : + decode = base64.b64decode(b64) + + type = magic.from_buffer(decode, mime=True) + #print(type) + #print(decode) + + find = True + hash = sha1(decode).hexdigest() + + data = {} + data['name'] = hash + data['date'] = datetime.datetime.now().strftime("%d/%m/%y") + data['origin'] = message + data['estimated type'] = type + json_data = json.dumps(data) + + save_base64_as_file(decode, type, hash, json_data) + print('found {} '.format(type)) + + if(find): + publisher.warning('base64 decoded') + #Send to duplicate + p.populate_set_out(message, 'Duplicate') + #send to Browse_warning_paste + msg = ('base64;{}'.format(message)) + p.populate_set_out( msg, 'alertHandler') + +def save_base64_as_file(decode, type, hash, json_data): + + filename_b64 = os.path.join(os.environ['AIL_HOME'], + p.config.get("Directories", "base64"), type, hash[:2], hash) + + filename_json = os.path.join(os.environ['AIL_HOME'], + p.config.get("Directories", "base64"), type, hash[:2], hash + '.json') + + dirname = os.path.dirname(filename_b64) + if not os.path.exists(dirname): + os.makedirs(dirname) + + with open(filename_b64, 'wb') as f: + f.write(decode) + + with open(filename_json, 'w') as f: + f.write(json_data) + + + + +if __name__ == '__main__': + # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) + # Port of the redis instance used by pubsublogger + publisher.port = 6380 + # Script is the default channel used for the modules. 
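
As an aside, the decode-and-store step done by search_base64() and save_base64_as_file() above boils down to the following sketch (assuming the python-magic binding that provides magic.from_buffer; the sample payload is invented):

import base64
import hashlib

import magic  # python-magic

# A candidate string, as matched by the module's base64 regex (>= 40 chars long).
b64 = base64.b64encode(b'%PDF-1.4 fake document body used for the example').decode()

if len(b64) >= 40:
    decoded = base64.b64decode(b64)
    mime = magic.from_buffer(decoded, mime=True)
    sha1 = hashlib.sha1(decoded).hexdigest()
    # Mirrors the on-disk layout built above: <base64 dir>/<mime type>/<sha1[0:2]>/<sha1>
    print('found {} -> {}/{}/{}'.format(mime, mime, sha1[:2], sha1))
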
+ publisher.channel = 'Script' + + # Section name in bin/packages/modules.cfg + config_section = 'Base64' + + # Setup the I/O queues + p = Process(config_section) + max_execution_time = p.config.getint("Base64", "max_execution_time") + + # Sent to the logging a description of the module + publisher.info("Base64 started") + + regex_base64 = '(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)' + re.compile(regex_base64) + + # Endless loop getting messages from the input queue + while True: + # Get one message from the input queue + message = p.get_from_set() + if message is None: + + publisher.debug("{} queue is empty, waiting".format(config_section)) + time.sleep(1) + continue + + filename = message + paste = Paste.Paste(filename) + + signal.alarm(max_execution_time) + try: + # Do something with the message from the queue + #print(filename) + content = paste.get_p_content() + search_base64(content,message) + + # (Optional) Send that thing to the next queue + #p.populate_set_out(something_has_been_done) + + except TimeoutException: + print ("{0} processing timeout".format(paste.p_path)) + continue + else: + signal.alarm(0) diff --git a/bin/Bitcoin.py b/bin/Bitcoin.py new file mode 100755 index 00000000..42468759 --- /dev/null +++ b/bin/Bitcoin.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* +""" +The Bitcoin Module +============================ + +It trying to extract Bitcoin address and secret key from paste + + ..seealso:: Paste method (get_regex) + +Requirements +------------ + +*Need running Redis instances. (Redis). + +""" + +from packages import Paste +from Helper import Process +from pubsublogger import publisher + +import re +import time + +from hashlib import sha256 + + +#### thank http://rosettacode.org/wiki/Bitcoin/address_validation#Python for this 2 functions + +def decode_base58(bc, length): + n = 0 + for char in bc: + n = n * 58 + digits58.index(char) + return n.to_bytes(length, 'big') +def check_bc(bc): + try: + bcbytes = decode_base58(bc, 25) + return bcbytes[-4:] == sha256(sha256(bcbytes[:-4]).digest()).digest()[:4] + except Exception: + return False +######################################################## + +def search_key(content, message, paste): + bitcoin_address = re.findall(regex_bitcoin_public_address, content) + bitcoin_private_key = re.findall(regex_bitcoin_private_key, content) + validate_address = False + key = False + if(len(bitcoin_address) >0): + #print(message) + for address in bitcoin_address: + if(check_bc(address)): + validate_address = True + print('Bitcoin address found : {}'.format(address)) + if(len(bitcoin_private_key) > 0): + for private_key in bitcoin_private_key: + print('Bitcoin private key found : {}'.format(private_key)) + key = True + + if(validate_address): + p.populate_set_out(message, 'Duplicate') + to_print = 'Bitcoin found: {} address and {} private Keys'.format(len(bitcoin_address), len(bitcoin_private_key)) + print(to_print) + publisher.warning(to_print) + msg = ('bitcoin;{}'.format(message)) + p.populate_set_out( msg, 'alertHandler') + if(key): + to_print = 'Bitcoin;{};{};{};'.format(paste.p_source, paste.p_date, + paste.p_name) + publisher.warning('{}Detected {} Bitcoin private key;{}'.format( + to_print, len(bitcoin_private_key),paste.p_path)) + +if __name__ == "__main__": + publisher.port = 6380 + publisher.channel = "Script" + + config_section = 'Bitcoin' + + # Setup the I/O queues + p = Process(config_section) + + # Sent to the logging a description of the module + 
publisher.info("Run Keys module ") + + digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz' + + regex_bitcoin_public_address = re.compile(r'(?= matchingThreshold: msg = '{} {}'.format(paste.p_path, len(found)) - print msg, categ + #msg = " ".join( [paste.p_path, bytes(len(found))] ) + + print(msg, categ) p.populate_set_out(msg, categ) publisher.info( diff --git a/bin/Credential.py b/bin/Credential.py index 29f80f88..fde80d12 100755 --- a/bin/Credential.py +++ b/bin/Credential.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ @@ -48,39 +48,39 @@ if __name__ == "__main__": config_section = "Credential" p = Process(config_section) publisher.info("Find credentials") - + minimumLengthThreshold = p.config.getint("Credential", "minimumLengthThreshold") faup = Faup() server_cred = redis.StrictRedis( - host=p.config.get("Redis_Level_DB_TermCred", "host"), - port=p.config.get("Redis_Level_DB_TermCred", "port"), - db=p.config.get("Redis_Level_DB_TermCred", "db")) + host=p.config.get("ARDB_TermCred", "host"), + port=p.config.get("ARDB_TermCred", "port"), + db=p.config.get("ARDB_TermCred", "db"), + decode_responses=True) criticalNumberToAlert = p.config.getint("Credential", "criticalNumberToAlert") minTopPassList = p.config.getint("Credential", "minTopPassList") regex_web = "((?:https?:\/\/)[-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)" - regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+" + #regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+" + regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+" regex_site_for_stats = "@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:" while True: message = p.get_from_set() if message is None: publisher.debug("Script Credential is Idling 10s") - print('sleeping 10s') + #print('sleeping 10s') time.sleep(10) continue - filepath, count = message.split() - - if count < minTopPassList: - # Less than 5 matches from the top password list, false positive. 
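
A quick illustration of what the relaxed regex_cred above now accepts: it tolerates separators other than ':' between the e-mail and the password. The pattern is copied verbatim from the hunk; the sample lines are made up:

import re

# Copied from the updated Credential module above.
regex_cred = "[a-zA-Z0-9\\._-]+@[a-zA-Z0-9\\.-]+\.[a-zA-Z]{2,6}[\\rn :\_\-]{1,10}[a-zA-Z0-9\_\-]+"

content = "dump:\nuser@example.com:hunter2\nother.user@example.org pa55w0rd\n"
creds = set(re.findall(regex_cred, content))
for cred in sorted(creds):
    print(cred)
# other.user@example.org pa55w0rd
# user@example.com:hunter2
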
- print("false positive:", count) - continue + filepath, count = message.split(' ') paste = Paste.Paste(filepath) content = paste.get_p_content() creds = set(re.findall(regex_cred, content)) + + publisher.warning('to_print') + if len(creds) == 0: continue @@ -89,7 +89,7 @@ if __name__ == "__main__": message = 'Checked {} credentials found.'.format(len(creds)) if sites_set: - message += ' Related websites: {}'.format(', '.join(sites_set)) + message += ' Related websites: {}'.format( (', '.join(sites_set)) ) to_print = 'Credential;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message, paste.p_path) @@ -97,13 +97,14 @@ if __name__ == "__main__": #num of creds above tresh, publish an alert if len(creds) > criticalNumberToAlert: - print("========> Found more than 10 credentials in this file : {}".format(filepath)) + print("========> Found more than 10 credentials in this file : {}".format( filepath )) publisher.warning(to_print) #Send to duplicate p.populate_set_out(filepath, 'Duplicate') #Send to alertHandler - p.populate_set_out('credential;{}'.format(filepath), 'alertHandler') - + msg = 'credential;{}'.format(filepath) + p.populate_set_out(msg, 'alertHandler') + #Put in form, count occurences, then send to moduleStats creds_sites = {} site_occurence = re.findall(regex_site_for_stats, content) @@ -122,9 +123,11 @@ if __name__ == "__main__": else: creds_sites[domain] = 1 - for site, num in creds_sites.iteritems(): # Send for each different site to moduleStats - print 'credential;{};{};{}'.format(num, site, paste.p_date) - p.populate_set_out('credential;{};{};{}'.format(num, site, paste.p_date), 'ModuleStats') + for site, num in creds_sites.items(): # Send for each different site to moduleStats + + mssg = 'credential;{};{};{}'.format(num, site, paste.p_date) + print(mssg) + p.populate_set_out(mssg, 'ModuleStats') if sites_set: print("=======> Probably on : {}".format(', '.join(sites_set))) @@ -148,7 +151,7 @@ if __name__ == "__main__": uniq_num_cred = server_cred.incr(REDIS_KEY_NUM_USERNAME) server_cred.hmset(REDIS_KEY_ALL_CRED_SET, {cred: uniq_num_cred}) server_cred.hmset(REDIS_KEY_ALL_CRED_SET_REV, {uniq_num_cred: cred}) - + #Add the mapping between the credential and the path server_cred.sadd(REDIS_KEY_MAP_CRED_TO_PATH+'_'+str(uniq_num_cred), uniq_num_path) @@ -158,4 +161,3 @@ if __name__ == "__main__": for partCred in splitedCred: if len(partCred) > minimumLengthThreshold: server_cred.sadd(partCred, uniq_num_cred) - diff --git a/bin/CreditCards.py b/bin/CreditCards.py index 133916fe..a7441807 100755 --- a/bin/CreditCards.py +++ b/bin/CreditCards.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ @@ -18,6 +18,7 @@ from packages import Paste from packages import lib_refine from pubsublogger import publisher import re +import sys from Helper import Process @@ -58,13 +59,14 @@ if __name__ == "__main__": content = paste.get_p_content() all_cards = re.findall(regex, content) if len(all_cards) > 0: - print 'All matching', all_cards + print('All matching', all_cards) creditcard_set = set([]) for card in all_cards: clean_card = re.sub('[^0-9]', '', card) + clean_card = clean_card if lib_refine.is_luhn_valid(clean_card): - print clean_card, 'is valid' + print(clean_card, 'is valid') creditcard_set.add(clean_card) paste.__setattr__(channel, creditcard_set) @@ -76,13 +78,15 @@ if __name__ == "__main__": if (len(creditcard_set) > 0): publisher.warning('{}Checked {} valid number(s);{}'.format( to_print, len(creditcard_set), paste.p_path)) + 
print('{}Checked {} valid number(s);{}'.format( + to_print, len(creditcard_set), paste.p_path)) #Send to duplicate p.populate_set_out(filename, 'Duplicate') #send to Browse_warning_paste - p.populate_set_out('creditcard;{}'.format(filename), 'alertHandler') + msg = 'creditcard;{}'.format(filename) + p.populate_set_out(msg, 'alertHandler') else: publisher.info('{}CreditCard related;{}'.format(to_print, paste.p_path)) else: publisher.debug("Script creditcard is idling 1m") time.sleep(10) - diff --git a/bin/Curve.py b/bin/Curve.py index 712f6006..07f690de 100755 --- a/bin/Curve.py +++ b/bin/Curve.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ This module is consuming the Redis-list created by the ZMQ_Sub_Curve_Q Module. @@ -53,12 +53,12 @@ def check_if_tracked_term(term, path): #add_paste to tracked_word_set set_name = "tracked_" + term server_term.sadd(set_name, path) - print term, 'addded', set_name, '->', path + print(term, 'addded', set_name, '->', path) p.populate_set_out("New Term added", 'CurveManageTopSets') # Send a notification only when the member is in the set if term in server_term.smembers(TrackedTermsNotificationEnabled_Name): - + # Send to every associated email adress for email in server_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + term): sendEmailNotification(email, term) @@ -82,14 +82,16 @@ if __name__ == "__main__": # REDIS # r_serv1 = redis.StrictRedis( - host=p.config.get("Redis_Level_DB_Curve", "host"), - port=p.config.get("Redis_Level_DB_Curve", "port"), - db=p.config.get("Redis_Level_DB_Curve", "db")) + host=p.config.get("ARDB_Curve", "host"), + port=p.config.get("ARDB_Curve", "port"), + db=p.config.get("ARDB_Curve", "db"), + decode_responses=True) server_term = redis.StrictRedis( - host=p.config.get("Redis_Level_DB_TermFreq", "host"), - port=p.config.get("Redis_Level_DB_TermFreq", "port"), - db=p.config.get("Redis_Level_DB_TermFreq", "db")) + host=p.config.get("ARDB_TermFreq", "host"), + port=p.config.get("ARDB_TermFreq", "port"), + db=p.config.get("ARDB_TermFreq", "db"), + decode_responses=True) # FUNCTIONS # publisher.info("Script Curve started") @@ -137,7 +139,7 @@ if __name__ == "__main__": server_term.zincrby(curr_set, low_word, float(score)) #1 term per paste server_term.zincrby("per_paste_" + curr_set, low_word, float(1)) - + #Add more info for tracked terms check_if_tracked_term(low_word, filename) @@ -149,15 +151,16 @@ if __name__ == "__main__": if generate_new_graph: generate_new_graph = False - print 'Building graph' + print('Building graph') today = datetime.date.today() year = today.year month = today.month + lib_words.create_curve_with_word_file(r_serv1, csv_path, wordfile_path, year, month) publisher.debug("Script Curve is Idling") - print "sleeping" + print("sleeping") time.sleep(10) message = p.get_from_set() diff --git a/bin/CurveManageTopSets.py b/bin/CurveManageTopSets.py index eea46a8c..4eaf9c3f 100755 --- a/bin/CurveManageTopSets.py +++ b/bin/CurveManageTopSets.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ @@ -16,7 +16,7 @@ from packages import lib_words import datetime import calendar import os -import ConfigParser +import configparser # Config Variables Refresh_rate = 60*5 #sec @@ -68,26 +68,26 @@ def manage_top_set(): # convert dico into sorted array array_month = [] - for w, v in dico.iteritems(): + for w, v in dico.items(): array_month.append((w, v)) array_month.sort(key=lambda tup: -tup[1]) array_month = array_month[0:20] array_week = [] 
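
Each of the four sort-and-slice blocks in this function is the same "top 20 terms by score" selection; after the iteritems()/items() migration it could also be expressed in one call per dictionary. A side note, not part of the patch:

import heapq

dico = {'password': 42.0, 'bitcoin': 17.0, 'leak': 9.0}

# Equivalent to building (word, value) tuples, sorting by -value and keeping the first 20.
array_month = heapq.nlargest(20, dico.items(), key=lambda tup: tup[1])
print(array_month)
# [('password', 42.0), ('bitcoin', 17.0), ('leak', 9.0)]
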
- for w, v in dico_week.iteritems(): + for w, v in dico_week.items(): array_week.append((w, v)) array_week.sort(key=lambda tup: -tup[1]) array_week = array_week[0:20] # convert dico_per_paste into sorted array array_month_per_paste = [] - for w, v in dico_per_paste.iteritems(): + for w, v in dico_per_paste.items(): array_month_per_paste.append((w, v)) array_month_per_paste.sort(key=lambda tup: -tup[1]) array_month_per_paste = array_month_per_paste[0:20] array_week_per_paste = [] - for w, v in dico_week_per_paste.iteritems(): + for w, v in dico_week_per_paste.items(): array_week_per_paste.append((w, v)) array_week_per_paste.sort(key=lambda tup: -tup[1]) array_week_per_paste = array_week_per_paste[0:20] @@ -105,7 +105,7 @@ def manage_top_set(): server_term.zadd(top_termFreq_setName_week[0], float(elem[1]), elem[0]) for elem in array_week_per_paste: server_term.zadd("per_paste_" + top_termFreq_setName_week[0], float(elem[1]), elem[0]) - + for elem in array_month: server_term.zadd(top_termFreq_setName_month[0], float(elem[1]), elem[0]) for elem in array_month_per_paste: @@ -114,7 +114,7 @@ def manage_top_set(): timestamp = int(time.mktime(datetime.datetime.now().timetuple())) value = str(timestamp) + ", " + "-" r_temp.set("MODULE_"+ "CurveManageTopSets" + "_" + str(os.getpid()), value) - print "refreshed module" + print("refreshed module") @@ -130,8 +130,8 @@ if __name__ == '__main__': raise Exception('Unable to find the configuration file. \ Did you set environment variables? \ Or activate the virtualenv.') - - cfg = ConfigParser.ConfigParser() + + cfg = configparser.ConfigParser() cfg.read(configfile) @@ -139,7 +139,8 @@ if __name__ == '__main__': r_temp = redis.StrictRedis( host=cfg.get('RedisPubSub', 'host'), port=cfg.getint('RedisPubSub', 'port'), - db=cfg.getint('RedisPubSub', 'db')) + db=cfg.getint('RedisPubSub', 'db'), + decode_responses=True) timestamp = int(time.mktime(datetime.datetime.now().timetuple())) value = str(timestamp) + ", " + "-" @@ -147,9 +148,10 @@ if __name__ == '__main__': r_temp.sadd("MODULE_TYPE_"+ "CurveManageTopSets" , str(os.getpid())) server_term = redis.StrictRedis( - host=cfg.get("Redis_Level_DB_TermFreq", "host"), - port=cfg.getint("Redis_Level_DB_TermFreq", "port"), - db=cfg.getint("Redis_Level_DB_TermFreq", "db")) + host=cfg.get("ARDB_TermFreq", "host"), + port=cfg.getint("ARDB_TermFreq", "port"), + db=cfg.getint("ARDB_TermFreq", "db"), + decode_responses=True) publisher.info("Script Curve_manage_top_set started") @@ -162,4 +164,3 @@ if __name__ == '__main__': # Get one message from the input queue (module only work if linked with a queue) time.sleep(Refresh_rate) # sleep a long time then manage the set manage_top_set() - diff --git a/bin/Cve.py b/bin/Cve.py index 62df0aba..9ac4efc8 100755 --- a/bin/Cve.py +++ b/bin/Cve.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ The CVE Module @@ -32,7 +32,8 @@ def search_cve(message): publisher.warning('{} contains CVEs'.format(paste.p_name)) #send to Browse_warning_paste - p.populate_set_out('cve;{}'.format(filepath), 'alertHandler') + msg = 'cve;{}'.format(filepath) + p.populate_set_out(msg, 'alertHandler') #Send to duplicate p.populate_set_out(filepath, 'Duplicate') @@ -63,4 +64,3 @@ if __name__ == '__main__': # Do something with the message from the queue search_cve(message) - diff --git a/bin/Dir.py b/bin/Dir.py index 6156c579..d76a7ad5 100755 --- a/bin/Dir.py +++ b/bin/Dir.py @@ -1,18 +1,18 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* import 
argparse import redis from pubsublogger import publisher from packages.lib_words import create_dirfile -import ConfigParser +import configparser def main(): """Main Function""" # CONFIG # - cfg = ConfigParser.ConfigParser() + cfg = configparser.ConfigParser() cfg.read('./packages/config.cfg') parser = argparse.ArgumentParser( @@ -36,7 +36,8 @@ def main(): r_serv = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"), port=cfg.getint("Redis_Queues", "port"), - db=cfg.getint("Redis_Queues", "db")) + db=cfg.getint("Redis_Queues", "db"), + decode_responses=True) publisher.port = 6380 publisher.channel = "Script" diff --git a/bin/DomClassifier.py b/bin/DomClassifier.py index c205cb01..aed87a55 100755 --- a/bin/DomClassifier.py +++ b/bin/DomClassifier.py @@ -1,11 +1,11 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ The DomClassifier Module ============================ -The DomClassifier modules extract and classify Internet domains/hostnames/IP addresses from +The DomClassifier modules extract and classify Internet domains/hostnames/IP addresses from the out output of the Global module. """ @@ -24,10 +24,11 @@ def main(): config_section = 'DomClassifier' p = Process(config_section) + addr_dns = p.config.get("DomClassifier", "dns") publisher.info("""ZMQ DomainClassifier is Running""") - c = DomainClassifier.domainclassifier.Extract(rawtext="") + c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns]) cc = p.config.get("DomClassifier", "cc") cc_tld = p.config.get("DomClassifier", "cc_tld") @@ -44,6 +45,7 @@ def main(): continue paste = PST.get_p_content() mimetype = PST._get_p_encoding() + if mimetype == "text/plain": c.text(rawtext=paste) c.potentialdomain() @@ -59,7 +61,7 @@ def main(): publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format( PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path)) except IOError: - print "CRC Checksum Failed on :", PST.p_path + print("CRC Checksum Failed on :", PST.p_path) publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( PST.p_source, PST.p_date, PST.p_name)) diff --git a/bin/DumpValidOnion.py b/bin/DumpValidOnion.py index dad5ea9b..b6f298d6 100755 --- a/bin/DumpValidOnion.py +++ b/bin/DumpValidOnion.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* from pubsublogger import publisher @@ -23,7 +23,7 @@ if __name__ == "__main__": if message is not None: f = open(dump_file, 'a') while message is not None: - print message + print(message) date = datetime.datetime.now() if message is not None: f.write(date.isoformat() + ' ' + message + '\n') diff --git a/bin/Duplicates.py b/bin/Duplicates.py index 50def29f..58bad3f0 100755 --- a/bin/Duplicates.py +++ b/bin/Duplicates.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ @@ -42,19 +42,20 @@ if __name__ == "__main__": threshold_duplicate_ssdeep = int(p.config.get("Modules_Duplicates", "threshold_duplicate_ssdeep")) threshold_duplicate_tlsh = int(p.config.get("Modules_Duplicates", "threshold_duplicate_tlsh")) threshold_set = {} - threshold_set['ssdeep'] = threshold_duplicate_ssdeep - threshold_set['tlsh'] = threshold_duplicate_tlsh + threshold_set['ssdeep'] = threshold_duplicate_ssdeep + threshold_set['tlsh'] = threshold_duplicate_tlsh min_paste_size = float(p.config.get("Modules_Duplicates", "min_paste_size")) # REDIS # dico_redis = {} date_today = datetime.today() - for year in xrange(2013, date_today.year+1): - for month in xrange(0, 13): 
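
For context, the ssdeep/tlsh comparison this module performs further down reduces to the sketch below (assuming the ssdeep and py-tlsh bindings; the sample pastes and threshold values are made up, the real thresholds come from the Modules_Duplicates config section):

import ssdeep
import tlsh

paste_a = '\n'.join('user{0}@example.com:password{0}'.format(i) for i in range(200)).encode()
paste_b = '\n'.join('user{0}@example.com:password{0}'.format(i) for i in range(5, 205)).encode()

# Same convention as the module: a low "percent" means the two pastes are similar.
percent_ssdeep = 100 - ssdeep.compare(ssdeep.hash(paste_a), ssdeep.hash(paste_b))
percent_tlsh = min(tlsh.diffxlen(tlsh.hash(paste_a), tlsh.hash(paste_b)), 100)

threshold_set = {'ssdeep': 50, 'tlsh': 100}
print(percent_ssdeep < threshold_set['ssdeep'], percent_tlsh < threshold_set['tlsh'])
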
+ for year in range(2013, date_today.year+1): + for month in range(0, 13): dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis( - host=p.config.get("Redis_Level_DB", "host"), port=year, - db=month) - #print("dup: "+str(year)+str(month).zfill(2)+"\n") + host=p.config.get("ARDB_DB", "host"), + port=p.config.get("ARDB_DB", "port"), + db=str(year) + str(month), + decode_responses=True) # FUNCTIONS # publisher.info("Script duplicate started") @@ -62,7 +63,7 @@ if __name__ == "__main__": while True: try: hash_dico = {} - dupl = [] + dupl = set() dico_range_list = [] x = time.time() @@ -73,6 +74,7 @@ if __name__ == "__main__": PST = Paste.Paste(path) else: publisher.debug("Script Attribute is idling 10s") + print('sleeping') time.sleep(10) continue @@ -90,7 +92,7 @@ if __name__ == "__main__": # Get the date of the range date_range = date_today - timedelta(days = maximum_month_range*30.4166666) num_of_month = (date_today.year - date_range.year)*12 + (date_today.month - date_range.month) - for diff_month in xrange(0, num_of_month+1): + for diff_month in range(0, num_of_month+1): curr_date_range = date_today - timedelta(days = diff_month*30.4166666) to_append = str(curr_date_range.year)+str(curr_date_range.month).zfill(2) dico_range_list.append(to_append) @@ -102,7 +104,7 @@ if __name__ == "__main__": yearly_index = str(date_today.year)+'00' r_serv0 = dico_redis[yearly_index] r_serv0.incr("current_index") - index = r_serv0.get("current_index")+str(PST.p_date) + index = (r_serv0.get("current_index")) + str(PST.p_date) # Open selected dico range opened_dico = [] @@ -114,13 +116,16 @@ if __name__ == "__main__": # Go throught the Database of the dico (of the month) for curr_dico_name, curr_dico_redis in opened_dico: - for hash_type, paste_hash in paste_hashes.iteritems(): + for hash_type, paste_hash in paste_hashes.items(): for dico_hash in curr_dico_redis.smembers('HASHS_'+hash_type): + try: if hash_type == 'ssdeep': - percent = 100-ssdeep.compare(dico_hash, paste_hash) + percent = 100-ssdeep.compare(dico_hash, paste_hash) else: percent = tlsh.diffxlen(dico_hash, paste_hash) + if percent > 100: + percent = 100 threshold_duplicate = threshold_set[hash_type] if percent < threshold_duplicate: @@ -130,16 +135,20 @@ if __name__ == "__main__": # index of paste index_current = r_serv_dico.get(dico_hash) + index_current = index_current paste_path = r_serv_dico.get(index_current) + paste_path = paste_path paste_date = r_serv_dico.get(index_current+'_date') + paste_date = paste_date paste_date = paste_date if paste_date != None else "No date available" if paste_path != None: - hash_dico[dico_hash] = (hash_type, paste_path, percent, paste_date) + if paste_path != PST.p_path: + hash_dico[dico_hash] = (hash_type, paste_path, percent, paste_date) - print '['+hash_type+'] '+'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent) - except Exception,e: - print str(e) - #print 'hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash + print('['+hash_type+'] '+'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent)) + + except Exception: + print('hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash) # Add paste in DB after checking to prevent its analysis twice # hash_type_i -> index_i AND index_i -> PST.PATH @@ -147,7 +156,7 @@ if __name__ == "__main__": r_serv1.set(index+'_date', PST._get_p_date()) r_serv1.sadd("INDEX", index) # Adding hashes in Redis - for hash_type, paste_hash 
in paste_hashes.iteritems(): + for hash_type, paste_hash in paste_hashes.items(): r_serv1.set(paste_hash, index) r_serv1.sadd("HASHS_"+hash_type, paste_hash) @@ -157,24 +166,25 @@ if __name__ == "__main__": if len(hash_dico) != 0: # paste_tuple = (hash_type, date, paste_path, percent) for dico_hash, paste_tuple in hash_dico.items(): - dupl.append(paste_tuple) + dupl.add(paste_tuple) # Creating the object attribute and save it. to_print = 'Duplicate;{};{};{};'.format( PST.p_source, PST.p_date, PST.p_name) if dupl != []: + dupl = list(dupl) PST.__setattr__("p_duplicate", dupl) PST.save_attribute_redis("p_duplicate", dupl) + PST.save_others_pastes_attribute_duplicate("p_duplicate", dupl) publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_path)) - print '{}Detected {}'.format(to_print, len(dupl)) + print('{}Detected {}'.format(to_print, len(dupl))) y = time.time() publisher.debug('{}Processed in {} sec'.format(to_print, y-x)) - #print '{}Processed in {} sec'.format(to_print, y-x) except IOError: to_print = 'Duplicate;{};{};{};'.format( PST.p_source, PST.p_date, PST.p_name) - print "CRC Checksum Failed on :", PST.p_path + print("CRC Checksum Failed on :", PST.p_path) publisher.error('{}CRC Checksum Failed'.format(to_print)) diff --git a/bin/Duplicates_old.py b/bin/Duplicates_old.py deleted file mode 100755 index 59610f83..00000000 --- a/bin/Duplicates_old.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python2 -# -*-coding:UTF-8 -* - -""" -The Duplicate module -==================== - -This huge module is, in short term, checking duplicates. - -Requirements: -------------- - - -""" -import redis -import os -import time -from packages import Paste -from pubsublogger import publisher -from pybloomfilter import BloomFilter - -from Helper import Process - -if __name__ == "__main__": - publisher.port = 6380 - publisher.channel = "Script" - - config_section = 'Duplicates' - - p = Process(config_section) - - # REDIS # - # DB OBJECT & HASHS ( DISK ) - # FIXME increase flexibility - dico_redis = {} - for year in xrange(2013, 2017): - for month in xrange(0, 16): - dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis( - host=p.config.get("Redis_Level_DB", "host"), port=year, - db=month) - #print("dup: "+str(year)+str(month).zfill(2)+"\n") - - # FUNCTIONS # - publisher.info("Script duplicate started") - - set_limit = 100 - bloompath = os.path.join(os.environ['AIL_HOME'], - p.config.get("Directories", "bloomfilters")) - - bloop_path_set = set() - while True: - try: - super_dico = {} - hash_dico = {} - dupl = [] - nb_hash_current = 0 - - x = time.time() - - message = p.get_from_set() - if message is not None: - path = message - PST = Paste.Paste(path) - else: - publisher.debug("Script Attribute is idling 10s") - time.sleep(10) - continue - - PST._set_p_hash_kind("md5") - - # Assignate the correct redis connexion - r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month] - - # Creating the bloom filter name: bloomyyyymm - filebloompath = os.path.join(bloompath, 'bloom' + PST.p_date.year + - PST.p_date.month) - if os.path.exists(filebloompath): - bloom = BloomFilter.open(filebloompath) - bloop_path_set.add(filebloompath) - else: - bloom = BloomFilter(100000000, 0.01, filebloompath) - bloop_path_set.add(filebloompath) - - # UNIQUE INDEX HASHS TABLE - r_serv0 = dico_redis["201600"] - r_serv0.incr("current_index") - index = r_serv0.get("current_index")+str(PST.p_date) - # HASHTABLES PER MONTH (because of r_serv1 changing db) - r_serv1.set(index, PST.p_path) - r_serv1.sadd("INDEX", 
index) - - # For each bloom filter - opened_bloom = [] - for bloo in bloop_path_set: - # Opening blooms - opened_bloom.append(BloomFilter.open(bloo)) - # For each hash of the paste - for line_hash in PST._get_hash_lines(min=5, start=1, jump=0): - nb_hash_current += 1 - - # Adding the hash in Redis & limiting the set - if r_serv1.scard(line_hash) <= set_limit: - r_serv1.sadd(line_hash, index) - r_serv1.sadd("HASHS", line_hash) - # Adding the hash in the bloom of the month - bloom.add(line_hash) - # Go throught the Database of the bloom filter (of the month) - for bloo in opened_bloom: - if line_hash in bloo: - db = bloo.name[-6:] - # Go throught the Database of the bloom filter (month) - r_serv_bloom = dico_redis[db] - - # set of index paste: set([1,2,4,65]) - hash_current = r_serv_bloom.smembers(line_hash) - # removing itself from the list - hash_current = hash_current - set([index]) - - # if the hash is present at least in 1 files - # (already processed) - if len(hash_current) != 0: - hash_dico[line_hash] = hash_current - - # if there is data in this dictionnary - if len(hash_dico) != 0: - super_dico[index] = hash_dico - - ########################################################################### - - # if there is data in this dictionnary - if len(super_dico) != 0: - # current = current paste, phash_dico = {hash: set, ...} - occur_dico = {} - for current, phash_dico in super_dico.items(): - # phash = hash, pset = set([ pastes ...]) - for phash, pset in hash_dico.items(): - - for p_fname in pset: - occur_dico.setdefault(p_fname, 0) - # Count how much hash is similar per file occuring - # in the dictionnary - if occur_dico[p_fname] >= 0: - occur_dico[p_fname] = occur_dico[p_fname] + 1 - - for paste, count in occur_dico.items(): - percentage = round((count/float(nb_hash_current))*100, 2) - if percentage >= 50: - dupl.append((paste, percentage)) - else: - print 'percentage: ' + str(percentage) - - # Creating the object attribute and save it. - to_print = 'Duplicate;{};{};{};'.format( - PST.p_source, PST.p_date, PST.p_name) - if dupl != []: - PST.__setattr__("p_duplicate", dupl) - PST.save_attribute_redis("p_duplicate", dupl) - publisher.info('{}Detected {}'.format(to_print, len(dupl))) - print '{}Detected {}'.format(to_print, len(dupl)) - - y = time.time() - - publisher.debug('{}Processed in {} sec'.format(to_print, y-x)) - except IOError: - print "CRC Checksum Failed on :", PST.p_path - publisher.error('{}CRC Checksum Failed'.format(to_print)) diff --git a/bin/Global.py b/bin/Global.py index bab45b47..6115a53f 100755 --- a/bin/Global.py +++ b/bin/Global.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ The ZMQ_Feed_Q Module @@ -27,6 +27,19 @@ from pubsublogger import publisher from Helper import Process +import magic +import io +#import gzip + +''' +def gunzip_bytes_obj(bytes_obj): + in_ = io.BytesIO() + in_.write(bytes_obj) + in_.seek(0) + with gzip.GzipFile(fileobj=in_, mode='rb') as fo: + gunzipped_bytes_obj = fo.read() + + return gunzipped_bytes_obj.decode()''' if __name__ == '__main__': publisher.port = 6380 @@ -44,6 +57,7 @@ if __name__ == '__main__': while True: message = p.get_from_set() + #print(message) # Recovering the streamed message informations. if message is not None: splitted = message.split() @@ -51,14 +65,14 @@ if __name__ == '__main__': paste, gzip64encoded = splitted else: # TODO Store the name of the empty paste inside a Redis-list. 
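
For reference, the feed format handled here is "<paste path> <base64(gzip(content))>"; a small sketch of decoding it, mirroring the commented-out gunzip helper above (the path and content are invented):

import base64
import gzip
import io

body = gzip.compress(b'some paste content')
message = 'archive/example_feeder/2018/01/01/fake.gz ' + base64.standard_b64encode(body).decode()

paste, gzip64encoded = message.split()
decoded = base64.standard_b64decode(gzip64encoded)            # what gets written to disk below
content = gzip.GzipFile(fileobj=io.BytesIO(decoded)).read()   # what gunzip_bytes_obj() would recover
print(paste, content)
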
- print "Empty Paste: not processed" + print("Empty Paste: not processed") publisher.debug("Empty Paste: {0} not processed".format(message)) continue else: - print "Empty Queues: Waiting..." + print("Empty Queues: Waiting...") if int(time.time() - time_1) > 30: to_print = 'Global; ; ; ;glob Processed {0} paste(s)'.format(processed_paste) - print to_print + print(to_print) #publisher.info(to_print) time_1 = time.time() processed_paste = 0 @@ -67,11 +81,28 @@ if __name__ == '__main__': # Creating the full filepath filename = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"), paste) + dirname = os.path.dirname(filename) if not os.path.exists(dirname): os.makedirs(dirname) + decoded = base64.standard_b64decode(gzip64encoded) + with open(filename, 'wb') as f: - f.write(base64.standard_b64decode(gzip64encoded)) + f.write(decoded) + '''try: + decoded2 = gunzip_bytes_obj(decoded) + except: + decoded2 ='' + + type = magic.from_buffer(decoded2, mime=True) + + if type!= 'text/x-c++' and type!= 'text/html' and type!= 'text/x-c' and type!= 'text/x-python' and type!= 'text/x-php' and type!= 'application/xml' and type!= 'text/x-shellscript' and type!= 'text/plain' and type!= 'text/x-diff' and type!= 'text/x-ruby': + + print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------') + print(filename) + print(type) + print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------') + ''' p.populate_set_out(filename) processed_paste+=1 diff --git a/bin/Helper.py b/bin/Helper.py index 2560f340..289dd956 100755 --- a/bin/Helper.py +++ b/bin/Helper.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ Queue helper module @@ -12,11 +12,7 @@ the same Subscriber name in both of them. """ import redis -try: # dirty to support python3 - import ConfigParser -except: - import configparser - ConfigParser = configparser +import configparser import os import zmq import time @@ -32,7 +28,7 @@ class PubSub(object): raise Exception('Unable to find the configuration file. \ Did you set environment variables? 
\ Or activate the virtualenv.') - self.config = ConfigParser.ConfigParser() + self.config = configparser.ConfigParser() self.config.read(configfile) self.redis_sub = False self.zmq_sub = False @@ -49,7 +45,8 @@ class PubSub(object): r = redis.StrictRedis( host=self.config.get('RedisPubSub', 'host'), port=self.config.get('RedisPubSub', 'port'), - db=self.config.get('RedisPubSub', 'db')) + db=self.config.get('RedisPubSub', 'db'), + decode_responses=True) self.subscribers = r.pubsub(ignore_subscribe_messages=True) self.subscribers.psubscribe(channel) elif conn_name.startswith('ZMQ'): @@ -61,7 +58,8 @@ class PubSub(object): for address in addresses.split(','): new_sub = context.socket(zmq.SUB) new_sub.connect(address) - new_sub.setsockopt(zmq.SUBSCRIBE, channel) + # bytes64 encode bytes to ascii only bytes + new_sub.setsockopt_string(zmq.SUBSCRIBE, channel) self.subscribers.append(new_sub) def setup_publish(self, conn_name): @@ -72,7 +70,8 @@ class PubSub(object): if conn_name.startswith('Redis'): r = redis.StrictRedis(host=self.config.get('RedisPubSub', 'host'), port=self.config.get('RedisPubSub', 'port'), - db=self.config.get('RedisPubSub', 'db')) + db=self.config.get('RedisPubSub', 'db'), + decode_responses=True) self.publishers['Redis'].append((r, channel)) elif conn_name.startswith('ZMQ'): context = zmq.Context() @@ -85,10 +84,12 @@ class PubSub(object): channel_message = m.get('channel') for p, channel in self.publishers['Redis']: if channel_message is None or channel_message == channel: - p.publish(channel, m['message']) + p.publish(channel, ( m['message']) ) for p, channel in self.publishers['ZMQ']: if channel_message is None or channel_message == channel: p.send('{} {}'.format(channel, m['message'])) + #p.send(b' '.join( [channel, mess] ) ) + def subscribe(self): if self.redis_sub: @@ -100,7 +101,7 @@ class PubSub(object): for sub in self.subscribers: try: msg = sub.recv(zmq.NOBLOCK) - yield msg.split(' ', 1)[1] + yield msg.split(b" ", 1)[1] except zmq.error.Again as e: time.sleep(0.2) pass @@ -117,9 +118,9 @@ class Process(object): Did you set environment variables? 
\ Or activate the virtualenv.') modulesfile = os.path.join(os.environ['AIL_BIN'], 'packages/modules.cfg') - self.config = ConfigParser.ConfigParser() + self.config = configparser.ConfigParser() self.config.read(configfile) - self.modules = ConfigParser.ConfigParser() + self.modules = configparser.ConfigParser() self.modules.read(modulesfile) self.subscriber_name = conf_section @@ -131,11 +132,11 @@ class Process(object): self.r_temp = redis.StrictRedis( host=self.config.get('RedisPubSub', 'host'), port=self.config.get('RedisPubSub', 'port'), - db=self.config.get('RedisPubSub', 'db')) + db=self.config.get('RedisPubSub', 'db'), + decode_responses=True) self.moduleNum = os.getpid() - def populate_set_in(self): # monoproc src = self.modules.get(self.subscriber_name, 'subscribe') @@ -152,6 +153,7 @@ class Process(object): self.r_temp.hset('queues', self.subscriber_name, int(self.r_temp.scard(in_set))) message = self.r_temp.spop(in_set) + timestamp = int(time.mktime(datetime.datetime.now().timetuple())) dir_name = os.environ['AIL_HOME']+self.config.get('Directories', 'pastes') @@ -159,37 +161,46 @@ class Process(object): return None else: - try: - if ".gz" in message: - path = message.split(".")[-2].split("/")[-1] - #find start of path with AIL_HOME - index_s = message.find(os.environ['AIL_HOME']) - #Stop when .gz - index_e = message.find(".gz")+3 + #try: + if '.gz' in message: + path = message.split(".")[-2].split("/")[-1] + #find start of path with AIL_HOME + index_s = message.find(os.environ['AIL_HOME']) + #Stop when .gz + index_e = message.find(".gz")+3 + if(index_s == -1): + complete_path = message[0:index_e] + else: complete_path = message[index_s:index_e] - else: - path = "?" - value = str(timestamp) + ", " + path - self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value) - self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", complete_path) - self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum)) - return message + else: + path = "-" + complete_path = "?" - except: - path = "?" - value = str(timestamp) + ", " + path - self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value) - self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", "?") - self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum)) - return message + value = str(timestamp) + ", " + path + self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value) + self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", complete_path) + self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum)) + return message + + #except: + #print('except') + #path = "?" 
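
As a side note, the envelope that populate_set_out() below pushes to the "<module>out" set is plain JSON; the publish loop pops it back off that set and only forwards the message to publishers whose channel matches. A tiny sketch (the paste path is invented):

import json

msg = {'message': 'base64;/home/ail/PASTES/archive/example_feeder/2018/01/01/abc.gz',
       'channel': 'alertHandler'}
envelope = json.dumps(msg)        # what gets sadd'ed to e.g. 'Base64out'

decoded = json.loads(envelope)    # what the publishing side reads back
assert decoded['channel'] == 'alertHandler'
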
+ #value = str(timestamp) + ", " + path + #self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value) + #self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum) + "_PATH", "?") + #self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum)) + #return message def populate_set_out(self, msg, channel=None): # multiproc msg = {'message': msg} if channel is not None: msg.update({'channel': channel}) - self.r_temp.sadd(self.subscriber_name + 'out', json.dumps(msg)) + + # bytes64 encode bytes to ascii only bytes + j = json.dumps(msg) + self.r_temp.sadd(self.subscriber_name + 'out', j) def publish(self): # monoproc @@ -201,6 +212,7 @@ class Process(object): self.pubsub.setup_publish(name) while True: message = self.r_temp.spop(self.subscriber_name + 'out') + if message is None: time.sleep(1) continue diff --git a/bin/Indexer.py b/bin/Indexer.py index be4c899c..1d1ece4b 100755 --- a/bin/Indexer.py +++ b/bin/Indexer.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ @@ -49,7 +49,7 @@ if __name__ == "__main__": # Indexer configuration - index dir and schema setup baseindexpath = join(os.environ['AIL_HOME'], p.config.get("Indexer", "path")) - indexRegister_path = join(os.environ['AIL_HOME'], + indexRegister_path = join(os.environ['AIL_HOME'], p.config.get("Indexer", "register")) indexertype = p.config.get("Indexer", "type") INDEX_SIZE_THRESHOLD = int(p.config.get("Indexer", "index_max_size")) @@ -89,7 +89,7 @@ if __name__ == "__main__": ix = create_in(indexpath, schema) else: ix = open_dir(indexpath) - + last_refresh = time_now # LOGGING # @@ -107,10 +107,11 @@ if __name__ == "__main__": continue docpath = message.split(" ", -1)[-1] paste = PST.get_p_content() - print "Indexing - "+indexname+" :", docpath + print("Indexing - " + indexname + " :", docpath) - if time.time() - last_refresh > TIME_WAIT: #avoid calculating the index's size at each message + #avoid calculating the index's size at each message + if( time.time() - last_refresh > TIME_WAIT): last_refresh = time.time() if check_index_size(baseindexpath, indexname) >= INDEX_SIZE_THRESHOLD*(1000*1000): timestamp = int(time.time()) @@ -128,11 +129,11 @@ if __name__ == "__main__": if indexertype == "whoosh": indexwriter = ix.writer() indexwriter.update_document( - title=unicode(docpath, errors='ignore'), - path=unicode(docpath, errors='ignore'), - content=unicode(paste, errors='ignore')) + title=docpath, + path=docpath, + content=paste) indexwriter.commit() except IOError: - print "CRC Checksum Failed on :", PST.p_path + print("CRC Checksum Failed on :", PST.p_path) publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( PST.p_source, PST.p_date, PST.p_name)) diff --git a/bin/Keys.py b/bin/Keys.py index 0e474b40..9f39cf50 100755 --- a/bin/Keys.py +++ b/bin/Keys.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ @@ -15,16 +15,19 @@ RSA private key, certificate messages import time from pubsublogger import publisher -from Helper import Process +#from bin.packages import Paste +#from bin.Helper import Process + from packages import Paste +from Helper import Process -def search_key(message): - paste = Paste.Paste(message) +def search_key(paste): content = paste.get_p_content() find = False if '-----BEGIN PGP MESSAGE-----' in content: publisher.warning('{} has a PGP enc message'.format(paste.p_name)) + find = True if '-----BEGIN CERTIFICATE-----' in content: @@ -32,15 +35,40 @@ def search_key(message): find = 
True if '-----BEGIN RSA PRIVATE KEY-----' in content: - publisher.warning('{} has a RSA key message'.format(paste.p_name)) + publisher.warning('{} has a RSA private key message'.format(paste.p_name)) + print('rsa private key message found') find = True if '-----BEGIN PRIVATE KEY-----' in content: - publisher.warning('{} has a private message'.format(paste.p_name)) + publisher.warning('{} has a private key message'.format(paste.p_name)) + print('private key message found') find = True if '-----BEGIN ENCRYPTED PRIVATE KEY-----' in content: - publisher.warning('{} has an encrypted private message'.format(paste.p_name)) + publisher.warning('{} has an encrypted private key message'.format(paste.p_name)) + print('encrypted private key message found') + find = True + + if '-----BEGIN OPENSSH PRIVATE KEY-----' in content: + publisher.warning('{} has an openssh private key message'.format(paste.p_name)) + print('openssh private key message found') + find = True + + if '-----BEGIN OpenVPN Static key V1-----' in content: + publisher.warning('{} has an openssh private key message'.format(paste.p_name)) + print('OpenVPN Static key message found') + find = True + + if '-----BEGIN DSA PRIVATE KEY-----' in content: + publisher.warning('{} has a dsa private key message'.format(paste.p_name)) + find = True + + if '-----BEGIN EC PRIVATE KEY-----' in content: + publisher.warning('{} has an ec private key message'.format(paste.p_name)) + find = True + + if '-----BEGIN PGP PRIVATE KEY BLOCK-----' in content: + publisher.warning('{} has a pgp private key block message'.format(paste.p_name)) find = True if find : @@ -48,7 +76,9 @@ def search_key(message): #Send to duplicate p.populate_set_out(message, 'Duplicate') #send to Browse_warning_paste - p.populate_set_out('keys;{}'.format(message), 'alertHandler') + msg = ('keys;{}'.format(message)) + print(message) + p.populate_set_out( msg, 'alertHandler') if __name__ == '__main__': @@ -77,6 +107,7 @@ if __name__ == '__main__': continue # Do something with the message from the queue - search_key(message) + paste = Paste.Paste(message) + search_key(paste) # (Optional) Send that thing to the next queue diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 5eeb53c2..974938a6 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -11,11 +11,11 @@ CYAN="\\033[1;36m" [ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; [ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_LEVELDB" ] && echo "Needs the env var AIL_LEVELDB. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; export PATH=$AIL_HOME:$PATH export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_LEVELDB:$PATH +export PATH=$AIL_ARDB:$PATH function helptext { echo -e $YELLOW" @@ -40,7 +40,7 @@ function helptext { (Inside screen Daemons) "$RED" But first of all you'll need to edit few path where you installed - your redis & leveldb servers. + your redis & ardb servers. 
"$DEFAULT" Usage: ----- @@ -58,33 +58,17 @@ function launching_redis { screen -S "Redis_AIL" -X screen -t "6380" bash -c 'redis-server '$conf_dir'6380.conf ; read x' sleep 0.1 screen -S "Redis_AIL" -X screen -t "6381" bash -c 'redis-server '$conf_dir'6381.conf ; read x' - - # For Words and curves - sleep 0.1 - screen -S "Redis_AIL" -X screen -t "6382" bash -c 'redis-server '$conf_dir'6382.conf ; read x' } -function launching_lvldb { - lvdbhost='127.0.0.1' - lvdbdir="${AIL_HOME}/LEVEL_DB_DATA/" - nb_db=13 +function launching_ardb { + conf_dir="${AIL_HOME}/configs/" - db_y=`date +%Y` - #Verify that a dir with the correct year exists, create it otherwise - if [ ! -d "$lvdbdir$db_y" ]; then - mkdir -p "$db_y" - fi - - screen -dmS "LevelDB_AIL" + screen -dmS "ARDB_AIL" sleep 0.1 - echo -e $GREEN"\t* Launching Levels DB servers"$DEFAULT + echo -e $GREEN"\t* Launching ARDB servers"$DEFAULT - #Launch a DB for each dir - for pathDir in $lvdbdir*/ ; do - yDir=$(basename "$pathDir") - sleep 0.1 - screen -S "LevelDB_AIL" -X screen -t "$yDir" bash -c 'redis-leveldb -H '$lvdbhost' -D '$pathDir'/ -P '$yDir' -M '$nb_db'; read x' - done + sleep 0.1 + screen -S "ARDB_AIL" -X screen -t "6382" bash -c 'ardb-server '$conf_dir'6382.conf ; read x' } function launching_logs { @@ -101,12 +85,12 @@ function launching_queues { sleep 0.1 echo -e $GREEN"\t* Launching all the queues"$DEFAULT - screen -S "Queue_AIL" -X screen -t "Queues" bash -c './launch_queues.py; read x' + screen -S "Queue_AIL" -X screen -t "Queues" bash -c 'python3 launch_queues.py; read x' } function launching_scripts { echo -e "\t* Checking configuration" - bash -c "./Update-conf.py" + bash -c "python3 Update-conf.py" exitStatus=$? if [ $exitStatus -ge 1 ]; then echo -e $RED"\t* Configuration not up-to-date"$DEFAULT @@ -142,6 +126,8 @@ function launching_scripts { sleep 0.1 screen -S "Script_AIL" -X screen -t "Mail" bash -c './Mail.py; read x' sleep 0.1 + screen -S "Script_AIL" -X screen -t "ApiKey" bash -c './ApiKey.py; read x' + sleep 0.1 screen -S "Script_AIL" -X screen -t "Web" bash -c './Web.py; read x' sleep 0.1 screen -S "Script_AIL" -X screen -t "Credential" bash -c './Credential.py; read x' @@ -158,6 +144,10 @@ function launching_scripts { sleep 0.1 screen -S "Script_AIL" -X screen -t "Keys" bash -c './Keys.py; read x' sleep 0.1 + screen -S "Script_AIL" -X screen -t "Base64" bash -c './Base64.py; read x' + sleep 0.1 + screen -S "Script_AIL" -X screen -t "Bitcoin" bash -c './Bitcoin.py; read x' + sleep 0.1 screen -S "Script_AIL" -X screen -t "Phone" bash -c './Phone.py; read x' sleep 0.1 screen -S "Script_AIL" -X screen -t "Release" bash -c './Release.py; read x' @@ -183,7 +173,10 @@ function shutting_down_redis { bash -c $redis_dir'redis-cli -p 6380 SHUTDOWN' sleep 0.1 bash -c $redis_dir'redis-cli -p 6381 SHUTDOWN' - sleep 0.1 +} + +function shutting_down_ardb { + redis_dir=${AIL_HOME}/redis/src/ bash -c $redis_dir'redis-cli -p 6382 SHUTDOWN' } @@ -208,12 +201,21 @@ function checking_redis { flag_redis=1 fi sleep 0.1 + + return $flag_redis; +} + +function checking_ardb { + flag_ardb=0 + redis_dir=${AIL_HOME}/redis/src/ + sleep 0.2 bash -c $redis_dir'redis-cli -p 6382 PING | grep "PONG" &> /dev/null' if [ ! $? == 0 ]; then echo -e $RED"\t6382 not ready"$DEFAULT - flag_redis=1 + flag_ardb=1 fi - return $flag_redis; + + return $flag_ardb; } #If no params, display the help @@ -223,12 +225,12 @@ helptext; ############### TESTS ################### isredis=`screen -ls | egrep '[0-9]+.Redis_AIL' | cut -d. 
-f1` -islvldb=`screen -ls | egrep '[0-9]+.LevelDB_AIL' | cut -d. -f1` +isardb=`screen -ls | egrep '[0-9]+.ARDB_AIL' | cut -d. -f1` islogged=`screen -ls | egrep '[0-9]+.Logging_AIL' | cut -d. -f1` isqueued=`screen -ls | egrep '[0-9]+.Queue_AIL' | cut -d. -f1` isscripted=`screen -ls | egrep '[0-9]+.Script_AIL' | cut -d. -f1` -options=("Redis" "LevelDB" "Logs" "Queues" "Scripts" "Killall" "Shutdown" "Update-config") +options=("Redis" "Ardb" "Logs" "Queues" "Scripts" "Killall" "Shutdown" "Update-config") menu() { echo "What do you want to Launch?:" @@ -259,9 +261,9 @@ for i in ${!options[@]}; do echo -e $RED"\t* A screen is already launched"$DEFAULT fi ;; - LevelDB) - if [[ ! $islvldb ]]; then - launching_lvldb; + Ardb) + if [[ ! $isardb ]]; then + launching_ardb; else echo -e $RED"\t* A screen is already launched"$DEFAULT fi @@ -282,12 +284,13 @@ for i in ${!options[@]}; do ;; Scripts) if [[ ! $isscripted ]]; then - if checking_redis; then + sleep 1 + if checking_redis && checking_ardb; then launching_scripts; else - echo -e $YELLOW"\tScript not started, waiting 3 secondes"$DEFAULT - sleep 3 - if checking_redis; then + echo -e $YELLOW"\tScript not started, waiting 5 secondes"$DEFAULT + sleep 5 + if checking_redis && checking_ardb; then launching_scripts; else echo -e $RED"\tScript not started"$DEFAULT @@ -298,14 +301,17 @@ for i in ${!options[@]}; do fi ;; Killall) - if [[ $isredis || $islvldb || $islogged || $isqueued || $isscripted ]]; then + if [[ $isredis || $isardb || $islogged || $isqueued || $isscripted ]]; then echo -e $GREEN"Gracefully closing redis servers"$DEFAULT shutting_down_redis; + sleep 0.2 + echo -e $GREEN"Gracefully closing ardb servers"$DEFAULT + shutting_down_ardb; echo -e $GREEN"Killing all"$DEFAULT - kill $isredis $islvldb $islogged $isqueued $isscripted + kill $isredis $isardb $islogged $isqueued $isscripted sleep 0.2 echo -e $ROSE`screen -ls`$DEFAULT - echo -e $GREEN"\t* $isredis $islvldb $islogged $isqueued $isscripted killed."$DEFAULT + echo -e $GREEN"\t* $isredis $isardb $islogged $isqueued $isscripted killed."$DEFAULT else echo -e $RED"\t* No screen to kill"$DEFAULT fi diff --git a/bin/Lines.py b/bin/Lines.py index be1ad635..8c9f6827 100755 --- a/bin/Lines.py +++ b/bin/Lines.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ @@ -61,7 +61,7 @@ if __name__ == '__main__': while True: try: message = p.get_from_set() - print message + print(message) if message is not None: PST = Paste.Paste(message) else: @@ -77,8 +77,8 @@ if __name__ == '__main__': # FIXME Not used. 
PST.store.sadd("Pastes_Objects", PST.p_path) if lines_infos[1] < args.max: - p.populate_set_out(PST.p_path, 'LinesShort') + p.populate_set_out( PST.p_path , 'LinesShort') else: - p.populate_set_out(PST.p_path, 'LinesLong') + p.populate_set_out( PST.p_path , 'LinesLong') except IOError: - print "CRC Checksum Error on : ", PST.p_path + print("CRC Checksum Error on : ", PST.p_path) diff --git a/bin/Mail.py b/bin/Mail.py index c608d106..abc112a6 100755 --- a/bin/Mail.py +++ b/bin/Mail.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ @@ -28,12 +28,14 @@ if __name__ == "__main__": config_section = 'Mail' p = Process(config_section) + addr_dns = p.config.get("Mail", "dns") # REDIS # r_serv2 = redis.StrictRedis( host=p.config.get("Redis_Cache", "host"), port=p.config.getint("Redis_Cache", "port"), - db=p.config.getint("Redis_Cache", "db")) + db=p.config.getint("Redis_Cache", "db"), + decode_responses=True) # FUNCTIONS # publisher.info("Suscribed to channel mails_categ") @@ -56,7 +58,7 @@ if __name__ == "__main__": if prec_filename is None or filename != prec_filename: PST = Paste.Paste(filename) MX_values = lib_refine.checking_MX_record( - r_serv2, PST.get_regex(email_regex)) + r_serv2, PST.get_regex(email_regex), addr_dns) if MX_values[0] >= 1: @@ -73,19 +75,19 @@ if __name__ == "__main__": #Send to duplicate p.populate_set_out(filename, 'Duplicate') p.populate_set_out('mail;{}'.format(filename), 'alertHandler') - + else: publisher.info(to_print) - #Send to ModuleStats + #Send to ModuleStats for mail in MX_values[1]: - print 'mail;{};{};{}'.format(1, mail, PST.p_date) + print('mail;{};{};{}'.format(1, mail, PST.p_date)) p.populate_set_out('mail;{};{};{}'.format(1, mail, PST.p_date), 'ModuleStats') prec_filename = filename else: publisher.debug("Script Mails is Idling 10s") - print 'Sleeping' + print('Sleeping') time.sleep(10) message = p.get_from_set() diff --git a/bin/Mixer.py b/bin/Mixer.py index 1b3138d4..309bb32a 100755 --- a/bin/Mixer.py +++ b/bin/Mixer.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ The Mixer Module @@ -35,7 +35,7 @@ import os import time from pubsublogger import publisher import redis -import ConfigParser +import configparser from Helper import Process @@ -58,14 +58,15 @@ if __name__ == '__main__': Did you set environment variables? 
\ Or activate the virtualenv.') - cfg = ConfigParser.ConfigParser() + cfg = configparser.ConfigParser() cfg.read(configfile) # REDIS # server = redis.StrictRedis( host=cfg.get("Redis_Mixer_Cache", "host"), port=cfg.getint("Redis_Mixer_Cache", "port"), - db=cfg.getint("Redis_Mixer_Cache", "db")) + db=cfg.getint("Redis_Mixer_Cache", "db"), + decode_responses=True) # LOGGING # publisher.info("Feed Script started to receive & publish.") @@ -89,9 +90,13 @@ if __name__ == '__main__': splitted = message.split() if len(splitted) == 2: complete_paste, gzip64encoded = splitted + try: + #feeder_name = ( complete_paste.replace("archive/","") ).split("/")[0] feeder_name, paste_name = complete_paste.split('>') feeder_name.replace(" ","") + paste_name = complete_paste + except ValueError as e: feeder_name = "unnamed_feeder" paste_name = complete_paste @@ -106,7 +111,9 @@ if __name__ == '__main__': duplicated_paste_per_feeder[feeder_name] = 0 relay_message = "{0} {1}".format(paste_name, gzip64encoded) - digest = hashlib.sha1(gzip64encoded).hexdigest() + #relay_message = b" ".join( [paste_name, gzip64encoded] ) + + digest = hashlib.sha1(gzip64encoded.encode('utf8')).hexdigest() # Avoid any duplicate coming from any sources if operation_mode == 1: @@ -173,26 +180,26 @@ if __name__ == '__main__': else: # TODO Store the name of the empty paste inside a Redis-list. - print "Empty Paste: not processed" + print("Empty Paste: not processed") publisher.debug("Empty Paste: {0} not processed".format(message)) else: - print "Empty Queues: Waiting..." + print("Empty Queues: Waiting...") if int(time.time() - time_1) > refresh_time: - print processed_paste_per_feeder + print(processed_paste_per_feeder) to_print = 'Mixer; ; ; ;mixer_all All_feeders Processed {0} paste(s) in {1}sec'.format(processed_paste, refresh_time) - print to_print + print(to_print) publisher.info(to_print) processed_paste = 0 - for feeder, count in processed_paste_per_feeder.iteritems(): + for feeder, count in processed_paste_per_feeder.items(): to_print = 'Mixer; ; ; ;mixer_{0} {0} Processed {1} paste(s) in {2}sec'.format(feeder, count, refresh_time) - print to_print + print(to_print) publisher.info(to_print) processed_paste_per_feeder[feeder] = 0 - for feeder, count in duplicated_paste_per_feeder.iteritems(): + for feeder, count in duplicated_paste_per_feeder.items(): to_print = 'Mixer; ; ; ;mixer_{0} {0} Duplicated {1} paste(s) in {2}sec'.format(feeder, count, refresh_time) - print to_print + print(to_print) publisher.info(to_print) duplicated_paste_per_feeder[feeder] = 0 diff --git a/bin/ModuleInformation.py b/bin/ModuleInformation.py index fc219815..807cb87e 100755 --- a/bin/ModuleInformation.py +++ b/bin/ModuleInformation.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* ''' @@ -20,7 +20,7 @@ import os import signal import argparse from subprocess import PIPE, Popen -import ConfigParser +import configparser import json from terminaltables import AsciiTable import textwrap @@ -51,7 +51,7 @@ last_refresh = 0 def getPid(module): p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) for line in p.stdout: - print line + print(line) splittedLine = line.split() if 'python2' in splittedLine: return int(splittedLine[0]) @@ -76,7 +76,7 @@ def cleanRedis(): flag_pid_valid = True if not flag_pid_valid: - print flag_pid_valid, 'cleaning', pid, 'in', k + print(flag_pid_valid, 'cleaning', pid, 'in', k) server.srem(k, pid) inst_time = 
datetime.datetime.fromtimestamp(int(time.time())) printarrayGlob.insert(1, [inst_time, moduleName, pid, "Cleared invalid pid in " + k]) @@ -85,11 +85,11 @@ def cleanRedis(): def kill_module(module, pid): - print '' - print '-> trying to kill module:', module + print('') + print('-> trying to kill module:', module) if pid is None: - print 'pid was None' + print('pid was None') printarrayGlob.insert(1, [0, module, pid, "PID was None"]) printarrayGlob.pop() pid = getPid(module) @@ -102,15 +102,15 @@ def kill_module(module, pid): try: os.kill(pid, signal.SIGUSR1) except OSError: - print pid, 'already killed' + print(pid, 'already killed') inst_time = datetime.datetime.fromtimestamp(int(time.time())) printarrayGlob.insert(1, [inst_time, module, pid, "Already killed"]) printarrayGlob.pop() return time.sleep(1) if getPid(module) is None: - print module, 'has been killed' - print 'restarting', module, '...' + print(module, 'has been killed') + print('restarting', module, '...') p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) inst_time = datetime.datetime.fromtimestamp(int(time.time())) printarrayGlob.insert(1, [inst_time, module, pid, "Killed"]) @@ -119,7 +119,7 @@ def kill_module(module, pid): printarrayGlob.pop() else: - print 'killing failed, retrying...' + print('killing failed, retrying...') inst_time = datetime.datetime.fromtimestamp(int(time.time())) printarrayGlob.insert(1, [inst_time, module, pid, "Killing #1 failed."]) printarrayGlob.pop() @@ -128,8 +128,8 @@ def kill_module(module, pid): os.kill(pid, signal.SIGUSR1) time.sleep(1) if getPid(module) is None: - print module, 'has been killed' - print 'restarting', module, '...' + print(module, 'has been killed') + print('restarting', module, '...') p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) inst_time = datetime.datetime.fromtimestamp(int(time.time())) printarrayGlob.insert(1, [inst_time, module, pid, "Killed"]) @@ -137,12 +137,12 @@ def kill_module(module, pid): printarrayGlob.pop() printarrayGlob.pop() else: - print 'killing failed!' + print('killing failed!') inst_time = datetime.datetime.fromtimestamp(int(time.time())) printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed!"]) printarrayGlob.pop() else: - print 'Module does not exist' + print('Module does not exist') inst_time = datetime.datetime.fromtimestamp(int(time.time())) printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed, module not found"]) printarrayGlob.pop() @@ -174,7 +174,7 @@ def waiting_refresh(): last_refresh = time.time() return True - + if __name__ == "__main__": @@ -192,14 +192,15 @@ if __name__ == "__main__": Did you set environment variables? 
\ Or activate the virtualenv.') - cfg = ConfigParser.ConfigParser() + cfg = configparser.ConfigParser() cfg.read(configfile) # REDIS # server = redis.StrictRedis( host=cfg.get("Redis_Queues", "host"), port=cfg.getint("Redis_Queues", "port"), - db=cfg.getint("Redis_Queues", "db")) + db=cfg.getint("Redis_Queues", "db"), + decode_responses=True) if args.clear == 1: clearRedisModuleInfo() @@ -222,17 +223,17 @@ if __name__ == "__main__": #while key != 'q': # key = stdsrc.getch() # stdscr.refresh() - + all_queue = set() printarray1 = [] printarray2 = [] printarray3 = [] - for queue, card in server.hgetall("queues").iteritems(): + for queue, card in server.hgetall("queues").items(): all_queue.add(queue) key = "MODULE_" + queue + "_" keySet = "MODULE_TYPE_" + queue array_module_type = [] - + for moduleNum in server.smembers(keySet): value = server.get(key + str(moduleNum)) if value is not None: @@ -240,7 +241,7 @@ if __name__ == "__main__": if timestamp is not None and path is not None: startTime_readable = datetime.datetime.fromtimestamp(int(timestamp)) processed_time_readable = str((datetime.datetime.now() - startTime_readable)).split('.')[0] - + if int(card) > 0: if int((datetime.datetime.now() - startTime_readable).total_seconds()) > args.treshold: log = open(log_filename, 'a') @@ -251,15 +252,15 @@ if __name__ == "__main__": last_kill_try = kill_retry_threshold+1 if args.autokill == 1 and last_kill_try > kill_retry_threshold : kill_module(queue, int(moduleNum)) - + array_module_type.append([get_color(processed_time_readable, False) + str(queue), str(moduleNum), str(card), str(startTime_readable), str(processed_time_readable), str(path) + get_color(None, False)]) - + else: printarray2.append([get_color(processed_time_readable, True) + str(queue), str(moduleNum), str(card), str(startTime_readable), str(processed_time_readable), str(path) + get_color(None, True)]) array_module_type.sort(lambda x,y: cmp(x[4], y[4]), reverse=True) for e in array_module_type: printarray1.append(e) - + for curr_queue in module_file_array: if curr_queue not in all_queue: printarray3.append([curr_queue, "Not running"]) @@ -277,16 +278,16 @@ if __name__ == "__main__": printarray3.append([curr_queue, "Stuck or idle, restarting in " + str(abs(args.treshold - (int(time.time()) - no_info_modules[curr_queue]))) + "s"]) else: printarray3.append([curr_queue, "Stuck or idle, restarting disabled"]) - + ## FIXME To add: ## Button KILL Process using Curses - + printarray1.sort(key=lambda x: x[0][9:], reverse=False) printarray2.sort(key=lambda x: x[0][9:], reverse=False) printarray1.insert(0,["Queue", "PID", "Amount", "Paste start time", "Processing time for current paste (H:M:S)", "Paste hash"]) printarray2.insert(0,["Queue", "PID","Amount", "Paste start time", "Time since idle (H:M:S)", "Last paste hash"]) printarray3.insert(0,["Queue", "State"]) - + os.system('clear') t1 = AsciiTable(printarray1, title="Working queues") t1.column_max_width(1) @@ -304,7 +305,7 @@ if __name__ == "__main__": temp += l + '\n' content[longest_col] = temp.strip() t1.table_data[i] = content - + t2 = AsciiTable(printarray2, title="Idling queues") t2.column_max_width(1) if not t2.ok: @@ -321,33 +322,33 @@ if __name__ == "__main__": temp += l + '\n' content[longest_col] = temp.strip() t2.table_data[i] = content - + t3 = AsciiTable(printarray3, title="Not running queues") t3.column_max_width(1) - + printarray4 = [] for elem in printarrayGlob: if elem is not None: printarray4.append(elem) - + t4 = AsciiTable(printarray4, title="Last actions") 
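ModuleInformation.py builds its four status views (working, idling, not running, last actions) with terminaltables. A minimal, self-contained use of the same AsciiTable calls, with toy rows standing in for printarray1:

from terminaltables import AsciiTable

# Toy data in the same shape as printarray1: a header row followed by queue rows
table_data = [
    ["Queue", "PID", "Amount"],
    ["Mail", "1234", "2"],
    ["Onion", "5678", "0"],
]

t1 = AsciiTable(table_data, title="Working queues")
# column_max_width() reports how wide a given column may be on the current terminal,
# which the module uses to wrap the long "Paste hash" column
print(t1.column_max_width(1))
# .table is the rendered string that the module prints for each view
print(t1.table)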
t4.column_max_width(1) - + legend_array = [["Color", "Meaning"], [Back.RED+Style.BRIGHT+" "*10+Style.RESET_ALL, "Time >=" +str(args.treshold)+Style.RESET_ALL], [Back.MAGENTA+Style.BRIGHT+" "*10+Style.RESET_ALL, "Time >=" +str(args.treshold)+" while idle"+Style.RESET_ALL], [Back.YELLOW+Style.BRIGHT+" "*10+Style.RESET_ALL, "Time >=" +str(args.treshold/2)+Style.RESET_ALL], [Back.GREEN+Style.BRIGHT+" "*10+Style.RESET_ALL, "Time <" +str(args.treshold)]] legend = AsciiTable(legend_array, title="Legend") legend.column_max_width(1) - - print legend.table - print '\n' - print t1.table - print '\n' - print t2.table - print '\n' - print t3.table - print '\n' - print t4.table - - if (datetime.datetime.now() - lastTime).total_seconds() > args.refresh*5: + + print(legend.table) + print('\n') + print(t1.table) + print('\n') + print(t2.table) + print('\n') + print(t3.table) + print('\n') + print(t4.table) + + if (datetime.datetime.now() - lastTime).total_seconds() > args.refresh*5: lastTime = datetime.datetime.now() cleanRedis() #time.sleep(args.refresh) diff --git a/bin/ModuleStats.py b/bin/ModuleStats.py index 648649f7..6743cdca 100755 --- a/bin/ModuleStats.py +++ b/bin/ModuleStats.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ This module makes statistics for some modules and providers @@ -39,11 +39,11 @@ def compute_most_posted(server, message): # Compute Most Posted date = get_date_range(0)[0] # check if this keyword is eligible for progression - keyword_total_sum = 0 + keyword_total_sum = 0 curr_value = server.hget(date, module+'-'+keyword) keyword_total_sum += int(curr_value) if curr_value is not None else 0 - + if server.zcard(redis_progression_name_set) < max_set_cardinality: server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword) @@ -52,17 +52,17 @@ def compute_most_posted(server, message): # Member set is a list of (value, score) pairs if int(member_set[0][1]) < keyword_total_sum: #remove min from set and add the new one - print module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' + print(module + ': adding ' +keyword+ '(' +str(keyword_total_sum)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')') server.zrem(redis_progression_name_set, member_set[0][0]) server.zadd(redis_progression_name_set, float(keyword_total_sum), keyword) - print redis_progression_name_set + print(redis_progression_name_set) def compute_provider_info(server_trend, server_pasteName, path): redis_all_provider = 'all_provider_set' - + paste = Paste.Paste(path) - + paste_baseName = paste.p_name.split('.')[0] paste_size = paste._get_p_size() paste_provider = paste.p_source @@ -84,7 +84,7 @@ def compute_provider_info(server_trend, server_pasteName, path): # # Compute Most Posted # - + # Size if server_trend.zcard(redis_sum_size_set) < max_set_cardinality or server_trend.zscore(redis_sum_size_set, paste_provider) != "nil": server_trend.zadd(redis_sum_size_set, float(num_paste), paste_provider) @@ -94,7 +94,7 @@ def compute_provider_info(server_trend, server_pasteName, path): # Member set is a list of (value, score) pairs if float(member_set[0][1]) < new_avg: #remove min from set and add the new one - print 'Size - adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' + print('Size - adding ' +paste_provider+ '(' +str(new_avg)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')')
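compute_most_posted() and compute_provider_info() both maintain a bounded "top N" sorted set: add freely while the set holds fewer than max_set_cardinality members, otherwise evict the current minimum when the new score beats it. A compact sketch of that pattern follows; note that the zadd() call here uses the mapping form of redis-py 3.x, whereas the positional (score, member) form seen in this patch matches redis-py 2.x.

import redis

r = redis.StrictRedis(decode_responses=True)
MAX_CARD = 10  # stands in for max_set_cardinality

def keep_top_n(zset_key, member, score):
    # Keep at most MAX_CARD members, evicting the lowest-scored one when full
    if r.zcard(zset_key) < MAX_CARD:
        r.zadd(zset_key, {member: score})
    else:
        low_member, low_score = r.zrange(zset_key, 0, 0, withscores=True)[0]
        if low_score < score:
            r.zrem(zset_key, low_member)
            r.zadd(zset_key, {member: score})

keep_top_n('providers_num_paste_set', 'pastebin.com_pro', 42)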
server_trend.zrem(redis_sum_size_set, member_set[0][0]) server_trend.zadd(redis_sum_size_set, float(sum_size), paste_provider) server_trend.zrem(redis_avg_size_name_set, member_set[0][0]) @@ -110,7 +110,7 @@ def compute_provider_info(server_trend, server_pasteName, path): # Member set is a list of (value, score) pairs if int(member_set[0][1]) < num_paste: #remove min from set and add the new one - print 'Num - adding ' +paste_provider+ '(' +str(num_paste)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')' + print('Num - adding ' +paste_provider+ '(' +str(num_paste)+') in set and removing '+member_set[0][0]+'('+str(member_set[0][1])+')') server_trend.zrem(member_set[0][0]) server_trend.zadd(redis_providers_name_set, float(num_paste), paste_provider) @@ -133,14 +133,16 @@ if __name__ == '__main__': # REDIS # r_serv_trend = redis.StrictRedis( - host=p.config.get("Redis_Level_DB_Trending", "host"), - port=p.config.get("Redis_Level_DB_Trending", "port"), - db=p.config.get("Redis_Level_DB_Trending", "db")) + host=p.config.get("ARDB_Trending", "host"), + port=p.config.get("ARDB_Trending", "port"), + db=p.config.get("ARDB_Trending", "db"), + decode_responses=True) r_serv_pasteName = redis.StrictRedis( host=p.config.get("Redis_Paste_Name", "host"), port=p.config.get("Redis_Paste_Name", "port"), - db=p.config.get("Redis_Paste_Name", "db")) + db=p.config.get("Redis_Paste_Name", "db"), + decode_responses=True) # Endless loop getting messages from the input queue while True: @@ -149,7 +151,7 @@ if __name__ == '__main__': if message is None: publisher.debug("{} queue is empty, waiting".format(config_section)) - print 'sleeping' + print('sleeping') time.sleep(20) continue diff --git a/bin/ModulesInformationV2.py b/bin/ModulesInformationV2.py index 041f8ec5..36b397ca 100755 --- a/bin/ModulesInformationV2.py +++ b/bin/ModulesInformationV2.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* from asciimatics.widgets import Frame, ListBox, Layout, Divider, Text, \ @@ -10,7 +10,7 @@ from asciimatics.event import Event from asciimatics.event import KeyboardEvent, MouseEvent import sys, os import time, datetime -import argparse, ConfigParser +import argparse, configparser import json import redis import psutil @@ -45,7 +45,7 @@ TABLES_PADDING = {"running": [12, 23, 8, 8, 23, 10, 55, 11, 11, 12], "idle": [9, QUEUE_STATUS = {} # Maintain the state of the CPU objects -CPU_TABLE = {} +CPU_TABLE = {} CPU_OBJECT_TABLE = {} # Path of the current paste for a pid @@ -137,7 +137,7 @@ class CListBox(ListBox): # Quit if press q elif event.key_code == ord('q'): Dashboard._quit() - + else: # Ignore any other key press. 
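Almost every redis.StrictRedis() constructor in this patch gains decode_responses=True. Under Python 3, redis-py otherwise returns bytes, which would break the string formatting, split() calls and dictionary keys used throughout these modules. A small illustration, assuming a local Redis instance:

import redis

raw = redis.StrictRedis(host='localhost', port=6379, db=0)
text = redis.StrictRedis(host='localhost', port=6379, db=0, decode_responses=True)

raw.hset('queues', 'Mail', 3)
print(raw.hgetall('queues'))   # {b'Mail': b'3'}  -> bytes under Python 3
print(text.hgetall('queues'))  # {'Mail': '3'}    -> str, as the ported code expects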
return event @@ -196,7 +196,7 @@ END EXTENSION ''' SCENE DEFINITION -''' +''' class Dashboard(Frame): def __init__(self, screen): @@ -497,9 +497,8 @@ MANAGE MODULES AND GET INFOS def getPid(module): p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) for line in p.stdout: - print line splittedLine = line.split() - if 'python2' in splittedLine: + if 'python3' in splittedLine: return int(splittedLine[0]) return None @@ -517,15 +516,20 @@ def cleanRedis(): proc = Popen([command_search_name.format(pid)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) try: for line in proc.stdout: + line = line.decode('utf8') splittedLine = line.split() - if ('python2' in splittedLine or 'python' in splittedLine) and "./"+moduleName+".py" in splittedLine: - flag_pid_valid = True + if ('python3.5' in splittedLine or 'python3' in splittedLine or 'python' in splittedLine): + moduleCommand = "./"+moduleName + ".py" + moduleCommand2 = moduleName + ".py" + if(moduleCommand in splittedLine or moduleCommand2 in splittedLine): + flag_pid_valid = True + if not flag_pid_valid: #print flag_pid_valid, 'cleaning', pid, 'in', k server.srem(k, pid) inst_time = datetime.datetime.fromtimestamp(int(time.time())) - log(([str(inst_time).split(' ')[1], moduleName, pid, "Cleared invalid pid in " + k], 0)) + log(([str(inst_time).split(' ')[1], moduleName, pid, "Cleared invalid pid in " + (k)], 0)) #Error due to resize, interrupted sys call except IOError as e: @@ -601,15 +605,17 @@ def fetchQueueData(): printarray_running = [] printarray_idle = [] printarray_notrunning = [] - for queue, card in server.hgetall("queues").iteritems(): + for queue, card in iter(server.hgetall("queues").items()): all_queue.add(queue) key = "MODULE_" + queue + "_" keySet = "MODULE_TYPE_" + queue array_module_type = [] - + for moduleNum in server.smembers(keySet): value = server.get(key + str(moduleNum)) - complete_paste_path = server.get(key + str(moduleNum) + "_PATH") + complete_paste_path = ( server.get(key + str(moduleNum) + "_PATH") ) + if(complete_paste_path is not None): + complete_paste_path = complete_paste_path COMPLETE_PASTE_PATH_PER_PID[moduleNum] = complete_paste_path if value is not None: @@ -624,7 +630,7 @@ def fetchQueueData(): QUEUE_STATUS[moduleNum] = 1 else: QUEUE_STATUS[moduleNum] = 0 - + # Queue contain elements if int(card) > 0: # Queue need to be killed @@ -636,7 +642,7 @@ def fetchQueueData(): last_kill_try = kill_retry_threshold+1 if args.autokill == 1 and last_kill_try > kill_retry_threshold : kill_module(queue, int(moduleNum)) - + # Create CPU objects try: cpu_percent = CPU_OBJECT_TABLE[int(moduleNum)].cpu_percent() @@ -644,6 +650,7 @@ def fetchQueueData(): cpu_avg = sum(CPU_TABLE[moduleNum])/len(CPU_TABLE[moduleNum]) if len(CPU_TABLE[moduleNum]) > args.refresh*10: CPU_TABLE[moduleNum].pop() + mem_percent = CPU_OBJECT_TABLE[int(moduleNum)].memory_percent() except psutil.NoSuchProcess: del CPU_OBJECT_TABLE[int(moduleNum)] @@ -652,6 +659,7 @@ def fetchQueueData(): cpu_avg = cpu_percent mem_percent = 0 except KeyError: + #print('key error2') try: CPU_OBJECT_TABLE[int(moduleNum)] = psutil.Process(int(moduleNum)) cpu_percent = CPU_OBJECT_TABLE[int(moduleNum)].cpu_percent() @@ -664,17 +672,17 @@ def fetchQueueData(): mem_percent = 0 array_module_type.append( ([" [ ]", str(queue), str(moduleNum), str(card), str(startTime_readable), - str(processed_time_readable), str(path), "{0:.2f}".format(cpu_percent)+"%", + str(processed_time_readable), str(path), "{0:.2f}".format(cpu_percent)+"%", 
"{0:.2f}".format(mem_percent)+"%", "{0:.2f}".format(cpu_avg)+"%"], moduleNum) ) - + else: printarray_idle.append( ([" ", str(queue), str(moduleNum), str(processed_time_readable), str(path)], moduleNum) ) PID_NAME_DICO[int(moduleNum)] = str(queue) - array_module_type.sort(lambda x,y: cmp(x[0][4], y[0][4]), reverse=True) #Sort by num of pastes + #array_module_type.sort(lambda x,y: cmp(x[0][4], y[0][4]), reverse=True) #Sort by num of pastes for e in array_module_type: printarray_running.append(e) - + for curr_queue in module_file_array: if curr_queue not in all_queue: #Module not running by default printarray_notrunning.append( ([" ", curr_queue, "Not running by default"], curr_queue) ) @@ -692,8 +700,8 @@ def fetchQueueData(): printarray_notrunning.append( ([" ", curr_queue, "Stuck or idle, restarting in " + str(abs(args.treshold - (int(time.time()) - no_info_modules[curr_queue]))) + "s"], curr_queue) ) else: printarray_notrunning.append( ([" ", curr_queue, "Stuck or idle, restarting disabled"], curr_queue) ) - - + + printarray_running.sort(key=lambda x: x[0], reverse=False) printarray_idle.sort(key=lambda x: x[0], reverse=False) printarray_notrunning.sort(key=lambda x: x[0][1], reverse=False) @@ -715,6 +723,7 @@ def format_string(tab, padding_row): text="" for ite, elem in enumerate(the_array): + if len(elem) > padding_row[ite]: text += "*" + elem[-padding_row[ite]+6:] padd_off = " "*5 @@ -761,7 +770,7 @@ def demo(screen): if time.time() - time_cooldown > args.refresh: cleanRedis() - for key, val in fetchQueueData().iteritems(): #fetch data and put it into the tables + for key, val in iter(fetchQueueData().items()): #fetch data and put it into the tables TABLES[key] = val TABLES["logs"] = format_string(printarrayLog, TABLES_PADDING["logs"]) @@ -790,14 +799,15 @@ if __name__ == "__main__": Did you set environment variables? \ Or activate the virtualenv.') - cfg = ConfigParser.ConfigParser() + cfg = configparser.ConfigParser() cfg.read(configfile) # REDIS # server = redis.StrictRedis( host=cfg.get("Redis_Queues", "host"), port=cfg.getint("Redis_Queues", "port"), - db=cfg.getint("Redis_Queues", "db")) + db=cfg.getint("Redis_Queues", "db"), + decode_responses=True) if args.clear == 1: clearRedisModuleInfo() @@ -821,7 +831,7 @@ if __name__ == "__main__": module_file_array.add(line[:-1]) cleanRedis() - + TABLES_TITLES["running"] = format_string([([" Action", "Queue name", "PID", "#", "S Time", "R Time", "Processed element", "CPU %", "Mem %", "Avg CPU%"],0)], TABLES_PADDING["running"])[0][0] TABLES_TITLES["idle"] = format_string([([" Action", "Queue", "PID", "Idle Time", "Last paste hash"],0)], TABLES_PADDING["idle"])[0][0] TABLES_TITLES["notRunning"] = format_string([([" Action", "Queue", "State"],0)], TABLES_PADDING["notRunning"])[0][0] diff --git a/bin/NotificationHelper.py b/bin/NotificationHelper.py index dd15499a..8c65bb3d 100755 --- a/bin/NotificationHelper.py +++ b/bin/NotificationHelper.py @@ -1,10 +1,10 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* -import ConfigParser +import configparser import os import smtplib -from email.MIMEMultipart import MIMEMultipart +from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText """ @@ -22,31 +22,31 @@ TrackedTermsNotificationEnabled_Name = "TrackedNotifications" TrackedTermsNotificationEmailsPrefix_Name = "TrackedNotificationEmails_" def sendEmailNotification(recipient, term): - + if not os.path.exists(configfile): raise Exception('Unable to find the configuration file. 
\ Did you set environment variables? \ Or activate the virtualenv?') - cfg = ConfigParser.ConfigParser() + cfg = configparser.ConfigParser() cfg.read(configfile) sender = cfg.get("Notifications", "sender"), sender_host = cfg.get("Notifications", "sender_host"), sender_port = cfg.getint("Notifications", "sender_port"), sender_pw = cfg.get("Notifications", "sender_pw"), - + if isinstance(sender, tuple): sender = sender[0] if isinstance(sender_host, tuple): sender_host = sender_host[0] - + if isinstance(sender_port, tuple): sender_port = sender_port[0] - + if isinstance(sender_pw, tuple): - sender_pw = sender_pw[0] + sender_pw = sender_pw[0] # raise an exception if any of these is None if (sender is None or @@ -62,22 +62,19 @@ def sendEmailNotification(recipient, term): smtp_server.login(sender, sender_pw) else: smtp_server = smtplib.SMTP(sender_host, sender_port) - - + + mime_msg = MIMEMultipart() mime_msg['From'] = sender mime_msg['To'] = recipient mime_msg['Subject'] = "AIL Term Alert" - + body = "New occurrence for term: " + term mime_msg.attach(MIMEText(body, 'plain')) - + smtp_server.sendmail(sender, recipient, mime_msg.as_string()) smtp_server.quit() - + except Exception as e: - print str(e) + print(str(e)) # raise e - - - diff --git a/bin/Onion.py b/bin/Onion.py index aaf30a1b..77ed75fe 100755 --- a/bin/Onion.py +++ b/bin/Onion.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ The ZMQ_Sub_Onion Module @@ -37,11 +37,12 @@ from Helper import Process def fetch(p, r_cache, urls, domains, path): failed = [] downloaded = [] - print len(urls), 'Urls to fetch.' + print('{} Urls to fetch'.format(len(urls))) for url, domain in zip(urls, domains): if r_cache.exists(url) or url in failed: continue - to_fetch = base64.standard_b64encode(url) + to_fetch = base64.standard_b64encode(url.encode('utf8')) + print('fetching url: {}'.format(to_fetch)) process = subprocess.Popen(["python", './tor_fetcher.py', to_fetch], stdout=subprocess.PIPE) while process.poll() is None: @@ -51,8 +52,10 @@ def fetch(p, r_cache, urls, domains, path): r_cache.setbit(url, 0, 1) r_cache.expire(url, 360000) downloaded.append(url) - tempfile = process.stdout.read().strip() - with open(tempfile, 'r') as f: + print('downloaded : {}'.format(downloaded)) + '''tempfile = process.stdout.read().strip() + tempfile = tempfile.decode('utf8') + #with open(tempfile, 'r') as f: filename = path + domain + '.gz' fetched = f.read() content = base64.standard_b64decode(fetched) @@ -66,16 +69,16 @@ def fetch(p, r_cache, urls, domains, path): ff.write(content) p.populate_set_out(save_path, 'Global') p.populate_set_out(url, 'ValidOnion') - p.populate_set_out(fetched, 'FetchedOnion') - yield url - os.unlink(tempfile) + p.populate_set_out(fetched, 'FetchedOnion')''' + yield url + #os.unlink(tempfile) else: r_cache.setbit(url, 0, 0) r_cache.expire(url, 3600) failed.append(url) - print 'Failed at downloading', url - print process.stdout.read() - print 'Failed:', len(failed), 'Downloaded:', len(downloaded) + print('Failed at downloading', url) + print(process.stdout.read()) + print('Failed:', len(failed), 'Downloaded:', len(downloaded)) if __name__ == "__main__": @@ -91,7 +94,8 @@ if __name__ == "__main__": r_cache = redis.StrictRedis( host=p.config.get("Redis_Cache", "host"), port=p.config.getint("Redis_Cache", "port"), - db=p.config.getint("Redis_Cache", "db")) + db=p.config.getint("Redis_Cache", "db"), + decode_responses=True) # FUNCTIONS # publisher.info("Script subscribed to channel onion_categ") @@ -109,7 
+113,7 @@ if __name__ == "__main__": while True: if message is not None: - print message + print(message) filename, score = message.split() # "For each new paste" @@ -131,6 +135,8 @@ if __name__ == "__main__": PST.save_attribute_redis(channel, domains_list) to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date, PST.p_name) + + print(len(domains_list)) if len(domains_list) > 0: publisher.warning('{}Detected {} .onion(s);{}'.format( @@ -144,7 +150,7 @@ if __name__ == "__main__": PST.p_date, PST.p_name) for url in fetch(p, r_cache, urls, domains_list, path): - publisher.warning('{}Checked {};{}'.format(to_print, url, PST.p_path)) + publisher.info('{}Checked {};{}'.format(to_print, url, PST.p_path)) p.populate_set_out('onion;{}'.format(PST.p_path), 'alertHandler') else: publisher.info('{}Onion related;{}'.format(to_print, PST.p_path)) @@ -152,6 +158,6 @@ if __name__ == "__main__": prec_filename = filename else: publisher.debug("Script url is Idling 10s") - print 'Sleeping' + #print('Sleeping') time.sleep(10) message = p.get_from_set() diff --git a/bin/Phone.py b/bin/Phone.py index 7a4811da..e3f0f908 100755 --- a/bin/Phone.py +++ b/bin/Phone.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ @@ -30,10 +30,11 @@ def search_phone(message): # if the list is greater than 4, we consider the Paste may contain a list of phone numbers if len(results) > 4: - print results + print(results) publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name)) #send to Browse_warning_paste - p.populate_set_out('phone;{}'.format(message), 'alertHandler') + msg = 'phone;{}'.format(message) + p.populate_set_out(msg, 'alertHandler') #Send to duplicate p.populate_set_out(message, 'Duplicate') stats = {} diff --git a/bin/QueueIn.py b/bin/QueueIn.py index 683a50ef..4495e9c4 100755 --- a/bin/QueueIn.py +++ b/bin/QueueIn.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* diff --git a/bin/QueueOut.py b/bin/QueueOut.py index d2853274..dbb36513 100755 --- a/bin/QueueOut.py +++ b/bin/QueueOut.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* from pubsublogger import publisher diff --git a/bin/Queues_Monitoring.py b/bin/Queues_Monitoring.py index 20c137fb..3f0462ab 100755 --- a/bin/Queues_Monitoring.py +++ b/bin/Queues_Monitoring.py @@ -1,9 +1,9 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* import redis import argparse -import ConfigParser +import configparser import time import os from pubsublogger import publisher @@ -14,7 +14,7 @@ def main(): """Main Function""" # CONFIG # - cfg = ConfigParser.ConfigParser() + cfg = configparser.ConfigParser() cfg.read('./packages/config.cfg') # SCRIPT PARSER # @@ -30,7 +30,8 @@ def main(): r_serv = redis.StrictRedis( host=cfg.get("Redis_Queues", "host"), port=cfg.getint("Redis_Queues", "port"), - db=cfg.getint("Redis_Queues", "db")) + db=cfg.getint("Redis_Queues", "db"), + decode_responses=True) # LOGGING # publisher.port = 6380 @@ -49,7 +50,7 @@ def main(): row.sort() table.add_rows(row, header=False) os.system('clear') - print table.draw() + print(table.draw()) if __name__ == "__main__": diff --git a/bin/RegexForTermsFrequency.py b/bin/RegexForTermsFrequency.py index d1534eab..7aea03f0 100755 --- a/bin/RegexForTermsFrequency.py +++ b/bin/RegexForTermsFrequency.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ This Module is used for term frequency. 
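In the Onion.py hunk above, the URL is now encoded before being handed to base64: under Python 3, base64.standard_b64encode() accepts only bytes and returns bytes, so the str URL has to go through .encode('utf8') first. In isolation:

import base64

url = 'http://example.onion/page'

# Python 3: b64encode takes bytes and returns bytes
to_fetch = base64.standard_b64encode(url.encode('utf8'))
print('fetching url: {}'.format(to_fetch))

# Decoding reverses it; note the extra .decode('utf8') to get a str back
original = base64.standard_b64decode(to_fetch).decode('utf8')
assert original == url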
@@ -54,9 +54,10 @@ if __name__ == "__main__": # REDIS # server_term = redis.StrictRedis( - host=p.config.get("Redis_Level_DB_TermFreq", "host"), - port=p.config.get("Redis_Level_DB_TermFreq", "port"), - db=p.config.get("Redis_Level_DB_TermFreq", "db")) + host=p.config.get("ARDB_TermFreq", "host"), + port=p.config.get("ARDB_TermFreq", "port"), + db=p.config.get("ARDB_TermFreq", "db"), + decode_responses=True) # FUNCTIONS # publisher.info("RegexForTermsFrequency script started") @@ -115,6 +116,6 @@ if __name__ == "__main__": else: publisher.debug("Script RegexForTermsFrequency is Idling") - print "sleeping" + print("sleeping") time.sleep(5) message = p.get_from_set() diff --git a/bin/Release.py b/bin/Release.py index 98e60a96..6e7a8277 100755 --- a/bin/Release.py +++ b/bin/Release.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* import time from packages import Paste @@ -6,6 +6,16 @@ from pubsublogger import publisher from Helper import Process import re +import signal + +class TimeoutException(Exception): + pass + +def timeout_handler(signum, frame): + raise TimeoutException + +signal.signal(signal.SIGALRM, timeout_handler) + ''' This module takes its input from the global module. It applies some regex and publish matched content @@ -16,6 +26,7 @@ if __name__ == "__main__": publisher.channel = "Script" config_section = "Release" p = Process(config_section) + max_execution_time = p.config.getint("Curve", "max_execution_time") publisher.info("Release scripts to find release names") movie = "[a-zA-Z0-9.]+\.[0-9]{4}.[a-zA-Z0-9.]+\-[a-zA-Z]+" @@ -29,18 +40,28 @@ if __name__ == "__main__": filepath = p.get_from_set() if filepath is None: publisher.debug("Script Release is Idling 10s") - print 'Sleeping' + print('Sleeping') time.sleep(10) continue paste = Paste.Paste(filepath) content = paste.get_p_content() - releases = set(re.findall(regex, content)) - if len(releases) == 0: - continue - to_print = 'Release;{};{};{};{} releases;{}'.format(paste.p_source, paste.p_date, paste.p_name, len(releases), paste.p_path) - if len(releases) > 30: - publisher.warning(to_print) + signal.alarm(max_execution_time) + try: + releases = set(re.findall(regex, content)) + if len(releases) == 0: + continue + + to_print = 'Release;{};{};{};{} releases;{}'.format(paste.p_source, paste.p_date, paste.p_name, len(releases), paste.p_path) + print(to_print) + if len(releases) > 30: + publisher.warning(to_print) + else: + publisher.info(to_print) + + except TimeoutException: + print ("{0} processing timeout".format(paste.p_path)) + continue else: - publisher.info(to_print) + signal.alarm(0) diff --git a/bin/Repartition_graph.py b/bin/Repartition_graph.py index 38c5e1b6..5aa146a2 100755 --- a/bin/Repartition_graph.py +++ b/bin/Repartition_graph.py @@ -1,9 +1,9 @@ -#!/usr/bin/python2.7 +#!/usr/bin/python3 # -*-coding:UTF-8 -* import redis import argparse -import ConfigParser +import configparser from datetime import datetime from pubsublogger import publisher @@ -14,7 +14,7 @@ def main(): """Main Function""" # CONFIG # - cfg = ConfigParser.ConfigParser() + cfg = configparser.ConfigParser() cfg.read('./packages/config.cfg') # SCRIPT PARSER # @@ -33,9 +33,10 @@ def main(): # port generated automatically depending on the date curYear = datetime.now().year if args.year is None else args.year r_serv = redis.StrictRedis( - host=cfg.get("Redis_Level_DB_Hashs", "host"), - port=curYear, - db=cfg.getint("Redis_Level_DB_Hashs", "db")) + host=cfg.get("ARDB_Hashs", "host"), + 
port=cfg.getint("ARDB_Hashs", "port"), + db=curYear, + decode_responses=True) # LOGGING # publisher.port = 6380 diff --git a/bin/SQLInjectionDetection.py b/bin/SQLInjectionDetection.py index 318466c8..9e28de72 100755 --- a/bin/SQLInjectionDetection.py +++ b/bin/SQLInjectionDetection.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ @@ -13,7 +13,7 @@ It test different possibility to makes some sqlInjection. import time import string -import urllib2 +import urllib.request import re from pubsublogger import publisher from Helper import Process @@ -66,16 +66,16 @@ def analyse(url, path): result_query = 0 if resource_path is not None: - result_path = is_sql_injection(resource_path) + result_path = is_sql_injection(resource_path.decode('utf8')) if query_string is not None: - result_query = is_sql_injection(query_string) + result_query = is_sql_injection(query_string.decode('utf8')) if (result_path > 0) or (result_query > 0): paste = Paste.Paste(path) if (result_path > 1) or (result_query > 1): - print "Detected SQL in URL: " - print urllib2.unquote(url) + print("Detected SQL in URL: ") + print(urllib.request.unquote(url)) to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_path) publisher.warning(to_print) #Send to duplicate @@ -83,8 +83,8 @@ def analyse(url, path): #send to Browse_warning_paste p.populate_set_out('sqlinjection;{}'.format(path), 'alertHandler') else: - print "Potential SQL injection:" - print urllib2.unquote(url) + print("Potential SQL injection:") + print(urllib.request.unquote(url)) to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Potential SQL injection", paste.p_path) publisher.info(to_print) @@ -92,8 +92,8 @@ def analyse(url, path): # Try to detect if the url passed might be an sql injection by appliying the regex # defined above on it. def is_sql_injection(url_parsed): - line = urllib2.unquote(url_parsed) - line = string.upper(line) + line = urllib.request.unquote(url_parsed) + line = str.upper(line) result = [] result_suspect = [] @@ -104,20 +104,20 @@ def is_sql_injection(url_parsed): for word_list in word_injection: for word in word_list: - temp_res = string.find(line, string.upper(word)) + temp_res = str.find(line, str.upper(word)) if temp_res!=-1: result.append(line[temp_res:temp_res+len(word)]) for word in word_injection_suspect: - temp_res = string.find(line, string.upper(word)) + temp_res = str.find(line, str.upper(word)) if temp_res!=-1: result_suspect.append(line[temp_res:temp_res+len(word)]) if len(result)>0: - print result + print(result) return 2 elif len(result_suspect)>0: - print result_suspect + print(result_suspect) return 1 else: return 0 diff --git a/bin/SentimentAnalysis.py b/bin/SentimentAnalysis.py index 00b15abb..34beea3f 100755 --- a/bin/SentimentAnalysis.py +++ b/bin/SentimentAnalysis.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ Sentiment analyser module. @@ -33,7 +33,7 @@ size_threshold = 250 line_max_length_threshold = 1000 import os -import ConfigParser +import configparser configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') if not os.path.exists(configfile): @@ -41,7 +41,7 @@ if not os.path.exists(configfile): Did you set environment variables? 
\ Or activate the virtualenv.') -cfg = ConfigParser.ConfigParser() +cfg = configparser.ConfigParser() cfg.read(configfile) sentiment_lexicon_file = cfg.get("Directories", "sentiment_lexicon_file") @@ -69,7 +69,7 @@ def Analyse(message, server): combined_datetime = datetime.datetime.combine(the_date, the_time) timestamp = calendar.timegm(combined_datetime.timetuple()) - sentences = tokenize.sent_tokenize(p_content.decode('utf-8', 'ignore')) + sentences = tokenize.sent_tokenize(p_content) if len(sentences) > 0: avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0} @@ -99,7 +99,7 @@ def Analyse(message, server): avg_score[k] = avg_score[k] / len(sentences) - # In redis-levelDB: {} = set, () = K-V + # In redis-levelDB: {} = set, () = K-V # {Provider_set -> provider_i} # {Provider_TimestampInHour_i -> UniqID_i}_j # (UniqID_i -> PasteValue_i) @@ -109,11 +109,11 @@ def Analyse(message, server): provider_timestamp = provider + '_' + str(timestamp) server.incr('UniqID') UniqID = server.get('UniqID') - print provider_timestamp, '->', UniqID, 'dropped', num_line_removed, 'lines' + print(provider_timestamp, '->', UniqID, 'dropped', num_line_removed, 'lines') server.sadd(provider_timestamp, UniqID) server.set(UniqID, avg_score) else: - print 'Dropped:', p_MimeType + print('Dropped:', p_MimeType) def isJSON(content): @@ -121,7 +121,7 @@ def isJSON(content): json.loads(content) return True - except Exception,e: + except Exception: return False import signal @@ -152,9 +152,10 @@ if __name__ == '__main__': # REDIS_LEVEL_DB # server = redis.StrictRedis( - host=p.config.get("Redis_Level_DB_Sentiment", "host"), - port=p.config.get("Redis_Level_DB_Sentiment", "port"), - db=p.config.get("Redis_Level_DB_Sentiment", "db")) + host=p.config.get("ARDB_Sentiment", "host"), + port=p.config.get("ARDB_Sentiment", "port"), + db=p.config.get("ARDB_Sentiment", "db"), + decode_responses=True) while True: message = p.get_from_set() @@ -170,4 +171,3 @@ if __name__ == '__main__': continue else: signal.alarm(0) - diff --git a/bin/SetForTermsFrequency.py b/bin/SetForTermsFrequency.py index 014ce10e..b42f07e6 100755 --- a/bin/SetForTermsFrequency.py +++ b/bin/SetForTermsFrequency.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ This Module is used for term frequency. 
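Release.py above (and SentimentAnalysis.py) guard their expensive regex and tokenize work with a SIGALRM-based timeout so a single pathological paste cannot stall the module. The pattern on its own, with an illustrative limit in place of the max_execution_time value read from the config (Unix-only, since it relies on signals):

import re
import signal

class TimeoutException(Exception):
    pass

def timeout_handler(signum, frame):
    raise TimeoutException

signal.signal(signal.SIGALRM, timeout_handler)

max_execution_time = 30  # illustrative; the modules read this from config.cfg
content = 'Some.Release.2018.Name-GROUP ' * 1000

signal.alarm(max_execution_time)        # arm the alarm before the heavy call
try:
    releases = set(re.findall(r'[a-zA-Z0-9.]+\.[0-9]{4}\.[a-zA-Z0-9.]+-[a-zA-Z]+', content))
except TimeoutException:
    print('processing timeout')
else:
    signal.alarm(0)                     # disarm once the work finished in time
    print('{} releases found'.format(len(releases)))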
@@ -52,9 +52,10 @@ if __name__ == "__main__": # REDIS # server_term = redis.StrictRedis( - host=p.config.get("Redis_Level_DB_TermFreq", "host"), - port=p.config.get("Redis_Level_DB_TermFreq", "port"), - db=p.config.get("Redis_Level_DB_TermFreq", "db")) + host=p.config.get("ARDB_TermFreq", "host"), + port=p.config.get("ARDB_TermFreq", "port"), + db=p.config.get("ARDB_TermFreq", "db"), + decode_responses=True) # FUNCTIONS # publisher.info("RegexForTermsFrequency script started") @@ -126,6 +127,6 @@ if __name__ == "__main__": else: publisher.debug("Script RegexForTermsFrequency is Idling") - print "sleeping" + print("sleeping") time.sleep(5) message = p.get_from_set() diff --git a/bin/Shutdown.py b/bin/Shutdown.py index 8467dafb..609b257a 100755 --- a/bin/Shutdown.py +++ b/bin/Shutdown.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ The ZMQ_Feed_Q Module @@ -21,7 +21,7 @@ Requirements """ import redis -import ConfigParser +import configparser import os configfile = os.path.join(os.environ['AIL_BIN'], './packages/config.cfg') @@ -31,13 +31,14 @@ def main(): """Main Function""" # CONFIG # - cfg = ConfigParser.ConfigParser() + cfg = configparser.ConfigParser() cfg.read(configfile) # REDIS r_serv = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"), port=cfg.getint("Redis_Queues", "port"), - db=cfg.getint("Redis_Queues", "db")) + db=cfg.getint("Redis_Queues", "db"), + decode_responses=True) # FIXME: automatic based on the queue name. # ### SCRIPTS #### diff --git a/bin/SourceCode.py b/bin/SourceCode.py index 41120e69..f34bb34e 100644 --- a/bin/SourceCode.py +++ b/bin/SourceCode.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* import time from packages import Paste @@ -26,10 +26,10 @@ if __name__ == "__main__": adr = "0x[a-f0-9]{2}" #asm = "\"((?s).{1}x[0-9a-f]{2}){3,}" ISSUES WITH FINDALL, pattern like \x54\xaf\x23\.. 
- + languages = [shell, c, php, bash, python, javascript, bash, ruby, adr] regex = '|'.join(languages) - print regex + print(regex) while True: message = p.get_from_set() diff --git a/bin/Tokenize.py b/bin/Tokenize.py index 377cba5a..fdefeb6a 100755 --- a/bin/Tokenize.py +++ b/bin/Tokenize.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ The Tokenize Module @@ -50,7 +50,7 @@ if __name__ == "__main__": while True: message = p.get_from_set() - print message + print(message) if message is not None: paste = Paste.Paste(message) signal.alarm(5) @@ -67,4 +67,4 @@ if __name__ == "__main__": else: publisher.debug("Tokeniser is idling 10s") time.sleep(10) - print "sleepin" + print("Sleeping") diff --git a/bin/Update-conf.py b/bin/Update-conf.py index 863ff436..901cb935 100755 --- a/bin/Update-conf.py +++ b/bin/Update-conf.py @@ -1,8 +1,8 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* -import ConfigParser -from ConfigParser import ConfigParser as cfgP +import configparser +from configparser import ConfigParser as cfgP import os from collections import OrderedDict import sys @@ -20,14 +20,14 @@ def main(): Or activate the virtualenv.') configfileSample = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg.sample') - cfg = ConfigParser.ConfigParser() + cfg = configparser.ConfigParser() cfg.read(configfile) - cfgSample = ConfigParser.ConfigParser() + cfgSample = configparser.ConfigParser() cfgSample.read(configfileSample) sections = cfgP.sections(cfg) sectionsSample = cfgP.sections(cfgSample) - + missingSection = [] dicoMissingSection = {} missingItem = [] @@ -63,12 +63,12 @@ def main(): print(" - "+item[0]) print("+--------------------------------------------------------------------+") - resp = raw_input("Do you want to auto fix it? [y/n] ") + resp = input("Do you want to auto fix it? [y/n] ") if resp != 'y': return False else: - resp2 = raw_input("Do you want to keep a backup of the old configuration file? [y/n] ") + resp2 = input("Do you want to keep a backup of the old configuration file? 
[y/n] ") if resp2 == 'y': shutil.move(configfile, configfileBackup) @@ -89,7 +89,7 @@ def main(): ''' Return a new dico with the section ordered as the old configuration with the updated one added ''' def add_items_to_correct_position(sample_dico, old_dico, missingSection, dicoMissingSection): new_dico = OrderedDict() - + positions = {} for pos_i, sec in enumerate(sample_dico): if sec in missingSection: @@ -109,4 +109,3 @@ if __name__ == "__main__": sys.exit() else: sys.exit(1) - diff --git a/bin/Web.py b/bin/Web.py index dc2bf2fd..45e5bfbe 100755 --- a/bin/Web.py +++ b/bin/Web.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ @@ -47,7 +47,8 @@ if __name__ == "__main__": r_serv2 = redis.StrictRedis( host=p.config.get("Redis_Cache", "host"), port=p.config.getint("Redis_Cache", "port"), - db=p.config.getint("Redis_Cache", "db")) + db=p.config.getint("Redis_Cache", "db"), + decode_responses=True) # Protocol file path protocolsfile_path = os.path.join(os.environ['AIL_HOME'], @@ -95,17 +96,23 @@ if __name__ == "__main__": subdomain = faup.get_subdomain() f1 = None - domains_list.append(domain) - publisher.debug('{} Published'.format(url)) if f1 == "onion": - print domain + print(domain) + + if subdomain is not None: + subdomain = subdomain.decode('utf8') + + if domain is not None: + domain = domain.decode('utf8') + domains_list.append(domain) + + hostl = avoidNone(subdomain) + avoidNone(domain) - hostl = unicode(avoidNone(subdomain)+avoidNone(domain)) try: socket.setdefaulttimeout(1) - ip = socket.gethostbyname(unicode(hostl)) + ip = socket.gethostbyname(hostl) except: # If the resolver is not giving any IPv4 address, # ASN/CC lookup is skip. @@ -113,32 +120,36 @@ if __name__ == "__main__": try: l = client.lookup(ip, qType='IP') + except ipaddress.AddressValueError: continue cc = getattr(l, 'cc') - asn = getattr(l, 'asn') + if getattr(l, 'asn') is not None: + asn = getattr(l, 'asn')[2:] #remobe b' # EU is not an official ISO 3166 code (but used by RIPE # IP allocation) if cc is not None and cc != "EU": - print hostl, asn, cc, \ - pycountry.countries.get(alpha_2=cc).name + print(hostl, asn, cc, \ + pycountry.countries.get(alpha_2=cc).name) if cc == cc_critical: to_print = 'Url;{};{};{};Detected {} {}'.format( PST.p_source, PST.p_date, PST.p_name, hostl, cc) #publisher.warning(to_print) - print to_print + print(to_print) else: - print hostl, asn, cc + print(hostl, asn, cc) A_values = lib_refine.checking_A_record(r_serv2, domains_list) + if A_values[0] >= 1: PST.__setattr__(channel, A_values) PST.save_attribute_redis(channel, (A_values[0], list(A_values[1]))) + pprint.pprint(A_values) publisher.info('Url;{};{};{};Checked {} URL;{}'.format( PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_path)) @@ -146,7 +157,7 @@ if __name__ == "__main__": else: publisher.debug("Script url is Idling 10s") - print 'Sleeping' + print('Sleeping') time.sleep(10) message = p.get_from_set() diff --git a/bin/WebStats.py b/bin/WebStats.py index cbb52e7a..7eecb0d2 100755 --- a/bin/WebStats.py +++ b/bin/WebStats.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ @@ -29,11 +29,12 @@ num_day_to_look = 5 # the detection of the progression start num_day_to_lo def analyse(server, field_name, date, url_parsed): field = url_parsed[field_name] if field is not None: + field = field.decode('utf8') server.hincrby(field, date, 1) if field_name == "domain": #save domain in a set for the monthly plot domain_set_name = "domain_set_" + date[0:6] 
server.sadd(domain_set_name, field) - print "added in " + domain_set_name +": "+ field + print("added in " + domain_set_name +": "+ field) def get_date_range(num_day): curr_date = datetime.date.today() @@ -113,16 +114,17 @@ if __name__ == '__main__': # REDIS # r_serv_trend = redis.StrictRedis( - host=p.config.get("Redis_Level_DB_Trending", "host"), - port=p.config.get("Redis_Level_DB_Trending", "port"), - db=p.config.get("Redis_Level_DB_Trending", "db")) + host=p.config.get("ARDB_Trending", "host"), + port=p.config.get("ARDB_Trending", "port"), + db=p.config.get("ARDB_Trending", "db"), + decode_responses=True) # FILE CURVE SECTION # csv_path_proto = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "protocolstrending_csv")) protocolsfile_path = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "protocolsfile")) - + csv_path_tld = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "tldstrending_csv")) tldsfile_path = os.path.join(os.environ['AIL_HOME'], @@ -145,24 +147,25 @@ if __name__ == '__main__': year = today.year month = today.month - print 'Building protocol graph' + print('Building protocol graph') lib_words.create_curve_with_word_file(r_serv_trend, csv_path_proto, protocolsfile_path, year, month) - print 'Building tld graph' + print('Building tld graph') lib_words.create_curve_with_word_file(r_serv_trend, csv_path_tld, tldsfile_path, year, month) - print 'Building domain graph' + print('Building domain graph') lib_words.create_curve_from_redis_set(r_serv_trend, csv_path_domain, "domain", year, month) - print 'end building' + print('end building') + publisher.debug("{} queue is empty, waiting".format(config_section)) - print 'sleeping' + print('sleeping') time.sleep(5*60) continue @@ -172,10 +175,14 @@ if __name__ == '__main__': url, date, path = message.split() faup.decode(url) url_parsed = faup.get() - - analyse(r_serv_trend, 'scheme', date, url_parsed) #Scheme analysis - analyse(r_serv_trend, 'tld', date, url_parsed) #Tld analysis - analyse(r_serv_trend, 'domain', date, url_parsed) #Domain analysis + + # Scheme analysis + analyse(r_serv_trend, 'scheme', date, url_parsed) + # Tld analysis + analyse(r_serv_trend, 'tld', date, url_parsed) + # Domain analysis + analyse(r_serv_trend, 'domain', date, url_parsed) + compute_progression(r_serv_trend, 'scheme', num_day_to_look, url_parsed) compute_progression(r_serv_trend, 'tld', num_day_to_look, url_parsed) compute_progression(r_serv_trend, 'domain', num_day_to_look, url_parsed) diff --git a/bin/ailleakObject.py b/bin/ailleakObject.py index 8b7ea185..bbf88711 100755 --- a/bin/ailleakObject.py +++ b/bin/ailleakObject.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3.5 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* from pymisp.tools.abstractgenerator import AbstractMISPObjectGenerator @@ -15,7 +15,7 @@ class AilleakObject(AbstractMISPObjectGenerator): self._p_source = p_source.split('/')[-5:] self._p_source = '/'.join(self._p_source)[:-3] # -3 removes .gz self._p_date = p_date - self._p_content = p_content.encode('utf8') + self._p_content = p_content self._p_duplicate = p_duplicate self._p_duplicate_number = p_duplicate_number self.generate_attributes() @@ -37,7 +37,7 @@ class ObjectWrapper: self.eventID_to_push = self.get_daily_event_id() cfg = configparser.ConfigParser() cfg.read('./packages/config.cfg') - self.maxDuplicateToPushToMISP = cfg.getint("ailleakObject", "maxDuplicateToPushToMISP") + self.maxDuplicateToPushToMISP = cfg.getint("ailleakObject", "maxDuplicateToPushToMISP") def 
add_new_object(self, moduleName, path): self.moduleName = moduleName @@ -45,13 +45,10 @@ class ObjectWrapper: self.paste = Paste.Paste(path) self.p_date = self.date_to_str(self.paste.p_date) self.p_source = self.paste.p_path - self.p_content = self.paste.get_p_content().decode('utf8') - + self.p_content = self.paste.get_p_content() + temp = self.paste._get_p_duplicate() - try: - temp = temp.decode('utf8') - except AttributeError: - pass + #beautifier temp = json.loads(temp) self.p_duplicate_number = len(temp) if len(temp) >= 0 else 0 @@ -108,8 +105,8 @@ class ObjectWrapper: orgc_id = None sharing_group_id = None date = None - event = self.pymisp.new_event(distribution, threat, - analysis, info, date, + event = self.pymisp.new_event(distribution, threat, + analysis, info, date, published, orgc_id, org_id, sharing_group_id) return event diff --git a/bin/alertHandler.py b/bin/alertHandler.py index ce473ed4..60787b77 100755 --- a/bin/alertHandler.py +++ b/bin/alertHandler.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3.5 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ @@ -52,9 +52,10 @@ if __name__ == "__main__": # port generated automatically depending on the date curYear = datetime.now().year server = redis.StrictRedis( - host=p.config.get("Redis_Level_DB", "host"), - port=curYear, - db=p.config.get("Redis_Level_DB", "db")) + host=p.config.get("ARDB_DB", "host"), + port=p.config.get("ARDB_DB", "port"), + db=curYear, + decode_responses=True) # FUNCTIONS # publisher.info("Script duplicate started") @@ -62,8 +63,8 @@ if __name__ == "__main__": while True: message = p.get_from_set() if message is not None: - message = message.decode('utf8') #decode because of pyhton3 module_name, p_path = message.split(';') + print("new alert : {}".format(module_name)) #PST = Paste.Paste(p_path) else: publisher.debug("Script Attribute is idling 10s") diff --git a/bin/empty_queue.py b/bin/empty_queue.py index f1b3c453..5b763a32 100755 --- a/bin/empty_queue.py +++ b/bin/empty_queue.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ @@ -27,10 +27,9 @@ if __name__ == "__main__": config_section = ['Curve'] for queue in config_section: - print 'dropping: ' + queue + print('dropping: ' + queue) p = Process(queue) while True: message = p.get_from_set() if message is None: break - diff --git a/bin/feeder/pystemon-feeder.py b/bin/feeder/pystemon-feeder.py index f6e64033..50ffaeba 100755 --- a/bin/feeder/pystemon-feeder.py +++ b/bin/feeder/pystemon-feeder.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- # # This file is part of AIL framework - Analysis Information Leak framework @@ -25,7 +25,7 @@ import time import redis import base64 import os -import ConfigParser +import configparser configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') if not os.path.exists(configfile): @@ -33,7 +33,7 @@ if not os.path.exists(configfile): Did you set environment variables? 
\ Or activate the virtualenv.') -cfg = ConfigParser.ConfigParser() +cfg = configparser.ConfigParser() cfg.read(configfile) if cfg.has_option("ZMQ_Global", "bind"): @@ -50,7 +50,7 @@ socket = context.socket(zmq.PUB) socket.bind(zmq_url) # check https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L16 -r = redis.StrictRedis(host='localhost', db=10) +r = redis.StrictRedis(host='localhost', db=10, decode_responses=True) # 101 pastes processed feed # 102 raw pastes feed @@ -59,6 +59,7 @@ while True: time.sleep(base_sleeptime + sleep_inc) topic = 101 paste = r.lpop("pastes") + print(paste) if paste is None: continue socket.send("%d %s" % (topic, paste)) diff --git a/bin/feeder/test-zmq.py b/bin/feeder/test-zmq.py index 2bedf3fe..f6f28aa1 100644 --- a/bin/feeder/test-zmq.py +++ b/bin/feeder/test-zmq.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- # # This file is part of AIL framework - Analysis Information Leak framework @@ -24,9 +24,10 @@ socket.setsockopt(zmq.SUBSCRIBE, topicfilter) while True: message = socket.recv() + print('b1') print (message) if topicfilter == "102": topic, paste, messagedata = message.split() - print paste, messagedata + print(paste, messagedata) else: print (message) diff --git a/bin/import_dir.py b/bin/import_dir.py index 3d291db0..d8360631 100755 --- a/bin/import_dir.py +++ b/bin/import_dir.py @@ -1,9 +1,9 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- import zmq import base64 -import StringIO +from io import StringIO import gzip import argparse import os @@ -31,8 +31,7 @@ import mimetypes ' ''' -import StringIO -import gzip + def is_hierachy_valid(path): var = path.split('/') try: @@ -72,7 +71,12 @@ if __name__ == "__main__": wanted_path = wanted_path.split('/') wanted_path = '/'.join(wanted_path[-(4+args.hierarchy):]) - messagedata = open(complete_path).read() + with gzip.open(complete_path, 'rb') as f: + messagedata = f.read() + + #print(type(complete_path)) + #file = open(complete_path) + #messagedata = file.read() #if paste do not have a 'date hierarchy' ignore it if not is_hierachy_valid(complete_path): @@ -90,5 +94,8 @@ if __name__ == "__main__": print(args.name+'>'+wanted_path) path_to_send = args.name + '>' + wanted_path - socket.send('{} {} {}'.format(args.channel, path_to_send, base64.b64encode(messagedata))) + #s = b'{} {} {}'.format(args.channel, path_to_send, base64.b64encode(messagedata)) + # use bytes object + s = b' '.join( [ args.channel.encode(), path_to_send.encode(), base64.b64encode(messagedata) ] ) + socket.send(s) time.sleep(args.seconds) diff --git a/bin/indexer_lookup.py b/bin/indexer_lookup.py index c7674d38..cb01e3f2 100644 --- a/bin/indexer_lookup.py +++ b/bin/indexer_lookup.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- # # This file is part of AIL framework - Analysis Information Leak framework @@ -10,7 +10,7 @@ # # Copyright (c) 2014 Alexandre Dulaunoy - a@foo.be -import ConfigParser +import configparser import argparse import gzip import os @@ -23,7 +23,7 @@ def readdoc(path=None): return f.read() configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') -cfg = ConfigParser.ConfigParser() +cfg = configparser.ConfigParser() cfg.read(configfile) # Indexer configuration - index dir and schema setup @@ -51,7 +51,7 @@ ix = index.open_dir(indexpath) from whoosh.qparser import QueryParser if args.n: - print ix.doc_count_all() + print(ix.doc_count_all()) exit(0) if args.l: diff --git a/bin/launch_queues.py 
b/bin/launch_queues.py index e60a7b50..9eac1a98 100755 --- a/bin/launch_queues.py +++ b/bin/launch_queues.py @@ -1,8 +1,8 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*-coding:UTF-8 -* -import ConfigParser +import configparser import os import subprocess import time @@ -23,21 +23,21 @@ if __name__ == '__main__': raise Exception('Unable to find the configuration file. \ Did you set environment variables? \ Or activate the virtualenv.') - config = ConfigParser.ConfigParser() + config = configparser.ConfigParser() config.read(configfile) modules = config.sections() pids = {} for module in modules: - pin = subprocess.Popen(["python", './QueueIn.py', '-c', module]) - pout = subprocess.Popen(["python", './QueueOut.py', '-c', module]) + pin = subprocess.Popen(["python3", './QueueIn.py', '-c', module]) + pout = subprocess.Popen(["python3", './QueueOut.py', '-c', module]) pids[module] = (pin, pout) is_running = True try: while is_running: time.sleep(5) is_running = False - for module, p in pids.iteritems(): + for module, p in pids.items(): pin, pout = p if pin is None: # already dead @@ -57,7 +57,7 @@ if __name__ == '__main__': is_running = True pids[module] = (pin, pout) except KeyboardInterrupt: - for module, p in pids.iteritems(): + for module, p in pids.items(): pin, pout = p if pin is not None: pin.kill() diff --git a/bin/packages/Date.py b/bin/packages/Date.py index 85da5b36..72b960b1 100644 --- a/bin/packages/Date.py +++ b/bin/packages/Date.py @@ -1,3 +1,5 @@ +#!/usr/bin/python3 + class Date(object): """docstring for Date""" def __init__(self, *args): @@ -30,7 +32,7 @@ class Date(object): def _set_day(self, day): self.day = day - + def substract_day(self, numDay): import datetime computed_date = datetime.date(int(self.year), int(self.month), int(self.day)) - datetime.timedelta(numDay) @@ -38,4 +40,3 @@ class Date(object): comp_month = str(computed_date.month).zfill(2) comp_day = str(computed_date.day).zfill(2) return comp_year + comp_month + comp_day - diff --git a/bin/packages/Hash.py b/bin/packages/Hash.py index a55a8695..f0bb1d8e 100644 --- a/bin/packages/Hash.py +++ b/bin/packages/Hash.py @@ -1,3 +1,5 @@ +#!/usr/bin/python3 + import hashlib import crcmod import mmh3 diff --git a/bin/packages/Paste.py b/bin/packages/Paste.py index 1debd33e..332981f9 100755 --- a/bin/packages/Paste.py +++ b/bin/packages/Paste.py @@ -1,4 +1,4 @@ -#!/usr/bin/python2.7 +#!/usr/bin/python3 """ The ``Paste Class`` @@ -24,15 +24,8 @@ import operator import string import re import json -try: # dirty to support python3 - import ConfigParser -except: - import configparser - ConfigParser = configparser -try: # dirty to support python3 - import cStringIO -except: - from io import StringIO as cStringIO +import configparser +from io import StringIO import sys sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) from Date import Date @@ -71,25 +64,29 @@ class Paste(object): Did you set environment variables? 
\ Or activate the virtualenv.') - cfg = ConfigParser.ConfigParser() + cfg = configparser.ConfigParser() cfg.read(configfile) self.cache = redis.StrictRedis( host=cfg.get("Redis_Queues", "host"), port=cfg.getint("Redis_Queues", "port"), - db=cfg.getint("Redis_Queues", "db")) + db=cfg.getint("Redis_Queues", "db"), + decode_responses=True) self.store = redis.StrictRedis( host=cfg.get("Redis_Data_Merging", "host"), port=cfg.getint("Redis_Data_Merging", "port"), - db=cfg.getint("Redis_Data_Merging", "db")) + db=cfg.getint("Redis_Data_Merging", "db"), + decode_responses=True) self.p_path = p_path self.p_name = os.path.basename(self.p_path) self.p_size = round(os.path.getsize(self.p_path)/1024.0, 2) + self.p_mime = magic.from_buffer("test", mime=True) self.p_mime = magic.from_buffer(self.get_p_content(), mime=True) # Assuming that the paste will alway be in a day folder which is itself # in a month folder which is itself in a year folder. # /year/month/day/paste.gz + var = self.p_path.split('/') self.p_date = Date(var[-4], var[-3], var[-2]) self.p_source = var[-5] @@ -117,17 +114,18 @@ class Paste(object): paste = self.cache.get(self.p_path) if paste is None: try: - with gzip.open(self.p_path, 'rb') as f: + with gzip.open(self.p_path, 'r') as f: paste = f.read() self.cache.set(self.p_path, paste) self.cache.expire(self.p_path, 300) except: - return '' - pass - return paste + paste = '' + + return str(paste) def get_p_content_as_file(self): - return cStringIO.StringIO(self.get_p_content()) + message = StringIO(self.get_p_content()) + return message def get_p_content_with_removed_lines(self, threshold): num_line_removed = 0 @@ -137,6 +135,7 @@ class Paste(object): line_id = 0 for line_id, line in enumerate(f): length = len(line) + if length < line_length_threshold: string_content += line else: @@ -202,8 +201,8 @@ class Paste(object): .. 
seealso:: _set_p_hash_kind("md5") """ - for hash_name, the_hash in self.p_hash_kind.iteritems(): - self.p_hash[hash_name] = the_hash.Calculate(self.get_p_content()) + for hash_name, the_hash in self.p_hash_kind.items(): + self.p_hash[hash_name] = the_hash.Calculate(self.get_p_content().encode()) return self.p_hash def _get_p_language(self): @@ -271,10 +270,13 @@ class Paste(object): return True, var else: return False, var - + def _get_p_duplicate(self): self.p_duplicate = self.store.hget(self.p_path, "p_duplicate") - return self.p_duplicate if self.p_duplicate is not None else '[]' + if self.p_duplicate is not None: + return self.p_duplicate + else: + return '[]' def save_all_attributes_redis(self, key=None): """ @@ -321,6 +323,28 @@ class Paste(object): else: self.store.hset(self.p_path, attr_name, json.dumps(value)) + def save_others_pastes_attribute_duplicate(self, attr_name, list_value): + """ + Save a new duplicate on others pastes + """ + for hash_type, path, percent, date in list_value: + #get json + json_duplicate = self.store.hget(path, attr_name) + #json save on redis + if json_duplicate is not None: + list_duplicate = (json.loads(json_duplicate)) + # avoid duplicate, a paste can be send by multiples modules + to_add = [hash_type, self.p_path, percent, date] + if to_add not in list_duplicate: + list_duplicate.append(to_add) + self.store.hset(path, attr_name, json.dumps(list_duplicate)) + + else: + # create the new list + list_duplicate = [[hash_type, self.p_path, percent, date]] + self.store.hset(path, attr_name, json.dumps(list_duplicate)) + + def _get_from_redis(self, r_serv): ans = {} for hash_name, the_hash in self.p_hash: @@ -342,7 +366,7 @@ class Paste(object): tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+', gaps=True, discard_empty=True) - blob = TextBlob(clean(self.get_p_content()), tokenizer=tokenizer) + blob = TextBlob(clean( (self.get_p_content()) ), tokenizer=tokenizer) for word in blob.tokens: if word in words.keys(): @@ -351,7 +375,7 @@ class Paste(object): num = 0 words[word] = num + 1 if sort: - var = sorted(words.iteritems(), key=operator.itemgetter(1), reverse=True) + var = sorted(words.items(), key=operator.itemgetter(1), reverse=True) else: var = words diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index 8cb7a295..21eb264c 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -2,6 +2,7 @@ bloomfilters = Blooms dicofilters = Dicos pastes = PASTES +base64 = BASE64 wordtrending_csv = var/www/static/csv/wordstrendingdata wordsfile = files/wordfile @@ -53,13 +54,20 @@ criticalNumberToAlert=8 #Will be considered as false positive if less that X matches from the top password list minTopPassList=5 +[Curve] +max_execution_time = 90 + +[Base64] +path = Base64/ +max_execution_time = 60 + [Modules_Duplicates] #Number of month to look back maximum_month_range = 3 #The value where two pastes are considerate duplicate for ssdeep. threshold_duplicate_ssdeep = 50 #The value where two pastes are considerate duplicate for tlsh. 
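An aside on the configuration keys introduced above: the new [Curve] and [Base64] options (and the base64 storage directory) can be read with the same configparser pattern the migrated scripts use against packages/config.cfg. This is only a minimal sketch under that assumption, not code from the patch; the "Directories" section name is assumed (it is the section that already holds pastes = PASTES), the option names come from config.cfg.sample.

import configparser
import os

configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
cfg = configparser.ConfigParser()
cfg.read(configfile)

# "Directories" is an assumed section name; option names are from config.cfg.sample
base64_dir = cfg.get("Directories", "base64")                  # BASE64
base64_timeout = cfg.getint("Base64", "max_execution_time")    # 60
curve_timeout = cfg.getint("Curve", "max_execution_time")      # 90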
-threshold_duplicate_tlsh = 100 +threshold_duplicate_tlsh = 52 #Minimum size of the paste considered min_paste_size = 0.3 @@ -104,46 +112,56 @@ host = localhost port = 6381 db = 1 -##### LevelDB ##### -[Redis_Level_DB_Curve] +##### ARDB ##### +[ARDB_Curve] host = localhost port = 6382 db = 1 -[Redis_Level_DB_Sentiment] +[ARDB_Sentiment] host = localhost port = 6382 db = 4 -[Redis_Level_DB_TermFreq] +[ARDB_TermFreq] host = localhost port = 6382 db = 2 -[Redis_Level_DB_TermCred] +[ARDB_TermCred] host = localhost port = 6382 db = 5 -[Redis_Level_DB] +[ARDB_DB] host = localhost +port = 6382 db = 0 -[Redis_Level_DB_Trending] +[ARDB_Trending] host = localhost port = 6382 db = 3 -[Redis_Level_DB_Hashs] +[ARDB_Hashs] host = localhost db = 1 +[ARDB_Tags] +host = localhost +port = 6382 +db = 6 + [Url] cc_critical = DE [DomClassifier] cc = DE cc_tld = r'\.de$' +dns = 8.8.8.8 + +[Mail] +dns = 8.8.8.8 # Indexer configuration [Indexer] diff --git a/bin/packages/lib_refine.py b/bin/packages/lib_refine.py index 6606566c..83511e40 100644 --- a/bin/packages/lib_refine.py +++ b/bin/packages/lib_refine.py @@ -1,3 +1,5 @@ +#!/usr/bin/python3 + import re import dns.resolver @@ -17,24 +19,29 @@ def is_luhn_valid(card_number): return (sum(r[0::2]) + sum(sum(divmod(d*2, 10)) for d in r[1::2])) % 10 == 0 -def checking_MX_record(r_serv, adress_set): +def checking_MX_record(r_serv, adress_set, addr_dns): """Check if emails MX domains are responding. :param r_serv: -- Redis connexion database :param adress_set: -- (set) This is a set of emails adress + :param adress_set: -- (str) This is a server dns address :return: (int) Number of adress with a responding and valid MX domains This function will split the email adress and try to resolve their domains names: on example@gmail.com it will try to resolve gmail.com """ + + #remove duplicate + adress_set = list(set(adress_set)) + score = 0 num = len(adress_set) WalidMX = set([]) # Transforming the set into a string MXdomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", str(adress_set).lower()) resolver = dns.resolver.Resolver() - resolver.nameservers = ['149.13.33.69'] + resolver.nameservers = [addr_dns] resolver.timeout = 5 resolver.lifetime = 2 if MXdomains != []: @@ -58,25 +65,31 @@ def checking_MX_record(r_serv, adress_set): except dns.resolver.NoNameservers: publisher.debug('NoNameserver, No non-broken nameservers are available to answer the query.') + print('NoNameserver, No non-broken nameservers are available to answer the query.') except dns.resolver.NoAnswer: publisher.debug('NoAnswer, The response did not contain an answer to the question.') + print('NoAnswer, The response did not contain an answer to the question.') except dns.name.EmptyLabel: publisher.debug('SyntaxError: EmptyLabel') + print('SyntaxError: EmptyLabel') except dns.resolver.NXDOMAIN: r_serv.setex(MXdomain[1:], 1, timedelta(days=1)) publisher.debug('The query name does not exist.') + print('The query name does not exist.') except dns.name.LabelTooLong: publisher.debug('The Label is too long') + print('The Label is too long') except dns.resolver.Timeout: + print('timeout') r_serv.setex(MXdomain[1:], 1, timedelta(days=1)) except Exception as e: - print e + print(e) publisher.debug("emails before: {0} after: {1} (valid)".format(num, score)) return (num, WalidMX) @@ -125,7 +138,7 @@ def checking_A_record(r_serv, domains_set): publisher.debug('The Label is too long') except Exception as e: - print e + print(e) publisher.debug("URLs before: {0} after: {1} (valid)".format(num, score)) return (num, 
WalidA) diff --git a/bin/packages/lib_words.py b/bin/packages/lib_words.py index e98609d7..54581403 100644 --- a/bin/packages/lib_words.py +++ b/bin/packages/lib_words.py @@ -1,3 +1,5 @@ +#!/usr/bin/python3 + import os import string @@ -81,17 +83,17 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month to keep the timeline of the curve correct. """ - threshold = 50 - first_day = date(year, month, 01) + threshold = 30 + first_day = date(year, month, 1) last_day = date(year, month, calendar.monthrange(year, month)[1]) words = [] - with open(feederfilename, 'rb') as f: + with open(feederfilename, 'r') as f: # words of the files words = sorted([word.strip() for word in f if word.strip()[0:2]!='//' and word.strip()!='' ]) headers = ['Date'] + words - with open(csvfilename+'.csv', 'wb') as f: + with open(csvfilename+'.csv', 'w') as f: writer = csv.writer(f) writer.writerow(headers) @@ -103,11 +105,14 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month # from the 1srt day to the last of the list for word in words: value = r_serv.hget(word, curdate) + if value is None: row.append(0) else: # if the word have a value for the day # FIXME Due to performance issues (too many tlds, leads to more than 7s to perform this procedure), I added a threshold + value = r_serv.hget(word, curdate) + value = int(value) if value >= threshold: row.append(value) writer.writerow(row) @@ -127,14 +132,15 @@ def create_curve_from_redis_set(server, csvfilename, set_to_plot, year, month): """ - first_day = date(year, month, 01) + first_day = date(year, month, 1) last_day = date(year, month, calendar.monthrange(year, month)[1]) - + redis_set_name = set_to_plot + "_set_" + str(year) + str(month).zfill(2) words = list(server.smembers(redis_set_name)) - + #words = [x.decode('utf-8') for x in words] + headers = ['Date'] + words - with open(csvfilename+'.csv', 'wb') as f: + with open(csvfilename+'.csv', 'w') as f: writer = csv.writer(f) writer.writerow(headers) diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg index 55fb46d4..b9e29506 100644 --- a/bin/packages/modules.cfg +++ b/bin/packages/modules.cfg @@ -45,7 +45,7 @@ subscribe = Redis_CurveManageTopSets [Categ] subscribe = Redis_Global -publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve +publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve,Redis_ApiKey [CreditCards] subscribe = Redis_CreditCards @@ -105,3 +105,15 @@ publish = Redis_Duplicate,Redis_alertHandler [Keys] subscribe = Redis_Global publish = Redis_Duplicate,Redis_alertHandler + +[ApiKey] +subscribe = Redis_ApiKey +publish = Redis_Duplicate,Redis_alertHandler + +[Base64] +subscribe = Redis_Global +publish = Redis_Duplicate,Redis_alertHandler + +[Bitcoin] +subscribe = Redis_Global +publish = Redis_Duplicate,Redis_alertHandler diff --git a/bin/preProcessFeed.py b/bin/preProcessFeed.py index d9ef419d..37ee0512 100755 --- a/bin/preProcessFeed.py +++ b/bin/preProcessFeed.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* ''' @@ -48,7 +48,7 @@ if __name__ == '__main__': message = p.get_from_set() if message is None: publisher.debug("{} queue is empty, waiting".format(config_section)) - print "queue empty" + print("queue empty") time.sleep(1) continue diff --git a/bin/template.py b/bin/template.py index 22489d16..f311d439 100755 --- a/bin/template.py +++ b/bin/template.py @@ -1,4 +1,4 @@ -#!/usr/bin/env 
python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ Template for new modules diff --git a/bin/tor_fetcher.py b/bin/tor_fetcher.py index bd3d72d3..67a2f4f8 100644 --- a/bin/tor_fetcher.py +++ b/bin/tor_fetcher.py @@ -1,10 +1,10 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* import socks import socket -import urllib2 -import StringIO +import urllib.request +import io import gzip import base64 import sys @@ -21,17 +21,20 @@ def create_connection(address, timeout=None, source_address=None): def get_page(url, torclient_host='127.0.0.1', torclient_port=9050): - request = urllib2.Request(url) + request = urllib.request.Request(url) # UA of the Tor browser bundle request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0') - return urllib2.urlopen(request, timeout=5).read(max_size * 100000) - + return urllib.request.urlopen(request, timeout=5).read(max_size * 100000) +#FIXME don't work at all def makegzip64(s): - out = StringIO.StringIO() - with gzip.GzipFile(fileobj=out, mode="w") as f: - f.write(s) - return base64.standard_b64encode(out.getvalue()) + + out = io.BytesIO() + + with gzip.GzipFile(fileobj=out, mode='ab') as fo: + fo.write(base64.standard_b64encode(s)) + + return out.getvalue() if __name__ == "__main__": @@ -41,7 +44,8 @@ if __name__ == "__main__": exit(1) try: - url = base64.standard_b64decode(sys.argv[1]) + url = base64.standard_b64decode(sys.argv[1]).decode('utf8') + print(url) except: print('unable to decode') exit(1) @@ -61,7 +65,7 @@ if __name__ == "__main__": to_write = makegzip64(page) t, path = tempfile.mkstemp() - with open(path, 'w') as f: - f.write(to_write) - print path + #with open(path, 'w') as f: + #f.write(to_write) + print(path) exit(0) diff --git a/configs/6382.conf b/configs/6382.conf old mode 100644 new mode 100755 index 667fb03a..fecfa7dd --- a/configs/6382.conf +++ b/configs/6382.conf @@ -1,4 +1,7 @@ -# Redis configuration file example +# Ardb configuration file example, modified from redis's conf file. + +# Home dir for ardb instance, it can be referenced by ${ARDB_HOME} in this config file +home ../DATA_ARDB/ # Note on units: when memory size is needed, it is possible to specify # it in the usual form of 1k 5GB 4M and so forth: @@ -12,63 +15,71 @@ # # units are case insensitive so 1GB 1Gb 1gB are all the same. -################################## INCLUDES ################################### - -# Include one or more other config files here. This is useful if you -# have a standard template that goes to all Redis server but also need -# to customize a few per-server settings. Include files can include -# other files, so use this wisely. -# -# Notice option "include" won't be rewritten by command "CONFIG REWRITE" -# from admin or Redis Sentinel. Since Redis always uses the last processed -# line as value of a configuration directive, you'd better put includes -# at the beginning of this file to avoid overwriting config change at runtime. -# -# If instead you are interested in using includes to override configuration -# options, it is better to use include as the last line. -# -# include /path/to/local.conf -# include /path/to/other.conf - -################################ GENERAL ##################################### - -# By default Redis does not run as a daemon. Use 'yes' if you need it. -# Note that Redis will write a pid file in /var/run/redis.pid when daemonized. +# By default Ardb does not run as a daemon. Use 'yes' if you need it. 
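An aside on the #FIXME added to makegzip64() in bin/tor_fetcher.py a few hunks above: the committed helper now base64-encodes the fetched page and then gzips the encoded text, which is the reverse of what the python2 version produced, and the comment itself flags that it does not work. Below is a minimal sketch of one way to restore the old behaviour (compress first, then base64-encode and return text); it is an assumption, not code from the patch.

import base64
import gzip
import io

def makegzip64(s):
    # s is the raw page as bytes: gzip it, then base64-encode the compressed stream
    out = io.BytesIO()
    with gzip.GzipFile(fileobj=out, mode='wb') as f:
        f.write(s)
    # return text so it can still be written out with open(path, 'w')
    return base64.standard_b64encode(out.getvalue()).decode()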
daemonize no -# When running daemonized, Redis writes a pid file in /var/run/redis.pid by +# When running daemonized, Ardb writes a pid file in ${ARDB_HOME}/ardb.pid by # default. You can specify a custom pid file location here. -#pidfile /var/run/redis.pid +pidfile ${ARDB_HOME}/ardb.pid -# Accept connections on the specified port, default is 6379. -# If port 0 is specified Redis will not listen on a TCP socket. -port 6382 +# The thread pool size for the corresponding all listen servers, -1 means current machine's cpu number +thread-pool-size 4 -# TCP listen() backlog. -# -# In high requests-per-second environments you need an high backlog in order -# to avoid slow clients connections issues. Note that the Linux kernel -# will silently truncate it to the value of /proc/sys/net/core/somaxconn so -# make sure to raise both the value of somaxconn and tcp_max_syn_backlog -# in order to get the desired effect. -tcp-backlog 511 +#Accept connections on the specified host&port/unix socket, default is 0.0.0.0:16379. +server[0].listen 127.0.0.1:6382 +# If current qps exceed the limit, Ardb would return an error. +#server[0].qps-limit 1000 -# By default Redis listens for connections from all the network interfaces -# available on the server. It is possible to listen to just one or multiple -# interfaces using the "bind" configuration directive, followed by one or -# more IP addresses. -# -# Examples: -# -# bind 192.168.1.100 10.0.0.1 -# bind 127.0.0.1 +#listen on unix socket +#server[1].listen /tmp/ardb.sock +#server[1].unixsocketperm 755 +#server[1].qps-limit 1000 -# Specify the path for the Unix socket that will be used to listen for -# incoming connections. There is no default, so Redis will not listen -# on a unix socket when not specified. +# 'qps-limit-per-host' used to limit the request per second from same host +# 'qps-limit-per-connection' used to limit the request per second from same connection +qps-limit-per-host 0 +qps-limit-per-connection 0 + +# Specify the optimized RocksDB compaction strategies. +# If anything other than none is set then the rocksdb.options will not be used. +# The property can one of: +# OptimizeLevelStyleCompaction +# OptimizeUniversalStyleCompaction +# none # -#unixsocket /tmp/redis.sock -#unixsocketperm 755 +rocksdb.compaction OptimizeLevelStyleCompaction + +# Enable this to indicate that hsca/sscan/zscan command use total order mode for rocksdb engine +rocksdb.scan-total-order false + +# Disable RocksDB WAL may improve the write performance but +# data in the un-flushed memtables might be lost in case of a RocksDB shutdown. +# Disabling WAL provides similar guarantees as Redis. 
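Aside: this ardb instance (server[0].listen 127.0.0.1:6382) speaks the Redis protocol, so the migrated modules keep talking to it through redis-py exactly as they did to the old Redis LevelDB instance, only with decode_responses=True so Python 3 gets str instead of bytes back. A minimal connection sketch based on the [ARDB_DB] values added to config.cfg.sample; the key written below is purely illustrative and not used by AIL.

import redis

# host and port mirror the new [ARDB_DB] section (localhost:6382)
server = redis.StrictRedis(host='localhost', port=6382, db=0,
                           decode_responses=True)
server.set('ail:selftest', 'ok')    # illustrative key
print(server.get('ail:selftest'))   # 'ok' as str, not b'ok'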
+rocksdb.disableWAL false + +#rocksdb's options +rocksdb.options write_buffer_size=512M;max_write_buffer_number=5;min_write_buffer_number_to_merge=3;compression=kSnappyCompression;\ + bloom_locality=1;memtable_prefix_bloom_size_ratio=0.1;\ + block_based_table_factory={block_cache=512M;filter_policy=bloomfilter:10:true};\ + create_if_missing=true;max_open_files=10000;rate_limiter_bytes_per_sec=50M;\ + use_direct_io_for_flush_and_compaction=true;use_adaptive_mutex=true + +#leveldb's options +leveldb.options block_cache_size=512M,write_buffer_size=128M,max_open_files=5000,block_size=4k,block_restart_interval=16,\ + bloom_bits=10,compression=snappy,logenable=yes,max_file_size=2M + +#lmdb's options +lmdb.options database_maxsize=10G,database_maxdbs=4096,readahead=no,batch_commit_watermark=1024 + +#perconaft's options +perconaft.options cache_size=128M,compression=snappy + +#wiredtiger's options +wiredtiger.options cache_size=512M,session_max=8k,chunk_size=100M,block_size=4k,bloom_bits=10,\ + mmap=false,compressor=snappy + +#forestdb's options +forestdb.options chunksize=8,blocksize=4K # Close the connection after a client is idle for N seconds (0 to disable) timeout 0 @@ -91,115 +102,51 @@ tcp-keepalive 0 # Specify the server verbosity level. # This can be one of: -# debug (a lot of information, useful for development/testing) -# verbose (many rarely useful info, but not a mess like the debug level) -# notice (moderately verbose, what you want in production probably) -# warning (only very important / critical messages are logged) -loglevel notice +# error +# warn +# info +# debug +# trace +loglevel info -# Specify the log file name. Also the empty string can be used to force +# Specify the log file name. Also 'stdout' can be used to force # Redis to log on the standard output. Note that if you use standard # output for logging but daemonize, logs will be sent to /dev/null -logfile "" +#logfile ${ARDB_HOME}/log/ardb-server.log +logfile stdout -# To enable logging to the system logger, just set 'syslog-enabled' to yes, -# and optionally update the other syslog parameters to suit your needs. -# syslog-enabled no -# Specify the syslog identity. -# syslog-ident redis - -# Specify the syslog facility. Must be USER or between LOCAL0-LOCAL7. -# syslog-facility local0 - -# Set the number of databases. The default database is DB 0, you can select -# a different one on a per-connection basis using SELECT where -# dbid is a number between 0 and 'databases'-1 -databases 16 - -################################ SNAPSHOTTING ################################ -# -# Save the DB on disk: -# -# save -# -# Will save the DB if both the given number of seconds and the given -# number of write operations against the DB occurred. -# -# In the example below the behaviour will be to save: -# after 900 sec (15 min) if at least 1 key changed -# after 300 sec (5 min) if at least 10 keys changed -# after 60 sec if at least 10000 keys changed -# -# Note: you can disable saving at all commenting all the "save" lines. -# -# It is also possible to remove all the previously configured save -# points by adding a save directive with a single empty string argument -# like in the following example: -# -# save "" - -#save 900 1 -#save 300 10 -save 300 100000 - -# By default Redis will stop accepting writes if RDB snapshots are enabled -# (at least one save point) and the latest background save failed. 
-# This will make the user aware (in a hard way) that data is not persisting -# on disk properly, otherwise chances are that no one will notice and some -# disaster will happen. -# -# If the background saving process will start working again Redis will -# automatically allow writes again. -# -# However if you have setup your proper monitoring of the Redis server -# and persistence, you may want to disable this feature so that Redis will -# continue to work as usual even if there are problems with disk, -# permissions, and so forth. -stop-writes-on-bgsave-error yes - -# Compress string objects using LZF when dump .rdb databases? -# For default that's set to 'yes' as it's almost always a win. -# If you want to save some CPU in the saving child set it to 'no' but -# the dataset will likely be bigger if you have compressible values or keys. -rdbcompression yes - -# Since version 5 of RDB a CRC64 checksum is placed at the end of the file. -# This makes the format more resistant to corruption but there is a performance -# hit to pay (around 10%) when saving and loading RDB files, so you can disable it -# for maximum performances. -# -# RDB files created with checksum disabled have a checksum of zero that will -# tell the loading code to skip the check. -rdbchecksum yes - -# The filename where to dump the DB -dbfilename dump6382.rdb - -# The working directory. +# The working data directory. # # The DB will be written inside this directory, with the filename specified # above using the 'dbfilename' configuration directive. -# +# # The Append Only File will also be created inside this directory. -# +# # Note that you must specify a directory here, not a file name. -dir ../dumps/ +data-dir ${ARDB_HOME}/data + ################################# REPLICATION ################################# -# Master-Slave replication. Use slaveof to make a Redis instance a copy of -# another Redis server. Note that the configuration is local to the slave +# Master-Slave replication. Use slaveof to make a Ardb instance a copy of +# another Ardb server. Note that the configuration is local to the slave # so for example it is possible to configure the slave to save the DB with a # different interval, or to listen to another port, and so on. # -# slaveof +# slaveof : +#slaveof 127.0.0.1:6379 + + +# By default, ardb use 2 threads to execute commands synced from master. +# -1 means use current CPU number threads instead. +slave-workers 2 + +# Max synced command queue size in memory. +max-slave-worker-queue 1024 + +# The directory for replication. +repl-dir ${ARDB_HOME}/repl -# If the master is password protected (using the "requirepass" configuration -# directive below) it is possible to tell the slave to authenticate before -# starting the replication synchronization process, otherwise the master will -# refuse the slave request. -# -# masterauth # When a slave loses its connection with the master, or when the replication # is still in progress, the slave can act in two different ways: @@ -214,33 +161,55 @@ dir ../dumps/ # slave-serve-stale-data yes +# The slave priority is an integer number published by Ardb/Redis in the INFO output. +# It is used by Redis Sentinel in order to select a slave to promote into a +# master if the master is no longer working correctly. +# +# A slave with a low priority number is considered better for promotion, so +# for instance if there are three slaves with priority 10, 100, 25 Sentinel will +# pick the one with priority 10, that is the lowest. 
+# +# However a special priority of 0 marks the slave as not able to perform the +# role of master, so a slave with priority of 0 will never be selected by +# Redis Sentinel for promotion. +# +# By default the priority is 100. +slave-priority 100 + # You can configure a slave instance to accept writes or not. Writing against # a slave instance may be useful to store some ephemeral data (because data # written on a slave will be easily deleted after resync with the master) but # may also cause problems if clients are writing to it because of a # misconfiguration. # -# Since Redis 2.6 by default slaves are read-only. -# # Note: read only slaves are not designed to be exposed to untrusted clients # on the internet. It's just a protection layer against misuse of the instance. # Still a read only slave exports by default all the administrative commands # such as CONFIG, DEBUG, and so forth. To a limited extent you can improve # security of read only slaves using 'rename-command' to shadow all the # administrative / dangerous commands. +# +# Note: any requests processed by non read only slaves would no write to replication +# log and sync to connected slaves. slave-read-only yes +# The directory for backup. +backup-dir ${ARDB_HOME}/backup +# +# You can configure the backup file format as 'redis' or 'ardb'. The 'ardb' format +# can only used by ardb instance, while 'redis' format file can be used by redis +# and ardb instance. +backup-file-format ardb + + # Slaves send PINGs to server in a predefined interval. It's possible to change # this interval with the repl_ping_slave_period option. The default value is 10 # seconds. # # repl-ping-slave-period 10 -# The following option sets the replication timeout for: -# -# 1) Bulk transfer I/O during SYNC, from the point of view of slave. -# 2) Master timeout from the point of view of slaves (data, pings). -# 3) Slave timeout from the point of view of masters (REPLCONF ACK pings). +# The following option sets a timeout for both Bulk transfer I/O timeout and +# master data or ping response timeout. The default value is 60 seconds. # # It is important to make sure that this value is greater than the value # specified for repl-ping-slave-period otherwise a timeout will be detected @@ -250,7 +219,7 @@ slave-read-only yes # Disable TCP_NODELAY on the slave socket after SYNC? # -# If you select "yes" Redis will use a smaller number of TCP packets and +# If you select "yes" Ardb will use a smaller number of TCP packets and # less bandwidth to send data to slaves. But this can add a delay for # the data to appear on the slave side, up to 40 milliseconds with # Linux kernels using a default configuration. @@ -272,9 +241,46 @@ repl-disable-tcp-nodelay no # The biggest the replication backlog, the longer the time the slave can be # disconnected and later be able to perform a partial resynchronization. # -# The backlog is only allocated once there is at least a slave connected. +# If the size is configured by 0, then Ardb instance can NOT serve as a master. # -# repl-backlog-size 1mb +# repl-backlog-size 500m +repl-backlog-size 1G +repl-backlog-cache-size 100M +snapshot-max-lag-offset 500M + +# Set the max number of snapshots. By default this limit is set to 10 snapshot. +# Once the limit is reached Ardb would try to remove the oldest snapshots +maxsnapshots 10 + +# It is possible for a master to stop accepting writes if there are less than +# N slaves connected, having a lag less or equal than M seconds. +# +# The N slaves need to be in "online" state. 
+# +# The lag in seconds, that must be <= the specified value, is calculated from +# the last ping received from the slave, that is usually sent every second. +# +# This option does not GUARANTEE that N replicas will accept the write, but +# will limit the window of exposure for lost writes in case not enough slaves +# are available, to the specified number of seconds. +# +# For example to require at least 3 slaves with a lag <= 10 seconds use: +# +# min-slaves-to-write 3 +# min-slaves-max-lag 10 + +# When a slave loses its connection with the master, or when the replication +# is still in progress, the slave can act in two different ways: +# +# 1) if slave-serve-stale-data is set to 'yes' (the default) the slave will +# still reply to client requests, possibly with out of date data, or the +# data set may just be empty if this is the first synchronization. +# +# 2) if slave-serve-stale-data is set to 'no' the slave will reply with +# an error "SYNC with master in progress" to all the kind of commands +# but to INFO and SLAVEOF. +# +slave-serve-stale-data yes # After a master has no longer connected slaves for some time, the backlog # will be freed. The following option configures the amount of seconds that @@ -285,42 +291,32 @@ repl-disable-tcp-nodelay no # # repl-backlog-ttl 3600 -# The slave priority is an integer number published by Redis in the INFO output. -# It is used by Redis Sentinel in order to select a slave to promote into a -# master if the master is no longer working correctly. -# -# A slave with a low priority number is considered better for promotion, so -# for instance if there are three slaves with priority 10, 100, 25 Sentinel will -# pick the one with priority 10, that is the lowest. -# -# However a special priority of 0 marks the slave as not able to perform the -# role of master, so a slave with priority of 0 will never be selected by -# Redis Sentinel for promotion. -# -# By default the priority is 100. -slave-priority 100 +# Slave clear current data store before full resync to master. +# It make sure that slave keep consistent with master's data. But slave may cost a +# long time to delete data, it depends on +# If set by no, then slave may have different data with master. +slave-cleardb-before-fullresync yes -# It is possible for a master to stop accepting writes if there are less than -# N slaves connected, having a lag less or equal than M seconds. +# Master/Slave instance would persist sync state every 'repl-backlog-sync-period' secs. +repl-backlog-sync-period 5 + +# Slave would ignore any 'expire' setting from replication command if set by 'yes'. +# It could be used if master is redis instance serve hot data with expire setting, slave is +# ardb instance which persist all data. +# Since master redis instance would generate a 'del' for each expired key, slave should ignore +# all 'del' command too by setting 'slave-ignore-del' to 'yes' for this scenario. +slave-ignore-expire no +slave-ignore-del no + +# After a master has no longer connected slaves for some time, the backlog +# will be freed. The following option configures the amount of seconds that +# need to elapse, starting from the time the last slave disconnected, for +# the backlog buffer to be freed. # -# The N slaves need to be in "online" state. +# A value of 0 means to never release the backlog. # -# The lag in seconds, that must be <= the specified value, is calculated from -# the last ping received from the slave, that is usually sent every second. 
-# -# This option does not GUARANTEES that N replicas will accept the write, but -# will limit the window of exposure for lost writes in case not enough slaves -# are available, to the specified number of seconds. -# -# For example to require at least 3 slaves with a lag <= 10 seconds use: -# -# min-slaves-to-write 3 -# min-slaves-max-lag 10 -# -# Setting one or the other to 0 disables the feature. -# -# By default min-slaves-to-write is set to 0 (feature disabled) and -# min-slaves-max-lag is set to 10. +# repl-backlog-ttl 3600 + ################################## SECURITY ################################### @@ -330,7 +326,7 @@ slave-priority 100 # # This should stay commented out for backward compatibility and because most # people do not need auth (e.g. they run their own servers). -# +# # Warning: since Redis is pretty fast an outside user can try up to # 150k passwords per second against a good box. This means that you should # use a very strong password otherwise it will be very easy to break. @@ -356,6 +352,15 @@ slave-priority 100 # Please note that changing the name of commands that are logged into the # AOF file or transmitted to slaves may cause problems. +################################ CLUSTER ############################### + +# Max execution time of a Lua script in milliseconds. +#zookeeper-servers 127.0.0.1:2181,127.0.0.1:2182,127.0.0.1:2183 +#zk-recv-timeout 10000 +#zk-clientid-file ${ARDB_HOME}/ardb.zkclientid +cluster-name ardb-cluster + + ################################### LIMITS #################################### # Set the max number of connected clients at the same time. By default @@ -369,155 +374,37 @@ slave-priority 100 # # maxclients 10000 -# Don't use more memory than the specified amount of bytes. -# When the memory limit is reached Redis will try to remove keys -# according to the eviction policy selected (see maxmemory-policy). -# -# If Redis can't remove keys according to the policy, or if the policy is -# set to 'noeviction', Redis will start to reply with errors to commands -# that would use more memory, like SET, LPUSH, and so on, and will continue -# to reply to read-only commands like GET. -# -# This option is usually useful when using Redis as an LRU cache, or to set -# a hard memory limit for an instance (using the 'noeviction' policy). -# -# WARNING: If you have slaves attached to an instance with maxmemory on, -# the size of the output buffers needed to feed the slaves are subtracted -# from the used memory count, so that network problems / resyncs will -# not trigger a loop where keys are evicted, and in turn the output -# buffer of slaves is full with DELs of keys evicted triggering the deletion -# of more keys, and so forth until the database is completely emptied. -# -# In short... if you have slaves attached it is suggested that you set a lower -# limit for maxmemory so that there is some free RAM on the system for slave -# output buffers (but this is not needed if the policy is 'noeviction'). -# -# maxmemory -# MAXMEMORY POLICY: how Redis will select what to remove when maxmemory -# is reached. 
You can select among five behaviors: -# -# volatile-lru -> remove the key with an expire set using an LRU algorithm -# allkeys-lru -> remove any key accordingly to the LRU algorithm -# volatile-random -> remove a random key with an expire set -# allkeys-random -> remove a random key, any key -# volatile-ttl -> remove the key with the nearest expire time (minor TTL) -# noeviction -> don't expire at all, just return an error on write operations -# -# Note: with any of the above policies, Redis will return an error on write -# operations, when there are not suitable keys for eviction. -# -# At the date of writing this commands are: set setnx setex append -# incr decr rpush lpush rpushx lpushx linsert lset rpoplpush sadd -# sinter sinterstore sunion sunionstore sdiff sdiffstore zadd zincrby -# zunionstore zinterstore hset hsetnx hmset hincrby incrby decrby -# getset mset msetnx exec sort -# -# The default is: -# -# maxmemory-policy volatile-lru +# The client output buffer limits can be used to force disconnection of clients +# that are not reading data from the server fast enough for some reason (a +# common reason is that a Pub/Sub/Slave client can't consume messages as fast as the +# publisher can produce them). +slave-client-output-buffer-limit 256mb +pubsub-client-output-buffer-limit 32mb -# LRU and minimal TTL algorithms are not precise algorithms but approximated -# algorithms (in order to save memory), so you can select as well the sample -# size to check. For instance for default Redis will check three keys and -# pick the one that was used less recently, you can change the sample size -# using the following configuration directive. -# -# maxmemory-samples 3 +################################## SLOW LOG ################################### -############################## APPEND ONLY MODE ############################### +# The Redis Slow Log is a system to log queries that exceeded a specified +# execution time. The execution time does not include the I/O operations +# like talking with the client, sending the reply and so forth, +# but just the time needed to actually execute the command (this is the only +# stage of command execution where the thread is blocked and can not serve +# other requests in the meantime). +# +# You can configure the slow log with two parameters: one tells Redis +# what is the execution time, in microseconds, to exceed in order for the +# command to get logged, and the other parameter is the length of the +# slow log. When a new command is logged the oldest one is removed from the +# queue of logged commands. -# By default Redis asynchronously dumps the dataset on disk. This mode is -# good enough in many applications, but an issue with the Redis process or -# a power outage may result into a few minutes of writes lost (depending on -# the configured save points). -# -# The Append Only File is an alternative persistence mode that provides -# much better durability. For instance using the default data fsync policy -# (see later in the config file) Redis can lose just one second of writes in a -# dramatic event like a server power outage, or a single write if something -# wrong with the Redis process itself happens, but the operating system is -# still running correctly. -# -# AOF and RDB persistence can be enabled at the same time without problems. -# If the AOF is enabled on startup Redis will load the AOF, that is the file -# with the better durability guarantees. -# -# Please check http://redis.io/topics/persistence for more information. 
+# The following time is expressed in microseconds, so 1000000 is equivalent +# to one second. Note that a negative number disables the slow log, while +# a value of zero forces the logging of every command. +slowlog-log-slower-than 10000 -appendonly no - -# The name of the append only file (default: "appendonly.aof") - -appendfilename "appendonly.aof" - -# The fsync() call tells the Operating System to actually write data on disk -# instead to wait for more data in the output buffer. Some OS will really flush -# data on disk, some other OS will just try to do it ASAP. -# -# Redis supports three different modes: -# -# no: don't fsync, just let the OS flush the data when it wants. Faster. -# always: fsync after every write to the append only log . Slow, Safest. -# everysec: fsync only one time every second. Compromise. -# -# The default is "everysec", as that's usually the right compromise between -# speed and data safety. It's up to you to understand if you can relax this to -# "no" that will let the operating system flush the output buffer when -# it wants, for better performances (but if you can live with the idea of -# some data loss consider the default persistence mode that's snapshotting), -# or on the contrary, use "always" that's very slow but a bit safer than -# everysec. -# -# More details please check the following article: -# http://antirez.com/post/redis-persistence-demystified.html -# -# If unsure, use "everysec". - -# appendfsync always -appendfsync everysec -# appendfsync no - -# When the AOF fsync policy is set to always or everysec, and a background -# saving process (a background save or AOF log background rewriting) is -# performing a lot of I/O against the disk, in some Linux configurations -# Redis may block too long on the fsync() call. Note that there is no fix for -# this currently, as even performing fsync in a different thread will block -# our synchronous write(2) call. -# -# In order to mitigate this problem it's possible to use the following option -# that will prevent fsync() from being called in the main process while a -# BGSAVE or BGREWRITEAOF is in progress. -# -# This means that while another child is saving, the durability of Redis is -# the same as "appendfsync none". In practical terms, this means that it is -# possible to lose up to 30 seconds of log in the worst scenario (with the -# default Linux settings). -# -# If you have latency problems turn this to "yes". Otherwise leave it as -# "no" that is the safest pick from the point of view of durability. - -no-appendfsync-on-rewrite no - -# Automatic rewrite of the append only file. -# Redis is able to automatically rewrite the log file implicitly calling -# BGREWRITEAOF when the AOF log size grows by the specified percentage. -# -# This is how it works: Redis remembers the size of the AOF file after the -# latest rewrite (if no rewrite has happened since the restart, the size of -# the AOF at startup is used). -# -# This base size is compared to the current size. If the current size is -# bigger than the specified percentage, the rewrite is triggered. Also -# you need to specify a minimal size for the AOF file to be rewritten, this -# is useful to avoid rewriting the AOF file even if the percentage increase -# is reached but it is still pretty small. -# -# Specify a percentage of zero in order to disable the automatic AOF -# rewrite feature. - -auto-aof-rewrite-percentage 100 -auto-aof-rewrite-min-size 64mb +# There is no limit to this length. Just be aware that it will consume memory. 
+# You can reclaim memory used by the slow log with SLOWLOG RESET. +slowlog-max-len 128 ################################ LUA SCRIPTING ############################### @@ -537,180 +424,45 @@ auto-aof-rewrite-min-size 64mb # Set it to 0 or a negative value for unlimited execution without warnings. lua-time-limit 5000 -################################## SLOW LOG ################################### - -# The Redis Slow Log is a system to log queries that exceeded a specified -# execution time. The execution time does not include the I/O operations -# like talking with the client, sending the reply and so forth, -# but just the time needed to actually execute the command (this is the only -# stage of command execution where the thread is blocked and can not serve -# other requests in the meantime). -# -# You can configure the slow log with two parameters: one tells Redis -# what is the execution time, in microseconds, to exceed in order for the -# command to get logged, and the other parameter is the length of the -# slow log. When a new command is logged the oldest one is removed from the -# queue of logged commands. - -# The following time is expressed in microseconds, so 1000000 is equivalent -# to one second. Note that a negative number disables the slow log, while -# a value of zero forces the logging of every command. -slowlog-log-slower-than 10000 - -# There is no limit to this length. Just be aware that it will consume memory. -# You can reclaim memory used by the slow log with SLOWLOG RESET. -slowlog-max-len 128 - -############################# Event notification ############################## - -# Redis can notify Pub/Sub clients about events happening in the key space. -# This feature is documented at http://redis.io/topics/keyspace-events -# -# For instance if keyspace events notification is enabled, and a client -# performs a DEL operation on key "foo" stored in the Database 0, two -# messages will be published via Pub/Sub: -# -# PUBLISH __keyspace@0__:foo del -# PUBLISH __keyevent@0__:del foo -# -# It is possible to select the events that Redis will notify among a set -# of classes. Every class is identified by a single character: -# -# K Keyspace events, published with __keyspace@__ prefix. -# E Keyevent events, published with __keyevent@__ prefix. -# g Generic commands (non-type specific) like DEL, EXPIRE, RENAME, ... -# $ String commands -# l List commands -# s Set commands -# h Hash commands -# z Sorted set commands -# x Expired events (events generated every time a key expires) -# e Evicted events (events generated when a key is evicted for maxmemory) -# A Alias for g$lshzxe, so that the "AKE" string means all the events. -# -# The "notify-keyspace-events" takes as argument a string that is composed -# by zero or multiple characters. The empty string means that notifications -# are disabled at all. -# -# Example: to enable list and generic events, from the point of view of the -# event name, use: -# -# notify-keyspace-events Elg -# -# Example 2: to get the stream of the expired keys subscribing to channel -# name __keyevent@0__:expired use: -# -# notify-keyspace-events Ex -# -# By default all notifications are disabled because most users don't need -# this feature and the feature has some overhead. Note that if you don't -# specify at least one of K or E, no events will be delivered. 
-notify-keyspace-events "" - ############################### ADVANCED CONFIG ############################### +## Since some redis clients would check info command's output, this configuration +## would be set in 'misc' section of 'info's output +#additional-misc-info redis_version:2.8.9\nredis_trick:yes -# Hashes are encoded using a memory efficient data structure when they have a -# small number of entries, and the biggest entry does not exceed a given -# threshold. These thresholds can be configured using the following directives. -hash-max-ziplist-entries 512 -hash-max-ziplist-value 64 -# Similarly to hashes, small lists are also encoded in a special way in order -# to save a lot of space. The special representation is only used when -# you are under the following limits: -list-max-ziplist-entries 512 -list-max-ziplist-value 64 +# HyperLogLog sparse representation bytes limit. The limit includes the +# 16 bytes header. When an HyperLogLog using the sparse representation crosses +# this limit, it is convereted into the dense representation. +# +# A value greater than 16000 is totally useless, since at that point the +# dense representation is more memory efficient. +# +# The suggested value is ~ 3000 in order to have the benefits of +# the space efficient encoding without slowing down too much PFADD, +# which is O(N) with the sparse encoding. Thev value can be raised to +# ~ 10000 when CPU is not a concern, but space is, and the data set is +# composed of many HyperLogLogs with cardinality in the 0 - 15000 range. +hll-sparse-max-bytes 3000 -# Sets have a special encoding in just one case: when a set is composed -# of just strings that happens to be integers in radix 10 in the range -# of 64 bit signed integers. -# The following configuration setting sets the limit in the size of the -# set in order to use this special memory saving encoding. -set-max-intset-entries 512 +#trusted-ip 10.10.10.10 +#trusted-ip 10.10.10.* -# Similarly to hashes and lists, sorted sets are also specially encoded in -# order to save a lot of space. This encoding is only used when the length and -# elements of a sorted set are below the following limits: -zset-max-ziplist-entries 128 -zset-max-ziplist-value 64 +# By default Ardb would not compact whole db after loading a snapshot, which may happens +# when slave syncing from master, processing 'import' command from client. +# This configuration only works with rocksdb engine. +# If ardb dord not compact data after loading snapshot file, there would be poor read performance before rocksdb +# completes the next compaction task internally. While the compaction task would cost very long time for a huge data set. +compact-after-snapshot-load false -# Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in -# order to help rehashing the main Redis hash table (the one mapping top-level -# keys to values). The hash table implementation Redis uses (see dict.c) -# performs a lazy rehashing: the more operation you run into a hash table -# that is rehashing, the more rehashing "steps" are performed, so if the -# server is idle the rehashing is never complete and some more memory is used -# by the hash table. -# -# The default is to use this millisecond 10 times every second in order to -# active rehashing the main dictionaries, freeing memory when possible. 
-# -# If unsure: -# use "activerehashing no" if you have hard latency requirements and it is -# not a good thing in your environment that Redis can reply form time to time -# to queries with 2 milliseconds delay. -# -# use "activerehashing yes" if you don't have such hard requirements but -# want to free memory asap when possible. -activerehashing yes +# Ardb would store cursor in memory +scan-redis-compatible yes +scan-cursor-expire-after 60 -# The client output buffer limits can be used to force disconnection of clients -# that are not reading data from the server fast enough for some reason (a -# common reason is that a Pub/Sub client can't consume messages as fast as the -# publisher can produce them). -# -# The limit can be set differently for the three different classes of clients: -# -# normal -> normal clients -# slave -> slave clients and MONITOR clients -# pubsub -> clients subscribed to at least one pubsub channel or pattern -# -# The syntax of every client-output-buffer-limit directive is the following: -# -# client-output-buffer-limit -# -# A client is immediately disconnected once the hard limit is reached, or if -# the soft limit is reached and remains reached for the specified number of -# seconds (continuously). -# So for instance if the hard limit is 32 megabytes and the soft limit is -# 16 megabytes / 10 seconds, the client will get disconnected immediately -# if the size of the output buffers reach 32 megabytes, but will also get -# disconnected if the client reaches 16 megabytes and continuously overcomes -# the limit for 10 seconds. -# -# By default normal clients are not limited because they don't receive data -# without asking (in a push way), but just after a request, so only -# asynchronous clients may create a scenario where data is requested faster -# than it can read. -# -# Instead there is a default limit for pubsub and slave clients, since -# subscribers and slaves receive data in a push fashion. -# -# Both the hard or the soft limit can be disabled by setting them to zero. -client-output-buffer-limit normal 0 0 0 -client-output-buffer-limit slave 256mb 64mb 60 -client-output-buffer-limit pubsub 32mb 8mb 60 +redis-compatible-mode yes +redis-compatible-version 2.8.0 -# Redis calls an internal function to perform many background tasks, like -# closing connections of clients in timeout, purging expired keys that are -# never requested, and so forth. -# -# Not all tasks are performed with the same frequency, but Redis checks for -# tasks to perform accordingly to the specified "hz" value. -# -# By default "hz" is set to 10. Raising the value will use more CPU when -# Redis is idle, but at the same time will make Redis more responsive when -# there are many keys expiring at the same time, and timeouts may be -# handled with more precision. -# -# The range is between 1 and 500, however a value over 100 is usually not -# a good idea. Most users should use the default of 10 and raise this up to -# 100 only in environments where very low latency is required. -hz 10 +statistics-log-period 600 -# When a child rewrites the AOF file, if the following option is enabled -# the file will be fsync-ed every 32 MB of data generated. This is useful -# in order to commit the file to the disk more incrementally and avoid -# big latency spikes. 
-aof-rewrite-incremental-fsync yes +# Range deletion min size trigger +range-delete-min-size 100 diff --git a/files/ApiKey b/files/ApiKey new file mode 100644 index 00000000..0f938eaa --- /dev/null +++ b/files/ApiKey @@ -0,0 +1,5 @@ +amazon +amazonaws +amzn +aws +googleapis diff --git a/installing_deps.sh b/installing_deps.sh index c600f604..246fd2b2 100755 --- a/installing_deps.sh +++ b/installing_deps.sh @@ -5,7 +5,7 @@ set -x sudo apt-get update -sudo apt-get install python-pip python-virtualenv python-dev libfreetype6-dev \ +sudo apt-get install python3-pip python-virtualenv python3-dev libfreetype6-dev \ screen g++ python-tk unzip libsnappy-dev cmake -y #optional tor install @@ -15,7 +15,7 @@ sudo apt-get install tor sudo apt-get install libssl-dev libfreetype6-dev python-numpy -y #pyMISP -sudo apt-get -y install python3-pip +#sudo apt-get -y install python3-pip # DNS deps sudo apt-get install libadns1 libadns1-dev -y @@ -60,11 +60,9 @@ sudo ldconfig popd popd -# REDIS LEVEL DB # -test ! -d redis-leveldb/ && git clone https://github.com/KDr2/redis-leveldb.git -pushd redis-leveldb/ -git submodule init -git submodule update +# ARDB # +test ! -d ardb/ && git clone https://github.com/yinqiwen/ardb.git +pushd ardb/ make popd @@ -73,18 +71,18 @@ if [ ! -f bin/packages/config.cfg ]; then fi pushd var/www/ -./update_thirdparty.sh +sudo ./update_thirdparty.sh popd if [ -z "$VIRTUAL_ENV" ]; then - virtualenv AILENV + virtualenv -p python3 AILENV echo export AIL_HOME=$(pwd) >> ./AILENV/bin/activate echo export AIL_BIN=$(pwd)/bin/ >> ./AILENV/bin/activate echo export AIL_FLASK=$(pwd)/var/www/ >> ./AILENV/bin/activate echo export AIL_REDIS=$(pwd)/redis/src/ >> ./AILENV/bin/activate - echo export AIL_LEVELDB=$(pwd)/redis-leveldb/ >> ./AILENV/bin/activate + echo export AIL_ARDB=$(pwd)/ardb/src/ >> ./AILENV/bin/activate . 
./AILENV/bin/activate @@ -93,28 +91,29 @@ fi year1=20`date +%y` year2=20`date --date='-1 year' +%y` mkdir -p $AIL_HOME/{PASTES,Blooms,dumps} -mkdir -p $AIL_HOME/LEVEL_DB_DATA/{$year1,$year2} -pip install -U pip -pip install -U -r pip_packages_requirement.txt +pip3 install -U pip pip3 install -U -r pip3_packages_requirement.txt # Pyfaup pushd faup/src/lib/bindings/python/ -python setup.py install +python3 setup.py install popd # Py tlsh pushd tlsh/py_ext -python setup.py build -python setup.py install -sudo python3 setup.py build -sudo python3 setup.py install +#python setup.py build +#python setup.py install +python3 setup.py build +python3 setup.py install # Download the necessary NLTK corpora and sentiment vader -HOME=$(pwd) python -m textblob.download_corpora -python -m nltk.downloader vader_lexicon -python -m nltk.downloader punkt +HOME=$(pwd) python3 -m textblob.download_corpora +python3 -m nltk.downloader vader_lexicon +python3 -m nltk.downloader punkt + +# install nosetests +sudo pip install nose #Create the file all_module and update the graph in doc $AIL_HOME/doc/generate_modules_data_flow_graph.sh diff --git a/pip3_packages_requirement.txt b/pip3_packages_requirement.txt index 57cf60eb..78b19d64 100644 --- a/pip3_packages_requirement.txt +++ b/pip3_packages_requirement.txt @@ -1,13 +1,64 @@ pymisp redis -filemagic +#filemagic conflict with magic crcmod mmh3 ssdeep -nltk -textblob pubsublogger zmq langid + +#Essential +redis +pyzmq +dnspython +logbook +pubsublogger +textblob + +#Tokeniser +nltk + +#Graph +numpy +matplotlib +networkx +terminaltables +colorama +asciimatics + +# Hashlib +crcmod +mmh3 +ssdeep +python-Levenshtein + +#Others +python-magic +pybloomfiltermmap +psutil +phonenumbers + +ipython +flask +texttable + +#DomainClassifier +DomainClassifier +#Indexer requirements +whoosh + +ipaddress +pycountry + +# To fetch Onion urls +PySocks + +#ASN lookup requirements +#https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/adns-python/adns-python-1.2.1.tar.gz +https://github.com/trolldbois/python3-adns/archive/master.zip +https://github.com/trolldbois/python-cymru-services/archive/master.zip + +https://github.com/saffsd/langid.py/archive/master.zip diff --git a/pip_packages_requirement.txt b/pip_packages_requirement.txt deleted file mode 100644 index d6dd108d..00000000 --- a/pip_packages_requirement.txt +++ /dev/null @@ -1,51 +0,0 @@ -#Essential -redis -pyzmq -dnspython -logbook -pubsublogger -textblob - -#Graph -numpy -matplotlib -networkx -terminaltables -colorama -asciimatics - -#Tokeniser -nltk - -# Hashlib -crcmod -mmh3 -ssdeep -python-Levenshtein - -#Others -python-magic -pybloomfiltermmap -psutil -phonenumbers - -ipython -flask -texttable - -#DomainClassifier -DomainClassifier -#Indexer requirements -whoosh - -ipaddress -pycountry - -# To fetch Onion urls -PySocks - -#ASN lookup requirements -https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/adns-python/adns-python-1.2.1.tar.gz -https://github.com/trolldbois/python-cymru-services/archive/master.zip - -https://github.com/saffsd/langid.py/archive/master.zip diff --git a/python3_upgrade.sh b/python3_upgrade.sh new file mode 100755 index 00000000..3c444999 --- /dev/null +++ b/python3_upgrade.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +sudo rm -rf AILENV +mkdir old +sudo mv indexdir old/old_indexdir_python2 +sudo mv LEVEL_DB_DATA old/old_LEVEL_DB_DATA +sudo mv dumps old/old_dumps + +./installing_deps.sh diff --git a/samples/2018/01/01/keys_certificat_sample.gz 
b/samples/2018/01/01/keys_certificat_sample.gz new file mode 100644 index 00000000..d3427e10 Binary files /dev/null and b/samples/2018/01/01/keys_certificat_sample.gz differ diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/testHelper.py b/tests/testHelper.py new file mode 100644 index 00000000..7b1e07a3 --- /dev/null +++ b/tests/testHelper.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import unittest +import sys,os + +sys.path.append(os.environ['AIL_BIN']) + +from Helper import Process + +class TestHelper(unittest.TestCase): + + def setUp(self): + + config_section = 'Keys' + + + def test_Process_Constructor_using_key_module(self): + + conf_section = 'Keys' + process = Process(conf_section) + self.assertEqual(process.subscriber_name, 'Keys') diff --git a/tests/testKeys.py b/tests/testKeys.py new file mode 100644 index 00000000..9dc45c75 --- /dev/null +++ b/tests/testKeys.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import sys,os +import unittest +import magic + +sys.path.append(os.environ['AIL_BIN']) + +from packages.Paste import Paste +import Keys as Keys +from Helper import Process +from pubsublogger import publisher + + +class TestKeysModule(unittest.TestCase): + + def setUp(self): + self.paste = Paste('../samples/2018/01/01/keys_certificat_sample.gz') + + # Section name in bin/packages/modules.cfg + self.config_section = 'Keys' + + # Setup the I/O queues + p = Process(self.config_section) + + + def test_search_key_no_channel(self): + with self.assertRaises(Exception): # was pubsublogger.exceptions.NoChannelError, which is never imported here + Keys.search_key(self.paste) + + def test_search_key(self): + with self.assertRaises(NameError): + publisher.port = 6380 + publisher.channel = 'Script' + Keys.search_key(self.paste) diff --git a/var/www/Flask_base_template.py b/var/www/Flask_base_template.py index cb7070a3..e9b65cda 100644 --- a/var/www/Flask_base_template.py +++ b/var/www/Flask_base_template.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* ''' diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 914c4f33..0be6854a 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -1,8 +1,8 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* import redis -import ConfigParser +import configparser import json import datetime import time @@ -72,7 +72,7 @@ with open('templates/header_base.html', 'r') as f: modified_header = complete_header #Add the header in the supplied order -for module_name, txt in to_add_to_header_dico.items(): +for module_name, txt in list(to_add_to_header_dico.items()): to_replace = ''.format(module_name) if to_replace in complete_header: modified_header = modified_header.replace(to_replace, txt) diff --git a/var/www/create_new_web_module.py b/var/www/create_new_web_module.py index 42db6c7f..23c35b01 100755 --- a/var/www/create_new_web_module.py +++ b/var/www/create_new_web_module.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* "Hepler to create a new webpage associated with a module." 
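The Flask_server.py hunk above illustrates the two recurring Python 3 changes in this patch: the ConfigParser module is renamed configparser, and dict.items() now returns a view, which 2to3-style code defensively wraps in list(). A small standalone illustration (not AIL code) of when that list() actually matters:

import configparser  # Python 3 name of the Python 2 ConfigParser module

headers = {'search': '<li>search</li>', 'terms': '<li>terms</li>', 'empty': ''}

# Plain iteration over the live view behaves as in Python 2:
for name, html in headers.items():
    print(name, len(html))

# list() is only required when the dict is modified during the loop;
# deleting from the live view while iterating would raise a RuntimeError.
for name, html in list(headers.items()):
    if not html:
        del headers[name]

# configparser is used exactly as before, only the module name is lowercase.
cfg = configparser.ConfigParser()
cfg.read_string('[Flask]\nmax_preview_char = 250\n')
print(cfg.getint('Flask', 'max_preview_char'))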
@@ -8,12 +8,12 @@ import os def createModuleFolder(modulename): path_module = os.path.join('modules', modulename) os.mkdir(path_module) - + # create html template with open('templates/base_template.html', 'r') as templateFile: template = templateFile.read() template = template.replace('MODULENAME', modulename) - + os.mkdir(os.path.join(path_module, 'templates')) with open(os.path.join(os.path.join(path_module, 'templates'), modulename+'.html'), 'w') as toWriteTemplate: toWriteTemplate.write(template) @@ -22,7 +22,7 @@ def createModuleFolder(modulename): with open('templates/header_base_template.html', 'r') as header_templateFile: header = header_templateFile.read() header = header.replace('MODULENAME', modulename) - + with open(os.path.join(os.path.join(path_module, 'templates'), 'header_{}.html'.format(modulename) ), 'w') as toWriteHeader: toWriteHeader.write(header) @@ -37,7 +37,7 @@ def createModuleFolder(modulename): def main(): - rep1 = raw_input('New module name: ') + rep1 = input('New module name: ') createModuleFolder(rep1) if __name__ == '__main__': diff --git a/var/www/modules/Flask_config.py b/var/www/modules/Flask_config.py index 256ddfd8..f9e7aef4 100644 --- a/var/www/modules/Flask_config.py +++ b/var/www/modules/Flask_config.py @@ -1,10 +1,10 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* ''' Flask global variables shared accross modules ''' -import ConfigParser +import configparser import redis import os @@ -18,7 +18,7 @@ if not os.path.exists(configfile): Did you set environment variables? \ Or activate the virtualenv.') -cfg = ConfigParser.ConfigParser() +cfg = configparser.ConfigParser() cfg.read(configfile) @@ -26,41 +26,47 @@ cfg.read(configfile) r_serv = redis.StrictRedis( host=cfg.get("Redis_Queues", "host"), port=cfg.getint("Redis_Queues", "port"), - db=cfg.getint("Redis_Queues", "db")) + db=cfg.getint("Redis_Queues", "db"), + decode_responses=True) r_serv_log = redis.StrictRedis( host=cfg.get("Redis_Log", "host"), port=cfg.getint("Redis_Log", "port"), - db=cfg.getint("Redis_Log", "db")) + db=cfg.getint("Redis_Log", "db"), + decode_responses=True) r_serv_charts = redis.StrictRedis( - host=cfg.get("Redis_Level_DB_Trending", "host"), - port=cfg.getint("Redis_Level_DB_Trending", "port"), - db=cfg.getint("Redis_Level_DB_Trending", "db")) + host=cfg.get("ARDB_Trending", "host"), + port=cfg.getint("ARDB_Trending", "port"), + db=cfg.getint("ARDB_Trending", "db"), + decode_responses=True) r_serv_sentiment = redis.StrictRedis( - host=cfg.get("Redis_Level_DB_Sentiment", "host"), - port=cfg.getint("Redis_Level_DB_Sentiment", "port"), - db=cfg.getint("Redis_Level_DB_Sentiment", "db")) + host=cfg.get("ARDB_Sentiment", "host"), + port=cfg.getint("ARDB_Sentiment", "port"), + db=cfg.getint("ARDB_Sentiment", "db"), + decode_responses=True) r_serv_term = redis.StrictRedis( - host=cfg.get("Redis_Level_DB_TermFreq", "host"), - port=cfg.getint("Redis_Level_DB_TermFreq", "port"), - db=cfg.getint("Redis_Level_DB_TermFreq", "db")) + host=cfg.get("ARDB_TermFreq", "host"), + port=cfg.getint("ARDB_TermFreq", "port"), + db=cfg.getint("ARDB_TermFreq", "db"), + decode_responses=True) r_serv_cred = redis.StrictRedis( - host=cfg.get("Redis_Level_DB_TermCred", "host"), - port=cfg.getint("Redis_Level_DB_TermCred", "port"), - db=cfg.getint("Redis_Level_DB_TermCred", "db")) + host=cfg.get("ARDB_TermCred", "host"), + port=cfg.getint("ARDB_TermCred", "port"), + db=cfg.getint("ARDB_TermCred", "db"), + decode_responses=True) r_serv_pasteName = redis.StrictRedis( 
host=cfg.get("Redis_Paste_Name", "host"), port=cfg.getint("Redis_Paste_Name", "port"), - db=cfg.getint("Redis_Paste_Name", "db")) + db=cfg.getint("Redis_Paste_Name", "db"), + decode_responses=True) # VARIABLES # max_preview_char = int(cfg.get("Flask", "max_preview_char")) # Maximum number of character to display in the tooltip max_preview_modal = int(cfg.get("Flask", "max_preview_modal")) # Maximum number of character to display in the modal -tlsh_to_percent = 1000.0 #Use to display the estimated percentage instead of a raw value DiffMaxLineLength = int(cfg.get("Flask", "DiffMaxLineLength"))#Use to display the estimated percentage instead of a raw value diff --git a/var/www/modules/browsepastes/Flask_browsepastes.py b/var/www/modules/browsepastes/Flask_browsepastes.py index 30853c38..67923fbd 100644 --- a/var/www/modules/browsepastes/Flask_browsepastes.py +++ b/var/www/modules/browsepastes/Flask_browsepastes.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python3 # -*-coding:UTF-8 -* ''' @@ -23,21 +23,22 @@ max_preview_modal = Flask_config.max_preview_modal #init all lvlDB servers curYear = datetime.now().year +int_year = int(curYear) r_serv_db = {} # port generated automatically depending on available levelDB date yearList = [] -lvdbdir= os.path.join(os.environ['AIL_HOME'], "LEVEL_DB_DATA/") -for year in os.listdir(lvdbdir): - try: - intYear = int(year) - except: - continue - yearList.append([year, intYear, int(curYear) == intYear]) +for x in range(0, (int_year - 2018) + 1): + + intYear = int_year - x + + yearList.append([str(intYear), intYear, int(curYear) == intYear]) r_serv_db[intYear] = redis.StrictRedis( - host=cfg.get("Redis_Level_DB", "host"), - port=intYear, - db=cfg.getint("Redis_Level_DB", "db")) + host=cfg.get("ARDB_DB", "host"), + port=cfg.getint("ARDB_DB", "port"), + db=intYear, + decode_responses=True) + yearList.sort(reverse=True) browsepastes = Blueprint('browsepastes', __name__, template_folder='templates') @@ -48,16 +49,18 @@ def getPastebyType(server, module_name): all_path = [] for path in server.smembers('WARNING_'+module_name): all_path.append(path) + return all_path def event_stream_getImportantPasteByModule(module_name, year): index = 0 all_pastes_list = getPastebyType(r_serv_db[year], module_name) + for path in all_pastes_list: index += 1 paste = Paste.Paste(path) - content = paste.get_p_content().decode('utf8', 'ignore') + content = paste.get_p_content() content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 curr_date = str(paste._get_p_date()) curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] @@ -83,7 +86,13 @@ def browseImportantPaste(): @browsepastes.route("/importantPasteByModule/", methods=['GET']) def importantPasteByModule(): module_name = request.args.get('moduleName') - currentSelectYear = int(request.args.get('year')) + + # # TODO: VERIFY YEAR VALIDITY + try: + currentSelectYear = int(request.args.get('year')) + except: + print('Invalid year input') + currentSelectYear = int(datetime.now().year) all_content = [] paste_date = [] @@ -94,7 +103,7 @@ def importantPasteByModule(): for path in allPastes[0:10]: all_path.append(path) paste = Paste.Paste(path) - content = paste.get_p_content().decode('utf8', 'ignore') + content = paste.get_p_content() content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 all_content.append(content[0:content_range].replace("\"", "\'").replace("\r", " ").replace("\n", " ")) curr_date = str(paste._get_p_date()) @@ -108,13 +117,13 @@ def 
importantPasteByModule(): finished = True return render_template("important_paste_by_module.html", - moduleName=module_name, + moduleName=module_name, year=currentSelectYear, - all_path=all_path, - content=all_content, - paste_date=paste_date, - paste_linenum=paste_linenum, - char_to_display=max_preview_modal, + all_path=all_path, + content=all_content, + paste_date=paste_date, + paste_linenum=paste_linenum, + char_to_display=max_preview_modal, finished=finished) @browsepastes.route("/_getImportantPasteByModule", methods=['GET']) diff --git a/var/www/modules/browsepastes/templates/browse_important_paste.html b/var/www/modules/browsepastes/templates/browse_important_paste.html index 89fc8aaa..faa7ed3d 100644 --- a/var/www/modules/browsepastes/templates/browse_important_paste.html +++ b/var/www/modules/browsepastes/templates/browse_important_paste.html @@ -66,7 +66,7 @@
- Year: + Year:
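The Flask_config.py and Flask_browsepastes.py hunks above replace the per-year LevelDB instances (one port per year) with one ARDB logical database per year, and add decode_responses=True so redis-py returns str instead of bytes under Python 3. A standalone sketch of that connection pattern, using placeholder host and port values (the real ones are read from bin/packages/config.cfg with configparser):

import datetime
import redis

ARDB_HOST = 'localhost'  # assumption: cfg.get("ARDB_DB", "host")
ARDB_PORT = 6382         # assumption: cfg.getint("ARDB_DB", "port")

r_serv_db = {}
current_year = datetime.datetime.now().year

# One logical ARDB database per year, from 2018 up to the current year,
# mirroring the loop added in Flask_browsepastes.py.
for year in range(2018, current_year + 1):
    r_serv_db[year] = redis.StrictRedis(
        host=ARDB_HOST,
        port=ARDB_PORT,
        db=year,
        decode_responses=True)  # results come back as str, no manual .decode()

# Example lookup, as in getPastebyType(): member paths are plain strings.
paths = r_serv_db[current_year].smembers('WARNING_keys')
print(sorted(paths))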