diff --git a/Dockerfile b/Dockerfile index 71318ba4..340e5014 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,19 @@ FROM ubuntu:16.04 -RUN mkdir /opt/AIL && apt-get update -y \ - && apt-get install git python-dev build-essential \ - libffi-dev libssl-dev libfuzzy-dev wget sudo -y +# Make sure that all updates are in place +RUN apt-get clean && apt-get update -y && apt-get upgrade -y \ + && apt-get dist-upgrade -y && apt-get autoremove -y + +# Install needed packages +RUN apt-get install git python-dev build-essential \ + libffi-dev libssl-dev libfuzzy-dev wget sudo -y # Adding sudo command RUN useradd -m docker && echo "docker:docker" | chpasswd && adduser docker sudo RUN echo "root ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers # Installing AIL dependencies +RUN mkdir /opt/AIL ADD . /opt/AIL WORKDIR /opt/AIL RUN ./installing_deps.sh diff --git a/HOWTO.md b/HOWTO.md index 2228d3a6..c21a970f 100644 --- a/HOWTO.md +++ b/HOWTO.md @@ -27,7 +27,7 @@ Feed data to AIL: 4. Edit your configuration file ```bin/packages/config.cfg``` and modify the pystemonpath path accordingly -5. Launch pystemon-feeder ``` ./pystemon-feeder.py ``` +5. Launch pystemon-feeder ``` ./bin/feeder/pystemon-feeder.py ``` How to create a new module diff --git a/OVERVIEW.md b/OVERVIEW.md index effb387d..5f85a7b6 100644 --- a/OVERVIEW.md +++ b/OVERVIEW.md @@ -26,6 +26,24 @@ ARDB overview ARDB_DB * DB 1 - Curve * DB 2 - TermFreq + ----------------------------------------- TERM ---------------------------------------- + + SET - 'TrackedRegexSet' term + + HSET - 'TrackedRegexDate' tracked_regex today_timestamp + + SET - 'TrackedSetSet' set_to_add + + HSET - 'TrackedSetDate' set_to_add today_timestamp + + SET - 'TrackedSetTermSet' term + + HSET - 'TrackedTermDate' tracked_regex today_timestamp + + SET - 'TrackedNotificationEmails_'+term/set email + + SET - 'TrackedNotifications' term/set + * DB 3 - Trending * DB 4 - Sentiment * DB 5 - TermCred @@ -65,7 +83,6 @@ ARDB_DB ZADD - 'base64_hash:'+hash paste * nb_seen_in_paste ZADD - 'binary_hash:'+hash paste * nb_seen_in_paste - ZADD - 'hash_type:'+type date nb_seen ZADD - 'base64_type:'+type date nb_seen ZADD - 'binary_type:'+type date nb_seen diff --git a/README.md b/README.md index 72166c58..e8af8e5f 100644 --- a/README.md +++ b/README.md @@ -70,8 +70,6 @@ Type these command lines for a fully automated installation and start AIL framew git clone https://github.com/CIRCL/AIL-framework.git cd AIL-framework ./installing_deps.sh -cd var/www/ -./update_thirdparty.sh cd ~/AIL-framework/ . ./AILENV/bin/activate cd bin/ @@ -155,6 +153,11 @@ Eventually you can browse the status of the AIL framework website at the followi http://localhost:7000/ ``` +Training +-------- + +CIRCL organises training on how to use or extend the AIL framework. The next training will be [Thursday, 20 Dec](https://en.xing-events.com/ZEQWMLJ.html) in Luxembourg. 
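Reviewer aid for the TERM section added to OVERVIEW.md above: the overview only lists the raw key names in ARDB DB 2 (TermFreq). The sketch below shows how those keys could fit together for one tracked term with an e-mail and a tag notification, using redis-py. The connection parameters and the helper function are illustrative (not part of this patch); the 'TrackedNotificationTags_' prefix is taken from the Curve.py / RegexForTermsFrequency.py hunks later in this diff.

```python
# Illustrative sketch only: layout of the TERM keys described in OVERVIEW.md
# (ARDB DB 2 - TermFreq). Host/port below are placeholders.
import time
import redis

r_term = redis.StrictRedis(host='localhost', port=6382, db=2, decode_responses=True)

def track_term(term, email=None, tag=None):
    """Register a plain tracked term with optional e-mail/tag notification."""
    # timestamp of today at midnight, as used for the *Date hashes
    today_timestamp = int(time.mktime(time.strptime(time.strftime("%Y%m%d"), "%Y%m%d")))

    r_term.sadd('TrackedSetTermSet', term)                  # SET  - 'TrackedSetTermSet' term
    r_term.hset('TrackedTermDate', term, today_timestamp)   # HSET - 'TrackedTermDate' term today_timestamp

    if email:
        r_term.sadd('TrackedNotificationEmails_' + term, email)  # per-term notification e-mails
        r_term.sadd('TrackedNotifications', term)                 # notifications enabled for this term
    if tag:
        r_term.sadd('TrackedNotificationTags_' + term, tag)       # tag applied by Curve.py on a match
```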
+ HOWTO ----- diff --git a/bin/Crawler.py b/bin/Crawler.py index 99917c49..e6b61a99 100755 --- a/bin/Crawler.py +++ b/bin/Crawler.py @@ -10,6 +10,8 @@ import time import subprocess import requests +from pyfaup.faup import Faup + sys.path.append(os.environ['AIL_BIN']) from Helper import Process from pubsublogger import publisher @@ -18,28 +20,43 @@ def on_error_send_message_back_in_queue(type_hidden_service, domain, message): # send this msg back in the queue if not r_onion.sismember('{}_domain_crawler_queue'.format(type_hidden_service), domain): r_onion.sadd('{}_domain_crawler_queue'.format(type_hidden_service), domain) - r_onion.sadd('{}_crawler_queue'.format(type_hidden_service), message) + r_onion.sadd('{}_crawler_priority_queue'.format(type_hidden_service), message) def crawl_onion(url, domain, date, date_month, message): + r_cache.hset('metadata_crawler:{}'.format(splash_port), 'crawling_domain', domain) + r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time', datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S")) + #if not r_onion.sismember('full_onion_up', domain) and not r_onion.sismember('onion_down:'+date , domain): super_father = r_serv_metadata.hget('paste_metadata:'+paste, 'super_father') if super_father is None: super_father=paste - try: - r = requests.get(splash_url , timeout=30.0) - except Exception: - # TODO: relaunch docker or send error message + retry = True + nb_retry = 0 + while retry: + try: + r = requests.get(splash_url , timeout=30.0) + retry = False + except Exception: + # TODO: relaunch docker or send error message + nb_retry += 1 - on_error_send_message_back_in_queue(type_hidden_service, domain, message) - publisher.error('{} SPASH DOWN'.format(splash_url)) - print('--------------------------------------') - print(' \033[91m DOCKER SPLASH DOWN\033[0m') - print(' {} DOWN'.format(splash_url)) - exit(1) + if nb_retry == 6: + on_error_send_message_back_in_queue(type_hidden_service, domain, message) + publisher.error('{} SPASH DOWN'.format(splash_url)) + print('--------------------------------------') + print(' \033[91m DOCKER SPLASH DOWN\033[0m') + print(' {} DOWN'.format(splash_url)) + r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'SPLASH DOWN') + nb_retry == 0 + + print(' \033[91m DOCKER SPLASH NOT AVAILABLE\033[0m') + print(' Retry({}) in 10 seconds'.format(nb_retry)) + time.sleep(10) if r.status_code == 200: + r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Crawling') process = subprocess.Popen(["python", './torcrawler/tor_crawler.py', splash_url, type_hidden_service, url, domain, paste, super_father], stdout=subprocess.PIPE) while process.poll() is None: @@ -57,6 +74,7 @@ def crawl_onion(url, domain, date, date_month, message): print('') print(' PROXY DOWN OR BAD CONFIGURATION\033[0m'.format(splash_url)) print('------------------------------------------------------------------------') + r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Error') exit(-2) else: print(process.stdout.read()) @@ -66,6 +84,7 @@ def crawl_onion(url, domain, date, date_month, message): print('--------------------------------------') print(' \033[91m DOCKER SPLASH DOWN\033[0m') print(' {} DOWN'.format(splash_url)) + r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Crawling') exit(1) @@ -109,6 +128,7 @@ if __name__ == '__main__': print('splash url: {}'.format(splash_url)) crawler_depth_limit = p.config.getint("Crawler", "crawler_depth_limit") + faup = Faup() PASTES_FOLDER = 
os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes")) @@ -130,6 +150,10 @@ if __name__ == '__main__': db=p.config.getint("ARDB_Onion", "db"), decode_responses=True) + r_cache.sadd('all_crawler:{}'.format(type_hidden_service), splash_port) + r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting') + r_cache.hset('metadata_crawler:{}'.format(splash_port), 'started_time', datetime.datetime.now().strftime("%Y/%m/%d - %H:%M.%S")) + # load domains blacklist try: with open(os.environ['AIL_BIN']+'/torcrawler/blacklist_onion.txt', 'r') as f: @@ -142,8 +166,12 @@ if __name__ == '__main__': while True: - # Recovering the streamed message informations. - message = r_onion.spop('{}_crawler_queue'.format(type_hidden_service)) + # Priority Queue - Recovering the streamed message informations. + message = r_onion.spop('{}_crawler_priority_queue'.format(type_hidden_service)) + + if message is None: + # Recovering the streamed message informations. + message = r_onion.spop('{}_crawler_queue'.format(type_hidden_service)) if message is not None: @@ -163,6 +191,8 @@ if __name__ == '__main__': domain_url = 'http://{}'.format(domain) + print() + print() print('\033[92m------------------START CRAWLER------------------\033[0m') print('crawler type: {}'.format(type_hidden_service)) print('\033[92m-------------------------------------------------\033[0m') @@ -170,12 +200,24 @@ if __name__ == '__main__': print('domain: {}'.format(domain)) print('domain_url: {}'.format(domain_url)) - if not r_onion.sismember('blacklist_{}'.format(type_hidden_service), domain): + faup.decode(domain) + onion_domain=faup.get()['domain'].decode() + + if not r_onion.sismember('blacklist_{}'.format(type_hidden_service), domain) and not r_onion.sismember('blacklist_{}'.format(type_hidden_service), onion_domain): date = datetime.datetime.now().strftime("%Y%m%d") date_month = datetime.datetime.now().strftime("%Y%m") if not r_onion.sismember('month_{}_up:{}'.format(type_hidden_service, date_month), domain) and not r_onion.sismember('{}_down:{}'.format(type_hidden_service, date), domain): + # first seen + if not r_onion.hexists('{}_metadata:{}'.format(type_hidden_service, domain), 'first_seen'): + r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'first_seen', date) + + # last_father + r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'paste_parent', paste) + + # last check + r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'last_check', date) crawl_onion(url, domain, date, date_month, message) if url != domain_url: @@ -188,21 +230,12 @@ if __name__ == '__main__': r_onion.sadd('{}_down:{}'.format(type_hidden_service, date), domain) #r_onion.sadd('{}_down_link:{}'.format(type_hidden_service, date), url) #r_onion.hincrby('{}_link_down'.format(type_hidden_service), url, 1) - if not r_onion.exists('{}_metadata:{}'.format(type_hidden_service, domain)): - r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'first_seen', date) - r_onion.hset('{}_metadata:{}'.format(type_hidden_service,domain), 'last_seen', date) else: #r_onion.hincrby('{}_link_up'.format(type_hidden_service), url, 1) if r_onion.sismember('month_{}_up:{}'.format(type_hidden_service, date_month), domain) and r_serv_metadata.exists('paste_children:'+paste): msg = 'infoleak:automatic-detection="{}";{}'.format(type_hidden_service, paste) p.populate_set_out(msg, 'Tags') - # last check - r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'last_check', date) - - # 
last_father - r_onion.hset('{}_metadata:{}'.format(type_hidden_service, domain), 'paste_parent', paste) - # add onion screenshot history # add crawled days if r_onion.lindex('{}_history:{}'.format(type_hidden_service, domain), 0) != date: @@ -233,6 +266,14 @@ if __name__ == '__main__': r_onion.lpush('last_{}'.format(type_hidden_service), domain) r_onion.ltrim('last_{}'.format(type_hidden_service), 0, 15) + #update crawler status + r_cache.hset('metadata_crawler:{}'.format(splash_port), 'status', 'Waiting') + r_cache.hdel('metadata_crawler:{}'.format(splash_port), 'crawling_domain') + else: + print(' Blacklisted Onion') + print() + print() + else: continue else: diff --git a/bin/Curve.py b/bin/Curve.py index 8e228039..c7083c54 100755 --- a/bin/Curve.py +++ b/bin/Curve.py @@ -48,6 +48,8 @@ top_termFreq_setName_week = ["TopTermFreq_set_week", 7] top_termFreq_setName_month = ["TopTermFreq_set_month", 31] top_termFreq_set_array = [top_termFreq_setName_day,top_termFreq_setName_week, top_termFreq_setName_month] +TrackedTermsNotificationTagsPrefix_Name = "TrackedNotificationTags_" + # create direct link in mail full_paste_url = "/showsavedpaste/?paste=" @@ -71,6 +73,11 @@ def check_if_tracked_term(term, path): for email in server_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + term): sendEmailNotification(email, 'Term', mail_body) + # tag paste + for tag in server_term.smembers(TrackedTermsNotificationTagsPrefix_Name + term): + msg = '{};{}'.format(tag, path) + p.populate_set_out(msg, 'Tags') + def getValueOverRange(word, startDate, num_day): to_return = 0 diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 684af83b..549c0425 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -9,12 +9,30 @@ WHITE="\\033[0;02m" YELLOW="\\033[1;33m" CYAN="\\033[1;36m" -[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1; +# Getting CWD where bash script resides +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd |sed 's/bin//' )" +export AIL_HOME="${DIR}" +cd ${AIL_HOME} + +if [ -e "${DIR}/AILENV/bin/python" ]; then + echo "AIL-framework virtualenv seems to exist, good" + ENV_PY="${DIR}/AILENV/bin/python" +else + echo "Please make sure you have a AIL-framework environment, au revoir" + exit 1 +fi + +# redis-server is bundled during install +## [ ! -f "`which redis-server`" ] && echo "'redis-server' is not installed/not on PATH. Please fix and run again." 
&& exit 1 + +export AIL_BIN=${AIL_HOME}/bin/ +export AIL_FLASK=${AIL_HOME}/var/www/ +export AIL_REDIS=${AIL_HOME}/redis/src/ +export AIL_ARDB=${AIL_HOME}/ardb/src/ +export AIL_VENV=${AIL_HOME}/AILENV/ + +export PATH=$AIL_VENV/bin:$PATH export PATH=$AIL_HOME:$PATH export PATH=$AIL_REDIS:$PATH export PATH=$AIL_ARDB:$PATH @@ -91,9 +109,9 @@ function launching_logs { screen -dmS "Logging_AIL" sleep 0.1 echo -e $GREEN"\t* Launching logging process"$DEFAULT - screen -S "Logging_AIL" -X screen -t "LogQueue" bash -c 'cd '${AIL_BIN}'; log_subscriber -p 6380 -c Queuing -l ../logs/; read x' + screen -S "Logging_AIL" -X screen -t "LogQueue" bash -c "cd ${AIL_BIN}; ${AIL_VENV}/bin/log_subscriber -p 6380 -c Queuing -l ../logs/; read x" sleep 0.1 - screen -S "Logging_AIL" -X screen -t "LogScript" bash -c 'cd '${AIL_BIN}'; log_subscriber -p 6380 -c Script -l ../logs/; read x' + screen -S "Logging_AIL" -X screen -t "LogScript" bash -c "cd ${AIL_BIN}; ${AIL_VENV}/bin/log_subscriber -p 6380 -c Script -l ../logs/; read x" } function launching_queues { @@ -101,16 +119,16 @@ function launching_queues { sleep 0.1 echo -e $GREEN"\t* Launching all the queues"$DEFAULT - screen -S "Queue_AIL" -X screen -t "Queues" bash -c 'cd '${AIL_BIN}'; python3 launch_queues.py; read x' + screen -S "Queue_AIL" -X screen -t "Queues" bash -c "cd ${AIL_BIN}; ${ENV_PY} launch_queues.py; read x" } function checking_configuration { bin_dir=${AIL_HOME}/bin echo -e "\t* Checking configuration" if [ "$1" == "automatic" ]; then - bash -c "python3 $bin_dir/Update-conf.py True" + bash -c "${ENV_PY} $bin_dir/Update-conf.py True" else - bash -c "python3 $bin_dir/Update-conf.py False" + bash -c "${ENV_PY} $bin_dir/Update-conf.py False" fi exitStatus=$? @@ -128,75 +146,75 @@ function launching_scripts { sleep 0.1 echo -e $GREEN"\t* Launching ZMQ scripts"$DEFAULT - screen -S "Script_AIL" -X screen -t "ModuleInformation" bash -c 'cd '${AIL_BIN}'; ./ModulesInformationV2.py -k 0 -c 1; read x' + screen -S "Script_AIL" -X screen -t "ModuleInformation" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./ModulesInformationV2.py -k 0 -c 1; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Mixer" bash -c 'cd '${AIL_BIN}'; ./Mixer.py; read x' + screen -S "Script_AIL" -X screen -t "Mixer" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Mixer.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Global" bash -c 'cd '${AIL_BIN}'; ./Global.py; read x' + screen -S "Script_AIL" -X screen -t "Global" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Global.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Duplicates" bash -c 'cd '${AIL_BIN}'; ./Duplicates.py; read x' + screen -S "Script_AIL" -X screen -t "Duplicates" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Duplicates.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Lines" bash -c 'cd '${AIL_BIN}'; ./Lines.py; read x' + screen -S "Script_AIL" -X screen -t "Lines" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Lines.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "DomClassifier" bash -c 'cd '${AIL_BIN}'; ./DomClassifier.py; read x' + screen -S "Script_AIL" -X screen -t "DomClassifier" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./DomClassifier.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Categ" bash -c 'cd '${AIL_BIN}'; ./Categ.py; read x' + screen -S "Script_AIL" -X screen -t "Categ" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Categ.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Tokenize" bash -c 'cd '${AIL_BIN}'; ./Tokenize.py; read x' + screen -S "Script_AIL" -X screen -t "Tokenize" bash -c "cd ${AIL_BIN}; 
${ENV_PY} ./Tokenize.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "CreditCards" bash -c 'cd '${AIL_BIN}'; ./CreditCards.py; read x' + screen -S "Script_AIL" -X screen -t "CreditCards" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./CreditCards.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "BankAccount" bash -c 'cd '${AIL_BIN}'; ./BankAccount.py; read x' + screen -S "Script_AIL" -X screen -t "BankAccount" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./BankAccount.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Onion" bash -c 'cd '${AIL_BIN}'; ./Onion.py; read x' + screen -S "Script_AIL" -X screen -t "Onion" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Onion.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Mail" bash -c 'cd '${AIL_BIN}'; ./Mail.py; read x' + screen -S "Script_AIL" -X screen -t "Mail" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Mail.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "ApiKey" bash -c 'cd '${AIL_BIN}'; ./ApiKey.py; read x' + screen -S "Script_AIL" -X screen -t "ApiKey" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./ApiKey.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Web" bash -c 'cd '${AIL_BIN}'; ./Web.py; read x' + screen -S "Script_AIL" -X screen -t "Web" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Web.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Credential" bash -c 'cd '${AIL_BIN}'; ./Credential.py; read x' + screen -S "Script_AIL" -X screen -t "Credential" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Credential.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Curve" bash -c 'cd '${AIL_BIN}'; ./Curve.py; read x' + screen -S "Script_AIL" -X screen -t "Curve" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Curve.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "CurveManageTopSets" bash -c 'cd '${AIL_BIN}'; ./CurveManageTopSets.py; read x' + screen -S "Script_AIL" -X screen -t "CurveManageTopSets" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./CurveManageTopSets.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "RegexForTermsFrequency" bash -c 'cd '${AIL_BIN}'; ./RegexForTermsFrequency.py; read x' + screen -S "Script_AIL" -X screen -t "RegexForTermsFrequency" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./RegexForTermsFrequency.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "SetForTermsFrequency" bash -c 'cd '${AIL_BIN}'; ./SetForTermsFrequency.py; read x' + screen -S "Script_AIL" -X screen -t "SetForTermsFrequency" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./SetForTermsFrequency.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Indexer" bash -c 'cd '${AIL_BIN}'; ./Indexer.py; read x' + screen -S "Script_AIL" -X screen -t "Indexer" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Indexer.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Keys" bash -c 'cd '${AIL_BIN}'; ./Keys.py; read x' + screen -S "Script_AIL" -X screen -t "Keys" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Keys.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Decoder" bash -c 'cd '${AIL_BIN}'; ./Decoder.py; read x' + screen -S "Script_AIL" -X screen -t "Decoder" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Decoder.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Bitcoin" bash -c 'cd '${AIL_BIN}'; ./Bitcoin.py; read x' + screen -S "Script_AIL" -X screen -t "Bitcoin" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Bitcoin.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Phone" bash -c 'cd '${AIL_BIN}'; ./Phone.py; read x' + screen -S "Script_AIL" -X screen -t "Phone" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Phone.py; read x" sleep 0.1 - screen -S 
"Script_AIL" -X screen -t "Release" bash -c 'cd '${AIL_BIN}'; ./Release.py; read x' + screen -S "Script_AIL" -X screen -t "Release" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Release.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Cve" bash -c 'cd '${AIL_BIN}'; ./Cve.py; read x' + screen -S "Script_AIL" -X screen -t "Cve" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Cve.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "WebStats" bash -c 'cd '${AIL_BIN}'; ./WebStats.py; read x' + screen -S "Script_AIL" -X screen -t "WebStats" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./WebStats.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "ModuleStats" bash -c 'cd '${AIL_BIN}'; ./ModuleStats.py; read x' + screen -S "Script_AIL" -X screen -t "ModuleStats" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./ModuleStats.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "SQLInjectionDetection" bash -c 'cd '${AIL_BIN}'; ./SQLInjectionDetection.py; read x' + screen -S "Script_AIL" -X screen -t "SQLInjectionDetection" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./SQLInjectionDetection.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "LibInjection" bash -c 'cd '${AIL_BIN}'; ./LibInjection.py; read x' + screen -S "Script_AIL" -X screen -t "LibInjection" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./LibInjection.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "alertHandler" bash -c 'cd '${AIL_BIN}'; ./alertHandler.py; read x' + screen -S "Script_AIL" -X screen -t "alertHandler" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./alertHandler.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "MISPtheHIVEfeeder" bash -c 'cd '${AIL_BIN}'; ./MISP_The_Hive_feeder.py; read x' + screen -S "Script_AIL" -X screen -t "MISPtheHIVEfeeder" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./MISP_The_Hive_feeder.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "Tags" bash -c 'cd '${AIL_BIN}'; ./Tags.py; read x' + screen -S "Script_AIL" -X screen -t "Tags" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Tags.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c 'cd '${AIL_BIN}'; ./SentimentAnalysis.py; read x' + screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./SentimentAnalysis.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "SubmitPaste" bash -c 'cd '${AIL_BIN}'; ./submit_paste.py; read x' + screen -S "Script_AIL" -X screen -t "SubmitPaste" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./submit_paste.py; read x" } @@ -219,7 +237,7 @@ function launching_crawler { sleep 0.1 for ((i=first_port;i<=last_port;i++)); do - screen -S "Crawler_AIL" -X screen -t "onion_crawler:$i" bash -c 'cd '${AIL_BIN}'; ./Crawler.py onion '$i'; read x' + screen -S "Crawler_AIL" -X screen -t "onion_crawler:$i" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Crawler.py onion $i; read x" sleep 0.1 done @@ -342,7 +360,7 @@ function launch_flask { screen -dmS "Flask_AIL" sleep 0.1 echo -e $GREEN"\t* Launching Flask server"$DEFAULT - screen -S "Flask_AIL" -X screen -t "Flask_server" bash -c "cd $flask_dir; ls; ./Flask_server.py; read x" + screen -S "Flask_AIL" -X screen -t "Flask_server" bash -c "cd $flask_dir; ls; ${ENV_PY} ./Flask_server.py; read x" else echo -e $RED"\t* A Flask screen is already launched"$DEFAULT fi @@ -353,9 +371,9 @@ function launch_feeder { screen -dmS "Feeder_Pystemon" sleep 0.1 echo -e $GREEN"\t* Launching Pystemon feeder"$DEFAULT - screen -S "Feeder_Pystemon" -X screen -t "Pystemon_feeder" bash -c 'cd '${AIL_BIN}'; ./feeder/pystemon-feeder.py; read x' + screen -S "Feeder_Pystemon" 
-X screen -t "Pystemon_feeder" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./feeder/pystemon-feeder.py; read x" sleep 0.1 - screen -S "Feeder_Pystemon" -X screen -t "Pystemon" bash -c 'cd '${AIL_HOME}/../pystemon'; python2 pystemon.py; read x' + screen -S "Feeder_Pystemon" -X screen -t "Pystemon" bash -c "cd ${AIL_HOME}/../pystemon; ${ENV_PY} ./pystemon.py; read x" else echo -e $RED"\t* A Feeder screen is already launched"$DEFAULT fi diff --git a/bin/MISP_The_Hive_feeder.py b/bin/MISP_The_Hive_feeder.py index 0a8f1791..07c121c9 100755 --- a/bin/MISP_The_Hive_feeder.py +++ b/bin/MISP_The_Hive_feeder.py @@ -54,7 +54,7 @@ from thehive4py.models import Case, CaseTask, CustomFieldHelper -def create_the_hive_alert(source, path, content, tag): +def create_the_hive_alert(source, path, tag): tags = list(r_serv_metadata.smembers('tag:'+path)) artifacts = [ @@ -63,7 +63,6 @@ def create_the_hive_alert(source, path, content, tag): ] l_tags = tag.split(',') - print(tag) # Prepare the sample Alert sourceRef = str(uuid.uuid4())[0:6] @@ -91,6 +90,41 @@ def create_the_hive_alert(source, path, content, tag): except: print('hive connection error') +def feeder(message, count=0): + + if flag_the_hive or flag_misp: + tag, path = message.split(';') + ## FIXME: remove it + if PASTES_FOLDER not in path: + path = os.path.join(PASTES_FOLDER, path) + try: + paste = Paste.Paste(path) + except FileNotFoundError: + if count < 10: + r_serv_db.zincrby('mess_not_saved_export', message, 1) + return 0 + else: + r_serv_db.zrem('mess_not_saved_export', message) + print('Error: {} do not exist, tag= {}'.format(path, tag)) + return 0 + + source = '/'.join(paste.p_path.split('/')[-6:]) + + if HiveApi != False: + if int(r_serv_db.get('hive:auto-alerts')) == 1: + whitelist_hive = r_serv_db.scard('whitelist_hive') + if r_serv_db.sismember('whitelist_hive', tag): + create_the_hive_alert(source, path, tag) + else: + print('hive, auto alerts creation disable') + if flag_misp: + if int(r_serv_db.get('misp:auto-events')) == 1: + if r_serv_db.sismember('whitelist_misp', tag): + misp_wrapper.pushToMISP(uuid_ail, path, tag) + else: + print('misp, auto events creation disable') + + if __name__ == "__main__": publisher.port = 6380 @@ -119,10 +153,18 @@ if __name__ == "__main__": db=cfg.getint("ARDB_Metadata", "db"), decode_responses=True) + # set sensor uuid uuid_ail = r_serv_db.get('ail:uuid') if uuid_ail is None: uuid_ail = r_serv_db.set('ail:uuid', uuid.uuid4() ) + # set default + if r_serv_db.get('hive:auto-alerts') is None: + r_serv_db.set('hive:auto-alerts', 0) + + if r_serv_db.get('misp:auto-events') is None: + r_serv_db.set('misp:auto-events', 0) + p = Process(config_section) # create MISP connection if flag_misp: @@ -167,36 +209,30 @@ if __name__ == "__main__": r_serv_db.set('ail:thehive', False) print('Not connected to The HIVE') + refresh_time = 3 + ## FIXME: remove it + PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + time_1 = time.time() + while True: # Get one message from the input queue message = p.get_from_set() if message is None: - publisher.debug("{} queue is empty, waiting 1s".format(config_section)) - time.sleep(1) - continue + + # handle not saved pastes + if int(time.time() - time_1) > refresh_time: + + num_queu = r_serv_db.zcard('mess_not_saved_export') + list_queu = r_serv_db.zrange('mess_not_saved_export', 0, -1, withscores=True) + + if num_queu and list_queu: + for i in range(0, num_queu): + feeder(list_queu[i][0],list_queu[i][1]) + + time_1 = time.time() + else: + publisher.debug("{} 
queue is empty, waiting 1s".format(config_section)) + time.sleep(1) else: - - if flag_the_hive or flag_misp: - tag, path = message.split(';') - paste = Paste.Paste(path) - source = '/'.join(paste.p_path.split('/')[-6:]) - - full_path = os.path.join(os.environ['AIL_HOME'], - p.config.get("Directories", "pastes"), path) - - - if HiveApi != False: - if int(r_serv_db.get('hive:auto-alerts')) == 1: - whitelist_hive = r_serv_db.scard('whitelist_hive') - if r_serv_db.sismember('whitelist_hive', tag): - create_the_hive_alert(source, path, full_path, tag) - - else: - print('hive, auto alerts creation disable') - if flag_misp: - if int(r_serv_db.get('misp:auto-events')) == 1: - if r_serv_db.sismember('whitelist_misp', tag): - misp_wrapper.pushToMISP(uuid_ail, path, tag) - else: - print('misp, auto events creation disable') + feeder(message) diff --git a/bin/Mixer.py b/bin/Mixer.py index e1656b8e..e7f9e6de 100755 --- a/bin/Mixer.py +++ b/bin/Mixer.py @@ -80,6 +80,7 @@ if __name__ == '__main__': # OTHER CONFIG # operation_mode = cfg.getint("Module_Mixer", "operation_mode") ttl_key = cfg.getint("Module_Mixer", "ttl_duplicate") + default_unnamed_feed_name = cfg.get("Module_Mixer", "default_unnamed_feed_name") # STATS # processed_paste = 0 @@ -106,7 +107,7 @@ if __name__ == '__main__': paste_name = complete_paste except ValueError as e: - feeder_name = "unnamed_feeder" + feeder_name = default_unnamed_feed_name paste_name = complete_paste # Processed paste diff --git a/bin/NotificationHelper.py b/bin/NotificationHelper.py index d8f7fe92..1bccd314 100755 --- a/bin/NotificationHelper.py +++ b/bin/NotificationHelper.py @@ -3,8 +3,10 @@ import argparse import configparser +import traceback import os import smtplib +from pubsublogger import publisher from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText @@ -15,6 +17,9 @@ This module allows the global configuration and management of notification setti # CONFIG # configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') +publisher.port = 6380 +publisher.channel = "Script" + # notifications enabled/disabled TrackedTermsNotificationEnabled_Name = "TrackedNotifications" @@ -22,7 +27,6 @@ TrackedTermsNotificationEnabled_Name = "TrackedNotifications" # Keys will be e.g. 
TrackedNotificationEmails TrackedTermsNotificationEmailsPrefix_Name = "TrackedNotificationEmails_" - def sendEmailNotification(recipient, alert_name, content): if not os.path.exists(configfile): @@ -33,22 +37,12 @@ def sendEmailNotification(recipient, alert_name, content): cfg = configparser.ConfigParser() cfg.read(configfile) - sender = cfg.get("Notifications", "sender"), - sender_host = cfg.get("Notifications", "sender_host"), - sender_port = cfg.getint("Notifications", "sender_port"), - sender_pw = cfg.get("Notifications", "sender_pw"), - - if isinstance(sender, tuple): - sender = sender[0] - - if isinstance(sender_host, tuple): - sender_host = sender_host[0] - - if isinstance(sender_port, tuple): - sender_port = sender_port[0] - - if isinstance(sender_pw, tuple): - sender_pw = sender_pw[0] + sender = cfg.get("Notifications", "sender") + sender_host = cfg.get("Notifications", "sender_host") + sender_port = cfg.getint("Notifications", "sender_port") + sender_pw = cfg.get("Notifications", "sender_pw") + if sender_pw == 'None': + sender_pw = None # raise an exception if any of these is None if (sender is None or @@ -83,9 +77,9 @@ def sendEmailNotification(recipient, alert_name, content): smtp_server.quit() print('Send notification ' + alert_name + ' to '+recipient) - except Exception as e: - print(str(e)) - # raise e + except Exception as err: + traceback.print_tb(err.__traceback__) + publisher.warning(err) if __name__ == '__main__': parser = argparse.ArgumentParser(description='Test notification sender.') diff --git a/bin/Onion.py b/bin/Onion.py index 1f233fcf..026617e9 100755 --- a/bin/Onion.py +++ b/bin/Onion.py @@ -29,10 +29,18 @@ import os import base64 import subprocess import redis +import signal import re from Helper import Process +class TimeoutException(Exception): + pass + +def timeout_handler(signum, frame): + raise TimeoutException + +signal.signal(signal.SIGALRM, timeout_handler) def fetch(p, r_cache, urls, domains, path): failed = [] @@ -113,6 +121,8 @@ if __name__ == "__main__": message = p.get_from_set() prec_filename = None + max_execution_time = p.config.getint("Onion", "max_execution_time") + # send to crawler: activate_crawler = p.config.get("Crawler", "activate_crawler") if activate_crawler == 'True': @@ -130,6 +140,7 @@ if __name__ == "__main__": while True: + message = p.get_from_set() if message is not None: print(message) filename, score = message.split() @@ -140,16 +151,24 @@ if __name__ == "__main__": urls = [] PST = Paste.Paste(filename) - for x in PST.get_regex(url_regex): - print(x) - # Extracting url with regex - url, s, credential, subdomain, domain, host, port, \ - resource_path, query_string, f1, f2, f3, f4 = x + # max execution time on regex + signal.alarm(max_execution_time) + try: + for x in PST.get_regex(url_regex): + print(x) + # Extracting url with regex + url, s, credential, subdomain, domain, host, port, \ + resource_path, query_string, f1, f2, f3, f4 = x - if '.onion' in url: - print(url) - domains_list.append(domain) - urls.append(url) + if '.onion' in url: + print(url) + domains_list.append(domain) + urls.append(url) + except TimeoutException: + encoded_list = [] + p.incr_module_timeout_statistic() + print ("{0} processing timeout".format(PST.p_path)) + continue ''' for x in PST.get_regex(i2p_regex): @@ -204,7 +223,11 @@ if __name__ == "__main__": print('send to onion crawler') r_onion.sadd('onion_domain_crawler_queue', domain) msg = '{};{}'.format(url,PST.p_path) - r_onion.sadd('onion_crawler_queue', msg) + if not 
r_onion.hexists('onion_metadata:{}'.format(domain), 'first_seen'): + r_onion.sadd('onion_crawler_priority_queue', msg) + print('send to priority queue') + else: + r_onion.sadd('onion_crawler_queue', msg) #p.populate_set_out(msg, 'Crawler') else: @@ -222,4 +245,3 @@ if __name__ == "__main__": publisher.debug("Script url is Idling 10s") #print('Sleeping') time.sleep(10) - message = p.get_from_set() diff --git a/bin/RegexForTermsFrequency.py b/bin/RegexForTermsFrequency.py index fae7a03a..0db7f2ee 100755 --- a/bin/RegexForTermsFrequency.py +++ b/bin/RegexForTermsFrequency.py @@ -42,6 +42,8 @@ top_termFreq_setName_week = ["TopTermFreq_set_week", 7] top_termFreq_setName_month = ["TopTermFreq_set_month", 31] top_termFreq_set_array = [top_termFreq_setName_day, top_termFreq_setName_week, top_termFreq_setName_month] +TrackedTermsNotificationTagsPrefix_Name = "TrackedNotificationTags_" + # create direct link in mail full_paste_url = "/showsavedpaste/?paste=" @@ -129,6 +131,11 @@ if __name__ == "__main__": for email in server_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + regex_str_complete): sendEmailNotification(email, 'Term', mail_body) + # tag paste + for tag in server_term.smembers(TrackedTermsNotificationTagsPrefix_Name + regex_str_complete): + msg = '{};{}'.format(tag, filename) + p.populate_set_out(msg, 'Tags') + set_name = 'regex_' + dico_regexname_to_redis[regex_str] new_to_the_set = server_term.sadd(set_name, filename) new_to_the_set = True if new_to_the_set == 1 else False diff --git a/bin/SetForTermsFrequency.py b/bin/SetForTermsFrequency.py index 78de9b08..19ed7210 100755 --- a/bin/SetForTermsFrequency.py +++ b/bin/SetForTermsFrequency.py @@ -34,6 +34,8 @@ top_termFreq_setName_week = ["TopTermFreq_set_week", 7] top_termFreq_setName_month = ["TopTermFreq_set_month", 31] top_termFreq_set_array = [top_termFreq_setName_day,top_termFreq_setName_week, top_termFreq_setName_month] +TrackedTermsNotificationTagsPrefix_Name = "TrackedNotificationTags_" + # create direct link in mail full_paste_url = "/showsavedpaste/?paste=" @@ -121,6 +123,11 @@ if __name__ == "__main__": for email in server_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + dico_setname_to_redis[str(the_set)]): sendEmailNotification(email, 'Term', mail_body) + # tag paste + for tag in server_term.smembers(TrackedTermsNotificationTagsPrefix_Name + dico_setname_to_redis[str(the_set)]): + msg = '{};{}'.format(tag, filename) + p.populate_set_out(msg, 'Tags') + print(the_set, "matched in", filename) set_name = 'set_' + dico_setname_to_redis[the_set] new_to_the_set = server_term.sadd(set_name, filename) diff --git a/bin/feeder/pystemon-feeder.py b/bin/feeder/pystemon-feeder.py index b6680ee9..5c9f743c 100755 --- a/bin/feeder/pystemon-feeder.py +++ b/bin/feeder/pystemon-feeder.py @@ -61,13 +61,13 @@ topic = '102' while True: time.sleep(base_sleeptime + sleep_inc) paste = r.lpop("pastes") - print(paste) if paste is None: continue try: + print(paste) with open(pystemonpath+paste, 'rb') as f: #.read() messagedata = f.read() - path_to_send = pastes_directory+paste + path_to_send = os.path.join(pastes_directory,paste) s = b' '.join( [ topic.encode(), path_to_send.encode(), base64.b64encode(messagedata) ] ) socket.send(s) diff --git a/bin/feeder/test-zmq.py b/bin/feeder/test-zmq.py index f6f28aa1..110c5de2 100644 --- a/bin/feeder/test-zmq.py +++ b/bin/feeder/test-zmq.py @@ -20,7 +20,7 @@ socket.connect ("tcp://crf.circl.lu:%s" % port) # 102 Full pastes in raw base64(gz) topicfilter = "102" -socket.setsockopt(zmq.SUBSCRIBE, 
topicfilter) +socket.setsockopt_string(zmq.SUBSCRIBE, topicfilter) while True: message = socket.recv() diff --git a/bin/packages/HiddenServices.py b/bin/packages/HiddenServices.py index d515c955..92e2e57c 100755 --- a/bin/packages/HiddenServices.py +++ b/bin/packages/HiddenServices.py @@ -81,8 +81,12 @@ class HiddenServices(object): return '' return origin_paste.replace(self.paste_directory+'/', '') - def get_domain_tags(self): - return self.tags + def get_domain_tags(self, update=False): + if not update: + return self.tags + else: + self.get_last_crawled_pastes() + return self.tags def update_domain_tags(self, children): p_tags = self.r_serv_metadata.smembers('tag:'+children) diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index 088e778b..c9a800b9 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -27,6 +27,7 @@ ail_domain = http://localhost:7000 sender = sender@example.com sender_host = smtp.example.com sender_port = 1337 +sender_pw = None # optional for using with authenticated SMTP over SSL # sender_pw = securepassword @@ -67,6 +68,9 @@ minTopPassList=5 [Curve] max_execution_time = 90 +[Onion] +max_execution_time = 180 + [Base64] path = Base64/ max_execution_time = 60 @@ -98,6 +102,7 @@ threshold_stucked_module=600 operation_mode = 3 #Define the time that a paste will be considerate duplicate. in seconds (1day = 86400) ttl_duplicate = 86400 +default_unnamed_feed_name = unnamed_feeder [RegexForTermsFrequency] max_execution_time = 60 @@ -207,6 +212,9 @@ dns = 8.8.8.8 [Mail] dns = 8.8.8.8 +[Web] +dns = 149.13.33.69 + # Indexer configuration [Indexer] type = whoosh diff --git a/bin/packages/lib_refine.py b/bin/packages/lib_refine.py index 5d2af0a9..32f56900 100644 --- a/bin/packages/lib_refine.py +++ b/bin/packages/lib_refine.py @@ -1,6 +1,8 @@ #!/usr/bin/python3 import re +import os +import configparser import dns.resolver from pubsublogger import publisher @@ -101,11 +103,20 @@ def checking_MX_record(r_serv, adress_set, addr_dns): def checking_A_record(r_serv, domains_set): + configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') + if not os.path.exists(configfile): + raise Exception('Unable to find the configuration file. \ + Did you set environment variables? 
\ + Or activate the virtualenv.') + cfg = configparser.ConfigParser() + cfg.read(configfile) + dns_server = cfg.get("Web", "dns") + score = 0 num = len(domains_set) WalidA = set([]) resolver = dns.resolver.Resolver() - resolver.nameservers = ['149.13.33.69'] + resolver.nameservers = [dns_server] resolver.timeout = 5 resolver.lifetime = 2 diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg index deb5a069..0dc40448 100644 --- a/bin/packages/modules.cfg +++ b/bin/packages/modules.cfg @@ -32,13 +32,15 @@ publish = Redis_Words [Curve] subscribe = Redis_Words -publish = Redis_CurveManageTopSets +publish = Redis_CurveManageTopSets,Redis_Tags [RegexForTermsFrequency] subscribe = Redis_Global +publish = Redis_Tags [SetForTermsFrequency] subscribe = Redis_Global +publish = Redis_Tags [CurveManageTopSets] subscribe = Redis_CurveManageTopSets diff --git a/bin/torcrawler/TorSplashCrawler.py b/bin/torcrawler/TorSplashCrawler.py index 47486dd9..99a4f3b3 100644 --- a/bin/torcrawler/TorSplashCrawler.py +++ b/bin/torcrawler/TorSplashCrawler.py @@ -10,10 +10,12 @@ import datetime import base64 import redis import json +import time from scrapy.spidermiddlewares.httperror import HttpError from twisted.internet.error import DNSLookupError from twisted.internet.error import TimeoutError +from twisted.web._newclient import ResponseNeverReceived from scrapy import Spider from scrapy.linkextractors import LinkExtractor @@ -39,6 +41,8 @@ class TorSplashCrawler(): 'SPIDER_MIDDLEWARES': {'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,}, 'DUPEFILTER_CLASS': 'scrapy_splash.SplashAwareDupeFilter', 'HTTPERROR_ALLOW_ALL': True, + 'RETRY_TIMES': 2, + 'CLOSESPIDER_PAGECOUNT': 50, 'DEPTH_LIMIT': crawler_depth_limit }) @@ -97,7 +101,7 @@ class TorSplashCrawler(): yield SplashRequest( self.start_urls, self.parse, - #errback=self.errback_catcher, + errback=self.errback_catcher, endpoint='render.json', meta={'father': self.original_paste}, args={ 'html': 1, @@ -174,7 +178,7 @@ class TorSplashCrawler(): yield SplashRequest( link.url, self.parse, - #errback=self.errback_catcher, + errback=self.errback_catcher, endpoint='render.json', meta={'father': relative_filename_paste}, args={ 'html': 1, @@ -184,17 +188,39 @@ class TorSplashCrawler(): 'wait': 10} ) - ''' def errback_catcher(self, failure): # catch all errback failures, self.logger.error(repr(failure)) - print('failure') - #print(failure) - print(failure.type) - #print(failure.request.meta['item']) + if failure.check(ResponseNeverReceived): + request = failure.request + url = request.meta['splash']['args']['url'] + father = request.meta['father'] + + self.logger.error('Splash, ResponseNeverReceived for %s, retry in 10s ...', url) + time.sleep(10) + yield SplashRequest( + url, + self.parse, + errback=self.errback_catcher, + endpoint='render.json', + meta={'father': father}, + args={ 'html': 1, + 'png': 1, + 'render_all': 1, + 'har': 1, + 'wait': 10} + ) + + else: + print('failure') + #print(failure) + print(failure.type) + #print(failure.request.meta['item']) + + ''' #if isinstance(failure.value, HttpError): - if failure.check(HttpError): + elif failure.check(HttpError): # you can get the response response = failure.value.response print('HttpError') @@ -214,7 +240,7 @@ class TorSplashCrawler(): print('TimeoutError') print(TimeoutError) self.logger.error('TimeoutError on %s', request.url) - ''' + ''' def save_crawled_paste(self, filename, content): diff --git a/bin/torcrawler/blacklist_onion.txt b/bin/torcrawler/blacklist_onion.txt index 15dfa0de..ebfedb30 
100644 --- a/bin/torcrawler/blacklist_onion.txt +++ b/bin/torcrawler/blacklist_onion.txt @@ -3,3 +3,4 @@ facebookcorewwwi.onion graylady3jvrrxbe.onion expyuzz4wqqyqhjn.onion dccbbv6cooddgcrq.onion +pugljpwjhbiagkrn.onion diff --git a/bin/torcrawler/launch_splash_crawler.sh b/bin/torcrawler/launch_splash_crawler.sh index 412022c1..5c7f21ee 100755 --- a/bin/torcrawler/launch_splash_crawler.sh +++ b/bin/torcrawler/launch_splash_crawler.sh @@ -5,12 +5,15 @@ usage() { echo "Usage: sudo $0 [-f ] [-p ] [-n echo " -p: number of the first splash server port number. This number is incremented for the others splash server"; echo " -n: number of splash servers to start"; echo ""; + echo " -options:"; + echo " -u: max unbound in-memory cache (Mb, Restart Splash when full, default=3000 Mb)"; + echo ""; echo "example:"; echo "sudo ./launch_splash_crawler.sh -f /home/my_user/AIL-framework/configs/docker/splash_onion/etc/splash/proxy-profiles/ -p 8050 -n 3"; exit 1; } -while getopts ":p:f:n:" o; do +while getopts ":p:f:n:u:" o; do case "${o}" in p) p=${OPTARG} @@ -21,6 +24,9 @@ while getopts ":p:f:n:" o; do n) n=${OPTARG} ;; + u) + u=${OPTARG} + ;; *) usage ;; @@ -28,16 +34,35 @@ while getopts ":p:f:n:" o; do done shift $((OPTIND-1)) +if [ -z "${u}" ]; then + u=3000; +fi + if [ -z "${p}" ] || [ -z "${f}" ] || [ -z "${n}" ]; then usage; fi +RED="\\033[1;31m" +DEFAULT="\\033[0;39m" +GREEN="\\033[1;32m" +WHITE="\\033[0;02m" + +if [ ! -d "${f}" ]; then + printf "$RED\n Error -f, proxy-profiles directory: $WHITE${f}$RED not found\n$DEFAULT Please check if you enter the correct path\n" + exit 1 +fi + +if [ ! -f "${f}default.ini" ]; then + printf "$RED\n Error -f, proxy configuration file:$WHITE default.ini$RED not found\n$DEFAULT Please check if you enter the correct path\n" + exit 1 +fi + screen -dmS "Docker_Splash" sleep 0.1 for ((i=0;i<=$((${n} - 1));i++)); do port_number=$((${p} + $i)) - screen -S "Docker_Splash" -X screen -t "docker_splash:$port_number" bash -c 'sudo docker run -p '$port_number':8050 --cpus=1 --memory=4.5G -v '$f':/etc/splash/proxy-profiles/ --net="bridge" scrapinghub/splash; read x' + screen -S "Docker_Splash" -X screen -t "docker_splash:$port_number" bash -c 'sudo docker run -d -p '$port_number':8050 --restart=always --cpus=1 --memory=4.5G -v '$f':/etc/splash/proxy-profiles/ --net="bridge" scrapinghub/splash --maxrss '$u'; read x' sleep 0.1 - echo " Splash server launched on port $port_number" + printf "$GREEN Splash server launched on port $port_number$DEFAULT\n" done diff --git a/configs/6379.conf b/configs/6379.conf index d799cd17..9a535bed 100644 --- a/configs/6379.conf +++ b/configs/6379.conf @@ -61,7 +61,7 @@ tcp-backlog 511 # Examples: # # bind 192.168.1.100 10.0.0.1 -# bind 127.0.0.1 +bind 127.0.0.1 # Specify the path for the Unix socket that will be used to listen for # incoming connections. There is no default, so Redis will not listen diff --git a/configs/6380.conf b/configs/6380.conf index 2a30b0d1..31c7a6e0 100644 --- a/configs/6380.conf +++ b/configs/6380.conf @@ -61,7 +61,7 @@ tcp-backlog 511 # Examples: # # bind 192.168.1.100 10.0.0.1 -# bind 127.0.0.1 +bind 127.0.0.1 # Specify the path for the Unix socket that will be used to listen for # incoming connections. 
There is no default, so Redis will not listen diff --git a/configs/6381.conf b/configs/6381.conf index 95a5c07d..8360a199 100644 --- a/configs/6381.conf +++ b/configs/6381.conf @@ -61,7 +61,7 @@ tcp-backlog 511 # Examples: # # bind 192.168.1.100 10.0.0.1 -# bind 127.0.0.1 +bind 127.0.0.1 # Specify the path for the Unix socket that will be used to listen for # incoming connections. There is no default, so Redis will not listen diff --git a/doc/presentation/ail-training-december-2018.pdf b/doc/presentation/ail-training-december-2018.pdf new file mode 100644 index 00000000..5aa6d9d9 Binary files /dev/null and b/doc/presentation/ail-training-december-2018.pdf differ diff --git a/installing_deps.sh b/installing_deps.sh index 6110d534..484ca770 100755 --- a/installing_deps.sh +++ b/installing_deps.sh @@ -5,7 +5,7 @@ set -x sudo apt-get update -sudo apt-get install python3-pip python-virtualenv python3-dev python3-tk libfreetype6-dev \ +sudo apt-get install python3-pip virtualenv python3-dev python3-tk libfreetype6-dev \ screen g++ python-tk unzip libsnappy-dev cmake -y #optional tor install diff --git a/pip3_packages_requirement.txt b/pip3_packages_requirement.txt index dd447d5c..3991e158 100644 --- a/pip3_packages_requirement.txt +++ b/pip3_packages_requirement.txt @@ -2,7 +2,7 @@ pymisp thehive4py -redis +redis==2.10.6 #filemagic conflict with magic crcmod mmh3 @@ -13,7 +13,6 @@ zmq langid #Essential -redis pyzmq dnspython logbook diff --git a/var/www/modules/Flask_config.py b/var/www/modules/Flask_config.py index ea6fd6ed..d26b2363 100644 --- a/var/www/modules/Flask_config.py +++ b/var/www/modules/Flask_config.py @@ -30,6 +30,12 @@ r_serv = redis.StrictRedis( db=cfg.getint("Redis_Queues", "db"), decode_responses=True) +r_cache = redis.StrictRedis( + host=cfg.get("Redis_Cache", "host"), + port=cfg.getint("Redis_Cache", "port"), + db=cfg.getint("Redis_Cache", "db"), + decode_responses=True) + r_serv_log = redis.StrictRedis( host=cfg.get("Redis_Log", "host"), port=cfg.getint("Redis_Log", "port"), @@ -102,7 +108,6 @@ r_serv_onion = redis.StrictRedis( db=cfg.getint("ARDB_Onion", "db"), decode_responses=True) - sys.path.append('../../configs/keys') # MISP # try: diff --git a/var/www/modules/PasteSubmit/Flask_PasteSubmit.py b/var/www/modules/PasteSubmit/Flask_PasteSubmit.py index 16930ef8..cc38de77 100644 --- a/var/www/modules/PasteSubmit/Flask_PasteSubmit.py +++ b/var/www/modules/PasteSubmit/Flask_PasteSubmit.py @@ -506,6 +506,8 @@ def edit_tag_export(): status_misp = [] status_hive = [] + infoleak_tags = Taxonomies().get('infoleak').machinetags() + is_infoleak_tag = [] for tag in list_export_tags: if r_serv_db.sismember('whitelist_misp', tag): @@ -519,6 +521,11 @@ def edit_tag_export(): else: status_hive.append(False) + if tag in infoleak_tags: + is_infoleak_tag.append(True) + else: + is_infoleak_tag.append(False) + if misp_auto_events is not None: if int(misp_auto_events) == 1: misp_active = True @@ -543,6 +550,7 @@ def edit_tag_export(): misp_active=misp_active, hive_active=hive_active, list_export_tags=list_export_tags, + is_infoleak_tag=is_infoleak_tag, status_misp=status_misp, status_hive=status_hive, nb_tags_whitelist_misp=nb_tags_whitelist_misp, @@ -594,5 +602,37 @@ def disable_hive_auto_alert(): r_serv_db.set('hive:auto-alerts', 0) return edit_tag_export() +@PasteSubmit.route("/PasteSubmit/add_push_tag") +def add_push_tag(): + tag = request.args.get('tag') + if tag is not None: + + #limit tag length + if len(tag) > 49: + tag = tag[0:48] + + r_serv_db.sadd('list_export_tags', tag) + + 
to_return = {} + to_return["tag"] = tag + return jsonify(to_return) + else: + return 'None args', 400 + +@PasteSubmit.route("/PasteSubmit/delete_push_tag") +def delete_push_tag(): + tag = request.args.get('tag') + + infoleak_tags = Taxonomies().get('infoleak').machinetags() + if tag not in infoleak_tags and r_serv_db.sismember('list_export_tags', tag): + r_serv_db.srem('list_export_tags', tag) + r_serv_db.srem('whitelist_misp', tag) + r_serv_db.srem('whitelist_hive', tag) + to_return = {} + to_return["tag"] = tag + return jsonify(to_return) + else: + return 'this tag can\'t be removed', 400 + # ========= REGISTRATION ========= app.register_blueprint(PasteSubmit, url_prefix=baseUrl) diff --git a/var/www/modules/PasteSubmit/templates/edit_tag_export.html b/var/www/modules/PasteSubmit/templates/edit_tag_export.html index 04a506d6..94980787 100644 --- a/var/www/modules/PasteSubmit/templates/edit_tag_export.html +++ b/var/www/modules/PasteSubmit/templates/edit_tag_export.html @@ -37,6 +37,9 @@ background: #d91f2d; color: #fff; } + .mouse_pointer{ + cursor: pointer; + } @@ -169,7 +172,14 @@ {% endif %} - {{ tag }} + + {{ tag }} + {% if not is_infoleak_tag[loop.index0] %} + + {% endif %} + {% endfor %} @@ -209,7 +219,14 @@ {% endif %} - {{ tag }} + + {{ tag }} + {% if not is_infoleak_tag[loop.index0] %} + + {% endif %} + {% endfor %} @@ -232,6 +249,42 @@ +
+ + + + +
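The Flask_PasteSubmit.py hunk above adds two GET endpoints, /PasteSubmit/add_push_tag and /PasteSubmit/delete_push_tag, both taking a `tag` query parameter and returning the tag as JSON on success. A minimal usage sketch with the requests library follows; the base URL assumes the default Flask address http://localhost:7000 from the README and an empty blueprint baseUrl, and the helper names and error handling are illustrative, not part of this patch.

```python
# Illustrative sketch only: exercising the new push-tag endpoints.
import requests

BASE_URL = 'http://localhost:7000'  # placeholder, see README / ail_domain in config.cfg

def add_push_tag(tag):
    # Tags longer than 49 characters are truncated server-side to 48.
    r = requests.get(BASE_URL + '/PasteSubmit/add_push_tag', params={'tag': tag})
    return r.json() if r.status_code == 200 else None

def delete_push_tag(tag):
    # The server answers 400 for infoleak taxonomy tags or tags not in 'list_export_tags'.
    r = requests.get(BASE_URL + '/PasteSubmit/delete_push_tag', params={'tag': tag})
    return r.json() if r.status_code == 200 else None
```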
+ @@ -277,6 +330,25 @@ $(document).ready(function(){ } ); } + + function delete_push_tag(tag){ + //var row_tr = $(this).closest("tr"); + $.get("{{ url_for('PasteSubmit.delete_push_tag') }}", { tag: tag }, function(data, status){ + if(status == "success") { + //row_tr.remove(); + window.location.reload(false); + } + }); + } + + function add_custom_tag(){ + $.get("{{ url_for('PasteSubmit.add_push_tag') }}", { tag: document.getElementById('new_custom_tag').value }, function(data, status){ + if(status == "success") { + //row_tr.remove(); + window.location.reload(false); + } + }); + } diff --git a/var/www/modules/hashDecoded/Flask_hashDecoded.py b/var/www/modules/hashDecoded/Flask_hashDecoded.py index faf6af34..62a32f75 100644 --- a/var/www/modules/hashDecoded/Flask_hashDecoded.py +++ b/var/www/modules/hashDecoded/Flask_hashDecoded.py @@ -101,7 +101,8 @@ def all_hash_search(): date_to = request.form.get('date_to') type = request.form.get('type') encoding = request.form.get('encoding') - return redirect(url_for('hashDecoded.hashDecoded_page', date_from=date_from, date_to=date_to, type=type, encoding=encoding)) + show_decoded_files = request.form.get('show_decoded_files') + return redirect(url_for('hashDecoded.hashDecoded_page', date_from=date_from, date_to=date_to, type=type, encoding=encoding, show_decoded_files=show_decoded_files)) @hashDecoded.route("/hashDecoded/", methods=['GET']) def hashDecoded_page(): @@ -109,6 +110,7 @@ def hashDecoded_page(): date_to = request.args.get('date_to') type = request.args.get('type') encoding = request.args.get('encoding') + show_decoded_files = request.args.get('show_decoded_files') if type == 'All types': type = None @@ -161,14 +163,16 @@ def hashDecoded_page(): daily_date = None l_64 = set() - for date in date_range: - if encoding is None: - l_hash = r_serv_metadata.zrange('hash_date:' +date, 0, -1) - else: - l_hash = r_serv_metadata.zrange(encoding+'_date:' +date, 0, -1) - if l_hash: - for hash in l_hash: - l_64.add(hash) + if show_decoded_files: + show_decoded_files = True + for date in date_range: + if encoding is None: + l_hash = r_serv_metadata.zrange('hash_date:' +date, 0, -1) + else: + l_hash = r_serv_metadata.zrange(encoding+'_date:' +date, 0, -1) + if l_hash: + for hash in l_hash: + l_64.add(hash) num_day_sparkline = 6 date_range_sparkline = get_date_range(num_day_sparkline) @@ -214,7 +218,7 @@ def hashDecoded_page(): l_type = r_serv_metadata.smembers('hash_all_type') return render_template("hashDecoded.html", l_64=b64_metadata, vt_enabled=vt_enabled, l_type=l_type, type=type, daily_type_chart=daily_type_chart, daily_date=daily_date, - encoding=encoding, all_encoding=all_encoding, date_from=date_from, date_to=date_to) + encoding=encoding, all_encoding=all_encoding, date_from=date_from, date_to=date_to, show_decoded_files=show_decoded_files) @hashDecoded.route('/hashDecoded/hash_by_type') def hash_by_type(): @@ -400,6 +404,63 @@ def decoder_type_json(): to_json.append({'name': decoder, 'value': nb_decoded[decoder]}) return jsonify(to_json) +@hashDecoded.route('/hashDecoded/top5_type_json') +def top5_type_json(): + date_from = request.args.get('date_from') + date_to = request.args.get('date_to') + + typ = request.args.get('type') + decoder = request.args.get('encoding') + + if decoder == 'All encoding' or decoder is None: + all_decoder = r_serv_metadata.smembers('all_decoder') + else: + if not r_serv_metadata.sismember('all_decoder', decoder): + return jsonify({'Error': 'This decoder do not exist'}) + else: + all_decoder = [decoder] + + if typ == 
'All types' or typ is None or typ=='None': + all_type = r_serv_metadata.smembers('hash_all_type') + else: + typ = typ.replace(' ', '+') + if not r_serv_metadata.sismember('hash_all_type', typ): + return jsonify({'Error': 'This type do not exist'}) + else: + all_type = [typ] + + date_range = [] + if date_from is not None and date_to is not None: + #change format + try: + if len(date_from) != 8: + date_from = date_from[0:4] + date_from[5:7] + date_from[8:10] + date_to = date_to[0:4] + date_to[5:7] + date_to[8:10] + date_range = substract_date(date_from, date_to) + except: + pass + + if not date_range: + date_range.append(datetime.date.today().strftime("%Y%m%d")) + + # TODO replace with ZUNIONSTORE + nb_types_decoded = {} + for date in date_range: + for typ in all_type: + for decoder in all_decoder: + nb_decoded = r_serv_metadata.zscore('{}_type:{}'.format(decoder, typ), date) + if nb_decoded is not None: + if typ in nb_types_decoded: + nb_types_decoded[typ] = nb_types_decoded[typ] + int(nb_decoded) + else: + nb_types_decoded[typ] = int(nb_decoded) + + to_json = [] + top5_types = sorted(nb_types_decoded, key=nb_types_decoded.get, reverse=True)[:5] + for typ in top5_types: + to_json.append({'name': typ, 'value': nb_types_decoded[typ]}) + return jsonify(to_json) + @hashDecoded.route('/hashDecoded/daily_type_json') def daily_type_json(): diff --git a/var/www/modules/hashDecoded/templates/hashDecoded.html b/var/www/modules/hashDecoded/templates/hashDecoded.html index 995ecad9..19ddcdaf 100644 --- a/var/www/modules/hashDecoded/templates/hashDecoded.html +++ b/var/www/modules/hashDecoded/templates/hashDecoded.html @@ -121,7 +121,14 @@ {% endif %} {% endfor %} -
+
+ +
@@ -129,6 +136,8 @@
+
+
@@ -189,10 +198,12 @@ {% else %} - {% if date_from|string == date_to|string %} -

{{ date_from }}, No Hashes

- {% else %} -

{{ date_from }} to {{ date_to }}, No Hashes

+ {% if show_decoded_files %} + {% if date_from|string == date_to|string %} +

{{ date_from }}, No Hashes

+ {% else %} +

{{ date_from }} to {{ date_to }}, No Hashes

+ {% endif %} {% endif %} {% endif %} @@ -248,9 +259,12 @@ {% elif daily_type_chart %} chart.stackBarChart =barchart_type_stack("{{ url_for('hashDecoded.range_type_json') }}?date_from={{daily_date}}&date_to={{daily_date}}", 'id'); {% else %} - chart.stackBarChart = barchart_type_stack("{{ url_for('hashDecoded.range_type_json') }}?date_from={{date_from}}&date_to={{date_to}}", 'id') + chart.stackBarChart = barchart_type_stack("{{ url_for('hashDecoded.range_type_json') }}?date_from={{date_from}}&date_to={{date_to}}", 'id'); {% endif %} + draw_pie_chart("pie_chart_encoded" ,"{{ url_for('hashDecoded.decoder_type_json') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}", "{{ url_for('hashDecoded.hashDecoded_page') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}&encoding="); + draw_pie_chart("pie_chart_top5_types" ,"{{ url_for('hashDecoded.top5_type_json') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}", "{{ url_for('hashDecoded.hashDecoded_page') }}?date_from={{date_from}}&date_to={{date_to}}&type="); + chart.onResize(); $(window).on("resize", function() { chart.onResize(); @@ -498,79 +512,83 @@ window.chart = chart; diff --git a/var/www/modules/hiddenServices/Flask_hiddenServices.py b/var/www/modules/hiddenServices/Flask_hiddenServices.py index 47ea56f1..cc977976 100644 --- a/var/www/modules/hiddenServices/Flask_hiddenServices.py +++ b/var/www/modules/hiddenServices/Flask_hiddenServices.py @@ -8,7 +8,7 @@ import redis import datetime import sys import os -from flask import Flask, render_template, jsonify, request, Blueprint +from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for from Date import Date from HiddenServices import HiddenServices @@ -19,6 +19,7 @@ import Flask_config app = Flask_config.app cfg = Flask_config.cfg baseUrl = Flask_config.baseUrl +r_cache = Flask_config.r_cache r_serv_onion = Flask_config.r_serv_onion r_serv_metadata = Flask_config.r_serv_metadata bootstrap_label = Flask_config.bootstrap_label @@ -40,6 +41,16 @@ def get_date_range(num_day): return list(reversed(date_list)) +def substract_date(date_from, date_to): + date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8])) + date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8])) + delta = date_to - date_from # timedelta + l_date = [] + for i in range(delta.days + 1): + date = date_from + datetime.timedelta(i) + l_date.append( date.strftime('%Y%m%d') ) + return l_date + def unpack_paste_tags(p_tags): l_tags = [] for tag in p_tags: @@ -81,7 +92,11 @@ def hiddenServices_page(): metadata_onion = {} metadata_onion['domain'] = onion metadata_onion['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'last_check') + if metadata_onion['last_check'] is None: + metadata_onion['last_check'] = '********' metadata_onion['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'first_seen') + if metadata_onion['first_seen'] is None: + metadata_onion['first_seen'] = '********' if get_onion_status(onion, metadata_onion['last_check']): metadata_onion['status_text'] = 'UP' metadata_onion['status_color'] = 'Green' @@ -92,7 +107,159 @@ def hiddenServices_page(): metadata_onion['status_icon'] = 'fa-times-circle' list_onion.append(metadata_onion) - return render_template("hiddenServices.html", last_onions=list_onion, statDomains=statDomains) + crawler_metadata=[] + all_onion_crawler = r_cache.smembers('all_crawler:onion') + for crawler in all_onion_crawler: + crawling_domain = 
@@ -81,7 +92,11 @@ def hiddenServices_page():
         metadata_onion = {}
         metadata_onion['domain'] = onion
         metadata_onion['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'last_check')
+        if metadata_onion['last_check'] is None:
+            metadata_onion['last_check'] = '********'
         metadata_onion['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'first_seen')
+        if metadata_onion['first_seen'] is None:
+            metadata_onion['first_seen'] = '********'
         if get_onion_status(onion, metadata_onion['last_check']):
             metadata_onion['status_text'] = 'UP'
             metadata_onion['status_color'] = 'Green'
@@ -92,7 +107,159 @@
             metadata_onion['status_icon'] = 'fa-times-circle'
         list_onion.append(metadata_onion)
 
-    return render_template("hiddenServices.html", last_onions=list_onion, statDomains=statDomains)
+    crawler_metadata=[]
+    all_onion_crawler = r_cache.smembers('all_crawler:onion')
+    for crawler in all_onion_crawler:
+        crawling_domain = r_cache.hget('metadata_crawler:{}'.format(crawler), 'crawling_domain')
+        started_time = r_cache.hget('metadata_crawler:{}'.format(crawler), 'started_time')
+        status_info = r_cache.hget('metadata_crawler:{}'.format(crawler), 'status')
+        crawler_info = '{} - {}'.format(crawler, started_time)
+        if status_info=='Waiting' or status_info=='Crawling':
+            status=True
+        else:
+            status=False
+        crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status})
+
+    date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
+    return render_template("hiddenServices.html", last_onions=list_onion, statDomains=statDomains,
+                            crawler_metadata=crawler_metadata, date_from=date_string, date_to=date_string)
+
+@hiddenServices.route("/hiddenServices/last_crawled_domains_with_stats_json", methods=['GET'])
+def last_crawled_domains_with_stats_json():
+    last_onions = r_serv_onion.lrange('last_onion', 0 ,-1)
+    list_onion = []
+
+    now = datetime.datetime.now()
+    date = '{}{}{}'.format(now.strftime("%Y"), now.strftime("%m"), now.strftime("%d"))
+    statDomains = {}
+    statDomains['domains_up'] = r_serv_onion.scard('onion_up:{}'.format(date))
+    statDomains['domains_down'] = r_serv_onion.scard('onion_down:{}'.format(date))
+    statDomains['total'] = statDomains['domains_up'] + statDomains['domains_down']
+    statDomains['domains_queue'] = r_serv_onion.scard('onion_domain_crawler_queue')
+
+    for onion in last_onions:
+        metadata_onion = {}
+        metadata_onion['domain'] = onion
+        metadata_onion['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'last_check')
+        if metadata_onion['last_check'] is None:
+            metadata_onion['last_check'] = '********'
+        metadata_onion['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(onion), 'first_seen')
+        if metadata_onion['first_seen'] is None:
+            metadata_onion['first_seen'] = '********'
+        if get_onion_status(onion, metadata_onion['last_check']):
+            metadata_onion['status_text'] = 'UP'
+            metadata_onion['status_color'] = 'Green'
+            metadata_onion['status_icon'] = 'fa-check-circle'
+        else:
+            metadata_onion['status_text'] = 'DOWN'
+            metadata_onion['status_color'] = 'Red'
+            metadata_onion['status_icon'] = 'fa-times-circle'
+        list_onion.append(metadata_onion)
+
+    crawler_metadata=[]
+    all_onion_crawler = r_cache.smembers('all_crawler:onion')
+    for crawler in all_onion_crawler:
+        crawling_domain = r_cache.hget('metadata_crawler:{}'.format(crawler), 'crawling_domain')
+        started_time = r_cache.hget('metadata_crawler:{}'.format(crawler), 'started_time')
+        status_info = r_cache.hget('metadata_crawler:{}'.format(crawler), 'status')
+        crawler_info = '{} - {}'.format(crawler, started_time)
+        if status_info=='Waiting' or status_info=='Crawling':
+            status=True
+        else:
+            status=False
+        crawler_metadata.append({'crawler_info': crawler_info, 'crawling_domain': crawling_domain, 'status_info': status_info, 'status': status})
+
+    date_string = '{}-{}-{}'.format(date[0:4], date[4:6], date[6:8])
+
+    return jsonify({'last_onions': list_onion, 'statDomains': statDomains, 'crawler_metadata':crawler_metadata})
+
+@hiddenServices.route("/hiddenServices/get_onions_by_daterange", methods=['POST'])
+def get_onions_by_daterange():
+    date_from = request.form.get('date_from')
+    date_to = request.form.get('date_to')
+    domains_up = request.form.get('domains_up')
+    domains_down = request.form.get('domains_down')
+    domains_tags = request.form.get('domains_tags')
+
+    return redirect(url_for('hiddenServices.show_domains_by_daterange', date_from=date_from, date_to=date_to, domains_up=domains_up, domains_down=domains_down, domains_tags=domains_tags))
+
+@hiddenServices.route("/hiddenServices/show_domains_by_daterange", methods=['GET'])
+def show_domains_by_daterange():
+    date_from = request.args.get('date_from')
+    date_to = request.args.get('date_to')
+    domains_up = request.args.get('domains_up')
+    domains_down = request.args.get('domains_down')
+    domains_tags = request.args.get('domains_tags')
+
+    date_range = []
+    if date_from is not None and date_to is not None:
+        #change format
+        try:
+            if len(date_from) != 8:
+                date_from = date_from[0:4] + date_from[5:7] + date_from[8:10]
+                date_to = date_to[0:4] + date_to[5:7] + date_to[8:10]
+            date_range = substract_date(date_from, date_to)
+        except:
+            pass
+
+    if not date_range:
+        date_range.append(datetime.date.today().strftime("%Y%m%d"))
+        date_from = date_range[0][0:4] + '-' + date_range[0][4:6] + '-' + date_range[0][6:8]
+        date_to = date_from
+
+    else:
+        date_from = date_from[0:4] + '-' + date_from[4:6] + '-' + date_from[6:8]
+        date_to = date_to[0:4] + '-' + date_to[4:6] + '-' + date_to[6:8]
+
+    domains_by_day = {}
+    domain_metadata = {}
+    for date in date_range:
+        if domains_up:
+            domains_up = True
+            domains_by_day[date] = list(r_serv_onion.smembers('onion_up:{}'.format(date)))
+            for domain in domains_by_day[date]:
+                h = HiddenServices(domain, 'onion')
+                domain_metadata[domain] = {}
+                if domains_tags:
+                    domains_tags = True
+                    domain_metadata[domain]['tags'] = h.get_domain_tags(update=True)
+
+                domain_metadata[domain]['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(domain), 'last_check')
+                if domain_metadata[domain]['last_check'] is None:
+                    domain_metadata[domain]['last_check'] = '********'
+                domain_metadata[domain]['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(domain), 'first_seen')
+                if domain_metadata[domain]['first_seen'] is None:
+                    domain_metadata[domain]['first_seen'] = '********'
+                domain_metadata[domain]['status_text'] = 'UP'
+                domain_metadata[domain]['status_color'] = 'Green'
+                domain_metadata[domain]['status_icon'] = 'fa-check-circle'
+
+        if domains_down:
+            domains_down = True
+            domains_by_day_down = list(r_serv_onion.smembers('onion_down:{}'.format(date)))
+            if domains_up:
+                domains_by_day[date].extend(domains_by_day_down)
+            else:
+                domains_by_day[date] = domains_by_day_down
+            for domain in domains_by_day_down:
+                #h = HiddenServices(onion_domain, 'onion')
+                domain_metadata[domain] = {}
+                #domain_metadata[domain]['tags'] = h.get_domain_tags()
+
+                domain_metadata[domain]['last_check'] = r_serv_onion.hget('onion_metadata:{}'.format(domain), 'last_check')
+                if domain_metadata[domain]['last_check'] is None:
+                    domain_metadata[domain]['last_check'] = '********'
+                domain_metadata[domain]['first_seen'] = r_serv_onion.hget('onion_metadata:{}'.format(domain), 'first_seen')
+                if domain_metadata[domain]['first_seen'] is None:
+                    domain_metadata[domain]['first_seen'] = '********'
+
+                domain_metadata[domain]['status_text'] = 'DOWN'
+                domain_metadata[domain]['status_color'] = 'Red'
+                domain_metadata[domain]['status_icon'] = 'fa-times-circle'
+
+    return render_template("domains.html", date_range=date_range, domains_by_day=domains_by_day, domain_metadata=domain_metadata,
+                            date_from=date_from, date_to=date_to, domains_up=domains_up, domains_down=domains_down,
+                            domains_tags=domains_tags, bootstrap_label=bootstrap_label)
 
 @hiddenServices.route("/hiddenServices/onion_domain", methods=['GET'])
 def onion_domain():
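The `last_crawled_domains_with_stats_json` route added above exposes the same data as the dashboard (last onions, per-day domain statistics and per-crawler status) as JSON, so the page can be refreshed without a full reload. A minimal polling sketch; the host, port and any `baseUrl` prefix are assumptions about the local deployment, not part of the patch:

```python
import time
import requests

# Assumed local AIL web server; adjust host/port (and baseUrl prefix) to your setup.
url = 'http://localhost:7000/hiddenServices/last_crawled_domains_with_stats_json'

while True:
    stats = requests.get(url, timeout=10).json()
    print('domains up today  :', stats['statDomains']['domains_up'])
    print('domains down today:', stats['statDomains']['domains_down'])
    print('crawler queue size:', stats['statDomains']['domains_queue'])
    for crawler in stats['crawler_metadata']:
        # crawler_info is '<crawler id> - <started time>', as built in the route above
        print('{} -> {} ({})'.format(crawler['crawler_info'],
                                     crawler['crawling_domain'],
                                     crawler['status_info']))
    time.sleep(30)
```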
@@ -102,8 +269,12 @@ def onion_domain():
         # # TODO: FIXME return 404
 
     last_check = r_serv_onion.hget('onion_metadata:{}'.format(onion_domain), 'last_check')
+    if last_check is None:
+        last_check = '********'
     last_check = '{}/{}/{}'.format(last_check[0:4], last_check[4:6], last_check[6:8])
     first_seen = r_serv_onion.hget('onion_metadata:{}'.format(onion_domain), 'first_seen')
+    if first_seen is None:
+        first_seen = '********'
     first_seen = '{}/{}/{}'.format(first_seen[0:4], first_seen[4:6], first_seen[6:8])
     origin_paste = r_serv_onion.hget('onion_metadata:{}'.format(onion_domain), 'paste_parent')
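Throughout the patch, a missing `last_check` / `first_seen` field is replaced by the `'********'` placeholder before being sliced into a display date, so a domain that was never checked renders as `****/**/**` instead of failing on `None`. A small sketch of that formatting path (the helper name is illustrative, not from the patch):

```python
def format_onion_date(value):
    # Mirrors the behaviour added in onion_domain(): mask a missing Redis
    # hash field, then slice the 'YYYYMMDD' string into 'YYYY/MM/DD'.
    if value is None:
        value = '********'
    return '{}/{}/{}'.format(value[0:4], value[4:6], value[6:8])

print(format_onion_date('20181220'))  # 2018/12/20
print(format_onion_date(None))        # ****/**/**
```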
diff --git a/var/www/modules/hiddenServices/templates/domains.html b/var/www/modules/hiddenServices/templates/domains.html
new file mode 100644
index 00000000..136291b1
--- /dev/null
+++ b/var/www/modules/hiddenServices/templates/domains.html
@@ -0,0 +1,291 @@
+    Hidden Service - AIL
+
+    {% include 'navbar.html' %}
+
+    {% for date in date_range %}
+      {% if domains_by_day[date]%}
+        {{'{}/{}/{}'.format(date[0:4], date[4:6], date[6:8])}}
+
+        Domain    First Seen    Last Check    Status
+        {% for domain in domains_by_day[date] %}
+          {{ domain }}
+          {% for tag in domain_metadata[domain]['tags'] %}
+            {{ tag }} {{ domain_metadata[domain]['tags'][tag] }}
+          {% endfor %}
+          {{'{}/{}/{}'.format(domain_metadata[domain]['first_seen'][0:4], domain_metadata[domain]['first_seen'][4:6], domain_metadata[domain]['first_seen'][6:8])}}
+          {{'{}/{}/{}'.format(domain_metadata[domain]['last_check'][0:4], domain_metadata[domain]['last_check'][4:6], domain_metadata[domain]['last_check'][6:8])}}
+          {{domain_metadata[domain]['status_text']}}
+        {% endfor %}
+      {% endif %}
+    {% endfor %}
+
+    Select domains by date range :
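The new `domains.html` template renders, for each day of the selected range, a table of onion domains with their tags, first seen / last check dates and UP/DOWN status, plus a date-range selection form. A reduced sketch of the context it receives from `show_domains_by_daterange()`; the domain names, dates and tag counts below are made up for illustration:

```python
# Illustrative render context for domains.html (all values invented).
date_range = ['20181219', '20181220']

domains_by_day = {
    '20181219': ['example1.onion'],
    '20181220': ['example2.onion'],
}

domain_metadata = {
    'example1.onion': {
        'first_seen': '20181201', 'last_check': '20181219',
        'status_text': 'UP', 'status_color': 'Green', 'status_icon': 'fa-check-circle',
        'tags': {'example-tag': 3},   # tag -> number of occurrences (assumed)
    },
    'example2.onion': {               # a "down" domain: the route attaches no tags
        'first_seen': '********', 'last_check': '********',
        'status_text': 'DOWN', 'status_color': 'Red', 'status_icon': 'fa-times-circle',
    },
}
```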
diff --git a/var/www/modules/hiddenServices/templates/hiddenServices.html b/var/www/modules/hiddenServices/templates/hiddenServices.html
index 59aeb2ae..58b5937f 100644
--- a/var/www/modules/hiddenServices/templates/hiddenServices.html
+++ b/var/www/modules/hiddenServices/templates/hiddenServices.html
@@ -12,11 +12,17 @@
@@ -74,6 +80,7 @@
+ {% set uniq_id = namespace(modal_id=0)%}