From 1aa769842072f5721266fb21f025ebd72c46e1e1 Mon Sep 17 00:00:00 2001 From: Blackbern Date: Fri, 5 Feb 2016 13:14:52 +0100 Subject: [PATCH 1/6] ajout d'un script d'installation pour archlinux via pacman et yaourt. --- .gitignore | 3 ++ installing_deps.sh | 8 ++--- installing_deps_archlinux.sh | 60 ++++++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 4 deletions(-) create mode 100644 installing_deps_archlinux.sh diff --git a/.gitignore b/.gitignore index 8fac5792..264aaa5f 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,6 @@ var/www/static/ # Local config bin/packages/config.cfg + +# installed files +nltk_data/ diff --git a/installing_deps.sh b/installing_deps.sh index 5756a1d4..f50b3313 100755 --- a/installing_deps.sh +++ b/installing_deps.sh @@ -9,17 +9,17 @@ sudo apt-get install python-pip python-virtualenv python-dev libfreetype6-dev \ screen g++ python-tk unzip libsnappy-dev #Needed for bloom filters -sudo apt-get install libssl-dev libfreetype6-dev python-numpy +sudo pacman -S openssl python2-numpy --needed # DNS deps -sudo apt-get install libadns1 libadns1-dev +sudo pacman -S adns --needed #Needed for redis-lvlDB -sudo apt-get install libev-dev libgmp-dev +sudo pacman -S libev gmp --needed #needed for mathplotlib test ! -L /usr/include/ft2build.h && sudo ln -s freetype2/ft2build.h /usr/include/ -sudo easy_install -U distribute +sudo easy_install-2.7 -U distribute # REDIS # test ! -d redis/ && git clone https://github.com/antirez/redis.git diff --git a/installing_deps_archlinux.sh b/installing_deps_archlinux.sh new file mode 100644 index 00000000..b921d3c0 --- /dev/null +++ b/installing_deps_archlinux.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +set -e +set -x + +sudo pacman -Syu + +sudo pacman -S python2-pip screen gcc unzip freetype2 python2 git --needed +sudo yaourt -S snappy --needed +sudo pip2 install virtualenv + +#Needed for bloom filters +sudo pacman -S openssl python2-numpy --needed + +# DNS deps +sudo pacman -S adns --needed + +#Needed for redis-lvlDB +sudo pacman -S libev gmp --needed + +#needed for mathplotlib +test ! -L /usr/include/ft2build.h && sudo ln -s freetype2/ft2build.h /usr/include/ +sudo easy_install-2.7 -U distribute + +# REDIS # +test ! -d redis/ && git clone https://github.com/antirez/redis.git +pushd redis/ +git checkout 2.8 +make +popd + +# REDIS LEVEL DB # +test ! -d redis-leveldb/ && git clone https://github.com/KDr2/redis-leveldb.git +pushd redis-leveldb/ +git submodule init +git submodule update +make +popd + +if [ ! -f bin/packages/config.cfg ]; then + cp bin/packages/config.cfg.sample bin/packages/config.cfg +fi + +virtualenv AILENV + +echo export AIL_HOME=$(pwd) >> ./AILENV/bin/activate +echo export AIL_BIN=$(pwd)/bin/ >> ./AILENV/bin/activate +echo export AIL_FLASK=$(pwd)/var/www/ >> ./AILENV/bin/activate +echo export AIL_REDIS=$(pwd)/redis/src/ >> ./AILENV/bin/activate +echo export AIL_LEVELDB=$(pwd)/redis-leveldb/ >> ./AILENV/bin/activate + +. ./AILENV/bin/activate + +mkdir -p $AIL_HOME/{PASTES,Blooms,dumps} +mkdir -p $AIL_HOME/LEVEL_DB_DATA/{2014,2013} + +pip install -r pip_packages_requirement.txt + +# Download the necessary NLTK corpora +HOME=$(pwd) python -m textblob.download_corpora From 714f569d15c5812123bcf9660eec95cf30ec6f41 Mon Sep 17 00:00:00 2001 From: Blackbern Date: Fri, 5 Feb 2016 13:18:50 +0100 Subject: [PATCH 2/6] Correction de installing_deps.sh --- installing_deps.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/installing_deps.sh b/installing_deps.sh index f50b3313..8f8562e1 100755 --- a/installing_deps.sh +++ b/installing_deps.sh @@ -7,15 +7,14 @@ sudo apt-get update sudo apt-get install python-pip python-virtualenv python-dev libfreetype6-dev \ screen g++ python-tk unzip libsnappy-dev - #Needed for bloom filters -sudo pacman -S openssl python2-numpy --needed +sudo apt-get install libssl-dev libfreetype6-dev python-numpy # DNS deps -sudo pacman -S adns --needed +sudo apt-get install libadns1 libadns1-dev #Needed for redis-lvlDB -sudo pacman -S libev gmp --needed +sudo apt-get install libev-dev libgmp-dev #needed for mathplotlib test ! -L /usr/include/ft2build.h && sudo ln -s freetype2/ft2build.h /usr/include/ From 2859162b01fb8e0fe3719864b3f78bcd872895da Mon Sep 17 00:00:00 2001 From: Blackbern Date: Fri, 5 Feb 2016 13:20:23 +0100 Subject: [PATCH 3/6] Autre correction de installing_deps.sh --- installing_deps.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/installing_deps.sh b/installing_deps.sh index 8f8562e1..5756a1d4 100755 --- a/installing_deps.sh +++ b/installing_deps.sh @@ -7,6 +7,7 @@ sudo apt-get update sudo apt-get install python-pip python-virtualenv python-dev libfreetype6-dev \ screen g++ python-tk unzip libsnappy-dev + #Needed for bloom filters sudo apt-get install libssl-dev libfreetype6-dev python-numpy @@ -18,7 +19,7 @@ sudo apt-get install libev-dev libgmp-dev #needed for mathplotlib test ! -L /usr/include/ft2build.h && sudo ln -s freetype2/ft2build.h /usr/include/ -sudo easy_install-2.7 -U distribute +sudo easy_install -U distribute # REDIS # test ! -d redis/ && git clone https://github.com/antirez/redis.git From 43b3556588d782f2826c20b0956190007e87e113 Mon Sep 17 00:00:00 2001 From: Alain Date: Fri, 5 Feb 2016 13:58:21 -0500 Subject: [PATCH 4/6] Starting Phone number recognition --- bin/Phone.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 bin/Phone.py diff --git a/bin/Phone.py b/bin/Phone.py new file mode 100644 index 00000000..87caf772 --- /dev/null +++ b/bin/Phone.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* +""" + module for finding phone numbers +""" + +import time +import pprint +import re +from packages import Paste +from packages import lib_refine +from pubsublogger import publisher +from Helper import Process + + +def search_phone(message): + paste = Paste.Paste(message) + content = paste.get_p_content() + # regex to find phone numbers, may raise many false positives (shalt thou seek optimization, upgrading is required) + reg_phone = re.compile(r'(\+\d{1,3}\(\d{1,2}\)\d?)?(\d{2,4}[\W\D\s]?){4,6} ') + # list of the regex results in the Paste, may be null + results = reg_phone.findall(content) + + # if the list is greater than 4, we consider the Paste may contain a list of phone numbers + if len(results) > 4 : + print results + publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name)) + + if __name__ == '__main__': + # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) + # Port of the redis instance used by pubsublogger + publisher.port = 6380 + # Script is the default channel used for the modules. + publisher.channel = 'Script' + + # Section name in bin/packages/modules.cfg + config_section = 'Phone' + + # Setup the I/O queues + p = Process(config_section) + + # Sent to the logging a description of the module + publisher.info("Run Phone module") + + # Endless loop getting messages from the input queue + while True: + # Get one message from the input queue + message = p.get_from_set() + if message is None: + publisher.debug("{} queue is empty, waiting".format(config_section)) + time.sleep(1) + continue + + # Do something with the message from the queue + search_phone(message) + From fabbfd8ae9a606ce3fe13949cf066a2129880470 Mon Sep 17 00:00:00 2001 From: Alain Date: Fri, 5 Feb 2016 14:00:41 -0500 Subject: [PATCH 5/6] Update module.cfg (adding Keys and Phone section) --- bin/packages/modules.cfg | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg index e408b84f..1638f7be 100644 --- a/bin/packages/modules.cfg +++ b/bin/packages/modules.cfg @@ -46,3 +46,9 @@ subscribe = Redis_ValidOnion [Web] subscribe = Redis_Web publish = Redis_Url,ZMQ_Url + +[Keys] +subscribe = Redis_Global + +[Phone] +subscribe = Redis_Global From ea52fd106818f28a1954c6bcc2b9b5794c616ea1 Mon Sep 17 00:00:00 2001 From: Alain Date: Fri, 5 Feb 2016 20:58:02 +0100 Subject: [PATCH 6/6] Phone regex updated Might still need to be fixed / optimized, in case of maths or random numbers starting with a 0. Do not capture dates, hours, coordinates anymore. Captured formats are: e.g. +331234567890 ; 09 12 34 56 78 ; +4177/123.45.69 ; +352(0)6-23-23-23... --- bin/Phone.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/Phone.py b/bin/Phone.py index 87caf772..384040cf 100644 --- a/bin/Phone.py +++ b/bin/Phone.py @@ -17,7 +17,7 @@ def search_phone(message): paste = Paste.Paste(message) content = paste.get_p_content() # regex to find phone numbers, may raise many false positives (shalt thou seek optimization, upgrading is required) - reg_phone = re.compile(r'(\+\d{1,3}\(\d{1,2}\)\d?)?(\d{2,4}[\W\D\s]?){4,6} ') + reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})') # list of the regex results in the Paste, may be null results = reg_phone.findall(content)