Improved description of modules inside the scripts

2024-11-30 01:37:17 +00:00 · 2017-05-09 11:13:16 +02:00 · 2017-05-09 11:13:16 +02:00 · 3a4dcd691d
commit 3a4dcd691d
parent 2187c8338e
21 changed files with 120 additions and 56 deletions
--- a/README.md
+++ b/README.md
@ -100,7 +100,7 @@ Terms manager and occurence

 ![Term-Manager](./doc/screenshots/terms-manager.png?raw=true "AIL framework termManager")

-## Top terms
+### Top terms

 ![Term-Top](./doc/screenshots/terms-top.png?raw=true "AIL framework termTop")
 ![Term-Plot](./doc/screenshots/terms-plot.png?raw=true "AIL framework termPlot")
@ -108,6 +108,10 @@ Terms manager and occurence

 [AIL framework screencast](https://www.youtube.com/watch?v=1_ZrZkRKmNo)

+Command line module manager
+---------------------------
+
+![Module-Manager](./doc/screenshots/module-manager.png?raw=true "AIL framework ModuleInformationV2.py")

 License
 =======
--- a/bin/Attributes.py
+++ b/bin/Attributes.py
@ -5,25 +5,7 @@
 The ZMQ_Sub_Attribute Module
 ============================

-This module is consuming the Redis-list created by the ZMQ_PubSub_Line_Q Module
-
-It perform a sorting on the line's length and publish/forward them to
-differents channels:
-
-*Channel 1 if max length(line) < max
-*Channel 2 if max length(line) > max
-
-The collected informations about the processed pastes
-(number of lines and maximum length line) are stored in Redis.
-
-..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
-the same Subscriber name in both of them.
-
-Requirements
------------
-
-*Need running Redis instances. (LevelDB & Redis)
-*Need the ZMQ_PubSub_Line_Q Module running to be able to work properly.
+This module saves the attributes of the paste into Redis.

 """
 import time
--- a/bin/Credential.py
+++ b/bin/Credential.py
@ -1,5 +1,16 @@
 #!/usr/bin/env python2
 # -*-coding:UTF-8 -*
+
+"""
+The Credential Module
+=====================
+
+This module is consuming the Redis-list created by the Categ module.
+
+It applies credential regexes on paste content and warns if above a threshold.
+
+"""
+
 import time
 import sys
 from packages import Paste
--- a/bin/CreditCards.py
+++ b/bin/CreditCards.py
@ -1,5 +1,17 @@
 #!/usr/bin/env python
 # -*-coding:UTF-8 -*
+
+"""
+The CreditCards Module
+======================
+
+This module is consuming the Redis-list created by the Categ module.
+
+It applies credit card regexes on paste content and warns if above a threshold.
+
+"""
+
+
 import pprint
 import time
 from packages import Paste
@ -7,7 +19,6 @@ from packages import lib_refine
 from pubsublogger import publisher
 import re

-
 from Helper import Process

 if __name__ == "__main__":
--- a/bin/CurveManageTopSets.py
+++ b/bin/CurveManageTopSets.py
@ -5,14 +5,6 @@
 This module manage top sets for terms frequency.
 Every 'refresh_rate' update the weekly and monthly set

-
-Requirements
------------
-
-*Need running Redis instances. (Redis)
-*Categories files of words in /files/ need to be created
-*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
-
 """

 import redis
--- a/bin/Cve.py
+++ b/bin/Cve.py
@ -1,7 +1,13 @@
 #!/usr/bin/env python2
 # -*-coding:UTF-8 -*
 """
-    Template for new modules
+The CVE Module
+======================
+
+This module is consuming the Redis-list created by the Categ module.
+
+It applies CVE regexes on paste content and warns if a reference to a CVE is spotted.
+
 """

 import time
--- a/bin/DomClassifier.py
+++ b/bin/DomClassifier.py
@ -5,8 +5,8 @@
 The DomClassifier Module
 ============================

-The DomClassifier modules is fetching the list of files to be
-processed and index each file with a full-text indexer (Whoosh until now).
+The DomClassifier module extracts and classifies Internet domains/hostnames/IP addresses from
+the output of the Global module.

 """
 import time
--- a/bin/Keys.py
+++ b/bin/Keys.py
@ -1,7 +1,14 @@
 #!/usr/bin/env python2
 # -*-coding:UTF-8 -*
+
 """
-    Template for new modules
+The Keys Module
+======================
+
+This module is consuming the Redis-list created by the Global module.
+
+It is looking for PGP encrypted messages
+
 """

 import time
--- a/bin/Mail.py
+++ b/bin/Mail.py
@ -1,6 +1,16 @@
 #!/usr/bin/env python
 # -*-coding:UTF-8 -*

+"""
+The Mail Module
+===============
+
+This module is consuming the Redis-list created by the Categ module.
+
+It applies mail regexes on paste content and warns if above a threshold.
+
+"""
+
 import redis
 import pprint
 import time
--- a/bin/Mixer.py
+++ b/bin/Mixer.py
@ -1,8 +1,8 @@
 #!/usr/bin/env python
 # -*-coding:UTF-8 -*
 """
-The ZMQ_Feed_Q Module
-=====================
+The Mixer Module
+================

 This module is consuming the Redis-list created by the ZMQ_Feed_Q Module.

@ -22,13 +22,7 @@ Depending on the configuration, this module will process the feed as follow:
 Note that the hash of the content is defined as the sha1(gzip64encoded).

 Every data coming from a named feed can be sent to a pre-processing module before going to the global module.
-The mapping can be done via the variable feed_queue_mapping
-
-Requirements
------------
-
-*Need running Redis instances.
-*Need the ZMQ_Feed_Q Module running to be able to work properly.
+The mapping can be done via the variable FEED_QUEUE_MAPPING

 """
 import base64
@ -44,7 +38,7 @@ from Helper import Process

 # CONFIG #
 refresh_time = 30
-feed_queue_mapping = { "feeder2": "preProcess1" } # Map a feeder name to a pre-processing module
+FEED_QUEUE_MAPPING = { "feeder2": "preProcess1" } # Map a feeder name to a pre-processing module

 if __name__ == '__main__':
    publisher.port = 6380
@ -117,8 +111,8 @@ if __name__ == '__main__':
                    else: # New content

                        # populate Global OR populate another set based on the feeder_name
-                        if feeder_name in feed_queue_mapping:
-                            p.populate_set_out(relay_message, feed_queue_mapping[feeder_name])
+                        if feeder_name in FEED_QUEUE_MAPPING:
+                            p.populate_set_out(relay_message, FEED_QUEUE_MAPPING[feeder_name])
                        else:
                            p.populate_set_out(relay_message, 'Mixer')

@ -139,8 +133,8 @@ if __name__ == '__main__':
                        server.expire('HASH_'+paste_name, ttl_key)

                        # populate Global OR populate another set based on the feeder_name
-                        if feeder_name in feed_queue_mapping:
-                            p.populate_set_out(relay_message, feed_queue_mapping[feeder_name])
+                        if feeder_name in FEED_QUEUE_MAPPING:
+                            p.populate_set_out(relay_message, FEED_QUEUE_MAPPING[feeder_name])
                        else:
                            p.populate_set_out(relay_message, 'Mixer')

@ -153,8 +147,8 @@ if __name__ == '__main__':
                            server.expire(paste_name, ttl_key)

                            # populate Global OR populate another set based on the feeder_name
-                            if feeder_name in feed_queue_mapping:
-                                p.populate_set_out(relay_message, feed_queue_mapping[feeder_name])
+                            if feeder_name in FEED_QUEUE_MAPPING:
+                                p.populate_set_out(relay_message, FEED_QUEUE_MAPPING[feeder_name])
                            else:
                                p.populate_set_out(relay_message, 'Mixer')

--- a/bin/Phone.py
+++ b/bin/Phone.py
@ -1,7 +1,14 @@
 #!/usr/bin/env python2
 # -*-coding:UTF-8 -*
+
 """
-    module for finding phone numbers
+The Phone Module
+================
+
+This module is consuming the Redis-list created by the Categ module.
+
+It applies phone number regexes on paste content and warns if above a threshold.
+
 """

 import time
--- a/bin/RegexForTermsFrequency.py
+++ b/bin/RegexForTermsFrequency.py
@ -2,6 +2,8 @@
 # -*-coding:UTF-8 -*
 """
 This Module is used for term frequency.
+It processes every paste coming from the global module and tests the regexes
+supplied in the term webpage.

 """
 import redis
--- a/bin/Release.py
+++ b/bin/Release.py
@ -6,6 +6,11 @@ from pubsublogger import publisher
 from Helper import Process
 import re

+'''
+This module takes its input from the global module.
+It applies some regexes and publishes the matched content.
+'''
+
 if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"
--- a/bin/SQLInjectionDetection.py
+++ b/bin/SQLInjectionDetection.py
@ -1,7 +1,14 @@
 #!/usr/bin/env python2
 # -*-coding:UTF-8 -*
+
 """
-    Sql Injection module
+The SQLInjectionDetection Module
+================================
+
+This module is consuming the Redis-list created by the Web module.
+
+It tests for different possibilities of SQL injection.
+
 """

 import time
--- a/bin/SentimentAnalysis.py
+++ b/bin/SentimentAnalysis.py
@ -4,8 +4,8 @@
    Sentiment analyser module.
    It takes its inputs from 'global'.

-    The content analysed comes from the pastes with length of the line 
-    above a defined threshold removed (get_p_content_with_removed_lines).
+    The content is analysed after the lines whose length is
+    above a defined threshold are removed (get_p_content_with_removed_lines).
    This is done because NLTK sentences tokemnizer (sent_tokenize) seems to crash
    for long lines (function _slices_from_text line#1276).

--- a/bin/SetForTermsFrequency.py
+++ b/bin/SetForTermsFrequency.py
@ -2,6 +2,8 @@
 # -*-coding:UTF-8 -*
 """
 This Module is used for term frequency.
+It processes every paste coming from the global module and tests the sets
+supplied in the term webpage.

 """
 import redis
--- a/bin/Tokenize.py
+++ b/bin/Tokenize.py
@ -1,8 +1,8 @@
 #!/usr/bin/env python
 # -*-coding:UTF-8 -*
 """
-The ZMQ_PubSub_Lines Module
-============================
+The Tokenize Module
+===================

 This module is consuming the Redis-list created by the ZMQ_PubSub_Tokenize_Q
 Module.
--- a/bin/Web.py
+++ b/bin/Web.py
@ -1,5 +1,14 @@
 #!/usr/bin/env python
 # -*-coding:UTF-8 -*
+
+"""
+The Web Module
+============================
+
+This module tries to parse URLs and warns if some defined country codes are present.
+
+"""
+
 import redis
 import pprint
 import time
--- a/bin/WebStats.py
+++ b/bin/WebStats.py
@ -1,7 +1,13 @@
 #!/usr/bin/env python2
 # -*-coding:UTF-8 -*
+
 """
-    Template for new modules
+The WebStats Module
+======================
+
+This module computes statistics on the URLs collected by the Web module.
+It considers the TLD, domain and protocol.
+
 """

 import time
--- a/bin/preProcessFeed.py
+++ b/bin/preProcessFeed.py
@ -1,6 +1,15 @@
 #!/usr/bin/env python2
 # -*-coding:UTF-8 -*

+'''
+The preProcess Module
+=====================
+
+This module is just an example of how we can pre-process a feed coming from the Mixer
+module before sending it to the Global module.
+
+'''
+
 import time
 from pubsublogger import publisher

--- a/doc/screenshots/sentiment.png
+++ b/doc/screenshots/sentiment.png