From 56b6659d8be27cc7785df7e186ebfa4dced5f6e0 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Wed, 13 Jul 2016 08:59:48 +0200 Subject: [PATCH 01/14] Commented out get_language because it adds too much overhead --- bin/Attribute.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/Attribute.py b/bin/Attribute.py index 46d80858..a7f78696 100755 --- a/bin/Attribute.py +++ b/bin/Attribute.py @@ -51,12 +51,13 @@ if __name__ == "__main__": PST = Paste.Paste(message) else: publisher.debug("Script Attribute is idling 1s") + print 'sleeping' time.sleep(1) continue # FIXME do it directly in the class PST.save_attribute_redis("p_encoding", PST._get_p_encoding()) - PST.save_attribute_redis("p_language", PST._get_p_language()) + #PST.save_attribute_redis("p_language", PST._get_p_language()) # FIXME why not all saving everything there. PST.save_all_attributes_redis() # FIXME Not used. From 594d2def359e055f1c8d94dc17ab33eec295bbe8 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Wed, 13 Jul 2016 15:57:33 +0200 Subject: [PATCH 02/14] In index: Added number of processed pastes chart --- bin/Global.py | 9 +++++ var/www/Flask_server.py | 3 +- var/www/static/js/indexjavascript.js | 60 ++++++++++++++++++++++++++++ var/www/templates/index.html | 15 +++++-- 4 files changed, 83 insertions(+), 4 deletions(-) diff --git a/bin/Global.py b/bin/Global.py index fb44c70b..8b6e482f 100755 --- a/bin/Global.py +++ b/bin/Global.py @@ -31,6 +31,8 @@ from Helper import Process if __name__ == '__main__': publisher.port = 6380 publisher.channel = 'Script' + processed_paste = 0 + time_1 = time.time() config_section = 'Global' @@ -54,6 +56,12 @@ if __name__ == '__main__': continue else: print "Empty Queues: Waiting..." + if int(time.time() - time_1) > 30: + to_print = 'Global; ; ; ;glob Processed {0} paste(s)'.format(processed_paste) + print to_print + publisher.info(to_print) + time_1 = time.time() + processed_paste = 0 time.sleep(1) continue # Creating the full filepath @@ -66,3 +74,4 @@ if __name__ == '__main__': with open(filename, 'wb') as f: f.write(base64.standard_b64decode(gzip64encoded)) p.populate_set_out(filename) + processed_paste+=1 diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 018608f1..36fcfbcc 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -122,7 +122,8 @@ def search(): @app.route("/") def index(): - return render_template("index.html") + default_minute = cfg.get("Flask", "minute_processed_paste") + return render_template("index.html", default_minute = default_minute) @app.route("/monitoring/") diff --git a/var/www/static/js/indexjavascript.js b/var/www/static/js/indexjavascript.js index 7eb7e5c5..ef9bf0c6 100644 --- a/var/www/static/js/indexjavascript.js +++ b/var/www/static/js/indexjavascript.js @@ -1,3 +1,54 @@ +// Plot and update the number of processed pastes +$(function() { + var data = []; + var totalPoints = 60*10; //60s*10m + var curr_max = 0; + + function getData() { + if (data.length > 0){ + curr_max = curr_max == data[0] ? Math.max.apply(null, data) : curr_max; + data = data.slice(1); + } + + while (data.length < totalPoints) { + var y = (typeof window.paste_num_tabvar !== "undefined") ? window.paste_num_tabvar : 0; + curr_max = curr_max < y ? 
y : curr_max; + data.push(y); + } + + // Zip the generated y values with the x values + var res = []; + for (var i = 0; i < data.length; ++i) { + res.push([i, data[i]]) + } + return res; + } + + var updateInterval = 1000; + var options = { + series: { shadowSize: 1 }, + lines: { fill: true, fillColor: { colors: [ { opacity: 1 }, { opacity: 0.1 } ] }}, + yaxis: { min: 0, max: 40 }, + xaxis: { show: false }, + colors: ["#F4A506"], + grid: { + tickColor: "#dddddd", + borderWidth: 0 + }, + }; + var plot = $.plot("#realtimechart", [ getData() ], options); + + function update() { +console.log(curr_max); + plot.setData([getData()]); + plot.getOptions().yaxes[0].max = curr_max; + plot.setupGrid(); + plot.draw(); + setTimeout(update, updateInterval); + } + update(); +}); + function initfunc( csvay, scroot) { window.csv = csvay; window.scroot = scroot; @@ -38,6 +89,13 @@ function create_log_table(obj_json) { var chansplit = obj_json.channel.split('.'); var parsedmess = obj_json.data.split(';'); + if (parsedmess[0] == "Global"){ + var paste_processed = parsedmess[4].split(" ")[2]; + console.log(paste_processed) + window.paste_num_tabvar = paste_processed; + return; + } + if( chansplit[1] == "INFO" ){ tr.className = "info"; } @@ -270,3 +328,5 @@ $(document).ready(function () { } }); + + diff --git a/var/www/templates/index.html b/var/www/templates/index.html index eb34aaf0..551c56b8 100644 --- a/var/www/templates/index.html +++ b/var/www/templates/index.html @@ -14,9 +14,10 @@ - + + - From 60552bca4df0bfad8c095808204fa5c9ee3099ed Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Thu, 14 Jul 2016 10:31:47 +0200 Subject: [PATCH 03/14] Fixed a bug in processed_pastes graph --- var/www/static/js/indexjavascript.js | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/var/www/static/js/indexjavascript.js b/var/www/static/js/indexjavascript.js index ef9bf0c6..628ffe86 100644 --- a/var/www/static/js/indexjavascript.js +++ b/var/www/static/js/indexjavascript.js @@ -6,16 +6,16 @@ $(function() { function getData() { if (data.length > 0){ - curr_max = curr_max == data[0] ? Math.max.apply(null, data) : curr_max; - data = data.slice(1); + var data_old = data[0]; + data = data.slice(1); + curr_max = curr_max == data_old ? Math.max.apply(null, data) : curr_max; } while (data.length < totalPoints) { - var y = (typeof window.paste_num_tabvar !== "undefined") ? window.paste_num_tabvar : 0; - curr_max = curr_max < y ? y : curr_max; - data.push(y); + var y = (typeof window.paste_num_tabvar !== "undefined") ? parseInt(window.paste_num_tabvar) : 0; + curr_max = y > curr_max ? 
y : curr_max; + data.push(y); } - // Zip the generated y values with the x values var res = []; for (var i = 0; i < data.length; ++i) { @@ -29,17 +29,15 @@ $(function() { series: { shadowSize: 1 }, lines: { fill: true, fillColor: { colors: [ { opacity: 1 }, { opacity: 0.1 } ] }}, yaxis: { min: 0, max: 40 }, - xaxis: { show: false }, - colors: ["#F4A506"], + colors: ["#a971ff"], grid: { - tickColor: "#dddddd", - borderWidth: 0 + tickColor: "#dddddd", + borderWidth: 0 }, }; var plot = $.plot("#realtimechart", [ getData() ], options); function update() { -console.log(curr_max); plot.setData([getData()]); plot.getOptions().yaxes[0].max = curr_max; plot.setupGrid(); @@ -91,7 +89,6 @@ function create_log_table(obj_json) { if (parsedmess[0] == "Global"){ var paste_processed = parsedmess[4].split(" ")[2]; - console.log(paste_processed) window.paste_num_tabvar = paste_processed; return; } From 0332f23579ae7dee01c7f42db72aff070c8aa019 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Fri, 15 Jul 2016 08:56:16 +0200 Subject: [PATCH 04/14] Added SimHash library --- bin/Duplicate.py | 7 ++++--- bin/packages/Hash.py | 4 ++++ pip_packages_requirement.txt | 1 + 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/bin/Duplicate.py b/bin/Duplicate.py index a7a41dc1..59610f83 100755 --- a/bin/Duplicate.py +++ b/bin/Duplicate.py @@ -74,9 +74,9 @@ if __name__ == "__main__": # Creating the bloom filter name: bloomyyyymm filebloompath = os.path.join(bloompath, 'bloom' + PST.p_date.year + PST.p_date.month) - if os.path.exists(filebloompath): bloom = BloomFilter.open(filebloompath) + bloop_path_set.add(filebloompath) else: bloom = BloomFilter(100000000, 0.01, filebloompath) bloop_path_set.add(filebloompath) @@ -94,7 +94,6 @@ if __name__ == "__main__": for bloo in bloop_path_set: # Opening blooms opened_bloom.append(BloomFilter.open(bloo)) - # For each hash of the paste for line_hash in PST._get_hash_lines(min=5, start=1, jump=0): nb_hash_current += 1 @@ -105,7 +104,6 @@ if __name__ == "__main__": r_serv1.sadd("HASHS", line_hash) # Adding the hash in the bloom of the month bloom.add(line_hash) - # Go throught the Database of the bloom filter (of the month) for bloo in opened_bloom: if line_hash in bloo: @@ -148,6 +146,8 @@ if __name__ == "__main__": percentage = round((count/float(nb_hash_current))*100, 2) if percentage >= 50: dupl.append((paste, percentage)) + else: + print 'percentage: ' + str(percentage) # Creating the object attribute and save it. 
to_print = 'Duplicate;{};{};{};'.format( @@ -156,6 +156,7 @@ if __name__ == "__main__": PST.__setattr__("p_duplicate", dupl) PST.save_attribute_redis("p_duplicate", dupl) publisher.info('{}Detected {}'.format(to_print, len(dupl))) + print '{}Detected {}'.format(to_print, len(dupl)) y = time.time() diff --git a/bin/packages/Hash.py b/bin/packages/Hash.py index f8dcac0f..d46abcba 100644 --- a/bin/packages/Hash.py +++ b/bin/packages/Hash.py @@ -1,6 +1,7 @@ import hashlib import crcmod import mmh3 +import simhash class Hash(object): @@ -32,4 +33,7 @@ class Hash(object): elif self.name == "murmur": hash = mmh3.hash(string) + elif self.name == "simhash": + hash = Simhash(string) + return hash diff --git a/pip_packages_requirement.txt b/pip_packages_requirement.txt index 40dcda8e..db2f23c5 100644 --- a/pip_packages_requirement.txt +++ b/pip_packages_requirement.txt @@ -17,6 +17,7 @@ nltk # Hashlib crcmod mmh3 +simhash #Others python-magic From 14e9850dd6d1f87a78276dcf317b9f0f3605960b Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Fri, 15 Jul 2016 16:58:48 +0200 Subject: [PATCH 05/14] Added new module for Duplicate paste. Seems working but has some small bug (re-check same paste twice) --- bin/Duplicate_ssdeep.py | 180 ++++++++++++++++++++++++++++++++++++++++ bin/LAUNCH.sh | 2 +- bin/packages/Hash.py | 6 +- 3 files changed, 184 insertions(+), 4 deletions(-) create mode 100755 bin/Duplicate_ssdeep.py diff --git a/bin/Duplicate_ssdeep.py b/bin/Duplicate_ssdeep.py new file mode 100755 index 00000000..916bc0ba --- /dev/null +++ b/bin/Duplicate_ssdeep.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* + +""" +The Duplicate module +==================== + +This huge module is, in short term, checking duplicates. + +Requirements: +------------- + + +""" +import redis +import os +import time +import datetime +import json +import ssdeep +from packages import Paste +from pubsublogger import publisher +from pybloomfilter import BloomFilter + +from Helper import Process + +if __name__ == "__main__": + publisher.port = 6380 + publisher.channel = "Script" + + config_section = 'Duplicates' + saved_dico_and_reload = 1 #min + time_1 = time.time() + flag_reload = True + flag_to_disk = False + + p = Process(config_section) + + # REDIS # + # DB OBJECT & HASHS ( DISK ) + # FIXME increase flexibility + dico_redis = {} + for year in xrange(2013, datetime.date.today().year+1): + for month in xrange(0, 16): + dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis( + host=p.config.get("Redis_Level_DB", "host"), port=year, + db=month) + #print("dup: "+str(year)+str(month).zfill(2)+"\n") + + # FUNCTIONS # + publisher.info("Script duplicate started") + + dicopath = os.path.join(os.environ['AIL_HOME'], + p.config.get("Directories", "dicofilters")) + + dico_path_set = set() + while True: + try: + hash_dico = {} + dupl = [] + + x = time.time() + + message = p.get_from_set() + if message is not None: + path = message + PST = Paste.Paste(path) + else: + publisher.debug("Script Attribute is idling 10s") + time.sleep(10) + continue + + PST._set_p_hash_kind("ssdeep") + + # Assignate the correct redis connexion + r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month] + + # Creating the dicor name: dicoyyyymm + filedicopath = os.path.join(dicopath, 'dico' + PST.p_date.year + + PST.p_date.month) + filedicopath_today = filedicopath + + # Save I/O + if time.time() - time_1 > saved_dico_and_reload*60: + flag_to_disk = True + + if os.path.exists(filedicopath): + if flag_reload == True: + flag_reload = False + print 
'Reloading' + time_1 = time.time() + with open(filedicopath, 'r') as fp: + today_dico = json.load(fp) + else: + time_1 = time.time() + today_dico = {} + with open(filedicopath, 'w') as fp: + json.dump(today_dico, fp) + + # For now, just use monthly dico + dico_path_set.add(filedicopath) + + # UNIQUE INDEX HASHS TABLE + yearly_index = str(datetime.date.today().year)+'00' + r_serv0 = dico_redis[yearly_index] + r_serv0.incr("current_index") + index = r_serv0.get("current_index")+str(PST.p_date) + # HASHTABLES PER MONTH (because of r_serv1 changing db) + r_serv1.set(index, PST.p_path) + r_serv1.sadd("INDEX", index) + # For each dico + opened_dico = [] + for dico in dico_path_set: + # Opening dico + if dico == filedicopath_today: + opened_dico.append([dico, today_dico]) + with open(dico, 'r') as fp: + opened_dico.append([dico, json.load(fp)]) + + + #retrieve hash from paste + paste_hash = PST._get_p_hash() + # Adding the hash in Redis + r_serv1.set(paste_hash, index) + r_serv1.sadd("HASHS", paste_hash) + # Go throught the Database of the dico (of the month) + threshold_dup = 10 + for dico_name, dico in opened_dico: + for dico_key, dico_hash in dico.items(): + percent = ssdeep.compare(dico_hash, paste_hash) + if percent > threshold_dup: + db = dico_name[-6:] + # Go throught the Database of the bloom filter (month) + r_serv_dico = dico_redis[db] + + # index of paste + # FIXME Use r_serv_dico and do not consider only 1 server!! + index_current = r_serv1.get(dico_hash) + paste_path = r_serv1.get(index_current) + if paste_path != None: + hash_dico[dico_hash] = (paste_path, percent) + + print 'comparing: ' + str(dico_hash[:20]) + ' and ' + str(paste_hash[:20]) + ' percentage: ' + str(percent) + print ' '+ PST.p_path[44:] +', '+ paste_path[44:] + + ##################### Similarity found ####################### + + # if there is data in this dictionnary + if len(hash_dico) != 0: + for dico_hash, paste_tuple in hash_dico.items(): + paste_path, percent = paste_tuple + dupl.append((paste_path, percent)) + + # Creating the object attribute and save it. 
+ to_print = 'Duplicate;{};{};{};'.format( + PST.p_source, PST.p_date, PST.p_name) + if dupl != []: + PST.__setattr__("p_duplicate", dupl) + PST.save_attribute_redis("p_duplicate", dupl) + publisher.info('{}Detected {}'.format(to_print, len(dupl))) + print '{}Detected {}'.format(to_print, len(dupl)) + + y = time.time() + + publisher.debug('{}Processed in {} sec'.format(to_print, y-x)) + + + # Adding the hash in the dico of the month + today_dico[index] = paste_hash + + if flag_to_disk: + flag_to_disk = False + flag_reload = True + with open(filedicopath, 'w') as fp: + json.dump(today_dico, fp) + except IOError: + to_print = 'Duplicate;{};{};{};'.format( + PST.p_source, PST.p_date, PST.p_name) + print "CRC Checksum Failed on :", PST.p_path + publisher.error('{}CRC Checksum Failed'.format(to_print)) diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index fc8c9ff1..86e155b1 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -105,7 +105,7 @@ function launching_scripts { screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x' sleep 0.1 - screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate.py; read x' + screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate_ssdeep.py; read x' sleep 0.1 screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x' sleep 0.1 diff --git a/bin/packages/Hash.py b/bin/packages/Hash.py index d46abcba..2f34c5c7 100644 --- a/bin/packages/Hash.py +++ b/bin/packages/Hash.py @@ -1,7 +1,7 @@ import hashlib import crcmod import mmh3 -import simhash +import ssdeep class Hash(object): @@ -33,7 +33,7 @@ class Hash(object): elif self.name == "murmur": hash = mmh3.hash(string) - elif self.name == "simhash": - hash = Simhash(string) + elif self.name == "ssdeep": + hash = ssdeep.hash(string) return hash From 4f6813350b08c805fe9207af3ec8b4181f8685ea Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Mon, 18 Jul 2016 15:50:41 +0200 Subject: [PATCH 06/14] Added two new version of duplicate module. 
One with hashes are saved in json on disk The other with only leveldb --- bin/Duplicate_ssdeep.py | 58 ++++++------ bin/Duplicate_ssdeep_v2.py | 160 +++++++++++++++++++++++++++++++++ bin/LAUNCH.sh | 2 +- bin/packages/config.cfg.sample | 7 ++ 4 files changed, 198 insertions(+), 29 deletions(-) create mode 100755 bin/Duplicate_ssdeep_v2.py diff --git a/bin/Duplicate_ssdeep.py b/bin/Duplicate_ssdeep.py index 916bc0ba..1b173eca 100755 --- a/bin/Duplicate_ssdeep.py +++ b/bin/Duplicate_ssdeep.py @@ -20,7 +20,6 @@ import json import ssdeep from packages import Paste from pubsublogger import publisher -from pybloomfilter import BloomFilter from Helper import Process @@ -29,10 +28,10 @@ if __name__ == "__main__": publisher.channel = "Script" config_section = 'Duplicates' - saved_dico_and_reload = 1 #min + save_dico_and_reload = 1 #min time_1 = time.time() - flag_reload = True - flag_to_disk = False + flag_reload_from_disk = True + flag_write_to_disk = False p = Process(config_section) @@ -81,18 +80,16 @@ if __name__ == "__main__": filedicopath_today = filedicopath # Save I/O - if time.time() - time_1 > saved_dico_and_reload*60: - flag_to_disk = True + if time.time() - time_1 > save_dico_and_reload*60: + flag_write_to_disk = True if os.path.exists(filedicopath): - if flag_reload == True: - flag_reload = False + if flag_reload_from_disk == True: + flag_reload_from_disk = False print 'Reloading' - time_1 = time.time() with open(filedicopath, 'r') as fp: today_dico = json.load(fp) else: - time_1 = time.time() today_dico = {} with open(filedicopath, 'w') as fp: json.dump(today_dico, fp) @@ -105,44 +102,47 @@ if __name__ == "__main__": r_serv0 = dico_redis[yearly_index] r_serv0.incr("current_index") index = r_serv0.get("current_index")+str(PST.p_date) - # HASHTABLES PER MONTH (because of r_serv1 changing db) - r_serv1.set(index, PST.p_path) - r_serv1.sadd("INDEX", index) + # For each dico opened_dico = [] for dico in dico_path_set: # Opening dico if dico == filedicopath_today: opened_dico.append([dico, today_dico]) - with open(dico, 'r') as fp: - opened_dico.append([dico, json.load(fp)]) + else: + with open(dico, 'r') as fp: + opened_dico.append([dico, json.load(fp)]) #retrieve hash from paste paste_hash = PST._get_p_hash() - # Adding the hash in Redis - r_serv1.set(paste_hash, index) - r_serv1.sadd("HASHS", paste_hash) + # Go throught the Database of the dico (of the month) - threshold_dup = 10 + threshold_dup = 99 for dico_name, dico in opened_dico: for dico_key, dico_hash in dico.items(): percent = ssdeep.compare(dico_hash, paste_hash) if percent > threshold_dup: db = dico_name[-6:] - # Go throught the Database of the bloom filter (month) + # Go throught the Database of the dico filter (month) r_serv_dico = dico_redis[db] # index of paste - # FIXME Use r_serv_dico and do not consider only 1 server!! 
- index_current = r_serv1.get(dico_hash) - paste_path = r_serv1.get(index_current) + index_current = r_serv_dico.get(dico_hash) + paste_path = r_serv_dico.get(index_current) if paste_path != None: hash_dico[dico_hash] = (paste_path, percent) - print 'comparing: ' + str(dico_hash[:20]) + ' and ' + str(paste_hash[:20]) + ' percentage: ' + str(percent) - print ' '+ PST.p_path[44:] +', '+ paste_path[44:] + #print 'comparing: ' + str(dico_hash[:20]) + ' and ' + str(paste_hash[:20]) + ' percentage: ' + str(percent) + print ' '+ PST.p_path[44:] +', '+ paste_path[44:] + ', ' + str(percent) + # Add paste in DB to prevent its analyse twice + # HASHTABLES PER MONTH (because of r_serv1 changing db) + r_serv1.set(index, PST.p_path) + r_serv1.sadd("INDEX", index) + # Adding the hash in Redis + r_serv1.set(paste_hash, index) + r_serv1.sadd("HASHS", paste_hash) ##################### Similarity found ####################### # if there is data in this dictionnary @@ -168,9 +168,11 @@ if __name__ == "__main__": # Adding the hash in the dico of the month today_dico[index] = paste_hash - if flag_to_disk: - flag_to_disk = False - flag_reload = True + if flag_write_to_disk: + time_1 = time.time() + flag_write_to_disk = False + flag_reload_from_disk = True + print 'writing' with open(filedicopath, 'w') as fp: json.dump(today_dico, fp) except IOError: diff --git a/bin/Duplicate_ssdeep_v2.py b/bin/Duplicate_ssdeep_v2.py new file mode 100755 index 00000000..35874371 --- /dev/null +++ b/bin/Duplicate_ssdeep_v2.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* + +""" +The Duplicate module +==================== + +This huge module is, in short term, checking duplicates. + +This one differ from v1 by only using redis and not json file on disk + +Requirements: +------------- + + +""" +import redis +import os +import time +from datetime import datetime, timedelta +import json +import ssdeep +from packages import Paste +from pubsublogger import publisher + +from Helper import Process + +if __name__ == "__main__": + publisher.port = 6380 + publisher.channel = "Script" + + config_section = 'Duplicates' + + p = Process(config_section) + + maximum_month_range = int(p.config.get("Modules_Duplicates", "maximum_month_range")) + threshold_duplicate = int(p.config.get("Modules_Duplicates", "threshold_duplicate")) + min_paste_size = float(p.config.get("Modules_Duplicates", "min_paste_size")) + + # REDIS # + dico_redis = {} + date_today = datetime.today() + for year in xrange(2013, date_today.year+1): + for month in xrange(0, 13): + dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis( + host=p.config.get("Redis_Level_DB", "host"), port=year, + db=month) + #print("dup: "+str(year)+str(month).zfill(2)+"\n") + + # FUNCTIONS # + publisher.info("Script duplicate started") + + while True: + try: + hash_dico = {} + dupl = [] + dico_range_list = [] + + x = time.time() + + message = p.get_from_set() + if message is not None: + path = message + PST = Paste.Paste(path) + else: + publisher.debug("Script Attribute is idling 10s") + time.sleep(10) + continue + + # the paste is too small + if (PST._get_p_size() < min_paste_size): + continue + + PST._set_p_hash_kind("ssdeep") + + # Assignate the correct redis connexion + r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month] + + # Creating the dico name: yyyymm + # Get the date of the range + date_range = date_today - timedelta(days = maximum_month_range*30.4166666) + num_of_month = (date_today.year - date_range.year)*12 + (date_today.month - date_range.month) + for 
diff_month in xrange(0, num_of_month+1): + curr_date_range = date_today - timedelta(days = diff_month*30.4166666) + to_append = str(curr_date_range.year)+str(curr_date_range.month).zfill(2) + dico_range_list.append(to_append) + + # Use all dico in range + dico_range_list = dico_range_list[0:maximum_month_range] + + # UNIQUE INDEX HASHS TABLE + yearly_index = str(date_today.year)+'00' + r_serv0 = dico_redis[yearly_index] + r_serv0.incr("current_index") + index = r_serv0.get("current_index")+str(PST.p_date) + + # Open selected dico range + opened_dico = [] + for dico_name in dico_range_list: + opened_dico.append([dico_name, dico_redis[dico_name]]) + + # retrieve hash from paste + paste_hash = PST._get_p_hash() + + # Go throught the Database of the dico (of the month) + for curr_dico_name, curr_dico_redis in opened_dico: + for dico_hash in curr_dico_redis.smembers('HASHS'): + try: + percent = ssdeep.compare(dico_hash, paste_hash) + if percent > threshold_duplicate: + # Go throught the Database of the dico filter (month) + r_serv_dico = dico_redis[curr_dico_name] + + # index of paste + index_current = r_serv_dico.get(dico_hash) + paste_path = r_serv_dico.get(index_current) + if paste_path != None: + hash_dico[dico_hash] = (paste_path, percent) + + print 'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent) + #print ' '+ PST.p_path[44:] +', '+ paste_path[44:] + ', ' + str(percent) + except: + # ssdeep hash not comparable + print 'ssdeep hash not comparable' + publisher.error('ssdeep hash not comparable') + + # Add paste in DB after checking to prevent its analysis twice + # hash_i -> index_i AND index_i -> PST.PATH + r_serv1.set(index, PST.p_path) + r_serv1.sadd("INDEX", index) + # Adding the hash in Redis + r_serv1.set(paste_hash, index) + r_serv1.sadd("HASHS", paste_hash) + ##################### Similarity found ####################### + + # if there is data in this dictionnary + if len(hash_dico) != 0: + # paste_tuple = (paste_path, percent) + for dico_hash, paste_tuple in hash_dico.items(): + dupl.append(paste_tuple) + + # Creating the object attribute and save it. 
+ to_print = 'Duplicate;{};{};{};'.format( + PST.p_source, PST.p_date, PST.p_name) + if dupl != []: + PST.__setattr__("p_duplicate", dupl) + PST.save_attribute_redis("p_duplicate", dupl) + publisher.info('{}Detected {}'.format(to_print, len(dupl))) + #print '{}Detected {}'.format(to_print, len(dupl)) + + y = time.time() + + publisher.debug('{}Processed in {} sec'.format(to_print, y-x)) + #print '{}Processed in {} sec'.format(to_print, y-x) + + except IOError: + to_print = 'Duplicate;{};{};{};'.format( + PST.p_source, PST.p_date, PST.p_name) + print "CRC Checksum Failed on :", PST.p_path + publisher.error('{}CRC Checksum Failed'.format(to_print)) diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 86e155b1..d6706e1e 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -105,7 +105,7 @@ function launching_scripts { screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x' sleep 0.1 - screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate_ssdeep.py; read x' + screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate_ssdeep_v2.py; read x' sleep 0.1 screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x' sleep 0.1 diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index 6d07707c..b5f2c308 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -4,6 +4,13 @@ pastes = PASTES wordtrending_csv = var/www/static/csv/wordstrendingdata wordsfile = files/wordfile +#### Modules #### +[Modules_Duplicates] +#Number of month to look back +maximum_month_range = 3 +#The value where two pastes are considerate duplicate. +threshold_duplicate = 50 + ##### Redis ##### [Redis_Cache] host = localhost From 6805ed6488b87b3f887f56fcaf984dc46fed0089 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Mon, 18 Jul 2016 15:52:53 +0200 Subject: [PATCH 07/14] Added default config --- bin/packages/config.cfg.sample | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index b5f2c308..0d2abb79 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -10,6 +10,8 @@ wordsfile = files/wordfile maximum_month_range = 3 #The value where two pastes are considerate duplicate. threshold_duplicate = 50 +#Minimum size of the paste considered +min_paste_size = 0.3 ##### Redis ##### [Redis_Cache] From 996c0e02dea69d49334a58c9a6fc1ae81058fc2d Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Mon, 18 Jul 2016 16:22:33 +0200 Subject: [PATCH 08/14] Duplicate module takes its messages from other modules and no more from Global. 
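The detection modules touched below (Credential, CreditCard, Keys, Mail, Phone) now push the paste path into the Duplicates queue themselves, instead of Duplicates reading every paste straight from Global. A minimal sketch of that hand-off, reusing the Process helper seen in the diffs; the detection test itself is a placeholder, not code from the patch:

from Helper import Process

def looks_interesting(filepath):
    return True        # placeholder for the module's real detection logic

p = Process('Mail')                  # the module's own modules.cfg section
while True:
    filepath = p.get_from_set()      # paste path received from Global
    if filepath is None:
        continue
    if looks_interesting(filepath):
        # "publish = Redis_Duplicate" in modules.cfg routes this path
        # to the Duplicates module's input set
        p.populate_set_out(filepath)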
--- bin/Credential.py | 2 ++ bin/CreditCard.py | 2 ++ bin/Duplicate_ssdeep_v2.py | 9 +++++---- bin/Keys.py | 2 ++ bin/Mail.py | 2 ++ bin/Phone.py | 2 ++ bin/packages/modules.cfg | 7 ++++++- 7 files changed, 21 insertions(+), 5 deletions(-) diff --git a/bin/Credential.py b/bin/Credential.py index 3ac61faf..d81c9ff6 100755 --- a/bin/Credential.py +++ b/bin/Credential.py @@ -50,6 +50,8 @@ if __name__ == "__main__": if len(creds) > critical: print("========> Found more than 10 credentials in this file : {}".format(filepath)) publisher.warning(to_print) + #Send to duplicate + p.populate_set_out(filepath) if sites: print("=======> Probably on : {}".format(', '.join(sites))) else: diff --git a/bin/CreditCard.py b/bin/CreditCard.py index d4660c13..18703f4e 100755 --- a/bin/CreditCard.py +++ b/bin/CreditCard.py @@ -65,6 +65,8 @@ if __name__ == "__main__": if (len(creditcard_set) > 0): publisher.warning('{}Checked {} valid number(s)'.format( to_print, len(creditcard_set))) + #Send to duplicate + p.populate_set_out(filename) else: publisher.info('{}CreditCard related'.format(to_print)) else: diff --git a/bin/Duplicate_ssdeep_v2.py b/bin/Duplicate_ssdeep_v2.py index 35874371..f6aaca4f 100755 --- a/bin/Duplicate_ssdeep_v2.py +++ b/bin/Duplicate_ssdeep_v2.py @@ -6,8 +6,10 @@ The Duplicate module ==================== This huge module is, in short term, checking duplicates. +Its input comes from other modules, namely: + Credential, CreditCard, Keys, Mails and Phone -This one differ from v1 by only using redis and not json file on disk +This one differ from v1 by only using redis and not json file stored on disk Requirements: ------------- @@ -117,8 +119,7 @@ if __name__ == "__main__": if paste_path != None: hash_dico[dico_hash] = (paste_path, percent) - print 'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent) - #print ' '+ PST.p_path[44:] +', '+ paste_path[44:] + ', ' + str(percent) + #print 'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent) except: # ssdeep hash not comparable print 'ssdeep hash not comparable' @@ -146,7 +147,7 @@ if __name__ == "__main__": PST.__setattr__("p_duplicate", dupl) PST.save_attribute_redis("p_duplicate", dupl) publisher.info('{}Detected {}'.format(to_print, len(dupl))) - #print '{}Detected {}'.format(to_print, len(dupl)) + print '{}Detected {}'.format(to_print, len(dupl)) y = time.time() diff --git a/bin/Keys.py b/bin/Keys.py index 8058545b..9c44f60a 100755 --- a/bin/Keys.py +++ b/bin/Keys.py @@ -16,6 +16,8 @@ def search_gpg(message): content = paste.get_p_content() if '-----BEGIN PGP MESSAGE-----' in content: publisher.warning('{} has a PGP enc message'.format(paste.p_name)) + #Send to duplicate + p.populate_set_out(message) if __name__ == '__main__': diff --git a/bin/Mail.py b/bin/Mail.py index dd348ba6..964deb19 100755 --- a/bin/Mail.py +++ b/bin/Mail.py @@ -60,6 +60,8 @@ if __name__ == "__main__": MX_values[0]) if MX_values[0] > is_critical: publisher.warning(to_print) + #Send to duplicate + p.populate_set_out(filename) else: publisher.info(to_print) prec_filename = filename diff --git a/bin/Phone.py b/bin/Phone.py index 628f77c2..b53b079c 100755 --- a/bin/Phone.py +++ b/bin/Phone.py @@ -23,6 +23,8 @@ def search_phone(message): if len(results) > 4: print results publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name)) + #Send to duplicate + p.populate_set_out(message) if __name__ == '__main__': # If you wish to use an other port of channel, do not 
forget to run a subscriber accordingly (see launch_logs.sh) diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg index 9d8d6637..5f087427 100644 --- a/bin/packages/modules.cfg +++ b/bin/packages/modules.cfg @@ -3,7 +3,7 @@ subscribe = ZMQ_Global publish = Redis_Global [Duplicates] -subscribe = Redis_Global +subscribe = Redis_Duplicate [Indexer] subscribe = Redis_Global @@ -31,9 +31,11 @@ publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Re [CreditCards] subscribe = Redis_CreditCards +publish = Redis_Duplicate [Mail] subscribe = Redis_Mail +publish = Redis_Duplicate [Onion] subscribe = Redis_Onion @@ -55,15 +57,18 @@ subscribe = Redis_Global [Credential] subscribe = Redis_Credential +publish = Redis_Duplicate [Cve] subscribe = Redis_Cve [Phone] subscribe = Redis_Global +publish = Redis_Duplicate [SourceCode] subscribe = Redis_SourceCode [Keys] subscribe = Redis_Global +publish = Redis_Duplicate From a6996c0b23e6e9bac1510f8fb71befdb3afb838b Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Tue, 19 Jul 2016 10:48:44 +0200 Subject: [PATCH 09/14] Added related functions and display of duplicated paste in search.py --- bin/packages/Paste.py | 5 +++++ var/www/Flask_server.py | 26 ++++++++++++++++++++++++- var/www/templates/show_saved_paste.html | 12 +++++++++++- 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/bin/packages/Paste.py b/bin/packages/Paste.py index bedf36b0..172f0931 100755 --- a/bin/packages/Paste.py +++ b/bin/packages/Paste.py @@ -91,6 +91,7 @@ class Paste(object): self.p_langage = None self.p_nb_lines = None self.p_max_length_line = None + self.p_duplicate = None def get_p_content(self): """ @@ -277,6 +278,10 @@ class Paste(object): return True, var else: return False, var + + def _get_p_duplicate(self): + self.p_duplicate = self.store.hget(self.p_path, "p_duplicate") + return self.p_duplicate if self.p_duplicate is not None else [] def save_all_attributes_redis(self, key=None): """ diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 36fcfbcc..08ea0675 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -58,6 +58,21 @@ def list_len(s): return len(s) app.jinja_env.filters['list_len'] = list_len +def parseStringToList(the_string): + strList = "" + elemList = [] + for c in the_string: + if c != ']': + if c != '[' and c !=' ' and c != '"': + strList += c + else: + the_list = strList.split(',') + if len(the_list) == 2: + elemList.append(the_list) + elif len(the_list) > 1: + elemList.append(the_list[1:]) + strList = "" + return elemList def showpaste(content_range): requested_path = request.args.get('paste', '') @@ -71,10 +86,19 @@ def showpaste(content_range): p_mime = paste.p_mime p_lineinfo = paste.get_lines_info() p_content = paste.get_p_content().decode('utf-8', 'ignore') + p_duplicate_full_list = parseStringToList(paste._get_p_duplicate()) + p_duplicate_list = [] + p_simil_list = [] + + for dup_list in p_duplicate_full_list: + path, simil_percent = dup_list + p_duplicate_list.append(path) + p_simil_list.append(simil_percent) + if content_range != 0: p_content = p_content[0:content_range] - return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content)) + return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, 
initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list) @app.route("/_logs") diff --git a/var/www/templates/show_saved_paste.html b/var/www/templates/show_saved_paste.html index ce68465c..707786d7 100644 --- a/var/www/templates/show_saved_paste.html +++ b/var/www/templates/show_saved_paste.html @@ -42,7 +42,17 @@
-      Content:
+      {% if duplicate_list|length == 0 %}
+      No Duplicate
+      {% else %}
+      Duplicate list:
+      {% set i = 0 %}
+      {% for dup_path in duplicate_list %}
+      Similarity: {{ simil_list[i] }}% - {{ dup_path }}
+      {% set i = i + 1 %}
+      {% endfor %}
+      {% endif %}
+      Content:
       {{ content }}
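For context on the view changes in this patch: p_duplicate is written with save_attribute_redis("p_duplicate", dupl), so Flask gets it back from Redis as one flat string rather than a structured list, and parseStringToList() in Flask_server.py splits it back into (path, percent) pairs for the template. A rough, self-contained illustration; the stored value is made up, and ast.literal_eval only approximates what the helper does:

import ast

stored = '[["2016/07/19/example.gz", 87], ["2016/07/18/other.gz", 95]]'
pairs = ast.literal_eval(stored)       # rough stand-in for parseStringToList(stored)
duplicate_list = [path for path, percent in pairs]
simil_list = [percent for path, percent in pairs]
# show_saved_paste.html then renders "Similarity: <percent>% - <path>" per entry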
From 4bc84a2580a0203977da304722fbae0251dbdf6e Mon Sep 17 00:00:00 2001 From: mokaddem Date: Tue, 19 Jul 2016 16:49:57 +0200 Subject: [PATCH 10/14] Added dependency for flot chart --- var/www/update_thirdparty.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/var/www/update_thirdparty.sh b/var/www/update_thirdparty.sh index 563ee4c4..3c937cbb 100755 --- a/var/www/update_thirdparty.sh +++ b/var/www/update_thirdparty.sh @@ -29,6 +29,10 @@ wget https://cdn.datatables.net/1.10.12/js/jquery.dataTables.min.js -O ./static/ wget https://cdn.datatables.net/plug-ins/1.10.7/integration/bootstrap/3/dataTables.bootstrap.css -O ./static/css/dataTables.bootstrap.css wget https://cdn.datatables.net/plug-ins/1.10.7/integration/bootstrap/3/dataTables.bootstrap.js -O ./static/js/dataTables.bootstrap.js +#Ressource for graph +wget https://raw.githubusercontent.com/flot/flot/master/jquery.flot.js -O ./static/js/jquery.flot.js +wget https://raw.githubusercontent.com/flot/flot/master/jquery.flot.pie.js -O ./static/js/jquery.flot.pie.js + rm -rf ./static/js/plugins mv temp/${filename}/js/* ./static/js/ From 6f4bfeb4ef154bb262658f6e426452e5ec3e99e6 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Thu, 21 Jul 2016 14:45:41 +0200 Subject: [PATCH 11/14] restored deleted part of a failed merge-conflict --- var/www/static/js/indexjavascript.js | 42 ++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/var/www/static/js/indexjavascript.js b/var/www/static/js/indexjavascript.js index bd73522a..1219ffc3 100644 --- a/var/www/static/js/indexjavascript.js +++ b/var/www/static/js/indexjavascript.js @@ -6,6 +6,48 @@ $(function() { var curr_max = 0; function getData() { + if (data.length > 0){ + var data_old = data[0]; + data = data.slice(1); + curr_max = curr_max == data_old ? Math.max.apply(null, data) : curr_max; + } + + while (data.length < totalPoints) { + var y = (typeof window.paste_num_tabvar !== "undefined") ? parseInt(window.paste_num_tabvar) : 0; + curr_max = y > curr_max ? 
y : curr_max; + data.push(y); + } + // Zip the generated y values with the x values + var res = []; + for (var i = 0; i < data.length; ++i) { + res.push([i, data[i]]) + } + return res; + } + + var updateInterval = 1000; + var options = { + series: { shadowSize: 1 }, + lines: { fill: true, fillColor: { colors: [ { opacity: 1 }, { opacity: 0.1 } ] }}, + yaxis: { min: 0, max: 40 }, + colors: ["#a971ff"], + grid: { + tickColor: "#dddddd", + borderWidth: 0 + }, + }; + var plot = $.plot("#realtimechart", [ getData() ], options); + + function update() { + plot.setData([getData()]); + plot.getOptions().yaxes[0].max = curr_max; + plot.setupGrid(); + plot.draw(); + setTimeout(update, updateInterval); + } + update(); +}); + function initfunc( csvay, scroot) { window.csv = csvay; window.scroot = scroot; From 9a34a587cc1958a5792517730af41fba95991e57 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Thu, 21 Jul 2016 14:53:34 +0200 Subject: [PATCH 12/14] bug global_tabvar seems solved: Caused by a race condition, Just switched two functions execution --- var/www/static/js/indexjavascript.js | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/var/www/static/js/indexjavascript.js b/var/www/static/js/indexjavascript.js index 1219ffc3..7fd463f7 100644 --- a/var/www/static/js/indexjavascript.js +++ b/var/www/static/js/indexjavascript.js @@ -1,3 +1,17 @@ +function initfunc( csvay, scroot) { + window.csv = csvay; + window.scroot = scroot; +}; + +function update_values() { + $SCRIPT_ROOT = window.scroot ; + $.getJSON($SCRIPT_ROOT+"/_stuff", + function(data) { + window.glob_tabvar = data; + }); + }; + + // Plot and update the number of processed pastes $(function() { var data = []; From f125a6211513bf4f1db6997fdaba76df5a13b3e1 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Thu, 21 Jul 2016 15:32:07 +0200 Subject: [PATCH 13/14] Added ssdeep in pip_requirments --- pip_packages_requirement.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pip_packages_requirement.txt b/pip_packages_requirement.txt index db2f23c5..bd734175 100644 --- a/pip_packages_requirement.txt +++ b/pip_packages_requirement.txt @@ -17,7 +17,7 @@ nltk # Hashlib crcmod mmh3 -simhash +ssdeep #Others python-magic From c686f69ca67f3bfea99da68277f8dfd038321709 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Fri, 22 Jul 2016 10:04:58 +0200 Subject: [PATCH 14/14] Clean not compatible ssdeep hash encountered --- bin/Duplicate_ssdeep_v2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/Duplicate_ssdeep_v2.py b/bin/Duplicate_ssdeep_v2.py index f6aaca4f..e8930c02 100755 --- a/bin/Duplicate_ssdeep_v2.py +++ b/bin/Duplicate_ssdeep_v2.py @@ -122,8 +122,8 @@ if __name__ == "__main__": #print 'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent) except: # ssdeep hash not comparable - print 'ssdeep hash not comparable' - publisher.error('ssdeep hash not comparable') + print 'ssdeep hash not comparable, cleaning bad hash: '+dico_hash + curr_dico_redis.srem('HASHS', dico_hash) # Add paste in DB after checking to prevent its analysis twice # hash_i -> index_i AND index_i -> PST.PATH
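The last patch turns the bare error print around ssdeep.compare into a cleanup step: a stored hash that can no longer be compared is removed from the month's HASHS set so it does not keep failing on every later paste. A standalone sketch of that pattern; host, port and db are assumed values, and the sample content is made up:

import redis
import ssdeep

r = redis.StrictRedis(host='localhost', port=2016, db=7)   # one month DB, as in dico_redis
paste_hash = ssdeep.hash('some paste content')

for stored_hash in r.smembers('HASHS'):
    try:
        percent = ssdeep.compare(stored_hash, paste_hash)
    except Exception:
        # unreadable or legacy hash: drop it instead of failing on it forever
        print 'ssdeep hash not comparable, cleaning bad hash: ' + stored_hash
        r.srem('HASHS', stored_hash)
        continue
    # ...threshold check and duplicate bookkeeping continue as in Duplicate_ssdeep_v2.py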