From 9e8611a42d5b7e729f7ddcc681f74cdbe0466cc9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= <raphael@vinot.info>
Date: Tue, 2 Sep 2014 18:20:28 +0200
Subject: [PATCH] stop killing the disk when creating the word curve

---
 bin/Curve.py              | 34 ++++++++++++-------
 bin/packages/lib_words.py | 71 ++++++++++++---------------------------
 2 files changed, 42 insertions(+), 63 deletions(-)

diff --git a/bin/Curve.py b/bin/Curve.py
index 9226300e..85410b52 100755
--- a/bin/Curve.py
+++ b/bin/Curve.py
@@ -24,10 +24,10 @@ Requirements
 """
 import redis
 import time
-from packages import Paste
 from pubsublogger import publisher
 from packages import lib_words
 import os
+import datetime
 
 from Helper import Process
 
@@ -57,23 +57,31 @@ if __name__ == "__main__":
     prec_filename = None
     while True:
         if message is not None:
-            filename, word, score = message.split()
-            if prec_filename is None or filename != prec_filename:
-                PST = Paste.Paste(filename)
-                lib_words.create_curve_with_word_file(
-                    r_serv1, csv_path, wordfile_path, int(PST.p_date.year),
-                    int(PST.p_date.month))
+            generate_new_graph = True
 
-            prec_filename = filename
-            prev_score = r_serv1.hget(word.lower(), PST.p_date)
+            filename, word, score = message.split()
+            temp = filename.split('/')
+            date = temp[-4] + temp[-3] + temp[-2]
+
+            low_word = word.lower()
+            prev_score = r_serv1.hget(low_word, date)
             if prev_score is not None:
-                r_serv1.hset(word.lower(), PST.p_date,
-                             int(prev_score) + int(score))
+                r_serv1.hset(low_word, date, int(prev_score) + int(score))
             else:
-                r_serv1.hset(word.lower(), PST.p_date, score)
+                r_serv1.hset(low_word, date, score)
 
         else:
+            if generate_new_graph:
+                generate_new_graph = False
+                print 'Building graph'
+                today = datetime.date.today()
+                year = today.year
+                month = today.month
+                lib_words.create_curve_with_word_file(r_serv1, csv_path,
+                                                      wordfile_path, year,
+                                                      month)
+
             publisher.debug("Script Curve is Idling")
             print "sleeping"
-            time.sleep(1)
+            time.sleep(10)
         message = p.get_from_set()
diff --git a/bin/packages/lib_words.py b/bin/packages/lib_words.py
index 9446a8ec..3c065ed0 100644
--- a/bin/packages/lib_words.py
+++ b/bin/packages/lib_words.py
@@ -6,6 +6,7 @@ from pubsublogger import publisher
 import calendar
 from datetime import date
 from dateutil.rrule import rrule, DAILY
+import csv
 
 
 def listdirectory(path):
@@ -80,60 +81,30 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month
     to keep the timeline of the curve correct.
 
     """
-    a = date(year, month, 01)
-    b = date(year, month, calendar.monthrange(year, month)[1])
-    days = {}
+    first_day = date(year, month, 01)
+    last_day = date(year, month, calendar.monthrange(year, month)[1])
     words = []
 
-    with open(feederfilename, 'rb') as F:
+    with open(feederfilename, 'rb') as f:
         # words of the files
-        for word in F:
-            # list of words (sorted as in the file)
-            words.append(word[:-1])
+        words = sorted([word.strip() for word in f])
+
+    headers = ['Date'] + words
+    with open(csvfilename+'.csv', 'wb') as f:
+        writer = csv.writer(f)
+        writer.writerow(headers)
 
         # for each days
-        for dt in rrule(DAILY, dtstart=a, until=b):
-
-            mot = []
-            mot1 = []
-            mot2 = []
-
-            days[dt.strftime("%Y%m%d")] = ''
+        for dt in rrule(DAILY, dtstart=first_day, until=last_day):
+            row = []
+            curdate = dt.strftime("%Y%m%d")
+            row.append(curdate)
             # from the 1srt day to the last of the list
-            for word in sorted(words):
-
-                # if the word have a value for the day
-                if r_serv.hexists(word, dt.strftime("%Y%m%d")):
-                    mot1.append(str(word))
-                    mot2.append(r_serv.hget(word, dt.strftime("%Y%m%d")))
-
-                    mot = zip(mot1, mot2)
-
-                    days[dt.strftime("%Y%m%d")] = mot
+            for word in words:
+                value = r_serv.hget(word, curdate)
+                if value is None:
+                    row.append(0)
                 else:
-
-                    mot1.append(str(word))
-                    mot2.append(0)
-
-                    mot = zip(mot1, mot2)
-
-                    days[dt.strftime("%Y%m%d")] = mot
-
-    with open(csvfilename+".csv", 'wb') as F:
-        F.write("Date," + ",".join(sorted(words)) + '\n')
-
-        for x, s in days.items():
-            val = []
-            for y in s:
-                val.append(y[1])
-
-            F.write(x + ',' + str(val) + '\n')
-
-    with open(csvfilename+".csv", 'rb') as F:
-        h = F.read()
-        h = h.replace("[", "")
-        h = h.replace("]", "")
-        h = h.replace('\'', "")
-
-    with open(csvfilename+".csv", 'wb') as F:
-        F.write(h)
+                    # the word has a value for this day
+                    row.append(value)
+            writer.writerow(row)