From 9e8611a42d5b7e729f7ddcc681f74cdbe0466cc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Tue, 2 Sep 2014 18:20:28 +0200 Subject: [PATCH] stop killing the disk when creating the word curve --- bin/Curve.py | 34 ++++++++++++------- bin/packages/lib_words.py | 71 ++++++++++++--------------------------- 2 files changed, 42 insertions(+), 63 deletions(-) diff --git a/bin/Curve.py b/bin/Curve.py index 9226300e..85410b52 100755 --- a/bin/Curve.py +++ b/bin/Curve.py @@ -24,10 +24,10 @@ Requirements """ import redis import time -from packages import Paste from pubsublogger import publisher from packages import lib_words import os +import datetime from Helper import Process @@ -57,23 +57,31 @@ if __name__ == "__main__": prec_filename = None while True: if message is not None: - filename, word, score = message.split() - if prec_filename is None or filename != prec_filename: - PST = Paste.Paste(filename) - lib_words.create_curve_with_word_file( - r_serv1, csv_path, wordfile_path, int(PST.p_date.year), - int(PST.p_date.month)) + generate_new_graph = True - prec_filename = filename - prev_score = r_serv1.hget(word.lower(), PST.p_date) + filename, word, score = message.split() + temp = filename.split('/') + date = temp[-4] + temp[-3] + temp[-2] + + low_word = word.lower() + prev_score = r_serv1.hget(low_word, date) if prev_score is not None: - r_serv1.hset(word.lower(), PST.p_date, - int(prev_score) + int(score)) + r_serv1.hset(low_word, date, int(prev_score) + int(score)) else: - r_serv1.hset(word.lower(), PST.p_date, score) + r_serv1.hset(low_word, date, score) else: + if generate_new_graph: + generate_new_graph = False + print 'Building graph' + today = datetime.date.today() + year = today.year + month = today.month + lib_words.create_curve_with_word_file(r_serv1, csv_path, + wordfile_path, year, + month) + publisher.debug("Script Curve is Idling") print "sleeping" - time.sleep(1) + time.sleep(10) message = p.get_from_set() diff --git a/bin/packages/lib_words.py b/bin/packages/lib_words.py index 9446a8ec..3c065ed0 100644 --- a/bin/packages/lib_words.py +++ b/bin/packages/lib_words.py @@ -6,6 +6,7 @@ from pubsublogger import publisher import calendar from datetime import date from dateutil.rrule import rrule, DAILY +import csv def listdirectory(path): @@ -80,60 +81,30 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month to keep the timeline of the curve correct. """ - a = date(year, month, 01) - b = date(year, month, calendar.monthrange(year, month)[1]) - days = {} + first_day = date(year, month, 01) + last_day = date(year, month, calendar.monthrange(year, month)[1]) words = [] - with open(feederfilename, 'rb') as F: + with open(feederfilename, 'rb') as f: # words of the files - for word in F: - # list of words (sorted as in the file) - words.append(word[:-1]) + words = sorted([word.strip() for word in f]) + + headers = ['Date'] + words + with open(csvfilename+'.csv', 'wb') as f: + writer = csv.writer(f) + writer.writerow(headers) # for each days - for dt in rrule(DAILY, dtstart=a, until=b): - - mot = [] - mot1 = [] - mot2 = [] - - days[dt.strftime("%Y%m%d")] = '' + for dt in rrule(DAILY, dtstart=first_day, until=last_day): + row = [] + curdate = dt.strftime("%Y%m%d") + row.append(curdate) # from the 1srt day to the last of the list - for word in sorted(words): - - # if the word have a value for the day - if r_serv.hexists(word, dt.strftime("%Y%m%d")): - mot1.append(str(word)) - mot2.append(r_serv.hget(word, dt.strftime("%Y%m%d"))) - - mot = zip(mot1, mot2) - - days[dt.strftime("%Y%m%d")] = mot + for word in words: + value = r_serv.hget(word, curdate) + if value is None: + row.append(0) else: - - mot1.append(str(word)) - mot2.append(0) - - mot = zip(mot1, mot2) - - days[dt.strftime("%Y%m%d")] = mot - - with open(csvfilename+".csv", 'wb') as F: - F.write("Date," + ",".join(sorted(words)) + '\n') - - for x, s in days.items(): - val = [] - for y in s: - val.append(y[1]) - - F.write(x + ',' + str(val) + '\n') - - with open(csvfilename+".csv", 'rb') as F: - h = F.read() - h = h.replace("[", "") - h = h.replace("]", "") - h = h.replace('\'', "") - - with open(csvfilename+".csv", 'wb') as F: - F.write(h) + # if the word have a value for the day + row.append(value) + writer.writerow(row)