diff --git a/bin/Tags.py b/bin/Tags.py index 0a178fef..c0ea4bdc 100755 --- a/bin/Tags.py +++ b/bin/Tags.py @@ -17,6 +17,19 @@ from pubsublogger import publisher from Helper import Process from packages import Paste +def get_paste_date(paste_filename): + l_directory = paste_filename.split('/') + return '{}{}{}'.format(l_directory[-4], l_directory[-3], l_directory[-2]) + +def set_tag_metadata(tag, date): + # First time we see this tag ## TODO: filter paste from the paste ? + if not server.hexists('tag_metadata:{}'.format(tag), 'first_seen'): + server.hset('tag_metadata:{}'.format(tag), 'first_seen', date) + # Check and Set tag last_seen + last_seen = server.hget('tag_metadata:{}'.format(tag), 'last_seen') + if last_seen is None or date > last_seen: + server.hset('tag_metadata:{}'.format(tag), 'last_seen', date) + if __name__ == '__main__': # Port of the redis instance used by pubsublogger @@ -42,12 +55,6 @@ if __name__ == '__main__': db=p.config.get("ARDB_Metadata", "db"), decode_responses=True) - serv_statistics = redis.StrictRedis( - host=p.config.get('ARDB_Statistics', 'host'), - port=p.config.get('ARDB_Statistics', 'port'), - db=p.config.get('ARDB_Statistics', 'db'), - decode_responses=True) - # Sent to the logging a description of the module publisher.info("Tags module started") @@ -68,12 +75,14 @@ if __name__ == '__main__': if res == 1: print("new tags added : {}".format(tag)) # add the path to the tag set - res = server.sadd(tag, path) + date = get_paste_date(path) + res = server.sadd('{}:{}'.format(tag, date), path) if res == 1: print("new paste: {}".format(path)) print(" tagged: {}".format(tag)) - server_metadata.sadd('tag:'+path, tag) + set_tag_metadata(tag, date) + server_metadata.sadd('tag:{}'.format(path), tag) - curr_date = datetime.date.today() - serv_statistics.hincrby(curr_date.strftime("%Y%m%d"),'paste_tagged:'+tag, 1) + curr_date = datetime.date.today().strftime("%Y%m%d") + server.hincrby('daily_tags:{}'.format(curr_date), tag, 1) p.populate_set_out(message, 'MISP_The_Hive_feeder')