chg: [term] refactor + add new tracked word/set

This commit is contained in:
Terrtia 2019-08-06 17:03:49 +02:00
parent 32efbfa019
commit 28320a32a6
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
5 changed files with 420 additions and 0 deletions

View file

@ -109,8 +109,56 @@ Redis and ARDB overview
| **uuid**:ltags | **tag** |
| **uuid**:ltagsgalaxies | **tag** |
## DB2 - New TermFreq:
##### Term Tracker metadata:
| Hset - Key | Field | Value |
| ------ | ------ | ------ |
| tracked_term:**uuid** | tracked | **tracked word/set/regex** |
| | type | **word/set/regex** |
| | date | **date added** |
| | user_id | **created by user_id** |
| | dashboard | **0/1 Display alert on dashboard** |
| | level | **0/1 Tracker visibility** |
##### Term Tracked by user_id (visibility level: user only):
| Set - Key | Value |
| ------ | ------ |
| user:tracked_term:**user_id** | **uuid - tracked term uuid** |
##### Global Term Tracked (visibility level: all users):
| Set - Key | Value |
| ------ | ------ |
| gobal:tracked_term | **uuid - tracked term uuid** |
##### All Term Tracked by type:
| Set - Key | Value |
| ------ | ------ |
| all:tracked_term:**word/set/regex - term type** | **tracked term** |
| Set - Key | Value |
| ------ | ------ |
| all:tracked_term_uuid:**tracked term** | **uuid - tracked term uuid** |
##### All Term Tracked items:
| Set - Key | Value |
| ------ | ------ |
| tracked_term:item:**uuid** | **item_id** |
##### All Term Tracked tags:
| Set - Key | Value |
| ------ | ------ |
| tracked_term:tags:**uuid** | **tag** |
##### All Term Tracked mails:
| Set - Key | Value |
| ------ | ------ |
| tracked_term:mail:**uuid** | **mail** |
## DB2 - TermFreq:
##### Set:
##### Set:
| Key | Value |
| ------ | ------ |
@ -118,6 +166,17 @@ Redis and ARDB overview
| TrackedSetSet | **tracked_set** |
| TrackedRegexSet | **tracked_regex** |
| | |
| | |
| global:TrackedSetTermSet | **tracked_term** |
| global:TrackedSetSet | **tracked_set** |
| global:TrackedRegexSet | **tracked_regex** |
| | |
| | |
| user:**user_id**:TrackedSetTermSet | **tracked_term** |
| user:**user_id**:TrackedSetSet | **tracked_set** |
| user:**user_id**:TrackedRegexSet | **tracked_regex** |
| | |
| | |
| tracked_**tracked_term** | **item_path** |
| set_**tracked_set** | **item_path** |
| regex_**tracked_regex** | **item_path** |

58
bin/TermTrackerMod.py Executable file
View file

@ -0,0 +1,58 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The TermTracker Module
===================
"""
import os
import sys
import time
from packages import Paste
from packages import Term
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
import Flask_config
# Term-tracker database handle, shared through the Flask configuration module.
r_serv_term = Flask_config.r_serv_term
# loads tracked words
# Both collections are fetched once, at import time; they are not refreshed
# afterwards by anything visible in this module.
list_tracked_words = Term.get_tracked_words_list()
set_tracked_words_list = Term.get_set_tracked_words_list()
def new_term_found(term, term_type):
    """Handle a tracked term that was found in an item (work in progress).

    Looks up every tracker uuid registered for ``term``. The per-tracker
    processing (collecting tags and notification mails) is not implemented
    yet, so the function currently has no effect beyond the lookup.

    :param term: tracked term, in the format stored by ``Term``
    :param term_type: tracker type; not used yet
    """
    # The lookup lives in the Term package and needs the term to build its key.
    uuid_list = Term.get_term_uuid_list(term)

    # Placeholders for the notification/tagging step.
    email_notification = []
    tags = []

    for term_uuid in uuid_list:
        # TODO: gather the tags and mails attached to each tracker uuid
        pass
# Manual test entry point: loads one hard-coded submitted item and runs the
# term parser on a sample multi-word input.
if __name__ == "__main__":
item_id = 'submitted/2019/08/02/cc1900ed-6051-473a-ba7a-850a17d0cc02.gz'
#item_id = 'submitted/2019/08/02/0a52d82d-a89d-4004-9535-8a0bc9c1ce49.gz'
paste = Paste.Paste(item_id)
# NOTE(review): `res` is not used after this call in the visible code.
res = Term.parse_tracked_term_to_add('test zorro meroio apple weert', 'word')
# The triple-quoted string below is disabled matching logic kept as a bare
# string expression; it is never executed.
'''
dict_words_freq = Term.get_text_word_frequency(paste.get_p_content())
# check solo words
for word in list_tracked_words:
if word in dict_words_freq:
pass
# tag + get uuids ...
# check words set
for list_words, nb_words_threshold in set_tracked_words_list:
nb_uniq_word = 0
for word in list_words:
if word in dict_words_freq:
nb_uniq_word += 1
if nb_uniq_word > nb_words_threshold:
# tag + get uuid
pass
'''

206
bin/packages/Term.py Executable file
View file

@ -0,0 +1,206 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import uuid
import redis
import datetime
from collections import defaultdict
from nltk.tokenize import RegexpTokenizer
from textblob import TextBlob
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
import Flask_config
# Term-tracker database handle, shared through the Flask configuration module.
r_serv_term = Flask_config.r_serv_term
# Single characters that parse_tracked_term_to_add refuses in words and word sets.
special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\')
# NOTE(review): this adds the two-character string '\s' as ONE element; a set
# built from the characters of a term can never contain it - confirm intent.
special_characters.add('\\s')
# NLTK tokenizer
# With gaps=True the pattern describes the separators between tokens
# (runs of punctuation and whitespace), not the tokens themselves.
tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
gaps=True, discard_empty=True)
def get_text_word_frequency(item_content, filtering=True):
    """Count how often each token occurs in an item's content.

    The content is lower-cased before tokenisation, so counts are
    case-insensitive.

    :param item_content: text of the item
    :param filtering: when true, split with the module-level ``tokenizer``;
        otherwise let TextBlob use its default tokenizer
    :return: ``defaultdict(int)`` mapping token -> number of occurrences
    """
    item_content = item_content.lower()
    if filtering:
        blob = TextBlob(item_content, tokenizer=tokenizer)
    else:
        blob = TextBlob(item_content)

    words_dict = defaultdict(int)
    for word in blob.tokens:
        words_dict[word] += 1
    # The debug print of the whole table was removed: it wrote every item's
    # word frequencies to stdout on each call.
    return words_dict
# # TODO: create all tracked words
def get_tracked_words_list():
    """Return the members of the ``all:tracked_term:word`` set as a list."""
    tracked_words = r_serv_term.smembers('all:tracked_term:word')
    return list(tracked_words)
def get_set_tracked_words_list():
    """Return every tracked word set with its match threshold.

    Each member of ``all:tracked_term:set`` is stored as
    ``word1,word2,...;nb_words``.

    :return: list of ``(words, nb_words)`` tuples, where ``words`` is the list
        of words in the set and ``nb_words`` the integer stored after ``;``
    """
    all_set_list = []
    for elem in r_serv_term.smembers('all:tracked_term:set'):
        fields = elem.split(';')
        num_words = int(fields[1])
        ter_set = fields[0].split(',')
        all_set_list.append((ter_set, num_words))
    # The original built this list but never returned it.
    return all_set_list
def parse_json_term_to_add(dict_input, user_id=None):
    """Validate an "add tracker" request and create the tracked term.

    :param dict_input: decoded JSON request. Reads ``term`` and ``type``
        (both required), plus optional ``nb_words`` (default 1), ``tags``
        (default []), ``mails`` (default []) and ``level`` (default 1).
    :param user_id: id of the user creating the tracker. When omitted, the
        ``user_id`` field of ``dict_input`` is used instead.
    :return: ``(payload, http_status)``; on success the payload holds the
        normalised ``term`` and the new tracker ``uuid``, on failure a
        ``{"status": "error", "reason": ...}`` dict with status 400.
    """
    term = dict_input.get('term', None)
    if not term:
        return ({"status": "error", "reason": "Term not provided"}, 400)
    # The type lives under its own key (it was read from 'term' before).
    term_type = dict_input.get('type', None)
    if not term_type:
        return ({"status": "error", "reason": "Term type not provided"}, 400)
    nb_words = dict_input.get('nb_words', 1)

    res = parse_tracked_term_to_add(term, term_type, nb_words=nb_words)
    # The parser answers with a (payload, status_code) tuple. Tolerate a
    # parser that returns nothing for a type it does not normalise.
    if res is not None:
        payload, status_code = res
        if status_code != 200:
            return res
        # Store the normalised form (lower-cased, "w1,w2;n" for sets, ...).
        term = payload['term']
        term_type = payload['type']

    # get user_id
    if user_id is None:
        user_id = dict_input.get('user_id', None)
    if not user_id:
        return ({"status": "error", "reason": "User id not provided"}, 400)

    tags = dict_input.get('tags', [])
    mails = dict_input.get('mails', [])
    ## TODO: verify mail integrity

    ## TODO: add dashboard key
    # Visibility: 0 = user only, 1 = all users; anything else means 1.
    try:
        level = int(dict_input.get('level', 1))
    except (TypeError, ValueError):
        level = 1
    if level not in (0, 1):
        level = 1

    term_uuid = add_tracked_term(term, term_type, user_id, level, tags, mails)

    return ({'term': term, 'uuid': term_uuid}, 200)
def parse_tracked_term_to_add(term, term_type, nb_words=1):
    """Validate and normalise a term before it is tracked.

    * ``regex``: the pattern must compile; it is returned unchanged.
    * ``word`` / ``set``: the term is lower-cased and must not contain any
      character from ``special_characters``. A ``word`` made of several
      words is promoted to a ``set``. Sets are serialised as
      ``word1,word2,...;nb_words`` with the words de-duplicated and sorted.

    :param term: raw term supplied by the user
    :param term_type: ``'word'``, ``'set'`` or ``'regex'``
    :param nb_words: for sets, how many words of the set must be present;
        non-integers fall back to 1 and the value is kept within
        ``1..number of distinct words``
    :return: ``(payload, http_status)``: a success payload with the
        normalised ``term`` and final ``type`` (200), or an error payload (400)
    """
    if term_type == 'regex':
        # Local import: only this branch needs the regex engine.
        import re
        try:
            re.compile(term)
        except (re.error, TypeError):
            return ({"status": "error", "reason": "Invalid regex"}, 400)

    elif term_type == 'word' or term_type == 'set':
        # force lowercase
        term = term.lower()
        if set(term).intersection(special_characters):
            return ({"status": "error", "reason": "special character not allowed", "message": "Please use a regex or remove all special characters"}, 400)

        words = term.split()
        if not words:
            # Empty or whitespace-only input would otherwise be stored as ";1".
            return ({"status": "error", "reason": "Term not provided"}, 400)

        # Only a multi-word "word" is really a set.
        if term_type == 'word' and len(words) > 1:
            term_type = 'set'

        # output format: term1,term2,term3;2
        if term_type == 'set':
            try:
                nb_words = int(nb_words)
            except (TypeError, ValueError):
                nb_words = 1
            words_set = sorted(set(words))
            # A threshold outside 1..len(words_set) is meaningless.
            nb_words = max(1, min(nb_words, len(words_set)))
            term = "{};{}".format(",".join(words_set), nb_words)
        else:
            # Single word: drop any surrounding whitespace.
            term = words[0]

    else:
        return ({"status": "error", "reason": "Incorrect type"}, 400)

    # Every valid type, regex included, reports success.
    return ({"status": "success", "term": term, "type": term_type}, 200)
def add_tracked_term(term, term_type, user_id, level, tags, mails, dashboard=0):
    """Store a new tracked term and return its freshly generated uuid.

    Writes the tracker metadata hash, registers the term in the per-type and
    term->uuid sets, adds the uuid to the visibility set selected by
    ``level`` (0: user only, 1: global) and records its tags and
    notification mails.
    """
    term_uuid = str(uuid.uuid4())

    # create metadata
    meta_key = 'tracked_term:{}'.format(term_uuid)
    metadata = (
        ('tracked', term),
        ('type', term_type),
        ('date', datetime.date.today().strftime("%Y%m%d")),
        ('user_id', user_id),
        ('level', level),
        ('dashboard', dashboard),
    )
    for field, value in metadata:
        r_serv_term.hset(meta_key, field, value)

    # create all term set
    r_serv_term.sadd('all:tracked_term:{}'.format(term_type), term)

    # create term - uuid map
    r_serv_term.sadd('all:tracked_term_uuid:{}'.format(term), term_uuid)

    # add display level set
    if level == 0:  # user only
        r_serv_term.sadd('user:tracked_term:{}'.format(user_id), term_uuid)
    elif level == 1:  # global
        r_serv_term.sadd('gobal:tracked_term', term_uuid)

    # create term tags list
    tags_key = 'tracked_term:tags:{}'.format(term_uuid)
    for tag in tags:
        r_serv_term.sadd(tags_key, tag)

    # create term mail notification list
    mails_key = 'tracked_term:mail:{}'.format(term_uuid)
    for mail in mails:
        r_serv_term.sadd(mails_key, mail)

    return term_uuid
def get_term_uuid_list(term):
    """Return the tracker uuids registered for ``term`` as a list."""
    uuid_key = 'all:tracked_term_uuid:{}'.format(term)
    return list(r_serv_term.smembers(uuid_key))
def get_global_tracked_term():
    """Return the globally tracked terms, grouped by type.

    :return: dict with keys ``'term'``, ``'set'`` and ``'regex'``, each a list
        read from the matching ``global:Tracked*Set`` key
    """
    tracked_set = list(r_serv_term.smembers('global:TrackedSetSet'))
    tracked_regex = list(r_serv_term.smembers('global:TrackedRegexSet'))
    tracked_terms = list(r_serv_term.smembers('global:TrackedSetTermSet'))
    # 'set' used to return the single-term list by mistake.
    return {'term': tracked_terms, 'set': tracked_set, 'regex': tracked_regex}
def get_user_tracked_term(user_id):
    """Return the terms tracked by one user, grouped by type.

    :param user_id: user whose ``user:<user_id>:Tracked*Set`` keys are read
    :return: dict with keys ``'term'``, ``'set'`` and ``'regex'``, each a list
    """
    # Format the key BEFORE querying: calling .format() on the smembers()
    # result queried the literal 'user:{}:...' key and then failed.
    tracked_set = list(r_serv_term.smembers('user:{}:TrackedSetSet'.format(user_id)))
    tracked_regex = list(r_serv_term.smembers('user:{}:TrackedRegexSet'.format(user_id)))
    tracked_terms = list(r_serv_term.smembers('user:{}:TrackedSetTermSet'.format(user_id)))
    # 'set' used to return the single-term list by mistake.
    return {'term': tracked_terms, 'set': tracked_set, 'regex': tracked_regex}

View file

@ -583,6 +583,94 @@ curl https://127.0.0.1:7000/api/v1/get/tag/metadata --header "Authorization: iHc
## Tracker
### Add term tracker: `api/v1/add/tracker/term`<a name="add_term_tracker"></a>
#### Description
Add term tracker
**Method** : `POST`
#### Parameters
- `term`
- term to add
- *str - word(s)*
- mandatory
- `nb_words`
- number of words in set
- *int*
- default: `1`
- `type`
- term type
- *str*
- mandatory: `word`, `set`, `regex`
- `tags`
- list of tags
- *list*
- default: `[]`
- `mails`
- list of mails to notify
- *list*
- default: `[]`
- `level`
- tracker visibility
- *int - 0: user only, 1: all users*
- default: `1`
#### JSON response
- `uuid`
- tracked term uuid
- *uuid4*
#### Example
```
curl https://127.0.0.1:7000/api/v1/add/tracker/term --header "Authorization: iHc1_ChZxj1aXmiFiF1mkxxQkzawwriEaZpPqyTQj " -H "Content-Type: application/json" --data @input.json -X POST
```
#### input.json Example
```json
{
"term": "test zorro apple",
"type": "set",
"nb_words": 2,
"tags": [
"infoleak:analyst-detection=\"private-key\""
],
"mails": [],
"level": 1
}
```
#### Expected Success Response
**HTTP Status Code** : `200`
```json
{
"uuid": "0c3d7b34-936e-4f01-9cdf-2070184b6016"
}
```
#### Expected Fail Response
**HTTP Status Code** : `400`
```json
{"status": "error", "reason": "Malformed JSON"}
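{"status": "error", "reason": "Term not provided"}
{"status": "error", "reason": "Term type not provided"}
{"status": "error", "reason": "special character not allowed", "message": "Please use a regex or remove all special characters"}
{"status": "error", "reason": "Incorrect type"}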
```
## Import management

View file

@ -307,6 +307,15 @@ def get_all_tags():
res = {'tags': Tag.get_all_tags()}
return Response(json.dumps(res, indent=2, sort_keys=True), mimetype='application/json'), 200
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # TRACKER # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# REST endpoint for adding a term tracker (POST api/v1/add/tracker/term).
# NOTE(review): the token check below is commented out, and the visible body
# only reads the JSON payload without returning a response - looks unfinished.
@restApi.route("api/v1/add/tracker/term", methods=['POST'])
#@token_required('analyst')
def add_tracker_term():
data = request.get_json()
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # IMPORT # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #