mirror of
https://github.com/ail-project/ail-framework.git
synced 2025-01-18 16:36:13 +00:00
chg: [term] refractor + add new tracked word/set
This commit is contained in:
parent
32efbfa019
commit
28320a32a6
5 changed files with 420 additions and 0 deletions
59
OVERVIEW.md
59
OVERVIEW.md
|
@ -109,8 +109,56 @@ Redis and ARDB overview
|
|||
| **uuid**:ltags | **tag** |
|
||||
| **uuid**:ltagsgalaxies | **tag** |
|
||||
|
||||
## DB2 - New TermFreq:
|
||||
|
||||
##### Term Tracker metadata:
|
||||
| Hset - Key | Field | Value |
|
||||
| ------ | ------ | ------ |
|
||||
| tracked_term:**uuid** | tracked | **tracked word/set/regex** |
|
||||
| | type | **word/set/regex** |
|
||||
| | date | **date added** |
|
||||
| | user_id | **created by user_id** |
|
||||
| | dashboard | **0/1 Display alert on dashboard** |
|
||||
| | level | **0/1 Tracker visibility** |
|
||||
|
||||
##### Term Tracked by user_id (visibility level: user only):
|
||||
| Set - Key | Value |
|
||||
| ------ | ------ |
|
||||
| user:tracked_term:**user_id** | **uuid - tracked term uuid** |
|
||||
|
||||
##### Global Term Tracked (visibility level: all users):
|
||||
| Set - Key | Value |
|
||||
| ------ | ------ |
|
||||
| gobal:tracked_term | **uuid - tracked term uuid** |
|
||||
|
||||
##### All Term Tracked by type:
|
||||
| Set - Key | Value |
|
||||
| ------ | ------ |
|
||||
| all:tracked_term:**word/set/regex - term type** | **tracked term** |
|
||||
|
||||
| Set - Key | Value |
|
||||
| ------ | ------ |
|
||||
| all:tracked_term_uuid:**tracked term** | **uuid - tracked term uuid** |
|
||||
|
||||
##### All Term Tracked items:
|
||||
| Set - Key | Value |
|
||||
| ------ | ------ |
|
||||
| tracked_term:item:**uuid** | **item_id** |
|
||||
|
||||
##### All Term Tracked tags:
|
||||
| Set - Key | Value |
|
||||
| ------ | ------ |
|
||||
| tracked_term:tags:**uuid** | **tag** |
|
||||
|
||||
##### All Term Tracked mails:
|
||||
| Set - Key | Value |
|
||||
| ------ | ------ |
|
||||
| tracked_term:mail:**uuid** | **mail** |
|
||||
|
||||
## DB2 - TermFreq:
|
||||
|
||||
##### Set:
|
||||
|
||||
##### Set:
|
||||
| Key | Value |
|
||||
| ------ | ------ |
|
||||
|
@ -118,6 +166,17 @@ Redis and ARDB overview
|
|||
| TrackedSetSet | **tracked_set** |
|
||||
| TrackedRegexSet | **tracked_regex** |
|
||||
| | |
|
||||
| | |
|
||||
| global:TrackedSetTermSet | **tracked_term** |
|
||||
| global:TrackedSetSet | **tracked_set** |
|
||||
| global:TrackedRegexSet | **tracked_regex** |
|
||||
| | |
|
||||
| | |
|
||||
| user:**user_id**:TrackedSetTermSet | **tracked_term** |
|
||||
| user:**user_id**:TrackedSetSet | **tracked_set** |
|
||||
| user:**user_id**:TrackedRegexSet | **tracked_regex** |
|
||||
| | |
|
||||
| | |
|
||||
| tracked_**tracked_term** | **item_path** |
|
||||
| set_**tracked_set** | **item_path** |
|
||||
| regex_**tracked_regex** | **item_path** |
|
||||
|
|
58
bin/TermTrackerMod.py
Executable file
58
bin/TermTrackerMod.py
Executable file
|
@ -0,0 +1,58 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
"""
|
||||
The TermTracker Module
|
||||
===================
|
||||
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
from packages import Paste
|
||||
from packages import Term
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
|
||||
import Flask_config
|
||||
|
||||
# Term-tracking database handle exposed by Flask_config
r_serv_term = Flask_config.r_serv_term

# loads tracked words
# NOTE: both lookups run once, at import time; trackers added afterwards are
# not picked up by these module-level variables.
list_tracked_words = Term.get_tracked_words_list()
set_tracked_words_list = Term.get_set_tracked_words_list()
|
||||
|
||||
def new_term_found(term, term_type):
    """Handle a tracked term that was found in an item (work in progress).

    :param term: tracked term that matched
    :param term_type: type of the tracker; not used yet
    """
    # get_term_uuid_list is defined in the Term package and needs the
    # matched term to look up the trackers registered for it.
    uuid_list = Term.get_term_uuid_list(term)
    # Placeholders for the notification/tagging logic still to be written.
    email_notification = []
    tags = []

    for term_uuid in uuid_list:
        # TODO: collect tags and mail recipients for each tracker uuid
        pass
|
||||
|
||||
|
||||
if __name__ == "__main__":

    # Manual test harness: the item id below is hard-coded for local debugging.
    item_id = 'submitted/2019/08/02/cc1900ed-6051-473a-ba7a-850a17d0cc02.gz'
    #item_id = 'submitted/2019/08/02/0a52d82d-a89d-4004-9535-8a0bc9c1ce49.gz'
    paste = Paste.Paste(item_id)
    # Exercise the term parser with a multi-word term declared as 'word'.
    res = Term.parse_tracked_term_to_add('test zorro meroio apple weert', 'word')

    # The string literal below is disabled draft code for the matching loop;
    # it is never executed.
    '''
    dict_words_freq = Term.get_text_word_frequency(paste.get_p_content())

    # check solo words
    for word in list_tracked_words:
        if word in dict_words_freq:
            pass
            # tag + get uuids ...

    # check words set
    for list_words, nb_words_threshold in set_tracked_words_list:
        nb_uniq_word = 0
        for word in list_words:
            if word in dict_words_freq:
                nb_uniq_word += 1
        if nb_uniq_word > nb_words_threshold:
            # tag + get uuid
            pass
    '''
|
206
bin/packages/Term.py
Executable file
206
bin/packages/Term.py
Executable file
|
@ -0,0 +1,206 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
import os
|
||||
import sys
|
||||
import uuid
|
||||
import redis
|
||||
import datetime
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
from nltk.tokenize import RegexpTokenizer
|
||||
from textblob import TextBlob
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
|
||||
import Flask_config
|
||||
|
||||
r_serv_term = Flask_config.r_serv_term
|
||||
|
||||
# Characters that are refused in 'word' and 'set' trackers.
special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\')
# NOTE(review): this adds the two-character string '\s', which can never equal
# a single character of a term, so it has no effect on the per-character
# check. Kept as-is; confirm whether whitespace was meant to be rejected.
special_characters.add('\\s')

# NLTK tokenizer
# Raw string so the regex escapes reach the regex engine untouched and Python
# does not warn about invalid string escape sequences. The character class is
# the same as before: punctuation, backslash, slash and any whitespace.
tokenizer = RegexpTokenizer(r'[\&\~\:\;\,\.\(\)\{\}\|\[\]\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
                            gaps=True, discard_empty=True)
|
||||
|
||||
def get_text_word_frequency(item_content, filtering=True):
    """Count the occurrences of every token of an item, case-insensitively.

    :param item_content: text content of the item
    :param filtering: when True, split the text with the module-level NLTK
        ``tokenizer``; otherwise let TextBlob use its default tokenizer
    :return: ``defaultdict(int)`` mapping each lowercase token to its count
    """
    item_content = item_content.lower()
    words_dict = defaultdict(int)

    if filtering:
        blob = TextBlob(item_content, tokenizer=tokenizer)
    else:
        blob = TextBlob(item_content)
    for word in blob.tokens:
        words_dict[word] += 1
    # The debug print of the full dictionary was removed: it flooded stdout
    # with the whole frequency table on every call.
    return words_dict
|
||||
|
||||
# # TODO: create all tracked words
|
||||
def get_tracked_words_list():
    """Return the members of the 'all:tracked_term:word' set as a list."""
    tracked_words = r_serv_term.smembers('all:tracked_term:word')
    return list(tracked_words)
|
||||
|
||||
def get_set_tracked_words_list():
    """Return every tracked word set with its threshold.

    Each member of 'all:tracked_term:set' is stored as
    ``term1,term2,term3;nb_words``.

    :return: list of ``(list_of_words, nb_words)`` tuples
    """
    all_set_list = []
    for elem in r_serv_term.smembers('all:tracked_term:set'):
        # Split on the last ';' to separate the words from the threshold.
        words, num_words = elem.rsplit(';', 1)
        all_set_list.append((words.split(','), int(num_words)))
    # The list was previously built but never returned.
    return all_set_list
|
||||
|
||||
def parse_json_term_to_add(dict_input, user_id=None):
    """Validate a JSON-decoded tracker request and register the tracker.

    :param dict_input: request payload; reads the keys 'term', 'type',
        'nb_words', 'tags', 'mails' and 'level'
    :param user_id: id of the user creating the tracker
    :return: ``(payload, http_status)`` tuple; on success the payload holds
        the normalised term and the uuid of the new tracker
    """
    term = dict_input.get('term', None)
    if not term:
        return ({"status": "error", "reason": "Term not provided"}, 400)
    # The tracker type is sent under 'type' (it was read from 'term').
    term_type = dict_input.get('type', None)
    if not term_type:
        return ({"status": "error", "reason": "Term type not provided"}, 400)
    nb_words = dict_input.get('nb_words', 1)

    # parse_tracked_term_to_add returns a (payload, http_status) tuple.
    res = parse_tracked_term_to_add(term, term_type, nb_words=nb_words)
    if res[1] != 200:
        return res
    # Store the normalised term/type so that what is saved matches the
    # format the tracker readers expect (e.g. 'term1,term2;2' for a set).
    term = res[0]['term']
    term_type = res[0]['type']

    # The creator is needed for the tracker metadata and the per-user set.
    if user_id is None:
        return ({"status": "error", "reason": "User id not provided"}, 400)

    tags = dict_input.get('tags', [])
    mails = dict_input.get('mails', [])
    ## TODO: verify mail integrity

    ## TODO: add dashboard key
    # Visibility level: 0 = user only, 1 = all users; anything else -> 1.
    level = dict_input.get('level', 1)
    try:
        level = int(level)
    except (TypeError, ValueError):
        level = 1
    if level not in (0, 1):
        level = 1

    term_uuid = add_tracked_term(term, term_type, user_id, level, tags, mails)

    return ({'term': term, 'uuid': term_uuid}, 200)
|
||||
|
||||
|
||||
def parse_tracked_term_to_add(term , term_type, nb_words=1):
    """Validate and normalise a term before it is tracked.

    :param term: word, space-separated words, or regex to track
    :param term_type: 'word', 'set' or 'regex'
    :param nb_words: threshold stored with a set; falls back to 1 when it
        cannot be converted to an int
    :return: ``(payload, http_status)`` tuple. On success the payload holds
        the normalised 'term' and the final 'type'; a set is serialised as
        ``term1,term2,term3;nb_words``.
    """
    if term_type == 'regex':
        # TODO: verify regex integrity
        # The regex is passed through unchanged (this branch used to fall
        # off the end of the function and return None).
        pass
    elif term_type in ('word', 'set'):
        # force lowercase
        term = term.lower()
        if special_characters.intersection(term):
            return ({"status": "error", "reason": "special character not allowed", "message": "Please use a regex or remove all special characters"}, 400)
        words = term.split()
        if not words:
            # Empty or whitespace-only input carries nothing to track.
            return ({"status": "error", "reason": "Term not provided"}, 400)

        # not a word: several words can only be tracked as a set
        if term_type == 'word' and len(words) > 1:
            term_type = 'set'

        if term_type == 'set':
            # output format: term1,term2,term3;2
            try:
                nb_words = int(nb_words)
            except (TypeError, ValueError):
                nb_words = 1
            term = ",".join(sorted(set(words)))
            term = "{};{}".format(term, nb_words)
        else:
            # Single word: drop any surrounding whitespace.
            term = words[0]
    else:
        return ({"status": "error", "reason": "Incorrect type"}, 400)

    return ({"status": "success", "term": term, "type": term_type}, 200)
|
||||
|
||||
def add_tracked_term(term , term_type, user_id, level, tags, mails, dashboard=0):
    """Register a new tracker and return its freshly generated uuid.

    Writes the tracker metadata hash, adds the term to the per-type set and
    to the term -> uuid map, records the uuid in the visibility set selected
    by ``level`` (0: user only, 1: all users) and stores its tags and mails.
    """
    term_uuid = str(uuid.uuid4())
    metadata_key = 'tracked_term:{}'.format(term_uuid)
    creation_date = datetime.date.today().strftime("%Y%m%d")

    # create metadata
    metadata = (
        ('tracked', term),
        ('type', term_type),
        ('date', creation_date),
        ('user_id', user_id),
        ('level', level),
        ('dashboard', dashboard),
    )
    for field, value in metadata:
        r_serv_term.hset(metadata_key, field, value)

    # create all term set
    r_serv_term.sadd('all:tracked_term:{}'.format(term_type), term)

    # create term - uuid map
    r_serv_term.sadd('all:tracked_term_uuid:{}'.format(term), term_uuid)

    # add display level set
    if level == 0:
        # user only
        r_serv_term.sadd('user:tracked_term:{}'.format(user_id), term_uuid)
    elif level == 1:
        # global
        # NOTE(review): the key is spelled 'gobal', as in OVERVIEW.md.
        r_serv_term.sadd('gobal:tracked_term', term_uuid)

    # create term tags list
    tags_key = 'tracked_term:tags:{}'.format(term_uuid)
    for tag in tags:
        r_serv_term.sadd(tags_key, tag)

    # create term mail notification list
    mail_key = 'tracked_term:mail:{}'.format(term_uuid)
    for mail in mails:
        r_serv_term.sadd(mail_key, mail)

    return term_uuid
|
||||
|
||||
def get_term_uuid_list(term):
    """Return, as a list, the tracker uuids registered for ``term``."""
    uuid_key = 'all:tracked_term_uuid:{}'.format(term)
    term_uuids = r_serv_term.smembers(uuid_key)
    return list(term_uuids)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def get_global_tracked_term():
    """Return the globally tracked terms, grouped by kind.

    :return: dict with the keys 'term', 'set' and 'regex', each holding the
        list of members of the matching 'global:Tracked*' set
    """
    tracked_set = list(r_serv_term.smembers('global:TrackedSetSet'))
    tracked_regex = list(r_serv_term.smembers('global:TrackedRegexSet'))
    tracked_terms = list(r_serv_term.smembers('global:TrackedSetTermSet'))
    # 'set' must expose the tracked sets (it used to repeat the terms).
    return {'term': tracked_terms, 'set': tracked_set, 'regex': tracked_regex}
|
||||
|
||||
def get_user_tracked_term(user_id):
    """Return the terms tracked by one user, grouped by kind.

    :param user_id: id used to build the 'user:<user_id>:Tracked*' keys
    :return: dict with the keys 'term', 'set' and 'regex', each holding the
        list of members of the matching per-user set
    """
    # format() has to be applied to the key string, not to the query result.
    tracked_set = list(r_serv_term.smembers('user:{}:TrackedSetSet'.format(user_id)))
    tracked_regex = list(r_serv_term.smembers('user:{}:TrackedRegexSet'.format(user_id)))
    tracked_terms = list(r_serv_term.smembers('user:{}:TrackedSetTermSet'.format(user_id)))
    # 'set' must expose the tracked sets (it used to repeat the terms).
    return {'term': tracked_terms, 'set': tracked_set, 'regex': tracked_regex}
|
|
@ -583,6 +583,94 @@ curl https://127.0.0.1:7000/api/v1/get/tag/metadata --header "Authorization: iHc
|
|||
|
||||
|
||||
|
||||
|
||||
|
||||
## Tracker
|
||||
|
||||
|
||||
|
||||
### Add term tracker: `api/v1/add/tracker/term`<a name="add_term_tracker"></a>
|
||||
|
||||
#### Description
|
||||
Add term tracker
|
||||
|
||||
**Method** : `POST`
|
||||
|
||||
#### Parameters
|
||||
- `term`
|
||||
- term to add
|
||||
- *str - word(s)*
|
||||
- mandatory
|
||||
- `nb_words`
|
||||
- number of words in set
|
||||
- *int*
|
||||
- default: `1`
|
||||
- `type`
|
||||
- term type
|
||||
- *str*
|
||||
- mandatory: `word`, `set`, `regex`
|
||||
- `tags`
|
||||
- list of tags
|
||||
- *list*
|
||||
- default: `[]`
|
||||
- `mails`
|
||||
- list of mails to notify
|
||||
- *list*
|
||||
- default: `[]`
|
||||
- `level`
|
||||
- tracker visibility
|
||||
- *int - 0: user only, 1: all users*
|
||||
- default: `1`
|
||||
|
||||
#### JSON response
|
||||
- `uuid`
|
||||
- tracked term uuid
|
||||
- *uuid4*
|
||||
|
||||
#### Example
|
||||
```
|
||||
curl https://127.0.0.1:7000/api/v1/add/tracker/term --header "Authorization: iHc1_ChZxj1aXmiFiF1mkxxQkzawwriEaZpPqyTQj " -H "Content-Type: application/json" --data @input.json -X POST
|
||||
```
|
||||
|
||||
#### input.json Example
|
||||
```json
|
||||
{
  "term": "test zorro",
  "type": "set",
  "nb_words": 2,
  "tags": [
    "infoleak:analyst-detection=\"private-key\""
  ],
  "mails": [],
  "level": 1
}
|
||||
```
|
||||
|
||||
#### Expected Success Response
|
||||
**HTTP Status Code** : `200`
|
||||
|
||||
```json
|
||||
{
|
||||
"uuid": "0c3d7b34-936e-4f01-9cdf-2070184b6016"
|
||||
}
|
||||
```
|
||||
|
||||
#### Expected Fail Response
|
||||
**HTTP Status Code** : `400`
|
||||
|
||||
```json
|
||||
{"status": "error", "reason": "Malformed JSON"}
{"status": "error", "reason": "Term not provided"}
{"status": "error", "reason": "Term type not provided"}
{"status": "error", "reason": "special character not allowed", "message": "Please use a regex or remove all special characters"}
{"status": "error", "reason": "Incorrect type"}
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
## Import management
|
||||
|
||||
|
||||
|
|
|
@ -307,6 +307,15 @@ def get_all_tags():
|
|||
res = {'tags': Tag.get_all_tags()}
|
||||
return Response(json.dumps(res, indent=2, sort_keys=True), mimetype='application/json'), 200
|
||||
|
||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||
# # # # # # # # # # # # # # TRACKER # # # # # # # # # # # # # # # # #
|
||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||
# REST endpoint meant to register a new term tracker from a JSON body.
@restApi.route("api/v1/add/tracker/term", methods=['POST'])
#@token_required('analyst')
def add_tracker_term():
    # NOTE(review): token authentication is commented out above, and the
    # handler only reads the JSON body: nothing is processed and no response
    # is returned yet.
    data = request.get_json()
|
||||
|
||||
|
||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||
# # # # # # # # # # # # # IMPORT # # # # # # # # # # # # # # # # # #
|
||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||
|
|
Loading…
Add table
Reference in a new issue