chg: [API] get domain min metadata (first up, last up) + get crawled domain by daterange and status

This commit is contained in:
Terrtia 2019-12-16 14:31:31 +01:00
parent 1f97159413
commit 28ece38d82
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
5 changed files with 224 additions and 14 deletions

View file

@ -16,6 +16,7 @@ import random
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Cryptocurrency
from Pgp import pgp
import Date
import Decoded
import Item
import Tag
@ -30,6 +31,35 @@ config_loader = ConfigLoader.ConfigLoader()
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
config_loader = None
######## DB KEYS ########
def get_db_keys_domain_up(domain_type, date_type):
    '''
    Build the key-name template of the "domains up" sets.

    The returned string still contains one "{}" placeholder, to be filled
    with the date by the caller.

    :param domain_type: domain type inserted in the key name (not sanitised here)
    :type domain_type: str
    :param date_type: key granularity, 'day' or 'month'
    :type date_type: str

    :return: key template, or None if date_type is not supported
    :rtype: str
    '''
    if date_type == 'month':
        prefix = "month_{}_up:"
    elif date_type == 'day':
        prefix = "{}_up:"
    else:
        return None
    # The trailing "{}" is appended after formatting so it stays a placeholder.
    return prefix.format(domain_type) + "{}"
def get_list_db_keys_domain_up(domain_type, l_dates, date_type):
    '''
    Build one "domains up" key name per date.

    :param domain_type: domain type inserted in the key names
    :type domain_type: str
    :param l_dates: dates used to fill the key template
    :type l_dates: list
    :param date_type: key granularity, 'day' or 'month'
    :type date_type: str

    :return: list of key names (empty if date_type is not supported)
    :rtype: list
    '''
    key_template = get_db_keys_domain_up(domain_type, date_type)
    # No template means an unsupported date_type: nothing to build.
    if not key_template:
        return []
    return [key_template.format(str_date) for str_date in l_dates]
######## UTIL ########
def sanitize_domain_type(domain_type):
    '''
    Restrict a domain type to the supported values.

    :param domain_type: requested domain type

    :return: domain_type if it is 'onion' or 'regular', else 'regular'
    :rtype: str
    '''
    supported_types = ['onion', 'regular']
    return domain_type if domain_type in supported_types else 'regular'
######## DOMAINS ########
def get_all_domains_up(domain_type):
'''
@ -41,7 +71,7 @@ def get_all_domains_up(domain_type):
:return: list of domain
:rtype: list
'''
return list(r_serv_onion.smembers("full_onion_up"))
return list(r_serv_onion.smembers("full_{}_up".format(domain_type)))
def get_domains_up_by_month(date_year_month, domain_type, rlist=False):
'''
@ -53,7 +83,7 @@ def get_domains_up_by_month(date_year_month, domain_type, rlist=False):
:return: list of domain
:rtype: list
'''
res = r_serv_onion.smembers("month_onion_up:{}".format(date_year_month))
res = r_serv_onion.smembers( get_db_keys_domain_up(domain_type, "month").format(date_year_month) )
if rlist:
return list(res)
else:
@ -69,12 +99,33 @@ def get_domain_up_by_day(date_year_month, domain_type, rlist=False):
:return: list of domain
:rtype: list
'''
res = r_serv_onion.smembers("onion_up:{}".format(date_year_month))
res = r_serv_onion.smembers(get_db_keys_domain_up(domain_type, "day").format(date_year_month))
if rlist:
return list(res)
else:
return res
def get_domains_up_by_daterange(date_from, date_to, domain_type):
    '''
    Get all domain up (at least one time) by daterange

    The range is split by Date.get_date_range_full_month_and_days() into
    single days and full months, so that one key per full month is read
    instead of one key per day.

    :param date_from: start of the date range
    :param date_to: end of the date range
    :param domain_type: domain type used to build the key names
    :type domain_type: str

    :return: list of domain
    :rtype: list
    '''
    days, months = Date.get_date_range_full_month_and_days(date_from, date_to)
    keys = get_list_db_keys_domain_up(domain_type, days, 'day')
    keys += get_list_db_keys_domain_up(domain_type, months, 'month')

    if not keys:
        return []
    if len(keys) == 1:
        # A single set: no union needed.
        return list(r_serv_onion.smembers(keys[0]))
    first_key, *other_keys = keys
    return list(r_serv_onion.sunion(first_key, *other_keys))
######## DOMAIN ########
@ -465,7 +516,10 @@ def api_get_domain_up_range(domain, domain_type=None):
res['domain'] = domain
return res, 200
def api_get_domains_by_status_daterange(date_from, date_to, domain_type):
    '''
    API helper: get the domains up in a date range.

    :param date_from: start of the date range
    :param date_to: end of the date range
    :param domain_type: requested domain type; unsupported values
                        (including None) fall back to 'regular'

    :return: ({'domains': list of domain}, 200)
    :rtype: tuple
    '''
    # sanitize_domain_type() returns the cleaned value; it must be re-bound,
    # otherwise the raw value is used to build the key names.
    domain_type = sanitize_domain_type(domain_type)
    res = {'domains': get_domains_up_by_daterange(date_from, date_to, domain_type)}
    return res, 200
## CLASS ##
class Domain(object):

View file

@ -24,13 +24,15 @@ def get_date_range_full_month_and_days(date_from, date_to):
full_month = get_full_month_str(date_from, date_to)
# request at least one month
if full_month:
day_list = substract_date(date_from.strftime('%Y%m%d'), full_month[0].strftime('%Y%m%d'))
# remove last day (day in full moth)
if day_list:
day_list = day_list[:-1]
print(day_list)
day_list.extend(substract_date( (full_month[-1] + relativedelta(months=+1) ).strftime('%Y%m%d'), date_to.strftime('%Y%m%d')))
print(day_list)
else:
day_list = substract_date(date_from.strftime('%Y%m%d'), date_to.strftime('%Y%m%d'))
full_month = [dt_month.strftime('%Y%m') for dt_month in full_month]
return day_list, full_month

View file

@ -972,8 +972,135 @@ curl https://127.0.0.1:7000/api/v1/get/tracker/item --header "Authorization: iHc
## Domain
### Get crawled domain list: `api/v1/get/crawled/domain/list`<a name="get_crawled_domain_list"></a>
#### Description
Get crawled domain by date-range and status (default status = *UP*)
**Method** : `POST`
#### Parameters
- `domain_type`
- domain type: *onion* or *regular*
- *str*
- default: *regular*
- `date_from`
- date from
- *str - YYYYMMDD*
- `date_to`
- date to
- *str - YYYYMMDD*
#### JSON response
- `domain_type`
- domain type: *onion* or *regular*
- *str*
- `date_from`
- date from
- *str - YYYYMMDD*
- `date_to`
- date to
- *str - YYYYMMDD*
- `domains`
- list of domains
- *list - list of domains*
#### Example
```
curl https://127.0.0.1:7000/api/v1/get/crawled/domain/list --header "Authorization: iHc1_ChZxj1aXmiFiF1mkxxQkzawwriEaZpPqyTQj " -H "Content-Type: application/json" --data @input.json -X POST
```
#### input.json Example
```json
{
"date_from": "20191001",
"date_to": "20191222",
"domain_type": "onion"
}
```
#### Expected Success Response
**HTTP Status Code** : `200`
```json
{
"date_from": "20191001",
"date_to": "20191222",
"domain_status": "UP",
"domain_type": "onion",
"domains": [
"2222222222222222.onion"
]
}
```
**HTTP Status Code** : `404`
```json
{"status": "error", "reason": "Domain not found"}
```
### Get min domain metadata: `api/v1/get/domain/metadata/minimal`<a name="get_domain_metadata_minimal"></a>
#### Description
Get min domain metadata
**Method** : `POST`
#### Parameters
- `domain`
- domain name
- *str*
- mandatory
#### JSON response
- `domain`
- domain
- *str*
- `first_seen`
- domain first up time
- *epoch*
- `last_seen`
- domain last up time
- *epoch*
#### Example
```
curl https://127.0.0.1:7000/api/v1/get/domain/metadata/minimal --header "Authorization: iHc1_ChZxj1aXmiFiF1mkxxQkzawwriEaZpPqyTQj " -H "Content-Type: application/json" --data @input.json -X POST
```
#### input.json Example
```json
{
"domain": "2222222222222222.onion"
}
```
#### Expected Success Response
**HTTP Status Code** : `200`
```json
{
"domain": "2222222222222222.onion",
"first_seen": 1571314000,
"last_seen": 1571314000
}
```
**HTTP Status Code** : `404`
```json
{"status": "error", "reason": "Domain not found"}
```
## Import management

View file

@ -140,6 +140,12 @@ def authErrors(user_role):
def create_json_response(data_dict, response_code):
    '''
    Serialise a dict as an indented, key-sorted JSON response.

    :param data_dict: data to serialise with json.dumps
    :param response_code: HTTP status code (converted with int())

    :return: (Response with mimetype application/json, status code)
    :rtype: tuple
    '''
    body = json.dumps(data_dict, indent=2, sort_keys=True)
    response = Response(body, mimetype='application/json')
    return response, int(response_code)
def get_mandatory_fields(json_data, required_fields):
    '''
    Check that every mandatory field is present in the request data.

    :param json_data: decoded JSON request body; None (no usable body)
                      is handled as "no field provided"
    :param required_fields: names of the mandatory fields
    :type required_fields: list

    :return: (error dict, 400) for the first missing field, None if all
             mandatory fields are provided
    '''
    for field in required_fields:
        # A None body would make the "in" test raise TypeError; report the
        # field as missing so the caller answers 400 instead of crashing.
        if json_data is None or field not in json_data:
            return {'status': 'error', 'reason': 'mandatory field: {} not provided'.format(field)}, 400
    return None
# ============ FUNCTIONS ============
def is_valid_uuid_v4(header_uuid):
@ -472,8 +478,29 @@ def get_domain_metadata_minimal():
res = Domain.api_verify_if_domain_exist(domain)
if res:
return create_json_response(res[0], res[1])
res = Domain.get_domain_metadata_basic(domain)
return create_json_response(res, 200)
res = Domain.api_get_domain_up_range(domain)
res[0]['domain'] = domain
return create_json_response(res[0], res[1])
@restApi.route("api/v1/get/domain/list", methods=['POST'])
@token_required('analyst')
def get_domain_list():
    '''
    Get the domains up in a date range.

    JSON body: mandatory 'date_from' and 'date_to', optional 'domain_type'.
    The response echoes the request parameters next to the 'domains' list.
    '''
    data = request.get_json()
    res = get_mandatory_fields(data, ['date_from', 'date_to'])
    if res:
        return create_json_response(res[0], res[1])

    date_from = data.get('date_from', None)
    date_to = data.get('date_to', None)
    # Normalise the domain type once, so the value used for the lookup and
    # the value echoed in the response are the same supported type, not the
    # raw (possibly missing) request value.
    domain_type = Domain.sanitize_domain_type(data.get('domain_type', None))
    domain_status = 'UP'  # only the UP status is handled by this endpoint
    res = Domain.api_get_domains_by_status_daterange(date_from, date_to, domain_type)
    dict_res = res[0]
    dict_res['date_from'] = date_from
    dict_res['date_to'] = date_to
    dict_res['domain_status'] = domain_status
    dict_res['domain_type'] = domain_type
    return create_json_response(dict_res, res[1])
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # IMPORT # # # # # # # # # # # # # # # # # #

View file

@ -452,7 +452,7 @@ if (d.popover) {
if (data["tags"]) {
data["tags"].forEach(function(tag) {
desc = desc + "<span class=\"badge badge-primary\">"+ sanitize_text(tag) +"</span>";
desc = desc + "<span class=\"badge badge-warning\">"+ sanitize_text(tag) +"</span>";
});
}