Refactor [creation] script

This commit is contained in:
niclas 2024-02-21 16:24:48 +01:00
parent 059de052ad
commit 108e43e1ca
10 changed files with 282 additions and 2 deletions

View file

View file

@ -84,8 +84,8 @@ if __name__ == "__main__":
galaxy = create_galaxy() galaxy = create_galaxy()
cluster = create_cluster(galaxy, data) cluster = create_cluster(galaxy, data)
with open(GALAXY_PATH + "tidal-threat-group.json", "w") as galaxy_file: with open(GALAXY_PATH + "tidal-threat-groups.json", "w") as galaxy_file:
json.dump(galaxy, galaxy_file, indent=4) json.dump(galaxy, galaxy_file, indent=4)
with open(CLUSTER_PATH + "tidal-threat-group.json", "w") as cluster_file: with open(CLUSTER_PATH + "tidal-threat-groups.json", "w") as cluster_file:
json.dump(cluster, cluster_file, indent=4) json.dump(cluster, cluster_file, indent=4)

View file

@ -1,5 +1,6 @@
from api import TidalAPI from api import TidalAPI
import json import json
import re
VERSION = 1 VERSION = 1
GALAXY_PATH = "../../galaxies/" GALAXY_PATH = "../../galaxies/"
@ -38,6 +39,7 @@ def create_cluster(galaxy, data):
value["description"] = software["description"] value["description"] = software["description"]
# Metadata fields # Metadata fields
links = extract_links(software["description"])
source = software["source"] source = software["source"]
type = software["type"] type = software["type"]
software_attack_id = software["software_attack_id"] software_attack_id = software["software_attack_id"]
@ -46,6 +48,8 @@ def create_cluster(galaxy, data):
owner = software["owner_name"] owner = software["owner_name"]
value["meta"] = {} value["meta"] = {}
if links:
value["meta"]["refs"] = list(links)
if source: if source:
value["meta"]["source"] = source value["meta"]["source"] = source
if type: if type:
@ -74,6 +78,21 @@ def create_cluster(galaxy, data):
cluster["values"] = values cluster["values"] = values
return cluster return cluster
def extract_links(text):
    """Return the set of URLs that appear as markdown link targets in *text*.

    Only links written as ``[label](http://...)`` or ``[label](https://...)``
    are considered; bare URLs are ignored. The input text itself is not
    modified or returned.

    Args:
        text: String to scan for markdown links.

    Returns:
        A set of URL strings (duplicates collapse into one entry).
    """
    regular_links = re.findall(r'\[([^\]]+)\]\((https?://[^\s\)]+)\)', text)
    # Each match is a (label, url) tuple; only the URL is kept.
    return {url for _label, url in regular_links}
if __name__ == "__main__": if __name__ == "__main__":
api = TidalAPI() api = TidalAPI()
data = api.get_data('software') data = api.get_data('software')

218
tools/tidal-api/main.py Normal file
View file

@ -0,0 +1,218 @@
from api.api import TidalAPI
from models.galaxy import Galaxy
from models.cluster import Cluster
from utils.extractor import extract_links
import argparse
# Output directories for the generated JSON files (relative paths).
CLUSTER_PATH = "../../clusters/"
GALAXY_PATH = "../../galaxies/"
# One UUID per galaxy type. Both GALAXY_CONFIGS and CLUSTER_CONFIGS look their
# "uuid" up here, so a galaxy and its cluster share the same UUID.
UUIDS = {
    "software": "38d62d8b-4c49-489a-9bc4-8e294c4f04f7",
    "groups": "41c3e5c0-de5c-4edb-b48b-48cd8e7519e6",
    "campaigns": "43a8fce6-08d3-46c2-957d-53606efe2c48",
}
# Keyword arguments for the Galaxy constructor, keyed by galaxy type.
# create_galaxy_and_cluster() unpacks one entry into Galaxy(...) and adds the
# version; the keys also serve as the allowed values of the --type CLI option.
GALAXY_CONFIGS = {
    "software": {
        "name": "Tidal Software",
        "namespace": "tidal",
        "description": "Tidal Software Galaxy",
        "type": "software",
        "uuid": UUIDS["software"],
    },
    "groups": {
        "name": "Tidal Groups",
        "namespace": "tidal",
        "description": "Tidal Groups Galaxy",
        "type": "groups",
        "uuid": UUIDS["groups"],
    },
    "campaigns": {
        "name": "Tidal Campaigns",
        "namespace": "tidal",
        "description": "Tidal Campaigns Galaxy",
        "type": "campaigns",
        "uuid": UUIDS["campaigns"],
    }
}
# Keyword arguments for the Cluster constructor, keyed by galaxy type.
# create_galaxy_and_cluster() unpacks one entry into Cluster(...) and adds
# internal_type.
# NOTE(review): each "values" list below is a module-level object that is
# passed to Cluster as-is, and Cluster.add_value() appends to the list it was
# given -- building the same type twice in one process would accumulate values.
CLUSTER_CONFIGS = {
    "software": {
        "authors": "Tidal",
        "category": "Software",
        "description": "Tidal Software Cluster",
        "name": "Tidal Software",
        "source": "Tidal",
        "type": "software",
        "uuid": UUIDS["software"],
        "values": []
    },
    "groups": {
        "authors": "Tidal",
        "category": "Threat Groups",
        "description": "Tidal Threat Groups Cluster",
        "name": "Tidal Threat Groups",
        "source": "Tidal",
        "type": "groups",
        "uuid": UUIDS["groups"],
        "values": []
    },
    "campaigns": {
        "authors": "Tidal",
        "category": "Campaigns",
        "description": "Tidal Campaigns Cluster",
        "name": "Tidal Campaigns",
        "source": "Tidal",
        "type": "campaigns",
        "uuid": UUIDS["campaigns"],
        "values": []
    }
}
# Per galaxy type: how one API entry is turned into one cluster value.
# Read by create_cluster_values(), create_metadata() and create_relations().
#
#   "description" / "uuid" / "value":
#       name of the entry key whose content is copied into that output field.
#   "meta":
#       output meta key -> either the name of an entry key to copy (skipped
#       when the entry's value is falsy), or an extraction rule
#       {"extract": "single" | "multiple", "key": ..., "subkey": ...} that
#       reads entry[key] as a list of dicts and takes `subkey` from the first
#       item ("single") or from every item ("multiple").
#   "related":
#       entry key holding a list of related items -> the relation template;
#       "type" is copied literally, every other template key names the item
#       field to read (e.g. "dest-uuid" is filled from item["group_id"]).
VALUE_FIELDS = {
    "software": {
        "description": "description",
        "meta": {
            "source": "source",
            "type": "type",
            "software-attack-id": "software_attack_id",
            "platforms": "platforms",
            "tags": "tags",
            "owner": "owner_name"
        },
        "related": {
            "groups": {
                "dest-uuid": "group_id",
                "type": "used-by"
            },
            "associated_software": {
                "dest-uuid": "id",
                "type": "related-to"
            }
        },
        "uuid": "id",
        "value": "name"
    },
    "groups": {
        "description": "description",
        "meta": {
            "source": "source",
            "group-attack-id": "group_attack_id",
            "country": {"extract": "single", "key": "country", "subkey": "country_code"},
            "observed_country": {"extract": "multiple", "key": "observed_country", "subkey": "country_code"},
            "observed_motivation": {"extract": "multiple", "key": "observed_motivation", "subkey": "name"},
            "target-category": {"extract": "multiple", "key": "observed_sector", "subkey": "name"},
            "tags": "tags",
            "owner": "owner_name"
        },
        "related": {
            "associated_groups": {
                "dest-uuid": "id",
                "type": "related-to"
            }
        },
        "uuid": "id",
        "value": "name"
    },
    "campaigns": {
        "description": "description",
        "meta": {
            "source": "source",
            "campaign-attack-id": "campaign_attack_id",
            "first_seen": "first_seen",
            "last_seen": "last_seen",
            "tags": "tags",
            "owner": "owner_name"
        },
        "related": {},
        "uuid": "id",
        "value": "name"
    }
}
def create_cluster_values(data, cluster):
value_fields = VALUE_FIELDS[cluster.internal_type]
for entry in data["data"]:
values = {}
for key, value in value_fields.items():
match key:
case "description":
values[value] = entry.get(key)
case "meta":
metadata = create_metadata(entry, value)
values["meta"] = metadata
case "related":
relations = create_relations(entry, value)
values["related"] = relations
case "uuid":
values[key] = entry.get(value)
case "value":
values[key] = entry.get(value)
case _:
print(f"Error: Invalid configuration for {key} in {cluster.internal_type} value fields.")
cluster.add_value(values)
def create_metadata(data, format):
    """Assemble the ``meta`` dictionary of a cluster value.

    Args:
        data: API entry (mapping) the metadata is read from.
        format: Mapping of output meta key to either the name of an entry key
            to copy, or an extraction rule dict with ``"extract"``
            (``"single"`` or ``"multiple"``), ``"key"`` and ``"subkey"``.

    Returns:
        Dict containing only the meta keys whose source value is truthy.
    """
    metadata = {}
    for meta_key, rule in format.items():
        # Plain string rule: copy the entry's value when it is truthy.
        if not isinstance(rule, dict):
            plain_value = data.get(rule)
            if plain_value:
                metadata[meta_key] = plain_value
            continue

        mode = rule.get("extract")
        if mode != "single" and mode != "multiple":
            # Rule dicts with any other extraction mode are ignored.
            continue

        records = data.get(rule["key"])
        if not records:
            continue

        if mode == "single":
            # Only the first record contributes.
            metadata[meta_key] = records[0].get(rule["subkey"])
        else:
            metadata[meta_key] = [record.get(rule["subkey"]) for record in records]
    return metadata
def create_relations(data, format):
    """Build the ``related`` list of a cluster value.

    Args:
        data: API entry (mapping) the related items are read from.
        format: Mapping of entry key (holding a list of related items) to a
            relation template. In the template, ``"type"`` is copied
            literally; every other key names the item field to read.

    Returns:
        List of relation dicts, in the order of *format* and then of the
        items in each list. An entry key that is missing or whose value is
        empty/``None`` contributes no relations.
    """
    relations = []
    for source_key, template in format.items():
        # `or []` tolerates entries that omit the list or carry null for it.
        for relation in data.get(source_key) or []:
            relation_entry = {}
            for relation_key, relation_value in template.items():
                if relation_key == "type":
                    relation_entry[relation_key] = relation_value
                else:
                    relation_entry[relation_key] = relation.get(relation_value)
            relations.append(relation_entry)
    return relations
def create_galaxy_and_cluster(galaxy_type, version):
    """Write the galaxy file and the cluster file for one galaxy type.

    The galaxy is built from ``GALAXY_CONFIGS`` and saved first; the cluster
    is built from ``CLUSTER_CONFIGS``, filled from the data returned by
    ``TidalAPI.get_data(galaxy_type)`` and saved afterwards.

    Args:
        galaxy_type: Key into ``GALAXY_CONFIGS`` / ``CLUSTER_CONFIGS``.
        version: Version stored in the galaxy.

    Raises:
        KeyError: If *galaxy_type* is not a configured type.
    """
    import os  # local import: only needed to build the output paths

    api = TidalAPI()

    galaxy = Galaxy(**GALAXY_CONFIGS[galaxy_type], version=version)
    # os.path.join avoids the doubled separator that "<dir>/" + "/file" gave.
    galaxy.save_to_file(os.path.join(GALAXY_PATH, f"tidal-{galaxy_type}.json"))

    # Give the cluster its own values list so the module-level config is not
    # mutated when values are added.
    cluster_config = {**CLUSTER_CONFIGS[galaxy_type], "values": []}
    cluster = Cluster(**cluster_config, internal_type=galaxy_type)
    data = api.get_data(galaxy_type)
    create_cluster_values(data, cluster)
    cluster.save_to_file(os.path.join(CLUSTER_PATH, f"tidal-{galaxy_type}.json"))

    print(f"Galaxy {galaxy_type} created")
def create_galaxy(args):
    """Handle the ``create_galaxy`` sub-command.

    Args:
        args: Parsed arguments exposing ``all``, ``type`` and ``version``.

    Raises:
        ValueError: If neither a galaxy type nor the "all" selection is given.
    """
    # The parser accepts both the --all flag and "--type all"; treat them the
    # same instead of looking up a non-existent "all" configuration.
    if args.all or args.type == "all":
        for galaxy_type in GALAXY_CONFIGS:
            create_galaxy_and_cluster(galaxy_type, args.version)
    elif args.type is None:
        raise ValueError("No galaxy type given: pass --type <type> or --all")
    else:
        create_galaxy_and_cluster(args.type, args.version)
if __name__ == "__main__":
    # Command-line entry point: a single "create_galaxy" sub-command whose
    # handler is stored on the parsed namespace as `func`.
    cli = argparse.ArgumentParser(description="Create a galaxy and cluster for Tidal API")
    commands = cli.add_subparsers(dest="command")

    create_cmd = commands.add_parser("create_galaxy", help="Create a galaxy from the Tidal API")
    create_cmd.set_defaults(func=create_galaxy)
    create_cmd.add_argument(
        "--type",
        choices=[*GALAXY_CONFIGS, 'all'],
        help="The type of the galaxy",
    )
    create_cmd.add_argument(
        "-v",
        "--version",
        type=int,
        required=True,
        help="The version of the galaxy",
    )
    create_cmd.add_argument(
        "--all",
        action="store_true",
        help="Flag to create all predefined galaxy types",
    )

    parsed = cli.parse_args()
    # No sub-command given means no handler was attached: show usage instead.
    if not hasattr(parsed, 'func'):
        cli.print_help()
    else:
        parsed.func(parsed)

View file

View file

@ -0,0 +1,23 @@
import json
class Cluster():
    """In-memory representation of a cluster that can be written as JSON.

    All constructor arguments are stored as attributes of the same name, and
    the attribute dictionary is what gets serialized.
    """

    def __init__(self, authors: str, category: str, description: str, name: str, source: str, type: str, uuid: str, values: list, internal_type: str):
        self.authors = authors
        self.category = category
        self.description = description
        self.name = name
        self.source = source
        self.type = type
        self.uuid = uuid
        # Copy so add_value() never mutates the list owned by the caller
        # (e.g. a shared module-level configuration).
        self.values = list(values)
        # NOTE(review): internal_type is serialized together with the other
        # attributes by save_to_file() -- confirm that is intended.
        self.internal_type = internal_type

    def add_value(self, value):
        """Append one value to this cluster's ``values`` list."""
        self.values.append(value)

    def save_to_file(self, path):
        """Write the attribute dictionary to *path* as JSON (indent 4)."""
        with open(path, "w") as file:
            json.dump(self.__dict__, file, indent=4)

    def get_config(self):
        """Return the live attribute dictionary of this cluster."""
        return self.__dict__

View file

@ -0,0 +1,14 @@
import json
class Galaxy():
    """Galaxy description that can be written to disk as JSON.

    The constructor arguments are stored as attributes of the same name; the
    attribute dictionary is what ends up in the file.
    """

    def __init__(self, description, name, namespace, type, uuid, version):
        # Assignment order determines the key order of the JSON output.
        self.description = description
        self.name = name
        self.namespace = namespace
        self.type = type
        self.uuid = uuid
        self.version = version

    def save_to_file(self, path):
        """Serialize this galaxy's attributes to *path* as JSON (indent 4)."""
        payload = json.dumps(vars(self), indent=4)
        with open(path, "w") as handle:
            handle.write(payload)

View file

View file

@ -0,0 +1,6 @@
import re
def extract_links(text):
    """Collect the target URLs of all markdown links found in *text*.

    A link is recognised when written as ``[label](http://...)`` or
    ``[label](https://...)``.

    Returns:
        A set with one entry per distinct URL.
    """
    pattern = r'\[([^\]]+)\]\((https?://[^\s\)]+)\)'
    # Group 1 is the link label, group 2 the URL.
    return {found.group(2) for found in re.finditer(pattern, text)}