mirror of
https://github.com/MISP/misp-galaxy.git
synced 2024-11-29 18:27:19 +00:00
Add [cluster] country code
This commit is contained in:
parent
c88253baea
commit
5d8dbf0d91
3 changed files with 1358 additions and 876 deletions
File diff suppressed because it is too large
Load diff
|
@ -5,6 +5,7 @@ import uuid
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
import pycountry
|
||||||
|
|
||||||
CLUSTER_PATH = '../../clusters'
|
CLUSTER_PATH = '../../clusters'
|
||||||
GALAXY_PATH = '../../galaxies'
|
GALAXY_PATH = '../../galaxies'
|
||||||
|
@ -12,6 +13,28 @@ GALAXY_NAME = 'intelligence-agencies'
|
||||||
UUID = "3ef969e7-96cd-4048-aa83-191ac457d0db"
|
UUID = "3ef969e7-96cd-4048-aa83-191ac457d0db"
|
||||||
WIKIPEDIA_URL = "https://en.wikipedia.org"
|
WIKIPEDIA_URL = "https://en.wikipedia.org"
|
||||||
|
|
||||||
|
COUNTRY_CODES = {
|
||||||
|
"Brunei": "BN",
|
||||||
|
"People's Republic of China": "CN",
|
||||||
|
"Democratic Republic of the Congo": "CD", # Note: This is for the Democratic Republic of the Congo, not to be confused with the Republic of the Congo (CG)
|
||||||
|
"Czech Republic": "CZ",
|
||||||
|
"Iran": "IR",
|
||||||
|
"Moldova": "MD", # Officially known as the Republic of Moldova
|
||||||
|
"North Korea": "KP", # Officially the Democratic People's Republic of Korea (DPRK)
|
||||||
|
"Palestine": "PS",
|
||||||
|
"Russia": "RU", # Officially the Russian Federation
|
||||||
|
"South Korea": "KR", # Officially the Republic of Korea (ROK)
|
||||||
|
"Syria": "SY", # Officially the Syrian Arab Republic
|
||||||
|
"Taiwan": "TW", # ISO code is assigned as "Taiwan, Province of China"
|
||||||
|
"Tanzania": "TZ", # Officially the United Republic of Tanzania
|
||||||
|
"Trinidad & Tobago": "TT",
|
||||||
|
"Turkey": "TR",
|
||||||
|
"Venezuela": "VE", # Officially the Bolivarian Republic of Venezuela
|
||||||
|
"Vietnam": "VN", # Officially the Socialist Republic of Vietnam
|
||||||
|
"European Union": None, # Not a country, no ISO code
|
||||||
|
"Shanghai Cooperation Organisation": None # Not a country, no ISO code
|
||||||
|
}
|
||||||
|
|
||||||
def get_UUIDs():
|
def get_UUIDs():
|
||||||
if f"{GALAXY_NAME}.json" in os.listdir(CLUSTER_PATH):
|
if f"{GALAXY_NAME}.json" in os.listdir(CLUSTER_PATH):
|
||||||
uuids = {}
|
uuids = {}
|
||||||
|
@ -85,10 +108,23 @@ def get_agencies_from_country(heading, current_country, uuids):
|
||||||
for content in contents:
|
for content in contents:
|
||||||
agency_names = get_notes_on_lower_level(content)
|
agency_names = get_notes_on_lower_level(content)
|
||||||
for name, links, description, synonyms in agency_names:
|
for name, links, description, synonyms in agency_names:
|
||||||
if uuids and name in uuids:
|
country_code = pycountry.countries.get(name=current_country)
|
||||||
agencies.append(IntelAgency(value=name, uuid=uuids[name], meta=Meta(country=current_country, refs=[links], synonyms=[synonyms]), description=description))
|
|
||||||
|
# Set country
|
||||||
|
country_name = current_country
|
||||||
|
|
||||||
|
if country_code:
|
||||||
|
country_code = country_code.alpha_2
|
||||||
else:
|
else:
|
||||||
agencies.append(IntelAgency(value=name, meta=Meta(country=current_country, refs=[links], synonyms=[synonyms]), uuid=str(uuid.uuid4()), description=description))
|
country_code = COUNTRY_CODES.get(current_country)
|
||||||
|
|
||||||
|
if current_country in ["European Union", "Shanghai Cooperation Organisation"]: # Not a country
|
||||||
|
country_name = None
|
||||||
|
|
||||||
|
if uuids and name in uuids:
|
||||||
|
agencies.append(IntelAgency(value=name, uuid=uuids[name], meta=Meta(country=country_code, country_name=country_name, refs=[links], synonyms=[synonyms]), description=description))
|
||||||
|
else:
|
||||||
|
agencies.append(IntelAgency(value=name, meta=Meta(country=country_code, country_name=country_name, refs=[links], synonyms=[synonyms]), uuid=str(uuid.uuid4()), description=description))
|
||||||
|
|
||||||
return agencies
|
return agencies
|
||||||
|
|
||||||
|
@ -110,7 +146,6 @@ if __name__ == '__main__':
|
||||||
wiki = WikipediaAPI()
|
wiki = WikipediaAPI()
|
||||||
page_title = 'List of intelligence agencies'
|
page_title = 'List of intelligence agencies'
|
||||||
content = wiki.get_page_html(page_title)
|
content = wiki.get_page_html(page_title)
|
||||||
# print(content)
|
|
||||||
uuids = get_UUIDs()
|
uuids = get_UUIDs()
|
||||||
if content and uuids:
|
if content and uuids:
|
||||||
agencies = extract_info(content, uuids)
|
agencies = extract_info(content, uuids)
|
||||||
|
|
|
@ -3,10 +3,29 @@ import json
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Meta:
|
class Meta:
|
||||||
country: str = ""
|
country: str = None
|
||||||
|
country_name: str = None
|
||||||
refs: list = field(default_factory=list)
|
refs: list = field(default_factory=list)
|
||||||
synonyms: list = field(default_factory=list)
|
synonyms: list = field(default_factory=list)
|
||||||
|
|
||||||
|
# def custom_asdict(obj):
|
||||||
|
# if is_dataclass(obj):
|
||||||
|
# result = {}
|
||||||
|
# for field_name, field_def in obj.__dataclass_fields__.items():
|
||||||
|
# value = getattr(obj, field_name)
|
||||||
|
# if field_name == 'meta':
|
||||||
|
# meta_value = custom_asdict(value)
|
||||||
|
# meta_value = {k: v for k, v in meta_value.items() if not (k in ['refs', 'synonyms'] and (not v or all(e is None for e in v)))}
|
||||||
|
# value = meta_value
|
||||||
|
# elif isinstance(value, (list, tuple)) and all(is_dataclass(i) for i in value):
|
||||||
|
# value = [custom_asdict(i) for i in value]
|
||||||
|
# elif isinstance(value, list) and all(e is None for e in value):
|
||||||
|
# continue
|
||||||
|
# result[field_name] = value
|
||||||
|
# return result
|
||||||
|
# else:
|
||||||
|
# return obj
|
||||||
|
|
||||||
def custom_asdict(obj):
|
def custom_asdict(obj):
|
||||||
if is_dataclass(obj):
|
if is_dataclass(obj):
|
||||||
result = {}
|
result = {}
|
||||||
|
@ -14,18 +33,21 @@ def custom_asdict(obj):
|
||||||
value = getattr(obj, field_name)
|
value = getattr(obj, field_name)
|
||||||
if field_name == 'meta':
|
if field_name == 'meta':
|
||||||
meta_value = custom_asdict(value)
|
meta_value = custom_asdict(value)
|
||||||
meta_value = {k: v for k, v in meta_value.items() if not (k in ['refs', 'synonyms'] and (not v or all(e is None for e in v)))}
|
# Filter out 'refs', 'synonyms', 'country', and 'country_name' if they are None or if 'refs' and 'synonyms' are empty
|
||||||
|
meta_value = {k: v for k, v in meta_value.items() if v is not None and not (k in ['refs', 'synonyms'] and (not v or all(e is None for e in v)))}
|
||||||
value = meta_value
|
value = meta_value
|
||||||
elif isinstance(value, (list, tuple)) and all(is_dataclass(i) for i in value):
|
elif isinstance(value, (list, tuple)) and all(is_dataclass(i) for i in value):
|
||||||
value = [custom_asdict(i) for i in value]
|
value = [custom_asdict(i) for i in value]
|
||||||
elif isinstance(value, list) and all(e is None for e in value):
|
elif isinstance(value, list) and all(e is None for e in value):
|
||||||
continue
|
continue
|
||||||
|
# Skip the field if the value is None (specifically for 'country' and 'country_name')
|
||||||
|
if value is None and field_name in ['country', 'country_name']:
|
||||||
|
continue
|
||||||
result[field_name] = value
|
result[field_name] = value
|
||||||
return result
|
return result
|
||||||
else:
|
else:
|
||||||
return obj
|
return obj
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class IntelAgency:
|
class IntelAgency:
|
||||||
description: str = ""
|
description: str = ""
|
||||||
|
|
Loading…
Reference in a new issue