mirror of
https://github.com/MISP/misp-galaxy.git
synced 2024-11-29 18:27:19 +00:00
Add [intel-agencies] build script
This commit is contained in:
parent
9ee41f0f14
commit
0d26334448
4 changed files with 218 additions and 0 deletions
98
tools/WikipediaAPI/main.py
Normal file
98
tools/WikipediaAPI/main.py
Normal file
|
@ -0,0 +1,98 @@
|
||||||
|
from modules.api import WikipediaAPI
|
||||||
|
from modules.intel import IntelAgency, Meta, Galaxy, Cluster
|
||||||
|
import os
|
||||||
|
import uuid
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
# Output locations; relative paths, so they resolve against the current
# working directory of the process running this script.
CLUSTER_PATH = "../../clusters"
GALAXY_PATH = "../../galaxies"

# Base name (without extension) of the generated galaxy and cluster files.
GALAXY_NAME = "intelligence-agencies"

# Random UUID created anew on every run; passed to both the Galaxy and the
# Cluster built in the script section below.
UUID = str(uuid.uuid4())
|
||||||
|
|
||||||
|
def get_UUIDs():
    """Load the UUIDs of a previously generated cluster file for reuse.

    Returns:
        A dict mapping each cluster entry's ``value`` to its ``uuid`` when
        ``<CLUSTER_PATH>/<GALAXY_NAME>.json`` exists, otherwise ``None``.
    """
    # The cluster is saved as "<GALAXY_NAME>.json", so that is the file to
    # look for; checking the bare galaxy name never matched anything and
    # every run silently produced brand-new UUIDs.
    cluster_file = os.path.join(CLUSTER_PATH, f'{GALAXY_NAME}.json')
    if not os.path.isfile(cluster_file):
        return None

    with open(cluster_file, encoding='utf-8') as fr:
        galaxy_json = json.load(fr)

    # Tolerate a cluster file without a "values" list (treated as empty).
    return {
        cluster["value"]: cluster["uuid"]
        for cluster in galaxy_json.get("values", [])
    }
|
||||||
|
|
||||||
|
def get_notes_on_lower_level(content):
    """Collect the text of every leaf ``<li>`` below a list element.

    Items that contain a nested ``<ul>`` are not reported themselves; only
    the leaves of their nested list are, found recursively.

    Args:
        content: A list element exposing ``find_all`` (e.g. a BeautifulSoup
            ``<ul>`` tag), or ``None`` when no list was found.

    Returns:
        list: The ``.text`` of each leaf item, in document order. Empty when
        ``content`` is ``None`` or has no direct ``<li>`` children.
    """
    # A lookup such as find_next('ul') yields None when there is no list;
    # treat that as "no items" instead of failing with AttributeError.
    if content is None:
        return []

    notes = []
    for li in content.find_all('li', recursive=False):
        nested = li.find('ul')  # looked up once, reused for the recursion
        if nested is not None:
            notes.extend(get_notes_on_lower_level(nested))
        else:
            notes.append(li.text)
    return notes
|
||||||
|
|
||||||
|
def get_agencies_from_country(heading, current_country, uuids):
    """Build an IntelAgency for each agency listed after a country heading.

    Args:
        heading: Heading element; the first ``<ul>`` found after it via
            ``find_next`` is taken as the country's agency list.
        current_country: Country name stored in each agency's ``Meta``.
        uuids: Mapping of agency name to an existing UUID, or a falsy value
            when there is nothing to reuse.

    Returns:
        list: One IntelAgency per leaf list item. A name present in
        ``uuids`` keeps its UUID; any other name gets a new random one.
    """
    known = uuids or {}
    listing = heading.find_next('ul')

    result = []
    for agency_name in get_notes_on_lower_level(listing):
        if agency_name in known:
            agency_uuid = known[agency_name]
        else:
            agency_uuid = str(uuid.uuid4())
        result.append(
            IntelAgency(
                value=agency_name,
                uuid=agency_uuid,
                meta=Meta(country=current_country),
            )
        )
    return result
|
||||||
|
|
||||||
|
def extract_info(content, uuids):
    """Parse the page HTML and gather the agencies of every country section.

    Each ``<h2>`` that contains a ``span.mw-headline`` whose text is not one
    of the non-country sections is treated as a country heading.

    Args:
        content: HTML markup of the page.
        uuids: Mapping of agency name to existing UUID, or ``None``;
            forwarded unchanged to ``get_agencies_from_country``.

    Returns:
        list: The agencies of all country sections, in page order.
    """
    skipped_sections = ["See also", "References", "External links", "Further reading"]
    soup = BeautifulSoup(content, 'html.parser')

    collected = []
    for heading in soup.find_all('h2'):
        headline = heading.find('span', {'class': 'mw-headline'})
        # Skip headings without a headline span and the non-country sections.
        if not headline or headline.text in skipped_sections:
            continue
        country = headline.text.strip()
        collected.extend(get_agencies_from_country(heading, country, uuids))
    return collected
|
||||||
|
|
||||||
|
def main():
    """Fetch the Wikipedia agency list and write the galaxy and cluster files.

    Raises:
        SystemExit: If the page HTML could not be retrieved. Nothing is
            written in that case.
    """
    wiki = WikipediaAPI()
    page_title = 'List of intelligence agencies'
    content = wiki.get_page_html(page_title)
    if not content:
        # get_page_html returns None on failure. Stop before any output file
        # is touched; previously this path either left `agencies` undefined
        # (NameError after the galaxy file was already written) or handed
        # None to the HTML parser.
        raise SystemExit(f'Error: could not fetch page "{page_title}"')

    uuids = get_UUIDs()
    if not uuids:
        print(f'No UUIDs found for {GALAXY_NAME}')
    # extract_info copes with a missing/empty mapping by creating new UUIDs.
    agencies = extract_info(content, uuids)

    # Write to files
    galaxy = Galaxy(
        description="List of intelligence agencies",
        icon="ninja",
        name="intelligence-agencies",
        namespace="intelligence-agency",
        type="intelligence-agency",
        uuid=UUID,
        version=1,
    )
    galaxy.save_to_file(os.path.join(GALAXY_PATH, f'{GALAXY_NAME}.json'))

    cluster = Cluster(
        authors="Wikipedia",
        category="Intelligence Agencies",
        description="List of intelligence agencies",
        name="intelligence-agencies",
        source="https://en.wikipedia.org/wiki/List_of_intelligence_agencies",
        type="intelligence-agency",
        uuid=UUID,
        version=1,
    )
    for agency in agencies:
        cluster.add_value(agency)

    print(cluster.values)
    print(cluster.uuid)
    cluster.save_to_file(os.path.join(CLUSTER_PATH, f'{GALAXY_NAME}.json'))


if __name__ == '__main__':
    main()
|
0
tools/WikipediaAPI/modules/__init__.py
Normal file
0
tools/WikipediaAPI/modules/__init__.py
Normal file
56
tools/WikipediaAPI/modules/api.py
Normal file
56
tools/WikipediaAPI/modules/api.py
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
import requests
|
||||||
|
|
||||||
|
class WikipediaAPI():
    """Small client for the English Wikipedia MediaWiki API.

    Every public getter returns ``None`` (after printing the error) when the
    request fails or the response lacks the expected structure.
    """

    def __init__(self, timeout=10):
        """Create the client.

        Args:
            timeout: Seconds to wait for the server before giving up.
                Without a timeout a stalled connection blocks forever.
        """
        self.base_url = 'https://en.wikipedia.org/w/api.php'
        self.timeout = timeout

    def _query(self, params):
        """Send a GET request with *params* and return the decoded JSON body.

        Raises whatever ``requests`` raises for connection problems, HTTP
        error statuses or a body that is not valid JSON.
        """
        response = requests.get(self.base_url, params=params, timeout=self.timeout)
        # Surface HTTP errors directly rather than as a confusing JSON or
        # key error further down.
        response.raise_for_status()
        return response.json()

    def get_page_summary(self, page_title):
        """Return the ``extract`` of the page, requested with ``explaintext``.

        Args:
            page_title: Title of the page to query.

        Returns:
            The ``extract`` value of the first page in the response, or
            ``None`` on any error.
        """
        params = {
            'action': 'query',
            'format': 'json',
            'titles': page_title,
            'prop': 'extracts',
            'explaintext': True,
        }
        try:
            pages = self._query(params)['query']['pages']
            # The response keys pages by id; only one title was requested.
            page_id = next(iter(pages))
            return pages[page_id]['extract']
        except Exception as e:
            print(f'Error: {e}')
            return None

    def get_page_content(self, page_title):
        """Return the content of the first revision returned for the page.

        Args:
            page_title: Title of the page to query.

        Returns:
            The ``'*'`` value of the first entry in ``revisions``, or
            ``None`` on any error.
        """
        params = {
            'action': 'query',
            'format': 'json',
            'titles': page_title,
            'prop': 'revisions',
            'rvprop': 'content',
        }
        try:
            pages = self._query(params)['query']['pages']
            page_id = next(iter(pages))
            return pages[page_id]['revisions'][0]['*']
        except Exception as e:
            print(f'Error: {e}')
            return None

    def get_page_html(self, page_title):
        """Return the parsed text of the page from the ``parse`` action.

        Args:
            page_title: Title of the page to parse.

        Returns:
            The ``parse.text['*']`` value of the response, or ``None`` on
            any error.
        """
        params = {
            'action': 'parse',
            'format': 'json',
            'page': page_title,
            'prop': 'text',
            'disableeditsection': True,
        }
        try:
            return self._query(params)['parse']['text']['*']
        except Exception as e:
            print(f'Error: {e}')
            return None
|
64
tools/WikipediaAPI/modules/intel.py
Normal file
64
tools/WikipediaAPI/modules/intel.py
Normal file
|
@ -0,0 +1,64 @@
|
||||||
|
from dataclasses import dataclass, field, asdict
|
||||||
|
import json
|
||||||
|
|
||||||
|
@dataclass
class Meta:
    """Metadata attached to a single cluster entry."""

    # Name of the country; defaults to an empty string.
    country: str = ""
|
||||||
|
|
||||||
|
@dataclass
class IntelAgency:
    """One intelligence agency entry of the cluster.

    Raises:
        ValueError: On construction, when ``value`` or ``uuid`` is empty
            (``value`` is checked first).
    """

    description: str = ""
    meta: Meta = field(default_factory=Meta)
    related: list = field(default_factory=list)
    # Both default to None only so they can follow the defaulted fields;
    # __post_init__ rejects instances that leave either one empty.
    uuid: str = None
    value: str = None

    def __post_init__(self):
        """Reject instances whose mandatory fields are empty."""
        mandatory = (
            (self.value, "IntelAgency 'value' cannot be empty."),
            (self.uuid, "IntelAgency 'uuid' cannot be empty."),
        )
        for current, message in mandatory:
            if not current:
                raise ValueError(message)
|
||||||
|
|
||||||
|
@dataclass
class Galaxy:
    """Galaxy definition that can be written to disk as JSON."""

    description: str
    icon: str
    name: str
    namespace: str
    type: str
    uuid: str
    version: int

    def save_to_file(self, path: str):
        """Write all fields to *path* as JSON indented by four spaces.

        An existing file at *path* is overwritten.
        """
        with open(path, "w") as handle:
            serialized = json.dumps(asdict(self), indent=4)
            handle.write(serialized)
|
||||||
|
|
||||||
|
@dataclass
class Cluster():
    """Cluster header plus its list of entries, serializable to JSON.

    The attributes are declared as real dataclass fields. With a hand-written
    ``__init__`` and no annotated fields, ``@dataclass`` registers nothing,
    ``asdict()`` returns ``{}`` and ``save_to_file`` writes an empty object.
    The constructor arguments (names and order) are unchanged.
    """

    authors: str
    category: str
    description: str
    name: str
    source: str
    type: str
    uuid: str
    version: int
    # Not a constructor argument: a cluster always starts empty and is
    # filled through add_value().
    values: list = field(default_factory=list, init=False)

    def add_value(self, value: "IntelAgency"):
        """Append one entry to the cluster's ``values`` list."""
        self.values.append(value)

    def save_to_file(self, path: str):
        """Write the cluster, including all values, to *path* as JSON.

        Dataclass entries in ``values`` are converted to dicts by
        ``asdict``. An existing file at *path* is overwritten.
        """
        with open(path, "w") as file:
            file.write(json.dumps(asdict(self), indent=4))
|
Loading…
Reference in a new issue