chg: [gen_csf] updated

This commit is contained in:
Alexandre Dulaunoy 2024-08-23 15:49:44 +02:00
parent 1882171086
commit a31ee53715
Signed by: adulau
GPG key ID: 09E2CD4944E6CBCD

View file

@ -35,7 +35,7 @@ galaxy = {
"description": "The Computer Security Incident Response Team (CSIRT) Services Framework is a high-level document describing in a structured way a collection of cyber security services and associated functions that Computer Security Incident Response Teams and other teams providing incident management related services may provide", "description": "The Computer Security Incident Response Team (CSIRT) Services Framework is a high-level document describing in a structured way a collection of cyber security services and associated functions that Computer Security Incident Response Teams and other teams providing incident management related services may provide",
"uuid": "4a72488f-ef5b-4895-a5d9-c625dee663cb", "uuid": "4a72488f-ef5b-4895-a5d9-c625dee663cb",
"version": 1, "version": 1,
"icon": 'user' "icon": 'user',
} }
cluster = { cluster = {
@ -47,7 +47,7 @@ cluster = {
"uuid": "4a72488f-ef5b-4895-a5d9-c625dee663cb", "uuid": "4a72488f-ef5b-4895-a5d9-c625dee663cb",
'source': 'https://www.first.org/standards/frameworks/csirts/csirt_services_framework_v2.1', 'source': 'https://www.first.org/standards/frameworks/csirts/csirt_services_framework_v2.1',
'values': [], 'values': [],
'version': 1 'version': 1,
} }
# URL to download # URL to download
@ -56,6 +56,7 @@ url = "https://www.first.org/standards/frameworks/csirts/csirt_services_framewor
# Send a GET request to the webpage # Send a GET request to the webpage
response = requests.get(url) response = requests.get(url)
def extract_nostrong_content(element): def extract_nostrong_content(element):
content = element.find_next_siblings('p', limit=3) content = element.find_next_siblings('p', limit=3)
extracted = {} extracted = {}
@ -66,7 +67,6 @@ def extract_nostrong_content(element):
break break
extracted["purpose"] += f" {sibling.text.strip()}" extracted["purpose"] += f" {sibling.text.strip()}"
extracted["description"] = content[1].text.strip()[12:] extracted["description"] = content[1].text.strip()[12:]
for sibling in content[1].find_next_siblings(): for sibling in content[1].find_next_siblings():
if "Outcome:" in sibling.text: if "Outcome:" in sibling.text:
@ -81,26 +81,36 @@ def extract_nostrong_content(element):
return extracted return extracted
def extract_content(element): def extract_content(element):
content = {} content = {}
description_title = element.find_next("em", string=lambda text: "Description:" in text) description_title = element.find_next(
"em", string=lambda text: "Description:" in text
)
purpose_title = element.find_next("em", string=lambda text: "Purpose:" in text) purpose_title = element.find_next("em", string=lambda text: "Purpose:" in text)
outcome_title = element.find_next("em", string=lambda text: "Outcome:" in text) outcome_title = element.find_next("em", string=lambda text: "Outcome:" in text)
content["purpose"] = (
content["purpose"] = purpose_title.parent.parent.get_text(strip=True).replace("Purpose:", "").strip() purpose_title.parent.parent.get_text(strip=True).replace("Purpose:", "").strip()
)
for sibling in purpose_title.parent.parent.find_next_siblings(): for sibling in purpose_title.parent.parent.find_next_siblings():
if "Description:" in sibling.text: if "Description:" in sibling.text:
break break
content["purpose"] += f" {sibling.text.strip()}" content["purpose"] += f" {sibling.text.strip()}"
content["description"] = description_title.parent.parent.get_text(strip=True).replace("Description:", "").strip() content["description"] = (
description_title.parent.parent.get_text(strip=True)
.replace("Description:", "")
.strip()
)
for sibling in description_title.parent.parent.find_next_siblings(): for sibling in description_title.parent.parent.find_next_siblings():
if "Outcome:" in sibling.text: if "Outcome:" in sibling.text:
break break
content["description"] += f" {sibling.text.strip()}" content["description"] += f" {sibling.text.strip()}"
content["outcome"] = outcome_title.parent.parent.get_text(strip=True).replace("Outcome:", "").strip() content["outcome"] = (
outcome_title.parent.parent.get_text(strip=True).replace("Outcome:", "").strip()
)
for sibling in outcome_title.parent.parent.find_next_siblings(): for sibling in outcome_title.parent.parent.find_next_siblings():
if sibling.name == "h4": if sibling.name == "h4":
break break
@ -108,16 +118,20 @@ def extract_content(element):
return content return content
def remove_heading(input_string): def remove_heading(input_string):
return re.sub(r'^\d+(\.\d+)*\s+', '', input_string) return re.sub(r'^\d+(\.\d+)*\s+', '', input_string)
# Check if the request was successful # Check if the request was successful
if response.status_code == 200: if response.status_code == 200:
# Parse the page content with BeautifulSoup # Parse the page content with BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser') soup = BeautifulSoup(response.content, 'html.parser')
# Locate the <h2> section header with id "5-Service-Area-Information-Security-Event-Management" # Locate the <h2> section header with id "5-Service-Area-Information-Security-Event-Management"
section_header = soup.find('h2', id="5-Service-Area-Information-Security-Event-Management") section_header = soup.find(
'h2', id="5-Service-Area-Information-Security-Event-Management"
)
if section_header: if section_header:
services = section_header.find_next_siblings('h3') services = section_header.find_next_siblings('h3')
@ -129,17 +143,19 @@ if response.status_code == 200:
else: else:
content = extract_content(service) content = extract_content(service)
name = remove_heading(service.text.strip()) name = remove_heading(service.text.strip())
suuid = str(uuid.uuid5(uuid.UUID("43803a9f-9ea6-4ebc-9cb5-68ccdc2c23e0"), name)) suuid = str(
uuid.uuid5(uuid.UUID("43803a9f-9ea6-4ebc-9cb5-68ccdc2c23e0"), name)
)
cluster["values"].append( cluster["values"].append(
{ {
"description": content["description"], "description": content["description"],
"meta": { "meta": {
"purpose": content["purpose"], "purpose": content["purpose"],
"outcome": content["outcome"] "outcome": content["outcome"],
}, },
"uuid": suuid, "uuid": suuid,
"value": name, "value": name,
"related": [] "related": [],
} }
) )
@ -148,8 +164,13 @@ if response.status_code == 200:
# get the parent service # get the parent service
parent_service = function.find_previous('h3') parent_service = function.find_previous('h3')
relationship = { relationship = {
"dest-uuid": str(uuid.uuid5(uuid.UUID("43803a9f-9ea6-4ebc-9cb5-68ccdc2c23e0"), remove_heading(parent_service.text.strip()))), "dest-uuid": str(
"type": "part-of" uuid.uuid5(
uuid.UUID("43803a9f-9ea6-4ebc-9cb5-68ccdc2c23e0"),
remove_heading(parent_service.text.strip()),
)
),
"type": "part-of",
} }
name = remove_heading(function.text.strip()) name = remove_heading(function.text.strip())
@ -159,21 +180,45 @@ if response.status_code == 200:
"description": content["description"], "description": content["description"],
"meta": { "meta": {
"purpose": content["purpose"], "purpose": content["purpose"],
"outcome": content["outcome"] "outcome": content["outcome"],
}, },
"uuid" : str(uuid.uuid5(uuid.UUID("43803a9f-9ea6-4ebc-9cb5-68ccdc2c23e0"), name)), "uuid": str(
uuid.uuid5(
uuid.UUID("43803a9f-9ea6-4ebc-9cb5-68ccdc2c23e0"), name
)
),
"value": name, "value": name,
"related": [relationship] "related": [relationship],
} }
) )
with open(os.path.join(os.path.dirname(__file__), '..', 'galaxies', f'first-csirt-services-framework.json'), 'w') as f: with open(
os.path.join(
os.path.dirname(__file__),
'..',
'galaxies',
f'first-csirt-services-framework.json',
),
'w',
) as f:
json.dump(galaxy, f, indent=2, sort_keys=True, ensure_ascii=False) json.dump(galaxy, f, indent=2, sort_keys=True, ensure_ascii=False)
f.write('\n') # only needed for the beauty and to be compliant with jq_all_the_things f.write(
'\n'
) # only needed for the beauty and to be compliant with jq_all_the_things
with open(os.path.join(os.path.dirname(__file__), '..', 'clusters', f'first-csirt-services-framework.json'), 'w') as f: with open(
os.path.join(
os.path.dirname(__file__),
'..',
'clusters',
f'first-csirt-services-framework.json',
),
'w',
) as f:
json.dump(cluster, f, indent=2, sort_keys=True, ensure_ascii=False) json.dump(cluster, f, indent=2, sort_keys=True, ensure_ascii=False)
f.write('\n') # only needed for the beauty and to be compliant with jq_all_the_things f.write(
'\n'
) # only needed for the beauty and to be compliant with jq_all_the_things
else: else:
print("Couldn't find the section header.") print("Couldn't find the section header.")