diff --git a/tools/gen_csf.py b/tools/gen_csf.py
index fb3cc52..8e658e2 100644
--- a/tools/gen_csf.py
+++ b/tools/gen_csf.py
@@ -4,6 +4,7 @@
# A simple convertor script to generate galaxies from the MITRE NICE framework
# https://niccs.cisa.gov/workforce-development/nice-framework
# Copyright (C) 2024 Jean-Louis Huynen
+# Copyright (C) 2024 Déborah Servili
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
@@ -56,7 +57,6 @@ url = "https://www.first.org/standards/frameworks/csirts/csirt_services_framewor
# Send a GET request to the webpage
response = requests.get(url)
-
def extract_nostrong_content(element):
content = element.find_next_siblings('p', limit=3)
extracted = {}
@@ -75,13 +75,11 @@ def extract_nostrong_content(element):
extracted["outcome"] = content[2].text.strip()[8:]
for sibling in content[2].find_next_siblings():
- if sibling.name == "h4":
+ if sibling.name in ["h2", "h3", "h4"] or any(substring in sibling.text for substring in ["The following functions", "List of functions"]):
break
extracted["outcome"] += f" {sibling.text.strip()}"
-
return extracted
-
def extract_content(element):
content = {}
description_title = element.find_next(
@@ -103,6 +101,7 @@ def extract_content(element):
.replace("Description:", "")
.strip()
)
+
for sibling in description_title.parent.parent.find_next_siblings():
if "Outcome:" in sibling.text:
break
@@ -112,22 +111,26 @@ def extract_content(element):
outcome_title.parent.parent.get_text(strip=True).replace("Outcome:", "").strip()
)
for sibling in outcome_title.parent.parent.find_next_siblings():
- if sibling.name == "h4":
+ if sibling.name in ["h2", "h3", "h4"] or any(substring in sibling.text for substring in ["The following functions", "List of functions"]):
break
content["outcome"] += f" {sibling.text.strip()}"
-
+ content["outcome"] = content["outcome"].split("The following functions")[0].strip()
return content
def remove_heading(input_string):
return re.sub(r'^\d+(\.\d+)*\s+', '', input_string)
-
# Check if the request was successful
if response.status_code == 200:
# Parse the page content with BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')
+ # Remove only in-page anchor links (<a> tags whose href starts with '#')
+ for a in soup.find_all('a', href=True):
+ if a['href'].startswith('#'):
+ a.decompose()
+
# Extract the section titled "4 CSIRT Services Framework Structure"
section_header = soup.find(
'h2', id="5-Service-Area-Information-Security-Event-Management"
diff --git a/tools/gen_csf_alt.py b/tools/gen_csf_alt.py
deleted file mode 100644
index 4eeb54c..0000000
--- a/tools/gen_csf_alt.py
+++ /dev/null
@@ -1,228 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-#
-# A simple convertor script to generate galaxies from the MITRE NICE framework
-# https://niccs.cisa.gov/workforce-development/nice-framework
-# Copyright (C) 2024 Jean-Louis Huynen
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as
-# published by the Free Software Foundation, either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see .
-
-import pdb
-import requests
-import json
-import os
-import uuid
-import re
-from bs4 import BeautifulSoup
-
-# uuidv4 generated to be concatenated in v5: 43803a9f-9ea6-4ebc-9cb5-68ccdc2c23e0
-
-galaxy = {
- "namespace": "first",
- "type": "first-csirt-services-framework",
- "name": "FIRST CSIRT Services Framework",
- "description": "The Computer Security Incident Response Team (CSIRT) Services Framework is a high-level document describing in a structured way a collection of cyber security services and associated functions that Computer Security Incident Response Teams and other teams providing incident management related services may provide",
- "uuid": "4a72488f-ef5b-4895-a5d9-c625dee663cb",
- "version": 1,
- "icon": 'user',
-}
-
-cluster = {
- 'authors': ["FIRST", "CIRCL", "Jean-Louis Huynen"],
- 'category': 'csirt',
- "type": "first-csirt-services-framework",
- "name": "FIRST CSIRT Services Framework",
- "description": "The Computer Security Incident Response Team (CSIRT) Services Framework is a high-level document describing in a structured way a collection of cyber security services and associated functions that Computer Security Incident Response Teams and other teams providing incident management related services may provide",
- "uuid": "4a72488f-ef5b-4895-a5d9-c625dee663cb",
- 'source': 'https://www.first.org/standards/frameworks/csirts/csirt_services_framework_v2.1',
- 'values': [],
- 'version': 1,
-}
-
-# URL to download
-url = "https://www.first.org/standards/frameworks/csirts/csirt_services_framework_v2.1#5-Service-Area-Information-Security-Event-Management"
-
-# Send a GET request to the webpage
-response = requests.get(url)
-
-def extract_nostrong_content(element):
- content = element.find_next_siblings('p', limit=3)
- extracted = {}
-
- extracted["purpose"] = content[0].text.strip()[8:]
- for sibling in content[0].find_next_siblings():
- if "Description:" in sibling.text:
- break
- extracted["purpose"] += f" {sibling.text.strip()}"
-
- extracted["description"] = content[1].text.strip()[12:]
- for sibling in content[1].find_next_siblings():
- if "Outcome:" in sibling.text:
- break
- extracted["description"] += f" {sibling.text.strip()}"
-
- extracted["outcome"] = content[2].text.strip()[8:]
- for sibling in content[2].find_next_siblings():
- if sibling.name in ["h2", "h3", "h4"] or any(substring in sibling.text for substring in ["The following functions", "List of functions"]):
- break
- extracted["outcome"] += f" {sibling.text.strip()}"
- return extracted
-
-def extract_content(element):
- content = {}
- description_title = element.find_next(
- "em", string=lambda text: "Description:" in text
- )
- purpose_title = element.find_next("em", string=lambda text: "Purpose:" in text)
- outcome_title = element.find_next("em", string=lambda text: "Outcome:" in text)
-
- content["purpose"] = (
- purpose_title.parent.parent.get_text(strip=True).replace("Purpose:", "").strip()
- )
- for sibling in purpose_title.parent.parent.find_next_siblings():
- if "Description:" in sibling.text:
- break
- content["purpose"] += f" {sibling.text.strip()}"
-
- content["description"] = (
- description_title.parent.parent.get_text(strip=True)
- .replace("Description:", "")
- .strip()
- )
-
- for sibling in description_title.parent.parent.find_next_siblings():
- if "Outcome:" in sibling.text:
- break
- content["description"] += f" {sibling.text.strip()}"
-
- content["outcome"] = (
- outcome_title.parent.parent.get_text(strip=True).replace("Outcome:", "").strip()
- )
- for sibling in outcome_title.parent.parent.find_next_siblings():
- if sibling.name in ["h2", "h3", "h4"] or any(substring in sibling.text for substring in ["The following functions", "List of functions"]):
- break
- content["outcome"] += f" {sibling.text.strip()}"
- content["outcome"] = content["outcome"].split("The following functions")[0].strip()
- return content
-
-
-def remove_heading(input_string):
- return re.sub(r'^\d+(\.\d+)*\s+', '', input_string)
-
-# Check if the request was successful
-if response.status_code == 200:
- # Parse the page content with BeautifulSoup
- soup = BeautifulSoup(response.content, 'html.parser')
-
- # Removing all links
- for a in soup.find_all('a', href=True):
- if a['href'].startswith('#'):
- a.decompose()
-
- # Extract the section titled "4 CSIRT Services Framework Structure"
- section_header = soup.find(
- 'h2', id="5-Service-Area-Information-Security-Event-Management"
- )
- if section_header:
-
- services = section_header.find_next_siblings('h3')
- functions = section_header.find_next_siblings('h4')
-
- for service in services:
- if "Monitoring and detection" in service.text:
- content = extract_nostrong_content(service)
- else:
- content = extract_content(service)
- name = remove_heading(service.text.strip())
- suuid = str(
- uuid.uuid5(uuid.UUID("43803a9f-9ea6-4ebc-9cb5-68ccdc2c23e0"), name)
- )
- cluster["values"].append(
- {
- "description": content["description"],
- "meta": {
- "purpose": content["purpose"],
- "outcome": content["outcome"],
- },
- "uuid": suuid,
- "value": name,
- "related": [],
- }
- )
-
- for function in functions:
- content = extract_content(function)
- # get the parent service
- parent_service = function.find_previous('h3')
- relationship = {
- "dest-uuid": str(
- uuid.uuid5(
- uuid.UUID("43803a9f-9ea6-4ebc-9cb5-68ccdc2c23e0"),
- remove_heading(parent_service.text.strip()),
- )
- ),
- "type": "part-of",
- }
-
- name = remove_heading(function.text.strip())
-
- cluster["values"].append(
- {
- "description": content["description"],
- "meta": {
- "purpose": content["purpose"],
- "outcome": content["outcome"],
- },
- "uuid": str(
- uuid.uuid5(
- uuid.UUID("43803a9f-9ea6-4ebc-9cb5-68ccdc2c23e0"), name
- )
- ),
- "value": name,
- "related": [relationship],
- }
- )
-
- with open(
- os.path.join(
- os.path.dirname(__file__),
- '..',
- 'galaxies',
- f'first-csirt-services-framework.json',
- ),
- 'w',
- ) as f:
- json.dump(galaxy, f, indent=2, sort_keys=True, ensure_ascii=False)
- f.write(
- '\n'
- ) # only needed for the beauty and to be compliant with jq_all_the_things
-
- with open(
- os.path.join(
- os.path.dirname(__file__),
- '..',
- 'clusters',
- f'first-csirt-services-framework.json',
- ),
- 'w',
- ) as f:
- json.dump(cluster, f, indent=2, sort_keys=True, ensure_ascii=False)
- f.write(
- '\n'
- ) # only needed for the beauty and to be compliant with jq_all_the_things
-
- else:
- print("Couldn't find the section header.")
-else:
- print(f"Failed to download the webpage. Status code: {response.status_code}")