misp-galaxy/tools/gen_csf.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
#    A simple converter script to generate a galaxy and cluster from the FIRST CSIRT Services Framework
#    https://www.first.org/standards/frameworks/csirts/csirt_services_framework_v2.1
#    Copyright (C) 2024 Jean-Louis Huynen 
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU Affero General Public License as
#    published by the Free Software Foundation, either version 3 of the
#    License, or (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU Affero General Public License for more details.
#
#    You should have received a copy of the GNU Affero General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.

import pdb
import requests
import json
import os
import uuid
import re
from bs4 import BeautifulSoup

# uuidv4 generated to be concatenated in v5: 43803a9f-9ea6-4ebc-9cb5-68ccdc2c23e0

# Fields that carry the same value in the galaxy definition and in its cluster.
_FRAMEWORK_TYPE = "first-csirt-services-framework"
_FRAMEWORK_NAME = "FIRST CSIRT Services Framework"
_FRAMEWORK_DESCRIPTION = "The Computer Security Incident Response Team (CSIRT) Services Framework is a high-level document describing in a structured way a collection of cyber security services and associated functions that Computer Security Incident Response Teams and other teams providing incident management related services may provide"
_FRAMEWORK_UUID = "4a72488f-ef5b-4895-a5d9-c625dee663cb"

# Galaxy definition, dumped as-is to the galaxies/ directory.
galaxy = {
    "namespace": "first",
    "type": _FRAMEWORK_TYPE,
    "name": _FRAMEWORK_NAME,
    "description": _FRAMEWORK_DESCRIPTION,
    "uuid": _FRAMEWORK_UUID,
    "version": 1,
    "icon": "user",
}

# Cluster skeleton; "values" starts empty and is filled from the scraped page.
cluster = {
    "authors": ["FIRST", "CIRCL", "Jean-Louis Huynen"],
    "category": "csirt",
    "type": _FRAMEWORK_TYPE,
    "name": _FRAMEWORK_NAME,
    "description": _FRAMEWORK_DESCRIPTION,
    "uuid": _FRAMEWORK_UUID,
    "source": "https://www.first.org/standards/frameworks/csirts/csirt_services_framework_v2.1",
    "values": [],
    "version": 1,
}

# URL of the page that holds the framework (the fragment points at section 5)
url = "https://www.first.org/standards/frameworks/csirts/csirt_services_framework_v2.1#5-Service-Area-Information-Security-Event-Management"

# Send a GET request to the webpage. requests never times out by default, so an
# explicit timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)

def extract_nostrong_content(element):
    """Extract purpose, description and outcome from the paragraphs after a heading.

    The first three <p> siblings following ``element`` are taken, in order, as
    the "Purpose:", "Description:" and "Outcome:" paragraphs. Each text is
    extended with the elements that follow its paragraph, up to the next label
    or the next heading.

    Args:
        element: heading element exposing BeautifulSoup's
            ``find_next_siblings``.

    Returns:
        dict with the keys "purpose", "description" and "outcome".

    Raises:
        ValueError: if fewer than three <p> siblings follow ``element``.
    """
    # A heading at function level (h4) or above starts the next entry. Stopping
    # only at h4 would let an entry followed by an h3/h2 heading swallow the
    # next entry's title and paragraphs.
    heading_tags = ("h1", "h2", "h3", "h4")

    def labelled_text(paragraph, label, next_label=None):
        text = paragraph.text.strip()
        # Remove the label only when it is really there, instead of blindly
        # cutting a fixed number of characters off the front.
        if text.startswith(label):
            text = text[len(label):].strip()
        parts = [text]
        for sibling in paragraph.find_next_siblings():
            if sibling.name in heading_tags:
                break
            if next_label is not None and next_label in sibling.text:
                break
            parts.append(sibling.text.strip())
        return " ".join(parts)

    paragraphs = element.find_next_siblings('p', limit=3)
    if len(paragraphs) < 3:
        raise ValueError(
            f"Expected 3 paragraphs after {element.text.strip()!r}, found {len(paragraphs)}"
        )
    purpose, description, outcome = paragraphs

    return {
        "purpose": labelled_text(purpose, "Purpose:", "Description:"),
        "description": labelled_text(description, "Description:", "Outcome:"),
        "outcome": labelled_text(outcome, "Outcome:"),
    }

def extract_content(element):
    """Extract purpose, description and outcome located through their <em> labels.

    For each of "Purpose:", "Description:" and "Outcome:" the next <em> after
    ``element`` whose string contains the label is looked up. The text of its
    grandparent element (label removed) is extended with the elements that
    follow it, up to the next label or the next heading.

    Args:
        element: heading element exposing BeautifulSoup's ``find_next``.

    Returns:
        dict with the keys "purpose", "description" and "outcome".

    Raises:
        ValueError: if one of the three labels cannot be found after
            ``element``.
    """
    # A heading at function level (h4) or above starts the next entry. Stopping
    # only at h4 would let the last function of a service swallow the next
    # service's (h3) or service area's (h2) title and paragraphs.
    heading_tags = ("h1", "h2", "h3", "h4")

    def labelled_text(label, next_label=None):
        # The filter may be handed None (an <em> without a single string
        # child), so guard before the membership test.
        title = element.find_next(
            "em", string=lambda text: text is not None and label in text
        )
        if title is None:
            raise ValueError(f"No {label!r} label found after {element.text.strip()!r}")

        # The block holding the text is two levels above the <em> label.
        block = title.parent.parent
        parts = [block.get_text(strip=True).replace(label, "").strip()]
        for sibling in block.find_next_siblings():
            if sibling.name in heading_tags:
                break
            if next_label is not None and next_label in sibling.text:
                break
            parts.append(sibling.text.strip())
        return " ".join(parts)

    return {
        "purpose": labelled_text("Purpose:", "Description:"),
        "description": labelled_text("Description:", "Outcome:"),
        "outcome": labelled_text("Outcome:"),
    }

def remove_heading(input_string):
    """Strip a leading section number such as "5.1.2 " from a heading title.

    The number is only removed when it is followed by whitespace; a string
    without such a prefix is returned unchanged.
    """
    numbering = re.match(r'^\d+(\.\d+)*\s+', input_string)
    if numbering is None:
        return input_string
    return input_string[numbering.end():]

# Namespace UUID from which the UUIDv5 of every service and function is
# derived, so the same name always yields the same identifier.
UUID_NAMESPACE = uuid.UUID("43803a9f-9ea6-4ebc-9cb5-68ccdc2c23e0")
# File name shared by the galaxy and the cluster output.
OUTPUT_FILENAME = 'first-csirt-services-framework.json'


def _entry_uuid(name):
    """Return the deterministic UUIDv5 string of the entry called ``name``."""
    return str(uuid.uuid5(UUID_NAMESPACE, name))


def _cluster_value(name, content, related):
    """Build one cluster value from an entry name, its extracted texts and its relations."""
    return {
        "description": content["description"],
        "meta": {
            "purpose": content["purpose"],
            "outcome": content["outcome"]
        },
        "uuid": _entry_uuid(name),
        "value": name,
        "related": related
    }


def _write_json(directory, payload):
    """Dump ``payload`` to ``../<directory>/<OUTPUT_FILENAME>`` relative to this script."""
    path = os.path.join(os.path.dirname(__file__), '..', directory, OUTPUT_FILENAME)
    # ensure_ascii=False writes non-ASCII characters verbatim, so the encoding
    # is pinned to UTF-8 rather than left to the platform default.
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(payload, f, indent=2, sort_keys=True, ensure_ascii=False)
        f.write('\n')  # only needed for the beauty and to be compliant with jq_all_the_things


# Check if the request was successful
if response.status_code == 200:
    # Parse the page content with BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')

    # Locate the heading of section 5; services (h3) and functions (h4) are
    # taken from the sibling headings that follow it
    section_header = soup.find('h2', id="5-Service-Area-Information-Security-Event-Management")
    if section_header:

        services = section_header.find_next_siblings('h3')
        functions = section_header.find_next_siblings('h4')

        for service in services:
            # This one service is parsed positionally instead of through its <em> labels
            if "Monitoring and detection" in service.text:
                content = extract_nostrong_content(service)
            else:
                content = extract_content(service)
            name = remove_heading(service.text.strip())
            cluster["values"].append(_cluster_value(name, content, []))

        for function in functions:
            content = extract_content(function)
            # get the parent service: the closest h3 heading before this function
            parent_service = function.find_previous('h3')
            relationship = {
                "dest-uuid": _entry_uuid(remove_heading(parent_service.text.strip())),
                "type": "part-of"
            }

            name = remove_heading(function.text.strip())
            cluster["values"].append(_cluster_value(name, content, [relationship]))

        _write_json('galaxies', galaxy)
        _write_json('clusters', cluster)

    else:
        print("Couldn't find the section header.")
else:
    print(f"Failed to download the webpage. Status code: {response.status_code}")
add: [first-csirt] Initial commit for FIRST CSIRT Services Framework 2024-08-22 14:46:56 +00:00			`#!/usr/bin/env python3`
			`# -- coding: utf-8 --`
			`#`
			`# A simple convertor script to generate galaxies from the MITRE NICE framework`
			`# https://niccs.cisa.gov/workforce-development/nice-framework`
			`# Copyright (C) 2024 Jean-Louis Huynen`
			`#`
			`# This program is free software: you can redistribute it and/or modify`
			`# it under the terms of the GNU Affero General Public License as`
			`# published by the Free Software Foundation, either version 3 of the`
			`# License, or (at your option) any later version.`
			`#`
			`# This program is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU Affero General Public License for more details.`
			`#`
			`# You should have received a copy of the GNU Affero General Public License`
			`# along with this program. If not, see <http://www.gnu.org/licenses/>.`

add: [first-csirt] implement @Delta-Sierra comments 2024-08-23 13:36:38 +00:00			`import pdb`
add: [first-csirt] Initial commit for FIRST CSIRT Services Framework 2024-08-22 14:46:56 +00:00			`import requests`
			`import json`
			`import os`
			`import uuid`
			`import re`
			`from bs4 import BeautifulSoup`

			`# uuidv4 generated to be concatenated in v5: 43803a9f-9ea6-4ebc-9cb5-68ccdc2c23e0`

			`galaxy = {`
			`"namespace": "first",`
			`"type": "first-csirt-services-framework",`
			`"name": "FIRST CSIRT Services Framework",`
			`"description": "The Computer Security Incident Response Team (CSIRT) Services Framework is a high-level document describing in a structured way a collection of cyber security services and associated functions that Computer Security Incident Response Teams and other teams providing incident management related services may provide",`
			`"uuid": "4a72488f-ef5b-4895-a5d9-c625dee663cb",`
			`"version": 1,`
			`"icon": 'user'`
			`}`

			`cluster = {`
			`'authors': ["FIRST", "CIRCL", "Jean-Louis Huynen"],`
			`'category': 'csirt',`
			`"type": "first-csirt-services-framework",`
			`"name": "FIRST CSIRT Services Framework",`
			`"description": "The Computer Security Incident Response Team (CSIRT) Services Framework is a high-level document describing in a structured way a collection of cyber security services and associated functions that Computer Security Incident Response Teams and other teams providing incident management related services may provide",`
			`"uuid": "4a72488f-ef5b-4895-a5d9-c625dee663cb",`
			`'source': 'https://www.first.org/standards/frameworks/csirts/csirt_services_framework_v2.1',`
			`'values': [],`
			`'version': 1`
			`}`

			`# URL to download`
			`url = "https://www.first.org/standards/frameworks/csirts/csirt_services_framework_v2.1#5-Service-Area-Information-Security-Event-Management"`

			`# Send a GET request to the webpage`
			`response = requests.get(url)`

add: [first-csirt] implement @Delta-Sierra comments 2024-08-23 13:36:38 +00:00			`def extract_nostrong_content(element):`
add: [first-csirt] Initial commit for FIRST CSIRT Services Framework 2024-08-22 14:46:56 +00:00			`content = element.find_next_siblings('p', limit=3)`
add: [first-csirt] implement @Delta-Sierra comments 2024-08-23 13:36:38 +00:00			`extracted = {}`

			`extracted["purpose"] = content[0].text.strip()[8:]`
			`for sibling in content[0].find_next_siblings():`
			`if "Description:" in sibling.text:`
			`break`
			`extracted["purpose"] += f" {sibling.text.strip()}"`


			`extracted["description"] = content[1].text.strip()[12:]`
			`for sibling in content[1].find_next_siblings():`
			`if "Outcome:" in sibling.text:`
			`break`
			`extracted["description"] += f" {sibling.text.strip()}"`

			`extracted["outcome"] = content[2].text.strip()[8:]`
			`for sibling in content[2].find_next_siblings():`
			`if sibling.name =="h4":`
			`break`
			`extracted["outcome"] += f" {sibling.text.strip()}"`

			`return extracted`

			`def extract_content(element):`
			`content = {}`
			`description_title = element.find_next("em", string=lambda text: "Description:" in text)`
			`purpose_title = element.find_next("em", string=lambda text: "Purpose:" in text)`
			`outcome_title = element.find_next("em", string=lambda text: "Outcome:" in text)`


			`content["purpose"] = purpose_title.parent.parent.get_text(strip=True).replace("Purpose:", "").strip()`
			`for sibling in purpose_title.parent.parent.find_next_siblings():`
			`if "Description:" in sibling.text:`
			`break`
			`content["purpose"] += f" {sibling.text.strip()}"`

			`content["description"] = description_title.parent.parent.get_text(strip=True).replace("Description:", "").strip()`
			`for sibling in description_title.parent.parent.find_next_siblings():`
			`if "Outcome:" in sibling.text:`
			`break`
			`content["description"] += f" {sibling.text.strip()}"`

			`content["outcome"] = outcome_title.parent.parent.get_text(strip=True).replace("Outcome:", "").strip()`
			`for sibling in outcome_title.parent.parent.find_next_siblings():`
			`if sibling.name =="h4":`
			`break`
			`content["outcome"] += f" {sibling.text.strip()}"`

			`return content`
add: [first-csirt] Initial commit for FIRST CSIRT Services Framework 2024-08-22 14:46:56 +00:00
			`def remove_heading(input_string):`
			`return re.sub(r'^\d+(\.\d+)*\s+', '', input_string)`

			`# Check if the request was successful`
			`if response.status_code == 200:`
			`# Parse the page content with BeautifulSoup`
			`soup = BeautifulSoup(response.content, 'html.parser')`

			`# Extract the section titled "4 CSIRT Services Framework Structure"`
			`section_header = soup.find('h2', id="5-Service-Area-Information-Security-Event-Management")`
			`if section_header:`

			`services = section_header.find_next_siblings('h3')`
			`functions = section_header.find_next_siblings('h4')`

			`for service in services:`
add: [first-csirt] implement @Delta-Sierra comments 2024-08-23 13:36:38 +00:00			`if "Monitoring and detection" in service.text:`
			`content = extract_nostrong_content(service)`
			`else:`
			`content = extract_content(service)`
add: [first-csirt] Initial commit for FIRST CSIRT Services Framework 2024-08-22 14:46:56 +00:00			`name = remove_heading(service.text.strip())`
			`suuid = str(uuid.uuid5(uuid.UUID("43803a9f-9ea6-4ebc-9cb5-68ccdc2c23e0"), name))`
			`cluster["values"].append(`
			`{`
add: [first-csirt] implement @Delta-Sierra comments 2024-08-23 13:36:38 +00:00			`"description": content["description"],`
			`"meta": {`
			`"purpose": content["purpose"],`
			`"outcome": content["outcome"]`
			`},`
add: [first-csirt] Initial commit for FIRST CSIRT Services Framework 2024-08-22 14:46:56 +00:00			`"uuid" : suuid,`
			`"value": name,`
			`"related": []`
			`}`
			`)`

			`for function in functions:`
add: [first-csirt] implement @Delta-Sierra comments 2024-08-23 13:36:38 +00:00			`content = extract_content(function)`
add: [first-csirt] Initial commit for FIRST CSIRT Services Framework 2024-08-22 14:46:56 +00:00			`# get the parent service`
			`parent_service = function.find_previous('h3')`
			`relationship = {`
			`"dest-uuid": str(uuid.uuid5(uuid.UUID("43803a9f-9ea6-4ebc-9cb5-68ccdc2c23e0"), remove_heading(parent_service.text.strip()))),`
add: [first-csirt] implement @Delta-Sierra comments 2024-08-23 13:36:38 +00:00			`"type": "part-of"`
add: [first-csirt] Initial commit for FIRST CSIRT Services Framework 2024-08-22 14:46:56 +00:00			`}`

			`name = remove_heading(function.text.strip())`

			`cluster["values"].append(`
			`{`
add: [first-csirt] implement @Delta-Sierra comments 2024-08-23 13:36:38 +00:00			`"description": content["description"],`
			`"meta": {`
			`"purpose": content["purpose"],`
			`"outcome": content["outcome"]`
			`},`
add: [first-csirt] Initial commit for FIRST CSIRT Services Framework 2024-08-22 14:46:56 +00:00			`"uuid" : str(uuid.uuid5(uuid.UUID("43803a9f-9ea6-4ebc-9cb5-68ccdc2c23e0"), name)),`
			`"value": name,`
			`"related": [relationship]`
			`}`
			`)`

			`with open(os.path.join(os.path.dirname(__file__), '..', 'galaxies', f'first-csirt-services-framework.json'), 'w') as f:`
			`json.dump(galaxy, f, indent=2, sort_keys=True, ensure_ascii=False)`
			`f.write('\n') # only needed for the beauty and to be compliant with jq_all_the_things`

			`with open(os.path.join(os.path.dirname(__file__), '..', 'clusters', f'first-csirt-services-framework.json'), 'w') as f:`
			`json.dump(cluster, f, indent=2, sort_keys=True, ensure_ascii=False)`
			`f.write('\n') # only needed for the beauty and to be compliant with jq_all_the_things`

			`else:`
			`print("Couldn't find the section header.")`
			`else:`
			`print(f"Failed to download the webpage. Status code: {response.status_code}")`