fix: fixes CaSe InSenSiTiVe duplicates

This commit is contained in:
Christophe Vandeplas 2024-06-18 16:58:38 +02:00
parent ca3cd1d0fb
commit 6f4b3b1387
No known key found for this signature in database
GPG key ID: BDC48619FFDC5A5B
10 changed files with 8873 additions and 5773 deletions

View file

@ -3569,16 +3569,6 @@
"uuid": "9aa0a1b7-c9ff-422c-9ef1-431459e1e1b9", "uuid": "9aa0a1b7-c9ff-422c-9ef1-431459e1e1b9",
"value": "PMC Bronze 44 Magnum Ammo - 500 Rounds of 180 Grain JHP Ammunition" "value": "PMC Bronze 44 Magnum Ammo - 500 Rounds of 180 Grain JHP Ammunition"
}, },
{
"meta": {
"caliber": "X",
"description": "Tac 7.62x51mm Ammo",
"manufacturer": "PMC",
"name": "PMC X"
},
"uuid": "0dd1db3c-8d5d-4296-b780-ae5ac7a92fed",
"value": "PMC X - Tac 7.62x51mm Ammo"
},
{ {
"meta": { "meta": {
"caliber": "Bronze 308 Win Ammo", "caliber": "Bronze 308 Win Ammo",
@ -4110,5 +4100,5 @@
"value": "NobelSport High Brass Field 12 Gauge Ammo - 250 Rounds of 1" "value": "NobelSport High Brass Field 12 Gauge Ammo - 250 Rounds of 1"
} }
], ],
"version": 1 "version": 2
} }

View file

@ -3660,25 +3660,6 @@
"uuid": "6cb47609-b03e-43d9-a4c7-8342f1011f3b", "uuid": "6cb47609-b03e-43d9-a4c7-8342f1011f3b",
"value": "ANGRYREBEL" "value": "ANGRYREBEL"
}, },
{
"description": "",
"meta": {
"refs": [
"https://malpedia.caad.fkie.fraunhofer.de/details/elf.avoslocker",
"https://blogs.blackberry.com/en/2022/04/threat-thursday-avoslocker-prompts-advisory-from-fbi-and-fincen",
"https://blog.qualys.com/vulnerabilities-threat-research/2022/03/06/avoslocker-ransomware-behavior-examined-on-windows-linux",
"https://blog.cyble.com/2022/01/17/avoslocker-ransomware-linux-version-targets-vmware-esxi-servers/",
"https://blog.lexfo.fr/Avoslocker.html",
"https://www.ic3.gov/Media/News/2022/220318.pdf",
"https://blogs.vmware.com/security/2022/09/esxi-targeting-ransomware-the-threats-that-are-after-your-virtual-machines-part-1.html",
"https://blogs.vmware.com/security/2022/02/avoslocker-modern-linux-ransomware-threats.html"
],
"synonyms": [],
"type": []
},
"uuid": "465b6a74-87ca-4459-b4be-3f8b272f4485",
"value": "Avoslocker"
},
{ {
"description": "AVrecon is a Linux-based Remote Access Trojan (RAT) targeting small-office/home-office (SOHO) routers and other ARM-embedded devices. The malware is distributed via exploitation of unpatched vulnerabilities or common misconfiguration of the targeted devices. Once deployed, AVreckon will collect some information about the infected device, open a session to pre-configured C&C server, and spawn a remote shell for command execution. It might also download additional arbitrary files and run them. The malware has recently been used in campaigns aimed at ad-fraud activities, password spraying and data exfiltration.", "description": "AVrecon is a Linux-based Remote Access Trojan (RAT) targeting small-office/home-office (SOHO) routers and other ARM-embedded devices. The malware is distributed via exploitation of unpatched vulnerabilities or common misconfiguration of the targeted devices. Once deployed, AVreckon will collect some information about the infected device, open a session to pre-configured C&C server, and spawn a remote shell for command execution. It might also download additional arbitrary files and run them. The malware has recently been used in campaigns aimed at ad-fraud activities, password spraying and data exfiltration.",
"meta": { "meta": {
@ -15170,7 +15151,10 @@
"https://news.sophos.com/en-us/2021/12/22/avos-locker-remotely-accesses-boxes-even-running-in-safe-mode/", "https://news.sophos.com/en-us/2021/12/22/avos-locker-remotely-accesses-boxes-even-running-in-safe-mode/",
"https://news.sophos.com/en-us/2022/03/17/the-ransomware-threat-intelligence-center/", "https://news.sophos.com/en-us/2022/03/17/the-ransomware-threat-intelligence-center/",
"https://unit42.paloaltonetworks.com/emerging-ransomware-groups/", "https://unit42.paloaltonetworks.com/emerging-ransomware-groups/",
"https://blog.qualys.com/vulnerabilities-threat-research/2022/03/06/avoslocker-ransomware-behavior-examined-on-windows-linux" "https://blog.qualys.com/vulnerabilities-threat-research/2022/03/06/avoslocker-ransomware-behavior-examined-on-windows-linux",
"https://blog.lexfo.fr/Avoslocker.html",
"https://blogs.vmware.com/security/2022/09/esxi-targeting-ransomware-the-threats-that-are-after-your-virtual-machines-part-1.html",
"https://blogs.vmware.com/security/2022/02/avoslocker-modern-linux-ransomware-threats.html"
], ],
"synonyms": [], "synonyms": [],
"type": [] "type": []
@ -56457,5 +56441,5 @@
"value": "Zyklon" "value": "Zyklon"
} }
], ],
"version": 19803 "version": 19804
} }

View file

@ -24250,11 +24250,6 @@
"uuid": "90c6daf8-8212-4ea8-9b59-af49b290b3b9", "uuid": "90c6daf8-8212-4ea8-9b59-af49b290b3b9",
"value": "TurkStatik" "value": "TurkStatik"
}, },
{
"description": "ransomware",
"uuid": "93277946-177a-4f92-833d-30db9d432656",
"value": "Tyrant"
},
{ {
"description": "ransomware", "description": "ransomware",
"uuid": "0407e98d-cd3e-42e1-8daf-3c51d2e4906a", "uuid": "0407e98d-cd3e-42e1-8daf-3c51d2e4906a",
@ -29200,5 +29195,5 @@
"value": "apos" "value": "apos"
} }
], ],
"version": 122 "version": 123
} }

View file

@ -1770,7 +1770,7 @@
"date": "1998" "date": "1998"
}, },
"uuid": "2a47361d-584b-493f-80a4-37c74c30cf1b", "uuid": "2a47361d-584b-493f-80a4-37c74c30cf1b",
"value": "Vortex" "value": "VorteX"
}, },
{ {
"meta": { "meta": {
@ -2140,13 +2140,6 @@
"uuid": "c42394f8-5f35-4797-9393-8289ab8ad3ad", "uuid": "c42394f8-5f35-4797-9393-8289ab8ad3ad",
"value": "SharpEye" "value": "SharpEye"
}, },
{
"meta": {
"date": "2010"
},
"uuid": "58e2e2ee-5c25-4a13-abfc-2a6c85d978fa",
"value": "VorteX"
},
{ {
"meta": { "meta": {
"date": "2010", "date": "2010",
@ -3648,5 +3641,5 @@
"value": "COATHANGER" "value": "COATHANGER"
} }
], ],
"version": 45 "version": 46
} }

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -19,10 +19,10 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
import json import json
import requests import requests
import uuid import uuid
from pymispgalaxies import Cluster, Galaxy
''' '''
From https://www.culturecollections.org.uk/search/?searchScope=Product&pageNumber=1&filter.collectionGroup=0&filter.collection=0&filter.sorting=DateCreated From https://www.culturecollections.org.uk/search/?searchScope=Product&pageNumber=1&filter.collectionGroup=0&filter.collection=0&filter.sorting=DateCreated
@ -42,6 +42,7 @@ cell culture characteristics. Passage numbers where given act only as a guide an
the passage number stated will be the passage number received by the customer. the passage number stated will be the passage number received by the customer.
''' '''
def download_items(): def download_items():
data = {'items': [], data = {'items': [],
'collections': {}, 'collections': {},
@ -75,11 +76,13 @@ def save_items(d):
json.dump(d, f, indent=2, sort_keys=True) json.dump(d, f, indent=2, sort_keys=True)
return True return True
def load_saved_items(): def load_saved_items():
with open('items.json', 'r') as f: with open('items.json', 'r') as f:
d = json.load(f) d = json.load(f)
return d return d
data = download_items() data = download_items()
# save_items(data) # save_items(data)
# data = load_saved_items() # data = load_saved_items()
@ -110,33 +113,27 @@ for item in data['items']:
clusters_dict[cluster['value']] = cluster clusters_dict[cluster['value']] = cluster
# transform dict to list # transform dict to list
clusters = [] cluster = Cluster('ukhsa-culture-collections', skip_duplicates=True)
cluster.cluster_values = {}
for item in clusters_dict.values(): for item in clusters_dict.values():
clusters.append(item) cluster.append(item, skip_duplicates=True)
cluster.save('ukhsa-culture-collections')
for cluster, duplicate in cluster.duplicates:
print(f"WARNING: Skipped duplicate: {duplicate} in cluster {cluster}")
json_galaxy = { try:
'icon': "virus", galaxy = Galaxy('ukhsa-culture-collections')
'name': "UKHSA Culture Collections", except KeyError:
'description': "UK Health Security Agency Culture Collections represent deposits of cultures that consist of expertly preserved, authenticated cell lines and microbial strains of known provenance.", galaxy = Galaxy({
'namespace': "gov.uk", 'icon': "virus",
'type': "ukhsa-culture-collections", 'name': "UKHSA Culture Collections",
'uuid': "bbe11c06-1d6a-477e-88f1-cdda2d71de56", 'description': "UK Health Security Agency Culture Collections represent deposits of cultures that consist of expertly preserved, authenticated cell lines and microbial strains of known provenance.",
'version': 1 'namespace': "gov.uk",
} 'type': "ukhsa-culture-collections",
'uuid': "bbe11c06-1d6a-477e-88f1-cdda2d71de56",
'version': 1
})
galaxy.save('ukhsa-culture-collections')
with open(os.path.join('..', 'clusters', 'ukhsa-culture-collections.json'), 'r') as f: print("All done, please don't forget to ./jq_all_the_things.sh, commit, and then ./validate_all.sh.")
json_cluster = json.load(f)
json_cluster['values'] = clusters
json_cluster['version'] += 1
# save the Galaxy and Cluster file
with open(os.path.join('..', 'galaxies', 'ukhsa-culture-collections.json'), 'w') as f:
json.dump(json_galaxy, f, indent=2, sort_keys=True, ensure_ascii=False)
f.write('\n') # only needed for the beauty and to be compliant with jq_all_the_things
with open(os.path.join('..', 'clusters', 'ukhsa-culture-collections.json'), 'w') as f:
json.dump(json_cluster, f, indent=2, sort_keys=True, ensure_ascii=False)
f.write('\n') # only needed for the beauty and to be compliant with jq_all_the_things
print("All done, please don't forget to ./jq_all_the_things.sh, commit, and then ./validate_all.sh.")

View file

@ -132,7 +132,8 @@ class Cluster:
def save_to_file(self, path): def save_to_file(self, path):
with open(path, "w") as file: with open(path, "w") as file:
file.write(json.dumps(self.__dict__(), indent=4)) file.write(json.dumps(self.__dict__(), indent=2))
file.write('\n')
def __str__(self) -> str: def __str__(self) -> str:
return f"Cluster: {self.name} - {self.type} - {self.uuid}" return f"Cluster: {self.name} - {self.type} - {self.uuid}"
@ -270,7 +271,7 @@ class GroupCluster(Cluster):
# Code Block for handling duplicate from Tidal API data (hopefully only temporary) # Code Block for handling duplicate from Tidal API data (hopefully only temporary)
if value.uuid == "3290dcb9-5781-4b87-8fa0-6ae820e152cd": if value.uuid == "3290dcb9-5781-4b87-8fa0-6ae820e152cd":
value.value = "Volt Typhoon - Tidal" value.value = "Volt Typhoon - Tidal"
self.values.append(value.return_value()) self.values.append(value.return_value())
@ -367,6 +368,7 @@ class SoftwareCluster(Cluster):
uuid=associated_software.get("associated_software_id"), uuid=associated_software.get("associated_software_id"),
value=associated_software.get("name") + " - Associated Software", value=associated_software.get("name") + " - Associated Software",
) )
self.values.append(value.return_value()) self.values.append(value.return_value())
related.append( related.append(
{ {
@ -384,6 +386,12 @@ class SoftwareCluster(Cluster):
uuid=entry.get("id"), uuid=entry.get("id"),
value=entry.get("name"), value=entry.get("name"),
) )
# duplicates, manually handled
if value.uuid == '6af0eac2-c35f-4569-ae09-47f1ca846961':
value.value = f"{value.value} - Duplicate"
if value.uuid == '39d81c48-8f7c-54cb-8fac-485598e31a55':
value.value = f"{value.value} - Duplicate"
self.values.append(value.return_value()) self.values.append(value.return_value())
@ -493,7 +501,7 @@ class TechniqueCluster(Cluster):
sub_value.value = "Spearphishing Link - Duplicate" sub_value.value = "Spearphishing Link - Duplicate"
elif sub_value.uuid == "350c12a3-33f6-5942-8892-4d6e70abbfc1": elif sub_value.uuid == "350c12a3-33f6-5942-8892-4d6e70abbfc1":
sub_value.value = "Spearphishing Voice - Duplicate" sub_value.value = "Spearphishing Voice - Duplicate"
self.values.append(sub_value.return_value()) self.values.append(sub_value.return_value())
related.append( related.append(
{ {
@ -585,6 +593,17 @@ class ReferencesCluster(Cluster):
uuid=entry.get("id"), uuid=entry.get("id"),
value=entry.get("name"), value=entry.get("name"),
) )
# handle duplicates manually
if value.uuid == 'eea178f4-80bd-49d1-84b1-f80671e9a3e4':
value.value = f"{value.value} - Duplicate"
if value.uuid == '9bb5c330-56bd-47e7-8414-729d8e6cb3b3':
value.value = f"{value.value} - Duplicate"
if value.uuid == '8b4bdce9-da19-443f-88d2-11466e126c09':
value.value = f"{value.value} - Duplicate"
if value.uuid == 'b4727044-51bb-43b3-afdb-515bb4bb0f7e':
value.value = f"{value.value} - Duplicate"
self.values.append(value.return_value()) self.values.append(value.return_value())

View file

@ -14,4 +14,5 @@ class Galaxy:
def save_to_file(self, path: str): def save_to_file(self, path: str):
with open(path, "w") as file: with open(path, "w") as file:
file.write(json.dumps(asdict(self), indent=4)) file.write(json.dumps(asdict(self), indent=2))
file.write('\n')