mirror of
https://github.com/MISP/misp-galaxy.git
synced 2024-11-26 08:47:18 +00:00
add tool to find duplicate
This commit is contained in:
parent
644e429110
commit
5c0feb1b1a
1 changed files with 51 additions and 0 deletions
51
tools/chk_dup.py
Executable file
51
tools/chk_dup.py
Executable file
|
@ -0,0 +1,51 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# coding=utf-8
|
||||||
|
"""
|
||||||
|
Tool to find duplicates in galaxies
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import collections
|
||||||
|
|
||||||
|
|
||||||
|
def loadjsons(path):
    """
    Load every ``*.json`` file found directly inside *path*.

    Only regular files whose name ends with ``.json`` are considered;
    sub-directories are not descended into. Files are processed in
    sorted name order so that the result is the same on every run
    (``os.listdir`` gives no ordering guarantee).

    :param path: directory to scan.
    :returns: list of the decoded JSON documents, one per file.
    :raises OSError: if *path* cannot be listed or a file cannot be read.
    :raises ValueError: if a file does not contain valid JSON.
    """
    data = []
    for name in sorted(os.listdir(path)):
        full_path = os.path.join(path, name)
        if not (name.endswith('.json') and os.path.isfile(full_path)):
            continue
        # JSON is UTF-8 by specification; do not depend on the locale
        # default. The context manager closes the handle even when
        # parsing fails.
        with open(full_path, encoding='utf-8') as handle:
            data.append(json.load(handle))
    return data
|
||||||
|
|
||||||
|
def find_duplicates(jsons):
    """
    Find names that are used more than once across the given galaxies.

    Every entry in a document's ``values`` list contributes its ``value``
    plus each string in ``meta.synonyms``. Names are compared after
    stripping surrounding whitespace and lower-casing.

    Malformed data (missing ``values``, missing ``meta``, non-string
    names) is skipped item by item, so one bad synonym does not hide
    the synonyms that follow it.

    :param jsons: iterable of decoded galaxy cluster documents (dicts).
    :returns: dict mapping each duplicated normalised name to the list
        of document ``name`` values it was seen in, one item per
        occurrence. Keys are ordered by first appearance.
    """
    # Grouping by name while scanning avoids rescanning a flat list of
    # all occurrences for every duplicate afterwards.
    seen = collections.defaultdict(list)
    for djson in jsons:
        galaxy = djson.get('name')
        for entry in djson.get('values') or []:
            candidates = [entry.get('value')]
            meta = entry.get('meta')
            synonyms = meta.get('synonyms') if isinstance(meta, dict) else None
            if isinstance(synonyms, (list, tuple)):
                candidates.extend(synonyms)
            for raw in candidates:
                if isinstance(raw, str):
                    seen[raw.strip().lower()].append(galaxy)
    return {name: found for name, found in seen.items() if len(found) > 1}


if __name__ == '__main__':
    # Iterate over all names + synonyms of the cluster files and tell
    # what is duplicated, followed by one [name, galaxy] line per
    # occurrence.
    for dup_name, galaxies in find_duplicates(loadjsons("../clusters")).items():
        print("Warning duplicate %s" % dup_name)
        for galaxy in galaxies:
            print([dup_name, galaxy])
|
Loading…
Reference in a new issue