mirror of
https://github.com/cve-search/cpe-guesser.git
synced 2024-11-14 19:08:27 +00:00
Alexandre Dulaunoy
a933c6f172
- Parse the CPE 2.3 XML file
- Extract the vendor and product
- Canonize the words of the vendor and product
- Insert the reverse index into Redis and create a sorted set holding the frequency score of each CPE vendor:product
75 lines
2.1 KiB
Python
75 lines
2.1 KiB
Python
|
|
import xml.sax
|
|
import redis
|
|
# Shared Redis client used by insert(); points at database 8 of a server on localhost.
rdb = redis.Redis(
    host='127.0.0.1',
    port=6379,
    db=8,
)
|
|
|
|
class CPEHandler(xml.sax.ContentHandler):
    """SAX content handler that indexes each ``cpe-item`` element it sees.

    While an item is being read, the handler collects into ``self.record``:

    * ``'cpe-23'`` -- the ``name`` attribute of ``cpe-23:cpe23-item``
    * ``'title'``  -- the character data of the last ``title`` element
    * ``'refs'``   -- the ``href`` attributes gathered inside ``references``

    When the ``cpe-item`` element closes, the words of the vendor and the
    product are passed to ``insert`` together with the shortened CPE line,
    and the record is cleared for the next item.
    """

    def __init__(self):
        super().__init__()
        self.cpe = ""
        self.title = ""
        self.title_seen = False
        self.record = {}
        self.refs = []
        # Name of the element opened most recently; defined up front so the
        # attribute exists even before the first startElement() call.
        self.CurrentData = ""

    def startElement(self, tag, attributes):
        """Capture the attributes of interest when an element opens."""
        self.CurrentData = tag
        if tag == 'cpe-23:cpe23-item':
            self.record['cpe-23'] = attributes['name']
        if tag == 'title':
            # From now on characters() accumulates text into self.title.
            self.title_seen = True
        if tag == 'reference':
            self.refs.append(attributes['href'])

    def characters(self, data):
        """Accumulate title text; SAX may deliver it in several chunks."""
        if self.title_seen:
            self.title = self.title + data

    def endElement(self, tag):
        """Finalize the field that just closed; index the item on ``cpe-item``."""
        if tag == 'title':
            self.record['title'] = self.title
            self.title = ""
            self.title_seen = False
        if tag == 'references':
            self.record['refs'] = self.refs
            self.refs = []
        if tag == 'cpe-item':
            cpe23 = self.record.get('cpe-23')
            # An item without a CPE 2.3 name has nothing to index: skip it
            # instead of aborting the whole import with a KeyError.
            if cpe23 is not None:
                to_insert = CPEExtractor(cpe=cpe23)
                for field in ('vendor', 'product'):
                    for word in canonize(to_insert[field]):
                        insert(word=word, cpe=to_insert['cpeline'])
            self.record = {}
|
|
|
|
|
|
def _split_cpe_fields(cpe):
    """Split a CPE string on colons, leaving backslash-escaped characters intact."""
    fields = []
    current = []
    chars = iter(cpe)
    for ch in chars:
        if ch == "\\":
            # Keep the backslash and whatever it escapes (e.g. \:) in the field.
            current.append(ch)
            current.append(next(chars, ""))
        elif ch == ":":
            fields.append("".join(current))
            current = []
        else:
            current.append(ch)
    fields.append("".join(current))
    return fields


def CPEExtractor(cpe=None):
    """Extract the vendor, the product and the shortened CPE line from *cpe*.

    The string is split into colon-separated fields; a colon preceded by a
    backslash is treated as part of the field rather than as a separator, so
    names containing escaped colons are not truncated.

    Returns ``False`` when *cpe* is ``None``. Otherwise returns a dict with:

    * ``'vendor'``  -- field 3
    * ``'product'`` -- field 4
    * ``'cpeline'`` -- the first five fields joined with ``:``

    Raises ``IndexError`` when the string has fewer than five fields.
    """
    if cpe is None:
        return False
    cpefield = _split_cpe_fields(cpe)
    record = {}
    record['vendor'] = cpefield[3]
    record['product'] = cpefield[4]
    record['cpeline'] = ":".join(cpefield[:5])
    return record
|
|
|
|
def canonize(value=None):
    """Return the lower-cased, underscore-separated words of *value*.

    Returns an empty list when *value* is ``None`` (the default), so that a
    caller iterating over the result simply has nothing to process instead of
    hitting an ``AttributeError``.
    """
    if value is None:
        return []
    return value.lower().split('_')
|
|
|
|
def insert(word=None, cpe=None):
    """Register *cpe* under *word* in the Redis index.

    *cpe* is added to the set ``w:<word>`` and its score in the sorted set
    ``s:<word>`` is incremented by 1. Returns ``False`` without touching
    Redis when either argument is ``None``; otherwise returns ``None``.
    """
    if word is None or cpe is None:
        return False

    # Reverse index: word -> every CPE line in which it occurs.
    members_key = 'w:{}'.format(word)
    rdb.sadd(members_key, cpe)

    # Ranking: how many times this CPE line was seen for the word.
    ranking_key = 's:{}'.format(word)
    rdb.zadd(ranking_key, {cpe: 1}, incr=True)
|
|
|
|
# Relative path of the CPE 2.3 dictionary XML file to import.
cpe_path = '../data/official-cpe-dictionary_v2.3.xml'

# Stream the dictionary through a SAX parser; the handler indexes each item
# into Redis as its closing tag is reached.
Handler = CPEHandler()
parser = xml.sax.make_parser()
parser.setContentHandler(Handler)
parser.parse(cpe_path)
|