mirror of
https://github.com/cve-search/cpe-guesser.git
synced 2024-11-14 10:58:27 +00:00
new: [import] first version of the import
- Parse the CPE 2.3 XML file - Extract vendor and product - Canonize words from the vendor and product - Insert in redis the reverse index and create a sorted set with the score per cpe vendor:product frequency
This commit is contained in:
commit
a933c6f172
3 changed files with 118 additions and 0 deletions
39
README.md
Normal file
39
README.md
Normal file
|
@ -0,0 +1,39 @@
|
|||
# CPE guesser
|
||||
|
||||
CPE guesser is a web service to guess the CPE name based on one or more keyword(s). Then the result can
|
||||
be used against [cve-search](https://github.com/cve-search/cve-search) to do actual searches by CPE names.
|
||||
|
||||
## Requirements
|
||||
|
||||
- Redis
|
||||
- Python
|
||||
|
||||
## How does this work?
|
||||
|
||||
A CPE entry is composed of a human readable name with some references and the structured CPE name.
|
||||
|
||||
~~~
|
||||
<cpe-item name="cpe:/a:10web:form_maker:1.7.17::~~~wordpress~~">
|
||||
<title xml:lang="en-US">10web Form Maker 1.7.17 for WordPress</title>
|
||||
<references>
|
||||
<reference href="https://wordpress.org/plugins/form-maker/#developers">Change Log</reference>
|
||||
</references>
|
||||
<cpe-23:cpe23-item name="cpe:2.3:a:10web:form_maker:1.7.17:*:*:*:*:wordpress:*:*"/>
|
||||
</cpe-item>
|
||||
~~~
|
||||
|
||||
The CPE name is structured with a vendor name, a product name and some additional information.
|
||||
CPE names can easily change when a vendor or product is renamed; some vendors and products
|
||||
share common names, and some names are composed of multiple words.
|
||||
|
||||
|
||||
### Data
|
||||
|
||||
Split the vendor name and product name on separators (such as `_`) into single word(s) and then canonize each word. An inverse index is built using
|
||||
the cpe vendor:product format as value and the canonized word as key. Then cpe guesser creates a ranked set with the most common
|
||||
cpe (vendor:product) per version to give a probability of the CPE appearance.
|
||||
|
||||
### Redis structure
|
||||
|
||||
- `w:<word>` set
|
||||
- `s:<word>` sorted set with a score depending on the number of appearances
|
75
bin/import.py
Normal file
75
bin/import.py
Normal file
|
@ -0,0 +1,75 @@
|
|||
|
||||
import os
import xml.sax

import redis
|
||||
# Shared Redis connection used by insert(): local server, default port, database 8.
rdb = redis.Redis(host='127.0.0.1', port=6379, db=8)
|
||||
|
||||
class CPEHandler(xml.sax.ContentHandler):
    """SAX content handler that indexes each ``cpe-item`` of a CPE dictionary.

    While an item is being parsed, its CPE 2.3 name, title and reference
    URLs are collected in ``self.record``.  When the closing ``cpe-item``
    tag is reached, the vendor and product words of the CPE name are passed
    to ``insert()`` together with the shortened CPE line.
    """

    def __init__(self):
        # Explicit base-class call keeps the handler fully initialised.
        xml.sax.ContentHandler.__init__(self)
        self.cpe = ""
        self.title = ""
        # True only while the parser is inside a <title> element.
        self.title_seen = False
        # Data gathered for the cpe-item currently being parsed.
        self.record = {}
        # Reference URLs gathered for the current <references> element.
        self.refs = []
        # Name of the most recently opened element.
        self.CurrentData = ""

    def startElement(self, tag, attributes):
        """Record the data carried by the opening tag of an element.

        tag        -- qualified element name as reported by the parser
        attributes -- mapping of attribute names to values
        """
        self.CurrentData = tag
        if tag == 'cpe-23:cpe23-item':
            self.record['cpe-23'] = attributes['name']
        elif tag == 'title':
            self.title_seen = True
        elif tag == 'reference':
            self.refs.append(attributes['href'])

    def characters(self, data):
        """Accumulate title text; the parser may deliver it in several chunks."""
        if self.title_seen:
            self.title = self.title + data

    def endElement(self, tag):
        """Finalise the element being closed.

        Closing ``title`` and ``references`` store their collected data in
        the current record.  Closing ``cpe-item`` indexes the record and
        starts a fresh one.
        """
        if tag == 'title':
            self.record['title'] = self.title
            self.title = ""
            self.title_seen = False
        elif tag == 'references':
            self.record['refs'] = self.refs
            self.refs = []
        elif tag == 'cpe-item':
            cpe = self.record.get('cpe-23')
            # Start a fresh record before anything can bail out, so data
            # never leaks from one cpe-item into the next.
            self.record = {}
            if cpe is None:
                # Item without a CPE 2.3 name: nothing to index.
                return
            to_insert = CPEExtractor(cpe=cpe)
            if not to_insert:
                # CPEExtractor signals an unusable name with False.
                return
            for word in canonize(to_insert['vendor']):
                insert(word=word, cpe=to_insert['cpeline'])
            for word in canonize(to_insert['product']):
                insert(word=word, cpe=to_insert['cpeline'])
|
||||
|
||||
|
||||
def CPEExtractor(cpe=None):
    """Extract the vendor, the product and the short CPE line from a CPE name.

    cpe -- colon-separated CPE 2.3 name, for example
           ``cpe:2.3:a:10web:form_maker:1.7.17:*:*:*:*:wordpress:*:*``

    Returns a dict with the keys ``vendor`` (fourth field), ``product``
    (fifth field) and ``cpeline`` (the first five fields joined with ``:``),
    or ``False`` when ``cpe`` is ``None`` or has fewer than five fields.
    """
    if cpe is None:
        return False
    # NOTE(review): a plain split does not honour backslash-escaped colons;
    # confirm whether the dictionary can contain them in vendor/product.
    cpefield = cpe.split(":")
    if len(cpefield) < 5:
        # Too short to carry both a vendor and a product field.
        return False
    return {
        'vendor': cpefield[3],
        'product': cpefield[4],
        'cpeline': ":".join(cpefield[:5]),
    }
|
||||
|
||||
def canonize(value=None):
    """Split a vendor or product name into lower-cased words.

    value -- name whose words are separated by ``_``

    Returns the list of non-empty words.  ``None`` or an empty string gives
    an empty list, and leading, trailing or repeated underscores do not
    produce empty words.
    """
    if not value:
        return []
    return [word for word in value.lower().split('_') if word]
|
||||
|
||||
def insert(word=None, cpe=None):
    """Register ``cpe`` under ``word`` in the Redis index.

    word -- canonized word used to build the key names
    cpe  -- short CPE line stored as the member

    The CPE line is added to the set ``w:<word>`` and its score in the
    sorted set ``s:<word>`` is incremented by one.  Returns ``False``
    without touching Redis when either argument is ``None``.
    """
    if word is None or cpe is None:
        return False
    members_key = 'w:{}'.format(word)
    ranking_key = 's:{}'.format(word)
    rdb.sadd(members_key, cpe)
    rdb.zadd(ranking_key, {cpe: 1}, incr=True)
|
||||
|
||||
# Locate the dictionary relative to this script instead of the current
# working directory, so the import works from wherever it is launched.
cpe_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    '..', 'data', 'official-cpe-dictionary_v2.3.xml',
)

# Feed the dictionary through the SAX parser; CPEHandler does the indexing
# as each cpe-item element is closed.
parser = xml.sax.make_parser()
Handler = CPEHandler()
parser.setContentHandler(Handler)
parser.parse(cpe_path)
|
4
data/dump.sh
Normal file
4
data/dump.sh
Normal file
|
@ -0,0 +1,4 @@
|
|||
#!/bin/sh
# Download the official CPE 2.3 dictionary into the current directory and
# decompress it.

# Stop at the first failure so gzip never runs on a missing or partial file.
set -e

# -O overwrites any stale archive instead of saving the download as "*.gz.1".
wget -O official-cpe-dictionary_v2.3.xml.gz https://nvd.nist.gov/feeds/xml/cpe/dictionary/official-cpe-dictionary_v2.3.xml.gz
# -f replaces an XML file left by a previous run, so the script can be re-run.
gzip -df official-cpe-dictionary_v2.3.xml.gz
|
Loading…
Reference in a new issue