mirror of https://github.com/ail-project/ail-framework.git (synced 2024-11-10 08:38:28 +00:00)
chg: [merge] merge update into advanced_crawler

commit d44acea04d
30 changed files with 126 additions and 102 deletions
OVERVIEW.md (28 changed lines)
@@ -143,12 +143,30 @@ ARDB_DB
 * DB 3 - Trending
 * DB 4 - Sentiment
+        ----------------------------------------- SENTIMENT ------------------------------------
+
+        SET - 'Provider_set'                Provider
+
+        KEY - 'UniqID'                      INT
+
+        SET - provider_timestamp            UniqID
+
+        SET - UniqID                        avg_score
+
 * DB 5 - TermCred
 * DB 6 - Tags
-* DB 7 - Metadata
-* DB 8 - Statistics
+        ----------------------------------------------------------------------------------------
+
+        SET - tag                           paste*
+
+        ----------------------------------------------------------------------------------------
+
 * DB 7 - Metadata:
+        ----------------------------------------------------------------------------------------
+
+        SET - 'tag:' + paste                tag
+
+        ----------------------------------------------------------------------------------------
+
 ----------------------------------------- BASE64 ----------------------------------------

 HSET - 'metadata_hash:'+hash            'saved_path'        saved_path
@@ -185,3 +203,9 @@ ARDB_DB
 GET - 'base64_decoded:'+date            nd_decoded
 GET - 'binary_decoded:'+date            nd_decoded

+* DB 8 - Statistics
+
+* DB 9 - Onion:
+        ----------------------------------------------------------------------------------------
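The Sentiment keys documented above can be exercised with any Redis/ARDB client. A minimal sketch, not taken from the commit; the connection parameters, provider name and score value are assumptions:

    import time
    import redis

    # assumed ARDB endpoint; DB 4 is the Sentiment database per the overview above
    r = redis.StrictRedis(host='localhost', port=6382, db=4, decode_responses=True)

    provider = 'pastebin.com'                 # hypothetical feed provider
    uniq_id = r.incr('UniqID')                # KEY - 'UniqID'            INT
    r.sadd('Provider_set', provider)          # SET - 'Provider_set'      Provider
    timestamp = int(time.time())
    r.sadd('{}_{}'.format(provider, timestamp), uniq_id)    # SET - provider_timestamp   UniqID
    r.sadd(uniq_id, '{"compoundPos": 0.5}')                  # SET - UniqID               avg_score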
@@ -40,7 +40,7 @@ def search_api_key(message):
     print('found google api key')
     print(to_print)
     publisher.warning('{}Checked {} found Google API Key;{}'.format(
-        to_print, len(google_api_key), paste.p_path))
+        to_print, len(google_api_key), paste.p_rel_path))
     msg = 'infoleak:automatic-detection="google-api-key";{}'.format(filename)
     p.populate_set_out(msg, 'Tags')

@@ -49,7 +49,7 @@ def search_api_key(message):
     print(to_print)
     total = len(aws_access_key) + len(aws_secret_key)
     publisher.warning('{}Checked {} found AWS Key;{}'.format(
-        to_print, total, paste.p_path))
+        to_print, total, paste.p_rel_path))
     msg = 'infoleak:automatic-detection="aws-key";{}'.format(filename)
     p.populate_set_out(msg, 'Tags')

@@ -43,8 +43,8 @@ if __name__ == "__main__":
         # FIXME why not all saving everything there.
         PST.save_all_attributes_redis()
         # FIXME Not used.
-        PST.store.sadd("Pastes_Objects", PST.p_path)
+        PST.store.sadd("Pastes_Objects", PST.p_rel_path)
     except IOError:
-        print("CRC Checksum Failed on :", PST.p_path)
+        print("CRC Checksum Failed on :", PST.p_rel_path)
         publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
             PST.p_source, PST.p_date, PST.p_name))
@@ -67,7 +67,7 @@ def check_all_iban(l_iban, paste, filename):
     if(nb_valid_iban > 0):
         to_print = 'Iban;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name)
         publisher.warning('{}Checked found {} IBAN;{}'.format(
-            to_print, nb_valid_iban, paste.p_path))
+            to_print, nb_valid_iban, paste.p_rel_path))
         msg = 'infoleak:automatic-detection="iban";{}'.format(filename)
         p.populate_set_out(msg, 'Tags')

@@ -113,7 +113,7 @@ if __name__ == "__main__":
             try:
                 l_iban = iban_regex.findall(content)
             except TimeoutException:
-                print ("{0} processing timeout".format(paste.p_path))
+                print ("{0} processing timeout".format(paste.p_rel_path))
                 continue
             else:
                 signal.alarm(0)
@@ -73,7 +73,7 @@ def search_key(content, message, paste):
         to_print = 'Bitcoin;{};{};{};'.format(paste.p_source, paste.p_date,
                                               paste.p_name)
         publisher.warning('{}Detected {} Bitcoin private key;{}'.format(
-            to_print, len(bitcoin_private_key),paste.p_path))
+            to_print, len(bitcoin_private_key),paste.p_rel_path))

 if __name__ == "__main__":
     publisher.port = 6380
bin/Categ.py (10 changed lines)
@@ -89,16 +89,10 @@ if __name__ == "__main__":
         paste = Paste.Paste(filename)
         content = paste.get_p_content()

-        #print('-----------------------------------------------------')
-        #print(filename)
-        #print(content)
-        #print('-----------------------------------------------------')
-
         for categ, pattern in tmp_dict.items():
             found = set(re.findall(pattern, content))
             if len(found) >= matchingThreshold:
-                msg = '{} {}'.format(paste.p_path, len(found))
-                #msg = " ".join( [paste.p_path, bytes(len(found))] )
+                msg = '{} {}'.format(paste.p_rel_path, len(found))

                 print(msg, categ)
                 p.populate_set_out(msg, categ)

@@ -106,4 +100,4 @@ if __name__ == "__main__":
             publisher.info(
                 'Categ;{};{};{};Detected {} as {};{}'.format(
                     paste.p_source, paste.p_date, paste.p_name,
-                    len(found), categ, paste.p_path))
+                    len(found), categ, paste.p_rel_path))
@@ -97,7 +97,7 @@ if __name__ == "__main__":
             if sites_set:
                 message += ' Related websites: {}'.format( (', '.join(sites_set)) )

-            to_print = 'Credential;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message, paste.p_path)
+            to_print = 'Credential;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message, paste.p_rel_path)

             print('\n '.join(creds))
@@ -77,16 +77,16 @@ if __name__ == "__main__":
                 paste.p_source, paste.p_date, paste.p_name)
             if (len(creditcard_set) > 0):
                 publisher.warning('{}Checked {} valid number(s);{}'.format(
-                    to_print, len(creditcard_set), paste.p_path))
+                    to_print, len(creditcard_set), paste.p_rel_path))
                 print('{}Checked {} valid number(s);{}'.format(
-                    to_print, len(creditcard_set), paste.p_path))
+                    to_print, len(creditcard_set), paste.p_rel_path))
                 #Send to duplicate
                 p.populate_set_out(filename, 'Duplicate')

                 msg = 'infoleak:automatic-detection="credit-card";{}'.format(filename)
                 p.populate_set_out(msg, 'Tags')
             else:
-                publisher.info('{}CreditCard related;{}'.format(to_print, paste.p_path))
+                publisher.info('{}CreditCard related;{}'.format(to_print, paste.p_rel_path))
         else:
             publisher.debug("Script creditcard is idling 1m")
             time.sleep(10)
@@ -226,7 +226,7 @@ if __name__ == '__main__':
         except TimeoutException:
             encoded_list = []
             p.incr_module_timeout_statistic() # add encoder type
-            print ("{0} processing timeout".format(paste.p_path))
+            print ("{0} processing timeout".format(paste.p_rel_path))
             continue
         else:
             signal.alarm(0)
@@ -54,14 +54,14 @@ def main():
             if localizeddomains:
                 print(localizeddomains)
                 publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
-                    PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path))
+                    PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_rel_path))
             localizeddomains = c.localizedomain(cc=cc)
             if localizeddomains:
                 print(localizeddomains)
                 publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
-                    PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path))
+                    PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_rel_path))
         except IOError:
-            print("CRC Checksum Failed on :", PST.p_path)
+            print("CRC Checksum Failed on :", PST.p_rel_path)
             publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
                 PST.p_source, PST.p_date, PST.p_name))
@@ -142,17 +142,17 @@ if __name__ == "__main__":
                         paste_date = paste_date
                         paste_date = paste_date if paste_date != None else "No date available"
                         if paste_path != None:
-                            if paste_path != PST.p_path:
+                            if paste_path != PST.p_rel_path:
                                 hash_dico[dico_hash] = (hash_type, paste_path, percent, paste_date)

-                            print('['+hash_type+'] '+'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent))
+                            print('['+hash_type+'] '+'comparing: ' + str(PST.p_rel_path) + ' and ' + str(paste_path) + ' percentage: ' + str(percent))

                     except Exception:
                         print('hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash)

             # Add paste in DB after checking to prevent its analysis twice
             # hash_type_i -> index_i AND index_i -> PST.PATH
-            r_serv1.set(index, PST.p_path)
+            r_serv1.set(index, PST.p_rel_path)
             r_serv1.set(index+'_date', PST._get_p_date())
             r_serv1.sadd("INDEX", index)
             # Adding hashes in Redis

@@ -180,7 +180,7 @@ if __name__ == "__main__":
                 PST.__setattr__("p_duplicate", dupl)
                 PST.save_attribute_duplicate(dupl)
                 PST.save_others_pastes_attribute_duplicate(dupl)
-                publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_path))
+                publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_rel_path))
                 print('{}Detected {}'.format(to_print, len(dupl)))
                 print('')

@@ -191,5 +191,5 @@ if __name__ == "__main__":
         except IOError:
             to_print = 'Duplicate;{};{};{};'.format(
                 PST.p_source, PST.p_date, PST.p_name)
-            print("CRC Checksum Failed on :", PST.p_path)
+            print("CRC Checksum Failed on :", PST.p_rel_path)
             publisher.error('{}CRC Checksum Failed'.format(to_print))
@@ -45,6 +45,8 @@ if __name__ == '__main__':

     p = Process(config_section)

+    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"))
+
     # LOGGING #
     publisher.info("Feed Script started to receive & publish.")

@@ -78,8 +80,7 @@ if __name__ == '__main__':
                 paste = rreplace(paste, file_name_paste, new_file_name_paste, 1)

             # Creating the full filepath
-            filename = os.path.join(os.environ['AIL_HOME'],
-                                    p.config.get("Directories", "pastes"), paste)
+            filename = os.path.join(PASTES_FOLDER, paste)

             dirname = os.path.dirname(filename)
             if not os.path.exists(dirname):
@@ -102,6 +103,7 @@ if __name__ == '__main__':
             print(filename)
             print(type)
             print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------')
             '''
-            p.populate_set_out(filename)
+
+            p.populate_set_out(paste)
             processed_paste+=1
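Taken together, the hunks above make this module build the on-disk location from a single PASTES_FOLDER constant and forward only the relative item name. An illustrative walk-through; the directory values and item name are assumptions, not from the commit:

    import os

    os.environ.setdefault('AIL_HOME', '/opt/AIL')                     # assumed install dir
    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], 'PASTES')    # assumed Directories/pastes value

    # hypothetical relative item name received on the input queue
    paste = 'archive/pastebin.com_pro/2018/11/12/abc123.gz'

    # absolute path, used only for writing the item to disk
    filename = os.path.join(PASTES_FOLDER, paste)
    # filename == '/opt/AIL/PASTES/archive/pastebin.com_pro/2018/11/12/abc123.gz'

    # downstream modules now receive the relative name instead of the absolute one,
    # i.e. p.populate_set_out(paste) rather than p.populate_set_out(filename)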
@@ -47,7 +47,7 @@ def analyse(url, path):
         paste = Paste.Paste(path)
         print("Detected (libinjection) SQL in URL: ")
         print(urllib.request.unquote(url))
-        to_print = 'LibInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_path)
+        to_print = 'LibInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_rel_path)
         publisher.warning(to_print)
         #Send to duplicate
         p.populate_set_out(path, 'Duplicate')
@@ -75,10 +75,11 @@ if __name__ == '__main__':
             PST.save_attribute_redis("p_max_length_line", lines_infos[1])

             # FIXME Not used.
-            PST.store.sadd("Pastes_Objects", PST.p_path)
+            PST.store.sadd("Pastes_Objects", PST.p_rel_path)
+            print(PST.p_rel_path)
             if lines_infos[1] < args.max:
-                p.populate_set_out( PST.p_path , 'LinesShort')
+                p.populate_set_out( PST.p_rel_path , 'LinesShort')
             else:
-                p.populate_set_out( PST.p_path , 'LinesLong')
+                p.populate_set_out( PST.p_rel_path , 'LinesLong')
         except IOError:
-            print("CRC Checksum Error on : ", PST.p_path)
+            print("CRC Checksum Error on : ", PST.p_rel_path)
@@ -78,7 +78,7 @@ if __name__ == "__main__":

             to_print = 'Mails;{};{};{};Checked {} e-mail(s);{}'.\
                 format(PST.p_source, PST.p_date, PST.p_name,
-                       MX_values[0], PST.p_path)
+                       MX_values[0], PST.p_rel_path)
             if MX_values[0] > is_critical:
                 publisher.warning(to_print)
                 #Send to duplicate
@@ -82,6 +82,8 @@ if __name__ == '__main__':
     ttl_key = cfg.getint("Module_Mixer", "ttl_duplicate")
     default_unnamed_feed_name = cfg.get("Module_Mixer", "default_unnamed_feed_name")

+    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes")) + '/'
+
     # STATS #
     processed_paste = 0
     processed_paste_per_feeder = {}
@@ -104,12 +106,14 @@ if __name__ == '__main__':
                 feeder_name.replace(" ","")
                 if 'import_dir' in feeder_name:
                     feeder_name = feeder_name.split('/')[1]
-                    paste_name = complete_paste

             except ValueError as e:
                 feeder_name = default_unnamed_feed_name
                 paste_name = complete_paste

+            # remove absolute path
+            paste_name = paste_name.replace(PASTES_FOLDER, '', 1)
+
             # Processed paste
             processed_paste += 1
             try:
@@ -119,6 +123,7 @@ if __name__ == '__main__':
                 processed_paste_per_feeder[feeder_name] = 1
                 duplicated_paste_per_feeder[feeder_name] = 0

+
             relay_message = "{0} {1}".format(paste_name, gzip64encoded)
             #relay_message = b" ".join( [paste_name, gzip64encoded] )
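The effect of the PASTES_FOLDER constant and the added paste_name.replace(PASTES_FOLDER, '', 1) line above, with assumed example values:

    PASTES_FOLDER = '/opt/AIL/PASTES/'       # assumed value; note the trailing '/' appended in this commit
    paste_name = '/opt/AIL/PASTES/archive/pastebin.com_pro/2018/11/12/abc123.gz'

    # remove absolute path: only the first occurrence of the prefix is stripped
    paste_name = paste_name.replace(PASTES_FOLDER, '', 1)
    assert paste_name == 'archive/pastebin.com_pro/2018/11/12/abc123.gz'

    # names that are already relative pass through unchanged
    assert 'foo/bar.gz'.replace(PASTES_FOLDER, '', 1) == 'foo/bar.gz'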
bin/Onion.py (16 changed lines)
@@ -167,7 +167,7 @@ if __name__ == "__main__":
             except TimeoutException:
                 encoded_list = []
                 p.incr_module_timeout_statistic()
-                print ("{0} processing timeout".format(PST.p_path))
+                print ("{0} processing timeout".format(PST.p_rel_path))
                 continue

             signal.alarm(0)

@@ -185,7 +185,7 @@ if __name__ == "__main__":
                 r_onion.sadd('i2p_domain', domain)
                 r_onion.sadd('i2p_link', url)
                 r_onion.sadd('i2p_domain_crawler_queue', domain)
-                msg = '{};{}'.format(url,PST.p_path)
+                msg = '{};{}'.format(url,PST.p_rel_path)
                 r_onion.sadd('i2p_crawler_queue', msg)
                 '''

@@ -200,10 +200,10 @@ if __name__ == "__main__":

             if not activate_crawler:
                 publisher.warning('{}Detected {} .onion(s);{}'.format(
-                    to_print, len(domains_list),PST.p_path))
+                    to_print, len(domains_list),PST.p_rel_path))
             else:
                 publisher.info('{}Detected {} .onion(s);{}'.format(
-                    to_print, len(domains_list),PST.p_path))
+                    to_print, len(domains_list),PST.p_rel_path))
             now = datetime.datetime.now()
             path = os.path.join('onions', str(now.year).zfill(4),
                                 str(now.month).zfill(2),

@@ -232,7 +232,7 @@ if __name__ == "__main__":
                     if not r_onion.sismember('onion_domain_crawler_queue', domain):
                         print('send to onion crawler')
                         r_onion.sadd('onion_domain_crawler_queue', domain)
-                        msg = '{};{}'.format(url,PST.p_path)
+                        msg = '{};{}'.format(url,PST.p_rel_path)
                         if not r_onion.hexists('onion_metadata:{}'.format(domain), 'first_seen'):
                             r_onion.sadd('onion_crawler_priority_queue', msg)
                             print('send to priority queue')

@@ -242,13 +242,13 @@ if __name__ == "__main__":

             else:
                 for url in fetch(p, r_cache, urls, domains_list, path):
-                    publisher.info('{}Checked {};{}'.format(to_print, url, PST.p_path))
+                    publisher.info('{}Checked {};{}'.format(to_print, url, PST.p_rel_path))

                 # TAG Item
-                msg = 'infoleak:automatic-detection="onion";{}'.format(PST.p_path)
+                msg = 'infoleak:automatic-detection="onion";{}'.format(PST.p_rel_path)
                 p.populate_set_out(msg, 'Tags')
             else:
-                publisher.info('{}Onion related;{}'.format(to_print, PST.p_path))
+                publisher.info('{}Onion related;{}'.format(to_print, PST.p_rel_path))

             prec_filename = filename
         else:
@@ -108,7 +108,7 @@ if __name__ == "__main__":
             try:
                 matched = compiled_regex.search(content)
             except TimeoutException:
-                print ("{0} processing timeout".format(paste.p_path))
+                print ("{0} processing timeout".format(paste.p_rel_path))
                 continue
             else:
                 signal.alarm(0)
@@ -54,7 +54,7 @@ if __name__ == "__main__":
         if len(releases) == 0:
             continue

-        to_print = 'Release;{};{};{};{} releases;{}'.format(paste.p_source, paste.p_date, paste.p_name, len(releases), paste.p_path)
+        to_print = 'Release;{};{};{};{} releases;{}'.format(paste.p_source, paste.p_date, paste.p_name, len(releases), paste.p_rel_path)
         print(to_print)
         if len(releases) > 30:
             publisher.warning(to_print)

@@ -63,7 +63,7 @@ if __name__ == "__main__":

         except TimeoutException:
             p.incr_module_timeout_statistic()
-            print ("{0} processing timeout".format(paste.p_path))
+            print ("{0} processing timeout".format(paste.p_rel_path))
             continue
         else:
             signal.alarm(0)
@@ -78,7 +78,7 @@ def analyse(url, path):
     if (result_path > 1) or (result_query > 1):
         print("Detected SQL in URL: ")
         print(urllib.request.unquote(url))
-        to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_path)
+        to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_rel_path)
         publisher.warning(to_print)
         #Send to duplicate
         p.populate_set_out(path, 'Duplicate')

@@ -95,7 +95,7 @@ def analyse(url, path):
     else:
         print("Potential SQL injection:")
         print(urllib.request.unquote(url))
-        to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Potential SQL injection", paste.p_path)
+        to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Potential SQL injection", paste.p_rel_path)
         publisher.info(to_print)
@@ -45,6 +45,7 @@ cfg = configparser.ConfigParser()
 cfg.read(configfile)

 sentiment_lexicon_file = cfg.get("Directories", "sentiment_lexicon_file")
+#time_clean_sentiment_db = 60*60

 def Analyse(message, server):
     path = message

@@ -157,9 +158,16 @@ if __name__ == '__main__':
                        db=p.config.get("ARDB_Sentiment", "db"),
                        decode_responses=True)

+    time1 = time.time()
+
     while True:
         message = p.get_from_set()
         if message is None:
+            #if int(time.time() - time1) > time_clean_sentiment_db:
+            #    clean_db()
+            #    time1 = time.time()
+            #    continue
+            #else:
             publisher.debug("{} queue is empty, waiting".format(config_section))
             time.sleep(1)
             continue
@@ -57,11 +57,11 @@ if __name__ == "__main__":
             try:
                 for word, score in paste._get_top_words().items():
                     if len(word) >= 4:
-                        msg = '{} {} {}'.format(paste.p_path, word, score)
+                        msg = '{} {} {}'.format(paste.p_rel_path, word, score)
                         p.populate_set_out(msg)
             except TimeoutException:
                 p.incr_module_timeout_statistic()
-                print ("{0} processing timeout".format(paste.p_path))
+                print ("{0} processing timeout".format(paste.p_rel_path))
                 continue
             else:
                 signal.alarm(0)
@@ -153,7 +153,7 @@ if __name__ == "__main__":

             pprint.pprint(A_values)
             publisher.info('Url;{};{};{};Checked {} URL;{}'.format(
-                PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_path))
+                PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_rel_path))
             prec_filename = filename

         else:
@@ -158,11 +158,7 @@ class HiddenServices(object):
         if father is None:
             return []
         l_crawled_pastes = []
-        paste_parent = father.replace(self.paste_directory+'/', '')
-        paste_childrens = self.r_serv_metadata.smembers('paste_children:{}'.format(paste_parent))
-        ## TODO: # FIXME: remove me
-        paste_children = self.r_serv_metadata.smembers('paste_children:{}'.format(father))
-        paste_childrens = paste_childrens | paste_children
+        paste_childrens = self.r_serv_metadata.smembers('paste_children:{}'.format(father))
         for children in paste_childrens:
             if self.domain in children:
                 l_crawled_pastes.append(children)

@@ -198,14 +194,9 @@ class HiddenServices(object):

         set_domain = set()
         for paste in l_paste:
-            paste_full = paste.replace(self.paste_directory+'/', '')
-            paste_childrens = self.r_serv_metadata.smembers('paste_children:{}'.format(paste_full))
-            ## TODO: # FIXME: remove me
-            paste_children = self.r_serv_metadata.smembers('paste_children:{}'.format(paste))
-            paste_childrens = paste_childrens | paste_children
+            paste_childrens = self.r_serv_metadata.smembers('paste_children:{}'.format(paste))
             for children in paste_childrens:
                 if not self.domain in children:
-                    print(children)
                     set_domain.add((children.split('.onion')[0]+'.onion').split('/')[-1])

         return set_domain

@@ -215,11 +206,7 @@ class HiddenServices(object):
         if father is None:
             return []
         l_crawled_pastes = []
-        paste_parent = father.replace(self.paste_directory+'/', '')
-        paste_childrens = self.r_serv_metadata.smembers('paste_children:{}'.format(paste_parent))
-        ## TODO: # FIXME: remove me
-        paste_children = self.r_serv_metadata.smembers('paste_children:{}'.format(father))
-        paste_childrens = paste_childrens | paste_children
+        paste_childrens = self.r_serv_metadata.smembers('paste_children:{}'.format(father))
        for children in paste_childrens:
             if not self.domain in children:
                 l_crawled_pastes.append(children)
@@ -82,14 +82,14 @@ class Paste(object):
                                                db=cfg.getint("ARDB_Metadata", "db"),
                                                decode_responses=True)

-        PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes"))
-        if PASTES_FOLDER not in p_path:
+        self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes"))
+        if self.PASTES_FOLDER not in p_path:
             self.p_rel_path = p_path
-            p_path = os.path.join(PASTES_FOLDER, p_path)
+            self.p_path = os.path.join(self.PASTES_FOLDER, p_path)
         else:
-            self.p_rel_path = None
-
-        self.p_path = p_path
+            self.p_path = p_path
+            self.p_rel_path = p_path.replace(self.PASTES_FOLDER+'/', '', 1)

         self.p_name = os.path.basename(self.p_path)
         self.p_size = round(os.path.getsize(self.p_path)/1024.0, 2)
         self.p_mime = magic.from_buffer("test", mime=True)
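A runnable sketch of the reworked constructor logic above; the PASTES_FOLDER value and the item name are assumptions. Whichever form the caller passes, both p_path and p_rel_path end up populated:

    import os

    PASTES_FOLDER = '/opt/AIL/PASTES'        # assumed AIL_HOME + Directories/pastes

    def normalize(p_path):
        # mirrors the two branches of the new __init__
        if PASTES_FOLDER not in p_path:
            p_rel_path = p_path
            p_path = os.path.join(PASTES_FOLDER, p_path)
        else:
            p_rel_path = p_path.replace(PASTES_FOLDER + '/', '', 1)
        return p_path, p_rel_path

    # both call forms yield the same pair
    print(normalize('archive/pastebin.com_pro/2018/11/12/abc123.gz'))
    print(normalize('/opt/AIL/PASTES/archive/pastebin.com_pro/2018/11/12/abc123.gz'))
    # ('/opt/AIL/PASTES/archive/pastebin.com_pro/2018/11/12/abc123.gz',
    #  'archive/pastebin.com_pro/2018/11/12/abc123.gz')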
@@ -101,7 +101,7 @@ class Paste(object):

         var = self.p_path.split('/')
         self.p_date = Date(var[-4], var[-3], var[-2])
-        self.p_rel_path = os.path.join(var[-4], var[-3], var[-2], self.p_name)
+        self.p_date_path = os.path.join(var[-4], var[-3], var[-2], self.p_name)
         self.p_source = var[-5]
         self.supposed_url = 'https://{}/{}'.format(self.p_source.replace('_pro', ''), var[-1].split('.gz')[0])
@@ -296,9 +296,13 @@ class Paste(object):
             return False, var

     def _get_p_duplicate(self):
-        self.p_duplicate = self.store_metadata.smembers('dup:'+self.p_path)
-        if self.p_rel_path is not None:
-            self.p_duplicate.union( self.store_metadata.smembers('dup:'+self.p_rel_path) )
+        p_duplicate = self.store_metadata.smembers('dup:'+self.p_path)
+        # remove absolute path #fix-db
+        if p_duplicate:
+            for duplicate_string in p_duplicate:
+                self.store_metadata.srem('dup:'+self.p_path, duplicate_string)
+                self.store_metadata.sadd('dup:'+self.p_rel_path, duplicate_string.replace(self.PASTES_FOLDER+'/', '', 1))
+
+        self.p_duplicate = self.store_metadata.smembers('dup:'+self.p_rel_path)
         if self.p_duplicate is not None:
             return list(self.p_duplicate)
         else:
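The reworked _get_p_duplicate above migrates old absolute-path 'dup:' keys the first time they are read. An equivalent standalone sketch of that rename pattern; the Redis endpoint, database index and key values are assumptions:

    import redis

    r = redis.StrictRedis(host='localhost', port=6382, db=0, decode_responses=True)   # assumed ARDB_Metadata endpoint

    PASTES_FOLDER = '/opt/AIL/PASTES'
    p_path = PASTES_FOLDER + '/archive/pastebin.com_pro/2018/11/12/abc123.gz'
    p_rel_path = p_path.replace(PASTES_FOLDER + '/', '', 1)

    # move every member of the old absolute-path set into the relative-path set,
    # stripping the prefix from the stored values as well
    for member in r.smembers('dup:' + p_path):
        r.srem('dup:' + p_path, member)
        r.sadd('dup:' + p_rel_path, member.replace(PASTES_FOLDER + '/', '', 1))

    duplicates = r.smembers('dup:' + p_rel_path)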
@@ -318,6 +322,9 @@ class Paste(object):
     def get_p_rel_path(self):
         return self.p_rel_path

+    def get_p_date_path(self):
+        return self.p_date_path
+
     def save_all_attributes_redis(self, key=None):
         """
         Saving all the attributes in a "Redis-like" Database (Redis, LevelDB)
@@ -162,8 +162,7 @@ bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']

 UPLOAD_FOLDER = os.path.join(os.environ['AIL_FLASK'], 'submitted')

-PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes"))
-PASTES_FOLDERS = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/'
+PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "pastes")) + '/'
 SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "crawled_screenshot"))

 max_dashboard_logs = int(cfg.get("Flask", "max_dashboard_logs"))
@@ -25,7 +25,7 @@ baseUrl = Flask_config.baseUrl
 r_serv_metadata = Flask_config.r_serv_metadata
 vt_enabled = Flask_config.vt_enabled
 vt_auth = Flask_config.vt_auth
-PASTES_FOLDER = Flask_config.PASTES_FOLDERS
+PASTES_FOLDER = Flask_config.PASTES_FOLDER

 hashDecoded = Blueprint('hashDecoded', __name__, template_folder='templates')
@@ -26,7 +26,6 @@ r_cache = Flask_config.r_cache
 r_serv_onion = Flask_config.r_serv_onion
 r_serv_metadata = Flask_config.r_serv_metadata
 bootstrap_label = Flask_config.bootstrap_label
-PASTES_FOLDER = Flask_config.PASTES_FOLDER

 hiddenServices = Blueprint('hiddenServices', __name__, template_folder='templates')
@@ -579,16 +578,14 @@ def show_domain():
     origin_paste_name = h.get_origin_paste_name()
     origin_paste_tags = unpack_paste_tags(r_serv_metadata.smembers('tag:{}'.format(origin_paste)))
     paste_tags = []
-    path_name = []
     for path in l_pastes:
-        path_name.append(path.replace(PASTES_FOLDER+'/', ''))
         p_tags = r_serv_metadata.smembers('tag:'+path)
         paste_tags.append(unpack_paste_tags(p_tags))

     return render_template("showDomain.html", domain=domain, last_check=last_check, first_seen=first_seen,
                            l_pastes=l_pastes, paste_tags=paste_tags, bootstrap_label=bootstrap_label,
                            dict_links=dict_links,
-                           path_name=path_name, origin_paste_tags=origin_paste_tags, status=status,
+                           origin_paste_tags=origin_paste_tags, status=status,
                            origin_paste=origin_paste, origin_paste_name=origin_paste_name,
                            domain_tags=domain_tags, screenshot=screenshot)

@@ -599,7 +596,6 @@ def onion_son():
     h = HiddenServices(onion_domain, 'onion')
     l_pastes = h.get_last_crawled_pastes()
     l_son = h.get_domain_son(l_pastes)
-    print(l_son)
     return 'l_son'

 # ============= JSON ==============
@@ -29,7 +29,7 @@ r_serv_metadata = Flask_config.r_serv_metadata
 max_preview_char = Flask_config.max_preview_char
 max_preview_modal = Flask_config.max_preview_modal
 bootstrap_label = Flask_config.bootstrap_label
+PASTES_FOLDER = Flask_config.PASTES_FOLDER

 baseindexpath = os.path.join(os.environ['AIL_HOME'], cfg.get("Indexer", "path"))
 indexRegister_path = os.path.join(os.environ['AIL_HOME'],
@@ -133,8 +133,8 @@ def search():
         query = QueryParser("content", ix.schema).parse("".join(q))
         results = searcher.search_page(query, 1, pagelen=num_elem_to_get)
         for x in results:
-            r.append(x.items()[0][1])
-            path = x.items()[0][1]
+            r.append(x.items()[0][1].replace(PASTES_FOLDER, '', 1))
+            path = x.items()[0][1].replace(PASTES_FOLDER, '', 1)
             paste = Paste.Paste(path)
             content = paste.get_p_content()
             content_range = max_preview_char if len(content)>max_preview_char else len(content)-1

@@ -208,6 +208,7 @@ def get_more_search_result():
         results = searcher.search_page(query, page_offset, num_elem_to_get)
         for x in results:
             path = x.items()[0][1]
+            path = path.replace(PASTES_FOLDER, '', 1)
             path_array.append(path)
             paste = Paste.Paste(path)
             content = paste.get_p_content()
@@ -41,14 +41,15 @@ showsavedpastes = Blueprint('showsavedpastes', __name__, template_folder='templa
 # ============ FUNCTIONS ============

 def showpaste(content_range, requested_path):
-    relative_path = None
     if PASTES_FOLDER not in requested_path:
-        relative_path = requested_path
-        requested_path = os.path.join(PASTES_FOLDER, requested_path)
-    # remove old full path
-    #requested_path = requested_path.replace(PASTES_FOLDER, '')
+        # remove full path
+        requested_path_full = os.path.join(requested_path, PASTES_FOLDER)
+    else:
+        requested_path_full = requested_path
+        requested_path = requested_path.replace(PASTES_FOLDER, '', 1)

     # escape directory transversal
-    if os.path.commonprefix((os.path.realpath(requested_path),PASTES_FOLDER)) != PASTES_FOLDER:
+    if os.path.commonprefix((requested_path_full,PASTES_FOLDER)) != PASTES_FOLDER:
         return 'path transversal detected'

     vt_enabled = Flask_config.vt_enabled
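For reference, a self-contained sketch of a traversal guard in the same commonprefix style as the check above; the paths are assumptions, and os.path.realpath (used in the old version of the check) resolves '..' components before the comparison:

    import os

    PASTES_FOLDER = '/opt/AIL/PASTES/'       # assumed, with the trailing '/' from Flask_config

    def is_inside_pastes_folder(requested_path):
        # build the absolute candidate and make sure it stays under the pastes directory
        full_path = os.path.realpath(os.path.join(PASTES_FOLDER, requested_path))
        base = os.path.realpath(PASTES_FOLDER)
        return os.path.commonprefix((full_path, base)) == base

    print(is_inside_pastes_folder('archive/pastebin.com_pro/2018/11/12/abc123.gz'))  # True
    print(is_inside_pastes_folder('../../etc/passwd'))                               # False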
@@ -124,8 +125,6 @@ def showpaste(content_range, requested_path):
     active_taxonomies = r_serv_tags.smembers('active_taxonomies')

     l_tags = r_serv_metadata.smembers('tag:'+requested_path)
-    if relative_path is not None:
-        l_tags.union( r_serv_metadata.smembers('tag:'+relative_path) )

     #active galaxies
     active_galaxies = r_serv_tags.smembers('active_galaxies')
|
@ -190,7 +189,7 @@ def showpaste(content_range, requested_path):
|
||||||
crawler_metadata['domain'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'domain')
|
crawler_metadata['domain'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'domain')
|
||||||
crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'father')
|
crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'father')
|
||||||
crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+requested_path,'real_link')
|
crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+requested_path,'real_link')
|
||||||
crawler_metadata['screenshot'] = paste.get_p_rel_path()
|
crawler_metadata['screenshot'] = paste.get_p_date_path()
|
||||||
else:
|
else:
|
||||||
crawler_metadata['get_metadata'] = False
|
crawler_metadata['get_metadata'] = False
|
||||||
|
|
||||||
|
@ -406,6 +405,7 @@ def send_file_to_vt():
|
||||||
paste = request.form['paste']
|
paste = request.form['paste']
|
||||||
hash = request.form['hash']
|
hash = request.form['hash']
|
||||||
|
|
||||||
|
## TODO: # FIXME: path transversal
|
||||||
b64_full_path = os.path.join(os.environ['AIL_HOME'], b64_path)
|
b64_full_path = os.path.join(os.environ['AIL_HOME'], b64_path)
|
||||||
b64_content = ''
|
b64_content = ''
|
||||||
with open(b64_full_path, 'rb') as f:
|
with open(b64_full_path, 'rb') as f:
|
||||||
|
|