fix: [Scripts] Remove absolute path

This commit is contained in:
Terrtia 2018-11-02 16:07:27 +01:00
parent ea7fd8d54c
commit 5872cf9196
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
24 changed files with 76 additions and 67 deletions

View file

@ -40,7 +40,7 @@ def search_api_key(message):
print('found google api key') print('found google api key')
print(to_print) print(to_print)
publisher.warning('{}Checked {} found Google API Key;{}'.format( publisher.warning('{}Checked {} found Google API Key;{}'.format(
to_print, len(google_api_key), paste.p_path)) to_print, len(google_api_key), paste.p_rel_path))
msg = 'infoleak:automatic-detection="google-api-key";{}'.format(filename) msg = 'infoleak:automatic-detection="google-api-key";{}'.format(filename)
p.populate_set_out(msg, 'Tags') p.populate_set_out(msg, 'Tags')
@ -49,7 +49,7 @@ def search_api_key(message):
print(to_print) print(to_print)
total = len(aws_access_key) + len(aws_secret_key) total = len(aws_access_key) + len(aws_secret_key)
publisher.warning('{}Checked {} found AWS Key;{}'.format( publisher.warning('{}Checked {} found AWS Key;{}'.format(
to_print, total, paste.p_path)) to_print, total, paste.p_rel_path))
msg = 'infoleak:automatic-detection="aws-key";{}'.format(filename) msg = 'infoleak:automatic-detection="aws-key";{}'.format(filename)
p.populate_set_out(msg, 'Tags') p.populate_set_out(msg, 'Tags')
@ -86,7 +86,7 @@ if __name__ == "__main__":
if message is not None: if message is not None:
search_api_key(message) search_api_key(message)
else: else:
publisher.debug("Script ApiKey is Idling 10s") publisher.debug("Script ApiKey is Idling 10s")

View file

@ -43,8 +43,8 @@ if __name__ == "__main__":
# FIXME why not all saving everything there. # FIXME why not all saving everything there.
PST.save_all_attributes_redis() PST.save_all_attributes_redis()
# FIXME Not used. # FIXME Not used.
PST.store.sadd("Pastes_Objects", PST.p_path) PST.store.sadd("Pastes_Objects", PST.p_rel_path)
except IOError: except IOError:
print("CRC Checksum Failed on :", PST.p_path) print("CRC Checksum Failed on :", PST.p_rel_path)
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
PST.p_source, PST.p_date, PST.p_name)) PST.p_source, PST.p_date, PST.p_name))

View file

@ -67,7 +67,7 @@ def check_all_iban(l_iban, paste, filename):
if(nb_valid_iban > 0): if(nb_valid_iban > 0):
to_print = 'Iban;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name) to_print = 'Iban;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name)
publisher.warning('{}Checked found {} IBAN;{}'.format( publisher.warning('{}Checked found {} IBAN;{}'.format(
to_print, nb_valid_iban, paste.p_path)) to_print, nb_valid_iban, paste.p_rel_path))
msg = 'infoleak:automatic-detection="iban";{}'.format(filename) msg = 'infoleak:automatic-detection="iban";{}'.format(filename)
p.populate_set_out(msg, 'Tags') p.populate_set_out(msg, 'Tags')
@ -113,7 +113,7 @@ if __name__ == "__main__":
try: try:
l_iban = iban_regex.findall(content) l_iban = iban_regex.findall(content)
except TimeoutException: except TimeoutException:
print ("{0} processing timeout".format(paste.p_path)) print ("{0} processing timeout".format(paste.p_rel_path))
continue continue
else: else:
signal.alarm(0) signal.alarm(0)

View file

@ -32,7 +32,7 @@ def decode_base58(bc, length):
for char in bc: for char in bc:
n = n * 58 + digits58.index(char) n = n * 58 + digits58.index(char)
return n.to_bytes(length, 'big') return n.to_bytes(length, 'big')
def check_bc(bc): def check_bc(bc):
try: try:
bcbytes = decode_base58(bc, 25) bcbytes = decode_base58(bc, 25)
@ -75,7 +75,7 @@ def search_key(content, message, paste):
to_print = 'Bitcoin;{};{};{};'.format(paste.p_source, paste.p_date, to_print = 'Bitcoin;{};{};{};'.format(paste.p_source, paste.p_date,
paste.p_name) paste.p_name)
publisher.warning('{}Detected {} Bitcoin private key;{}'.format( publisher.warning('{}Detected {} Bitcoin private key;{}'.format(
to_print, len(bitcoin_private_key),paste.p_path)) to_print, len(bitcoin_private_key),paste.p_rel_path))
if __name__ == "__main__": if __name__ == "__main__":
publisher.port = 6380 publisher.port = 6380

View file

@ -89,16 +89,10 @@ if __name__ == "__main__":
paste = Paste.Paste(filename) paste = Paste.Paste(filename)
content = paste.get_p_content() content = paste.get_p_content()
#print('-----------------------------------------------------')
#print(filename)
#print(content)
#print('-----------------------------------------------------')
for categ, pattern in tmp_dict.items(): for categ, pattern in tmp_dict.items():
found = set(re.findall(pattern, content)) found = set(re.findall(pattern, content))
if len(found) >= matchingThreshold: if len(found) >= matchingThreshold:
msg = '{} {}'.format(paste.p_path, len(found)) msg = '{} {}'.format(paste.p_rel_path, len(found))
#msg = " ".join( [paste.p_path, bytes(len(found))] )
print(msg, categ) print(msg, categ)
p.populate_set_out(msg, categ) p.populate_set_out(msg, categ)
@ -106,4 +100,4 @@ if __name__ == "__main__":
publisher.info( publisher.info(
'Categ;{};{};{};Detected {} as {};{}'.format( 'Categ;{};{};{};Detected {} as {};{}'.format(
paste.p_source, paste.p_date, paste.p_name, paste.p_source, paste.p_date, paste.p_name,
len(found), categ, paste.p_path)) len(found), categ, paste.p_rel_path))

View file

@ -97,7 +97,7 @@ if __name__ == "__main__":
if sites_set: if sites_set:
message += ' Related websites: {}'.format( (', '.join(sites_set)) ) message += ' Related websites: {}'.format( (', '.join(sites_set)) )
to_print = 'Credential;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message, paste.p_path) to_print = 'Credential;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message, paste.p_rel_path)
print('\n '.join(creds)) print('\n '.join(creds))

View file

@ -77,9 +77,9 @@ if __name__ == "__main__":
paste.p_source, paste.p_date, paste.p_name) paste.p_source, paste.p_date, paste.p_name)
if (len(creditcard_set) > 0): if (len(creditcard_set) > 0):
publisher.warning('{}Checked {} valid number(s);{}'.format( publisher.warning('{}Checked {} valid number(s);{}'.format(
to_print, len(creditcard_set), paste.p_path)) to_print, len(creditcard_set), paste.p_rel_path))
print('{}Checked {} valid number(s);{}'.format( print('{}Checked {} valid number(s);{}'.format(
to_print, len(creditcard_set), paste.p_path)) to_print, len(creditcard_set), paste.p_rel_path))
#Send to duplicate #Send to duplicate
p.populate_set_out(filename, 'Duplicate') p.populate_set_out(filename, 'Duplicate')
#send to Browse_warning_paste #send to Browse_warning_paste
@ -89,7 +89,7 @@ if __name__ == "__main__":
msg = 'infoleak:automatic-detection="credit-card";{}'.format(filename) msg = 'infoleak:automatic-detection="credit-card";{}'.format(filename)
p.populate_set_out(msg, 'Tags') p.populate_set_out(msg, 'Tags')
else: else:
publisher.info('{}CreditCard related;{}'.format(to_print, paste.p_path)) publisher.info('{}CreditCard related;{}'.format(to_print, paste.p_rel_path))
else: else:
publisher.debug("Script creditcard is idling 1m") publisher.debug("Script creditcard is idling 1m")
time.sleep(10) time.sleep(10)

View file

@ -229,7 +229,7 @@ if __name__ == '__main__':
except TimeoutException: except TimeoutException:
encoded_list = [] encoded_list = []
p.incr_module_timeout_statistic() # add encoder type p.incr_module_timeout_statistic() # add encoder type
print ("{0} processing timeout".format(paste.p_path)) print ("{0} processing timeout".format(paste.p_rel_path))
continue continue
else: else:
signal.alarm(0) signal.alarm(0)

View file

@ -54,14 +54,14 @@ def main():
if localizeddomains: if localizeddomains:
print(localizeddomains) print(localizeddomains)
publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format( publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path)) PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_rel_path))
localizeddomains = c.localizedomain(cc=cc) localizeddomains = c.localizedomain(cc=cc)
if localizeddomains: if localizeddomains:
print(localizeddomains) print(localizeddomains)
publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format( publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path)) PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_rel_path))
except IOError: except IOError:
print("CRC Checksum Failed on :", PST.p_path) print("CRC Checksum Failed on :", PST.p_rel_path)
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
PST.p_source, PST.p_date, PST.p_name)) PST.p_source, PST.p_date, PST.p_name))

View file

@ -142,17 +142,17 @@ if __name__ == "__main__":
paste_date = paste_date paste_date = paste_date
paste_date = paste_date if paste_date != None else "No date available" paste_date = paste_date if paste_date != None else "No date available"
if paste_path != None: if paste_path != None:
if paste_path != PST.p_path: if paste_path != PST.p_rel_path:
hash_dico[dico_hash] = (hash_type, paste_path, percent, paste_date) hash_dico[dico_hash] = (hash_type, paste_path, percent, paste_date)
print('['+hash_type+'] '+'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent)) print('['+hash_type+'] '+'comparing: ' + str(PST.p_rel_path) + ' and ' + str(paste_path) + ' percentage: ' + str(percent))
except Exception: except Exception:
print('hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash) print('hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash)
# Add paste in DB after checking to prevent its analysis twice # Add paste in DB after checking to prevent its analysis twice
# hash_type_i -> index_i AND index_i -> PST.PATH # hash_type_i -> index_i AND index_i -> PST.PATH
r_serv1.set(index, PST.p_path) r_serv1.set(index, PST.p_rel_path)
r_serv1.set(index+'_date', PST._get_p_date()) r_serv1.set(index+'_date', PST._get_p_date())
r_serv1.sadd("INDEX", index) r_serv1.sadd("INDEX", index)
# Adding hashes in Redis # Adding hashes in Redis
@ -180,7 +180,7 @@ if __name__ == "__main__":
PST.__setattr__("p_duplicate", dupl) PST.__setattr__("p_duplicate", dupl)
PST.save_attribute_duplicate(dupl) PST.save_attribute_duplicate(dupl)
PST.save_others_pastes_attribute_duplicate(dupl) PST.save_others_pastes_attribute_duplicate(dupl)
publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_path)) publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_rel_path))
print('{}Detected {}'.format(to_print, len(dupl))) print('{}Detected {}'.format(to_print, len(dupl)))
print('') print('')
@ -191,5 +191,5 @@ if __name__ == "__main__":
except IOError: except IOError:
to_print = 'Duplicate;{};{};{};'.format( to_print = 'Duplicate;{};{};{};'.format(
PST.p_source, PST.p_date, PST.p_name) PST.p_source, PST.p_date, PST.p_name)
print("CRC Checksum Failed on :", PST.p_path) print("CRC Checksum Failed on :", PST.p_rel_path)
publisher.error('{}CRC Checksum Failed'.format(to_print)) publisher.error('{}CRC Checksum Failed'.format(to_print))

View file

@ -51,6 +51,9 @@ if __name__ == '__main__':
p = Process(config_section) p = Process(config_section)
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"))
print(PASTES_FOLDER)
# LOGGING # # LOGGING #
publisher.info("Feed Script started to receive & publish.") publisher.info("Feed Script started to receive & publish.")
@ -78,8 +81,9 @@ if __name__ == '__main__':
time.sleep(1) time.sleep(1)
continue continue
# Creating the full filepath # Creating the full filepath
filename = os.path.join(os.environ['AIL_HOME'], filename = os.path.join(PASTES_FOLDER, paste)
p.config.get("Directories", "pastes"), paste) print(filename)
print(paste)
dirname = os.path.dirname(filename) dirname = os.path.dirname(filename)
if not os.path.exists(dirname): if not os.path.exists(dirname):
@ -102,6 +106,7 @@ if __name__ == '__main__':
print(filename) print(filename)
print(type) print(type)
print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------') print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------')
''' '''
p.populate_set_out(filename)
p.populate_set_out(paste)
processed_paste+=1 processed_paste+=1

View file

@ -47,7 +47,7 @@ def analyse(url, path):
paste = Paste.Paste(path) paste = Paste.Paste(path)
print("Detected (libinjection) SQL in URL: ") print("Detected (libinjection) SQL in URL: ")
print(urllib.request.unquote(url)) print(urllib.request.unquote(url))
to_print = 'LibInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_path) to_print = 'LibInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_rel_path)
publisher.warning(to_print) publisher.warning(to_print)
#Send to duplicate #Send to duplicate
p.populate_set_out(path, 'Duplicate') p.populate_set_out(path, 'Duplicate')

View file

@ -75,10 +75,11 @@ if __name__ == '__main__':
PST.save_attribute_redis("p_max_length_line", lines_infos[1]) PST.save_attribute_redis("p_max_length_line", lines_infos[1])
# FIXME Not used. # FIXME Not used.
PST.store.sadd("Pastes_Objects", PST.p_path) PST.store.sadd("Pastes_Objects", PST.p_rel_path)
print(PST.p_rel_path)
if lines_infos[1] < args.max: if lines_infos[1] < args.max:
p.populate_set_out( PST.p_path , 'LinesShort') p.populate_set_out( PST.p_rel_path , 'LinesShort')
else: else:
p.populate_set_out( PST.p_path , 'LinesLong') p.populate_set_out( PST.p_rel_path , 'LinesLong')
except IOError: except IOError:
print("CRC Checksum Error on : ", PST.p_path) print("CRC Checksum Error on : ", PST.p_rel_path)

View file

@ -180,7 +180,7 @@ if __name__ == "__main__":
if flag_the_hive or flag_misp: if flag_the_hive or flag_misp:
tag, path = message.split(';') tag, path = message.split(';')
paste = Paste.Paste(path) paste = Paste.Paste(path)
source = '/'.join(paste.p_path.split('/')[-6:]) source = '/'.join(paste.p_rel_path.split('/')[-6:])
full_path = os.path.join(os.environ['AIL_HOME'], full_path = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "pastes"), path) p.config.get("Directories", "pastes"), path)

View file

@ -78,7 +78,7 @@ if __name__ == "__main__":
to_print = 'Mails;{};{};{};Checked {} e-mail(s);{}'.\ to_print = 'Mails;{};{};{};Checked {} e-mail(s);{}'.\
format(PST.p_source, PST.p_date, PST.p_name, format(PST.p_source, PST.p_date, PST.p_name,
MX_values[0], PST.p_path) MX_values[0], PST.p_rel_path)
if MX_values[0] > is_critical: if MX_values[0] > is_critical:
publisher.warning(to_print) publisher.warning(to_print)
#Send to duplicate #Send to duplicate

View file

@ -81,6 +81,8 @@ if __name__ == '__main__':
operation_mode = cfg.getint("Module_Mixer", "operation_mode") operation_mode = cfg.getint("Module_Mixer", "operation_mode")
ttl_key = cfg.getint("Module_Mixer", "ttl_duplicate") ttl_key = cfg.getint("Module_Mixer", "ttl_duplicate")
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"))
# STATS # # STATS #
processed_paste = 0 processed_paste = 0
processed_paste_per_feeder = {} processed_paste_per_feeder = {}
@ -103,11 +105,12 @@ if __name__ == '__main__':
feeder_name.replace(" ","") feeder_name.replace(" ","")
if 'import_dir' in feeder_name: if 'import_dir' in feeder_name:
feeder_name = feeder_name.split('/')[1] feeder_name = feeder_name.split('/')[1]
paste_name = complete_paste
except ValueError as e: except ValueError as e:
feeder_name = "unnamed_feeder" feeder_name = "unnamed_feeder"
paste_name = complete_paste
# remove absolute path
paste_name = complete_paste.replace(PASTES_FOLDER, '', 1)
# Processed paste # Processed paste
processed_paste += 1 processed_paste += 1
@ -118,6 +121,7 @@ if __name__ == '__main__':
processed_paste_per_feeder[feeder_name] = 1 processed_paste_per_feeder[feeder_name] = 1
duplicated_paste_per_feeder[feeder_name] = 0 duplicated_paste_per_feeder[feeder_name] = 0
relay_message = "{0} {1}".format(paste_name, gzip64encoded) relay_message = "{0} {1}".format(paste_name, gzip64encoded)
#relay_message = b" ".join( [paste_name, gzip64encoded] ) #relay_message = b" ".join( [paste_name, gzip64encoded] )

View file

@ -164,7 +164,7 @@ if __name__ == "__main__":
r_onion.sadd('i2p_domain', domain) r_onion.sadd('i2p_domain', domain)
r_onion.sadd('i2p_link', url) r_onion.sadd('i2p_link', url)
r_onion.sadd('i2p_domain_crawler_queue', domain) r_onion.sadd('i2p_domain_crawler_queue', domain)
msg = '{};{}'.format(url,PST.p_path) msg = '{};{}'.format(url,PST.p_rel_path)
r_onion.sadd('i2p_crawler_queue', msg) r_onion.sadd('i2p_crawler_queue', msg)
''' '''
@ -178,7 +178,7 @@ if __name__ == "__main__":
if len(domains_list) > 0: if len(domains_list) > 0:
publisher.warning('{}Detected {} .onion(s);{}'.format( publisher.warning('{}Detected {} .onion(s);{}'.format(
to_print, len(domains_list),PST.p_path)) to_print, len(domains_list),PST.p_rel_path))
now = datetime.datetime.now() now = datetime.datetime.now()
path = os.path.join('onions', str(now.year).zfill(4), path = os.path.join('onions', str(now.year).zfill(4),
str(now.month).zfill(2), str(now.month).zfill(2),
@ -203,19 +203,19 @@ if __name__ == "__main__":
if not r_onion.sismember('onion_domain_crawler_queue', domain): if not r_onion.sismember('onion_domain_crawler_queue', domain):
print('send to onion crawler') print('send to onion crawler')
r_onion.sadd('onion_domain_crawler_queue', domain) r_onion.sadd('onion_domain_crawler_queue', domain)
msg = '{};{}'.format(url,PST.p_path) msg = '{};{}'.format(url,PST.p_rel_path)
r_onion.sadd('onion_crawler_queue', msg) r_onion.sadd('onion_crawler_queue', msg)
#p.populate_set_out(msg, 'Crawler') #p.populate_set_out(msg, 'Crawler')
else: else:
for url in fetch(p, r_cache, urls, domains_list, path): for url in fetch(p, r_cache, urls, domains_list, path):
publisher.info('{}Checked {};{}'.format(to_print, url, PST.p_path)) publisher.info('{}Checked {};{}'.format(to_print, url, PST.p_rel_path))
p.populate_set_out('onion;{}'.format(PST.p_path), 'alertHandler') p.populate_set_out('onion;{}'.format(PST.p_rel_path), 'alertHandler')
msg = 'infoleak:automatic-detection="onion";{}'.format(PST.p_path) msg = 'infoleak:automatic-detection="onion";{}'.format(PST.p_rel_path)
p.populate_set_out(msg, 'Tags') p.populate_set_out(msg, 'Tags')
else: else:
publisher.info('{}Onion related;{}'.format(to_print, PST.p_path)) publisher.info('{}Onion related;{}'.format(to_print, PST.p_rel_path))
prec_filename = filename prec_filename = filename
else: else:

View file

@ -106,7 +106,7 @@ if __name__ == "__main__":
try: try:
matched = compiled_regex.search(content) matched = compiled_regex.search(content)
except TimeoutException: except TimeoutException:
print ("{0} processing timeout".format(paste.p_path)) print ("{0} processing timeout".format(paste.p_rel_path))
continue continue
else: else:
signal.alarm(0) signal.alarm(0)

View file

@ -54,7 +54,7 @@ if __name__ == "__main__":
if len(releases) == 0: if len(releases) == 0:
continue continue
to_print = 'Release;{};{};{};{} releases;{}'.format(paste.p_source, paste.p_date, paste.p_name, len(releases), paste.p_path) to_print = 'Release;{};{};{};{} releases;{}'.format(paste.p_source, paste.p_date, paste.p_name, len(releases), paste.p_rel_path)
print(to_print) print(to_print)
if len(releases) > 30: if len(releases) > 30:
publisher.warning(to_print) publisher.warning(to_print)
@ -63,7 +63,7 @@ if __name__ == "__main__":
except TimeoutException: except TimeoutException:
p.incr_module_timeout_statistic() p.incr_module_timeout_statistic()
print ("{0} processing timeout".format(paste.p_path)) print ("{0} processing timeout".format(paste.p_rel_path))
continue continue
else: else:
signal.alarm(0) signal.alarm(0)

View file

@ -78,7 +78,7 @@ def analyse(url, path):
if (result_path > 1) or (result_query > 1): if (result_path > 1) or (result_query > 1):
print("Detected SQL in URL: ") print("Detected SQL in URL: ")
print(urllib.request.unquote(url)) print(urllib.request.unquote(url))
to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_path) to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_rel_path)
publisher.warning(to_print) publisher.warning(to_print)
#Send to duplicate #Send to duplicate
p.populate_set_out(path, 'Duplicate') p.populate_set_out(path, 'Duplicate')
@ -97,7 +97,7 @@ def analyse(url, path):
else: else:
print("Potential SQL injection:") print("Potential SQL injection:")
print(urllib.request.unquote(url)) print(urllib.request.unquote(url))
to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Potential SQL injection", paste.p_path) to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Potential SQL injection", paste.p_rel_path)
publisher.info(to_print) publisher.info(to_print)

View file

@ -57,11 +57,11 @@ if __name__ == "__main__":
try: try:
for word, score in paste._get_top_words().items(): for word, score in paste._get_top_words().items():
if len(word) >= 4: if len(word) >= 4:
msg = '{} {} {}'.format(paste.p_path, word, score) msg = '{} {} {}'.format(paste.p_rel_path, word, score)
p.populate_set_out(msg) p.populate_set_out(msg)
except TimeoutException: except TimeoutException:
p.incr_module_timeout_statistic() p.incr_module_timeout_statistic()
print ("{0} processing timeout".format(paste.p_path)) print ("{0} processing timeout".format(paste.p_rel_path))
continue continue
else: else:
signal.alarm(0) signal.alarm(0)

View file

@ -153,7 +153,7 @@ if __name__ == "__main__":
pprint.pprint(A_values) pprint.pprint(A_values)
publisher.info('Url;{};{};{};Checked {} URL;{}'.format( publisher.info('Url;{};{};{};Checked {} URL;{}'.format(
PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_path)) PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_rel_path))
prec_filename = filename prec_filename = filename
else: else:

View file

@ -101,7 +101,7 @@ class Paste(object):
var = self.p_path.split('/') var = self.p_path.split('/')
self.p_date = Date(var[-4], var[-3], var[-2]) self.p_date = Date(var[-4], var[-3], var[-2])
self.p_rel_path = os.path.join(var[-4], var[-3], var[-2], self.p_name) self.p_date_path = os.path.join(var[-4], var[-3], var[-2], self.p_name)
self.p_source = var[-5] self.p_source = var[-5]
self.supposed_url = 'https://{}/{}'.format(self.p_source.replace('_pro', ''), var[-1].split('.gz')[0]) self.supposed_url = 'https://{}/{}'.format(self.p_source.replace('_pro', ''), var[-1].split('.gz')[0])
@ -304,6 +304,9 @@ class Paste(object):
def get_p_rel_path(self): def get_p_rel_path(self):
return self.p_rel_path return self.p_rel_path
def get_p_date_path(self):
return self.p_date_path
def save_all_attributes_redis(self, key=None): def save_all_attributes_redis(self, key=None):
""" """
Saving all the attributes in a "Redis-like" Database (Redis, LevelDB) Saving all the attributes in a "Redis-like" Database (Redis, LevelDB)

View file

@ -41,12 +41,10 @@ showsavedpastes = Blueprint('showsavedpastes', __name__, template_folder='templa
# ============ FUNCTIONS ============ # ============ FUNCTIONS ============
def showpaste(content_range, requested_path): def showpaste(content_range, requested_path):
relative_path = None if PASTES_FOLDER in requested_path:
if PASTES_FOLDER not in requested_path: # remove full path
relative_path = requested_path requested_path = requested_path.replace(PASTES_FOLDER, '', 1)
requested_path = os.path.join(PASTES_FOLDER, requested_path) #requested_path = os.path.join(PASTES_FOLDER, requested_path)
# remove old full path
#requested_path = requested_path.replace(PASTES_FOLDER, '')
# escape directory transversal # escape directory transversal
if os.path.commonprefix((os.path.realpath(requested_path),PASTES_FOLDER)) != PASTES_FOLDER: if os.path.commonprefix((os.path.realpath(requested_path),PASTES_FOLDER)) != PASTES_FOLDER:
return 'path transversal detected' return 'path transversal detected'
@ -124,8 +122,12 @@ def showpaste(content_range, requested_path):
active_taxonomies = r_serv_tags.smembers('active_taxonomies') active_taxonomies = r_serv_tags.smembers('active_taxonomies')
l_tags = r_serv_metadata.smembers('tag:'+requested_path) l_tags = r_serv_metadata.smembers('tag:'+requested_path)
print(l_tags)
if relative_path is not None: if relative_path is not None:
l_tags.union( r_serv_metadata.smembers('tag:'+relative_path) ) print('union')
print(relative_path)
print(r_serv_metadata.smembers('tag:'+relative_path))
l_tags = l_tags.union( r_serv_metadata.smembers('tag:'+relative_path) )
#active galaxies #active galaxies
active_galaxies = r_serv_tags.smembers('active_galaxies') active_galaxies = r_serv_tags.smembers('active_galaxies')
@ -189,7 +191,7 @@ def showpaste(content_range, requested_path):
crawler_metadata['domain'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'domain') crawler_metadata['domain'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'domain')
crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'father') crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'father')
crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+requested_path,'real_link') crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+requested_path,'real_link')
crawler_metadata['screenshot'] = paste.get_p_rel_path() crawler_metadata['screenshot'] = paste.get_p_date_path()
else: else:
crawler_metadata['get_metadata'] = False crawler_metadata['get_metadata'] = False