diff --git a/bin/import/ail_json_importer/Ail_feeder_urlextract.py b/bin/import/ail_json_importer/Ail_feeder_urlextract.py index 30388c09..5c8e588d 100755 --- a/bin/import/ail_json_importer/Ail_feeder_urlextract.py +++ b/bin/import/ail_json_importer/Ail_feeder_urlextract.py @@ -33,11 +33,17 @@ class Ail_feeder_urlextract(Default_json): # use twitter timestamp ? item_date = datetime.date.today().strftime("%Y/%m/%d") item_id = str(self.json_item['meta']['twitter:url-extracted']) + item_id = item_id.split('//') + if len(item_id) > 1: + item_id = ''.join(item_id[1:]) + else: + item_id = item_id[0] + item_id = item_id.replace('/', '_') if len(item_id) > 215: item_id = '{}{}.gz'.format(item_id[:215], str(uuid.uuid4())) else: item_id = '{}{}.gz'.format(item_id, str(uuid.uuid4())) - return os.path.join('urlextract', item_date, item_id) + '.gz' + return os.path.join('urlextract', item_date, item_id) # # TODO: def process_json_meta(self, process, item_id): diff --git a/bin/lib/item_basic.py b/bin/lib/item_basic.py index 35674301..64a6d78c 100755 --- a/bin/lib/item_basic.py +++ b/bin/lib/item_basic.py @@ -11,6 +11,8 @@ config_loader = ConfigLoader.ConfigLoader() # get and sanityze PASTE DIRECTORY PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' PASTES_FOLDER = os.path.join(os.path.realpath(PASTES_FOLDER), '') + +r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") config_loader = None def exist_item(item_id):