From 546d6538fd25cbf701b220b4440699f776367cb7 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Thu, 24 Aug 2023 14:37:50 +0200
Subject: [PATCH] chg: [mail exporter] add obj content extract for each regex match

---
 bin/exporter/MailExporter.py  |  1 +
 bin/trackers/Tracker_Regex.py | 44 +++++++++++++++++++++++++-----
 2 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/bin/exporter/MailExporter.py b/bin/exporter/MailExporter.py
index 40ee1708..41074d7b 100755
--- a/bin/exporter/MailExporter.py
+++ b/bin/exporter/MailExporter.py
@@ -145,5 +145,6 @@ class MailExporterTracker(MailExporter):
         body = f"AIL Framework, New occurrence for {tracker_type} tracker: {tracker_name}\n"
         body += f'Item: {obj.id}\nurl:{obj.get_link()}'
 
+        # print(body)
         for mail in tracker.get_mails():
             self._export(mail, subject, body)
diff --git a/bin/trackers/Tracker_Regex.py b/bin/trackers/Tracker_Regex.py
index 5cc06410..db35f239 100755
--- a/bin/trackers/Tracker_Regex.py
+++ b/bin/trackers/Tracker_Regex.py
@@ -41,6 +41,8 @@ class Tracker_Regex(AbstractModule):
         self.tracked_regexs = Tracker.get_tracked_regexs()
         self.last_refresh = time.time()
 
+        self.obj = None
+
         # Exporter
         self.exporters = {'mail': MailExporterTracker(),
                           'webhook': WebHookExporterTracker()}
@@ -56,6 +58,7 @@ class Tracker_Regex(AbstractModule):
             print('Tracked regex refreshed')
 
         obj = ail_objects.get_object(obj_type, subtype, obj_id)
+        self.obj = obj
         obj_id = obj.get_id()
         obj_type = obj.get_type()
 
@@ -66,12 +69,39 @@ class Tracker_Regex(AbstractModule):
         content = obj.get_content()
 
         for dict_regex in self.tracked_regexs[obj_type]:
-            matched = self.regex_findall(dict_regex['regex'], obj_id, content)
-            if matched:
-                self.new_tracker_found(dict_regex['tracked'], 'regex', obj)
+            matches = self.regex_finditer(dict_regex['regex'], obj_id, content)
+            if matches:
+                self.new_tracker_found(dict_regex['tracked'], 'regex', obj, matches)
 
-    def new_tracker_found(self, tracker_name, tracker_type, obj):
+    def extract_matches(self, re_matches, limit=500, lines=5):
+        """Return every match together with an excerpt of the surrounding content.
+
+        :param re_matches: iterable of indexable matches, read as (start, end, value)
+        :param limit: max number of characters kept on each side of a match
+        :param lines: max number of whole lines kept on each side of a match
+        :return: list of (value, excerpt) tuples
+        """
+        matches = []
+        content = self.obj.get_content()
+        for match in re_matches:
+            start = match[0]
+            value = match[2]
+            end = match[1]
+
+            # Start: keepends=True so the original line breaks survive the re-join
+            str_start = content[max(start - limit, 0):start].splitlines(keepends=True)
+            # -(lines + 1): `lines` whole lines + the partial line holding the match
+            str_start = ''.join(str_start[-(lines + 1):])
+
+            # End: slicing past the end of content is clamped by Python itself
+            str_end = content[end:end + limit].splitlines(keepends=True)
+            str_end = ''.join(str_end[:lines + 1])
+            matches.append((value, f'{str_start}{value}{str_end}'))
+        return matches
+
+    def new_tracker_found(self, tracker_name, tracker_type, obj, re_matches):
         obj_id = obj.get_id()
+        matches = None
         for tracker_uuid in Tracker.get_trackers_by_tracked_obj_type(tracker_type, obj.get_type(), tracker_name):
             tracker = Tracker.Tracker(tracker_uuid)
 
@@ -93,8 +123,9 @@ class Tracker_Regex(AbstractModule):
                     obj.add_tag(tag)
 
             if tracker.mail_export():
-                # TODO add matches + custom subjects
-                self.exporters['mail'].export(tracker, obj)
+                if matches is None:
+                    matches = self.extract_matches(re_matches)
+                self.exporters['mail'].export(tracker, obj, matches)
 
             if tracker.webhook_export():
                 self.exporters['webhook'].export(tracker, obj)
@@ -103,4 +134,3 @@ class Tracker_Regex(AbstractModule):
 if __name__ == "__main__":
     module = Tracker_Regex()
     module.run()
-    # module.compute('submitted/2023/05/02/submitted_b1e518f1-703b-40f6-8238-d1c22888197e.gz')