chg: [crawler] submit free text of urls to crawl

This commit is contained in:
terrtia 2024-10-09 15:05:27 +02:00
parent 9d26a47c17
commit 1505bf0157
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
4 changed files with 94 additions and 21 deletions

View file

@@ -7,7 +7,6 @@ Regex Helper
import os
import logging.config
import phonenumbers
import re
import sys
import uuid
@@ -20,7 +19,6 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from lib import ail_logger
from lib import ConfigLoader
# from lib import Statistics
logging.config.dictConfig(ail_logger.get_config())
logger = logging.getLogger()
@@ -171,6 +169,7 @@ def regex_search(r_key, regex, item_id, content, max_time=30):
## Phone Regexs ##
def _regex_phone_iter(r_key, country_code, content):
import phonenumbers
iterator = phonenumbers.PhoneNumberMatcher(content, country_code)
for match in iterator:
value = match.raw_string