fix: [ZMQ Feeder] performance: replace zmq recv NOBLOCK by Poller

This commit is contained in:
Terrtia 2020-02-27 13:23:40 +01:00
parent 40b853cbe3
commit 998f8cc8e1
No known key found for this signature in database
GPG key ID: 1E1B1F50D84613D0
3 changed files with 17 additions and 13 deletions

View file

@ -95,7 +95,7 @@ if __name__ == '__main__':
#publisher.info(to_print)
time_1 = time.time()
processed_paste = 0
time.sleep(1)
time.sleep(0.5)
continue
# remove PASTES_FOLDER from item path (crawled item + submitted)

View file

@ -53,13 +53,14 @@ class PubSub(object): ## TODO: remove config, use ConfigLoader by default
self.zmq_sub = True
context = zmq.Context()
# Get all feeds
self.subscribers = []
addresses = self.config.get(conn_name, 'address')
for address in addresses.split(','):
new_sub = context.socket(zmq.SUB)
new_sub.connect(address)
new_sub.setsockopt_string(zmq.SUBSCRIBE, channel)
self.subscribers.append(new_sub)
subscriber = context.socket(zmq.SUB)
subscriber.connect(address)
subscriber.setsockopt_string(zmq.SUBSCRIBE, channel)
self.subscribers.append(subscriber)
def setup_publish(self, conn_name):
if self.config.has_section(conn_name):
@ -96,14 +97,18 @@ class PubSub(object): ## TODO: remove config, use ConfigLoader by default
if msg.get('data', None) is not None:
yield msg['data']
elif self.zmq_sub:
# Initialize poll set
poller = zmq.Poller()
for subscriber in self.subscribers:
poller.register(subscriber, zmq.POLLIN)
while True:
for sub in self.subscribers:
try:
msg = sub.recv(zmq.NOBLOCK)
yield msg.split(b" ", 1)[1]
except zmq.error.Again as e:
time.sleep(0.2)
pass
socks = dict(poller.poll())
for subscriber in self.subscribers:
if subscriber in socks:
message = subscriber.recv()
yield message.split(b' ', 1)[1]
else:
raise Exception('No subscribe function defined')

View file

@ -186,7 +186,6 @@ if __name__ == '__main__':
print("Empty Paste: not processed")
publisher.debug("Empty Paste: {0} not processed".format(message))
else:
print("Empty Queues: Waiting...")
if int(time.time() - time_1) > refresh_time:
# update internal feeder