Commit b35ce062a79167dfac2129a40ddd0b0d3f005dbd

Authored by Sergey Matvienko
Committed by Andrew Shvayka
1 parent 3362ab37

queue consumer: implemented nonblocking subscribe method to prevent locks on rep…

…artition event. moved message decoding out of the lock to reduce locking time. using monotonic time instead wall-clock. added debug messages
... ... @@ -21,22 +21,28 @@ import org.thingsboard.server.common.msg.queue.TopicPartitionInfo;
21 21 import org.thingsboard.server.queue.TbQueueConsumer;
22 22 import org.thingsboard.server.queue.TbQueueMsg;
23 23
  24 +import javax.annotation.Nonnull;
24 25 import java.io.IOException;
25 26 import java.util.ArrayList;
26 27 import java.util.Collections;
27 28 import java.util.List;
  29 +import java.util.Queue;
28 30 import java.util.Set;
29   -import java.util.concurrent.locks.Lock;
  31 +import java.util.concurrent.ConcurrentLinkedQueue;
  32 +import java.util.concurrent.TimeUnit;
30 33 import java.util.concurrent.locks.ReentrantLock;
31 34 import java.util.stream.Collectors;
32 35
  36 +import static java.util.Collections.emptyList;
  37 +
33 38 @Slf4j
34 39 public abstract class AbstractTbQueueConsumerTemplate<R, T extends TbQueueMsg> implements TbQueueConsumer<T> {
35 40
36 41 private volatile boolean subscribed;
37 42 protected volatile boolean stopped = false;
38 43 protected volatile Set<TopicPartitionInfo> partitions;
39   - protected final Lock consumerLock = new ReentrantLock();
  44 + protected final ReentrantLock consumerLock = new ReentrantLock(); //NonfairSync
  45 + final Queue<Set<TopicPartitionInfo>> subscribeQueue = new ConcurrentLinkedQueue<>();
40 46
41 47 @Getter
42 48 private final String topic;
... ... @@ -47,84 +53,101 @@ public abstract class AbstractTbQueueConsumerTemplate<R, T extends TbQueueMsg> i
47 53
48 54 @Override
49 55 public void subscribe() {
50   - consumerLock.lock();
51   - try {
52   - partitions = Collections.singleton(new TopicPartitionInfo(topic, null, null, true));
53   - subscribed = false;
54   - } finally {
55   - consumerLock.unlock();
  56 + log.info("enqueue topic subscribe {} ", topic);
  57 + if (stopped) {
  58 + log.error("trying subscribe, but consumer stopped for topic {}", topic);
  59 + return;
56 60 }
  61 + subscribeQueue.add(Collections.singleton(new TopicPartitionInfo(topic, null, null, true)));
57 62 }
58 63
59 64 @Override
60 65 public void subscribe(Set<TopicPartitionInfo> partitions) {
61   - consumerLock.lock();
62   - try {
63   - this.partitions = partitions;
64   - subscribed = false;
65   - } finally {
66   - consumerLock.unlock();
  66 + log.info("enqueue topics subscribe {} ", partitions);
  67 + if (stopped) {
  68 + log.error("trying subscribe, but consumer stopped for topic {}", topic);
  69 + return;
67 70 }
  71 + subscribeQueue.add(partitions);
68 72 }
69 73
70 74 @Override
71 75 public List<T> poll(long durationInMillis) {
  76 + List<R> records;
  77 + long startNanos = System.nanoTime();
  78 + if (stopped) {
  79 + return errorAndReturnEmpty();
  80 + }
72 81 if (!subscribed && partitions == null) {
73   - try {
74   - Thread.sleep(durationInMillis);
75   - } catch (InterruptedException e) {
76   - log.debug("Failed to await subscription", e);
  82 + return sleepAndReturnEmpty(startNanos, durationInMillis);
  83 + }
  84 +
  85 + if (consumerLock.isLocked()) {
  86 + log.error("poll. consumerLock is locked. will wait with no timeout. it looks like a race conditions or deadlock", new RuntimeException("stacktrace"));
  87 + }
  88 +
  89 + consumerLock.lock();
  90 + try {
  91 + while (!subscribeQueue.isEmpty()) {
  92 + subscribed = false;
  93 + partitions = subscribeQueue.poll();
  94 + }
  95 + if (!subscribed) {
  96 + List<String> topicNames = partitions.stream().map(TopicPartitionInfo::getFullTopicName).collect(Collectors.toList());
  97 + doSubscribe(topicNames);
  98 + subscribed = true;
77 99 }
78   - } else {
79   - long pollStartTs = System.currentTimeMillis();
80   - consumerLock.lock();
  100 + records = partitions.isEmpty() ? emptyList() : doPoll(durationInMillis);
  101 + } finally {
  102 + consumerLock.unlock();
  103 + }
  104 +
  105 + if (records.isEmpty()) { return sleepAndReturnEmpty(startNanos, durationInMillis); }
  106 +
  107 + return decodeRecords(records);
  108 + }
  109 +
  110 + @Nonnull
  111 + List<T> decodeRecords(@Nonnull List<R> records) {
  112 + List<T> result = new ArrayList<>(records.size());
  113 + records.forEach(record -> {
81 114 try {
82   - if (!subscribed) {
83   - List<String> topicNames = partitions.stream().map(TopicPartitionInfo::getFullTopicName).collect(Collectors.toList());
84   - doSubscribe(topicNames);
85   - subscribed = true;
  115 + if (record != null) {
  116 + result.add(decode(record));
86 117 }
  118 + } catch (IOException e) {
  119 + log.error("Failed decode record: [{}]", record);
  120 + throw new RuntimeException("Failed to decode record: ", e);
  121 + }
  122 + });
  123 + return result;
  124 + }
87 125
88   - List<R> records;
89   - if (partitions.isEmpty()) {
90   - records = Collections.emptyList();
91   - } else {
92   - records = doPoll(durationInMillis);
93   - }
94   - if (!records.isEmpty()) {
95   - List<T> result = new ArrayList<>(records.size());
96   - records.forEach(record -> {
97   - try {
98   - if (record != null) {
99   - result.add(decode(record));
100   - }
101   - } catch (IOException e) {
102   - log.error("Failed decode record: [{}]", record);
103   - throw new RuntimeException("Failed to decode record: ", e);
104   - }
105   - });
106   - return result;
107   - } else {
108   - long pollDuration = System.currentTimeMillis() - pollStartTs;
109   - if (pollDuration < durationInMillis) {
110   - try {
111   - Thread.sleep(durationInMillis - pollDuration);
112   - } catch (InterruptedException e) {
113   - if (!stopped) {
114   - log.error("Failed to wait.", e);
115   - }
116   - }
117   - }
  126 + List<T> errorAndReturnEmpty() {
  127 + log.error("poll invoked but consumer stopped for topic" + topic, new RuntimeException("stacktrace"));
  128 + return emptyList();
  129 + }
  130 +
  131 + List<T> sleepAndReturnEmpty(final long startNanos, final long durationInMillis) {
  132 + long durationNanos = TimeUnit.MILLISECONDS.toNanos(durationInMillis);
  133 + long spentNanos = System.nanoTime() - startNanos;
  134 + if (spentNanos < durationNanos) {
  135 + try {
  136 + Thread.sleep(Math.max(TimeUnit.NANOSECONDS.toMillis(durationNanos - spentNanos), 1));
  137 + } catch (InterruptedException e) {
  138 + if (!stopped) {
  139 + log.error("Failed to wait", e);
118 140 }
119   - } finally {
120   - consumerLock.unlock();
121 141 }
122 142 }
123   - return Collections.emptyList();
  143 + return emptyList();
124 144 }
125 145
126 146 @Override
127 147 public void commit() {
  148 + if (consumerLock.isLocked()) {
  149 + log.error("commit. consumerLock is locked. will wait with no timeout. it looks like a race conditions or deadlock", new RuntimeException("stacktrace"));
  150 + }
128 151 consumerLock.lock();
129 152 try {
130 153 doCommit();
... ... @@ -135,6 +158,7 @@ public abstract class AbstractTbQueueConsumerTemplate<R, T extends TbQueueMsg> i
135 158
136 159 @Override
137 160 public void unsubscribe() {
  161 + log.info("unsubscribe topic and stop consumer {}", getTopic());
138 162 stopped = true;
139 163 consumerLock.lock();
140 164 try {
... ...
... ... @@ -72,8 +72,10 @@ public class TbKafkaConsumerTemplate<T extends TbQueueMsg> extends AbstractTbQue
72 72 protected void doSubscribe(List<String> topicNames) {
73 73 if (!topicNames.isEmpty()) {
74 74 topicNames.forEach(admin::createTopicIfNotExists);
  75 + log.info("subscribe topics {}", topicNames);
75 76 consumer.subscribe(topicNames);
76 77 } else {
  78 + log.info("unsubscribe due to empty topic list");
77 79 consumer.unsubscribe();
78 80 }
79 81 }
... ... @@ -102,6 +104,7 @@ public class TbKafkaConsumerTemplate<T extends TbQueueMsg> extends AbstractTbQue
102 104
103 105 @Override
104 106 protected void doUnsubscribe() {
  107 + log.info("unsubscribe topic and close consumer for topic {}", getTopic());
105 108 if (consumer != null) {
106 109 consumer.unsubscribe();
107 110 consumer.close();
... ...