Commit b35ce062a79167dfac2129a40ddd0b0d3f005dbd
Committed by
Andrew Shvayka
1 parent
3362ab37
queue consumer: implemented a non-blocking subscribe method to prevent locking on a repartition event. Moved message decoding out of the lock to reduce locking time. Uses monotonic time instead of wall-clock time. Added debug messages.
Showing
2 changed files
with
85 additions
and
58 deletions
@@ -21,22 +21,28 @@ import org.thingsboard.server.common.msg.queue.TopicPartitionInfo; | @@ -21,22 +21,28 @@ import org.thingsboard.server.common.msg.queue.TopicPartitionInfo; | ||
21 | import org.thingsboard.server.queue.TbQueueConsumer; | 21 | import org.thingsboard.server.queue.TbQueueConsumer; |
22 | import org.thingsboard.server.queue.TbQueueMsg; | 22 | import org.thingsboard.server.queue.TbQueueMsg; |
23 | 23 | ||
24 | +import javax.annotation.Nonnull; | ||
24 | import java.io.IOException; | 25 | import java.io.IOException; |
25 | import java.util.ArrayList; | 26 | import java.util.ArrayList; |
26 | import java.util.Collections; | 27 | import java.util.Collections; |
27 | import java.util.List; | 28 | import java.util.List; |
29 | +import java.util.Queue; | ||
28 | import java.util.Set; | 30 | import java.util.Set; |
29 | -import java.util.concurrent.locks.Lock; | 31 | +import java.util.concurrent.ConcurrentLinkedQueue; |
32 | +import java.util.concurrent.TimeUnit; | ||
30 | import java.util.concurrent.locks.ReentrantLock; | 33 | import java.util.concurrent.locks.ReentrantLock; |
31 | import java.util.stream.Collectors; | 34 | import java.util.stream.Collectors; |
32 | 35 | ||
36 | +import static java.util.Collections.emptyList; | ||
37 | + | ||
33 | @Slf4j | 38 | @Slf4j |
34 | public abstract class AbstractTbQueueConsumerTemplate<R, T extends TbQueueMsg> implements TbQueueConsumer<T> { | 39 | public abstract class AbstractTbQueueConsumerTemplate<R, T extends TbQueueMsg> implements TbQueueConsumer<T> { |
35 | 40 | ||
36 | private volatile boolean subscribed; | 41 | private volatile boolean subscribed; |
37 | protected volatile boolean stopped = false; | 42 | protected volatile boolean stopped = false; |
38 | protected volatile Set<TopicPartitionInfo> partitions; | 43 | protected volatile Set<TopicPartitionInfo> partitions; |
39 | - protected final Lock consumerLock = new ReentrantLock(); | 44 | + protected final ReentrantLock consumerLock = new ReentrantLock(); //NonfairSync |
45 | + final Queue<Set<TopicPartitionInfo>> subscribeQueue = new ConcurrentLinkedQueue<>(); | ||
40 | 46 | ||
41 | @Getter | 47 | @Getter |
42 | private final String topic; | 48 | private final String topic; |
@@ -47,84 +53,101 @@ public abstract class AbstractTbQueueConsumerTemplate<R, T extends TbQueueMsg> i | @@ -47,84 +53,101 @@ public abstract class AbstractTbQueueConsumerTemplate<R, T extends TbQueueMsg> i | ||
47 | 53 | ||
48 | @Override | 54 | @Override |
49 | public void subscribe() { | 55 | public void subscribe() { |
50 | - consumerLock.lock(); | ||
51 | - try { | ||
52 | - partitions = Collections.singleton(new TopicPartitionInfo(topic, null, null, true)); | ||
53 | - subscribed = false; | ||
54 | - } finally { | ||
55 | - consumerLock.unlock(); | 56 | + log.info("enqueue topic subscribe {} ", topic); |
57 | + if (stopped) { | ||
58 | + log.error("trying subscribe, but consumer stopped for topic {}", topic); | ||
59 | + return; | ||
56 | } | 60 | } |
61 | + subscribeQueue.add(Collections.singleton(new TopicPartitionInfo(topic, null, null, true))); | ||
57 | } | 62 | } |
58 | 63 | ||
59 | @Override | 64 | @Override |
60 | public void subscribe(Set<TopicPartitionInfo> partitions) { | 65 | public void subscribe(Set<TopicPartitionInfo> partitions) { |
61 | - consumerLock.lock(); | ||
62 | - try { | ||
63 | - this.partitions = partitions; | ||
64 | - subscribed = false; | ||
65 | - } finally { | ||
66 | - consumerLock.unlock(); | 66 | + log.info("enqueue topics subscribe {} ", partitions); |
67 | + if (stopped) { | ||
68 | + log.error("trying subscribe, but consumer stopped for topic {}", topic); | ||
69 | + return; | ||
67 | } | 70 | } |
71 | + subscribeQueue.add(partitions); | ||
68 | } | 72 | } |
69 | 73 | ||
70 | @Override | 74 | @Override |
71 | public List<T> poll(long durationInMillis) { | 75 | public List<T> poll(long durationInMillis) { |
76 | + List<R> records; | ||
77 | + long startNanos = System.nanoTime(); | ||
78 | + if (stopped) { | ||
79 | + return errorAndReturnEmpty(); | ||
80 | + } | ||
72 | if (!subscribed && partitions == null) { | 81 | if (!subscribed && partitions == null) { |
73 | - try { | ||
74 | - Thread.sleep(durationInMillis); | ||
75 | - } catch (InterruptedException e) { | ||
76 | - log.debug("Failed to await subscription", e); | 82 | + return sleepAndReturnEmpty(startNanos, durationInMillis); |
83 | + } | ||
84 | + | ||
85 | + if (consumerLock.isLocked()) { | ||
86 | + log.error("poll. consumerLock is locked. will wait with no timeout. it looks like a race conditions or deadlock", new RuntimeException("stacktrace")); | ||
87 | + } | ||
88 | + | ||
89 | + consumerLock.lock(); | ||
90 | + try { | ||
91 | + while (!subscribeQueue.isEmpty()) { | ||
92 | + subscribed = false; | ||
93 | + partitions = subscribeQueue.poll(); | ||
94 | + } | ||
95 | + if (!subscribed) { | ||
96 | + List<String> topicNames = partitions.stream().map(TopicPartitionInfo::getFullTopicName).collect(Collectors.toList()); | ||
97 | + doSubscribe(topicNames); | ||
98 | + subscribed = true; | ||
77 | } | 99 | } |
78 | - } else { | ||
79 | - long pollStartTs = System.currentTimeMillis(); | ||
80 | - consumerLock.lock(); | 100 | + records = partitions.isEmpty() ? emptyList() : doPoll(durationInMillis); |
101 | + } finally { | ||
102 | + consumerLock.unlock(); | ||
103 | + } | ||
104 | + | ||
105 | + if (records.isEmpty()) { return sleepAndReturnEmpty(startNanos, durationInMillis); } | ||
106 | + | ||
107 | + return decodeRecords(records); | ||
108 | + } | ||
109 | + | ||
110 | + @Nonnull | ||
111 | + List<T> decodeRecords(@Nonnull List<R> records) { | ||
112 | + List<T> result = new ArrayList<>(records.size()); | ||
113 | + records.forEach(record -> { | ||
81 | try { | 114 | try { |
82 | - if (!subscribed) { | ||
83 | - List<String> topicNames = partitions.stream().map(TopicPartitionInfo::getFullTopicName).collect(Collectors.toList()); | ||
84 | - doSubscribe(topicNames); | ||
85 | - subscribed = true; | 115 | + if (record != null) { |
116 | + result.add(decode(record)); | ||
86 | } | 117 | } |
118 | + } catch (IOException e) { | ||
119 | + log.error("Failed decode record: [{}]", record); | ||
120 | + throw new RuntimeException("Failed to decode record: ", e); | ||
121 | + } | ||
122 | + }); | ||
123 | + return result; | ||
124 | + } | ||
87 | 125 | ||
88 | - List<R> records; | ||
89 | - if (partitions.isEmpty()) { | ||
90 | - records = Collections.emptyList(); | ||
91 | - } else { | ||
92 | - records = doPoll(durationInMillis); | ||
93 | - } | ||
94 | - if (!records.isEmpty()) { | ||
95 | - List<T> result = new ArrayList<>(records.size()); | ||
96 | - records.forEach(record -> { | ||
97 | - try { | ||
98 | - if (record != null) { | ||
99 | - result.add(decode(record)); | ||
100 | - } | ||
101 | - } catch (IOException e) { | ||
102 | - log.error("Failed decode record: [{}]", record); | ||
103 | - throw new RuntimeException("Failed to decode record: ", e); | ||
104 | - } | ||
105 | - }); | ||
106 | - return result; | ||
107 | - } else { | ||
108 | - long pollDuration = System.currentTimeMillis() - pollStartTs; | ||
109 | - if (pollDuration < durationInMillis) { | ||
110 | - try { | ||
111 | - Thread.sleep(durationInMillis - pollDuration); | ||
112 | - } catch (InterruptedException e) { | ||
113 | - if (!stopped) { | ||
114 | - log.error("Failed to wait.", e); | ||
115 | - } | ||
116 | - } | ||
117 | - } | 126 | + List<T> errorAndReturnEmpty() { |
127 | + log.error("poll invoked but consumer stopped for topic" + topic, new RuntimeException("stacktrace")); | ||
128 | + return emptyList(); | ||
129 | + } | ||
130 | + | ||
131 | + List<T> sleepAndReturnEmpty(final long startNanos, final long durationInMillis) { | ||
132 | + long durationNanos = TimeUnit.MILLISECONDS.toNanos(durationInMillis); | ||
133 | + long spentNanos = System.nanoTime() - startNanos; | ||
134 | + if (spentNanos < durationNanos) { | ||
135 | + try { | ||
136 | + Thread.sleep(Math.max(TimeUnit.NANOSECONDS.toMillis(durationNanos - spentNanos), 1)); | ||
137 | + } catch (InterruptedException e) { | ||
138 | + if (!stopped) { | ||
139 | + log.error("Failed to wait", e); | ||
118 | } | 140 | } |
119 | - } finally { | ||
120 | - consumerLock.unlock(); | ||
121 | } | 141 | } |
122 | } | 142 | } |
123 | - return Collections.emptyList(); | 143 | + return emptyList(); |
124 | } | 144 | } |
125 | 145 | ||
126 | @Override | 146 | @Override |
127 | public void commit() { | 147 | public void commit() { |
148 | + if (consumerLock.isLocked()) { | ||
149 | + log.error("commit. consumerLock is locked. will wait with no timeout. it looks like a race conditions or deadlock", new RuntimeException("stacktrace")); | ||
150 | + } | ||
128 | consumerLock.lock(); | 151 | consumerLock.lock(); |
129 | try { | 152 | try { |
130 | doCommit(); | 153 | doCommit(); |
@@ -135,6 +158,7 @@ public abstract class AbstractTbQueueConsumerTemplate<R, T extends TbQueueMsg> i | @@ -135,6 +158,7 @@ public abstract class AbstractTbQueueConsumerTemplate<R, T extends TbQueueMsg> i | ||
135 | 158 | ||
136 | @Override | 159 | @Override |
137 | public void unsubscribe() { | 160 | public void unsubscribe() { |
161 | + log.info("unsubscribe topic and stop consumer {}", getTopic()); | ||
138 | stopped = true; | 162 | stopped = true; |
139 | consumerLock.lock(); | 163 | consumerLock.lock(); |
140 | try { | 164 | try { |
@@ -72,8 +72,10 @@ public class TbKafkaConsumerTemplate<T extends TbQueueMsg> extends AbstractTbQue | @@ -72,8 +72,10 @@ public class TbKafkaConsumerTemplate<T extends TbQueueMsg> extends AbstractTbQue | ||
72 | protected void doSubscribe(List<String> topicNames) { | 72 | protected void doSubscribe(List<String> topicNames) { |
73 | if (!topicNames.isEmpty()) { | 73 | if (!topicNames.isEmpty()) { |
74 | topicNames.forEach(admin::createTopicIfNotExists); | 74 | topicNames.forEach(admin::createTopicIfNotExists); |
75 | + log.info("subscribe topics {}", topicNames); | ||
75 | consumer.subscribe(topicNames); | 76 | consumer.subscribe(topicNames); |
76 | } else { | 77 | } else { |
78 | + log.info("unsubscribe due to empty topic list"); | ||
77 | consumer.unsubscribe(); | 79 | consumer.unsubscribe(); |
78 | } | 80 | } |
79 | } | 81 | } |
@@ -102,6 +104,7 @@ public class TbKafkaConsumerTemplate<T extends TbQueueMsg> extends AbstractTbQue | @@ -102,6 +104,7 @@ public class TbKafkaConsumerTemplate<T extends TbQueueMsg> extends AbstractTbQue | ||
102 | 104 | ||
103 | @Override | 105 | @Override |
104 | protected void doUnsubscribe() { | 106 | protected void doUnsubscribe() { |
107 | + log.info("unsubscribe topic and close consumer for topic {}", getTopic()); | ||
105 | if (consumer != null) { | 108 | if (consumer != null) { |
106 | consumer.unsubscribe(); | 109 | consumer.unsubscribe(); |
107 | consumer.close(); | 110 | consumer.close(); |