Commit b35ce062a79167dfac2129a40ddd0b0d3f005dbd
Committed by
Andrew Shvayka
1 parent
3362ab37
queue consumer: implemented nonblocking subscribe method to prevent locks on rep…
…artition event. moved message decoding out of the lock to reduce locking time. using monotonic time instead wall-clock. added debug messages
Showing
2 changed files
with
85 additions
and
58 deletions
... | ... | @@ -21,22 +21,28 @@ import org.thingsboard.server.common.msg.queue.TopicPartitionInfo; |
21 | 21 | import org.thingsboard.server.queue.TbQueueConsumer; |
22 | 22 | import org.thingsboard.server.queue.TbQueueMsg; |
23 | 23 | |
24 | +import javax.annotation.Nonnull; | |
24 | 25 | import java.io.IOException; |
25 | 26 | import java.util.ArrayList; |
26 | 27 | import java.util.Collections; |
27 | 28 | import java.util.List; |
29 | +import java.util.Queue; | |
28 | 30 | import java.util.Set; |
29 | -import java.util.concurrent.locks.Lock; | |
31 | +import java.util.concurrent.ConcurrentLinkedQueue; | |
32 | +import java.util.concurrent.TimeUnit; | |
30 | 33 | import java.util.concurrent.locks.ReentrantLock; |
31 | 34 | import java.util.stream.Collectors; |
32 | 35 | |
36 | +import static java.util.Collections.emptyList; | |
37 | + | |
33 | 38 | @Slf4j |
34 | 39 | public abstract class AbstractTbQueueConsumerTemplate<R, T extends TbQueueMsg> implements TbQueueConsumer<T> { |
35 | 40 | |
36 | 41 | private volatile boolean subscribed; |
37 | 42 | protected volatile boolean stopped = false; |
38 | 43 | protected volatile Set<TopicPartitionInfo> partitions; |
39 | - protected final Lock consumerLock = new ReentrantLock(); | |
44 | + protected final ReentrantLock consumerLock = new ReentrantLock(); //NonfairSync | |
45 | + final Queue<Set<TopicPartitionInfo>> subscribeQueue = new ConcurrentLinkedQueue<>(); | |
40 | 46 | |
41 | 47 | @Getter |
42 | 48 | private final String topic; |
... | ... | @@ -47,84 +53,101 @@ public abstract class AbstractTbQueueConsumerTemplate<R, T extends TbQueueMsg> i |
47 | 53 | |
48 | 54 | @Override |
49 | 55 | public void subscribe() { |
50 | - consumerLock.lock(); | |
51 | - try { | |
52 | - partitions = Collections.singleton(new TopicPartitionInfo(topic, null, null, true)); | |
53 | - subscribed = false; | |
54 | - } finally { | |
55 | - consumerLock.unlock(); | |
56 | + log.info("enqueue topic subscribe {} ", topic); | |
57 | + if (stopped) { | |
58 | + log.error("trying subscribe, but consumer stopped for topic {}", topic); | |
59 | + return; | |
56 | 60 | } |
61 | + subscribeQueue.add(Collections.singleton(new TopicPartitionInfo(topic, null, null, true))); | |
57 | 62 | } |
58 | 63 | |
59 | 64 | @Override |
60 | 65 | public void subscribe(Set<TopicPartitionInfo> partitions) { |
61 | - consumerLock.lock(); | |
62 | - try { | |
63 | - this.partitions = partitions; | |
64 | - subscribed = false; | |
65 | - } finally { | |
66 | - consumerLock.unlock(); | |
66 | + log.info("enqueue topics subscribe {} ", partitions); | |
67 | + if (stopped) { | |
68 | + log.error("trying subscribe, but consumer stopped for topic {}", topic); | |
69 | + return; | |
67 | 70 | } |
71 | + subscribeQueue.add(partitions); | |
68 | 72 | } |
69 | 73 | |
70 | 74 | @Override |
71 | 75 | public List<T> poll(long durationInMillis) { |
76 | + List<R> records; | |
77 | + long startNanos = System.nanoTime(); | |
78 | + if (stopped) { | |
79 | + return errorAndReturnEmpty(); | |
80 | + } | |
72 | 81 | if (!subscribed && partitions == null) { |
73 | - try { | |
74 | - Thread.sleep(durationInMillis); | |
75 | - } catch (InterruptedException e) { | |
76 | - log.debug("Failed to await subscription", e); | |
82 | + return sleepAndReturnEmpty(startNanos, durationInMillis); | |
83 | + } | |
84 | + | |
85 | + if (consumerLock.isLocked()) { | |
86 | + log.error("poll. consumerLock is locked. will wait with no timeout. it looks like a race conditions or deadlock", new RuntimeException("stacktrace")); | |
87 | + } | |
88 | + | |
89 | + consumerLock.lock(); | |
90 | + try { | |
91 | + while (!subscribeQueue.isEmpty()) { | |
92 | + subscribed = false; | |
93 | + partitions = subscribeQueue.poll(); | |
94 | + } | |
95 | + if (!subscribed) { | |
96 | + List<String> topicNames = partitions.stream().map(TopicPartitionInfo::getFullTopicName).collect(Collectors.toList()); | |
97 | + doSubscribe(topicNames); | |
98 | + subscribed = true; | |
77 | 99 | } |
78 | - } else { | |
79 | - long pollStartTs = System.currentTimeMillis(); | |
80 | - consumerLock.lock(); | |
100 | + records = partitions.isEmpty() ? emptyList() : doPoll(durationInMillis); | |
101 | + } finally { | |
102 | + consumerLock.unlock(); | |
103 | + } | |
104 | + | |
105 | + if (records.isEmpty()) { return sleepAndReturnEmpty(startNanos, durationInMillis); } | |
106 | + | |
107 | + return decodeRecords(records); | |
108 | + } | |
109 | + | |
110 | + @Nonnull | |
111 | + List<T> decodeRecords(@Nonnull List<R> records) { | |
112 | + List<T> result = new ArrayList<>(records.size()); | |
113 | + records.forEach(record -> { | |
81 | 114 | try { |
82 | - if (!subscribed) { | |
83 | - List<String> topicNames = partitions.stream().map(TopicPartitionInfo::getFullTopicName).collect(Collectors.toList()); | |
84 | - doSubscribe(topicNames); | |
85 | - subscribed = true; | |
115 | + if (record != null) { | |
116 | + result.add(decode(record)); | |
86 | 117 | } |
118 | + } catch (IOException e) { | |
119 | + log.error("Failed decode record: [{}]", record); | |
120 | + throw new RuntimeException("Failed to decode record: ", e); | |
121 | + } | |
122 | + }); | |
123 | + return result; | |
124 | + } | |
87 | 125 | |
88 | - List<R> records; | |
89 | - if (partitions.isEmpty()) { | |
90 | - records = Collections.emptyList(); | |
91 | - } else { | |
92 | - records = doPoll(durationInMillis); | |
93 | - } | |
94 | - if (!records.isEmpty()) { | |
95 | - List<T> result = new ArrayList<>(records.size()); | |
96 | - records.forEach(record -> { | |
97 | - try { | |
98 | - if (record != null) { | |
99 | - result.add(decode(record)); | |
100 | - } | |
101 | - } catch (IOException e) { | |
102 | - log.error("Failed decode record: [{}]", record); | |
103 | - throw new RuntimeException("Failed to decode record: ", e); | |
104 | - } | |
105 | - }); | |
106 | - return result; | |
107 | - } else { | |
108 | - long pollDuration = System.currentTimeMillis() - pollStartTs; | |
109 | - if (pollDuration < durationInMillis) { | |
110 | - try { | |
111 | - Thread.sleep(durationInMillis - pollDuration); | |
112 | - } catch (InterruptedException e) { | |
113 | - if (!stopped) { | |
114 | - log.error("Failed to wait.", e); | |
115 | - } | |
116 | - } | |
117 | - } | |
126 | + List<T> errorAndReturnEmpty() { | |
127 | + log.error("poll invoked but consumer stopped for topic" + topic, new RuntimeException("stacktrace")); | |
128 | + return emptyList(); | |
129 | + } | |
130 | + | |
131 | + List<T> sleepAndReturnEmpty(final long startNanos, final long durationInMillis) { | |
132 | + long durationNanos = TimeUnit.MILLISECONDS.toNanos(durationInMillis); | |
133 | + long spentNanos = System.nanoTime() - startNanos; | |
134 | + if (spentNanos < durationNanos) { | |
135 | + try { | |
136 | + Thread.sleep(Math.max(TimeUnit.NANOSECONDS.toMillis(durationNanos - spentNanos), 1)); | |
137 | + } catch (InterruptedException e) { | |
138 | + if (!stopped) { | |
139 | + log.error("Failed to wait", e); | |
118 | 140 | } |
119 | - } finally { | |
120 | - consumerLock.unlock(); | |
121 | 141 | } |
122 | 142 | } |
123 | - return Collections.emptyList(); | |
143 | + return emptyList(); | |
124 | 144 | } |
125 | 145 | |
126 | 146 | @Override |
127 | 147 | public void commit() { |
148 | + if (consumerLock.isLocked()) { | |
149 | + log.error("commit. consumerLock is locked. will wait with no timeout. it looks like a race conditions or deadlock", new RuntimeException("stacktrace")); | |
150 | + } | |
128 | 151 | consumerLock.lock(); |
129 | 152 | try { |
130 | 153 | doCommit(); |
... | ... | @@ -135,6 +158,7 @@ public abstract class AbstractTbQueueConsumerTemplate<R, T extends TbQueueMsg> i |
135 | 158 | |
136 | 159 | @Override |
137 | 160 | public void unsubscribe() { |
161 | + log.info("unsubscribe topic and stop consumer {}", getTopic()); | |
138 | 162 | stopped = true; |
139 | 163 | consumerLock.lock(); |
140 | 164 | try { | ... | ... |
... | ... | @@ -72,8 +72,10 @@ public class TbKafkaConsumerTemplate<T extends TbQueueMsg> extends AbstractTbQue |
72 | 72 | protected void doSubscribe(List<String> topicNames) { |
73 | 73 | if (!topicNames.isEmpty()) { |
74 | 74 | topicNames.forEach(admin::createTopicIfNotExists); |
75 | + log.info("subscribe topics {}", topicNames); | |
75 | 76 | consumer.subscribe(topicNames); |
76 | 77 | } else { |
78 | + log.info("unsubscribe due to empty topic list"); | |
77 | 79 | consumer.unsubscribe(); |
78 | 80 | } |
79 | 81 | } |
... | ... | @@ -102,6 +104,7 @@ public class TbKafkaConsumerTemplate<T extends TbQueueMsg> extends AbstractTbQue |
102 | 104 | |
103 | 105 | @Override |
104 | 106 | protected void doUnsubscribe() { |
107 | + log.info("unsubscribe topic and close consumer for topic {}", getTopic()); | |
105 | 108 | if (consumer != null) { |
106 | 109 | consumer.unsubscribe(); |
107 | 110 | consumer.close(); | ... | ... |