Commit b35ce062a79167dfac2129a40ddd0b0d3f005dbd

Authored by Sergey Matvienko
Committed by Andrew Shvayka
1 parent 3362ab37

queue consumer: implemented nonblocking subscribe method to prevent locks on rep…

…artition event. moved message decoding out of the lock to reduce locking time. using monotonic time instead wall-clock. added debug messages
@@ -21,22 +21,28 @@ import org.thingsboard.server.common.msg.queue.TopicPartitionInfo; @@ -21,22 +21,28 @@ import org.thingsboard.server.common.msg.queue.TopicPartitionInfo;
21 import org.thingsboard.server.queue.TbQueueConsumer; 21 import org.thingsboard.server.queue.TbQueueConsumer;
22 import org.thingsboard.server.queue.TbQueueMsg; 22 import org.thingsboard.server.queue.TbQueueMsg;
23 23
  24 +import javax.annotation.Nonnull;
24 import java.io.IOException; 25 import java.io.IOException;
25 import java.util.ArrayList; 26 import java.util.ArrayList;
26 import java.util.Collections; 27 import java.util.Collections;
27 import java.util.List; 28 import java.util.List;
  29 +import java.util.Queue;
28 import java.util.Set; 30 import java.util.Set;
29 -import java.util.concurrent.locks.Lock; 31 +import java.util.concurrent.ConcurrentLinkedQueue;
  32 +import java.util.concurrent.TimeUnit;
30 import java.util.concurrent.locks.ReentrantLock; 33 import java.util.concurrent.locks.ReentrantLock;
31 import java.util.stream.Collectors; 34 import java.util.stream.Collectors;
32 35
  36 +import static java.util.Collections.emptyList;
  37 +
33 @Slf4j 38 @Slf4j
34 public abstract class AbstractTbQueueConsumerTemplate<R, T extends TbQueueMsg> implements TbQueueConsumer<T> { 39 public abstract class AbstractTbQueueConsumerTemplate<R, T extends TbQueueMsg> implements TbQueueConsumer<T> {
35 40
36 private volatile boolean subscribed; 41 private volatile boolean subscribed;
37 protected volatile boolean stopped = false; 42 protected volatile boolean stopped = false;
38 protected volatile Set<TopicPartitionInfo> partitions; 43 protected volatile Set<TopicPartitionInfo> partitions;
39 - protected final Lock consumerLock = new ReentrantLock(); 44 + protected final ReentrantLock consumerLock = new ReentrantLock(); //NonfairSync
  45 + final Queue<Set<TopicPartitionInfo>> subscribeQueue = new ConcurrentLinkedQueue<>();
40 46
41 @Getter 47 @Getter
42 private final String topic; 48 private final String topic;
@@ -47,84 +53,101 @@ public abstract class AbstractTbQueueConsumerTemplate<R, T extends TbQueueMsg> i @@ -47,84 +53,101 @@ public abstract class AbstractTbQueueConsumerTemplate<R, T extends TbQueueMsg> i
47 53
48 @Override 54 @Override
49 public void subscribe() { 55 public void subscribe() {
50 - consumerLock.lock();  
51 - try {  
52 - partitions = Collections.singleton(new TopicPartitionInfo(topic, null, null, true));  
53 - subscribed = false;  
54 - } finally {  
55 - consumerLock.unlock(); 56 + log.info("enqueue topic subscribe {} ", topic);
  57 + if (stopped) {
  58 + log.error("trying subscribe, but consumer stopped for topic {}", topic);
  59 + return;
56 } 60 }
  61 + subscribeQueue.add(Collections.singleton(new TopicPartitionInfo(topic, null, null, true)));
57 } 62 }
58 63
59 @Override 64 @Override
60 public void subscribe(Set<TopicPartitionInfo> partitions) { 65 public void subscribe(Set<TopicPartitionInfo> partitions) {
61 - consumerLock.lock();  
62 - try {  
63 - this.partitions = partitions;  
64 - subscribed = false;  
65 - } finally {  
66 - consumerLock.unlock(); 66 + log.info("enqueue topics subscribe {} ", partitions);
  67 + if (stopped) {
  68 + log.error("trying subscribe, but consumer stopped for topic {}", topic);
  69 + return;
67 } 70 }
  71 + subscribeQueue.add(partitions);
68 } 72 }
69 73
70 @Override 74 @Override
71 public List<T> poll(long durationInMillis) { 75 public List<T> poll(long durationInMillis) {
  76 + List<R> records;
  77 + long startNanos = System.nanoTime();
  78 + if (stopped) {
  79 + return errorAndReturnEmpty();
  80 + }
72 if (!subscribed && partitions == null) { 81 if (!subscribed && partitions == null) {
73 - try {  
74 - Thread.sleep(durationInMillis);  
75 - } catch (InterruptedException e) {  
76 - log.debug("Failed to await subscription", e); 82 + return sleepAndReturnEmpty(startNanos, durationInMillis);
  83 + }
  84 +
  85 + if (consumerLock.isLocked()) {
  86 + log.error("poll. consumerLock is locked. will wait with no timeout. it looks like a race conditions or deadlock", new RuntimeException("stacktrace"));
  87 + }
  88 +
  89 + consumerLock.lock();
  90 + try {
  91 + while (!subscribeQueue.isEmpty()) {
  92 + subscribed = false;
  93 + partitions = subscribeQueue.poll();
  94 + }
  95 + if (!subscribed) {
  96 + List<String> topicNames = partitions.stream().map(TopicPartitionInfo::getFullTopicName).collect(Collectors.toList());
  97 + doSubscribe(topicNames);
  98 + subscribed = true;
77 } 99 }
78 - } else {  
79 - long pollStartTs = System.currentTimeMillis();  
80 - consumerLock.lock(); 100 + records = partitions.isEmpty() ? emptyList() : doPoll(durationInMillis);
  101 + } finally {
  102 + consumerLock.unlock();
  103 + }
  104 +
  105 + if (records.isEmpty()) { return sleepAndReturnEmpty(startNanos, durationInMillis); }
  106 +
  107 + return decodeRecords(records);
  108 + }
  109 +
  110 + @Nonnull
  111 + List<T> decodeRecords(@Nonnull List<R> records) {
  112 + List<T> result = new ArrayList<>(records.size());
  113 + records.forEach(record -> {
81 try { 114 try {
82 - if (!subscribed) {  
83 - List<String> topicNames = partitions.stream().map(TopicPartitionInfo::getFullTopicName).collect(Collectors.toList());  
84 - doSubscribe(topicNames);  
85 - subscribed = true; 115 + if (record != null) {
  116 + result.add(decode(record));
86 } 117 }
  118 + } catch (IOException e) {
  119 + log.error("Failed decode record: [{}]", record);
  120 + throw new RuntimeException("Failed to decode record: ", e);
  121 + }
  122 + });
  123 + return result;
  124 + }
87 125
88 - List<R> records;  
89 - if (partitions.isEmpty()) {  
90 - records = Collections.emptyList();  
91 - } else {  
92 - records = doPoll(durationInMillis);  
93 - }  
94 - if (!records.isEmpty()) {  
95 - List<T> result = new ArrayList<>(records.size());  
96 - records.forEach(record -> {  
97 - try {  
98 - if (record != null) {  
99 - result.add(decode(record));  
100 - }  
101 - } catch (IOException e) {  
102 - log.error("Failed decode record: [{}]", record);  
103 - throw new RuntimeException("Failed to decode record: ", e);  
104 - }  
105 - });  
106 - return result;  
107 - } else {  
108 - long pollDuration = System.currentTimeMillis() - pollStartTs;  
109 - if (pollDuration < durationInMillis) {  
110 - try {  
111 - Thread.sleep(durationInMillis - pollDuration);  
112 - } catch (InterruptedException e) {  
113 - if (!stopped) {  
114 - log.error("Failed to wait.", e);  
115 - }  
116 - }  
117 - } 126 + List<T> errorAndReturnEmpty() {
  127 + log.error("poll invoked but consumer stopped for topic" + topic, new RuntimeException("stacktrace"));
  128 + return emptyList();
  129 + }
  130 +
  131 + List<T> sleepAndReturnEmpty(final long startNanos, final long durationInMillis) {
  132 + long durationNanos = TimeUnit.MILLISECONDS.toNanos(durationInMillis);
  133 + long spentNanos = System.nanoTime() - startNanos;
  134 + if (spentNanos < durationNanos) {
  135 + try {
  136 + Thread.sleep(Math.max(TimeUnit.NANOSECONDS.toMillis(durationNanos - spentNanos), 1));
  137 + } catch (InterruptedException e) {
  138 + if (!stopped) {
  139 + log.error("Failed to wait", e);
118 } 140 }
119 - } finally {  
120 - consumerLock.unlock();  
121 } 141 }
122 } 142 }
123 - return Collections.emptyList(); 143 + return emptyList();
124 } 144 }
125 145
126 @Override 146 @Override
127 public void commit() { 147 public void commit() {
  148 + if (consumerLock.isLocked()) {
  149 + log.error("commit. consumerLock is locked. will wait with no timeout. it looks like a race conditions or deadlock", new RuntimeException("stacktrace"));
  150 + }
128 consumerLock.lock(); 151 consumerLock.lock();
129 try { 152 try {
130 doCommit(); 153 doCommit();
@@ -135,6 +158,7 @@ public abstract class AbstractTbQueueConsumerTemplate<R, T extends TbQueueMsg> i @@ -135,6 +158,7 @@ public abstract class AbstractTbQueueConsumerTemplate<R, T extends TbQueueMsg> i
135 158
136 @Override 159 @Override
137 public void unsubscribe() { 160 public void unsubscribe() {
  161 + log.info("unsubscribe topic and stop consumer {}", getTopic());
138 stopped = true; 162 stopped = true;
139 consumerLock.lock(); 163 consumerLock.lock();
140 try { 164 try {
@@ -72,8 +72,10 @@ public class TbKafkaConsumerTemplate<T extends TbQueueMsg> extends AbstractTbQue @@ -72,8 +72,10 @@ public class TbKafkaConsumerTemplate<T extends TbQueueMsg> extends AbstractTbQue
72 protected void doSubscribe(List<String> topicNames) { 72 protected void doSubscribe(List<String> topicNames) {
73 if (!topicNames.isEmpty()) { 73 if (!topicNames.isEmpty()) {
74 topicNames.forEach(admin::createTopicIfNotExists); 74 topicNames.forEach(admin::createTopicIfNotExists);
  75 + log.info("subscribe topics {}", topicNames);
75 consumer.subscribe(topicNames); 76 consumer.subscribe(topicNames);
76 } else { 77 } else {
  78 + log.info("unsubscribe due to empty topic list");
77 consumer.unsubscribe(); 79 consumer.unsubscribe();
78 } 80 }
79 } 81 }
@@ -102,6 +104,7 @@ public class TbKafkaConsumerTemplate<T extends TbQueueMsg> extends AbstractTbQue @@ -102,6 +104,7 @@ public class TbKafkaConsumerTemplate<T extends TbQueueMsg> extends AbstractTbQue
102 104
103 @Override 105 @Override
104 protected void doUnsubscribe() { 106 protected void doUnsubscribe() {
  107 + log.info("unsubscribe topic and close consumer for topic {}", getTopic());
105 if (consumer != null) { 108 if (consumer != null) {
106 consumer.unsubscribe(); 109 consumer.unsubscribe();
107 consumer.close(); 110 consumer.close();