Commit 1240099c815b7a673f782904b8ad64c36c253018
Committed by
Andrew Shvayka
1 parent
67ee892e
Full refactoring and code cleaning
Showing
7 changed files
with
309 additions
and
451 deletions
... | ... | @@ -54,6 +54,7 @@ |
54 | 54 | <dependency> |
55 | 55 | <groupId>org.apache.cassandra</groupId> |
56 | 56 | <artifactId>cassandra-all</artifactId> |
57 | + <version>3.11.10</version> | |
57 | 58 | </dependency> |
58 | 59 | <dependency> |
59 | 60 | <groupId>com.datastax.cassandra</groupId> |
... | ... | @@ -63,6 +64,7 @@ |
63 | 64 | <dependency> |
64 | 65 | <groupId>commons-io</groupId> |
65 | 66 | <artifactId>commons-io</artifactId> |
67 | + <version>2.5</version> | |
66 | 68 | </dependency> |
67 | 69 | </dependencies> |
68 | 70 | ... | ... |
... | ... | @@ -44,13 +44,17 @@ public class DictionaryParser { |
44 | 44 | } |
45 | 45 | |
46 | 46 | private void parseDictionaryDump(LineIterator iterator) { |
47 | - String tempLine; | |
48 | - while(iterator.hasNext()) { | |
49 | - tempLine = iterator.nextLine(); | |
47 | + try { | |
48 | + String tempLine; | |
49 | + while (iterator.hasNext()) { | |
50 | + tempLine = iterator.nextLine(); | |
50 | 51 | |
51 | - if(isBlockStarted(tempLine)) { | |
52 | - processBlock(iterator); | |
52 | + if (isBlockStarted(tempLine)) { | |
53 | + processBlock(iterator); | |
54 | + } | |
53 | 55 | } |
56 | + } finally { | |
57 | + iterator.close(); | |
54 | 58 | } |
55 | 59 | } |
56 | 60 | ... | ... |
... | ... | @@ -33,23 +33,24 @@ public class MigratorTool { |
33 | 33 | try { |
34 | 34 | boolean castEnable = Boolean.parseBoolean(cmd.getOptionValue("castEnable")); |
35 | 35 | File allTelemetrySource = new File(cmd.getOptionValue("telemetryFrom")); |
36 | + File tsSaveDir = null; | |
37 | + File partitionsSaveDir = null; | |
38 | + File latestSaveDir = null; | |
36 | 39 | |
37 | 40 | RelatedEntitiesParser allEntityIdsAndTypes = |
38 | 41 | new RelatedEntitiesParser(new File(cmd.getOptionValue("relatedEntities"))); |
39 | 42 | DictionaryParser dictionaryParser = new DictionaryParser(allTelemetrySource); |
40 | 43 | |
41 | 44 | if(cmd.getOptionValue("latestTelemetryOut") != null) { |
42 | - File latestSaveDir = new File(cmd.getOptionValue("latestTelemetryOut")); | |
43 | - PgCaLatestMigrator.migrateLatest(allTelemetrySource, latestSaveDir, allEntityIdsAndTypes, dictionaryParser, castEnable); | |
45 | + latestSaveDir = new File(cmd.getOptionValue("latestTelemetryOut")); | |
44 | 46 | } |
45 | 47 | if(cmd.getOptionValue("telemetryOut") != null) { |
46 | - File tsSaveDir = new File(cmd.getOptionValue("telemetryOut")); | |
47 | - File partitionsSaveDir = new File(cmd.getOptionValue("partitionsOut")); | |
48 | - PostgresToCassandraTelemetryMigrator.migrateTs( | |
49 | - allTelemetrySource, tsSaveDir, partitionsSaveDir, allEntityIdsAndTypes, dictionaryParser, castEnable | |
50 | - ); | |
48 | + tsSaveDir = new File(cmd.getOptionValue("telemetryOut")); | |
49 | + partitionsSaveDir = new File(cmd.getOptionValue("partitionsOut")); | |
51 | 50 | } |
52 | 51 | |
52 | + new PgCaMigrator(allTelemetrySource, tsSaveDir, partitionsSaveDir, latestSaveDir, allEntityIdsAndTypes, dictionaryParser, castEnable).migrate(); | |
53 | + | |
53 | 54 | } catch (Throwable th) { |
54 | 55 | th.printStackTrace(); |
55 | 56 | throw new IllegalStateException("failed", th); | ... | ... |
tools/src/main/java/org/thingsboard/client/tools/migrator/PgCaLatestMigrator.java
deleted
100644 → 0
1 | -/** | |
2 | - * Copyright © 2016-2021 The Thingsboard Authors | |
3 | - * | |
4 | - * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | - * you may not use this file except in compliance with the License. | |
6 | - * You may obtain a copy of the License at | |
7 | - * | |
8 | - * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | - * | |
10 | - * Unless required by applicable law or agreed to in writing, software | |
11 | - * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | - * See the License for the specific language governing permissions and | |
14 | - * limitations under the License. | |
15 | - */ | |
16 | -package org.thingsboard.client.tools.migrator; | |
17 | - | |
18 | -import com.google.common.collect.Lists; | |
19 | -import org.apache.cassandra.io.sstable.CQLSSTableWriter; | |
20 | -import org.apache.commons.io.FileUtils; | |
21 | -import org.apache.commons.io.LineIterator; | |
22 | -import org.apache.commons.lang3.StringUtils; | |
23 | -import org.apache.commons.lang3.math.NumberUtils; | |
24 | - | |
25 | -import java.io.File; | |
26 | -import java.io.IOException; | |
27 | -import java.util.ArrayList; | |
28 | -import java.util.Arrays; | |
29 | -import java.util.Date; | |
30 | -import java.util.List; | |
31 | -import java.util.UUID; | |
32 | -import java.util.stream.Collectors; | |
33 | - | |
34 | -public class PgCaLatestMigrator { | |
35 | - | |
36 | - private static final long LOG_BATCH = 1000000; | |
37 | - private static final long rowPerFile = 1000000; | |
38 | - | |
39 | - | |
40 | - private static long linesProcessed = 0; | |
41 | - private static long linesMigrated = 0; | |
42 | - private static long castErrors = 0; | |
43 | - private static long castedOk = 0; | |
44 | - | |
45 | - private static long currentWriterCount = 1; | |
46 | - private static RelatedEntitiesParser allIdsAndTypes; | |
47 | - private static DictionaryParser keyPairs; | |
48 | - | |
49 | - public static void migrateLatest(File sourceFile, | |
50 | - File outDir, | |
51 | - RelatedEntitiesParser allEntityIdsAndTypes, | |
52 | - DictionaryParser dictionaryParser, | |
53 | - boolean castStringsIfPossible) throws IOException { | |
54 | - long startTs = System.currentTimeMillis(); | |
55 | - long stepLineTs = System.currentTimeMillis(); | |
56 | - long stepOkLineTs = System.currentTimeMillis(); | |
57 | - LineIterator iterator = FileUtils.lineIterator(sourceFile); | |
58 | - CQLSSTableWriter currentTsWriter = WriterBuilder.getLatestWriter(outDir); | |
59 | - allIdsAndTypes = allEntityIdsAndTypes; | |
60 | - keyPairs = dictionaryParser; | |
61 | - | |
62 | - boolean isBlockStarted = false; | |
63 | - boolean isBlockFinished = false; | |
64 | - | |
65 | - String line; | |
66 | - while (iterator.hasNext()) { | |
67 | - if (linesProcessed++ % LOG_BATCH == 0) { | |
68 | - System.out.println(new Date() + " linesProcessed = " + linesProcessed + " in " + (System.currentTimeMillis() - stepLineTs) + " castOk " + castedOk + " castErr " + castErrors); | |
69 | - stepLineTs = System.currentTimeMillis(); | |
70 | - } | |
71 | - | |
72 | - line = iterator.nextLine(); | |
73 | - | |
74 | - if (isBlockFinished) { | |
75 | - break; | |
76 | - } | |
77 | - | |
78 | - if (!isBlockStarted) { | |
79 | - if (isBlockStarted(line)) { | |
80 | - System.out.println(); | |
81 | - System.out.println(); | |
82 | - System.out.println(line); | |
83 | - System.out.println(); | |
84 | - System.out.println(); | |
85 | - isBlockStarted = true; | |
86 | - } | |
87 | - continue; | |
88 | - } | |
89 | - | |
90 | - if (isBlockFinished(line)) { | |
91 | - isBlockFinished = true; | |
92 | - } else { | |
93 | - try { | |
94 | - List<String> raw = Arrays.stream(line.trim().split("\t")) | |
95 | - .map(String::trim) | |
96 | - .filter(StringUtils::isNotEmpty) | |
97 | - .collect(Collectors.toList()); | |
98 | - List<Object> values = toValues(raw); | |
99 | - | |
100 | - if (currentWriterCount == 0) { | |
101 | - System.out.println(new Date() + " close writer " + new Date()); | |
102 | - currentTsWriter.close(); | |
103 | - currentTsWriter = WriterBuilder.getLatestWriter(outDir); | |
104 | - } | |
105 | - | |
106 | - if (castStringsIfPossible) { | |
107 | - currentTsWriter.addRow(castToNumericIfPossible(values)); | |
108 | - } else { | |
109 | - currentTsWriter.addRow(values); | |
110 | - } | |
111 | - currentWriterCount++; | |
112 | - if (currentWriterCount >= rowPerFile) { | |
113 | - currentWriterCount = 0; | |
114 | - } | |
115 | - | |
116 | - if (linesMigrated++ % LOG_BATCH == 0) { | |
117 | - System.out.println(new Date() + " migrated = " + linesMigrated + " in " + (System.currentTimeMillis() - stepOkLineTs) + " ms."); | |
118 | - stepOkLineTs = System.currentTimeMillis(); | |
119 | - } | |
120 | - } catch (Exception ex) { | |
121 | - System.out.println(ex.getMessage() + " -> " + line); | |
122 | - } | |
123 | - | |
124 | - } | |
125 | - } | |
126 | - | |
127 | - long endTs = System.currentTimeMillis(); | |
128 | - System.out.println(); | |
129 | - System.out.println(new Date() + " Migrated rows " + linesMigrated + " in " + (endTs - startTs) + " ts"); | |
130 | - | |
131 | - currentTsWriter.close(); | |
132 | - System.out.println(); | |
133 | - System.out.println("Finished migrate Latest Telemetry"); | |
134 | - } | |
135 | - | |
136 | - | |
137 | - private static List<Object> castToNumericIfPossible(List<Object> values) { | |
138 | - try { | |
139 | - if (values.get(6) != null && NumberUtils.isNumber(values.get(6).toString())) { | |
140 | - Double casted = NumberUtils.createDouble(values.get(6).toString()); | |
141 | - List<Object> numeric = Lists.newArrayList(); | |
142 | - numeric.addAll(values); | |
143 | - numeric.set(6, null); | |
144 | - numeric.set(8, casted); | |
145 | - castedOk++; | |
146 | - return numeric; | |
147 | - } | |
148 | - } catch (Throwable th) { | |
149 | - castErrors++; | |
150 | - } | |
151 | - return values; | |
152 | - } | |
153 | - | |
154 | - private static List<Object> toValues(List<String> raw) { | |
155 | - //expected Table structure: | |
156 | - //COPY public.ts_kv_latest (entity_type, entity_id, key, ts, bool_v, str_v, long_v, dbl_v) FROM stdin; | |
157 | - | |
158 | - List<Object> result = new ArrayList<>(); | |
159 | - result.add(allIdsAndTypes.getEntityType(raw.get(0))); | |
160 | - result.add(UUID.fromString(raw.get(0))); | |
161 | - result.add(keyPairs.getKeyByKeyId(raw.get(1))); | |
162 | - | |
163 | - long ts = Long.parseLong(raw.get(2)); | |
164 | - result.add(3, ts); | |
165 | - | |
166 | - result.add(raw.get(3).equals("\\N") ? null : raw.get(3).equals("t") ? Boolean.TRUE : Boolean.FALSE); | |
167 | - result.add(raw.get(4).equals("\\N") ? null : raw.get(4)); | |
168 | - result.add(raw.get(5).equals("\\N") ? null : Long.parseLong(raw.get(5))); | |
169 | - result.add(raw.get(6).equals("\\N") ? null : Double.parseDouble(raw.get(6))); | |
170 | - result.add(raw.get(7).equals("\\N") ? null : raw.get(7)); | |
171 | - | |
172 | - return result; | |
173 | - } | |
174 | - | |
175 | - private static boolean isBlockStarted(String line) { | |
176 | - return line.startsWith("COPY public.ts_kv_latest ("); | |
177 | - } | |
178 | - | |
179 | - private static boolean isBlockFinished(String line) { | |
180 | - return StringUtils.isBlank(line) || line.equals("\\."); | |
181 | - } | |
182 | - | |
183 | -} |
1 | +package org.thingsboard.client.tools.migrator; | |
2 | + | |
3 | +import com.google.common.collect.Lists; | |
4 | +import org.apache.cassandra.io.sstable.CQLSSTableWriter; | |
5 | +import org.apache.commons.io.FileUtils; | |
6 | +import org.apache.commons.io.LineIterator; | |
7 | +import org.apache.commons.lang3.StringUtils; | |
8 | +import org.apache.commons.lang3.math.NumberUtils; | |
9 | + | |
10 | +import java.io.File; | |
11 | +import java.io.IOException; | |
12 | +import java.time.Instant; | |
13 | +import java.time.LocalDateTime; | |
14 | +import java.time.ZoneOffset; | |
15 | +import java.time.temporal.ChronoUnit; | |
16 | +import java.util.ArrayList; | |
17 | +import java.util.Arrays; | |
18 | +import java.util.Date; | |
19 | +import java.util.HashSet; | |
20 | +import java.util.List; | |
21 | +import java.util.Set; | |
22 | +import java.util.UUID; | |
23 | +import java.util.function.Function; | |
24 | +import java.util.stream.Collectors; | |
25 | + | |
26 | +public class PgCaMigrator { | |
27 | + | |
28 | + private final long LOG_BATCH = 1000000; | |
29 | + private final long rowPerFile = 1000000; | |
30 | + | |
31 | + private long linesProcessed = 0; | |
32 | + private long linesTsMigrated = 0; | |
33 | + private long linesLatestMigrated = 0; | |
34 | + private long castErrors = 0; | |
35 | + private long castedOk = 0; | |
36 | + | |
37 | + private long currentWriterCount = 1; | |
38 | + | |
39 | + private final File sourceFile; | |
40 | + private final boolean castStringIfPossible; | |
41 | + | |
42 | + private final RelatedEntitiesParser entityIdsAndTypes; | |
43 | + private final DictionaryParser keyParser; | |
44 | + private CQLSSTableWriter currentTsWriter; | |
45 | + private CQLSSTableWriter currentPartitionsWriter; | |
46 | + private CQLSSTableWriter currentTsLatestWriter; | |
47 | + private final Set<String> partitions = new HashSet<>(); | |
48 | + | |
49 | + private File outTsDir; | |
50 | + private File outTsLatestDir; | |
51 | + | |
52 | + public PgCaMigrator(File sourceFile, | |
53 | + File ourTsDir, | |
54 | + File outTsPartitionDir, | |
55 | + File outTsLatestDir, | |
56 | + RelatedEntitiesParser allEntityIdsAndTypes, | |
57 | + DictionaryParser dictionaryParser, | |
58 | + boolean castStringsIfPossible) { | |
59 | + this.sourceFile = sourceFile; | |
60 | + this.entityIdsAndTypes = allEntityIdsAndTypes; | |
61 | + this.keyParser = dictionaryParser; | |
62 | + this.castStringIfPossible = castStringsIfPossible; | |
63 | + if(outTsLatestDir != null) { | |
64 | + this.currentTsLatestWriter = WriterBuilder.getLatestWriter(outTsLatestDir); | |
65 | + this.outTsLatestDir = outTsLatestDir; | |
66 | + } | |
67 | + if(ourTsDir != null) { | |
68 | + this.currentTsWriter = WriterBuilder.getTsWriter(ourTsDir); | |
69 | + this.currentPartitionsWriter = WriterBuilder.getPartitionWriter(outTsPartitionDir); | |
70 | + this.outTsDir = ourTsDir; | |
71 | + } | |
72 | + } | |
73 | + | |
74 | + public void migrate() throws IOException { | |
75 | + boolean isTsDone = false; | |
76 | + boolean isLatestDone = false; | |
77 | + String line; | |
78 | + LineIterator iterator = FileUtils.lineIterator(this.sourceFile); | |
79 | + | |
80 | + try { | |
81 | + while(iterator.hasNext()) { | |
82 | + line = iterator.nextLine(); | |
83 | + if(!isLatestDone && isBlockLatestStarted(line)) { | |
84 | + System.out.println("START TO MIGRATE LATEST"); | |
85 | + long start = System.currentTimeMillis(); | |
86 | + processBlock(iterator, currentTsLatestWriter, outTsLatestDir, this::toValuesLatest); | |
87 | + System.out.println("FORMING OF SSL FOR LATEST TS FINISHED WITH TIME: " + (System.currentTimeMillis() - start) + " ms."); | |
88 | + isLatestDone = true; | |
89 | + } | |
90 | + | |
91 | + if(!isTsDone && isBlockTsStarted(line)) { | |
92 | + System.out.println("START TO MIGRATE TS"); | |
93 | + long start = System.currentTimeMillis(); | |
94 | + processBlock(iterator, currentTsWriter, outTsDir, this::toValuesTs); | |
95 | + System.out.println("FORMING OF SSL FOR TS FINISHED WITH TIME: " + (System.currentTimeMillis() - start) + " ms."); | |
96 | + isTsDone = true; | |
97 | + } | |
98 | + } | |
99 | + | |
100 | + System.out.println("Partitions collected " + partitions.size()); | |
101 | + long startTs = System.currentTimeMillis(); | |
102 | + for (String partition : partitions) { | |
103 | + String[] split = partition.split("\\|"); | |
104 | + List<Object> values = Lists.newArrayList(); | |
105 | + values.add(split[0]); | |
106 | + values.add(UUID.fromString(split[1])); | |
107 | + values.add(split[2]); | |
108 | + values.add(Long.parseLong(split[3])); | |
109 | + currentPartitionsWriter.addRow(values); | |
110 | + } | |
111 | + | |
112 | + System.out.println(new Date() + " Migrated partitions " + partitions.size() + " in " + (System.currentTimeMillis() - startTs)); | |
113 | + | |
114 | + System.out.println(); | |
115 | + System.out.println("Finished migrate Telemetry"); | |
116 | + | |
117 | + } finally { | |
118 | + iterator.close(); | |
119 | + currentTsLatestWriter.close(); | |
120 | + currentTsWriter.close(); | |
121 | + currentPartitionsWriter.close(); | |
122 | + } | |
123 | + } | |
124 | + | |
125 | + private void logLinesProcessed() { | |
126 | + if (linesProcessed++ % LOG_BATCH == 0) { | |
127 | + System.out.println(new Date() + " linesProcessed = " + linesProcessed + " in, castOk " + castedOk + " castErr " + castErrors); | |
128 | + } | |
129 | + } | |
130 | + | |
131 | + private List<Object> toValuesTs(List<String> raw) { | |
132 | + linesTsMigrated++; | |
133 | + List<Object> result = new ArrayList<>(); | |
134 | + result.add(entityIdsAndTypes.getEntityType(raw.get(0))); | |
135 | + result.add(UUID.fromString(raw.get(0))); | |
136 | + result.add(keyParser.getKeyByKeyId(raw.get(1))); | |
137 | + | |
138 | + long ts = Long.parseLong(raw.get(2)); | |
139 | + long partition = toPartitionTs(ts); | |
140 | + result.add(partition); | |
141 | + result.add(ts); | |
142 | + | |
143 | + result.add(raw.get(3).equals("\\N") ? null : raw.get(3).equals("t") ? Boolean.TRUE : Boolean.FALSE); | |
144 | + result.add(raw.get(4).equals("\\N") ? null : raw.get(4)); | |
145 | + result.add(raw.get(5).equals("\\N") ? null : Long.parseLong(raw.get(5))); | |
146 | + result.add(raw.get(6).equals("\\N") ? null : Double.parseDouble(raw.get(6))); | |
147 | + result.add(raw.get(7).equals("\\N") ? null : raw.get(7)); | |
148 | + | |
149 | + processPartitions(result); | |
150 | + | |
151 | + return result; | |
152 | + } | |
153 | + | |
154 | + private List<Object> toValuesLatest(List<String> raw) { | |
155 | + linesLatestMigrated++; | |
156 | + List<Object> result = new ArrayList<>(); | |
157 | + result.add(this.entityIdsAndTypes.getEntityType(raw.get(0))); | |
158 | + result.add(UUID.fromString(raw.get(0))); | |
159 | + result.add(this.keyParser.getKeyByKeyId(raw.get(1))); | |
160 | + | |
161 | + long ts = Long.parseLong(raw.get(2)); | |
162 | + result.add(3, ts); | |
163 | + | |
164 | + result.add(raw.get(3).equals("\\N") ? null : raw.get(3).equals("t") ? Boolean.TRUE : Boolean.FALSE); | |
165 | + result.add(raw.get(4).equals("\\N") ? null : raw.get(4)); | |
166 | + result.add(raw.get(5).equals("\\N") ? null : Long.parseLong(raw.get(5))); | |
167 | + result.add(raw.get(6).equals("\\N") ? null : Double.parseDouble(raw.get(6))); | |
168 | + result.add(raw.get(7).equals("\\N") ? null : raw.get(7)); | |
169 | + | |
170 | + return result; | |
171 | + } | |
172 | + | |
173 | + private long toPartitionTs(long ts) { | |
174 | + LocalDateTime time = LocalDateTime.ofInstant(Instant.ofEpochMilli(ts), ZoneOffset.UTC); | |
175 | + return time.truncatedTo(ChronoUnit.DAYS).withDayOfMonth(1).toInstant(ZoneOffset.UTC).toEpochMilli(); | |
176 | + } | |
177 | + | |
178 | + private void processPartitions(List<Object> values) { | |
179 | + String key = values.get(0) + "|" + values.get(1) + "|" + values.get(2) + "|" + values.get(3); | |
180 | + partitions.add(key); | |
181 | + } | |
182 | + | |
183 | + private void processBlock(LineIterator iterator, CQLSSTableWriter writer, File outDir, Function<List<String>, List<Object>> function) { | |
184 | + String currentLine; | |
185 | + linesProcessed = 0; | |
186 | + while(iterator.hasNext()) { | |
187 | + logLinesProcessed(); | |
188 | + currentLine = iterator.nextLine(); | |
189 | + if(isBlockFinished(currentLine)) { | |
190 | + return; | |
191 | + } | |
192 | + | |
193 | + try { | |
194 | + List<String> raw = Arrays.stream(currentLine.trim().split("\t")) | |
195 | + .map(String::trim) | |
196 | + .filter(StringUtils::isNotEmpty) | |
197 | + .collect(Collectors.toList()); | |
198 | + List<Object> values = function.apply(raw); | |
199 | + | |
200 | + if (this.currentWriterCount == 0) { | |
201 | + System.out.println(new Date() + " close writer " + new Date()); | |
202 | + writer.close(); | |
203 | + writer = WriterBuilder.getLatestWriter(outDir); | |
204 | + } | |
205 | + | |
206 | + if (this.castStringIfPossible) { | |
207 | + writer.addRow(castToNumericIfPossible(values)); | |
208 | + } else { | |
209 | + writer.addRow(values); | |
210 | + } | |
211 | + | |
212 | + currentWriterCount++; | |
213 | + if (currentWriterCount >= rowPerFile) { | |
214 | + currentWriterCount = 0; | |
215 | + } | |
216 | + } catch (Exception ex) { | |
217 | + System.out.println(ex.getMessage() + " -> " + currentLine); | |
218 | + } | |
219 | + } | |
220 | + } | |
221 | + | |
222 | + private List<Object> castToNumericIfPossible(List<Object> values) { | |
223 | + try { | |
224 | + if (values.get(6) != null && NumberUtils.isNumber(values.get(6).toString())) { | |
225 | + Double casted = NumberUtils.createDouble(values.get(6).toString()); | |
226 | + List<Object> numeric = Lists.newArrayList(); | |
227 | + numeric.addAll(values); | |
228 | + numeric.set(6, null); | |
229 | + numeric.set(8, casted); | |
230 | + castedOk++; | |
231 | + return numeric; | |
232 | + } | |
233 | + } catch (Throwable th) { | |
234 | + castErrors++; | |
235 | + } | |
236 | + | |
237 | + processPartitions(values); | |
238 | + | |
239 | + return values; | |
240 | + } | |
241 | + | |
242 | + private boolean isBlockFinished(String line) { | |
243 | + return StringUtils.isBlank(line) || line.equals("\\."); | |
244 | + } | |
245 | + | |
246 | + private boolean isBlockTsStarted(String line) { | |
247 | + return line.startsWith("COPY public.ts_kv ("); | |
248 | + } | |
249 | + | |
250 | + private boolean isBlockLatestStarted(String line) { | |
251 | + return line.startsWith("COPY public.ts_kv_latest ("); | |
252 | + } | |
253 | + | |
254 | +} | ... | ... |
tools/src/main/java/org/thingsboard/client/tools/migrator/PostgresToCassandraTelemetryMigrator.java
deleted
100644 → 0
1 | -/** | |
2 | - * Copyright © 2016-2021 The Thingsboard Authors | |
3 | - * | |
4 | - * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | - * you may not use this file except in compliance with the License. | |
6 | - * You may obtain a copy of the License at | |
7 | - * | |
8 | - * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | - * | |
10 | - * Unless required by applicable law or agreed to in writing, software | |
11 | - * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | - * See the License for the specific language governing permissions and | |
14 | - * limitations under the License. | |
15 | - */ | |
16 | -package org.thingsboard.client.tools.migrator; | |
17 | - | |
18 | -import com.google.common.collect.Lists; | |
19 | -import org.apache.cassandra.io.sstable.CQLSSTableWriter; | |
20 | -import org.apache.commons.io.FileUtils; | |
21 | -import org.apache.commons.io.LineIterator; | |
22 | -import org.apache.commons.lang3.StringUtils; | |
23 | -import org.apache.commons.lang3.math.NumberUtils; | |
24 | - | |
25 | -import java.io.File; | |
26 | -import java.io.IOException; | |
27 | -import java.time.Instant; | |
28 | -import java.time.LocalDateTime; | |
29 | -import java.time.ZoneOffset; | |
30 | -import java.time.temporal.ChronoUnit; | |
31 | -import java.util.ArrayList; | |
32 | -import java.util.Arrays; | |
33 | -import java.util.Date; | |
34 | -import java.util.HashSet; | |
35 | -import java.util.List; | |
36 | -import java.util.Set; | |
37 | -import java.util.UUID; | |
38 | -import java.util.stream.Collectors; | |
39 | - | |
40 | -public class PostgresToCassandraTelemetryMigrator { | |
41 | - | |
42 | - private static final long LOG_BATCH = 1000000; | |
43 | - private static final long rowPerFile = 1000000; | |
44 | - | |
45 | - private static long linesProcessed = 0; | |
46 | - private static long linesMigrated = 0; | |
47 | - private static long castErrors = 0; | |
48 | - private static long castedOk = 0; | |
49 | - | |
50 | - private static long currentWriterCount = 1; | |
51 | - private static CQLSSTableWriter currentTsWriter = null; | |
52 | - private static CQLSSTableWriter currentPartitionWriter = null; | |
53 | - | |
54 | - private static Set<String> partitions = new HashSet<>(); | |
55 | - private static RelatedEntitiesParser entityIdsAndTypes; | |
56 | - private static DictionaryParser keyParser; | |
57 | - | |
58 | - public static void migrateTs(File sourceFile, | |
59 | - File outTsDir, | |
60 | - File outPartitionDir, | |
61 | - RelatedEntitiesParser allEntityIdsAndTypes, | |
62 | - DictionaryParser dictionaryParser, | |
63 | - boolean castStringsIfPossible) throws IOException { | |
64 | - long startTs = System.currentTimeMillis(); | |
65 | - long stepLineTs = System.currentTimeMillis(); | |
66 | - long stepOkLineTs = System.currentTimeMillis(); | |
67 | - LineIterator iterator = FileUtils.lineIterator(sourceFile); | |
68 | - currentTsWriter = WriterBuilder.getTsWriter(outTsDir); | |
69 | - currentPartitionWriter = WriterBuilder.getPartitionWriter(outPartitionDir); | |
70 | - entityIdsAndTypes = allEntityIdsAndTypes; | |
71 | - keyParser = dictionaryParser; | |
72 | - | |
73 | - boolean isBlockStarted = false; | |
74 | - boolean isBlockFinished = false; | |
75 | - | |
76 | - String line; | |
77 | - while (iterator.hasNext()) { | |
78 | - if (linesProcessed++ % LOG_BATCH == 0) { | |
79 | - System.out.println(new Date() + " linesProcessed = " + linesProcessed + " in " + (System.currentTimeMillis() - stepLineTs) + " castOk " + castedOk + " castErr " + castErrors); | |
80 | - stepLineTs = System.currentTimeMillis(); | |
81 | - } | |
82 | - | |
83 | - line = iterator.nextLine(); | |
84 | - | |
85 | - if (isBlockFinished) { | |
86 | - break; | |
87 | - } | |
88 | - | |
89 | - if (!isBlockStarted) { | |
90 | - if (isBlockStarted(line)) { | |
91 | - System.out.println(); | |
92 | - System.out.println(); | |
93 | - System.out.println(line); | |
94 | - System.out.println(); | |
95 | - System.out.println(); | |
96 | - isBlockStarted = true; | |
97 | - } | |
98 | - continue; | |
99 | - } | |
100 | - | |
101 | - if (isBlockFinished(line)) { | |
102 | - isBlockFinished = true; | |
103 | - } else { | |
104 | - try { | |
105 | - List<String> raw = Arrays.stream(line.trim().split("\t")) | |
106 | - .map(String::trim) | |
107 | - .filter(StringUtils::isNotEmpty) | |
108 | - .collect(Collectors.toList()); | |
109 | - List<Object> values = toValues(raw); | |
110 | - | |
111 | - if (currentWriterCount == 0) { | |
112 | - System.out.println(new Date() + " close writer " + new Date()); | |
113 | - currentTsWriter.close(); | |
114 | - currentTsWriter = WriterBuilder.getTsWriter(outTsDir); | |
115 | - } | |
116 | - | |
117 | - if (castStringsIfPossible) { | |
118 | - currentTsWriter.addRow(castToNumericIfPossible(values)); | |
119 | - } else { | |
120 | - currentTsWriter.addRow(values); | |
121 | - } | |
122 | - processPartitions(values); | |
123 | - currentWriterCount++; | |
124 | - if (currentWriterCount >= rowPerFile) { | |
125 | - currentWriterCount = 0; | |
126 | - } | |
127 | - | |
128 | - if (linesMigrated++ % LOG_BATCH == 0) { | |
129 | - System.out.println(new Date() + " migrated = " + linesMigrated + " in " + (System.currentTimeMillis() - stepOkLineTs) + " partitions = " + partitions.size()); | |
130 | - stepOkLineTs = System.currentTimeMillis(); | |
131 | - } | |
132 | - } catch (Exception ex) { | |
133 | - System.out.println(ex.getMessage() + " -> " + line); | |
134 | - } | |
135 | - | |
136 | - } | |
137 | - } | |
138 | - | |
139 | - long endTs = System.currentTimeMillis(); | |
140 | - System.out.println(); | |
141 | - System.out.println(new Date() + " Migrated rows " + linesMigrated + " in " + (endTs - startTs)); | |
142 | - System.out.println("Partitions collected " + partitions.size()); | |
143 | - | |
144 | - startTs = System.currentTimeMillis(); | |
145 | - for (String partition : partitions) { | |
146 | - String[] split = partition.split("\\|"); | |
147 | - List<Object> values = Lists.newArrayList(); | |
148 | - values.add(split[0]); | |
149 | - values.add(UUID.fromString(split[1])); | |
150 | - values.add(split[2]); | |
151 | - values.add(Long.parseLong(split[3])); | |
152 | - currentPartitionWriter.addRow(values); | |
153 | - } | |
154 | - currentPartitionWriter.close(); | |
155 | - endTs = System.currentTimeMillis(); | |
156 | - System.out.println(); | |
157 | - System.out.println(); | |
158 | - System.out.println(new Date() + " Migrated partitions " + partitions.size() + " in " + (endTs - startTs)); | |
159 | - | |
160 | - | |
161 | - currentTsWriter.close(); | |
162 | - System.out.println(); | |
163 | - System.out.println("Finished migrate Telemetry"); | |
164 | - } | |
165 | - | |
166 | - private static List<Object> castToNumericIfPossible(List<Object> values) { | |
167 | - try { | |
168 | - if (values.get(6) != null && NumberUtils.isNumber(values.get(6).toString())) { | |
169 | - Double casted = NumberUtils.createDouble(values.get(6).toString()); | |
170 | - List<Object> numeric = Lists.newArrayList(); | |
171 | - numeric.addAll(values); | |
172 | - numeric.set(6, null); | |
173 | - numeric.set(8, casted); | |
174 | - castedOk++; | |
175 | - return numeric; | |
176 | - } | |
177 | - } catch (Throwable th) { | |
178 | - castErrors++; | |
179 | - } | |
180 | - return values; | |
181 | - } | |
182 | - | |
183 | - private static void processPartitions(List<Object> values) { | |
184 | - String key = values.get(0) + "|" + values.get(1) + "|" + values.get(2) + "|" + values.get(3); | |
185 | - partitions.add(key); | |
186 | - } | |
187 | - | |
188 | - private static List<Object> toValues(List<String> raw) { | |
189 | - //expected Table structure: | |
190 | -// COPY public.ts_kv (entity_type, entity_id, key, ts, bool_v, str_v, long_v, dbl_v) FROM stdin; | |
191 | - | |
192 | - List<Object> result = new ArrayList<>(); | |
193 | - result.add(entityIdsAndTypes.getEntityType(raw.get(0))); | |
194 | - result.add(UUID.fromString(raw.get(0))); | |
195 | - result.add(keyParser.getKeyByKeyId(raw.get(1))); | |
196 | - | |
197 | - long ts = Long.parseLong(raw.get(2)); | |
198 | - long partition = toPartitionTs(ts); | |
199 | - result.add(partition); | |
200 | - result.add(ts); | |
201 | - | |
202 | - result.add(raw.get(3).equals("\\N") ? null : raw.get(3).equals("t") ? Boolean.TRUE : Boolean.FALSE); | |
203 | - result.add(raw.get(4).equals("\\N") ? null : raw.get(4)); | |
204 | - result.add(raw.get(5).equals("\\N") ? null : Long.parseLong(raw.get(5))); | |
205 | - result.add(raw.get(6).equals("\\N") ? null : Double.parseDouble(raw.get(6))); | |
206 | - result.add(raw.get(7).equals("\\N") ? null : raw.get(7)); | |
207 | - return result; | |
208 | - } | |
209 | - | |
210 | - private static long toPartitionTs(long ts) { | |
211 | - LocalDateTime time = LocalDateTime.ofInstant(Instant.ofEpochMilli(ts), ZoneOffset.UTC); | |
212 | - return time.truncatedTo(ChronoUnit.DAYS).withDayOfMonth(1).toInstant(ZoneOffset.UTC).toEpochMilli(); | |
213 | -// return TsPartitionDate.MONTHS.truncatedTo(time).toInstant(ZoneOffset.UTC).toEpochMilli(); | |
214 | - } | |
215 | - | |
216 | - private static boolean isBlockStarted(String line) { | |
217 | - return line.startsWith("COPY public.ts_kv ("); | |
218 | - } | |
219 | - | |
220 | - private static boolean isBlockFinished(String line) { | |
221 | - return StringUtils.isBlank(line) || line.equals("\\."); | |
222 | - } | |
223 | - | |
224 | -} |
... | ... | @@ -42,39 +42,43 @@ public class RelatedEntitiesParser { |
42 | 42 | |
43 | 43 | private void processAllTables(LineIterator lineIterator) { |
44 | 44 | String currentLine; |
45 | - while(lineIterator.hasNext()) { | |
46 | - currentLine = lineIterator.nextLine(); | |
47 | - if(currentLine.startsWith("COPY public.alarm")) { | |
48 | - processBlock(lineIterator, EntityType.ALARM); | |
49 | - } else if (currentLine.startsWith("COPY public.asset")) { | |
50 | - processBlock(lineIterator, EntityType.ASSET); | |
51 | - } else if (currentLine.startsWith("COPY public.customer")) { | |
52 | - processBlock(lineIterator, EntityType.CUSTOMER); | |
53 | - } else if (currentLine.startsWith("COPY public.dashboard")) { | |
54 | - processBlock(lineIterator, EntityType.DASHBOARD); | |
55 | - } else if (currentLine.startsWith("COPY public.device")) { | |
56 | - processBlock(lineIterator, EntityType.DEVICE); | |
57 | - } else if (currentLine.startsWith("COPY public.rule_chain")) { | |
58 | - processBlock(lineIterator, EntityType.RULE_CHAIN); | |
59 | - } else if (currentLine.startsWith("COPY public.rule_node")) { | |
60 | - processBlock(lineIterator, EntityType.RULE_NODE); | |
61 | - } else if (currentLine.startsWith("COPY public.tenant")) { | |
62 | - processBlock(lineIterator, EntityType.TENANT); | |
63 | - } else if (currentLine.startsWith("COPY public.tb_user")) { | |
64 | - processBlock(lineIterator, EntityType.USER); | |
65 | - } else if (currentLine.startsWith("COPY public.entity_view")) { | |
66 | - processBlock(lineIterator, EntityType.ENTITY_VIEW); | |
67 | - } else if (currentLine.startsWith("COPY public.widgets_bundle")) { | |
68 | - processBlock(lineIterator, EntityType.WIDGETS_BUNDLE); | |
69 | - } else if (currentLine.startsWith("COPY public.widget_type")) { | |
70 | - processBlock(lineIterator, EntityType.WIDGET_TYPE); | |
71 | - } else if (currentLine.startsWith("COPY public.tenant_profile")) { | |
72 | - processBlock(lineIterator, EntityType.TENANT_PROFILE); | |
73 | - } else if (currentLine.startsWith("COPY public.device_profile")) { | |
74 | - processBlock(lineIterator, EntityType.DEVICE_PROFILE); | |
75 | - } else if (currentLine.startsWith("COPY public.api_usage_state")) { | |
76 | - processBlock(lineIterator, EntityType.API_USAGE_STATE); | |
45 | + try { | |
46 | + while (lineIterator.hasNext()) { | |
47 | + currentLine = lineIterator.nextLine(); | |
48 | + if (currentLine.startsWith("COPY public.alarm")) { | |
49 | + processBlock(lineIterator, EntityType.ALARM); | |
50 | + } else if (currentLine.startsWith("COPY public.asset")) { | |
51 | + processBlock(lineIterator, EntityType.ASSET); | |
52 | + } else if (currentLine.startsWith("COPY public.customer")) { | |
53 | + processBlock(lineIterator, EntityType.CUSTOMER); | |
54 | + } else if (currentLine.startsWith("COPY public.dashboard")) { | |
55 | + processBlock(lineIterator, EntityType.DASHBOARD); | |
56 | + } else if (currentLine.startsWith("COPY public.device")) { | |
57 | + processBlock(lineIterator, EntityType.DEVICE); | |
58 | + } else if (currentLine.startsWith("COPY public.rule_chain")) { | |
59 | + processBlock(lineIterator, EntityType.RULE_CHAIN); | |
60 | + } else if (currentLine.startsWith("COPY public.rule_node")) { | |
61 | + processBlock(lineIterator, EntityType.RULE_NODE); | |
62 | + } else if (currentLine.startsWith("COPY public.tenant")) { | |
63 | + processBlock(lineIterator, EntityType.TENANT); | |
64 | + } else if (currentLine.startsWith("COPY public.tb_user")) { | |
65 | + processBlock(lineIterator, EntityType.USER); | |
66 | + } else if (currentLine.startsWith("COPY public.entity_view")) { | |
67 | + processBlock(lineIterator, EntityType.ENTITY_VIEW); | |
68 | + } else if (currentLine.startsWith("COPY public.widgets_bundle")) { | |
69 | + processBlock(lineIterator, EntityType.WIDGETS_BUNDLE); | |
70 | + } else if (currentLine.startsWith("COPY public.widget_type")) { | |
71 | + processBlock(lineIterator, EntityType.WIDGET_TYPE); | |
72 | + } else if (currentLine.startsWith("COPY public.tenant_profile")) { | |
73 | + processBlock(lineIterator, EntityType.TENANT_PROFILE); | |
74 | + } else if (currentLine.startsWith("COPY public.device_profile")) { | |
75 | + processBlock(lineIterator, EntityType.DEVICE_PROFILE); | |
76 | + } else if (currentLine.startsWith("COPY public.api_usage_state")) { | |
77 | + processBlock(lineIterator, EntityType.API_USAGE_STATE); | |
78 | + } | |
77 | 79 | } |
80 | + } finally { | |
81 | + lineIterator.close(); | |
78 | 82 | } |
79 | 83 | } |
80 | 84 | ... | ... |