|
3 | 3 | import com.linkedin.openhouse.jobs.spark.state.StateManager;
|
4 | 4 | import java.util.ArrayList;
|
5 | 5 | import java.util.List;
|
6 |
| -import java.util.concurrent.TimeUnit; |
7 | 6 | import lombok.extern.slf4j.Slf4j;
|
8 | 7 | import org.apache.commons.cli.CommandLine;
|
9 | 8 | import org.apache.commons.cli.Option;
|
|
18 | 17 | @Slf4j
|
19 | 18 | public class SnapshotsExpirationSparkApp extends BaseTableSparkApp {
|
20 | 19 | private final String granularity;
|
21 |
| - private final int count; |
| 20 | + private final int maxAge; |
| 21 | + private final int versions; |
| 22 | + |
| 23 | + public static class DEFAULT_CONFIGURATION { |
| 24 | + public static final int MAX_AGE = 3; |
| 25 | + public static final String GRANULARITY = "DAYS"; |
| 26 | + public static final int VERSIONS = 0; |
| 27 | + } |
| 28 | + |
| 29 | + private static final String DEFAULT_GRANULARITY = ""; |
| 30 | + |
| 31 | + // By default do not define versions, and only retain snapshots based on max age |
| 32 | + private static final String DEFAULT_VERSIONS = "0"; |
22 | 33 |
|
23 | 34 | public SnapshotsExpirationSparkApp(
|
24 |
| - String jobId, StateManager stateManager, String fqtn, String granularity, int count) { |
| 35 | + String jobId, |
| 36 | + StateManager stateManager, |
| 37 | + String fqtn, |
| 38 | + int maxAge, |
| 39 | + String granularity, |
| 40 | + int versions) { |
25 | 41 | super(jobId, stateManager, fqtn);
|
26 |
| - this.granularity = granularity; |
27 |
| - this.count = count; |
| 42 | + if (maxAge == 0 && versions == 0) { |
| 43 | + this.maxAge = DEFAULT_CONFIGURATION.MAX_AGE; |
| 44 | + this.granularity = DEFAULT_CONFIGURATION.GRANULARITY; |
| 45 | + this.versions = DEFAULT_CONFIGURATION.VERSIONS; |
| 46 | + } else { |
| 47 | + this.granularity = granularity; |
| 48 | + this.maxAge = maxAge; |
| 49 | + this.versions = versions; |
| 50 | + } |
28 | 51 | }
|
29 | 52 |
|
30 | 53 | @Override
|
31 | 54 | protected void runInner(Operations ops) {
|
32 | 55 | log.info(
|
33 |
| - "Snapshot expiration app start for table {}, expiring older than {} {}s", |
| 56 | + "Snapshot expiration app start for table {}, expiring older than {} {}s or with more than {} versions", |
34 | 57 | fqtn,
|
35 |
| - count, |
36 |
| - granularity); |
37 |
| - long expireBeforeTimestampMs = System.currentTimeMillis() - TimeUnit.DAYS.toMillis(count); |
38 |
| - log.info("Expire snapshots before timestamp ms {}", expireBeforeTimestampMs); |
39 |
| - ops.expireSnapshots(fqtn, expireBeforeTimestampMs); |
| 58 | + maxAge, |
| 59 | + granularity, |
| 60 | + versions); |
| 61 | + ops.expireSnapshots(fqtn, maxAge, granularity, versions); |
40 | 62 | }
|
41 | 63 |
|
42 | 64 | public static void main(String[] args) {
|
43 | 65 | List<Option> extraOptions = new ArrayList<>();
|
44 | 66 | extraOptions.add(new Option("t", "tableName", true, "Fully-qualified table name"));
|
| 67 | + extraOptions.add( |
| 68 | + new Option("a", "maxAge", true, "Delete snapshots older than <maxAge> <granularity>s")); |
45 | 69 | extraOptions.add(new Option("g", "granularity", true, "Granularity: day"));
|
46 | 70 | extraOptions.add(
|
47 |
| - new Option("c", "count", true, "Delete snapshots older than <count> <granularity>s")); |
| 71 | + new Option("v", "versions", true, "Number of versions to keep after snapshot expiration")); |
48 | 72 | CommandLine cmdLine = createCommandLine(args, extraOptions);
|
| 73 | + |
49 | 74 | SnapshotsExpirationSparkApp app =
|
50 | 75 | new SnapshotsExpirationSparkApp(
|
51 | 76 | getJobId(cmdLine),
|
52 | 77 | createStateManager(cmdLine),
|
53 | 78 | cmdLine.getOptionValue("tableName"),
|
54 |
| - cmdLine.getOptionValue("granularity"), |
55 |
| - Integer.parseInt(cmdLine.getOptionValue("count"))); |
| 79 | + Integer.parseInt(cmdLine.getOptionValue("maxAge", "0")), |
| 80 | + cmdLine.getOptionValue("granularity", ""), |
| 81 | + Integer.parseInt(cmdLine.getOptionValue("minVersions", "0"))); |
56 | 82 | app.run();
|
57 | 83 | }
|
58 | 84 | }
|
0 commit comments