-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
[zylk] Aian Cantabrana
committed
Aug 5, 2020
1 parent
b008c07
commit 95625fc
Showing
4 changed files
with
174 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
59 changes: 59 additions & 0 deletions
59
src/main/java/net/zylklab/flink/sandbox/unordered_events/BufferedKeyedProcessFunction.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
package net.zylklab.flink.sandbox.unordered_events; | ||
|
||
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import org.apache.flink.api.common.state.MapState;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import net.zylklab.flink.sandbox.broadcaststate.pojo.Event;
|
||
/** | ||
* This Class provides an implementation on top of Flink's KeyedProcessFunction that acts as a buffer for ordering unordered events | ||
*/ | ||
public class BufferedKeyedProcessFunction extends KeyedProcessFunction<String, Event, Event>{ | ||
|
||
private static final long serialVersionUID = 1L; | ||
|
||
private static final Logger _log = LoggerFactory.getLogger(BufferedKeyedProcessFunction.class); | ||
|
||
private HashMap<Long, Event> eventsMap = new HashMap<Long, Event>(); | ||
|
||
@Override | ||
public void processElement( | ||
Event in, | ||
KeyedProcessFunction<String, Event, Event>.Context ctx, | ||
Collector<Event> out) | ||
throws Exception { | ||
|
||
Long ts = in.getTs(); | ||
if (ctx.timerService().currentWatermark() < ts) { | ||
// Put event in the map keyed by its timestamp | ||
eventsMap.put(ts, in); | ||
|
||
// Register an Event Timer to be triggered when the watermark reaches this timestamp | ||
ctx.timerService().registerEventTimeTimer(ts); | ||
} else { | ||
_log.warn("Current watermark has already passed this event!"); | ||
} | ||
} | ||
|
||
@Override | ||
public void onTimer( | ||
long timestamp, | ||
KeyedProcessFunction<String, Event, Event>.OnTimerContext ctx, | ||
Collector<Event> out) | ||
throws Exception { | ||
|
||
// Emit event with this timestamp | ||
if (eventsMap.containsKey(timestamp)) { | ||
out.collect(eventsMap.get(timestamp)); | ||
eventsMap.remove(timestamp); | ||
} else { | ||
_log.info("onTimer triggered but no value set for timestamp " + timestamp); | ||
} | ||
} | ||
|
||
|
||
} |
81 changes: 81 additions & 0 deletions
81
src/main/java/net/zylklab/flink/sandbox/unordered_events/UnorderedEventsJob.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
package net.zylklab.flink.sandbox.unordered_events; | ||
|
||
import java.time.Duration; | ||
import java.util.Properties; | ||
import java.util.concurrent.TimeUnit; | ||
|
||
import org.apache.flink.api.common.eventtime.WatermarkStrategy; | ||
import org.apache.flink.streaming.api.TimeCharacteristic; | ||
import org.apache.flink.streaming.api.datastream.DataStreamSource; | ||
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; | ||
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; | ||
import org.apache.flink.streaming.api.functions.KeyedProcessFunction; | ||
import org.apache.flink.streaming.api.watermark.Watermark; | ||
import org.apache.flink.streaming.api.windowing.time.Time; | ||
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; | ||
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumerBase; | ||
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import net.zylklab.flink.sandbox.broadcaststate.pojo.Event; | ||
import net.zylklab.flink.sandbox.cep_examples.util.avro.AvroDeserializationSchema; | ||
import net.zylklab.flink.sandbox.cep_examples.util.avro.AvroSerializationSchema; | ||
|
||
|
||
public class UnorderedEventsJob { | ||
|
||
private static final Logger _log = LoggerFactory.getLogger(UnorderedEventsJob.class); | ||
|
||
private static final Integer PARALLELISM = 2; | ||
private static final Integer WATERMARK_INTERVAL_MS = 500; | ||
private static final Integer MAX_OUT_OF_ORDERNESS_MS = 1000; | ||
private static final Integer MAX_WAIT_FOR_EVENTS_SEC = 60; | ||
|
||
private static final String BOOTSTRAP_SERVERS = "amaterasu001.bigdata.zylk.net:6667, amaterasu002.bigdata.zylk.net:6667"; | ||
private static final String GROUP_ID = "flink_unordered"; | ||
private static final String SOURCE_TOPIC = "UNORDERED_EVENTS"; | ||
private static final String SINK_TOPIC = "ORDERED_EVENTS"; | ||
|
||
public static void main(String[] args) throws Exception { | ||
_log.debug("Starting application"); | ||
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); | ||
|
||
|
||
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); | ||
env.getConfig().setAutoWatermarkInterval(WATERMARK_INTERVAL_MS); | ||
|
||
env.setParallelism(PARALLELISM); | ||
|
||
_log.debug("Environment created."); | ||
UnorderedEventsJob w = new UnorderedEventsJob(); | ||
w.addJob(env); | ||
env.execute("EventTimeWindowGroupAndProcessSubJob"); | ||
|
||
} | ||
|
||
private void addJob(StreamExecutionEnvironment env) { | ||
|
||
// Kafka consumer properties | ||
Properties props = new Properties(); | ||
props.setProperty("bootstrap.servers", BOOTSTRAP_SERVERS); | ||
props.setProperty("group.id", GROUP_ID); | ||
|
||
FlinkKafkaConsumerBase<Event> kafkaSource = new FlinkKafkaConsumer<>(SOURCE_TOPIC, new AvroDeserializationSchema<>(Event.class), props) | ||
.assignTimestampsAndWatermarks(WatermarkStrategy | ||
.<Event>forBoundedOutOfOrderness(Duration.ofMillis(MAX_OUT_OF_ORDERNESS_MS)) | ||
.withTimestampAssigner((event, ts) -> event.getTs()) // Timestamp extractor | ||
.withIdleness(Duration.ofSeconds(MAX_WAIT_FOR_EVENTS_SEC)) // Wait for a partition when it stop sending events | ||
); | ||
|
||
FlinkKafkaProducer<Event> kafkaSink = new FlinkKafkaProducer<>(SINK_TOPIC, new AvroSerializationSchema<>(Event.class), props); | ||
|
||
DataStreamSource<Event> stream = env.addSource(kafkaSource); | ||
|
||
stream | ||
.keyBy(event -> event.getVarId()) | ||
.process(new BufferedKeyedProcessFunction()) | ||
.addSink(kafkaSink); | ||
|
||
} | ||
} |