LinuxForHealth
diff --git a/‎fhir-flow/README.md
+17-15 b/‎fhir-flow/README.md
+17-15
diff --git a/‎fhir-flow/src/main/java/com/ibm/fhir/flow/api/FlowInteraction.java
+8-8 b/‎fhir-flow/src/main/java/com/ibm/fhir/flow/api/FlowInteraction.java
+8-8
diff --git a/‎fhir-flow/src/main/java/com/ibm/fhir/flow/api/ICheckpointTracker.java
+10-12 b/‎fhir-flow/src/main/java/com/ibm/fhir/flow/api/ICheckpointTracker.java
+10-12
diff --git a/‎fhir-flow/src/main/java/com/ibm/fhir/flow/api/IFlowInteractionHandler.java
+4-4 b/‎fhir-flow/src/main/java/com/ibm/fhir/flow/api/IFlowInteractionHandler.java
+4-4
diff --git a/‎fhir-flow/src/main/java/com/ibm/fhir/flow/app/Main.java
+21-5 b/‎fhir-flow/src/main/java/com/ibm/fhir/flow/app/Main.java
+21-5
@@ -92,19 +92,19 @@ java \
   --downstream-tenant tenant2
 ```
 
-The fhir-flow application periodically writes out CHECKPOINT values to the log file:
+The fhir-flow application periodically writes opaque CHECKPOINT values to the log file:
 
 ```
-2022-04-19 17:03:11.982 00000001    INFO impl.UpstreamFHIRHistoryReader CHECKPOINT: 26573
+2022-04-19 17:03:11.982 00000001    INFO impl.UpstreamFHIRHistoryReader CHECKPOINT: X2NvdW50PTUxMiZfZXhjbHVkZVRyYW5zYWN0aW9uVGltZW91dFdpbmRvdz10cnVlJl9zb3J0PW5vbmUmX2NoYW5nZUlkTWFya2VyPTEwMjM=
 ```
 
 When the run duration time is reached, the application waits for any pending work to complete then writes a FINAL CHECKPOINT message:
 
 ```
-2022-04-19 17:03:12.983 00000001    INFO impl.UpstreamFHIRHistoryReader FINAL CHECKPOINT: 27723
+2022-04-19 17:03:12.983 00000001    INFO impl.UpstreamFHIRHistoryReader FINAL CHECKPOINT: X2NvdW50PTUxMiZfZXhjbHVkZVRyYW5zYWN0aW9uVGltZW91dFdpbmRvdz10cnVlJl9zb3J0PW5vbmUmX2NoYW5nZUlkTWFya2VyPTEwMjM=
 ```
 
-The (Java `long`) value can be used to resume processing using the `--change-id-marker` parameter:
+The checkpoint value can be used to resume processing using the `--from-checkpoint` parameter:
 
 ```
 java \
@@ -115,25 +115,27 @@ java \
   --upstream-tenant tenant1 \
   --downstream-properties local.properties \
   --downstream-tenant tenant2 \
-  --change-id-marker 27723
+  --from-checkpoint "X2NvdW50PTUxMiZfZXhjbHVkZVRyYW5zYWN0aW9uVGltZW91dFdpbmRvdz10cnVlJl9zb3J0PW5vbmUmX2NoYW5nZUlkTWFya2VyPTEwMjM="
 ```
 
 ## Command Line Options
 
 | Option | Description |
 | ------ | ----------- |
-| --run-duration | The number of seconds to run before terminating |
-| --upstream-properties | A Java properties file containing connection details for the upstream FHIR server |
-| --upstream-tenant | The IBM FHIR Server upstream tenant name |
-| --downstream-properties | A Java properties file containing connection details for the downstream FHIR server |
-| --downstresam-tenant | The IBM FHIR Server downstream tenant name |
-| --change-id-marker | Start processing from this previously reported checkpoint value |
-| --partition-count | The number of parallel partitions to use for writing to the downstream FHIR server |
-| --partition-queue-size | The number of interactions that can be queued into any partition before blocking further fetches. This puts an upper bound on memory consumption when changes can be fetched more quickly than written to the downstream system, which is often the case. |
-| --reader-pool-size | The size of the thread-pool used to support asynchronous reading of upstream resources |
-| --drain-for-seconds | After the run-duration time has elapsed, wait this number of seconds for the downstream partition queues to empty before exiting |
+| --run-duration {seconds} | The number of seconds to run before terminating |
+| --upstream-properties {properties-file} | A Java properties file containing connection details for the upstream FHIR server |
+| --upstream-tenant {tenant-name} | The IBM FHIR Server upstream tenant name |
+| --downstream-properties {properties-file} | A Java properties file containing connection details for the downstream FHIR server |
+| --downstream-tenant {tenant-name} | The IBM FHIR Server downstream tenant name |
+| --from-checkpoint {checkpoint-value} | Start processing from this previously reported checkpoint value |
+| --partition-count {n} | The number of parallel partitions to use for writing to the downstream FHIR server |
+| --partition-queue-size {n} | The number of interactions that can be queued into any partition before blocking further fetches. This puts an upper bound on memory consumption when changes can be fetched more quickly than written to the downstream system, which is often the case. |
+| --reader-pool-size {n} | The size of the thread-pool used to support asynchronous reading of upstream resources |
+| --drain-for-seconds {seconds} | After the run-duration time has elapsed, wait this number of seconds for the downstream partition queues to empty before exiting |
 | --parse-resource | Parse each resource received from the upstream system. The default mode is to not parse the resource, and treat the payload as an opaque string which is simply passed from upstream to downstream - thus saving a significant amount of CPU and pressure on the GC. |
 | --log-data | When in log-only mode (not writing to an actual downstream system), include the resource payload data when logging each interaction. |
+| --exclude-transaction-window | When the upstream system is an IBM FHIR Server, use the `_excludeTransactionTimeoutWindow=true` query parameter when fetching history to avoid potential issues with missing data in high-volume scenarios. |
+| --prefer-return-minimal | Use the `Prefer: return=minimal` header for upstream history requests. The IBM FHIR Server uses this as an optimization to skip inclusion of the resource in the response Bundle. Only metadata related to the change history is returned, allowing the resource data to be read separately using a VREAD interaction. Better throughput can be achieved by performing the VREAD interactions in parallel. |
 
 # Ideas for Future Development
 
 
@@ -10,8 +10,8 @@
  * Represents a resource being passed from the reader to the writer
  */
 public abstract class FlowInteraction {
-    // the change sequence number reported by the upstream server
-    private final long changeId;
+    // the entry id, used to help correlate this interaction with the bundle content
+    private final String entryId;
 
     // the ticket being used to track completion of this interaction
     private final ITrackerTicket trackerTicket;
@@ -22,12 +22,12 @@ public abstract class FlowInteraction {
     /**
      * Protected constructor
      * 
-     * @param changeId
+     * @param entryId
      * @param trackerTicket
      * @param identifier
      */
-    protected FlowInteraction(long changeId, ITrackerTicket trackerTicket, ResourceIdentifier identifier) {
-        this.changeId = changeId;
+    protected FlowInteraction(String entryId, ITrackerTicket trackerTicket, ResourceIdentifier identifier) {
+        this.entryId = entryId;
         this.trackerTicket = trackerTicket;
         this.identifier = identifier;
     }
@@ -52,11 +52,11 @@ public ResourceIdentifier getIdentifier() {
     }
 
     /**
-     * Getter for the changeId value
+     * Getter for the entryId value
      * @return
      */
-    public long getChangeId() {
-        return this.changeId;
+    public String getEntryId() {
+        return this.entryId;
     }
 
     /**
 
@@ -6,37 +6,35 @@
 
 package com.ibm.fhir.flow.api;
 
-import java.util.List;
-
 /**
  * Supports tracking of work units so that a checkpoint can be written
  * periodically to indicate that all work up to a given point-in-time
  * has been completed.
+ * 
+ *
+ * @param <T> the type of the value used to represent a checkpoint
  */
-public interface ICheckpointTracker {
+public interface ICheckpointTracker<T> {
 
     /**
      * Add this request to the queue
      * @param requestId
      * @param an {@link ITrackerTicket} which can be used to signal completion of the work item (thread-safe)
+     * @param workItems how many individual pieces of work are associated with this checkpoint value
      */
-    ITrackerTicket track(long requestId);
+    ITrackerTicket track(T checkpointValue, int workItems);
 
     /**
-     * Track all of the requestIds in the given list. The output list
-     * will be in the same order as the input list. All entries will
-     * be added using a single monitor lock, which should be more
-     * efficient than synchronizing on each individual item
-     * @param requestIds
+     * Get the current checkpoint value
      * @return
      */
-    List<ITrackerTicket> track(List<Long> requestIds);
+    T getCheckpoint();
 
     /**
-     * Get the current checkpoint value
+     * Get the total number of entries processed up to the current checkpoint
      * @return
      */
-    long getCheckpoint();
+    long getProcessed();
 
     /**
      * Is the tracker queue empty
 
@@ -15,17 +15,17 @@ public interface IFlowInteractionHandler {
 
     /**
      * Perform a logical DELETE interaction
-     * @param changeId
+     * @param entryId
      * @param identifier
      */
-    void delete(long changeId, ResourceIdentifier identifier);
+    void delete(String entryId, ResourceIdentifier identifier);
 
     /**
      * Perform a create-or-update (PUT) interaction
-     * @param changeId
+     * @param entryId
      * @param identifier
      * @param resourceData
      * @param resource
      */
-    void createOrUpdate(long changeId, ResourceIdentifier identifier, String resourceData, Resource resource);
+    void createOrUpdate(String entryId, ResourceIdentifier identifier, String resourceData, Resource resource);
 }
@@ -67,14 +67,20 @@ public class Main {
     private int resourcesPerHistoryCall = 512;
 
     // Start processing from this point in the change stream
-    private long changeIdMarker = -1;
+    private String startFromCheckpoint = null;
 
     // How many seconds to run for (default forever)
     private long runDurationSeconds = -1;
 
     // How many seconds to wait for queued work to complete after the scan completes
     private long drainForSeconds = 600;
 
+    // Request upstream (IBM FHIR Server) system to exclude the transaction timeout window
+    private boolean excludeTransactionWindow = false;
+
+    // Use the Prefer: return=minimal header to optimize upstream history request
+    private boolean preferReturnMinimal = false;
+
     /**
      * Parse the command line arguments
      * @param args
@@ -132,11 +138,11 @@ public void parseArgs(String[] args) {
                     throw new IllegalArgumentException("missing value for --reader-pool-size");
                 }
                 break;
-            case "--change-id-marker":
+            case "--from-checkpoint":
                 if (i < args.length + 1) {
-                    this.changeIdMarker = Long.parseLong(args[++i]);
+                    this.startFromCheckpoint = args[++i];
                 } else {
-                    throw new IllegalArgumentException("missing value for --change-id-marker");
+                    throw new IllegalArgumentException("missing value for --from-checkpoint");
                 }
                 break;
             case "--run-duration":
@@ -153,9 +159,15 @@ public void parseArgs(String[] args) {
                     throw new IllegalArgumentException("missing value for --drain-for-seconds");
                 }
                 break;
+            case "--exclude-transaction-window":
+                this.excludeTransactionWindow = true;
+                break;
             case "--parse-resource":
                 this.parseResource = true;
                 break;
+            case "--prefer-return-minimal":
+                this.preferReturnMinimal = true;
+                break;
             case "--log-data":
                 this.logData = true;
                 break;
@@ -221,7 +233,11 @@ public void process() {
             downstreamWriter = new DownstreamLogWriter(partitionCount, partitionQueueSize, this.logData);
         }
 
-        UpstreamFHIRHistoryReader historyReader = new UpstreamFHIRHistoryReader(this.resourcesPerHistoryCall, this.changeIdMarker, this.drainForSeconds);
+        UpstreamFHIRHistoryReader historyReader = new UpstreamFHIRHistoryReader(this.resourcesPerHistoryCall, 
+                this.startFromCheckpoint, 
+                this.excludeTransactionWindow,
+                this.preferReturnMinimal,
+                this.drainForSeconds);
         historyReader.setClient(upstreamClient);
         historyReader.setFlowPool(readerPool);
         historyReader.setFlowWriter(downstreamWriter);