cefriel · AuPath · Mar 26, 2025 · Apr 9, 2025 · Apr 10, 2025 · May 5, 2025
diff --git a/pom.xml b/pom.xml
@@ -115,7 +115,7 @@
     <dependency>
       <groupId>de.siegmar</groupId>
       <artifactId>fastcsv</artifactId>
-      <version>2.2.2</version>
+      <version>3.6.0</version>
     </dependency>
     <dependency>
       <groupId>mysql</groupId>

diff --git a/src/main/java/com/cefriel/template/io/csv/CSVReader.java b/src/main/java/com/cefriel/template/io/csv/CSVReader.java
@@ -16,113 +16,114 @@
 
 package com.cefriel.template.io.csv;
 
-import com.cefriel.template.io.Reader;
 import com.cefriel.template.utils.TemplateFunctions;
-import de.siegmar.fastcsv.reader.NamedCsvReader;
-import de.siegmar.fastcsv.reader.NamedCsvRow;
+import de.siegmar.fastcsv.reader.CsvReader;
+import de.siegmar.fastcsv.reader.NamedCsvRecord;
 
 import java.io.File;
 import java.io.IOException;
 import java.nio.file.Files;
-import java.nio.file.Path;
 import java.util.*;
+import java.util.stream.Collectors;
 
-public class CSVReader implements Reader {
+/**
+ * CSV reader that loads every record into memory when it is constructed, so
+ * dataframes can be requested any number of times.
+ */
+public class CSVReader extends CSVReaderAbstract {
 
-    public NamedCsvReader document;
-    private boolean hashVariable;
-    private boolean onlyDistinct;
+    /** All data records of the CSV input, in input order; empty when the input has no data rows. */
+    private final List<NamedCsvRecord> csvRecords;
 
+    /**
+     * Reads the whole CSV file into memory.
+     *
+     * @param file CSV file to load
+     * @throws IOException if the file cannot be read
+     * @throws IllegalArgumentException if the file does not exist
+     */
     public CSVReader(File file) throws IOException {
-        if (Files.exists(file.toPath()))
-            this.document = NamedCsvReader.builder().build(file.toPath());
-        else
+        if (Files.exists(file.toPath())) {
+            try (CsvReader<NamedCsvRecord> input = CsvReader.builder().ofNamedCsvRecord(file.toPath())) {
+                this.csvRecords = input.stream().collect(Collectors.toList());
+            }
+            // The header is taken from the first record. With no data rows there is no
+            // record to ask, so use an empty header instead of failing on get(0).
+            headers = csvRecords.isEmpty() ? Collections.emptyList() : csvRecords.get(0).getHeader();
+        } else {
             throw new IllegalArgumentException("File does not exist: " + file.getPath());
+        }
     }
 
-    public CSVReader(String csv) {
-        this.document = NamedCsvReader.builder().build(csv);
-    }
-    @Override
-    public void setQueryHeader(String header) {
-
-    }
-
-    @Override
-    public void appendQueryHeader(String s) {
-
+    /**
+     * Parses CSV text and keeps all of its records in memory.
+     *
+     * @param csv CSV content to parse
+     * @throws IOException if the content cannot be read
+     */
+    public CSVReader(String csv) throws IOException {
+        try (CsvReader<NamedCsvRecord> input = CsvReader.builder().ofNamedCsvRecord(csv)) {
+            this.csvRecords = input.stream().collect(Collectors.toList());
+        }
+        // With no data rows there is no record to take the header from; use an empty header.
+        headers = csvRecords.isEmpty() ? Collections.emptyList() : csvRecords.get(0).getHeader();
     }
 
+    /**
+     * Returns every column of every record.
+     *
+     * @return one map per record; an empty list when the input has no data rows
+     */
     public List<Map<String, String>> getDataframe() throws Exception {
-        Set<String> headers = this.document.getHeader();
-        String[] columns = headers.toArray(new String[0]);
-        return getDataframe(columns);
-    }
-
-    @Override
-    public List<Map<String, String>> getDataframe(String query) throws Exception {
-        String[] columns = query.split(",");
-        return getDataframe(columns);
+        if (csvRecords.isEmpty()) {
+            return Collections.emptyList();
+        }
+        return getDataframe(headers.toArray(new String[0]));
     }
 
+    /**
+     * Returns the requested columns of every record.
+     *
+     * @param columns column names to include; {@code null}, no names, or a single empty
+     *                name select all columns
+     * @return one map per record, keyed by column name or by
+     *         {@code TemplateFunctions.literalHash(column)} when hashVariable is set;
+     *         duplicates are removed when onlyDistinct is set; empty when there are no data rows
+     * @throws IllegalArgumentException if a requested column is not in the header
+     */
     public List<Map<String, String>> getDataframe(String... columns) throws Exception {
-        Set<String> headers = this.document.getHeader();
-
-        // Return entire dataframe if no columns are provided or if empty string is provided
-        if ((columns == null || columns.length == 0) || (columns.length == 1 && columns[0].isEmpty()))
+        if (csvRecords.isEmpty()) {
+            return Collections.emptyList();
+        }
+
+        if ((columns == null || columns.length == 0) || (columns.length == 1 && columns[0].isEmpty())) {
             return getDataframe();
-
+        }
+
         int columnCount = 0;
-        for(String c : columns) {
-            if (!headers.contains(c))
+        for (String c : columns) {
+            if (!headers.contains(c)) {
                 throw new IllegalArgumentException("Column " + c + " not found");
-            columnCount += 1;
+            }
+            columnCount++;
         }
-        // TODO Check if rowCount can be obtained to properly initialise the collection capacity
-        Collection<Map<String,String>> dataframe;
-        if (onlyDistinct)
-            dataframe = new ArrayList<>();
-        else
-            dataframe = new HashSet<>();
-        for (NamedCsvRow row : this.document) {
-            HashMap<String, String> map = new HashMap<>(columnCount);
+
+        // initialize collection with max possible size. Could be fewer rows if only distinct rows are requested in the dataframe.
+        // LinkedHashSet (rather than HashSet) keeps distinct rows in CSV order.
+        int rowCount = csvRecords.size();
+        Collection<Map<String, String>> dataframe = onlyDistinct ? new LinkedHashSet<>(rowCount) : new ArrayList<>(rowCount);
+
+        for (NamedCsvRecord row : csvRecords) {
+            Map<String, String> map = new HashMap<>(columnCount);
             for (String c : columns) {
-                if(hashVariable)
+                if (hashVariable) {
                     map.put(TemplateFunctions.literalHash(c), row.getField(c));
-                else
+                } else {
                     map.put(c, row.getField(c));
+                }
             }
             dataframe.add(map);
         }
         return new ArrayList<>(dataframe);
     }
-
-    @Override
-    public void debugQuery(String query, Path destinationPath) throws Exception {
-
-    }
-    @Override
-    public void setVerbose(boolean verbose) {}
-
-    /**
-     * Not implemented for CSVReader yet.
-     * @param outputFormat String identifying the output format
-     */
-    @Override
-    public void setOutputFormat(String outputFormat) { return;}
-
-    @Override
-    public void setHashVariable(boolean hashVariable) {
-        this.hashVariable = hashVariable;
-    }
-
-    @Override
-    public void setOnlyDistinct(boolean onlyDistinct) {
-        this.onlyDistinct = onlyDistinct;
-    }
-
-    @Override
-    public void shutDown() {
-
-    }
 }
diff --git a/src/main/java/com/cefriel/template/io/csv/CSVReaderAbstract.java b/src/main/java/com/cefriel/template/io/csv/CSVReaderAbstract.java
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2019-2023 Cefriel.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.cefriel.template.io.csv;
+
+import com.cefriel.template.io.Reader;
+import com.cefriel.template.utils.TemplateFunctions;
+import de.siegmar.fastcsv.reader.NamedCsvRecord;
+
+import java.nio.file.Path;
+import java.util.*;
+
+/**
+ * Shared base of the CSV readers. It stores the header and the dataframe options and
+ * implements the remaining Reader operations as no-ops.
+ */
+public abstract class CSVReaderAbstract implements Reader {
+
+    /** Column names of the CSV input, set by the concrete reader. */
+    List<String> headers;
+    /** When set, dataframe keys are hashed column names instead of the plain names. */
+    boolean hashVariable;
+    /** When set, duplicate rows are dropped from the dataframe. */
+    boolean onlyDistinct;
+
+    /** No-op for CSV readers. */
+    @Override
+    public void setQueryHeader(String header) {}
+
+    /** No-op for CSV readers. */
+    @Override
+    public void appendQueryHeader(String s) {}
+
+    /**
+     * Returns every column of every record.
+     *
+     * @return one map per record
+     */
+    public abstract List<Map<String, String>> getDataframe() throws Exception ;
+
+    /**
+     * Returns the columns named in a comma-separated query.
+     *
+     * @param query column names separated by commas, used exactly as written (no trimming);
+     *              {@code null} selects all columns
+     * @return one map per record
+     */
+    @Override
+    public List<Map<String, String>> getDataframe(String query) throws Exception {
+        // split() on a null query would throw a NullPointerException; answer it with
+        // all columns, the same result the subclasses give for an empty column list.
+        if (query == null) {
+            return getDataframe();
+        }
+        return getDataframe(query.split(","));
+    }
+
+    /**
+     * Returns the requested columns of every record.
+     *
+     * @param columns column names to include
+     * @return one map per record
+     */
+    public abstract List<Map<String, String>> getDataframe(String... columns) throws Exception ;
+
+    /** No-op for CSV readers. */
+    @Override
+    public void debugQuery(String query, Path destinationPath) throws Exception {}
+
+    /** No-op for CSV readers. */
+    @Override
+    public void setVerbose(boolean verbose) {}
+
+    /**
+     * No-op for CSV readers.
+     *
+     * @param outputFormat String identifying the output format
+     */
+    @Override
+    public void setOutputFormat(String outputFormat) {}
+
+    /** Chooses whether dataframe keys are hashed column names. */
+    @Override
+    public void setHashVariable(boolean hashVariable) {
+        this.hashVariable = hashVariable;
+    }
+
+    /** Chooses whether duplicate rows are dropped from the dataframe. */
+    @Override
+    public void setOnlyDistinct(boolean onlyDistinct) {
+        this.onlyDistinct = onlyDistinct;
+    }
+
+    /** No-op: this base class holds no resources to release. */
+    @Override
+    public void shutDown() {}
+}
diff --git a/src/main/java/com/cefriel/template/io/csv/CSVStreamReader.java b/src/main/java/com/cefriel/template/io/csv/CSVStreamReader.java
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2019-2023 Cefriel.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.cefriel.template.io.csv;
+
+import com.cefriel.template.utils.TemplateFunctions;
+import de.siegmar.fastcsv.reader.CsvReader;
+import de.siegmar.fastcsv.reader.NamedCsvRecord;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.*;
+
+/**
+ * CSV reader that parses records only while a dataframe is being built, instead of
+ * keeping every record in memory.
+ */
+public class CSVStreamReader extends CSVReaderAbstract {
+
+    /** Reader used for the next dataframe; re-opened from the source once it has been consumed. */
+    public CsvReader<NamedCsvRecord> document;
+
+    /** Source file, or {@code null} when this reader was built from CSV text. */
+    private final File sourceFile;
+    /** Source CSV text, or {@code null} when this reader was built from a file. */
+    private final String sourceCsv;
+    /** Whether {@link #document} has already been iterated by a dataframe request. */
+    private boolean consumed;
+
+    /**
+     * Opens a CSV file for streaming and reads its header.
+     *
+     * @param file CSV file to read
+     * @throws IOException if the file cannot be opened or read
+     * @throws IllegalArgumentException if the file does not exist
+     * @throws NoSuchElementException if the file contains no records
+     */
+    public CSVStreamReader(File file) throws IOException {
+        if (!Files.exists(file.toPath())) {
+            throw new IllegalArgumentException("File does not exist: " + file.getPath());
+        }
+        this.sourceFile = file;
+        this.sourceCsv = null;
+        headers = readHeaders();
+        this.document = openDocument();
+    }
+
+    /**
+     * Prepares CSV text for streaming and reads its header.
+     *
+     * @param csv CSV content to parse
+     * @throws IOException if the content cannot be read
+     * @throws NoSuchElementException if the content contains no records
+     */
+    public CSVStreamReader(String csv) throws IOException {
+        this.sourceFile = null;
+        this.sourceCsv = csv;
+        headers = readHeaders();
+        this.document = openDocument();
+    }
+
+    /** Opens a new reader positioned at the first record of the source. */
+    private CsvReader<NamedCsvRecord> openDocument() throws IOException {
+        return sourceFile != null
+                ? CsvReader.builder().ofNamedCsvRecord(sourceFile.toPath())
+                : CsvReader.builder().ofNamedCsvRecord(sourceCsv);
+    }
+
+    /** Reads the header from the first record using a short-lived reader that is closed right away. */
+    private List<String> readHeaders() throws IOException {
+        try (CsvReader<NamedCsvRecord> input = openDocument()) {
+            return input.stream().findFirst()
+                    .orElseThrow(() -> new NoSuchElementException("CSV input contains no records"))
+                    .getHeader();
+        }
+    }
+
+    /**
+     * Returns every column of every record.
+     *
+     * @return one map per record, keyed as described in {@link #getDataframe(String...)}
+     */
+    public List<Map<String, String>> getDataframe() throws Exception {
+        String[] columns = headers.toArray(new String[0]);
+        return getDataframe(columns);
+    }
+
+    /**
+     * Returns the requested columns of every record.
+     *
+     * @param columns column names to include; {@code null}, no names, or a single empty
+     *                name select all columns
+     * @return one map per record, keyed by column name or by
+     *         {@code TemplateFunctions.literalHash(column)} when hashVariable is set;
+     *         duplicates are removed when onlyDistinct is set
+     * @throws IllegalArgumentException if a requested column is not in the header
+     */
+    public List<Map<String, String>> getDataframe(String... columns) throws Exception {
+        // Return entire dataframe if no columns are provided or if empty string is provided
+        if ((columns == null || columns.length == 0) || (columns.length == 1 && columns[0].isEmpty())) {
+            return getDataframe();
+        }
+
+        for (String c : columns) {
+            if (!headers.contains(c)) {
+                throw new IllegalArgumentException("Column " + c + " not found");
+            }
+        }
+
+        // A reader yields its records only once. Re-open the source for every request after
+        // the first so that repeated calls return the data again instead of an empty list.
+        if (consumed) {
+            document.close();
+            document = openDocument();
+        }
+        consumed = true;
+
+        // For stream behaviour rowCount can not be obtained.
+        // LinkedHashSet (rather than HashSet) keeps distinct rows in CSV order.
+        Collection<Map<String, String>> dataframe = onlyDistinct ? new LinkedHashSet<>() : new ArrayList<>();
+
+        for (NamedCsvRecord row : document) {
+            Map<String, String> map = new HashMap<>(columns.length);
+            for (String c : columns) {
+                map.put(hashVariable ? TemplateFunctions.literalHash(c) : c, row.getField(c));
+            }
+            dataframe.add(map);
+        }
+
+        return new ArrayList<>(dataframe);
+    }
+
+    /**
+     * Closes the open reader and with it the underlying file, if any.
+     *
+     * @throws java.io.UncheckedIOException if closing fails
+     */
+    @Override
+    public void shutDown() {
+        if (document == null) {
+            return;
+        }
+        try {
+            document.close();
+        } catch (IOException e) {
+            throw new java.io.UncheckedIOException("Failed to close CSV reader", e);
+        }
+    }
+}
diff --git a/src/main/java/com/cefriel/template/io/rdf/RDFReader.java b/src/main/java/com/cefriel/template/io/rdf/RDFReader.java
@@ -152,11 +152,7 @@ public List<Map<String,Value>> executeQuery(String query) {
      */
     private List<Map<String,String>> getQueryResultsStringValue(String query) {
         List<Map<String,Value>> valueResults = executeQuery(query);
-        Collection<Map<String,String>> dataframe;
-        if (onlyDistinct)
-            dataframe = new ArrayList<>();
-        else
-            dataframe = new HashSet<>();
+        Collection<Map<String, String>> dataframe = onlyDistinct ? new HashSet<>() : new ArrayList<>();
         for(Map<String,Value> row : valueResults) {
             if (hashVariable)
                 dataframe.add(row.entrySet().stream()

diff --git a/src/main/java/com/cefriel/template/io/sql/SQLReader.java b/src/main/java/com/cefriel/template/io/sql/SQLReader.java
@@ -124,12 +124,8 @@ public ResultSet executeQuery(String query) {
     private List<Map<String, String>> populateDataframe(int rowCount, ResultSet resultSet, String filterVariables) throws SQLException {
         ResultSetMetaData metaData = resultSet.getMetaData();
         int columnCount = metaData.getColumnCount();
-
-        Collection<Map<String,String>> dataframe;
-        if (onlyDistinct)
-            dataframe = new ArrayList<>(rowCount);
-        else
-            dataframe = new HashSet<>(rowCount);
+
+        Collection<Map<String, String>> dataframe = onlyDistinct ? new HashSet<>(rowCount) : new ArrayList<>(rowCount);
 
         List<String> filters = null;
         if (filterVariables != null)

diff --git a/src/main/java/com/cefriel/template/io/xml/XMLReader.java b/src/main/java/com/cefriel/template/io/xml/XMLReader.java
@@ -102,11 +102,7 @@ public List<Map<String, String>> getQueryResultsStringValue(String query) throws
         XQueryExpression exp = sqc.compileQuery(query);
         SequenceIterator iter = exp.iterator(dynamicContext);
         // TODO Check if rowCount can be obtained to properly initialise the ArrayList capacity
-        Collection<Map<String,String>> dataframe;
-        if (onlyDistinct)
-            dataframe = new ArrayList<>();
-        else
-            dataframe = new HashSet<>();
+        Collection<Map<String, String>> dataframe = onlyDistinct ? new HashSet<>() : new ArrayList<>();
 
         while (true) {
             Item item = iter.next();

diff --git a/src/main/java/com/cefriel/template/utils/TemplateFunctions.java b/src/main/java/com/cefriel/template/utils/TemplateFunctions.java
@@ -550,11 +550,14 @@ public static String encodeURIComponent(String component) {
         } else
             component = URLEncoder.encode(component, StandardCharsets.UTF_8);
 
+        // TODO Check how to generalize this
         for (char c : component.toCharArray()) {
             if (c == '+')
                 builder.append("%20");
             else if (c == '*')
                 builder.append("%2A");
+            else if (c == '/')
+                builder.append("%2F");
             else
                 builder.append(c);
         }
@@ -579,7 +582,7 @@ public void setBaseIRI(String baseIRI) {
     public String resolveIRI(String s) throws Exception {
         if(s != null) {
             if (!isAbsoluteURI(s)) {
-                s = baseIRI + s;
+                s = baseIRI + encodeURIComponent(s);
                 s = new URI(s).toString();
             } else {
                 URLComponents url = new URLComponents(s);