Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ dependencies {
implementation 'org.json:json:20240303'
implementation 'dev.harrel:json-schema:1.5.0'
implementation 'com.sanctionco.jmail:jmail:1.6.3' // Needed for e-mail format validation

// Apache POI dependencies for Excel support
implementation 'org.apache.poi:poi:5.4.1'
implementation 'org.apache.poi:poi-ooxml:5.4.1'
implementation 'org.apache.poi:poi-scratchpad:5.4.1'
}

version = '2.5.1'
Expand Down
144 changes: 144 additions & 0 deletions scripts/create_test_excel_files.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#!/usr/bin/env groovy

@Grab('org.apache.poi:poi:5.4.1')
@Grab('org.apache.poi:poi-ooxml:5.4.1')
@Grab('org.apache.poi:poi-scratchpad:5.4.1')

import org.apache.poi.ss.usermodel.*
import org.apache.poi.xssf.usermodel.XSSFWorkbook
import org.apache.poi.hssf.usermodel.HSSFWorkbook
import java.nio.file.Path
import java.nio.file.Paths
import java.text.SimpleDateFormat

/**
* Helper script to create Excel test files for nf-schema testing
*/
/**
 * Generates all Excel fixtures for the nf-schema tests inside src/testResources.
 *
 * The target directory is created when missing; each fixture is then written
 * by its dedicated creator function.
 */
def createTestFiles() {
    final testResourcesDir = Paths.get("src/testResources")

    // Ensure the output directory is available before any file is written
    testResourcesDir.toFile().mkdirs()

    println "Creating Excel test files..."

    // Turns a bare file name into the string path of a file inside the output directory
    def pathOf = { String name -> testResourcesDir.resolve(name).toString() }

    // Basic fixture (Excel counterpart of correct.csv)
    createBasicTestFile(pathOf("correct.xlsx"), "xlsx")

    // Workbook with several sheets, used for sheet-selection tests
    createMultiSheetFile(pathOf("multisheet.xlsx"))

    // Workbook containing a row with an empty cell
    createEmptyCellsFile(pathOf("empty_cells.xlsx"))

    println "✅ Excel test files created successfully in ${testResourcesDir}"
}

/**
 * Writes a single-sheet workbook ("Sheet1") holding a header row and three sample rows.
 *
 * @param filename path of the Excel file to write
 * @param format   "xls" creates an HSSFWorkbook; any other value creates an XSSFWorkbook
 */
def createBasicTestFile(String filename, String format) {
    Workbook workbook = format == "xls" ? new HSSFWorkbook() : new XSSFWorkbook()
    try {
        Sheet sheet = workbook.createSheet("Sheet1")

        // Header row matching the correct.csv structure
        Row headerRow = sheet.createRow(0)
        def headers = ["sample", "fastq_1", "fastq_2", "strandedness"]
        headers.eachWithIndex { header, index ->
            headerRow.createCell(index).setCellValue(header)
        }

        // Data rows matching the test samplesheet data
        def data = [
            ["SAMPLE_PE", "SAMPLE_PE_RUN1_1.fastq.gz", "SAMPLE_PE_RUN1_2.fastq.gz", "forward"],
            ["SAMPLE_PE", "SAMPLE_PE_RUN2_1.fastq.gz", "SAMPLE_PE_RUN2_2.fastq.gz", "forward"],
            ["SAMPLE_SE", "SAMPLE_SE_RUN1_1.fastq.gz", "", "forward"]
        ]

        data.eachWithIndex { row, rowIndex ->
            // Row 0 is the header, so data starts at row 1
            Row dataRow = sheet.createRow(rowIndex + 1)
            row.eachWithIndex { value, colIndex ->
                // Null or empty values get no cell at all, leaving the position blank
                if (value != null && value != "") {
                    Cell cell = dataRow.createCell(colIndex)
                    cell.setCellValue(value.toString())
                }
            }
        }

        // Auto-size every column that has a header
        headers.eachWithIndex { header, index ->
            sheet.autoSizeColumn(index)
        }

        // withStream closes the output stream even when write() throws
        new FileOutputStream(filename).withStream { fileOut ->
            workbook.write(fileOut)
        }
    } finally {
        // Release the workbook regardless of whether building or writing succeeded
        workbook.close()
    }

    println "Created: ${filename}"
}

/**
 * Writes an .xlsx workbook with two sheets for sheet-selection testing.
 *
 * "Sheet1" has the samplesheet header plus one sample row; "Sheet2" has
 * different columns ("sample_id", "condition") plus one row.
 *
 * @param filename path of the Excel file to write
 */
def createMultiSheetFile(String filename) {
    Workbook workbook = new XSSFWorkbook()
    try {
        // Sheet 1 - same header layout as the basic test file
        Sheet sheet1 = workbook.createSheet("Sheet1")
        Row headerRow1 = sheet1.createRow(0)
        def headers = ["sample", "fastq_1", "fastq_2", "strandedness"]
        headers.eachWithIndex { header, index ->
            headerRow1.createCell(index).setCellValue(header)
        }

        Row dataRow1 = sheet1.createRow(1)
        def data1 = ["SAMPLE_PE", "SAMPLE_PE_RUN1_1.fastq.gz", "SAMPLE_PE_RUN1_2.fastq.gz", "forward"]
        data1.eachWithIndex { value, colIndex ->
            Cell cell = dataRow1.createCell(colIndex)
            cell.setCellValue(value.toString())
        }

        // Sheet 2 - different columns and data
        Sheet sheet2 = workbook.createSheet("Sheet2")
        Row headerRow2 = sheet2.createRow(0)
        headerRow2.createCell(0).setCellValue("sample_id")
        headerRow2.createCell(1).setCellValue("condition")

        Row dataRow2 = sheet2.createRow(1)
        dataRow2.createCell(0).setCellValue("sample2")
        dataRow2.createCell(1).setCellValue("control")

        // withStream closes the output stream even when write() throws
        new FileOutputStream(filename).withStream { fileOut ->
            workbook.write(fileOut)
        }
    } finally {
        // Release the workbook regardless of whether building or writing succeeded
        workbook.close()
    }

    println "Created: ${filename}"
}

/**
 * Writes an .xlsx workbook whose single data row has no cell in the "fastq_2" column.
 *
 * @param filename path of the Excel file to write
 */
def createEmptyCellsFile(String filename) {
    Workbook workbook = new XSSFWorkbook()
    try {
        Sheet sheet = workbook.createSheet("Sheet1")

        // Header row
        Row headerRow = sheet.createRow(0)
        def headers = ["sample", "fastq_1", "fastq_2", "strandedness"]
        headers.eachWithIndex { header, index ->
            headerRow.createCell(index).setCellValue(header)
        }

        // Data row: column index 2 (fastq_2) is deliberately never created
        Row dataRow = sheet.createRow(1)
        dataRow.createCell(0).setCellValue("SAMPLE_SE") // sample
        dataRow.createCell(1).setCellValue("SAMPLE_SE_RUN1_1.fastq.gz") // fastq_1
        dataRow.createCell(3).setCellValue("forward") // strandedness

        // withStream closes the output stream even when write() throws
        new FileOutputStream(filename).withStream { fileOut ->
            workbook.write(fileOut)
        }
    } finally {
        // Release the workbook regardless of whether building or writing succeeded
        workbook.close()
    }

    println "Created: ${filename}"
}

// Script entry point: create all Excel test files when this script is executed
createTestFiles()
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@ import nextflow.Nextflow
import static nextflow.validation.utils.Colors.getLogColors
import static nextflow.validation.utils.Files.fileToJson
import static nextflow.validation.utils.Files.fileToObject
import static nextflow.validation.utils.Files.getFileType
import static nextflow.validation.utils.Common.findDeep
import static nextflow.validation.utils.Common.hasDeepKey
import nextflow.validation.config.ValidationConfig
import nextflow.validation.exceptions.SchemaValidationException
import nextflow.validation.utils.WorkbookConverter
import nextflow.validation.validators.JsonSchemaValidator
import nextflow.validation.validators.ValidationResult

Expand Down Expand Up @@ -96,9 +98,29 @@ class SamplesheetConverter {
throw new SchemaValidationException(msg)
}

// Check if this is an Excel file and process accordingly
def String fileType = getFileType(samplesheetFile)
def JSONArray samplesheet
def List samplesheetList

if (fileType in ['xlsx', 'xlsm', 'xlsb', 'xls']) {
// Process Excel file using WorkbookConverter
def WorkbookConverter workbookConverter = new WorkbookConverter(config)
samplesheetList = workbookConverter.convertToList(samplesheetFile, options) as List

// Convert to JSON for validation - same as other formats
def jsonGenerator = new groovy.json.JsonGenerator.Options()
.excludeNulls()
.build()
samplesheet = new JSONArray(jsonGenerator.toJson(samplesheetList))
} else {
// Process other file formats
samplesheet = fileToJson(samplesheetFile, schemaFile) as JSONArray
samplesheetList = fileToObject(samplesheetFile, schemaFile) as List
}

// Validate
final validator = new JsonSchemaValidator(config)
def JSONArray samplesheet = fileToJson(samplesheetFile, schemaFile) as JSONArray
def ValidationResult validationResult = validator.validate(samplesheet, schemaFile.toString())
def validationErrors = validationResult.getErrors('field')
if (validationErrors) {
Expand All @@ -107,8 +129,7 @@ class SamplesheetConverter {
throw new SchemaValidationException(msg, validationErrors)
}

// Convert
def List samplesheetList = fileToObject(samplesheetFile, schemaFile) as List
// Convert (already done above for Excel files)
this.rows = []

def List channelFormat = samplesheetList.collect { entry ->
Expand Down
16 changes: 13 additions & 3 deletions src/main/groovy/nextflow/validation/utils/Files.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ import java.io.FileReader
import java.io.File

import nextflow.validation.exceptions.SchemaValidationException
import nextflow.validation.utils.WorkbookConverter
import nextflow.validation.config.ValidationConfig
import static nextflow.validation.utils.Common.getValueFromJsonPointer
import static nextflow.validation.utils.Types.inferType

Expand All @@ -32,11 +34,19 @@ import static nextflow.validation.utils.Types.inferType
public class Files {

//
// Function to detect if a file is a CSV, TSV, JSON or YAML file
// Function to get file extension from filename
//
public static String getFileExtension(String filename) {
    // A missing name has no extension; return "" instead of failing on null
    if (filename == null) {
        return ""
    }
    // Only dots after the last path separator count, so a dot in a directory
    // name (e.g. "results.v2/samplesheet") is not treated as an extension separator
    int lastSeparatorIndex = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\'))
    int lastDotIndex = filename.lastIndexOf('.')
    // Returns the text after the final dot, or "" when the last segment has no dot
    return lastDotIndex > lastSeparatorIndex ? filename.substring(lastDotIndex + 1) : ""
}

//
// Function to detect if a file is a CSV, TSV, JSON, YAML or Excel file
//
public static String getFileType(Path file) {
def String extension = file.getExtension()
if (extension in ["csv", "tsv", "yml", "yaml", "json"]) {
if (extension in ["csv", "tsv", "yml", "yaml", "json", "xlsx", "xlsm", "xlsb", "xls"]) {
return extension == "yml" ? "yaml" : extension
}

Expand All @@ -46,7 +56,7 @@ public class Files {
def Integer tabCount = header.count("\t")

if ( commaCount == tabCount ){
log.error("Could not derive file type from ${file}. Please specify the file extension (CSV, TSV, YML, YAML and JSON are supported).".toString())
log.error("Could not derive file type from ${file}. Please specify the file extension (CSV, TSV, YML, YAML, JSON, and Excel formats are supported).".toString())
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe also specify exactly which Excel formats are supported?

}
if ( commaCount > tabCount ){
return "csv"
Expand Down
Loading
Loading