diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 8f549063d..fcd91ac85 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -38,6 +38,16 @@ jobs: with: distribution: adopt java-version: 17 + - name: Install Maven 4 + run: | + MAVEN_VERSION=4.0.0-rc-5 + BASE_URL="https://dlcdn.apache.org/maven/maven-4/${MAVEN_VERSION}/binaries" + curl -fsSL "${BASE_URL}/apache-maven-${MAVEN_VERSION}-bin.tar.gz" -o maven.tar.gz + mkdir -p "$HOME/maven" + tar -xzf maven.tar.gz -C "$HOME/maven" + echo "M2_HOME=$HOME/maven/apache-maven-${MAVEN_VERSION}" >> "$GITHUB_ENV" + echo "$HOME/maven/apache-maven-${MAVEN_VERSION}/bin" >> "$GITHUB_PATH" + rm maven.tar.gz - name: Build with Maven run: mvn -B --no-transfer-progress -Prat -DskipTests verify -Dskip.format.code=false @@ -63,5 +73,15 @@ jobs: with: distribution: adopt java-version: ${{ matrix.java }} + - name: Install Maven 4 + run: | + MAVEN_VERSION=4.0.0-rc-5 + BASE_URL="https://dlcdn.apache.org/maven/maven-4/${MAVEN_VERSION}/binaries" + curl -fsSL "${BASE_URL}/apache-maven-${MAVEN_VERSION}-bin.tar.gz" -o maven.tar.gz + mkdir -p "$HOME/maven" + tar -xzf maven.tar.gz -C "$HOME/maven" + echo "M2_HOME=$HOME/maven/apache-maven-${MAVEN_VERSION}" >> "$GITHUB_ENV" + echo "$HOME/maven/apache-maven-${MAVEN_VERSION}/bin" >> "$GITHUB_PATH" + rm maven.tar.gz - name: Build with Maven - run: mvn -B --no-transfer-progress package --file pom.xml -DCI_ENV=true verify + run: mvn -B --no-transfer-progress package --file pom.xml -DCI_ENV=true verify \ No newline at end of file diff --git a/archetype/pom.xml b/archetype/pom.xml index 1e9edcbdc..7aa999942 100644 --- a/archetype/pom.xml +++ b/archetype/pom.xml @@ -19,55 +19,52 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 - - org.apache.stormcrawler - stormcrawler - 3.5.1-SNAPSHOT - - stormcrawler-archetype - stormcrawler-archetype - - - - src/main/resources - true - - archetype-resources/pom.xml - archetype-resources/crawler-conf.yaml - - - - src/main/resources - false - - archetype-resources/pom.xml - - - - - - org.apache.maven.archetype - archetype-packaging - 3.4.1 - - - - - - maven-archetype-plugin - 3.4.1 - - - org.apache.maven.plugins - maven-resources-plugin - 3.3.1 - - \ - - - - - + + + + + stormcrawler-archetype + stormcrawler-archetype + + + + src/main/resources + true + + archetype-resources/pom.xml + archetype-resources/crawler-conf.yaml + + + + src/main/resources + false + + archetype-resources/pom.xml + + + + + + org.apache.maven.archetype + archetype-packaging + 3.4.1 + + + + + + maven-archetype-plugin + 3.4.1 + + + org.apache.maven.plugins + maven-resources-plugin + 3.3.1 + + \ + + + + + diff --git a/core/pom.xml b/core/pom.xml index b70d8439b..98f3fe8b4 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -19,14 +19,9 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 - - - org.apache.stormcrawler - stormcrawler - 3.5.1-SNAPSHOT - + + + stormcrawler-core jar diff --git a/external/ai/pom.xml b/external/ai/pom.xml index 34cfa79ff..900056bb4 100644 --- a/external/ai/pom.xml +++ b/external/ai/pom.xml @@ -19,44 +19,38 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 - - org.apache.stormcrawler - stormcrawler-external - 3.5.1-SNAPSHOT - ../pom.xml - - - - stormcrawler-ai - stormcrawler-ai - - https://github.com/apache/stormcrawler/tree/master/external/ai - AI resources for StormCrawler - - - 1.8.0 - 1.8.0 - - - - - dev.langchain4j - langchain4j - ${langchain4j.version} - - - org.apache.opennlp - opennlp-tools - - - - - dev.langchain4j - langchain4j-open-ai - ${langchain4j.openai.version} - - + + + + + stormcrawler-ai + stormcrawler-ai + + https://github.com/apache/stormcrawler/tree/master/external/ai + AI resources for StormCrawler + + + 1.8.0 + 1.8.0 + + + + + dev.langchain4j + langchain4j + ${langchain4j.version} + + + org.apache.opennlp + opennlp-tools + + + + + dev.langchain4j + langchain4j-open-ai + ${langchain4j.openai.version} + + \ No newline at end of file diff --git a/external/aws/pom.xml b/external/aws/pom.xml index a84b3ae82..d1999a81b 100644 --- a/external/aws/pom.xml +++ b/external/aws/pom.xml @@ -19,15 +19,9 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 - - - org.apache.stormcrawler - stormcrawler-external - 3.5.1-SNAPSHOT - ../pom.xml - + + + stormcrawler-aws jar diff --git a/external/langid/pom.xml b/external/langid/pom.xml index ab8398d02..04549c477 100644 --- a/external/langid/pom.xml +++ b/external/langid/pom.xml @@ -19,15 +19,9 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 - - - org.apache.stormcrawler - stormcrawler-external - 3.5.1-SNAPSHOT - ../pom.xml - + + + stormcrawler-langid jar diff --git a/external/opensearch/archetype/pom.xml b/external/opensearch/archetype/pom.xml index 237d9a2d1..2f77edbfa 100644 --- a/external/opensearch/archetype/pom.xml +++ b/external/opensearch/archetype/pom.xml @@ -19,54 +19,49 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 + + + + ../../.. + - - org.apache.stormcrawler - stormcrawler - 3.5.1-SNAPSHOT - ../../../pom.xml - + stormcrawler-opensearch-archetype + + maven-archetype - stormcrawler-opensearch-archetype - - maven-archetype + + + + src/main/resources + true + + META-INF/maven/archetype-metadata.xml + + + + src/main/resources + false + + META-INF/maven/archetype-metadata.xml + + + - + + + org.apache.maven.archetype + archetype-packaging + 3.4.1 + + - - - src/main/resources - true - - META-INF/maven/archetype-metadata.xml - - - - src/main/resources - false - - META-INF/maven/archetype-metadata.xml - - - - - - - org.apache.maven.archetype - archetype-packaging - 3.4.1 - - - - - - - maven-archetype-plugin - 3.4.1 - - - - + + + + maven-archetype-plugin + 3.4.1 + + + + diff --git a/external/opensearch/pom.xml b/external/opensearch/pom.xml index 536565b8c..d2be2f67f 100644 --- a/external/opensearch/pom.xml +++ b/external/opensearch/pom.xml @@ -19,15 +19,9 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 + - - org.apache.stormcrawler - stormcrawler-external - 3.5.1-SNAPSHOT - ../pom.xml - + 2.19.4 @@ -110,6 +104,6 @@ under the License. slf4j-simple test - + diff --git a/external/playwright/pom.xml b/external/playwright/pom.xml index fa40b1ab4..8a03d70c1 100644 --- a/external/playwright/pom.xml +++ b/external/playwright/pom.xml @@ -19,15 +19,9 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 - - - org.apache.stormcrawler - stormcrawler-external - 3.5.1-SNAPSHOT - ../pom.xml - + + + stormcrawler-playwright jar diff --git a/external/pom.xml b/external/pom.xml index 6324f5c9e..2cdb06ec8 100644 --- a/external/pom.xml +++ b/external/pom.xml @@ -19,20 +19,30 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 - - - org.apache.stormcrawler - stormcrawler - 3.5.1-SNAPSHOT - ../pom.xml - + + + stormcrawler-external pom - + + ai + aws + langid + opensearch + opensearch/archetype + playwright + selenium + solr + solr/archetype + sql + tika + urlfrontier + warc + + + org.apache.storm storm-client @@ -62,7 +72,5 @@ under the License. ${mockito.version} test - - diff --git a/external/selenium/pom.xml b/external/selenium/pom.xml index e36550a75..325e26ced 100644 --- a/external/selenium/pom.xml +++ b/external/selenium/pom.xml @@ -19,15 +19,9 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 - - - org.apache.stormcrawler - stormcrawler-external - 3.5.1-SNAPSHOT - ../pom.xml - + + + stormcrawler-selenium jar @@ -102,7 +96,6 @@ under the License. junit-jupiter test - diff --git a/external/solr/archetype/pom.xml b/external/solr/archetype/pom.xml index 5bd2f29da..200afd18b 100644 --- a/external/solr/archetype/pom.xml +++ b/external/solr/archetype/pom.xml @@ -19,54 +19,49 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 + - - org.apache.stormcrawler - stormcrawler - 3.5.1-SNAPSHOT - ../../../pom.xml - + + ../../.. + - stormcrawler-solr-archetype + stormcrawler-solr-archetype - maven-archetype + maven-archetype - + + + + src/main/resources + true + + META-INF/maven/archetype-metadata.xml + + + + src/main/resources + false + + META-INF/maven/archetype-metadata.xml + + + - - - src/main/resources - true - - META-INF/maven/archetype-metadata.xml - - - - src/main/resources - false - - META-INF/maven/archetype-metadata.xml - - - + + + org.apache.maven.archetype + archetype-packaging + 3.4.1 + + - - - org.apache.maven.archetype - archetype-packaging - 3.4.1 - - - - - - - maven-archetype-plugin - 3.4.1 - - - - + + + + maven-archetype-plugin + 3.4.1 + + + + diff --git a/external/solr/pom.xml b/external/solr/pom.xml index d2759bd3f..e4d734b8d 100644 --- a/external/solr/pom.xml +++ b/external/solr/pom.xml @@ -19,15 +19,9 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 - - - org.apache.stormcrawler - stormcrawler-external - 3.5.1-SNAPSHOT - ../pom.xml - + + + stormcrawler-solr jar diff --git a/external/sql/pom.xml b/external/sql/pom.xml index 0eece4e64..d0ce4285e 100644 --- a/external/sql/pom.xml +++ b/external/sql/pom.xml @@ -19,15 +19,9 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 - - - org.apache.stormcrawler - stormcrawler-external - 3.5.1-SNAPSHOT - ../pom.xml - + + + stormcrawler-sql jar diff --git a/external/tika/pom.xml b/external/tika/pom.xml index b956dc812..6d1972b03 100644 --- a/external/tika/pom.xml +++ b/external/tika/pom.xml @@ -19,15 +19,9 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 + - - org.apache.stormcrawler - stormcrawler-external - 3.5.1-SNAPSHOT - ../pom.xml - + stormcrawler-tika jar diff --git a/external/urlfrontier/pom.xml b/external/urlfrontier/pom.xml index e4d4d5c5b..eba8b7ec1 100644 --- a/external/urlfrontier/pom.xml +++ b/external/urlfrontier/pom.xml @@ -19,15 +19,9 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 + - - org.apache.stormcrawler - stormcrawler-external - 3.5.1-SNAPSHOT - ../pom.xml - + stormcrawler-urlfrontier jar diff --git a/external/warc/pom.xml b/external/warc/pom.xml index 748333fcd..0a265219b 100644 --- a/external/warc/pom.xml +++ b/external/warc/pom.xml @@ -19,15 +19,9 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 - - - org.apache.stormcrawler - stormcrawler-external - 3.5.1-SNAPSHOT - ../pom.xml - + + + stormcrawler-warc jar @@ -63,12 +57,12 @@ under the License. ${storm-client.version} - + jdk.tools jdk.tools - + org.apache.hive.hcatalog hive-webhcat-java-client @@ -96,6 +90,5 @@ under the License. test-jar test - diff --git a/pom.xml b/pom.xml index 5a1407425..94535b4e5 100644 --- a/pom.xml +++ b/pom.xml @@ -19,9 +19,8 @@ specific language governing permissions and limitations under the License. --> - - 4.0.0 - + + org.apache apache @@ -73,8 +72,8 @@ under the License. 2.0.17 26.0.2-1 2.17.0 - 1.28.0 - 1.20.0 + 1.28.0 + 1.20.0 5.4 1.21.3 2.7.0 @@ -467,34 +466,34 @@ under the License. - - org.apache.maven.plugins - maven-checkstyle-plugin - ${checkstyle-maven-plugin.version} - - - com.puppycrawl.tools - checkstyle - ${checkstyle.version} - - - - checkstyle.xml - - - - - check - - - - + + org.apache.maven.plugins + maven-checkstyle-plugin + ${checkstyle-maven-plugin.version} + + + com.puppycrawl.tools + checkstyle + ${checkstyle.version} + + + + checkstyle.xml + + + + + check + + + + + that are not checked into Git --> rat @@ -515,33 +514,33 @@ under the License. ${project.basedir}/rat **/*.ndjson - **/*.mapping - **/*.flux - **/*.txt - **/*.rss - **/*.tar.gz - **/README.md - **/target/** - **/warc.inputs - **/llm-default-prompt.txt - LICENSE - NOTICE - CONTRIBUTING.md - RELEASING.md + **/*.mapping + **/*.flux + **/*.txt + **/*.rss + **/*.tar.gz + **/README.md + **/target/** + **/warc.inputs + **/llm-default-prompt.txt + LICENSE + NOTICE + CONTRIBUTING.md + RELEASING.md external/opensearch/dashboards/** external/solr/archetype/src/main/resources/archetype-resources/configsets/** - THIRD-PARTY.properties - THIRD-PARTY.txt + THIRD-PARTY.properties + THIRD-PARTY.txt .github/ISSUE_TEMPLATE/*.yml - .github/*.md - .mvn/*.config - .gitattributes - **/dependency-reduced-pom.xml - .editorconfig - **/.settings/**/* - **/.classpath - **/.project - **/.idea + .github/*.md + .mvn/*.config + .gitattributes + **/dependency-reduced-pom.xml + .editorconfig + **/.settings/**/* + **/.classpath + **/.project + **/.idea @@ -660,38 +659,18 @@ under the License. import - - org.apache.commons - commons-compress - ${commons.compress.version} - - - - commons-codec - commons-codec - ${commons.codec.version} - + + org.apache.commons + commons-compress + ${commons.compress.version} + + + commons-codec + commons-codec + ${commons.codec.version} + - - core - external - external/ai - external/aws - external/langid - external/opensearch - external/playwright - external/selenium - external/solr - external/sql - external/tika - external/urlfrontier - external/warc - archetype - external/opensearch/archetype - external/solr/archetype - -